alt_text_llm-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alt-text-llm might be problematic.

alt_text_llm/main.py ADDED
@@ -0,0 +1,235 @@
+ """Main entry point for alt text generation and labeling workflows."""
+
+ import argparse
+ import asyncio
+ import json
+ from enum import StrEnum
+ from pathlib import Path
+
+ from rich.console import Console
+
+ from alt_text_llm import generate, label, scan, utils
+
+ _JSON_INDENT: int = 2
+
+
+ class Command(StrEnum):
+     """Available commands for alt text workflows."""
+
+     SCAN = "scan"
+     GENERATE = "generate"
+     LABEL = "label"
+
+
+ def _scan_command(args: argparse.Namespace) -> None:
+     """Execute the scan sub-command."""
+     output_path = (
+         args.output or utils.get_git_root() / "scripts" / "asset_queue.json"
+     )
+     queue_items = scan.build_queue(args.root)
+
+     output_path.write_text(
+         json.dumps(
+             [item.to_json() for item in queue_items],
+             indent=_JSON_INDENT,
+             ensure_ascii=False,
+         ),
+         encoding="utf-8",
+     )
+     print(f"Wrote {len(queue_items)} queue item(s) to {output_path}")
+
+
+ def _generate_command(args: argparse.Namespace) -> None:
+     """Execute the generate sub-command."""
+     if not args.model:
+         print("Error: --model is required for the generate command")
+         exit(1)
+
+     opts = generate.GenerateAltTextOptions(
+         root=args.root,
+         model=args.model,
+         max_chars=args.max_chars,
+         timeout=args.timeout,
+         output_path=args.captions,
+         skip_existing=args.skip_existing,
+     )
+
+     suggestions_path = args.suggestions_file
+     console = Console()
+     queue_items = scan.build_queue(opts.root)
+
+     if opts.skip_existing:
+         queue_items = generate.filter_existing_captions(
+             queue_items,
+             [opts.output_path, suggestions_path],
+             console,
+             verbose=False if args.estimate_only else True,
+         )
+
+     # Show cost estimate
+     cost_est = generate.estimate_cost(opts.model, len(queue_items))
+     console.print(
+         f"[bold blue]{len(queue_items)} items → {cost_est} using model '{opts.model}'[/bold blue]"
+     )
+
+     # If estimate-only mode, exit here
+     if args.estimate_only:
+         return
+
+     # Run generation
+     if not queue_items:
+         console.print("[yellow]No items to process.[/yellow]")
+         return
+
+     console.print(
+         f"[bold green]Generating {len(queue_items)} suggestions with '{opts.model}'[/bold green]"
+     )
+
+     suggestions = []
+     try:
+         suggestions = asyncio.run(
+             generate.async_generate_suggestions(queue_items, opts)
+         )
+     finally:
+         utils.write_output(suggestions, suggestions_path, append_mode=True)
+         console.print(
+             f"[green]Saved {len(suggestions)} suggestions to {suggestions_path}[/green]"
+         )
+
+
+ def _label_command(args: argparse.Namespace) -> None:
+     """Execute the label sub-command."""
+     label.label_from_suggestions_file(
+         args.suggestions_file, args.output, args.skip_existing, args.vi_mode
+     )
+
+
+ def _parse_args() -> argparse.Namespace:
+     """Parse command-line arguments for all alt text workflows."""
+     git_root = utils.get_git_root()
+
+     parser = argparse.ArgumentParser(
+         description="Alt text generation and labeling workflows"
+     )
+     subparsers = parser.add_subparsers(
+         dest="command", help="Available commands"
+     )
+
+     # ---------------------------------------------------------------------------
+     # scan sub-command
+     # ---------------------------------------------------------------------------
+     scan_parser = subparsers.add_parser(
+         Command.SCAN,
+         help="Scan markdown files for assets without meaningful alt text",
+     )
+     scan_parser.add_argument(
+         "--root",
+         type=Path,
+         default=git_root / "website_content",
+         help="Directory to search (default: website_content)",
+     )
+     scan_parser.add_argument(
+         "--output",
+         type=Path,
+         help="Path for output JSON file (default: <git_root>/scripts/asset_queue.json)",
+     )
+
+     # ---------------------------------------------------------------------------
+     # generate sub-command
+     # ---------------------------------------------------------------------------
+     generate_parser = subparsers.add_parser(
+         Command.GENERATE, help="Generate AI alt text suggestions"
+     )
+     generate_parser.add_argument(
+         "--root",
+         type=Path,
+         default=git_root / "website_content",
+         help="Markdown root directory",
+     )
+     generate_parser.add_argument(
+         "--model", required=True, help="LLM model to use for generation"
+     )
+     generate_parser.add_argument(
+         "--max-chars",
+         type=int,
+         default=300,
+         help="Max characters for generated alt text",
+     )
+     generate_parser.add_argument(
+         "--timeout", type=int, default=120, help="LLM command timeout seconds"
+     )
+     generate_parser.add_argument(
+         "--captions",
+         type=Path,
+         default=git_root / "scripts" / "asset_captions.json",
+         help="Existing/final captions JSON path (used to skip existing unless --process-existing)",
+     )
+     generate_parser.add_argument(
+         "--suggestions-file",
+         type=Path,
+         default=git_root / "scripts" / "suggested_alts.json",
+         help="Path to read/write suggestions JSON",
+     )
+     generate_parser.add_argument(
+         "--process-existing",
+         dest="skip_existing",
+         action="store_false",
+         help="Also process assets that already have captions (default is to skip)",
+     )
+     generate_parser.add_argument(
+         "--estimate-only",
+         action="store_true",
+         help="Only estimate cost without generating suggestions",
+     )
+     generate_parser.set_defaults(skip_existing=True)
+
+     # ---------------------------------------------------------------------------
+     # label sub-command
+     # ---------------------------------------------------------------------------
+     label_parser = subparsers.add_parser(
+         Command.LABEL, help="Interactively label alt text suggestions"
+     )
+     label_parser.add_argument(
+         "--suggestions-file",
+         type=Path,
+         default=git_root / "scripts" / "suggested_alts.json",
+         help="Path to read suggestions JSON",
+     )
+     label_parser.add_argument(
+         "--output",
+         type=Path,
+         default=git_root / "scripts" / "asset_captions.json",
+         help="Final captions JSON path",
+     )
+     label_parser.add_argument(
+         "--skip-existing",
+         action="store_true",
+         default=True,
+         help="Skip captions already present in output file",
+     )
+     label_parser.add_argument(
+         "--vi-mode",
+         action="store_true",
+         default=False,
+         help="Enable vi keybindings for text editing (default: disabled)",
+     )
+
+     return parser.parse_args()
+
+
+ def main() -> None:
+     """Main entry point for alt text workflows."""
+     args = _parse_args()
+
+     if args.command == Command.SCAN:
+         _scan_command(args)
+     elif args.command == Command.GENERATE:
+         _generate_command(args)
+     elif args.command == Command.LABEL:
+         _label_command(args)
+     else:
+         raise ValueError(f"Invalid command: {args.command}")
+
+
+ if __name__ == "__main__":
+     main()
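
Taken together, the sub-commands form a scan → generate → label pipeline. As a rough sketch of driving it end to end, the snippet below shells out to python -m alt_text_llm.main (the module is directly runnable via the __main__ guard above; whether the package also registers a console-script alias is not visible in this diff). The model name is a placeholder, and because the default paths are resolved via utils.get_git_root(), the commands are assumed to run from inside the target website repository.

import subprocess
import sys

# Placeholder model name; the set of supported models lives in
# alt_text_llm.generate, which is not part of this diff.
MODEL = "gpt-4o-mini"

steps = [
    # 1. Build the work queue of assets lacking meaningful alt text.
    [sys.executable, "-m", "alt_text_llm.main", "scan"],
    # 2. Preview the cost, then generate suggestions with the chosen model.
    [sys.executable, "-m", "alt_text_llm.main", "generate", "--model", MODEL, "--estimate-only"],
    [sys.executable, "-m", "alt_text_llm.main", "generate", "--model", MODEL],
    # 3. Interactively review and accept the suggestions.
    [sys.executable, "-m", "alt_text_llm.main", "label"],
]
for cmd in steps:
    subprocess.run(cmd, check=True)
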
alt_text_llm/scan.py ADDED
@@ -0,0 +1,219 @@
+ """
+ Scan markdown files for assets without meaningful alt text.
+
+ This script produces a JSON work-queue.
+ """
+
+ import re
+ from dataclasses import asdict, dataclass
+ from pathlib import Path
+ from typing import Iterable, Sequence
+
+ from markdown_it import MarkdownIt
+ from markdown_it.token import Token
+
+ from alt_text_llm import utils
+
+
+ @dataclass(slots=True)
+ class QueueItem:
+     """Represents a single asset lacking adequate alt text."""
+
+     markdown_file: str
+     asset_path: str
+     line_number: int  # 1-based, must be positive
+     context_snippet: str
+
+     def __post_init__(self) -> None:
+         if self.line_number <= 0:
+             raise ValueError("line_number must be positive")
+
+     def to_json(self) -> dict[str, str | int]:  # pylint: disable=C0116
+         return asdict(self)
+
+
+ def _create_queue_item(
+     md_path: Path,
+     asset_path: str,
+     line_number: int,
+     lines: Sequence[str],
+ ) -> QueueItem:
+     return QueueItem(
+         markdown_file=str(md_path),
+         asset_path=asset_path,
+         line_number=line_number,
+         context_snippet=utils.paragraph_context(lines, line_number - 1),
+     )
+
+
+ _PLACEHOLDER_ALTS: set[str] = {
+     "img",
+     "image",
+     "photo",
+     "placeholder",
+     "screenshot",
+     "picture",
+ }
+
+
+ def _is_alt_meaningful(alt: str | None) -> bool:
+     if alt is None:
+         return False
+     alt_stripped = alt.strip().lower()
+     return bool(alt_stripped) and alt_stripped not in _PLACEHOLDER_ALTS
+
+
+ def _iter_image_tokens(tokens: Sequence[Token]) -> Iterable[Token]:
+     """Yield all tokens (including nested children) that correspond to
+     images."""
+
+     stack: list[Token] = list(tokens)
+     while stack:
+         token = stack.pop()
+
+         # Depth-first traversal of the token tree.
+         if token.children:
+             stack.extend(token.children)
+
+         if token.type == "image":
+             yield token
+             continue
+
+         if (
+             token.type in {"html_inline", "html_block"}
+             and "<img" in token.content.lower()
+         ):
+             yield token
+
+
+ # ---------------------------------------------------------------------------
+ # Helpers
+ # ---------------------------------------------------------------------------
+
+
+ _ALT_RE = re.compile(r"alt=\"(?P<alt>[^\"]*)\"", re.IGNORECASE)
+
+ # ``markdown_it`` represents HTML img tags inside an ``html_inline`` or
+ # ``html_block`` token. Use a lightweight regex so we do not pull in another
+ # HTML parser just for <img>.
+ _IMG_TAG_RE = re.compile(
+     r"<img\s+[^>]*src=\"(?P<src>[^\"]+)\"[^>]*>", re.IGNORECASE | re.DOTALL
+ )
+
+
+ def _extract_html_img_info(token: Token) -> list[tuple[str, str | None]]:
+     """Return list of (src, alt) pairs for each <img> within the token."""
+
+     infos: list[tuple[str, str | None]] = []
+     for m in _IMG_TAG_RE.finditer(token.content):
+         src = m.group("src")
+         alt_match = _ALT_RE.search(m.group(0))
+         alt: str | None = alt_match.group("alt") if alt_match else None
+         infos.append((src, alt))
+     return infos
+
+
+ def _get_line_number(
+     token: Token, lines: Sequence[str], search_snippet: str
+ ) -> int:
+     if token.map:
+         return token.map[0] + 1
+
+     # Try exact match first
+     for idx, ln in enumerate(lines):
+         if search_snippet in ln:
+             return idx + 1
+
+     # If exact match fails, try with whitespace variations
+     # Remove parentheses and search for just the asset path with flexible whitespace
+     if search_snippet.startswith("(") and search_snippet.endswith(")"):
+         asset_path = search_snippet[1:-1]  # Remove parentheses
+         for idx, ln in enumerate(lines):
+             if asset_path in ln:
+                 return idx + 1
+
+     raise ValueError(
+         f"Could not find asset '{search_snippet}' in markdown file"
+     )
+
+
+ def _handle_md_asset(
+     token: Token, md_path: Path, lines: Sequence[str]
+ ) -> list[QueueItem]:
+     """
+     Process a markdown ``image`` token.
+
+     Args:
+         token: The ``markdown_it`` token representing the asset.
+         md_path: Current markdown file path.
+         lines: Contents of *md_path* split by lines.
+
+     Returns:
+         Zero or one-element list containing a ``QueueItem`` for assets with
+         missing or placeholder alt text.
+     """
+
+     src_raw = token.attrGet("src")
+     src_attr: str | None = str(src_raw) if src_raw is not None else None
+
+     alt_text: str | None = token.content  # alt stored here
+     if not src_attr or _is_alt_meaningful(alt_text):
+         return []
+
+     line_no = _get_line_number(token, lines, f"({src_attr})")
+     return [_create_queue_item(md_path, src_attr, line_no, lines)]
+
+
+ def _handle_html_asset(
+     token: Token, md_path: Path, lines: Sequence[str]
+ ) -> list[QueueItem]:
+     """
+     Process an ``html_inline`` or ``html_block`` token containing ``<img>``.
+
+     Args:
+         token: Token potentially containing one or more ``<img>`` tags.
+         md_path: Current markdown file path.
+         lines: Contents of *md_path* split by lines.
+
+     Returns:
+         List of ``QueueItem`` instances—one for each offending ``<img>``.
+     """
+
+     items: list[QueueItem] = []
+     for src_attr, alt_text in _extract_html_img_info(token):
+         if _is_alt_meaningful(alt_text):
+             continue
+
+         line_no = _get_line_number(token, lines, src_attr)
+         items.append(_create_queue_item(md_path, src_attr, line_no, lines))
+
+     return items
+
+
+ def _process_file(md_path: Path) -> list[QueueItem]:
+     md = MarkdownIt("commonmark")
+     source_text = md_path.read_text(encoding="utf-8")
+     lines = source_text.splitlines()
+
+     items: list[QueueItem] = []
+     tokens = md.parse(source_text)
+     for token in _iter_image_tokens(tokens):
+         if token.type == "image":
+             token_items = _handle_md_asset(token, md_path, lines)
+         else:
+             token_items = _handle_html_asset(token, md_path, lines)
+         items.extend(token_items)
+     return items
+
+
+ def build_queue(root: Path) -> list[QueueItem]:
+     """Return a queue of assets lacking alt text beneath *root*."""
+
+     md_files = utils.get_files(
+         root, filetypes_to_match=(".md",), use_git_ignore=True
+     )
+     queue: list[QueueItem] = []
+     for md_file in md_files:
+         queue.extend(_process_file(md_file))
+
+     return queue
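
build_queue is the function the scan and generate sub-commands in main.py consume; it can also be called on its own. A minimal sketch, assuming the package is installed and that alt_text_llm.utils (not part of this diff) supplies the get_files and paragraph_context helpers the scanner depends on; the directory name is a placeholder:

import json
from pathlib import Path

from alt_text_llm.scan import build_queue

# Collect every image whose alt text is missing or a placeholder such as "img" or "photo".
items = build_queue(Path("website_content"))

# Each QueueItem serialises to a plain dict via to_json(), so the queue can be
# dumped to JSON exactly the way the scan sub-command in main.py does.
print(json.dumps([item.to_json() for item in items], indent=2, ensure_ascii=False))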