alt-text-llm 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alt-text-llm might be problematic. Click here for more details.
- alt_text_llm/__init__.py +13 -0
- alt_text_llm/generate.py +208 -0
- alt_text_llm/label.py +347 -0
- alt_text_llm/main.py +235 -0
- alt_text_llm/scan.py +219 -0
- alt_text_llm/utils.py +515 -0
- alt_text_llm-0.1.0.dist-info/METADATA +181 -0
- alt_text_llm-0.1.0.dist-info/RECORD +12 -0
- alt_text_llm-0.1.0.dist-info/WHEEL +5 -0
- alt_text_llm-0.1.0.dist-info/entry_points.txt +2 -0
- alt_text_llm-0.1.0.dist-info/licenses/LICENSE +21 -0
- alt_text_llm-0.1.0.dist-info/top_level.txt +1 -0
alt_text_llm/main.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
"""Main entry point for alt text generation and labeling workflows."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import asyncio
|
|
5
|
+
import json
|
|
6
|
+
from enum import StrEnum
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from alt_text_llm import generate, label, scan, utils
|
|
12
|
+
|
|
13
|
+
_JSON_INDENT: int = 2
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Command(StrEnum):
|
|
17
|
+
"""Available commands for alt text workflows."""
|
|
18
|
+
|
|
19
|
+
SCAN = "scan"
|
|
20
|
+
GENERATE = "generate"
|
|
21
|
+
LABEL = "label"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _scan_command(args: argparse.Namespace) -> None:
    """Execute the scan sub-command.

    Builds the asset queue for ``args.root`` and writes it as a JSON array
    to ``args.output``. When no output path was given, the file
    ``<git_root>/scripts/asset_queue.json`` is used instead.

    Args:
        args: Parsed command-line arguments; reads ``root`` and ``output``.
    """
    output_path = (
        args.output or utils.get_git_root() / "scripts" / "asset_queue.json"
    )
    queue_items = scan.build_queue(args.root)

    # The destination directory (notably the default ``scripts`` folder) is
    # not guaranteed to exist; create it so the write below does not fail
    # with FileNotFoundError.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(
        [item.to_json() for item in queue_items],
        indent=_JSON_INDENT,
        ensure_ascii=False,
    )
    output_path.write_text(serialized, encoding="utf-8")
    print(f"Wrote {len(queue_items)} queue item(s) to {output_path}")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _generate_command(args: argparse.Namespace) -> None:
    """Execute the generate sub-command.

    Builds the asset queue, optionally drops items that already have
    captions or suggestions, prints a cost estimate, and (unless
    ``--estimate-only`` was given) generates suggestions and appends them to
    the suggestions file.

    Args:
        args: Parsed command-line arguments for the ``generate`` sub-command.

    Raises:
        SystemExit: With exit code 1 when no model was supplied.
    """
    if not args.model:
        print("Error: --model is required for the generate command")
        # Raise SystemExit directly: the ``exit`` builtin is injected by the
        # ``site`` module and is not available in every interpreter setup.
        raise SystemExit(1)

    opts = generate.GenerateAltTextOptions(
        root=args.root,
        model=args.model,
        max_chars=args.max_chars,
        timeout=args.timeout,
        output_path=args.captions,
        skip_existing=args.skip_existing,
    )

    suggestions_path = args.suggestions_file
    console = Console()
    queue_items = scan.build_queue(opts.root)

    if opts.skip_existing:
        queue_items = generate.filter_existing_captions(
            queue_items,
            [opts.output_path, suggestions_path],
            console,
            verbose=not args.estimate_only,
        )

    # Show cost estimate
    cost_est = generate.estimate_cost(opts.model, len(queue_items))
    console.print(
        f"[bold blue]{len(queue_items)} items → {cost_est} using model '{opts.model}'[/bold blue]"
    )

    # If estimate-only mode, exit here
    if args.estimate_only:
        return

    # Run generation
    if not queue_items:
        console.print("[yellow]No items to process.[/yellow]")
        return

    console.print(
        f"[bold green]Generating {len(queue_items)} suggestions with '{opts.model}'[/bold green]"
    )

    # ``suggestions`` stays empty if generation raises, so the ``finally``
    # block below then writes (and reports) an empty result before the
    # exception propagates.
    suggestions = []
    try:
        suggestions = asyncio.run(
            generate.async_generate_suggestions(queue_items, opts)
        )
    finally:
        utils.write_output(suggestions, suggestions_path, append_mode=True)
        console.print(
            f"[green]Saved {len(suggestions)} suggestions to {suggestions_path}[/green]"
        )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _label_command(args: argparse.Namespace) -> None:
    """Run the label sub-command with the parsed command-line options.

    Args:
        args: Parsed arguments; reads ``suggestions_file``, ``output``,
            ``skip_existing`` and ``vi_mode``.
    """
    suggestions_file = args.suggestions_file
    captions_file = args.output
    skip_existing = args.skip_existing
    vi_mode = args.vi_mode
    label.label_from_suggestions_file(
        suggestions_file, captions_file, skip_existing, vi_mode
    )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _parse_args() -> argparse.Namespace:
    """Parse command-line arguments for all alt text workflows.

    Registers the ``scan``, ``generate`` and ``label`` sub-commands. Default
    paths are resolved relative to the git root reported by
    ``utils.get_git_root()``.

    Returns:
        The parsed namespace; the chosen sub-command name is stored in
        ``command``.
    """
    git_root = utils.get_git_root()

    parser = argparse.ArgumentParser(
        description="Alt text generation and labeling workflows"
    )
    subparsers = parser.add_subparsers(
        dest="command", help="Available commands"
    )

    # ---------------------------------------------------------------------------
    # scan sub-command
    # ---------------------------------------------------------------------------
    scan_parser = subparsers.add_parser(
        Command.SCAN,
        help="Scan markdown files for assets without meaningful alt text",
    )
    scan_parser.add_argument(
        "--root",
        type=Path,
        default=git_root / "website_content",
        help="Directory to search (default: website_content)",
    )
    scan_parser.add_argument(
        "--output",
        type=Path,
        help="Path for output JSON file (default: <git_root>/scripts/asset_queue.json)",
    )

    # ---------------------------------------------------------------------------
    # generate sub-command
    # ---------------------------------------------------------------------------
    generate_parser = subparsers.add_parser(
        Command.GENERATE, help="Generate AI alt text suggestions"
    )
    generate_parser.add_argument(
        "--root",
        type=Path,
        default=git_root / "website_content",
        help="Markdown root directory",
    )
    generate_parser.add_argument(
        "--model", required=True, help="LLM model to use for generation"
    )
    generate_parser.add_argument(
        "--max-chars",
        type=int,
        default=300,
        help="Max characters for generated alt text",
    )
    generate_parser.add_argument(
        "--timeout", type=int, default=120, help="LLM command timeout seconds"
    )
    generate_parser.add_argument(
        "--captions",
        type=Path,
        default=git_root / "scripts" / "asset_captions.json",
        help="Existing/final captions JSON path (used to skip existing unless --process-existing)",
    )
    generate_parser.add_argument(
        "--suggestions-file",
        type=Path,
        default=git_root / "scripts" / "suggested_alts.json",
        help="Path to read/write suggestions JSON",
    )
    generate_parser.add_argument(
        "--process-existing",
        dest="skip_existing",
        action="store_false",
        help="Also process assets that already have captions (default is to skip)",
    )
    generate_parser.add_argument(
        "--estimate-only",
        action="store_true",
        help="Only estimate cost without generating suggestions",
    )
    generate_parser.set_defaults(skip_existing=True)

    # ---------------------------------------------------------------------------
    # label sub-command
    # ---------------------------------------------------------------------------
    label_parser = subparsers.add_parser(
        Command.LABEL, help="Interactively label alt text suggestions"
    )
    label_parser.add_argument(
        "--suggestions-file",
        type=Path,
        default=git_root / "scripts" / "suggested_alts.json",
        help="Path to read suggestions JSON",
    )
    label_parser.add_argument(
        "--output",
        type=Path,
        default=git_root / "scripts" / "asset_captions.json",
        help="Final captions JSON path",
    )
    # A ``store_true`` flag whose default is already True can never be
    # switched off. BooleanOptionalAction keeps ``--skip-existing`` working
    # and additionally provides ``--no-skip-existing``.
    label_parser.add_argument(
        "--skip-existing",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Skip captions already present in output file",
    )
    label_parser.add_argument(
        "--vi-mode",
        action="store_true",
        default=False,
        help="Enable vi keybindings for text editing (default: disabled)",
    )

    return parser.parse_args()
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def main() -> None:
    """Parse the command line and run the selected alt text workflow.

    Raises:
        ValueError: If the parsed command is not one of the known
            sub-commands (including when no sub-command was given).
    """
    args = _parse_args()

    # Map each sub-command to the function that implements it.
    handlers = {
        Command.SCAN: _scan_command,
        Command.GENERATE: _generate_command,
        Command.LABEL: _label_command,
    }
    handler = handlers.get(args.command)
    if handler is None:
        raise ValueError(f"Invalid command: {args.command}")
    handler(args)


if __name__ == "__main__":
    main()
|
alt_text_llm/scan.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Scan markdown files for assets without meaningful alt text.
|
|
3
|
+
|
|
4
|
+
This script produces a JSON work-queue.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from dataclasses import asdict, dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Iterable, Sequence
|
|
11
|
+
|
|
12
|
+
from markdown_it import MarkdownIt
|
|
13
|
+
from markdown_it.token import Token
|
|
14
|
+
|
|
15
|
+
from alt_text_llm import utils
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(slots=True)
|
|
19
|
+
class QueueItem:
|
|
20
|
+
"""Represents a single asset lacking adequate alt text."""
|
|
21
|
+
|
|
22
|
+
markdown_file: str
|
|
23
|
+
asset_path: str
|
|
24
|
+
line_number: int # 1-based, must be positive
|
|
25
|
+
context_snippet: str
|
|
26
|
+
|
|
27
|
+
def __post_init__(self) -> None:
|
|
28
|
+
if self.line_number <= 0:
|
|
29
|
+
raise ValueError("line_number must be positive")
|
|
30
|
+
|
|
31
|
+
def to_json(self) -> dict[str, str | int]: # pylint: disable=C0116
|
|
32
|
+
return asdict(self)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _create_queue_item(
    md_path: Path,
    asset_path: str,
    line_number: int,
    lines: Sequence[str],
) -> QueueItem:
    """Build a ``QueueItem`` for an asset found in *md_path*.

    Args:
        md_path: Markdown file containing the asset.
        asset_path: Asset source as written in the file.
        line_number: 1-based line number of the asset.
        lines: Contents of *md_path* split by lines.

    Returns:
        A ``QueueItem`` whose context snippet comes from
        ``utils.paragraph_context``.
    """
    # ``paragraph_context`` is given the zero-based index of the line.
    snippet = utils.paragraph_context(lines, line_number - 1)
    return QueueItem(
        markdown_file=str(md_path),
        asset_path=asset_path,
        line_number=line_number,
        context_snippet=snippet,
    )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
_PLACEHOLDER_ALTS: set[str] = {
|
|
50
|
+
"img",
|
|
51
|
+
"image",
|
|
52
|
+
"photo",
|
|
53
|
+
"placeholder",
|
|
54
|
+
"screenshot",
|
|
55
|
+
"picture",
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _is_alt_meaningful(alt: str | None) -> bool:
|
|
60
|
+
if alt is None:
|
|
61
|
+
return False
|
|
62
|
+
alt_stripped = alt.strip().lower()
|
|
63
|
+
return bool(alt_stripped) and alt_stripped not in _PLACEHOLDER_ALTS
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _iter_image_tokens(tokens: Sequence[Token]) -> Iterable[Token]:
    """Yield every token, nested children included, that represents an image.

    A token qualifies when its type is ``image``, or when it is an
    ``html_inline``/``html_block`` token whose content contains ``<img``
    (case-insensitive).
    """
    pending: list[Token] = list(tokens)
    while pending:
        # Tokens are taken from the end of the list, so later tokens are
        # visited before earlier ones.
        current = pending.pop()

        children = current.children
        if children:
            pending.extend(children)

        is_markdown_image = current.type == "image"
        is_html_image = (
            current.type in {"html_inline", "html_block"}
            and "<img" in current.content.lower()
        )
        if is_markdown_image or is_html_image:
            yield current
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
# Helpers
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Matches an ``alt`` attribute whose value is wrapped in either double or
# single quotes; the closing quote must be the same character as the opening
# one (back-reference to the ``quote`` group).
_ALT_RE = re.compile(
    r"alt=(?P<quote>[\"'])(?P<alt>(?:(?!(?P=quote)).)*)(?P=quote)",
    re.IGNORECASE | re.DOTALL,
)

# ``markdown_it`` represents HTML img tags inside an ``html_inline`` or
# ``html_block`` token. Use a lightweight regex so we do not pull in another
# HTML parser just for <img>. The ``src`` value may be double- or
# single-quoted and must be non-empty.
_IMG_TAG_RE = re.compile(
    r"<img\s+[^>]*src=(?P<quote>[\"'])(?P<src>(?:(?!(?P=quote)).)+)(?P=quote)[^>]*>",
    re.IGNORECASE | re.DOTALL,
)


def _extract_html_img_info(token: Token) -> list[tuple[str, str | None]]:
    """Return list of (src, alt) pairs for each <img> within the token.

    Args:
        token: Token whose ``content`` holds raw HTML.

    Returns:
        One ``(src, alt)`` tuple per ``<img>`` tag with a quoted, non-empty
        ``src``. ``alt`` is ``None`` when the tag has no quoted ``alt``
        attribute and the empty string when the attribute is present but
        empty.
    """
    infos: list[tuple[str, str | None]] = []
    for tag_match in _IMG_TAG_RE.finditer(token.content):
        # Look for the alt attribute only inside the matched tag text.
        alt_match = _ALT_RE.search(tag_match.group(0))
        alt: str | None = alt_match.group("alt") if alt_match else None
        infos.append((tag_match.group("src"), alt))
    return infos
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _get_line_number(
    token: Token, lines: Sequence[str], search_snippet: str
) -> int:
    """Return the 1-based line number at which the asset appears.

    The token's own source map is used when present. Otherwise *lines* is
    searched for *search_snippet*, and, if the snippet is wrapped in
    parentheses, for the snippet without them.

    Raises:
        ValueError: If no line contains any of the searched strings.
    """
    if token.map:
        return token.map[0] + 1

    # Search for the exact snippet first; a parenthesised snippet is then
    # retried without its parentheses to tolerate whitespace around the path.
    needles = [search_snippet]
    if search_snippet.startswith("(") and search_snippet.endswith(")"):
        needles.append(search_snippet[1:-1])

    for needle in needles:
        for line_no, text in enumerate(lines, start=1):
            if needle in text:
                return line_no

    raise ValueError(
        f"Could not find asset '{search_snippet}' in markdown file"
    )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _handle_md_asset(
    token: Token, md_path: Path, lines: Sequence[str]
) -> list[QueueItem]:
    """
    Turn a markdown ``image`` token into at most one queue entry.

    Args:
        token: The ``markdown_it`` token representing the asset.
        md_path: Current markdown file path.
        lines: Contents of *md_path* split by lines.

    Returns:
        An empty list when the token has no ``src`` or its alt text is
        meaningful; otherwise a single-element list with the ``QueueItem``.
    """
    raw_src = token.attrGet("src")
    if raw_src is None:
        return []

    src = str(raw_src)
    # For image tokens the alt text is stored in ``token.content``.
    if not src or _is_alt_meaningful(token.content):
        return []

    # Markdown images are written as ``(src)``, so search for that form.
    line_no = _get_line_number(token, lines, f"({src})")
    return [_create_queue_item(md_path, src, line_no, lines)]
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _handle_html_asset(
    token: Token, md_path: Path, lines: Sequence[str]
) -> list[QueueItem]:
    """
    Collect queue entries for the ``<img>`` tags inside an HTML token.

    Args:
        token: ``html_inline`` or ``html_block`` token that may contain one
            or more ``<img>`` tags.
        md_path: Current markdown file path.
        lines: Contents of *md_path* split by lines.

    Returns:
        One ``QueueItem`` per ``<img>`` whose alt text is not meaningful, in
        the order the tags were extracted.
    """
    return [
        _create_queue_item(
            md_path,
            src,
            _get_line_number(token, lines, src),
            lines,
        )
        for src, alt in _extract_html_img_info(token)
        if not _is_alt_meaningful(alt)
    ]
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _process_file(md_path: Path) -> list[QueueItem]:
    """Parse one markdown file and return queue items for its images.

    Args:
        md_path: Markdown file to read (decoded as UTF-8).

    Returns:
        Queue items for every image token that lacks meaningful alt text.
    """
    parser = MarkdownIt("commonmark")
    text = md_path.read_text(encoding="utf-8")
    source_lines = text.splitlines()

    found: list[QueueItem] = []
    for token in _iter_image_tokens(parser.parse(text)):
        # Markdown images and raw HTML <img> tags are handled separately.
        handler = (
            _handle_md_asset if token.type == "image" else _handle_html_asset
        )
        found += handler(token, md_path, source_lines)
    return found
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def build_queue(root: Path) -> list[QueueItem]:
    """Collect queue items from every markdown file found beneath *root*.

    Files are listed through ``utils.get_files`` (``.md`` only, with
    ``use_git_ignore=True``) and processed in the order it returns them.
    """
    markdown_paths = utils.get_files(
        root, filetypes_to_match=(".md",), use_git_ignore=True
    )
    return [
        item
        for markdown_path in markdown_paths
        for item in _process_file(markdown_path)
    ]
|