alt_text_llm-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alt-text-llm might be problematic.

alt_text_llm/__init__.py ADDED
@@ -0,0 +1,13 @@
+"""AI-powered alt text generation and labeling tools."""
+
+__version__ = "0.1.0"
+
+from alt_text_llm import generate, label, main, scan, utils
+
+__all__ = [
+    "generate",
+    "label",
+    "main",
+    "scan",
+    "utils",
+]
alt_text_llm/generate.py ADDED
@@ -0,0 +1,208 @@
+"""Generate AI alt text suggestions for assets lacking meaningful alt text."""
+
+import asyncio
+import shutil
+import subprocess
+import tempfile
+import warnings
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Sequence
+
+from rich.console import Console
+from tqdm.rich import tqdm
+from tqdm.std import TqdmExperimentalWarning
+
+from alt_text_llm import scan, utils
+
+warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)
+
+# Approximate cost estimates per 1000 tokens (as of Sep 2025)
+MODEL_COSTS = {
+    # https://www.helicone.ai/llm-cost
+    "gemini-2.5-pro": {"input": 0.00125, "output": 0.01},
+    "gemini-2.5-flash": {"input": 0.0003, "output": 0.0025},
+    "gemini-2.5-flash-lite": {"input": 0.00001, "output": 0.00004},
+    # https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/
+    "gemini-2.5-flash-lite-preview-09-2025": {
+        "input": 0.00001,
+        "output": 0.00004,
+    },
+    "gemini-2.5-flash-preview-09-2025": {"input": 0.00001, "output": 0.00004},
+}
+
+
+def _run_llm(
+    attachment: Path,
+    prompt: str,
+    model: str,
+    timeout: int,
+) -> str:
+    """Execute the LLM command and return the generated caption."""
+    llm_path = utils.find_executable("llm")
+
+    result = subprocess.run(
+        [llm_path, "-m", model, "-a", str(attachment), "--usage", prompt],
+        check=False,
+        capture_output=True,
+        text=True,
+        timeout=timeout,
+    )
+
+    if result.returncode != 0:
+        error_output = result.stderr.strip() or result.stdout.strip()
+        raise utils.AltGenerationError(
+            f"Caption generation failed for {attachment}: {error_output}"
+        )
+
+    cleaned = result.stdout.strip()
+    if not cleaned:
+        raise utils.AltGenerationError("LLM returned empty caption")
+    return cleaned
+
+
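The helper above shells out to the llm CLI rather than using an API client directly. A minimal sketch of an equivalent call (the file name, prompt, and model alias are illustrative, and the model must be installed for llm):

# Equivalent shell command:
#   llm -m gemini-2.5-flash -a screenshot.png --usage "Write concise alt text for this image."
caption = _run_llm(
    attachment=Path("screenshot.png"),  # hypothetical local image
    prompt="Write concise alt text for this image.",
    model="gemini-2.5-flash",
    timeout=60,
)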
+@dataclass(slots=True)
+class GenerateAltTextOptions:
+    """Options for generating alt text."""
+
+    root: Path
+    model: str
+    max_chars: int
+    timeout: int
+    output_path: Path
+    skip_existing: bool = False
+
+
+def estimate_cost(
+    model: str,
+    queue_count: int,
+    avg_prompt_tokens: int = 4500,
+    avg_output_tokens: int = 1500,
+) -> str:
+    """Estimate the cost of processing the queue with the given model."""
+    # Normalize model name for cost lookup
+    model_lower = model.lower()
+
+    if model_lower in MODEL_COSTS:
+        cost_info = MODEL_COSTS[model_lower]
+    else:
+        return f"Can't estimate cost for unknown model: {model}. Available models: {', '.join(MODEL_COSTS)}"
+
+    # Calculate costs
+    input_cost = (avg_prompt_tokens * queue_count / 1000) * cost_info["input"]
+    output_cost = (avg_output_tokens * queue_count / 1000) * cost_info[
+        "output"
+    ]
+    total_cost = input_cost + output_cost
+
+    return f"Estimated cost: ${total_cost:.3f} (${input_cost:.3f} input + ${output_cost:.3f} output)"
+
+
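As a worked example with the rates above: 100 queued items at the default 4,500 prompt tokens each is 450k input tokens, and 1,500 output tokens each is 150k output tokens, so gemini-2.5-flash comes to 450 × $0.0003 + 150 × $0.0025 = $0.135 + $0.375 = $0.51:

print(estimate_cost("gemini-2.5-flash", queue_count=100))
# Estimated cost: $0.510 ($0.135 input + $0.375 output)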
+def filter_existing_captions(
+    queue_items: Sequence["scan.QueueItem"],
+    output_paths: Sequence[Path],
+    console: Console,
+    verbose: bool = True,
+) -> list["scan.QueueItem"]:
+    """Filter out items that already have captions in the output paths."""
+    existing_captions = set()
+    for output_path in output_paths:
+        existing_captions.update(utils.load_existing_captions(output_path))
+    original_count = len(queue_items)
+    filtered_items = [
+        item
+        for item in queue_items
+        if item.asset_path not in existing_captions
+    ]
+    skipped_count = original_count - len(filtered_items)
+    if skipped_count > 0 and verbose:
+        console.print(
+            f"[dim]Skipped {skipped_count} items with existing captions[/dim]"
+        )
+    return filtered_items
+
+
+# ---------------------------------------------------------------------------
+# Async helpers for parallel LLM calls
+# ---------------------------------------------------------------------------
+
+
+_CONCURRENCY_LIMIT = 32
+
+
+async def _run_llm_async(
+    queue_item: "scan.QueueItem",
+    options: GenerateAltTextOptions,
+    sem: asyncio.Semaphore,
+) -> utils.AltGenerationResult:
+    """Download asset, run LLM in a thread; clean up; return suggestion
+    payload."""
+    workspace = Path(tempfile.mkdtemp())
+    try:
+        async with sem:
+            attachment = await asyncio.to_thread(
+                utils.download_asset, queue_item, workspace
+            )
+            prompt = utils.build_prompt(queue_item, options.max_chars)
+            caption = await asyncio.to_thread(
+                _run_llm,
+                attachment,
+                prompt,
+                options.model,
+                options.timeout,
+            )
+            return utils.AltGenerationResult(
+                markdown_file=queue_item.markdown_file,
+                asset_path=queue_item.asset_path,
+                suggested_alt=caption,
+                model=options.model,
+                context_snippet=queue_item.context_snippet,
+                line_number=queue_item.line_number,
+            )
+    finally:
+        shutil.rmtree(workspace, ignore_errors=True)
+
+
+async def async_generate_suggestions(
+    queue_items: Sequence["scan.QueueItem"],
+    options: GenerateAltTextOptions,
+) -> list[utils.AltGenerationResult]:
+    """Generate suggestions concurrently for *queue_items*."""
+    sem = asyncio.Semaphore(_CONCURRENCY_LIMIT)
+    tasks: list[asyncio.Task[utils.AltGenerationResult]] = []
+
+    for qi in queue_items:
+        tasks.append(
+            asyncio.create_task(
+                _run_llm_async(
+                    qi,
+                    options,
+                    sem,
+                )
+            )
+        )
+
+    task_count = len(tasks)
+    if task_count == 0:
+        return []
+
+    suggestions: list[utils.AltGenerationResult] = []
+    with tqdm(total=task_count, desc="Generating alt text") as progress_bar:
+        try:
+            for finished in asyncio.as_completed(tasks):
+                try:
+                    result = await finished
+                    suggestions.append(result)
+                except (
+                    utils.AltGenerationError,
+                    FileNotFoundError,
+                ) as err:
+                    # Skip individual items that fail (e.g., unsupported file types)
+                    progress_bar.write(f"Skipped item due to error: {err}")
+                progress_bar.update(1)
+        except asyncio.CancelledError:
+            progress_bar.set_description(
+                "Generating alt text (cancelled, finishing up...)"
+            )
+
+    return suggestions
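A sketch of how a caller might drive the pipeline end to end, assuming scan.QueueItem takes the four fields used throughout this file (all paths and values are illustrative):

queue = [
    scan.QueueItem(
        markdown_file="posts/example.md",
        asset_path="images/diagram.png",
        line_number=12,
        context_snippet="![](images/diagram.png)",
    )
]
options = GenerateAltTextOptions(
    root=Path("."),
    model="gemini-2.5-flash",
    max_chars=125,
    timeout=60,
    output_path=Path("suggestions.json"),
)
suggestions = asyncio.run(async_generate_suggestions(queue, options))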
alt_text_llm/label.py ADDED
@@ -0,0 +1,347 @@
+"""Interactive labeling interface for alt text suggestions."""
+
+import json
+import os
+import readline
+import subprocess
+import sys
+from dataclasses import replace
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Sequence
+
+import requests
+from rich.box import ROUNDED
+from rich.console import Console
+from rich.markdown import Markdown
+from rich.panel import Panel
+
+from alt_text_llm import scan, utils
+
+UNDO_REQUESTED = "UNDO_REQUESTED"
+
+
+class LabelingSession:
+    """Manages the labeling session state and navigation."""
+
+    def __init__(
+        self, suggestions: Sequence[utils.AltGenerationResult]
+    ) -> None:
+        # Copy to a list so undo can replace items by index.
+        self.suggestions = list(suggestions)
+        self.current_index = 0
+        self.processed_results: list[utils.AltGenerationResult] = []
+
+    def can_undo(self) -> bool:
+        """Check if undo is possible."""
+        return len(self.processed_results) > 0
+
+    def undo(self) -> utils.AltGenerationResult | None:
+        """Undo the last processed result and return to the previous item."""
+        if not self.can_undo():
+            return None
+
+        undone_result = self.processed_results.pop()
+        self.current_index = max(0, self.current_index - 1)
+        return undone_result
+
+    def add_result(self, result: utils.AltGenerationResult) -> None:
+        """Add a processed result and advance to the next item."""
+        self.processed_results.append(result)
+        self.current_index += 1
+
+    def get_current_suggestion(self) -> utils.AltGenerationResult | None:
+        """Get the current suggestion to process."""
+        if self.current_index >= len(self.suggestions):
+            return None
+        return self.suggestions[self.current_index]
+
+    def is_complete(self) -> bool:
+        """Check if all suggestions have been processed."""
+        return self.current_index >= len(self.suggestions)
+
+    def get_progress(self) -> tuple[int, int]:
+        """Get the current position and total count."""
+        return self.current_index + 1, len(self.suggestions)
+
+    def skip_current(self) -> None:
+        """Skip the current suggestion due to error and advance the index."""
+        self.current_index += 1
+
+
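The session is a simple cursor over the suggestion list; undo pops the last accepted result and steps the cursor back. A minimal sketch, assuming AltGenerationResult accepts the fields shown elsewhere in this diff (values illustrative):

result = utils.AltGenerationResult(
    markdown_file="posts/example.md",
    asset_path="images/diagram.png",
    suggested_alt="A flow diagram of the build pipeline.",
    model="gemini-2.5-flash",
    context_snippet="![](images/diagram.png)",
    line_number=12,
)
session = LabelingSession([result])
session.get_progress()   # (1, 1)
session.add_result(result)
session.is_complete()    # True
session.undo()           # returns the result; cursor steps back to item 0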
+class DisplayManager:
+    """Handles rich console display operations."""
+
+    def __init__(self, console: Console, vi_mode: bool = False) -> None:
+        self.console = console
+        self.vi_mode = vi_mode
+
+    def show_context(self, queue_item: "scan.QueueItem") -> None:
+        """Display context information for the queue item."""
+        context = utils.generate_article_context(
+            queue_item, max_before=4, max_after=1, trim_frontmatter=True
+        )
+        rendered_context = Markdown(context)
+        basename = Path(queue_item.markdown_file).name
+        self.console.print(
+            Panel(
+                rendered_context,
+                title="Context",
+                subtitle=f"{basename}:{queue_item.line_number}",
+                box=ROUNDED,
+            )
+        )
+
+    def show_image(self, path: Path) -> None:
+        """Display the image using imgcat."""
+        if "TMUX" in os.environ:
+            raise ValueError("Cannot open image in tmux")
+        try:
+            subprocess.run(["imgcat", str(path)], check=True)
+        except subprocess.CalledProcessError as err:
+            raise ValueError(
+                f"Failed to open image: {err}; is imgcat installed?"
+            ) from err
+
+    def show_progress(self, current: int, total: int) -> None:
+        """Display progress information."""
+        progress_text = (
+            f"Progress: {current}/{total} ({(current - 1) / total * 100:.1f}%)"
+        )
+        self.console.print(f"[dim]{progress_text}[/dim]")
+
+    def prompt_for_edit(
+        self,
+        suggestion: str,
+        current: int | None = None,
+        total: int | None = None,
+    ) -> str:
+        """Prompt the user to edit the suggestion with prefilled editable text."""
+        # Show progress if provided
+        if current is not None and total is not None:
+            self.show_progress(current, total)
+
+        # Enable vim keybindings for readline if requested
+        if self.vi_mode:
+            readline.parse_and_bind("set editing-mode vi")
+        readline.set_startup_hook(lambda: readline.insert_text(suggestion))
+        self.console.print(
+            "\n[bold blue]Edit alt text (or press Enter to accept, 'undo' to go back):[/bold blue]"
+        )
+        result = input("> ")
+        readline.set_startup_hook(None)
+
+        # Check for undo command
+        if result.strip().lower() in ("undo", "u"):
+            return UNDO_REQUESTED
+
+        return result if result.strip() else suggestion
+
+    def show_rule(self, title: str) -> None:
+        """Display a separator rule."""
+        self.console.rule(title)
+
+    def show_error(self, error_message: str) -> None:
+        """Display an error message."""
+        self.console.print(
+            Panel(
+                error_message,
+                title="Alt generation error",
+                box=ROUNDED,
+                style="red",
+            )
+        )
+
+
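prompt_for_edit relies on readline's startup hook for the prefill: the hook fires once when input() initializes and seeds the line buffer, so the user edits the suggestion in place instead of retyping it. A standalone sketch of the same trick (the helper name is hypothetical):

def input_with_prefill(prompt: str, text: str) -> str:
    readline.set_startup_hook(lambda: readline.insert_text(text))
    try:
        return input(prompt)
    finally:
        readline.set_startup_hook(None)  # clear the hook even on interrupt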
+def _process_single_suggestion_for_labeling(
+    suggestion_data: utils.AltGenerationResult,
+    display: DisplayManager,
+    current: int | None = None,
+    total: int | None = None,
+) -> utils.AltGenerationResult:
+    """Show one suggestion with its image and context, then collect the user's edit."""
+    # Recreate queue item for display
+    queue_item = scan.QueueItem(
+        markdown_file=suggestion_data.markdown_file,
+        asset_path=suggestion_data.asset_path,
+        line_number=suggestion_data.line_number,
+        context_snippet=suggestion_data.context_snippet,
+    )
+
+    # Download asset for display
+    with TemporaryDirectory() as temp_dir:
+        workspace = Path(temp_dir)
+        attachment = utils.download_asset(queue_item, workspace)
+
+        # Display results
+        display.show_rule(queue_item.asset_path)
+        display.show_context(queue_item)
+        display.show_image(attachment)
+
+        # Allow user to edit the suggestion
+        prefill_text = (
+            suggestion_data.final_alt
+            if suggestion_data.final_alt is not None
+            else suggestion_data.suggested_alt
+        )
+        final_alt = prefill_text
+        if sys.stdout.isatty():
+            final_alt = display.prompt_for_edit(prefill_text, current, total)
+
+        return utils.AltGenerationResult(
+            markdown_file=suggestion_data.markdown_file,
+            asset_path=suggestion_data.asset_path,
+            suggested_alt=suggestion_data.suggested_alt,
+            final_alt=final_alt,
+            model=suggestion_data.model,
+            context_snippet=suggestion_data.context_snippet,
+            line_number=suggestion_data.line_number,
+        )
+
+
+def _filter_suggestions_by_existing(
+    suggestions: Sequence[utils.AltGenerationResult],
+    output_path: Path,
+    console: Console,
+) -> list[utils.AltGenerationResult]:
+    """Filter out suggestions that already have captions."""
+    existing_captions = utils.load_existing_captions(output_path)
+    filtered = [
+        s for s in suggestions if s.asset_path not in existing_captions
+    ]
+
+    skipped_count = len(suggestions) - len(filtered)
+    if skipped_count > 0:
+        console.print(
+            f"[dim]Skipped {skipped_count} items with existing captions[/dim]"
+        )
+
+    return filtered
+
+
+def _handle_undo_request(
+    session: LabelingSession,
+    console: Console,
+) -> None:
+    """Handle an undo request by reverting to the previous suggestion."""
+    undone_result = session.undo()
+
+    if undone_result is None:
+        console.print("[yellow]Nothing to undo - at the beginning[/yellow]")
+        return
+
+    console.print(f"[yellow]Undoing: {undone_result.asset_path}[/yellow]")
+
+    # Prefill with the previous final_alt value
+    prefill_text = (
+        undone_result.final_alt
+        if undone_result.final_alt is not None
+        else undone_result.suggested_alt
+    )
+    session.suggestions[session.current_index] = replace(
+        session.suggestions[session.current_index],
+        final_alt=prefill_text,
+    )
+
+
+def _process_labeling_loop(
+    session: LabelingSession,
+    display: DisplayManager,
+    console: Console,
+) -> None:
+    """Process all suggestions in the labeling session."""
+    while not session.is_complete():
+        current_suggestion = session.get_current_suggestion()
+        if current_suggestion is None:
+            break
+
+        try:
+            current, total = session.get_progress()
+            result = _process_single_suggestion_for_labeling(
+                current_suggestion, display, current=current, total=total
+            )
+
+            if result.final_alt == UNDO_REQUESTED:
+                _handle_undo_request(session, console)
+            else:
+                session.add_result(result)
+
+        except (
+            utils.AltGenerationError,
+            FileNotFoundError,
+            requests.RequestException,
+        ) as err:
+            display.show_error(str(err))
+            session.skip_current()
+
+
+def label_suggestions(
+    suggestions: Sequence[utils.AltGenerationResult],
+    console: Console,
+    output_path: Path,
+    append_mode: bool,
+    vi_mode: bool = False,
+) -> int:
+    """Let the user label the given suggestions, collecting and saving results."""
+    console.print(
+        f"\n[bold blue]Labeling {len(suggestions)} suggestions[/bold blue]\n"
+    )
+
+    suggestions_to_process = (
+        _filter_suggestions_by_existing(suggestions, output_path, console)
+        if append_mode
+        else suggestions
+    )
+
+    session = LabelingSession(suggestions_to_process)
+    display = DisplayManager(console, vi_mode=vi_mode)
+
+    try:
+        _process_labeling_loop(session, display, console)
+    finally:
+        if session.processed_results:
+            utils.write_output(
+                session.processed_results, output_path, append_mode=append_mode
+            )
+            console.print(
+                f"[green]Saved {len(session.processed_results)} results to {output_path}[/green]"
+            )
+
+    return len(session.processed_results)
+
+
+def label_from_suggestions_file(
+    suggestions_file: Path,
+    output_path: Path,
+    skip_existing: bool = False,
+    vi_mode: bool = False,
+) -> None:
+    """Load suggestions from a file and start the labeling process."""
+    console = Console()
+
+    with open(suggestions_file, encoding="utf-8") as f:
+        suggestions_from_file = json.load(f)
+
+    # Convert loaded data to AltGenerationResult, filtering out extra fields
+    suggestions: list[utils.AltGenerationResult] = []
+    for s in suggestions_from_file:
+        filtered_data = {
+            "markdown_file": s["markdown_file"],
+            "asset_path": s["asset_path"],
+            "suggested_alt": s["suggested_alt"],
+            "model": s["model"],
+            "context_snippet": s["context_snippet"],
+            "line_number": int(s["line_number"]),
+        }
+        suggestions.append(utils.AltGenerationResult(**filtered_data))
+
+    console.print(
+        f"[green]Loaded {len(suggestions)} suggestions from {suggestions_file}[/green]"
+    )
+
+    processed_count = label_suggestions(
+        suggestions, console, output_path, append_mode=skip_existing, vi_mode=vi_mode
+    )
+
+    # Results were already written inside label_suggestions; report the total
+    console.print(
+        f"\n[green]Completed! Wrote {processed_count} results to {output_path}[/green]"
+    )
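The suggestions file consumed above is a JSON array whose entries carry at least the six fields read in the loop; any extra keys are dropped. A minimal example of the expected shape and a call to start labeling (file names and values are illustrative):

# suggestions.json:
# [
#   {
#     "markdown_file": "posts/example.md",
#     "asset_path": "images/diagram.png",
#     "suggested_alt": "A flow diagram of the build pipeline.",
#     "model": "gemini-2.5-flash",
#     "context_snippet": "![](images/diagram.png)",
#     "line_number": 12
#   }
# ]
label_from_suggestions_file(
    suggestions_file=Path("suggestions.json"),
    output_path=Path("captions.json"),
)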