alt-text-llm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alt-text-llm might be problematic. Click here for more details.

@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Alexander Turner (TurnTrout)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ include README.md LICENSE
@@ -0,0 +1,181 @@
1
+ Metadata-Version: 2.4
2
+ Name: alt-text-llm
3
+ Version: 0.1.0
4
+ Summary: AI-powered alt text generation and labeling tools for markdown content
5
+ Author: TurnTrout
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/alexander-turner/alt-text-llm
8
+ Keywords: alt-text,accessibility,markdown,llm,ai
9
+ Requires-Python: >=3.11
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: gitpython
13
+ Requires-Dist: requests
14
+ Requires-Dist: ruamel.yaml
15
+ Requires-Dist: markdown-it-py
16
+ Requires-Dist: rich
17
+ Requires-Dist: tqdm
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest; extra == "dev"
20
+ Requires-Dist: mypy; extra == "dev"
21
+ Requires-Dist: types-requests; extra == "dev"
22
+ Dynamic: license-file
23
+
24
+ # alt-text-llm
25
+
26
+ AI-powered alt text generation and labeling tools for markdown content. Originally developed for [my website](https://turntrout.com/design) ([repo](https://github.com/alexander-turner/TurnTrout.com)).
27
+
28
+ ## Installation
29
+
30
+ ### Quick install from GitHub
31
+
32
+ ```bash
33
+ pip install git+https://github.com/alexander-turner/alt-text-llm.git
34
+ ```
35
+
36
+ ### Automated setup (includes system dependencies)
37
+
38
+ ```bash
39
+ git clone https://github.com/alexander-turner/alt-text-llm.git
40
+ cd alt-text-llm
41
+ ./setup.sh
42
+ ```
43
+
44
+ ## Prerequisites
45
+
46
+ The following command-line tools must be installed:
47
+
48
+ - **`llm`** - LLM interface ([install instructions](https://llm.datasette.io/))
49
+ - **`git`** - Version control
50
+ - **`magick`** (ImageMagick) - Image processing
51
+ - **`ffmpeg`** - Video processing
52
+ - **`imgcat`** - Terminal image display
53
+
54
+ **macOS:**
55
+
56
+ ```bash
57
+ brew install imagemagick ffmpeg imgcat
58
+ pip install llm
59
+ ```
60
+
61
+ **Linux:**
62
+
63
+ ```bash
64
+ sudo apt-get install imagemagick ffmpeg
65
+ pip install llm
66
+ # imgcat: curl -sL https://iterm2.com/utilities/imgcat -o ~/.local/bin/imgcat && chmod +x ~/.local/bin/imgcat
67
+ ```
68
+
69
+ ## Usage
70
+
71
+ The tool provides three main commands: `scan`, `generate`, and `label`.
72
+
73
+ ### 1. Scan for missing alt text
74
+
75
+ Scan your markdown files to find images without meaningful alt text:
76
+
77
+ ```bash
78
+ alt-text-llm scan --root /path/to/markdown/files
79
+ ```
80
+
81
+ This creates `asset_queue.json` with all assets needing alt text.
82
+
83
+ ### 2. Generate AI suggestions
84
+
85
+ Generate alt text suggestions using an LLM:
86
+
87
+ ```bash
88
+ alt-text-llm generate \
89
+ --root /path/to/markdown/files \
90
+ --model gemini-2.5-flash \
91
+ --suggestions-file suggested_alts.json
92
+ ```
93
+
94
+ **Available options:**
95
+
96
+ - `--model` (required) - LLM model to use (e.g., `gemini-2.5-flash`, `gpt-4o-mini`, `claude-3-5-sonnet`)
97
+ - `--max-chars` - Maximum characters for alt text (default: 300)
98
+ - `--timeout` - LLM timeout in seconds (default: 120)
99
+ - `--estimate-only` - Only show cost estimate without generating
100
+ - `--process-existing` - Also process assets that already have captions
101
+
102
+ **Cost estimation:**
103
+
104
+ ```bash
105
+ alt-text-llm generate \
106
+ --root /path/to/markdown/files \
107
+ --model gemini-2.5-flash \
108
+ --estimate-only
109
+ ```
110
+
111
+ ### 3. Label and approve suggestions
112
+
113
+ Interactively review and approve the AI-generated suggestions:
114
+
115
+ ```bash
116
+ alt-text-llm label \
117
+ --suggestions-file suggested_alts.json \
118
+ --output asset_captions.json
119
+ ```
120
+
121
+ **Interactive commands:**
122
+
123
+ - Edit the suggested alt text (vim keybindings enabled)
124
+ - Press Enter to accept the suggestion as-is
125
+ - Submit `undo` or `u` to go back to the previous item
126
+ - Images display in your terminal (requires `imgcat`)
127
+
128
+ ## Example workflow
129
+
130
+ ```bash
131
+ # 1. Scan markdown files for missing alt text
132
+ alt-text-llm scan --root ./content
133
+
134
+ # 2. Estimate the cost
135
+ alt-text-llm generate \
136
+ --root ./content \
137
+ --model gemini-2.5-flash \
138
+ --estimate-only
139
+
140
+ # 3. Generate suggestions (if cost is acceptable)
141
+ alt-text-llm generate \
142
+ --root ./content \
143
+ --model gemini-2.5-flash
144
+
145
+ # 4. Review and approve suggestions
146
+ alt-text-llm label
147
+ ```
148
+
149
+ ## Configuration
150
+
151
+ ### LLM Integration
152
+
153
+ This tool uses the [`llm` CLI tool](https://llm.datasette.io/) to generate alt text. This provides access to many different AI models including:
154
+
155
+ - **Gemini** (Google) via the [llm-gemini plugin](https://github.com/simonw/llm-gemini)
156
+ - **Claude** (Anthropic) via the [llm-claude-3 plugin](https://github.com/tomviner/llm-claude-3)
157
+ - And [many more via plugins](https://llm.datasette.io/en/stable/plugins/directory.html)
158
+
159
+ ### Setting up your model
160
+
161
+ **For Gemini models (default):**
162
+
163
+ ```bash
164
+ llm install llm-gemini
165
+ llm keys set gemini # enter API key
166
+ llm -m gemini-2.5-flash "Hello, world!"
167
+ ```
168
+
169
+ **For other models:**
170
+
171
+ 1. Install the appropriate llm plugin (e.g., `llm install llm-openai`)
172
+ 2. Configure your API key (e.g., `llm keys set openai`)
173
+ 3. Use the model name with `--model` flag (e.g., `--model gpt-4o-mini`)
174
+
175
+ See the [llm documentation](https://llm.datasette.io/en/stable/setup.html) for setup instructions and the [plugin directory](https://llm.datasette.io/en/stable/plugins/directory.html) for available models.
176
+
177
+ ## Output files
178
+
179
+ - `asset_queue.json` - Queue of assets needing alt text (from `scan`)
180
+ - `suggested_alts.json` - AI-generated suggestions (from `generate`)
181
+ - `asset_captions.json` - Approved final captions (from `label`)
@@ -0,0 +1,158 @@
1
+ # alt-text-llm
2
+
3
+ AI-powered alt text generation and labeling tools for markdown content. Originally developed for [my website](https://turntrout.com/design) ([repo](https://github.com/alexander-turner/TurnTrout.com)).
4
+
5
+ ## Installation
6
+
7
+ ### Quick install from GitHub
8
+
9
+ ```bash
10
+ pip install git+https://github.com/alexander-turner/alt-text-llm.git
11
+ ```
12
+
13
+ ### Automated setup (includes system dependencies)
14
+
15
+ ```bash
16
+ git clone https://github.com/alexander-turner/alt-text-llm.git
17
+ cd alt-text-llm
18
+ ./setup.sh
19
+ ```
20
+
21
+ ## Prerequisites
22
+
23
+ The following command-line tools must be installed:
24
+
25
+ - **`llm`** - LLM interface ([install instructions](https://llm.datasette.io/))
26
+ - **`git`** - Version control
27
+ - **`magick`** (ImageMagick) - Image processing
28
+ - **`ffmpeg`** - Video processing
29
+ - **`imgcat`** - Terminal image display
30
+
31
+ **macOS:**
32
+
33
+ ```bash
34
+ brew install imagemagick ffmpeg imgcat
35
+ pip install llm
36
+ ```
37
+
38
+ **Linux:**
39
+
40
+ ```bash
41
+ sudo apt-get install imagemagick ffmpeg
42
+ pip install llm
43
+ # imgcat: curl -sL https://iterm2.com/utilities/imgcat -o ~/.local/bin/imgcat && chmod +x ~/.local/bin/imgcat
44
+ ```
45
+
46
+ ## Usage
47
+
48
+ The tool provides three main commands: `scan`, `generate`, and `label`.
49
+
50
+ ### 1. Scan for missing alt text
51
+
52
+ Scan your markdown files to find images without meaningful alt text:
53
+
54
+ ```bash
55
+ alt-text-llm scan --root /path/to/markdown/files
56
+ ```
57
+
58
+ This creates `asset_queue.json` with all assets needing alt text.
59
+
60
+ ### 2. Generate AI suggestions
61
+
62
+ Generate alt text suggestions using an LLM:
63
+
64
+ ```bash
65
+ alt-text-llm generate \
66
+ --root /path/to/markdown/files \
67
+ --model gemini-2.5-flash \
68
+ --suggestions-file suggested_alts.json
69
+ ```
70
+
71
+ **Available options:**
72
+
73
+ - `--model` (required) - LLM model to use (e.g., `gemini-2.5-flash`, `gpt-4o-mini`, `claude-3-5-sonnet`)
74
+ - `--max-chars` - Maximum characters for alt text (default: 300)
75
+ - `--timeout` - LLM timeout in seconds (default: 120)
76
+ - `--estimate-only` - Only show cost estimate without generating
77
+ - `--process-existing` - Also process assets that already have captions
78
+
79
+ **Cost estimation:**
80
+
81
+ ```bash
82
+ alt-text-llm generate \
83
+ --root /path/to/markdown/files \
84
+ --model gemini-2.5-flash \
85
+ --estimate-only
86
+ ```
87
+
88
+ ### 3. Label and approve suggestions
89
+
90
+ Interactively review and approve the AI-generated suggestions:
91
+
92
+ ```bash
93
+ alt-text-llm label \
94
+ --suggestions-file suggested_alts.json \
95
+ --output asset_captions.json
96
+ ```
97
+
98
+ **Interactive commands:**
99
+
100
+ - Edit the suggested alt text (vim keybindings enabled)
101
+ - Press Enter to accept the suggestion as-is
102
+ - Submit `undo` or `u` to go back to the previous item
103
+ - Images display in your terminal (requires `imgcat`)
104
+
105
+ ## Example workflow
106
+
107
+ ```bash
108
+ # 1. Scan markdown files for missing alt text
109
+ alt-text-llm scan --root ./content
110
+
111
+ # 2. Estimate the cost
112
+ alt-text-llm generate \
113
+ --root ./content \
114
+ --model gemini-2.5-flash \
115
+ --estimate-only
116
+
117
+ # 3. Generate suggestions (if cost is acceptable)
118
+ alt-text-llm generate \
119
+ --root ./content \
120
+ --model gemini-2.5-flash
121
+
122
+ # 4. Review and approve suggestions
123
+ alt-text-llm label
124
+ ```
125
+
126
+ ## Configuration
127
+
128
+ ### LLM Integration
129
+
130
+ This tool uses the [`llm` CLI tool](https://llm.datasette.io/) to generate alt text. This provides access to many different AI models including:
131
+
132
+ - **Gemini** (Google) via the [llm-gemini plugin](https://github.com/simonw/llm-gemini)
133
+ - **Claude** (Anthropic) via the [llm-claude-3 plugin](https://github.com/tomviner/llm-claude-3)
134
+ - And [many more via plugins](https://llm.datasette.io/en/stable/plugins/directory.html)
135
+
136
+ ### Setting up your model
137
+
138
+ **For Gemini models (default):**
139
+
140
+ ```bash
141
+ llm install llm-gemini
142
+ llm keys set gemini # enter API key
143
+ llm -m gemini-2.5-flash "Hello, world!"
144
+ ```
145
+
146
+ **For other models:**
147
+
148
+ 1. Install the appropriate llm plugin (e.g., `llm install llm-openai`)
149
+ 2. Configure your API key (e.g., `llm keys set openai`)
150
+ 3. Use the model name with `--model` flag (e.g., `--model gpt-4o-mini`)
151
+
152
+ See the [llm documentation](https://llm.datasette.io/en/stable/setup.html) for setup instructions and the [plugin directory](https://llm.datasette.io/en/stable/plugins/directory.html) for available models.
153
+
154
+ ## Output files
155
+
156
+ - `asset_queue.json` - Queue of assets needing alt text (from `scan`)
157
+ - `suggested_alts.json` - AI-generated suggestions (from `generate`)
158
+ - `asset_captions.json` - Approved final captions (from `label`)
@@ -0,0 +1,13 @@
1
+ """AI-powered alt text generation and labeling tools."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from alt_text_llm import generate, label, main, scan, utils
6
+
7
+ __all__ = [
8
+ "generate",
9
+ "label",
10
+ "main",
11
+ "scan",
12
+ "utils",
13
+ ]
@@ -0,0 +1,208 @@
1
+ """Generate AI alt text suggestions for assets lacking meaningful alt text."""
2
+
3
+ import asyncio
4
+ import shutil
5
+ import subprocess
6
+ import tempfile
7
+ import warnings
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Sequence
11
+
12
+ from rich.console import Console
13
+ from tqdm.rich import tqdm
14
+ from tqdm.std import TqdmExperimentalWarning
15
+
16
+ from alt_text_llm import scan, utils
17
+
18
+ warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)
19
+
20
# Approximate cost estimates per 1000 tokens (as of Sep 2025).
# Keys are lowercase model names as passed to the `llm` CLI; values map
# "input"/"output" to USD per 1000 tokens. Consumed by estimate_cost().
MODEL_COSTS = {
    # https://www.helicone.ai/llm-cost
    "gemini-2.5-pro": {"input": 0.00125, "output": 0.01},
    "gemini-2.5-flash": {"input": 0.0003, "output": 0.0025},
    "gemini-2.5-flash-lite": {"input": 0.00001, "output": 0.00004},
    # https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/?ref=testingcatalog.com
    "gemini-2.5-flash-lite-preview-09-2025": {
        "input": 0.00001,
        "output": 0.00004,
    },
    "gemini-2.5-flash-preview-09-2025": {"input": 0.00001, "output": 0.00004},
}
33
+
34
+
35
def _run_llm(
    attachment: Path,
    prompt: str,
    model: str,
    timeout: int,
) -> str:
    """Execute the `llm` CLI and return the generated caption.

    Args:
        attachment: Path to the media file passed to the CLI via ``-a``.
        prompt: Instruction text sent to the model.
        model: Model identifier understood by the `llm` CLI.
        timeout: Maximum seconds to wait for the subprocess.

    Raises:
        utils.AltGenerationError: If the CLI exits non-zero, times out, or
            produces an empty caption.
    """
    llm_path = utils.find_executable("llm")

    try:
        result = subprocess.run(
            [llm_path, "-m", model, "-a", str(attachment), "--usage", prompt],
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as err:
        # Convert timeouts into AltGenerationError so callers that already
        # handle per-item generation failures also handle timeouts; previously
        # TimeoutExpired escaped uncaught and aborted the whole batch.
        raise utils.AltGenerationError(
            f"Caption generation timed out after {timeout}s for {attachment}"
        ) from err

    if result.returncode != 0:
        # Prefer stderr for diagnostics; some tools report errors on stdout.
        error_output = result.stderr.strip() or result.stdout.strip()
        raise utils.AltGenerationError(
            f"Caption generation failed for {attachment}: {error_output}"
        )

    cleaned = result.stdout.strip()
    if not cleaned:
        raise utils.AltGenerationError("LLM returned empty caption")
    return cleaned
62
+
63
+
64
@dataclass(slots=True)
class GenerateAltTextOptions:
    """Options for generating alt text."""

    # Root directory containing the markdown files / assets to process.
    root: Path
    # Model identifier passed to the `llm` CLI (e.g. "gemini-2.5-flash").
    model: str
    # Maximum number of characters allowed in generated alt text.
    max_chars: int
    # Per-item LLM subprocess timeout, in seconds.
    timeout: int
    # Path of the JSON file where suggestions are written.
    output_path: Path
    # When True, skip assets that already have captions — TODO confirm
    # against the caller; this flag is not read within this module's view.
    skip_existing: bool = False
74
+
75
+
76
def estimate_cost(
    model: str,
    queue_count: int,
    avg_prompt_tokens: int = 4500,
    avg_output_tokens: int = 1500,
) -> str:
    """Estimate the cost of processing the queue with the given model.

    Args:
        model: Model name; matched case-insensitively against MODEL_COSTS.
        queue_count: Number of queued assets to process.
        avg_prompt_tokens: Assumed average prompt size per asset.
        avg_output_tokens: Assumed average completion size per asset.

    Returns:
        A human-readable cost summary, or an explanatory message when the
        model is absent from the cost table.
    """
    # Cost-table keys are lowercase; normalize before lookup.
    cost_info = MODEL_COSTS.get(model.lower())
    if cost_info is None:
        # Join the names explicitly: interpolating MODEL_COSTS.keys()
        # printed an unreadable dict_keys([...]) repr.
        known_models = ", ".join(sorted(MODEL_COSTS))
        return (
            f"Can't estimate cost for unknown model: {model}. "
            f"Available models: {known_models}"
        )

    # Scale per-1000-token rates by the expected token volume.
    input_cost = (avg_prompt_tokens * queue_count / 1000) * cost_info["input"]
    output_cost = (avg_output_tokens * queue_count / 1000) * cost_info[
        "output"
    ]
    total_cost = input_cost + output_cost

    return (
        f"Estimated cost: ${total_cost:.3f} "
        f"(${input_cost:.3f} input + ${output_cost:.3f} output)"
    )
99
+
100
+
101
def filter_existing_captions(
    queue_items: Sequence["scan.QueueItem"],
    output_paths: Sequence[Path],
    console: Console,
    verbose: bool = True,
) -> list["scan.QueueItem"]:
    """Filter out items that already have captions in the output paths."""
    # Union of asset paths that already carry captions across all outputs.
    already_captioned: set = set()
    for path in output_paths:
        already_captioned |= set(utils.load_existing_captions(path))

    kept = [
        item
        for item in queue_items
        if item.asset_path not in already_captioned
    ]

    skipped_count = len(queue_items) - len(kept)
    if verbose and skipped_count > 0:
        console.print(
            f"[dim]Skipped {skipped_count} items with existing captions[/dim]"
        )
    return kept
123
+
124
+
125
# ---------------------------------------------------------------------------
# Async helpers for parallel LLM calls
# ---------------------------------------------------------------------------


# Upper bound on simultaneously processed queue items; enforced by the
# semaphore shared across _run_llm_async tasks (covers both asset download
# and the LLM subprocess call).
_CONCURRENCY_LIMIT = 32
131
+
132
+
133
async def _run_llm_async(
    queue_item: "scan.QueueItem",
    options: GenerateAltTextOptions,
    sem: asyncio.Semaphore,
) -> utils.AltGenerationResult:
    """Download the asset, caption it via the LLM in a worker thread, and
    return the suggestion payload; the temp workspace is always removed."""
    scratch_dir = Path(tempfile.mkdtemp())
    try:
        # The semaphore bounds concurrency of both the download and the
        # LLM subprocess, which each run in a worker thread.
        async with sem:
            local_asset = await asyncio.to_thread(
                utils.download_asset, queue_item, scratch_dir
            )
            llm_prompt = utils.build_prompt(queue_item, options.max_chars)
            suggestion = await asyncio.to_thread(
                _run_llm,
                local_asset,
                llm_prompt,
                options.model,
                options.timeout,
            )
            return utils.AltGenerationResult(
                markdown_file=queue_item.markdown_file,
                asset_path=queue_item.asset_path,
                suggested_alt=suggestion,
                model=options.model,
                context_snippet=queue_item.context_snippet,
                line_number=queue_item.line_number,
            )
    finally:
        # Best-effort cleanup, even on error or cancellation.
        shutil.rmtree(scratch_dir, ignore_errors=True)
164
+
165
+
166
async def async_generate_suggestions(
    queue_items: Sequence["scan.QueueItem"],
    options: GenerateAltTextOptions,
) -> list[utils.AltGenerationResult]:
    """Generate suggestions concurrently for *queue_items*."""
    sem = asyncio.Semaphore(_CONCURRENCY_LIMIT)
    # One task per queue item; the semaphore inside _run_llm_async limits
    # how many actually run at once.
    tasks: list[asyncio.Task[utils.AltGenerationResult]] = [
        asyncio.create_task(_run_llm_async(item, options, sem))
        for item in queue_items
    ]

    if not tasks:
        return []

    collected: list[utils.AltGenerationResult] = []
    with tqdm(total=len(tasks), desc="Generating alt text") as progress_bar:
        try:
            for pending in asyncio.as_completed(tasks):
                try:
                    collected.append(await pending)
                except (
                    utils.AltGenerationError,
                    FileNotFoundError,
                ) as err:
                    # Skip individual items that fail (e.g., unsupported file types)
                    progress_bar.write(f"Skipped item due to error: {err}")
                # Advance the bar for successes and failures alike.
                progress_bar.update(1)
        except asyncio.CancelledError:
            progress_bar.set_description(
                "Generating alt text (cancelled, finishing up...)"
            )

    return collected