alt-text-llm 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alt-text-llm might be problematic. Click here for more details.
- alt_text_llm-0.1.0/LICENSE +21 -0
- alt_text_llm-0.1.0/MANIFEST.in +1 -0
- alt_text_llm-0.1.0/PKG-INFO +181 -0
- alt_text_llm-0.1.0/README.md +158 -0
- alt_text_llm-0.1.0/alt_text_llm/__init__.py +13 -0
- alt_text_llm-0.1.0/alt_text_llm/generate.py +208 -0
- alt_text_llm-0.1.0/alt_text_llm/label.py +347 -0
- alt_text_llm-0.1.0/alt_text_llm/main.py +235 -0
- alt_text_llm-0.1.0/alt_text_llm/scan.py +219 -0
- alt_text_llm-0.1.0/alt_text_llm/utils.py +515 -0
- alt_text_llm-0.1.0/alt_text_llm.egg-info/PKG-INFO +181 -0
- alt_text_llm-0.1.0/alt_text_llm.egg-info/SOURCES.txt +20 -0
- alt_text_llm-0.1.0/alt_text_llm.egg-info/dependency_links.txt +1 -0
- alt_text_llm-0.1.0/alt_text_llm.egg-info/entry_points.txt +2 -0
- alt_text_llm-0.1.0/alt_text_llm.egg-info/requires.txt +11 -0
- alt_text_llm-0.1.0/alt_text_llm.egg-info/top_level.txt +1 -0
- alt_text_llm-0.1.0/pyproject.toml +31 -0
- alt_text_llm-0.1.0/setup.cfg +4 -0
- alt_text_llm-0.1.0/tests/test_generate.py +266 -0
- alt_text_llm-0.1.0/tests/test_label.py +465 -0
- alt_text_llm-0.1.0/tests/test_scan.py +214 -0
- alt_text_llm-0.1.0/tests/test_utils.py +1487 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Alexander Turner (TurnTrout)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
include README.md LICENSE
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: alt-text-llm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI-powered alt text generation and labeling tools for markdown content
|
|
5
|
+
Author: TurnTrout
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/alexander-turner/alt-text-llm
|
|
8
|
+
Keywords: alt-text,accessibility,markdown,llm,ai
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: gitpython
|
|
13
|
+
Requires-Dist: requests
|
|
14
|
+
Requires-Dist: ruamel.yaml
|
|
15
|
+
Requires-Dist: markdown-it-py
|
|
16
|
+
Requires-Dist: rich
|
|
17
|
+
Requires-Dist: tqdm
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest; extra == "dev"
|
|
20
|
+
Requires-Dist: mypy; extra == "dev"
|
|
21
|
+
Requires-Dist: types-requests; extra == "dev"
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# alt-text-llm
|
|
25
|
+
|
|
26
|
+
AI-powered alt text generation and labeling tools for markdown content. Originally developed for [my website](https://turntrout.com/design) ([repo](https://github.com/alexander-turner/TurnTrout.com)).
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
### Quick install from GitHub
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install git+https://github.com/alexander-turner/alt-text-llm.git
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Automated setup (includes system dependencies)
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
git clone https://github.com/alexander-turner/alt-text-llm.git
|
|
40
|
+
cd alt-text-llm
|
|
41
|
+
./setup.sh
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Prerequisites
|
|
45
|
+
|
|
46
|
+
The following command-line tools must be installed:
|
|
47
|
+
|
|
48
|
+
- **`llm`** - LLM interface ([install instructions](https://llm.datasette.io/))
|
|
49
|
+
- **`git`** - Version control
|
|
50
|
+
- **`magick`** (ImageMagick) - Image processing
|
|
51
|
+
- **`ffmpeg`** - Video processing
|
|
52
|
+
- **`imgcat`** - Terminal image display
|
|
53
|
+
|
|
54
|
+
**macOS:**
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
brew install imagemagick ffmpeg imgcat
|
|
58
|
+
pip install llm
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Linux:**
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
sudo apt-get install imagemagick ffmpeg
|
|
65
|
+
pip install llm
|
|
66
|
+
# imgcat: curl -sL https://iterm2.com/utilities/imgcat -o ~/.local/bin/imgcat && chmod +x ~/.local/bin/imgcat
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Usage
|
|
70
|
+
|
|
71
|
+
The tool provides three main commands: `scan`, `generate`, and `label`.
|
|
72
|
+
|
|
73
|
+
### 1. Scan for missing alt text
|
|
74
|
+
|
|
75
|
+
Scan your markdown files to find images without meaningful alt text:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
alt-text-llm scan --root /path/to/markdown/files
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
This creates `asset_queue.json` with all assets needing alt text.
|
|
82
|
+
|
|
83
|
+
### 2. Generate AI suggestions
|
|
84
|
+
|
|
85
|
+
Generate alt text suggestions using an LLM:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
alt-text-llm generate \
|
|
89
|
+
--root /path/to/markdown/files \
|
|
90
|
+
--model gemini-2.5-flash \
|
|
91
|
+
--suggestions-file suggested_alts.json
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Available options:**
|
|
95
|
+
|
|
96
|
+
- `--model` (required) - LLM model to use (e.g., `gemini-2.5-flash`, `gpt-4o-mini`, `claude-3-5-sonnet`)
|
|
97
|
+
- `--max-chars` - Maximum characters for alt text (default: 300)
|
|
98
|
+
- `--timeout` - LLM timeout in seconds (default: 120)
|
|
99
|
+
- `--estimate-only` - Only show cost estimate without generating
|
|
100
|
+
- `--process-existing` - Also process assets that already have captions
|
|
101
|
+
|
|
102
|
+
**Cost estimation:**
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
alt-text-llm generate \
|
|
106
|
+
--root /path/to/markdown/files \
|
|
107
|
+
--model gemini-2.5-flash \
|
|
108
|
+
--estimate-only
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### 3. Label and approve suggestions
|
|
112
|
+
|
|
113
|
+
Interactively review and approve the AI-generated suggestions:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
alt-text-llm label \
|
|
117
|
+
--suggestions-file suggested_alts.json \
|
|
118
|
+
--output asset_captions.json
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
**Interactive commands:**
|
|
122
|
+
|
|
123
|
+
- Edit the suggested alt text (vim keybindings enabled)
|
|
124
|
+
- Press Enter to accept the suggestion as-is
|
|
125
|
+
- Submit `undo` or `u` to go back to the previous item
|
|
126
|
+
- Images display in your terminal (requires `imgcat`)
|
|
127
|
+
|
|
128
|
+
## Example workflow
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
# 1. Scan markdown files for missing alt text
|
|
132
|
+
alt-text-llm scan --root ./content
|
|
133
|
+
|
|
134
|
+
# 2. Estimate the cost
|
|
135
|
+
alt-text-llm generate \
|
|
136
|
+
--root ./content \
|
|
137
|
+
--model gemini-2.5-flash \
|
|
138
|
+
--estimate-only
|
|
139
|
+
|
|
140
|
+
# 3. Generate suggestions (if cost is acceptable)
|
|
141
|
+
alt-text-llm generate \
|
|
142
|
+
--root ./content \
|
|
143
|
+
--model gemini-2.5-flash
|
|
144
|
+
|
|
145
|
+
# 4. Review and approve suggestions
|
|
146
|
+
alt-text-llm label
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Configuration
|
|
150
|
+
|
|
151
|
+
### LLM Integration
|
|
152
|
+
|
|
153
|
+
This tool uses the [`llm` CLI tool](https://llm.datasette.io/) to generate alt text. This provides access to many different AI models including:
|
|
154
|
+
|
|
155
|
+
- **Gemini** (Google) via the [llm-gemini plugin](https://github.com/simonw/llm-gemini)
|
|
156
|
+
- **Claude** (Anthropic) via the [llm-claude-3 plugin](https://github.com/tomviner/llm-claude-3)
|
|
157
|
+
- And [many more via plugins](https://llm.datasette.io/en/stable/plugins/directory.html)
|
|
158
|
+
|
|
159
|
+
### Setting up your model
|
|
160
|
+
|
|
161
|
+
**For Gemini models (default):**
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
llm install llm-gemini
|
|
165
|
+
llm keys set gemini # enter API key
|
|
166
|
+
llm -m gemini-2.5-flash "Hello, world!"
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**For other models:**
|
|
170
|
+
|
|
171
|
+
1. Install the appropriate llm plugin (e.g., `llm install llm-openai`)
|
|
172
|
+
2. Configure your API key (e.g., `llm keys set openai`)
|
|
173
|
+
3. Use the model name with `--model` flag (e.g., `--model gpt-4o-mini`)
|
|
174
|
+
|
|
175
|
+
See the [llm documentation](https://llm.datasette.io/en/stable/setup.html) for setup instructions and the [plugin directory](https://llm.datasette.io/en/stable/plugins/directory.html) for available models.
|
|
176
|
+
|
|
177
|
+
## Output files
|
|
178
|
+
|
|
179
|
+
- `asset_queue.json` - Queue of assets needing alt text (from `scan`)
|
|
180
|
+
- `suggested_alts.json` - AI-generated suggestions (from `generate`)
|
|
181
|
+
- `asset_captions.json` - Approved final captions (from `label`)
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# alt-text-llm
|
|
2
|
+
|
|
3
|
+
AI-powered alt text generation and labeling tools for markdown content. Originally developed for [my website](https://turntrout.com/design) ([repo](https://github.com/alexander-turner/TurnTrout.com)).
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
### Quick install from GitHub
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install git+https://github.com/alexander-turner/alt-text-llm.git
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
### Automated setup (includes system dependencies)
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
git clone https://github.com/alexander-turner/alt-text-llm.git
|
|
17
|
+
cd alt-text-llm
|
|
18
|
+
./setup.sh
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Prerequisites
|
|
22
|
+
|
|
23
|
+
The following command-line tools must be installed:
|
|
24
|
+
|
|
25
|
+
- **`llm`** - LLM interface ([install instructions](https://llm.datasette.io/))
|
|
26
|
+
- **`git`** - Version control
|
|
27
|
+
- **`magick`** (ImageMagick) - Image processing
|
|
28
|
+
- **`ffmpeg`** - Video processing
|
|
29
|
+
- **`imgcat`** - Terminal image display
|
|
30
|
+
|
|
31
|
+
**macOS:**
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
brew install imagemagick ffmpeg imgcat
|
|
35
|
+
pip install llm
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
**Linux:**
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
sudo apt-get install imagemagick ffmpeg
|
|
42
|
+
pip install llm
|
|
43
|
+
# imgcat: curl -sL https://iterm2.com/utilities/imgcat -o ~/.local/bin/imgcat && chmod +x ~/.local/bin/imgcat
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Usage
|
|
47
|
+
|
|
48
|
+
The tool provides three main commands: `scan`, `generate`, and `label`.
|
|
49
|
+
|
|
50
|
+
### 1. Scan for missing alt text
|
|
51
|
+
|
|
52
|
+
Scan your markdown files to find images without meaningful alt text:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
alt-text-llm scan --root /path/to/markdown/files
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
This creates `asset_queue.json` with all assets needing alt text.
|
|
59
|
+
|
|
60
|
+
### 2. Generate AI suggestions
|
|
61
|
+
|
|
62
|
+
Generate alt text suggestions using an LLM:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
alt-text-llm generate \
|
|
66
|
+
--root /path/to/markdown/files \
|
|
67
|
+
--model gemini-2.5-flash \
|
|
68
|
+
--suggestions-file suggested_alts.json
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
**Available options:**
|
|
72
|
+
|
|
73
|
+
- `--model` (required) - LLM model to use (e.g., `gemini-2.5-flash`, `gpt-4o-mini`, `claude-3-5-sonnet`)
|
|
74
|
+
- `--max-chars` - Maximum characters for alt text (default: 300)
|
|
75
|
+
- `--timeout` - LLM timeout in seconds (default: 120)
|
|
76
|
+
- `--estimate-only` - Only show cost estimate without generating
|
|
77
|
+
- `--process-existing` - Also process assets that already have captions
|
|
78
|
+
|
|
79
|
+
**Cost estimation:**
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
alt-text-llm generate \
|
|
83
|
+
--root /path/to/markdown/files \
|
|
84
|
+
--model gemini-2.5-flash \
|
|
85
|
+
--estimate-only
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### 3. Label and approve suggestions
|
|
89
|
+
|
|
90
|
+
Interactively review and approve the AI-generated suggestions:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
alt-text-llm label \
|
|
94
|
+
--suggestions-file suggested_alts.json \
|
|
95
|
+
--output asset_captions.json
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**Interactive commands:**
|
|
99
|
+
|
|
100
|
+
- Edit the suggested alt text (vim keybindings enabled)
|
|
101
|
+
- Press Enter to accept the suggestion as-is
|
|
102
|
+
- Submit `undo` or `u` to go back to the previous item
|
|
103
|
+
- Images display in your terminal (requires `imgcat`)
|
|
104
|
+
|
|
105
|
+
## Example workflow
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# 1. Scan markdown files for missing alt text
|
|
109
|
+
alt-text-llm scan --root ./content
|
|
110
|
+
|
|
111
|
+
# 2. Estimate the cost
|
|
112
|
+
alt-text-llm generate \
|
|
113
|
+
--root ./content \
|
|
114
|
+
--model gemini-2.5-flash \
|
|
115
|
+
--estimate-only
|
|
116
|
+
|
|
117
|
+
# 3. Generate suggestions (if cost is acceptable)
|
|
118
|
+
alt-text-llm generate \
|
|
119
|
+
--root ./content \
|
|
120
|
+
--model gemini-2.5-flash
|
|
121
|
+
|
|
122
|
+
# 4. Review and approve suggestions
|
|
123
|
+
alt-text-llm label
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Configuration
|
|
127
|
+
|
|
128
|
+
### LLM Integration
|
|
129
|
+
|
|
130
|
+
This tool uses the [`llm` CLI tool](https://llm.datasette.io/) to generate alt text. This provides access to many different AI models including:
|
|
131
|
+
|
|
132
|
+
- **Gemini** (Google) via the [llm-gemini plugin](https://github.com/simonw/llm-gemini)
|
|
133
|
+
- **Claude** (Anthropic) via the [llm-claude-3 plugin](https://github.com/tomviner/llm-claude-3)
|
|
134
|
+
- And [many more via plugins](https://llm.datasette.io/en/stable/plugins/directory.html)
|
|
135
|
+
|
|
136
|
+
### Setting up your model
|
|
137
|
+
|
|
138
|
+
**For Gemini models (default):**
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
llm install llm-gemini
|
|
142
|
+
llm keys set gemini # enter API key
|
|
143
|
+
llm -m gemini-2.5-flash "Hello, world!"
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
**For other models:**
|
|
147
|
+
|
|
148
|
+
1. Install the appropriate llm plugin (e.g., `llm install llm-openai`)
|
|
149
|
+
2. Configure your API key (e.g., `llm keys set openai`)
|
|
150
|
+
3. Use the model name with `--model` flag (e.g., `--model gpt-4o-mini`)
|
|
151
|
+
|
|
152
|
+
See the [llm documentation](https://llm.datasette.io/en/stable/setup.html) for setup instructions and the [plugin directory](https://llm.datasette.io/en/stable/plugins/directory.html) for available models.
|
|
153
|
+
|
|
154
|
+
## Output files
|
|
155
|
+
|
|
156
|
+
- `asset_queue.json` - Queue of assets needing alt text (from `scan`)
|
|
157
|
+
- `suggested_alts.json` - AI-generated suggestions (from `generate`)
|
|
158
|
+
- `asset_captions.json` - Approved final captions (from `label`)
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""Generate AI alt text suggestions for assets lacking meaningful alt text."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
import tempfile
|
|
7
|
+
import warnings
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Sequence
|
|
11
|
+
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
from tqdm.rich import tqdm
|
|
14
|
+
from tqdm.std import TqdmExperimentalWarning
|
|
15
|
+
|
|
16
|
+
from alt_text_llm import scan, utils
|
|
17
|
+
|
|
18
|
+
warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)
|
|
19
|
+
|
|
20
|
+
# Approximate cost estimates per 1000 tokens (as of Sep 2025)
# Keys are lowercase model names: estimate_cost() lowercases the requested
# model before looking it up here. Values are per-1000-token rates
# (presumably USD — the sources below quote dollar pricing).
MODEL_COSTS = {
    # https://www.helicone.ai/llm-cost
    "gemini-2.5-pro": {"input": 0.00125, "output": 0.01},
    "gemini-2.5-flash": {"input": 0.0003, "output": 0.0025},
    "gemini-2.5-flash-lite": {"input": 0.00001, "output": 0.00004},
    # https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/?ref=testingcatalog.com
    "gemini-2.5-flash-lite-preview-09-2025": {
        "input": 0.00001,
        "output": 0.00004,
    },
    "gemini-2.5-flash-preview-09-2025": {"input": 0.00001, "output": 0.00004},
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _run_llm(
    attachment: Path,
    prompt: str,
    model: str,
    timeout: int,
) -> str:
    """Execute the ``llm`` CLI for *attachment* and return the caption.

    Args:
        attachment: Local path of the asset to caption.
        prompt: Prompt text passed to the model.
        model: Model identifier understood by the ``llm`` CLI.
        timeout: Maximum seconds to wait for the subprocess.

    Returns:
        The stripped caption text produced by the model.

    Raises:
        utils.AltGenerationError: If the CLI exits non-zero, times out,
            or produces an empty caption.
    """
    llm_path = utils.find_executable("llm")

    try:
        result = subprocess.run(
            [llm_path, "-m", model, "-a", str(attachment), "--usage", prompt],
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as err:
        # Surface timeouts as AltGenerationError so callers that already
        # skip failed items (e.g. async_generate_suggestions) handle a slow
        # item instead of crashing the whole batch.
        raise utils.AltGenerationError(
            f"Caption generation timed out for {attachment} after {timeout}s"
        ) from err

    if result.returncode != 0:
        error_output = result.stderr.strip() or result.stdout.strip()
        raise utils.AltGenerationError(
            f"Caption generation failed for {attachment}: {error_output}"
        )

    cleaned = result.stdout.strip()
    if not cleaned:
        raise utils.AltGenerationError("LLM returned empty caption")
    return cleaned
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass(slots=True)
class GenerateAltTextOptions:
    """Options for generating alt text."""

    root: Path  # Root directory of the markdown content being processed
    model: str  # Model identifier passed to the `llm` CLI
    max_chars: int  # Character budget forwarded to utils.build_prompt
    timeout: int  # Per-item LLM subprocess timeout in seconds
    output_path: Path  # Destination file for generated suggestions
    # Presumably skips assets that already have captions — set by the CLI's
    # --process-existing flag; not read within this module. TODO confirm.
    skip_existing: bool = False
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def estimate_cost(
    model: str,
    queue_count: int,
    avg_prompt_tokens: int = 4500,
    avg_output_tokens: int = 1500,
) -> str:
    """Estimate the cost of processing the queue with the given model.

    Args:
        model: Model name; matched case-insensitively against MODEL_COSTS.
        queue_count: Number of queue items to process.
        avg_prompt_tokens: Assumed average input tokens per item.
        avg_output_tokens: Assumed average output tokens per item.

    Returns:
        A human-readable cost summary, or an explanatory message when the
        model has no known pricing.
    """
    # Normalize model name for cost lookup
    cost_info = MODEL_COSTS.get(model.lower())
    if cost_info is None:
        # Join the names explicitly: interpolating MODEL_COSTS.keys()
        # directly would render an unreadable dict_keys([...]) repr.
        known_models = ", ".join(sorted(MODEL_COSTS))
        return (
            f"Can't estimate cost for unknown model: {model}. "
            f"Available models: {known_models}"
        )

    # Rates in MODEL_COSTS are per 1000 tokens.
    input_cost = (avg_prompt_tokens * queue_count / 1000) * cost_info["input"]
    output_cost = (avg_output_tokens * queue_count / 1000) * cost_info[
        "output"
    ]
    total_cost = input_cost + output_cost

    return f"Estimated cost: ${total_cost:.3f} (${input_cost:.3f} input + ${output_cost:.3f} output)"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def filter_existing_captions(
    queue_items: Sequence["scan.QueueItem"],
    output_paths: Sequence[Path],
    console: Console,
    verbose: bool = True,
) -> list["scan.QueueItem"]:
    """Filter out items that already have captions in the output paths.

    Args:
        queue_items: Candidate assets awaiting alt text.
        output_paths: Caption files whose entries should be excluded.
        console: Rich console used for the optional skip notice.
        verbose: When True, report how many items were skipped.

    Returns:
        The queue items whose asset paths have no existing caption.
    """
    captioned_assets: set = set()
    for path in output_paths:
        captioned_assets |= set(utils.load_existing_captions(path))

    kept = [
        item for item in queue_items if item.asset_path not in captioned_assets
    ]

    num_skipped = len(queue_items) - len(kept)
    if verbose and num_skipped > 0:
        console.print(
            f"[dim]Skipped {num_skipped} items with existing captions[/dim]"
        )
    return kept
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ---------------------------------------------------------------------------
# Async helpers for parallel LLM calls
# ---------------------------------------------------------------------------


# Maximum number of in-flight LLM calls; enforced via an asyncio.Semaphore
# in async_generate_suggestions / _run_llm_async.
_CONCURRENCY_LIMIT = 32
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
async def _run_llm_async(
    queue_item: "scan.QueueItem",
    options: GenerateAltTextOptions,
    sem: asyncio.Semaphore,
) -> utils.AltGenerationResult:
    """Download asset, run LLM in a thread; clean up; return suggestion
    payload.

    The temporary workspace is removed unconditionally, even when the
    download or the LLM call fails.
    """
    workspace = Path(tempfile.mkdtemp())
    try:
        async with sem:
            # Blocking work (download + subprocess) runs in worker threads
            # so the event loop stays responsive.
            local_copy = await asyncio.to_thread(
                utils.download_asset, queue_item, workspace
            )
            llm_prompt = utils.build_prompt(queue_item, options.max_chars)
            caption_text = await asyncio.to_thread(
                _run_llm,
                local_copy,
                llm_prompt,
                options.model,
                options.timeout,
            )
            return utils.AltGenerationResult(
                markdown_file=queue_item.markdown_file,
                asset_path=queue_item.asset_path,
                suggested_alt=caption_text,
                model=options.model,
                context_snippet=queue_item.context_snippet,
                line_number=queue_item.line_number,
            )
    finally:
        shutil.rmtree(workspace, ignore_errors=True)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
async def async_generate_suggestions(
    queue_items: Sequence["scan.QueueItem"],
    options: GenerateAltTextOptions,
) -> list[utils.AltGenerationResult]:
    """Generate suggestions concurrently for *queue_items*.

    Per-item failures (generation errors, missing files) are reported and
    skipped. On cancellation, outstanding tasks are cancelled and reaped,
    and the suggestions gathered so far are returned.

    Args:
        queue_items: Assets needing alt text.
        options: Model, timeout, and output settings.

    Returns:
        Successfully generated suggestions, in completion order.
    """
    sem = asyncio.Semaphore(_CONCURRENCY_LIMIT)
    tasks: list[asyncio.Task[utils.AltGenerationResult]] = [
        asyncio.create_task(_run_llm_async(qi, options, sem))
        for qi in queue_items
    ]

    if not tasks:
        return []

    suggestions: list[utils.AltGenerationResult] = []
    with tqdm(total=len(tasks), desc="Generating alt text") as progress_bar:
        try:
            for finished in asyncio.as_completed(tasks):
                try:
                    suggestions.append(await finished)
                except (
                    utils.AltGenerationError,
                    FileNotFoundError,
                ) as err:
                    # Skip individual items that fail (e.g., unsupported file types)
                    progress_bar.write(f"Skipped item due to error: {err}")
                progress_bar.update(1)
        except asyncio.CancelledError:
            progress_bar.set_description(
                "Generating alt text (cancelled, finishing up...)"
            )
            # Cancel outstanding work and await it so the event loop does
            # not warn about destroyed pending tasks; errors are absorbed
            # because we only want the partial results already collected.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)

    return suggestions
|