alt-text-llm 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alt-text-llm might be problematic. Click here for more details.

alt_text_llm/utils.py ADDED
@@ -0,0 +1,515 @@
1
+ """Shared utilities for alt text generation and labeling."""
2
+
3
+ import json
4
+ import shutil
5
+ import subprocess
6
+ import textwrap
7
+ from dataclasses import asdict, dataclass
8
+ from pathlib import Path
9
+ from typing import (
10
+ TYPE_CHECKING,
11
+ Collection,
12
+ Dict,
13
+ Iterable,
14
+ Optional,
15
+ Sequence,
16
+ )
17
+ from urllib.parse import urlparse
18
+
19
+ import git
20
+ import requests
21
+ from ruamel.yaml import YAML, YAMLError
22
+
23
+ if TYPE_CHECKING:
24
+ from alt_text_llm import scan
25
+
26
# Process-wide cache mapping executable names to resolved absolute paths,
# so repeated lookups avoid re-scanning PATH via shutil.which.
_executable_cache: Dict[str, str] = {}
27
+
28
+
29
def find_executable(name: str) -> str:
    """
    Resolve an executable name to its absolute path, memoizing the result.

    Args:
        name: The name of the executable to find.

    Returns:
        The absolute path to the executable.

    Raises:
        FileNotFoundError: If the executable cannot be found.
    """
    cached = _executable_cache.get(name)
    if cached is not None:
        return cached

    resolved = shutil.which(name)
    if not resolved:
        raise FileNotFoundError(
            f"Executable '{name}' not found. Please ensure it is in your PATH."
        )

    _executable_cache[name] = resolved
    return resolved
53
+
54
+
55
def get_git_root(starting_dir: Optional[Path] = None) -> Path:
    """
    Returns the absolute path to the top-level directory of the Git repository.

    Args:
        starting_dir: Directory from which to start searching for the Git root.

    Returns:
        Path: Absolute path to the Git repository root.

    Raises:
        RuntimeError: If Git root cannot be determined.
    """
    git_executable = find_executable("git")
    try:
        # check=True raises CalledProcessError on nonzero exit; previously the
        # `returncode == 0` test was always true and the documented
        # RuntimeError was unreachable. Translate the failure explicitly so
        # callers get the exception the docstring promises.
        completed_process = subprocess.run(
            [git_executable, "rev-parse", "--show-toplevel"],
            capture_output=True,
            text=True,
            check=True,
            cwd=starting_dir if starting_dir else Path.cwd(),
        )
    except subprocess.CalledProcessError as err:
        raise RuntimeError("Failed to get Git root") from err
    return Path(completed_process.stdout.strip())
79
+
80
+
81
def get_files(
    dir_to_search: Optional[Path] = None,
    filetypes_to_match: Collection[str] = (".md",),
    use_git_ignore: bool = True,
    ignore_dirs: Optional[Collection[str]] = None,
) -> tuple[Path, ...]:
    """
    Returns a tuple of all files in the specified directory of the Git
    repository.

    Args:
        dir_to_search: A directory to search for files.
        filetypes_to_match: A collection of file types to search for.
        use_git_ignore: Whether to exclude files based on .gitignore.
        ignore_dirs: Directory names to ignore.

    Returns:
        tuple[Path, ...]: A tuple of all matching files.
    """
    matched: list[Path] = []
    if dir_to_search is not None:
        for extension in filetypes_to_match:
            matched.extend(dir_to_search.rglob(f"*{extension}"))

    # Drop any file that sits under one of the ignored directory names.
    if ignore_dirs:
        banned = set(ignore_dirs)
        matched = [path for path in matched if banned.isdisjoint(path.parts)]

    if use_git_ignore:
        try:
            root = get_git_root(starting_dir=dir_to_search)
            repo = git.Repo(root)
            # Paths must be relative to the repo root for .gitignore checks.
            matched = [
                path
                for path in matched
                if not repo.ignored(path.relative_to(root))
            ]
        except (
            git.GitCommandError,
            ValueError,
            RuntimeError,
            subprocess.CalledProcessError,
        ):
            # Git filtering is best-effort; fall back to the unfiltered list.
            pass
    return tuple(matched)
134
+
135
+
136
def split_yaml(file_path: Path, verbose: bool = False) -> tuple[dict, str]:
    """
    Split a markdown file into its YAML frontmatter and content.

    Args:
        file_path: Path to the markdown file
        verbose: Whether to print error messages

    Returns:
        Tuple of (metadata dict, content string); ({}, "") when the file has
        no valid frontmatter or the YAML fails to parse.
    """
    # 'rt' (round-trip) mode preserves comments, ordering, and quote style so
    # the frontmatter can be rewritten without cosmetic churn.
    yaml = YAML(typ="rt")
    yaml.preserve_quotes = True

    with file_path.open("r", encoding="utf-8") as f:
        content = f.read()

    # Valid frontmatter must OPEN the file: "---\n<yaml>\n---\n<content>".
    # Previously any text before the first "---" (e.g. a stray horizontal
    # rule mid-document) was silently discarded and later text parsed as
    # YAML; require that nothing precedes the opening delimiter.
    parts = content.split("---", 2)
    if len(parts) < 3 or parts[0].strip():
        if verbose:
            print(f"Skipping {file_path}: No valid frontmatter found")
        return {}, ""

    try:
        metadata = yaml.load(parts[1])
        if not metadata:
            metadata = {}
    except YAMLError as e:
        print(f"Error parsing YAML in {file_path}: {str(e)}")
        return {}, ""

    return metadata, parts[2]
171
+
172
+
173
def is_url(path: str) -> bool:
    """Return True when *path* carries both a URL scheme and a host."""
    parts = urlparse(path)
    return bool(parts.scheme) and bool(parts.netloc)
177
+
178
+
179
+ def _parse_paragraphs(
180
+ lines: Sequence[str],
181
+ ) -> tuple[list[list[str]], list[int]]:
182
+ """Parse lines into paragraphs and their start indices."""
183
+ paragraphs: list[list[str]] = []
184
+ paragraph_starts: list[int] = []
185
+ current: list[str] = []
186
+
187
+ for idx, line in enumerate(lines):
188
+ if line.strip() == "":
189
+ if current:
190
+ paragraphs.append(current)
191
+ paragraph_starts.append(idx - len(current))
192
+ current = []
193
+ else:
194
+ current.append(line.rstrip("\n"))
195
+
196
+ if current:
197
+ paragraphs.append(current)
198
+ paragraph_starts.append(len(lines) - len(current))
199
+
200
+ return paragraphs, paragraph_starts
201
+
202
+
203
+ def _find_target_paragraph(
204
+ lines: Sequence[str],
205
+ target_idx: int,
206
+ paragraphs: list[list[str]],
207
+ paragraph_starts: list[int],
208
+ ) -> int | None:
209
+ """Find the paragraph index for the target line."""
210
+ selected_line = lines[target_idx] if target_idx < len(lines) else ""
211
+
212
+ if selected_line.strip() != "":
213
+ selected_stripped = selected_line.rstrip("\n")
214
+ for i, paragraph in enumerate(paragraphs):
215
+ if selected_stripped in paragraph:
216
+ return i
217
+ else:
218
+ for i, start in enumerate(paragraph_starts):
219
+ if start > target_idx:
220
+ return i
221
+ return None
222
+
223
+
224
def paragraph_context(
    lines: Sequence[str],
    target_idx: int,
    max_before: int | None = None,
    max_after: int = 2,
) -> str:
    """
    Return a slice of text around *target_idx* in **paragraph** units.

    A *paragraph* is any non-empty run of lines separated by at least one
    blank line. The snippet contains up to *max_before* paragraphs before
    the target paragraph (``None`` meaning unlimited, ``0`` meaning none),
    the target paragraph itself, and up to *max_after* paragraphs after it.

    If *target_idx* lands on a blank line, the **next** paragraph is treated
    as the target. Out-of-bounds requests, or ones pointing past the last
    paragraph, yield an empty string instead of raising. Original line
    formatting (Markdown, punctuation, etc.) is preserved.
    """
    bad_before = max_before is not None and max_before < 0
    if target_idx < 0 or bad_before or max_after < 0:  # pragma: no cover
        raise ValueError(
            f"{target_idx=}, {max_before=}, and {max_after=} must be non-negative"
        )

    paragraphs, paragraph_starts = _parse_paragraphs(lines)
    par_idx = _find_target_paragraph(
        lines, target_idx, paragraphs, paragraph_starts
    )
    if par_idx is None:
        return ""

    # max_before == 0 and the general case collapse to the same max().
    first = 0 if max_before is None else max(0, par_idx - max_before)
    last = min(len(paragraphs), par_idx + max_after + 1)

    # Join each paragraph's lines, then separate paragraphs by blank lines.
    blocks = ["\n".join(para) for para in paragraphs[first:last]]
    return "\n\n".join(blocks).strip()
281
+
282
+
283
@dataclass(slots=True)
class AltGenerationResult:
    """Container for AI-generated alt text suggestions."""

    # Path of the markdown file that references the asset.
    markdown_file: str
    # Path or URL of the asset the alt text describes.
    asset_path: str
    # Alt text proposed by the LLM.
    suggested_alt: str
    # Identifier of the model that produced the suggestion.
    model: str
    # Article excerpt supplied to the model as context.
    context_snippet: str
    # Line number of the asset reference in the markdown file.
    line_number: int
    # Reviewer-approved alt text; None until a human has labeled it.
    final_alt: str | None = None

    def to_json(self) -> dict[str, object]:
        """Convert to JSON-serializable dict."""
        return asdict(self)
298
+
299
+
300
class AltGenerationError(Exception):
    """Raised when caption generation fails, e.g. when converting an asset
    to an LLM-compatible format."""
302
+
303
+
304
def _convert_avif_to_png(asset_path: Path, workspace: Path) -> Path:
    """Convert AVIF images to PNG format for LLM compatibility."""
    # Non-AVIF inputs pass through untouched.
    if asset_path.suffix.lower() != ".avif":
        return asset_path

    output_path = workspace / f"{asset_path.stem}.png"
    command = [find_executable("magick"), str(asset_path), str(output_path)]

    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        raise AltGenerationError(
            f"Failed to convert AVIF to PNG: {err.stderr or err.stdout}"
        ) from err
    return output_path
324
+
325
+
326
def _convert_gif_to_mp4(asset_path: Path, workspace: Path) -> Path:
    """Convert GIF files to MP4 format for LLM compatibility."""
    if asset_path.suffix.lower() != ".gif":
        raise ValueError(f"Unsupported file type '{asset_path.suffix}'.")

    output_path = workspace / f"{asset_path.stem}.mp4"
    command = [
        find_executable("ffmpeg"),
        "-i",
        str(asset_path),
        # Force even width/height (truncate each to a multiple of 2).
        "-vf",
        "scale=trunc(iw/2)*2:trunc(ih/2)*2",
        "-y",
        str(output_path),
    ]

    try:
        subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            timeout=30,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as err:
        raise AltGenerationError(
            f"Failed to convert GIF to MP4: {err}"
        ) from err
    return output_path
355
+
356
+
357
def _convert_asset_for_llm(asset_path: Path, workspace: Path) -> Path:
    """Converts asset to a format compatible with the LLM if needed."""
    # Dispatch on the (lowercased) suffix; unknown types need no conversion.
    converters = {
        ".avif": _convert_avif_to_png,
        ".gif": _convert_gif_to_mp4,
    }
    converter = converters.get(asset_path.suffix.lower())
    if converter is None:
        return asset_path
    return converter(asset_path, workspace)
364
+
365
+
366
def download_asset(queue_item: "scan.QueueItem", workspace: Path) -> Path:
    """Download or locate asset file, returning path to accessible copy.

    Args:
        queue_item: Queue entry naming the asset and its markdown file.
        workspace: Directory in which downloaded/converted files are placed.

    Returns:
        Path to a local, LLM-compatible copy of the asset.

    Raises:
        FileNotFoundError: If a local asset cannot be located.
        requests.HTTPError: If a remote asset download returns an error
            status.
    """
    asset_path = queue_item.asset_path

    if is_url(asset_path):
        # Some hosts reject requests lacking a browser-like User-Agent.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/91.0.4472.124 Safari/537.36"
            )
        }
        suffix = Path(urlparse(asset_path).path).suffix or ".bin"
        target = workspace / f"asset{suffix}"
        # Use the response as a context manager so the streamed connection
        # is always released; previously it was never closed.
        with requests.get(
            asset_path, timeout=20, stream=True, headers=headers
        ) as response:
            response.raise_for_status()
            with target.open("wb") as handle:
                for chunk in response.iter_content(chunk_size=8192):
                    handle.write(chunk)
        return _convert_asset_for_llm(target, workspace)

    # Try relative to markdown file first
    markdown_path = Path(queue_item.markdown_file)
    candidate = markdown_path.parent / asset_path
    if candidate.exists():
        return _convert_asset_for_llm(candidate.resolve(), workspace)

    # Fall back to resolving against the repository root.
    git_root = get_git_root()
    alternative = git_root / asset_path.lstrip("/")
    if alternative.exists():
        return _convert_asset_for_llm(alternative.resolve(), workspace)

    raise FileNotFoundError(
        f"Unable to locate asset '{asset_path}' referenced in {queue_item.markdown_file}"
    )
404
+
405
+
406
def generate_article_context(
    queue_item: "scan.QueueItem",
    max_before: int | None = None,
    max_after: int = 2,
    trim_frontmatter: bool = False,
) -> str:
    """Build paragraph-level context around an asset for LLM prompts.

    Defaults include every preceding paragraph and up to two following
    paragraphs. When *trim_frontmatter* is True, YAML frontmatter is
    removed and the target line number shifted to match.
    """
    markdown_path = Path(queue_item.markdown_file)
    all_lines = markdown_path.read_text(encoding="utf-8").splitlines()

    # queue_item.line_number is 1-based; paragraph_context wants 0-based.
    target_index = queue_item.line_number - 1
    context_lines = all_lines

    if trim_frontmatter:
        _, body = split_yaml(markdown_path, verbose=False)
        if body.strip():
            # Frontmatter found: keep only the body and shift the target
            # index by the number of lines the frontmatter occupied.
            context_lines = body.splitlines()
            frontmatter_lines = len(all_lines) - len(context_lines)
            target_index = queue_item.line_number - 1 - frontmatter_lines

    return paragraph_context(
        context_lines,
        target_index,
        max_before=max_before,
        max_after=max_after,
    )
440
+
441
+
442
def build_prompt(
    queue_item: "scan.QueueItem",
    max_chars: int,
) -> str:
    """Build prompt for LLM caption generation.

    Args:
        queue_item: Queue entry identifying the asset and its markdown file.
        max_chars: Character budget quoted to the model for the alt text.

    Returns:
        The full prompt: base instructions plus article context and
        formatting requirements.
    """
    base_prompt = textwrap.dedent(
        """
        Generate concise alt text for accessibility and SEO.
        Describe the intended information of the image clearly and accurately.
        """
    ).strip()

    # Context uses the defaults: all preceding paragraphs, two following.
    article_context = generate_article_context(
        queue_item, trim_frontmatter=False
    )
    # NOTE(review): dedent() runs after f-string interpolation, so a
    # multi-line article_context starting at column 0 prevents this literal
    # from being dedented — confirm the resulting indentation is intended.
    main_prompt = textwrap.dedent(
        f"""
        Context from {queue_item.markdown_file}:
        {article_context}

        Critical requirements:
        - Under {max_chars} characters (aim for 1-2 sentences when possible)
        - Do not include redundant information (e.g. "image of", "picture of", "diagram illustrating", "a diagram of")
        - Return only the alt text, no quotes
        - For text-heavy images: transcribe key text content, then describe visual elements
        - Don't reintroduce acronyms
        - Don't describe purely visual elements unless directly relevant for
        understanding the content (e.g. don't say "the line in this scientific chart is green")
        - Describe spatial relationships and visual hierarchy when important

        Prioritize completeness over brevity - include both textual content and visual description as needed.
        While thinking quietly, propose a candidate alt text. Then critique the candidate alt text—
        does it accurately describe the information the image is meant to convey?
        Incorporate the critique into the alt text to improve it. Only output the improved alt text.
        """
    ).strip()

    return f"{base_prompt}\n{main_prompt}"
480
+
481
+
482
def load_existing_captions(captions_path: Path) -> set[str]:
    """Load existing asset paths from captions file.

    Returns an empty set when the file is missing, unparsable, or not
    shaped like a list of caption entries.
    """
    try:
        raw = captions_path.read_text(encoding="utf-8")
        entries = json.loads(raw)
        return {
            entry["asset_path"] for entry in entries if "asset_path" in entry
        }
    except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
        return set()
490
+
491
+
492
def write_output(
    results: Iterable[AltGenerationResult],
    output_path: Path,
    append_mode: bool = False,
) -> None:
    """Write results to JSON file.

    In append mode, list contents of an existing *output_path* are kept and
    the new results added after them; a corrupt existing file is replaced.
    """
    payload = [item.to_json() for item in results]

    if append_mode and output_path.exists():
        try:
            previous = json.loads(output_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, TypeError):
            # Corrupt existing file: overwrite it with just the new data.
            print(f"Existing file {output_path} is corrupted, using new data")
        else:
            if isinstance(previous, list):
                payload = previous + payload

    print(f"Writing {len(payload)} results to {output_path}")
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    output_path.write_text(serialized, encoding="utf-8")
@@ -0,0 +1,181 @@
1
+ Metadata-Version: 2.4
2
+ Name: alt-text-llm
3
+ Version: 0.1.0
4
+ Summary: AI-powered alt text generation and labeling tools for markdown content
5
+ Author: TurnTrout
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/alexander-turner/alt-text-llm
8
+ Keywords: alt-text,accessibility,markdown,llm,ai
9
+ Requires-Python: >=3.11
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: gitpython
13
+ Requires-Dist: requests
14
+ Requires-Dist: ruamel.yaml
15
+ Requires-Dist: markdown-it-py
16
+ Requires-Dist: rich
17
+ Requires-Dist: tqdm
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest; extra == "dev"
20
+ Requires-Dist: mypy; extra == "dev"
21
+ Requires-Dist: types-requests; extra == "dev"
22
+ Dynamic: license-file
23
+
24
+ # alt-text-llm
25
+
26
+ AI-powered alt text generation and labeling tools for markdown content. Originally developed for [my website](https://turntrout.com/design) ([repo](https://github.com/alexander-turner/TurnTrout.com)).
27
+
28
+ ## Installation
29
+
30
+ ### Quick install from GitHub
31
+
32
+ ```bash
33
+ pip install git+https://github.com/alexander-turner/alt-text-llm.git
34
+ ```
35
+
36
+ ### Automated setup (includes system dependencies)
37
+
38
+ ```bash
39
+ git clone https://github.com/alexander-turner/alt-text-llm.git
40
+ cd alt-text-llm
41
+ ./setup.sh
42
+ ```
43
+
44
+ ## Prerequisites
45
+
46
+ The following command-line tools must be installed:
47
+
48
+ - **`llm`** - LLM interface ([install instructions](https://llm.datasette.io/))
49
+ - **`git`** - Version control
50
+ - **`magick`** (ImageMagick) - Image processing
51
+ - **`ffmpeg`** - Video processing
52
+ - **`imgcat`** - Terminal image display
53
+
54
+ **macOS:**
55
+
56
+ ```bash
57
+ brew install imagemagick ffmpeg imgcat
58
+ pip install llm
59
+ ```
60
+
61
+ **Linux:**
62
+
63
+ ```bash
64
+ sudo apt-get install imagemagick ffmpeg
65
+ pip install llm
66
+ # imgcat: curl -sL https://iterm2.com/utilities/imgcat -o ~/.local/bin/imgcat && chmod +x ~/.local/bin/imgcat
67
+ ```
68
+
69
+ ## Usage
70
+
71
+ The tool provides three main commands: `scan`, `generate`, and `label`.
72
+
73
+ ### 1. Scan for missing alt text
74
+
75
+ Scan your markdown files to find images without meaningful alt text:
76
+
77
+ ```bash
78
+ alt-text-llm scan --root /path/to/markdown/files
79
+ ```
80
+
81
+ This creates `asset_queue.json` with all assets needing alt text.
82
+
83
+ ### 2. Generate AI suggestions
84
+
85
+ Generate alt text suggestions using an LLM:
86
+
87
+ ```bash
88
+ alt-text-llm generate \
89
+ --root /path/to/markdown/files \
90
+ --model gemini-2.5-flash \
91
+ --suggestions-file suggested_alts.json
92
+ ```
93
+
94
+ **Available options:**
95
+
96
+ - `--model` (required) - LLM model to use (e.g., `gemini-2.5-flash`, `gpt-4o-mini`, `claude-3-5-sonnet`)
97
+ - `--max-chars` - Maximum characters for alt text (default: 300)
98
+ - `--timeout` - LLM timeout in seconds (default: 120)
99
+ - `--estimate-only` - Only show cost estimate without generating
100
+ - `--process-existing` - Also process assets that already have captions
101
+
102
+ **Cost estimation:**
103
+
104
+ ```bash
105
+ alt-text-llm generate \
106
+ --root /path/to/markdown/files \
107
+ --model gemini-2.5-flash \
108
+ --estimate-only
109
+ ```
110
+
111
+ ### 3. Label and approve suggestions
112
+
113
+ Interactively review and approve the AI-generated suggestions:
114
+
115
+ ```bash
116
+ alt-text-llm label \
117
+ --suggestions-file suggested_alts.json \
118
+ --output asset_captions.json
119
+ ```
120
+
121
+ **Interactive commands:**
122
+
123
+ - Edit the suggested alt text (vim keybindings enabled)
124
+ - Press Enter to accept the suggestion as-is
125
+ - Submit `undo` or `u` to go back to the previous item
126
+ - Images display in your terminal (requires `imgcat`)
127
+
128
+ ## Example workflow
129
+
130
+ ```bash
131
+ # 1. Scan markdown files for missing alt text
132
+ alt-text-llm scan --root ./content
133
+
134
+ # 2. Estimate the cost
135
+ alt-text-llm generate \
136
+ --root ./content \
137
+ --model gemini-2.5-flash \
138
+ --estimate-only
139
+
140
+ # 3. Generate suggestions (if cost is acceptable)
141
+ alt-text-llm generate \
142
+ --root ./content \
143
+ --model gemini-2.5-flash
144
+
145
+ # 4. Review and approve suggestions
146
+ alt-text-llm label
147
+ ```
148
+
149
+ ## Configuration
150
+
151
+ ### LLM Integration
152
+
153
+ This tool uses the [`llm` CLI tool](https://llm.datasette.io/) to generate alt text. This provides access to many different AI models including:
154
+
155
+ - **Gemini** (Google) via the [llm-gemini plugin](https://github.com/simonw/llm-gemini)
156
+ - **Claude** (Anthropic) via the [llm-claude-3 plugin](https://github.com/tomviner/llm-claude-3)
157
+ - And [many more via plugins](https://llm.datasette.io/en/stable/plugins/directory.html)
158
+
159
+ ### Setting up your model
160
+
161
+ **For Gemini models (default):**
162
+
163
+ ```bash
164
+ llm install llm-gemini
165
+ llm keys set gemini # enter API key
166
+ llm -m gemini-2.5-flash "Hello, world!"
167
+ ```
168
+
169
+ **For other models:**
170
+
171
+ 1. Install the appropriate llm plugin (e.g., `llm install llm-openai`)
172
+ 2. Configure your API key (e.g., `llm keys set openai`)
173
+ 3. Use the model name with `--model` flag (e.g., `--model gpt-4o-mini`)
174
+
175
+ See the [llm documentation](https://llm.datasette.io/en/stable/setup.html) for setup instructions and the [plugin directory](https://llm.datasette.io/en/stable/plugins/directory.html) for available models.
176
+
177
+ ## Output files
178
+
179
+ - `asset_queue.json` - Queue of assets needing alt text (from `scan`)
180
+ - `suggested_alts.json` - AI-generated suggestions (from `generate`)
181
+ - `asset_captions.json` - Approved final captions (from `label`)
@@ -0,0 +1,12 @@
1
+ alt_text_llm/__init__.py,sha256=vkNaW0Zx2C7JtXD9nG7NHFWBFYqYZ_iECgRtdJP4f5A,222
2
+ alt_text_llm/generate.py,sha256=dYLQMzF9qS4cNoyH4v4_mIZZa2bWeqoVpXYBnw2zlu0,6550
3
+ alt_text_llm/label.py,sha256=XvPINQfW-NFcxTbaa0rdaVKK2P6gE6UqrnIEDXV8T5k,11295
4
+ alt_text_llm/main.py,sha256=CQsRnwP2u2Jca4Kdj73DBntjYND_OUd1nkKxHv4qwQs,7146
5
+ alt_text_llm/scan.py,sha256=fOhfJb5rKLQejFaj1iCAu0vrqIe_bKx08jkeYXFGd-E,6233
6
+ alt_text_llm/utils.py,sha256=4xMFXviMvVB4XXZdMN-VeUB1TefdjpNpWQsWVBYCWMA,16418
7
+ alt_text_llm-0.1.0.dist-info/licenses/LICENSE,sha256=VCpqtaN5u5ulLyhFHpAIKHfYLkMYubaYtpK2m1Bss6c,1085
8
+ alt_text_llm-0.1.0.dist-info/METADATA,sha256=MYgTZlNC_6a9br6fLi19DWcEamB-ahXIk2vkR_UVLHg,4978
9
+ alt_text_llm-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
+ alt_text_llm-0.1.0.dist-info/entry_points.txt,sha256=SQyNVYF_LXPoleopqGrZOyR878rKcmGtUS9gIhNLRpY,56
11
+ alt_text_llm-0.1.0.dist-info/top_level.txt,sha256=SJh1xf4GM9seHJryaePMI469CUtALg30wM22vUIqnw4,13
12
+ alt_text_llm-0.1.0.dist-info/RECORD,,