alt_text_llm-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alt-text-llm might be problematic.

alt_text_llm/__init__.py ADDED
@@ -0,0 +1,13 @@
+"""AI-powered alt text generation and labeling tools."""
+
+__version__ = "0.1.0"
+
+from alt_text_llm import generate, label, main, scan, utils
+
+__all__ = [
+    "generate",
+    "label",
+    "main",
+    "scan",
+    "utils",
+]
alt_text_llm/generate.py ADDED
@@ -0,0 +1,208 @@
+"""Generate AI alt text suggestions for assets lacking meaningful alt text."""
+
+import asyncio
+import shutil
+import subprocess
+import tempfile
+import warnings
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Sequence
+
+from rich.console import Console
+from tqdm.rich import tqdm
+from tqdm.std import TqdmExperimentalWarning
+
+from alt_text_llm import scan, utils
+
+warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)
+
+# Approximate cost estimates per 1000 tokens (as of Sep 2025)
+MODEL_COSTS = {
+    # https://www.helicone.ai/llm-cost
+    "gemini-2.5-pro": {"input": 0.00125, "output": 0.01},
+    "gemini-2.5-flash": {"input": 0.0003, "output": 0.0025},
+    "gemini-2.5-flash-lite": {"input": 0.00001, "output": 0.00004},
+    # https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/
+    "gemini-2.5-flash-lite-preview-09-2025": {
+        "input": 0.00001,
+        "output": 0.00004,
+    },
+    "gemini-2.5-flash-preview-09-2025": {"input": 0.00001, "output": 0.00004},
+}
+
+
+def _run_llm(
+    attachment: Path,
+    prompt: str,
+    model: str,
+    timeout: int,
+) -> str:
+    """Execute the LLM command and return the generated caption."""
+    llm_path = utils.find_executable("llm")
+
+    result = subprocess.run(
+        [llm_path, "-m", model, "-a", str(attachment), "--usage", prompt],
+        check=False,
+        capture_output=True,
+        text=True,
+        timeout=timeout,
+    )
+
+    if result.returncode != 0:
+        error_output = result.stderr.strip() or result.stdout.strip()
+        raise utils.AltGenerationError(
+            f"Caption generation failed for {attachment}: {error_output}"
+        )
+
+    cleaned = result.stdout.strip()
+    if not cleaned:
+        raise utils.AltGenerationError("LLM returned empty caption")
+    return cleaned
+
+
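The helper above shells out to the llm CLI rather than using an API client directly. A minimal sketch of an equivalent call (the file name, prompt, and model alias are illustrative, and the model must be installed for llm):

# Equivalent shell command:
#   llm -m gemini-2.5-flash -a screenshot.png --usage "Write concise alt text for this image."
caption = _run_llm(
    attachment=Path("screenshot.png"),  # hypothetical local image
    prompt="Write concise alt text for this image.",
    model="gemini-2.5-flash",
    timeout=60,
)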
+@dataclass(slots=True)
+class GenerateAltTextOptions:
+    """Options for generating alt text."""
+
+    root: Path
+    model: str
+    max_chars: int
+    timeout: int
+    output_path: Path
+    skip_existing: bool = False
+
+
+def estimate_cost(
+    model: str,
+    queue_count: int,
+    avg_prompt_tokens: int = 4500,
+    avg_output_tokens: int = 1500,
+) -> str:
+    """Estimate the cost of processing the queue with the given model."""
+    # Normalize model name for cost lookup
+    model_lower = model.lower()
+
+    if model_lower in MODEL_COSTS:
+        cost_info = MODEL_COSTS[model_lower]
+    else:
+        return f"Can't estimate cost for unknown model: {model}. Available models: {', '.join(MODEL_COSTS)}"
+
+    # Calculate costs
+    input_cost = (avg_prompt_tokens * queue_count / 1000) * cost_info["input"]
+    output_cost = (avg_output_tokens * queue_count / 1000) * cost_info[
+        "output"
+    ]
+    total_cost = input_cost + output_cost
+
+    return f"Estimated cost: ${total_cost:.3f} (${input_cost:.3f} input + ${output_cost:.3f} output)"
+
+
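As a worked example with the rates above: 100 queued items at the default 4,500 prompt tokens each is 450k input tokens, and 1,500 output tokens each is 150k output tokens, so gemini-2.5-flash comes to 450 × $0.0003 + 150 × $0.0025 = $0.135 + $0.375 = $0.51:

print(estimate_cost("gemini-2.5-flash", queue_count=100))
# Estimated cost: $0.510 ($0.135 input + $0.375 output)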
+def filter_existing_captions(
+    queue_items: Sequence["scan.QueueItem"],
+    output_paths: Sequence[Path],
+    console: Console,
+    verbose: bool = True,
+) -> list["scan.QueueItem"]:
+    """Filter out items that already have captions in the output paths."""
+    existing_captions = set()
+    for output_path in output_paths:
+        existing_captions.update(utils.load_existing_captions(output_path))
+    original_count = len(queue_items)
+    filtered_items = [
+        item
+        for item in queue_items
+        if item.asset_path not in existing_captions
+    ]
+    skipped_count = original_count - len(filtered_items)
+    if skipped_count > 0 and verbose:
+        console.print(
+            f"[dim]Skipped {skipped_count} items with existing captions[/dim]"
+        )
+    return filtered_items
+
+
+# ---------------------------------------------------------------------------
+# Async helpers for parallel LLM calls
+# ---------------------------------------------------------------------------
+
+
+_CONCURRENCY_LIMIT = 32
+
+
+async def _run_llm_async(
+    queue_item: "scan.QueueItem",
+    options: GenerateAltTextOptions,
+    sem: asyncio.Semaphore,
+) -> utils.AltGenerationResult:
+    """Download asset, run LLM in a thread; clean up; return suggestion
+    payload."""
+    workspace = Path(tempfile.mkdtemp())
+    try:
+        async with sem:
+            attachment = await asyncio.to_thread(
+                utils.download_asset, queue_item, workspace
+            )
+            prompt = utils.build_prompt(queue_item, options.max_chars)
+            caption = await asyncio.to_thread(
+                _run_llm,
+                attachment,
+                prompt,
+                options.model,
+                options.timeout,
+            )
+            return utils.AltGenerationResult(
+                markdown_file=queue_item.markdown_file,
+                asset_path=queue_item.asset_path,
+                suggested_alt=caption,
+                model=options.model,
+                context_snippet=queue_item.context_snippet,
+                line_number=queue_item.line_number,
+            )
+    finally:
+        shutil.rmtree(workspace, ignore_errors=True)
+
+
+async def async_generate_suggestions(
+    queue_items: Sequence["scan.QueueItem"],
+    options: GenerateAltTextOptions,
+) -> list[utils.AltGenerationResult]:
+    """Generate suggestions concurrently for *queue_items*."""
+    sem = asyncio.Semaphore(_CONCURRENCY_LIMIT)
+    tasks: list[asyncio.Task[utils.AltGenerationResult]] = []
+
+    for qi in queue_items:
+        tasks.append(
+            asyncio.create_task(
+                _run_llm_async(
+                    qi,
+                    options,
+                    sem,
+                )
+            )
+        )
+
+    task_count = len(tasks)
+    if task_count == 0:
+        return []
+
+    suggestions: list[utils.AltGenerationResult] = []
+    with tqdm(total=task_count, desc="Generating alt text") as progress_bar:
+        try:
+            for finished in asyncio.as_completed(tasks):
+                try:
+                    result = await finished
+                    suggestions.append(result)
+                except (
+                    utils.AltGenerationError,
+                    FileNotFoundError,
+                ) as err:
+                    # Skip individual items that fail (e.g., unsupported file types)
+                    progress_bar.write(f"Skipped item due to error: {err}")
+                progress_bar.update(1)
+        except asyncio.CancelledError:
+            progress_bar.set_description(
+                "Generating alt text (cancelled, finishing up...)"
+            )
+
+    return suggestions
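A sketch of how a caller might drive the pipeline end to end, assuming scan.QueueItem takes the four fields used throughout this file (all paths and values are illustrative):

queue = [
    scan.QueueItem(
        markdown_file="posts/example.md",
        asset_path="images/diagram.png",
        line_number=12,
        context_snippet="![](images/diagram.png)",
    )
]
options = GenerateAltTextOptions(
    root=Path("."),
    model="gemini-2.5-flash",
    max_chars=125,
    timeout=60,
    output_path=Path("suggestions.json"),
)
suggestions = asyncio.run(async_generate_suggestions(queue, options))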
alt_text_llm/label.py ADDED
@@ -0,0 +1,347 @@
+"""Interactive labeling interface for alt text suggestions."""
+
+import json
+import os
+import readline
+import subprocess
+import sys
+from dataclasses import replace
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Sequence
+
+import requests
+from rich.box import ROUNDED
+from rich.console import Console
+from rich.markdown import Markdown
+from rich.panel import Panel
+
+from alt_text_llm import scan, utils
+
+UNDO_REQUESTED = "UNDO_REQUESTED"
+
+
+class LabelingSession:
+    """Manages the labeling session state and navigation."""
+
+    def __init__(
+        self, suggestions: Sequence[utils.AltGenerationResult]
+    ) -> None:
+        # Copy to a list so undo can replace items by index.
+        self.suggestions = list(suggestions)
+        self.current_index = 0
+        self.processed_results: list[utils.AltGenerationResult] = []
+
+    def can_undo(self) -> bool:
+        """Check if undo is possible."""
+        return len(self.processed_results) > 0
+
+    def undo(self) -> utils.AltGenerationResult | None:
+        """Undo the last processed result and return to the previous item."""
+        if not self.can_undo():
+            return None
+
+        undone_result = self.processed_results.pop()
+        self.current_index = max(0, self.current_index - 1)
+        return undone_result
+
+    def add_result(self, result: utils.AltGenerationResult) -> None:
+        """Add a processed result and advance to the next item."""
+        self.processed_results.append(result)
+        self.current_index += 1
+
+    def get_current_suggestion(self) -> utils.AltGenerationResult | None:
+        """Get the current suggestion to process."""
+        if self.current_index >= len(self.suggestions):
+            return None
+        return self.suggestions[self.current_index]
+
+    def is_complete(self) -> bool:
+        """Check if all suggestions have been processed."""
+        return self.current_index >= len(self.suggestions)
+
+    def get_progress(self) -> tuple[int, int]:
+        """Get the current position and total count."""
+        return self.current_index + 1, len(self.suggestions)
+
+    def skip_current(self) -> None:
+        """Skip the current suggestion due to error and advance the index."""
+        self.current_index += 1
+
+
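The session is a simple cursor over the suggestion list; undo pops the last accepted result and steps the cursor back. A minimal sketch, assuming AltGenerationResult accepts the fields shown elsewhere in this diff (values illustrative):

result = utils.AltGenerationResult(
    markdown_file="posts/example.md",
    asset_path="images/diagram.png",
    suggested_alt="A flow diagram of the build pipeline.",
    model="gemini-2.5-flash",
    context_snippet="![](images/diagram.png)",
    line_number=12,
)
session = LabelingSession([result])
session.get_progress()   # (1, 1)
session.add_result(result)
session.is_complete()    # True
session.undo()           # returns the result; cursor steps back to item 0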
+class DisplayManager:
+    """Handles rich console display operations."""
+
+    def __init__(self, console: Console, vi_mode: bool = False) -> None:
+        self.console = console
+        self.vi_mode = vi_mode
+
+    def show_context(self, queue_item: "scan.QueueItem") -> None:
+        """Display context information for the queue item."""
+        context = utils.generate_article_context(
+            queue_item, max_before=4, max_after=1, trim_frontmatter=True
+        )
+        rendered_context = Markdown(context)
+        basename = Path(queue_item.markdown_file).name
+        self.console.print(
+            Panel(
+                rendered_context,
+                title="Context",
+                subtitle=f"{basename}:{queue_item.line_number}",
+                box=ROUNDED,
+            )
+        )
+
+    def show_image(self, path: Path) -> None:
+        """Display the image using imgcat."""
+        if "TMUX" in os.environ:
+            raise ValueError("Cannot open image in tmux")
+        try:
+            subprocess.run(["imgcat", str(path)], check=True)
+        except subprocess.CalledProcessError as err:
+            raise ValueError(
+                f"Failed to open image: {err}; is imgcat installed?"
+            ) from err
+
+    def show_progress(self, current: int, total: int) -> None:
+        """Display progress information."""
+        progress_text = (
+            f"Progress: {current}/{total} ({(current - 1) / total * 100:.1f}%)"
+        )
+        self.console.print(f"[dim]{progress_text}[/dim]")
+
+    def prompt_for_edit(
+        self,
+        suggestion: str,
+        current: int | None = None,
+        total: int | None = None,
+    ) -> str:
+        """Prompt the user to edit the suggestion with prefilled editable text."""
+        # Show progress if provided
+        if current is not None and total is not None:
+            self.show_progress(current, total)
+
+        # Enable vim keybindings for readline if requested
+        if self.vi_mode:
+            readline.parse_and_bind("set editing-mode vi")
+        readline.set_startup_hook(lambda: readline.insert_text(suggestion))
+        self.console.print(
+            "\n[bold blue]Edit alt text (or press Enter to accept, 'undo' to go back):[/bold blue]"
+        )
+        result = input("> ")
+        readline.set_startup_hook(None)
+
+        # Check for undo command
+        if result.strip().lower() in ("undo", "u"):
+            return UNDO_REQUESTED
+
+        return result if result.strip() else suggestion
+
+    def show_rule(self, title: str) -> None:
+        """Display a separator rule."""
+        self.console.rule(title)
+
+    def show_error(self, error_message: str) -> None:
+        """Display an error message."""
+        self.console.print(
+            Panel(
+                error_message,
+                title="Alt generation error",
+                box=ROUNDED,
+                style="red",
+            )
+        )
+
+
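prompt_for_edit relies on readline's startup hook for the prefill: the hook fires once when input() initializes and seeds the line buffer, so the user edits the suggestion in place instead of retyping it. A standalone sketch of the same trick (the helper name is hypothetical):

def input_with_prefill(prompt: str, text: str) -> str:
    readline.set_startup_hook(lambda: readline.insert_text(text))
    try:
        return input(prompt)
    finally:
        readline.set_startup_hook(None)  # clear the hook even on interrupt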
+def _process_single_suggestion_for_labeling(
+    suggestion_data: utils.AltGenerationResult,
+    display: DisplayManager,
+    current: int | None = None,
+    total: int | None = None,
+) -> utils.AltGenerationResult:
+    """Show one suggestion with its image and context, then collect the user's edit."""
+    # Recreate queue item for display
+    queue_item = scan.QueueItem(
+        markdown_file=suggestion_data.markdown_file,
+        asset_path=suggestion_data.asset_path,
+        line_number=suggestion_data.line_number,
+        context_snippet=suggestion_data.context_snippet,
+    )
+
+    # Download asset for display
+    with TemporaryDirectory() as temp_dir:
+        workspace = Path(temp_dir)
+        attachment = utils.download_asset(queue_item, workspace)
+
+        # Display results
+        display.show_rule(queue_item.asset_path)
+        display.show_context(queue_item)
+        display.show_image(attachment)
+
+        # Allow user to edit the suggestion
+        prefill_text = (
+            suggestion_data.final_alt
+            if suggestion_data.final_alt is not None
+            else suggestion_data.suggested_alt
+        )
+        final_alt = prefill_text
+        if sys.stdout.isatty():
+            final_alt = display.prompt_for_edit(prefill_text, current, total)
+
+        return utils.AltGenerationResult(
+            markdown_file=suggestion_data.markdown_file,
+            asset_path=suggestion_data.asset_path,
+            suggested_alt=suggestion_data.suggested_alt,
+            final_alt=final_alt,
+            model=suggestion_data.model,
+            context_snippet=suggestion_data.context_snippet,
+            line_number=suggestion_data.line_number,
+        )
+
+
+def _filter_suggestions_by_existing(
+    suggestions: Sequence[utils.AltGenerationResult],
+    output_path: Path,
+    console: Console,
+) -> list[utils.AltGenerationResult]:
+    """Filter out suggestions that already have captions."""
+    existing_captions = utils.load_existing_captions(output_path)
+    filtered = [
+        s for s in suggestions if s.asset_path not in existing_captions
+    ]
+
+    skipped_count = len(suggestions) - len(filtered)
+    if skipped_count > 0:
+        console.print(
+            f"[dim]Skipped {skipped_count} items with existing captions[/dim]"
+        )
+
+    return filtered
+
+
+def _handle_undo_request(
+    session: LabelingSession,
+    console: Console,
+) -> None:
+    """Handle an undo request by reverting to the previous suggestion."""
+    undone_result = session.undo()
+
+    if undone_result is None:
+        console.print("[yellow]Nothing to undo - at the beginning[/yellow]")
+        return
+
+    console.print(f"[yellow]Undoing: {undone_result.asset_path}[/yellow]")
+
+    # Prefill with the previous final_alt value
+    prefill_text = (
+        undone_result.final_alt
+        if undone_result.final_alt is not None
+        else undone_result.suggested_alt
+    )
+    session.suggestions[session.current_index] = replace(
+        session.suggestions[session.current_index],
+        final_alt=prefill_text,
+    )
+
+
+def _process_labeling_loop(
+    session: LabelingSession,
+    display: DisplayManager,
+    console: Console,
+) -> None:
+    """Process all suggestions in the labeling session."""
+    while not session.is_complete():
+        current_suggestion = session.get_current_suggestion()
+        if current_suggestion is None:
+            break
+
+        try:
+            current, total = session.get_progress()
+            result = _process_single_suggestion_for_labeling(
+                current_suggestion, display, current=current, total=total
+            )
+
+            if result.final_alt == UNDO_REQUESTED:
+                _handle_undo_request(session, console)
+            else:
+                session.add_result(result)
+
+        except (
+            utils.AltGenerationError,
+            FileNotFoundError,
+            requests.RequestException,
+        ) as err:
+            display.show_error(str(err))
+            session.skip_current()
+
+
+def label_suggestions(
+    suggestions: Sequence[utils.AltGenerationResult],
+    console: Console,
+    output_path: Path,
+    append_mode: bool,
+    vi_mode: bool = False,
+) -> int:
+    """Let the user label the given suggestions, collecting and saving results."""
+    console.print(
+        f"\n[bold blue]Labeling {len(suggestions)} suggestions[/bold blue]\n"
+    )
+
+    suggestions_to_process = (
+        _filter_suggestions_by_existing(suggestions, output_path, console)
+        if append_mode
+        else suggestions
+    )
+
+    session = LabelingSession(suggestions_to_process)
+    display = DisplayManager(console, vi_mode=vi_mode)
+
+    try:
+        _process_labeling_loop(session, display, console)
+    finally:
+        if session.processed_results:
+            utils.write_output(
+                session.processed_results, output_path, append_mode=append_mode
+            )
+            console.print(
+                f"[green]Saved {len(session.processed_results)} results to {output_path}[/green]"
+            )
+
+    return len(session.processed_results)
+
+
+def label_from_suggestions_file(
+    suggestions_file: Path,
+    output_path: Path,
+    skip_existing: bool = False,
+    vi_mode: bool = False,
+) -> None:
+    """Load suggestions from a file and start the labeling process."""
+    console = Console()
+
+    with open(suggestions_file, encoding="utf-8") as f:
+        suggestions_from_file = json.load(f)
+
+    # Convert loaded data to AltGenerationResult, filtering out extra fields
+    suggestions: list[utils.AltGenerationResult] = []
+    for s in suggestions_from_file:
+        filtered_data = {
+            "markdown_file": s["markdown_file"],
+            "asset_path": s["asset_path"],
+            "suggested_alt": s["suggested_alt"],
+            "model": s["model"],
+            "context_snippet": s["context_snippet"],
+            "line_number": int(s["line_number"]),
+        }
+        suggestions.append(utils.AltGenerationResult(**filtered_data))
+
+    console.print(
+        f"[green]Loaded {len(suggestions)} suggestions from {suggestions_file}[/green]"
+    )
+
+    processed_count = label_suggestions(
+        suggestions, console, output_path, append_mode=skip_existing, vi_mode=vi_mode
+    )
+
+    # Results were already written inside label_suggestions; report the total
+    console.print(
+        f"\n[green]Completed! Wrote {processed_count} results to {output_path}[/green]"
+    )
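The suggestions file consumed above is a JSON array whose entries carry at least the six fields read in the loop; any extra keys are dropped. A minimal example of the expected shape and a call to start labeling (file names and values are illustrative):

# suggestions.json:
# [
#   {
#     "markdown_file": "posts/example.md",
#     "asset_path": "images/diagram.png",
#     "suggested_alt": "A flow diagram of the build pipeline.",
#     "model": "gemini-2.5-flash",
#     "context_snippet": "![](images/diagram.png)",
#     "line_number": 12
#   }
# ]
label_from_suggestions_file(
    suggestions_file=Path("suggestions.json"),
    output_path=Path("captions.json"),
)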