PyPI - argus-code - Versions diffs - 0.2.0__py3-none-any.whl - Mend

argus-code 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

argus/__init__.py +3 -0
argus/adapters/__init__.py +7 -0
argus/adapters/base.py +108 -0
argus/adapters/claude_code/__init__.py +5 -0
argus/adapters/claude_code/adapter.py +63 -0
argus/adapters/claude_code/discover.py +72 -0
argus/adapters/claude_code/extract_tool_calls.py +86 -0
argus/adapters/claude_code/extract_transcript.py +111 -0
argus/adapters/claude_code/extract_turns.py +69 -0
argus/adapters/claude_code/history_jsonl.py +138 -0
argus/adapters/claude_code/ingest_file.py +137 -0
argus/adapters/claude_code/model.py +11 -0
argus/adapters/claude_code/schemas.py +77 -0
argus/adapters/registry.py +30 -0
argus/cli.py +384 -0
argus/collector/__init__.py +0 -0
argus/collector/aggregate.py +102 -0
argus/collector/first_run.py +189 -0
argus/collector/pipeline.py +140 -0
argus/collector/rollup_subagents.py +27 -0
argus/collector/scheduler.py +89 -0
argus/collector/search_backfill.py +109 -0
argus/collector/watcher.py +178 -0
argus/dashboard-dist/_astro/charts.BIevw6Es.js +1 -0
argus/dashboard-dist/_astro/format.DxC1NGYT.js +1 -0
argus/dashboard-dist/_astro/index.astro_astro_type_script_index_0_lang.CgwSARdD.js +24 -0
argus/dashboard-dist/_astro/index.astro_astro_type_script_index_0_lang.W18SJsr7.js +11 -0
argus/dashboard-dist/_astro/installCanvasRenderer.D_tC6TXz.js +18 -0
argus/dashboard-dist/_astro/models.astro_astro_type_script_index_0_lang.BHTHXYHC.js +13 -0
argus/dashboard-dist/_astro/prompts.astro_astro_type_script_index_0_lang.DfNgiDv9.js +17 -0
argus/dashboard-dist/_astro/session.astro_astro_type_script_index_0_lang.Dj_bfrIa.js +86 -0
argus/dashboard-dist/_astro/settings.astro_astro_type_script_index_0_lang.d_a-uvdi.js +24 -0
argus/dashboard-dist/_astro/tools.astro_astro_type_script_index_0_lang.Dzzau3Yt.js +12 -0
argus/dashboard-dist/_astro/trends.astro_astro_type_script_index_0_lang.BLLeGRNa.js +5 -0
argus/dashboard-dist/index.html +2 -0
argus/dashboard-dist/models/index.html +1 -0
argus/dashboard-dist/prompts/index.html +18 -0
argus/dashboard-dist/session/index.html +2 -0
argus/dashboard-dist/sessions/index.html +1 -0
argus/dashboard-dist/settings/index.html +8 -0
argus/dashboard-dist/styles/global.css +307 -0
argus/dashboard-dist/tools/index.html +1 -0
argus/dashboard-dist/trends/index.html +1 -0
argus/detectors/__init__.py +6 -0
argus/detectors/base.py +34 -0
argus/detectors/registry.py +20 -0
argus/detectors/tool_error_rate_spike.py +138 -0
argus/pricing/2026-05-02.json +24 -0
argus/pricing/__init__.py +0 -0
argus/pricing/compute.py +46 -0
argus/pricing/load.py +45 -0
argus/pricing/refresh.py +91 -0
argus/pricing/types.py +21 -0
argus/scaffold/__init__.py +0 -0
argus/scaffold/scaffolder.py +45 -0
argus/scaffold/snapshot.py +73 -0
argus/scaffold/storage.py +60 -0
argus/schema/__init__.py +0 -0
argus/schema/types.py +157 -0
argus/server/__init__.py +0 -0
argus/server/api.py +661 -0
argus/server/app.py +97 -0
argus/store/__init__.py +0 -0
argus/store/db.py +103 -0
argus/store/migrations/__init__.py +0 -0
argus/store/migrations/inline.py +180 -0
argus/store/repository.py +778 -0
argus/templates/default/.claude/agents/code-reviewer.md +27 -0
argus/templates/default/.claude/agents/security-auditor.md +28 -0
argus/templates/default/.claude/commands/commit.md +38 -0
argus/templates/default/.claude/commands/deploy.md +13 -0
argus/templates/default/.claude/commands/fix-issue.md +15 -0
argus/templates/default/.claude/commands/pr.md +38 -0
argus/templates/default/.claude/commands/review.md +14 -0
argus/templates/default/.claude/rules/api-conventions.md +27 -0
argus/templates/default/.claude/rules/code-style.md +25 -0
argus/templates/default/.claude/rules/testing.md +19 -0
argus/templates/default/.claude/settings.json +28 -0
argus/templates/default/.claude/skills/example/SKILL.md +11 -0
argus/templates/default/CLAUDE.md +57 -0
argus_code-0.2.0.dist-info/METADATA +247 -0
argus_code-0.2.0.dist-info/RECORD +86 -0
argus_code-0.2.0.dist-info/WHEEL +4 -0
argus_code-0.2.0.dist-info/entry_points.txt +2 -0
argus_code-0.2.0.dist-info/licenses/LICENSE +21 -0
argus_code-0.2.0.dist-info/licenses/NOTICE +22 -0

argus/collector/aggregate.py ADDED Viewed

@@ -0,0 +1,102 @@
+"""Build Session and Turn rows from adapter-supplied raw events."""
+from __future__ import annotations
+from datetime import datetime, timezone
+from ..adapters.base import AdapterIngestResult
+from ..pricing.compute import compute_turn_cost
+from ..pricing.types import PricingTable
+from ..schema.types import RawSessionHeader, RawTurnEvent, Session, Turn
+def _iso_now() -> str:
+    return datetime.now(timezone.utc).isoformat()
+def build_turn(raw: RawTurnEvent, session_id: str, table: PricingTable) -> Turn:
+    return Turn(
+        id=f"{session_id}:{raw.native_turn_id}",
+        session_id=session_id,
+        sequence=raw.sequence,
+        timestamp=raw.timestamp,
+        model=raw.model,
+        model_raw=raw.model_raw,
+        fresh_input_tokens=raw.fresh_input_tokens,
+        output_tokens=raw.output_tokens,
+        cache_read_tokens=raw.cache_read_tokens,
+        cache_write_tokens=raw.cache_write_tokens,
+        cache_write_5m_tokens=raw.cache_write_5m_tokens,
+        cache_write_1h_tokens=raw.cache_write_1h_tokens,
+        tool_calls_count=raw.tool_calls_count,
+        cost_usd=compute_turn_cost(raw, table),
+        metadata=raw.metadata,
+    )
+def build_session(
+    header: RawSessionHeader,
+    session_id: str,
+    all_turns: list[Turn],
+    pricing_version: str,
+) -> Session:
+    computed_at = _iso_now()
+    fresh = sum(t.fresh_input_tokens for t in all_turns)
+    out = sum(t.output_tokens for t in all_turns)
+    cr = sum(t.cache_read_tokens for t in all_turns)
+    cw = sum(t.cache_write_tokens for t in all_turns)
+    cost = sum(t.cost_usd for t in all_turns)
+    # primary_model = the model with the most (input + output) tokens.
+    model_tokens: dict[str, int] = {}
+    for t in all_turns:
+        model_tokens[t.model] = (
+            model_tokens.get(t.model, 0) + t.fresh_input_tokens + t.output_tokens
+        )
+    primary = (
+        sorted(model_tokens.items(), key=lambda kv: kv[1], reverse=True)[0][0]
+        if model_tokens
+        else "unknown"
+    )
+    started_at = header.started_at or (all_turns[0].timestamp if all_turns else computed_at)
+    ended_at = header.ended_at or (all_turns[-1].timestamp if all_turns else None)
+    duration: int | None = None
+    if ended_at:
+        try:
+            s = datetime.fromisoformat(started_at.replace("Z", "+00:00"))
+            e = datetime.fromisoformat(ended_at.replace("Z", "+00:00"))
+            duration = max(0, int((e - s).total_seconds()))
+        except (ValueError, TypeError):
+            duration = None
+    return Session(
+        id=session_id,
+        agent=header.agent,
+        agent_version=header.agent_version,
+        project_path=header.project_path,
+        started_at=started_at,
+        ended_at=ended_at,
+        duration_sec=duration,
+        total_fresh_input_tokens=fresh,
+        total_output_tokens=out,
+        total_cache_read_tokens=cr,
+        total_cache_write_tokens=cw,
+        total_cost_usd=cost,
+        primary_model=primary,
+        turn_count=len(all_turns),
+        pricing_table_version=pricing_version,
+        computed_at=computed_at,
+        agent_reported_cost_usd=header.agent_reported_cost_usd,
+        metadata=header.metadata,
+    )
+def aggregate_adapter_result(
+    r: AdapterIngestResult, table: PricingTable
+) -> tuple[Session, list[Turn]]:
+    """Backward-compat helper: build a (session, turns) pair from a fresh result."""
+    session_id = f"{r.header.agent}:{r.header.native_session_id}"
+    turns = [build_turn(t, session_id, table) for t in r.turns]
+    session = build_session(r.header, session_id, turns, table.version)
+    return session, turns

argus/collector/first_run.py ADDED Viewed

@@ -0,0 +1,189 @@
+"""First-pass ingest: walk every adapter's files, recent first.
+Recent files run synchronously in the foreground so the dashboard is
+useful immediately. Older files run afterwards on a background daemon thread.
+"""
+from __future__ import annotations
+import logging
+import threading
+import time
+from concurrent.futures import Future, ThreadPoolExecutor
+from dataclasses import dataclass
+from pathlib import Path
+from ..adapters.base import Adapter
+from ..pricing.types import PricingTable
+from ..store.repository import Repository
+from .pipeline import ingest_file
+logger = logging.getLogger(__name__)
+@dataclass
+class IngestStatus:
+    """Point-in-time snapshot of first-pass ingest progress."""
+    # True once the synchronous (recent-files) phase has finished.
+    foreground_complete: bool
+    # Files discovered but not yet attempted; never negative.
+    pending: int
+    # Files attempted so far, whether they ingested cleanly or failed.
+    processed: int
+    # Total session files discovered across all adapters.
+    total: int
+class FirstRunHandle:
+    """Returned by ``run_first_pass_ingest``; exposes foreground/backfill futures."""
+    def __init__(self) -> None:
+        self._processed = 0
+        self._total = 0
+        self._foreground_complete = False
+        self._lock = threading.Lock()
+        self._foreground_done = threading.Event()
+        self._backfill_done = threading.Event()
+    def _inc(self) -> None:
+        with self._lock:
+            self._processed += 1
+    def status(self) -> IngestStatus:
+        with self._lock:
+            return IngestStatus(
+                foreground_complete=self._foreground_complete,
+                pending=max(0, self._total - self._processed),
+                processed=self._processed,
+                total=self._total,
+            )
+    def wait_foreground(self, timeout: float | None = None) -> bool:
+        return self._foreground_done.wait(timeout)
+    def wait_backfill(self, timeout: float | None = None) -> bool:
+        return self._backfill_done.wait(timeout)
+def run_first_pass_ingest(
+    adapters: list[Adapter],
+    repo: Repository,
+    table: PricingTable,
+    *,
+    recent_days: int = 30,
+) -> FirstRunHandle:
+    """Kick off ingest. Recent files run inline; older files in a thread.
+    Returns immediately with a handle whose ``status()`` is pollable, and
+    whose ``wait_foreground()`` / ``wait_backfill()`` block until each
+    phase finishes.
+    """
+    cutoff = time.time() - recent_days * 86_400
+    handle = FirstRunHandle()
+    # Phase 1 (foreground, sync, in the calling thread).
+    recent: list[tuple[Adapter, Path]] = []
+    older: list[tuple[Adapter, Path]] = []
+    for a in adapters:
+        for f in a.discover_session_files():
+            try:
+                mtime = f.stat().st_mtime
+            except OSError:
+                continue
+            (recent if mtime >= cutoff else older).append((a, f))
+    with handle._lock:
+        handle._total = len(recent) + len(older)
+    for adapter, file in recent:
+        try:
+            ingest_file(adapter, file, repo, table)
+        except Exception as e:  # noqa: BLE001
+            repo.record_parse_error(
+                {
+                    "file": str(file),
+                    "byte_offset": -1,
+                    "reason": f"[ingest] {e}",
+                    "raw_line_truncated": "",
+                }
+            )
+        handle._inc()
+    # Also ingest adapter-specific extras (e.g., history.jsonl) during the
+    # foreground phase so they're available to the dashboard immediately.
+    for a in adapters:
+        for extra in a.extra_watch_paths():
+            try:
+                a.ingest_extra(extra, repo)
+            except Exception as e:  # noqa: BLE001
+                repo.record_parse_error(
+                    {
+                        "file": str(extra),
+                        "byte_offset": -1,
+                        "reason": f"[history] {e}",
+                        "raw_line_truncated": "",
+                    }
+                )
+    with handle._lock:
+        handle._foreground_complete = True
+    handle._foreground_done.set()
+    # Phase 2 (background) — older files + missing-data backfill.
+    def _background() -> None:
+        for adapter, file in older:
+            try:
+                ingest_file(adapter, file, repo, table)
+            except Exception as e:  # noqa: BLE001
+                repo.record_parse_error(
+                    {
+                        "file": str(file),
+                        "byte_offset": -1,
+                        "reason": f"[ingest] {e}",
+                        "raw_line_truncated": "",
+                    }
+                )
+            handle._inc()
+        _backfill_missing_derived_data(adapters, repo, table)
+        handle._backfill_done.set()
+    threading.Thread(target=_background, name="argus-firstrun-bg", daemon=True).start()
+    return handle
+def _backfill_missing_derived_data(
+    adapters: list[Adapter], repo: Repository, table: PricingTable
+) -> None:
+    """Re-ingest sessions missing tool_calls / segments after a slice upgrade."""
+    missing_tools = repo.sessions_missing_tool_calls(200)
+    ids: set[str] = {c["id"] for c in missing_tools}
+    if repo.is_search_indexing_enabled():
+        for c in repo.sessions_missing_segments(200):
+            ids.add(c["id"])
+    candidates = sorted(ids)[:200]
+    if not candidates:
+        return
+    # session_id "claude_code:<basename>" → file path lookup.
+    file_by_basename: dict[str, tuple[Adapter, Path]] = {}
+    for a in adapters:
+        for f in a.discover_session_files():
+            file_by_basename[f.stem] = (a, f)
+    for id_ in candidates:
+        if "/" in id_:  # sub-agent rollup ids — walked via parents
+            continue
+        colon = id_.find(":")
+        if colon < 0:
+            continue
+        native = id_[colon + 1 :]
+        match = file_by_basename.get(native)
+        if match is None:
+            continue
+        adapter, file = match
+        repo.set_file_offset(str(file), 0)
+        try:
+            ingest_file(adapter, file, repo, table)
+        except Exception as e:  # noqa: BLE001
+            repo.record_parse_error(
+                {
+                    "file": str(file),
+                    "byte_offset": -1,
+                    "reason": f"[backfill-tools] {e}",
+                    "raw_line_truncated": "",
+                }
+            )

argus/collector/pipeline.py ADDED Viewed

@@ -0,0 +1,140 @@
+"""Orchestrate one ingest: read new bytes, upsert turns/calls/segments, recompute session."""
+from __future__ import annotations
+from pathlib import Path
+from ..adapters.base import Adapter, RawSegment, RawToolCall
+from ..pricing.types import PricingTable
+from ..schema.types import Session, ToolCall, TranscriptSegment
+from ..store.repository import Repository
+from .aggregate import build_session, build_turn
+from .rollup_subagents import rollup_subagents
+def _to_tool_call(r: RawToolCall, session_id: str) -> ToolCall:
+    return ToolCall(
+        id=f"{session_id}:{r.tool_use_id}",
+        session_id=session_id,
+        turn_index=r.turn_index,
+        tool_name=r.tool_name,
+        is_error=r.is_error,
+        input_size=r.input_size,
+        subagent_type=r.subagent_type,
+        timestamp=r.timestamp,
+    )
+def _to_segment(r: RawSegment, session_id: str) -> TranscriptSegment:
+    return TranscriptSegment(
+        uid=f"{session_id}:{r.uid_suffix}",
+        session_id=session_id,
+        timestamp=r.timestamp,
+        role=r.role,  # type: ignore[arg-type]
+        text=r.text,
+    )
+def ingest_file(
+    adapter: Adapter, file_path: Path, repo: Repository, table: PricingTable
+) -> None:
+    """Read new bytes from ``file_path`` via ``adapter`` and upsert into ``repo``.
+    Reading resumes from the offset stored in ``repo`` for this path. Parse
+    errors reported by the adapter are always recorded. When the new bytes
+    contain no turns, only the offset is advanced and no session row is
+    created or touched. Otherwise turns, tool calls and (when search indexing
+    is enabled) transcript segments are upserted, each sub-session file the
+    adapter reports is ingested the same way under ``<session_id>/<stem>``,
+    and the parent session is recomputed from all stored turns with the
+    sub-session totals rolled up on top. The parent's offset is stored last.
+    """
+    file_str = str(file_path)
+    from_offset = repo.get_file_offset(file_str)
+    result, new_offset = adapter.ingest_file(file_path, from_offset)
+    for e in result.parse_errors:
+        repo.record_parse_error(
+            {
+                "file": e.file,
+                "byte_offset": e.byte_offset,
+                "reason": e.reason,
+                "raw_line_truncated": e.raw_line_truncated,
+            }
+        )
+    # No turn events in the bytes we just read.
+    #   (a) re-ingest with no growth — nothing to do
+    #   (b) first ingest of a file with only metadata / user lines / hooks
+    #   (c) Codex stub (binary launched, no prompt sent)
+    # For (b)/(c) we still record the new offset, but MUST NOT create an
+    # empty session row that would clutter the dashboard.
+    if not result.turns:
+        if new_offset > from_offset:
+            repo.set_file_offset(file_str, new_offset)
+        return
+    session_id = f"{result.header.agent}:{result.header.native_session_id}"
+    # Ensure a session row exists (FK target for turns + tool_calls).
+    # The placeholder has no turns; real totals are written at the end.
+    if repo.get_session(session_id) is None:
+        repo.upsert_session(build_session(result.header, session_id, [], table.version))
+    for raw in result.turns:
+        repo.upsert_turn(build_turn(raw, session_id, table))
+    if result.tool_calls:
+        repo.upsert_tool_calls([_to_tool_call(r, session_id) for r in result.tool_calls])
+    if result.segments and repo.is_search_indexing_enabled():
+        repo.upsert_transcript_segments(
+            [_to_segment(r, session_id) for r in result.segments]
+        )
+    # Sub-agents: each sub-agent JSONL becomes its own session under
+    # <sessionId>/<filename>. Adapter decides what counts as a sub-session
+    # via sub_session_files_for(); the pipeline never branches on agent.
+    sub_sessions: list[Session] = []
+    if not adapter.should_skip(file_path):
+        for sub in adapter.sub_session_files_for(file_path):
+            sub_session_id = f"{session_id}/{sub.stem}"
+            # Each sub-session file tracks its own read offset.
+            sub_from_offset = repo.get_file_offset(str(sub))
+            sub_result, sub_new_offset = adapter.ingest_file(sub, sub_from_offset)
+            for e in sub_result.parse_errors:
+                repo.record_parse_error(
+                    {
+                        "file": e.file,
+                        "byte_offset": e.byte_offset,
+                        "reason": e.reason,
+                        "raw_line_truncated": e.raw_line_truncated,
+                    }
+                )
+            # Create the placeholder row only when there are turns to attach,
+            # mirroring the "no empty sessions" rule applied to the parent.
+            if (
+                repo.get_session(sub_session_id) is None
+                and sub_result.turns
+            ):
+                repo.upsert_session(
+                    build_session(sub_result.header, sub_session_id, [], table.version)
+                )
+            for raw in sub_result.turns:
+                repo.upsert_turn(build_turn(raw, sub_session_id, table))
+            if sub_result.tool_calls:
+                repo.upsert_tool_calls(
+                    [_to_tool_call(r, sub_session_id) for r in sub_result.tool_calls]
+                )
+            if sub_result.segments and repo.is_search_indexing_enabled():
+                repo.upsert_transcript_segments(
+                    [_to_segment(r, sub_session_id) for r in sub_result.segments]
+                )
+            # Recompute the sub-session from every stored turn. A sub-session
+            # that exists from an earlier run still joins the rollup even if
+            # this read produced no new turns.
+            existing_sub = repo.get_session(sub_session_id)
+            if existing_sub:
+                all_sub_turns = repo.get_turns_for_session(sub_session_id)
+                recomputed = build_session(
+                    sub_result.header, sub_session_id, all_sub_turns, table.version
+                )
+                repo.upsert_session(recomputed)
+                sub_sessions.append(recomputed)
+            repo.set_file_offset(str(sub), sub_new_offset)
+    # Recompute parent session totals from ALL stored turns (the new ones
+    # we just upserted + any previously stored). Then layer the sub-agent
+    # rollup on top — build_session only sums the parent's own turns, so
+    # this is idempotent.
+    all_turns = repo.get_turns_for_session(session_id)
+    session = build_session(result.header, session_id, all_turns, table.version)
+    if sub_sessions:
+        session = rollup_subagents(session, sub_sessions)
+    repo.upsert_session(session)
+    # Store the offset last, after everything read from this file is persisted.
+    repo.set_file_offset(file_str, new_offset)

argus/collector/rollup_subagents.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""Roll sub-agent session totals up into the parent."""
+from __future__ import annotations
+from ..schema.types import Session
+def rollup_subagents(parent: Session, subs: list[Session]) -> Session:
+    if not subs:
+        return parent
+    fresh = parent.total_fresh_input_tokens + sum(s.total_fresh_input_tokens for s in subs)
+    out = parent.total_output_tokens + sum(s.total_output_tokens for s in subs)
+    cr = parent.total_cache_read_tokens + sum(s.total_cache_read_tokens for s in subs)
+    cw = parent.total_cache_write_tokens + sum(s.total_cache_write_tokens for s in subs)
+    cost = parent.total_cost_usd + sum(s.total_cost_usd for s in subs)
+    turns = parent.turn_count + sum(s.turn_count for s in subs)
+    metadata = {**parent.metadata, "sub_agent_session_ids": [s.id for s in subs]}
+    return parent.model_copy(
+        update={
+            "total_fresh_input_tokens": fresh,
+            "total_output_tokens": out,
+            "total_cache_read_tokens": cr,
+            "total_cache_write_tokens": cw,
+            "total_cost_usd": cost,
+            "turn_count": turns,
+            "metadata": metadata,
+        }
+    )

argus/collector/scheduler.py ADDED Viewed

@@ -0,0 +1,89 @@
+"""Periodic detector loop. Daemon thread inside the same process as the
+server; runs each detector once at boot, then every ``interval_sec``
+(default 600) until ``.stop()`` is called.
+Detectors are pure: they read the repo and return Findings. The scheduler
+is the only thing that writes alerts (``repo.upsert_alert``). This keeps
+the "detectors are pure" rule a structural property rather than a
+convention.
+"""
+from __future__ import annotations
+import logging
+import threading
+from datetime import datetime, timezone
+from ..detectors.base import Detector, Finding
+from ..schema.types import Alert
+from ..store.repository import Repository
+logger = logging.getLogger(__name__)
+def _now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+def _finding_to_alert(f: Finding, now_iso: str) -> Alert:
+    return Alert(
+        detector=f.detector,
+        dedup_key=f.dedup_key,
+        severity=f.severity,
+        title=f.title,
+        message=f.message,
+        metadata=dict(f.metadata),
+        first_seen_at=now_iso,
+        last_seen_at=now_iso,
+        seen_at=None,
+    )
+def _run_once(detectors: list[Detector], repo: Repository) -> None:
+    now = _now_iso()
+    for detector in detectors:
+        try:
+            findings = detector.detect(repo, now)
+        except Exception:  # noqa: BLE001
+            logger.exception("Detector %s crashed", getattr(detector, "name", "?"))
+            continue
+        active_keys: list[str] = []
+        for f in findings:
+            try:
+                repo.upsert_alert(_finding_to_alert(f, now))
+                active_keys.append(f.dedup_key)
+            except Exception:  # noqa: BLE001
+                logger.exception("Failed to write alert from %s", detector.name)
+        try:
+            repo.resolve_stale_alerts(
+                detector=detector.name, active_dedup_keys=active_keys
+            )
+        except Exception:  # noqa: BLE001
+            logger.exception("Failed to reconcile alerts for %s", detector.name)
+class SchedulerHandle:
+    """Handle for a running scheduler thread; ``stop()`` ends the loop."""
+    def __init__(self, thread: threading.Thread, stop_event: threading.Event) -> None:
+        self._thread = thread
+        self._stop = stop_event
+    def stop(self) -> None:
+        """Signal the loop to exit and wait up to 5 seconds for the thread."""
+        self._stop.set()
+        # Bounded join: returns after 5 s even if the thread is still running.
+        self._thread.join(timeout=5)
+def start_scheduler(
+    detectors: list[Detector],
+    repo: Repository,
+    *,
+    interval_sec: int = 600,
+) -> SchedulerHandle:
+    stop_event = threading.Event()
+    def loop() -> None:
+        _run_once(detectors, repo)  # startup tick
+        while not stop_event.wait(interval_sec):
+            _run_once(detectors, repo)
+    t = threading.Thread(target=loop, name="argus-scheduler", daemon=True)
+    t.start()
+    return SchedulerHandle(t, stop_event)

argus/collector/search_backfill.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""Background search-index backfill.
+Re-ingests sessions that don't yet have transcript_segments rows. Caller
+sets ``enable_transcript_search`` first; the pipeline then writes
+segments as a side effect of the re-ingest.
+"""
+from __future__ import annotations
+import threading
+import time
+from dataclasses import dataclass
+from ..adapters.base import Adapter
+from ..pricing.types import PricingTable
+from ..store.repository import Repository
+from .pipeline import ingest_file
+@dataclass
+class SearchBackfillStatus:
+    """Progress snapshot of the transcript-segment backfill."""
+    # True while a backfill worker is running.
+    in_progress: bool
+    # Candidate sessions handled so far, including skipped and failed ones.
+    processed: int
+    # Number of candidate sessions selected when the backfill started.
+    total: int
+    # Wall-clock start time in epoch milliseconds; None before the first run.
+    started_at_ms: int | None
+    # Wall-clock finish time in epoch milliseconds; None while running.
+    finished_at_ms: int | None
+# Singleton process state: at most one backfill is tracked per process.
+# Every read and write of ``_state`` in this module happens under ``_lock``.
+_state = SearchBackfillStatus(
+    in_progress=False, processed=0, total=0, started_at_ms=None, finished_at_ms=None
+)
+_lock = threading.Lock()
+def get_search_backfill_status() -> SearchBackfillStatus:
+    with _lock:
+        return SearchBackfillStatus(
+            in_progress=_state.in_progress,
+            processed=_state.processed,
+            total=_state.total,
+            started_at_ms=_state.started_at_ms,
+            finished_at_ms=_state.finished_at_ms,
+        )
+def run_segment_backfill(
+    adapters: list[Adapter], repo: Repository, table: PricingTable
+) -> SearchBackfillStatus:
+    """Kick off (non-blocking) a backfill of missing transcript segments."""
+    with _lock:
+        if _state.in_progress:
+            return get_search_backfill_status()
+    # Build basename → (adapter, file) map for top-level claude_code files.
+    file_by_basename: dict[str, tuple[Adapter, "object"]] = {}
+    for a in adapters:
+        if a.agent != "claude_code":
+            continue
+        for f in a.discover_session_files():
+            file_by_basename[f.stem] = (a, f)
+    candidates = [
+        c for c in repo.sessions_missing_segments(1000) if "/" not in c["id"]
+    ]
+    with _lock:
+        _state.in_progress = True
+        _state.processed = 0
+        _state.total = len(candidates)
+        _state.started_at_ms = int(time.time() * 1000)
+        _state.finished_at_ms = None
+    def _worker() -> None:
+        try:
+            for c in candidates:
+                id_ = c["id"]
+                colon = id_.find(":")
+                if colon < 0:
+                    with _lock:
+                        _state.processed += 1
+                    continue
+                native = id_[colon + 1 :]
+                match = file_by_basename.get(native)
+                if match is None:
+                    with _lock:
+                        _state.processed += 1
+                    continue
+                adapter, file = match
+                repo.set_file_offset(str(file), 0)
+                try:
+                    ingest_file(adapter, file, repo, table)  # type: ignore[arg-type]
+                except Exception as e:  # noqa: BLE001
+                    repo.record_parse_error(
+                        {
+                            "file": str(file),
+                            "byte_offset": -1,
+                            "reason": f"[search-backfill] {e}",
+                            "raw_line_truncated": "",
+                        }
+                    )
+                with _lock:
+                    _state.processed += 1
+        finally:
+            with _lock:
+                _state.in_progress = False
+                _state.finished_at_ms = int(time.time() * 1000)
+    threading.Thread(target=_worker, name="argus-search-backfill", daemon=True).start()
+    return get_search_backfill_status()