argus-code 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. argus/__init__.py +3 -0
  2. argus/adapters/__init__.py +7 -0
  3. argus/adapters/base.py +108 -0
  4. argus/adapters/claude_code/__init__.py +5 -0
  5. argus/adapters/claude_code/adapter.py +63 -0
  6. argus/adapters/claude_code/discover.py +72 -0
  7. argus/adapters/claude_code/extract_tool_calls.py +86 -0
  8. argus/adapters/claude_code/extract_transcript.py +111 -0
  9. argus/adapters/claude_code/extract_turns.py +69 -0
  10. argus/adapters/claude_code/history_jsonl.py +138 -0
  11. argus/adapters/claude_code/ingest_file.py +137 -0
  12. argus/adapters/claude_code/model.py +11 -0
  13. argus/adapters/claude_code/schemas.py +77 -0
  14. argus/adapters/registry.py +30 -0
  15. argus/cli.py +384 -0
  16. argus/collector/__init__.py +0 -0
  17. argus/collector/aggregate.py +102 -0
  18. argus/collector/first_run.py +189 -0
  19. argus/collector/pipeline.py +140 -0
  20. argus/collector/rollup_subagents.py +27 -0
  21. argus/collector/scheduler.py +89 -0
  22. argus/collector/search_backfill.py +109 -0
  23. argus/collector/watcher.py +178 -0
  24. argus/dashboard-dist/_astro/charts.BIevw6Es.js +1 -0
  25. argus/dashboard-dist/_astro/format.DxC1NGYT.js +1 -0
  26. argus/dashboard-dist/_astro/index.astro_astro_type_script_index_0_lang.CgwSARdD.js +24 -0
  27. argus/dashboard-dist/_astro/index.astro_astro_type_script_index_0_lang.W18SJsr7.js +11 -0
  28. argus/dashboard-dist/_astro/installCanvasRenderer.D_tC6TXz.js +18 -0
  29. argus/dashboard-dist/_astro/models.astro_astro_type_script_index_0_lang.BHTHXYHC.js +13 -0
  30. argus/dashboard-dist/_astro/prompts.astro_astro_type_script_index_0_lang.DfNgiDv9.js +17 -0
  31. argus/dashboard-dist/_astro/session.astro_astro_type_script_index_0_lang.Dj_bfrIa.js +86 -0
  32. argus/dashboard-dist/_astro/settings.astro_astro_type_script_index_0_lang.d_a-uvdi.js +24 -0
  33. argus/dashboard-dist/_astro/tools.astro_astro_type_script_index_0_lang.Dzzau3Yt.js +12 -0
  34. argus/dashboard-dist/_astro/trends.astro_astro_type_script_index_0_lang.BLLeGRNa.js +5 -0
  35. argus/dashboard-dist/index.html +2 -0
  36. argus/dashboard-dist/models/index.html +1 -0
  37. argus/dashboard-dist/prompts/index.html +18 -0
  38. argus/dashboard-dist/session/index.html +2 -0
  39. argus/dashboard-dist/sessions/index.html +1 -0
  40. argus/dashboard-dist/settings/index.html +8 -0
  41. argus/dashboard-dist/styles/global.css +307 -0
  42. argus/dashboard-dist/tools/index.html +1 -0
  43. argus/dashboard-dist/trends/index.html +1 -0
  44. argus/detectors/__init__.py +6 -0
  45. argus/detectors/base.py +34 -0
  46. argus/detectors/registry.py +20 -0
  47. argus/detectors/tool_error_rate_spike.py +138 -0
  48. argus/pricing/2026-05-02.json +24 -0
  49. argus/pricing/__init__.py +0 -0
  50. argus/pricing/compute.py +46 -0
  51. argus/pricing/load.py +45 -0
  52. argus/pricing/refresh.py +91 -0
  53. argus/pricing/types.py +21 -0
  54. argus/scaffold/__init__.py +0 -0
  55. argus/scaffold/scaffolder.py +45 -0
  56. argus/scaffold/snapshot.py +73 -0
  57. argus/scaffold/storage.py +60 -0
  58. argus/schema/__init__.py +0 -0
  59. argus/schema/types.py +157 -0
  60. argus/server/__init__.py +0 -0
  61. argus/server/api.py +661 -0
  62. argus/server/app.py +97 -0
  63. argus/store/__init__.py +0 -0
  64. argus/store/db.py +103 -0
  65. argus/store/migrations/__init__.py +0 -0
  66. argus/store/migrations/inline.py +180 -0
  67. argus/store/repository.py +778 -0
  68. argus/templates/default/.claude/agents/code-reviewer.md +27 -0
  69. argus/templates/default/.claude/agents/security-auditor.md +28 -0
  70. argus/templates/default/.claude/commands/commit.md +38 -0
  71. argus/templates/default/.claude/commands/deploy.md +13 -0
  72. argus/templates/default/.claude/commands/fix-issue.md +15 -0
  73. argus/templates/default/.claude/commands/pr.md +38 -0
  74. argus/templates/default/.claude/commands/review.md +14 -0
  75. argus/templates/default/.claude/rules/api-conventions.md +27 -0
  76. argus/templates/default/.claude/rules/code-style.md +25 -0
  77. argus/templates/default/.claude/rules/testing.md +19 -0
  78. argus/templates/default/.claude/settings.json +28 -0
  79. argus/templates/default/.claude/skills/example/SKILL.md +11 -0
  80. argus/templates/default/CLAUDE.md +57 -0
  81. argus_code-0.2.0.dist-info/METADATA +247 -0
  82. argus_code-0.2.0.dist-info/RECORD +86 -0
  83. argus_code-0.2.0.dist-info/WHEEL +4 -0
  84. argus_code-0.2.0.dist-info/entry_points.txt +2 -0
  85. argus_code-0.2.0.dist-info/licenses/LICENSE +21 -0
  86. argus_code-0.2.0.dist-info/licenses/NOTICE +22 -0
@@ -0,0 +1,102 @@
1
+ """Build Session and Turn rows from adapter-supplied raw events."""
2
+ from __future__ import annotations
3
+
4
+ from datetime import datetime, timezone
5
+
6
+ from ..adapters.base import AdapterIngestResult
7
+ from ..pricing.compute import compute_turn_cost
8
+ from ..pricing.types import PricingTable
9
+ from ..schema.types import RawSessionHeader, RawTurnEvent, Session, Turn
10
+
11
+
12
def _iso_now() -> str:
    """Return the current UTC time as an ISO-8601 string with a ``+00:00`` offset."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
14
+
15
+
16
def build_turn(raw: RawTurnEvent, session_id: str, table: PricingTable) -> Turn:
    """Convert one adapter-supplied raw turn event into a ``Turn`` row.

    The turn id is ``"<session_id>:<native_turn_id>"``. Token counters,
    model fields and metadata are copied from ``raw`` unchanged; the cost
    is computed by ``compute_turn_cost(raw, table)``.
    """
    # Fields that are carried over from the raw event verbatim.
    passthrough = (
        "sequence",
        "timestamp",
        "model",
        "model_raw",
        "fresh_input_tokens",
        "output_tokens",
        "cache_read_tokens",
        "cache_write_tokens",
        "cache_write_5m_tokens",
        "cache_write_1h_tokens",
        "tool_calls_count",
        "metadata",
    )
    copied = {name: getattr(raw, name) for name in passthrough}
    turn_id = f"{session_id}:{raw.native_turn_id}"
    return Turn(
        id=turn_id,
        session_id=session_id,
        cost_usd=compute_turn_cost(raw, table),
        **copied,
    )
34
+
35
+
36
def build_session(
    header: RawSessionHeader,
    session_id: str,
    all_turns: list[Turn],
    pricing_version: str,
) -> Session:
    """Aggregate ``all_turns`` and ``header`` into a ``Session`` row.

    Args:
        header: Adapter-supplied session header; supplies agent info, project
            path, optional start/end timestamps, reported cost and metadata.
        session_id: Id to store on the resulting session.
        all_turns: Every turn that belongs to the session. May be empty, in
            which case all totals are zero and ``primary_model`` is
            ``"unknown"``.
        pricing_version: Version string recorded as ``pricing_table_version``.

    Returns:
        A ``Session`` whose totals are sums over ``all_turns``.
    """
    computed_at = _iso_now()
    fresh = sum(t.fresh_input_tokens for t in all_turns)
    out = sum(t.output_tokens for t in all_turns)
    cr = sum(t.cache_read_tokens for t in all_turns)
    cw = sum(t.cache_write_tokens for t in all_turns)
    cost = sum(t.cost_usd for t in all_turns)

    # primary_model = the model with the most (input + output) tokens.
    model_tokens: dict[str, int] = {}
    for t in all_turns:
        model_tokens[t.model] = (
            model_tokens.get(t.model, 0) + t.fresh_input_tokens + t.output_tokens
        )
    # max() scans keys in insertion order and keeps the first maximum, which
    # is the same tie-break a stable descending sort produced, without
    # sorting the whole mapping.
    primary = (
        max(model_tokens, key=model_tokens.__getitem__)
        if model_tokens
        else "unknown"
    )

    # Header timestamps win; otherwise fall back to the first/last turn as
    # given (callers are expected to pass turns in order — not verified here).
    started_at = header.started_at or (all_turns[0].timestamp if all_turns else computed_at)
    ended_at = header.ended_at or (all_turns[-1].timestamp if all_turns else None)

    duration: int | None = None
    if ended_at:
        try:
            # fromisoformat() on older Pythons rejects a trailing "Z".
            s = datetime.fromisoformat(started_at.replace("Z", "+00:00"))
            e = datetime.fromisoformat(ended_at.replace("Z", "+00:00"))
            # Clamp so an end before the start never yields a negative value.
            duration = max(0, int((e - s).total_seconds()))
        except (ValueError, TypeError):
            # Unparseable timestamps, or naive/aware mix: leave unknown.
            duration = None

    return Session(
        id=session_id,
        agent=header.agent,
        agent_version=header.agent_version,
        project_path=header.project_path,
        started_at=started_at,
        ended_at=ended_at,
        duration_sec=duration,
        total_fresh_input_tokens=fresh,
        total_output_tokens=out,
        total_cache_read_tokens=cr,
        total_cache_write_tokens=cw,
        total_cost_usd=cost,
        primary_model=primary,
        turn_count=len(all_turns),
        pricing_table_version=pricing_version,
        computed_at=computed_at,
        agent_reported_cost_usd=header.agent_reported_cost_usd,
        metadata=header.metadata,
    )
93
+
94
+
95
def aggregate_adapter_result(
    r: AdapterIngestResult, table: PricingTable
) -> tuple[Session, list[Turn]]:
    """Backward-compat helper: turn a fresh adapter result into ``(session, turns)``.

    The session id is ``"<agent>:<native_session_id>"`` taken from the
    result header; the session totals are computed from the built turns.
    """
    header = r.header
    session_id = f"{header.agent}:{header.native_session_id}"

    turns: list[Turn] = []
    for raw_turn in r.turns:
        turns.append(build_turn(raw_turn, session_id, table))

    return build_session(header, session_id, turns, table.version), turns
@@ -0,0 +1,189 @@
1
+ """First-pass ingest: walk every adapter's files, recent first.
2
+
3
+ Recent files run synchronously in the foreground so the dashboard is
4
+ useful immediately. Older files run in a single background daemon thread.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import threading
10
+ import time
11
+ from concurrent.futures import Future, ThreadPoolExecutor
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+
15
+ from ..adapters.base import Adapter
16
+ from ..pricing.types import PricingTable
17
+ from ..store.repository import Repository
18
+ from .pipeline import ingest_file
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
@dataclass
class IngestStatus:
    """Point-in-time snapshot of first-pass ingest progress."""

    # True once the synchronous (recent-files) phase has finished.
    foreground_complete: bool
    # Files discovered but not yet processed (never negative).
    pending: int
    # Files ingested so far, whether or not ingest succeeded.
    processed: int
    # Files discovered across all adapters.
    total: int
29
+
30
+
31
class FirstRunHandle:
    """Progress handle returned by ``run_first_pass_ingest``.

    Counters are guarded by a lock; the end of each phase is signalled
    through a ``threading.Event`` that callers can block on.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._foreground_done = threading.Event()
        self._backfill_done = threading.Event()
        self._foreground_complete = False
        self._total = 0
        self._processed = 0

    def _inc(self) -> None:
        """Record that one more file has been handled."""
        with self._lock:
            self._processed = self._processed + 1

    def status(self) -> IngestStatus:
        """Return a consistent snapshot of the current counters."""
        with self._lock:
            done = self._processed
            expected = self._total
            foreground = self._foreground_complete
        remaining = expected - done
        return IngestStatus(
            foreground_complete=foreground,
            pending=remaining if remaining > 0 else 0,
            processed=done,
            total=expected,
        )

    def wait_foreground(self, timeout: float | None = None) -> bool:
        """Block until the foreground phase ends; False if ``timeout`` elapsed first."""
        return self._foreground_done.wait(timeout)

    def wait_backfill(self, timeout: float | None = None) -> bool:
        """Block until the background phase ends; False if ``timeout`` elapsed first."""
        return self._backfill_done.wait(timeout)
60
+
61
+
62
def run_first_pass_ingest(
    adapters: list[Adapter],
    repo: Repository,
    table: PricingTable,
    *,
    recent_days: int = 30,
) -> FirstRunHandle:
    """Run the first-pass ingest: recent files inline, older files in a thread.

    Files modified within the last ``recent_days`` days, plus every adapter's
    extra watch paths, are ingested synchronously in the calling thread.
    The function returns once that foreground phase is done. Older files and
    the missing-derived-data backfill then run in a daemon thread.

    Args:
        adapters: Adapters whose session files should be ingested.
        repo: Repository receiving rows and parse-error records.
        table: Pricing table handed to the ingest pipeline.
        recent_days: Age cutoff (by file mtime) for the foreground phase.

    Returns:
        A handle whose ``status()`` is pollable and whose
        ``wait_foreground()`` / ``wait_backfill()`` block until each phase
        finishes.
    """
    cutoff = time.time() - recent_days * 86_400
    handle = FirstRunHandle()

    def _record_failure(path: object, tag: str, exc: Exception) -> None:
        # Whole-file failures have no meaningful byte offset or raw line.
        repo.record_parse_error(
            {
                "file": str(path),
                "byte_offset": -1,
                "reason": f"[{tag}] {exc}",
                "raw_line_truncated": "",
            }
        )

    def _ingest_counted(adapter: Adapter, file: Path) -> None:
        # Best-effort: a failing file is recorded and still counted as processed.
        try:
            ingest_file(adapter, file, repo, table)
        except Exception as e:  # noqa: BLE001
            _record_failure(file, "ingest", e)
        handle._inc()

    # Phase 1 (foreground, sync, in the calling thread).
    recent: list[tuple[Adapter, Path]] = []
    older: list[tuple[Adapter, Path]] = []
    for a in adapters:
        for f in a.discover_session_files():
            try:
                mtime = f.stat().st_mtime
            except OSError:
                # File vanished or is unreadable between discovery and stat.
                continue
            (recent if mtime >= cutoff else older).append((a, f))

    with handle._lock:
        handle._total = len(recent) + len(older)

    for adapter, file in recent:
        _ingest_counted(adapter, file)

    # Also ingest adapter-specific extras (e.g., history.jsonl) during the
    # foreground phase so they're available to the dashboard immediately.
    for a in adapters:
        for extra in a.extra_watch_paths():
            try:
                a.ingest_extra(extra, repo)
            except Exception as e:  # noqa: BLE001
                _record_failure(extra, "history", e)

    with handle._lock:
        handle._foreground_complete = True
    handle._foreground_done.set()

    # Phase 2 (background) — older files + missing-data backfill.
    def _background() -> None:
        try:
            for adapter, file in older:
                _ingest_counted(adapter, file)
            _backfill_missing_derived_data(adapters, repo, table)
        except Exception:  # noqa: BLE001
            logger.exception("First-run background ingest failed")
        finally:
            # Always signal completion; otherwise a crash here would leave
            # wait_backfill() callers blocked forever.
            handle._backfill_done.set()

    threading.Thread(target=_background, name="argus-firstrun-bg", daemon=True).start()
    return handle
146
+
147
+
148
def _backfill_missing_derived_data(
    adapters: list[Adapter], repo: Repository, table: PricingTable
) -> None:
    """Re-ingest sessions missing tool_calls / segments after a slice upgrade.

    At most 200 session ids are processed per call. For each id whose native
    part matches a discovered file stem, the stored file offset is reset to
    0 and the file is ingested again from the start. Ingest failures are
    recorded as parse errors rather than raised.
    """
    wanted: set[str] = {row["id"] for row in repo.sessions_missing_tool_calls(200)}
    if repo.is_search_indexing_enabled():
        wanted.update(row["id"] for row in repo.sessions_missing_segments(200))

    candidates = sorted(wanted)[:200]
    if not candidates:
        return

    # session_id "claude_code:<basename>" → file path lookup.
    file_by_basename: dict[str, tuple[Adapter, Path]] = {}
    for a in adapters:
        for f in a.discover_session_files():
            file_by_basename[f.stem] = (a, f)

    for session_id in candidates:
        # Sub-agent rollup ids contain "/" — they are walked via their parents.
        if "/" in session_id:
            continue
        _agent, sep, native = session_id.partition(":")
        if not sep:
            continue
        found = file_by_basename.get(native)
        if found is None:
            continue
        adapter, file = found
        file_key = str(file)
        # Rewind so the pipeline re-reads the whole file.
        repo.set_file_offset(file_key, 0)
        try:
            ingest_file(adapter, file, repo, table)
        except Exception as e:  # noqa: BLE001
            repo.record_parse_error(
                {
                    "file": file_key,
                    "byte_offset": -1,
                    "reason": f"[backfill-tools] {e}",
                    "raw_line_truncated": "",
                }
            )
@@ -0,0 +1,140 @@
1
+ """Orchestrate one ingest: read new bytes, upsert turns/calls/segments, recompute session."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+ from ..adapters.base import Adapter, RawSegment, RawToolCall
7
+ from ..pricing.types import PricingTable
8
+ from ..schema.types import Session, ToolCall, TranscriptSegment
9
+ from ..store.repository import Repository
10
+ from .aggregate import build_session, build_turn
11
+ from .rollup_subagents import rollup_subagents
12
+
13
+
14
def _to_tool_call(r: RawToolCall, session_id: str) -> ToolCall:
    """Build a ``ToolCall`` row for ``session_id`` from an adapter raw tool call.

    The row id is ``"<session_id>:<tool_use_id>"``; all other fields are
    copied from ``r`` unchanged.
    """
    call_id = f"{session_id}:{r.tool_use_id}"
    copied = {
        name: getattr(r, name)
        for name in (
            "turn_index",
            "tool_name",
            "is_error",
            "input_size",
            "subagent_type",
            "timestamp",
        )
    }
    return ToolCall(id=call_id, session_id=session_id, **copied)
25
+
26
+
27
def _to_segment(r: RawSegment, session_id: str) -> TranscriptSegment:
    """Build a ``TranscriptSegment`` row for ``session_id`` from a raw segment.

    The row uid is ``"<session_id>:<uid_suffix>"``.
    """
    segment_uid = f"{session_id}:{r.uid_suffix}"
    return TranscriptSegment(
        uid=segment_uid,
        session_id=session_id,
        timestamp=r.timestamp,
        role=r.role,  # type: ignore[arg-type]
        text=r.text,
    )
35
+
36
+
37
def ingest_file(
    adapter: Adapter, file_path: Path, repo: Repository, table: PricingTable
) -> None:
    """Read new bytes from ``file_path`` via ``adapter`` and upsert into ``repo``.

    Parse errors reported by the adapter are always recorded. When the newly
    read bytes contain turns, the turns, tool calls and (if search indexing
    is enabled) transcript segments are upserted, sub-session files are
    ingested the same way, and the parent session row is recomputed from all
    stored turns with sub-session totals rolled up on top.
    """

    def _log_parse_errors(errors) -> None:
        for err in errors:
            repo.record_parse_error(
                {
                    "file": err.file,
                    "byte_offset": err.byte_offset,
                    "reason": err.reason,
                    "raw_line_truncated": err.raw_line_truncated,
                }
            )

    def _persist_rows(batch, owner_id: str) -> None:
        # Turns first, then tool calls, then segments.
        for raw_turn in batch.turns:
            repo.upsert_turn(build_turn(raw_turn, owner_id, table))
        if batch.tool_calls:
            repo.upsert_tool_calls(
                [_to_tool_call(call, owner_id) for call in batch.tool_calls]
            )
        if batch.segments and repo.is_search_indexing_enabled():
            repo.upsert_transcript_segments(
                [_to_segment(seg, owner_id) for seg in batch.segments]
            )

    path_key = str(file_path)
    start_offset = repo.get_file_offset(path_key)
    result, end_offset = adapter.ingest_file(file_path, start_offset)
    _log_parse_errors(result.parse_errors)

    # No turn events in the bytes we just read.
    #   (a) re-ingest with no growth — nothing to do
    #   (b) first ingest of a file with only metadata / user lines / hooks
    #   (c) Codex stub (binary launched, no prompt sent)
    # For (b)/(c) we still record the new offset, but MUST NOT create an
    # empty session row that would clutter the dashboard.
    if not result.turns:
        if end_offset > start_offset:
            repo.set_file_offset(path_key, end_offset)
        return

    session_id = f"{result.header.agent}:{result.header.native_session_id}"

    # Ensure a session row exists (FK target for turns + tool_calls).
    if repo.get_session(session_id) is None:
        repo.upsert_session(build_session(result.header, session_id, [], table.version))

    _persist_rows(result, session_id)

    # Sub-agents: each sub-agent JSONL becomes its own session under
    # <sessionId>/<filename>. Adapter decides what counts as a sub-session
    # via sub_session_files_for(); the pipeline never branches on agent.
    rolled_up: list[Session] = []
    sub_files = (
        () if adapter.should_skip(file_path) else adapter.sub_session_files_for(file_path)
    )
    for sub in sub_files:
        sub_id = f"{session_id}/{sub.stem}"
        sub_key = str(sub)
        sub_result, sub_end_offset = adapter.ingest_file(
            sub, repo.get_file_offset(sub_key)
        )
        _log_parse_errors(sub_result.parse_errors)

        # Only create the sub-session row once it actually has turns.
        if repo.get_session(sub_id) is None and sub_result.turns:
            repo.upsert_session(
                build_session(sub_result.header, sub_id, [], table.version)
            )
        _persist_rows(sub_result, sub_id)

        if repo.get_session(sub_id):
            refreshed = build_session(
                sub_result.header,
                sub_id,
                repo.get_turns_for_session(sub_id),
                table.version,
            )
            repo.upsert_session(refreshed)
            rolled_up.append(refreshed)
        repo.set_file_offset(sub_key, sub_end_offset)

    # Recompute parent session totals from ALL stored turns (the new ones
    # we just upserted + any previously stored). Then layer the sub-agent
    # rollup on top — build_session only sums the parent's own turns, so
    # this is idempotent.
    parent = build_session(
        result.header, session_id, repo.get_turns_for_session(session_id), table.version
    )
    if rolled_up:
        parent = rollup_subagents(parent, rolled_up)
    repo.upsert_session(parent)
    repo.set_file_offset(path_key, end_offset)
@@ -0,0 +1,27 @@
1
+ """Roll sub-agent session totals up into the parent."""
2
+ from __future__ import annotations
3
+
4
+ from ..schema.types import Session
5
+
6
+
7
def rollup_subagents(parent: Session, subs: list[Session]) -> Session:
    """Return a copy of ``parent`` with sub-agent session totals added in.

    Token totals, cost and turn count become parent + sum over ``subs``, and
    the ids of ``subs`` are stored under ``metadata["sub_agent_session_ids"]``.
    With no sub-sessions, ``parent`` itself is returned unchanged.
    """
    if not subs:
        return parent

    summed_fields = (
        "total_fresh_input_tokens",
        "total_output_tokens",
        "total_cache_read_tokens",
        "total_cache_write_tokens",
        "total_cost_usd",
        "turn_count",
    )
    changes = {
        field: getattr(parent, field) + sum(getattr(s, field) for s in subs)
        for field in summed_fields
    }
    changes["metadata"] = {
        **parent.metadata,
        "sub_agent_session_ids": [s.id for s in subs],
    }
    return parent.model_copy(update=changes)
@@ -0,0 +1,89 @@
1
+ """Periodic detector loop. Daemon thread inside the same process as the
2
+ server; runs each detector once at boot, then every ``interval_sec``
3
+ (default 600) until ``.stop()`` is called.
4
+
5
+ Detectors are pure: they read the repo and return Findings. The scheduler
6
+ is the only thing that writes alerts (``repo.upsert_alert``). This keeps
7
+ the "detectors are pure" rule a structural property rather than a
8
+ convention.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ import threading
14
+ from datetime import datetime, timezone
15
+
16
+ from ..detectors.base import Detector, Finding
17
+ from ..schema.types import Alert
18
+ from ..store.repository import Repository
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
def _now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix."""
    stamp = datetime.now(tz=timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
25
+
26
+
27
def _finding_to_alert(f: Finding, now_iso: str) -> Alert:
    """Build an ``Alert`` from a detector ``Finding``.

    Both ``first_seen_at`` and ``last_seen_at`` are set to ``now_iso`` and
    ``seen_at`` is left unset; the metadata mapping is shallow-copied.
    """
    copied = {
        attr: getattr(f, attr)
        for attr in ("detector", "dedup_key", "severity", "title", "message")
    }
    return Alert(
        **copied,
        metadata=dict(f.metadata),
        first_seen_at=now_iso,
        last_seen_at=now_iso,
        seen_at=None,
    )
39
+
40
+
41
def _run_once(detectors: list[Detector], repo: Repository) -> None:
    """Run every detector once and persist its findings as alerts.

    Each detector is isolated: a crash in ``detect`` is logged and that
    detector is skipped entirely, so its existing alerts are not reconciled
    against an empty result. Otherwise each finding is upserted as an alert
    and the detector's alerts are then reconciled against the set of
    currently active dedup keys.

    Args:
        detectors: Detectors to run, in order.
        repo: Repository read by detectors and written to by this function.
    """
    now = _now_iso()
    for detector in detectors:
        # Safe label for log lines, so logging can never raise from inside
        # an exception handler when a detector lacks ``name``.
        label = getattr(detector, "name", "?")
        try:
            findings = detector.detect(repo, now)
        except Exception:  # noqa: BLE001
            logger.exception("Detector %s crashed", label)
            continue
        active_keys: list[str] = []
        for f in findings:
            # The finding is active whether or not the write succeeds.
            # Recording its key first keeps a transient write failure from
            # letting the reconcile step below treat a still-firing alert as
            # stale (presumably what resolve_stale_alerts does with missing
            # keys — confirm against the repository).
            active_keys.append(f.dedup_key)
            try:
                repo.upsert_alert(_finding_to_alert(f, now))
            except Exception:  # noqa: BLE001
                logger.exception("Failed to write alert from %s", label)
        try:
            repo.resolve_stale_alerts(
                detector=detector.name, active_dedup_keys=active_keys
            )
        except Exception:  # noqa: BLE001
            logger.exception("Failed to reconcile alerts for %s", label)
62
+
63
+
64
class SchedulerHandle:
    """Handle used to stop the scheduler thread."""

    def __init__(self, thread: threading.Thread, stop_event: threading.Event) -> None:
        self._stop = stop_event
        self._thread = thread

    def stop(self) -> None:
        """Signal the stop event, then wait up to 5 seconds for the thread to exit."""
        stop_signal, worker = self._stop, self._thread
        stop_signal.set()
        worker.join(timeout=5)
72
+
73
+
74
def start_scheduler(
    detectors: list[Detector],
    repo: Repository,
    *,
    interval_sec: int = 600,
) -> SchedulerHandle:
    """Start the detector loop in a daemon thread and return a stop handle.

    The detectors run once immediately, then again after every
    ``interval_sec`` seconds until the handle's ``stop()`` is called.
    """
    stop_event = threading.Event()

    def loop() -> None:
        while True:
            _run_once(detectors, repo)
            # wait() returns True as soon as a stop is requested, otherwise
            # False once the interval has elapsed.
            if stop_event.wait(interval_sec):
                return

    worker = threading.Thread(target=loop, name="argus-scheduler", daemon=True)
    worker.start()
    return SchedulerHandle(worker, stop_event)
@@ -0,0 +1,109 @@
1
+ """Background search-index backfill.
2
+
3
+ Re-ingests sessions that don't yet have transcript_segments rows. Caller
4
+ sets ``enable_transcript_search`` first; the pipeline then writes
5
+ segments as a side effect of the re-ingest.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import threading
10
+ import time
11
+ from dataclasses import dataclass
12
+
13
+ from ..adapters.base import Adapter
14
+ from ..pricing.types import PricingTable
15
+ from ..store.repository import Repository
16
+ from .pipeline import ingest_file
17
+
18
+
19
@dataclass
class SearchBackfillStatus:
    """Snapshot of the search-index backfill progress."""

    # True from the moment a backfill is claimed until its worker exits.
    in_progress: bool
    # Candidate sessions handled so far (including skipped ones).
    processed: int
    # Candidate sessions selected for the current/last run.
    total: int
    # Wall-clock start/finish of the current/last run, in epoch milliseconds.
    started_at_ms: int | None
    finished_at_ms: int | None


# Singleton process state; every read and write goes through ``_lock``.
_state = SearchBackfillStatus(
    in_progress=False, processed=0, total=0, started_at_ms=None, finished_at_ms=None
)
# Plain (non-reentrant) lock: never call a function that takes it while
# already holding it.
_lock = threading.Lock()


def get_search_backfill_status() -> SearchBackfillStatus:
    """Return a copy of the current backfill state, taken under the lock."""
    with _lock:
        return SearchBackfillStatus(
            in_progress=_state.in_progress,
            processed=_state.processed,
            total=_state.total,
            started_at_ms=_state.started_at_ms,
            finished_at_ms=_state.finished_at_ms,
        )


def run_segment_backfill(
    adapters: list[Adapter], repo: Repository, table: PricingTable
) -> SearchBackfillStatus:
    """Kick off (non-blocking) a backfill of missing transcript segments.

    If a backfill is already running, no new one is started and the current
    status is returned. Otherwise up to 1000 top-level sessions without
    segments are re-ingested from offset 0 in a daemon thread.

    Args:
        adapters: Available adapters; only those whose ``agent`` is
            ``"claude_code"`` contribute files.
        repo: Repository queried for candidates and written to by the ingest.
        table: Pricing table handed to the ingest pipeline.

    Returns:
        A status snapshot taken right after the worker was started (or of
        the run already in progress).
    """
    # Check and claim in one critical section so two concurrent callers
    # cannot both start a worker. The status snapshot is taken only after
    # releasing the lock: get_search_backfill_status() acquires the same
    # non-reentrant lock and would deadlock if called while it is held.
    with _lock:
        already_running = _state.in_progress
        if not already_running:
            _state.in_progress = True
            _state.processed = 0
            _state.total = 0
            _state.started_at_ms = int(time.time() * 1000)
            _state.finished_at_ms = None
    if already_running:
        return get_search_backfill_status()

    def _finish() -> None:
        with _lock:
            _state.in_progress = False
            _state.finished_at_ms = int(time.time() * 1000)

    def _tick() -> None:
        with _lock:
            _state.processed += 1

    try:
        # Build basename → (adapter, file) map for top-level claude_code files.
        file_by_basename: dict[str, tuple[Adapter, "object"]] = {}
        for a in adapters:
            if a.agent != "claude_code":
                continue
            for f in a.discover_session_files():
                file_by_basename[f.stem] = (a, f)

        # Ids containing "/" are sub-agent sessions; skip them here.
        candidates = [
            c for c in repo.sessions_missing_segments(1000) if "/" not in c["id"]
        ]
    except BaseException:
        # Release the claim so a failed preparation does not block later runs.
        _finish()
        raise

    with _lock:
        _state.total = len(candidates)

    def _worker() -> None:
        try:
            for c in candidates:
                id_ = c["id"]
                _agent, sep, native = id_.partition(":")
                match = file_by_basename.get(native) if sep else None
                if match is None:
                    # Malformed id or no file on disk: count it and move on.
                    _tick()
                    continue
                adapter, file = match
                # Rewind so the pipeline re-reads the whole file.
                repo.set_file_offset(str(file), 0)
                try:
                    ingest_file(adapter, file, repo, table)  # type: ignore[arg-type]
                except Exception as e:  # noqa: BLE001
                    repo.record_parse_error(
                        {
                            "file": str(file),
                            "byte_offset": -1,
                            "reason": f"[search-backfill] {e}",
                            "raw_line_truncated": "",
                        }
                    )
                _tick()
        finally:
            _finish()

    try:
        threading.Thread(
            target=_worker, name="argus-search-backfill", daemon=True
        ).start()
    except BaseException:
        _finish()
        raise
    return get_search_backfill_status()