codevigil 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {codevigil-0.3.0 → codevigil-0.4.0}/PKG-INFO +7 -6
  2. {codevigil-0.3.0 → codevigil-0.4.0}/README.md +6 -5
  3. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/__init__.py +1 -1
  4. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/aggregator.py +100 -15
  5. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/analysis/processed_store.py +169 -43
  6. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/analysis/store.py +73 -17
  7. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/cli.py +84 -34
  8. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/config.py +151 -1
  9. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/history/detail_cmd.py +6 -2
  10. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/history/diff_cmd.py +7 -3
  11. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/history/heatmap_cmd.py +6 -2
  12. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/ingest.py +26 -10
  13. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/renderers/terminal.py +44 -18
  14. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/report/renderer.py +16 -1
  15. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/types.py +3 -0
  16. codevigil-0.4.0/codevigil/watch_roots.py +88 -0
  17. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/watcher.py +56 -1
  18. {codevigil-0.3.0 → codevigil-0.4.0}/pyproject.toml +1 -1
  19. {codevigil-0.3.0 → codevigil-0.4.0}/.gitignore +0 -0
  20. {codevigil-0.3.0 → codevigil-0.4.0}/LICENSE +0 -0
  21. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/__main__.py +0 -0
  22. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/analysis/__init__.py +0 -0
  23. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/analysis/cohort.py +0 -0
  24. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/analysis/compare.py +0 -0
  25. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/analysis/correlations.py +0 -0
  26. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/analysis/guards.py +0 -0
  27. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/bootstrap.py +0 -0
  28. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/classifier.py +0 -0
  29. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/collectors/__init__.py +0 -0
  30. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/collectors/_text_match.py +0 -0
  31. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/collectors/parse_health.py +0 -0
  32. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/collectors/prompts.py +0 -0
  33. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/collectors/read_edit_ratio.py +0 -0
  34. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/collectors/reasoning_loop.py +0 -0
  35. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/collectors/stop_phrase.py +0 -0
  36. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/collectors/thinking.py +0 -0
  37. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/errors.py +0 -0
  38. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/history/__init__.py +0 -0
  39. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/history/filters.py +0 -0
  40. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/history/list_cmd.py +0 -0
  41. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/parser.py +0 -0
  42. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/privacy.py +0 -0
  43. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/projects.py +0 -0
  44. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/registry.py +0 -0
  45. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/renderers/__init__.py +0 -0
  46. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/renderers/_bars.py +0 -0
  47. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/renderers/json_file.py +0 -0
  48. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/report/__init__.py +0 -0
  49. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/report/loader.py +0 -0
  50. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/turns.py +0 -0
  51. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/ui/progress.py +0 -0
  52. {codevigil-0.3.0 → codevigil-0.4.0}/codevigil/watcher_cache.py +0 -0
  53. {codevigil-0.3.0 → codevigil-0.4.0}/docs/README.md +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codevigil
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Local, privacy-preserving observability for Claude Code sessions.
5
5
  Project-URL: Homepage, https://github.com/Mathews-Tom/codevigil
6
6
  Project-URL: Issues, https://github.com/Mathews-Tom/codevigil/issues
@@ -231,7 +231,7 @@ Local, privacy-preserving observability for Claude Code sessions.
231
231
  codevigil tails `~/.claude/projects/**/*.jsonl` on disk, computes signal metrics about reasoning and tool-use patterns, and surfaces them in a rich terminal dashboard or as JSON / markdown reports. **Zero network egress, no data ever leaves your machine.**
232
232
 
233
233
  [![Status](https://img.shields.io/badge/status-beta-blue.svg)](https://github.com/Mathews-Tom/codevigil)
234
- [![Version](https://img.shields.io/badge/version-0.3.0-informational.svg)](CHANGELOG.md)
234
+ [![Version](https://img.shields.io/badge/version-0.4.0-informational.svg)](CHANGELOG.md)
235
235
  [![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12-blue.svg)](https://www.python.org/downloads/)
236
236
  [![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
237
237
  [![CI](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml/badge.svg)](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml)
@@ -261,7 +261,7 @@ codevigil ingest # one-shot cold-ingest into persistent memory (first run
261
261
  codevigil watch # project roll-up dashboard, resumes every file from its cached cursor
262
262
  ```
263
263
 
264
- `codevigil ingest` walks every JSONL under `watch.root`, parses it end-to-end, and writes a durable record (session id, file id, cursor offset, collector state, metric summary) to the local SQLite store under `~/.local/state/codevigil/`. You run it once after install. Subsequent `codevigil watch` ticks seek past the saved cursor on every file, so the hot path only processes newly-appended events. If the store is absent on startup, `watch` will bootstrap it for you.
264
+ `codevigil ingest` walks every JSONL under `watch.roots`, parses them end-to-end, and writes a durable record (root-aware session key, raw session id, file id, cursor offset, collector state, metric summary) to the local SQLite store under `~/.local/state/codevigil/`. You run it once after install. Subsequent `codevigil watch` ticks seek past the saved cursor on every file, so the hot path only processes newly-appended events. If the store is absent on startup, `watch` will bootstrap it for you.
265
265
 
266
266
  `codevigil watch` then prints a live **project-row** dashboard: one row per Claude Code project, with the fleet-worst severity, the active session count, and the aggregate metric summary. The top line shows fleet totals (session count, CRIT/WARN/OK tallies, project count, last-updated wall-clock tick). Every session's rolling-window collector state is restored from the store so restart does not erase your percentile baselines.
267
267
 
@@ -317,12 +317,13 @@ Full flag reference for every subcommand: [docs/cli.md](docs/cli.md).
317
317
 
318
318
  ## Configuration
319
319
 
320
- codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. Run `codevigil config check` to see every resolved key with its source.
320
+ codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. `watch.roots` is the canonical multi-root setting; `watch.root` and `CODEVIGIL_WATCH_ROOT` remain supported as deprecated single-root aliases. Run `codevigil config check` to see every resolved key with its source and any deprecation notices.
321
321
 
322
322
  A minimal `~/.config/codevigil/config.toml`:
323
323
 
324
324
  ```toml
325
325
  [watch]
326
+ roots = ["~/.claude/projects"]
326
327
  poll_interval = 1.0
327
328
 
328
329
  [collectors.read_edit_ratio]
@@ -349,7 +350,7 @@ Five user-facing collectors plus an always-on integrity gate:
349
350
 
350
351
  ## Persistent memory
351
352
 
352
- 0.3.0 adds a local SQLite-backed processed-session store under `~/.local/state/codevigil/processed/`. Every finalised session writes a durable row — session id, file id, cursor byte offset, collector state snapshot, and derived metric summary — and the watcher seeds each polled file from the cached cursor on startup instead of re-parsing JSONL from byte 0. Rolling-window collector state (the `read_edit_ratio` 50-event deque, the `reasoning_loop` burst counter) is restored verbatim across restarts. Run `codevigil ingest` once after install; after that, `codevigil watch` only processes newly-appended events on the hot path. Disable the cursor cache for reproducible cold-start benchmarks with `watch.cursor_cache_enabled = false`. Schema, migration policy, and the invariants the store upholds live in [docs/design.md](docs/design.md).
353
+ 0.4.0 adds first-class multi-root support on top of the local SQLite-backed processed-session store under `~/.local/state/codevigil/processed/`. Every finalised session now writes a root-aware identity (`session_key`, raw `session_id`, cursor byte offset, collector state snapshot, and derived metric summary), and the watcher seeds each polled file from the cached cursor on startup instead of re-parsing JSONL from byte 0. Rolling-window collector state (the `read_edit_ratio` 50-event deque, the `reasoning_loop` burst counter) is restored verbatim across restarts, even when different roots contain the same `session_id`. Run `codevigil ingest` once after install; after that, `codevigil watch` only processes newly-appended events on the hot path. Disable the cursor cache for reproducible cold-start benchmarks with `watch.cursor_cache_enabled = false`. Schema, migration policy, and the invariants the store upholds live in [docs/design.md](docs/design.md).
353
354
 
354
355
  ## Cohort trend reports
355
356
 
@@ -366,7 +367,7 @@ Both new default collectors (`thinking`, `prompts`) surface in cohort reports as
366
367
 
367
368
  ## Task classifier `[experimental]`
368
369
 
369
- 0.2.0 adds a turn-level task classifier that labels each Claude Code turn as `exploration`, `mutation_heavy`, `debug_loop`, `planning`, or `mixed` using a two-stage cascade (tool-presence heuristic → keyword regex on the user message, stdlib `re` only, zero network, zero new dependencies). Session-level labels aggregate turn labels by majority vote. Labels surface in four places:
370
+ The experimental task classifier labels each Claude Code turn as `exploration`, `mutation_heavy`, `debug_loop`, `planning`, or `mixed` using a two-stage cascade (tool-presence heuristic → keyword regex on the user message, stdlib `re` only, zero network, zero new dependencies). Session-level labels aggregate turn labels by majority vote. Labels surface in four places:
370
371
 
371
372
  - **`history list`** — new `task_type` column and `--task-type <label>` filter
372
373
  - **`history heatmap --axis task_type`** — cross-tab metrics against task labels
@@ -5,7 +5,7 @@ Local, privacy-preserving observability for Claude Code sessions.
5
5
  codevigil tails `~/.claude/projects/**/*.jsonl` on disk, computes signal metrics about reasoning and tool-use patterns, and surfaces them in a rich terminal dashboard or as JSON / markdown reports. **Zero network egress, no data ever leaves your machine.**
6
6
 
7
7
  [![Status](https://img.shields.io/badge/status-beta-blue.svg)](https://github.com/Mathews-Tom/codevigil)
8
- [![Version](https://img.shields.io/badge/version-0.3.0-informational.svg)](CHANGELOG.md)
8
+ [![Version](https://img.shields.io/badge/version-0.4.0-informational.svg)](CHANGELOG.md)
9
9
  [![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12-blue.svg)](https://www.python.org/downloads/)
10
10
  [![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
11
11
  [![CI](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml/badge.svg)](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml)
@@ -35,7 +35,7 @@ codevigil ingest # one-shot cold-ingest into persistent memory (first run
35
35
  codevigil watch # project roll-up dashboard, resumes every file from its cached cursor
36
36
  ```
37
37
 
38
- `codevigil ingest` walks every JSONL under `watch.root`, parses it end-to-end, and writes a durable record (session id, file id, cursor offset, collector state, metric summary) to the local SQLite store under `~/.local/state/codevigil/`. You run it once after install. Subsequent `codevigil watch` ticks seek past the saved cursor on every file, so the hot path only processes newly-appended events. If the store is absent on startup, `watch` will bootstrap it for you.
38
+ `codevigil ingest` walks every JSONL under `watch.roots`, parses them end-to-end, and writes a durable record (root-aware session key, raw session id, file id, cursor offset, collector state, metric summary) to the local SQLite store under `~/.local/state/codevigil/`. You run it once after install. Subsequent `codevigil watch` ticks seek past the saved cursor on every file, so the hot path only processes newly-appended events. If the store is absent on startup, `watch` will bootstrap it for you.
39
39
 
40
40
  `codevigil watch` then prints a live **project-row** dashboard: one row per Claude Code project, with the fleet-worst severity, the active session count, and the aggregate metric summary. The top line shows fleet totals (session count, CRIT/WARN/OK tallies, project count, last-updated wall-clock tick). Every session's rolling-window collector state is restored from the store so restart does not erase your percentile baselines.
41
41
 
@@ -91,12 +91,13 @@ Full flag reference for every subcommand: [docs/cli.md](docs/cli.md).
91
91
 
92
92
  ## Configuration
93
93
 
94
- codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. Run `codevigil config check` to see every resolved key with its source.
94
+ codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. `watch.roots` is the canonical multi-root setting; `watch.root` and `CODEVIGIL_WATCH_ROOT` remain supported as deprecated single-root aliases. Run `codevigil config check` to see every resolved key with its source and any deprecation notices.
95
95
 
96
96
  A minimal `~/.config/codevigil/config.toml`:
97
97
 
98
98
  ```toml
99
99
  [watch]
100
+ roots = ["~/.claude/projects"]
100
101
  poll_interval = 1.0
101
102
 
102
103
  [collectors.read_edit_ratio]
@@ -123,7 +124,7 @@ Five user-facing collectors plus an always-on integrity gate:
123
124
 
124
125
  ## Persistent memory
125
126
 
126
- 0.3.0 adds a local SQLite-backed processed-session store under `~/.local/state/codevigil/processed/`. Every finalised session writes a durable row — session id, file id, cursor byte offset, collector state snapshot, and derived metric summary — and the watcher seeds each polled file from the cached cursor on startup instead of re-parsing JSONL from byte 0. Rolling-window collector state (the `read_edit_ratio` 50-event deque, the `reasoning_loop` burst counter) is restored verbatim across restarts. Run `codevigil ingest` once after install; after that, `codevigil watch` only processes newly-appended events on the hot path. Disable the cursor cache for reproducible cold-start benchmarks with `watch.cursor_cache_enabled = false`. Schema, migration policy, and the invariants the store upholds live in [docs/design.md](docs/design.md).
127
+ 0.4.0 adds first-class multi-root support on top of the local SQLite-backed processed-session store under `~/.local/state/codevigil/processed/`. Every finalised session now writes a root-aware identity (`session_key`, raw `session_id`, cursor byte offset, collector state snapshot, and derived metric summary), and the watcher seeds each polled file from the cached cursor on startup instead of re-parsing JSONL from byte 0. Rolling-window collector state (the `read_edit_ratio` 50-event deque, the `reasoning_loop` burst counter) is restored verbatim across restarts, even when different roots contain the same `session_id`. Run `codevigil ingest` once after install; after that, `codevigil watch` only processes newly-appended events on the hot path. Disable the cursor cache for reproducible cold-start benchmarks with `watch.cursor_cache_enabled = false`. Schema, migration policy, and the invariants the store upholds live in [docs/design.md](docs/design.md).
127
128
 
128
129
  ## Cohort trend reports
129
130
 
@@ -140,7 +141,7 @@ Both new default collectors (`thinking`, `prompts`) surface in cohort reports as
140
141
 
141
142
  ## Task classifier `[experimental]`
142
143
 
143
- 0.2.0 adds a turn-level task classifier that labels each Claude Code turn as `exploration`, `mutation_heavy`, `debug_loop`, `planning`, or `mixed` using a two-stage cascade (tool-presence heuristic → keyword regex on the user message, stdlib `re` only, zero network, zero new dependencies). Session-level labels aggregate turn labels by majority vote. Labels surface in four places:
144
+ The experimental task classifier labels each Claude Code turn as `exploration`, `mutation_heavy`, `debug_loop`, `planning`, or `mixed` using a two-stage cascade (tool-presence heuristic → keyword regex on the user message, stdlib `re` only, zero network, zero new dependencies). Session-level labels aggregate turn labels by majority vote. Labels surface in four places:
144
145
 
145
146
  - **`history list`** — new `task_type` column and `--task-type <label>` filter
146
147
  - **`history heatmap --axis task_type`** — cross-tab metrics against task labels
@@ -14,6 +14,6 @@ from codevigil.privacy import install as _install_privacy_hook
14
14
 
15
15
  _install_privacy_hook()
16
16
 
17
- __version__: str = "0.3.0"
17
+ __version__: str = "0.4.0"
18
18
 
19
19
  __all__ = ["PrivacyViolationError", "__version__"]
@@ -93,6 +93,7 @@ from codevigil.types import (
93
93
  SessionState,
94
94
  Severity,
95
95
  )
96
+ from codevigil.watch_roots import LEGACY_ROOT_ID, legacy_session_key
96
97
  from codevigil.watcher import Source, SourceEvent, SourceEventKind
97
98
 
98
99
  _PARSE_HEALTH_NAME: str = "parse_health"
@@ -123,6 +124,9 @@ class _SessionContext:
123
124
  """
124
125
 
125
126
  session_id: str
127
+ session_key: str
128
+ root_id: str
129
+ root_label: str
126
130
  file_path: Path
127
131
  project_hash: str
128
132
  parser: SessionParser
@@ -158,6 +162,69 @@ class _SessionContext:
158
162
  _ClockFn = Callable[[], float]
159
163
 
160
164
 
165
+ class _SessionView:
166
+ """Compatibility view over the internal session-keyed context map."""
167
+
168
+ def __init__(self, sessions: dict[str, _SessionContext]) -> None:
169
+ self._sessions = sessions
170
+
171
+ def _resolve_key(self, key: str) -> str | None:
172
+ if key in self._sessions:
173
+ return key
174
+ matches = [
175
+ session_key for session_key, ctx in self._sessions.items() if ctx.session_id == key
176
+ ]
177
+ if len(matches) == 1:
178
+ return matches[0]
179
+ return None
180
+
181
+ def _get_resolved(self, key: str) -> _SessionContext | None:
182
+ resolved = self._resolve_key(key)
183
+ if resolved is None:
184
+ return None
185
+ return self._sessions[resolved]
186
+
187
+ def __getitem__(self, key: str) -> _SessionContext:
188
+ ctx = self._get_resolved(key)
189
+ if ctx is None:
190
+ raise KeyError(key)
191
+ return ctx
192
+
193
+ def get(self, key: str, default: Any = None) -> _SessionContext | Any:
194
+ ctx = self._get_resolved(key)
195
+ if ctx is None:
196
+ return default
197
+ return ctx
198
+
199
+ def pop(self, key: str, default: Any = None) -> _SessionContext | Any:
200
+ resolved = self._resolve_key(key)
201
+ if resolved is None:
202
+ return default
203
+ return self._sessions.pop(resolved, default)
204
+
205
+ def __contains__(self, key: object) -> bool:
206
+ return isinstance(key, str) and self._resolve_key(key) is not None
207
+
208
+ def values(self) -> Any:
209
+ return self._sessions.values()
210
+
211
+ def items(self) -> Any:
212
+ return self._sessions.items()
213
+
214
+ def __iter__(self) -> Any:
215
+ return iter(self._sessions)
216
+
217
+ def __len__(self) -> int:
218
+ return len(self._sessions)
219
+
220
+ def __eq__(self, other: object) -> bool:
221
+ if isinstance(other, dict):
222
+ return self._sessions == other
223
+ if isinstance(other, _SessionView):
224
+ return self._sessions == other._sessions
225
+ return NotImplemented
226
+
227
+
161
228
  class SessionAggregator:
162
229
  """Drive a :class:`Source` through parser, collectors, and lifecycle.
163
230
 
@@ -192,7 +259,7 @@ class SessionAggregator:
192
259
  self._sessions: dict[str, _SessionContext] = {}
193
260
  self._bootstrap: BootstrapManager | None = bootstrap
194
261
  # Phase C5: collector-state restore provider. Given a session
195
- # ID, returns a ``{collector_name: state_dict}`` mapping from
262
+ # key, returns a ``{collector_name: state_dict}`` mapping from
196
263
  # the processed-session store, or ``None`` when no persisted
197
264
  # state exists for that session. Called once from
198
265
  # ``_ensure_session`` immediately after fresh collectors are
@@ -233,10 +300,10 @@ class SessionAggregator:
233
300
  # --------------------------------------------------------------- properties
234
301
 
235
302
  @property
236
- def sessions(self) -> dict[str, _SessionContext]:
303
+ def sessions(self) -> _SessionView:
237
304
  """Read-only-ish accessor used by tests; do not mutate externally."""
238
305
 
239
- return self._sessions
306
+ return _SessionView(self._sessions)
240
307
 
241
308
  @property
242
309
  def eviction_churn(self) -> int:
@@ -369,17 +436,18 @@ class SessionAggregator:
369
436
  # the aggregator just keeps consuming.
370
437
  return
371
438
  if kind is SourceEventKind.DELETE:
372
- self._evict_session(source_event.session_id)
439
+ self._evict_session(self._event_session_key(source_event))
373
440
  return
374
441
 
375
442
  def _ensure_session(self, source_event: SourceEvent) -> _SessionContext:
376
443
  sid = source_event.session_id
377
- existing = self._sessions.get(sid)
444
+ session_key = self._event_session_key(source_event)
445
+ existing = self._sessions.get(session_key)
378
446
  if existing is not None:
379
447
  return existing
380
448
  parser = SessionParser(session_id=sid)
381
449
  collectors = self._instantiate_collectors(parser)
382
- self._maybe_restore_collector_state(sid, collectors)
450
+ self._maybe_restore_collector_state(session_key, collectors)
383
451
  now_clock = self._clock()
384
452
  now_wall = source_event.timestamp
385
453
  project_hash = self._extract_project_hash(source_event.path, session_id=sid)
@@ -393,6 +461,9 @@ class SessionAggregator:
393
461
  last_monotonic = now_clock - age_seconds
394
462
  ctx = _SessionContext(
395
463
  session_id=sid,
464
+ session_key=session_key,
465
+ root_id=source_event.root_id or LEGACY_ROOT_ID,
466
+ root_label=source_event.root_label or str(source_event.path.parent),
396
467
  file_path=source_event.path,
397
468
  project_hash=project_hash,
398
469
  parser=parser,
@@ -401,19 +472,19 @@ class SessionAggregator:
401
472
  last_event_time=now_wall,
402
473
  last_monotonic=last_monotonic,
403
474
  )
404
- self._sessions[sid] = ctx
475
+ self._sessions[session_key] = ctx
405
476
  return ctx
406
477
 
407
478
  def _maybe_restore_collector_state(
408
479
  self,
409
- session_id: str,
480
+ session_key: str,
410
481
  collectors: dict[str, Collector],
411
482
  ) -> None:
412
483
  """Hydrate collectors from persistent state when one is available.
413
484
 
414
485
  When the caller configured ``collector_state_provider`` (Phase
415
486
  C5 watch path), we consult the processed-session store keyed by
416
- ``session_id``. A hit returns ``{collector_name: state_dict}``;
487
+ ``session_key``. A hit returns ``{collector_name: state_dict}``;
417
488
  each collector that implements ``restore_state`` then hydrates
418
489
  from its slice. Collectors that do not implement
419
490
  ``restore_state`` (or that lack a matching key in the stored
@@ -422,7 +493,7 @@ class SessionAggregator:
422
493
 
423
494
  if self._collector_state_provider is None:
424
495
  return
425
- state = self._collector_state_provider(session_id)
496
+ state = self._collector_state_provider(session_key)
426
497
  if not state:
427
498
  return
428
499
  for name, collector in collectors.items():
@@ -459,6 +530,12 @@ class SessionAggregator:
459
530
  continue
460
531
  return out
461
532
 
533
+ @staticmethod
534
+ def _event_session_key(source_event: SourceEvent) -> str:
535
+ if source_event.session_key:
536
+ return source_event.session_key
537
+ return legacy_session_key(source_event.session_id)
538
+
462
539
  def _extract_project_hash(self, path: Path, *, session_id: str) -> str:
463
540
  """Pull the project-hash directory from the canonical path layout.
464
541
 
@@ -731,7 +808,7 @@ class SessionAggregator:
731
808
  payload[collector_name] = snap
732
809
  if not payload:
733
810
  return
734
- bootstrap.observe_session(ctx.session_id, payload)
811
+ bootstrap.observe_session(ctx.session_key, payload)
735
812
  if bootstrap.finalize_if_ready():
736
813
  record(
737
814
  CodevigilError(
@@ -773,6 +850,9 @@ class SessionAggregator:
773
850
  state=ctx.state,
774
851
  snapshot_history=history,
775
852
  session_task_type=current_task_type,
853
+ session_key=ctx.session_key,
854
+ root_id=ctx.root_id,
855
+ root_label=ctx.root_label,
776
856
  )
777
857
 
778
858
  # ----------------------------------------------------------------- lifecycle
@@ -792,12 +872,12 @@ class SessionAggregator:
792
872
 
793
873
  def _evict_session(
794
874
  self,
795
- session_id: str,
875
+ session_key: str,
796
876
  *,
797
877
  reason: str = "source_delete",
798
878
  silence_seconds: float | None = None,
799
879
  ) -> None:
800
- ctx = self._sessions.pop(session_id, None)
880
+ ctx = self._sessions.pop(session_key, None)
801
881
  if ctx is None:
802
882
  return
803
883
  ctx.state = SessionState.EVICTED
@@ -806,7 +886,8 @@ class SessionAggregator:
806
886
  # watcher or a chatty editor that rolls files every few minutes
807
887
  # shows up as an elevated eviction rate in the INFO log.
808
888
  context: dict[str, Any] = {
809
- "session_id": session_id,
889
+ "session_id": ctx.session_id,
890
+ "session_key": ctx.session_key,
810
891
  "reason": reason,
811
892
  "event_count": ctx.event_count,
812
893
  "remaining_sessions": len(self._sessions),
@@ -819,7 +900,8 @@ class SessionAggregator:
819
900
  source=ErrorSource.AGGREGATOR,
820
901
  code="aggregator.session_evicted",
821
902
  message=(
822
- f"session {session_id!r} evicted ({reason}); {ctx.event_count} events processed"
903
+ f"session {ctx.session_key!r} evicted ({reason}); "
904
+ f"{ctx.event_count} events processed"
823
905
  ),
824
906
  context=context,
825
907
  )
@@ -859,6 +941,9 @@ class SessionAggregator:
859
941
  try:
860
942
  report = build_report(
861
943
  session_id=ctx.session_id,
944
+ session_key=ctx.session_key,
945
+ root_id=ctx.root_id,
946
+ root_label=ctx.root_label,
862
947
  project_hash=ctx.project_hash,
863
948
  project_name=self._project_registry.resolve(ctx.project_hash),
864
949
  model=None, # Phase 5 wires model from session metadata