codevigil 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codevigil/config.py ADDED
@@ -0,0 +1,776 @@
1
+ """Config resolution: TOML loader with layered precedence and fail-loud validation.
2
+
3
+ Precedence, lowest to highest:
4
+
5
+ 1. Built-in defaults (``CONFIG_DEFAULTS`` below).
6
+ 2. Config file (``~/.config/codevigil/config.toml`` or ``--config <path>``).
7
+ 3. Environment variables (``CODEVIGIL_*``).
8
+ 4. CLI flags.
9
+
10
+ Every resolved value carries a provenance string so ``codevigil config check``
11
+ can show where each value came from and users can audit precedence conflicts.
12
+
13
+ Validation is strict: unknown keys, unknown collector / renderer names, wrong
14
+ types, and out-of-range values all abort startup with a descriptive error
15
+ that names the offending key, source, and expected type or range.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ import tomllib
22
+ from dataclasses import dataclass, field
23
+ from pathlib import Path
24
+ from typing import Any
25
+
26
+ from codevigil.errors import CodevigilError, ErrorLevel, ErrorSource
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Defaults
30
+ # ---------------------------------------------------------------------------
31
+
32
# Built-in defaults: the lowest-precedence layer. The nested shape of this
# mapping doubles as the schema: keys from the file, env, and CLI layers are
# looked up here, and the Python type of each leaf decides how an overriding
# value is type-checked or parsed.
CONFIG_DEFAULTS: dict[str, Any] = {
    "watch": {
        "root": "~/.claude/projects",
        "poll_interval": 2.0,
        "max_files": 2000,
        "large_file_warn_bytes": 10 * 1024 * 1024,  # 10 MiB
        "stale_after_seconds": 300,
        # Must stay strictly greater than ``stale_after_seconds``; enforced
        # by ``_validate_stale_vs_evict``.
        "evict_after_seconds": 2100,
        "tick_interval": 1.0,
    },
    "collectors": {
        # Every name listed here must appear in ``_KNOWN_COLLECTORS``.
        "enabled": ["read_edit_ratio", "stop_phrase", "reasoning_loop"],
        "parse_health": {
            # parse_health is a built-in always-on integrity collector.
            # The validator refuses any layer that flips this flag to
            # false — see ``_validate_parse_health_undisableable``.
            "enabled": True,
        },
        "read_edit_ratio": {
            "window_size": 50,
            "warn_threshold": 4.0,
            "critical_threshold": 2.0,
            "blind_edit_window": 20,
            # Range-checked to [0.0, 1.0] by ``_validate_resolved``.
            "blind_edit_confidence_floor": 0.95,
            "experimental": True,
        },
        "stop_phrase": {
            # Accepts plain strings and table entries; see
            # ``_coerce_custom_phrase_list``.
            "custom_phrases": [],
            "warn_threshold": 1.0,
            "critical_threshold": 3.0,
            "experimental": True,
        },
        "reasoning_loop": {
            "warn_threshold": 10.0,
            "critical_threshold": 20.0,
            "experimental": True,
        },
    },
    "renderers": {
        # Every name listed here must appear in ``_KNOWN_RENDERERS``.
        "enabled": ["terminal"],
    },
    "report": {
        # Must be one of ``_VALID_OUTPUT_FORMATS``.
        "output_format": "json",
        "output_dir": "~/.local/share/codevigil/reports",
    },
    "logging": {
        "log_path": "~/.local/state/codevigil/codevigil.log",
    },
    "bootstrap": {
        "sessions": 10,
        "state_path": "~/.local/state/codevigil/bootstrap.json",
    },
}
85
+
86
+ # Known collector and renderer names. These are hardcoded for Phase 2 because
87
+ # the runtime registries are empty until their phases land. Later phases may
88
+ # replace this with a registry-backed lookup, but the validator always needs
89
+ # *some* source of truth so typos in the enabled list abort at load time.
90
# Names accepted in ``collectors.enabled``.
_KNOWN_COLLECTORS: frozenset[str] = frozenset(
    {"parse_health", "read_edit_ratio", "stop_phrase", "reasoning_loop"}
)
# Names accepted in ``renderers.enabled``.
_KNOWN_RENDERERS: frozenset[str] = frozenset({"terminal", "json_file"})

# Values accepted for ``report.output_format``; checked by
# ``_validate_output_format``.
_VALID_OUTPUT_FORMATS: frozenset[str] = frozenset({"json", "markdown"})
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # Environment variable bindings
100
+ # ---------------------------------------------------------------------------
101
+
102
+ # Mapping from CODEVIGIL_* env var names to dotted config paths. Only the
103
+ # keys in this mapping can be overridden via the environment; every other
104
+ # key must be set in the TOML file or on the CLI. This keeps the env surface
105
+ # small and auditable.
106
# Each value is the key path into ``CONFIG_DEFAULTS``; the raw environment
# string is later parsed against that default's type by ``_coerce_scalar``.
_ENV_BINDINGS: dict[str, tuple[str, ...]] = {
    "CODEVIGIL_LOG_PATH": ("logging", "log_path"),
    "CODEVIGIL_WATCH_ROOT": ("watch", "root"),
    "CODEVIGIL_WATCH_POLL_INTERVAL": ("watch", "poll_interval"),
    "CODEVIGIL_WATCH_TICK_INTERVAL": ("watch", "tick_interval"),
    "CODEVIGIL_REPORT_OUTPUT_DIR": ("report", "output_dir"),
    "CODEVIGIL_REPORT_OUTPUT_FORMAT": ("report", "output_format"),
    "CODEVIGIL_BOOTSTRAP_SESSIONS": ("bootstrap", "sessions"),
}
115
+
116
+ # ---------------------------------------------------------------------------
117
+ # Errors and resolved value containers
118
+ # ---------------------------------------------------------------------------
119
+
120
+
121
class ConfigError(CodevigilError):
    """Failure to resolve or validate a configuration value.

    Every instance is constructed with ``ErrorLevel.CRITICAL`` and
    ``ErrorSource.CONFIG``; callers supply only the error ``code``, a
    human-readable ``message``, and an optional ``context`` mapping.
    """

    def __init__(self, *, code: str, message: str, context: dict[str, Any] | None = None) -> None:
        # A missing (or empty) context is normalised to a fresh empty dict.
        details: dict[str, Any] = context if context else {}
        super().__init__(
            level=ErrorLevel.CRITICAL,
            source=ErrorSource.CONFIG,
            code=code,
            message=message,
            context=details,
        )
132
+
133
+
134
+ @dataclass(frozen=True, slots=True)
135
+ class ResolvedValue:
136
+ """A single config value paired with its provenance string."""
137
+
138
+ value: Any
139
+ source: str # "default" | "file:<path>" | "env:CODEVIGIL_*" | "cli:--flag"
140
+
141
+
142
+ @dataclass(frozen=True, slots=True)
143
+ class ResolvedConfig:
144
+ """Fully resolved config with per-key provenance.
145
+
146
+ ``values`` holds the effective config as a nested dict matching
147
+ ``CONFIG_DEFAULTS``. ``sources`` maps dotted paths (``"watch.root"``) to
148
+ the provenance string for that value. Only leaf values are tracked —
149
+ intermediate dict nodes have no source.
150
+ """
151
+
152
+ values: dict[str, Any]
153
+ sources: dict[str, str] = field(default_factory=dict)
154
+
155
+
156
+ # ---------------------------------------------------------------------------
157
+ # Public API
158
+ # ---------------------------------------------------------------------------
159
+
160
+
161
def load_config(
    *,
    config_path: Path | None = None,
    env: dict[str, str] | None = None,
    cli_overrides: dict[str, Any] | None = None,
) -> ResolvedConfig:
    """Build the effective config by stacking defaults, file, env, and CLI.

    Layers are applied lowest-precedence first, so a later layer overwrites
    both the value and the recorded provenance of an earlier one.

    Parameters:
        config_path: Optional TOML file to load. When ``None`` the default
            ``~/.config/codevigil/config.toml`` is tried and silently
            skipped if absent. An explicit path that does not exist is an
            error.
        env: Mapping consulted for ``CODEVIGIL_*`` bindings; ``os.environ``
            is used when ``None``.
        cli_overrides: Dotted-path → value mapping from parsed CLI flags.

    Returns:
        ``ResolvedConfig`` whose every leaf value has a source annotation.

    Raises:
        ConfigError: on unknown keys, wrong types, out-of-range values,
            unknown collector / renderer names, or file load errors.
    """

    # Work on private copies so the caller's mappings are never mutated.
    environment = dict(env) if env is not None else dict(os.environ)
    overrides = {} if cli_overrides is None else dict(cli_overrides)

    # Layer 1: built-in defaults, every leaf tagged "default".
    values: dict[str, Any] = _deep_copy_defaults()
    sources: dict[str, str] = _flatten_sources(values, source="default")

    # Layer 2: config file, when one was found.
    file_values, file_path_used = _load_file_layer(config_path)
    if file_values is not None:
        file_label = f"file:{file_path_used}"
        _validate_layer_shape(file_values, source=file_label)
        _apply_layer(values, file_values, sources, source_label=file_label)

    # Layer 3: bound environment variables.
    for dotted, (raw_value, env_name) in _collect_env_overrides(environment).items():
        env_label = f"env:{env_name}"
        _assign_dotted(values, dotted, _coerce_scalar(dotted, raw_value, source=env_label))
        sources[dotted] = env_label

    # Layer 4: CLI flags, which win over everything else.
    for dotted, raw_value in overrides.items():
        _check_known_path(dotted, source="cli")
        cli_label = f"cli:--{dotted}"
        _assign_dotted(values, dotted, _coerce_scalar(dotted, raw_value, source=cli_label))
        sources[dotted] = cli_label

    # Cross-field and range checks run once, on the fully merged result.
    _validate_resolved(values)
    return ResolvedConfig(values=values, sources=sources)
216
+
217
+
218
def render_config_check(resolved: ResolvedConfig) -> str:
    """Render the report printed by ``codevigil config check``.

    The output is a header line followed by one line per tracked key in
    sorted dotted-path order, each showing the value and its provenance,
    and ends with a trailing newline.
    """

    rows = [
        f" {dotted} = {_format_value(_read_dotted(resolved.values, dotted))}"
        f" ({resolved.sources[dotted]})"
        for dotted in sorted(resolved.sources)
    ]
    return "\n".join(["codevigil config check", *rows]) + "\n"
227
+
228
+
229
+ # ---------------------------------------------------------------------------
230
+ # Layer helpers
231
+ # ---------------------------------------------------------------------------
232
+
233
+
234
def _deep_copy_defaults() -> dict[str, Any]:
    """Return an independent copy of ``CONFIG_DEFAULTS`` that is safe to mutate."""
    fresh = _deep_copy(CONFIG_DEFAULTS)
    # Narrowing only: ``_deep_copy`` is typed as returning ``Any``.
    assert isinstance(fresh, dict)
    return fresh
238
+
239
+
240
+ def _deep_copy(value: Any) -> Any:
241
+ if isinstance(value, dict):
242
+ return {k: _deep_copy(v) for k, v in value.items()}
243
+ if isinstance(value, list):
244
+ return [_deep_copy(v) for v in value]
245
+ return value
246
+
247
+
248
def _flatten_sources(values: dict[str, Any], *, source: str) -> dict[str, str]:
    """Map every leaf's dotted path in ``values`` to the single label ``source``."""
    labelled: dict[str, str] = {}
    _walk_leaves(values, prefix=(), accumulator=labelled, source=source)
    return labelled
252
+
253
+
254
+ def _walk_leaves(
255
+ values: dict[str, Any],
256
+ *,
257
+ prefix: tuple[str, ...],
258
+ accumulator: dict[str, str],
259
+ source: str,
260
+ ) -> None:
261
+ for key, value in values.items():
262
+ path = (*prefix, key)
263
+ if isinstance(value, dict):
264
+ _walk_leaves(value, prefix=path, accumulator=accumulator, source=source)
265
+ else:
266
+ accumulator[".".join(path)] = source
267
+
268
+
269
def _load_file_layer(config_path: Path | None) -> tuple[dict[str, Any] | None, Path | None]:
    """Load the file layer, returning ``(parsed_table, path_used)``.

    With an explicit ``config_path`` the (user-expanded) file must exist,
    otherwise ``ConfigError`` with code ``config.file_not_found`` is raised.
    With ``None`` the default ``~/.config/codevigil/config.toml`` is used
    if present, and ``(None, None)`` is returned if it is not.
    """
    if config_path is not None:
        explicit = config_path.expanduser()
        if explicit.exists():
            return _read_toml(explicit), explicit
        raise ConfigError(
            code="config.file_not_found",
            message=f"config file does not exist: {explicit}",
            context={"path": str(explicit)},
        )

    fallback = Path("~/.config/codevigil/config.toml").expanduser()
    if fallback.exists():
        return _read_toml(fallback), fallback
    # A missing default file simply means "no file layer".
    return None, None
283
+
284
+
285
def _read_toml(path: Path) -> dict[str, Any]:
    """Parse the TOML file at ``path`` and return its top-level table.

    Raises:
        ConfigError: with code ``config.toml_parse_error`` when the content
            is not valid TOML or not valid UTF-8, and with code
            ``config.file_unreadable`` when the file cannot be opened or
            read (permission denied, path is a directory, removed after the
            existence check, ...).
    """
    try:
        with path.open("rb") as handle:
            return tomllib.load(handle)
    except (tomllib.TOMLDecodeError, UnicodeDecodeError) as exc:
        # ``tomllib.load`` decodes the bytes as UTF-8 before parsing, so an
        # encoding failure surfaces as UnicodeDecodeError rather than
        # TOMLDecodeError; both mean the file content is unusable.
        raise ConfigError(
            code="config.toml_parse_error",
            message=f"failed to parse {path}: {exc}",
            context={"path": str(path)},
        ) from exc
    except OSError as exc:
        # Keep the fail-loud contract: I/O problems are reported as a
        # ConfigError naming the file instead of escaping as a raw OSError.
        raise ConfigError(
            code="config.file_unreadable",
            message=f"failed to read {path}: {exc}",
            context={"path": str(path)},
        ) from exc
295
+
296
+
297
def _apply_layer(
    base: dict[str, Any],
    overlay: dict[str, Any],
    sources: dict[str, str],
    *,
    source_label: str,
    prefix: tuple[str, ...] = (),
) -> None:
    """Merge ``overlay`` into ``base`` in place, validating against the defaults.

    Each overlay key must exist at the same dotted path in
    ``CONFIG_DEFAULTS``. Where the default is a table the overlay value must
    also be a table and is merged recursively; otherwise the value is
    type-checked against the default, written into ``base``, and its
    provenance in ``sources`` is set to ``source_label``.

    Raises:
        ConfigError: ``config.unknown_key`` for a key absent from the
            defaults, ``config.type_mismatch`` for a non-table value where
            a table is expected (or any leaf type error).
    """
    for key, value in overlay.items():
        path = prefix + (key,)
        dotted = ".".join(path)
        default_slot = _read_dotted_optional(CONFIG_DEFAULTS, dotted)

        if default_slot is _MISSING:
            raise ConfigError(
                code="config.unknown_key",
                message=f"unknown config key {dotted!r} in {source_label}",
                context={"key": dotted, "source": source_label},
            )

        if not isinstance(default_slot, dict):
            # Leaf: coerce against the default's type, then record it.
            _assign_dotted(
                base,
                dotted,
                _coerce_against_default(dotted, value, default_slot, source=source_label),
            )
            sources[dotted] = source_label
            continue

        # Table in the defaults: the overlay must supply a table as well.
        if not isinstance(value, dict):
            actual = type(value).__name__
            raise ConfigError(
                code="config.type_mismatch",
                message=(
                    f"config key {dotted!r} expected a table, got "
                    f"{actual} in {source_label}"
                ),
                context={
                    "key": dotted,
                    "expected": "table",
                    "actual": actual,
                    "source": source_label,
                },
            )
        _apply_layer(base, value, sources, source_label=source_label, prefix=path)
341
+
342
+
343
def _collect_env_overrides(environment: dict[str, str]) -> dict[str, tuple[str, str]]:
    """Pick the bound ``CODEVIGIL_*`` variables that are present in ``environment``.

    Returns a mapping from dotted config path to ``(raw_value, env_name)``,
    in ``_ENV_BINDINGS`` order. Variables that are absent are skipped.
    """
    return {
        ".".join(path): (environment[env_name], env_name)
        for env_name, path in _ENV_BINDINGS.items()
        if environment.get(env_name) is not None
    }
352
+
353
+
354
+ # ---------------------------------------------------------------------------
355
+ # Coercion and validation
356
+ # ---------------------------------------------------------------------------
357
+
358
# Sentinel returned by ``_read_dotted_optional`` for an absent path; compared
# by identity so it cannot be confused with any real config value.
_MISSING: object = object()
359
+
360
+
361
+ def _read_dotted(root: dict[str, Any], dotted: str) -> Any:
362
+ node: Any = root
363
+ for part in dotted.split("."):
364
+ node = node[part]
365
+ return node
366
+
367
+
368
def _read_dotted_optional(root: dict[str, Any], dotted: str) -> Any:
    """Return the value at ``dotted`` inside ``root``, or ``_MISSING`` if absent.

    The walk stops with ``_MISSING`` as soon as a segment is not a key of
    the current node, or the current node is not a dict at all.
    """
    current: Any = root
    for segment in dotted.split("."):
        if isinstance(current, dict) and segment in current:
            current = current[segment]
        else:
            return _MISSING
    return current
375
+
376
+
377
+ def _assign_dotted(root: dict[str, Any], dotted: str, value: Any) -> None:
378
+ parts = dotted.split(".")
379
+ node: dict[str, Any] = root
380
+ for part in parts[:-1]:
381
+ next_node = node.get(part)
382
+ if not isinstance(next_node, dict):
383
+ next_node = {}
384
+ node[part] = next_node
385
+ node = next_node
386
+ node[parts[-1]] = value
387
+
388
+
389
def _validate_layer_shape(layer: dict[str, Any], *, source: str) -> None:
    """Defensively confirm that a loaded layer is a table (dict).

    Raises:
        ConfigError: ``config.bad_layer_shape`` when ``layer`` is not a dict.
    """
    if isinstance(layer, dict):
        return
    raise ConfigError(  # pragma: no cover - defensive
        code="config.bad_layer_shape",
        message=f"{source} did not produce a table",
    )
395
+
396
+
397
def _check_known_path(dotted: str, *, source: str) -> None:
    """Raise ``ConfigError`` (``config.unknown_key``) unless ``dotted`` exists in the defaults."""
    if _read_dotted_optional(CONFIG_DEFAULTS, dotted) is not _MISSING:
        return
    raise ConfigError(
        code="config.unknown_key",
        message=f"unknown config key {dotted!r} from {source}",
        context={"key": dotted, "source": source},
    )
404
+
405
+
406
+ def _coerce_against_default(dotted: str, value: Any, default: Any, *, source: str) -> Any:
407
+ expected_type = type(default)
408
+ if isinstance(default, bool):
409
+ if not isinstance(value, bool):
410
+ raise ConfigError(
411
+ code="config.type_mismatch",
412
+ message=(
413
+ f"config key {dotted!r} expected bool, got {type(value).__name__} in {source}"
414
+ ),
415
+ context={"key": dotted, "expected": "bool", "source": source},
416
+ )
417
+ return value
418
+ if isinstance(default, int) and not isinstance(default, bool):
419
+ if isinstance(value, bool) or not isinstance(value, int):
420
+ raise ConfigError(
421
+ code="config.type_mismatch",
422
+ message=(
423
+ f"config key {dotted!r} expected int, got {type(value).__name__} in {source}"
424
+ ),
425
+ context={"key": dotted, "expected": "int", "source": source},
426
+ )
427
+ return value
428
+ if isinstance(default, float):
429
+ if isinstance(value, bool) or not isinstance(value, (int, float)):
430
+ raise ConfigError(
431
+ code="config.type_mismatch",
432
+ message=(
433
+ f"config key {dotted!r} expected float, got {type(value).__name__} in {source}"
434
+ ),
435
+ context={"key": dotted, "expected": "float", "source": source},
436
+ )
437
+ return float(value)
438
+ if isinstance(default, str):
439
+ if not isinstance(value, str):
440
+ raise ConfigError(
441
+ code="config.type_mismatch",
442
+ message=(
443
+ f"config key {dotted!r} expected str, got {type(value).__name__} in {source}"
444
+ ),
445
+ context={"key": dotted, "expected": "str", "source": source},
446
+ )
447
+ return value
448
+ if isinstance(default, list):
449
+ if not isinstance(value, list):
450
+ raise ConfigError(
451
+ code="config.type_mismatch",
452
+ message=(
453
+ f"config key {dotted!r} expected list, got {type(value).__name__} in {source}"
454
+ ),
455
+ context={"key": dotted, "expected": "list", "source": source},
456
+ )
457
+ # ``stop_phrase.custom_phrases`` accepts a mixed list of plain
458
+ # strings and table entries with ``text``/``mode``/``category``/
459
+ # ``intent`` keys. Every other list-valued config key is still
460
+ # the strict ``list[str]`` form.
461
+ if dotted == "collectors.stop_phrase.custom_phrases":
462
+ return _coerce_custom_phrase_list(dotted, value, source=source)
463
+ for item in value:
464
+ if not isinstance(item, str):
465
+ raise ConfigError(
466
+ code="config.type_mismatch",
467
+ message=(
468
+ f"config key {dotted!r} list item expected str, got "
469
+ f"{type(item).__name__} in {source}"
470
+ ),
471
+ context={
472
+ "key": dotted,
473
+ "expected": "list[str]",
474
+ "source": source,
475
+ },
476
+ )
477
+ return list(value)
478
+ raise ConfigError( # pragma: no cover - defensive for unknown default types
479
+ code="config.unsupported_default_type",
480
+ message=f"config key {dotted!r} has unsupported default type {expected_type.__name__}",
481
+ context={"key": dotted, "type": expected_type.__name__},
482
+ )
483
+
484
+
485
+ _CUSTOM_PHRASE_FIELDS: frozenset[str] = frozenset({"text", "mode", "category", "intent"})
486
+ _CUSTOM_PHRASE_MODES: frozenset[str] = frozenset({"word", "regex", "substring"})
487
+
488
+
489
+ def _coerce_custom_phrase_list(dotted: str, value: list[Any], *, source: str) -> list[Any]:
490
+ """Validate the mixed string/table form of ``stop_phrase.custom_phrases``."""
491
+
492
+ out: list[Any] = []
493
+ for item in value:
494
+ if isinstance(item, str):
495
+ out.append(item)
496
+ continue
497
+ if not isinstance(item, dict):
498
+ raise ConfigError(
499
+ code="config.type_mismatch",
500
+ message=(
501
+ f"config key {dotted!r} list item expected str or table, got "
502
+ f"{type(item).__name__} in {source}"
503
+ ),
504
+ context={"key": dotted, "source": source},
505
+ )
506
+ unknown = set(item.keys()) - _CUSTOM_PHRASE_FIELDS
507
+ if unknown:
508
+ raise ConfigError(
509
+ code="config.unknown_key",
510
+ message=(
511
+ f"config key {dotted!r} table entry has unknown field(s) "
512
+ f"{sorted(unknown)!r} in {source}"
513
+ ),
514
+ context={"key": dotted, "unknown": sorted(unknown), "source": source},
515
+ )
516
+ text = item.get("text")
517
+ if not isinstance(text, str) or not text:
518
+ raise ConfigError(
519
+ code="config.type_mismatch",
520
+ message=(
521
+ f"config key {dotted!r} table entry requires a non-empty "
522
+ f"'text' field in {source}"
523
+ ),
524
+ context={"key": dotted, "source": source},
525
+ )
526
+ mode = item.get("mode", "word")
527
+ if mode not in _CUSTOM_PHRASE_MODES:
528
+ raise ConfigError(
529
+ code="config.out_of_range",
530
+ message=(
531
+ f"config key {dotted!r} table entry has invalid mode "
532
+ f"{mode!r}; expected one of {sorted(_CUSTOM_PHRASE_MODES)!r} in {source}"
533
+ ),
534
+ context={"key": dotted, "mode": mode, "source": source},
535
+ )
536
+ out.append(dict(item))
537
+ return out
538
+
539
+
540
def _coerce_scalar(dotted: str, raw: Any, *, source: str) -> Any:
    """Coerce one env / CLI override to the type of its built-in default.

    Non-string input is type-checked via ``_coerce_against_default``.
    String input is parsed according to the default's type: bool keywords
    (``true/1/yes/on`` and ``false/0/no/off``, case-insensitive), ``int()``,
    ``float()``, or a comma-separated list with blank items dropped. For a
    string default the raw text is returned unchanged.

    Raises:
        ConfigError: ``config.unknown_key`` when ``dotted`` is not in the
            defaults, ``config.scalar_into_table`` when it names a table,
            ``config.type_mismatch`` when the text cannot be parsed.
    """
    default = _read_dotted_optional(CONFIG_DEFAULTS, dotted)
    if default is _MISSING:
        raise ConfigError(
            code="config.unknown_key",
            message=f"unknown config key {dotted!r} from {source}",
            context={"key": dotted, "source": source},
        )
    if isinstance(default, dict):
        raise ConfigError(
            code="config.scalar_into_table",
            message=f"config key {dotted!r} expects a table, not a scalar from {source}",
            context={"key": dotted, "source": source},
        )
    if not isinstance(raw, str):
        return _coerce_against_default(dotted, raw, default, source=source)

    # Env / CLI raw values arrive as strings; parse them against the default
    # type so CODEVIGIL_WATCH_POLL_INTERVAL="0.5" becomes float 0.5.
    if isinstance(default, bool):
        token = raw.strip().lower()
        if token in {"true", "1", "yes", "on"}:
            return True
        if token in {"false", "0", "no", "off"}:
            return False
        raise ConfigError(
            code="config.type_mismatch",
            message=f"config key {dotted!r} expected bool, got {raw!r} in {source}",
            context={"key": dotted, "raw": raw, "source": source},
        )

    if isinstance(default, (int, float)):
        # bool defaults were handled above, so an int here is a real int.
        parse, label = (int, "int") if isinstance(default, int) else (float, "float")
        try:
            return parse(raw)
        except ValueError as exc:
            raise ConfigError(
                code="config.type_mismatch",
                message=f"config key {dotted!r} expected {label}, got {raw!r} in {source}",
                context={"key": dotted, "raw": raw, "source": source},
            ) from exc

    if isinstance(default, list):
        # Comma-separated env / CLI form: "a,b,c".
        trimmed = (part.strip() for part in raw.split(","))
        return [piece for piece in trimmed if piece]

    return raw
592
+
593
+
594
def _validate_resolved(values: dict[str, Any]) -> None:
    """Run every post-merge check on the fully resolved config.

    Numeric range checks run first, in table order, followed by the
    stale/evict ordering check, the enabled-name checks for collectors and
    renderers, the report output format check, and finally the
    ``parse_health`` guard. The first failing check raises ``ConfigError``.
    """
    # (dotted path, inclusive minimum, inclusive maximum, kind label)
    range_checks: tuple[tuple[str, float, float, str], ...] = (
        ("watch.poll_interval", 0.05, 3600.0, "float"),
        ("watch.tick_interval", 0.05, 3600.0, "float"),
        ("watch.max_files", 1, 1_000_000, "int"),
        ("watch.stale_after_seconds", 1, 86_400, "int"),
        ("watch.evict_after_seconds", 1, 86_400, "int"),
        ("watch.large_file_warn_bytes", 1024, 10**12, "int"),
        ("collectors.read_edit_ratio.window_size", 1, 100_000, "int"),
        ("collectors.read_edit_ratio.blind_edit_window", 1, 10_000, "int"),
        ("collectors.read_edit_ratio.blind_edit_confidence_floor", 0.0, 1.0, "float"),
        ("bootstrap.sessions", 1, 1_000, "int"),
    )
    for dotted, minimum, maximum, kind in range_checks:
        _validate_range(values, dotted, minimum=minimum, maximum=maximum, kind=kind)

    _validate_stale_vs_evict(values)

    # (dotted path, allowed names, kind label)
    name_checks: tuple[tuple[str, frozenset[str], str], ...] = (
        ("collectors.enabled", _KNOWN_COLLECTORS, "collector"),
        ("renderers.enabled", _KNOWN_RENDERERS, "renderer"),
    )
    for dotted, known, kind in name_checks:
        _validate_enabled_names(values, dotted, known=known, kind=kind)

    _validate_output_format(values)
    _validate_parse_health_undisableable(values)
663
+
664
+
665
def _validate_parse_health_undisableable(values: dict[str, Any]) -> None:
    """Reject a resolved config in which ``parse_health`` is switched off.

    ``parse_health`` is the parser-drift integrity collector. If it could
    be disabled, a user could silence the only signal that catches a silent
    Claude Code schema break, which defeats the design goal of treating
    drift as a first-class observable.

    Raises:
        ConfigError: ``config.parse_health_undisableable`` when
            ``collectors.parse_health.enabled`` is present and is anything
            other than ``True``.
    """

    enabled = _read_dotted_optional(values, "collectors.parse_health.enabled")
    if enabled is not _MISSING and enabled is not True:
        raise ConfigError(
            code="config.parse_health_undisableable",
            message=(
                "collectors.parse_health.enabled cannot be set to false; "
                "parse_health is a built-in always-on integrity collector"
            ),
            context={"key": "collectors.parse_health.enabled", "value": enabled},
        )
685
+
686
+
687
def _validate_range(
    values: dict[str, Any],
    dotted: str,
    *,
    minimum: float,
    maximum: float,
    kind: str,
) -> None:
    """Require the value at ``dotted`` to lie within ``[minimum, maximum]``.

    Parameters:
        values: The resolved nested config.
        dotted: Dotted path of the numeric value to check.
        minimum: Inclusive lower bound.
        maximum: Inclusive upper bound.
        kind: Type label (``"int"`` / ``"float"``) used in the error message.

    Raises:
        ConfigError: ``config.out_of_range`` when the value is outside the
            bounds or is NaN.
    """
    value = _read_dotted(values, dotted)
    # Written as a positive containment test on purpose: every ordering
    # comparison with NaN is false, so ``value < minimum or value > maximum``
    # would let a NaN (e.g. from an env var set to "nan") slip through.
    if not (minimum <= value <= maximum):
        raise ConfigError(
            code="config.out_of_range",
            message=(
                f"config key {dotted!r} = {value!r} is out of range "
                f"[{minimum}, {maximum}] for {kind}"
            ),
            context={
                "key": dotted,
                "value": value,
                "min": minimum,
                "max": maximum,
            },
        )
710
+
711
+
712
def _validate_stale_vs_evict(values: dict[str, Any]) -> None:
    """Require ``watch.evict_after_seconds`` to be strictly greater than ``watch.stale_after_seconds``.

    Raises:
        ConfigError: ``config.out_of_range`` when evict is less than or
            equal to stale.
    """
    stale = _read_dotted(values, "watch.stale_after_seconds")
    evict = _read_dotted(values, "watch.evict_after_seconds")
    if evict > stale:
        return
    raise ConfigError(
        code="config.out_of_range",
        message=(
            f"watch.evict_after_seconds ({evict}) must be strictly greater "
            f"than watch.stale_after_seconds ({stale})"
        ),
        context={"stale": stale, "evict": evict},
    )
724
+
725
+
726
def _validate_enabled_names(
    values: dict[str, Any],
    dotted: str,
    *,
    known: frozenset[str],
    kind: str,
) -> None:
    """Check an ``enabled`` list for unknown and duplicated names.

    Parameters:
        values: The resolved nested config.
        dotted: Dotted path of the list to check.
        known: The set of accepted names.
        kind: Noun used in error codes and messages (``"collector"`` /
            ``"renderer"``).

    Raises:
        ConfigError: ``config.unknown_<kind>`` when a name is not in
            ``known`` (checked first), ``config.duplicate_<kind>`` when a
            name appears more than once.
    """
    enabled: list[str] = _read_dotted(values, dotted)

    unknown = [name for name in enabled if name not in known]
    if unknown:
        known_sorted = sorted(known)
        raise ConfigError(
            code=f"config.unknown_{kind}",
            message=(f"unknown {kind} name(s) in {dotted}: {unknown!r}; known: {known_sorted!r}"),
            context={"key": dotted, "unknown": unknown, "known": known_sorted},
        )

    # Fewer distinct names than entries means at least one repeat.
    if len(set(enabled)) < len(enabled):
        raise ConfigError(
            code=f"config.duplicate_{kind}",
            message=f"duplicate {kind} name(s) in {dotted}: {enabled!r}",
            context={"key": dotted, "enabled": enabled},
        )
747
+
748
+
749
def _validate_output_format(values: dict[str, Any]) -> None:
    """Require ``report.output_format`` to be one of ``_VALID_OUTPUT_FORMATS``.

    Raises:
        ConfigError: ``config.invalid_output_format`` otherwise.
    """
    fmt = _read_dotted(values, "report.output_format")
    if fmt in _VALID_OUTPUT_FORMATS:
        return
    allowed = sorted(_VALID_OUTPUT_FORMATS)
    raise ConfigError(
        code="config.invalid_output_format",
        message=(f"report.output_format = {fmt!r} is not one of {allowed!r}"),
        context={"value": fmt, "valid": allowed},
    )
759
+
760
+
761
+ def _format_value(value: Any) -> str:
762
+ if isinstance(value, str):
763
+ return repr(value)
764
+ if isinstance(value, list):
765
+ return "[" + ", ".join(_format_value(v) for v in value) + "]"
766
+ return repr(value)
767
+
768
+
769
# Public API of this module; the underscore-prefixed helpers above are
# internal and intentionally not exported.
__all__ = [
    "CONFIG_DEFAULTS",
    "ConfigError",
    "ResolvedConfig",
    "ResolvedValue",
    "load_config",
    "render_config_check",
]