codevigil 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codevigil/__init__.py +19 -0
- codevigil/__main__.py +10 -0
- codevigil/aggregator.py +506 -0
- codevigil/bootstrap.py +284 -0
- codevigil/cli.py +732 -0
- codevigil/collectors/__init__.py +24 -0
- codevigil/collectors/_text_match.py +271 -0
- codevigil/collectors/parse_health.py +94 -0
- codevigil/collectors/read_edit_ratio.py +258 -0
- codevigil/collectors/reasoning_loop.py +167 -0
- codevigil/collectors/stop_phrase.py +266 -0
- codevigil/config.py +776 -0
- codevigil/errors.py +211 -0
- codevigil/parser.py +673 -0
- codevigil/privacy.py +191 -0
- codevigil/projects.py +132 -0
- codevigil/registry.py +121 -0
- codevigil/renderers/__init__.py +20 -0
- codevigil/renderers/json_file.py +105 -0
- codevigil/renderers/terminal.py +236 -0
- codevigil/types.py +189 -0
- codevigil/watcher.py +456 -0
- codevigil-0.1.0.dist-info/METADATA +351 -0
- codevigil-0.1.0.dist-info/RECORD +27 -0
- codevigil-0.1.0.dist-info/WHEEL +4 -0
- codevigil-0.1.0.dist-info/entry_points.txt +2 -0
- codevigil-0.1.0.dist-info/licenses/LICENSE +201 -0
codevigil/config.py
ADDED
|
@@ -0,0 +1,776 @@
|
|
|
1
|
+
"""Config resolution: TOML loader with layered precedence and fail-loud validation.
|
|
2
|
+
|
|
3
|
+
Precedence, lowest to highest:
|
|
4
|
+
|
|
5
|
+
1. Built-in defaults (``CONFIG_DEFAULTS`` below).
|
|
6
|
+
2. Config file (``~/.config/codevigil/config.toml`` or ``--config <path>``).
|
|
7
|
+
3. Environment variables (``CODEVIGIL_*``).
|
|
8
|
+
4. CLI flags.
|
|
9
|
+
|
|
10
|
+
Every resolved value carries a provenance string so ``codevigil config check``
|
|
11
|
+
can show where each value came from and users can audit precedence conflicts.
|
|
12
|
+
|
|
13
|
+
Validation is strict: unknown keys, unknown collector / renderer names, wrong
|
|
14
|
+
types, and out-of-range values all abort startup with a descriptive error
|
|
15
|
+
that names the offending key, source, and expected type or range.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
import tomllib
|
|
22
|
+
from dataclasses import dataclass, field
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
from codevigil.errors import CodevigilError, ErrorLevel, ErrorSource
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Defaults
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
# Built-in defaults: the lowest-precedence layer. Layer application and the
# CLI key check both look dotted paths up in this mapping, so a key that is
# absent here is rejected as unknown, and the type of each default decides
# how incoming values are coerced.
CONFIG_DEFAULTS: dict[str, Any] = {
    "watch": {
        "root": "~/.claude/projects",
        # poll_interval and tick_interval are range-checked to [0.05, 3600.0].
        "poll_interval": 2.0,
        "max_files": 2000,
        # 10 MiB.
        "large_file_warn_bytes": 10 * 1024 * 1024,
        # evict_after_seconds must be strictly greater than
        # stale_after_seconds; see ``_validate_stale_vs_evict``.
        "stale_after_seconds": 300,
        "evict_after_seconds": 2100,
        "tick_interval": 1.0,
    },
    "collectors": {
        # Names are validated against ``_KNOWN_COLLECTORS``.
        "enabled": ["read_edit_ratio", "stop_phrase", "reasoning_loop"],
        "parse_health": {
            # parse_health is a built-in always-on integrity collector.
            # The validator refuses any layer that flips this flag to
            # false — see ``_validate_parse_health_undisableable``.
            "enabled": True,
        },
        "read_edit_ratio": {
            "window_size": 50,
            "warn_threshold": 4.0,
            "critical_threshold": 2.0,
            "blind_edit_window": 20,
            # Range-checked to [0.0, 1.0].
            "blind_edit_confidence_floor": 0.95,
            "experimental": True,
        },
        "stop_phrase": {
            # Entries may be plain strings or tables with ``text`` / ``mode`` /
            # ``category`` / ``intent`` keys; see ``_coerce_custom_phrase_list``.
            "custom_phrases": [],
            "warn_threshold": 1.0,
            "critical_threshold": 3.0,
            "experimental": True,
        },
        "reasoning_loop": {
            "warn_threshold": 10.0,
            "critical_threshold": 20.0,
            "experimental": True,
        },
    },
    "renderers": {
        # Names are validated against ``_KNOWN_RENDERERS``.
        "enabled": ["terminal"],
    },
    "report": {
        # Must be one of ``_VALID_OUTPUT_FORMATS``.
        "output_format": "json",
        "output_dir": "~/.local/share/codevigil/reports",
    },
    "logging": {
        "log_path": "~/.local/state/codevigil/codevigil.log",
    },
    "bootstrap": {
        # Range-checked to [1, 1000].
        "sessions": 10,
        "state_path": "~/.local/state/codevigil/bootstrap.json",
    },
}
|
|
85
|
+
|
|
86
|
+
# Known collector and renderer names. These are hardcoded for Phase 2 because
|
|
87
|
+
# the runtime registries are empty until their phases land. Later phases may
|
|
88
|
+
# replace this with a registry-backed lookup, but the validator always needs
|
|
89
|
+
# *some* source of truth so typos in the enabled list abort at load time.
|
|
90
|
+
# Collector names accepted in ``collectors.enabled``.
_KNOWN_COLLECTORS: frozenset[str] = frozenset(
    {"parse_health", "read_edit_ratio", "stop_phrase", "reasoning_loop"}
)
# Renderer names accepted in ``renderers.enabled``.
_KNOWN_RENDERERS: frozenset[str] = frozenset({"terminal", "json_file"})

# Values accepted for ``report.output_format``.
_VALID_OUTPUT_FORMATS: frozenset[str] = frozenset({"json", "markdown"})
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
# Environment variable bindings
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
|
|
102
|
+
# Mapping from CODEVIGIL_* env var names to dotted config paths. Only the
|
|
103
|
+
# keys in this mapping can be overridden via the environment; every other
|
|
104
|
+
# key must be set in the TOML file or on the CLI. This keeps the env surface
|
|
105
|
+
# small and auditable.
|
|
106
|
+
# Each value is the config path as a tuple of segments;
# ``_collect_env_overrides`` joins the segments with "." to form the dotted key.
_ENV_BINDINGS: dict[str, tuple[str, ...]] = {
    "CODEVIGIL_LOG_PATH": ("logging", "log_path"),
    "CODEVIGIL_WATCH_ROOT": ("watch", "root"),
    "CODEVIGIL_WATCH_POLL_INTERVAL": ("watch", "poll_interval"),
    "CODEVIGIL_WATCH_TICK_INTERVAL": ("watch", "tick_interval"),
    "CODEVIGIL_REPORT_OUTPUT_DIR": ("report", "output_dir"),
    "CODEVIGIL_REPORT_OUTPUT_FORMAT": ("report", "output_format"),
    "CODEVIGIL_BOOTSTRAP_SESSIONS": ("bootstrap", "sessions"),
}
|
|
115
|
+
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
# Errors and resolved value containers
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class ConfigError(CodevigilError):
    """Error raised by the config layer when a value cannot be resolved or validated.

    Every instance is reported at ``ErrorLevel.CRITICAL`` with
    ``ErrorSource.CONFIG``; callers supply only the error code, the message,
    and an optional context mapping.
    """

    def __init__(self, *, code: str, message: str, context: dict[str, Any] | None = None) -> None:
        # A missing or empty context is normalised to a fresh empty dict.
        details: dict[str, Any] = context if context else {}
        super().__init__(
            level=ErrorLevel.CRITICAL,
            source=ErrorSource.CONFIG,
            code=code,
            message=message,
            context=details,
        )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass(frozen=True, slots=True)
class ResolvedValue:
    """A single config value paired with its provenance string.

    Instances are immutable (``frozen=True``) and use ``__slots__``.
    """

    # The effective value for one config key.
    value: Any
    # Provenance label for ``value``, in one of the forms listed here.
    source: str  # "default" | "file:<path>" | "env:CODEVIGIL_*" | "cli:--flag"
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass(frozen=True, slots=True)
class ResolvedConfig:
    """Fully resolved config with per-key provenance.

    ``values`` holds the effective config as a nested dict matching
    ``CONFIG_DEFAULTS``. ``sources`` maps dotted paths (``"watch.root"``) to
    the provenance string for that value. Only leaf values are tracked —
    intermediate dict nodes have no source.

    The dataclass is frozen, so the two attributes cannot be rebound; the
    dicts they reference are ordinary mutable dicts.
    """

    # Nested effective configuration, same shape as ``CONFIG_DEFAULTS``.
    values: dict[str, Any]
    # Dotted leaf path -> provenance label; defaults to a new empty dict.
    sources: dict[str, str] = field(default_factory=dict)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# ---------------------------------------------------------------------------
|
|
157
|
+
# Public API
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def load_config(
    *,
    config_path: Path | None = None,
    env: dict[str, str] | None = None,
    cli_overrides: dict[str, Any] | None = None,
) -> ResolvedConfig:
    """Build the effective config by stacking defaults, file, env, and CLI layers.

    Later layers win. Each leaf that a layer sets has its provenance label
    recorded, and the merged result is validated before it is returned.

    Parameters:
        config_path: TOML file to load. When ``None`` the default
            ``~/.config/codevigil/config.toml`` is used if it exists and
            silently skipped otherwise. An explicit path that does not exist
            raises ``ConfigError``.
        env: Mapping consulted for the ``CODEVIGIL_*`` bindings; ``os.environ``
            is used when ``None``. The mapping is copied, never mutated.
        cli_overrides: Dotted-path → value mapping from parsed CLI flags.
            The mapping is copied, never mutated.

    Returns:
        A ``ResolvedConfig`` holding the merged values and, for every leaf,
        the label of the layer that supplied it.

    Raises:
        ConfigError: on unknown keys, wrong types, out-of-range values,
            unknown collector / renderer names, or file load errors.
    """
    environment = dict(env) if env is not None else dict(os.environ)
    overrides = {} if cli_overrides is None else dict(cli_overrides)

    # Layer 1: built-in defaults, every leaf labelled "default".
    values: dict[str, Any] = _deep_copy_defaults()
    sources: dict[str, str] = _flatten_sources(values, source="default")

    # Layer 2: the TOML file, when one was found.
    file_values, file_path_used = _load_file_layer(config_path)
    if file_values is not None:
        file_label = f"file:{file_path_used}"
        _validate_layer_shape(file_values, source=file_label)
        _apply_layer(values, file_values, sources, source_label=file_label)

    # Layer 3: environment variables listed in ``_ENV_BINDINGS``.
    for dotted, (raw_value, env_name) in _collect_env_overrides(environment).items():
        env_label = f"env:{env_name}"
        _assign_dotted(values, dotted, _coerce_scalar(dotted, raw_value, source=env_label))
        sources[dotted] = env_label

    # Layer 4: CLI flags; the key is checked before the value is coerced.
    for dotted, raw_value in overrides.items():
        cli_label = f"cli:--{dotted}"
        _check_known_path(dotted, source="cli")
        _assign_dotted(values, dotted, _coerce_scalar(dotted, raw_value, source=cli_label))
        sources[dotted] = cli_label

    _validate_resolved(values)
    return ResolvedConfig(values=values, sources=sources)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def render_config_check(resolved: ResolvedConfig) -> str:
    """Render ``resolved`` as the text report for ``codevigil config check``.

    The report is a header line followed by one line per tracked leaf, in
    sorted dotted-path order, showing the value and its provenance. The
    result ends with a trailing newline.
    """
    entries = [
        f" {dotted} = {_format_value(_read_dotted(resolved.values, dotted))}"
        f" ({resolved.sources[dotted]})"
        for dotted in sorted(resolved.sources)
    ]
    return "\n".join(["codevigil config check", *entries]) + "\n"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# ---------------------------------------------------------------------------
|
|
230
|
+
# Layer helpers
|
|
231
|
+
# ---------------------------------------------------------------------------
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _deep_copy_defaults() -> dict[str, Any]:
    """Return an independent copy of ``CONFIG_DEFAULTS`` for a fresh resolution."""
    defaults = _deep_copy(CONFIG_DEFAULTS)
    # Narrows the ``Any`` returned by ``_deep_copy`` for type checkers.
    assert isinstance(defaults, dict)
    return defaults
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _deep_copy(value: Any) -> Any:
|
|
241
|
+
if isinstance(value, dict):
|
|
242
|
+
return {k: _deep_copy(v) for k, v in value.items()}
|
|
243
|
+
if isinstance(value, list):
|
|
244
|
+
return [_deep_copy(v) for v in value]
|
|
245
|
+
return value
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _flatten_sources(values: dict[str, Any], *, source: str) -> dict[str, str]:
|
|
249
|
+
out: dict[str, str] = {}
|
|
250
|
+
_walk_leaves(values, prefix=(), accumulator=out, source=source)
|
|
251
|
+
return out
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _walk_leaves(
|
|
255
|
+
values: dict[str, Any],
|
|
256
|
+
*,
|
|
257
|
+
prefix: tuple[str, ...],
|
|
258
|
+
accumulator: dict[str, str],
|
|
259
|
+
source: str,
|
|
260
|
+
) -> None:
|
|
261
|
+
for key, value in values.items():
|
|
262
|
+
path = (*prefix, key)
|
|
263
|
+
if isinstance(value, dict):
|
|
264
|
+
_walk_leaves(value, prefix=path, accumulator=accumulator, source=source)
|
|
265
|
+
else:
|
|
266
|
+
accumulator[".".join(path)] = source
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _load_file_layer(config_path: Path | None) -> tuple[dict[str, Any] | None, Path | None]:
    """Load the file layer and report which path it came from.

    An explicit ``config_path`` must exist, otherwise ``ConfigError`` with
    code ``config.file_not_found`` is raised. With ``config_path=None`` the
    default ``~/.config/codevigil/config.toml`` is tried, and ``(None, None)``
    is returned when it is absent. ``~`` is expanded in both cases.

    Returns:
        ``(parsed_table, path_used)``, or ``(None, None)`` when no file applies.
    """
    if config_path is not None:
        explicit = config_path.expanduser()
        if explicit.exists():
            return _read_toml(explicit), explicit
        raise ConfigError(
            code="config.file_not_found",
            message=f"config file does not exist: {explicit}",
            context={"path": str(explicit)},
        )

    fallback = Path("~/.config/codevigil/config.toml").expanduser()
    if fallback.exists():
        return _read_toml(fallback), fallback
    return None, None
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _read_toml(path: Path) -> dict[str, Any]:
|
|
286
|
+
try:
|
|
287
|
+
with path.open("rb") as handle:
|
|
288
|
+
return tomllib.load(handle)
|
|
289
|
+
except tomllib.TOMLDecodeError as exc:
|
|
290
|
+
raise ConfigError(
|
|
291
|
+
code="config.toml_parse_error",
|
|
292
|
+
message=f"failed to parse {path}: {exc}",
|
|
293
|
+
context={"path": str(path)},
|
|
294
|
+
) from exc
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _apply_layer(
    base: dict[str, Any],
    overlay: dict[str, Any],
    sources: dict[str, str],
    *,
    source_label: str,
    prefix: tuple[str, ...] = (),
) -> None:
    """Merge ``overlay`` into ``base`` in place, validating against the defaults.

    Every key in ``overlay`` must exist at the same dotted path in
    ``CONFIG_DEFAULTS``. Where the default is a table the overlay value must
    also be a table and is merged recursively. Otherwise the value is coerced
    to the default's type, written into ``base``, and ``sources`` records
    ``source_label`` for that path.

    Parameters:
        base: Config tree being built; always the root, even when recursing.
        overlay: The (sub)table of the layer currently being applied.
        sources: Dotted path → provenance mapping, updated in place.
        source_label: Provenance label used in ``sources`` and error messages.
        prefix: Path segments leading to ``overlay`` within the full tree.

    Raises:
        ConfigError: ``config.unknown_key`` for a key without a default;
            ``config.type_mismatch`` when a value has the wrong type.
    """
    for name, incoming in overlay.items():
        trail = (*prefix, name)
        dotted = ".".join(trail)
        schema_slot = _read_dotted_optional(CONFIG_DEFAULTS, dotted)
        if schema_slot is _MISSING:
            raise ConfigError(
                code="config.unknown_key",
                message=f"unknown config key {dotted!r} in {source_label}",
                context={"key": dotted, "source": source_label},
            )

        if not isinstance(schema_slot, dict):
            # Leaf: coerce against the default's type and record provenance.
            _assign_dotted(
                base,
                dotted,
                _coerce_against_default(dotted, incoming, schema_slot, source=source_label),
            )
            sources[dotted] = source_label
            continue

        if isinstance(incoming, dict):
            # Table: descend, still writing into the root ``base`` by dotted path.
            _apply_layer(base, incoming, sources, source_label=source_label, prefix=trail)
            continue

        actual = type(incoming).__name__
        raise ConfigError(
            code="config.type_mismatch",
            message=f"config key {dotted!r} expected a table, got {actual} in {source_label}",
            context={
                "key": dotted,
                "expected": "table",
                "actual": actual,
                "source": source_label,
            },
        )
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _collect_env_overrides(environment: dict[str, str]) -> dict[str, tuple[str, str]]:
    """Pick the bound ``CODEVIGIL_*`` variables out of ``environment``.

    Returns a mapping from dotted config path to ``(raw_value, env_var_name)``
    for every entry of ``_ENV_BINDINGS`` whose variable is present. Values are
    returned as raw strings; no coercion happens here.
    """
    return {
        ".".join(path): (raw, env_name)
        for env_name, path in _ENV_BINDINGS.items()
        if (raw := environment.get(env_name)) is not None
    }
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
# ---------------------------------------------------------------------------
|
|
355
|
+
# Coercion and validation
|
|
356
|
+
# ---------------------------------------------------------------------------
|
|
357
|
+
|
|
358
|
+
# Sentinel returned by ``_read_dotted_optional`` when a path is absent. It is
# compared by identity, so values such as ``None`` or ``False`` stored in the
# config are never mistaken for "missing".
_MISSING: object = object()
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _read_dotted(root: dict[str, Any], dotted: str) -> Any:
|
|
362
|
+
node: Any = root
|
|
363
|
+
for part in dotted.split("."):
|
|
364
|
+
node = node[part]
|
|
365
|
+
return node
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _read_dotted_optional(root: dict[str, Any], dotted: str) -> Any:
|
|
369
|
+
node: Any = root
|
|
370
|
+
for part in dotted.split("."):
|
|
371
|
+
if not isinstance(node, dict) or part not in node:
|
|
372
|
+
return _MISSING
|
|
373
|
+
node = node[part]
|
|
374
|
+
return node
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def _assign_dotted(root: dict[str, Any], dotted: str, value: Any) -> None:
|
|
378
|
+
parts = dotted.split(".")
|
|
379
|
+
node: dict[str, Any] = root
|
|
380
|
+
for part in parts[:-1]:
|
|
381
|
+
next_node = node.get(part)
|
|
382
|
+
if not isinstance(next_node, dict):
|
|
383
|
+
next_node = {}
|
|
384
|
+
node[part] = next_node
|
|
385
|
+
node = next_node
|
|
386
|
+
node[parts[-1]] = value
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _validate_layer_shape(layer: dict[str, Any], *, source: str) -> None:
|
|
390
|
+
if not isinstance(layer, dict): # pragma: no cover - defensive
|
|
391
|
+
raise ConfigError(
|
|
392
|
+
code="config.bad_layer_shape",
|
|
393
|
+
message=f"{source} did not produce a table",
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _check_known_path(dotted: str, *, source: str) -> None:
    """Reject a dotted key that has no entry in ``CONFIG_DEFAULTS``.

    Raises:
        ConfigError: ``config.unknown_key``, naming the key and ``source``.
    """
    slot = _read_dotted_optional(CONFIG_DEFAULTS, dotted)
    if slot is not _MISSING:
        return
    raise ConfigError(
        code="config.unknown_key",
        message=f"unknown config key {dotted!r} from {source}",
        context={"key": dotted, "source": source},
    )
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _coerce_against_default(dotted: str, value: Any, default: Any, *, source: str) -> Any:
|
|
407
|
+
expected_type = type(default)
|
|
408
|
+
if isinstance(default, bool):
|
|
409
|
+
if not isinstance(value, bool):
|
|
410
|
+
raise ConfigError(
|
|
411
|
+
code="config.type_mismatch",
|
|
412
|
+
message=(
|
|
413
|
+
f"config key {dotted!r} expected bool, got {type(value).__name__} in {source}"
|
|
414
|
+
),
|
|
415
|
+
context={"key": dotted, "expected": "bool", "source": source},
|
|
416
|
+
)
|
|
417
|
+
return value
|
|
418
|
+
if isinstance(default, int) and not isinstance(default, bool):
|
|
419
|
+
if isinstance(value, bool) or not isinstance(value, int):
|
|
420
|
+
raise ConfigError(
|
|
421
|
+
code="config.type_mismatch",
|
|
422
|
+
message=(
|
|
423
|
+
f"config key {dotted!r} expected int, got {type(value).__name__} in {source}"
|
|
424
|
+
),
|
|
425
|
+
context={"key": dotted, "expected": "int", "source": source},
|
|
426
|
+
)
|
|
427
|
+
return value
|
|
428
|
+
if isinstance(default, float):
|
|
429
|
+
if isinstance(value, bool) or not isinstance(value, (int, float)):
|
|
430
|
+
raise ConfigError(
|
|
431
|
+
code="config.type_mismatch",
|
|
432
|
+
message=(
|
|
433
|
+
f"config key {dotted!r} expected float, got {type(value).__name__} in {source}"
|
|
434
|
+
),
|
|
435
|
+
context={"key": dotted, "expected": "float", "source": source},
|
|
436
|
+
)
|
|
437
|
+
return float(value)
|
|
438
|
+
if isinstance(default, str):
|
|
439
|
+
if not isinstance(value, str):
|
|
440
|
+
raise ConfigError(
|
|
441
|
+
code="config.type_mismatch",
|
|
442
|
+
message=(
|
|
443
|
+
f"config key {dotted!r} expected str, got {type(value).__name__} in {source}"
|
|
444
|
+
),
|
|
445
|
+
context={"key": dotted, "expected": "str", "source": source},
|
|
446
|
+
)
|
|
447
|
+
return value
|
|
448
|
+
if isinstance(default, list):
|
|
449
|
+
if not isinstance(value, list):
|
|
450
|
+
raise ConfigError(
|
|
451
|
+
code="config.type_mismatch",
|
|
452
|
+
message=(
|
|
453
|
+
f"config key {dotted!r} expected list, got {type(value).__name__} in {source}"
|
|
454
|
+
),
|
|
455
|
+
context={"key": dotted, "expected": "list", "source": source},
|
|
456
|
+
)
|
|
457
|
+
# ``stop_phrase.custom_phrases`` accepts a mixed list of plain
|
|
458
|
+
# strings and table entries with ``text``/``mode``/``category``/
|
|
459
|
+
# ``intent`` keys. Every other list-valued config key is still
|
|
460
|
+
# the strict ``list[str]`` form.
|
|
461
|
+
if dotted == "collectors.stop_phrase.custom_phrases":
|
|
462
|
+
return _coerce_custom_phrase_list(dotted, value, source=source)
|
|
463
|
+
for item in value:
|
|
464
|
+
if not isinstance(item, str):
|
|
465
|
+
raise ConfigError(
|
|
466
|
+
code="config.type_mismatch",
|
|
467
|
+
message=(
|
|
468
|
+
f"config key {dotted!r} list item expected str, got "
|
|
469
|
+
f"{type(item).__name__} in {source}"
|
|
470
|
+
),
|
|
471
|
+
context={
|
|
472
|
+
"key": dotted,
|
|
473
|
+
"expected": "list[str]",
|
|
474
|
+
"source": source,
|
|
475
|
+
},
|
|
476
|
+
)
|
|
477
|
+
return list(value)
|
|
478
|
+
raise ConfigError( # pragma: no cover - defensive for unknown default types
|
|
479
|
+
code="config.unsupported_default_type",
|
|
480
|
+
message=f"config key {dotted!r} has unsupported default type {expected_type.__name__}",
|
|
481
|
+
context={"key": dotted, "type": expected_type.__name__},
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
_CUSTOM_PHRASE_FIELDS: frozenset[str] = frozenset({"text", "mode", "category", "intent"})
|
|
486
|
+
_CUSTOM_PHRASE_MODES: frozenset[str] = frozenset({"word", "regex", "substring"})
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def _coerce_custom_phrase_list(dotted: str, value: list[Any], *, source: str) -> list[Any]:
|
|
490
|
+
"""Validate the mixed string/table form of ``stop_phrase.custom_phrases``."""
|
|
491
|
+
|
|
492
|
+
out: list[Any] = []
|
|
493
|
+
for item in value:
|
|
494
|
+
if isinstance(item, str):
|
|
495
|
+
out.append(item)
|
|
496
|
+
continue
|
|
497
|
+
if not isinstance(item, dict):
|
|
498
|
+
raise ConfigError(
|
|
499
|
+
code="config.type_mismatch",
|
|
500
|
+
message=(
|
|
501
|
+
f"config key {dotted!r} list item expected str or table, got "
|
|
502
|
+
f"{type(item).__name__} in {source}"
|
|
503
|
+
),
|
|
504
|
+
context={"key": dotted, "source": source},
|
|
505
|
+
)
|
|
506
|
+
unknown = set(item.keys()) - _CUSTOM_PHRASE_FIELDS
|
|
507
|
+
if unknown:
|
|
508
|
+
raise ConfigError(
|
|
509
|
+
code="config.unknown_key",
|
|
510
|
+
message=(
|
|
511
|
+
f"config key {dotted!r} table entry has unknown field(s) "
|
|
512
|
+
f"{sorted(unknown)!r} in {source}"
|
|
513
|
+
),
|
|
514
|
+
context={"key": dotted, "unknown": sorted(unknown), "source": source},
|
|
515
|
+
)
|
|
516
|
+
text = item.get("text")
|
|
517
|
+
if not isinstance(text, str) or not text:
|
|
518
|
+
raise ConfigError(
|
|
519
|
+
code="config.type_mismatch",
|
|
520
|
+
message=(
|
|
521
|
+
f"config key {dotted!r} table entry requires a non-empty "
|
|
522
|
+
f"'text' field in {source}"
|
|
523
|
+
),
|
|
524
|
+
context={"key": dotted, "source": source},
|
|
525
|
+
)
|
|
526
|
+
mode = item.get("mode", "word")
|
|
527
|
+
if mode not in _CUSTOM_PHRASE_MODES:
|
|
528
|
+
raise ConfigError(
|
|
529
|
+
code="config.out_of_range",
|
|
530
|
+
message=(
|
|
531
|
+
f"config key {dotted!r} table entry has invalid mode "
|
|
532
|
+
f"{mode!r}; expected one of {sorted(_CUSTOM_PHRASE_MODES)!r} in {source}"
|
|
533
|
+
),
|
|
534
|
+
context={"key": dotted, "mode": mode, "source": source},
|
|
535
|
+
)
|
|
536
|
+
out.append(dict(item))
|
|
537
|
+
return out
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def _coerce_scalar(dotted: str, raw: Any, *, source: str) -> Any:
    """Coerce an env / CLI override for ``dotted`` to the type of its default.

    Non-string values are handed to ``_coerce_against_default``. Strings are
    parsed according to the default's type:

    * ``bool``: ``true/1/yes/on`` or ``false/0/no/off``, case-insensitive and
      ignoring surrounding whitespace.
    * ``int`` / ``float``: parsed with ``int()`` / ``float()``.
    * ``list``: split on commas, each part stripped, empty parts dropped.
    * anything else: the string is returned unchanged.

    Raises:
        ConfigError: ``config.unknown_key`` when ``dotted`` has no default;
            ``config.scalar_into_table`` when the default is a table;
            ``config.type_mismatch`` when the string cannot be parsed.
    """
    default = _read_dotted_optional(CONFIG_DEFAULTS, dotted)
    if default is _MISSING:
        raise ConfigError(
            code="config.unknown_key",
            message=f"unknown config key {dotted!r} from {source}",
            context={"key": dotted, "source": source},
        )
    if isinstance(default, dict):
        raise ConfigError(
            code="config.scalar_into_table",
            message=f"config key {dotted!r} expects a table, not a scalar from {source}",
            context={"key": dotted, "source": source},
        )
    if not isinstance(raw, str):
        return _coerce_against_default(dotted, raw, default, source=source)

    # Env / CLI raw values arrive as strings; parse them against the default
    # type so CODEVIGIL_WATCH_POLL_INTERVAL="0.5" becomes float 0.5.
    if isinstance(default, bool):
        spellings = {
            "true": True,
            "1": True,
            "yes": True,
            "on": True,
            "false": False,
            "0": False,
            "no": False,
            "off": False,
        }
        flag = spellings.get(raw.strip().lower())
        if flag is None:
            raise ConfigError(
                code="config.type_mismatch",
                message=f"config key {dotted!r} expected bool, got {raw!r} in {source}",
                context={"key": dotted, "raw": raw, "source": source},
            )
        return flag

    # ``bool`` defaults were fully handled above, so ``int`` here means a
    # real integer default.
    parser: Any = None
    expected = ""
    if isinstance(default, int):
        parser, expected = int, "int"
    elif isinstance(default, float):
        parser, expected = float, "float"
    if parser is not None:
        try:
            return parser(raw)
        except ValueError as exc:
            raise ConfigError(
                code="config.type_mismatch",
                message=f"config key {dotted!r} expected {expected}, got {raw!r} in {source}",
                context={"key": dotted, "raw": raw, "source": source},
            ) from exc

    if isinstance(default, list):
        # Comma-separated env / CLI form: "a,b,c".
        return [piece for piece in map(str.strip, raw.split(",")) if piece]
    return raw
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def _validate_resolved(values: dict[str, Any]) -> None:
    """Run every post-merge check on the fully resolved config.

    Numeric ranges are checked first, in the order listed below, followed by
    the stale/evict ordering, the enabled collector and renderer names, the
    report output format, and the rule that ``parse_health`` stays enabled.
    The first failing check raises ``ConfigError``.
    """
    # (dotted key, minimum, maximum, kind) — bounds are inclusive.
    range_rules: tuple[tuple[str, float, float, str], ...] = (
        ("watch.poll_interval", 0.05, 3600.0, "float"),
        ("watch.tick_interval", 0.05, 3600.0, "float"),
        ("watch.max_files", 1, 1_000_000, "int"),
        ("watch.stale_after_seconds", 1, 86_400, "int"),
        ("watch.evict_after_seconds", 1, 86_400, "int"),
        ("watch.large_file_warn_bytes", 1024, 10**12, "int"),
        ("collectors.read_edit_ratio.window_size", 1, 100_000, "int"),
        ("collectors.read_edit_ratio.blind_edit_window", 1, 10_000, "int"),
        ("collectors.read_edit_ratio.blind_edit_confidence_floor", 0.0, 1.0, "float"),
        ("bootstrap.sessions", 1, 1_000, "int"),
    )
    for dotted, lower, upper, kind in range_rules:
        _validate_range(values, dotted, minimum=lower, maximum=upper, kind=kind)

    _validate_stale_vs_evict(values)

    # (dotted key, allowed names, kind used in error codes and messages)
    name_rules: tuple[tuple[str, frozenset[str], str], ...] = (
        ("collectors.enabled", _KNOWN_COLLECTORS, "collector"),
        ("renderers.enabled", _KNOWN_RENDERERS, "renderer"),
    )
    for dotted, known, kind in name_rules:
        _validate_enabled_names(values, dotted, known=known, kind=kind)

    _validate_output_format(values)
    _validate_parse_health_undisableable(values)
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def _validate_parse_health_undisableable(values: dict[str, Any]) -> None:
    """Reject a resolved config in which ``parse_health`` is not enabled.

    ``parse_health`` is the parser-drift integrity collector. Letting a user
    turn it off would silence the only signal that catches a silent Claude
    Code schema break, which defeats the design goal of treating drift as a
    first-class observable.

    The check passes when ``collectors.parse_health.enabled`` is absent or is
    exactly ``True``; any other value raises.

    Raises:
        ConfigError: ``config.parse_health_undisableable``.
    """
    flag = _read_dotted_optional(values, "collectors.parse_health.enabled")
    if flag is not _MISSING and flag is not True:
        raise ConfigError(
            code="config.parse_health_undisableable",
            message=(
                "collectors.parse_health.enabled cannot be set to false; "
                "parse_health is a built-in always-on integrity collector"
            ),
            context={"key": "collectors.parse_health.enabled", "value": flag},
        )
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def _validate_range(
    values: dict[str, Any],
    dotted: str,
    *,
    minimum: float,
    maximum: float,
    kind: str,
) -> None:
    """Require ``minimum <= values[dotted] <= maximum`` (bounds inclusive).

    Parameters:
        values: Resolved config tree.
        dotted: Dotted path of the numeric key to check.
        minimum: Smallest accepted value.
        maximum: Largest accepted value.
        kind: Type name shown in the error message (``"int"`` / ``"float"``).

    Raises:
        ConfigError: ``config.out_of_range`` when the value lies outside the
            bounds or is NaN.
    """
    value = _read_dotted(values, dotted)
    # Written as a negated chained comparison so that NaN is rejected: every
    # ordering comparison with NaN is False, so a test of the form
    # ``value < minimum or value > maximum`` would let NaN through.
    if not (minimum <= value <= maximum):
        raise ConfigError(
            code="config.out_of_range",
            message=(
                f"config key {dotted!r} = {value!r} is out of range "
                f"[{minimum}, {maximum}] for {kind}"
            ),
            context={
                "key": dotted,
                "value": value,
                "min": minimum,
                "max": maximum,
            },
        )
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def _validate_stale_vs_evict(values: dict[str, Any]) -> None:
    """Require ``watch.evict_after_seconds`` to exceed ``watch.stale_after_seconds``.

    Raises:
        ConfigError: ``config.out_of_range`` when the eviction threshold is
            less than or equal to the stale threshold.
    """
    stale = _read_dotted(values, "watch.stale_after_seconds")
    evict = _read_dotted(values, "watch.evict_after_seconds")
    if evict <= stale:
        detail = (
            f"watch.evict_after_seconds ({evict}) must be strictly greater "
            f"than watch.stale_after_seconds ({stale})"
        )
        raise ConfigError(
            code="config.out_of_range",
            message=detail,
            context={"stale": stale, "evict": evict},
        )
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def _validate_enabled_names(
    values: dict[str, Any],
    dotted: str,
    *,
    known: frozenset[str],
    kind: str,
) -> None:
    """Check an ``enabled`` list for unknown and duplicate names.

    Unknown names are reported first; duplicates are only checked once every
    name is known.

    Parameters:
        values: Resolved config tree.
        dotted: Dotted path of the list to check.
        known: Names that are allowed to appear in the list.
        kind: ``"collector"`` or ``"renderer"``; used in the error code and
            message.

    Raises:
        ConfigError: ``config.unknown_<kind>`` or ``config.duplicate_<kind>``.
    """
    enabled: list[str] = _read_dotted(values, dotted)

    strangers = [name for name in enabled if name not in known]
    if strangers:
        catalogue = sorted(known)
        raise ConfigError(
            code=f"config.unknown_{kind}",
            message=f"unknown {kind} name(s) in {dotted}: {strangers!r}; known: {catalogue!r}",
            context={"key": dotted, "unknown": strangers, "known": catalogue},
        )

    # A set drops repeats, so a shorter set means at least one duplicate.
    if len(set(enabled)) < len(enabled):
        raise ConfigError(
            code=f"config.duplicate_{kind}",
            message=f"duplicate {kind} name(s) in {dotted}: {enabled!r}",
            context={"key": dotted, "enabled": enabled},
        )
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
def _validate_output_format(values: dict[str, Any]) -> None:
    """Require ``report.output_format`` to be one of ``_VALID_OUTPUT_FORMATS``.

    Raises:
        ConfigError: ``config.invalid_output_format``.
    """
    fmt = _read_dotted(values, "report.output_format")
    if fmt in _VALID_OUTPUT_FORMATS:
        return
    allowed = sorted(_VALID_OUTPUT_FORMATS)
    raise ConfigError(
        code="config.invalid_output_format",
        message=f"report.output_format = {fmt!r} is not one of {allowed!r}",
        context={"value": fmt, "valid": allowed},
    )
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def _format_value(value: Any) -> str:
|
|
762
|
+
if isinstance(value, str):
|
|
763
|
+
return repr(value)
|
|
764
|
+
if isinstance(value, list):
|
|
765
|
+
return "[" + ", ".join(_format_value(v) for v in value) + "]"
|
|
766
|
+
return repr(value)
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
# Names exported by ``from codevigil.config import *``. The underscore-prefixed
# helpers defined in this module are not listed.
__all__ = [
    "CONFIG_DEFAULTS",
    "ConfigError",
    "ResolvedConfig",
    "ResolvedValue",
    "load_config",
    "render_config_check",
]
|