codevigil 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codevigil/privacy.py ADDED
@@ -0,0 +1,191 @@
1
+ """Network-egress import gate.
2
+
3
+ Installs an ``importlib`` meta-path finder that refuses to load any banned
4
+ module when the *direct* importer lives inside the ``codevigil`` package. The
5
+ hook is active in every execution mode and is the runtime half of the privacy
6
+ guarantee documented in ``docs/design.md`` §Privacy Enforcement.
7
+
8
+ The hook is deliberately scoped to *direct* imports from codevigil: if codevigil
9
+ imports a permitted stdlib module (e.g. ``json``) which in turn imports a
10
+ banned module transitively, the transitive import is allowed — the importer
11
+ frame at the point the banned module is resolved is the permitted stdlib
12
+ module, not codevigil.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import sys
18
+ from collections.abc import Sequence
19
+ from importlib.abc import MetaPathFinder
20
+ from importlib.machinery import ModuleSpec
21
+ from types import FrameType
22
+
23
+ # Exact fully-qualified module names that are blocked.
24
+ _BANNED_EXACT: frozenset[str] = frozenset(
25
+ {
26
+ "socket",
27
+ "ssl",
28
+ "http",
29
+ "http.client",
30
+ "http.server",
31
+ "urllib",
32
+ "urllib.request",
33
+ "urllib.parse",
34
+ "urllib.error",
35
+ "urllib3",
36
+ "httpx",
37
+ "requests",
38
+ "aiohttp",
39
+ "ftplib",
40
+ "smtplib",
41
+ "poplib",
42
+ "imaplib",
43
+ "nntplib",
44
+ "telnetlib",
45
+ "xmlrpc",
46
+ "xmlrpc.client",
47
+ "xmlrpc.server",
48
+ "subprocess",
49
+ "pty",
50
+ "multiprocessing.popen_fork",
51
+ "multiprocessing.popen_forkserver",
52
+ "multiprocessing.popen_spawn_posix",
53
+ "multiprocessing.popen_spawn_win32",
54
+ }
55
+ )
56
+
57
+ # Top-level package names whose submodules are all blocked.
58
+ _BANNED_ROOTS: frozenset[str] = frozenset(
59
+ {
60
+ "socket",
61
+ "ssl",
62
+ "urllib",
63
+ "urllib3",
64
+ "httpx",
65
+ "requests",
66
+ "aiohttp",
67
+ "ftplib",
68
+ "smtplib",
69
+ "poplib",
70
+ "imaplib",
71
+ "nntplib",
72
+ "telnetlib",
73
+ "xmlrpc",
74
+ "subprocess",
75
+ "pty",
76
+ }
77
+ )
78
+
79
# Top-level package name; a caller counts as "codevigil" when its module name
# equals this or starts with this followed by a dot.
_CODEVIGIL_ROOT: str = "codevigil"
80
+
81
# Frame modules we skip when locating the direct caller of a banned import.
# These are all Python-internal import-machinery frames. A frame is skipped
# when its ``__name__`` equals one of these entries or is a dotted submodule
# of one (e.g. ``importlib._bootstrap``).
_SKIPPED_CALLER_PREFIXES: tuple[str, ...] = (
    "importlib",
    "_frozen_importlib",
    "_frozen_importlib_external",
)
88
+
89
+
90
class PrivacyViolationError(ImportError):
    """Signal that the privacy gate rejected an operation.

    The class derives from ``ImportError`` for two reasons: ``find_spec`` may
    raise it so the traceback points at the offending import statement, and
    tests written as ``pytest.raises(ImportError)`` keep passing.
    """
97
+
98
+
99
def _is_banned(fullname: str) -> bool:
    """Report whether ``fullname`` is covered by the import ban lists.

    A module is banned when its full dotted name is listed in
    ``_BANNED_EXACT`` or when its top-level package (the text before the
    first dot) is listed in ``_BANNED_ROOTS``.
    """
    top_level, _, _ = fullname.partition(".")
    return fullname in _BANNED_EXACT or top_level in _BANNED_ROOTS
104
+
105
+
106
def _direct_caller_module(start: FrameType | None) -> str | None:
    """Find the module name of the nearest frame outside the import machinery.

    Starting at ``start`` and following ``f_back``, each frame's global
    ``__name__`` is compared against ``_SKIPPED_CALLER_PREFIXES``. The first
    frame that matches none of them wins and its name is returned; a frame
    whose ``__name__`` is missing or not a string is reported as ``""``.
    ``None`` is returned when the chain runs out without such a frame.
    """
    current = start
    while current is not None:
        name = current.f_globals.get("__name__", "")
        if not isinstance(name, str):
            name = ""
        is_machinery = False
        for prefix in _SKIPPED_CALLER_PREFIXES:
            # Match the prefix itself or any dotted submodule of it.
            if name == prefix or name.startswith(prefix + "."):
                is_machinery = True
                break
        if not is_machinery:
            return name
        current = current.f_back
    return None
125
+
126
+
127
def _caller_is_codevigil(module_name: str | None) -> bool:
    """Return ``True`` when ``module_name`` is codevigil or one of its submodules.

    ``None`` (no caller found) is never treated as codevigil.
    """
    if module_name is None:
        return False
    if module_name == _CODEVIGIL_ROOT:
        return True
    return module_name.startswith(f"{_CODEVIGIL_ROOT}.")
131
+
132
+
133
class PrivacyImportHook(MetaPathFinder):
    """Meta-path finder that vetoes banned imports issued from codevigil code."""

    def find_spec(
        self,
        fullname: str,
        path: Sequence[str] | None,
        target: object | None = None,
    ) -> ModuleSpec | None:
        """Veto the import or defer to the remaining finders.

        This finder never supplies a spec. It returns ``None`` so the next
        meta-path entry handles the import, unless ``fullname`` is banned and
        the nearest non-import-machinery caller is a codevigil module, in
        which case ``PrivacyViolationError`` is raised.
        """
        if _is_banned(fullname):
            # Frame 1 is whoever invoked find_spec; the helper walks outward
            # from there past the import-machinery frames.
            importer = _direct_caller_module(sys._getframe(1))
            if _caller_is_codevigil(importer):
                raise PrivacyViolationError(
                    f"codevigil module {importer!r} attempted to import banned module "
                    f"{fullname!r}; network and subprocess modules are disallowed "
                    "by the privacy gate (see docs/design.md §Privacy Enforcement)."
                )
        return None
152
+
153
+
154
# Process-wide hook instance; ``None`` until ``install()`` creates it and
# reset by ``uninstall()``.
_HOOK_SINGLETON: PrivacyImportHook | None = None
155
+
156
+
157
def install() -> PrivacyImportHook:
    """Ensure the privacy import hook is first on ``sys.meta_path``.

    The hook instance is created on the first call and reused afterwards.
    Calling this again is safe: the hook is only inserted when it is not
    already present on ``sys.meta_path``, so it is never registered twice.

    Returns:
        The singleton ``PrivacyImportHook`` instance.
    """
    global _HOOK_SINGLETON
    hook = _HOOK_SINGLETON
    if hook is None:
        hook = _HOOK_SINGLETON = PrivacyImportHook()
    if hook not in sys.meta_path:
        # Position 0 so the gate is consulted before any other finder.
        sys.meta_path.insert(0, hook)
    return hook
171
+
172
+
173
def uninstall() -> None:
    """Take the privacy import hook off ``sys.meta_path`` and forget it.

    Intended for tests that need to observe behavior without the hook; the
    runtime never calls it. Safe to call when the hook is not installed.
    """
    global _HOOK_SINGLETON
    hook = _HOOK_SINGLETON
    _HOOK_SINGLETON = None
    if hook is not None and hook in sys.meta_path:
        sys.meta_path.remove(hook)
184
+
185
+
186
# Names exported by a star-import of this module.
__all__ = [
    "PrivacyImportHook",
    "PrivacyViolationError",
    "install",
    "uninstall",
]
codevigil/projects.py ADDED
@@ -0,0 +1,132 @@
1
+ """Project hash → friendly name resolution.
2
+
3
+ Claude Code stores session files under
4
+ ``~/.claude/projects/<project-hash>/sessions/<session-id>.jsonl``. The hash is
5
+ opaque to humans, so the aggregator threads every session through a
6
+ :class:`ProjectRegistry` that maps the hash to a display name using three
7
+ sources, highest precedence first (per ``docs/design.md`` §Project Name
8
+ Resolution):
9
+
10
+ 1. A user-maintained TOML file at ``~/.config/codevigil/projects.toml`` with
11
+ ``{hash = "name"}`` pairs at the top level. This is the manual override
12
+ users reach for when the auto-resolved name is wrong or missing.
13
+ 2. The first ``cwd`` field observed inside a SYSTEM event payload for that
14
+ hash, stripped to the last path component via ``Path(cwd).name``. This is
15
+ what `claude` itself records when a user runs it from a project directory.
16
+ 3. The raw hash prefix (``hash[:8]``) as the always-available fallback. An
17
+ unresolved hash is *expected state*, not an error, so this branch never
18
+ emits a WARN.
19
+
20
+ The registry is process-local: it is constructed once by the aggregator and
21
+ mutated only via :meth:`observe_system_event`.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import tomllib
27
+ from pathlib import Path
28
+ from typing import Any
29
+
30
+ from codevigil.errors import CodevigilError, ErrorLevel, ErrorSource, record
31
+ from codevigil.types import Event, EventKind, safe_get
32
+
33
# Default location of the user override file; ``~`` is expanded once, at
# import time.
_DEFAULT_TOML_PATH: Path = Path("~/.config/codevigil/projects.toml").expanduser()
34
+
35
+
36
+ class ProjectRegistry:
37
+ """Resolve Claude Code project hashes to display names.
38
+
39
+ The constructor loads the optional TOML override file synchronously; a
40
+ malformed or unreadable file is reported via the error channel as a WARN
41
+ and the registry continues with an empty user map (the cwd and hash
42
+ fallbacks still work). The aggregator instantiates one registry per
43
+ process and shares it across every session.
44
+ """
45
+
46
+ def __init__(self, toml_path: Path | None = None) -> None:
47
+ self._toml_path: Path = toml_path if toml_path is not None else _DEFAULT_TOML_PATH
48
+ self._user_overrides: dict[str, str] = {}
49
+ self._cwd_cache: dict[str, str] = {}
50
+ self._load_user_overrides()
51
+
52
+ # ------------------------------------------------------------------ loading
53
+
54
+ def _load_user_overrides(self) -> None:
55
+ path = self._toml_path
56
+ if not path.exists():
57
+ return
58
+ try:
59
+ with path.open("rb") as handle:
60
+ data: dict[str, Any] = tomllib.load(handle)
61
+ except (OSError, tomllib.TOMLDecodeError) as exc:
62
+ record(
63
+ CodevigilError(
64
+ level=ErrorLevel.WARN,
65
+ source=ErrorSource.AGGREGATOR,
66
+ code="projects.toml_load_failed",
67
+ message=(
68
+ f"failed to load projects override file {str(path)!r}: {exc}; "
69
+ f"continuing with empty user map"
70
+ ),
71
+ context={"path": str(path)},
72
+ )
73
+ )
74
+ return
75
+ for key, value in data.items():
76
+ if isinstance(key, str) and isinstance(value, str) and value:
77
+ self._user_overrides[key] = value
78
+ else:
79
+ record(
80
+ CodevigilError(
81
+ level=ErrorLevel.WARN,
82
+ source=ErrorSource.AGGREGATOR,
83
+ code="projects.toml_bad_entry",
84
+ message=(f"ignoring non-string entry {key!r} in projects override file"),
85
+ context={"path": str(self._toml_path), "key": str(key)},
86
+ )
87
+ )
88
+
89
+ # ----------------------------------------------------------------- ingestion
90
+
91
+ def observe_system_event(self, project_hash: str, event: Event) -> None:
92
+ """Cache the first ``cwd`` value seen on a SYSTEM event for a hash.
93
+
94
+ The aggregator forwards every SYSTEM event here. We keep the *first*
95
+ observation rather than the latest because the resolution policy
96
+ wants stable display names — a session that ``cd``s mid-run should
97
+ still show up under the directory it started in.
98
+ """
99
+
100
+ if event.kind is not EventKind.SYSTEM:
101
+ return
102
+ if project_hash in self._cwd_cache:
103
+ return
104
+ cwd = safe_get(
105
+ event.payload,
106
+ "cwd",
107
+ default=None,
108
+ expected=str,
109
+ source=ErrorSource.AGGREGATOR,
110
+ event_kind="system",
111
+ )
112
+ if not isinstance(cwd, str) or not cwd:
113
+ return
114
+ name = Path(cwd).name
115
+ if name:
116
+ self._cwd_cache[project_hash] = name
117
+
118
+ # ----------------------------------------------------------------- resolution
119
+
120
+ def resolve(self, project_hash: str) -> str:
121
+ """Return the highest-precedence display name available for a hash."""
122
+
123
+ override = self._user_overrides.get(project_hash)
124
+ if override:
125
+ return override
126
+ cwd_name = self._cwd_cache.get(project_hash)
127
+ if cwd_name:
128
+ return cwd_name
129
+ return project_hash[:8] if project_hash else ""
130
+
131
+
132
# Names exported by a star-import of this module.
__all__ = ["ProjectRegistry"]
codevigil/registry.py ADDED
@@ -0,0 +1,121 @@
1
+ """Shared validation for the collector and renderer registries.
2
+
3
+ Both ``codevigil.collectors`` and ``codevigil.renderers`` expose a registry
4
+ dict built by calling ``register()`` at module import time. This module holds
5
+ the validation rules that apply to both:
6
+
7
+ * Duplicate ``name`` collides loudly via ``RegistryCollisionError``.
8
+ * Third-party plugins (any module not under ``codevigil.collectors`` /
9
+ ``codevigil.renderers``) must use a dotted ``name`` to avoid stomping on
10
+ built-in names. Built-ins use bare names.
11
+ * Each registered class is checked for the presence of the attributes and
12
+ methods its target protocol declares, at registration time — not at the
13
+ first ingest/render call.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import TypeVar, cast
19
+
20
+ from codevigil.types import Collector, Renderer
21
+
22
# Packages whose modules count as built-in for the namespacing check.
_CODEVIGIL_COLLECTOR_PKG: str = "codevigil.collectors"
_CODEVIGIL_RENDERER_PKG: str = "codevigil.renderers"

# Attributes that must be callable on a class for it to be registered.
_COLLECTOR_REQUIRED_METHODS: tuple[str, ...] = ("ingest", "snapshot", "reset")
_RENDERER_REQUIRED_METHODS: tuple[str, ...] = ("render", "render_error", "close")

# Class attributes that must be present and hold a non-empty string.
_COLLECTOR_REQUIRED_STR_ATTRS: tuple[str, ...] = ("name", "complexity")
_RENDERER_REQUIRED_STR_ATTRS: tuple[str, ...] = ("name",)
30
+
31
+
32
class RegistryCollisionError(Exception):
    """Raised when a class registers under a name that is already taken."""
34
+
35
+
36
class RegistryValidationError(Exception):
    """Raised at registration when a class breaks the protocol or naming rules."""
38
+
39
+
40
+ def _check_str_attrs(cls: type, attrs: tuple[str, ...], kind: str) -> None:
41
+ for attr in attrs:
42
+ if not hasattr(cls, attr):
43
+ raise RegistryValidationError(
44
+ f"{kind} {cls.__qualname__!r} is missing required class attribute {attr!r}"
45
+ )
46
+ value = getattr(cls, attr)
47
+ if not isinstance(value, str) or not value:
48
+ raise RegistryValidationError(
49
+ f"{kind} {cls.__qualname__!r} class attribute {attr!r} must be a "
50
+ f"non-empty string; got {type(value).__name__}"
51
+ )
52
+
53
+
54
+ def _check_methods(cls: type, methods: tuple[str, ...], kind: str) -> None:
55
+ for method in methods:
56
+ if not callable(getattr(cls, method, None)):
57
+ raise RegistryValidationError(
58
+ f"{kind} {cls.__qualname__!r} is missing required method {method!r}"
59
+ )
60
+
61
+
62
+ def _check_namespace(cls: type, name: str, builtin_pkg: str, kind: str) -> None:
63
+ module = cls.__module__ or ""
64
+ is_builtin = module == builtin_pkg or module.startswith(builtin_pkg + ".")
65
+ if is_builtin:
66
+ if "." in name:
67
+ raise RegistryValidationError(
68
+ f"built-in {kind} {cls.__qualname__!r} must use a bare name "
69
+ f"without dots; got {name!r}"
70
+ )
71
+ return
72
+ if "." not in name:
73
+ raise RegistryValidationError(
74
+ f"third-party {kind} {cls.__qualname__!r} (module {module!r}) must "
75
+ f"register under a dotted name like 'vendor.metric'; got {name!r}"
76
+ )
77
+
78
+
79
# Class-typed variable so the ``register_*`` functions return the same class
# type they were handed.
C = TypeVar("C", bound=type)
80
+
81
+
82
def register_collector(registry: dict[str, type[Collector]], cls: C) -> C:
    """Validate a collector class and add it to ``registry`` under ``cls.name``.

    Checks run in this order: required string attributes, required methods,
    namespacing, then name uniqueness.

    Returns:
        ``cls`` unchanged.

    Raises:
        RegistryValidationError: ``cls`` fails an attribute, method, or
            namespacing check.
        RegistryCollisionError: ``cls.name`` is already present in
            ``registry``.
    """
    _check_str_attrs(cls, _COLLECTOR_REQUIRED_STR_ATTRS, kind="collector")
    _check_methods(cls, _COLLECTOR_REQUIRED_METHODS, kind="collector")
    name: str = cast(str, cls.name)  # type: ignore[attr-defined]
    _check_namespace(cls, name, _CODEVIGIL_COLLECTOR_PKG, kind="collector")
    if name not in registry:
        registry[name] = cls
        return cls
    existing = registry[name]
    raise RegistryCollisionError(
        f"collector name {name!r} already registered by "
        f"{existing.__qualname__!r}; {cls.__qualname__!r} cannot reuse it"
    )
97
+
98
+
99
def register_renderer(registry: dict[str, type[Renderer]], cls: C) -> C:
    """Validate a renderer class and add it to ``registry`` under ``cls.name``.

    Checks run in this order: required string attributes, required methods,
    namespacing, then name uniqueness.

    Returns:
        ``cls`` unchanged.

    Raises:
        RegistryValidationError: ``cls`` fails an attribute, method, or
            namespacing check.
        RegistryCollisionError: ``cls.name`` is already present in
            ``registry``.
    """
    _check_str_attrs(cls, _RENDERER_REQUIRED_STR_ATTRS, kind="renderer")
    _check_methods(cls, _RENDERER_REQUIRED_METHODS, kind="renderer")
    name: str = cast(str, cls.name)  # type: ignore[attr-defined]
    _check_namespace(cls, name, _CODEVIGIL_RENDERER_PKG, kind="renderer")
    if name not in registry:
        registry[name] = cls
        return cls
    existing = registry[name]
    raise RegistryCollisionError(
        f"renderer name {name!r} already registered by "
        f"{existing.__qualname__!r}; {cls.__qualname__!r} cannot reuse it"
    )
114
+
115
+
116
# Names exported by a star-import of this module.
__all__ = [
    "RegistryCollisionError",
    "RegistryValidationError",
    "register_collector",
    "register_renderer",
]
@@ -0,0 +1,20 @@
1
+ """Renderer registry package.
2
+
3
+ Built-in renderers import themselves into ``RENDERERS`` at module import
4
+ time via ``register_renderer``. v0.1 ships the terminal and json_file
5
+ renderers; third-party renderers register under dotted names.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from codevigil.registry import register_renderer
11
+ from codevigil.renderers import json_file as _json_file
12
+ from codevigil.renderers import terminal as _terminal
13
+ from codevigil.types import Renderer
14
+
15
# Renderer name -> renderer class; filled by the ``register_renderer`` calls
# below when this package is imported.
RENDERERS: dict[str, type[Renderer]] = {}

# Built-in renderers shipped with the package.
register_renderer(RENDERERS, _terminal.TerminalRenderer)
register_renderer(RENDERERS, _json_file.JsonFileRenderer)

# ``register_renderer`` is re-exported alongside the registry dict.
__all__ = ["RENDERERS", "register_renderer"]
@@ -0,0 +1,105 @@
1
+ """NDJSON file renderer backed by the rotating JSONL writer.
2
+
3
+ Emits one JSON record per ``render()`` call into a rotating file under an
4
+ output directory. The directory must resolve inside ``$HOME``; attempts to
5
+ write outside are rejected at construction with ``PrivacyViolationError``,
6
+ matching the scope check the watcher enforces on its walk root.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from datetime import UTC, datetime
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from codevigil.errors import (
16
+ CodevigilError,
17
+ ErrorLevel,
18
+ ErrorSource,
19
+ RotatingJsonlWriter,
20
+ record,
21
+ )
22
+ from codevigil.privacy import PrivacyViolationError
23
+ from codevigil.types import MetricSnapshot, SessionMeta
24
+
25
+
26
+ class JsonFileRenderer:
27
+ """Appends NDJSON snapshot records to a rotating file under ``output_dir``."""
28
+
29
+ name: str = "json_file"
30
+
31
+ def __init__(
32
+ self,
33
+ *,
34
+ output_dir: Path,
35
+ filename: str = "snapshots.jsonl",
36
+ max_bytes: int = 10 * 1024 * 1024,
37
+ backups: int = 3,
38
+ ) -> None:
39
+ resolved_dir = self._validate_dir(output_dir)
40
+ self._output_dir: Path = resolved_dir
41
+ self._filename: str = filename
42
+ self._path: Path = resolved_dir / filename
43
+ self._writer: RotatingJsonlWriter = RotatingJsonlWriter(
44
+ self._path, max_bytes=max_bytes, backups=backups
45
+ )
46
+
47
+ @property
48
+ def path(self) -> Path:
49
+ return self._path
50
+
51
+ @staticmethod
52
+ def _validate_dir(output_dir: Path) -> Path:
53
+ resolved_dir = output_dir.expanduser().resolve()
54
+ home = Path.home().resolve()
55
+ if not resolved_dir.is_relative_to(home):
56
+ err = CodevigilError(
57
+ level=ErrorLevel.CRITICAL,
58
+ source=ErrorSource.RENDERER,
59
+ code="json_file.path_scope_violation",
60
+ message=(
61
+ f"json_file output directory {str(resolved_dir)!r} is outside "
62
+ f"the user home directory {str(home)!r}; refusing to write"
63
+ ),
64
+ context={"output_dir": str(resolved_dir), "home": str(home)},
65
+ )
66
+ record(err)
67
+ raise PrivacyViolationError(err.message)
68
+ return resolved_dir
69
+
70
+ def render(self, snapshots: list[MetricSnapshot], meta: SessionMeta) -> None:
71
+ record_payload: dict[str, Any] = {
72
+ "timestamp": datetime.now(tz=UTC).isoformat(),
73
+ "kind": "snapshot",
74
+ "session_id": meta.session_id,
75
+ "project_hash": meta.project_hash,
76
+ "project_name": meta.project_name,
77
+ "state": meta.state.value,
78
+ "parse_confidence": meta.parse_confidence,
79
+ "snapshots": [_snapshot_to_dict(s) for s in snapshots],
80
+ }
81
+ self._writer.write(record_payload)
82
+
83
+ def render_error(self, err: CodevigilError, meta: SessionMeta | None) -> None:
84
+ payload: dict[str, Any] = err.to_json_record()
85
+ payload["kind"] = "error"
86
+ if meta is not None:
87
+ payload["session_id"] = meta.session_id
88
+ payload["project_hash"] = meta.project_hash
89
+ self._writer.write(payload)
90
+
91
+ def close(self) -> None:
92
+ """No-op. ``RotatingJsonlWriter`` opens and closes per write."""
93
+
94
+
95
+ def _snapshot_to_dict(snap: MetricSnapshot) -> dict[str, Any]:
96
+ return {
97
+ "name": snap.name,
98
+ "value": snap.value,
99
+ "label": snap.label,
100
+ "severity": snap.severity.value,
101
+ "detail": snap.detail,
102
+ }
103
+
104
+
105
# Names exported by a star-import of this module.
__all__ = ["JsonFileRenderer"]