alter-runtime 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alter_runtime/__init__.py +11 -0
- alter_runtime/adapters/__init__.py +19 -0
- alter_runtime/adapters/claude_jsonl_watcher.py +545 -0
- alter_runtime/adapters/git_watcher.py +457 -0
- alter_runtime/adapters/household/__init__.py +29 -0
- alter_runtime/adapters/household/_base.py +138 -0
- alter_runtime/adapters/household/compost/__init__.py +17 -0
- alter_runtime/adapters/household/compost/adapter.py +81 -0
- alter_runtime/adapters/household/compost/storage.py +75 -0
- alter_runtime/adapters/household/compost/tests/__init__.py +0 -0
- alter_runtime/adapters/household/compost/tests/test_adapter.py +62 -0
- alter_runtime/adapters/household/compost/tests/test_storage.py +23 -0
- alter_runtime/adapters/household/compost/tests/test_traits.py +38 -0
- alter_runtime/adapters/household/compost/traits.py +79 -0
- alter_runtime/adapters/household/self_hoster/__init__.py +30 -0
- alter_runtime/adapters/household/self_hoster/adapter.py +248 -0
- alter_runtime/adapters/household/self_hoster/storage.py +83 -0
- alter_runtime/adapters/household/self_hoster/tests/__init__.py +0 -0
- alter_runtime/adapters/household/self_hoster/tests/test_adapter.py +216 -0
- alter_runtime/adapters/household/self_hoster/tests/test_storage.py +25 -0
- alter_runtime/adapters/household/self_hoster/tests/test_traits.py +55 -0
- alter_runtime/adapters/household/self_hoster/traits.py +105 -0
- alter_runtime/adapters/household/tapo_ecosystem/__init__.py +22 -0
- alter_runtime/adapters/household/tapo_ecosystem/adapter.py +98 -0
- alter_runtime/adapters/household/tapo_ecosystem/storage.py +95 -0
- alter_runtime/adapters/household/tapo_ecosystem/tests/__init__.py +0 -0
- alter_runtime/adapters/household/tapo_ecosystem/tests/test_adapter.py +55 -0
- alter_runtime/adapters/household/tapo_ecosystem/tests/test_storage.py +28 -0
- alter_runtime/adapters/household/tapo_ecosystem/tests/test_traits.py +45 -0
- alter_runtime/adapters/household/tapo_ecosystem/traits.py +97 -0
- alter_runtime/adapters/household/workshop_tools/__init__.py +25 -0
- alter_runtime/adapters/household/workshop_tools/adapter.py +77 -0
- alter_runtime/adapters/household/workshop_tools/storage.py +92 -0
- alter_runtime/adapters/household/workshop_tools/tests/__init__.py +0 -0
- alter_runtime/adapters/household/workshop_tools/tests/test_adapter.py +48 -0
- alter_runtime/adapters/household/workshop_tools/tests/test_storage.py +26 -0
- alter_runtime/adapters/household/workshop_tools/tests/test_traits.py +45 -0
- alter_runtime/adapters/household/workshop_tools/traits.py +95 -0
- alter_runtime/adapters/worktree_watcher.py +378 -0
- alter_runtime/atlas/__init__.py +48 -0
- alter_runtime/atlas/base.py +102 -0
- alter_runtime/atlas/ledger.py +196 -0
- alter_runtime/atlas/observations.py +136 -0
- alter_runtime/atlas/schema.py +106 -0
- alter_runtime/cap_cache.py +392 -0
- alter_runtime/cli.py +517 -0
- alter_runtime/clients/__init__.py +0 -0
- alter_runtime/clients/token_usage_client.py +273 -0
- alter_runtime/config.py +648 -0
- alter_runtime/consent.py +425 -0
- alter_runtime/daemon.py +518 -0
- alter_runtime/floor_loop.py +335 -0
- alter_runtime/floor_preflight.py +734 -0
- alter_runtime/http_auth.py +173 -0
- alter_runtime/notifiers/__init__.py +18 -0
- alter_runtime/notifiers/desktop.py +321 -0
- alter_runtime/sdk/__init__.py +12 -0
- alter_runtime/sdk/client.py +399 -0
- alter_runtime/service_install.py +616 -0
- alter_runtime/services/__init__.py +59 -0
- alter_runtime/services/launchd/com.alter.runtime.plist.in +90 -0
- alter_runtime/services/systemd/alter-runtime.service.in +74 -0
- alter_runtime/services/systemd/cf-access-env.conf.in +29 -0
- alter_runtime/sockets/__init__.py +20 -0
- alter_runtime/sockets/dbus.py +272 -0
- alter_runtime/sockets/unix.py +702 -0
- alter_runtime/subscribers/__init__.py +58 -0
- alter_runtime/subscribers/active_sessions_cron_emitter.py +313 -0
- alter_runtime/subscribers/active_sessions_do_publisher.py +1159 -0
- alter_runtime/subscribers/active_sessions_gc.py +432 -0
- alter_runtime/subscribers/active_sessions_writer.py +446 -0
- alter_runtime/subscribers/adapters_writer.py +415 -0
- alter_runtime/subscribers/agent_frames.py +461 -0
- alter_runtime/subscribers/bus.py +188 -0
- alter_runtime/subscribers/cache_writer.py +347 -0
- alter_runtime/subscribers/ceremony_echo.py +290 -0
- alter_runtime/subscribers/do_sse.py +864 -0
- alter_runtime/subscribers/ebpf.py +506 -0
- alter_runtime/subscribers/inbox_writer.py +469 -0
- alter_runtime/subscribers/mcp_fallback.py +391 -0
- alter_runtime/subscribers/presence_writer.py +426 -0
- alter_runtime/subscribers/session_presence.py +467 -0
- alter_runtime/subscribers/sse.py +125 -0
- alter_runtime/subscribers/weave_intent_writer.py +608 -0
- alter_runtime/update_loop.py +519 -0
- alter_runtime/weave/__init__.py +21 -0
- alter_runtime/weave/resolver.py +544 -0
- alter_runtime-0.3.0.dist-info/METADATA +289 -0
- alter_runtime-0.3.0.dist-info/RECORD +92 -0
- alter_runtime-0.3.0.dist-info/WHEEL +4 -0
- alter_runtime-0.3.0.dist-info/entry_points.txt +2 -0
- alter_runtime-0.3.0.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""~Alter Identity Runtime.
|
|
2
|
+
|
|
3
|
+
L3 of the six-layer identity distribution surface. See the package README and
|
|
4
|
+
the Strategic Decisions Register entries D-RT1 through D-RT10 (8 Apr 2026) for
|
|
5
|
+
architectural context.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from alter_runtime.sdk.client import AlterClient
|
|
9
|
+
|
|
10
|
+
__version__ = "0.3.0"
|
|
11
|
+
__all__ = ["AlterClient", "__version__"]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Local adapters - ambient-signal publishers.
|
|
2
|
+
|
|
3
|
+
Adapters are the runtime's *inputs from the device itself*: they observe
|
|
4
|
+
something local (git commits, CC hook invocations, shell activity) and
|
|
5
|
+
publish a signal onto the ``local.signal`` topic. A separate egress producer
|
|
6
|
+
(W2.2d) is responsible for POSTing those signals back to the per-handle DO
|
|
7
|
+
``/ingest`` endpoint so they become part of the continuous identity field.
|
|
8
|
+
|
|
9
|
+
Adapters are deliberately kept simple: one file per signal source, no shared
|
|
10
|
+
state, no cross-adapter coordination. Each registers as a
|
|
11
|
+
:class:`alter_runtime.daemon.Component` and is supervised like any other
|
|
12
|
+
runtime component.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from alter_runtime.adapters.claude_jsonl_watcher import ClaudeJsonlWatcher
|
|
16
|
+
from alter_runtime.adapters.git_watcher import GitWatcher
|
|
17
|
+
from alter_runtime.adapters.worktree_watcher import WorktreeWatcher
|
|
18
|
+
|
|
19
|
+
__all__ = ["ClaudeJsonlWatcher", "GitWatcher", "WorktreeWatcher"]
|
|
@@ -0,0 +1,545 @@
|
|
|
1
|
+
"""ClaudeJsonlWatcher - ambient token-usage adapter for Claude Code transcripts.
|
|
2
|
+
|
|
3
|
+
Watches ``~/.claude/projects/<slug>/*.jsonl`` files with ``watchdog``.
|
|
4
|
+
On modification, reads new lines from the persisted byte offset, parses each
|
|
5
|
+
via the privacy-hard-stop ``parse_assistant_line`` function, and POSTs
|
|
6
|
+
batches to the ALTER backend token-usage audit endpoint.
|
|
7
|
+
|
|
8
|
+
Privacy guarantee
|
|
9
|
+
-----------------
|
|
10
|
+
|
|
11
|
+
``parse_assistant_line`` is the ONLY path from JSONL bytes to the wire.
|
|
12
|
+
It is a strict **whitelist** parser: it constructs the output dict key-by-key
|
|
13
|
+
from explicitly named fields and never spreads ``record``, ``message``, or
|
|
14
|
+
``usage``. The privacy regression test (``tests/test_claude_jsonl_watcher_no_content_leak.py``)
|
|
15
|
+
seeds every non-whitelisted field with a CANARY sentinel and asserts that no
|
|
16
|
+
sentinel reaches the serialised output. Any change to the whitelist fails
|
|
17
|
+
that test.
|
|
18
|
+
|
|
19
|
+
Allowed output keys (exact set, no extras):
|
|
20
|
+
- ``session_id`` - from top-level ``sessionId``
|
|
21
|
+
- ``message_id`` - from ``message.id``
|
|
22
|
+
- ``model`` - from ``message.model``
|
|
23
|
+
- ``input_tokens`` - from ``message.usage.input_tokens``
|
|
24
|
+
- ``output_tokens`` - from ``message.usage.output_tokens``
|
|
25
|
+
- ``cache_creation_tokens`` - from ``message.usage.cache_creation_input_tokens``
|
|
26
|
+
- ``cache_read_tokens`` - from ``message.usage.cache_read_input_tokens``
|
|
27
|
+
- ``ts`` - from top-level ``timestamp``
|
|
28
|
+
|
|
29
|
+
Offset persistence
|
|
30
|
+
------------------
|
|
31
|
+
|
|
32
|
+
Per-file byte offsets are persisted atomically to
|
|
33
|
+
``~/.local/share/alter-runtime/cc-offsets.json`` so the adapter survives
|
|
34
|
+
daemon restarts without re-emitting already-posted events. The backend is
|
|
35
|
+
idempotent on ``message_id`` as a second line of defence.
|
|
36
|
+
|
|
37
|
+
On file rotation (on-disk size < persisted offset), the offset is reset to 0.
|
|
38
|
+
|
|
39
|
+
Backfill
|
|
40
|
+
--------
|
|
41
|
+
|
|
42
|
+
On first run, every existing ``*.jsonl`` under ``~/.claude/projects/`` is
|
|
43
|
+
walked and assistant events within the last 30 days are emitted in batches of
|
|
44
|
+
at most 500. A 5-second sleep is inserted between batches to avoid flooding
|
|
45
|
+
the backend.
|
|
46
|
+
|
|
47
|
+
Threading
|
|
48
|
+
---------
|
|
49
|
+
|
|
50
|
+
``watchdog`` callbacks run on the observer's own thread. We marshal onto the
|
|
51
|
+
asyncio loop via ``loop.call_soon_threadsafe`` before doing any async work -
|
|
52
|
+
the bus and HTTP client are not thread-safe.
|
|
53
|
+
|
|
54
|
+
Configuration
|
|
55
|
+
-------------
|
|
56
|
+
|
|
57
|
+
The adapter is opt-in: register it only when ``config.enable_claude_jsonl_watcher``
|
|
58
|
+
is ``True`` (default ``False``).
|
|
59
|
+
|
|
60
|
+
Decisions
|
|
61
|
+
---------
|
|
62
|
+
|
|
63
|
+
- D-IaI-1 (Identity-as-Inference): token usage is a passive aggregate with
|
|
64
|
+
k=1 (single principal machine). The inferred signal (burn-rate by model)
|
|
65
|
+
is operational telemetry, NOT psychometric inference - no clause of the
|
|
66
|
+
IaI 5-point test is triggered. Return: ops.truealter.com visibility.
|
|
67
|
+
- HTTP auth: JWT sourced from ``load_session()`` at adapter start; the
|
|
68
|
+
``ALTER_RUNTIME_SESSION_JWT`` env var, re-read on every POST, takes
|
|
69
|
+
precedence when set. Full auth substrate wiring (ensureFreshSession equivalent) is a
|
|
70
|
+
Wave 2 follow-up - see ``TOKEN_USAGE_AUTH_GAP`` docstring on
|
|
71
|
+
``TokenUsageClient``.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
from __future__ import annotations
|
|
75
|
+
|
|
76
|
+
import asyncio
|
|
77
|
+
import contextlib
|
|
78
|
+
import json
|
|
79
|
+
import logging
|
|
80
|
+
import os
|
|
81
|
+
import threading
|
|
82
|
+
import time
|
|
83
|
+
from datetime import datetime, timedelta, timezone
|
|
84
|
+
from pathlib import Path
|
|
85
|
+
from typing import Optional
|
|
86
|
+
|
|
87
|
+
from alter_runtime.config import DaemonConfig, data_dir, load_session
|
|
88
|
+
from alter_runtime.daemon import Component
|
|
89
|
+
from alter_runtime.subscribers.bus import EventBus
|
|
90
|
+
|
|
91
|
+
__all__ = ["ClaudeJsonlWatcher", "parse_assistant_line"]
|
|
92
|
+
|
|
93
|
+
logger = logging.getLogger("alter_runtime.adapters.claude_jsonl_watcher")
|
|
94
|
+
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
# Constants
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
#: Root directory containing per-project JSONL transcript directories.
|
|
100
|
+
CLAUDE_PROJECTS_DIR: Path = Path.home() / ".claude" / "projects"
|
|
101
|
+
|
|
102
|
+
#: Offset state file - written atomically with rename.
|
|
103
|
+
OFFSETS_FILENAME: str = "cc-offsets.json"
|
|
104
|
+
|
|
105
|
+
#: Maximum events per POST batch.
|
|
106
|
+
BATCH_SIZE: int = 500
|
|
107
|
+
|
|
108
|
+
#: Minimum seconds between successive POSTs (rate limit).
|
|
109
|
+
POST_RATE_LIMIT_SECONDS: float = 5.0
|
|
110
|
+
|
|
111
|
+
#: Backfill window in days.
|
|
112
|
+
BACKFILL_DAYS: int = 30
|
|
113
|
+
|
|
114
|
+
#: Sleep between backfill batches.
|
|
115
|
+
BACKFILL_BATCH_SLEEP_SECONDS: float = 5.0
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
# Privacy hard-stop - WHITELIST PARSER
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def parse_assistant_line(record: dict) -> Optional[dict]:
    """Parse one JSONL record and return a token-usage dict, or None.

    This is the privacy hard-stop between Claude Code's local transcripts
    and ALTER's audit database. It is a strict whitelist: the output dict is
    constructed key-by-key from explicitly named fields. ``record``,
    ``message``, and ``usage`` are NEVER spread, and a container found under
    a whitelisted key is never stringified, so nested content cannot ride
    along inside an allowed field.

    Returns None when:
    - ``record`` is not a dict
    - ``record["type"]`` is not ``"assistant"``
    - ``record["message"]`` is missing or not a dict
    - ``record["message"]["usage"]`` is missing or not a dict
    - ``record["message"]["id"]`` is missing or not a str
    - ``record["message"]["usage"]["input_tokens"]`` or ``output_tokens`` is missing
    - any token count cannot be converted to an integer

    Malformed records return None rather than raising.
    """
    if not isinstance(record, dict) or record.get("type") != "assistant":
        return None
    msg = record.get("message")
    if not isinstance(msg, dict):
        return None
    usage = msg.get("usage")
    if not isinstance(usage, dict):
        return None
    msg_id = msg.get("id")
    if not isinstance(msg_id, str):
        return None
    input_tokens = usage.get("input_tokens")
    output_tokens = usage.get("output_tokens")
    if input_tokens is None or output_tokens is None:
        return None

    def _text(value: object) -> str:
        # Only plain scalars are copied. str() of a dict/list would serialise
        # everything nested inside it, defeating the whitelist; None and
        # containers therefore become the empty string.
        return str(value) if isinstance(value, (str, int, float, bool)) else ""

    try:
        # Construct output dict key-by-key - no spread, no wildcard access.
        return {
            "session_id": _text(record.get("sessionId", "")),
            "message_id": msg_id,
            "model": _text(msg.get("model", "")),
            "input_tokens": int(input_tokens),
            "output_tokens": int(output_tokens),
            "cache_creation_tokens": int(usage.get("cache_creation_input_tokens") or 0),
            "cache_read_tokens": int(usage.get("cache_read_input_tokens") or 0),
            "ts": _text(record.get("timestamp", "")),
        }
    except (TypeError, ValueError, OverflowError):
        # Non-numeric, container, NaN or infinite token counts: the record is
        # malformed, so drop it instead of crashing the caller.
        return None
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
# Offset persistence helpers
|
|
174
|
+
# ---------------------------------------------------------------------------
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _offsets_path() -> Path:
    """Locate the JSON file that stores the per-transcript byte offsets.

    The file lives directly inside the runtime data directory and is named by
    ``OFFSETS_FILENAME``.
    """
    state_root = data_dir()
    return state_root / OFFSETS_FILENAME
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _load_offsets() -> dict[str, int]:
    """Load the offset state file, returning an empty dict on failure.

    The file is expected to hold a JSON object mapping transcript path to
    byte offset. A missing, unreadable or non-object file yields ``{}``.
    Individual entries that are not usable offsets (non-numeric, boolean,
    NaN/infinite, negative) are skipped so that one corrupt value does not
    discard every other offset - and never raises out of this function.
    """
    path = _offsets_path()
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(data, dict):
        return {}

    offsets: dict[str, int] = {}
    for key, value in data.items():
        # bool is a subclass of int but is never a meaningful byte offset.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            continue
        try:
            offset = int(value)
        except (ValueError, OverflowError):
            # json.loads accepts NaN / Infinity literals; int() rejects them.
            continue
        if offset >= 0:
            offsets[str(key)] = offset
    return offsets
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _save_offsets(offsets: dict[str, int]) -> None:
    """Atomically write the offset state file (write → fsync → replace).

    The JSON is written to a sibling ``.tmp`` file, flushed and fsynced through
    the same handle that wrote it, and then moved over the real file with
    ``os.replace`` so the destination is overwritten in one step on every
    platform (``Path.rename`` fails on Windows when the target exists).

    Failures are best-effort: an ``OSError`` is logged as a warning and the
    temporary file is removed; nothing is raised to the caller.
    """
    path = _offsets_path()
    tmp = path.with_suffix(".tmp")
    try:
        content = json.dumps(offsets, indent=2)
        with tmp.open("w", encoding="utf-8") as f:
            f.write(content)
            # Push Python's buffer to the OS, then the OS cache to disk,
            # before the file becomes visible under its final name.
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp, path)
    except OSError as exc:
        logger.warning("cc-offsets write failed: %s", exc)
        with contextlib.suppress(OSError):
            tmp.unlink()
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# ---------------------------------------------------------------------------
|
|
215
|
+
# ClaudeJsonlWatcher
|
|
216
|
+
# ---------------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class ClaudeJsonlWatcher(Component):
    """Watches Claude Code JSONL transcripts and posts token-usage events.

    Lifecycle: :meth:`run` loads persisted offsets, builds the HTTP client,
    backfills transcripts that have no persisted offset, then starts a
    ``watchdog`` observer and waits until :meth:`stop` is called.

    Delivery rule: a file's byte offset is only advanced (and persisted) after
    the events read from it were posted successfully, both for the live tail
    and for the backfill. A failed POST therefore leads to a re-read later;
    the module docstring states the backend is idempotent on ``message_id``.

    Parameters
    ----------
    config:
        Loaded :class:`DaemonConfig`.
    bus:
        Shared :class:`EventBus` (not published to - present for Component
        symmetry and future local-signal emission).
    projects_dir:
        Override ``~/.claude/projects/`` for testing.
    """

    name = "claude_jsonl_watcher"

    def __init__(
        self,
        config: DaemonConfig,
        bus: EventBus,
        projects_dir: Path | None = None,
    ) -> None:
        self._config = config
        self._bus = bus
        self._projects_dir = (projects_dir or CLAUDE_PROJECTS_DIR).expanduser().resolve()
        self._stop_event = asyncio.Event()
        self._loop: asyncio.AbstractEventLoop | None = None
        self._offsets: dict[str, int] = {}
        self._last_post_time: float = 0.0
        self._http_client: object | None = None  # TokenUsageClient, imported lazily
        self._pending_flush: asyncio.Task | None = None
        # Serialises tailing so two change events can never read the same
        # bytes concurrently or write offsets out of order.
        self._tail_lock = asyncio.Lock()
        # Paths that already have a task waiting for the lock (coalescing).
        self._queued_paths: set[str] = set()
        # Strong references to in-flight tasks; the event loop itself only
        # keeps weak references to tasks.
        self._tasks: set[asyncio.Task] = set()

    # ------------------------------------------------------------------
    # Component lifecycle
    # ------------------------------------------------------------------

    async def run(self) -> None:
        """Run the adapter until :meth:`stop` is called."""
        self._loop = asyncio.get_running_loop()
        self._offsets = _load_offsets()

        # Import HTTP client lazily so the module is importable without httpx.
        from alter_runtime.clients.token_usage_client import TokenUsageClient

        session = load_session()
        jwt: str | None = session.jwt if session is not None else None

        api_base = os.environ.get("ALTER_RUNTIME_API_BASE", "https://api.truealter.com")

        def _jwt_provider() -> str | None:
            # Prefer env-var override (for future ensureFreshSession wiring);
            # fall back to the session JWT loaded at startup.
            return os.environ.get("ALTER_RUNTIME_SESSION_JWT") or jwt

        self._http_client = TokenUsageClient(base_url=api_base, jwt_provider=_jwt_provider)

        try:
            # Imported only to verify that watchdog is fully installed.
            from watchdog.events import FileSystemEventHandler  # noqa: F401
            from watchdog.observers import Observer
        except ImportError:
            logger.warning("watchdog not installed - claude_jsonl_watcher disabled")
            await self._stop_event.wait()
            return

        if not self._projects_dir.exists():
            logger.info(
                "claude_jsonl_watcher: %s does not exist - skipping",
                self._projects_dir,
            )
            await self._stop_event.wait()
            return

        # First-run backfill
        await self._backfill_existing()

        # Set up watchdog observer
        observer = Observer()
        handler = _JsonlFileHandler(self)
        observer.schedule(handler, str(self._projects_dir), recursive=True)
        observer.daemon = True
        observer.start()

        logger.info(
            "claude_jsonl_watcher watching %s (offsets_file=%s)",
            self._projects_dir,
            _offsets_path(),
        )

        try:
            await self._stop_event.wait()
        finally:
            with contextlib.suppress(Exception):
                observer.stop()
                observer.join(timeout=2.0)
            # Do not leave half-finished tail tasks pending on the loop; an
            # interrupted task has not advanced its offset, so nothing is lost.
            for task in list(self._tasks):
                task.cancel()
            logger.info("claude_jsonl_watcher stopped")

    async def stop(self) -> None:
        """Signal :meth:`run` to shut down."""
        self._stop_event.set()

    # ------------------------------------------------------------------
    # Backfill
    # ------------------------------------------------------------------

    async def _backfill_existing(self) -> None:
        """Walk every existing *.jsonl and emit the last 30 days' events.

        Files that already have a persisted offset are skipped. For every
        other file the end-of-file offset is recorded only if all batches
        containing its events were posted successfully; a file with a failed
        batch keeps no offset, so it is read again from the start by the next
        backfill or by the next change event for that file.
        """
        cutoff = datetime.now(tz=timezone.utc) - timedelta(days=BACKFILL_DAYS)
        batch: list[dict] = []
        batch_files: set[str] = set()  # files contributing to the current batch
        failed_files: set[str] = set()  # files with at least one failed batch
        scanned: dict[str, int] = {}  # path -> size observed before reading
        emitted = 0

        async def _flush_pending() -> None:
            nonlocal batch, emitted
            if await self._flush_batch(batch):
                emitted += len(batch)
            else:
                failed_files.update(batch_files)
            batch = []
            batch_files.clear()

        for jsonl_path in sorted(self._projects_dir.rglob("*.jsonl")):
            path_str = str(jsonl_path)
            if path_str in self._offsets:
                # Already partially consumed - skip full backfill for this file,
                # tail will catch new bytes.
                continue
            try:
                # Stat BEFORE reading: bytes appended while we read are then
                # re-read by the tail instead of being skipped.
                size = jsonl_path.stat().st_size
                events = _read_events_from_file(jsonl_path, offset=0, cutoff=cutoff)
            except OSError as exc:
                logger.debug("backfill: skipping %s: %s", path_str, exc)
                continue
            scanned[path_str] = size

            slug = _slug_from_path(jsonl_path, self._projects_dir)
            for ev in events:
                ev["project_slug"] = slug
                batch.append(ev)
                batch_files.add(path_str)
                if len(batch) >= BATCH_SIZE:
                    await _flush_pending()
                    await asyncio.sleep(BACKFILL_BATCH_SLEEP_SECONDS)

        if batch:
            await _flush_pending()

        # Record end-of-file offsets so future tails start correctly - but
        # only for files whose events all reached the backend.
        for path_str, size in scanned.items():
            if path_str not in failed_files:
                self._offsets[path_str] = size
        _save_offsets(self._offsets)

        if failed_files:
            logger.warning(
                "claude_jsonl_watcher backfill: %d file(s) had failed POSTs - offsets not recorded",
                len(failed_files),
            )
        logger.info("claude_jsonl_watcher backfill complete events_emitted=%d", emitted)

    # ------------------------------------------------------------------
    # Watchdog callback (thread → asyncio bridge)
    # ------------------------------------------------------------------

    def _on_file_change(self, path_str: str) -> None:
        """Called on the watchdog observer thread when a JSONL file changes."""
        if not path_str.endswith(".jsonl"):
            return
        loop = self._loop
        if loop is None or loop.is_closed():
            return
        # The loop may close between the check above and this call during
        # shutdown; call_soon_threadsafe then raises RuntimeError.
        with contextlib.suppress(RuntimeError):
            loop.call_soon_threadsafe(self._schedule_file_change, path_str)

    def _schedule_file_change(self, path_str: str) -> None:
        """Create the tail task for ``path_str`` (runs on the event loop)."""
        if path_str in self._queued_paths:
            # A task for this file is already waiting; it stats the file when
            # it starts, so it will see these bytes too.
            return
        self._queued_paths.add(path_str)
        task = asyncio.create_task(self._handle_file_change(path_str))
        self._tasks.add(task)
        task.add_done_callback(self._on_task_done)

    def _on_task_done(self, task: asyncio.Task) -> None:
        """Drop the finished task and surface any unexpected exception."""
        self._tasks.discard(task)
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.warning("claude_jsonl_watcher: file-change task failed: %s", exc)

    async def _handle_file_change(self, path_str: str) -> None:
        """Read new lines from the file and post events."""
        try:
            async with self._tail_lock:
                # From here on a new change event must schedule a fresh task,
                # because this one is about to take its size snapshot.
                self._queued_paths.discard(path_str)
                await self._tail_file(path_str)
        finally:
            # Also covers cancellation while still waiting for the lock.
            self._queued_paths.discard(path_str)

    async def _tail_file(self, path_str: str) -> None:
        """Post the events found after the persisted offset of ``path_str``.

        Must be called with ``self._tail_lock`` held.
        """
        # NOTE(review): the offset is advanced to the size observed before the
        # read; if a writer ever leaves a partially written last line, the
        # next read would start mid-line - confirm writers append whole lines.
        jsonl_path = Path(path_str)
        if not jsonl_path.is_file():
            return

        try:
            current_size = jsonl_path.stat().st_size
        except OSError:
            return

        persisted_offset = self._offsets.get(path_str, 0)

        # File rotation: on-disk size smaller than persisted offset.
        if current_size < persisted_offset:
            logger.info(
                "claude_jsonl_watcher: rotation detected %s (size=%d < offset=%d) - reset",
                path_str,
                current_size,
                persisted_offset,
            )
            persisted_offset = 0
            self._offsets[path_str] = 0

        if current_size == persisted_offset:
            return  # No new bytes

        try:
            events = _read_events_from_file(jsonl_path, offset=persisted_offset)
        except OSError as exc:
            logger.debug("claude_jsonl_watcher: read error %s: %s", path_str, exc)
            return

        slug = _slug_from_path(jsonl_path, self._projects_dir)
        for ev in events:
            ev["project_slug"] = slug

        if not events:
            # Advance offset even when no parseable events (e.g. user-only lines).
            self._offsets[path_str] = current_size
            _save_offsets(self._offsets)
            return

        # Rate-limit: if we posted recently, wait.
        since_last = time.monotonic() - self._last_post_time
        if since_last < POST_RATE_LIMIT_SECONDS:
            await asyncio.sleep(POST_RATE_LIMIT_SECONDS - since_last)

        if await self._flush_batch(events):
            # Only advance offset after a successful POST - on failure the
            # next watchdog tick will re-attempt from the old offset.
            self._offsets[path_str] = current_size
            _save_offsets(self._offsets)

    # ------------------------------------------------------------------
    # HTTP posting
    # ------------------------------------------------------------------

    async def _flush_batch(self, events: list[dict]) -> bool:
        """POST a batch of events. Returns True on success, False on failure.

        An empty batch, or a call made before the HTTP client exists, is
        treated as a successful no-op.
        """
        if not events or self._http_client is None:
            return True
        try:
            result = await self._http_client.post_events(events)  # type: ignore[attr-defined]
            self._last_post_time = time.monotonic()
            logger.info(
                "claude_jsonl_watcher posted events=%d result=%s",
                len(events),
                result,
            )
            return True
        except Exception as exc:
            # Deliberately broad: a failed POST must never take the adapter
            # down; the caller keeps the old offset and retries later.
            logger.warning("claude_jsonl_watcher POST failed: %s - will retry on next tick", exc)
            return False
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
# ---------------------------------------------------------------------------
|
|
457
|
+
# File-reading helpers
|
|
458
|
+
# ---------------------------------------------------------------------------
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def _read_events_from_file(
|
|
462
|
+
path: Path,
|
|
463
|
+
offset: int,
|
|
464
|
+
cutoff: datetime | None = None,
|
|
465
|
+
) -> list[dict]:
|
|
466
|
+
"""Read and parse assistant events from a JSONL file starting at ``offset``.
|
|
467
|
+
|
|
468
|
+
Only assistant lines with token usage are returned. Lines that don't
|
|
469
|
+
parse or fail the date cutoff are silently skipped.
|
|
470
|
+
"""
|
|
471
|
+
events: list[dict] = []
|
|
472
|
+
try:
|
|
473
|
+
with path.open("rb") as f:
|
|
474
|
+
if offset > 0:
|
|
475
|
+
f.seek(offset)
|
|
476
|
+
for raw_line in f:
|
|
477
|
+
line = raw_line.decode("utf-8", errors="replace").strip()
|
|
478
|
+
if not line:
|
|
479
|
+
continue
|
|
480
|
+
try:
|
|
481
|
+
record = json.loads(line)
|
|
482
|
+
except json.JSONDecodeError:
|
|
483
|
+
continue
|
|
484
|
+
parsed = parse_assistant_line(record)
|
|
485
|
+
if parsed is None:
|
|
486
|
+
continue
|
|
487
|
+
if cutoff is not None:
|
|
488
|
+
ts_str = parsed.get("ts", "")
|
|
489
|
+
if ts_str:
|
|
490
|
+
try:
|
|
491
|
+
ts = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
|
492
|
+
if ts < cutoff:
|
|
493
|
+
continue
|
|
494
|
+
except ValueError:
|
|
495
|
+
pass # Unparseable timestamp - include it.
|
|
496
|
+
events.append(parsed)
|
|
497
|
+
except OSError:
|
|
498
|
+
raise
|
|
499
|
+
return events
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def _slug_from_path(jsonl_path: Path, projects_dir: Path) -> str:
|
|
503
|
+
"""Derive the project slug from the JSONL file path.
|
|
504
|
+
|
|
505
|
+
``~/.claude/projects/<slug>/<session>.jsonl`` → ``<slug>``
|
|
506
|
+
"""
|
|
507
|
+
try:
|
|
508
|
+
relative = jsonl_path.relative_to(projects_dir)
|
|
509
|
+
return relative.parts[0] if relative.parts else str(jsonl_path.parent.name)
|
|
510
|
+
except ValueError:
|
|
511
|
+
return jsonl_path.parent.name
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
# ---------------------------------------------------------------------------
|
|
515
|
+
# watchdog event handler
|
|
516
|
+
# ---------------------------------------------------------------------------
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
class _JsonlFileHandler:
|
|
520
|
+
"""watchdog handler shim - dispatches JSONL file events to the watcher.
|
|
521
|
+
|
|
522
|
+
Imported lazily (watchdog is optional). We shim the handler interface via
|
|
523
|
+
duck typing rather than subclassing so that importing this module never
|
|
524
|
+
demands watchdog at module load time.
|
|
525
|
+
"""
|
|
526
|
+
|
|
527
|
+
def __init__(self, watcher: ClaudeJsonlWatcher) -> None:
|
|
528
|
+
self._watcher = watcher
|
|
529
|
+
self._thread = threading.get_ident()
|
|
530
|
+
|
|
531
|
+
def dispatch(self, event: object) -> None:
|
|
532
|
+
# Ignore directory events and pure-read events.
|
|
533
|
+
if getattr(event, "is_directory", False):
|
|
534
|
+
return
|
|
535
|
+
kind = type(event).__name__
|
|
536
|
+
if kind in ("FileOpenedEvent", "FileClosedNoWriteEvent"):
|
|
537
|
+
return
|
|
538
|
+
|
|
539
|
+
src = getattr(event, "src_path", None)
|
|
540
|
+
dest = getattr(event, "dest_path", None)
|
|
541
|
+
target = dest or src
|
|
542
|
+
if not isinstance(target, str):
|
|
543
|
+
return
|
|
544
|
+
# New files (FileCreatedEvent) also trigger tail from offset=0.
|
|
545
|
+
self._watcher._on_file_change(target)
|