halton-meter 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """Halton Meter — local LLM API proxy daemon."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,11 @@
1
+ """Provider adapter package."""
2
+
3
+ from halton_meter.adapters.anthropic import AnthropicAdapter
4
+ from halton_meter.adapters.base import ProviderAdapter, RequestMetadata, ResponseMetadata
5
+
6
+ __all__ = [
7
+ "AnthropicAdapter",
8
+ "ProviderAdapter",
9
+ "RequestMetadata",
10
+ "ResponseMetadata",
11
+ ]
@@ -0,0 +1,189 @@
1
+ """
2
+ Anthropic provider adapter.
3
+
4
+ Handles api.anthropic.com — both non-streaming JSON responses and
5
+ streaming SSE responses (text/event-stream).
6
+
7
+ Token count locations in the Anthropic API:
8
+
9
+ Non-streaming:
10
+ response body → top-level "usage" object:
11
+ input_tokens, output_tokens, thinking_tokens (optional),
12
+ cache_read_input_tokens (optional)
13
+
14
+ Streaming SSE:
15
+ message_start event → message.usage:
16
+ input_tokens, thinking_tokens (optional), cache_read_input_tokens (optional)
17
+ message_delta event → usage:
18
+ output_tokens
19
+
20
+ See spike.py for the full design rationale and edge-case notes.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import time
27
+ from typing import ClassVar
28
+
29
+ import structlog
30
+ from mitmproxy import http
31
+
32
+ from halton_meter.adapters.base import RequestMetadata, ResponseMetadata
33
+
34
+ log = structlog.get_logger()
35
+
36
+
37
class AnthropicAdapter:
    """
    Adapter for api.anthropic.com.

    Handles both non-streaming JSON responses and streaming SSE responses.
    The public methods are meant to be safe on the mitmproxy hot path:
    malformed or unexpected payloads fall back to default / zero-value
    metadata instead of raising.
    """

    name: ClassVar[str] = "anthropic"
    domains: ClassVar[list[str]] = ["api.anthropic.com"]

    def matches(self, host: str) -> bool:
        """
        Return True when ``host`` is exactly one of ``domains``.

        An optional ``:port`` suffix and a trailing FQDN dot are ignored, and
        the comparison is case-insensitive. A prefix test is deliberately NOT
        used: ``startswith`` would also accept look-alike hosts such as
        ``api.anthropic.com.evil.example``.
        """
        hostname = host.strip().lower().partition(":")[0].rstrip(".")
        return hostname in self.domains

    def parse_request(self, flow: http.HTTPFlow) -> RequestMetadata:
        """
        Extract model name and stream flag from the request body.

        Bodies that are empty, not JSON, or not a JSON object (e.g. GET
        /v1/models) are handled gracefully — model defaults to 'unknown' and
        stream to False. A non-string ``model`` value is ignored as well so
        that building the metadata object cannot fail on it.
        """
        model = "unknown"
        stream = False
        try:
            body = flow.request.get_text(strict=False) or ""
            if body:
                data = json.loads(body)
                # Only a JSON object can carry "model"/"stream"; arrays and
                # scalars are valid JSON but have no .get().
                if isinstance(data, dict):
                    raw_model = data.get("model", "unknown")
                    if isinstance(raw_model, str):
                        model = raw_model
                    stream = bool(data.get("stream", False))
        except (json.JSONDecodeError, UnicodeDecodeError, RecursionError):
            # Non-JSON body is expected for non-messages endpoints
            pass

        return RequestMetadata(
            provider=self.name,
            model=model,
            stream=stream,
            started_at=time.monotonic(),
        )

    def parse_response(
        self,
        flow: http.HTTPFlow,
        request_meta: RequestMetadata,
    ) -> ResponseMetadata:
        """
        Extract token counts from the completed response body.

        SSE bodies (content-type containing ``text/event-stream``) are walked
        in a single pass by ``_parse_sse``; anything else is parsed as one
        JSON document with a top-level ``usage`` object. Latency is computed
        from request_meta.started_at (set at request parse time).

        Any failure while reading or parsing the response is logged and
        reported as zero counts with ``tokens_complete=False``.
        """
        latency_ms = (time.monotonic() - request_meta.started_at) * 1000

        input_tokens = 0
        output_tokens = 0
        thinking_tokens = 0
        cached_tokens = 0
        tokens_complete = True

        try:
            # Everything that touches the flow lives inside the try block so
            # a missing response cannot escape as an exception.
            response = flow.response
            if response is None:
                raise ValueError("flow has no response")

            content_type = response.headers.get("content-type", "")
            body = response.get_text(strict=False) or ""

            if "text/event-stream" in content_type.lower():
                # NOTE(review): a stream cut off before message_delta still
                # reports tokens_complete=True here — confirm whether the
                # caller detects truncation separately.
                input_tokens, output_tokens, thinking_tokens, cached_tokens = (
                    self._parse_sse(body)
                )
            else:
                data = json.loads(body)
                if not isinstance(data, dict):
                    raise TypeError("response body is not a JSON object")
                usage = data.get("usage", {})
                if not isinstance(usage, dict):
                    raise TypeError("'usage' is not a JSON object")
                input_tokens = self._usage_count(usage, "input_tokens")
                output_tokens = self._usage_count(usage, "output_tokens")
                # thinking_tokens is optional; absence is treated as 0
                thinking_tokens = self._usage_count(usage, "thinking_tokens")
                cached_tokens = self._usage_count(usage, "cache_read_input_tokens")

        except Exception as exc:
            tokens_complete = False
            log.error(
                "adapter.parse_response.failed",
                provider=self.name,
                error=str(exc),
                exc_info=True,
            )

        return ResponseMetadata(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            thinking_tokens=thinking_tokens,
            cached_tokens=cached_tokens,
            tokens_complete=tokens_complete,
            latency_ms=latency_ms,
        )

    @staticmethod
    def _usage_count(usage: object, key: str) -> int:
        """Return ``usage[key]`` when it is a real int, otherwise 0 (missing, null, wrong type)."""
        if not isinstance(usage, dict):
            return 0
        value = usage.get(key)
        # bool is a subclass of int; a JSON true/false is not a token count.
        if isinstance(value, bool) or not isinstance(value, int):
            return 0
        return value

    def _parse_sse(self, body: str) -> tuple[int, int, int, int]:
        """
        Walk SSE lines and accumulate token counts.

        Returns (input_tokens, output_tokens, thinking_tokens, cached_tokens).

        Token locations in the Anthropic SSE stream:
            message_start  → message.usage.input_tokens
                           → message.usage.thinking_tokens  (optional)
                           → message.usage.cache_read_input_tokens  (optional)
            message_delta  → usage.output_tokens

        The output_tokens value on message_delta is a running total for the
        message, so the largest value seen is kept rather than summing the
        deltas (summing would overcount when several deltas arrive).

        Blank lines separate SSE events. Lines starting with "event:" set the
        current event type; lines starting with "data:" carry the payload JSON.
        Payloads that are not valid JSON objects are skipped.
        """
        input_tokens = 0
        output_tokens = 0
        thinking_tokens = 0
        cached_tokens = 0

        current_event: str | None = None
        for raw_line in body.splitlines():
            line = raw_line.strip()

            if line == "":
                # Blank line marks the end of an SSE event block
                current_event = None
                continue

            if line.startswith("event:"):
                current_event = line[len("event:"):].strip()
                continue

            if not line.startswith("data:"):
                continue

            raw_data = line[len("data:"):].strip()
            if raw_data in ("[DONE]", ""):
                continue
            try:
                payload = json.loads(raw_data)
            except json.JSONDecodeError:
                continue
            if not isinstance(payload, dict):
                # Valid JSON but not an event object; nothing to read.
                continue

            # Prefer the payload's own "type"; fall back to the event: line.
            event_type = payload.get("type", current_event or "")

            if event_type == "message_start":
                message = payload.get("message")
                msg_usage = message.get("usage") if isinstance(message, dict) else None
                input_tokens += self._usage_count(msg_usage, "input_tokens")
                thinking_tokens += self._usage_count(msg_usage, "thinking_tokens")
                cached_tokens += self._usage_count(msg_usage, "cache_read_input_tokens")

            elif event_type == "message_delta":
                delta_count = self._usage_count(payload.get("usage"), "output_tokens")
                output_tokens = max(output_tokens, delta_count)

        return input_tokens, output_tokens, thinking_tokens, cached_tokens
@@ -0,0 +1,112 @@
1
+ """
2
+ Base types for provider adapters.
3
+
4
+ Defines the ProviderAdapter Protocol and the Pydantic data models that
5
+ all adapters produce. Adding a new provider means implementing this Protocol —
6
+ nothing else in the codebase needs to change.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Protocol, runtime_checkable
12
+
13
+ from pydantic import BaseModel, ConfigDict, Field
14
+
15
+
16
class RequestMetadata(BaseModel):
    """
    Immutable snapshot of what an adapter learned from an outbound request.

    Built by ProviderAdapter.parse_request() and handed back to
    parse_response() for the same flow, so latency can be derived from
    ``started_at`` without re-reading the request.
    """

    # frozen=True: instances cannot be mutated after construction.
    model_config = ConfigDict(frozen=True)

    provider: str = Field(
        description="Provider name, e.g. 'anthropic'",
    )
    model: str = Field(
        description="Model identifier as returned by the provider",
    )
    stream: bool = Field(
        description="True if the client requested a streaming response",
    )
    started_at: float = Field(
        description="time.monotonic() value at request parse time, for latency computation",
    )
33
+
34
+
35
class ResponseMetadata(BaseModel):
    """
    Immutable snapshot of what an adapter learned from a completed response.

    Built by ProviderAdapter.parse_response() once the whole body is
    available. Every token field falls back to 0 when not supplied.
    ``tokens_complete=False`` flags counts that may be partial (for example a
    body truncated by a client disconnect mid-stream), so the stored record
    can be marked incomplete instead of silently showing wrong numbers.
    """

    # frozen=True: instances cannot be mutated after construction.
    model_config = ConfigDict(frozen=True)

    input_tokens: int = Field(
        default=0,
        description="Prompt tokens including system and tools",
    )
    output_tokens: int = Field(
        default=0,
        description="Completion tokens",
    )
    thinking_tokens: int = Field(
        default=0,
        description="Extended thinking tokens (Anthropic) or reasoning tokens (OpenAI o-series)",
    )
    cached_tokens: int = Field(
        default=0,
        description="Cache read tokens, priced separately",
    )
    tokens_complete: bool = Field(
        default=True,
        description="False if the response body was truncated and counts may be partial",
    )
    latency_ms: float = Field(
        default=0.0,
        description="End-to-end latency in milliseconds",
    )
59
+
60
+
61
@runtime_checkable
class ProviderAdapter(Protocol):
    """
    Protocol for provider-specific request/response parsers.

    Implementations must be:
      - Stateless: no instance-level mutable state that depends on request flow.
      - Pure: no I/O, no network calls, no filesystem access, no side effects.
      - Safe: must not raise — callers wrap in try/except but prefer not to need it.

    To add a new provider: implement this Protocol, add an instance to
    ADAPTER_REGISTRY in proxy.py. No other changes required.
    """

    name: str
    """Short provider identifier, e.g. 'anthropic'. Used in log fields and DB records."""

    domains: list[str]
    """Canonical hostnames this adapter handles, e.g. ['api.anthropic.com']."""

    def matches(self, host: str) -> bool:
        """
        Return True if this adapter should handle the given request host.

        Host may include a port suffix (e.g. 'api.anthropic.com:443').
        Implementations should strip the port and compare the remaining
        hostname for equality with one of ``domains``. A bare prefix test
        (``startswith``) is not sufficient: it also accepts look-alike hosts
        such as 'api.anthropic.com.evil.example'.
        """
        ...

    def parse_request(self, flow: object) -> RequestMetadata:
        """
        Extract metadata from an outbound request.

        Called with the full mitmproxy HTTPFlow after the request headers and
        body are available. Must return a RequestMetadata even on parse failure
        (use defaults). Must not raise.
        """
        ...

    def parse_response(
        self,
        flow: object,
        request_meta: RequestMetadata,
    ) -> ResponseMetadata:
        """
        Extract metadata from a completed response.

        Called after the full response body has been received and accumulated
        by mitmproxy (including streamed SSE bodies). ``request_meta`` is the
        value parse_request() returned for the same flow. Must return a
        ResponseMetadata even on parse failure. Must not raise.
        """
        ...
halton_meter/cli.py ADDED
@@ -0,0 +1,181 @@
1
+ """
2
+ Halton Meter CLI entry point.
3
+
4
+ Commands (Phase 0):
5
+ daemon — start the local proxy
6
+ version — print version
7
+ report — print cost report from local SQLite logs
8
+ setup — install CA cert, configure system proxy (Step 0.10)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import asyncio
14
+
15
+ import click
16
+ from rich.console import Console
17
+
18
+ from halton_meter import __version__
19
+ from halton_meter.config import load_config
20
+
21
+ console = Console()
22
+
23
+
24
# Root command group; subcommands attach themselves via @cli.command().
# The docstring below doubles as the text click shows for --help.
@click.group()
def cli() -> None:
    """Halton Meter — local LLM API cost attribution proxy."""
27
+
28
+
29
@cli.command()
@click.option(
    "--host",
    default=None,
    help="Override listen host (default from config or 127.0.0.1).",
)
@click.option(
    "--port",
    default=None,
    type=int,
    help="Override listen port (default from config or 8080).",
)
@click.option(
    "--config",
    "config_path",
    default=None,
    type=click.Path(exists=False, dir_okay=False),
    help="Path to config.toml (default: ~/.halton-meter/config.toml).",
)
def daemon(host: str | None, port: int | None, config_path: str | None) -> None:
    """Start the Halton Meter proxy daemon."""
    # The docstring above is the click help text; implementation notes are
    # kept in comments so --help output stays unchanged.
    from pathlib import Path

    cfg = load_config(Path(config_path) if config_path else None)

    # CLI flags override config values. We reconstruct a new config rather than
    # mutating the frozen Pydantic model.
    if host is not None or port is not None:
        from halton_meter.config import DaemonConfig, HaltonConfig

        daemon_cfg = DaemonConfig(
            # An empty --host still falls back to the configured host.
            listen_host=host or cfg.daemon.listen_host,
            # Compare against None explicitly: `port or ...` would silently
            # discard an explicit `--port 0`.
            listen_port=port if port is not None else cfg.daemon.listen_port,
            log_path=cfg.daemon.log_path,
        )
        cfg = HaltonConfig(
            daemon=daemon_cfg,
            storage=cfg.storage,
            sync=cfg.sync,
        )

    asyncio.run(_run_daemon(cfg))
71
+
72
+
73
async def _run_daemon(cfg: object) -> None:
    """
    Run proxy + optional sync + policy sync workers concurrently.

    Both sync workers are started only when ``cfg.sync.enabled`` is true and
    ``cfg.sync.backend_url`` is non-empty. Every worker that started
    successfully is stopped on the way out — whether the proxy returned,
    raised, or a later worker failed to start — and a failure while stopping
    one worker does not prevent the other from being stopped.
    """
    from halton_meter.policy_sync import PolicySyncWorker
    from halton_meter.proxy import run_proxy
    from halton_meter.sync import SyncWorker

    # These names are bound only after start() succeeded, so the cleanup
    # below never calls stop() on a worker that did not start.
    sync_worker: SyncWorker | None = None
    policy_sync_worker: PolicySyncWorker | None = None

    try:
        if cfg.sync.enabled and cfg.sync.backend_url:
            pending_sync = SyncWorker(cfg.sync, cfg.storage.db_path)
            await pending_sync.start()
            sync_worker = pending_sync

            # Policy sync runs alongside data sync — same backend_url
            pending_policy = PolicySyncWorker(cfg.sync, cfg.storage.db_path)
            await pending_policy.start()
            policy_sync_worker = pending_policy

        await run_proxy(cfg)
    finally:
        # Nested try/finally: the policy worker is stopped even if stopping
        # the data-sync worker raises (or is cancelled).
        try:
            if sync_worker is not None:
                await sync_worker.stop()
        finally:
            if policy_sync_worker is not None:
                await policy_sync_worker.stop()
96
+
97
+
98
@cli.command()
def version() -> None:
    """Print the halton-meter version."""
    # Build the line first, then hand it to the shared rich console.
    banner = f"halton-meter {__version__}"
    console.print(banner)
102
+
103
+
104
@cli.command()
@click.option(
    "--project",
    default=None,
    help="Filter to one project (default: all projects).",
)
@click.option(
    "--since",
    "since_days",
    default=30,
    type=int,
    show_default=True,
    help="Only show records from the last N days. 0 returns nothing.",
)
@click.option(
    "--db",
    "db_path",
    default=None,
    type=click.Path(exists=False, dir_okay=False),
    help="Override DB path (default: from config).",
)
@click.option(
    "--config",
    "config_path",
    default=None,
    type=click.Path(exists=False, dir_okay=False),
    help="Path to config.toml (default: ~/.halton-meter/config.toml).",
)
def report(
    project: str | None,
    since_days: int,
    db_path: str | None,
    config_path: str | None,
) -> None:
    """Print a formatted cost and token report from local SQLite logs."""
    # The docstring above is the click help text and is kept verbatim.
    from pathlib import Path

    from halton_meter.report import build_report_data, filter_records_by_days, render_report
    from halton_meter.storage import StorageManager

    explicit_config = Path(config_path) if config_path else None
    cfg = load_config(explicit_config)

    # --db wins over the configured database path when given.
    database = db_path if db_path else cfg.storage.db_path

    async def _load_and_render() -> None:
        # At most 10_000 records are read; filtering by age happens after
        # the read, in Python.
        async with StorageManager(database) as store:
            rows = await store.read_records(project=project, limit=10_000)

        recent = filter_records_by_days(rows, since_days)
        render_report(build_report_data(recent), console)

    asyncio.run(_load_and_render())
156
+
157
+
158
@cli.command()
@click.option(
    "--check",
    "check_only",
    is_flag=True,
    default=False,
    help="Print current state of each setup step without modifying anything.",
)
@click.option(
    "--force",
    is_flag=True,
    default=False,
    help="Re-run all steps even if already done.",
)
def setup(check_only: bool, force: bool) -> None:
    """One-time setup: generate CA cert, trust in keychain, patch certifi."""
    # The docstring above is the click help text and is kept verbatim.
    from halton_meter.setup import check_steps, render_result, run_setup

    # --check only inspects the steps; otherwise the steps are executed
    # (with --force passed through). Either way the outcome is rendered.
    outcome = check_steps() if check_only else run_setup(force=force, console=console)
    render_result(outcome, console)
halton_meter/config.py ADDED
@@ -0,0 +1,75 @@
1
+ """
2
+ Config loading for Halton Meter daemon.
3
+
4
+ Reads ~/.halton-meter/config.toml on startup. Missing file is not an error —
5
+ defaults are applied. Config is validated with Pydantic and frozen after load.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import tomllib
11
+ from pathlib import Path
12
+
13
+ from pydantic import BaseModel, ConfigDict, Field
14
+
15
+
16
class DaemonConfig(BaseModel):
    # Listener address and log location for the daemon.
    # frozen=True: values cannot be reassigned after load.
    model_config = ConfigDict(frozen=True)

    # A plain default is equivalent to Field(default=...).
    listen_host: str = "127.0.0.1"
    listen_port: int = 8080
    log_path: str = "~/.halton-meter/daemon.log"
22
+
23
+
24
class StorageConfig(BaseModel):
    # Location of the local log database.
    # frozen=True: values cannot be reassigned after load.
    model_config = ConfigDict(frozen=True)

    db_path: str = "~/.halton-meter/logs.db"
28
+
29
+
30
class SyncConfig(BaseModel):
    # Phase 1+: backend sync settings
    # frozen=True: values cannot be reassigned after load.
    model_config = ConfigDict(frozen=True)

    # An empty backend_url leaves the sync workers off regardless of `enabled`
    # (the daemon requires both before starting them).
    backend_url: str = ""
    batch_interval_seconds: int = 60
    batch_size: int = Field(
        default=100,
        description="Records per POST batch",
    )
    api_key: str = ""
    enabled: bool = Field(
        default=True,
        description="Set False to disable sync even if backend_url is set",
    )
42
+
43
+
44
class HaltonConfig(BaseModel):
    """Root configuration object, built once when the daemon starts."""

    # frozen=True: sections cannot be reassigned after load.
    model_config = ConfigDict(frozen=True)

    # Each section falls back to its own defaults when absent from the file.
    daemon: DaemonConfig = Field(
        default_factory=DaemonConfig,
    )
    storage: StorageConfig = Field(
        default_factory=StorageConfig,
    )
    sync: SyncConfig = Field(
        default_factory=SyncConfig,
    )
52
+
53
+
54
# Default config location; `~` is resolved once, at import time.
_DEFAULT_CONFIG_PATH = Path("~/.halton-meter/config.toml").expanduser()


def load_config(path: Path | None = None) -> HaltonConfig:
    """
    Load config from TOML file. Returns defaults if the file does not exist.

    Args:
        path: Override the config file path. Defaults to ~/.halton-meter/config.toml.
            A leading ``~`` in an explicit path is expanded to the user's home
            directory.

    Returns:
        Validated, frozen HaltonConfig instance.

    Raises:
        tomllib.TOMLDecodeError: The file exists but is not valid TOML.
        pydantic.ValidationError: The file parses but does not fit the schema.
        OSError: The path exists but cannot be read (e.g. it is a directory
            or permissions are insufficient).
    """
    config_path = (path if path is not None else _DEFAULT_CONFIG_PATH).expanduser()

    # Open directly instead of checking exists() first: the file can vanish
    # between a check and the open, and a missing file is the one error that
    # is expected here.
    try:
        with config_path.open("rb") as fh:
            raw = tomllib.load(fh)
    except FileNotFoundError:
        return HaltonConfig()

    return HaltonConfig.model_validate(raw)
halton_meter/models.py ADDED
@@ -0,0 +1,57 @@
1
+ """
2
+ Pydantic models for Halton Meter local log records.
3
+
4
+ LogRecord maps 1:1 to the `requests` table in schema.sql.
5
+ All money fields are integers in millicents (1 USD = 100_000 millicents).
6
+ See memory/decisions.md 2026-04-28 for the money-storage decision.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from pydantic import BaseModel, ConfigDict, Field
12
+
13
+
14
class LogRecord(BaseModel):
    """
    One captured LLM API request/response pair.

    Records live in SQLite at ~/.halton-meter/logs.db and are synced to the
    backend in Phase 1. ``cost_millicents`` may be None, meaning no pricing
    rate is available for the model (e.g. unknown model, or the Phase 0
    pricing matrix doesn't cover it).
    """

    # frozen=True: a record cannot be mutated after construction.
    model_config = ConfigDict(frozen=True)

    # --- identity -----------------------------------------------------------
    id: str = Field(
        description="UUID v4 — unique record identifier",
    )
    project: str = Field(
        description="Project tag resolved by tagging.py",
    )
    provider: str = Field(
        description="Provider name, e.g. 'anthropic'",
    )
    model: str = Field(
        description="Model identifier as returned by the provider",
    )

    # --- token counts -------------------------------------------------------
    input_tokens: int = Field(
        default=0,
        description="Prompt tokens including system and tools",
    )
    output_tokens: int = Field(
        default=0,
        description="Completion tokens",
    )
    thinking_tokens: int = Field(
        default=0,
        description="Extended thinking tokens (Anthropic) or reasoning tokens (OpenAI o-series)",
    )
    cached_tokens: int = Field(
        default=0,
        description="Cache read tokens, priced separately",
    )

    # --- cost ---------------------------------------------------------------
    cost_millicents: int | None = Field(
        default=None,
        description=(
            "Computed cost in millicents (1 USD = 100_000). "
            "NULL if model is unknown or not in the pricing matrix."
        ),
    )

    # --- quality / timing ---------------------------------------------------
    tokens_complete: bool = Field(
        default=True,
        description="False if response body was truncated and token counts may be partial",
    )
    latency_ms: float = Field(
        default=0.0,
        description="End-to-end latency in milliseconds",
    )

    requested_at: str = Field(
        description="ISO 8601 UTC timestamp when the request was made",
    )
    recorded_at: str = Field(
        description="ISO 8601 UTC timestamp when the record was written",
    )

    # Phase 3: request disposition. 'success' | 'error' | 'blocked_by_policy'
    status: str = Field(
        default="success",
        description="Request disposition",
    )