PyPI - halton-meter - Versions diffs - 0.0.1__py3-none-any.whl - Mend

halton-meter 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

halton_meter/__init__.py +3 -0
halton_meter/adapters/__init__.py +11 -0
halton_meter/adapters/anthropic.py +189 -0
halton_meter/adapters/base.py +112 -0
halton_meter/cli.py +181 -0
halton_meter/config.py +75 -0
halton_meter/models.py +57 -0
halton_meter/policy_engine.py +226 -0
halton_meter/policy_sync.py +133 -0
halton_meter/pricing/__init__.py +59 -0
halton_meter/pricing/matrix.py +181 -0
halton_meter/proxy.py +475 -0
halton_meter/report.py +343 -0
halton_meter/schema.sql +56 -0
halton_meter/setup.py +471 -0
halton_meter/storage.py +483 -0
halton_meter/sync.py +187 -0
halton_meter/tagging.py +167 -0
halton_meter-0.0.1.dist-info/METADATA +102 -0
halton_meter-0.0.1.dist-info/RECORD +24 -0
halton_meter-0.0.1.dist-info/WHEEL +4 -0
halton_meter-0.0.1.dist-info/entry_points.txt +2 -0
halton_meter-0.0.1.dist-info/licenses/LICENSE +202 -0
halton_meter-0.0.1.dist-info/licenses/NOTICE +10 -0

halton_meter/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Halton Meter — local LLM API proxy daemon."""
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version as _distribution_version
+# Report the version of the installed "halton-meter" distribution so the value
+# printed by the CLI cannot drift from the package metadata. The literal is
+# only a fallback for a source tree that was never installed.
+try:
+    __version__ = _distribution_version("halton-meter")
+except PackageNotFoundError:
+    __version__ = "0.1.0"

halton_meter/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""Provider adapter package."""
+from halton_meter.adapters.anthropic import AnthropicAdapter
+from halton_meter.adapters.base import ProviderAdapter, RequestMetadata, ResponseMetadata
+__all__ = [
+    "AnthropicAdapter",
+    "ProviderAdapter",
+    "RequestMetadata",
+    "ResponseMetadata",
+]

halton_meter/adapters/anthropic.py ADDED Viewed

@@ -0,0 +1,189 @@
+"""
+Anthropic provider adapter.
+Handles api.anthropic.com — both non-streaming JSON responses and
+streaming SSE responses (text/event-stream).
+Token count locations in the Anthropic API:
+Non-streaming:
+    response body → top-level "usage" object:
+        input_tokens, output_tokens, thinking_tokens (optional),
+        cache_read_input_tokens (optional)
+Streaming SSE:
+    message_start event → message.usage:
+        input_tokens, thinking_tokens (optional), cache_read_input_tokens (optional)
+    message_delta event → usage:
+        output_tokens
+See spike.py for the full design rationale and edge-case notes.
+"""
+from __future__ import annotations
+import json
+import time
+from typing import ClassVar
+import structlog
+from mitmproxy import http
+from halton_meter.adapters.base import RequestMetadata, ResponseMetadata
+log = structlog.get_logger()
+class AnthropicAdapter:
+    """
+    Adapter for api.anthropic.com.
+    Handles both non-streaming JSON responses and streaming SSE responses.
+    Methods are meant to be safe on the mitmproxy hot path: malformed bodies
+    and unexpected JSON shapes degrade to default / zero-value metadata rather
+    than raising.
+    """
+    name: ClassVar[str] = "anthropic"
+    domains: ClassVar[list[str]] = ["api.anthropic.com"]
+    @staticmethod
+    def _as_count(value: object) -> int:
+        """Return *value* as a non-negative int; anything that is not an int counts as 0."""
+        # bool is an int subclass but is never a meaningful token count.
+        if isinstance(value, bool) or not isinstance(value, int):
+            return 0
+        return max(value, 0)
+    def matches(self, host: str) -> bool:
+        """
+        Return True when *host* is one of ``domains``, with or without a port.
+        The hostname is compared exactly (case-insensitive, optional trailing
+        dot ignored). A prefix test is not enough here: it would also accept
+        look-alike hosts such as 'api.anthropic.com.evil.example'.
+        """
+        hostname = host.strip().lower()
+        name_part, sep, port = hostname.rpartition(":")
+        if sep and port.isdigit():
+            # Drop a ':443'-style suffix; any other shape is compared as-is.
+            hostname = name_part
+        return hostname.rstrip(".") in self.domains
+    def parse_request(self, flow: http.HTTPFlow) -> RequestMetadata:
+        """
+        Extract model name and stream flag from the request body.
+        Non-JSON bodies (e.g. GET /v1/models) and JSON bodies that are not an
+        object are handled gracefully — model defaults to 'unknown' and stream
+        to False. A non-string "model" value is ignored for the same reason.
+        """
+        model = "unknown"
+        stream = False
+        try:
+            body = flow.request.get_text(strict=False) or ""
+            data = json.loads(body) if body else None
+            # Only a JSON object can carry "model" / "stream"; lists, numbers
+            # and strings are valid JSON but have no such keys.
+            if isinstance(data, dict):
+                raw_model = data.get("model", "unknown")
+                if isinstance(raw_model, str):
+                    model = raw_model
+                stream = bool(data.get("stream", False))
+        except (json.JSONDecodeError, UnicodeDecodeError):
+            # Non-JSON body is expected for non-messages endpoints
+            pass
+        return RequestMetadata(
+            provider=self.name,
+            model=model,
+            stream=stream,
+            started_at=time.monotonic(),
+        )
+    def parse_response(
+        self,
+        flow: http.HTTPFlow,
+        request_meta: RequestMetadata,
+    ) -> ResponseMetadata:
+        """
+        Extract token counts from the completed response body.
+        mitmproxy accumulates the full body before calling response(), so SSE
+        streams are parsed in a single pass here. Latency is computed from
+        request_meta.started_at (set at request parse time).
+        Any failure while reading or parsing the response is logged and
+        reported as tokens_complete=False; the counts gathered so far (zero by
+        default) are still returned.
+        """
+        latency_ms = (time.monotonic() - request_meta.started_at) * 1000
+        input_tokens = 0
+        output_tokens = 0
+        thinking_tokens = 0
+        cached_tokens = 0
+        tokens_complete = True
+        try:
+            # Header access sits inside the try as well, so a flow without a
+            # usable response cannot raise out of the adapter.
+            content_type = flow.response.headers.get("content-type", "")
+            body = flow.response.get_text(strict=False) or ""
+            if "text/event-stream" in content_type:
+                input_tokens, output_tokens, thinking_tokens, cached_tokens = (
+                    self._parse_sse(body)
+                )
+            else:
+                usage = json.loads(body).get("usage", {})
+                # Values are coerced so a null or non-integer field cannot make
+                # the ResponseMetadata constructor below fail validation.
+                input_tokens = self._as_count(usage.get("input_tokens", 0))
+                output_tokens = self._as_count(usage.get("output_tokens", 0))
+                # Read only if present; absence is treated as 0
+                thinking_tokens = self._as_count(usage.get("thinking_tokens", 0))
+                cached_tokens = self._as_count(usage.get("cache_read_input_tokens", 0))
+        except Exception as exc:
+            tokens_complete = False
+            log.error(
+                "adapter.parse_response.failed",
+                provider=self.name,
+                error=str(exc),
+                exc_info=True,
+            )
+        return ResponseMetadata(
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            thinking_tokens=thinking_tokens,
+            cached_tokens=cached_tokens,
+            tokens_complete=tokens_complete,
+            latency_ms=latency_ms,
+        )
+    def _parse_sse(self, body: str) -> tuple[int, int, int, int]:
+        """
+        Walk SSE lines and collect token counts.
+        Returns (input_tokens, output_tokens, thinking_tokens, cached_tokens).
+        Fields read from the stream:
+          message_start → message.usage.input_tokens
+                        → message.usage.thinking_tokens  (optional)
+                        → message.usage.cache_read_input_tokens  (optional)
+          message_delta → usage.output_tokens
+        The output_tokens value in message_delta is a running total, so the
+        largest value seen is kept instead of summing every event.
+        Blank lines separate SSE events. Lines starting with "event:" set the
+        current event type; lines starting with "data:" carry the payload JSON.
+        Payloads that are not valid JSON objects are skipped.
+        """
+        input_tokens = 0
+        output_tokens = 0
+        thinking_tokens = 0
+        cached_tokens = 0
+        current_event: str | None = None
+        for raw_line in body.splitlines():
+            line = raw_line.strip()
+            if not line:
+                # Blank line marks the end of an SSE event block
+                current_event = None
+                continue
+            if line.startswith("event:"):
+                current_event = line[len("event:") :].strip()
+                continue
+            if not line.startswith("data:"):
+                continue
+            raw_data = line[len("data:") :].strip()
+            if raw_data in ("[DONE]", ""):
+                continue
+            try:
+                payload = json.loads(raw_data)
+            except json.JSONDecodeError:
+                continue
+            if not isinstance(payload, dict):
+                # Valid JSON, but not an event object — nothing to read.
+                continue
+            event_type = payload.get("type", current_event or "")
+            if event_type == "message_start":
+                message = payload.get("message")
+                msg_usage = message.get("usage") if isinstance(message, dict) else None
+                if isinstance(msg_usage, dict):
+                    input_tokens += self._as_count(msg_usage.get("input_tokens", 0))
+                    # thinking_tokens may be absent; absence is treated as 0.
+                    thinking_tokens += self._as_count(msg_usage.get("thinking_tokens", 0))
+                    cached_tokens += self._as_count(
+                        msg_usage.get("cache_read_input_tokens", 0)
+                    )
+            elif event_type == "message_delta":
+                delta_usage = payload.get("usage")
+                if isinstance(delta_usage, dict):
+                    # Cumulative count: keep the highest total, never add.
+                    output_tokens = max(
+                        output_tokens,
+                        self._as_count(delta_usage.get("output_tokens", 0)),
+                    )
+        return input_tokens, output_tokens, thinking_tokens, cached_tokens

halton_meter/adapters/base.py ADDED Viewed

@@ -0,0 +1,112 @@
+"""
+Base types for provider adapters.
+Defines the ProviderAdapter Protocol and the Pydantic data models that
+all adapters produce. Adding a new provider means implementing this Protocol —
+nothing else in the codebase needs to change.
+"""
+from __future__ import annotations
+from typing import Protocol, runtime_checkable
+from pydantic import BaseModel, ConfigDict, Field
+class RequestMetadata(BaseModel):
+    """
+    Structured metadata extracted from an outbound LLM API request.
+    Populated by ProviderAdapter.parse_request() before the request is forwarded.
+    Carried through the flow and passed to parse_response() so the adapter can
+    compute latency without needing to re-read the flow.
+    """
+    # Frozen pydantic model: instances are immutable after construction.
+    model_config = ConfigDict(frozen=True)
+    provider: str = Field(description="Provider name, e.g. 'anthropic'")
+    # NOTE(review): AnthropicAdapter.parse_request() fills this from the "model"
+    # key of the request body (falling back to 'unknown'), not from anything the
+    # provider returned — confirm the description wording is intended.
+    model: str = Field(description="Model identifier as returned by the provider")
+    stream: bool = Field(description="True if the client requested a streaming response")
+    # Monotonic clock reading, only meaningful as a difference against a later
+    # time.monotonic() call in the same process.
+    started_at: float = Field(
+        description="time.monotonic() value at request parse time, for latency computation"
+    )
+class ResponseMetadata(BaseModel):
+    """
+    Structured metadata extracted from a completed LLM API response.
+    Populated by ProviderAdapter.parse_response() after the full body is available.
+    Token fields default to 0 when absent. tokens_complete=False signals that
+    the body was truncated (e.g. client disconnected mid-stream) — stored record
+    should be marked incomplete rather than silently showing wrong counts.
+    """
+    # Frozen pydantic model: instances are immutable after construction.
+    model_config = ConfigDict(frozen=True)
+    input_tokens: int = Field(default=0, description="Prompt tokens including system and tools")
+    output_tokens: int = Field(default=0, description="Completion tokens")
+    thinking_tokens: int = Field(
+        default=0,
+        description="Extended thinking tokens (Anthropic) or reasoning tokens (OpenAI o-series)",
+    )
+    cached_tokens: int = Field(default=0, description="Cache read tokens, priced separately")
+    # NOTE(review): AnthropicAdapter.parse_response() sets this to False for any
+    # exception raised while parsing the body, not only for truncation.
+    tokens_complete: bool = Field(
+        default=True,
+        description="False if the response body was truncated and counts may be partial",
+    )
+    latency_ms: float = Field(default=0.0, description="End-to-end latency in milliseconds")
+# runtime_checkable allows isinstance() checks against this Protocol; such
+# checks only verify that the members exist, not their signatures.
+@runtime_checkable
+class ProviderAdapter(Protocol):
+    """
+    Protocol for provider-specific request/response parsers.
+    Implementations must be:
+    - Stateless: no instance-level mutable state that depends on request flow.
+    - Pure: no I/O, no network calls, no filesystem access, no side effects.
+    - Safe: must not raise — callers wrap in try/except but prefer not to need it.
+    To add a new provider: implement this Protocol, add an instance to
+    ADAPTER_REGISTRY in proxy.py. No other changes required.
+    """
+    name: str
+    """Short provider identifier, e.g. 'anthropic'. Used in log fields and DB records."""
+    domains: list[str]
+    """Canonical hostnames this adapter handles, e.g. ['api.anthropic.com']."""
+    def matches(self, host: str) -> bool:
+        """
+        Return True if this adapter should handle the given request host.
+        Host may include a port suffix (e.g. 'api.anthropic.com:443').
+        Implementations should strip the optional port and compare the hostname
+        exactly against ``domains``; a bare startswith() check also accepts
+        look-alike hosts such as 'api.anthropic.com.evil.example'.
+        """
+        ...
+    # flow is annotated as object here; the Anthropic adapter annotates the same
+    # parameter as mitmproxy's http.HTTPFlow.
+    def parse_request(self, flow: object) -> RequestMetadata:
+        """
+        Extract metadata from an outbound request.
+        Called with the full mitmproxy HTTPFlow after the request headers and
+        body are available. Must return a RequestMetadata even on parse failure
+        (use defaults). Must not raise.
+        """
+        ...
+    def parse_response(
+        self,
+        flow: object,
+        request_meta: RequestMetadata,
+    ) -> ResponseMetadata:
+        """
+        Extract metadata from a completed response.
+        Called after the full response body has been received and accumulated
+        by mitmproxy (including streamed SSE bodies). Must return a
+        ResponseMetadata even on parse failure. Must not raise.
+        """
+        ...

halton_meter/cli.py ADDED Viewed

@@ -0,0 +1,181 @@
+"""
+Halton Meter CLI entry point.
+Commands (Phase 0):
+  daemon   — start the local proxy
+  version  — print version
+  report   — print cost report from local SQLite logs
+  setup    — install CA cert, configure system proxy (Step 0.10)
+"""
+from __future__ import annotations
+import asyncio
+import click
+from rich.console import Console
+from halton_meter import __version__
+from halton_meter.config import load_config
+console = Console()
+# Root command group; the daemon, version, report and setup commands below
+# register themselves on it with @cli.command(). The docstring doubles as the
+# --help text, so it is user-facing output.
+@click.group()
+def cli() -> None:
+    """Halton Meter — local LLM API cost attribution proxy."""
+@cli.command()
+@click.option(
+    "--host",
+    default=None,
+    help="Override listen host (default from config or 127.0.0.1).",
+)
+@click.option(
+    "--port",
+    default=None,
+    type=int,
+    help="Override listen port (default from config or 8080).",
+)
+@click.option(
+    "--config",
+    "config_path",
+    default=None,
+    type=click.Path(exists=False, dir_okay=False),
+    help="Path to config.toml (default: ~/.halton-meter/config.toml).",
+)
+def daemon(host: str | None, port: int | None, config_path: str | None) -> None:
+    """Start the Halton Meter proxy daemon."""
+    # The docstring above is the command's --help text; keep it short.
+    from pathlib import Path
+    cfg = load_config(Path(config_path) if config_path else None)
+    # CLI flags override config values. We reconstruct a new config rather than
+    # mutating the frozen Pydantic model.
+    if host is not None or port is not None:
+        from halton_meter.config import DaemonConfig, HaltonConfig
+        daemon_cfg = DaemonConfig(
+            # An empty --host still falls back to the configured host, so an
+            # empty string never becomes the bind address.
+            listen_host=host or cfg.daemon.listen_host,
+            # Compare against None, not truthiness: an explicit --port 0 is a
+            # real override and must not be replaced by the configured port.
+            listen_port=cfg.daemon.listen_port if port is None else port,
+            log_path=cfg.daemon.log_path,
+        )
+        cfg = HaltonConfig(
+            daemon=daemon_cfg,
+            storage=cfg.storage,
+            sync=cfg.sync,
+        )
+    asyncio.run(_run_daemon(cfg))
+async def _run_daemon(cfg: object) -> None:
+    """
+    Run proxy + optional sync + policy sync workers concurrently.
+    Both workers are started only when sync is enabled and a backend URL is
+    configured. Start-up happens inside the try block so that a worker that
+    was already started is stopped again if a later start (or the proxy)
+    fails, and the second worker is stopped even if stopping the first raises.
+    Args:
+        cfg: Loaded configuration; the code reads cfg.sync and cfg.storage.db_path
+            and passes cfg itself to run_proxy().
+    """
+    from halton_meter.policy_sync import PolicySyncWorker
+    from halton_meter.proxy import run_proxy
+    from halton_meter.sync import SyncWorker
+    # Only workers whose start() completed are recorded here, so stop() is
+    # never called on a worker that failed to start.
+    sync_worker: SyncWorker | None = None
+    policy_sync_worker: PolicySyncWorker | None = None
+    try:
+        if cfg.sync.enabled and cfg.sync.backend_url:
+            pending_sync = SyncWorker(cfg.sync, cfg.storage.db_path)
+            await pending_sync.start()
+            sync_worker = pending_sync
+            # Policy sync runs alongside data sync — same backend_url
+            pending_policy = PolicySyncWorker(cfg.sync, cfg.storage.db_path)
+            await pending_policy.start()
+            policy_sync_worker = pending_policy
+        await run_proxy(cfg)
+    finally:
+        try:
+            if sync_worker is not None:
+                await sync_worker.stop()
+        finally:
+            if policy_sync_worker is not None:
+                await policy_sync_worker.stop()
+@cli.command()
+def version() -> None:
+    """Print the halton-meter version."""
+    # Prints whatever halton_meter.__version__ is set to, through the shared
+    # rich console.
+    console.print(f"halton-meter {__version__}")
+@cli.command()
+@click.option(
+    "--project",
+    default=None,
+    help="Filter to one project (default: all projects).",
+)
+@click.option(
+    "--since",
+    "since_days",
+    default=30,
+    type=int,
+    show_default=True,
+    help="Only show records from the last N days. 0 returns nothing.",
+)
+@click.option(
+    "--db",
+    "db_path",
+    default=None,
+    type=click.Path(exists=False, dir_okay=False),
+    help="Override DB path (default: from config).",
+)
+@click.option(
+    "--config",
+    "config_path",
+    default=None,
+    type=click.Path(exists=False, dir_okay=False),
+    help="Path to config.toml (default: ~/.halton-meter/config.toml).",
+)
+def report(
+    project: str | None,
+    since_days: int,
+    db_path: str | None,
+    config_path: str | None,
+) -> None:
+    """Print a formatted cost and token report from local SQLite logs."""
+    from pathlib import Path
+    from halton_meter.report import build_report_data, filter_records_by_days, render_report
+    from halton_meter.storage import StorageManager
+    settings = load_config(None if not config_path else Path(config_path))
+    # An explicit --db wins; otherwise use the path from the loaded config.
+    database = db_path if db_path else settings.storage.db_path
+    async def _load_and_render() -> None:
+        # At most 10_000 records are read; the day filter is applied afterwards,
+        # once the storage context has been left.
+        async with StorageManager(database) as store:
+            rows = await store.read_records(project=project, limit=10_000)
+        recent_rows = filter_records_by_days(rows, since_days)
+        render_report(build_report_data(recent_rows), console)
+    asyncio.run(_load_and_render())
+@cli.command()
+@click.option(
+    "--check",
+    "check_only",
+    is_flag=True,
+    default=False,
+    help="Print current state of each setup step without modifying anything.",
+)
+@click.option(
+    "--force",
+    is_flag=True,
+    default=False,
+    help="Re-run all steps even if already done.",
+)
+def setup(check_only: bool, force: bool) -> None:
+    """One-time setup: generate CA cert, trust in keychain, patch certifi."""
+    from halton_meter.setup import check_steps, render_result, run_setup
+    # --check only inspects the steps; otherwise the steps are executed, with
+    # --force passed straight through to run_setup().
+    outcome = check_steps() if check_only else run_setup(force=force, console=console)
+    render_result(outcome, console)

halton_meter/config.py ADDED Viewed

@@ -0,0 +1,75 @@
+"""
+Config loading for Halton Meter daemon.
+Reads ~/.halton-meter/config.toml on startup. Missing file is not an error —
+defaults are applied. Config is validated with Pydantic and frozen after load.
+"""
+from __future__ import annotations
+import tomllib
+from pathlib import Path
+from pydantic import BaseModel, ConfigDict, Field
+class DaemonConfig(BaseModel):
+    # Listener settings; this model is the type of HaltonConfig.daemon.
+    # Frozen pydantic model: instances are immutable after construction.
+    model_config = ConfigDict(frozen=True)
+    # Loopback address by default.
+    listen_host: str = Field(default="127.0.0.1")
+    listen_port: int = Field(default=8080)
+    # Stored exactly as written, including the literal "~"; nothing in this
+    # class expands it.
+    log_path: str = Field(default="~/.halton-meter/daemon.log")
+class StorageConfig(BaseModel):
+    # Storage settings; this model is the type of HaltonConfig.storage.
+    # Frozen pydantic model: instances are immutable after construction.
+    model_config = ConfigDict(frozen=True)
+    # Stored exactly as written, including the literal "~"; nothing in this
+    # class expands it.
+    db_path: str = Field(default="~/.halton-meter/logs.db")
+class SyncConfig(BaseModel):
+    # Backend sync settings; this model is the type of HaltonConfig.sync.
+    # Frozen pydantic model: instances are immutable after construction.
+    model_config = ConfigDict(frozen=True)
+    # Phase 1+: backend sync settings
+    # Empty by default. The daemon command starts its sync workers only when
+    # `enabled` is true AND this URL is non-empty.
+    backend_url: str = Field(default="")
+    batch_interval_seconds: int = Field(default=60)
+    batch_size: int = Field(default=100, description="Records per POST batch")
+    # Empty by default; held as a plain string.
+    api_key: str = Field(default="")
+    enabled: bool = Field(
+        default=True,
+        description="Set False to disable sync even if backend_url is set",
+    )
+class HaltonConfig(BaseModel):
+    """Top-level config model. Loaded once at daemon startup."""
+    # Frozen pydantic model: instances are immutable after construction.
+    model_config = ConfigDict(frozen=True)
+    # Each section falls back to its own defaults when the loaded TOML data has
+    # no matching top-level key (`daemon`, `storage`, `sync`).
+    daemon: DaemonConfig = Field(default_factory=DaemonConfig)
+    storage: StorageConfig = Field(default_factory=StorageConfig)
+    sync: SyncConfig = Field(default_factory=SyncConfig)
+# Resolved once at import time against the current user's home directory.
+_DEFAULT_CONFIG_PATH = Path("~/.halton-meter/config.toml").expanduser()
+def load_config(path: Path | None = None) -> HaltonConfig:
+    """
+    Load config from TOML file. Returns defaults if the file does not exist.
+    A leading "~" in an explicitly supplied path is expanded, so a value such
+    as --config=~/meter.toml (which the shell leaves untouched) resolves the
+    same way the default path does.
+    Args:
+        path: Override the config file path. Defaults to ~/.halton-meter/config.toml.
+    Returns:
+        Validated, frozen HaltonConfig instance.
+    Raises:
+        tomllib.TOMLDecodeError: If the file exists but is not valid TOML.
+        pydantic.ValidationError: If the parsed data does not fit HaltonConfig.
+    """
+    config_path = _DEFAULT_CONFIG_PATH if path is None else Path(path).expanduser()
+    # Open directly instead of checking exists() first: the file can disappear
+    # between the check and the open, and a missing file is not an error here.
+    try:
+        with config_path.open("rb") as fh:
+            raw = tomllib.load(fh)
+    except FileNotFoundError:
+        return HaltonConfig()
+    return HaltonConfig.model_validate(raw)

halton_meter/models.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""
+Pydantic models for Halton Meter local log records.
+LogRecord maps 1:1 to the `requests` table in schema.sql.
+All money fields are integers in millicents (1 USD = 100_000 millicents).
+See memory/decisions.md 2026-04-28 for the money-storage decision.
+"""
+from __future__ import annotations
+from pydantic import BaseModel, ConfigDict, Field
+class LogRecord(BaseModel):
+    """
+    A single captured LLM API request/response pair.
+    Stored in SQLite at ~/.halton-meter/logs.db. Synced to the backend in
+    Phase 1. cost_millicents is nullable — NULL means we have no pricing
+    rate for this model (e.g. unknown model, or Phase 0 pricing matrix
+    doesn't cover it).
+    """
+    model_config = ConfigDict(frozen=True)
+    id: str = Field(description="UUID v4 — unique record identifier")
+    project: str = Field(description="Project tag resolved by tagging.py")
+    provider: str = Field(description="Provider name, e.g. 'anthropic'")
+    model: str = Field(description="Model identifier as returned by the provider")
+    input_tokens: int = Field(default=0, description="Prompt tokens including system and tools")
+    output_tokens: int = Field(default=0, description="Completion tokens")
+    thinking_tokens: int = Field(
+        default=0,
+        description="Extended thinking tokens (Anthropic) or reasoning tokens (OpenAI o-series)",
+    )
+    cached_tokens: int = Field(default=0, description="Cache read tokens, priced separately")
+    cost_millicents: int | None = Field(
+        default=None,
+        description=(
+            "Computed cost in millicents (1 USD = 100_000). "
+            "NULL if model is unknown or not in the pricing matrix."
+        ),
+    )
+    tokens_complete: bool = Field(
+        default=True,
+        description="False if response body was truncated and token counts may be partial",
+    )
+    latency_ms: float = Field(default=0.0, description="End-to-end latency in milliseconds")
+    requested_at: str = Field(description="ISO 8601 UTC timestamp when the request was made")
+    recorded_at: str = Field(description="ISO 8601 UTC timestamp when the record was written")
+    # Phase 3: request disposition. 'success' | 'error' | 'blocked_by_policy'
+    status: str = Field(default="success", description="Request disposition")