PyPI - modelstat-sdk - Versions diffs - 0.0.1__py3-none-any.whl - Mend

modelstat-sdk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

modelstat/__init__.py +94 -0
modelstat/_version.py +8 -0
modelstat/capture.py +264 -0
modelstat/client.py +72 -0
modelstat/config.py +135 -0
modelstat/py.typed +0 -0
modelstat/redact.py +150 -0
modelstat/transport.py +97 -0
modelstat/wire.py +344 -0
modelstat/worker.py +183 -0
modelstat_sdk-0.0.1.dist-info/METADATA +158 -0
modelstat_sdk-0.0.1.dist-info/RECORD +13 -0
modelstat_sdk-0.0.1.dist-info/WHEEL +4 -0

modelstat/__init__.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""modelstat -- a privacy-first SDK for wrapping the LLM calls your backend
+already makes and shipping **redacted** usage to modelstat, without adding
+latency to live requests.
+The hot path (:meth:`Client.record`) does nothing but copy your already-in-hand
+call into a bounded buffer and return. A background worker thread redacts,
+batches, and ships off the request path. On overflow the newest record is
+dropped and a counter increments -- your request is never blocked and never
+grows memory unbounded.
+Modes
+-----
+* **Local daemon (default).** Hand calls to a local modelstat daemon over
+  loopback; it summarizes with a local Qwen model and ships only redacted
+  abstracts. Raw text never leaves the machine.
+* **Remote.** Ship directly to the modelstat server (no local model). With
+  ``raw=True``, send full floor-redacted turns for server-side summarization.
+Example
+-------
+.. code-block:: python
+    from modelstat import Client, Config, LlmCall, TokenUsage
+    # Org-scoped ingest key binds traffic to your account; remote mode here.
+    cfg = Config("msk_live_...", "raw_sdk_openai").with_remote(
+        "https://api.modelstat.ai", raw=True
+    )
+    with Client(cfg) as ms:  # shutdown() flushes on the way out
+        # ... after your real LLM call returns ...
+        ms.record(
+            LlmCall("openai", "session-or-trace-id")
+            .model_("gpt-x")
+            .with_tokens(TokenUsage(input=800, output=120))
+            .text("the prompt", "the completion")
+        )
+"""
+from __future__ import annotations
+from ._version import __version__
+from .capture import LlmCall, ToolCallInput, build_batch
+from .client import Client
+from .config import DEFAULT_DAEMON_URL, Config, Mode, RedactionPolicy
+from .redact import Redacted, redact
+from .transport import FakeTransport, HttpTransport, Transport, TransportError
+from .wire import (
+    BillingMode,
+    EventKind,
+    GitContext,
+    IngestBatch,
+    RawEvent,
+    TokenUsage,
+    ToolCallStatus,
+    ToolCallWire,
+    batch_id,
+    content_hash,
+    source_event_id,
+)
+__all__ = [
+    "__version__",
+    # client + config
+    "Client",
+    "Config",
+    "Mode",
+    "RedactionPolicy",
+    "DEFAULT_DAEMON_URL",
+    # capture
+    "LlmCall",
+    "ToolCallInput",
+    "build_batch",
+    # redaction
+    "redact",
+    "Redacted",
+    # transports
+    "Transport",
+    "HttpTransport",
+    "FakeTransport",
+    "TransportError",
+    # wire
+    "IngestBatch",
+    "RawEvent",
+    "ToolCallWire",
+    "TokenUsage",
+    "GitContext",
+    "EventKind",
+    "BillingMode",
+    "ToolCallStatus",
+    "content_hash",
+    "source_event_id",
+    "batch_id",
+]

modelstat/_version.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Single source of truth for the package version.
+Read both at runtime (to build ``Config.client_version`` -> the wire
+``daemon_version``) and by hatchling at build time (see ``pyproject.toml``'s
+``[tool.hatch.version]``), so the two can never drift.
+"""
+__version__ = "0.0.1"

modelstat/capture.py ADDED Viewed

@@ -0,0 +1,264 @@
+"""The capture surface: what a caller hands the SDK per LLM call, and the
+(worker-side) conversion into wire records.
+Building an :class:`LlmCall` and calling :meth:`Client.record` is the only thing
+that happens on the live request path -- it must stay a cheap move into a
+buffer. All of the work here (redaction, hashing, id derivation) runs later, on
+the background worker, off the hot path.
+"""
+from __future__ import annotations
+import hashlib
+import json
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any, Dict, Iterable, List, Optional, Tuple
+from . import wire
+from .config import Config, RedactionPolicy
+from .redact import redact
+from .wire import (
+    BillingMode,
+    EventKind,
+    GitContext,
+    IngestBatch,
+    RawEvent,
+    TokenUsage,
+    ToolCallStatus,
+    ToolCallWire,
+)
+__all__ = ["LlmCall", "ToolCallInput", "build_batch"]
+# The excerpt cap for the standard (non-raw) path, in Unicode code points.
+EXCERPT_MAX_CHARS = 320
+def _now_utc() -> datetime:
+    """Return the current instant as a timezone-aware UTC ``datetime``."""
+    return datetime.now(tz=timezone.utc)
+@dataclass
+class ToolCallInput:
+    """One captured tool invocation, as handed to the SDK by the caller.
+    The SDK is in the call path, so it has the real args and result -- it
+    hashes/sizes them here (never ships them raw).
+    Only ``name`` and ``status`` are required; every other field has a default.
+    """
+    # Bare tool name (``Bash``, ``create_pr``).
+    name: str
+    # Outcome of the call, forwarded unchanged to the wire record.
+    status: ToolCallStatus
+    # ``builtin`` or ``mcp:<server>``.
+    server: str = "builtin"
+    # The call's arguments, if any. Hashed and sized; never shipped.
+    args: Optional[Any] = None
+    # Byte length of the result/output (the SDK sizes it; never ships it).
+    # NOTE(review): the conversion in this module forwards this number as-is,
+    # so it appears the caller is expected to supply it -- confirm.
+    result_bytes: int = 0
+    # Defaults to the timezone-aware UTC instant at which the instance is built.
+    started_at: datetime = field(default_factory=_now_utc)
+    # NOTE(review): not read by the wire conversion visible in this module.
+    ended_at: Optional[datetime] = None
+    # Allowlisted command verbs for shell-ish tools (<=3, each <=40 chars).
+    # Only the first three entries are forwarded to the wire record; the
+    # 40-char limit is not enforced in this module.
+    command_families: List[str] = field(default_factory=list)
+@dataclass
+class LlmCall:
+    """One captured LLM call.
+    Construct directly with keyword arguments, or build incrementally with the
+    chainable helpers (:meth:`model_`, :meth:`with_tokens`, :meth:`text`).
+    ``prompt`` / ``completion`` are raw here and are redacted on the worker.
+    Only ``provider`` and ``session_id`` are required. The chainable helpers
+    mutate this instance in place and return it; they do not copy.
+    """
+    # Provider label, forwarded unchanged to the wire event.
+    provider: str
+    # Trace/conversation id used to group calls into a session downstream.
+    session_id: str
+    # Model identifier; ``None`` when the caller did not set one.
+    model: Optional[str] = None
+    kind: EventKind = EventKind.ASSISTANT_MESSAGE
+    # A fresh default ``TokenUsage`` per instance (never shared between calls).
+    tokens: TokenUsage = field(default_factory=TokenUsage)
+    # Defaults to the timezone-aware UTC instant at which the instance is
+    # built; its millisecond value feeds the derived ``source_event_id``.
+    started_at: datetime = field(default_factory=_now_utc)
+    duration_ms: Optional[int] = None
+    # Raw prompt / completion text; combined and redacted into the excerpt on
+    # the worker.
+    prompt: Optional[str] = None
+    completion: Optional[str] = None
+    cwd: Optional[str] = None
+    git: Optional[GitContext] = None
+    billing: Optional[BillingMode] = None
+    # Tool invocations made during this call, in call order (the list index
+    # becomes the wire ``call_index``).
+    tool_calls: List[ToolCallInput] = field(default_factory=list)
+    # ---- chainable builder helpers (ergonomic, mirror the Rust builder) -----
+    def model_(self, model: str) -> "LlmCall":
+        """Set the model. (Trailing underscore avoids shadowing the field.)
+        Returns ``self`` for chaining."""
+        self.model = model
+        return self
+    def with_tokens(self, tokens: TokenUsage) -> "LlmCall":
+        """Set token usage. Returns ``self`` for chaining."""
+        self.tokens = tokens
+        return self
+    def text(self, prompt: str, completion: str) -> "LlmCall":
+        """Set the prompt and completion text (raw; redacted on the worker).
+        Returns ``self`` for chaining."""
+        self.prompt = prompt
+        self.completion = completion
+        return self
+def _truncate_chars(s: str, max_chars: int) -> str:
+    """Keep at most the first ``max_chars`` Unicode code points of ``s``.
+    A string that already fits is returned unchanged. A longer one is cut to
+    ``max_chars`` code points and an elision marker is appended, so a truncated
+    result is ``max_chars + 1`` code points long. Python strings index by code
+    point, so slicing is the direct equivalent of the Rust
+    ``chars().take(max)``."""
+    head = s[:max_chars]
+    return s if len(s) <= max_chars else head + "…"
+def _sha256_hex(data: bytes) -> str:
+    """Return the lowercase hexadecimal SHA-256 digest of ``data``."""
+    hasher = hashlib.sha256()
+    hasher.update(data)
+    return hasher.hexdigest()
+def _hash_args(args: Optional[Any]) -> Tuple[str, str, int]:
+    """Build the privacy-reduced ``(args_hash, signature_hash, args_bytes)``
+    triple for a tool call's arguments.
+    Canonical JSON is meant to match the Rust reference: compact separators,
+    *insertion order preserved* (``sort_keys=False``) and non-ASCII text written
+    as literal UTF-8 (``ensure_ascii=False``). ``json.dumps`` escapes non-ASCII
+    characters by default while ``serde_json`` does not, so without that flag
+    both the hash and the byte count would differ for any non-ASCII argument.
+    ``serde_json`` serializes a Map in its stored order, and Python's ``dict``
+    is insertion-ordered, so the byte sizes agree.
+    ``signature_hash`` hashes the *sorted* top-level key names; it is the
+    literal ``"none"`` when there are no args or the args are not a dict.
+    Returns:
+        ``("", "none", 0)`` when ``args`` is ``None``; otherwise the sha256 hex
+        of the canonical JSON, the signature hash, and the UTF-8 byte length of
+        the canonical JSON.
+    Raises:
+        TypeError: if ``args`` contains a value ``json.dumps`` cannot serialize.
+    """
+    if args is None:
+        return ("", "none", 0)
+    serialized = json.dumps(
+        args, separators=(",", ":"), sort_keys=False, ensure_ascii=False
+    )
+    # With escaping off, a lone surrogate reaches the encoder unescaped;
+    # ``surrogatepass`` keeps that from raising on the worker thread.
+    serialized_bytes = serialized.encode("utf-8", "surrogatepass")
+    args_hash = _sha256_hex(serialized_bytes)
+    if isinstance(args, dict):
+        # ``json.dumps`` accepts (and coerces) non-string keys, so coerce them
+        # here too; joining raw int keys would raise. String keys are unchanged.
+        keys = sorted(str(key) for key in args)
+        signature = _sha256_hex(",".join(keys).encode("utf-8", "surrogatepass"))
+    else:
+        signature = "none"
+    return (args_hash, signature, len(serialized_bytes))
+def _build_excerpt(cfg: Config, call: LlmCall) -> Optional[str]:
+    """Produce the excerpt shipped for ``call``, or ``None`` when there is no
+    text at all (the key is then omitted on the wire).
+    The prompt and completion are joined with a ``---`` separator line, scrubbed
+    according to ``cfg.redaction``, and -- unless the config sends full turns --
+    capped at ``EXCERPT_MAX_CHARS`` code points."""
+    prompt_text = call.prompt or ""
+    if call.completion is None:
+        combined = prompt_text
+    elif prompt_text:
+        # The separator is only inserted when there is a non-empty prompt.
+        combined = prompt_text + "\n---\n" + call.completion
+    else:
+        combined = call.completion
+    if not combined:
+        return None
+    # FLOOR runs the in-process scrubber; any other policy passes text through.
+    floor_enabled = cfg.redaction == RedactionPolicy.FLOOR
+    cleaned = redact(combined).text if floor_enabled else combined
+    # Raw mode ships the whole (redacted) text for server-side summarization;
+    # the standard path ships only a capped excerpt.
+    if not cfg.sends_full_turns():
+        return _truncate_chars(cleaned, EXCERPT_MAX_CHARS)
+    return cleaned
+def _event_from_call(
+    cfg: Config, call: LlmCall, seq: int
+) -> Tuple[RawEvent, List[ToolCallWire]]:
+    """Convert one captured call into a wire event plus its tool-call records.
+    Args:
+        cfg: supplies ``device_id`` and ``agent`` and drives excerpt building.
+        call: the captured call to convert.
+        seq: per-call counter folded into the dedupe reference.
+    Returns:
+        ``(event, tool_calls)`` where ``tool_calls`` has one record per entry
+        of ``call.tool_calls``, in the same order.
+    """
+    # Integer-millis since the epoch, matching Rust's ``timestamp_millis()``.
+    # Computed with integer arithmetic (not ``ts * 1000``) to avoid float
+    # rounding that could occasionally shift the floored millisecond and so
+    # change the derived ``source_event_id``.
+    # NOTE(review): ``timestamp()`` on a naive datetime is interpreted as local
+    # time; the field default is UTC-aware, but a caller-supplied naive value
+    # would make the id depend on the host timezone -- confirm callers.
+    ts = call.started_at
+    started_millis = int(ts.timestamp()) * 1000 + ts.microsecond // 1000
+    # Dedupe reference: session id, start millisecond and the per-call counter.
+    source_ref = f"{call.session_id}::{started_millis}::{seq}"
+    src_event_id = wire.source_event_id(cfg.device_id, source_ref)
+    event = RawEvent(
+        source_event_id=src_event_id,
+        ts=call.started_at,
+        kind=call.kind,
+        agent=cfg.agent,
+        provider=call.provider,
+        session_id=call.session_id,
+        tokens=call.tokens,
+        model=call.model,
+        cwd=call.cwd,
+        git=call.git,
+        duration_ms=call.duration_ms,
+        billing=call.billing,
+        content_excerpt=_build_excerpt(cfg, call),
+    )
+    tool_calls: List[ToolCallWire] = []
+    for i, tc in enumerate(call.tool_calls):
+        # Arguments travel only as a hash, a key-signature hash and a size.
+        args_hash, signature_hash, args_bytes = _hash_args(tc.args)
+        # The id is derived from the parent event id and the call's position.
+        external_call_id = "tc_" + content_hash_tc(src_event_id, i)
+        tool_calls.append(
+            ToolCallWire(
+                external_call_id=external_call_id,
+                session_id=call.session_id,
+                source_event_id=src_event_id,
+                agent=cfg.agent,
+                server=tc.server,
+                name=tc.name,
+                call_index=i,
+                started_at=tc.started_at,
+                status=tc.status,
+                args_hash=args_hash,
+                signature_hash=signature_hash,
+                args_bytes=args_bytes,
+                result_bytes=tc.result_bytes,
+                model=call.model,
+                # At most three families are forwarded; ``ended_at`` is not
+                # forwarded at all.
+                command_families=list(tc.command_families[:3]),
+            )
+        )
+    return event, tool_calls
+def content_hash_tc(src_event_id: str, index: int) -> str:
+    """Return the 16-char hash that follows ``tc_`` in a tool call's
+    ``external_call_id``.
+    It is the first 16 characters of ``wire.content_hash`` over the parent
+    event id and the call's decimal index, matching the Rust
+    ``content_hash(...)[..16]``.
+    """
+    parts = [src_event_id, str(index)]
+    full_hash = wire.content_hash(parts)
+    return full_hash[:16]
+def build_batch(
+    cfg: Config, calls: Iterable[LlmCall], seq: int
+) -> Tuple[IngestBatch, int]:
+    """Turn a run of captured calls into one wire :class:`IngestBatch`.
+    ``seq`` is bumped once per call *before* that call is converted, which
+    keeps per-call dedupe keys distinct within a run. The batch and the
+    advanced counter are both returned, because an ``int`` argument cannot be
+    updated in place.
+    """
+    converted: List[Tuple[RawEvent, List[ToolCallWire]]] = []
+    for call in calls:
+        seq += 1
+        converted.append(_event_from_call(cfg, call, seq))
+    events = [event for event, _ in converted]
+    # Flatten per-call tool calls, preserving call order then call index.
+    tool_calls = [tc for _, per_call in converted for tc in per_call]
+    source_ids = [event.source_event_id for event in events]
+    batch = IngestBatch(
+        batch_id=wire.batch_id(source_ids),
+        device_id=cfg.device_id,
+        daemon_version=cfg.client_version,
+        events=events,
+        tool_calls=tool_calls,
+    )
+    return batch, seq

modelstat/client.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""The :class:`Client` facade.
+A thin handle over the background :class:`Worker`. The hot path
+(:meth:`Client.record`) does nothing but a non-blocking enqueue and returns; the
+worker thread redacts, batches, and ships off the request path. On overflow the
+newest record is dropped and a counter increments -- your request is never
+blocked and never grows memory unbounded.
+"""
+from __future__ import annotations
+from types import TracebackType
+from typing import Optional, Type
+from .capture import LlmCall
+from .config import Config
+from .transport import HttpTransport, Transport
+from .worker import Worker
+__all__ = ["Client"]
+class Client:
+    """The SDK entry point.
+    Construct with :class:`Client` (real HTTP transport for ``cfg.mode``) or
+    :meth:`Client.with_transport` (a custom transport, e.g. ``FakeTransport`` in
+    tests). Usable as a context manager -- ``with Client(cfg) as ms: ...`` calls
+    :meth:`shutdown` on exit.
+    Every method delegates to the single :class:`Worker` held in ``_worker``.
+    """
+    def __init__(self, cfg: Config) -> None:
+        """Create the worker with an HTTP transport built from ``cfg``."""
+        self._worker = Worker(cfg, HttpTransport.from_config(cfg))
+    @classmethod
+    def with_transport(cls, cfg: Config, transport: Transport) -> "Client":
+        """Start the SDK with a custom :class:`Transport`."""
+        # Bypass ``__init__`` so no HTTP transport is built for this instance.
+        self = cls.__new__(cls)
+        self._worker = Worker(cfg, transport)
+        return self
+    def record(self, call: LlmCall) -> None:
+        """Record a captured call. **Hot path:** a non-blocking enqueue. If the
+        buffer is full the call is dropped and :meth:`dropped` increments -- the
+        caller is never blocked."""
+        self._worker.record(call)
+    def dropped(self) -> int:
+        """Number of calls dropped due to buffer overflow (a backpressure
+        signal)."""
+        return self._worker.dropped()
+    def flush(self) -> None:
+        """Flush buffered calls and block until the worker has shipped them."""
+        self._worker.flush()
+    def shutdown(self) -> None:
+        """Flush on the way out, then join the worker thread."""
+        self._worker.shutdown()
+    # ---- context-manager sugar ---------------------------------------------
+    def __enter__(self) -> "Client":
+        """Return the client itself; nothing is started here."""
+        return self
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc: Optional[BaseException],
+        tb: Optional[TracebackType],
+    ) -> None:
+        """Shut down on exit. Returns ``None``, so an exception raised inside
+        the ``with`` body is not suppressed."""
+        self.shutdown()

modelstat/config.py ADDED Viewed

@@ -0,0 +1,135 @@
+"""SDK configuration: where to ship, how to authenticate, how hard to redact,
+and how the background worker batches.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Optional
+from ._version import __version__
+__all__ = ["Mode", "RedactionPolicy", "Config", "DEFAULT_DAEMON_URL"]
+# The default local daemon loopback URL.
+DEFAULT_DAEMON_URL = "http://127.0.0.1:4319/v1/ingest"
+@dataclass(frozen=True)
+class Mode:
+    """Where the SDK ships captured calls.
+    Construct via :meth:`local_daemon` or :meth:`remote` rather than directly.
+    A "local daemon" mode hands calls to a local modelstat daemon over loopback;
+    the daemon summarizes with its local Qwen model and ships only redacted
+    abstracts to the server -- raw text never leaves the machine. A "remote"
+    mode ships directly to the modelstat server (no local daemon / no local
+    model); with ``raw = True`` it sends full (still floor-redacted) turns to
+    ``/v1/ingest/raw`` for server-side summarization.
+    """
+    # ``"local_daemon"`` or ``"remote"``.
+    kind: str
+    # The daemon's loopback ingest URL (local-daemon mode only).
+    url: Optional[str] = None
+    # Base URL, e.g. ``https://api.modelstat.ai`` (remote mode only).
+    base_url: Optional[str] = None
+    # When ``True``, remote mode sends full floor-redacted turns to
+    # ``/v1/ingest/raw`` for server-side summarization; when ``False``, only the
+    # floor-redacted <=320-char excerpt to ``/v1/ingest``.
+    raw: bool = False
+    @classmethod
+    def local_daemon(cls, url: str = DEFAULT_DAEMON_URL) -> "Mode":
+        """Hand off to a local modelstat daemon over loopback (the default)."""
+        return cls(kind="local_daemon", url=url)
+    @classmethod
+    def remote(cls, base_url: str, raw: bool = False) -> "Mode":
+        """Ship directly to the modelstat server (no local daemon)."""
+        return cls(kind="remote", base_url=base_url, raw=raw)
+    def endpoint(self) -> str:
+        """Resolve the concrete POST endpoint for this mode.
+        Any ``kind`` other than ``"local_daemon"`` is resolved as remote.
+        Raises:
+            ValueError: if the URL this mode needs was never set. This is an
+                explicit raise rather than an ``assert`` so the check still
+                runs under ``python -O``.
+        """
+        if self.kind == "local_daemon":
+            if self.url is None:
+                raise ValueError("local_daemon mode requires a url")
+            return self.url
+        # remote
+        if self.base_url is None:
+            raise ValueError("remote mode requires a base_url")
+        # Drop trailing slashes so the joined path never contains ``//``.
+        base = self.base_url.rstrip("/")
+        return f"{base}/v1/ingest/raw" if self.raw else f"{base}/v1/ingest"
+class RedactionPolicy(Enum):
+    """How hard to scrub text before it leaves the SDK process.
+    Two members: ``FLOOR`` (scrub in-process) and ``NONE`` (ship text as-is).
+    """
+    # Run the privacy floor (secrets + email + absolute paths). The default, and
+    # the floor that even "raw" mode keeps.
+    FLOOR = "floor"
+    # Skip in-process redaction entirely. Only valid when shipping to a trusted
+    # local daemon that will redact, or under an explicit raw-data contract.
+    NONE = "none"
+@dataclass
+class Config:
+    """SDK configuration.
+    Construct with the two required arguments (``ingest_key`` and ``agent``),
+    then adjust fields directly or use the ``with_*`` helpers. Defaults:
+    local-daemon mode, floor redaction, a 4096-slot buffer, a 2s flush interval,
+    and 256-record batches.
+    The ``with_*`` helpers mutate this instance in place and return it; they do
+    not copy.
+    """
+    # Bearer credential: an org-scoped ingest key (``msk_...``) or a device
+    # secret.
+    ingest_key: str
+    # The **agent** label for every record -- which AI tool/integration the user
+    # used (e.g. ``raw_sdk_openai``, ``raw_sdk_anthropic``, ``raw_sdk_generic``).
+    # Ships as the wire ``agent`` field.
+    agent: str
+    # Stable device/service identifier (``dev_...``). Should be stable per host
+    # so dedupe keys are stable across restarts.
+    device_id: str = "dev_sdk"
+    # This client build's version (<=40 chars). Ships as the wire
+    # ``daemon_version`` field -- the *producer's* version (daemon or SDK), not
+    # the agent's.
+    client_version: str = field(default_factory=lambda: f"python-sdk/{__version__}")
+    # Where to ship.
+    mode: Mode = field(default_factory=Mode.local_daemon)
+    # In-process redaction policy.
+    redaction: RedactionPolicy = RedactionPolicy.FLOOR
+    # Bounded in-memory buffer between the hot path and the worker. On overflow
+    # the newest record is dropped and the dropped-counter increments -- the
+    # live request is never blocked.
+    buffer_capacity: int = 4096
+    # Flush the buffer at least this often, in seconds.
+    flush_interval: float = 2.0
+    # Flush eagerly once this many records are buffered.
+    flush_max_batch: int = 256
+    def __post_init__(self) -> None:
+        """Clamp ``client_version`` to the first 40 characters."""
+        # The wire field is constrained to 1..=40 chars; keep the SDK honest so
+        # a long custom version can't trip an HTTP 400 at the server.
+        # NOTE(review): only the upper bound is enforced here; an empty string
+        # passes through unchanged -- confirm whether that needs guarding.
+        if len(self.client_version) > 40:
+            self.client_version = self.client_version[:40]
+    def with_remote(self, base_url: str, raw: bool = False) -> "Config":
+        """Ship directly to the modelstat server instead of a local daemon.
+        ``raw = True`` opts into server-side summarization of full
+        (floor-redacted) turns. Returns ``self`` for chaining.
+        """
+        self.mode = Mode.remote(base_url, raw)
+        return self
+    def with_device_id(self, device_id: str) -> "Config":
+        """Override the device id. Returns ``self`` for chaining."""
+        self.device_id = device_id
+        return self
+    def sends_full_turns(self) -> bool:
+        """Whether this mode sends full (untruncated) redacted turns for
+        server-side summarization (remote + raw)."""
+        return self.mode.kind == "remote" and self.mode.raw

modelstat/py.typed ADDED Viewed

File without changes