PyPI - modelstat-sdk - Versions diffs - 0.0.1__py3-none-any.whl - Mend

modelstat-sdk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

modelstat/__init__.py +94 -0
modelstat/_version.py +8 -0
modelstat/capture.py +264 -0
modelstat/client.py +72 -0
modelstat/config.py +135 -0
modelstat/py.typed +0 -0
modelstat/redact.py +150 -0
modelstat/transport.py +97 -0
modelstat/wire.py +344 -0
modelstat/worker.py +183 -0
modelstat_sdk-0.0.1.dist-info/METADATA +158 -0
modelstat_sdk-0.0.1.dist-info/RECORD +13 -0
modelstat_sdk-0.0.1.dist-info/WHEEL +4 -0

modelstat/worker.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""The background worker: the only place redaction, batching, and network I/O
+happen.
+It drains a bounded queue on a timer or when a batch fills, converts captured
+calls into a wire batch, and ships it via the :class:`Transport`. It runs on a
+single daemon thread so it never keeps the interpreter alive at shutdown, and so
+the caller's hot path (:meth:`Client.record`) only ever does a non-blocking
+enqueue.
+"""
+from __future__ import annotations
+import queue
+import sys
+import threading
+import time
+from typing import List, Optional, Union
+from . import capture
+from .capture import LlmCall
+from .config import Config
+from .transport import Transport, TransportError
+# Public names of this module: only the Worker class.
+__all__ = ["Worker"]
+# Seconds to sleep before the single retry of a failed send; if the retry also
+# fails, the batch is dropped.
+_RETRY_DELAY = 0.25
+class _Drain:
+    """Control message that asks the worker for an immediate flush.
+    ``done`` is a :class:`threading.Event` the worker sets once it has
+    attempted that flush, so ``flush()`` can block on it until the buffer has
+    been drained and shipped.
+    """
+    __slots__ = ("done",)
+    def __init__(self) -> None:
+        # One fresh, unset event per request.
+        self.done: threading.Event = threading.Event()
+class _Shutdown:
+    """Control message that asks the worker for one last flush, then to exit.
+    ``done`` is a :class:`threading.Event` the worker sets after that final
+    flush, just before its loop returns.
+    """
+    __slots__ = ("done",)
+    def __init__(self) -> None:
+        # One fresh, unset event per request.
+        self.done: threading.Event = threading.Event()
+# What can travel through the queue: a captured call, or a control sentinel
+# (``_Drain`` requests a flush, ``_Shutdown`` requests a final flush and exit).
+_Msg = Union[LlmCall, _Drain, _Shutdown]
+class Worker:
+    """Owns the bounded queue, the background thread, and the dropped counter.
+    Producers call :meth:`record`, which never blocks. One daemon thread runs
+    :meth:`_run`: it buffers captured calls and ships them in batches through
+    the transport. :meth:`flush` and :meth:`shutdown` may be called repeatedly
+    and after the worker thread has exited; they return instead of waiting
+    forever on a thread that can no longer answer.
+    """
+    # Seconds between liveness checks while flush()/shutdown() wait on the
+    # worker thread.
+    _LIVENESS_POLL = 0.1
+    def __init__(self, cfg: Config, transport: Transport) -> None:
+        """Create the queue and start the worker thread.
+        Args:
+            cfg: SDK configuration. This class reads ``buffer_capacity``,
+                ``flush_interval`` and ``flush_max_batch`` from it and hands
+                it to ``capture.build_batch``.
+            transport: Ships one batch payload per ``send(payload)`` call.
+        """
+        self._cfg = cfg
+        self._transport = transport
+        # Bounded buffer between the hot path and the worker.
+        self._queue: "queue.Queue[_Msg]" = queue.Queue(maxsize=cfg.buffer_capacity)
+        # Thread-safe overflow counter (a backpressure signal).
+        self._dropped = 0
+        self._dropped_lock = threading.Lock()
+        # Passed to, and replaced by the return of, capture.build_batch.
+        self._seq = 0
+        # Calls taken off the queue but not yet shipped; worker-thread only.
+        self._buf: List[LlmCall] = []
+        self._thread = threading.Thread(
+            target=self._run, name="modelstat-worker", daemon=True
+        )
+        self._thread.start()
+    # ---- hot path -----------------------------------------------------------
+    def record(self, call: LlmCall) -> None:
+        """Non-blocking enqueue. On overflow the *newest* record is dropped and
+        the dropped counter increments -- the caller is never blocked and never
+        does I/O or redaction here."""
+        try:
+            self._queue.put_nowait(call)
+        except queue.Full:
+            with self._dropped_lock:
+                self._dropped += 1
+    def dropped(self) -> int:
+        """Number of calls dropped due to buffer overflow."""
+        with self._dropped_lock:
+            return self._dropped
+    # ---- control ------------------------------------------------------------
+    def flush(self) -> None:
+        """Flush buffered calls and block until the worker has attempted to
+        ship them. Returns immediately if the worker thread is not running
+        (for example after :meth:`shutdown`), because nothing would ever
+        answer the request."""
+        if not self._thread.is_alive():
+            return
+        drain = _Drain()
+        if self._submit(drain):
+            self._await(drain.done)
+    def shutdown(self) -> None:
+        """Final flush, then join the worker thread. Idempotent: a repeated
+        call finds the thread already stopped and just returns."""
+        if self._thread.is_alive():
+            stop = _Shutdown()
+            if self._submit(stop):
+                self._await(stop.done)
+        self._thread.join()
+    def _submit(self, msg: _Msg) -> bool:
+        """Enqueue a control message, waiting for room if the queue is full.
+        Returns ``True`` once the message is queued, or ``False`` if the worker
+        thread stopped first (the message would never be read)."""
+        while self._thread.is_alive():
+            try:
+                # Blocking put, so a full queue can't lose the control message;
+                # the timeout exists only to re-check that the worker is alive.
+                self._queue.put(msg, timeout=self._LIVENESS_POLL)
+                return True
+            except queue.Full:
+                continue
+        return False
+    def _await(self, done: threading.Event) -> None:
+        """Block until ``done`` is set, or until the worker thread has exited
+        without setting it."""
+        while not done.wait(self._LIVENESS_POLL):
+            if not self._thread.is_alive():
+                return
+    # ---- worker loop --------------------------------------------------------
+    def _run(self) -> None:
+        """Worker-thread main loop; returns after handling a ``_Shutdown``."""
+        # Deadline of the next time-based flush. We poll the queue with a
+        # timeout so an idle SDK wakes on the flush interval and a busy one
+        # flushes as soon as a batch fills -- the equivalent of the Rust
+        # select! over a channel and a ticker.
+        next_flush = time.monotonic() + self._cfg.flush_interval
+        while True:
+            timeout = max(0.0, next_flush - time.monotonic())
+            try:
+                msg: Optional[_Msg] = self._queue.get(timeout=timeout)
+            except queue.Empty:
+                msg = None  # timer elapsed with nothing queued
+            if isinstance(msg, _Shutdown):
+                self._flush()
+                msg.done.set()
+                return
+            if msg is None or isinstance(msg, _Drain):
+                self._flush()
+                if msg is not None:
+                    # Wake the flush() caller blocked on this request.
+                    msg.done.set()
+            else:
+                # A captured call.
+                self._buf.append(msg)
+                if len(self._buf) < self._cfg.flush_max_batch:
+                    continue
+                self._flush()
+            # Every flush restarts the interval timer.
+            next_flush = time.monotonic() + self._cfg.flush_interval
+    def _flush(self) -> None:
+        """Convert and ship the buffered calls. Retries once on failure, then
+        drops the batch loudly (in local-daemon mode the daemon owns durable
+        retry; remote durability is a follow-up -- see the README).
+        Never raises ``Exception``: a failure anywhere in the ship step
+        (building, serializing or sending) is reported on stderr and the
+        batch is dropped, so the worker thread keeps running."""
+        if not self._buf:
+            return
+        calls, self._buf = self._buf, []
+        try:
+            self._ship(calls)
+        except Exception as e:  # never let the worker thread die
+            print(
+                f"modelstat: dropping batch of {len(calls)} calls "
+                f"after unexpected error: {e}",
+                file=sys.stderr,
+            )
+    def _ship(self, calls: List[LlmCall]) -> None:
+        """Build one wire batch from ``calls`` and send it, retrying once.
+        Send failures are handled here; errors from building or serializing
+        the batch propagate to :meth:`_flush`."""
+        batch, self._seq = capture.build_batch(self._cfg, calls, self._seq)
+        payload = batch.to_dict()
+        # Two attempts in total: the first send plus one retry.
+        for attempt in range(2):
+            try:
+                self._transport.send(payload)
+                return
+            except Exception as e:
+                # Transport errors and unexpected errors get the same
+                # treatment; only the wording of the first message differs.
+                verb = "failed" if isinstance(e, TransportError) else "error"
+                if attempt == 0:
+                    print(
+                        f"modelstat: send {verb} (retrying once): {e}",
+                        file=sys.stderr,
+                    )
+                    time.sleep(_RETRY_DELAY)
+                else:
+                    print(
+                        f"modelstat: dropping batch of {len(batch.events)} "
+                        f"events after retry: {e}",
+                        file=sys.stderr,
+                    )

modelstat_sdk-0.0.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,158 @@
+Metadata-Version: 2.4
+Name: modelstat-sdk
+Version: 0.0.1
+Summary: Privacy-first SDK for modelstat — wrap your backend LLM calls and ship redacted usage to a local daemon or the modelstat server, without touching live-request latency.
+Project-URL: Homepage, https://modelstat.ai
+Project-URL: Repository, https://github.com/modelstat/modelstat
+Author: modelstat
+License-Expression: Apache-2.0
+Keywords: ai,llm,observability,redaction,telemetry
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Topic :: System :: Monitoring
+Classifier: Typing :: Typed
+Requires-Python: >=3.9
+Requires-Dist: blake3
+Description-Content-Type: text/markdown
+# modelstat
+**Wrap your backend's LLM calls and get spend + usage analytics — while your prompts stay on your own machine.**
+`modelstat-sdk` is a privacy-first Python SDK. It captures the LLM calls your backend already makes and hands them to a **local modelstat daemon**, which **summarizes them on your machine with a local model** and ships only short, **redacted abstracts** to the modelstat analytics server. Raw prompts, completions, and tool arguments **never leave your infrastructure**.
+```text
+   your backend                          your machine                       modelstat
+ ┌──────────────┐   loopback        ┌──────────────────────┐   HTTPS    ┌───────────────┐
+ │  ms.record() │ ───────────────▶  │   modelstat daemon   │ ─────────▶ │   analytics   │
+ │ (non-block)  │   raw stays here  │  • local model        │  redacted  │   dashboard   │
+ └──────────────┘                   │    → summarize        │  abstract  │  (spend, by   │
+        ▲                           │  • redact (PII/keys)  │   + tokens │  project/etc) │
+   real LLM call                    │  • batch + retry      │            └───────────────┘
+                                    └──────────────────────┘
+              ↑ raw prompts / completions / args never cross this line ↑
+```
+## Why a local daemon?
+- **Privacy by construction.** Summarization happens **on your machine**. Only a bounded, redacted abstract + token/cost numbers are uploaded — never raw text. That's what gives you content-level attribution (by project, feature, work-type) *without* sending content to a vendor.
+- **No added request latency.** `record()` is a non-blocking enqueue into an in-memory buffer; a background worker **thread** handles redaction, the daemon hand-off, batching, and shipping entirely off your request path. If the buffer fills, the newest record is dropped and a counter ticks up — your request is **never** blocked.
+- **One daemon, many producers.** Every service instance points at the same local daemon; the daemon owns the local model, durable retry, and the upload. Your app stays a thin, dependency-light client (one runtime dependency: `blake3`).
+## Install
+```bash
+pip install modelstat-sdk
+```
+```python
+import modelstat
+```
+The import package is `modelstat`; the distribution on PyPI is `modelstat-sdk`. Requires Python 3.9+.
+## Guide: run a daemon locally, then point the SDK at it
+### 1. Run the modelstat daemon
+The daemon is the open-source `modelstat` daemon. It runs as a background service, downloads a small local model on first start, and listens on loopback for SDK traffic.
+```bash
+# zero-install: starts the background service + fetches the local model
+npx modelstat@latest
+# …or install it globally
+npm i -g modelstat && modelstat start
+modelstat status      # confirm it's running (and which loopback port it uses)
+```
+By default the daemon listens on `http://127.0.0.1:4319`.
+### 2. Point the SDK at the daemon
+Local-daemon mode is the **default** — supply your org ingest key and an agent label and you're pointed at the local daemon already:
+```python
+from modelstat import Client, Config
+cfg = Config("msk_live_…", "raw_sdk_openai")  # defaults to the local daemon
+ms = Client(cfg)
+```
+Changed the daemon's port? Set the mode explicitly:
+```python
+from modelstat import Config, Mode
+cfg = Config("msk_live_…", "raw_sdk_openai")
+cfg.mode = Mode.local_daemon("http://127.0.0.1:4319/v1/ingest")  # replace 4319 with your daemon's port
+```
+### 3. Record your calls
+After each real LLM call returns, hand the SDK what your code already has — the provider, a session id, the model, the token counts, and the prompt/completion text. `record()` is non-blocking; use the client as a context manager so it flushes on the way out:
+```python
+from modelstat import Client, Config, LlmCall, TokenUsage
+cfg = Config("msk_live_…", "raw_sdk_openai")
+with Client(cfg) as ms:                                  # shutdown() flushes on exit
+    ms.record(
+        LlmCall("openai", "session-or-trace-id")          # provider, grouping id
+        .model_("gpt-x")
+        .with_tokens(TokenUsage(input=800, output=120))
+        .text("the prompt", "the completion")             # raw — summarized locally, never uploaded raw
+    )
+```
+You can also construct an `LlmCall` with plain keyword arguments
+(`LlmCall(provider="openai", session_id="…", model="gpt-x", tokens=TokenUsage(input=800))`).
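+Call `ms.flush()` to block until the worker has attempted to ship the buffered calls (a failed send is retried once; if the retry also fails, the batch is dropped and reported on stderr), `ms.shutdown()` to flush and stop the worker thread, and `ms.dropped()` to read the overflow counter.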
+**What flows where:** your prompt + completion go to the **local daemon only**. The daemon summarizes them with its local model, redacts, and uploads just the abstract + token/cost metadata to modelstat. The `agent` label (`raw_sdk_openai`) records which integration produced the calls; `session_id` groups calls into a conversation/session downstream.
+## Modes
+| Mode | Where summarization runs | What leaves your machine | Use when |
+|---|---|---|---|
+| **Local daemon** *(default)* | Your machine (daemon's local model) | Redacted abstract + metadata only | Maximum privacy; a daemon can run on/near the host |
+| **Remote** | modelstat server | Floor-redacted full turns (`raw=True`), or just the ≤320-char redacted excerpt (`raw=False`) | Serverless / can't run a local model; you accept server-side summarization |
+```python
+# Remote (no local daemon / no local model):
+cfg = Config("msk_live_…", "raw_sdk_openai").with_remote(
+    "https://api.modelstat.ai", raw=True
+)
+```
+## Privacy floor (always on)
+Before any bytes leave the SDK process — in **every** mode — an in-process redaction floor scrubs secrets (provider keys, tokens, JWTs, PEM blocks, DB passwords, …), emails, and absolute home paths. "Raw" mode means *full turns*, not *leaked credentials* — the floor still runs. Tool calls ship only hashes, byte sizes, and allowlisted command verbs — never raw args, results, paths, or command text.
+What the floor redacts: Anthropic / OpenAI / Google / AWS / GitHub / Slack / Stripe / Discord keys and tokens, JWTs, PEM private-key blocks, modelstat device secrets, generic `NAME_KEY=value` env secrets (the name is kept, the value is dropped), `Bearer` tokens, database-URL passwords, lone 40-char AWS-style secret blobs, email addresses, and absolute `/Users/…`, `/home/…`, and `C:\Users\…` paths.
+## What's live today (v0.0.1)
+Early release — the honest state, so nothing surprises you:
+- ✅ **SDK**: zero-latency capture, the redaction floor, batching/backpressure, and both transports are implemented and tested.
+- 🚧 **Daemon loopback ingest** (the receiving side of local-daemon mode) is in active development. The daemon already runs a local model and summarizes today; the SDK-push endpoint is landing next. **Until it ships, use remote mode** — the local-daemon API is stable, so your code won't change when it does.
+- 🚧 **`/v1/ingest/raw`** (server-side summarization for `raw=True`) is rolling out; `raw=False` against `/v1/ingest` works today for token/cost telemetry.
+Progress: https://github.com/modelstat/modelstat
+## License
+Apache-2.0.

modelstat_sdk-0.0.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+modelstat/__init__.py,sha256=o0aRhH4MOorbTcBET8DkXsLw9qqQqeK_CrvrShKxGPw,2633
+modelstat/_version.py,sha256=L-Xc-z9ustIZ1AdwOHrjHkbINuGTXq48vvVayQRSFeA,280
+modelstat/capture.py,sha256=aNAuJShQxR0z2Vk2AnAyVRgBarAp0bD3ewCNHJwr9ZA,9209
+modelstat/client.py,sha256=0o-ByBHE7Pvm_f9oGoORn45Y8wo5ttc886b3Zbr-poM,2367
+modelstat/config.py,sha256=9JY2KJyAuBtRJMmzl0gxV1nlQrarbQjlVDd1W6U2IVk,5471
+modelstat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+modelstat/redact.py,sha256=R67yvobMqpV0hfB3VuYYiOTcZt7xVH8NIp1n8t9jOAA,5258
+modelstat/transport.py,sha256=iskXSpVFVJNf_b3lQbTjt9-oGfXkLVdRU4TMajcRHzk,3488
+modelstat/wire.py,sha256=xyyGHuEeo5H9RObXsmqxKIXturmBS7gWq0sjs8INSMo,11631
+modelstat/worker.py,sha256=9UucT40opeDbE6O3smUyB0iddxbBgBl2fidtq3BN4rY,6416
+modelstat_sdk-0.0.1.dist-info/METADATA,sha256=G0ru9y4HlO0AZM_Ix9yJobDXebjbjuAJc67hAZAZqRc,8564
+modelstat_sdk-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+modelstat_sdk-0.0.1.dist-info/RECORD,,

modelstat_sdk-0.0.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any