modelstat-sdk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modelstat/worker.py ADDED
@@ -0,0 +1,183 @@
1
+ """The background worker: the only place redaction, batching, and network I/O
2
+ happen.
3
+
4
+ It drains a bounded queue on a timer or when a batch fills, converts captured
5
+ calls into a wire batch, and ships it via the :class:`Transport`. It runs on a
6
+ single daemon thread so it never keeps the interpreter alive at shutdown, and so
7
+ the caller's hot path (:meth:`Client.record`) only ever does a non-blocking
8
+ enqueue.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import queue
14
+ import sys
15
+ import threading
16
+ import time
17
+ from typing import List, Optional, Union
18
+
19
+ from . import capture
20
+ from .capture import LlmCall
21
+ from .config import Config
22
+ from .transport import Transport, TransportError
23
+
24
+ __all__ = ["Worker"]
25
+
26
+ # Retry the failed send once after this delay before dropping the batch.
27
+ _RETRY_DELAY = 0.25
28
+
29
+
30
class _Drain:
    """Control message that asks the worker to flush its buffer now.

    ``done`` is a :class:`threading.Event` that the worker sets after it has
    attempted the flush, which lets ``flush()`` block until that point.
    """

    __slots__ = ("done",)

    def __init__(self) -> None:
        # Starts cleared; the worker sets it once the flush has been attempted.
        flushed = threading.Event()
        self.done = flushed
39
+
40
+
41
class _Shutdown:
    """Control message that asks the worker for one last flush, then to exit.

    ``done`` is a :class:`threading.Event` that the worker sets after that
    final flush has been attempted.
    """

    __slots__ = ("done",)

    def __init__(self) -> None:
        # Starts cleared; the worker sets it just before its loop returns.
        finished = threading.Event()
        self.done = finished
48
+
49
+
50
+ # What can travel through the queue: a captured call, or a control sentinel.
51
+ _Msg = Union[LlmCall, _Drain, _Shutdown]
52
+
53
+
54
class Worker:
    """Owns the bounded queue, the background thread, and the dropped counter.

    Producers call :meth:`record`, which never blocks. A single daemon thread
    moves queued calls into an in-memory buffer and ships that buffer when the
    flush interval elapses, when the buffer reaches ``cfg.flush_max_batch``
    calls, or when :meth:`flush` / :meth:`shutdown` asks for it.

    :meth:`flush` and :meth:`shutdown` are safe to call repeatedly and after
    the worker thread has exited; they return instead of blocking forever.
    """

    # Seconds between liveness checks while a caller waits on the worker, so a
    # control call can never block on a thread that has already exited.
    _LIVENESS_POLL = 0.1

    def __init__(self, cfg: Config, transport: Transport) -> None:
        """Create the queue and start the daemon worker thread.

        Args:
            cfg: Supplies ``buffer_capacity``, ``flush_interval`` and
                ``flush_max_batch``; also passed to ``capture.build_batch``.
            transport: Object whose ``send(payload)`` ships one wire batch.
        """
        self._cfg = cfg
        self._transport = transport
        # Bounded buffer between the hot path and the worker.
        self._queue: "queue.Queue[_Msg]" = queue.Queue(maxsize=cfg.buffer_capacity)
        # Thread-safe overflow counter (a backpressure signal).
        self._dropped = 0
        self._dropped_lock = threading.Lock()
        # Sequence value threaded through ``capture.build_batch``.
        self._seq = 0
        # Calls pulled off the queue but not yet shipped (worker thread only).
        self._buf: List[LlmCall] = []
        self._thread = threading.Thread(
            target=self._run, name="modelstat-worker", daemon=True
        )
        self._thread.start()

    # ---- hot path -----------------------------------------------------------

    def record(self, call: LlmCall) -> None:
        """Non-blocking enqueue. On overflow the *newest* record is dropped and
        the dropped counter increments -- the caller is never blocked and never
        does I/O or redaction here."""
        try:
            self._queue.put_nowait(call)
        except queue.Full:
            with self._dropped_lock:
                self._dropped += 1

    def dropped(self) -> int:
        """Number of calls dropped due to buffer overflow."""
        with self._dropped_lock:
            return self._dropped

    # ---- control ------------------------------------------------------------

    def flush(self) -> None:
        """Flush buffered calls and block until the worker has attempted to
        ship them. A no-op once the worker thread has exited."""
        if self._thread.is_alive():
            self._signal(_Drain())

    def shutdown(self) -> None:
        """Final flush, then join the worker thread. Idempotent: calling it
        again after the worker has exited returns immediately."""
        if self._thread.is_alive():
            self._signal(_Shutdown())
        self._thread.join()

    def _signal(self, msg: Union[_Drain, _Shutdown]) -> None:
        """Deliver a control sentinel and wait for the worker to acknowledge it.

        Returns early, without raising, if the worker thread exits before the
        sentinel has been delivered or acknowledged.
        """
        poll = self._LIVENESS_POLL
        # Blocking ``put`` so a full queue can't lose the control message, but
        # with a timeout so we notice if the worker has gone away meanwhile.
        while self._thread.is_alive():
            try:
                self._queue.put(msg, timeout=poll)
            except queue.Full:
                continue
            break
        else:
            # Loop ended because the worker died, not because we delivered.
            return
        while not msg.done.wait(poll):
            if not self._thread.is_alive():
                return

    # ---- worker loop --------------------------------------------------------

    def _run(self) -> None:
        """Worker thread body: drain the queue until a shutdown sentinel."""
        # Deadline of the next time-based flush. We poll the queue with a
        # timeout so an idle SDK wakes on the flush interval and a busy one
        # flushes as soon as a batch fills -- the equivalent of the Rust
        # select! over a channel and a ticker.
        next_flush = time.monotonic() + self._cfg.flush_interval
        while True:
            timeout = max(0.0, next_flush - time.monotonic())
            try:
                msg: Optional[_Msg] = self._queue.get(timeout=timeout)
            except queue.Empty:
                msg = None

            if msg is None:
                # Timer elapsed.
                self._flush()
                next_flush = time.monotonic() + self._cfg.flush_interval
                continue

            if isinstance(msg, (_Drain, _Shutdown)):
                try:
                    self._flush()
                finally:
                    # Always release the waiting caller, even if the flush
                    # raised something unexpected.
                    msg.done.set()
                if isinstance(msg, _Shutdown):
                    return
                next_flush = time.monotonic() + self._cfg.flush_interval
                continue

            # A captured call.
            self._buf.append(msg)
            if len(self._buf) >= self._cfg.flush_max_batch:
                self._flush()
                next_flush = time.monotonic() + self._cfg.flush_interval

    def _flush(self) -> None:
        """Convert and ship the buffered calls. Retries once on failure, then
        drops the batch loudly (in local-daemon mode the daemon owns durable
        retry; remote durability is a follow-up -- see the README).

        Never raises ``Exception``: a failure while building or sending the
        batch is reported on stderr and the batch is dropped, so the worker
        thread stays alive.
        """
        if not self._buf:
            return
        calls, self._buf = self._buf, []
        try:
            batch, self._seq = capture.build_batch(self._cfg, calls, self._seq)
            payload = batch.to_dict()
        except Exception as e:  # never let the worker thread die
            print(
                f"modelstat: dropping batch of {len(calls)} calls "
                f"(could not build batch): {e}",
                file=sys.stderr,
            )
            return

        for attempt in range(2):
            try:
                self._transport.send(payload)
                return
            except TransportError as e:
                self._on_send_failure(attempt, "send failed", e, batch)
            except Exception as e:  # never let the worker thread die
                self._on_send_failure(attempt, "send error", e, batch)

    @staticmethod
    def _on_send_failure(attempt, label, err, batch) -> None:
        """Report a failed send: pause before the retry on the first attempt,
        announce the dropped batch on the second."""
        if attempt == 0:
            print(
                f"modelstat: {label} (retrying once): {err}",
                file=sys.stderr,
            )
            time.sleep(_RETRY_DELAY)
        else:
            print(
                f"modelstat: dropping batch of {len(batch.events)} "
                f"events after retry: {err}",
                file=sys.stderr,
            )
@@ -0,0 +1,158 @@
1
+ Metadata-Version: 2.4
2
+ Name: modelstat-sdk
3
+ Version: 0.0.1
4
+ Summary: Privacy-first SDK for modelstat — wrap your backend LLM calls and ship redacted usage to a local daemon or the modelstat server, without touching live-request latency.
5
+ Project-URL: Homepage, https://modelstat.ai
6
+ Project-URL: Repository, https://github.com/modelstat/modelstat
7
+ Author: modelstat
8
+ License-Expression: Apache-2.0
9
+ Keywords: ai,llm,observability,redaction,telemetry
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Classifier: Topic :: System :: Monitoring
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: blake3
25
+ Description-Content-Type: text/markdown
26
+
27
+ # modelstat
28
+
29
+ **Wrap your backend's LLM calls and get spend + usage analytics — while your prompts stay on your own machine.**
30
+
31
+ `modelstat-sdk` is a privacy-first Python SDK. It captures the LLM calls your backend already makes and hands them to a **local modelstat daemon**, which **summarizes them on your machine with a local model** and ships only short, **redacted abstracts** to the modelstat analytics server. Raw prompts, completions, and tool arguments **never leave your infrastructure**.
32
+
33
+ ```text
34
+ your backend your machine modelstat
35
+ ┌──────────────┐ loopback ┌──────────────────────┐ HTTPS ┌───────────────┐
36
+ │ ms.record() │ ───────────────▶ │ modelstat daemon │ ─────────▶ │ analytics │
37
+ │ (non-block) │ raw stays here │ • local model │ redacted │ dashboard │
38
+ └──────────────┘ │ → summarize │ abstract │ (spend, by │
39
+ ▲ │ • redact (PII/keys) │ + tokens │ project/etc) │
40
+ real LLM call │ • batch + retry │ └───────────────┘
41
+ └──────────────────────┘
42
+ ↑ raw prompts / completions / args never cross this line ↑
43
+ ```
44
+
45
+ ## Why a local daemon?
46
+
47
+ - **Privacy by construction.** Summarization happens **on your machine**. Only a bounded, redacted abstract + token/cost numbers are uploaded — never raw text. That's what gives you content-level attribution (by project, feature, work-type) *without* sending content to a vendor.
48
+ - **No added request latency.** `record()` is a non-blocking enqueue into an in-memory buffer; a background worker **thread** handles redaction, the daemon hand-off, batching, and shipping entirely off your request path. If the buffer fills, the newest record is dropped and a counter ticks up — your request is **never** blocked.
49
+ - **One daemon, many producers.** Every service instance points at the same local daemon; the daemon owns the local model, durable retry, and the upload. Your app stays a thin, dependency-light client (one runtime dependency: `blake3`).
50
+
51
+ ## Install
52
+
53
+ ```bash
54
+ pip install modelstat-sdk
55
+ ```
56
+
57
+ ```python
58
+ import modelstat
59
+ ```
60
+
61
+ The import package is `modelstat`; the distribution on PyPI is `modelstat-sdk`. Requires Python 3.9+.
62
+
63
+ ## Guide: run a daemon locally, then point the SDK at it
64
+
65
+ ### 1. Run the modelstat daemon
66
+
67
+ The daemon is the open-source `modelstat` daemon. It runs as a background service, downloads a small local model on first start, and listens on loopback for SDK traffic.
68
+
69
+ ```bash
70
+ # zero-install: starts the background service + fetches the local model
71
+ npx modelstat@latest
72
+
73
+ # …or install it globally
74
+ npm i -g modelstat && modelstat start
75
+
76
+ modelstat status # confirm it's running (and which loopback port it uses)
77
+ ```
78
+
79
+ By default the daemon listens on `http://127.0.0.1:4319`.
80
+
81
+ ### 2. Point the SDK at the daemon
82
+
83
+ Local-daemon mode is the **default** — supply your org ingest key and an agent label and you're pointed at the local daemon already:
84
+
85
+ ```python
86
+ from modelstat import Client, Config
87
+
88
+ cfg = Config("msk_live_…", "raw_sdk_openai") # defaults to the local daemon
89
+ ms = Client(cfg)
90
+ ```
91
+
92
+ Changed the daemon's port? Set the mode explicitly, replacing `4319` (the default, shown here) with the port reported by `modelstat status`:
93
+
94
+ ```python
95
+ from modelstat import Config, Mode
96
+
97
+ cfg = Config("msk_live_…", "raw_sdk_openai")
98
+ cfg.mode = Mode.local_daemon("http://127.0.0.1:4319/v1/ingest")
99
+ ```
100
+
101
+ ### 3. Record your calls
102
+
103
+ After each real LLM call returns, hand the SDK what it already has. `record()` is non-blocking; use the client as a context manager so it flushes on the way out:
104
+
105
+ ```python
106
+ from modelstat import Client, Config, LlmCall, TokenUsage
107
+
108
+ cfg = Config("msk_live_…", "raw_sdk_openai")
109
+
110
+ with Client(cfg) as ms: # shutdown() flushes on exit
111
+ ms.record(
112
+ LlmCall("openai", "session-or-trace-id") # provider, grouping id
113
+ .model_("gpt-x")
114
+ .with_tokens(TokenUsage(input=800, output=120))
115
+ .text("the prompt", "the completion") # raw — summarized locally, never uploaded raw
116
+ )
117
+ ```
118
+
119
+ You can also construct an `LlmCall` with plain keyword arguments
120
+ (`LlmCall(provider="openai", session_id="…", model="gpt-x", tokens=TokenUsage(input=800))`).
121
+
122
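+ Call `ms.flush()` to block until the worker has attempted to ship the buffered calls (a batch that still fails after one retry is dropped and reported on stderr), `ms.shutdown()` to flush and stop the worker thread, and `ms.dropped()` to read the overflow counter.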
123
+
124
+ **What flows where:** your prompt + completion go to the **local daemon only**. The daemon summarizes them with its local model, redacts, and uploads just the abstract + token/cost metadata to modelstat. The `agent` label (`raw_sdk_openai`) records which integration produced the calls; `session_id` groups calls into a conversation/session downstream.
125
+
126
+ ## Modes
127
+
128
+ | Mode | Where summarization runs | What leaves your machine | Use when |
129
+ |---|---|---|---|
130
+ | **Local daemon** *(default)* | Your machine (daemon's local model) | Redacted abstract + metadata only | Maximum privacy; a daemon can run on/near the host |
131
+ | **Remote** | modelstat server | Floor-redacted full turns (`raw=True`), or just the ≤320-char redacted excerpt (`raw=False`) | Serverless / can't run a local model; you accept server-side summarization |
132
+
133
+ ```python
134
+ # Remote (no local daemon / no local model):
135
+ cfg = Config("msk_live_…", "raw_sdk_openai").with_remote(
136
+ "https://api.modelstat.ai", raw=True
137
+ )
138
+ ```
139
+
140
+ ## Privacy floor (always on)
141
+
142
+ Before any bytes leave the SDK process — in **every** mode — an in-process redaction floor scrubs secrets (provider keys, tokens, JWTs, PEM blocks, DB passwords, …), emails, and absolute home paths. "Raw" mode means *full turns*, not *leaked credentials* — the floor still runs. Tool calls ship only hashes, byte sizes, and allowlisted command verbs — never raw args, results, paths, or command text.
143
+
144
+ What the floor redacts: Anthropic / OpenAI / Google / AWS / GitHub / Slack / Stripe / Discord keys and tokens, JWTs, PEM private-key blocks, modelstat device secrets, generic `NAME_KEY=value` env secrets (the name is kept, the value is dropped), `Bearer` tokens, database-URL passwords, lone 40-char AWS-style secret blobs, email addresses, and absolute `/Users/…`, `/home/…`, and `C:\Users\…` paths.
145
+
146
+ ## What's live today (v0.0.1)
147
+
148
+ Early release — the honest state, so nothing surprises you:
149
+
150
+ - ✅ **SDK**: zero-latency capture, the redaction floor, batching/backpressure, and both transports are implemented and tested.
151
+ - 🚧 **Daemon loopback ingest** (the receiving side of local-daemon mode) is in active development. The daemon already runs a local model and summarizes today; the SDK-push endpoint is landing next. **Until it ships, use remote mode** — the local-daemon API is stable, so your code won't change when it does.
152
+ - 🚧 **`/v1/ingest/raw`** (server-side summarization for `raw=True`) is rolling out; `raw=False` against `/v1/ingest` works today for token/cost telemetry.
153
+
154
+ Progress: https://github.com/modelstat/modelstat
155
+
156
+ ## License
157
+
158
+ Apache-2.0.
@@ -0,0 +1,13 @@
1
+ modelstat/__init__.py,sha256=o0aRhH4MOorbTcBET8DkXsLw9qqQqeK_CrvrShKxGPw,2633
2
+ modelstat/_version.py,sha256=L-Xc-z9ustIZ1AdwOHrjHkbINuGTXq48vvVayQRSFeA,280
3
+ modelstat/capture.py,sha256=aNAuJShQxR0z2Vk2AnAyVRgBarAp0bD3ewCNHJwr9ZA,9209
4
+ modelstat/client.py,sha256=0o-ByBHE7Pvm_f9oGoORn45Y8wo5ttc886b3Zbr-poM,2367
5
+ modelstat/config.py,sha256=9JY2KJyAuBtRJMmzl0gxV1nlQrarbQjlVDd1W6U2IVk,5471
6
+ modelstat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ modelstat/redact.py,sha256=R67yvobMqpV0hfB3VuYYiOTcZt7xVH8NIp1n8t9jOAA,5258
8
+ modelstat/transport.py,sha256=iskXSpVFVJNf_b3lQbTjt9-oGfXkLVdRU4TMajcRHzk,3488
9
+ modelstat/wire.py,sha256=xyyGHuEeo5H9RObXsmqxKIXturmBS7gWq0sjs8INSMo,11631
10
+ modelstat/worker.py,sha256=9UucT40opeDbE6O3smUyB0iddxbBgBl2fidtq3BN4rY,6416
11
+ modelstat_sdk-0.0.1.dist-info/METADATA,sha256=G0ru9y4HlO0AZM_Ix9yJobDXebjbjuAJc67hAZAZqRc,8564
12
+ modelstat_sdk-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
13
+ modelstat_sdk-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any