modelstat-sdk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modelstat/__init__.py ADDED
@@ -0,0 +1,94 @@
1
+ """modelstat -- a privacy-first SDK for wrapping the LLM calls your backend
2
+ already makes and shipping **redacted** usage to modelstat, without adding
3
+ latency to live requests.
4
+
5
+ The hot path (:meth:`Client.record`) does nothing but copy your already-in-hand
6
+ call into a bounded buffer and return. A background worker thread redacts,
7
+ batches, and ships off the request path. On overflow the newest record is
8
+ dropped and a counter increments -- your request is never blocked and never
9
+ grows memory unbounded.
10
+
11
+ Modes
12
+ -----
13
+ * **Local daemon (default).** Hand calls to a local modelstat daemon over
14
+ loopback; it summarizes with a local Qwen model and ships only redacted
15
+ abstracts. Raw text never leaves the machine.
16
+ * **Remote.** Ship directly to the modelstat server (no local model). With
17
+ ``raw=True``, send full floor-redacted turns for server-side summarization.
18
+
19
+ Example
20
+ -------
21
+ .. code-block:: python
22
+
23
+ from modelstat import Client, Config, LlmCall, TokenUsage
24
+
25
+ # Org-scoped ingest key binds traffic to your account; remote mode here.
26
+ cfg = Config("msk_live_...", "raw_sdk_openai").with_remote(
27
+ "https://api.modelstat.ai", raw=True
28
+ )
29
+
30
+ with Client(cfg) as ms: # shutdown() flushes on the way out
31
+ # ... after your real LLM call returns ...
32
+ ms.record(
33
+ LlmCall("openai", "session-or-trace-id")
34
+ .model_("gpt-x")
35
+ .with_tokens(TokenUsage(input=800, output=120))
36
+ .text("the prompt", "the completion")
37
+ )
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ from ._version import __version__
43
+ from .capture import LlmCall, ToolCallInput, build_batch
44
+ from .client import Client
45
+ from .config import DEFAULT_DAEMON_URL, Config, Mode, RedactionPolicy
46
+ from .redact import Redacted, redact
47
+ from .transport import FakeTransport, HttpTransport, Transport, TransportError
48
+ from .wire import (
49
+ BillingMode,
50
+ EventKind,
51
+ GitContext,
52
+ IngestBatch,
53
+ RawEvent,
54
+ TokenUsage,
55
+ ToolCallStatus,
56
+ ToolCallWire,
57
+ batch_id,
58
+ content_hash,
59
+ source_event_id,
60
+ )
61
+
62
+ __all__ = [
63
+ "__version__",
64
+ # client + config
65
+ "Client",
66
+ "Config",
67
+ "Mode",
68
+ "RedactionPolicy",
69
+ "DEFAULT_DAEMON_URL",
70
+ # capture
71
+ "LlmCall",
72
+ "ToolCallInput",
73
+ "build_batch",
74
+ # redaction
75
+ "redact",
76
+ "Redacted",
77
+ # transports
78
+ "Transport",
79
+ "HttpTransport",
80
+ "FakeTransport",
81
+ "TransportError",
82
+ # wire
83
+ "IngestBatch",
84
+ "RawEvent",
85
+ "ToolCallWire",
86
+ "TokenUsage",
87
+ "GitContext",
88
+ "EventKind",
89
+ "BillingMode",
90
+ "ToolCallStatus",
91
+ "content_hash",
92
+ "source_event_id",
93
+ "batch_id",
94
+ ]
modelstat/_version.py ADDED
@@ -0,0 +1,8 @@
1
+ """Single source of truth for the package version.
2
+
3
+ Read both at runtime (to build ``Config.client_version`` -> the wire
4
+ ``daemon_version``) and by hatchling at build time (see ``pyproject.toml``'s
5
+ ``[tool.hatch.version]``), so the two can never drift.
6
+ """
7
+
8
+ __version__ = "0.0.1"
modelstat/capture.py ADDED
@@ -0,0 +1,264 @@
1
+ """The capture surface: what a caller hands the SDK per LLM call, and the
2
+ (worker-side) conversion into wire records.
3
+
4
+ Building an :class:`LlmCall` and calling :meth:`Client.record` is the only thing
5
+ that happens on the live request path -- it must stay a cheap move into a
6
+ buffer. All of the work here (redaction, hashing, id derivation) runs later, on
7
+ the background worker, off the hot path.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import json
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime, timezone
16
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
17
+
18
+ from . import wire
19
+ from .config import Config, RedactionPolicy
20
+ from .redact import redact
21
+ from .wire import (
22
+ BillingMode,
23
+ EventKind,
24
+ GitContext,
25
+ IngestBatch,
26
+ RawEvent,
27
+ TokenUsage,
28
+ ToolCallStatus,
29
+ ToolCallWire,
30
+ )
31
+
32
+ __all__ = ["LlmCall", "ToolCallInput", "build_batch"]
33
+
34
+ # The excerpt cap for the standard (non-raw) path, in Unicode code points.
35
+ EXCERPT_MAX_CHARS = 320
36
+
37
+
38
def _now_utc() -> datetime:
    """Return the current instant as a timezone-aware UTC ``datetime``.

    Used as the ``default_factory`` for capture timestamps so every record
    carries an explicit UTC offset rather than a naive local time.
    """
    return datetime.now(tz=timezone.utc)
40
+
41
+
42
@dataclass
class ToolCallInput:
    """A single tool invocation captured alongside an LLM call.

    The SDK sits in the call path, so it sees the real arguments and result.
    Only hashes and sizes derived from them are meant to be shipped -- never
    the raw values.

    Attributes:
        name: Bare tool name (``Bash``, ``create_pr``).
        status: Outcome of the invocation.
        server: ``builtin`` or ``mcp:<server>``.
        args: The call's arguments, if any. Hashed and sized; never shipped.
        result_bytes: Byte length of the result/output (sized, never shipped).
        started_at: When the invocation began; defaults to the current UTC
            time at construction.
        ended_at: When the invocation finished, if known.
        command_families: Allowlisted command verbs for shell-ish tools
            (<=3, each <=40 chars).
    """

    name: str
    status: ToolCallStatus
    server: str = "builtin"
    args: Optional[Any] = None
    result_bytes: int = 0
    started_at: datetime = field(default_factory=_now_utc)
    ended_at: Optional[datetime] = None
    command_families: List[str] = field(default_factory=list)
63
+
64
+
65
@dataclass
class LlmCall:
    """A single captured LLM call, as handed to the SDK by the caller.

    Either pass everything as keyword arguments, or start from the two
    required fields and chain the fluent helpers (:meth:`model_`,
    :meth:`with_tokens`, :meth:`text`). ``prompt`` and ``completion`` hold raw
    text at this point; scrubbing happens later, on the worker.

    Attributes:
        provider: Name of the LLM provider that served the call.
        session_id: Trace/conversation id used to group calls into a session
            downstream.
        model: Model identifier, if known.
        kind: Event kind; defaults to ``EventKind.ASSISTANT_MESSAGE``.
        tokens: Token usage for the call.
        started_at: When the call began; defaults to the current UTC time at
            construction.
        duration_ms: Call duration in milliseconds, if measured.
        prompt: Raw prompt text, if captured.
        completion: Raw completion text, if captured.
        cwd: Working directory associated with the call, if any.
        git: Git context associated with the call, if any.
        billing: Billing mode, if known.
        tool_calls: Tool invocations made as part of this call.
    """

    provider: str
    session_id: str
    model: Optional[str] = None
    kind: EventKind = EventKind.ASSISTANT_MESSAGE
    tokens: TokenUsage = field(default_factory=TokenUsage)
    started_at: datetime = field(default_factory=_now_utc)
    duration_ms: Optional[int] = None
    prompt: Optional[str] = None
    completion: Optional[str] = None
    cwd: Optional[str] = None
    git: Optional[GitContext] = None
    billing: Optional[BillingMode] = None
    tool_calls: List[ToolCallInput] = field(default_factory=list)

    # ---- fluent setters: each mutates this instance and returns it ----------

    def model_(self, model: str) -> "LlmCall":
        """Record which model served the call.

        The trailing underscore keeps the method from colliding with the
        ``model`` field of the same name.
        """
        self.model = model
        return self

    def with_tokens(self, tokens: TokenUsage) -> "LlmCall":
        """Attach the token usage for the call."""
        self.tokens = tokens
        return self

    def text(self, prompt: str, completion: str) -> "LlmCall":
        """Attach the raw prompt and completion (redacted later, on the worker)."""
        self.prompt, self.completion = prompt, completion
        return self
106
+
107
+
108
def _truncate_chars(s: str, max_chars: int) -> str:
    """Cap ``s`` at ``max_chars`` Unicode code points.

    A string that already fits is returned untouched. Otherwise the first
    ``max_chars`` code points are kept and an ellipsis (``…``) is appended to
    mark the cut. Python slices ``str`` by code point, which the original
    author notes is the equivalent of Rust's ``chars().take(max)``.
    """
    overflows = len(s) > max_chars
    return s[:max_chars] + "…" if overflows else s
115
+
116
+
117
def _sha256_hex(data: bytes) -> str:
    """Return the lowercase hexadecimal SHA-256 digest of ``data``."""
    return hashlib.sha256(data).hexdigest()


def _hash_args(args: Optional[Any]) -> Tuple[str, str, int]:
    """Reduce a tool call's arguments to ``(args_hash, signature_hash, args_bytes)``.

    The arguments themselves are never shipped; only this triple is.

    * ``args_hash`` -- SHA-256 of the canonical JSON encoding of ``args``.
    * ``signature_hash`` -- SHA-256 of the sorted, comma-joined top-level key
      names when ``args`` is a dict; the literal ``"none"`` otherwise.
    * ``args_bytes`` -- length in bytes of the canonical JSON encoding.

    Canonical JSON is meant to match the Rust reference: compact separators,
    insertion order preserved (``sort_keys=False``), and non-ASCII characters
    written as raw UTF-8 rather than ``\\uXXXX`` escapes, which is how
    ``serde_json`` serializes strings. Without ``ensure_ascii=False`` any
    non-ASCII argument would inflate ``args_bytes`` and change ``args_hash``
    relative to the reference.

    Args:
        args: The JSON-serializable arguments, or ``None`` when the tool was
            called without any.

    Returns:
        The ``(args_hash, signature_hash, args_bytes)`` triple.
        ``("", "none", 0)`` when ``args`` is ``None``.

    Raises:
        TypeError: If ``args`` contains a value ``json.dumps`` cannot
            serialize.
    """
    if args is None:
        return ("", "none", 0)

    serialized = json.dumps(
        args, separators=(",", ":"), sort_keys=False, ensure_ascii=False
    )
    # ``surrogatepass`` keeps lone surrogates (which ``ensure_ascii=True`` used
    # to escape) from raising now that the text is emitted verbatim.
    serialized_bytes = serialized.encode("utf-8", errors="surrogatepass")
    args_hash = _sha256_hex(serialized_bytes)

    if isinstance(args, dict):
        # ``json.dumps`` accepts int/float/bool/None keys; stringify before
        # sorting/joining so such dicts no longer raise ``TypeError`` here.
        key_names = sorted(str(key) for key in args)
        signature = _sha256_hex(
            ",".join(key_names).encode("utf-8", errors="surrogatepass")
        )
    else:
        signature = "none"

    return (args_hash, signature, len(serialized_bytes))
143
+
144
+
145
def _build_excerpt(cfg: Config, call: LlmCall) -> Optional[str]:
    """Build the excerpt shipped for a call from its prompt and completion.

    The prompt and completion are joined (separated by ``\\n---\\n`` when both
    contribute text), scrubbed according to ``cfg.redaction``, and -- unless
    the configuration sends full turns -- capped at ``EXCERPT_MAX_CHARS`` code
    points.

    Redaction fails closed: the text is left unscrubbed only when the policy is
    explicitly ``RedactionPolicy.NONE``. Any other value (including a plain
    string such as ``"floor"`` passed by mistake, which the ``Config``
    dataclass does not reject) runs the redaction floor, so a misconfigured
    policy can never silently ship raw text.

    Args:
        cfg: SDK configuration; supplies the redaction policy and whether full
            turns are sent.
        call: The captured call whose text is being excerpted.

    Returns:
        The redacted (and possibly truncated) excerpt, or ``None`` when the
        call carries no text.
    """
    joined = ""
    if call.prompt is not None:
        joined += call.prompt
    if call.completion is not None:
        if joined:
            joined += "\n---\n"
        joined += call.completion
    if not joined:
        return None

    # Only an explicit opt-out skips scrubbing; everything else is redacted.
    if cfg.redaction is RedactionPolicy.NONE:
        scrubbed = joined
    else:
        scrubbed = redact(joined).text

    # Raw mode ships the full (redacted) turns for server-side summarization;
    # the standard path caps the excerpt.
    if cfg.sends_full_turns():
        return scrubbed
    return _truncate_chars(scrubbed, EXCERPT_MAX_CHARS)
169
+
170
+
171
def _event_from_call(
    cfg: Config, call: LlmCall, seq: int
) -> Tuple[RawEvent, List[ToolCallWire]]:
    """Turn one captured call into its wire event and tool-call records.

    Args:
        cfg: SDK configuration; supplies ``device_id`` and ``agent``.
        call: The captured call to convert.
        seq: Counter value folded into the event's source reference.

    Returns:
        ``(event, tool_calls)`` -- the ``RawEvent`` for the call and one
        ``ToolCallWire`` per entry in ``call.tool_calls``, in order.
    """
    started = call.started_at
    # Epoch milliseconds are assembled from integers only (whole seconds plus
    # the microsecond field) instead of multiplying the float timestamp, so
    # float rounding can never shift the millisecond and thereby change the
    # derived source event id.
    whole_seconds = int(started.timestamp())
    started_millis = whole_seconds * 1000 + started.microsecond // 1000
    event_id = wire.source_event_id(
        cfg.device_id, f"{call.session_id}::{started_millis}::{seq}"
    )

    def to_wire(index: int, tool: ToolCallInput) -> ToolCallWire:
        """Build the wire record for the tool call at position ``index``."""
        args_hash, signature_hash, args_bytes = _hash_args(tool.args)
        return ToolCallWire(
            external_call_id="tc_" + content_hash_tc(event_id, index),
            session_id=call.session_id,
            source_event_id=event_id,
            agent=cfg.agent,
            server=tool.server,
            name=tool.name,
            call_index=index,
            started_at=tool.started_at,
            status=tool.status,
            args_hash=args_hash,
            signature_hash=signature_hash,
            args_bytes=args_bytes,
            result_bytes=tool.result_bytes,
            model=call.model,
            # At most three command families are forwarded.
            command_families=list(tool.command_families[:3]),
        )

    event = RawEvent(
        source_event_id=event_id,
        ts=call.started_at,
        kind=call.kind,
        agent=cfg.agent,
        provider=call.provider,
        session_id=call.session_id,
        tokens=call.tokens,
        model=call.model,
        cwd=call.cwd,
        git=call.git,
        duration_ms=call.duration_ms,
        billing=call.billing,
        content_excerpt=_build_excerpt(cfg, call),
    )
    records = [to_wire(index, tool) for index, tool in enumerate(call.tool_calls)]
    return event, records
225
+
226
+
227
def content_hash_tc(src_event_id: str, index: int) -> str:
    """Derive the 16-character hash used in a tool call's ``external_call_id``.

    The hash is the first 16 characters of ``wire.content_hash`` applied to the
    parent event id and the tool call's position (as a decimal string). The
    original author notes this mirrors the Rust ``content_hash(...)[..16]``.

    Args:
        src_event_id: Source event id of the call that made the tool call.
        index: Zero-based position of the tool call within that call.
    """
    digest = wire.content_hash([src_event_id, str(index)])
    return digest[:16]
234
+
235
+
236
def build_batch(
    cfg: Config, calls: Iterable[LlmCall], seq: int
) -> Tuple[IngestBatch, int]:
    """Convert captured calls into one wire :class:`IngestBatch`.

    Each call consumes the next value of the ``seq`` counter (``seq + 1``,
    ``seq + 2``, ...), which keeps per-call dedupe keys distinct within a run.

    Args:
        cfg: SDK configuration; supplies ``device_id`` and ``client_version``.
        calls: The captured calls to convert, in order.
        seq: Counter value before this batch.

    Returns:
        ``(batch, seq)`` -- the built batch and the counter value after the
        last call. The counter is returned because ints are immutable and
        cannot be advanced in place; it is unchanged when ``calls`` is empty.
    """
    events: List[RawEvent] = []
    tool_calls: List[ToolCallWire] = []

    last_seq = seq
    for last_seq, call in enumerate(calls, start=seq + 1):
        event, call_tools = _event_from_call(cfg, call, last_seq)
        events.append(event)
        tool_calls.extend(call_tools)

    source_ids: List[str] = [event.source_event_id for event in events]
    batch = IngestBatch(
        batch_id=wire.batch_id(source_ids),
        device_id=cfg.device_id,
        daemon_version=cfg.client_version,
        events=events,
        tool_calls=tool_calls,
    )
    return batch, last_seq
modelstat/client.py ADDED
@@ -0,0 +1,72 @@
1
+ """The :class:`Client` facade.
2
+
3
+ A thin handle over the background :class:`Worker`. The hot path
4
+ (:meth:`Client.record`) does nothing but a non-blocking enqueue and returns; the
5
+ worker thread redacts, batches, and ships off the request path. On overflow the
6
+ newest record is dropped and a counter increments -- your request is never
7
+ blocked and never grows memory unbounded.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from types import TracebackType
13
+ from typing import Optional, Type
14
+
15
+ from .capture import LlmCall
16
+ from .config import Config
17
+ from .transport import HttpTransport, Transport
18
+ from .worker import Worker
19
+
20
+ __all__ = ["Client"]
21
+
22
+
23
class Client:
    """Public entry point of the SDK: a thin handle over a background worker.

    ``Client(cfg)`` wires the worker to the HTTP transport built from ``cfg``;
    :meth:`with_transport` accepts any :class:`Transport` instead (for example
    ``FakeTransport`` in tests). Every method simply forwards to the worker.

    The client is also a context manager: ``with Client(cfg) as ms: ...`` calls
    :meth:`shutdown` when the block exits.
    """

    def __init__(self, cfg: Config) -> None:
        transport = HttpTransport.from_config(cfg)
        self._worker = Worker(cfg, transport)

    @classmethod
    def with_transport(cls, cfg: Config, transport: Transport) -> "Client":
        """Build a client whose worker ships through ``transport``.

        ``__init__`` is bypassed on purpose so no HTTP transport is created.
        """
        client = cls.__new__(cls)
        client._worker = Worker(cfg, transport)
        return client

    def record(self, call: LlmCall) -> None:
        """Hand a captured call to the worker.

        **Hot path.** Per the SDK's contract the worker enqueues without
        blocking; when its buffer is full the call is dropped and
        :meth:`dropped` increments instead of stalling the caller.
        """
        self._worker.record(call)

    def dropped(self) -> int:
        """Return how many calls the worker reports as dropped on overflow
        (a backpressure signal)."""
        return self._worker.dropped()

    def flush(self) -> None:
        """Ask the worker to flush; blocks until buffered calls are shipped."""
        self._worker.flush()

    def shutdown(self) -> None:
        """Shut the worker down (flushes on the way out, then joins its thread)."""
        self._worker.shutdown()

    # ---- context-manager protocol --------------------------------------------

    def __enter__(self) -> "Client":
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc: Optional[BaseException],
        tb: Optional[TracebackType],
    ) -> None:
        # Returns None, so an exception raised inside the block propagates.
        self.shutdown()
modelstat/config.py ADDED
@@ -0,0 +1,135 @@
1
+ """SDK configuration: where to ship, how to authenticate, how hard to redact,
2
+ and how the background worker batches.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass, field
8
+ from enum import Enum
9
+ from typing import Optional
10
+
11
+ from ._version import __version__
12
+
13
+ __all__ = ["Mode", "RedactionPolicy", "Config", "DEFAULT_DAEMON_URL"]
14
+
15
# The default local daemon loopback URL.
DEFAULT_DAEMON_URL = "http://127.0.0.1:4319/v1/ingest"


@dataclass(frozen=True)
class Mode:
    """Where the SDK ships captured calls.

    Construct via :meth:`local_daemon` or :meth:`remote` rather than directly.
    A "local daemon" mode hands calls to a local modelstat daemon over loopback;
    the daemon summarizes with its local Qwen model and ships only redacted
    abstracts to the server -- raw text never leaves the machine. A "remote"
    mode ships directly to the modelstat server (no local daemon / no local
    model); with ``raw = True`` it sends full (still floor-redacted) turns to
    ``/v1/ingest/raw`` for server-side summarization.
    """

    # ``"local_daemon"`` or ``"remote"``.
    kind: str
    # The daemon's loopback ingest URL (local-daemon mode only).
    url: Optional[str] = None
    # Base URL, e.g. ``https://api.modelstat.ai`` (remote mode only).
    base_url: Optional[str] = None
    # When ``True``, remote mode sends full floor-redacted turns to
    # ``/v1/ingest/raw`` for server-side summarization; when ``False``, only the
    # floor-redacted <=320-char excerpt to ``/v1/ingest``.
    raw: bool = False

    @classmethod
    def local_daemon(cls, url: str = DEFAULT_DAEMON_URL) -> "Mode":
        """Hand off to a local modelstat daemon over loopback (the default)."""
        return cls(kind="local_daemon", url=url)

    @classmethod
    def remote(cls, base_url: str, raw: bool = False) -> "Mode":
        """Ship directly to the modelstat server (no local daemon)."""
        return cls(kind="remote", base_url=base_url, raw=raw)

    def endpoint(self) -> str:
        """Resolve the concrete POST endpoint for this mode.

        Local-daemon mode returns ``url`` as given. Any other ``kind`` is
        treated as remote: trailing slashes are stripped from ``base_url`` and
        ``/v1/ingest/raw`` (when ``raw``) or ``/v1/ingest`` is appended.

        Raises:
            ValueError: If the field this mode needs (``url`` for local-daemon,
                ``base_url`` for remote) is missing. This is an explicit check
                rather than an ``assert`` so it still runs under ``python -O``.
        """
        if self.kind == "local_daemon":
            if self.url is None:
                raise ValueError("local_daemon mode requires a url")
            return self.url
        # remote
        if self.base_url is None:
            raise ValueError("remote mode requires a base_url")
        base = self.base_url.rstrip("/")
        return f"{base}/v1/ingest/raw" if self.raw else f"{base}/v1/ingest"
62
+
63
+
64
class RedactionPolicy(Enum):
    """How aggressively text is scrubbed before it leaves the SDK process.

    Members:
        FLOOR: Apply the privacy floor (secrets + email + absolute paths).
            This is the default, and the floor that even "raw" mode keeps.
        NONE: Skip in-process redaction entirely. Only valid when shipping to
            a trusted local daemon that will redact, or under an explicit
            raw-data contract.
    """

    FLOOR = "floor"
    NONE = "none"
73
+
74
+
75
@dataclass
class Config:
    """SDK configuration.

    Construct with the two required arguments (``ingest_key`` and ``agent``),
    then adjust fields directly or use the ``with_*`` helpers. Defaults:
    local-daemon mode, floor redaction, a 4096-slot buffer, a 2s flush interval,
    and 256-record batches.
    """

    # Bearer credential: an org-scoped ingest key (``msk_...``) or a device
    # secret.
    ingest_key: str
    # The **agent** label for every record -- which AI tool/integration the user
    # used (e.g. ``raw_sdk_openai``, ``raw_sdk_anthropic``, ``raw_sdk_generic``).
    # Ships as the wire ``agent`` field.
    agent: str
    # Stable device/service identifier (``dev_...``). Should be stable per host
    # so dedupe keys are stable across restarts.
    device_id: str = "dev_sdk"
    # This client build's version (1..=40 chars). Ships as the wire
    # ``daemon_version`` field -- the *producer's* version (daemon or SDK), not
    # the agent's.
    client_version: str = field(default_factory=lambda: f"python-sdk/{__version__}")
    # Where to ship.
    mode: Mode = field(default_factory=Mode.local_daemon)
    # In-process redaction policy.
    redaction: RedactionPolicy = RedactionPolicy.FLOOR
    # Bounded in-memory buffer between the hot path and the worker. On overflow
    # the newest record is dropped and the dropped-counter increments -- the
    # live request is never blocked.
    buffer_capacity: int = 4096
    # Flush the buffer at least this often, in seconds.
    flush_interval: float = 2.0
    # Flush eagerly once this many records are buffered.
    flush_max_batch: int = 256

    def __post_init__(self) -> None:
        """Normalize ``client_version`` into the wire field's 1..=40 range.

        The wire field is constrained to 1..=40 chars; keeping the SDK honest
        on *both* bounds means neither an empty nor an over-long custom
        version can trip an HTTP 400 at the server. An empty (or ``None``)
        value falls back to the default ``python-sdk/<version>``; a long one
        is truncated to 40 characters.
        """
        if not self.client_version:
            self.client_version = f"python-sdk/{__version__}"
        if len(self.client_version) > 40:
            self.client_version = self.client_version[:40]

    def with_remote(self, base_url: str, raw: bool = False) -> "Config":
        """Ship directly to the modelstat server instead of a local daemon.

        ``raw = True`` opts into server-side summarization of full
        (floor-redacted) turns. Returns ``self`` for chaining.
        """
        self.mode = Mode.remote(base_url, raw)
        return self

    def with_device_id(self, device_id: str) -> "Config":
        """Override the device id. Returns ``self`` for chaining."""
        self.device_id = device_id
        return self

    def sends_full_turns(self) -> bool:
        """Whether this mode sends full (untruncated) redacted turns for
        server-side summarization (remote + raw)."""
        return self.mode.kind == "remote" and self.mode.raw
modelstat/py.typed ADDED
File without changes