modelstat-sdk 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modelstat_sdk-0.0.1/.gitignore +9 -0
- modelstat_sdk-0.0.1/PKG-INFO +158 -0
- modelstat_sdk-0.0.1/README.md +132 -0
- modelstat_sdk-0.0.1/pyproject.toml +40 -0
- modelstat_sdk-0.0.1/src/modelstat/__init__.py +94 -0
- modelstat_sdk-0.0.1/src/modelstat/_version.py +8 -0
- modelstat_sdk-0.0.1/src/modelstat/capture.py +264 -0
- modelstat_sdk-0.0.1/src/modelstat/client.py +72 -0
- modelstat_sdk-0.0.1/src/modelstat/config.py +135 -0
- modelstat_sdk-0.0.1/src/modelstat/py.typed +0 -0
- modelstat_sdk-0.0.1/src/modelstat/redact.py +150 -0
- modelstat_sdk-0.0.1/src/modelstat/transport.py +97 -0
- modelstat_sdk-0.0.1/src/modelstat/wire.py +344 -0
- modelstat_sdk-0.0.1/src/modelstat/worker.py +183 -0
- modelstat_sdk-0.0.1/tests/test_capture.py +154 -0
- modelstat_sdk-0.0.1/tests/test_client.py +99 -0
- modelstat_sdk-0.0.1/tests/test_redact.py +143 -0
- modelstat_sdk-0.0.1/tests/test_wire.py +88 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: modelstat-sdk
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Privacy-first SDK for modelstat — wrap your backend LLM calls and ship redacted usage to a local daemon or the modelstat server, without touching live-request latency.
|
|
5
|
+
Project-URL: Homepage, https://modelstat.ai
|
|
6
|
+
Project-URL: Repository, https://github.com/modelstat/modelstat
|
|
7
|
+
Author: modelstat
|
|
8
|
+
License-Expression: Apache-2.0
|
|
9
|
+
Keywords: ai,llm,observability,redaction,telemetry
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Classifier: Topic :: System :: Monitoring
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Requires-Dist: blake3
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# modelstat
|
|
28
|
+
|
|
29
|
+
**Wrap your backend's LLM calls and get spend + usage analytics — while your prompts stay on your own machine.**
|
|
30
|
+
|
|
31
|
+
`modelstat-sdk` is a privacy-first Python SDK. It captures the LLM calls your backend already makes and hands them to a **local modelstat daemon**, which **summarizes them on your machine with a local model** and ships only short, **redacted abstracts** to the modelstat analytics server. Raw prompts, completions, and tool arguments **never leave your infrastructure**.
|
|
32
|
+
|
|
33
|
+
```text
|
|
34
|
+
your backend your machine modelstat
|
|
35
|
+
┌──────────────┐ loopback ┌──────────────────────┐ HTTPS ┌───────────────┐
|
|
36
|
+
│ ms.record() │ ───────────────▶ │ modelstat daemon │ ─────────▶ │ analytics │
|
|
37
|
+
│ (non-block) │ raw stays here │ • local model │ redacted │ dashboard │
|
|
38
|
+
└──────────────┘ │ → summarize │ abstract │ (spend, by │
|
|
39
|
+
▲ │ • redact (PII/keys) │ + tokens │ project/etc) │
|
|
40
|
+
real LLM call │ • batch + retry │ └───────────────┘
|
|
41
|
+
└──────────────────────┘
|
|
42
|
+
↑ raw prompts / completions / args never cross this line ↑
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Why a local daemon?
|
|
46
|
+
|
|
47
|
+
- **Privacy by construction.** Summarization happens **on your machine**. Only a bounded, redacted abstract + token/cost numbers are uploaded — never raw text. That's what gives you content-level attribution (by project, feature, work-type) *without* sending content to a vendor.
|
|
48
|
+
- **No added request latency.** `record()` is a non-blocking enqueue into an in-memory buffer; a background worker **thread** handles redaction, the daemon hand-off, batching, and shipping entirely off your request path. If the buffer fills, the newest record is dropped and a counter ticks up — your request is **never** blocked.
|
|
49
|
+
- **One daemon, many producers.** Every service instance points at the same local daemon; the daemon owns the local model, durable retry, and the upload. Your app stays a thin, dependency-light client (one runtime dependency: `blake3`).
|
|
50
|
+
|
|
51
|
+
## Install
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install modelstat-sdk
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
import modelstat
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
The import package is `modelstat`; the distribution on PyPI is `modelstat-sdk`. Requires Python 3.9+.
|
|
62
|
+
|
|
63
|
+
## Guide: run a daemon locally, then point the SDK at it
|
|
64
|
+
|
|
65
|
+
### 1. Run the modelstat daemon
|
|
66
|
+
|
|
67
|
+
The daemon is the open-source `modelstat` daemon. It runs as a background service, downloads a small local model on first start, and listens on loopback for SDK traffic.
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
# zero-install: starts the background service + fetches the local model
|
|
71
|
+
npx modelstat@latest
|
|
72
|
+
|
|
73
|
+
# …or install it globally
|
|
74
|
+
npm i -g modelstat && modelstat start
|
|
75
|
+
|
|
76
|
+
modelstat status # confirm it's running (and which loopback port it uses)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
By default the daemon listens on `http://127.0.0.1:4319`.
|
|
80
|
+
|
|
81
|
+
### 2. Point the SDK at the daemon
|
|
82
|
+
|
|
83
|
+
Local-daemon mode is the **default** — supply your org ingest key and an agent label and you're pointed at the local daemon already:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from modelstat import Client, Config
|
|
87
|
+
|
|
88
|
+
cfg = Config("msk_live_…", "raw_sdk_openai") # defaults to the local daemon
|
|
89
|
+
ms = Client(cfg)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Changed the daemon's port? Set the mode explicitly, passing the full ingest URL with your port in place of `4319`:
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from modelstat import Config, Mode
|
|
96
|
+
|
|
97
|
+
cfg = Config("msk_live_…", "raw_sdk_openai")
|
|
98
|
+
cfg.mode = Mode.local_daemon("http://127.0.0.1:4319/v1/ingest")
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### 3. Record your calls
|
|
102
|
+
|
|
103
|
+
After each real LLM call returns, hand the SDK what you already have (provider, session id, model, token usage, and the prompt/completion text). `record()` is non-blocking; use the client as a context manager so it flushes on the way out:
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from modelstat import Client, Config, LlmCall, TokenUsage
|
|
107
|
+
|
|
108
|
+
cfg = Config("msk_live_…", "raw_sdk_openai")
|
|
109
|
+
|
|
110
|
+
with Client(cfg) as ms: # shutdown() flushes on exit
|
|
111
|
+
ms.record(
|
|
112
|
+
LlmCall("openai", "session-or-trace-id") # provider, grouping id
|
|
113
|
+
.model_("gpt-x")
|
|
114
|
+
.with_tokens(TokenUsage(input=800, output=120))
|
|
115
|
+
.text("the prompt", "the completion") # raw — summarized locally, never uploaded raw
|
|
116
|
+
)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
You can also construct an `LlmCall` with plain keyword arguments
|
|
120
|
+
(`LlmCall(provider="openai", session_id="…", model="gpt-x", tokens=TokenUsage(input=800))`).
|
|
121
|
+
|
|
122
|
+
Call `ms.flush()` to block until buffered calls are shipped, `ms.shutdown()` to flush and stop the worker thread, and `ms.dropped()` to read the overflow counter.
|
|
123
|
+
|
|
124
|
+
**What flows where:** your prompt + completion go to the **local daemon only**. The daemon summarizes them with its local model, redacts, and uploads just the abstract + token/cost metadata to modelstat. The `agent` label (`raw_sdk_openai`) records which integration produced the calls; `session_id` groups calls into a conversation/session downstream.
|
|
125
|
+
|
|
126
|
+
## Modes
|
|
127
|
+
|
|
128
|
+
| Mode | Where summarization runs | What leaves your machine | Use when |
|
|
129
|
+
|---|---|---|---|
|
|
130
|
+
| **Local daemon** *(default)* | Your machine (daemon's local model) | Redacted abstract + metadata only | Maximum privacy; a daemon can run on/near the host |
|
|
131
|
+
| **Remote** | modelstat server | Floor-redacted full turns (`raw=True`), or just the ≤320-char redacted excerpt (`raw=False`) | Serverless / can't run a local model; you accept server-side summarization |
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
# Remote (no local daemon / no local model):
|
|
135
|
+
cfg = Config("msk_live_…", "raw_sdk_openai").with_remote(
|
|
136
|
+
"https://api.modelstat.ai", raw=True
|
|
137
|
+
)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Privacy floor (always on)
|
|
141
|
+
|
|
142
|
+
Before any bytes leave the SDK process — in **every** mode — an in-process redaction floor scrubs secrets (provider keys, tokens, JWTs, PEM blocks, DB passwords, …), emails, and absolute home paths. "Raw" mode means *full turns*, not *leaked credentials* — the floor still runs. Tool calls ship only hashes, byte sizes, and allowlisted command verbs — never raw args, results, paths, or command text.
|
|
143
|
+
|
|
144
|
+
What the floor redacts: Anthropic / OpenAI / Google / AWS / GitHub / Slack / Stripe / Discord keys and tokens, JWTs, PEM private-key blocks, modelstat device secrets, generic `NAME_KEY=value` env secrets (the name is kept, the value is dropped), `Bearer` tokens, database-URL passwords, lone 40-char AWS-style secret blobs, email addresses, and absolute `/Users/…`, `/home/…`, and `C:\Users\…` paths.
|
|
145
|
+
|
|
146
|
+
## What's live today (v0.0.1)
|
|
147
|
+
|
|
148
|
+
Early release — the honest state, so nothing surprises you:
|
|
149
|
+
|
|
150
|
+
- ✅ **SDK**: zero-latency capture, the redaction floor, batching/backpressure, and both transports are implemented and tested.
|
|
151
|
+
- 🚧 **Daemon loopback ingest** (the receiving side of local-daemon mode) is in active development. The daemon already runs a local model and summarizes today; the SDK-push endpoint is landing next. **Until it ships, use remote mode** — the local-daemon API is stable, so your code won't change when it does.
|
|
152
|
+
- 🚧 **`/v1/ingest/raw`** (server-side summarization for `raw=True`) is rolling out; `raw=False` against `/v1/ingest` works today for token/cost telemetry.
|
|
153
|
+
|
|
154
|
+
Progress: https://github.com/modelstat/modelstat
|
|
155
|
+
|
|
156
|
+
## License
|
|
157
|
+
|
|
158
|
+
Apache-2.0.
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# modelstat
|
|
2
|
+
|
|
3
|
+
**Wrap your backend's LLM calls and get spend + usage analytics — while your prompts stay on your own machine.**
|
|
4
|
+
|
|
5
|
+
`modelstat-sdk` is a privacy-first Python SDK. It captures the LLM calls your backend already makes and hands them to a **local modelstat daemon**, which **summarizes them on your machine with a local model** and ships only short, **redacted abstracts** to the modelstat analytics server. Raw prompts, completions, and tool arguments **never leave your infrastructure**.
|
|
6
|
+
|
|
7
|
+
```text
|
|
8
|
+
your backend your machine modelstat
|
|
9
|
+
┌──────────────┐ loopback ┌──────────────────────┐ HTTPS ┌───────────────┐
|
|
10
|
+
│ ms.record() │ ───────────────▶ │ modelstat daemon │ ─────────▶ │ analytics │
|
|
11
|
+
│ (non-block) │ raw stays here │ • local model │ redacted │ dashboard │
|
|
12
|
+
└──────────────┘ │ → summarize │ abstract │ (spend, by │
|
|
13
|
+
▲ │ • redact (PII/keys) │ + tokens │ project/etc) │
|
|
14
|
+
real LLM call │ • batch + retry │ └───────────────┘
|
|
15
|
+
└──────────────────────┘
|
|
16
|
+
↑ raw prompts / completions / args never cross this line ↑
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Why a local daemon?
|
|
20
|
+
|
|
21
|
+
- **Privacy by construction.** Summarization happens **on your machine**. Only a bounded, redacted abstract + token/cost numbers are uploaded — never raw text. That's what gives you content-level attribution (by project, feature, work-type) *without* sending content to a vendor.
|
|
22
|
+
- **No added request latency.** `record()` is a non-blocking enqueue into an in-memory buffer; a background worker **thread** handles redaction, the daemon hand-off, batching, and shipping entirely off your request path. If the buffer fills, the newest record is dropped and a counter ticks up — your request is **never** blocked.
|
|
23
|
+
- **One daemon, many producers.** Every service instance points at the same local daemon; the daemon owns the local model, durable retry, and the upload. Your app stays a thin, dependency-light client (one runtime dependency: `blake3`).
|
|
24
|
+
|
|
25
|
+
## Install
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install modelstat-sdk
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import modelstat
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
The import package is `modelstat`; the distribution on PyPI is `modelstat-sdk`. Requires Python 3.9+.
|
|
36
|
+
|
|
37
|
+
## Guide: run a daemon locally, then point the SDK at it
|
|
38
|
+
|
|
39
|
+
### 1. Run the modelstat daemon
|
|
40
|
+
|
|
41
|
+
The daemon is the open-source `modelstat` daemon. It runs as a background service, downloads a small local model on first start, and listens on loopback for SDK traffic.
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# zero-install: starts the background service + fetches the local model
|
|
45
|
+
npx modelstat@latest
|
|
46
|
+
|
|
47
|
+
# …or install it globally
|
|
48
|
+
npm i -g modelstat && modelstat start
|
|
49
|
+
|
|
50
|
+
modelstat status # confirm it's running (and which loopback port it uses)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
By default the daemon listens on `http://127.0.0.1:4319`.
|
|
54
|
+
|
|
55
|
+
### 2. Point the SDK at the daemon
|
|
56
|
+
|
|
57
|
+
Local-daemon mode is the **default** — supply your org ingest key and an agent label and you're pointed at the local daemon already:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from modelstat import Client, Config
|
|
61
|
+
|
|
62
|
+
cfg = Config("msk_live_…", "raw_sdk_openai") # defaults to the local daemon
|
|
63
|
+
ms = Client(cfg)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Changed the daemon's port? Set the mode explicitly, passing the full ingest URL with your port in place of `4319`:
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from modelstat import Config, Mode
|
|
70
|
+
|
|
71
|
+
cfg = Config("msk_live_…", "raw_sdk_openai")
|
|
72
|
+
cfg.mode = Mode.local_daemon("http://127.0.0.1:4319/v1/ingest")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### 3. Record your calls
|
|
76
|
+
|
|
77
|
+
After each real LLM call returns, hand the SDK what you already have (provider, session id, model, token usage, and the prompt/completion text). `record()` is non-blocking; use the client as a context manager so it flushes on the way out:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from modelstat import Client, Config, LlmCall, TokenUsage
|
|
81
|
+
|
|
82
|
+
cfg = Config("msk_live_…", "raw_sdk_openai")
|
|
83
|
+
|
|
84
|
+
with Client(cfg) as ms: # shutdown() flushes on exit
|
|
85
|
+
ms.record(
|
|
86
|
+
LlmCall("openai", "session-or-trace-id") # provider, grouping id
|
|
87
|
+
.model_("gpt-x")
|
|
88
|
+
.with_tokens(TokenUsage(input=800, output=120))
|
|
89
|
+
.text("the prompt", "the completion") # raw — summarized locally, never uploaded raw
|
|
90
|
+
)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
You can also construct an `LlmCall` with plain keyword arguments
|
|
94
|
+
(`LlmCall(provider="openai", session_id="…", model="gpt-x", tokens=TokenUsage(input=800))`).
|
|
95
|
+
|
|
96
|
+
Call `ms.flush()` to block until buffered calls are shipped, `ms.shutdown()` to flush and stop the worker thread, and `ms.dropped()` to read the overflow counter.
|
|
97
|
+
|
|
98
|
+
**What flows where:** your prompt + completion go to the **local daemon only**. The daemon summarizes them with its local model, redacts, and uploads just the abstract + token/cost metadata to modelstat. The `agent` label (`raw_sdk_openai`) records which integration produced the calls; `session_id` groups calls into a conversation/session downstream.
|
|
99
|
+
|
|
100
|
+
## Modes
|
|
101
|
+
|
|
102
|
+
| Mode | Where summarization runs | What leaves your machine | Use when |
|
|
103
|
+
|---|---|---|---|
|
|
104
|
+
| **Local daemon** *(default)* | Your machine (daemon's local model) | Redacted abstract + metadata only | Maximum privacy; a daemon can run on/near the host |
|
|
105
|
+
| **Remote** | modelstat server | Floor-redacted full turns (`raw=True`), or just the ≤320-char redacted excerpt (`raw=False`) | Serverless / can't run a local model; you accept server-side summarization |
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
# Remote (no local daemon / no local model):
|
|
109
|
+
cfg = Config("msk_live_…", "raw_sdk_openai").with_remote(
|
|
110
|
+
"https://api.modelstat.ai", raw=True
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Privacy floor (always on)
|
|
115
|
+
|
|
116
|
+
Before any bytes leave the SDK process — in **every** mode — an in-process redaction floor scrubs secrets (provider keys, tokens, JWTs, PEM blocks, DB passwords, …), emails, and absolute home paths. "Raw" mode means *full turns*, not *leaked credentials* — the floor still runs. Tool calls ship only hashes, byte sizes, and allowlisted command verbs — never raw args, results, paths, or command text.
|
|
117
|
+
|
|
118
|
+
What the floor redacts: Anthropic / OpenAI / Google / AWS / GitHub / Slack / Stripe / Discord keys and tokens, JWTs, PEM private-key blocks, modelstat device secrets, generic `NAME_KEY=value` env secrets (the name is kept, the value is dropped), `Bearer` tokens, database-URL passwords, lone 40-char AWS-style secret blobs, email addresses, and absolute `/Users/…`, `/home/…`, and `C:\Users\…` paths.
|
|
119
|
+
|
|
120
|
+
## What's live today (v0.0.1)
|
|
121
|
+
|
|
122
|
+
Early release — the honest state, so nothing surprises you:
|
|
123
|
+
|
|
124
|
+
- ✅ **SDK**: zero-latency capture, the redaction floor, batching/backpressure, and both transports are implemented and tested.
|
|
125
|
+
- 🚧 **Daemon loopback ingest** (the receiving side of local-daemon mode) is in active development. The daemon already runs a local model and summarizes today; the SDK-push endpoint is landing next. **Until it ships, use remote mode** — the local-daemon API is stable, so your code won't change when it does.
|
|
126
|
+
- 🚧 **`/v1/ingest/raw`** (server-side summarization for `raw=True`) is rolling out; `raw=False` against `/v1/ingest` works today for token/cost telemetry.
|
|
127
|
+
|
|
128
|
+
Progress: https://github.com/modelstat/modelstat
|
|
129
|
+
|
|
130
|
+
## License
|
|
131
|
+
|
|
132
|
+
Apache-2.0.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "modelstat-sdk"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Privacy-first SDK for modelstat — wrap your backend LLM calls and ship redacted usage to a local daemon or the modelstat server, without touching live-request latency."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
keywords = ["llm", "observability", "telemetry", "redaction", "ai"]
|
|
13
|
+
authors = [{ name = "modelstat" }]
|
|
14
|
+
dependencies = ["blake3"]
|
|
15
|
+
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: Apache Software License",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3.13",
|
|
27
|
+
"Topic :: Software Development :: Libraries",
|
|
28
|
+
"Topic :: System :: Monitoring",
|
|
29
|
+
"Typing :: Typed",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://modelstat.ai"
|
|
34
|
+
Repository = "https://github.com/modelstat/modelstat"
|
|
35
|
+
|
|
36
|
+
[tool.hatch.version]
|
|
37
|
+
path = "src/modelstat/_version.py"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/modelstat"]
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""modelstat -- a privacy-first SDK for wrapping the LLM calls your backend
|
|
2
|
+
already makes and shipping **redacted** usage to modelstat, without adding
|
|
3
|
+
latency to live requests.
|
|
4
|
+
|
|
5
|
+
The hot path (:meth:`Client.record`) does nothing but copy your already-in-hand
|
|
6
|
+
call into a bounded buffer and return. A background worker thread redacts,
|
|
7
|
+
batches, and ships off the request path. On overflow the newest record is
|
|
8
|
+
dropped and a counter increments -- your request is never blocked and never
|
|
9
|
+
grows memory unbounded.
|
|
10
|
+
|
|
11
|
+
Modes
|
|
12
|
+
-----
|
|
13
|
+
* **Local daemon (default).** Hand calls to a local modelstat daemon over
|
|
14
|
+
loopback; it summarizes with a local Qwen model and ships only redacted
|
|
15
|
+
abstracts. Raw text never leaves the machine.
|
|
16
|
+
* **Remote.** Ship directly to the modelstat server (no local model). With
|
|
17
|
+
``raw=True``, send full floor-redacted turns for server-side summarization.
|
|
18
|
+
|
|
19
|
+
Example
|
|
20
|
+
-------
|
|
21
|
+
.. code-block:: python
|
|
22
|
+
|
|
23
|
+
from modelstat import Client, Config, LlmCall, TokenUsage
|
|
24
|
+
|
|
25
|
+
# Org-scoped ingest key binds traffic to your account; remote mode here.
|
|
26
|
+
cfg = Config("msk_live_...", "raw_sdk_openai").with_remote(
|
|
27
|
+
"https://api.modelstat.ai", raw=True
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
with Client(cfg) as ms: # shutdown() flushes on the way out
|
|
31
|
+
# ... after your real LLM call returns ...
|
|
32
|
+
ms.record(
|
|
33
|
+
LlmCall("openai", "session-or-trace-id")
|
|
34
|
+
.model_("gpt-x")
|
|
35
|
+
.with_tokens(TokenUsage(input=800, output=120))
|
|
36
|
+
.text("the prompt", "the completion")
|
|
37
|
+
)
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
from __future__ import annotations
|
|
41
|
+
|
|
42
|
+
from ._version import __version__
|
|
43
|
+
from .capture import LlmCall, ToolCallInput, build_batch
|
|
44
|
+
from .client import Client
|
|
45
|
+
from .config import DEFAULT_DAEMON_URL, Config, Mode, RedactionPolicy
|
|
46
|
+
from .redact import Redacted, redact
|
|
47
|
+
from .transport import FakeTransport, HttpTransport, Transport, TransportError
|
|
48
|
+
from .wire import (
|
|
49
|
+
BillingMode,
|
|
50
|
+
EventKind,
|
|
51
|
+
GitContext,
|
|
52
|
+
IngestBatch,
|
|
53
|
+
RawEvent,
|
|
54
|
+
TokenUsage,
|
|
55
|
+
ToolCallStatus,
|
|
56
|
+
ToolCallWire,
|
|
57
|
+
batch_id,
|
|
58
|
+
content_hash,
|
|
59
|
+
source_event_id,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
__all__ = [
|
|
63
|
+
"__version__",
|
|
64
|
+
# client + config
|
|
65
|
+
"Client",
|
|
66
|
+
"Config",
|
|
67
|
+
"Mode",
|
|
68
|
+
"RedactionPolicy",
|
|
69
|
+
"DEFAULT_DAEMON_URL",
|
|
70
|
+
# capture
|
|
71
|
+
"LlmCall",
|
|
72
|
+
"ToolCallInput",
|
|
73
|
+
"build_batch",
|
|
74
|
+
# redaction
|
|
75
|
+
"redact",
|
|
76
|
+
"Redacted",
|
|
77
|
+
# transports
|
|
78
|
+
"Transport",
|
|
79
|
+
"HttpTransport",
|
|
80
|
+
"FakeTransport",
|
|
81
|
+
"TransportError",
|
|
82
|
+
# wire
|
|
83
|
+
"IngestBatch",
|
|
84
|
+
"RawEvent",
|
|
85
|
+
"ToolCallWire",
|
|
86
|
+
"TokenUsage",
|
|
87
|
+
"GitContext",
|
|
88
|
+
"EventKind",
|
|
89
|
+
"BillingMode",
|
|
90
|
+
"ToolCallStatus",
|
|
91
|
+
"content_hash",
|
|
92
|
+
"source_event_id",
|
|
93
|
+
"batch_id",
|
|
94
|
+
]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Single source of truth for the package version.
|
|
2
|
+
|
|
3
|
+
Read both at runtime (to build ``Config.client_version`` -> the wire
|
|
4
|
+
``daemon_version``) and by hatchling at build time (see ``pyproject.toml``'s
|
|
5
|
+
``[tool.hatch.version]``), so the two can never drift.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.0.1"
|