scopecall-py 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scopecall/__init__.py ADDED
@@ -0,0 +1,81 @@
1
+ """ScopeCall — source-available, self-hostable AI observability for Python.
2
+
3
+ Quick start:
4
+
5
+ import scopecall
6
+ from openai import OpenAI
7
+
8
+ sdk = scopecall.init(
9
+ api_key="sc_live_xxx",
10
+ endpoint="http://localhost:8080/v1/ingest",
11
+ )
12
+
13
+ # Native OpenAI / Anthropic instrumentation:
14
+ openai_client = sdk.instrument(OpenAI())
15
+
16
+ with sdk.trace("support-agent", user_id="user_123") as ctx:
17
+ response = openai_client.chat.completions.create(
18
+ model="gpt-4o-mini",
19
+ messages=[{"role": "user", "content": "Help me with my refund"}],
20
+ )
21
+
22
+ # Manual API (LangChain / LlamaIndex / RAG / custom wrappers):
23
+ with sdk.trace("custom-agent", user_id="user_456"):
24
+ sdk.record_llm_call(
25
+ model="gpt-4o-mini",
26
+ provider="openai",
27
+ input_tokens=120, output_tokens=48,
28
+ latency_ms=842,
29
+ input_text="Help me with my refund",
30
+ output_text="...",
31
+ )
32
+
33
+ sdk.close() # graceful shutdown — flushes the queue
34
+
35
+
36
+ API surface:
37
+
38
+ init(...) → ScopeCallSDK instance
39
+ ScopeCallSDK → trace(name) / workflow(name) /
40
+ instrument(client, provider="openai"|"anthropic") /
41
+ record_llm_call(...) / add_redaction_pattern(...) /
42
+ flush() / close()
43
+ ScopeCallConfig → typed config dataclass for dependency-injection style
44
+ ConfigError → raised when init() gets an invalid config
45
+ LLMEvent → wire-format dataclass (advanced — usually emitted
46
+ for you by record_llm_call or the instrumentations)
47
+
48
+
49
+ Migrating from scopecall v0.1.x:
50
+
51
+ v0.1 used module-level globals (`scopecall.init(); scopecall.trace(...)`).
52
+ v0.2 returns an instance from `init()`. The two changes most likely to
53
+ break callers:
54
+
55
+ OLD: scopecall.init(api_key="...") # module-level
56
+ with scopecall.trace(feature="x"):
57
+ ...
58
+
59
+ NEW: sdk = scopecall.init(api_key="...", # endpoint REQUIRED now
60
+ endpoint="http://localhost:8080/v1/ingest")
61
+ with sdk.trace("x"): # name is positional
62
+ ...
63
+
64
+ See CHANGELOG.md → v0.2.0 for the full migration guide.
65
+ """
66
+
67
+ from ._config import ConfigError, ScopeCallConfig
68
+ from ._context import TraceContext
69
+ from ._sdk import ScopeCallSDK, init
70
+ from ._version import __version__
71
+ from .wire._event import LLMEvent
72
+
73
+ __all__ = [
74
+ "init",
75
+ "ScopeCallSDK",
76
+ "ScopeCallConfig",
77
+ "ConfigError",
78
+ "TraceContext",
79
+ "LLMEvent",
80
+ "__version__",
81
+ ]
scopecall/_config.py ADDED
@@ -0,0 +1,129 @@
1
+ """SDK configuration.
2
+
3
+ Matches the TS SDK's `ScopeCallConfig` shape (sdks/typescript/src/config.ts)
4
+ field-for-field where it makes sense. Naming follows Python conventions
5
+ (`snake_case`, `bool` defaults) — the field set itself is parity.
6
+
7
+ Round-8 review made `endpoint` required when `api_key` is set: a missing
8
+ endpoint used to silently default to https://ingest.scopecall.com/v1/ingest
9
+ which doesn't exist yet (hosted Cloud isn't live). Python now follows the
10
+ same contract — fail loud with a `ConfigError` that names the fix.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass
16
+
17
+
18
class ConfigError(ValueError):
    """Signal that the configuration handed to `init(...)` cannot work.

    Deriving from `ValueError` keeps pre-existing `except ValueError`
    handlers working, while the dedicated type lets careful callers tell
    configuration mistakes apart from unrelated `ValueError`s.
    """
25
+
26
+
27
@dataclass
class ScopeCallConfig:
    """Typed SDK configuration.

    The transport is derived from the fields by the `mode` property with
    this precedence: `disabled` > `debug` > `output` > API (`api_key` +
    `endpoint`). `__post_init__` normalises values that would otherwise
    stall the background exporter.
    """

    # ── Transport selection ──────────────────────────────────────────────
    # At least one of api_key / output / debug must be set (unless
    # `disabled`). When several are set, `mode` picks debug first, then
    # output, then the API transport.
    api_key: str | None = None

    # `endpoint` is REQUIRED when api_key is set (Round-8 review). For
    # self-hosted, point at the Rust ingest URL, e.g.
    # http://localhost:8080/v1/ingest. For hosted Cloud — not yet live —
    # a default will be reintroduced.
    endpoint: str | None = None

    # Debug mode pretty-prints to stdout instead of shipping events.
    # Useful during integration. Overrides api_key + output.
    debug: bool = False

    # File mode appends NDJSON events to the given path. Useful for local
    # batch capture without a running ingest service.
    output: str | None = None

    # ── Behavior ─────────────────────────────────────────────────────────
    environment: str = "production"
    redact_pii: bool = True
    capture_content: bool = True

    # ── Auto-flush ───────────────────────────────────────────────────────
    # Background thread flushes the queue this often (seconds). 5 s aligns
    # with the TS SDK's flushIntervalMs=5000 default. The first-run UI's
    # 3 s pre-first-call poll cadence is intentionally faster than this
    # so the dashboard catches the first trace within ~8 s end-to-end.
    flush_interval: float = 5.0
    # Maximum number of events shipped per request / write.
    batch_size: int = 50
    # Capacity of the in-memory export queue.
    queue_max_size: int = 10_000
    # Number of delivery attempts per batch in API mode.
    max_retries: int = 3

    # ── Off-switch ───────────────────────────────────────────────────────
    # When True, `init()` returns a no-op SDK that swallows every call.
    # Useful in tests that import production code paths but don't want
    # network IO. Mirrors TS `ScopeCallConfig.disabled`.
    disabled: bool = False

    # ── Defaults applied to every event ──────────────────────────────────
    # Each of these is overridable per-trace via sdk.trace(...).
    default_feature: str | None = None
    default_user_id: str | None = None
    default_session_id: str | None = None

    # Round-4 review (TS): default_prompt_version tags every call with a
    # build/commit/release identifier when the app has a single canonical
    # prompt set. Per-trace prompt_version wins, then parent trace's
    # value, then this default, then None.
    default_prompt_version: str | None = None

    def __post_init__(self) -> None:
        """Clamp values that would break the background flush loop."""
        # Serverless guard: a zero or negative interval would spin the
        # flush thread. Clamp to 0.1 s rather than reject — the user
        # probably meant "flush often" and we want to be forgiving.
        if self.flush_interval <= 0:
            self.flush_interval = 0.1
        # Same forgiving policy for the batch size: a batch of zero (or
        # fewer) events would make every drain a no-op, so queued events
        # would never be shipped.
        if self.batch_size < 1:
            self.batch_size = 1

    @property
    def mode(self) -> str:
        """Which transport `init()` should select for this config.

        Returns one of "noop", "console", "file" or "api", checked in that
        order of precedence.
        """
        if self.disabled:
            return "noop"
        if self.debug:
            return "console"
        if self.output:
            return "file"
        return "api"
97
+
98
+
99
def validate(config: ScopeCallConfig) -> None:
    """Reject configs that cannot yield a working SDK.

    Accepted shapes:
      * `disabled=True`      → no-op SDK; nothing is ever sent, so nothing
                               is checked.
      * `debug=True`         → console mode (no api_key needed).
      * `output=<path>`      → file mode (no api_key needed).
      * `api_key + endpoint` → HTTP mode (BOTH required since Round-8).

    Raises:
        ConfigError: when no transport is selected, or when `api_key` is
            given without `endpoint`.
    """
    # Any of these switches picks a transport (or none at all) that works
    # without credentials, so there is nothing further to check.
    if config.disabled or config.debug or config.output:
        return

    # HTTP mode: fully specified, nothing to complain about.
    if config.api_key and config.endpoint:
        return

    if not config.api_key:
        raise ConfigError(
            "scopecall.init() requires one of: api_key=..., debug=True, or output=<path>."
        )

    # Round-8: endpoint is required alongside api_key. No silent fallback
    # to a hosted-Cloud URL that doesn't exist yet.
    raise ConfigError(
        "scopecall.init(api_key=...) requires endpoint=... "
        "Self-hosted: point at your ingest service, e.g. "
        "endpoint='http://localhost:8080/v1/ingest'. "
        "(ScopeCall Cloud is not yet available; a managed default "
        "endpoint will return in a future release.)"
    )
scopecall/_context.py ADDED
@@ -0,0 +1,131 @@
1
+ """Trace context — `contextvars` propagation so nested `sdk.trace()` blocks
2
+ chain correctly through sync code, async code, FastAPI request handlers,
3
+ asyncio tasks, and background workers.
4
+
5
+ Why `contextvars` (PEP 567) and not threadlocals: thread-locals don't
6
+ propagate across `await` boundaries by default. `contextvars.ContextVar`
7
+ DOES propagate across `await` and into `asyncio.create_task()`, which is
8
+ the table-stakes property for any AI backend using `AsyncOpenAI` /
9
+ `AsyncAnthropic`. The reviewer correctly called this out as a P0.
10
+
11
+ Each `sdk.trace(name)` call:
12
+
13
+ 1. Generates a new `span_id` for itself.
14
+ 2. Reads the current `_current_trace` ContextVar (if any) to find the
15
+ parent's `trace_id` + `span_id`. If there is one, inherit
16
+ `trace_id`; otherwise mint a fresh one.
17
+ 3. Sets the new `TraceContext` as `_current_trace` for the body of the
18
+ block.
19
+ 4. Resets `_current_trace` on exit so nesting unwinds cleanly.
20
+
21
+ The block ALSO emits a synthetic workflow event on exit — see
22
+ `scopecall._sdk.ScopeCallSDK.trace` for the call site. The event is
23
+ what the dashboard's Flow Map and trace tree render as the parent
24
+ "workflow" node. Without it, child LLM rows would have a
25
+ `parent_span_id` that points at nothing in ClickHouse, and the
26
+ flow-map JOIN finds no parent.
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import uuid
32
+ from contextvars import ContextVar
33
+ from dataclasses import dataclass, field
34
+
35
+
36
@dataclass
class TraceContext:
    """State carried by one `sdk.trace()` block.

    This is the object yielded by `with sdk.trace(...) as ctx:`; bespoke
    instrumentation can read its IDs to link custom spans to the block.
    """

    # Shared by every span in the same trace tree.
    trace_id: str

    # Identifies this particular `sdk.trace()` block; anything emitted
    # inside the block points at it via `parent_span_id`.
    span_id: str

    # span_id of the enclosing `sdk.trace()` block, or None for a root
    # block.
    parent_span_id: str | None

    # Human-readable label of the block. It is also the default
    # feature_name of the synthetic workflow event emitted on block exit,
    # e.g. `sdk.trace("chat-api", ...)` yields feature_name='chat-api'.
    name: str | None

    # Prompt version set on this trace. None means "fall back": parent
    # trace's value, then config.default_prompt_version, then None — the
    # same precedence as the TS SDK.
    prompt_version: str | None = None

    # Per-trace overrides; None defers to the config defaults when the
    # event is emitted.
    user_id: str | None = None
    session_id: str | None = None
    feature_name: str | None = None

    # Wall-clock start (ms since epoch), used to compute the workflow
    # span's latency when the block exits.
    start_time_ms: float = 0.0
76
+
77
+
78
+ # Module-level ContextVar. The reset-token pattern below is what
79
+ # guarantees nested traces unwind in the right order even when the user
80
+ # raises an exception inside the body.
81
+ _current_trace: ContextVar[TraceContext | None] = ContextVar(
82
+ "scopecall_current_trace", default=None
83
+ )
84
+
85
+
86
def get_current() -> TraceContext | None:
    """Look up the innermost active `TraceContext`.

    Returns None when no `sdk.trace()` block is active in the current
    context. Provider instrumentations and the manual API helpers use
    this to find the parent span for an outgoing event.
    """
    active = _current_trace.get()
    return active
94
+
95
+
96
def push(ctx: TraceContext) -> object:
    """Make `ctx` the current trace and hand back the reset token.

    Whoever calls this must pass the token to `pop()` in a `finally`
    block. The SDK's `trace()` context manager already does so, which is
    why manual callers rarely need push/pop themselves.
    """
    token = _current_trace.set(ctx)
    return token
104
+
105
+
106
def pop(token: object) -> None:
    """Undo the matching `push` by resetting the ContextVar with `token`.

    `ContextVar.reset()` restores the value that was current before the
    `set()` call that produced the token, so nested traces unwind in the
    right order even when the body raised. Per the `contextvars` docs it
    raises if the token was already used or was created in a different
    context, so each token must be popped exactly once.

    The parameter is typed `object` rather than `Token`: the token class
    isn't easily constructible in user code, and exposing it would
    invite accidental forgery.
    """
    _current_trace.reset(token)  # type: ignore[arg-type]
117
+
118
+
119
def new_span_id() -> str:
    """Return a fresh span ID of 16 lowercase hex characters.

    The width follows the OTel convention (8-byte span IDs rendered as
    16 hex chars), also used by the TS SDK, so IDs stay interoperable
    once an OTel bridge ships.
    """
    full_hex = uuid.uuid4().hex
    return full_hex[:16]
127
+
128
+
129
def new_trace_id() -> str:
    """Return a fresh trace ID of 32 lowercase hex characters (OTel / TS SDK width)."""
    # Equivalent to `UUID.hex`: the 128-bit value zero-padded to 32 hex digits.
    return format(uuid.uuid4().int, "032x")
scopecall/_exporter.py ADDED
@@ -0,0 +1,273 @@
1
+ """Background exporter — circular buffer + auto-flush + HTTP transport.
2
+
3
+ Architecturally identical to the TS SDK's
4
+ `sdks/typescript/src/exporter.ts`:
5
+
6
+ - A bounded in-memory queue (drops oldest on overflow).
7
+ - A background thread that wakes up every `flush_interval` seconds (or
8
+ immediately on `.flush()`), drains up to `batch_size` events, and
9
+ posts them as one HTTP request.
10
+ - Auto-flush is enabled by default (Round-5 review P0 — without it,
11
+ long-running servers queued events forever and no traces ever
12
+ appeared in the dashboard).
13
+ - `.close()` signals shutdown, wakes the flush thread so it drains the
14
+ remaining events, and joins the thread within `timeout` seconds.
15
+
16
+ Why a thread (not asyncio):
17
+ Python's `asyncio` doesn't run in non-async contexts (e.g. a sync
18
+ Flask request handler in a pre-3.12 app), and we have to work in both.
19
+ A daemon thread is the lowest-common-denominator that works for
20
+ sync code, async code, and background scripts. The thread holds a
21
+ `queue.Queue` which is itself thread-safe; the synchronisation cost
22
+ is negligible compared to the HTTP latency we're hiding.
23
+
24
+ Why httpx (not requests):
25
+ httpx supports both sync and async with one API. Chunk 2's
26
+ AsyncOpenAI / AsyncAnthropic instrumentation lives in the same
27
+ process; sharing httpx means we don't ship two HTTP clients.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import atexit
33
+ import json
34
+ import logging
35
+ import queue
36
+ import threading
37
+ import time
38
+ from datetime import datetime, timezone
39
+ from typing import TYPE_CHECKING
40
+
41
+ import httpx
42
+
43
+ from ._version import __version__
44
+ from .wire._event import LLMEvent
45
+
46
+ if TYPE_CHECKING:
47
+ from ._config import ScopeCallConfig
48
+
49
+ logger = logging.getLogger(__name__)
50
+
51
+
52
class Exporter:
    """Thread-safe queue + auto-flush + HTTP delivery.

    One instance per SDK. `enqueue()` is what every instrumentation /
    manual-API call hits on the hot path — must be O(1) and never block.
    A daemon thread ships queued events in batches through the transport
    selected by `config.mode` (console, file or API).
    """

    def __init__(self, config: ScopeCallConfig) -> None:
        """Create the queue, the HTTP client (API mode only) and start the
        background flush thread.

        Args:
            config: SDK configuration; read for `mode`, `queue_max_size`,
                `flush_interval`, `batch_size`, `max_retries`, `api_key`,
                `endpoint` and `output`.
        """
        self._config = config
        self._queue: queue.Queue[LLMEvent] = queue.Queue(maxsize=config.queue_max_size)
        self._shutdown_event = threading.Event()
        self._flush_now = threading.Event()
        self._file_lock = threading.Lock()

        # Concurrent flush guard — Round-5 TS review caught a race where
        # two drains could each take half a batch and post both halves in
        # parallel. The lock makes draining serial.
        self._flush_lock = threading.Lock()

        # HTTP client lives for the SDK's lifetime so we get TCP keepalive
        # across batches. Headers are constant — set once.
        self._http: httpx.Client | None = None
        if config.mode == "api":
            self._http = httpx.Client(
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {config.api_key or ''}",
                    "User-Agent": f"scopecall-python/{__version__}",
                    "X-ScopeCall-SDK": "python",
                },
                timeout=10.0,
            )

        # Background flush thread. Daemon=True so a misbehaving thread
        # doesn't block process exit; the atexit hook below explicitly
        # drains before the interpreter tears down.
        self._thread = threading.Thread(
            target=self._run, daemon=True, name="scopecall-exporter"
        )
        self._thread.start()
        # atexit-driven drain is the safety net for callers who forget
        # to `sdk.close()`. Same role as TS's `attachProcessHooks`.
        atexit.register(self._on_atexit)

    # ── Hot path ────────────────────────────────────────────────────────

    def enqueue(self, event: LLMEvent) -> None:
        """Add an event to the export queue. Non-blocking.

        On overflow we drop the OLDEST event (not the new one) — same
        policy as the TS circular buffer. Rationale: in a sustained
        burst the freshest events are the most useful for live debugging,
        so we'd rather keep "what just happened" than "what happened
        first" when the queue saturates.
        """
        if self._config.mode == "noop":
            return
        try:
            self._queue.put_nowait(event)
            return
        except queue.Full:
            pass

        # Queue is saturated: evict the oldest entry to make room.
        try:
            self._queue.get_nowait()
        except queue.Empty:
            # The flush thread emptied the queue in the meantime; there
            # is room now, so fall through to the retry below.
            pass
        else:
            # The evicted event will never be shipped, so settle its
            # accounting here. Without this `unfinished_tasks` stays
            # above zero forever and flush() can never observe a fully
            # drained queue.
            self._queue.task_done()

        try:
            self._queue.put_nowait(event)
        except queue.Full:
            # Another producer won the freed slot — acceptable degraded
            # behavior under heavy backpressure.
            pass

    # ── User-facing controls ────────────────────────────────────────────

    def flush(self, timeout: float = 5.0) -> None:
        """Drain the queue, blocking up to `timeout` seconds.

        Returns when either:
          - every queued event has been handed to the transport (posted,
            written to file / console, or dropped after a failed
            delivery), OR
          - `timeout` elapses, whichever comes first.

        Safe to call concurrently with auto-flush ticks — draining is
        serialised by a lock.
        """
        if not self._thread.is_alive():
            # The worker is gone (e.g. after close()); waiting for it
            # would only burn the timeout. Drain on the caller's thread.
            self._drain_all()
            return

        self._flush_now.set()
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            # unfinished_tasks counts every event that was enqueued but
            # not yet `task_done()`d — still queued or mid-flight. Zero
            # means we're truly drained.
            if self._queue.unfinished_tasks == 0:
                return
            time.sleep(0.02)

    def close(self, timeout: float = 5.0) -> None:
        """Shut the SDK down: stop the auto-flush thread, drain remaining
        events, close the HTTP client.

        Idempotent — calling twice is a no-op.
        """
        if self._shutdown_event.is_set():
            return
        self._shutdown_event.set()
        # Wake the flush thread so it sees the shutdown signal without
        # waiting out its current sleep interval.
        self._flush_now.set()
        self._thread.join(timeout=timeout)
        if self._http is not None:
            self._http.close()
            self._http = None

    # ── Internals ───────────────────────────────────────────────────────

    def _on_atexit(self) -> None:
        """Best-effort final drain at interpreter exit."""
        # atexit is best-effort — if the process is dying from SIGKILL
        # we never get here. For graceful exits this gives us one last
        # chance to ship the queue. Errors are swallowed on purpose:
        # nothing useful can be done with them during teardown.
        try:
            self.close(timeout=2.0)
        except Exception:  # noqa: BLE001
            pass

    def _run(self) -> None:
        """Auto-flush loop. Wakes on either a periodic tick or an explicit
        `flush_now` signal and ships everything that is queued."""
        while not self._shutdown_event.is_set():
            self._flush_now.wait(timeout=self._config.flush_interval)
            self._flush_now.clear()
            self._drain_all()
        # Final drain on shutdown — events may have been enqueued after
        # the last pass of the loop above.
        self._drain_all()

    def _drain_all(self) -> None:
        """Ship batches until the queue is empty or a batch fails.

        Stopping at the first failed batch keeps an unreachable endpoint
        from burning through the whole backlog in one tick; what is left
        stays queued for the next tick.
        """
        while self._drain():
            pass

    def _drain(self) -> bool:
        """Pop up to batch_size events, ship them, mark task_done.

        Held under `_flush_lock` so concurrent drains can't each take
        half a batch.

        Returns:
            True when a batch was pulled and delivered; False when the
            queue was empty or the delivery failed.
        """
        with self._flush_lock:
            # Guard against a non-positive batch_size, which would make
            # the loop below a no-op and leave events queued forever.
            limit = max(1, self._config.batch_size)
            batch: list[LLMEvent] = []
            while len(batch) < limit:
                try:
                    batch.append(self._queue.get_nowait())
                except queue.Empty:
                    break
            if not batch:
                return False

            try:
                return bool(self._send_batch(batch))
            except Exception as exc:  # noqa: BLE001
                # The SDK must NEVER raise into customer code. A failed
                # batch is logged at debug — operators who want louder
                # logging can crank `logging.getLogger("scopecall")` up.
                logger.debug("scopecall: export failed: %s", exc)
                return False
            finally:
                # Delivered or dropped, these events are done as far as
                # flush() accounting is concerned.
                for _ in batch:
                    self._queue.task_done()

    def _send_batch(self, batch: list[LLMEvent]) -> bool:
        """Ship one batch via the configured transport (console/file/API).

        The HTTP envelope matches the Rust ingest contract documented in
        `services-rust/ingest/src/routes/ingest.rs`:

            { "events": [ <LLMEvent.to_wire()>, ... ],
              "sent_at": "<RFC3339 timestamp>" }

        The Rust side rejects payloads without `sent_at` to catch clock
        skew / stale-deliveries (Round-1 review P0).

        Returns:
            True when the batch was written / accepted; False when it was
            dropped (all attempts failed, or shutdown interrupted the
            retries).
        """
        mode = self._config.mode

        if mode == "console":
            for ev in batch:
                print(json.dumps(ev.to_wire(), indent=2, default=str))
            return True

        if mode == "file":
            assert self._config.output is not None
            # Explicit encoding so the NDJSON file does not depend on the
            # platform's default locale.
            with self._file_lock, open(self._config.output, "a", encoding="utf-8") as f:
                f.writelines(
                    json.dumps(ev.to_wire(), default=str) + "\n" for ev in batch
                )
            return True

        # API mode. Retries with exponential backoff; on final failure
        # the events are silently dropped (logged at debug). The Rust
        # ingest is durable past this point — once a 2xx returns, the
        # event is committed to Redpanda before the HTTP response is
        # sent, so we don't have to worry about partial acceptance.
        assert self._http is not None
        assert self._config.endpoint is not None
        envelope = {
            "events": [ev.to_wire() for ev in batch],
            "sent_at": datetime.now(timezone.utc).isoformat(),
        }
        # Always make at least one attempt, even if max_retries was
        # configured as zero or negative.
        attempts = max(1, self._config.max_retries)
        backoff = 0.1
        for attempt in range(attempts):
            if self._shutdown_event.is_set() and attempt > 0:
                # Don't keep retrying past shutdown — better to drop
                # than to delay process exit.
                return False
            try:
                resp = self._http.post(self._config.endpoint, json=envelope)
                resp.raise_for_status()
                return True
            except httpx.HTTPError as exc:
                if attempt < attempts - 1:
                    # Use the shutdown event as the sleep — wakes early
                    # on close() so we don't waste backoff time during
                    # graceful shutdown.
                    self._shutdown_event.wait(timeout=backoff)
                    backoff *= 2
                else:
                    logger.debug(
                        "scopecall: dropping %d events after %d retries: %s",
                        len(batch),
                        attempts,
                        exc,
                    )
        return False
scopecall/_pricing.py ADDED
@@ -0,0 +1,69 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import logging
5
+ import threading
6
+ from dataclasses import dataclass
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ # Update LAST_VERIFIED_DATE when refreshing this table.
11
+ # CI (test_pricing_freshness.py) fails if it's older than 90 days.
12
+ LAST_VERIFIED_DATE = datetime.date(2026, 5, 22)
13
+
14
+ # (input_cost_per_1k, output_cost_per_1k) in USD
15
+ _BUNDLED: dict[str, tuple[float, float]] = {
16
+ # OpenAI
17
+ "gpt-4o": (0.0025, 0.010),
18
+ "gpt-4o-mini": (0.00015, 0.00060),
19
+ "gpt-4-turbo": (0.010, 0.030),
20
+ "gpt-4-turbo-preview": (0.010, 0.030),
21
+ "gpt-4": (0.030, 0.060),
22
+ "gpt-3.5-turbo": (0.0005, 0.0015),
23
+ "gpt-3.5-turbo-0125": (0.0005, 0.0015),
24
+ # Anthropic
25
+ "claude-opus-4-7": (0.015, 0.075),
26
+ "claude-sonnet-4-6": (0.003, 0.015),
27
+ "claude-haiku-4-5-20251001": (0.00025, 0.00125),
28
+ "claude-3-5-sonnet-20241022": (0.003, 0.015),
29
+ "claude-3-5-haiku-20241022": (0.00025, 0.00125),
30
+ "claude-3-opus-20240229": (0.015, 0.075),
31
+ "claude-3-sonnet-20240229": (0.003, 0.015),
32
+ "claude-3-haiku-20240307": (0.00025, 0.00125),
33
+ # Google
34
+ "gemini-1.5-pro": (0.00125, 0.005),
35
+ "gemini-1.5-flash": (0.000075, 0.0003),
36
+ "gemini-2.0-flash": (0.0001, 0.0004),
37
+ }
38
+
39
+
40
+ @dataclass
41
+ class PricingTable:
42
+ _table: dict[str, tuple[float, float]]
43
+ _lock: threading.Lock
44
+
45
+ def __init__(self) -> None:
46
+ self._table = dict(_BUNDLED)
47
+ self._lock = threading.Lock()
48
+
49
+ def calculate(self, model: str, input_tokens: int, output_tokens: int) -> float:
50
+ with self._lock:
51
+ entry = self._table.get(model)
52
+ if entry is None:
53
+ # Try prefix match for versioned model names (e.g. "gpt-4o-2024-11-20")
54
+ with self._lock:
55
+ for key, val in self._table.items():
56
+ if model.startswith(key):
57
+ entry = val
58
+ break
59
+ if entry is None:
60
+ return 0.0
61
+ input_cost, output_cost = entry
62
+ return round(
63
+ (input_tokens / 1000 * input_cost) + (output_tokens / 1000 * output_cost),
64
+ 6,
65
+ )
66
+
67
+ def update(self, model: str, input_per_1k: float, output_per_1k: float) -> None:
68
+ with self._lock:
69
+ self._table[model] = (input_per_1k, output_per_1k)