scopecall-py 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scopecall/__init__.py +81 -0
- scopecall/_config.py +129 -0
- scopecall/_context.py +131 -0
- scopecall/_exporter.py +273 -0
- scopecall/_pricing.py +69 -0
- scopecall/_redactor.py +80 -0
- scopecall/_sdk.py +518 -0
- scopecall/_version.py +7 -0
- scopecall/instrumentation/__init__.py +13 -0
- scopecall/instrumentation/_anthropic.py +517 -0
- scopecall/instrumentation/_common.py +243 -0
- scopecall/instrumentation/_openai.py +528 -0
- scopecall/transport/__init__.py +0 -0
- scopecall/wire/__init__.py +5 -0
- scopecall/wire/_event.py +171 -0
- scopecall_py-0.2.0.dist-info/METADATA +521 -0
- scopecall_py-0.2.0.dist-info/RECORD +19 -0
- scopecall_py-0.2.0.dist-info/WHEEL +4 -0
- scopecall_py-0.2.0.dist-info/licenses/LICENSE +121 -0
scopecall/__init__.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""ScopeCall — source-available, self-hostable AI observability for Python.
|
|
2
|
+
|
|
3
|
+
Quick start:
|
|
4
|
+
|
|
5
|
+
import scopecall
|
|
6
|
+
from openai import OpenAI
|
|
7
|
+
|
|
8
|
+
sdk = scopecall.init(
|
|
9
|
+
api_key="sc_live_xxx",
|
|
10
|
+
endpoint="http://localhost:8080/v1/ingest",
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
# Native OpenAI / Anthropic instrumentation:
|
|
14
|
+
openai_client = sdk.instrument(OpenAI())
|
|
15
|
+
|
|
16
|
+
with sdk.trace("support-agent", user_id="user_123") as ctx:
|
|
17
|
+
response = openai_client.chat.completions.create(
|
|
18
|
+
model="gpt-4o-mini",
|
|
19
|
+
messages=[{"role": "user", "content": "Help me with my refund"}],
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Manual API (LangChain / LlamaIndex / RAG / custom wrappers):
|
|
23
|
+
with sdk.trace("custom-agent", user_id="user_456"):
|
|
24
|
+
sdk.record_llm_call(
|
|
25
|
+
model="gpt-4o-mini",
|
|
26
|
+
provider="openai",
|
|
27
|
+
input_tokens=120, output_tokens=48,
|
|
28
|
+
latency_ms=842,
|
|
29
|
+
input_text="Help me with my refund",
|
|
30
|
+
output_text="...",
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
sdk.close() # graceful shutdown — flushes the queue
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
API surface:
|
|
37
|
+
|
|
38
|
+
init(...) → ScopeCallSDK instance
|
|
39
|
+
ScopeCallSDK → trace(name) / workflow(name) /
|
|
40
|
+
instrument(client, provider="openai"|"anthropic") /
|
|
41
|
+
record_llm_call(...) / add_redaction_pattern(...) /
|
|
42
|
+
flush() / close()
|
|
43
|
+
ScopeCallConfig → typed config dataclass for dependency-injection style
|
|
44
|
+
ConfigError → raised when init() gets an invalid config
|
|
45
|
+
LLMEvent → wire-format dataclass (advanced — usually emitted
|
|
46
|
+
for you by record_llm_call or the instrumentations)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
Migrating from scopecall v0.1.x:
|
|
50
|
+
|
|
51
|
+
v0.1 used module-level globals (`scopecall.init(); scopecall.trace(...)`).
|
|
52
|
+
v0.2 returns an instance from `init()`. The two changes most likely to
|
|
53
|
+
break callers:
|
|
54
|
+
|
|
55
|
+
OLD: scopecall.init(api_key="...") # module-level
|
|
56
|
+
with scopecall.trace(feature="x"):
|
|
57
|
+
...
|
|
58
|
+
|
|
59
|
+
NEW: sdk = scopecall.init(api_key="...", # endpoint REQUIRED now
|
|
60
|
+
endpoint="http://localhost:8080/v1/ingest")
|
|
61
|
+
with sdk.trace("x"): # name is positional
|
|
62
|
+
...
|
|
63
|
+
|
|
64
|
+
See CHANGELOG.md → v0.2.0 for the full migration guide.
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
from ._config import ConfigError, ScopeCallConfig
|
|
68
|
+
from ._context import TraceContext
|
|
69
|
+
from ._sdk import ScopeCallSDK, init
|
|
70
|
+
from ._version import __version__
|
|
71
|
+
from .wire._event import LLMEvent
|
|
72
|
+
|
|
73
|
+
__all__ = [
|
|
74
|
+
"init",
|
|
75
|
+
"ScopeCallSDK",
|
|
76
|
+
"ScopeCallConfig",
|
|
77
|
+
"ConfigError",
|
|
78
|
+
"TraceContext",
|
|
79
|
+
"LLMEvent",
|
|
80
|
+
"__version__",
|
|
81
|
+
]
|
scopecall/_config.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""SDK configuration.
|
|
2
|
+
|
|
3
|
+
Matches the TS SDK's `ScopeCallConfig` shape (sdks/typescript/src/config.ts)
|
|
4
|
+
field-for-field where it makes sense. Naming follows Python conventions
|
|
5
|
+
(`snake_case`, `bool` defaults) — the field set itself is parity.
|
|
6
|
+
|
|
7
|
+
Round-8 review made `endpoint` required when `api_key` is set: a missing
|
|
8
|
+
endpoint used to silently default to https://ingest.scopecall.com/v1/ingest
|
|
9
|
+
which doesn't exist yet (hosted Cloud isn't live). Python now follows the
|
|
10
|
+
same contract — fail loud with a `ConfigError` that names the fix.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ConfigError(ValueError):
    """Signal that an SDK configuration cannot produce a working SDK.

    Derives from `ValueError`, so handlers written against the base class
    keep catching it; callers that want to tell configuration problems
    apart from other `ValueError` sources can catch `ConfigError` itself.
    """
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ScopeCallConfig:
    """Typed settings bundle for the SDK.

    All fields have defaults, so `ScopeCallConfig()` is constructible; use
    `validate()` to check that the combination can actually work. The
    `mode` property reports which transport the settings select.
    """

    # ── Transport selection ──────────────────────────────────────────────
    # At least one of api_key / output / debug must be set (enforced by
    # `validate`). When several are set, `mode` resolves them by
    # precedence: disabled → debug → output → api_key.
    api_key: str | None = None

    # `endpoint` is REQUIRED when api_key is set (Round-8 review). For
    # self-hosted, point at the Rust ingest URL, e.g.
    # http://localhost:8080/v1/ingest. For hosted Cloud — not yet live —
    # this default will be reintroduced.
    endpoint: str | None = None

    # Debug mode pretty-prints to stdout instead of shipping events.
    # Useful during integration. Overrides api_key + output.
    debug: bool = False

    # File mode appends NDJSON events to the given path. Useful for local
    # batch capture without a running ingest service.
    output: str | None = None

    # ── Behavior ─────────────────────────────────────────────────────────
    environment: str = "production"
    redact_pii: bool = True
    capture_content: bool = True

    # ── Auto-flush ───────────────────────────────────────────────────────
    # Background thread flushes the queue this often (seconds). 5 s aligns
    # with the TS SDK's flushIntervalMs=5000 default. The first-run UI's
    # 3 s pre-first-call poll cadence is intentionally faster than this
    # so the dashboard catches the first trace within ~8 s end-to-end.
    flush_interval: float = 5.0
    # Maximum number of events shipped per batch. Values below 1 are
    # clamped to 1 in __post_init__.
    batch_size: int = 50
    queue_max_size: int = 10_000
    max_retries: int = 3

    # ── Off-switch ───────────────────────────────────────────────────────
    # When True, `init()` returns a no-op SDK that swallows every call.
    # Useful in tests that import production code paths but don't want
    # network IO. Mirrors TS `ScopeCallConfig.disabled`.
    disabled: bool = False

    # ── Defaults applied to every event ──────────────────────────────────
    # Each of these is overridable per-trace via sdk.trace(...).
    default_feature: str | None = None
    default_user_id: str | None = None
    default_session_id: str | None = None

    # Round-4 review (TS): default_prompt_version tags every call with a
    # build/commit/release identifier when the app has a single canonical
    # prompt set. Per-trace prompt_version wins, then parent trace's
    # value, then this default, then None.
    default_prompt_version: str | None = None

    def __post_init__(self) -> None:
        """Normalise values that would stall or spin the flush thread."""
        # Serverless guard: a zero or negative interval would spin the
        # flush thread. Clamp to 0.1 s rather than reject — the user
        # probably meant "flush often" and we want to be forgiving.
        if self.flush_interval <= 0:
            self.flush_interval = 0.1
        # A batch size below 1 means a drain pass would never dequeue an
        # event, so nothing would ever be delivered. Same forgiving
        # policy as above: clamp instead of raising.
        if self.batch_size < 1:
            self.batch_size = 1

    @property
    def mode(self) -> str:
        """Return the transport name selected by this config.

        One of "noop", "console", "file" or "api", checked in that
        order of precedence (disabled, debug, output, otherwise API).
        """
        if self.disabled:
            return "noop"
        if self.debug:
            return "console"
        if self.output:
            return "file"
        return "api"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def validate(config: ScopeCallConfig) -> None:
    """Reject a config that cannot yield a working SDK.

    Accepted combinations:
      - disabled=True        → nothing is checked
      - debug=True           → console mode, no credentials needed
      - output=<path>        → file mode, no credentials needed
      - api_key + endpoint   → HTTP mode, both must be present

    Raises:
        ConfigError: if none of api_key / debug / output is set, or if
            api_key is set without an endpoint.
    """
    # Any of these switches selects a transport that needs no credentials.
    if config.disabled or config.debug or config.output:
        return

    if not config.api_key:
        raise ConfigError(
            "scopecall.init() requires one of: api_key=..., debug=True, or output=<path>."
        )

    if config.endpoint:
        return

    # An api_key without an endpoint is rejected: there is no implicit
    # fallback URL.
    raise ConfigError(
        "scopecall.init(api_key=...) requires endpoint=... "
        "Self-hosted: point at your ingest service, e.g. "
        "endpoint='http://localhost:8080/v1/ingest'. "
        "(ScopeCall Cloud is not yet available; a managed default "
        "endpoint will return in a future release.)"
    )
|
scopecall/_context.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Trace context — `contextvars` propagation so nested `sdk.trace()` blocks
|
|
2
|
+
chain correctly through sync code, async code, FastAPI request handlers,
|
|
3
|
+
asyncio tasks, and background workers.
|
|
4
|
+
|
|
5
|
+
Why `contextvars` (PEP 567) and not threadlocals: thread-locals don't
|
|
6
|
+
propagate across `await` boundaries by default. `contextvars.ContextVar`
|
|
7
|
+
DOES propagate across `await` and into `asyncio.create_task()`, which is
|
|
8
|
+
the table-stakes property for any AI backend using `AsyncOpenAI` /
|
|
9
|
+
`AsyncAnthropic`. The reviewer correctly called this out as a P0.
|
|
10
|
+
|
|
11
|
+
Each `sdk.trace(name)` call:
|
|
12
|
+
|
|
13
|
+
1. Generates a new `span_id` for itself.
|
|
14
|
+
2. Reads the current `_current_trace` ContextVar (if any) to find the
|
|
15
|
+
parent's `trace_id` + `span_id`. If there is one, inherit
|
|
16
|
+
`trace_id`; otherwise mint a fresh one.
|
|
17
|
+
3. Sets the new `TraceContext` as `_current_trace` for the body of the
|
|
18
|
+
block.
|
|
19
|
+
4. Resets `_current_trace` on exit so nesting unwinds cleanly.
|
|
20
|
+
|
|
21
|
+
The block ALSO emits a synthetic workflow event on exit — see
|
|
22
|
+
`scopecall._sdk.ScopeCallSDK.trace` for the call site. The event is
|
|
23
|
+
what the dashboard's Flow Map and trace tree render as the parent
|
|
24
|
+
"workflow" node. Without it, child LLM rows would have a
|
|
25
|
+
`parent_span_id` that points at nothing in ClickHouse, and the
|
|
26
|
+
flow-map JOIN finds no parent.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import uuid
|
|
32
|
+
from contextvars import ContextVar
|
|
33
|
+
from dataclasses import dataclass, field
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class TraceContext:
    """State carried by one `sdk.trace()` block.

    Per the module notes this is the object that
    `with sdk.trace(...) as ctx:` yields, so callers may read its fields
    when wiring up their own span IDs or parent linkage.
    """

    # Shared by every span in the same trace tree.
    trace_id: str

    # Identifies this particular block; nested work refers to it through
    # `parent_span_id`.
    span_id: str

    # span_id of the enclosing `sdk.trace()` block, or None for a root.
    parent_span_id: str | None

    # Human-readable label of the block. Per the original notes it also
    # serves as the default feature_name of the workflow event emitted
    # when the block exits (e.g. `sdk.trace("chat-api", ...)`).
    name: str | None

    # Prompt version for this trace. None defers to the documented
    # precedence: trace value → parent trace value → config default → None.
    prompt_version: str | None = None

    # Per-trace user/session/feature overrides. None means the config
    # defaults are applied when the event is emitted.
    user_id: str | None = None
    session_id: str | None = None
    feature_name: str | None = None

    # Wall-clock start (milliseconds since the epoch), used to compute
    # the workflow span's latency on exit.
    start_time_ms: float = 0.0
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Module-level ContextVar. The reset-token pattern below is what
|
|
79
|
+
# guarantees nested traces unwind in the right order even when the user
|
|
80
|
+
# raises an exception inside the body.
|
|
81
|
+
_current_trace: ContextVar[TraceContext | None] = ContextVar(
|
|
82
|
+
"scopecall_current_trace", default=None
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_current() -> TraceContext | None:
    """Look up the TraceContext bound in the current execution context.

    Returns:
        The innermost active `TraceContext`, or None when no trace block
        is active (the ContextVar's default).
    """
    active = _current_trace.get()
    return active
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def push(ctx: TraceContext) -> object:
    """Bind `ctx` as the current trace.

    Returns:
        The token produced by `ContextVar.set`. Hand it back to `pop()`,
        ideally from a `finally` block, to restore the previous binding.
    """
    token = _current_trace.set(ctx)
    return token
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def pop(token: object) -> None:
    """Undo a `push` by resetting the ContextVar with its token.

    `ContextVar.reset()` restores the value the variable held before the
    matching `set`, regardless of any `set` calls made in between. Per
    the `contextvars` documentation it raises if the token was already
    used or belongs to a different variable or context.

    Args:
        token: the value returned by `push`. Typed as `object` so the
            underlying `Token` class stays out of the public signature.
    """
    restore = _current_trace.reset
    restore(token)  # type: ignore[arg-type]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def new_span_id() -> str:
    """Mint a 16-hex-char span ID (matches the OTel + TS SDK convention).

    OTel uses 8-byte span IDs rendered as 16 lowercase hex chars; this
    returns the same shape.

    The ID comes from 8 random bytes rather than a truncated UUID4:
    the first 16 hex digits of a UUID4 always carry the fixed version
    nibble ("4") at index 12, which wastes 4 of the 64 bits.
    """
    # Function-scope import so this helper needs no change to the
    # module's import block.
    import secrets

    return secrets.token_hex(8)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def new_trace_id() -> str:
    """Return a fresh trace ID: a random UUID4 as 32 lowercase hex chars."""
    fresh = uuid.uuid4()
    return fresh.hex
|
scopecall/_exporter.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""Background exporter — circular buffer + auto-flush + HTTP transport.
|
|
2
|
+
|
|
3
|
+
Architecturally identical to the TS SDK's
|
|
4
|
+
`sdks/typescript/src/exporter.ts`:
|
|
5
|
+
|
|
6
|
+
- A bounded in-memory queue (drops oldest on overflow).
|
|
7
|
+
- A background thread that wakes up every `flush_interval` seconds (or
|
|
8
|
+
immediately on `.flush()`), drains up to `batch_size` events, and
|
|
9
|
+
posts them as one HTTP request.
|
|
10
|
+
- Auto-flush is enabled by default (Round-5 review P0 — without it,
|
|
11
|
+
long-running servers queued events forever and no traces ever
|
|
12
|
+
appeared in the dashboard).
|
|
13
|
+
- `.close()` sets the shutdown and wake-up signals, drains remaining events, and
|
|
14
|
+
joins the thread within `timeout` seconds.
|
|
15
|
+
|
|
16
|
+
Why a thread (not asyncio):
|
|
17
|
+
Python's `asyncio` doesn't run in non-async contexts (e.g. a sync
|
|
18
|
+
Flask request handler in a pre-3.12 app), and we have to work in both.
|
|
19
|
+
A daemon thread is the lowest-common-denominator that works for
|
|
20
|
+
sync code, async code, and background scripts. The thread holds a
|
|
21
|
+
`queue.Queue` which is itself thread-safe; the synchronisation cost
|
|
22
|
+
is negligible compared to the HTTP latency we're hiding.
|
|
23
|
+
|
|
24
|
+
Why httpx (not requests):
|
|
25
|
+
httpx supports both sync and async with one API. Chunk 2's
|
|
26
|
+
AsyncOpenAI / AsyncAnthropic instrumentation lives in the same
|
|
27
|
+
process; sharing httpx means we don't ship two HTTP clients.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import atexit
|
|
33
|
+
import json
|
|
34
|
+
import logging
|
|
35
|
+
import queue
|
|
36
|
+
import threading
|
|
37
|
+
import time
|
|
38
|
+
from datetime import datetime, timezone
|
|
39
|
+
from typing import TYPE_CHECKING
|
|
40
|
+
|
|
41
|
+
import httpx
|
|
42
|
+
|
|
43
|
+
from ._version import __version__
|
|
44
|
+
from .wire._event import LLMEvent
|
|
45
|
+
|
|
46
|
+
if TYPE_CHECKING:
|
|
47
|
+
from ._config import ScopeCallConfig
|
|
48
|
+
|
|
49
|
+
logger = logging.getLogger(__name__)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class Exporter:
    """Thread-safe event queue with a background flush thread.

    One instance per SDK. `enqueue()` is what every instrumentation /
    manual-API call hits on the hot path, so it never blocks. Delivery
    happens on a daemon thread through the transport selected by
    `config.mode`: "console" (stdout), "file" (NDJSON append) or "api"
    (HTTP POST). In "noop" mode events are discarded at `enqueue()`.
    """

    def __init__(self, config: ScopeCallConfig) -> None:
        """Create the queue, the HTTP client (API mode only) and the thread.

        Args:
            config: SDK settings. Read here and later for `mode`,
                `queue_max_size`, `batch_size`, `flush_interval`,
                `max_retries`, `output`, `api_key` and `endpoint`.
        """
        self._config = config
        self._queue: queue.Queue[LLMEvent] = queue.Queue(maxsize=config.queue_max_size)
        self._shutdown_event = threading.Event()
        self._flush_now = threading.Event()
        self._file_lock = threading.Lock()

        # Serialises drain passes so two of them can never split one
        # batch and ship the halves in parallel.
        self._flush_lock = threading.Lock()

        # HTTP client lives for the SDK's lifetime so connections are
        # reused across batches. Headers are constant — set once.
        self._http: httpx.Client | None = None
        if config.mode == "api":
            self._http = httpx.Client(
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {config.api_key or ''}",
                    "User-Agent": f"scopecall-python/{__version__}",
                    "X-ScopeCall-SDK": "python",
                },
                timeout=10.0,
            )

        # Background flush thread. daemon=True so a misbehaving thread
        # doesn't block process exit; the atexit hook below explicitly
        # drains before the interpreter tears down.
        self._thread = threading.Thread(
            target=self._run, daemon=True, name="scopecall-exporter"
        )
        self._thread.start()
        # atexit-driven drain is the safety net for callers who forget
        # to `sdk.close()`. `close()` unregisters it again.
        atexit.register(self._on_atexit)

    # ── Hot path ────────────────────────────────────────────────────────

    def enqueue(self, event: LLMEvent) -> None:
        """Add an event to the export queue without blocking.

        When the queue is full the OLDEST event is dropped to make room,
        so that under sustained backpressure the most recent events
        survive. If another thread refills the freed slot first, the new
        event is dropped instead — acceptable degraded behavior.
        """
        if self._config.mode == "noop":
            return
        try:
            self._queue.put_nowait(event)
            return
        except queue.Full:
            pass

        try:
            self._queue.get_nowait()
        except queue.Empty:
            # The flush thread emptied the queue in the meantime.
            pass
        else:
            # The evicted event will never be delivered, so it must be
            # marked done here; otherwise `unfinished_tasks` never gets
            # back to zero and every later `flush()` waits out its
            # whole timeout.
            self._queue.task_done()

        try:
            self._queue.put_nowait(event)
        except queue.Full:
            pass

    # ── User-facing controls ────────────────────────────────────────────

    def flush(self, timeout: float = 5.0) -> None:
        """Wake the flush thread and wait until the queue is delivered.

        Returns when either every queued event has been handed to the
        transport (or dropped after a failed delivery), or `timeout`
        seconds have elapsed, whichever comes first.

        Args:
            timeout: maximum number of seconds to block.
        """
        self._flush_now.set()
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            # unfinished_tasks counts events that were enqueued but not
            # yet `task_done()`d, i.e. still queued or mid-delivery.
            # Zero means nothing is queued and nothing is in flight.
            if self._queue.unfinished_tasks == 0:
                return
            time.sleep(0.02)

    def close(self, timeout: float = 5.0) -> None:
        """Stop the flush thread, drain remaining events, close the client.

        Idempotent — calling twice is a no-op.

        Args:
            timeout: maximum number of seconds to wait for the flush
                thread to finish its final drain.
        """
        if self._shutdown_event.is_set():
            return
        self._shutdown_event.set()
        # Wake the flush thread so it sees the shutdown signal without
        # waiting out its current sleep interval.
        self._flush_now.set()
        self._thread.join(timeout=timeout)
        # The exit hook has nothing left to do; dropping it also stops
        # the atexit registry from keeping this exporter alive.
        atexit.unregister(self._on_atexit)
        if self._http is not None:
            self._http.close()
            self._http = None

    # ── Internals ───────────────────────────────────────────────────────

    def _on_atexit(self) -> None:
        """Best-effort final drain at interpreter exit.

        Never runs if the process is killed outright. Errors are
        swallowed deliberately: nothing useful can be done with them
        while the interpreter is shutting down.
        """
        try:
            self.close(timeout=2.0)
        except Exception:  # noqa: BLE001
            pass

    def _run(self) -> None:
        """Auto-flush loop: wake on the periodic tick or a `flush_now`
        signal and deliver everything that is queued."""
        while not self._shutdown_event.is_set():
            self._flush_now.wait(timeout=self._config.flush_interval)
            self._flush_now.clear()
            self._drain_all()
        # Final drain on shutdown — the loop above can exit without
        # having seen events that were enqueued just before close().
        self._drain_all()

    def _drain_all(self) -> None:
        """Run drain passes until the queue is empty.

        A single pass ships at most one batch; looping here is what lets
        `flush()` and `close()` deliver queues longer than `batch_size`.
        """
        while self._drain():
            pass

    def _drain(self) -> int:
        """Pop up to `batch_size` events, ship them, mark them done.

        Runs under `_flush_lock` so concurrent passes cannot interleave.

        Returns:
            The number of events taken off the queue (0 when it was
            empty), whether or not delivery succeeded.
        """
        # Guard against a non-positive batch size, which would make this
        # pass dequeue nothing forever.
        limit = max(1, self._config.batch_size)
        with self._flush_lock:
            batch: list[LLMEvent] = []
            while len(batch) < limit:
                try:
                    batch.append(self._queue.get_nowait())
                except queue.Empty:
                    break
            if not batch:
                return 0

            try:
                self._send_batch(batch)
            except Exception as exc:  # noqa: BLE001
                # The SDK must NEVER raise into customer code. A failed
                # batch is logged at debug — operators who want louder
                # logging can crank `logging.getLogger("scopecall")` up.
                logger.debug("scopecall: export failed: %s", exc)
            finally:
                for _ in batch:
                    self._queue.task_done()
            return len(batch)

    def _send_batch(self, batch: list[LLMEvent]) -> None:
        """Ship one batch via the configured transport (console/file/API).

        The HTTP envelope has the shape

            { "events": [ <LLMEvent.to_wire()>, ... ],
              "sent_at": "<RFC3339 timestamp>" }

        which, per the original notes, is the contract of the Rust ingest
        service (it rejects payloads without `sent_at`).

        Raises:
            OSError: if the output file cannot be opened or written
                (file mode). Callers go through `_drain`, which logs it.
        """
        mode = self._config.mode

        if mode == "console":
            for ev in batch:
                print(json.dumps(ev.to_wire(), indent=2, default=str))
            return

        if mode == "file":
            output = self._config.output
            if output is None:
                raise RuntimeError("scopecall: file mode requires config.output")
            lines = [json.dumps(ev.to_wire(), default=str) + "\n" for ev in batch]
            # Explicit UTF-8 so the NDJSON file does not depend on the
            # platform's default encoding.
            with self._file_lock, open(output, "a", encoding="utf-8") as f:
                f.writelines(lines)
            return

        # API mode. Retries with exponential backoff; on final failure
        # the events are dropped (logged at debug).
        # Local references: close() may set `_http` to None concurrently.
        http = self._http
        endpoint = self._config.endpoint
        if http is None or endpoint is None:
            logger.debug(
                "scopecall: dropping %d events: HTTP transport unavailable",
                len(batch),
            )
            return

        envelope = {
            "events": [ev.to_wire() for ev in batch],
            "sent_at": datetime.now(timezone.utc).isoformat(),
        }
        # Always make at least one attempt, even if max_retries is 0 or
        # negative.
        attempts = max(1, self._config.max_retries)
        backoff = 0.1
        for attempt in range(attempts):
            if attempt > 0 and self._shutdown_event.is_set():
                # Don't keep retrying past shutdown — better to drop
                # than to delay process exit.
                return
            try:
                resp = http.post(endpoint, json=envelope)
                resp.raise_for_status()
                return
            except httpx.HTTPError as exc:
                if attempt < attempts - 1:
                    # Use the shutdown event as the sleep — wakes early
                    # on close() so we don't waste backoff time during
                    # graceful shutdown.
                    self._shutdown_event.wait(timeout=backoff)
                    backoff *= 2
                else:
                    logger.debug(
                        "scopecall: dropping %d events after %d retries: %s",
                        len(batch),
                        attempts,
                        exc,
                    )
|
scopecall/_pricing.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
import logging
|
|
5
|
+
import threading
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
# Update LAST_VERIFIED_DATE when refreshing this table.
|
|
11
|
+
# CI (test_pricing_freshness.py) fails if it's older than 90 days.
|
|
12
|
+
LAST_VERIFIED_DATE = datetime.date(2026, 5, 22)
|
|
13
|
+
|
|
14
|
+
# (input_cost_per_1k, output_cost_per_1k) in USD
|
|
15
|
+
_BUNDLED: dict[str, tuple[float, float]] = {
|
|
16
|
+
# OpenAI
|
|
17
|
+
"gpt-4o": (0.0025, 0.010),
|
|
18
|
+
"gpt-4o-mini": (0.00015, 0.00060),
|
|
19
|
+
"gpt-4-turbo": (0.010, 0.030),
|
|
20
|
+
"gpt-4-turbo-preview": (0.010, 0.030),
|
|
21
|
+
"gpt-4": (0.030, 0.060),
|
|
22
|
+
"gpt-3.5-turbo": (0.0005, 0.0015),
|
|
23
|
+
"gpt-3.5-turbo-0125": (0.0005, 0.0015),
|
|
24
|
+
# Anthropic
|
|
25
|
+
"claude-opus-4-7": (0.015, 0.075),
|
|
26
|
+
"claude-sonnet-4-6": (0.003, 0.015),
|
|
27
|
+
"claude-haiku-4-5-20251001": (0.00025, 0.00125),
|
|
28
|
+
"claude-3-5-sonnet-20241022": (0.003, 0.015),
|
|
29
|
+
"claude-3-5-haiku-20241022": (0.00025, 0.00125),
|
|
30
|
+
"claude-3-opus-20240229": (0.015, 0.075),
|
|
31
|
+
"claude-3-sonnet-20240229": (0.003, 0.015),
|
|
32
|
+
"claude-3-haiku-20240307": (0.00025, 0.00125),
|
|
33
|
+
# Google
|
|
34
|
+
"gemini-1.5-pro": (0.00125, 0.005),
|
|
35
|
+
"gemini-1.5-flash": (0.000075, 0.0003),
|
|
36
|
+
"gemini-2.0-flash": (0.0001, 0.0004),
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class PricingTable:
    """Thread-safe per-model price lookup, seeded from the bundled table.

    Prices are stored as `(input_cost_per_1k, output_cost_per_1k)` in USD.
    """

    # `@dataclass` is kept for the generated `__repr__` / `__eq__`; the
    # hand-written `__init__` below takes precedence over a generated one.
    _table: dict[str, tuple[float, float]]
    _lock: threading.Lock

    def __init__(self) -> None:
        """Start from a private copy of the bundled prices."""
        self._table = dict(_BUNDLED)
        self._lock = threading.Lock()

    def calculate(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Return the USD cost of one call, rounded to 6 decimal places.

        An exact model name wins. Otherwise the LONGEST table key that is
        a prefix of `model` is used, so a versioned name such as
        "gpt-4o-mini-2024-07-18" resolves to "gpt-4o-mini" rather than
        to the shorter "gpt-4o".

        Args:
            model: model identifier as reported by the provider.
            input_tokens: number of prompt tokens.
            output_tokens: number of completion tokens.

        Returns:
            The cost in USD, or 0.0 when the model is unknown.
        """
        with self._lock:
            entry = self._table.get(model)
            if entry is None:
                prefixes = [key for key in self._table if model.startswith(key)]
                if prefixes:
                    entry = self._table[max(prefixes, key=len)]
        if entry is None:
            return 0.0
        input_cost, output_cost = entry
        return round(
            (input_tokens / 1000 * input_cost) + (output_tokens / 1000 * output_cost),
            6,
        )

    def update(self, model: str, input_per_1k: float, output_per_1k: float) -> None:
        """Insert or replace the per-1k-token prices for `model`."""
        with self._lock:
            self._table[model] = (input_per_1k, output_per_1k)
|