nullrun 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nullrun/runtime.py ADDED
@@ -0,0 +1,1806 @@
1
+ """
2
+ NullRun Runtime - core runtime safety layer for AI agents.
3
+
4
+ This is the main entry point for the SDK. It handles:
5
+ - Authentication with NullRun cloud
6
+ - Policy fetching and caching
7
+ - Event buffering and batched flush
8
+ - Local policy enforcement (instant, no network latency)
9
+ - Pre-execution enforcement via /execute endpoint
10
+
11
+ ## Pre-execution gate fail-OPEN/CLOSED contract (ADR-008)
12
+
13
+ The SDK enforces workflow safety through a set of *pre-execution gates*
14
+ that run before a protected function body executes and may raise to halt
15
+ the work. Each gate declares its own fail-OPEN/CLOSED policy -- this is
16
+ the authoritative table; deviations require an ADR amendment (Rule 5).
17
+
18
+ | Gate | Transport-error behavior | Recovery behavior | Opt-out |
19
+ |---|---|---|---|
20
+ | `check_workflow_budget` | OPEN (skip check, log warning) | silent post-hoc correction in `/track` events via `cost_correction_applied=true` | `NULLRUN_SKIP_BUDGET_CHECK=1` -- **full billing bypass**, not just check bypass (see docstring WARNING) |
21
+ | `check_control_plane` | OPEN (treat state as `Normal`) | deferred enforcement -- next WS-push or `/status` poll sees the true state | none |
22
+ | `_enforce_sensitive_tool` (default `_fallback_mode=permissive`) | CLOSED -- body MUST NOT run when `decision_source` is any `FALLBACK_*` | n/a (body did not run) | `NULLRUN_SENSITIVE_FAIL_OPEN=1` -- explicitly documented as "OPEN-when-engine-unavailable" |
23
+ | `_enforce_sensitive_tool` (`_fallback_mode=strict`) | CLOSED -- transport returns `decision=block, decision_source=FALLBACK_*` | n/a | none |
24
+ | `_emit_span_start` / `_emit_span_end` | n/a -- never blocks | n/a | n/a |
25
+
26
+ The "Opt-out" column makes it explicit that `NULLRUN_SKIP_BUDGET_CHECK=1`
27
+ is a **different category** of action than
28
+ `NULLRUN_SENSITIVE_FAIL_OPEN=1` (bypass vs. change semantics), despite
29
+ the similar naming. See `docs/adr/008-sdk-preflight-fail-policy.md`
30
+ for the full rules, including transport error classification
31
+ (`FALLBACK_NETWORK_ERROR` / `FALLBACK_GATEWAY_ERROR` / `FALLBACK_BREAKER_OPEN`).
32
+ """
33
+
34
+ import asyncio
35
+ import logging
36
+ import os
37
+ import threading
38
+ import time
39
+ import uuid
40
+ from collections import defaultdict, deque
41
+ from collections.abc import Callable
42
+ from dataclasses import dataclass
43
+ from typing import Any, Optional
44
+
45
+ import httpx
46
+
47
+ from nullrun.actions import ActionHandler, ActionType
48
+ from nullrun.breaker.exceptions import (
49
+ BreakerError,
50
+ NullRunAuthenticationError,
51
+ NullRunBlockedException,
52
+ WorkflowKilledInterrupt,
53
+ WorkflowPausedException,
54
+ )
55
+ from nullrun.context import (
56
+ generate_span_id,
57
+ generate_trace_id,
58
+ get_agent_id,
59
+ get_attempt_index,
60
+ get_span_id,
61
+ get_trace_id,
62
+ get_workflow_id,
63
+ )
64
+ from nullrun.observability import metrics
65
+ from nullrun.transport import (
66
+ DecisionSource,
67
+ FallbackMode,
68
+ FlushConfig,
69
+ Transport,
70
+ TransportErrorSource,
71
+ )
72
+
73
+
74
+ class LoopTracker:
75
+ """
76
+ In-memory loop detection using deque with timestamps.
77
+
78
+ Tracks calls per tool_name with a 60-second sliding window.
79
+ """
80
+
81
+ def __init__(self, window_seconds: int = 60):
82
+ self._calls = defaultdict(deque)
83
+ self._window_seconds = window_seconds
84
+
85
+ def record(self, tool_name: str) -> None:
86
+ """Record a call for a tool."""
87
+ now = time.time()
88
+ self._calls[tool_name].append(now)
89
+ self._prune(tool_name, before=now - self._window_seconds)
90
+
91
+ def count(self, tool_name: str, window: int = None) -> int:
92
+ """
93
+ Count calls for a tool within the time window.
94
+
95
+ Args:
96
+ tool_name: Name of the tool
97
+ window: Time window in seconds (defaults to init window)
98
+
99
+ Returns:
100
+ Number of calls in the window
101
+ """
102
+ if window is None:
103
+ window = self._window_seconds
104
+ self._prune(tool_name, before=time.time() - window)
105
+ return len(self._calls[tool_name])
106
+
107
+ def _prune(self, tool_name: str, before: float) -> None:
108
+ """Remove calls older than the threshold."""
109
+ while self._calls[tool_name] and self._calls[tool_name][0] < before:
110
+ self._calls[tool_name].popleft()
111
+
112
+
113
+ class RateTracker:
114
+ """
115
+ In-memory rate tracking using deque with timestamps.
116
+
117
+ Tracks total calls per minute to enforce rate limits.
118
+ """
119
+
120
+ def __init__(self, window_seconds: int = 60):
121
+ self._calls = deque()
122
+ self._window_seconds = window_seconds
123
+
124
+ def record(self) -> None:
125
+ """Record a call."""
126
+ now = time.time()
127
+ self._calls.append(now)
128
+ self._prune(before=now - self._window_seconds)
129
+
130
+ def count(self, window: int = None) -> int:
131
+ """
132
+ Count calls within the time window.
133
+
134
+ Args:
135
+ window: Time window in seconds (defaults to init window)
136
+
137
+ Returns:
138
+ Number of calls in the window
139
+ """
140
+ if window is None:
141
+ window = self._window_seconds
142
+ self._prune(before=time.time() - window)
143
+ return len(self._calls)
144
+
145
+ def exceeds_limit(self, limit: int, window: int = None) -> bool:
146
+ """
147
+ Check if rate limit is exceeded.
148
+
149
+ Args:
150
+ limit: Maximum allowed calls in the window
151
+ window: Time window in seconds (defaults to init window)
152
+
153
+ Returns:
154
+ True if limit is exceeded
155
+ """
156
+ return self.count(window) >= limit
157
+
158
+ def _prune(self, before: float) -> None:
159
+ """Remove calls older than the threshold."""
160
+ while self._calls and self._calls[0] < before:
161
+ self._calls.popleft()
162
+
163
+
164
+ @dataclass
165
+ class LocalDecision:
166
+ """Decision from local check (no network round-trip)."""
167
+ allowed: bool
168
+ reason: str = None
169
+ suggestion: str = None
170
+
171
+
172
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Phase 0.3.1: sentinel used when a gate fires outside a
# ``with workflow(...)`` context. The double-underscore namespacing
# avoids colliding with a user workflow that happens to be named
# ``<unknown>`` (the previous literal, which was a collision hazard).
# Wire compatibility: the value is still a plain string.
UNKNOWN_WORKFLOW_ID: str = "__nullrun_unknown__"
180
+
181
+
182
@dataclass
class Policy:
    """
    Policy fetched from NullRun backend.

    Defines the safety limits for an agent workflow.
    """
    budget_cents: int
    # NOTE(review): annotated as "cents per minute" by the original author,
    # while the runtime's local rate check counts calls -- confirm the unit.
    rate_limit: int  # cents per minute
    loop_threshold: int = 6  # same tool calls in window
    retry_threshold: int = 5  # retries in window
    anomaly_detection_enabled: bool = True
    loop_detection_enabled: bool = True
    retry_detection_enabled: bool = True

    @classmethod
    def default_local(cls) -> "Policy":
        """Default policy for local mode (free tier)."""
        return cls(
            budget_cents=1000,  # $10
            rate_limit=100,
            loop_threshold=6,
            retry_threshold=5,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Policy":
        """Create Policy from API response dict.

        Keys that are missing *or explicitly null* fall back to the values
        of ``default_local()``. Previously an explicit ``None`` was stored
        as-is, leaving a ``None`` threshold in the policy. Falsy but
        meaningful values such as ``0`` and ``False`` are preserved.
        """
        defaults = cls.default_local()

        def pick(key: str) -> Any:
            value = data.get(key)
            return getattr(defaults, key) if value is None else value

        return cls(
            budget_cents=pick("budget_cents"),
            rate_limit=pick("rate_limit"),
            loop_threshold=pick("loop_threshold"),
            retry_threshold=pick("retry_threshold"),
            anomaly_detection_enabled=pick("anomaly_detection_enabled"),
            loop_detection_enabled=pick("loop_detection_enabled"),
            retry_detection_enabled=pick("retry_detection_enabled"),
        )
219
+
220
+
221
+ class NullRunRuntime:
222
+ """
223
+ Central runtime for NullRun SDK.
224
+
225
+ This is a singleton that manages:
226
+ - Authentication state (organization_id)
227
+ - Cached policies from backend
228
+ - Event buffering and batched transport
229
+ - Local policy enforcement
230
+
231
+ Usage:
232
+ # Automatic (via protect())
233
+ import nullrun
234
+ nullrun.protect()
235
+
236
+ # Manual
237
+ rt = NullRunRuntime.get_instance()
238
+ rt.track({"type": "llm_call", "tokens": 100, "cost_cents": 5})
239
+ """
240
+
241
+ _instance: Optional["NullRunRuntime"] = None
242
+ _lock = threading.Lock()
243
+
244
def __init__(
    self,
    api_key: str | None = None,
    secret_key: str | None = None,
    api_url: str = "https://api.nullrun.io",
    policy: Policy | None = None,
    fallback_mode: str | None = None,
    debug: bool = False,
    _test_mode: bool = False,
    polling: bool = True,
):
    """
    Initialize NullRun Runtime.

    Builds the transport, authenticates, loads a policy, starts the
    transport and (optionally) the control-plane listener, then sets up
    the action handler and the sensitive-tool registry.

    Args:
        api_key: API key from NullRun dashboard. If None, reads from
            the NULLRUN_API_KEY env variable. If neither is set the
            constructor raises (there is no local-mode fallback).
        secret_key: Secret key for HMAC request signing. If None, reads
            NULLRUN_SECRET_KEY; if that is unset too, no key is passed
            to the transport.
        api_url: URL of NullRun proxy server. Defaults to
            https://api.nullrun.io. NULLRUN_API_URL is only consulted
            when a falsy value (e.g. ``""``) is passed explicitly,
            because the parameter default is itself non-empty.
        policy: Optional policy to use. In test mode it is used as-is
            (or the local default when None).
            NOTE(review): outside test mode ``_fetch_policy()`` is
            called unconditionally after authentication, so a policy
            passed here does not appear to survive -- confirm intent.
        fallback_mode: Legacy string selector, compared
            case-insensitively: ``"strict"`` and ``"cached"`` select the
            matching ``FallbackMode``; any other value selects
            PERMISSIVE. When None, NULLRUN_FALLBACK_MODE is read instead
            and a DeprecationWarning is emitted if it is set.
        debug: Stored on the instance as ``_debug``.
        _test_mode: Internal flag to skip network calls (for testing).
        polling: Internal flag for tests/CI to skip the background
            control-plane listener (WS or HTTP poll). Defaults True
            in production. Set False when the test environment
            cannot tolerate a background thread opening sockets.

    Note:
        - `organization_id` is set from `_authenticate()` after init; it is
          NOT a public init parameter and not read from env.
        - `api_key` is required as of 0.3.0 (T3-S2). The previous
          `local_mode` flag was removed because it silently bypassed
          every backend gate.
        - `timeout`/`max_retries` are fixed at 30s / 3 (no public override).

    Raises:
        NullRunAuthenticationError: if neither `api_key` nor
            `NULLRUN_API_KEY` is set, or if authentication fails
            (including a network error during the auth request).
            The public `init()` surface performs the api-key check
            first and produces a clearer error message; this
            constructor-level raise is the direct fallback for tests
            and advanced callers that build the runtime by hand.
    """
    self.api_key = api_key or os.getenv("NULLRUN_API_KEY")
    self.secret_key = secret_key or os.getenv("NULLRUN_SECRET_KEY")
    self.api_url = api_url or os.getenv("NULLRUN_API_URL", "https://api.nullrun.io")

    # T3-S2 (0.3.0): api_key is now required. The previous `local_mode`
    # flag silently bypassed every backend gate (budget, policy,
    # control plane), which was a real safety hole in production.
    # We raise NullRunAuthenticationError here instead so the
    # misconfiguration is caught at startup. The public `init()`
    # surface raises first with a clearer message; this is the
    # direct construction path used by tests and advanced callers.
    if not self.api_key:
        raise NullRunAuthenticationError(
            "NullRunRuntime() requires an api_key. Pass api_key='nr_live_...' "
            "or set NULLRUN_API_KEY. (Silent no-op fallback was removed "
            "in 0.3.0 -- see CHANGELOG.)"
        )
    # organization_id is set by _authenticate(); stays None until then.
    self.organization_id: str | None = None
    # Phase 139+: workflow_id is set by _authenticate() from the API
    # key's binding (organization_api_keys.workflow_id). Used as a
    # fallback for /check, /status, and span events when the user
    # hasn't entered a `with workflow(...)` context. None on legacy
    # keys (pre-139 or never used) -- call sites must NOT invent one.
    self.workflow_id: str | None = None

    self._test_mode = _test_mode
    self.polling = polling

    self._policy: Policy | None = policy
    # Sprint 3.2: the typed ``on_transport_error`` parameter on
    # ``Transport.execute()`` is the preferred mechanism. The legacy
    # string ``fallback_mode`` parameter (and its NULLRUN_FALLBACK_MODE
    # env var) is still honoured for one minor version; the env-var
    # path emits a ``DeprecationWarning`` so operators see the
    # migration path.
    fb_raw = fallback_mode
    if fb_raw is None and os.environ.get("NULLRUN_FALLBACK_MODE"):
        # Legacy env var: emit a deprecation warning at construction.
        # After Sprint 3.2 the env var continues to work (so existing
        # deployments don't break) but the user is told to migrate to
        # ``on_transport_error`` on ``Transport.execute()``.
        import warnings as _w
        _w.warn(
            "NULLRUN_FALLBACK_MODE is deprecated. Pass "
            "``on_transport_error=`` to ``Transport.execute()`` "
            "instead (one of 'raise' | 'open' | 'closed'). "
            "The env var will be removed in 0.5.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        fb_raw = os.environ.get("NULLRUN_FALLBACK_MODE", "permissive")
    # Unrecognised values fall through to PERMISSIVE rather than raising.
    fb_upper = str(fb_raw).upper() if fb_raw is not None else "PERMISSIVE"
    if fb_upper == "STRICT":
        self._fallback_mode = FallbackMode.STRICT
    elif fb_upper == "CACHED":
        self._fallback_mode = FallbackMode.CACHED
    else:
        self._fallback_mode = FallbackMode.PERMISSIVE
    self._timeout = 30
    self._max_retries = 3
    self._debug = debug
    self._transport: Transport | None = None

    # Local enforcement state
    # Phase 0.3.1: the BoundedDict-based per-workflow cost /
    # loop / retry counters have been removed alongside
    # ``_check_local_limits``. The local loop / rate checks
    # (``_loop_tracker`` / ``_rate_tracker`` below) are
    # independent and stay -- they do not depend on cost.
    self._workflow_start_time: float = time.time()

    # Local loop and rate tracking (for _local_check in track())
    self._loop_tracker = LoopTracker(window_seconds=60)
    self._rate_tracker = RateTracker(window_seconds=60)

    # Phase D: dedup LRU. Multiple observation paths (httpx transport,
    # LangChain callback, OpenAI Agents tracer) can fire for the same
    # LLM call. We collapse them to a single track() per fingerprint.
    # The fingerprint is computed at the observation point and passed
    # via the `_fingerprint` event field.
    # Imported here rather than at module top; presumably to avoid an
    # import cycle with the instrumentation package -- TODO confirm.
    from nullrun.instrumentation.auto import make_dedup_state
    self._seen_track_fingerprints = make_dedup_state()

    # Per ADR-008 the SDK does not track local cost. The estimate
    # below is kept for backwards compatibility with 0.3.x callers and
    # is initialised to 0. The previous implementation read from
    # `self._workflow_costs` (a BoundedDict removed in 0.3.1) which
    # left `track()` raising AttributeError on first call.
    self._local_cost_cents_estimate: int = 0

    # Default thresholds for local check (Phase 1 - hardcoded, not from backend)
    self._local_loop_threshold = 6
    self._local_rate_limit = 1000  # calls per minute

    # Coverage counters (Phase 3 of the production-readiness plan).
    # The instrumentation layer in `nullrun.instrumentation.auto`
    # calls ``_safe_bump_coverage(runtime, "_coverage_seen" /
    # "_coverage_tracked" / "_coverage_streaming_skipped", host)``
    # so the dashboard can show "which LLM hosts the SDK is
    # seeing vs. successfully tracking". Previous versions
    # relied on ``_safe_bump_coverage`` to no-op when these
    # attributes were missing -- the dashboard's coverage tab
    # was always empty.
    self._coverage_seen: dict[str, int] = {}
    self._coverage_tracked: dict[str, int] = {}
    self._coverage_streaming_skipped: dict[str, int] = {}

    # Remote control plane state (per-workflow, pushed from server via WS).
    # Unified model: effective_state = max(local_state, remote_state).
    # Writes and reads are meant to go through the `_remote_state_for` /
    # `_set_remote_state` helpers (Phase 5 #5.1) so the WS callback,
    # the HTTP poll, and the gate check can run concurrently
    # without a TOCTOU race. RLock because the same thread can
    # re-enter via the gate's get-then-set sequence.
    self._remote_states: dict[str, dict[str, Any]] = {}
    self._states_lock = threading.RLock()

    # Phase B: control plane transport. The SDK connects to the server's
    # WS endpoint and receives state push events (killed/paused) within
    # ~100ms of the operator action -- vs the previous 1s HTTP poll.
    # The HTTP poll path is preserved as a fallback when
    # `NULLRUN_TRANSPORT=http` is set (env var defaults to `ws`).
    self._transport_mode: str = os.getenv("NULLRUN_TRANSPORT", "ws").lower()
    self._ws_thread: threading.Thread | None = None
    self._ws_stop_event = threading.Event()
    self._ws_connection: Any = None  # WebSocketConnection; typed loosely to avoid import cycle
    self._ws_loop: Any = None  # asyncio loop running in the WS thread
    # Legacy HTTP-poll state -- only used when transport mode is `http`.
    self._poll_thread: threading.Thread | None = None
    self._poll_running = False

    # Action handling
    self._action_handler: ActionHandler | None = None

    # Initialize transport FIRST (before auth/policy) so we can reuse its client
    # Transport will be started later after auth/policy succeed
    # NOTE(review): if authentication below raises, this transport is
    # not stopped or closed here -- confirm whether that leaks its client.
    self._transport = Transport(
        api_url=self.api_url,
        api_key=self.api_key,
        secret_key=self.secret_key,
        config=FlushConfig(
            batch_size=50,
            flush_interval=5.0,
        ),
    )

    # Note: a gRPC transport was prototyped in earlier SDK versions but the
    # gRPC server at the platform is intentionally frozen until the
    # activation checklist (TLS, auth, proto extensions, cost pipeline
    # parity, tests) is complete. The SDK no longer attempts to construct
    # a gRPC client. NULLRUN_USE_GRPC changes nothing except emitting the
    # informational log line below.
    if os.getenv("NULLRUN_USE_GRPC"):
        logger.info(
            "NULLRUN_USE_GRPC is set but the gRPC transport is not "
            "implemented in this SDK version; falling back to HTTP."
        )

    # Initialize
    if self._test_mode:
        # Test mode: skip all network calls, use local policy
        self._policy = self._policy or Policy.default_local()
        self._transport.start()
    else:
        try:
            self._authenticate()
        except NullRunAuthenticationError:
            raise  # Re-raise auth errors immediately - don't continue in unprotected mode
        except httpx.RequestError as e:
            raise NullRunAuthenticationError(
                f"Auth request failed: {e}. Cannot establish secure connection to NullRun. "
                f"Refusing to operate in unprotected mode."
            ) from e
        self._fetch_policy()
        self._transport.start()
        # Start remote polling unless disabled (internal `polling=False`
        # for tests/CI). Production always polls.
        # NOTE(review): the listener thread starts before the action
        # handler and sensitive-tool sets below are assigned.
        if self.polling:
            self._start_remote_polling()

    # Initialize action handler
    self._action_handler = ActionHandler()

    # Phase 1.4: Sensitive tools that require strict mode (pre-execution enforcement)
    # These tools MUST go through /execute endpoint, NOT direct execution
    self._sensitive_tools: set = {
        # Financial operations
        "stripe.charge",
        "stripe.refund",
        "stripe.payout",
        "payment.process",
        # Email / communication
        "send_email",
        "send_sms",
        "send_slack",
        "send_discord",
        # Database operations
        "db.delete",
        "db.drop",
        "db.truncate",
        "db.write",
        # External API calls
        "api.post",
        "api.put",
        "api.delete",
        # File operations
        "file.delete",
        "file.write",
        "s3.delete",
        # Admin operations
        "admin.delete",
        "admin.create_user",
        "admin.disable_user",
    }
    self._strict_mode_tools: set[str] = set()

    # NOTE(review): the message reports mode=cloud unconditionally, test
    # mode included.
    logger.info(
        f"NullRun Runtime initialized: "
        f"mode=cloud, "
        f"policy={self._policy}"
    )
513
+
514
@classmethod
def get_instance(cls) -> "NullRunRuntime":
    """Get the singleton runtime instance, rebuilding it if credentials changed.

    Thread-safe: the singleton lock is held for the full read-compare-
    rebuild sequence (Phase 5 #5.3). The previous version dropped the
    lock between shutdown and the recursive get_instance(), creating a
    window where a concurrent caller could observe a half-shutdown
    runtime.

    ``NULLRUN_API_KEY`` and ``NULLRUN_API_URL`` are re-read on every call.
    When either differs from the live instance, that instance is shut
    down and replaced.

    Two failure modes of the earlier implementation are avoided:

    * The stale instance is unpublished *before* it is shut down, so a
      failing ``shutdown()`` or a failing rebuild can no longer leave a
      dead runtime installed as the singleton.
    * An unset ``NULLRUN_API_KEY`` is treated as "no new key" instead of
      "key changed". The constructor rejects a missing key, so the old
      behaviour tore down a working runtime and then raised.

    Returns:
        The current (possibly freshly rebuilt) runtime.

    Raises:
        NullRunAuthenticationError: propagated from the constructor when
            a runtime has to be built and cannot authenticate.
    """
    with cls._lock:
        # Re-read env vars at every call site so credential rotation
        # is observed on the next get_instance() invocation.
        api_key = os.getenv("NULLRUN_API_KEY")
        api_url = os.getenv("NULLRUN_API_URL", "https://api.nullrun.io")

        existing = cls._instance
        if existing is None:
            cls._instance = cls(api_key=api_key, api_url=api_url)
            return cls._instance

        key_changed = bool(api_key) and api_key != existing.api_key
        url_changed = api_url != existing.api_url
        if not (key_changed or url_changed):
            return existing

        logger.info(
            "Credentials changed: api_key=%s, api_url=%s - reinitializing",
            "***" if key_changed else "unchanged",
            "changed" if url_changed else "unchanged",
        )
        # Unpublish first: whatever happens below, callers must never
        # be handed a runtime that has already been shut down.
        cls._instance = None
        existing.shutdown()
        cls._instance = cls(
            # Keep the live key when the environment does not supply one.
            api_key=api_key or existing.api_key,
            api_url=api_url,
        )
        return cls._instance
548
+
549
@classmethod
def reset_instance(cls) -> None:
    """Reset the singleton. Mainly for testing.

    The singleton reference is cleared *before* ``shutdown()`` runs, so
    the class is left without an instance even when ``shutdown()``
    raises. The exception itself still propagates to the caller.
    """
    with cls._lock:
        instance, cls._instance = cls._instance, None
        if instance is not None:
            instance.shutdown()
556
+
557
def _authenticate(self) -> None:
    """Authenticate with API key and get organization_id.

    Posts the API key to ``{api_url}/api/v1/auth/verify`` using the
    transport's HTTP client and, on HTTP 200, stores:

    * ``organization_id`` (required; its absence is an error),
    * ``workflow_id`` (optional; a warning is logged when absent because
      remote kill/pause cannot then be honoured),
    * a rotated ``secret_key`` / ``_key_version`` when the response
      carries both ``key_version`` and ``secret_key``.

    Every failure is reported as ``NullRunAuthenticationError`` so the
    caller can fail closed with a single ``except``. That includes a
    200 response whose body is not a JSON object, which previously
    escaped as a ``ValueError`` / ``AttributeError``.

    Raises:
        BreakerError: if no API key is configured.
        NullRunAuthenticationError: on transport errors, non-200 status,
            an undecodable body, or a body without ``organization_id``.
    """
    if not self.api_key:
        raise BreakerError("API key required for cloud mode")

    # Log the URL that is actually requested (the old message omitted
    # the /api/v1 prefix).
    verify_url = f"{self.api_url}/api/v1/auth/verify"
    logger.debug(f"Authenticating with API at {verify_url}")
    try:
        # Reuse the transport's client (per the original author: for
        # connection pooling, retry, and circuit breaking).
        response = self._transport._client.post(
            verify_url,
            json={"api_key": self.api_key},
        )
    except httpx.RequestError as e:
        # Network error - raise exception, do not fall back silently
        raise NullRunAuthenticationError(
            f"Auth request failed: {e}. Cannot establish secure connection to NullRun. "
            f"Refusing to operate in unprotected mode."
        ) from e

    if response.status_code != 200:
        # Auth failed - raise exception instead of silent fallback
        raise NullRunAuthenticationError(
            f"Auth failed with status {response.status_code}. "
            f"API key may be invalid or expired. Not operating in unsafe mode."
        )

    try:
        data = response.json()
    except ValueError as e:
        raise NullRunAuthenticationError(
            "Auth response was not valid JSON. "
            "Refusing to operate in unprotected mode."
        ) from e
    if not isinstance(data, dict):
        raise NullRunAuthenticationError(
            "Auth response was not a JSON object. "
            "Refusing to operate in unprotected mode."
        )

    # STRICT MODE: organization_id is REQUIRED, no fallback
    org_id = data.get("organization_id")
    if not org_id:
        raise NullRunAuthenticationError(
            "Auth response missing organization_id - server may be outdated or compromised. "
            "Refusing to operate with legacy identity."
        )
    self.organization_id = org_id

    # Phase 139+: pick up the workflow this key is bound to.
    # `None` on legacy keys (pre-139 or never-used) -- call
    # sites that NEED a workflow (check_workflow_budget,
    # check_control_plane, span events) will fall through to
    # the contextvar when self.workflow_id is None, exactly
    # like before. New keys always have this set.
    self.workflow_id = data.get("workflow_id")

    # Phase 0.3.1: pre-Phase-139 API keys do not return
    # workflow_id, so the SDK cannot honour the dashboard's
    # KILL/PAUSE for that workflow. Emit a WARNING so the operator
    # knows to rotate the key. Without this, the kill switch
    # silently no-ops (a real safety hole for legacy users).
    if self.workflow_id is None:
        # Only the first 8 characters of the key are ever logged.
        masked_key = (
            (self.api_key[:8] + "***")
            if self.api_key and len(self.api_key) >= 8
            else "***"
        )
        logger.warning(
            f"API key {masked_key!s} is a legacy key with no "
            f"workflow binding; remote kill/pause will not be "
            f"honoured. Rotate to a Phase 139+ key in the "
            f"dashboard to enable control plane enforcement."
        )

    # Handle key rotation: server may return new key_version and secret_key
    # This allows seamless secret key rotation without downtime
    new_key_version = data.get("key_version")
    new_secret_key = data.get("secret_key")

    if new_key_version is not None and new_secret_key is not None:
        # `_key_version` is not set in __init__, hence getattr.
        old_version = getattr(self, '_key_version', None)
        if old_version != new_key_version:
            logger.info(
                f"Secret key rotation: version {old_version} -> {new_key_version}"
            )
            self._key_version = new_key_version
            self.secret_key = new_secret_key
            # Update transport's secret key for subsequent requests
            self._transport.secret_key = new_secret_key

    logger.info(f"Authenticated: organization_id={self.organization_id}")
642
+
643
def _fetch_policy(self) -> None:
    """Fetch policy from backend and cache locally.

    Posts ``organization_id`` to ``{api_url}/api/v1/policies`` and, when
    the response is HTTP 200 with a non-empty body, builds the policy
    from its first element.

    In every other case (no organization yet, non-200 status, empty
    body, any exception) the method falls back without raising. It
    keeps a policy that is already set on the instance (for example one
    supplied by the caller or fetched earlier) and only installs
    ``Policy.default_local()`` when there is none. Each fallback is
    logged; previously non-200 and empty responses fell back silently
    and always overwrote the existing policy with the default.
    """
    # What to use when the backend gives us nothing usable.
    fallback = getattr(self, "_policy", None) or Policy.default_local()

    if not self.organization_id:
        self._policy = fallback
        return

    try:
        # Reuse the transport's client (per the original author: for
        # connection pooling, retry, and circuit breaking).
        response = self._transport._client.post(
            f"{self.api_url}/api/v1/policies",
            json={"organization_id": self.organization_id},
        )

        if response.status_code == 200:
            data = response.json()
            if data and len(data) > 0:
                # Indexing with [0] means the body is expected to be a
                # sequence of policy dicts; only the first one is used.
                self._policy = Policy.from_dict(data[0])
                logger.info(f"Policy fetched: {self._policy}")
                return
            logger.warning("Policy fetch returned an empty response; using fallback policy")
        else:
            logger.warning(
                "Policy fetch returned HTTP %s; using fallback policy",
                response.status_code,
            )
    except Exception as e:
        # Deliberately broad: a policy fetch failure must not abort startup.
        logger.warning(f"Failed to fetch policy: {e}")

    self._policy = fallback
667
+
668
def _start_transport(self) -> None:
    """Kick off the transport's background flush, if a transport exists.

    The transport object itself is constructed in ``__init__`` ahead of
    auth/policy; this method merely starts it and is a no-op when
    ``self._transport`` is falsy.
    """
    transport = self._transport
    if not transport:
        return
    transport.start()
676
+
677
def _start_remote_polling(self) -> None:
    """Launch the background control-plane listener.

    Phase B: WebSocket push is the default, giving sub-second kill/pause
    propagation. When ``self._transport_mode`` is exactly ``"http"``
    (``NULLRUN_TRANSPORT=http``) the legacy 1-second HTTP poller is used
    instead -- kept for environments where the WS endpoint is blocked
    or for parity with old SDK behavior.
    """
    use_http = self._transport_mode == "http"
    # Only the selected starter is looked up and invoked.
    launch = self._start_http_poller if use_http else self._start_ws_listener
    launch()
689
+
690
def _start_http_poller(self) -> None:
    """Legacy path: spawn the daemon thread that polls for state changes.

    Sets ``_poll_running`` so the loop in ``_poll_commands`` keeps going,
    stores the thread on ``_poll_thread`` and starts it.
    """
    self._poll_running = True
    poller = threading.Thread(
        name="nullrun-poller",
        target=self._poll_commands,
        daemon=True,
    )
    self._poll_thread = poller
    poller.start()
    logger.info("Started remote state poller (HTTP)")
700
+
701
def _start_ws_listener(self) -> None:
    """Phase B: run the WebSocket push channel on a background daemon thread.

    The thread's target is ``_ws_run``, which owns a dedicated asyncio
    loop so incoming pushes can update ``_remote_states`` without
    touching the user's main loop. When ``organization_id`` has not
    been set, the WS channel cannot be opened and the HTTP poller is
    started instead.
    """
    if self.organization_id:
        self._ws_stop_event.clear()
        listener = threading.Thread(
            name="nullrun-ws",
            target=self._ws_run,
            daemon=True,
        )
        self._ws_thread = listener
        listener.start()
        logger.info(
            "Started WS control plane listener (org=%s)", self.organization_id
        )
        return

    # No organization: degrade to polling rather than doing nothing.
    logger.warning(
        "Cannot start WS control plane: organization_id is unset. "
        "Falling back to HTTP poll."
    )
    self._start_http_poller()
727
+
728
def _ws_run(self) -> None:
    """Background thread entry point: drive the WS connect/receive coroutine.

    Creates a fresh asyncio event loop for this thread, publishes it on
    ``self._ws_loop``, and runs ``_ws_connect_and_serve()`` to completion.
    The loop is always closed and ``_ws_loop`` reset afterwards. Any
    exception is logged as a warning instead of propagating, and
    ``_ws_connection`` is cleared on the way out in every case.
    """
    try:
        loop = asyncio.new_event_loop()
        self._ws_loop = loop
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(self._ws_connect_and_serve())
        finally:
            loop.close()
            self._ws_loop = None
    except Exception as exc:  # noqa: BLE001 -- background thread, must never die silently
        logger.warning(f"WS control plane thread exited: {exc}")
    finally:
        self._ws_connection = None
749
+
750
async def _ws_connect_and_serve(self) -> None:
    """Connect the WS once and serve messages until the receive task ends.

    Opens the connection through ``self._transport.connect_websocket``
    with a callback that copies each pushed state into the per-workflow
    cache via ``_set_remote_state``. It then awaits the connection's
    receive task and finally closes the connection.

    No exception escapes this coroutine for ordinary failures:
    a missing transport or a failed connect is logged as a warning and
    the coroutine returns; errors from the receive task and from
    ``close()`` are logged at debug level.
    """
    if not self._transport:
        logger.warning("WS control plane: transport not initialized, aborting")
        return

    def on_state_change(state: dict[str, Any]) -> None:
        """Store a pushed state so the next gate check sees it.

        Messages without ``workflow_id`` are ignored. Errors are logged
        and swallowed so a malformed push cannot break the receiver.
        """
        try:
            workflow_id = state.get("workflow_id")
            if not workflow_id:
                logger.debug("WS state message missing workflow_id: %s", state)
                return
            new_state = {
                "state": state.get("state", "Normal"),
                "version": state.get("version", 0),
                "reason": state.get("reason"),
                "updated_at": state.get("updated_at", 0),
            }
            self._set_remote_state(workflow_id, new_state)
            # Log from the local snapshot: reading `_remote_states`
            # here would bypass the lock and could report a value
            # written by another thread in the meantime.
            logger.debug(
                "WS state push: workflow=%s state=%s reason=%s",
                workflow_id,
                new_state["state"],
                new_state["reason"],
            )
        except Exception as e:  # noqa: BLE001
            logger.warning(f"WS state callback error: {e}")

    try:
        conn = await self._transport.connect_websocket(
            organization_id=self.organization_id,
            on_state_change=on_state_change,
        )
        self._ws_connection = conn
    except Exception as e:
        logger.warning(f"WS control plane connect failed: {e}")
        return

    # Block until the connection closes (e.g. server disconnect).
    try:
        receive_task = conn._receive_task  # type: ignore[attr-defined]
        if receive_task is not None:
            await receive_task
    except Exception as e:
        logger.debug(f"WS receive loop ended: {e}")
    finally:
        try:
            await conn.close()
        except Exception as e:  # noqa: BLE001 -- best-effort close
            # Still best-effort, but no longer invisible.
            logger.debug("WS close failed: %s", e)
        self._ws_connection = None
809
+
810
def _poll_commands(self) -> None:
    """
    Poll server for per-workflow control plane state.

    Runs in a background thread for as long as ``self._poll_running``
    is true. Once per second it snapshots the workflow ids currently
    present in ``_remote_states`` and calls ``_fetch_remote_state`` for
    each of them.

    The snapshot is taken under ``_states_lock``, in line with the rule
    that the state cache is only touched under that lock. A failure for
    one workflow is logged at debug level and does not stop the
    remaining workflows from being polled in the same tick. Previously
    the first failure abandoned the rest of the tick.
    """
    while self._poll_running:
        with self._states_lock:
            workflow_ids = list(self._remote_states)

        for workflow_id in workflow_ids:
            if not self._poll_running:
                # Stop was requested mid-tick; do not start new requests.
                break
            try:
                self._fetch_remote_state(workflow_id)
            except Exception as e:  # noqa: BLE001 -- poller must keep running
                logger.debug(f"Polling error: {e}")

        time.sleep(1.0)  # Poll every second
832
+
833
+ def _resolve_workflow_id(self, explicit: str | None = None) -> str | None:
834
+ """
835
+ Resolve the effective workflow_id for /check, /status, and span
836
+ events. Order of precedence:
837
+
838
+ 1. `explicit` -- passed by the call site (e.g. contextvar in
839
+ track_event or the user-supplied arg in check_control_plane)
840
+ 2. `self.workflow_id` -- bound to the API key by the server
841
+ (Phase 139+). Set during _authenticate(). None on legacy
842
+ keys.
843
+ 3. None -- caller is in cloud mode but has no workflow scope.
844
+ /check falls through to org-level policy; /status is
845
+ skipped; span events are emitted without workflow_id
846
+ (orphan, as before).
847
+
848
+ The SDK does NOT auto-generate a workflow_id. The Phase 139
849
+ invariant -- workflow is derived server-side from the key, never
850
+ invented by the SDK -- is preserved.
851
+ """
852
+ if explicit:
853
+ return explicit
854
+ return self.workflow_id
855
+
856
+ def _remote_state_for(self, workflow_id: str) -> dict[str, Any]:
857
+ """Return the cached remote state for `workflow_id` (Phase 5 #5.1).
858
+
859
+ Thread-safe via `_states_lock`. If no state has been pushed
860
+ yet, returns an empty dict (so callers can do
861
+ ``state.get("state", "Normal")`` without an extra check).
862
+ """
863
+ with self._states_lock:
864
+ st = self._remote_states.get(workflow_id)
865
+ if st is None:
866
+ st = {}
867
+ self._remote_states[workflow_id] = st
868
+ return st
869
+
870
+ def _set_remote_state(self, workflow_id: str, state: dict[str, Any]) -> None:
871
+ """Atomically replace the cached remote state for `workflow_id`."""
872
+ with self._states_lock:
873
+ self._remote_states[workflow_id] = dict(state)
874
+
875
+ def _fetch_remote_state(self, workflow_id: str) -> None:
876
+ """Fetch remote state for a specific workflow from /status endpoint.
877
+
878
+ Phase 5 #5.5: route through ``self._transport._client`` so the
879
+ shared connection pool, retry policy, and circuit breaker
880
+ apply. The previous raw ``httpx.get`` call created a fresh
881
+ connection every time and bypassed the CB.
882
+ """
883
+ try:
884
+ response = self._transport._client.get(
885
+ f"{self.api_url}/api/v1/status/{workflow_id}",
886
+ headers=self._auth_headers(),
887
+ timeout=5.0,
888
+ )
889
+ if response.status_code == 200:
890
+ data = response.json()
891
+ self._set_remote_state(workflow_id, {
892
+ "state": data.get("state", "Normal"),
893
+ "version": data.get("version", 0),
894
+ "reason": data.get("reason"),
895
+ "updated_at": data.get("updated_at", 0),
896
+ })
897
+ logger.debug(
898
+ "Remote state for %s: %s",
899
+ workflow_id,
900
+ self._remote_state_for(workflow_id),
901
+ )
902
+ except Exception as e:
903
+ logger.debug(f"Failed to fetch remote state for {workflow_id}: {e}")
904
+
905
+ def check_control_plane(self, workflow_id: str) -> None:
906
+ """
907
+ Check remote control plane state and raise if workflow is paused/killed.
908
+
909
+ This is called in the execution path after local enforcement.
910
+ The unified state model: effective_state = max(local_state, remote_state)
911
+
912
+ Raises:
913
+ WorkflowPausedException: If workflow is paused on server
914
+ WorkflowKilledInterrupt: If workflow is killed on server
915
+ """
916
+ # Phase 139+: prefer the explicit arg (contextvar-supplied), fall
917
+ # back to the API key's bound workflow. None on legacy keys --
918
+ # in that case there's no workflow to check, so we no-op
919
+ # (preserves pre-139 behavior for keys that have never been
920
+ # workflow-bound).
921
+ resolved = self._resolve_workflow_id(workflow_id or None)
922
+ if not resolved:
923
+ return
924
+ workflow_id = resolved
925
+
926
+ # Ensure we have the latest remote state
927
+ # Phase 5 #5.1: use the lock-protected getter so a concurrent
928
+ # WS push can't drop the state between the membership check
929
+ # and the read.
930
+ remote_state = self._remote_state_for(workflow_id)
931
+ if not remote_state:
932
+ # Fetch synchronously if not in cache yet
933
+ self._fetch_remote_state(workflow_id)
934
+ remote_state = self._remote_state_for(workflow_id)
935
+ state = remote_state.get("state", "Normal")
936
+
937
+ if state == "Paused":
938
+ reason = remote_state.get("reason", "remote pause")
939
+ raise WorkflowPausedException(
940
+ workflow_id=workflow_id,
941
+ reason=reason,
942
+ )
943
+ elif state == "Killed":
944
+ reason = remote_state.get("reason", "remote kill")
945
+ raise WorkflowKilledInterrupt(
946
+ workflow_id=workflow_id,
947
+ reason=reason,
948
+ )
949
+
950
+ def check_workflow_budget(self) -> None:
951
+ """
952
+ Pre-flight budget check via /api/v1/gate. Called from @protect
953
+ before the wrapped function runs, so a workflow with no remaining
954
+ budget never gets to spend tokens.
955
+
956
+ Sprint 3.1: bumps the ``check_calls`` metric so the dashboard
957
+ can show the rate of pre-flight budget checks.
958
+
959
+ Decision → exception mapping:
960
+ "block" → WorkflowKilledInterrupt (hard policy / reservation error)
961
+ "throttle"→ WorkflowPausedException (insufficient budget, can resume)
962
+ "allow" → return
963
+
964
+ Fail-OPEN: any transport error (network, timeout, 5xx) is logged
965
+ at warning level and the caller proceeds. This mirrors the
966
+ pattern in `check_control_plane` -- a transient backend outage
967
+ must never freeze the user's agent. The /track fast path also
968
+ does not gate on budget, so the worst case under /gate failure
969
+ is that we revert to the pre-C behaviour: budget enforcement is
970
+ advisory until the gateway recovers.
971
+
972
+ Uses `estimated_tokens=1` (the minimum the API accepts). Goal
973
+ is the binary question "is there any budget left?", not cost
974
+ prediction -- the backend recomputes the authoritative cost on
975
+ /track from the real token count.
976
+
977
+ Opt-out: set `NULLRUN_SKIP_BUDGET_CHECK=1` to disable the
978
+ pre-flight. Useful in tests where the org's API key has
979
+ exhausted its budget from previous runs and the test only
980
+ wants to exercise a non-budget code path.
981
+ """
982
+ if os.environ.get("NULLRUN_SKIP_BUDGET_CHECK", "").strip() == "1":
983
+ logger.debug("check_workflow_budget: skipped via NULLRUN_SKIP_BUDGET_CHECK=1")
984
+ return
985
+
986
+ # Sprint 3.1 (B23): bump the ``check_calls`` counter so the
987
+ # dashboard can show the rate of pre-flight budget checks
988
+ # and the operator can verify the pre-flight is actually
989
+ # running (not silently always-skipped).
990
+ metrics.inc_runtime("check_calls")
991
+
992
+ from nullrun.context import get_workflow_id
993
+
994
+ # Phase 139+: prefer the user-set contextvar (explicit `with
995
+ # workflow(...)` block), fall back to the API key's bound
996
+ # workflow. Returns None only on legacy keys that have never
997
+ # been workflow-bound -- in that case the check is silently
998
+ # skipped, exactly as before this change.
999
+ workflow_id = self._resolve_workflow_id(get_workflow_id())
1000
+ if not workflow_id:
1001
+ return
1002
+
1003
+ check_req = {
1004
+ "organization_id": self.organization_id or "local",
1005
+ "execution_id": workflow_id,
1006
+ "operation_id": str(uuid.uuid4()),
1007
+ "check_type": "llm",
1008
+ "model": "budget-precheck",
1009
+ "estimated_tokens": 1,
1010
+ }
1011
+
1012
+ try:
1013
+ response = self._transport.check(check_req)
1014
+ except Exception as exc: # noqa: BLE001
1015
+ logger.warning(
1016
+ f"check_workflow_budget: /gate unavailable, failing open: {exc}"
1017
+ )
1018
+ return
1019
+
1020
+ decision = response.get("decision", "allow")
1021
+ decision_source = response.get("decision_source", DecisionSource.GATEWAY)
1022
+ # Round 3 (Phase 0.4.0): only fail-OPEN on EXPLICIT synthetic
1023
+ # responses (decision_source starts with "fallback" or is one
1024
+ # of the classified TransportErrorSource values). Real
1025
+ # backend decisions (decision_source="gateway", or missing,
1026
+ # for backward compat) are honoured.
1027
+ if decision_source.startswith("fallback") or decision_source in {
1028
+ TransportErrorSource.NETWORK_ERROR,
1029
+ TransportErrorSource.GATEWAY_ERROR,
1030
+ TransportErrorSource.BREAKER_OPEN,
1031
+ TransportErrorSource.AUTH_ERROR,
1032
+ }:
1033
+ logger.debug(
1034
+ f"check_workflow_budget: synthetic decision_source="
1035
+ f"{decision_source!r}, treating as transport error"
1036
+ )
1037
+ return
1038
+ if decision == "block":
1039
+ reasons = response.get("explanations") or ["block"]
1040
+ # Sprint 3 follow-up (B23): bump ``cost_limit_exceeded``
1041
+ # when the pre-flight blocks the workflow. The counter
1042
+ # is the operator's primary signal for "the budget
1043
+ # cap is biting" — distinct from loop / retry / rate
1044
+ # which have their own counters.
1045
+ metrics.inc_runtime("cost_limit_exceeded")
1046
+ raise WorkflowKilledInterrupt(
1047
+ workflow_id=workflow_id,
1048
+ reason="; ".join(reasons),
1049
+ )
1050
+ if decision == "throttle":
1051
+ reasons = response.get("explanations") or ["throttle"]
1052
+ raise WorkflowPausedException(
1053
+ workflow_id=workflow_id,
1054
+ reason="; ".join(reasons),
1055
+ )
1056
+
1057
+ def _auth_headers(self) -> dict[str, str]:
1058
+ """Get authentication headers."""
1059
+ headers = {"Content-Type": "application/json"}
1060
+ if self.api_key:
1061
+ headers["X-API-Key"] = self.api_key
1062
+ return headers
1063
+
1064
+ def shutdown(self) -> None:
1065
+ """Shutdown runtime gracefully."""
1066
+ # Stop the HTTP poller (legacy path) if it was started.
1067
+ self._poll_running = False
1068
+ if self._poll_thread and self._poll_thread.is_alive():
1069
+ # Phase 6 #6.3: cap to 0.5s (was 2.0s) so a SIGTERM
1070
+ # handler returns quickly. The HTTP-poll is best-effort
1071
+ # and the WS push channel is the authoritative source.
1072
+ self._poll_thread.join(timeout=0.5)
1073
+
1074
+ # Stop the WS control plane listener (Phase B). Closing the
1075
+ # connection causes the receive task to unblock, the loop to
1076
+ # exit, and the thread to terminate.
1077
+ self._ws_stop_event.set()
1078
+ conn = self._ws_connection
1079
+ if conn is not None and self._ws_loop is not None:
1080
+ try:
1081
+ future = asyncio.run_coroutine_threadsafe(conn.close(), self._ws_loop)
1082
+ future.result(timeout=2.0)
1083
+ except Exception as e:
1084
+ logger.debug(f"WS close on shutdown failed (best-effort): {e}")
1085
+ if self._ws_thread and self._ws_thread.is_alive():
1086
+ self._ws_thread.join(timeout=0.5)
1087
+
1088
+ if self._transport:
1089
+ self._transport.stop()
1090
+ NullRunRuntime._instance = None
1091
+ logger.info("NullRun Runtime shutdown")
1092
+
1093
+ @property
1094
+ def policy(self) -> Policy:
1095
+ """Get current policy."""
1096
+ return self._policy or Policy.default_local()
1097
+
1098
+ def track(
1099
+ self,
1100
+ event: dict[str, Any],
1101
+ ) -> dict[str, Any]:
1102
+ """
1103
+ Track a cost event.
1104
+
1105
+ This is the main API for recording events. It:
1106
+ 1. Adds workflow_id, trace_id, span_id from context
1107
+ 2. Runs local check FIRST (no network round-trip)
1108
+ 3. If local check passes, records and sends to backend
1109
+ 4. If local check blocks, returns blocked response immediately
1110
+
1111
+ Args:
1112
+ event: Event dict with keys like:
1113
+ - type: "llm_call" | "tool_call" | "workflow_start" | "workflow_end"
1114
+ - tokens: int
1115
+ - tool_name: str (optional)
1116
+ - is_retry: bool (optional)
1117
+ - latency_ms: int (optional)
1118
+ - metadata: dict (optional)
1119
+
1120
+ Note:
1121
+ `cost_cents` is NOT a valid event key -- the SDK does not
1122
+ estimate cost. The backend computes it from tokens + the
1123
+ organization's policy.
1124
+
1125
+ Returns:
1126
+ Dict with enforcement results:
1127
+ - allowed: bool
1128
+ - actions: list of actions taken
1129
+ - local_cost: current local cost
1130
+ - blocked_reason: str (if blocked locally)
1131
+ - blocked_suggestion: str (if blocked locally)
1132
+
1133
+ Note:
1134
+ Local block reasons (loop detected, retry storm, rate
1135
+ limit, cost limit) are reported via the returned dict's
1136
+ ``blocked`` / ``blocked_reason`` / ``blocked_suggestion``
1137
+ fields rather than by raising an exception. The
1138
+ exception-raising variants of these conditions were
1139
+ removed in 0.4.0 because they had no in-tree callers;
1140
+ see ``nullrun.breaker.exceptions`` for the list.
1141
+ """
1142
+ logger.debug(f"Tracking event: {event.get('event_type', 'unknown')}")
1143
+
1144
+ # Phase D: dedup gate. The httpx transport, LangChain callback, and
1145
+ # OpenAI Agents tracer can all fire for the same LLM call. We drop
1146
+ # repeats keyed by `_fingerprint` (set by the observation path) so
1147
+ # each unique call produces exactly one /api/v1/track POST.
1148
+ fp = event.get("_fingerprint")
1149
+ if fp:
1150
+ from nullrun.instrumentation.auto import _fingerprint_is_seen
1151
+ if _fingerprint_is_seen(self._seen_track_fingerprints, fp):
1152
+ logger.debug("track() dedup hit for fingerprint=%s", fp)
1153
+ return {
1154
+ "allowed": True,
1155
+ "actions": [],
1156
+ "local_cost_cents": self._local_cost_cents_estimate,
1157
+ "deduped": True,
1158
+ }
1159
+
1160
+ # Phase 1: LOCAL CHECK FIRST (before any network call)
1161
+ # This provides instant blocking without round-trip latency
1162
+ local_decision = self._local_check(event)
1163
+ if not local_decision.allowed:
1164
+ # Blocked locally - return immediately without backend call
1165
+ logger.debug(f"Local check blocked: {local_decision.reason}")
1166
+ return {
1167
+ "allowed": False,
1168
+ "actions": ["block"],
1169
+ "blocked_reason": local_decision.reason,
1170
+ "blocked_suggestion": local_decision.suggestion,
1171
+ "local_cost_cents": 0,
1172
+ }
1173
+
1174
+ # Local check passed - record the call BEFORE sending to backend
1175
+ tool_name = event.get('tool_name', 'unknown')
1176
+ self._loop_tracker.record(tool_name)
1177
+ self._rate_tracker.record()
1178
+
1179
+ # Enrich event with context
1180
+ enriched = self._enrich_event(event)
1181
+ logger.debug(
1182
+ "Event enriched: workflow_id=%s, tokens=%s",
1183
+ enriched.get("workflow_id"),
1184
+ enriched.get("tokens"),
1185
+ )
1186
+
1187
+ # Register workflow for remote state polling. workflow_id
1188
+ # may be None on legacy keys -- that's fine, the no-op
1189
+ # branch in check_control_plane will skip polling.
1190
+ workflow_id = enriched.get("workflow_id")
1191
+ if workflow_id:
1192
+ with self._states_lock:
1193
+ self._remote_states.setdefault(workflow_id, {})
1194
+
1195
+ # Phase 0.3.1: the local cost / loop / retry-storm check
1196
+ # (``_check_local_limits``) has been removed. It read
1197
+ # ``event.get("cost_cents", 0)`` and accumulated into a
1198
+ # per-workflow counter, but ``track_llm`` /
1199
+ # ``track_tool`` / ``track_event`` never set ``cost_cents``
1200
+ # (the SDK does not estimate cost -- the backend does). The
1201
+ # local check therefore never fired for the public API
1202
+ # and silently drifted from the backend's authoritative
1203
+ # cost. The local loop / rate checks (``_local_check``)
1204
+ # are independent and stay -- they do not depend on cost.
1205
+ # Budget enforcement is now exclusively the backend's
1206
+ # job: ``check_workflow_budget`` (pre-flight) + the
1207
+ # server-side /track cost ledger reconciliation.
1208
+
1209
+ # Check remote control plane (after local enforcement)
1210
+ # This catches server-initiated pause/kill. Resolves
1211
+ # contextvar → self.workflow_id → no-op (legacy keys).
1212
+ self.check_control_plane(workflow_id)
1213
+
1214
+ # Buffer for transport. The wire payload must NOT include
1215
+ # cost_cents -- the SDK does not estimate cost; the backend
1216
+ # recomputes it from tokens + the org's policy. The
1217
+ # sink-only ``_fingerprint`` field is also stripped before
1218
+ # the wire send so the dedup key shape is not leaked to
1219
+ # anyone with audit-log access.
1220
+ wire_event = {
1221
+ k: v for k, v in enriched.items()
1222
+ if k not in ("cost_cents", "_fingerprint")
1223
+ }
1224
+ self._transport.track(wire_event)
1225
+
1226
+ # Update metrics (thread-safe)
1227
+ metrics.inc_runtime("track_calls")
1228
+
1229
+ return {
1230
+ "allowed": True,
1231
+ "actions": [],
1232
+ "local_cost_cents": self._local_cost_cents_estimate,
1233
+ }
1234
+
1235
+ def _trigger_action(
1236
+ self,
1237
+ action: ActionType,
1238
+ workflow_id: str,
1239
+ reason: str,
1240
+ ) -> None:
1241
+ """
1242
+ Trigger a protective action.
1243
+
1244
+ This executes the action through the action handler.
1245
+ """
1246
+ if self._action_handler:
1247
+ try:
1248
+ self._action_handler.handle(action.value, workflow_id, reason)
1249
+ except Exception as e:
1250
+ logger.debug(f"Action handler raised: {e}")
1251
+ # Let the exception propagate
1252
+
1253
+ # =============================================================================
1254
+ # Phase 1.4: Pre-Execution Enforcement (SDK Boundary Fix)
1255
+ # =============================================================================
1256
+
1257
+ def is_sensitive_tool(self, tool_name: str) -> bool:
1258
+ """
1259
+ Check if a tool is sensitive (requires strict mode).
1260
+
1261
+ Sensitive tools MUST go through /execute endpoint for pre-execution
1262
+ enforcement. They cannot be executed directly.
1263
+
1264
+ Args:
1265
+ tool_name: Name of the tool
1266
+
1267
+ Returns:
1268
+ True if tool requires strict mode
1269
+ """
1270
+ return tool_name in self._sensitive_tools or tool_name in self._strict_mode_tools
1271
+
1272
+ def coverage_report(self) -> dict[str, dict[str, int]]:
1273
+ """
1274
+ Snapshot of the LLM-host coverage counters that the auto-
1275
+ instrumentation layer maintains. The SDK tracks three
1276
+ counters per host:
1277
+
1278
+ - ``seen`` -- every LLM host the SDK observed a request to.
1279
+ - ``tracked`` -- hosts whose response was successfully
1280
+ extracted and emitted as an ``llm_call`` event.
1281
+ - ``streaming_skipped`` -- hosts whose response was a
1282
+ streaming SSE / ``stream=True`` and was deliberately
1283
+ NOT buffered (so the user keeps their chunked read).
1284
+
1285
+ The same payload is sent over the WebSocket heartbeat every
1286
+ 60s and via the HTTP-fallback path when the WS connection
1287
+ is down. The dashboard's coverage tab uses these counters
1288
+ to surface "we know about this host but cannot track it" --
1289
+ the leading indicator that an SDK upgrade is needed.
1290
+
1291
+ Returns:
1292
+ ``{"seen": {...}, "tracked": {...},
1293
+ "streaming_skipped": {...}}``. Each value is a fresh
1294
+ ``dict`` so callers can mutate the result without
1295
+ affecting the runtime's internal state.
1296
+ """
1297
+ return {
1298
+ "seen": dict(self._coverage_seen),
1299
+ "tracked": dict(self._coverage_tracked),
1300
+ "streaming_skipped": dict(self._coverage_streaming_skipped),
1301
+ }
1302
+
1303
+ def get_org_status(self, org_id: str | None = None) -> dict[str, Any]:
1304
+ """Public helper for reading ``/api/v1/orgs/{org_id}/status``.
1305
+
1306
+ Phase 8 #8.1: routes through ``self._transport._client`` so
1307
+ the shared connection pool, retry policy, and circuit breaker
1308
+ apply. Used by ``examples/cost_dashboard.py``.
1309
+
1310
+ Args:
1311
+ org_id: Optional organisation ID. Defaults to the runtime's
1312
+ ``self.organization_id`` (set during ``_authenticate``).
1313
+
1314
+ Returns:
1315
+ Parsed JSON dict of the org-status payload.
1316
+
1317
+ Raises:
1318
+ NullRunAuthenticationError: if neither ``org_id`` nor
1319
+ ``self.organization_id`` is available.
1320
+ httpx.HTTPError: on transport failure.
1321
+ """
1322
+ resolved = org_id or self.organization_id
1323
+ if not resolved:
1324
+ raise NullRunAuthenticationError(
1325
+ "get_org_status requires org_id (or a runtime bound to one)"
1326
+ )
1327
+ response = self._transport._client.get(
1328
+ f"{self.api_url}/api/v1/orgs/{resolved}/status",
1329
+ headers=self._auth_headers(),
1330
+ timeout=10.0,
1331
+ )
1332
+ response.raise_for_status()
1333
+ return response.json() # type: ignore[no-any-return]
1334
+
1335
+ def add_sensitive_tool(self, tool_name: str) -> None:
1336
+ """
1337
+ Add a tool to the sensitive tools list.
1338
+
1339
+ Sensitive tools require strict mode enforcement and must go through
1340
+ the /execute endpoint for pre-execution policy evaluation.
1341
+
1342
+ Args:
1343
+ tool_name: Name of the tool to mark as sensitive
1344
+
1345
+ Example:
1346
+ runtime = NullRunRuntime.get_instance()
1347
+ runtime.add_sensitive_tool("my.custom_tool")
1348
+ """
1349
+ self._strict_mode_tools.add(tool_name)
1350
+
1351
+ def remove_sensitive_tool(self, tool_name: str) -> None:
1352
+ """
1353
+ Remove a tool from the sensitive tools list.
1354
+
1355
+ Args:
1356
+ tool_name: Name of the tool to remove from sensitive list
1357
+
1358
+ Example:
1359
+ runtime = NullRunRuntime.get_instance()
1360
+ runtime.remove_sensitive_tool("my.custom_tool")
1361
+ """
1362
+ self._strict_mode_tools.discard(tool_name)
1363
+
1364
+ def register_sensitive_tools(self, tool_names: list[str]) -> None:
1365
+ """
1366
+ Register multiple tools as sensitive.
1367
+
1368
+ Args:
1369
+ tool_names: List of tool names to mark as sensitive
1370
+
1371
+ Example:
1372
+ runtime = NullRunRuntime.get_instance()
1373
+ runtime.register_sensitive_tools([
1374
+ "stripe.charge",
1375
+ "payment.process",
1376
+ "send_email",
1377
+ ])
1378
+ """
1379
+ for tool_name in tool_names:
1380
+ self._strict_mode_tools.add(tool_name)
1381
+
1382
+ def get_sensitive_tools(self) -> set[str]:
1383
+ """
1384
+ Get all currently registered sensitive tools.
1385
+
1386
+ Returns:
1387
+ Set of sensitive tool names (includes both built-in and custom)
1388
+ """
1389
+ return self._sensitive_tools | self._strict_mode_tools
1390
+
1391
+ def execute(
1392
+ self,
1393
+ tool_name: str,
1394
+ input_data: dict[str, Any],
1395
+ mode: str = "auto",
1396
+ on_transport_error: Callable[[Exception], dict[str, Any]] | None = None,
1397
+ ) -> dict[str, Any]:
1398
+ """
1399
+ Pre-execution policy evaluation via /execute endpoint.
1400
+
1401
+ This is the PRIMARY enforcement point for sensitive tools.
1402
+ Decision is made BEFORE execution.
1403
+
1404
+ Args:
1405
+ tool_name: Name of the tool to execute
1406
+ input_data: Tool input parameters
1407
+ mode: Execution mode ("auto", "inline", "strict")
1408
+ - "auto": auto-select based on tool risk
1409
+ - "inline": force fast path (non-sensitive tools only)
1410
+ - "strict": force gateway roundtrip
1411
+
1412
+ Returns:
1413
+ Dict with:
1414
+ - decision: "allow" | "block" | "flag" | "pause" | "require_approval"
1415
+ - decision_source: "gateway" | "cached" | "fallback"
1416
+ - explanation: Human-readable explanation
1417
+ - policy_version: Policy version used
1418
+ - decision_context: Context used for the decision (for decision-history audit)
1419
+
1420
+ Raises:
1421
+ NullRunBlockedException: If decision is "block"
1422
+ """
1423
+ from nullrun.context import get_trace_id, get_workflow_id
1424
+
1425
+ organization_id = self.organization_id or "local"
1426
+ workflow_id = get_workflow_id()
1427
+ trace_id = get_trace_id() or str(uuid.uuid4())
1428
+
1429
+ # Auto-select mode: sensitive tools always use strict
1430
+ if mode == "auto":
1431
+ if self.is_sensitive_tool(tool_name):
1432
+ mode = "strict"
1433
+ else:
1434
+ mode = "inline"
1435
+
1436
+ # For inline mode with non-sensitive tools, skip execute and use local enforcement
1437
+ if mode == "inline" and not self.is_sensitive_tool(tool_name):
1438
+ return {
1439
+ "decision": "allow",
1440
+ "decision_source": DecisionSource.LOCAL,
1441
+ "explanation": "Inline mode: local enforcement only",
1442
+ "policy_version": 0,
1443
+ "allow_execution": True,
1444
+ }
1445
+
1446
+ # Strict mode or sensitive tool: call /execute endpoint
1447
+ # (no local_mode branch -- api_key is now required, see T3-S2)
1448
+ result = self._transport.execute(
1449
+ organization_id=organization_id,
1450
+ execution_id=workflow_id,
1451
+ trace_id=trace_id,
1452
+ tool=tool_name,
1453
+ input_data=input_data,
1454
+ mode=mode,
1455
+ fallback_mode=self._fallback_mode,
1456
+ on_transport_error=on_transport_error,
1457
+ )
1458
+
1459
+ # Update metrics (thread-safe)
1460
+ metrics.inc_runtime("execute_calls")
1461
+
1462
+ # Check if execution is allowed
1463
+ if result.get("decision") == "block":
1464
+ metrics.inc_runtime("execute_blocked")
1465
+ raise NullRunBlockedException(
1466
+ workflow_id=workflow_id or UNKNOWN_WORKFLOW_ID,
1467
+ reason=result.get("explanation", "policy violation"),
1468
+ tool_name=tool_name,
1469
+ )
1470
+
1471
+ metrics.inc_runtime("execute_allowed")
1472
+ return result
1473
+
1474
+ def start_recording(self, workflow_id: str, metadata: dict[str, Any] = None) -> str:
1475
+ """
1476
+ Start recording events for local decision history.
1477
+
1478
+ Args:
1479
+ workflow_id: ID of the workflow to record
1480
+ metadata: Optional metadata about the session
1481
+
1482
+ Returns:
1483
+ session_id for this recording
1484
+ """
1485
+ # Sprint 2.1: local decision-history recorder was removed.
1486
+ # This method is kept as a no-op stub for one minor
1487
+ # version to avoid breaking callers that imported it. It
1488
+ # will be deleted in the next release.
1489
+ logger.debug(
1490
+ "runtime.start_recording() is a no-op; "
1491
+ "decision history moved to the backend dashboard."
1492
+ )
1493
+ return ""
1494
+
1495
+ def stop_recording(self):
1496
+ """
1497
+ Stop recording and return the session.
1498
+
1499
+ Returns:
1500
+ The recorded session, or None if not recording
1501
+ """
1502
+ # Sprint 2.1: paired no-op stub for start_recording().
1503
+ return None
1504
+
1505
+ def _enrich_event(self, event: dict[str, Any]) -> dict[str, Any]:
1506
+ """Add context fields to event."""
1507
+ enriched = dict(event) # Don't modify original
1508
+
1509
+ # Phase 139+: workflow_id from context, else from the API
1510
+ # key's binding (set in _authenticate). Stays unset on legacy
1511
+ # keys -- emitted events then carry no workflow_id (orphan, as
1512
+ # before this change).
1513
+ if "workflow_id" not in enriched:
1514
+ wf_id = self._resolve_workflow_id(get_workflow_id())
1515
+ if wf_id:
1516
+ enriched["workflow_id"] = wf_id
1517
+
1518
+ # Add trace context
1519
+ if "trace_id" not in enriched:
1520
+ trace_id = get_trace_id() or generate_trace_id()
1521
+ enriched["trace_id"] = trace_id
1522
+
1523
+ if "span_id" not in enriched:
1524
+ span_id = get_span_id() or generate_span_id()
1525
+ enriched["span_id"] = span_id
1526
+
1527
+ # Add agent_id from context (for per-agent cost attribution)
1528
+ if "agent_id" not in enriched:
1529
+ agent_id = get_agent_id()
1530
+ if agent_id:
1531
+ enriched["agent_id"] = agent_id
1532
+
1533
+ # Add attempt_index from context (for retry correlation)
1534
+ if "attempt_index" not in enriched:
1535
+ attempt_index = get_attempt_index()
1536
+ if attempt_index > 0: # Only add if not default (first attempt)
1537
+ enriched["attempt_index"] = attempt_index
1538
+
1539
+ # Add type if not present
1540
+ if "type" not in enriched:
1541
+ enriched["type"] = "event"
1542
+
1543
+ # Add required fields with defaults
1544
+ if "is_retry" not in enriched:
1545
+ enriched["is_retry"] = False
1546
+
1547
+ if "operation_name" not in enriched:
1548
+ enriched["operation_name"] = None
1549
+
1550
+ return enriched
1551
+
1552
+ def _local_check(self, event: dict[str, Any]) -> LocalDecision:
1553
+ """
1554
+ Local check BEFORE sending to backend.
1555
+
1556
+ This runs before the event is sent to the backend and provides
1557
+ instant blocking without network round-trip.
1558
+
1559
+ Args:
1560
+ event: Event dict with tool_name
1561
+
1562
+ Returns:
1563
+ LocalDecision with allowed/blocked status
1564
+ """
1565
+ tool_name = event.get('tool_name', 'unknown')
1566
+
1567
+ # Check loop count (6 same tool calls in 60s window)
1568
+ loop_count = self._loop_tracker.count(tool_name, window=60)
1569
+ if loop_count >= self._local_loop_threshold:
1570
+ # Sprint 3.1 (B23): bump the ``loop_detections`` counter
1571
+ # so an SRE can alert on a sudden spike (often a sign
1572
+ # of an agent stuck in a retry loop).
1573
+ metrics.inc_runtime("loop_detections")
1574
+ return LocalDecision(
1575
+ allowed=False,
1576
+ reason="loop_detected",
1577
+ suggestion="retry after 60s"
1578
+ )
1579
+
1580
+ # Check rate limit (max 1000/min default)
1581
+ if self._rate_tracker.exceeds_limit(self._local_rate_limit):
1582
+ return LocalDecision(
1583
+ allowed=False,
1584
+ reason="rate_limit",
1585
+ suggestion="slow down"
1586
+ )
1587
+
1588
+ return LocalDecision(allowed=True)
1589
+
1590
+ def track_llm(
1591
+ self,
1592
+ input_tokens: int,
1593
+ output_tokens: int = 0,
1594
+ *,
1595
+ model: str | None = None,
1596
+ latency_ms: int | None = None,
1597
+ metadata: dict[str, Any] | None = None,
1598
+ ) -> dict[str, Any]:
1599
+ """
1600
+ Track an LLM call. Pulls the active SpanContext from contextvars
1601
+ automatically so the backend can attribute the call to the right
1602
+ span (e.g. the one created by `@protect`).
1603
+
1604
+ Args:
1605
+ input_tokens: Number of input / prompt tokens.
1606
+ output_tokens: Number of output / completion tokens. Defaults
1607
+ to 0 -- embeddings and reasoning-only calls have no
1608
+ completion token count.
1609
+ model: Model name, e.g. "gpt-4o-mini".
1610
+ latency_ms: Request latency in milliseconds.
1611
+ metadata: Arbitrary key-value pairs.
1612
+
1613
+ Returns:
1614
+ Track result dict from the runtime.
1615
+
1616
+ Note:
1617
+ `cost_cents` is no longer a parameter. The backend computes
1618
+ it from `input_tokens` + `output_tokens` + the org's pricing
1619
+ policy. Splitting prompt vs completion matters because most
1620
+ models price them differently.
1621
+ """
1622
+ # Lazy import to keep the runtime import graph acyclic --
1623
+ # `nullrun.tracing` deliberately has no SDK-side dependencies.
1624
+ from nullrun.tracing import get_current_span
1625
+
1626
+ event: dict[str, Any] = {
1627
+ "type": "llm_call",
1628
+ "input_tokens": input_tokens,
1629
+ "output_tokens": output_tokens,
1630
+ "tokens": input_tokens + output_tokens,
1631
+ }
1632
+ if model:
1633
+ event["model"] = model
1634
+ if latency_ms is not None:
1635
+ event["latency_ms"] = latency_ms
1636
+ if metadata:
1637
+ event["metadata"] = metadata
1638
+
1639
+ # Auto-tag the event with the active span so the backend can
1640
+ # render this call under the right node in the trace timeline.
1641
+ # If no @protect / manual set_span is active, span is None and
1642
+ # the field is omitted -- _enrich_event will fall back to the
1643
+ # loose contextvars or generate fresh IDs.
1644
+ span = get_current_span()
1645
+ if span is not None:
1646
+ event["trace_id"] = span.trace_id
1647
+ event["span_id"] = span.span_id
1648
+ event["parent_span_id"] = span.parent_span_id
1649
+ event["depth"] = span.depth
1650
+
1651
+ return self.track(event)
1652
+
1653
def track_tool(
    self,
    tool_name: str,
    duration_ms: int | None = None,
    *,
    is_retry: bool = False,
    metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """
    Record a single tool invocation (runtime method; the module-level
    `track_tool` forwards here).

    The event is tagged with whatever `get_current_span()` returns, so
    callers do not pass trace identifiers themselves.

    Args:
        tool_name: Name of the tool called.
        duration_ms: Execution duration in milliseconds. Omitted from
            the event when None.
        is_retry: Whether this is a retry attempt (keyword-only).
        metadata: Arbitrary key-value pairs. Omitted from the event
            when falsy (None or an empty dict).

    Returns:
        Whatever `self.track` returns for the assembled event.

    Note:
        `cost_cents` is no longer a parameter. Tool cost is derived
        from `duration_ms` + the org's policy (or left at 0 if the
        org doesn't bill tools). `duration_ms` is the public field
        name; on the wire it is sent as `latency_ms` for backward
        compat with backend consumers.
    """
    # Imported lazily, mirroring the sibling tracking helpers, so the
    # tracing module is only loaded when an event is actually emitted.
    from nullrun.tracing import get_current_span

    payload: dict[str, Any] = {
        "type": "tool_call",
        "tool_name": tool_name,
        "is_retry": is_retry,
    }

    # Optional fields are left out entirely rather than sent as null.
    if duration_ms is not None:
        payload["latency_ms"] = duration_ms
    if metadata:
        payload["metadata"] = metadata

    # Attach span identity only when a span is active; with no span the
    # four keys are simply absent from the event.
    active_span = get_current_span()
    if active_span is not None:
        payload.update(
            trace_id=active_span.trace_id,
            span_id=active_span.span_id,
            parent_span_id=active_span.parent_span_id,
            depth=active_span.depth,
        )

    return self.track(payload)
1701
+
1702
def track_event(
    self,
    event_type: str,
    **kwargs,
) -> dict[str, Any]:
    """
    Emit an arbitrary event through `self.track` (runtime method).

    Args:
        event_type: Type of event ("workflow_start", "workflow_end", etc.).
            Stored under the `type` key.
        **kwargs: Additional event fields, merged in after `type`. A
            `type` keyword therefore replaces `event_type`, and caller
            supplied `tokens` / `_fingerprint` values are kept as-is.

    Returns:
        Track result dict, i.e. whatever `self.track` returns.
    """
    # Seed with the type first and merge afterwards so key order and
    # override semantics match a `{"type": ..., **kwargs}` literal.
    payload = {"type": event_type}
    payload.update(kwargs)

    # Backend's SdkTrackRequest requires `tokens: u64` (non-Optional).
    # Span-lifecycle events (span_start / span_end) are bookkeeping and
    # carry no token count, so fall back to 0; the handler's cost
    # computation treats 0 tokens as a no-op.
    if "tokens" not in payload:
        payload["tokens"] = 0

    # Phase 3: attach a stable fingerprint so the dedup LRU at the
    # track() sink can collapse repeat emissions of the same event
    # (e.g. a manual track_event call plus the httpx transport hook
    # firing for the same LLM call). The field is stripped before wire
    # send (see ``_strip_wire_only_fields``). The helper is imported
    # only when a fingerprint actually has to be computed.
    if "_fingerprint" not in payload:
        from nullrun.instrumentation.auto import (
            _fingerprint_for_event_dict as compute_fingerprint,
        )

        payload["_fingerprint"] = compute_fingerprint(payload)

    return self.track(payload)
1736
+
1737
+
1738
# Module-level convenience functions
# Cached runtime handle; stays None until get_runtime() is first called.
_runtime: NullRunRuntime | None = None


def get_runtime() -> NullRunRuntime:
    """Return the module-wide runtime, resolving it on first use.

    The first call stores the result of `NullRunRuntime.get_instance()`
    in the module global `_runtime`; later calls return that stored
    object without calling `get_instance()` again.
    """
    global _runtime
    if _runtime is not None:
        return _runtime
    _runtime = NullRunRuntime.get_instance()
    return _runtime
1748
+
1749
+
1750
def track(event: dict[str, Any]) -> dict[str, Any]:
    """
    Send one raw event dict through the global runtime.

    Resolves the runtime with `get_runtime()` and returns whatever its
    `track` method returns.

    Usage:
        from nullrun import track

        track({"type": "llm_call", "tokens": 100, "cost_cents": 5})

    NOTE(review): the example passes `cost_cents`, while
    `NullRunRuntime.track_llm` documents cost as backend-computed --
    confirm whether the field is still honoured in raw events.
    """
    runtime = get_runtime()
    return runtime.track(event)


# Phase 3.4: explicit alias for `track()` -- identical call signature, with a
# friendlier name for users who reach for `track_event` first. The two names
# are bound to one callable object, so `nullrun.track is nullrun.track_event`
# is True. Note this module-level alias takes an event dict, like `track`.
track_event = track
1766
+
1767
+
1768
def track_llm(
    input_tokens: int,
    output_tokens: int = 0,
    **kwargs,
) -> dict[str, Any]:
    """Module-level LLM tracking.

    Thin wrapper: looks up the global runtime via `get_runtime()` and
    delegates to `NullRunRuntime.track_llm`, which attaches the active
    SpanContext (if any) so the backend can render the call under the
    right span.

    Args:
        input_tokens: Number of input / prompt tokens.
        output_tokens: Number of output / completion tokens. Defaults
            to 0 -- embeddings and reasoning-only calls have no
            completion token count.
        **kwargs: Passed through unchanged to
            `NullRunRuntime.track_llm` (model, latency_ms, metadata).

    Returns:
        The result dict produced by `NullRunRuntime.track_llm`.
    """
    runtime = get_runtime()
    return runtime.track_llm(input_tokens, output_tokens, **kwargs)
1788
+
1789
+
1790
def track_tool(
    tool_name: str,
    duration_ms: int | None = None,
    **kwargs,
) -> dict[str, Any]:
    """Module-level tool tracking.

    Thin wrapper: looks up the global runtime via `get_runtime()` and
    delegates to `NullRunRuntime.track_tool`, which attaches the active
    SpanContext (if any) to the event.

    Args:
        tool_name: Name of the tool.
        duration_ms: How long the tool call took; forwarded by keyword.
        **kwargs: Passed through unchanged to
            `NullRunRuntime.track_tool` (is_retry, metadata).

    Returns:
        The result dict produced by `NullRunRuntime.track_tool`.
    """
    runtime = get_runtime()
    return runtime.track_tool(tool_name, duration_ms=duration_ms, **kwargs)