nullrun 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nullrun/__init__.py +282 -0
- nullrun/__version__.py +4 -0
- nullrun/actions.py +455 -0
- nullrun/breaker/__init__.py +27 -0
- nullrun/breaker/circuit_breaker.py +402 -0
- nullrun/breaker/exceptions.py +319 -0
- nullrun/context.py +208 -0
- nullrun/decorators.py +649 -0
- nullrun/instrumentation/__init__.py +23 -0
- nullrun/instrumentation/_safe_patch.py +99 -0
- nullrun/instrumentation/auto.py +1095 -0
- nullrun/instrumentation/auto_requests.py +257 -0
- nullrun/instrumentation/autogen.py +163 -0
- nullrun/instrumentation/crewai.py +140 -0
- nullrun/instrumentation/langgraph.py +412 -0
- nullrun/instrumentation/llama_index.py +110 -0
- nullrun/observability.py +160 -0
- nullrun/py.typed +0 -0
- nullrun/runtime.py +1806 -0
- nullrun/toolbox/__init__.py +20 -0
- nullrun/toolbox/langgraph.py +94 -0
- nullrun/tracing.py +155 -0
- nullrun/transport.py +1509 -0
- nullrun/transport_websocket.py +627 -0
- nullrun-0.4.0.dist-info/METADATA +194 -0
- nullrun-0.4.0.dist-info/RECORD +28 -0
- nullrun-0.4.0.dist-info/WHEEL +4 -0
- nullrun-0.4.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LangGraph instrumentation primitives for NullRun SDK.
|
|
3
|
+
|
|
4
|
+
This module ships the LangChain-compatible `NullRunCallback` —
|
|
5
|
+
the low-level handler that:
|
|
6
|
+
|
|
7
|
+
1. Extracts `input_tokens` / `output_tokens` from LLM responses
|
|
8
|
+
and forwards them to the runtime's `track()` method (so the
|
|
9
|
+
backend can compute cost from the org's pricing policy).
|
|
10
|
+
2. Emits `span_start` / `span_end` events for chain / tool /
|
|
11
|
+
agent runs so the dashboard reconstructs the agent tree
|
|
12
|
+
(not just LLM cost). Nested runs become a parent/child span
|
|
13
|
+
tree via `parent_run_id` → active-span lookup.
|
|
14
|
+
|
|
15
|
+
The user-facing helper that wires this callback onto a compiled
|
|
16
|
+
LangGraph app lives at `nullrun.toolbox.langgraph.wrapper` (the
|
|
17
|
+
manual escape hatch). For automatic attachment, see
|
|
18
|
+
`nullrun.instrumentation.auto.patch_langgraph_compiled` — that
|
|
19
|
+
is what `nullrun.init()` installs when `langgraph` is importable,
|
|
20
|
+
so the user does NOT need to call `wrapper()` explicitly.
|
|
21
|
+
|
|
22
|
+
Callers who want raw access to the callback can still import it
|
|
23
|
+
from this module:
|
|
24
|
+
|
|
25
|
+
from nullrun.instrumentation.langgraph import NullRunCallback
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
from typing import Any
|
|
30
|
+
|
|
31
|
+
from langchain_core.callbacks import BaseCallbackHandler
|
|
32
|
+
|
|
33
|
+
from nullrun.runtime import get_runtime
|
|
34
|
+
from nullrun.tracing import (
|
|
35
|
+
SpanContext,
|
|
36
|
+
create_child_span,
|
|
37
|
+
create_root_span,
|
|
38
|
+
get_current_span,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Module-level logger, named after this module so applications can tune
# its verbosity independently.
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# =============================================================================
|
|
45
|
+
# Usage Normalization (SDK extracts, backend computes)
|
|
46
|
+
# =============================================================================
|
|
47
|
+
|
|
48
|
+
def extract_usage_from_response(response: Any, provider: str, model: str) -> dict[str, Any]:
|
|
49
|
+
"""
|
|
50
|
+
Extract usage data from LLM response.
|
|
51
|
+
|
|
52
|
+
Returns raw usage dict - backend will normalize and compute cost.
|
|
53
|
+
SDK does NOT compute cost - this is intentional (backend is source of truth).
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
Dict with keys:
|
|
57
|
+
- input_tokens: int
|
|
58
|
+
- output_tokens: int
|
|
59
|
+
- total_tokens: int
|
|
60
|
+
- has_usage: bool
|
|
61
|
+
- raw_usage: original dict from provider
|
|
62
|
+
"""
|
|
63
|
+
usage: dict[str, Any] = {
|
|
64
|
+
"input_tokens": 0,
|
|
65
|
+
"output_tokens": 0,
|
|
66
|
+
"total_tokens": 0,
|
|
67
|
+
"has_usage": False,
|
|
68
|
+
"raw_usage": {},
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
# Try LangChain's usage_metadata first (most common for OpenAI via LangChain)
|
|
72
|
+
# NOTE: For callback-based invocation, response is LLMResult, not AIMessage
|
|
73
|
+
# LLMResult stores usage in generations[0][0].message.usage_metadata
|
|
74
|
+
if hasattr(response, 'usage_metadata'):
|
|
75
|
+
usage_meta = response.usage_metadata
|
|
76
|
+
if isinstance(usage_meta, dict):
|
|
77
|
+
usage["input_tokens"] = usage_meta.get('input_tokens', 0) or 0
|
|
78
|
+
usage["output_tokens"] = usage_meta.get('output_tokens', 0) or 0
|
|
79
|
+
usage["total_tokens"] = usage_meta.get('total_tokens', 0) or 0
|
|
80
|
+
usage["raw_usage"] = dict(usage_meta)
|
|
81
|
+
elif hasattr(usage_meta, 'input_tokens'):
|
|
82
|
+
# Object with attributes
|
|
83
|
+
usage["input_tokens"] = getattr(usage_meta, 'input_tokens', 0) or 0
|
|
84
|
+
usage["output_tokens"] = getattr(usage_meta, 'output_tokens', 0) or 0
|
|
85
|
+
usage["total_tokens"] = getattr(usage_meta, 'total_tokens', 0) or 0
|
|
86
|
+
usage["raw_usage"] = {
|
|
87
|
+
'input_tokens': usage["input_tokens"],
|
|
88
|
+
'output_tokens': usage["output_tokens"],
|
|
89
|
+
'total_tokens': usage["total_tokens"],
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
# For callback-based LLMResult, check generations[0][0].message.usage_metadata
|
|
93
|
+
elif hasattr(response, 'generations') and response.generations:
|
|
94
|
+
first_gen = response.generations[0][0] if response.generations else None
|
|
95
|
+
if first_gen and hasattr(first_gen, 'message'):
|
|
96
|
+
msg = first_gen.message
|
|
97
|
+
if hasattr(msg, 'usage_metadata'):
|
|
98
|
+
usage_meta = msg.usage_metadata
|
|
99
|
+
if isinstance(usage_meta, dict):
|
|
100
|
+
usage["input_tokens"] = usage_meta.get('input_tokens', 0) or 0
|
|
101
|
+
usage["output_tokens"] = usage_meta.get('output_tokens', 0) or 0
|
|
102
|
+
usage["total_tokens"] = usage_meta.get('total_tokens', 0) or 0
|
|
103
|
+
usage["raw_usage"] = dict(usage_meta)
|
|
104
|
+
elif hasattr(usage_meta, 'input_tokens'):
|
|
105
|
+
usage["input_tokens"] = getattr(usage_meta, 'input_tokens', 0) or 0
|
|
106
|
+
usage["output_tokens"] = getattr(usage_meta, 'output_tokens', 0) or 0
|
|
107
|
+
usage["total_tokens"] = getattr(usage_meta, 'total_tokens', 0) or 0
|
|
108
|
+
usage["raw_usage"] = {
|
|
109
|
+
'input_tokens': usage["input_tokens"],
|
|
110
|
+
'output_tokens': usage["output_tokens"],
|
|
111
|
+
'total_tokens': usage["total_tokens"],
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
# Try response.usage (Anthropic, standard OpenAI format)
|
|
115
|
+
elif hasattr(response, 'usage') and response.usage:
|
|
116
|
+
usage_raw = response.usage
|
|
117
|
+
if isinstance(usage_raw, dict):
|
|
118
|
+
usage["input_tokens"] = usage_raw.get('input_tokens', 0) or 0
|
|
119
|
+
usage["output_tokens"] = usage_raw.get('output_tokens', 0) or 0
|
|
120
|
+
usage["total_tokens"] = usage_raw.get('total_tokens', 0) or 0
|
|
121
|
+
usage["raw_usage"] = dict(usage_raw)
|
|
122
|
+
elif hasattr(usage_raw, 'input_tokens') or hasattr(usage_raw, 'total_tokens'):
|
|
123
|
+
# Object with attributes
|
|
124
|
+
usage["input_tokens"] = getattr(usage_raw, 'input_tokens', 0) or 0
|
|
125
|
+
usage["output_tokens"] = getattr(usage_raw, 'output_tokens', 0) or 0
|
|
126
|
+
usage["total_tokens"] = getattr(usage_raw, 'total_tokens', 0) or 0
|
|
127
|
+
usage["raw_usage"] = {
|
|
128
|
+
'input_tokens': usage["input_tokens"],
|
|
129
|
+
'output_tokens': usage["output_tokens"],
|
|
130
|
+
'total_tokens': usage["total_tokens"],
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
# Try response_metadata (some providers) - also check llm_output for LLMResult
|
|
134
|
+
elif hasattr(response, 'response_metadata'):
|
|
135
|
+
resp_meta = response.response_metadata
|
|
136
|
+
if isinstance(resp_meta, dict):
|
|
137
|
+
# Some providers put token info here
|
|
138
|
+
token_usage = resp_meta.get('token_usage', {})
|
|
139
|
+
if isinstance(token_usage, dict):
|
|
140
|
+
usage["input_tokens"] = (
|
|
141
|
+
token_usage.get('prompt_tokens', 0) or
|
|
142
|
+
token_usage.get('input_tokens', 0) or 0
|
|
143
|
+
)
|
|
144
|
+
usage["output_tokens"] = (
|
|
145
|
+
token_usage.get('completion_tokens', 0) or
|
|
146
|
+
token_usage.get('output_tokens', 0) or 0
|
|
147
|
+
)
|
|
148
|
+
usage["total_tokens"] = token_usage.get('total_tokens', 0) or 0
|
|
149
|
+
usage["raw_usage"] = dict(token_usage)
|
|
150
|
+
# Check llm_output for LLMResult (callback case)
|
|
151
|
+
elif hasattr(response, 'llm_output') and response.llm_output:
|
|
152
|
+
token_usage = response.llm_output.get('token_usage', {})
|
|
153
|
+
if isinstance(token_usage, dict):
|
|
154
|
+
usage["input_tokens"] = (
|
|
155
|
+
token_usage.get('prompt_tokens', 0) or
|
|
156
|
+
token_usage.get('input_tokens', 0) or 0
|
|
157
|
+
)
|
|
158
|
+
usage["output_tokens"] = (
|
|
159
|
+
token_usage.get('completion_tokens', 0) or
|
|
160
|
+
token_usage.get('output_tokens', 0) or 0
|
|
161
|
+
)
|
|
162
|
+
usage["total_tokens"] = token_usage.get('total_tokens', 0) or 0
|
|
163
|
+
usage["raw_usage"] = dict(token_usage)
|
|
164
|
+
|
|
165
|
+
# Check for streaming chunks that accumulated usage
|
|
166
|
+
# (streaming responses may not have usage until final chunk)
|
|
167
|
+
if not usage["has_usage"] and hasattr(response, '__iter__'):
|
|
168
|
+
# For streaming, we can't get accurate usage in middle of stream
|
|
169
|
+
# Final response should have usage_metadata
|
|
170
|
+
pass
|
|
171
|
+
|
|
172
|
+
# Determine if we got real usage data
|
|
173
|
+
usage["has_usage"] = (
|
|
174
|
+
usage["total_tokens"] > 0 or
|
|
175
|
+
usage["input_tokens"] > 0 or
|
|
176
|
+
usage["output_tokens"] > 0
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
return usage
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class NullRunCallback(BaseCallbackHandler):
    """
    LangChain-compatible callback handler for automatic tracking.

    IMPORTANT: This callback extracts USAGE DATA only.
    Cost computation happens in backend (source of truth).

    Span emission: chain / tool / agent runs are wrapped in
    `span_start` / `span_end` events so the dashboard reconstructs
    the agent tree (not just LLM cost). Nested runs become a
    parent/child span tree via `parent_run_id` -> active-span
    lookup; if no parent is known, we fall back to the active
    contextvar span (set by `@protect`) so a callback-driven chain
    inside an `@protect`-wrapped function is properly nested.
    """

    def __init__(self, runtime: Any | None = None) -> None:
        """Bind the handler to *runtime*, or to `get_runtime()` when omitted."""
        self.runtime = runtime or get_runtime()
        # run_id -> SpanContext for in-flight chain / tool / agent
        # runs. We use the LangChain run_id as the key because
        # on_chain_end gives us the same run_id and we need to look
        # up the corresponding span to emit span_end.
        self._active_runs: dict[str, SpanContext] = {}
        # run_id -> invocation params captured in on_llm_start, consumed
        # by on_llm_end / on_llm_error for the same run.
        self._llm_params: dict[str, dict[str, Any]] = {}

    # ------------------------------------------------------------------
    # LLM hooks (token extraction only, no span bookkeeping)
    # ------------------------------------------------------------------

    def on_llm_start(self, serialized: Any, prompts: Any, **kwargs: Any) -> None:
        """Called when LLM call starts; remembers the invocation params."""
        logger.debug("LLM start: %s", kwargs.get("invocation_params", {}))
        run_id = kwargs.get("run_id")
        params = kwargs.get("invocation_params")
        # The model name is announced here; keep it so on_llm_end can
        # report it even when its own kwargs carry no invocation_params.
        if run_id is not None and isinstance(params, dict):
            self._llm_params[str(run_id)] = params

    def on_llm_end(self, response: Any, **kwargs: Any) -> None:
        """
        Called when LLM call ends.

        Extracts usage data and sends to backend for cost computation.
        Does NOT compute cost - backend is source of truth.

        The model name is resolved from, in order: `invocation_params`
        in this call's kwargs, the params remembered by `on_llm_start`
        for the same `run_id`, then `response.llm_output["model_name"]`.
        Any failure is logged and swallowed so tracking never breaks the
        user's LLM call.
        """
        try:
            run_id = kwargs.get("run_id")
            remembered: dict[str, Any] = {}
            if run_id is not None:
                remembered = self._llm_params.pop(str(run_id), {})
            invocation_params = kwargs.get("invocation_params") or remembered

            llm_output = getattr(response, "llm_output", None)
            if not isinstance(llm_output, dict):
                llm_output = {}

            model = (
                invocation_params.get("model_name")
                or invocation_params.get("model")
                or llm_output.get("model_name")
                or "unknown"
            )
            provider = invocation_params.get("model_provider", "openai")

            # Extract usage (normalized format)
            usage = extract_usage_from_response(response, provider, model)

            logger.info(
                "NullRun callback: model=%s, provider=%s, usage=%s, has_usage=%s",
                model, provider, usage, usage["has_usage"],
            )

            # Build event with RAW usage data (no cost computation in SDK!)
            event = {
                "type": "llm_call",
                "model": model,
                "provider": provider,
                "tokens": usage["total_tokens"],
                "input_tokens": usage["input_tokens"],
                "output_tokens": usage["output_tokens"],
                # Flag to backend: this is raw usage, compute cost yourself
                "has_usage": usage["has_usage"],
                "raw_usage": usage["raw_usage"],
            }

            logger.info("NullRun track event: %s", event)
            self.runtime.track(event)

            if usage["has_usage"]:
                logger.debug(
                    "LLM tracked: model=%s, tokens=%s (in=%s, out=%s)",
                    model,
                    usage["total_tokens"],
                    usage["input_tokens"],
                    usage["output_tokens"],
                )
            else:
                logger.debug("LLM tracked: model=%s, NO usage data available", model)

        except Exception as e:
            logger.warning(f"Failed to track LLM event: {e}")

    def on_llm_error(self, error: Any, **kwargs: Any) -> None:
        """Drop params remembered for a failed LLM run so they do not accumulate."""
        run_id = kwargs.get("run_id")
        if run_id is not None:
            self._llm_params.pop(str(run_id), None)

    # ------------------------------------------------------------------
    # Chain / tool / agent hooks — emit span events
    # ------------------------------------------------------------------

    @staticmethod
    def _span_name(serialized: Any, kwargs: dict[str, Any], default: str) -> str:
        """Pick a span name: `serialized` first, then a `name` kwarg, then *default*."""
        run_name = kwargs.get("name")
        # `serialized` is not always a dict; a non-empty `name` keyword,
        # when supplied, is more useful than the generic default.
        fallback = run_name if isinstance(run_name, str) and run_name else default
        return _extract_node_name(serialized, fallback)

    def on_chain_start(
        self,
        serialized: Any,
        inputs: Any,
        *,
        run_id: Any = None,
        parent_run_id: Any = None,
        **kwargs: Any,
    ) -> None:
        """Open a chain span. Nested chains become child spans."""
        if run_id is None:
            # Defensive: some LangChain versions may omit run_id. We
            # cannot emit a span we can later close without a key.
            logger.debug("on_chain_start without run_id — skipping span emission")
            return
        self._begin_run(
            str(run_id),
            str(parent_run_id) if parent_run_id else None,
            self._span_name(serialized, kwargs, "chain"),
            kind="chain",
        )

    def on_chain_end(self, outputs: Any, *, run_id: Any = None, **kwargs: Any) -> None:
        """Close the chain span opened for `run_id`, if any."""
        self._end_run(run_id)

    def on_chain_error(self, error: Any, *, run_id: Any = None, **kwargs: Any) -> None:
        """Close the chain span for `run_id`, recording `str(error)`."""
        self._end_run(run_id, error=str(error))

    def on_tool_start(
        self,
        serialized: Any,
        input_str: Any,
        *,
        run_id: Any = None,
        parent_run_id: Any = None,
        **kwargs: Any,
    ) -> None:
        """Open a tool span — function calls inside an agent."""
        if run_id is None:
            logger.debug("on_tool_start without run_id — skipping span emission")
            return
        self._begin_run(
            str(run_id),
            str(parent_run_id) if parent_run_id else None,
            self._span_name(serialized, kwargs, "tool"),
            kind="tool",
        )

    def on_tool_end(self, output: Any, *, run_id: Any = None, **kwargs: Any) -> None:
        """Close the tool span opened for `run_id`, if any."""
        self._end_run(run_id)

    def on_tool_error(self, error: Any, *, run_id: Any = None, **kwargs: Any) -> None:
        """Close the tool span for `run_id`, recording `str(error)`."""
        self._end_run(run_id, error=str(error))

    def on_agent_action(
        self,
        action: Any,
        *,
        run_id: Any = None,
        parent_run_id: Any = None,
        **kwargs: Any,
    ) -> None:
        """Agent reasoning step (ReAct / OpenAI Functions agent)."""
        if run_id is None:
            return
        tool = getattr(action, "tool", None) or "agent"
        # NOTE(review): LangChain appears to report agent actions with the
        # run_id of the enclosing chain run. If so, this call replaces that
        # chain's entry in `_active_runs`, and the chain's span_end is never
        # emitted. Confirm against the callback manager before re-keying.
        self._begin_run(
            str(run_id),
            str(parent_run_id) if parent_run_id else None,
            f"agent_action:{tool}",
            kind="agent",
        )

    def on_agent_finish(self, finish: Any, *, run_id: Any = None, **kwargs: Any) -> None:
        """Close the agent span opened for `run_id`, if any."""
        self._end_run(run_id)

    # ------------------------------------------------------------------
    # Span bookkeeping
    # ------------------------------------------------------------------

    def _begin_run(
        self,
        run_id: str,
        parent_run_id: str | None,
        name: str,
        kind: str,
    ) -> None:
        """
        Open a span for `run_id`, attached to the parent either via
        the active-runs map (callback-internal nesting) or via the
        SDK's `tracing` contextvar (set by `@protect`).

        Span emission is best-effort — a failure here must never
        break the user's chain. Mirrors the contract in
        `nullrun.decorators._emit_span_start`. Both span-context
        creation and event emission are guarded for that reason.
        """
        try:
            parent_ctx: SpanContext | None = None
            if parent_run_id:
                parent_ctx = self._active_runs.get(parent_run_id)
            if parent_ctx is None:
                # Fall back to contextvar (e.g. we're inside an
                # @protect-wrapped function or a manual `set_span`).
                parent_ctx = get_current_span()
            if parent_ctx is not None:
                ctx = create_child_span(parent_ctx)
            else:
                ctx = create_root_span()
        except Exception as exc:  # noqa: BLE001
            # Without a context there is nothing to register or close.
            logger.debug(f"span context creation failed: {exc}")
            return

        self._active_runs[run_id] = ctx
        try:
            self.runtime.track_event(
                event_type="span_start",
                trace_id=ctx.trace_id,
                span_id=ctx.span_id,
                parent_span_id=ctx.parent_span_id,
                depth=ctx.depth,
                fn_name=name,
                span_kind=kind,
            )
        except Exception as exc:  # noqa: BLE001
            logger.debug(f"span_start emission failed: {exc}")

    def _end_run(self, run_id: Any, error: str | None = None) -> None:
        """Emit `span_end` for `run_id` and forget it; no-op for unknown runs."""
        if run_id is None:
            return
        ctx = self._active_runs.pop(str(run_id), None)
        if ctx is None:
            return
        try:
            self.runtime.track_event(
                event_type="span_end",
                trace_id=ctx.trace_id,
                span_id=ctx.span_id,
                parent_span_id=ctx.parent_span_id,
                depth=ctx.depth,
                fn_name=None,
                error=error,
            )
        except Exception as exc:  # noqa: BLE001
            logger.debug(f"span_end emission failed: {exc}")
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def _extract_node_name(serialized: Any, default: str) -> str:
|
|
397
|
+
"""
|
|
398
|
+
Best-effort extraction of a friendly node name from a LangChain
|
|
399
|
+
`serialized` dict. Falls back to `default` if nothing readable.
|
|
400
|
+
"""
|
|
401
|
+
if not isinstance(serialized, dict):
|
|
402
|
+
return default
|
|
403
|
+
ident = serialized.get("id")
|
|
404
|
+
if isinstance(ident, (list, tuple)) and ident:
|
|
405
|
+
return str(ident[-1])
|
|
406
|
+
if isinstance(ident, str):
|
|
407
|
+
return ident
|
|
408
|
+
name = serialized.get("name")
|
|
409
|
+
if isinstance(name, str):
|
|
410
|
+
return name
|
|
411
|
+
return default
|
|
412
|
+
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
llama-index auto-instrumentation for NullRun SDK.
|
|
3
|
+
|
|
4
|
+
Subscribes to the llama-index core event dispatcher (v0.10.20+) and
|
|
5
|
+
emits ``llm_call`` events for every chat completion. Token usage is
|
|
6
|
+
already captured by the httpx transport hook in ``auto.py`` — this
|
|
7
|
+
patch is the safety net for cases where the dispatcher fires without
|
|
8
|
+
a corresponding HTTP round-trip (e.g. tests, mock providers).
|
|
9
|
+
|
|
10
|
+
Mirrors the structure of ``patch_langgraph_compiled`` in
|
|
11
|
+
``auto.py:815-900``.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from collections.abc import Callable
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# True once patch_llama_index() has registered its handlers; makes repeat
# calls to patch/unpatch no-ops.
_llama_index_patched = False
# (event class, handler) pairs registered by patch_llama_index(), kept so
# unpatch_llama_index() can detach exactly those handlers. Despite the
# "orig" in the name, these are NullRun's own handlers.
_orig_subscriber_handlers: list[tuple[Any, Callable[..., Any]]] = []
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def patch_llama_index(runtime: Any) -> bool:
    """Install NullRun subscribers on the llama-index core dispatcher.

    Idempotent. Returns False if ``llama_index.core`` is not importable,
    or if the handlers could not be registered; in the latter case any
    partial registration is rolled back and the failure is logged rather
    than raised, so instrumentation never breaks the host application.

    Args:
        runtime: Object exposing ``track(event_dict)``; receives one
            ``llm_call`` event per chat completion that reports token
            usage and one ``tool_call`` event per function call.

    Returns:
        True when the handlers are (or already were) installed.
    """
    global _llama_index_patched
    if _llama_index_patched:
        return True
    try:
        from llama_index.core.instrumentation import get_dispatcher
        from llama_index.core.instrumentation.events.llm import LLMChatEndEvent
        from llama_index.core.instrumentation.events.tool import FunctionCallEvent
    except ImportError:
        logger.debug("llama-index not installed; auto-patch skipped")
        return False

    # NOTE(review): confirm against the installed llama-index version that
    # (a) handlers on a dispatcher named "nullrun" receive events emitted
    # by llama-index's own dispatchers, and (b) `add_event_handler` accepts
    # an (event class, callable) pair. The llama-index instrumentation
    # guide registers `BaseEventHandler` instances on the root dispatcher.
    dispatcher = get_dispatcher(name="nullrun")

    def _lookup(container: Any, key: str) -> Any:
        """Read *key* from a dict or an attribute-style object; None if absent."""
        if isinstance(container, dict):
            return container.get(key)
        return getattr(container, key, None)

    def on_chat_end(event: Any) -> None:
        """Forward token usage of a finished chat completion to the runtime."""
        try:
            response = getattr(event, "response", None)
            raw = getattr(response, "raw", None)
            # Usage may be nested under `usage` (dict key or attribute) or
            # sit directly on the raw payload; both dict- and object-shaped
            # containers are accepted.
            nested = _lookup(raw, "usage")
            usage = nested if nested is not None else raw
            prompt = int(
                _lookup(usage, "prompt_tokens") or _lookup(usage, "input_tokens") or 0
            )
            completion = int(
                _lookup(usage, "completion_tokens") or _lookup(usage, "output_tokens") or 0
            )
            total = int(_lookup(usage, "total_tokens") or 0) or (prompt + completion)
            if not (prompt or completion or total):
                return
            runtime.track(
                {
                    "type": "llm_call",
                    "provider": "llama_index",
                    "model": getattr(response, "model", None) or _lookup(raw, "model"),
                    "tokens": total,
                    "input_tokens": prompt,
                    "output_tokens": completion,
                    "has_usage": True,
                }
            )
        except Exception as e:  # pragma: no cover - defensive
            logger.debug("llama_index on_chat_end: %s", e)

    def on_function_call(event: Any) -> None:
        """Forward a tool invocation to the runtime."""
        try:
            tool = getattr(event, "tool", None)
            tool_name = getattr(tool, "name", None) or "tool"
            runtime.track(
                {
                    "type": "tool_call",
                    "tool_name": tool_name,
                }
            )
        except Exception as e:  # pragma: no cover - defensive
            logger.debug("llama_index on_function_call: %s", e)

    subscriptions = [
        (LLMChatEndEvent, on_chat_end),
        (FunctionCallEvent, on_function_call),
    ]
    registered: list[tuple[Any, Any]] = []
    try:
        for event_cls, handler in subscriptions:
            dispatcher.add_event_handler(event_cls, handler)
            registered.append((event_cls, handler))
    except Exception as exc:
        # Undo whatever was attached so a later retry starts clean.
        for event_cls, handler in registered:
            try:
                dispatcher.remove_event_handler(event_cls, handler)
            except Exception as cleanup_exc:  # pragma: no cover - defensive
                logger.debug("llama_index handler rollback failed: %s", cleanup_exc)
        logger.warning("llama-index auto-instrumentation could not be installed: %s", exc)
        return False

    _orig_subscriber_handlers.extend(subscriptions)
    _llama_index_patched = True
    logger.info("llama-index auto-instrumentation installed")
    return True
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def unpatch_llama_index() -> None:
    """Detach our subscribers. Test-only. Idempotent.

    Every handler recorded by ``patch_llama_index`` is removed from the
    "nullrun" dispatcher on a best-effort basis: a handler that cannot be
    removed is logged at debug level and skipped. The recorded handler
    list and the patched flag are always reset.
    """
    global _llama_index_patched
    if not _llama_index_patched:
        return
    try:
        from llama_index.core.instrumentation import get_dispatcher
    except ImportError:
        # llama-index is gone; only the local state is left to reset.
        pass
    else:
        dispatcher = get_dispatcher(name="nullrun")
        for event_cls, handler in _orig_subscriber_handlers:
            try:
                dispatcher.remove_event_handler(event_cls, handler)
            except Exception as exc:  # pragma: no cover
                logger.debug("llama_index handler removal failed: %s", exc)
    _orig_subscriber_handlers.clear()
    _llama_index_patched = False
|
nullrun/observability.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""
|
|
2
|
+
NullRun observability — thread-safe in-process metrics counters.
|
|
3
|
+
|
|
4
|
+
Exposes ``metrics`` for counter / gauge reporting; transport and runtime
|
|
5
|
+
modules call into it for thread-safe increments. No external
|
|
6
|
+
dependencies; integrate with Prometheus / OpenTelemetry on top.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from threading import Lock
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
# ----------------------------------------------------------------
|
|
16
|
+
# SDK Metrics (in-memory, no external dependencies)
|
|
17
|
+
# ----------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
@dataclass
class TransportMetrics:
    """Transport layer metrics.

    Plain mutable record: every counter starts at 0 and the two
    ``last_*`` fields start as ``None``. ``MetricsRegistry.reset()``
    replaces the whole instance with a fresh one.
    """
    # Event and batch counters.
    events_enqueued: int = 0
    events_sent: int = 0
    events_dropped: int = 0
    batches_sent: int = 0
    batches_failed: int = 0
    retries_total: int = 0
    # NOTE(review): looks like it overlaps with ``circuit_open_count``
    # below — confirm which one callers increment.
    circuit_breaker_opens: int = 0
    # Last-seen values, not counters.
    last_flush_at: float | None = None
    last_error: str | None = None
    # Circuit breaker state transition metrics
    circuit_open_count: int = 0
    circuit_half_open_count: int = 0
    circuit_closed_count: int = 0
    fallback_mode_activations: int = 0
    # Sprint 1.5 (B13): HMAC verification failures on the control
    # plane WebSocket. Pre-fix, a signature mismatch on a signed
    # ``state_change`` / ``key_rotated`` / ``policy_invalidated``
    # message was logged at WARNING and the message was silently
    # dropped — meaning a forged or mis-rotated kill command could
    # be lost without a counter to alert on. This metric is what an
    # SRE alerts on for "control plane signature integrity".
    hmac_verify_failures_total: int = 0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class RuntimeMetrics:
    """Runtime layer metrics.

    Plain mutable record of integer counters, all starting at 0.
    ``MetricsRegistry.reset()`` replaces the whole instance with a
    fresh one.
    """
    track_calls: int = 0
    execute_calls: int = 0
    execute_allowed: int = 0
    execute_blocked: int = 0
    check_calls: int = 0
    cost_limit_exceeded: int = 0
    timeouts: int = 0
    loop_detections: int = 0
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class MetricsRegistry:
    """
    Global SDK metrics registry.

    Used for monitoring without external dependencies.
    Can integrate with Prometheus or OpenTelemetry on top.

    Thread-safe: the increment / set / export / reset methods all run
    under one lock. Direct attribute access (``metrics.transport.x += 1``)
    bypasses that lock.

    Usage:
        from nullrun.observability import metrics
        print(metrics.transport.events_sent)
        print(metrics.to_dict())

        # Thread-safe increments (preferred over direct +=)
        metrics.inc_transport("events_enqueued")
        metrics.inc_transport("events_sent", 50)
        metrics.inc_runtime("execute_calls")
    """

    def __init__(self) -> None:
        """Start with zeroed transport and runtime metrics."""
        self.transport = TransportMetrics()
        self.runtime = RuntimeMetrics()
        self._lock = Lock()

    # ----------------------------------------------------------------
    # Thread-safe metric increment methods
    # ----------------------------------------------------------------

    def _increment(self, section: str, field: str, value: int) -> None:
        """Add *value* to ``self.<section>.<field>`` under the lock."""
        with self._lock:
            # Resolve the section inside the lock: reset() swaps the object.
            target = getattr(self, section)
            # A field that does not exist yet is treated as 0 and created;
            # such ad-hoc attributes are not part of the to_dict() export.
            setattr(target, field, getattr(target, field, 0) + value)

    def inc_transport(self, field: str, value: int = 1) -> None:
        """Thread-safe increment of transport metric counter.

        Args:
            field: Metric name (e.g., "events_enqueued", "batches_sent")
            value: Amount to increment (default 1)
        """
        self._increment("transport", field, value)

    def inc_runtime(self, field: str, value: int = 1) -> None:
        """Thread-safe increment of runtime metric counter.

        Args:
            field: Metric name (e.g., "track_calls", "execute_allowed")
            value: Amount to increment (default 1)
        """
        self._increment("runtime", field, value)

    def set_transport(self, field: str, value: Any) -> None:
        """Thread-safe set of transport metric field.

        Args:
            field: Metric name (e.g., "last_error", "last_flush_at")
            value: Value to set
        """
        with self._lock:
            setattr(self.transport, field, value)

    def to_dict(self) -> dict[str, Any]:
        """Export all metrics to dict. Convenient for /health endpoint.

        Returns:
            ``{"transport": {...}, "runtime": {...}}`` with one entry per
            declared dataclass field, in declaration order. Building the
            export from the dataclass definitions means no field can be
            left out (``check_calls`` used to be missing).
        """
        from dataclasses import asdict

        with self._lock:
            return {
                "transport": asdict(self.transport),
                "runtime": asdict(self.runtime),
            }

    def reset(self) -> None:
        """Reset all counters (useful in tests).

        Both metric objects are replaced, so references obtained earlier
        via ``metrics.transport`` / ``metrics.runtime`` stop being updated.
        """
        with self._lock:
            self.transport = TransportMetrics()
            self.runtime = RuntimeMetrics()
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Global singleton registry, created at import time. Import this object
# (``from nullrun.observability import metrics``) rather than building
# another MetricsRegistry, so all callers share the same counters.
metrics = MetricsRegistry()
|
nullrun/py.typed
ADDED
|
File without changes
|