fiddler-otel 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ """Fiddler OTel - core OpenTelemetry instrumentation for GenAI applications."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
+ from fiddler_otel.attributes import set_conversation_id
6
+ from fiddler_otel.client import FiddlerClient, get_client
7
+ from fiddler_otel.decorators import get_current_span, trace
8
+ from fiddler_otel.span_wrapper import FiddlerChain, FiddlerGeneration, FiddlerSpan, FiddlerTool
9
+
10
def _installed_version() -> str:
    """Return the installed ``fiddler-otel`` distribution version.

    :returns: The version string reported by package metadata, or ``'unknown'``
        when no metadata for ``fiddler-otel`` can be found.
    """
    try:
        return version('fiddler-otel')
    except PackageNotFoundError:
        # No distribution metadata is available for this package name.
        return 'unknown'


__version__ = _installed_version()
14
+
15
# Public API of the package: the names exposed by ``from fiddler_otel import *``.
# Entries are kept in sorted order.
__all__ = [
    # span wrapper classes and the client
    'FiddlerChain',
    'FiddlerClient',
    'FiddlerGeneration',
    'FiddlerSpan',
    'FiddlerTool',
    # package metadata
    '__version__',
    # helper functions and decorators
    'get_client',
    'get_current_span',
    'set_conversation_id',
    'trace',
]
@@ -0,0 +1,97 @@
1
+ """OpenTelemetry span attributes for Fiddler instrumentation."""
2
+
3
+ import contextvars
4
+ from typing import Any
5
+
6
+ from pydantic import ConfigDict, validate_call
7
+
8
# Metadata-dictionary key under which Fiddler-specific attributes are stored.
FIDDLER_METADATA_KEY = '_fiddler_attributes'

# ``str.format`` templates for OpenTelemetry attribute keys; ``{key}`` is the
# placeholder to fill in when building a span-level or session-level key.
FIDDLER_USER_SPAN_ATTRIBUTE_TEMPLATE = 'fiddler.span.user.{key}'
FIDDLER_USER_SESSION_ATTRIBUTE_TEMPLATE = 'fiddler.session.user.{key}'
14
+
15
+
16
class FiddlerSpanAttributes:  # pylint: disable=too-few-public-methods
    """Attribute-key constants used on Fiddler OpenTelemetry spans.

    The class is a plain namespace of string constants; it is not meant to be
    instantiated.
    """

    # --- Attributes common to all span types ---
    AGENT_NAME = 'gen_ai.agent.name'
    AGENT_ID = 'gen_ai.agent.id'
    CONVERSATION_ID = 'gen_ai.conversation.id'
    TYPE = 'fiddler.span.type'
    SERVICE_VERSION = 'service.version'

    # --- LLM prompt / response content ---
    LLM_INPUT_SYSTEM = 'gen_ai.llm.input.system'
    LLM_INPUT_USER = 'gen_ai.llm.input.user'
    LLM_OUTPUT = 'gen_ai.llm.output'
    LLM_CONTEXT = 'gen_ai.llm.context'

    # --- Model identification (following OpenTelemetry semantic conventions) ---
    LLM_REQUEST_MODEL = 'gen_ai.request.model'
    LLM_SYSTEM = 'gen_ai.system'

    # --- Token usage counters ---
    LLM_TOKEN_COUNT_INPUT = 'gen_ai.usage.input_tokens'
    LLM_TOKEN_COUNT_OUTPUT = 'gen_ai.usage.output_tokens'
    LLM_TOKEN_COUNT_TOTAL = 'gen_ai.usage.total_tokens'

    # --- Input / output message payloads ---
    GEN_AI_INPUT_MESSAGES = 'gen_ai.input.messages'
    GEN_AI_OUTPUT_MESSAGES = 'gen_ai.output.messages'

    # --- Tool invocation attributes ---
    TOOL_INPUT = 'gen_ai.tool.input'
    TOOL_OUTPUT = 'gen_ai.tool.output'
    TOOL_NAME = 'gen_ai.tool.name'
    TOOL_DEFINITIONS = 'gen_ai.tool.definitions'
48
+
49
+
50
class FiddlerResourceAttributes:
    """Attribute-key constants used on the Fiddler OpenTelemetry resource."""

    # Resource attribute key that carries the Fiddler application identifier.
    APPLICATION_ID = 'application.id'
54
+
55
+
56
class SpanType:
    """String constants naming the kinds of Fiddler OpenTelemetry spans."""

    AGENT = 'agent'
    CHAIN = 'chain'
    TOOL = 'tool'
    LLM = 'llm'
    # Value available for spans that are none of the kinds above.
    OTHER = 'other'
64
+
65
+
66
+ # context variable for conversation ID - used to store the conversation ID for the current
67
+ # thread/async coroutine. Note that contextvars are shallow copied; dictionaries/lists are not
68
+ # copied deeply and are shared between threads/coroutines.
69
+ _CONVERSATION_ID: contextvars.ContextVar[str] = contextvars.ContextVar(
70
+ '_CONVERSATION_ID', default=''
71
+ )
72
+ _CUSTOM_ATTRIBUTES: contextvars.ContextVar[dict[str, Any] | None] = contextvars.ContextVar(
73
+ '_CUSTOM_ATTRIBUTES', default=None
74
+ )
75
+
76
+
77
@validate_call(config=ConfigDict(strict=True))
def set_conversation_id(conversation_id: str) -> None:
    """Bind a conversation ID to the current execution context.

    The ID is stored in a context variable, so it applies to the current
    thread or async coroutine. It is propagated to all spans created there,
    which lets the Fiddler dashboard filter and display the full ordered
    sequence of operations belonging to one conversation.

    The stored value stays in effect until this function is called again with
    a different ID.

    The argument is validated by pydantic in strict mode, so a value that is
    not a ``str`` is rejected rather than coerced.

    :param conversation_id: Unique identifier for the conversation session.

    Example::

        import uuid

        from fiddler_otel import set_conversation_id

        set_conversation_id(str(uuid.uuid4()))
        agent.invoke({"messages": [{"role": "user", "content": "Hello"}]})
    """
    _CONVERSATION_ID.set(conversation_id)
fiddler_otel/client.py ADDED
@@ -0,0 +1,406 @@
1
+ """Core client for Fiddler OTel instrumentation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import atexit
7
+ import logging
8
+ import threading
9
+ import uuid
10
+ from typing import Any, Literal
11
+ from urllib.parse import urlparse
12
+
13
+ from opentelemetry import context, trace
14
+ from opentelemetry.context.context import Context
15
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import Compression, OTLPSpanExporter
16
+ from opentelemetry.sdk.resources import (
17
+ OTELResourceDetector,
18
+ ProcessResourceDetector,
19
+ Resource,
20
+ get_aggregated_resources,
21
+ )
22
+ from opentelemetry.sdk.trace import SpanLimits, TracerProvider, sampling
23
+ from opentelemetry.sdk.trace.export import (
24
+ BatchSpanProcessor,
25
+ ConsoleSpanExporter,
26
+ SimpleSpanProcessor,
27
+ )
28
+
29
+ from fiddler_otel.attributes import FiddlerResourceAttributes
30
+ from fiddler_otel.jsonl_capture import JSONLSpanExporter, initialize_jsonl_capture
31
+ from fiddler_otel.span_processor import FiddlerSpanProcessor
32
+ from fiddler_otel.span_wrapper import (
33
+ _SPAN_TYPE_MAP,
34
+ FiddlerChain,
35
+ FiddlerGeneration,
36
+ FiddlerSpan,
37
+ FiddlerTool,
38
+ )
39
+ from fiddler_otel.utils import is_fiddler_span
40
+
41
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
42
+
43
+
44
class _FiddlerSpanContextManager:
    """Context manager that activates a span in Fiddler's isolated context.

    On entry the span is made "current" in a context derived from the Fiddler
    client's own context (not the ambient global one), which keeps Fiddler
    spans isolated from other OpenTelemetry tracers. On exit the span status is
    set, the span is ended, and the previously attached context is restored.
    """

    def __init__(self, span: trace.Span, fdl_context: Context):
        """Initialize the context manager.

        :param span: The OpenTelemetry span to manage.
        :param fdl_context: The Fiddler client's isolated context.
        """
        self._span = span
        self._fdl_context = fdl_context
        # Token returned by ``context.attach``; None while no context is attached.
        self._token: Any = None

    def __enter__(self) -> trace.Span:
        """Enter the context and set the span as current in Fiddler's context.

        :returns: The managed OpenTelemetry span.
        """
        new_context = trace.set_span_in_context(self._span, self._fdl_context)
        self._token = context.attach(new_context)
        return self._span

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
        """Exit the context, end the span, and detach the Fiddler context.

        The span is marked ``ERROR`` (with the exception recorded) when the
        ``with`` body raised, and ``OK`` otherwise. Ending the span and
        detaching the context happen in ``finally`` clauses, so a failure while
        recording the status cannot leave the Fiddler context attached.

        :returns: Always ``False`` so an exception from the body propagates.
        """
        try:
            if exc_val is not None:
                self._span.record_exception(exc_val)
                self._span.set_status(trace.Status(trace.StatusCode.ERROR, str(exc_val)))
            else:
                self._span.set_status(trace.Status(trace.StatusCode.OK))
        finally:
            try:
                self._span.end()
            finally:
                # Clear the token first so a repeated __exit__ cannot detach twice.
                token, self._token = self._token, None
                if token is not None:
                    context.detach(token)

        return False
81
+
82
+
83
# Process-wide default client: the first FiddlerClient created in this process.
# A single client means a single BatchSpanProcessor (one thread, one
# connection). OTel tracks the current span with contextvars per asyncio task,
# so traces stay isolated per task without needing one client per task.
_default_client: FiddlerClient | None = None

# Lock guarding assignment of ``_default_client``.
_client_lock = threading.Lock()
89
+
90
+
91
def get_client() -> FiddlerClient:
    """Fetch the process-wide default FiddlerClient.

    The default is the first ``FiddlerClient`` created in this process.

    :returns: The global client instance.
    :raises RuntimeError: If no FiddlerClient has been initialized.
    """
    client = _default_client
    if client is not None:
        return client
    raise RuntimeError(
        'No FiddlerClient initialized. Create one first: client = FiddlerClient(...)'
    )
103
+
104
+
105
class FiddlerClient:
    """The main client for instrumenting Generative AI applications with Fiddler observability.

    This client configures and manages the OpenTelemetry tracer that sends telemetry data
    to the Fiddler platform for monitoring, analysis, and debugging of your AI agents
    and workflows.

    Flush on exit: A shutdown handler is registered via :func:`atexit` so that pending
    spans are flushed and the tracer is shut down when the process exits. For short
    scripts or critical workloads, call :meth:`force_flush` and :meth:`shutdown` explicitly
    (e.g. in a ``try``/``finally`` or signal handler) since ``atexit`` may not run in all
    environments (e.g. SIGKILL, fork).

    Asyncio: Tracing works in asyncio (context vars propagate across ``await``). When
    shutting down from async code, use :meth:`aflush` and :meth:`ashutdown` so the event
    loop is not blocked; the sync :meth:`force_flush` and :meth:`shutdown` can block for
    up to the flush timeout.

    Context manager: Use ``with FiddlerClient(...) as client:`` to ensure
    :meth:`shutdown` is called on exit (flush then shutdown; atexit is unregistered).
    """

    def __init__(
        self,
        api_key: str,
        application_id: str,
        url: str,
        console_tracer: bool = False,
        span_limits: SpanLimits | None = None,
        sampler: sampling.Sampler | None = None,
        compression: Compression = Compression.Gzip,
        jsonl_capture_enabled: bool = False,
        jsonl_file_path: str = 'fiddler_trace_data.jsonl',
    ):
        """Initialise the FiddlerClient.

        :param api_key: The API key for authenticating with the Fiddler backend.
        :param application_id: The unique identifier (UUID4) for the application.
        :param url: The base URL for your Fiddler instance (e.g. ``https://your-instance.fiddler.ai``).
        :param console_tracer: If True, spans are additionally printed to the console.
            The OTLP exporter is still registered, so spans are exported as well.
        :param span_limits: OpenTelemetry span limits configuration.
        :param sampler: OpenTelemetry sampler passed to the ``TracerProvider``.
        :param compression: OTLP export compression. Defaults to ``Compression.Gzip``.
        :param jsonl_capture_enabled: Enable JSONL capture of trace data to a local file.
        :param jsonl_file_path: Path to the JSONL file (used when ``jsonl_capture_enabled=True``).
        :raises ValueError: If ``application_id`` is not a valid UUID4 or ``url`` is not valid.
        """
        # Validate application_id is a valid UUID4 (uuid.UUID raises ValueError on bad input)
        parsed_uuid = uuid.UUID(application_id)
        if parsed_uuid.version != 4:
            raise ValueError(
                f'application_id must be a valid UUID4 (version 4), got version {parsed_uuid.version}'
            )
        # Store the canonical (lower-case, hyphenated) form
        self.application_id = str(parsed_uuid)

        # Validate URL
        parsed_url = urlparse(url)
        if not parsed_url.scheme or not parsed_url.netloc:
            raise ValueError('URL must have a valid scheme and netloc')
        if parsed_url.scheme not in ('http', 'https'):
            raise ValueError('URL scheme must be http or https')
        # Trailing slashes are dropped so the export endpoint can be appended cleanly
        self.url = url.rstrip('/')

        self.api_key = api_key

        # Dedicated (non-global) TracerProvider - initialized lazily on first use
        self._provider: TracerProvider | None = None
        self._tracer: trace.Tracer | None = None
        self._console_tracer = console_tracer

        # Fiddler-specific isolated context (prevents ambient global span inheritance)
        self._context = Context()

        self.span_limits = span_limits
        self.sampler = sampler
        self.compression = compression
        self.jsonl_capture_enabled = jsonl_capture_enabled
        self.jsonl_file_path = jsonl_file_path

        resource = Resource.create({FiddlerResourceAttributes.APPLICATION_ID: self.application_id})
        self.resource = self._get_aggregated_resources_with_fallback(resource)

        # Register as default client for singleton pattern (first client wins)
        global _default_client
        with _client_lock:
            if _default_client is None:
                _default_client = self

        # Tracks whether the exit hook is currently installed, so it is never
        # registered twice and can be re-installed after a shutdown.
        self._atexit_registered = False
        self._register_atexit()

    def __enter__(self) -> FiddlerClient:
        """Context manager entry.

        :returns: This client.
        """
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Context manager exit — flushes and shuts down the tracer provider."""
        self.shutdown()

    def _register_atexit(self) -> None:
        """Install the process-exit shutdown hook unless it is already installed."""
        if self._atexit_registered:
            return
        atexit.register(self._atexit_shutdown)
        self._atexit_registered = True

    def _unregister_atexit(self) -> None:
        """Remove the process-exit shutdown hook if it is installed."""
        if not self._atexit_registered:
            return
        try:
            atexit.unregister(self._atexit_shutdown)
        except Exception as e:
            logger.debug('Could not unregister atexit handler: %s', e)
        self._atexit_registered = False

    def _atexit_shutdown(self) -> None:
        """Called at process exit to flush and shutdown the tracer provider."""
        self.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Flush pending spans to the exporter.

        :param timeout_millis: Maximum time to wait for flush in milliseconds.
        :returns: The provider's flush result, or True when no provider has
            been initialized (nothing to flush).
        """
        if self._provider is None:
            logger.debug('Tracer provider not initialized, skipping flush')
            return True
        logger.info('Flushing tracer provider (timeout_millis=%s)', timeout_millis)
        return self._provider.force_flush(timeout_millis)

    async def aflush(self, timeout_millis: int = 30000) -> bool:
        """Async version of :meth:`force_flush`.

        Runs the flush in a worker thread so the event loop is not blocked.

        :param timeout_millis: Maximum time to wait for flush in milliseconds.
        :returns: The result of :meth:`force_flush`.
        """
        return await asyncio.to_thread(self.force_flush, timeout_millis)

    def shutdown(self) -> None:
        """Shut down the tracer provider after flushing pending spans.

        Safe to call multiple times. The atexit handler is unregistered on every
        call, including when no tracer provider was ever created, so a client
        that was shut down is not kept alive by the exit hook. Errors raised
        while flushing or shutting down are logged, not propagated.
        """
        self._unregister_atexit()

        provider = self._provider
        if provider is None:
            logger.debug('Tracer provider not initialized, skipping shutdown')
            return

        logger.info('Shutting down tracer provider')
        try:
            provider.force_flush(30000)
        except Exception as e:
            logger.warning('Error flushing tracer provider during shutdown: %s', e)
        try:
            provider.shutdown()
        except Exception as e:
            logger.warning('Error shutting down tracer provider: %s', e)
        finally:
            # Drop both references so a later get_tracer() builds a fresh provider
            self._provider = None
            self._tracer = None

    async def ashutdown(self) -> None:
        """Async version of :meth:`shutdown`.

        Runs flush and shutdown in a worker thread so the event loop is not blocked.
        """
        await asyncio.to_thread(self.shutdown)

    def get_tracer_provider(self) -> TracerProvider:
        """Return the OpenTelemetry TracerProvider, initializing it on first call.

        Span processors are attached by :meth:`get_tracer`, not by this method.

        :returns: The configured TracerProvider.
        :raises RuntimeError: If initialization fails.
        """
        if self._provider is None:
            self._initialize_provider()
        if self._provider is None:
            raise RuntimeError('Failed to initialize tracer provider')
        return self._provider

    def _get_aggregated_resources_with_fallback(self, initial_resource: Resource) -> Resource:
        """Aggregate OTel resource detectors with a fallback for older OTel versions.

        :param initial_resource: The base resource to start with.
        :returns: The aggregated resource, or ``initial_resource`` unchanged if
            aggregation raises.
        """
        detectors = [OTELResourceDetector(), ProcessResourceDetector()]

        # OsResourceDetector is optional: skip it when this SDK does not provide it
        try:
            from opentelemetry.sdk.resources import OsResourceDetector

            detectors.append(OsResourceDetector())
        except ImportError:
            pass

        try:
            return get_aggregated_resources(detectors, initial_resource=initial_resource)
        except Exception as e:
            logger.debug('Resource aggregation failed, using initial resource: %s', e)
            return initial_resource

    def update_resource(self, attributes: dict[str, Any]) -> None:
        """Update the OTel resource with additional attributes.

        Must be called **before** :meth:`get_tracer` is invoked. The caller's
        ``attributes`` dictionary is not modified.

        An existing ``service.name`` other than ``'unknown_service'`` is carried
        over when ``attributes`` does not supply one, so the merge does not
        replace it.

        :param attributes: Key-value pairs to merge into the resource.
        :raises ValueError: If the tracer has already been initialized.
        """
        if self._tracer is not None:
            raise ValueError('Cannot update resource after tracer is initialized')

        if self._provider is not None:
            # The provider received its resource when it was constructed, so a
            # later change to self.resource does not reach it.
            logger.warning(
                'Tracer provider is already initialized; '
                'the updated resource will not apply to it'
            )

        # Work on a copy so the caller's dictionary is left untouched
        merged_attributes = dict(attributes)

        current_service_name = self.resource.attributes.get('service.name')
        if (
            current_service_name is not None
            and current_service_name != 'unknown_service'
            and merged_attributes.get('service.name') is None
        ):
            merged_attributes['service.name'] = current_service_name

        self.resource = self.resource.merge(Resource.create(merged_attributes))

    def _initialize_provider(self) -> None:
        """Initialize the TracerProvider (dedicated, not the global one)."""
        if self._provider is not None:
            return

        self._provider = TracerProvider(
            resource=self.resource,
            span_limits=self.span_limits,
            sampler=self.sampler,
        )
        # A provider created after shutdown() needs the exit hook again,
        # otherwise its pending spans would not be flushed at process exit.
        self._register_atexit()

    def _initialize_tracer(self) -> None:
        """Initialize the OTel tracer and register span processors."""
        if self._tracer is not None:
            return

        provider = self.get_tracer_provider()

        # FiddlerSpanProcessor runs first so it can inject session/conversation attributes
        provider.add_span_processor(FiddlerSpanProcessor())

        # NOTE(review): console output is added alongside OTLP export, not in
        # place of it — confirm this is the intended behaviour.
        if self._console_tracer:
            provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))

        otlp_exporter = OTLPSpanExporter(
            endpoint=f'{self.url}/v1/traces',
            headers={
                'authorization': f'Bearer {self.api_key}',
                'fiddler-application-id': self.application_id,
            },
            compression=self.compression,
        )
        provider.add_span_processor(BatchSpanProcessor(otlp_exporter))

        if self.jsonl_capture_enabled:
            jsonl_capture = initialize_jsonl_capture(self.jsonl_file_path)
            jsonl_exporter = JSONLSpanExporter(jsonl_capture)
            provider.add_span_processor(SimpleSpanProcessor(jsonl_exporter))

        self._tracer = trace.get_tracer('fiddler.otel.tracer', tracer_provider=provider)

    def get_tracer(self) -> trace.Tracer:
        """Return an OTel tracer for creating spans.

        Initializes the tracer on the first call.

        :returns: The OTel tracer instance.
        :raises RuntimeError: If tracer initialization fails.
        """
        if self._tracer is None:
            self._initialize_tracer()
        if self._tracer is None:
            raise RuntimeError('Failed to initialize tracer')
        return self._tracer

    def start_as_current_span(
        self,
        name: str,
        as_type: Literal['span', 'generation', 'chain', 'tool'] = 'span',
    ) -> FiddlerSpan | FiddlerGeneration | FiddlerChain | FiddlerTool:
        """Create a span using a context manager (automatic lifecycle management).

        The new span is parented to the currently active span when that span is
        a Fiddler span; otherwise it starts from the client's isolated context.

        :param name: Name for the span.
        :param as_type: Span type — ``"span"``, ``"generation"``, ``"chain"``, or ``"tool"``.
            An unrecognised value falls back to :class:`FiddlerSpan`.
        :returns: Span wrapper with context manager support.
        """
        tracer = self.get_tracer()
        current_context = context.get_current()
        current_span = trace.get_current_span(current_context)

        # Only inherit from the ambient context when its span belongs to Fiddler
        parent_context = self._context
        if is_fiddler_span(current_span):
            parent_context = current_context

        otel_span = tracer.start_span(name, context=parent_context)
        fdl_context_manager = _FiddlerSpanContextManager(otel_span, self._context)

        wrapper_class = _SPAN_TYPE_MAP.get(as_type, FiddlerSpan)
        return wrapper_class(fdl_context_manager)

    def start_span(
        self,
        name: str,
        as_type: Literal['span', 'generation', 'chain', 'tool'] = 'span',
    ) -> FiddlerSpan | FiddlerGeneration | FiddlerChain | FiddlerTool:
        """Create a span with manual lifecycle control. Caller must call ``span.end()``.

        The span is started from the client's isolated context and is not made
        the current span.

        :param name: Name for the span.
        :param as_type: Span type — ``"span"``, ``"generation"``, ``"chain"``, or ``"tool"``.
            An unrecognised value falls back to :class:`FiddlerSpan`.
        :returns: Span wrapper requiring an explicit ``end()`` call.
        """
        tracer = self.get_tracer()
        otel_span = tracer.start_span(name, context=self._context)

        wrapper_class = _SPAN_TYPE_MAP.get(as_type, FiddlerSpan)
        return wrapper_class(otel_span)