docent-python 0.1.2a0__py3-none-any.whl → 0.1.4a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docent-python might be problematic. Click here for more details.

docent/trace_alt.py ADDED
@@ -0,0 +1,513 @@
1
+ import asyncio
2
+ import atexit
3
+ import functools
4
+ import io
5
+ import logging
6
+ import os
7
+ import uuid
8
+ from contextlib import asynccontextmanager, contextmanager, redirect_stdout
9
+ from contextvars import ContextVar, Token
10
+ from typing import Any, AsyncIterator, Callable, Dict, Iterator, Optional, Set
11
+
12
+ import requests
13
+ from opentelemetry.context import Context
14
+ from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
15
+ from opentelemetry.trace import Span
16
+ from traceloop.sdk import Traceloop
17
+
18
# Module-level logger. It is disabled, so records sent through it are dropped.
logger = logging.getLogger(__name__)
logger.disabled = True

# Telemetry endpoint used when initialize_tracing() is not given one.
DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"

# Context-local identifiers of the active agent run and collection.
_current_agent_run_id: ContextVar[Optional[str]] = ContextVar(
    "current_agent_run_id",
    default=None,
)
_current_collection_id: ContextVar[Optional[str]] = ContextVar(
    "current_collection_id",
    default=None,
)

# Process-wide configuration; assigned by initialize_tracing().
_tracing_initialized = False
_collection_name: Optional[str] = None
_collection_id: Optional[str] = None
_default_agent_run_id: Optional[str] = None
_endpoint: Optional[str] = None
_api_key: Optional[str] = None
_enable_console_export = False
_disable_batch = False
_instruments: Optional[Set[Any]] = None
_block_instruments: Optional[Set[Any]] = None
41
+
42
+
43
class DocentSpanProcessor(SpanProcessor):
    """Span processor that stamps Docent identifiers onto every span.

    The processor only annotates spans when they start; it performs no export
    of its own, so ``on_end``, ``shutdown`` and ``force_flush`` are no-ops.

    Attributes written to each span:
        collection_id: The collection ID this processor was created with.
        agent_run_id: The context-local agent run ID, or the process-wide
            default ID when no agent run is active.
        agent_run_id_default: Set to ``True`` only when the default ID was used.
        service.name: The configured collection name, or ``"docent-trace"``.
    """

    def __init__(self, collection_id: str, enable_console_export: bool = False):
        """Store the collection ID and whether span starts should be logged."""
        self.collection_id = collection_id
        self.enable_console_export = enable_console_export

    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
        """Add Docent metadata when a span starts."""
        span.set_attribute("collection_id", self.collection_id)

        agent_run_id = _get_current_agent_run_id()
        if agent_run_id:
            span.set_attribute("agent_run_id", agent_run_id)
        else:
            # No active agent run: fall back to the process-wide default ID and
            # flag the span so the fallback can be told apart from a real run.
            agent_run_id = _get_default_agent_run_id()
            span.set_attribute("agent_run_id", agent_run_id)
            span.set_attribute("agent_run_id_default", True)

        # Add service name for better integration with existing OTEL setups
        span.set_attribute("service.name", _collection_name or "docent-trace")

        if self.enable_console_export:
            # Log the ID that was actually attached to the span (the default ID
            # in the fallback case), not the possibly-None context value.
            logging.debug(
                f"Span started - collection_id: {self.collection_id}, agent_run_id: {agent_run_id}"
            )

    def on_end(self, span: ReadableSpan) -> None:
        """Do nothing; finished spans are handled by other processors."""
        pass

    def shutdown(self) -> None:
        """Called when the processor is shut down; there is nothing to release."""

    def force_flush(self, timeout_millis: float = 30000) -> bool:
        """Report success immediately; this processor buffers no spans."""
        return True
84
+
85
+
86
def initialize_tracing(
    collection_name: str,
    collection_id: Optional[str] = None,
    endpoint: Optional[str] = None,
    api_key: Optional[str] = None,
    enable_console_export: bool = False,
    disable_batch: bool = False,
    instruments: Optional[Set[Any]] = None,
    block_instruments: Optional[Set[Any]] = None,
) -> None:
    """Initialize Docent tracing with the specified configuration.

    Stores the configuration in module globals, initializes Traceloop with a
    metadata processor plus Traceloop's default export processor, and registers
    an ``atexit`` cleanup handler. A second call logs a warning and returns
    without changing anything.

    Side effects: sets the environment variables ``TRACELOOP_METRICS_ENABLED``
    to ``"false"`` and ``TRACELOOP_TRACE_ENABLED`` to ``"true"``.

    Args:
        collection_name: Name for your application/collection
        collection_id: Optional collection ID (auto-generated if not provided)
        endpoint: Optional OTLP endpoint URL (defaults to Docent's hosted service)
        api_key: Optional API key (uses DOCENT_API_KEY environment variable if not provided)
        enable_console_export: Whether to also export traces to console for debugging
        disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
        instruments: Set of instruments to enable (None = all instruments)
        block_instruments: Set of instruments to explicitly disable

    Raises:
        ValueError: If no API key is passed and DOCENT_API_KEY is not set.
    """
    global _tracing_initialized, _collection_name, _collection_id, _default_agent_run_id, _endpoint, _api_key
    global _enable_console_export, _disable_batch, _instruments, _block_instruments

    if _tracing_initialized:
        logging.warning("Docent tracing already initialized")
        return

    # Validate the API key before touching any module state, so a failed call
    # leaves no half-configured globals or context variables behind.
    resolved_api_key = api_key or os.getenv("DOCENT_API_KEY")
    if not resolved_api_key:
        raise ValueError(
            "API key is required. Set DOCENT_API_KEY environment variable or pass api_key parameter."
        )

    _collection_name = collection_name
    _collection_id = collection_id or _generate_id()
    _default_agent_run_id = _get_default_agent_run_id()  # Generate default ID if not set
    _endpoint = endpoint or DEFAULT_ENDPOINT
    _api_key = resolved_api_key
    _enable_console_export = enable_console_export
    _disable_batch = disable_batch
    _instruments = instruments
    _block_instruments = block_instruments

    _set_current_collection_id(_collection_id)

    # Imported here, as in the original, rather than at module import time.
    from traceloop.sdk.tracing.tracing import get_default_span_processor

    # Our processor only adds metadata to spans; it does not export them.
    docent_processor = DocentSpanProcessor(_collection_id, enable_console_export)

    # Traceloop's default span processor performs the actual export.
    export_processor = get_default_span_processor(
        disable_batch=_disable_batch,
        api_endpoint=_endpoint,
        headers={"Authorization": f"Bearer {_api_key}"},
    )

    processors = [docent_processor, export_processor]

    os.environ["TRACELOOP_METRICS_ENABLED"] = "false"
    os.environ["TRACELOOP_TRACE_ENABLED"] = "true"

    # Temporarily redirect stdout to suppress print statements
    with redirect_stdout(io.StringIO()):
        Traceloop.init(  # type: ignore
            app_name=collection_name,
            api_endpoint=_endpoint,
            api_key=_api_key,
            telemetry_enabled=False,  # don't send analytics to traceloop's backend
            disable_batch=_disable_batch,
            instruments=_instruments,
            block_instruments=_block_instruments,
            processor=processors,  # Add both our context processor and export processor
        )

    _tracing_initialized = True
    logging.info(
        f"Docent tracing initialized for collection: {collection_name} with collection_id: {_collection_id}"
    )

    # Register cleanup handlers
    atexit.register(_cleanup_tracing)
176
+
177
+
178
def _cleanup_tracing() -> None:
    """Tell the API the trace is finished and mark tracing as uninitialized.

    Does nothing when tracing is not initialized. Any exception raised while
    notifying is logged as a warning, and the initialized flag is cleared
    either way.
    """
    global _tracing_initialized

    if not _tracing_initialized:
        return

    try:
        # Notify API that the trace is over
        _notify_trace_done()
        logging.info("Docent tracing cleanup completed")
    except Exception as exc:
        logging.warning(f"Error during tracing cleanup: {exc}")
    finally:
        _tracing_initialized = False
191
+
192
+
193
def _ensure_tracing_initialized():
    """Raise RuntimeError unless initialize_tracing() has completed."""
    if _tracing_initialized:
        return
    raise RuntimeError("Docent tracing not initialized. Call initialize_tracing() first.")
197
+
198
+
199
def _generate_id() -> str:
    """Return a fresh random UUID (version 4) in its string form."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
202
+
203
+
204
def _get_current_agent_run_id() -> Optional[str]:
    """Return the agent run ID stored in the current context, or None if unset."""
    active_run_id = _current_agent_run_id.get()
    return active_run_id
207
+
208
+
209
def _get_current_collection_id() -> Optional[str]:
    """Return the collection ID stored in the current context, or None if unset."""
    active_collection_id = _current_collection_id.get()
    return active_collection_id
212
+
213
+
214
def _get_default_agent_run_id() -> str:
    """Return the process-wide default agent run ID, creating it on first use."""
    global _default_agent_run_id

    if _default_agent_run_id is not None:
        return _default_agent_run_id

    # First request: generate the ID once and cache it in the module global.
    _default_agent_run_id = _generate_id()
    return _default_agent_run_id
220
+
221
+
222
def _set_current_agent_run_id(agent_run_id: Optional[str]) -> Token[Optional[str]]:
    """Store *agent_run_id* in the current context.

    Returns:
        The token that can be passed to ``ContextVar.reset`` to restore the
        previous value.
    """
    restore_token = _current_agent_run_id.set(agent_run_id)
    return restore_token
225
+
226
+
227
def _set_current_collection_id(collection_id: Optional[str]) -> Token[Optional[str]]:
    """Store *collection_id* in the current context.

    Returns:
        The token that can be passed to ``ContextVar.reset`` to restore the
        previous value.
    """
    restore_token = _current_collection_id.set(collection_id)
    return restore_token
230
+
231
+
232
def _send_to_api(endpoint: str, data: Dict[str, Any]) -> None:
    """POST *data* as JSON to *endpoint*, authenticated with the module API key.

    Delivery is best-effort: every failure (including a non-2xx response) is
    logged as an error and never raised to the caller.

    Args:
        endpoint: The API endpoint URL
        data: The data to send
    """
    try:
        request_headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {_api_key}",
        }
        reply = requests.post(endpoint, json=data, headers=request_headers, timeout=10)
        # Convert HTTP error statuses into exceptions handled below.
        reply.raise_for_status()
        logging.debug(f"Successfully sent data to {endpoint}")
    except requests.exceptions.RequestException as exc:
        logging.error(f"Failed to send data to {endpoint}: {exc}")
    except Exception as exc:
        logging.error(f"Unexpected error sending data to {endpoint}: {exc}")
250
+
251
+
252
def _notify_trace_done() -> None:
    """Notify the Docent API that the trace is done.

    Posts ``{"collection_id": ..., "status": "completed"}`` to
    ``<endpoint>/v1/trace-done``. Nothing is sent when no collection ID or no
    endpoint is configured.
    """
    # Fall back to the module-level collection ID, as agent_run_score() and
    # agent_run_metadata() do: this may run in a context where the context
    # variable was never set.
    collection_id = _get_current_collection_id() or _collection_id
    if collection_id and _endpoint:
        data = {"collection_id": collection_id, "status": "completed"}
        _send_to_api(f"{_endpoint}/v1/trace-done", data)
258
+
259
+
260
def agent_run_score(name: str, score: float, attributes: Optional[Dict[str, Any]] = None) -> None:
    """
    Record a score for the current agent run by posting it to the Docent API.

    The score is sent to ``<endpoint>/v1/scores``. If there is no active agent
    run or no collection ID, a warning is logged and nothing is sent.

    Args:
        name: Name of the score metric
        score: Numeric score value
        attributes: Optional additional attributes for the score event. Keys
            that collide with the core fields (``collection_id``,
            ``agent_run_id``, ``score_name``, ``score_value``) are ignored in
            favour of the core values.

    Raises:
        RuntimeError: If tracing has not been initialized.
    """
    _ensure_tracing_initialized()

    agent_run_id = _get_current_agent_run_id()
    if not agent_run_id:
        logging.warning("No active agent run context. Score will not be sent.")
        return

    collection_id = _get_current_collection_id() or _collection_id
    if not collection_id:
        logging.warning("No collection ID available. Score will not be sent.")
        return

    # Start from the caller's extra attributes and apply the core fields last,
    # so a stray key in `attributes` cannot overwrite the identifiers or score.
    score_data: Dict[str, Any] = dict(attributes) if attributes else {}
    score_data.update(
        {
            "collection_id": collection_id,
            "agent_run_id": agent_run_id,
            "score_name": name,
            "score_value": score,
        }
    )

    _send_to_api(f"{_endpoint}/v1/scores", score_data)
295
+
296
+
297
def agent_run_metadata(metadata: Dict[str, Any]) -> None:
    """Post *metadata* for the current agent run to ``<endpoint>/v1/metadata``.

    If there is no active agent run or no collection ID, a warning is logged
    and nothing is sent.

    Args:
        metadata: Dictionary of metadata to attach

    Raises:
        RuntimeError: If tracing has not been initialized.
    """
    _ensure_tracing_initialized()

    run_id = _get_current_agent_run_id()
    if not run_id:
        logging.warning("No active agent run context. Metadata will not be sent.")
        return

    # Prefer the context-local collection ID, then the module-level one.
    active_collection = _get_current_collection_id() or _collection_id
    if not active_collection:
        logging.warning("No collection ID available. Metadata will not be sent.")
        return

    payload = {
        "collection_id": active_collection,
        "agent_run_id": run_id,
        "metadata": metadata,
    }
    target_url = f"{_endpoint}/v1/metadata"
    _send_to_api(target_url, payload)
323
+
324
+
325
@contextmanager
def _agent_run_context_sync(
    agent_run_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Iterator[tuple[str, Optional[str]]]:
    """Run a ``with`` block under an agent run ID.

    Sets the context-local agent run ID (generating one when none is given),
    sends *metadata* if provided, and restores the previous ID on exit.

    Yields:
        ``(agent_run_id, None)``; the second element is a placeholder for a
        transcript ID, which is handled by the backend.

    Raises:
        RuntimeError: If tracing has not been initialized.
    """
    _ensure_tracing_initialized()

    run_id = agent_run_id or _generate_id()
    restore_token = _set_current_agent_run_id(run_id)

    try:
        if metadata:
            agent_run_metadata(metadata)

        # Spans created inside the block pick up the run ID via
        # DocentSpanProcessor.
        yield (run_id, None)
    finally:
        # Put back whatever agent run ID was active before entry.
        _current_agent_run_id.reset(restore_token)
351
+
352
+
353
@asynccontextmanager
async def _agent_run_context_async(
    agent_run_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> AsyncIterator[tuple[str, Optional[str]]]:
    """Asynchronous context manager for creating and managing agent runs.

    Sets the context-local agent run ID (generating one when none is given),
    sends *metadata* if provided, and restores the previous ID on exit.

    Yields:
        ``(agent_run_id, None)``; the second element is a placeholder for a
        transcript ID, which is handled by the backend.

    Raises:
        RuntimeError: If tracing has not been initialized.
    """
    _ensure_tracing_initialized()

    # Generate IDs if not provided
    current_agent_run_id = agent_run_id or _generate_id()

    # Set up context
    agent_run_token = _set_current_agent_run_id(current_agent_run_id)

    try:
        if metadata:
            # agent_run_metadata() performs a blocking HTTP POST. Run it in a
            # worker thread so the event loop is not stalled while it waits.
            # asyncio.to_thread propagates the current context, so the agent
            # run ID set above is visible inside the thread.
            await asyncio.to_thread(agent_run_metadata, metadata)

        # Traceloop creates spans for instrumented operations inside the block
        # and DocentSpanProcessor adds the agent run ID to them.
        yield (current_agent_run_id, None)
    finally:
        # Restore context
        _current_agent_run_id.reset(agent_run_token)
379
+
380
+
381
class _AgentRunContext:
    """Agent-run context manager usable with both ``with`` and ``async with``.

    The underlying sync or async context manager is only created on entry, so
    constructing this object has no side effects.
    """

    def __init__(
        self,
        agent_run_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        self._agent_run_id = agent_run_id
        self._metadata = metadata
        # Delegates that are active between enter and exit.
        self._sync_cm: Any = None
        self._async_cm: Any = None

    def __enter__(self):
        self._sync_cm = _agent_run_context_sync(
            agent_run_id=self._agent_run_id, metadata=self._metadata
        )
        return self._sync_cm.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        delegate, self._sync_cm = self._sync_cm, None
        return delegate.__exit__(exc_type, exc_value, traceback)

    async def __aenter__(self):
        self._async_cm = _agent_run_context_async(
            agent_run_id=self._agent_run_id, metadata=self._metadata
        )
        return await self._async_cm.__aenter__()

    async def __aexit__(self, exc_type, exc_value, traceback):
        delegate, self._async_cm = self._async_cm, None
        return await delegate.__aexit__(exc_type, exc_value, traceback)


def agent_run_context(
    agent_run_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
):
    """Context manager for creating and managing agent runs.

    This context manager can be used in both synchronous and asynchronous contexts.
    In async contexts, use it with `async with agent_run_context()`.
    In sync contexts, use it with `with agent_run_context()`.

    The statement used (``with`` or ``async with``) selects the behaviour; no
    call-stack inspection is involved, so a synchronous helper called from
    within a coroutine can still use plain ``with``.

    Args:
        agent_run_id: Optional agent run ID (auto-generated if not provided)
        metadata: Optional metadata to attach to the agent run

    Returns:
        A context manager that yields a tuple of (agent_run_id, transcript_id)
        where transcript_id is None for now as it's handled by backend
    """
    return _AgentRunContext(agent_run_id=agent_run_id, metadata=metadata)
415
+
416
+
417
def agent_run(
    func: Optional[Callable[..., Any]] = None,
    *,
    agent_run_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Callable[..., Any]:
    """Decorator that runs a function inside an agent run.

    Works bare (``@agent_run``) or with keyword arguments
    (``@agent_run(agent_run_id=..., metadata=...)``). Coroutine functions get
    an async wrapper, everything else a sync wrapper. After a call returns
    normally, the wrapper's ``docent.agent_run_id`` attribute holds the ID
    used for that call.

    Args:
        func: Function to decorate
        agent_run_id: Optional agent run ID (auto-generated if not provided)
        metadata: Optional metadata to attach to the agent run

    Returns:
        Decorated function
    """

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        if asyncio.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                run_scope = _agent_run_context_async(agent_run_id=agent_run_id, metadata=metadata)
                async with run_scope as (run_id, _):
                    outcome = await func(*args, **kwargs)
                    # Expose the run ID on the wrapper for later inspection.
                    info = type("DocentInfo", (), {"agent_run_id": run_id})()
                    setattr(async_wrapper, "docent", info)  # type: ignore
                    return outcome

            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            run_scope = _agent_run_context_sync(agent_run_id=agent_run_id, metadata=metadata)
            with run_scope as (run_id, _):
                outcome = func(*args, **kwargs)
                # Expose the run ID on the wrapper for later inspection.
                info = type("DocentInfo", (), {"agent_run_id": run_id})()
                setattr(sync_wrapper, "docent", info)  # type: ignore
                return outcome

        return sync_wrapper

    # Bare use passes the function directly; parameterized use passes None.
    return decorator if func is None else decorator(func)
468
+
469
+
470
+ # Additional utility functions for better integration
471
+
472
+
473
def get_current_agent_run_id() -> Optional[str]:
    """Public accessor for the agent run ID of the current context.

    Returns:
        The current agent run ID if available, None otherwise
    """
    active_run_id = _get_current_agent_run_id()
    return active_run_id
480
+
481
+
482
def get_current_collection_id() -> Optional[str]:
    """Public accessor for the collection ID of the current context.

    Returns:
        The current collection ID if available, None otherwise
    """
    active_collection_id = _get_current_collection_id()
    return active_collection_id
489
+
490
+
491
def is_tracing_initialized() -> bool:
    """Report the module-level initialization flag.

    Returns:
        True if tracing is initialized, False otherwise
    """
    initialized = _tracing_initialized
    return initialized
498
+
499
+
500
def flush_spans() -> None:
    """Ask Traceloop to flush pending spans, if tracing is initialized.

    Useful before shutdown or while debugging. Errors are logged as warnings
    and never raised; when tracing is not initialized this is a no-op.
    """
    if not _tracing_initialized:
        return

    try:
        traceloop_client = Traceloop.get()
        # Only call flush() when the object actually provides it.
        if hasattr(traceloop_client, "flush"):
            traceloop_client.flush()  # type: ignore
        logging.debug("Spans flushed successfully")
    except Exception as exc:
        logging.warning(f"Error flushing spans: {exc}")