genai-otel-instrument 0.1.2.dev0__py3-none-any.whl → 0.1.7.dev0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- genai_otel/__version__.py +2 -2
- genai_otel/auto_instrument.py +18 -1
- genai_otel/config.py +22 -1
- genai_otel/cost_calculator.py +204 -13
- genai_otel/cost_enrichment_processor.py +175 -0
- genai_otel/gpu_metrics.py +50 -0
- genai_otel/instrumentors/base.py +300 -44
- genai_otel/instrumentors/cohere_instrumentor.py +140 -76
- genai_otel/instrumentors/huggingface_instrumentor.py +142 -13
- genai_otel/instrumentors/langchain_instrumentor.py +75 -75
- genai_otel/instrumentors/mistralai_instrumentor.py +234 -38
- genai_otel/instrumentors/ollama_instrumentor.py +104 -35
- genai_otel/instrumentors/replicate_instrumentor.py +59 -14
- genai_otel/instrumentors/togetherai_instrumentor.py +120 -16
- genai_otel/instrumentors/vertexai_instrumentor.py +79 -15
- genai_otel/llm_pricing.json +869 -589
- genai_otel/logging_config.py +45 -45
- genai_otel/py.typed +2 -2
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/METADATA +294 -33
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/RECORD +24 -23
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/WHEEL +0 -0
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/entry_points.txt +0 -0
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/licenses/LICENSE +0 -0
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/top_level.txt +0 -0
genai_otel/instrumentors/base.py
CHANGED
@@ -7,11 +7,12 @@ It includes methods for creating OpenTelemetry spans, recording metrics,
 and handling configuration and cost calculation.
 """
 
+import json
 import logging
 import threading
 import time
 from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Callable, Dict, List, Optional
 
 import wrapt
 from opentelemetry import metrics, trace
@@ -82,6 +83,12 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
     _shared_latency_histogram = None
     _shared_cost_counter = None
     _shared_error_counter = None
+    # Granular cost counters (Phase 3.2)
+    _shared_prompt_cost_counter = None
+    _shared_completion_cost_counter = None
+    _shared_reasoning_cost_counter = None
+    _shared_cache_read_cost_counter = None
+    _shared_cache_write_cost_counter = None
     # Streaming metrics (Phase 3.4)
     _shared_ttft_histogram = None
     _shared_tbt_histogram = None
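The new class-level `_shared_*` counters follow the file's existing pattern: one metric instrument per process, shared by every instrumentor instance, so repeated instrumentation never re-creates an instrument. The diff does not show where these counters are created; the following is a hypothetical sketch of such a creation site using the standard OpenTelemetry metrics API, with assumed metric names:

import logging
from opentelemetry import metrics

meter = metrics.get_meter(__name__)

# Assumed names and units -- the real ones live in the (unshown) shared-metric setup.
prompt_cost_counter = meter.create_counter(
    "gen_ai.usage.cost.prompt",  # hypothetical metric name
    unit="USD",
    description="Cost attributed to prompt (input) tokens",
)
completion_cost_counter = meter.create_counter(
    "gen_ai.usage.cost.completion",  # hypothetical metric name
    unit="USD",
    description="Cost attributed to completion (output) tokens",
)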
@@ -91,7 +98,7 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         self.tracer = trace.get_tracer(__name__)
         self.meter = metrics.get_meter(__name__)
         self.config: Optional[OTelConfig] = None
-        self.cost_calculator = CostCalculator()
+        self.cost_calculator = CostCalculator()  # Will be updated when instrument() is called
         self._instrumented = False
 
         # Use shared metrics to avoid duplicate warnings
@@ -103,6 +110,12 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         self.latency_histogram = self._shared_latency_histogram
         self.cost_counter = self._shared_cost_counter
         self.error_counter = self._shared_error_counter
+        # Granular cost counters (Phase 3.2)
+        self.prompt_cost_counter = self._shared_prompt_cost_counter
+        self.completion_cost_counter = self._shared_completion_cost_counter
+        self.reasoning_cost_counter = self._shared_reasoning_cost_counter
+        self.cache_read_cost_counter = self._shared_cache_read_cost_counter
+        self.cache_write_cost_counter = self._shared_cache_write_cost_counter
         # Streaming metrics
         self.ttft_histogram = self._shared_ttft_histogram
         self.tbt_histogram = self._shared_tbt_histogram
@@ -193,10 +206,25 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         cls._shared_ttft_histogram = None
         cls._shared_tbt_histogram = None
 
+    def _setup_config(self, config: OTelConfig):
+        """Set up configuration and reinitialize the cost calculator with custom pricing if provided.
+
+        Args:
+            config (OTelConfig): The OpenTelemetry configuration object.
+        """
+        self.config = config
+        # Reinitialize cost calculator with custom pricing if provided
+        if config.custom_pricing_json:
+            self.cost_calculator = CostCalculator(custom_pricing_json=config.custom_pricing_json)
+            logger.info("Cost calculator reinitialized with custom pricing")
+
     @abstractmethod
     def instrument(self, config: OTelConfig):
         """Abstract method to implement library-specific instrumentation.
 
+        Implementers should call self._setup_config(config) at the beginning of this method
+        to ensure custom pricing is loaded.
+
         Args:
             config (OTelConfig): The OpenTelemetry configuration object.
         """
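Every concrete instrumentor is now expected to route its config through `_setup_config()`. A minimal sketch of what that looks like in a subclass — the class name and patching details are illustrative; only the hook call is prescribed by the docstring above:

class ExampleInstrumentor(BaseInstrumentor):
    def instrument(self, config: OTelConfig):
        # Stores the config and, when config.custom_pricing_json is supplied,
        # swaps in a CostCalculator built from that pricing table.
        self._setup_config(config)
        # ... library-specific monkey-patching goes here ...
        self._instrumented = True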
@@ -236,6 +264,26 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         span = self.tracer.start_span(span_name, attributes=initial_attributes)
         start_time = time.time()
 
+        # Extract session and user context (Phase 4.1)
+        if self.config:
+            if self.config.session_id_extractor:
+                try:
+                    session_id = self.config.session_id_extractor(instance, args, kwargs)
+                    if session_id:
+                        span.set_attribute("session.id", session_id)
+                        logger.debug("Set session.id: %s", session_id)
+                except Exception as e:
+                    logger.debug("Failed to extract session ID: %s", e)
+
+            if self.config.user_id_extractor:
+                try:
+                    user_id = self.config.user_id_extractor(instance, args, kwargs)
+                    if user_id:
+                        span.set_attribute("user.id", user_id)
+                        logger.debug("Set user.id: %s", user_id)
+                except Exception as e:
+                    logger.debug("Failed to extract user ID: %s", e)
+
         try:
             # Call the original function
             result = wrapped(*args, **kwargs)
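The extractors are plain callables on the config, invoked as `extractor(instance, args, kwargs)`, and any failure is swallowed at debug level. A sketch of wiring them up — pulling the IDs from a `metadata` kwarg is an assumption about the caller's API, as is `OTelConfig` accepting these fields as constructor arguments:

def session_from_kwargs(instance, args, kwargs):
    # Assumes the application passes a metadata dict in its request kwargs.
    return (kwargs.get("metadata") or {}).get("session_id")

def user_from_kwargs(instance, args, kwargs):
    return (kwargs.get("metadata") or {}).get("user_id")

config = OTelConfig(
    session_id_extractor=session_from_kwargs,
    user_id_extractor=user_from_kwargs,
)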
@@ -346,45 +394,54 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                 and "dup" in self.config.semconv_stability_opt_in
             )
 
-
-
-
-
-
-
-
-                )
-                # New semantic convention
+            # Record prompt tokens
+            if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
+                # Record metric if available
+                if self.token_counter:
+                    self.token_counter.add(
+                        prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                    )
+                # Always set span attributes (needed for cost calculation)
                 span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
                 # Old semantic convention (if dual emission enabled)
                 if emit_old_attrs:
                     span.set_attribute("gen_ai.usage.input_tokens", int(prompt_tokens))
 
-
-
-
-
-
-
-
-                )
-                # New semantic convention
+            # Record completion tokens
+            if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
+                # Record metric if available
+                if self.token_counter:
+                    self.token_counter.add(
+                        completion_tokens, {"token_type": "completion", "operation": span.name}
+                    )
+                # Always set span attributes (needed for cost calculation)
                 span.set_attribute("gen_ai.usage.completion_tokens", int(completion_tokens))
                 # Old semantic convention (if dual emission enabled)
                 if emit_old_attrs:
                     span.set_attribute("gen_ai.usage.output_tokens", int(completion_tokens))
 
+            # Record total tokens
             if isinstance(total_tokens, (int, float)) and total_tokens > 0:
                 span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
 
             # Calculate and record cost if enabled and applicable
-
+            logger.debug(
+                f"Cost tracking check: config={self.config is not None}, "
+                f"enable_cost_tracking={self.config.enable_cost_tracking if self.config else 'N/A'}"
+            )
+            if self.config and self.config.enable_cost_tracking:
                 try:
                     model = span.attributes.get("gen_ai.request.model", "unknown")
                     # Assuming 'chat' as a default call_type for generic base instrumentor tests.
                     # Specific instrumentors will provide the actual call_type.
                     call_type = span.attributes.get("gen_ai.request.type", "chat")
 
+                    logger.debug(
+                        f"Calculating cost for model={model}, call_type={call_type}, "
+                        f"prompt_tokens={usage.get('prompt_tokens')}, "
+                        f"completion_tokens={usage.get('completion_tokens')}"
+                    )
+
                     # Use granular cost calculation for chat requests
                     if call_type == "chat":
                         costs = self.cost_calculator.calculate_granular_cost(
@@ -394,45 +451,59 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
 
                         # Record total cost
                         if total_cost > 0:
-                            self.
-
+                            if self.cost_counter:
+                                self.cost_counter.add(total_cost, {"model": str(model)})
+                            # Always set span attributes (needed for cost tracking)
                             span.set_attribute("gen_ai.usage.cost.total", total_cost)
+                            logger.debug(
+                                f"Set cost attribute: gen_ai.usage.cost.total={total_cost}"
+                            )
+                        else:
+                            logger.debug(
+                                f"Cost is zero, not setting attributes. Costs: {costs}"
+                            )
 
                         # Record and set attributes for granular costs
-
-
-
-
+                        # Note: Metrics recording is optional, span attributes are always set
+                        if costs["prompt"] > 0:
+                            if self.prompt_cost_counter:
+                                self.prompt_cost_counter.add(
+                                    costs["prompt"], {"model": str(model)}
+                                )
                             span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
 
-                        if costs["completion"] > 0
-                            self.
-
-
+                        if costs["completion"] > 0:
+                            if self.completion_cost_counter:
+                                self.completion_cost_counter.add(
+                                    costs["completion"], {"model": str(model)}
+                                )
                             span.set_attribute(
                                 "gen_ai.usage.cost.completion", costs["completion"]
                             )
 
-                        if costs["reasoning"] > 0
-                            self.
-
-
+                        if costs["reasoning"] > 0:
+                            if self.reasoning_cost_counter:
+                                self.reasoning_cost_counter.add(
+                                    costs["reasoning"], {"model": str(model)}
+                                )
                             span.set_attribute(
                                 "gen_ai.usage.cost.reasoning", costs["reasoning"]
                             )
 
-                        if costs["cache_read"] > 0
-                            self.
-
-
+                        if costs["cache_read"] > 0:
+                            if self.cache_read_cost_counter:
+                                self.cache_read_cost_counter.add(
+                                    costs["cache_read"], {"model": str(model)}
+                                )
                             span.set_attribute(
                                 "gen_ai.usage.cost.cache_read", costs["cache_read"]
                             )
 
-                        if costs["cache_write"] > 0
-                            self.
-
-
+                        if costs["cache_write"] > 0:
+                            if self.cache_write_cost_counter:
+                                self.cache_write_cost_counter.add(
+                                    costs["cache_write"], {"model": str(model)}
+                                )
                             span.set_attribute(
                                 "gen_ai.usage.cost.cache_write", costs["cache_write"]
                             )
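Because span attributes are always set even when a metric counter is absent, the new behaviour is straightforward to verify with the OpenTelemetry SDK's in-memory exporter. A sketch, assuming one instrumented chat call has already run against the provider; the exporter setup is standard SDK usage and the attribute names come from the hunk above:

from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

exporter = InMemorySpanExporter()
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(exporter))

# ... run one instrumented chat call, then inspect the finished span:
span = exporter.get_finished_spans()[0]
assert span.attributes["gen_ai.usage.prompt_tokens"] > 0
assert "gen_ai.usage.cost.total" in span.attributes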
@@ -440,7 +511,8 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                         # For non-chat requests, use simple cost calculation
                         cost = self.cost_calculator.calculate_cost(model, usage, call_type)
                         if cost and cost > 0:
-                            self.
+                            if self.cost_counter:
+                                self.cost_counter.add(cost, {"model": str(model)})
                 except Exception as e:
                     logger.warning("Failed to calculate cost for span '%s': %s", span.name, e)
 
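The contract implied by these two hunks: `calculate_granular_cost(model, usage, call_type)` returns a dict of per-bucket USD costs, while non-chat call types fall back to a single figure from `calculate_cost()`. A sketch of that contract; the model names are illustrative, and any usage keys beyond prompt_tokens/completion_tokens that feed the reasoning or cache buckets are assumptions about what CostCalculator consumes:

usage = {"prompt_tokens": 1200, "completion_tokens": 300}
costs = cost_calculator.calculate_granular_cost("gpt-4o", usage, "chat")
# costs == {"total": ..., "prompt": ..., "completion": ...,
#           "reasoning": ..., "cache_read": ..., "cache_write": ...}

# Non-chat call types (e.g. embeddings) produce one number instead:
cost = cost_calculator.calculate_cost("text-embedding-3-small", usage, "embedding")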
@@ -470,6 +542,7 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
         first_token = True
         last_token_time = start_time
         token_count = 0
+        last_chunk = None  # Store last chunk to extract usage
 
         try:
             for chunk in stream:
@@ -491,6 +564,7 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
                     self.tbt_histogram.record(tbt, {"model": model, "operation": span.name})
 
                 last_token_time = current_time
+                last_chunk = chunk  # Keep track of last chunk for usage extraction
                 yield chunk
 
             # Stream completed successfully
@@ -498,6 +572,123 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
             if self.latency_histogram:
                 self.latency_histogram.record(duration, {"operation": span.name})
             span.set_attribute("gen_ai.streaming.token_count", token_count)
+
+            # Extract usage from last chunk and calculate cost
+            # Many providers (OpenAI, Anthropic, etc.) include usage in the final chunk
+            try:
+                if last_chunk is not None:
+                    usage = self._extract_usage(last_chunk)
+                    if usage and isinstance(usage, dict):
+                        # Record token usage metrics and calculate cost
+                        # This will set span attributes and record cost metrics
+                        prompt_tokens = usage.get("prompt_tokens", 0)
+                        completion_tokens = usage.get("completion_tokens", 0)
+                        total_tokens = usage.get("total_tokens", 0)
+
+                        # Record token counts
+                        if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
+                            if self.token_counter:
+                                self.token_counter.add(
+                                    prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                                )
+                            span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
+
+                        if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
+                            if self.token_counter:
+                                self.token_counter.add(
+                                    completion_tokens,
+                                    {"token_type": "completion", "operation": span.name},
+                                )
+                            span.set_attribute(
+                                "gen_ai.usage.completion_tokens", int(completion_tokens)
+                            )
+
+                        if isinstance(total_tokens, (int, float)) and total_tokens > 0:
+                            span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
+
+                        # Calculate and record cost if enabled
+                        if self.config and self.config.enable_cost_tracking:
+                            try:
+                                # Get call_type from span attributes or default to "chat"
+                                call_type = span.attributes.get("gen_ai.request.type", "chat")
+
+                                # Use granular cost calculation for chat requests
+                                if call_type == "chat":
+                                    costs = self.cost_calculator.calculate_granular_cost(
+                                        model, usage, call_type
+                                    )
+                                    total_cost = costs["total"]
+
+                                    # Record total cost
+                                    if total_cost > 0:
+                                        if self.cost_counter:
+                                            self.cost_counter.add(total_cost, {"model": str(model)})
+                                        span.set_attribute("gen_ai.usage.cost.total", total_cost)
+                                        logger.debug(f"Streaming cost: {total_cost} USD")
+
+                                    # Record granular costs
+                                    if costs["prompt"] > 0:
+                                        if self.prompt_cost_counter:
+                                            self.prompt_cost_counter.add(
+                                                costs["prompt"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.prompt", costs["prompt"]
+                                        )
+
+                                    if costs["completion"] > 0:
+                                        if self.completion_cost_counter:
+                                            self.completion_cost_counter.add(
+                                                costs["completion"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.completion", costs["completion"]
+                                        )
+
+                                    if costs["reasoning"] > 0:
+                                        if self.reasoning_cost_counter:
+                                            self.reasoning_cost_counter.add(
+                                                costs["reasoning"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.reasoning", costs["reasoning"]
+                                        )
+
+                                    if costs["cache_read"] > 0:
+                                        if self.cache_read_cost_counter:
+                                            self.cache_read_cost_counter.add(
+                                                costs["cache_read"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.cache_read", costs["cache_read"]
+                                        )
+
+                                    if costs["cache_write"] > 0:
+                                        if self.cache_write_cost_counter:
+                                            self.cache_write_cost_counter.add(
+                                                costs["cache_write"], {"model": str(model)}
+                                            )
+                                        span.set_attribute(
+                                            "gen_ai.usage.cost.cache_write", costs["cache_write"]
+                                        )
+                                else:
+                                    # For non-chat requests, use simple cost calculation
+                                    cost = self.cost_calculator.calculate_cost(
+                                        model, usage, call_type
+                                    )
+                                    if cost and cost > 0:
+                                        if self.cost_counter:
+                                            self.cost_counter.add(cost, {"model": str(model)})
+                                        span.set_attribute("gen_ai.usage.cost.total", cost)
+                            except Exception as e:
+                                logger.warning(
+                                    "Failed to calculate cost for streaming response: %s", e
+                                )
+                    else:
+                        logger.debug("No usage information found in streaming response")
+            except Exception as e:
+                logger.warning("Failed to extract usage from streaming response: %s", e)
+
             span.set_status(Status(StatusCode.OK))
             span.end()  # Close the span when streaming completes
             logger.debug(f"Streaming completed: {token_count} chunks in {duration:.3f}s")
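Note that the usage-in-final-chunk pattern this hunk relies on is opt-in for some providers. With the OpenAI Python client, for example, the final streamed chunk only carries a usage block when the request asks for it; a sketch from the caller's side:

from openai import OpenAI

client = OpenAI()
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
    stream_options={"include_usage": True},  # final chunk then includes .usage
)
for chunk in stream:
    pass  # the instrumentor's wrapper sees each chunk and keeps the last one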
@@ -512,6 +703,71 @@ class BaseInstrumentor(ABC):  # pylint: disable=R0902
             logger.warning(f"Error in streaming wrapper: {e}")
             raise
 
+    # Phase 4.2: RAG/Embedding Helper Methods
+    def add_embedding_attributes(
+        self, span, model: str, input_text: str, vector: Optional[List[float]] = None
+    ):
+        """Add embedding-specific attributes to a span.
+
+        Args:
+            span: The OpenTelemetry span
+            model: The embedding model name
+            input_text: The text being embedded (will be truncated to 500 chars)
+            vector: Optional embedding vector (use with caution - can be large!)
+        """
+        span.set_attribute("embedding.model_name", model)
+        span.set_attribute("embedding.text", input_text[:500])  # Truncate to avoid large spans
+
+        if vector and self.config and hasattr(self.config, "capture_embedding_vectors"):
+            # Only capture vectors if explicitly enabled (they can be very large)
+            span.set_attribute("embedding.vector", json.dumps(vector))
+            span.set_attribute("embedding.vector.dimension", len(vector))
+
+    def add_retrieval_attributes(
+        self,
+        span,
+        documents: List[Dict[str, Any]],
+        query: Optional[str] = None,
+        max_docs: int = 5,
+    ):
+        """Add retrieval/RAG-specific attributes to a span.
+
+        Args:
+            span: The OpenTelemetry span
+            documents: List of retrieved documents. Each dict should have:
+                - id: Document identifier
+                - score: Relevance score
+                - content: Document content
+                - metadata: Optional metadata dict
+            query: Optional query string
+            max_docs: Maximum number of documents to include in attributes (default: 5)
+        """
+        if query:
+            span.set_attribute("retrieval.query", query[:500])  # Truncate
+
+        # Limit to first N documents to avoid attribute explosion
+        for i, doc in enumerate(documents[:max_docs]):
+            prefix = f"retrieval.documents.{i}.document"
+
+            if "id" in doc:
+                span.set_attribute(f"{prefix}.id", str(doc["id"]))
+            if "score" in doc:
+                span.set_attribute(f"{prefix}.score", float(doc["score"]))
+            if "content" in doc:
+                # Truncate content to avoid large attributes
+                content = str(doc["content"])[:500]
+                span.set_attribute(f"{prefix}.content", content)
+
+            # Add metadata if present
+            if "metadata" in doc and isinstance(doc["metadata"], dict):
+                for key, value in doc["metadata"].items():
+                    # Flatten metadata, limit key names to avoid explosion
+                    safe_key = str(key)[:50]  # Limit key length
+                    safe_value = str(value)[:200]  # Limit value length
+                    span.set_attribute(f"{prefix}.metadata.{safe_key}", safe_value)
+
+        span.set_attribute("retrieval.document_count", len(documents))
+
     @abstractmethod
     def _extract_usage(self, result) -> Optional[Dict[str, int]]:
         """Abstract method to extract token usage information from a function result.