genai-otel-instrument 0.1.4.dev0__py3-none-any.whl → 0.1.9.dev0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of genai-otel-instrument might be problematic. Click here for more details.

@@ -7,11 +7,12 @@ It includes methods for creating OpenTelemetry spans, recording metrics,
7
7
  and handling configuration and cost calculation.
8
8
  """
9
9
 
10
+ import json
10
11
  import logging
11
12
  import threading
12
13
  import time
13
14
  from abc import ABC, abstractmethod
14
- from typing import Any, Callable, Dict, Optional
15
+ from typing import Any, Callable, Dict, List, Optional
15
16
 
16
17
  import wrapt
17
18
  from opentelemetry import metrics, trace
@@ -97,7 +98,7 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
97
98
  self.tracer = trace.get_tracer(__name__)
98
99
  self.meter = metrics.get_meter(__name__)
99
100
  self.config: Optional[OTelConfig] = None
100
- self.cost_calculator = CostCalculator()
101
+ self.cost_calculator = CostCalculator() # Will be updated when instrument() is called
101
102
  self._instrumented = False
102
103
 
103
104
  # Use shared metrics to avoid duplicate warnings
@@ -205,10 +206,25 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
205
206
  cls._shared_ttft_histogram = None
206
207
  cls._shared_tbt_histogram = None
207
208
 
209
+ def _setup_config(self, config: OTelConfig):
210
+ """Set up configuration and reinitialize cost calculator with custom pricing if provided.
211
+
212
+ Args:
213
+ config (OTelConfig): The OpenTelemetry configuration object.
214
+ """
215
+ self.config = config
216
+ # Reinitialize cost calculator with custom pricing if provided
217
+ if config.custom_pricing_json:
218
+ self.cost_calculator = CostCalculator(custom_pricing_json=config.custom_pricing_json)
219
+ logger.info("Cost calculator reinitialized with custom pricing")
220
+
208
221
  @abstractmethod
209
222
  def instrument(self, config: OTelConfig):
210
223
  """Abstract method to implement library-specific instrumentation.
211
224
 
225
+ Implementers should call self._setup_config(config) at the beginning of this method
226
+ to ensure custom pricing is loaded.
227
+
212
228
  Args:
213
229
  config (OTelConfig): The OpenTelemetry configuration object.
214
230
  """
@@ -248,6 +264,26 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
248
264
  span = self.tracer.start_span(span_name, attributes=initial_attributes)
249
265
  start_time = time.time()
250
266
 
267
+ # Extract session and user context (Phase 4.1)
268
+ if self.config:
269
+ if self.config.session_id_extractor:
270
+ try:
271
+ session_id = self.config.session_id_extractor(instance, args, kwargs)
272
+ if session_id:
273
+ span.set_attribute("session.id", session_id)
274
+ logger.debug("Set session.id: %s", session_id)
275
+ except Exception as e:
276
+ logger.debug("Failed to extract session ID: %s", e)
277
+
278
+ if self.config.user_id_extractor:
279
+ try:
280
+ user_id = self.config.user_id_extractor(instance, args, kwargs)
281
+ if user_id:
282
+ span.set_attribute("user.id", user_id)
283
+ logger.debug("Set user.id: %s", user_id)
284
+ except Exception as e:
285
+ logger.debug("Failed to extract user ID: %s", e)
286
+
251
287
  try:
252
288
  # Call the original function
253
289
  result = wrapped(*args, **kwargs)
@@ -419,9 +455,13 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
419
455
  self.cost_counter.add(total_cost, {"model": str(model)})
420
456
  # Always set span attributes (needed for cost tracking)
421
457
  span.set_attribute("gen_ai.usage.cost.total", total_cost)
422
- logger.debug(f"Set cost attribute: gen_ai.usage.cost.total={total_cost}")
458
+ logger.debug(
459
+ f"Set cost attribute: gen_ai.usage.cost.total={total_cost}"
460
+ )
423
461
  else:
424
- logger.debug(f"Cost is zero, not setting attributes. Costs: {costs}")
462
+ logger.debug(
463
+ f"Cost is zero, not setting attributes. Costs: {costs}"
464
+ )
425
465
 
426
466
  # Record and set attributes for granular costs
427
467
  # Note: Metrics recording is optional, span attributes are always set
@@ -502,6 +542,7 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
502
542
  first_token = True
503
543
  last_token_time = start_time
504
544
  token_count = 0
545
+ last_chunk = None # Store last chunk to extract usage
505
546
 
506
547
  try:
507
548
  for chunk in stream:
@@ -523,6 +564,7 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
523
564
  self.tbt_histogram.record(tbt, {"model": model, "operation": span.name})
524
565
 
525
566
  last_token_time = current_time
567
+ last_chunk = chunk # Keep track of last chunk for usage extraction
526
568
  yield chunk
527
569
 
528
570
  # Stream completed successfully
@@ -530,6 +572,123 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
530
572
  if self.latency_histogram:
531
573
  self.latency_histogram.record(duration, {"operation": span.name})
532
574
  span.set_attribute("gen_ai.streaming.token_count", token_count)
575
+
576
+ # Extract usage from last chunk and calculate cost
577
+ # Many providers (OpenAI, Anthropic, etc.) include usage in the final chunk
578
+ try:
579
+ if last_chunk is not None:
580
+ usage = self._extract_usage(last_chunk)
581
+ if usage and isinstance(usage, dict):
582
+ # Record token usage metrics and calculate cost
583
+ # This will set span attributes and record cost metrics
584
+ prompt_tokens = usage.get("prompt_tokens", 0)
585
+ completion_tokens = usage.get("completion_tokens", 0)
586
+ total_tokens = usage.get("total_tokens", 0)
587
+
588
+ # Record token counts
589
+ if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
590
+ if self.token_counter:
591
+ self.token_counter.add(
592
+ prompt_tokens, {"token_type": "prompt", "operation": span.name}
593
+ )
594
+ span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
595
+
596
+ if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
597
+ if self.token_counter:
598
+ self.token_counter.add(
599
+ completion_tokens,
600
+ {"token_type": "completion", "operation": span.name},
601
+ )
602
+ span.set_attribute(
603
+ "gen_ai.usage.completion_tokens", int(completion_tokens)
604
+ )
605
+
606
+ if isinstance(total_tokens, (int, float)) and total_tokens > 0:
607
+ span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
608
+
609
+ # Calculate and record cost if enabled
610
+ if self.config and self.config.enable_cost_tracking:
611
+ try:
612
+ # Get call_type from span attributes or default to "chat"
613
+ call_type = span.attributes.get("gen_ai.request.type", "chat")
614
+
615
+ # Use granular cost calculation for chat requests
616
+ if call_type == "chat":
617
+ costs = self.cost_calculator.calculate_granular_cost(
618
+ model, usage, call_type
619
+ )
620
+ total_cost = costs["total"]
621
+
622
+ # Record total cost
623
+ if total_cost > 0:
624
+ if self.cost_counter:
625
+ self.cost_counter.add(total_cost, {"model": str(model)})
626
+ span.set_attribute("gen_ai.usage.cost.total", total_cost)
627
+ logger.debug(f"Streaming cost: {total_cost} USD")
628
+
629
+ # Record granular costs
630
+ if costs["prompt"] > 0:
631
+ if self.prompt_cost_counter:
632
+ self.prompt_cost_counter.add(
633
+ costs["prompt"], {"model": str(model)}
634
+ )
635
+ span.set_attribute(
636
+ "gen_ai.usage.cost.prompt", costs["prompt"]
637
+ )
638
+
639
+ if costs["completion"] > 0:
640
+ if self.completion_cost_counter:
641
+ self.completion_cost_counter.add(
642
+ costs["completion"], {"model": str(model)}
643
+ )
644
+ span.set_attribute(
645
+ "gen_ai.usage.cost.completion", costs["completion"]
646
+ )
647
+
648
+ if costs["reasoning"] > 0:
649
+ if self.reasoning_cost_counter:
650
+ self.reasoning_cost_counter.add(
651
+ costs["reasoning"], {"model": str(model)}
652
+ )
653
+ span.set_attribute(
654
+ "gen_ai.usage.cost.reasoning", costs["reasoning"]
655
+ )
656
+
657
+ if costs["cache_read"] > 0:
658
+ if self.cache_read_cost_counter:
659
+ self.cache_read_cost_counter.add(
660
+ costs["cache_read"], {"model": str(model)}
661
+ )
662
+ span.set_attribute(
663
+ "gen_ai.usage.cost.cache_read", costs["cache_read"]
664
+ )
665
+
666
+ if costs["cache_write"] > 0:
667
+ if self.cache_write_cost_counter:
668
+ self.cache_write_cost_counter.add(
669
+ costs["cache_write"], {"model": str(model)}
670
+ )
671
+ span.set_attribute(
672
+ "gen_ai.usage.cost.cache_write", costs["cache_write"]
673
+ )
674
+ else:
675
+ # For non-chat requests, use simple cost calculation
676
+ cost = self.cost_calculator.calculate_cost(
677
+ model, usage, call_type
678
+ )
679
+ if cost and cost > 0:
680
+ if self.cost_counter:
681
+ self.cost_counter.add(cost, {"model": str(model)})
682
+ span.set_attribute("gen_ai.usage.cost.total", cost)
683
+ except Exception as e:
684
+ logger.warning(
685
+ "Failed to calculate cost for streaming response: %s", e
686
+ )
687
+ else:
688
+ logger.debug("No usage information found in streaming response")
689
+ except Exception as e:
690
+ logger.warning("Failed to extract usage from streaming response: %s", e)
691
+
533
692
  span.set_status(Status(StatusCode.OK))
534
693
  span.end() # Close the span when streaming completes
535
694
  logger.debug(f"Streaming completed: {token_count} chunks in {duration:.3f}s")
@@ -544,6 +703,71 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
544
703
  logger.warning(f"Error in streaming wrapper: {e}")
545
704
  raise
546
705
 
706
+ # Phase 4.2: RAG/Embedding Helper Methods
707
+ def add_embedding_attributes(
708
+ self, span, model: str, input_text: str, vector: Optional[List[float]] = None
709
+ ):
710
+ """Add embedding-specific attributes to a span.
711
+
712
+ Args:
713
+ span: The OpenTelemetry span
714
+ model: The embedding model name
715
+ input_text: The text being embedded (will be truncated to 500 chars)
716
+ vector: Optional embedding vector (use with caution - can be large!)
717
+ """
718
+ span.set_attribute("embedding.model_name", model)
719
+ span.set_attribute("embedding.text", input_text[:500]) # Truncate to avoid large spans
720
+
721
+ if vector and self.config and hasattr(self.config, "capture_embedding_vectors"):
722
+ # Only capture vectors if explicitly enabled (they can be very large)
723
+ span.set_attribute("embedding.vector", json.dumps(vector))
724
+ span.set_attribute("embedding.vector.dimension", len(vector))
725
+
726
+ def add_retrieval_attributes(
727
+ self,
728
+ span,
729
+ documents: List[Dict[str, Any]],
730
+ query: Optional[str] = None,
731
+ max_docs: int = 5,
732
+ ):
733
+ """Add retrieval/RAG-specific attributes to a span.
734
+
735
+ Args:
736
+ span: The OpenTelemetry span
737
+ documents: List of retrieved documents. Each dict should have:
738
+ - id: Document identifier
739
+ - score: Relevance score
740
+ - content: Document content
741
+ - metadata: Optional metadata dict
742
+ query: Optional query string
743
+ max_docs: Maximum number of documents to include in attributes (default: 5)
744
+ """
745
+ if query:
746
+ span.set_attribute("retrieval.query", query[:500]) # Truncate
747
+
748
+ # Limit to first N documents to avoid attribute explosion
749
+ for i, doc in enumerate(documents[:max_docs]):
750
+ prefix = f"retrieval.documents.{i}.document"
751
+
752
+ if "id" in doc:
753
+ span.set_attribute(f"{prefix}.id", str(doc["id"]))
754
+ if "score" in doc:
755
+ span.set_attribute(f"{prefix}.score", float(doc["score"]))
756
+ if "content" in doc:
757
+ # Truncate content to avoid large attributes
758
+ content = str(doc["content"])[:500]
759
+ span.set_attribute(f"{prefix}.content", content)
760
+
761
+ # Add metadata if present
762
+ if "metadata" in doc and isinstance(doc["metadata"], dict):
763
+ for key, value in doc["metadata"].items():
764
+ # Flatten metadata, limit key names to avoid explosion
765
+ safe_key = str(key)[:50] # Limit key length
766
+ safe_value = str(value)[:200] # Limit value length
767
+ span.set_attribute(f"{prefix}.metadata.{safe_key}", safe_value)
768
+
769
+ span.set_attribute("retrieval.document_count", len(documents))
770
+
547
771
  @abstractmethod
548
772
  def _extract_usage(self, result) -> Optional[Dict[str, int]]:
549
773
  """Abstract method to extract token usage information from a function result.
@@ -1,140 +1,140 @@
1
- """OpenTelemetry instrumentor for the Cohere SDK.
2
-
3
- This instrumentor automatically traces calls to Cohere models, capturing
4
- relevant attributes such as the model name and token usage.
5
- """
6
-
7
- import logging
8
- from typing import Any, Dict, Optional
9
-
10
- from ..config import OTelConfig
11
- from .base import BaseInstrumentor
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
-
16
- class CohereInstrumentor(BaseInstrumentor):
17
- """Instrumentor for Cohere"""
18
-
19
- def __init__(self):
20
- """Initialize the instrumentor."""
21
- super().__init__()
22
- self._cohere_available = False
23
- self._check_availability()
24
-
25
- def _check_availability(self):
26
- """Check if cohere library is available."""
27
- try:
28
- import cohere
29
-
30
- self._cohere_available = True
31
- logger.debug("cohere library detected and available for instrumentation")
32
- except ImportError:
33
- logger.debug("cohere library not installed, instrumentation will be skipped")
34
- self._cohere_available = False
35
-
36
- def instrument(self, config: OTelConfig):
37
- """Instrument cohere if available."""
38
- if not self._cohere_available:
39
- logger.debug("Skipping instrumentation - library not available")
40
- return
41
-
42
- self.config = config
43
- try:
44
- import cohere
45
-
46
- original_init = cohere.Client.__init__
47
-
48
- def wrapped_init(instance, *args, **kwargs):
49
- original_init(instance, *args, **kwargs)
50
- self._instrument_client(instance)
51
-
52
- cohere.Client.__init__ = wrapped_init
53
- self._instrumented = True
54
- logger.info("Cohere instrumentation enabled")
55
-
56
- except Exception as e:
57
- logger.error("Failed to instrument Cohere: %s", e, exc_info=True)
58
- if config.fail_on_error:
59
- raise
60
-
61
- def _instrument_client(self, client):
62
- """Instrument Cohere client methods."""
63
- original_generate = client.generate
64
-
65
- # Wrap using create_span_wrapper
66
- wrapped_generate = self.create_span_wrapper(
67
- span_name="cohere.generate",
68
- extract_attributes=self._extract_generate_attributes,
69
- )(original_generate)
70
-
71
- client.generate = wrapped_generate
72
-
73
- def _extract_generate_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
74
- """Extract attributes from Cohere generate call.
75
-
76
- Args:
77
- instance: The client instance.
78
- args: Positional arguments.
79
- kwargs: Keyword arguments.
80
-
81
- Returns:
82
- Dict[str, Any]: Dictionary of attributes to set on the span.
83
- """
84
- attrs = {}
85
- model = kwargs.get("model", "command")
86
- prompt = kwargs.get("prompt", "")
87
-
88
- attrs["gen_ai.system"] = "cohere"
89
- attrs["gen_ai.request.model"] = model
90
- attrs["gen_ai.operation.name"] = "generate"
91
- attrs["gen_ai.request.message_count"] = 1 if prompt else 0
92
-
93
- return attrs
94
-
95
- def _extract_usage(self, result) -> Optional[Dict[str, int]]:
96
- """Extract token usage from Cohere response.
97
-
98
- Cohere responses include meta.tokens with:
99
- - input_tokens: Input tokens
100
- - output_tokens: Output tokens
101
-
102
- Args:
103
- result: The API response object.
104
-
105
- Returns:
106
- Optional[Dict[str, int]]: Dictionary with token counts or None.
107
- """
108
- try:
109
- # Handle object response
110
- if hasattr(result, "meta") and result.meta:
111
- meta = result.meta
112
- # Check for tokens object
113
- if hasattr(meta, "tokens") and meta.tokens:
114
- tokens = meta.tokens
115
- input_tokens = getattr(tokens, "input_tokens", 0)
116
- output_tokens = getattr(tokens, "output_tokens", 0)
117
-
118
- if input_tokens or output_tokens:
119
- return {
120
- "prompt_tokens": int(input_tokens) if input_tokens else 0,
121
- "completion_tokens": int(output_tokens) if output_tokens else 0,
122
- "total_tokens": int(input_tokens or 0) + int(output_tokens or 0),
123
- }
124
- # Fallback to billed_units
125
- elif hasattr(meta, "billed_units") and meta.billed_units:
126
- billed = meta.billed_units
127
- input_tokens = getattr(billed, "input_tokens", 0)
128
- output_tokens = getattr(billed, "output_tokens", 0)
129
-
130
- if input_tokens or output_tokens:
131
- return {
132
- "prompt_tokens": int(input_tokens) if input_tokens else 0,
133
- "completion_tokens": int(output_tokens) if output_tokens else 0,
134
- "total_tokens": int(input_tokens or 0) + int(output_tokens or 0),
135
- }
136
-
137
- return None
138
- except Exception as e:
139
- logger.debug("Failed to extract usage from Cohere response: %s", e)
140
- return None
1
+ """OpenTelemetry instrumentor for the Cohere SDK.
2
+
3
+ This instrumentor automatically traces calls to Cohere models, capturing
4
+ relevant attributes such as the model name and token usage.
5
+ """
6
+
7
+ import logging
8
+ from typing import Any, Dict, Optional
9
+
10
+ from ..config import OTelConfig
11
+ from .base import BaseInstrumentor
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class CohereInstrumentor(BaseInstrumentor):
17
+ """Instrumentor for Cohere"""
18
+
19
+ def __init__(self):
20
+ """Initialize the instrumentor."""
21
+ super().__init__()
22
+ self._cohere_available = False
23
+ self._check_availability()
24
+
25
+ def _check_availability(self):
26
+ """Check if cohere library is available."""
27
+ try:
28
+ import cohere
29
+
30
+ self._cohere_available = True
31
+ logger.debug("cohere library detected and available for instrumentation")
32
+ except ImportError:
33
+ logger.debug("cohere library not installed, instrumentation will be skipped")
34
+ self._cohere_available = False
35
+
36
+ def instrument(self, config: OTelConfig):
37
+ """Instrument cohere if available."""
38
+ if not self._cohere_available:
39
+ logger.debug("Skipping instrumentation - library not available")
40
+ return
41
+
42
+ self.config = config
43
+ try:
44
+ import cohere
45
+
46
+ original_init = cohere.Client.__init__
47
+
48
+ def wrapped_init(instance, *args, **kwargs):
49
+ original_init(instance, *args, **kwargs)
50
+ self._instrument_client(instance)
51
+
52
+ cohere.Client.__init__ = wrapped_init
53
+ self._instrumented = True
54
+ logger.info("Cohere instrumentation enabled")
55
+
56
+ except Exception as e:
57
+ logger.error("Failed to instrument Cohere: %s", e, exc_info=True)
58
+ if config.fail_on_error:
59
+ raise
60
+
61
+ def _instrument_client(self, client):
62
+ """Instrument Cohere client methods."""
63
+ original_generate = client.generate
64
+
65
+ # Wrap using create_span_wrapper
66
+ wrapped_generate = self.create_span_wrapper(
67
+ span_name="cohere.generate",
68
+ extract_attributes=self._extract_generate_attributes,
69
+ )(original_generate)
70
+
71
+ client.generate = wrapped_generate
72
+
73
+ def _extract_generate_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
74
+ """Extract attributes from Cohere generate call.
75
+
76
+ Args:
77
+ instance: The client instance.
78
+ args: Positional arguments.
79
+ kwargs: Keyword arguments.
80
+
81
+ Returns:
82
+ Dict[str, Any]: Dictionary of attributes to set on the span.
83
+ """
84
+ attrs = {}
85
+ model = kwargs.get("model", "command")
86
+ prompt = kwargs.get("prompt", "")
87
+
88
+ attrs["gen_ai.system"] = "cohere"
89
+ attrs["gen_ai.request.model"] = model
90
+ attrs["gen_ai.operation.name"] = "generate"
91
+ attrs["gen_ai.request.message_count"] = 1 if prompt else 0
92
+
93
+ return attrs
94
+
95
+ def _extract_usage(self, result) -> Optional[Dict[str, int]]:
96
+ """Extract token usage from Cohere response.
97
+
98
+ Cohere responses include meta.tokens with:
99
+ - input_tokens: Input tokens
100
+ - output_tokens: Output tokens
101
+
102
+ Args:
103
+ result: The API response object.
104
+
105
+ Returns:
106
+ Optional[Dict[str, int]]: Dictionary with token counts or None.
107
+ """
108
+ try:
109
+ # Handle object response
110
+ if hasattr(result, "meta") and result.meta:
111
+ meta = result.meta
112
+ # Check for tokens object
113
+ if hasattr(meta, "tokens") and meta.tokens:
114
+ tokens = meta.tokens
115
+ input_tokens = getattr(tokens, "input_tokens", 0)
116
+ output_tokens = getattr(tokens, "output_tokens", 0)
117
+
118
+ if input_tokens or output_tokens:
119
+ return {
120
+ "prompt_tokens": int(input_tokens) if input_tokens else 0,
121
+ "completion_tokens": int(output_tokens) if output_tokens else 0,
122
+ "total_tokens": int(input_tokens or 0) + int(output_tokens or 0),
123
+ }
124
+ # Fallback to billed_units
125
+ elif hasattr(meta, "billed_units") and meta.billed_units:
126
+ billed = meta.billed_units
127
+ input_tokens = getattr(billed, "input_tokens", 0)
128
+ output_tokens = getattr(billed, "output_tokens", 0)
129
+
130
+ if input_tokens or output_tokens:
131
+ return {
132
+ "prompt_tokens": int(input_tokens) if input_tokens else 0,
133
+ "completion_tokens": int(output_tokens) if output_tokens else 0,
134
+ "total_tokens": int(input_tokens or 0) + int(output_tokens or 0),
135
+ }
136
+
137
+ return None
138
+ except Exception as e:
139
+ logger.debug("Failed to extract usage from Cohere response: %s", e)
140
+ return None