genai-otel-instrument 0.1.4.dev0__py3-none-any.whl → 0.1.9.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of genai-otel-instrument might be problematic. Click here for more details.
- genai_otel/__version__.py +2 -2
- genai_otel/auto_instrument.py +7 -3
- genai_otel/config.py +19 -1
- genai_otel/cost_calculator.py +72 -6
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +174 -177
- genai_otel/gpu_metrics.py +50 -0
- genai_otel/instrumentors/base.py +228 -4
- genai_otel/instrumentors/cohere_instrumentor.py +140 -140
- genai_otel/instrumentors/huggingface_instrumentor.py +184 -7
- genai_otel/instrumentors/langchain_instrumentor.py +75 -75
- genai_otel/instrumentors/mistralai_instrumentor.py +17 -33
- genai_otel/llm_pricing.json +869 -869
- genai_otel/logging_config.py +45 -45
- genai_otel/py.typed +2 -2
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/METADATA +256 -28
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/RECORD +21 -20
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/WHEEL +0 -0
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/entry_points.txt +0 -0
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/licenses/LICENSE +0 -0
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/top_level.txt +0 -0
genai_otel/instrumentors/base.py
CHANGED
|
@@ -7,11 +7,12 @@ It includes methods for creating OpenTelemetry spans, recording metrics,
|
|
|
7
7
|
and handling configuration and cost calculation.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
+
import json
|
|
10
11
|
import logging
|
|
11
12
|
import threading
|
|
12
13
|
import time
|
|
13
14
|
from abc import ABC, abstractmethod
|
|
14
|
-
from typing import Any, Callable, Dict, Optional
|
|
15
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
15
16
|
|
|
16
17
|
import wrapt
|
|
17
18
|
from opentelemetry import metrics, trace
|
|
@@ -97,7 +98,7 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
|
|
|
97
98
|
self.tracer = trace.get_tracer(__name__)
|
|
98
99
|
self.meter = metrics.get_meter(__name__)
|
|
99
100
|
self.config: Optional[OTelConfig] = None
|
|
100
|
-
self.cost_calculator = CostCalculator()
|
|
101
|
+
self.cost_calculator = CostCalculator() # Will be updated when instrument() is called
|
|
101
102
|
self._instrumented = False
|
|
102
103
|
|
|
103
104
|
# Use shared metrics to avoid duplicate warnings
|
|
@@ -205,10 +206,25 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
|
|
|
205
206
|
cls._shared_ttft_histogram = None
|
|
206
207
|
cls._shared_tbt_histogram = None
|
|
207
208
|
|
|
209
|
+
def _setup_config(self, config: OTelConfig):
    """Store the configuration and apply custom pricing when it is supplied.

    The configuration object is always kept on the instrumentor. The cost
    calculator is rebuilt only when ``config.custom_pricing_json`` is truthy;
    otherwise the calculator that is already set is left untouched.

    Args:
        config (OTelConfig): The OpenTelemetry configuration object.
    """
    self.config = config

    custom_pricing = config.custom_pricing_json
    if not custom_pricing:
        # No pricing overrides were supplied: keep the current calculator.
        return

    self.cost_calculator = CostCalculator(custom_pricing_json=custom_pricing)
    logger.info("Cost calculator reinitialized with custom pricing")
|
|
220
|
+
|
|
208
221
|
@abstractmethod
|
|
209
222
|
def instrument(self, config: OTelConfig):
|
|
210
223
|
"""Abstract method to implement library-specific instrumentation.
|
|
211
224
|
|
|
225
|
+
Implementers should call self._setup_config(config) at the beginning of this method
|
|
226
|
+
to ensure custom pricing is loaded.
|
|
227
|
+
|
|
212
228
|
Args:
|
|
213
229
|
config (OTelConfig): The OpenTelemetry configuration object.
|
|
214
230
|
"""
|
|
@@ -248,6 +264,26 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
|
|
|
248
264
|
span = self.tracer.start_span(span_name, attributes=initial_attributes)
|
|
249
265
|
start_time = time.time()
|
|
250
266
|
|
|
267
|
+
# Extract session and user context (Phase 4.1)
|
|
268
|
+
if self.config:
|
|
269
|
+
if self.config.session_id_extractor:
|
|
270
|
+
try:
|
|
271
|
+
session_id = self.config.session_id_extractor(instance, args, kwargs)
|
|
272
|
+
if session_id:
|
|
273
|
+
span.set_attribute("session.id", session_id)
|
|
274
|
+
logger.debug("Set session.id: %s", session_id)
|
|
275
|
+
except Exception as e:
|
|
276
|
+
logger.debug("Failed to extract session ID: %s", e)
|
|
277
|
+
|
|
278
|
+
if self.config.user_id_extractor:
|
|
279
|
+
try:
|
|
280
|
+
user_id = self.config.user_id_extractor(instance, args, kwargs)
|
|
281
|
+
if user_id:
|
|
282
|
+
span.set_attribute("user.id", user_id)
|
|
283
|
+
logger.debug("Set user.id: %s", user_id)
|
|
284
|
+
except Exception as e:
|
|
285
|
+
logger.debug("Failed to extract user ID: %s", e)
|
|
286
|
+
|
|
251
287
|
try:
|
|
252
288
|
# Call the original function
|
|
253
289
|
result = wrapped(*args, **kwargs)
|
|
@@ -419,9 +455,13 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
|
|
|
419
455
|
self.cost_counter.add(total_cost, {"model": str(model)})
|
|
420
456
|
# Always set span attributes (needed for cost tracking)
|
|
421
457
|
span.set_attribute("gen_ai.usage.cost.total", total_cost)
|
|
422
|
-
logger.debug(
|
|
458
|
+
logger.debug(
|
|
459
|
+
f"Set cost attribute: gen_ai.usage.cost.total={total_cost}"
|
|
460
|
+
)
|
|
423
461
|
else:
|
|
424
|
-
logger.debug(
|
|
462
|
+
logger.debug(
|
|
463
|
+
f"Cost is zero, not setting attributes. Costs: {costs}"
|
|
464
|
+
)
|
|
425
465
|
|
|
426
466
|
# Record and set attributes for granular costs
|
|
427
467
|
# Note: Metrics recording is optional, span attributes are always set
|
|
@@ -502,6 +542,7 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
|
|
|
502
542
|
first_token = True
|
|
503
543
|
last_token_time = start_time
|
|
504
544
|
token_count = 0
|
|
545
|
+
last_chunk = None # Store last chunk to extract usage
|
|
505
546
|
|
|
506
547
|
try:
|
|
507
548
|
for chunk in stream:
|
|
@@ -523,6 +564,7 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
|
|
|
523
564
|
self.tbt_histogram.record(tbt, {"model": model, "operation": span.name})
|
|
524
565
|
|
|
525
566
|
last_token_time = current_time
|
|
567
|
+
last_chunk = chunk # Keep track of last chunk for usage extraction
|
|
526
568
|
yield chunk
|
|
527
569
|
|
|
528
570
|
# Stream completed successfully
|
|
@@ -530,6 +572,123 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
|
|
|
530
572
|
if self.latency_histogram:
|
|
531
573
|
self.latency_histogram.record(duration, {"operation": span.name})
|
|
532
574
|
span.set_attribute("gen_ai.streaming.token_count", token_count)
|
|
575
|
+
|
|
576
|
+
# Extract usage from last chunk and calculate cost
|
|
577
|
+
# Many providers (OpenAI, Anthropic, etc.) include usage in the final chunk
|
|
578
|
+
try:
|
|
579
|
+
if last_chunk is not None:
|
|
580
|
+
usage = self._extract_usage(last_chunk)
|
|
581
|
+
if usage and isinstance(usage, dict):
|
|
582
|
+
# Record token usage metrics and calculate cost
|
|
583
|
+
# This will set span attributes and record cost metrics
|
|
584
|
+
prompt_tokens = usage.get("prompt_tokens", 0)
|
|
585
|
+
completion_tokens = usage.get("completion_tokens", 0)
|
|
586
|
+
total_tokens = usage.get("total_tokens", 0)
|
|
587
|
+
|
|
588
|
+
# Record token counts
|
|
589
|
+
if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
|
|
590
|
+
if self.token_counter:
|
|
591
|
+
self.token_counter.add(
|
|
592
|
+
prompt_tokens, {"token_type": "prompt", "operation": span.name}
|
|
593
|
+
)
|
|
594
|
+
span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
|
|
595
|
+
|
|
596
|
+
if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
|
|
597
|
+
if self.token_counter:
|
|
598
|
+
self.token_counter.add(
|
|
599
|
+
completion_tokens,
|
|
600
|
+
{"token_type": "completion", "operation": span.name},
|
|
601
|
+
)
|
|
602
|
+
span.set_attribute(
|
|
603
|
+
"gen_ai.usage.completion_tokens", int(completion_tokens)
|
|
604
|
+
)
|
|
605
|
+
|
|
606
|
+
if isinstance(total_tokens, (int, float)) and total_tokens > 0:
|
|
607
|
+
span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
|
|
608
|
+
|
|
609
|
+
# Calculate and record cost if enabled
|
|
610
|
+
if self.config and self.config.enable_cost_tracking:
|
|
611
|
+
try:
|
|
612
|
+
# Get call_type from span attributes or default to "chat"
|
|
613
|
+
call_type = span.attributes.get("gen_ai.request.type", "chat")
|
|
614
|
+
|
|
615
|
+
# Use granular cost calculation for chat requests
|
|
616
|
+
if call_type == "chat":
|
|
617
|
+
costs = self.cost_calculator.calculate_granular_cost(
|
|
618
|
+
model, usage, call_type
|
|
619
|
+
)
|
|
620
|
+
total_cost = costs["total"]
|
|
621
|
+
|
|
622
|
+
# Record total cost
|
|
623
|
+
if total_cost > 0:
|
|
624
|
+
if self.cost_counter:
|
|
625
|
+
self.cost_counter.add(total_cost, {"model": str(model)})
|
|
626
|
+
span.set_attribute("gen_ai.usage.cost.total", total_cost)
|
|
627
|
+
logger.debug(f"Streaming cost: {total_cost} USD")
|
|
628
|
+
|
|
629
|
+
# Record granular costs
|
|
630
|
+
if costs["prompt"] > 0:
|
|
631
|
+
if self.prompt_cost_counter:
|
|
632
|
+
self.prompt_cost_counter.add(
|
|
633
|
+
costs["prompt"], {"model": str(model)}
|
|
634
|
+
)
|
|
635
|
+
span.set_attribute(
|
|
636
|
+
"gen_ai.usage.cost.prompt", costs["prompt"]
|
|
637
|
+
)
|
|
638
|
+
|
|
639
|
+
if costs["completion"] > 0:
|
|
640
|
+
if self.completion_cost_counter:
|
|
641
|
+
self.completion_cost_counter.add(
|
|
642
|
+
costs["completion"], {"model": str(model)}
|
|
643
|
+
)
|
|
644
|
+
span.set_attribute(
|
|
645
|
+
"gen_ai.usage.cost.completion", costs["completion"]
|
|
646
|
+
)
|
|
647
|
+
|
|
648
|
+
if costs["reasoning"] > 0:
|
|
649
|
+
if self.reasoning_cost_counter:
|
|
650
|
+
self.reasoning_cost_counter.add(
|
|
651
|
+
costs["reasoning"], {"model": str(model)}
|
|
652
|
+
)
|
|
653
|
+
span.set_attribute(
|
|
654
|
+
"gen_ai.usage.cost.reasoning", costs["reasoning"]
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
if costs["cache_read"] > 0:
|
|
658
|
+
if self.cache_read_cost_counter:
|
|
659
|
+
self.cache_read_cost_counter.add(
|
|
660
|
+
costs["cache_read"], {"model": str(model)}
|
|
661
|
+
)
|
|
662
|
+
span.set_attribute(
|
|
663
|
+
"gen_ai.usage.cost.cache_read", costs["cache_read"]
|
|
664
|
+
)
|
|
665
|
+
|
|
666
|
+
if costs["cache_write"] > 0:
|
|
667
|
+
if self.cache_write_cost_counter:
|
|
668
|
+
self.cache_write_cost_counter.add(
|
|
669
|
+
costs["cache_write"], {"model": str(model)}
|
|
670
|
+
)
|
|
671
|
+
span.set_attribute(
|
|
672
|
+
"gen_ai.usage.cost.cache_write", costs["cache_write"]
|
|
673
|
+
)
|
|
674
|
+
else:
|
|
675
|
+
# For non-chat requests, use simple cost calculation
|
|
676
|
+
cost = self.cost_calculator.calculate_cost(
|
|
677
|
+
model, usage, call_type
|
|
678
|
+
)
|
|
679
|
+
if cost and cost > 0:
|
|
680
|
+
if self.cost_counter:
|
|
681
|
+
self.cost_counter.add(cost, {"model": str(model)})
|
|
682
|
+
span.set_attribute("gen_ai.usage.cost.total", cost)
|
|
683
|
+
except Exception as e:
|
|
684
|
+
logger.warning(
|
|
685
|
+
"Failed to calculate cost for streaming response: %s", e
|
|
686
|
+
)
|
|
687
|
+
else:
|
|
688
|
+
logger.debug("No usage information found in streaming response")
|
|
689
|
+
except Exception as e:
|
|
690
|
+
logger.warning("Failed to extract usage from streaming response: %s", e)
|
|
691
|
+
|
|
533
692
|
span.set_status(Status(StatusCode.OK))
|
|
534
693
|
span.end() # Close the span when streaming completes
|
|
535
694
|
logger.debug(f"Streaming completed: {token_count} chunks in {duration:.3f}s")
|
|
@@ -544,6 +703,71 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
|
|
|
544
703
|
logger.warning(f"Error in streaming wrapper: {e}")
|
|
545
704
|
raise
|
|
546
705
|
|
|
706
|
+
# Phase 4.2: RAG/Embedding Helper Methods
|
|
707
|
+
def add_embedding_attributes(
    self, span, model: str, input_text: str, vector: Optional[List[float]] = None
):
    """Add embedding-specific attributes to a span.

    ``embedding.model_name`` is always set. ``embedding.text`` is set to the
    first 500 characters of ``input_text`` unless ``input_text`` is ``None``.
    The vector attributes are written only when a non-empty ``vector`` is
    given and ``self.config.capture_embedding_vectors`` is truthy.

    Args:
        span: The OpenTelemetry span
        model: The embedding model name
        input_text: The text being embedded (will be truncated to 500 chars)
        vector: Optional embedding vector (use with caution - can be large!)
    """
    span.set_attribute("embedding.model_name", model)

    if input_text is not None:
        # Truncate to avoid large spans
        span.set_attribute("embedding.text", input_text[:500])

    # Check the flag's *value*, not merely that the config declares it:
    # a config that defines capture_embedding_vectors=False must not emit
    # vectors. A missing config or missing attribute is treated as disabled.
    capture_vectors = bool(
        self.config and getattr(self.config, "capture_embedding_vectors", False)
    )

    if vector and capture_vectors:
        # Only capture vectors if explicitly enabled (they can be very large)
        span.set_attribute("embedding.vector", json.dumps(vector))
        span.set_attribute("embedding.vector.dimension", len(vector))
|
|
725
|
+
|
|
726
|
+
def add_retrieval_attributes(
    self,
    span,
    documents: List[Dict[str, Any]],
    query: Optional[str] = None,
    max_docs: int = 5,
):
    """Add retrieval/RAG-specific attributes to a span.

    Per-document attributes are written under
    ``retrieval.documents.<index>.document.*`` for at most ``max_docs``
    documents. ``retrieval.document_count`` always reports the full number of
    documents passed in, not the number written.

    Args:
        span: The OpenTelemetry span
        documents: List of retrieved documents. Each dict should have:
            - id: Document identifier
            - score: Relevance score
            - content: Document content
            - metadata: Optional metadata dict
            ``None`` is treated as an empty list.
        query: Optional query string
        max_docs: Maximum number of documents to include in attributes
            (default: 5). Values below zero are treated as zero.
    """
    if query:
        span.set_attribute("retrieval.query", query[:500])  # Truncate

    if documents is None:
        documents = []

    # Clamp so a negative limit means "no documents" rather than being read
    # as a from-the-end slice that silently drops trailing documents.
    limit = max(max_docs, 0)

    # Limit to first N documents to avoid attribute explosion
    for index, doc in enumerate(documents[:limit]):
        prefix = f"retrieval.documents.{index}.document"

        if "id" in doc:
            span.set_attribute(f"{prefix}.id", str(doc["id"]))

        if "score" in doc:
            try:
                span.set_attribute(f"{prefix}.score", float(doc["score"]))
            except (TypeError, ValueError):
                # A missing or non-numeric score must not abort recording of
                # the remaining attributes; skip just this one value.
                logging.getLogger(__name__).debug(
                    "Skipping non-numeric retrieval score for document %d", index
                )

        if "content" in doc:
            # Truncate content to avoid large attributes
            span.set_attribute(f"{prefix}.content", str(doc["content"])[:500])

        # Add metadata if present
        metadata = doc.get("metadata") if "metadata" in doc else None
        if isinstance(metadata, dict):
            for key, value in metadata.items():
                # Flatten metadata; cap key and value lengths
                safe_key = str(key)[:50]
                safe_value = str(value)[:200]
                span.set_attribute(f"{prefix}.metadata.{safe_key}", safe_value)

    span.set_attribute("retrieval.document_count", len(documents))
|
|
770
|
+
|
|
547
771
|
@abstractmethod
|
|
548
772
|
def _extract_usage(self, result) -> Optional[Dict[str, int]]:
|
|
549
773
|
"""Abstract method to extract token usage information from a function result.
|
|
@@ -1,140 +1,140 @@
|
|
|
1
|
-
"""OpenTelemetry instrumentor for the Cohere SDK.
|
|
2
|
-
|
|
3
|
-
This instrumentor automatically traces calls to Cohere models, capturing
|
|
4
|
-
relevant attributes such as the model name and token usage.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import logging
|
|
8
|
-
from typing import Any, Dict, Optional
|
|
9
|
-
|
|
10
|
-
from ..config import OTelConfig
|
|
11
|
-
from .base import BaseInstrumentor
|
|
12
|
-
|
|
13
|
-
logger = logging.getLogger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class CohereInstrumentor(BaseInstrumentor):
    """Instrumentor for Cohere.

    Patches ``cohere.Client.__init__`` so that every client created after
    ``instrument()`` is called has its ``generate`` method wrapped in a span.
    """

    def __init__(self):
        """Initialize the instrumentor and probe for the cohere library."""
        super().__init__()
        self._cohere_available = False
        self._check_availability()

    def _check_availability(self):
        """Check if cohere library is available."""
        try:
            import cohere  # noqa: F401  (imported only to test availability)

            self._cohere_available = True
            logger.debug("cohere library detected and available for instrumentation")
        except ImportError:
            logger.debug("cohere library not installed, instrumentation will be skipped")
            self._cohere_available = False

    def instrument(self, config: OTelConfig):
        """Instrument cohere if available.

        Args:
            config (OTelConfig): The OpenTelemetry configuration object.

        Raises:
            Exception: Re-raises any patching failure when
                ``config.fail_on_error`` is set; otherwise the failure is
                logged and swallowed.
        """
        if not self._cohere_available:
            logger.debug("Skipping instrumentation - library not available")
            return

        # Go through the shared base-class hook rather than assigning
        # self.config directly, so custom pricing from the config is loaded
        # into the cost calculator as well.
        self._setup_config(config)
        try:
            import cohere

            original_init = cohere.Client.__init__

            def wrapped_init(instance, *args, **kwargs):
                # Build the client first, then wrap the methods on the
                # freshly constructed instance.
                original_init(instance, *args, **kwargs)
                self._instrument_client(instance)

            cohere.Client.__init__ = wrapped_init
            self._instrumented = True
            logger.info("Cohere instrumentation enabled")

        except Exception as e:
            logger.error("Failed to instrument Cohere: %s", e, exc_info=True)
            if config.fail_on_error:
                raise

    def _instrument_client(self, client):
        """Instrument Cohere client methods.

        Args:
            client: The Cohere client instance whose ``generate`` method is
                replaced by a span-creating wrapper.
        """
        original_generate = client.generate

        # Wrap using create_span_wrapper
        wrapped_generate = self.create_span_wrapper(
            span_name="cohere.generate",
            extract_attributes=self._extract_generate_attributes,
        )(original_generate)

        client.generate = wrapped_generate

    def _extract_generate_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
        """Extract attributes from Cohere generate call.

        Args:
            instance: The client instance.
            args: Positional arguments.
            kwargs: Keyword arguments.

        Returns:
            Dict[str, Any]: Dictionary of attributes to set on the span.
        """
        # "command" is reported when the caller does not pass a model kwarg.
        model = kwargs.get("model", "command")
        prompt = kwargs.get("prompt", "")

        return {
            "gen_ai.system": "cohere",
            "gen_ai.request.model": model,
            "gen_ai.operation.name": "generate",
            "gen_ai.request.message_count": 1 if prompt else 0,
        }

    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
        """Extract token usage from Cohere response.

        Reads ``result.meta.tokens`` when it is present and truthy, otherwise
        falls back to ``result.meta.billed_units``. Whichever object is
        selected is expected to expose:
        - input_tokens: Input tokens
        - output_tokens: Output tokens

        Args:
            result: The API response object.

        Returns:
            Optional[Dict[str, int]]: Dictionary with ``prompt_tokens``,
            ``completion_tokens`` and ``total_tokens``, or None when no
            non-zero count is available or extraction fails.
        """
        try:
            meta = getattr(result, "meta", None)
            if not meta:
                return None

            # Prefer the tokens object; billed_units is consulted only when
            # tokens is absent or empty.
            counts = getattr(meta, "tokens", None) or getattr(meta, "billed_units", None)
            if not counts:
                return None

            input_tokens = getattr(counts, "input_tokens", 0)
            output_tokens = getattr(counts, "output_tokens", 0)
            if not (input_tokens or output_tokens):
                return None

            prompt_count = int(input_tokens or 0)
            completion_count = int(output_tokens or 0)
            return {
                "prompt_tokens": prompt_count,
                "completion_tokens": completion_count,
                "total_tokens": prompt_count + completion_count,
            }
        except Exception as e:
            logger.debug("Failed to extract usage from Cohere response: %s", e)
            return None
|