genai-otel-instrument 0.1.2.dev0__py3-none-any.whl → 0.1.7.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of genai-otel-instrument might be problematic. Click here for more details.

Files changed (24)
  1. genai_otel/__version__.py +2 -2
  2. genai_otel/auto_instrument.py +18 -1
  3. genai_otel/config.py +22 -1
  4. genai_otel/cost_calculator.py +204 -13
  5. genai_otel/cost_enrichment_processor.py +175 -0
  6. genai_otel/gpu_metrics.py +50 -0
  7. genai_otel/instrumentors/base.py +300 -44
  8. genai_otel/instrumentors/cohere_instrumentor.py +140 -76
  9. genai_otel/instrumentors/huggingface_instrumentor.py +142 -13
  10. genai_otel/instrumentors/langchain_instrumentor.py +75 -75
  11. genai_otel/instrumentors/mistralai_instrumentor.py +234 -38
  12. genai_otel/instrumentors/ollama_instrumentor.py +104 -35
  13. genai_otel/instrumentors/replicate_instrumentor.py +59 -14
  14. genai_otel/instrumentors/togetherai_instrumentor.py +120 -16
  15. genai_otel/instrumentors/vertexai_instrumentor.py +79 -15
  16. genai_otel/llm_pricing.json +869 -589
  17. genai_otel/logging_config.py +45 -45
  18. genai_otel/py.typed +2 -2
  19. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/METADATA +294 -33
  20. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/RECORD +24 -23
  21. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/WHEEL +0 -0
  22. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/entry_points.txt +0 -0
  23. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/licenses/LICENSE +0 -0
  24. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/top_level.txt +0 -0
@@ -7,11 +7,12 @@ It includes methods for creating OpenTelemetry spans, recording metrics,
7
7
  and handling configuration and cost calculation.
8
8
  """
9
9
 
10
+ import json
10
11
  import logging
11
12
  import threading
12
13
  import time
13
14
  from abc import ABC, abstractmethod
14
- from typing import Any, Callable, Dict, Optional
15
+ from typing import Any, Callable, Dict, List, Optional
15
16
 
16
17
  import wrapt
17
18
  from opentelemetry import metrics, trace
@@ -82,6 +83,12 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
82
83
  _shared_latency_histogram = None
83
84
  _shared_cost_counter = None
84
85
  _shared_error_counter = None
86
+ # Granular cost counters (Phase 3.2)
87
+ _shared_prompt_cost_counter = None
88
+ _shared_completion_cost_counter = None
89
+ _shared_reasoning_cost_counter = None
90
+ _shared_cache_read_cost_counter = None
91
+ _shared_cache_write_cost_counter = None
85
92
  # Streaming metrics (Phase 3.4)
86
93
  _shared_ttft_histogram = None
87
94
  _shared_tbt_histogram = None
@@ -91,7 +98,7 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
91
98
  self.tracer = trace.get_tracer(__name__)
92
99
  self.meter = metrics.get_meter(__name__)
93
100
  self.config: Optional[OTelConfig] = None
94
- self.cost_calculator = CostCalculator()
101
+ self.cost_calculator = CostCalculator() # Will be updated when instrument() is called
95
102
  self._instrumented = False
96
103
 
97
104
  # Use shared metrics to avoid duplicate warnings
@@ -103,6 +110,12 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
103
110
  self.latency_histogram = self._shared_latency_histogram
104
111
  self.cost_counter = self._shared_cost_counter
105
112
  self.error_counter = self._shared_error_counter
113
+ # Granular cost counters (Phase 3.2)
114
+ self.prompt_cost_counter = self._shared_prompt_cost_counter
115
+ self.completion_cost_counter = self._shared_completion_cost_counter
116
+ self.reasoning_cost_counter = self._shared_reasoning_cost_counter
117
+ self.cache_read_cost_counter = self._shared_cache_read_cost_counter
118
+ self.cache_write_cost_counter = self._shared_cache_write_cost_counter
106
119
  # Streaming metrics
107
120
  self.ttft_histogram = self._shared_ttft_histogram
108
121
  self.tbt_histogram = self._shared_tbt_histogram
@@ -193,10 +206,25 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
193
206
  cls._shared_ttft_histogram = None
194
207
  cls._shared_tbt_histogram = None
195
208
 
209
+ def _setup_config(self, config: OTelConfig):
210
+ """Set up configuration and reinitialize cost calculator with custom pricing if provided.
211
+
212
+ Args:
213
+ config (OTelConfig): The OpenTelemetry configuration object.
214
+ """
215
+ self.config = config
216
+ # Reinitialize cost calculator with custom pricing if provided
217
+ if config.custom_pricing_json:
218
+ self.cost_calculator = CostCalculator(custom_pricing_json=config.custom_pricing_json)
219
+ logger.info("Cost calculator reinitialized with custom pricing")
220
+
196
221
  @abstractmethod
197
222
  def instrument(self, config: OTelConfig):
198
223
  """Abstract method to implement library-specific instrumentation.
199
224
 
225
+ Implementers should call self._setup_config(config) at the beginning of this method
226
+ to ensure custom pricing is loaded.
227
+
200
228
  Args:
201
229
  config (OTelConfig): The OpenTelemetry configuration object.
202
230
  """
@@ -236,6 +264,26 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
236
264
  span = self.tracer.start_span(span_name, attributes=initial_attributes)
237
265
  start_time = time.time()
238
266
 
267
+ # Extract session and user context (Phase 4.1)
268
+ if self.config:
269
+ if self.config.session_id_extractor:
270
+ try:
271
+ session_id = self.config.session_id_extractor(instance, args, kwargs)
272
+ if session_id:
273
+ span.set_attribute("session.id", session_id)
274
+ logger.debug("Set session.id: %s", session_id)
275
+ except Exception as e:
276
+ logger.debug("Failed to extract session ID: %s", e)
277
+
278
+ if self.config.user_id_extractor:
279
+ try:
280
+ user_id = self.config.user_id_extractor(instance, args, kwargs)
281
+ if user_id:
282
+ span.set_attribute("user.id", user_id)
283
+ logger.debug("Set user.id: %s", user_id)
284
+ except Exception as e:
285
+ logger.debug("Failed to extract user ID: %s", e)
286
+
239
287
  try:
240
288
  # Call the original function
241
289
  result = wrapped(*args, **kwargs)
@@ -346,45 +394,54 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
346
394
  and "dup" in self.config.semconv_stability_opt_in
347
395
  )
348
396
 
349
- if (
350
- self.token_counter
351
- and isinstance(prompt_tokens, (int, float))
352
- and prompt_tokens > 0
353
- ):
354
- self.token_counter.add(
355
- prompt_tokens, {"token_type": "prompt", "operation": span.name}
356
- )
357
- # New semantic convention
397
+ # Record prompt tokens
398
+ if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
399
+ # Record metric if available
400
+ if self.token_counter:
401
+ self.token_counter.add(
402
+ prompt_tokens, {"token_type": "prompt", "operation": span.name}
403
+ )
404
+ # Always set span attributes (needed for cost calculation)
358
405
  span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
359
406
  # Old semantic convention (if dual emission enabled)
360
407
  if emit_old_attrs:
361
408
  span.set_attribute("gen_ai.usage.input_tokens", int(prompt_tokens))
362
409
 
363
- if (
364
- self.token_counter
365
- and isinstance(completion_tokens, (int, float))
366
- and completion_tokens > 0
367
- ):
368
- self.token_counter.add(
369
- completion_tokens, {"token_type": "completion", "operation": span.name}
370
- )
371
- # New semantic convention
410
+ # Record completion tokens
411
+ if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
412
+ # Record metric if available
413
+ if self.token_counter:
414
+ self.token_counter.add(
415
+ completion_tokens, {"token_type": "completion", "operation": span.name}
416
+ )
417
+ # Always set span attributes (needed for cost calculation)
372
418
  span.set_attribute("gen_ai.usage.completion_tokens", int(completion_tokens))
373
419
  # Old semantic convention (if dual emission enabled)
374
420
  if emit_old_attrs:
375
421
  span.set_attribute("gen_ai.usage.output_tokens", int(completion_tokens))
376
422
 
423
+ # Record total tokens
377
424
  if isinstance(total_tokens, (int, float)) and total_tokens > 0:
378
425
  span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
379
426
 
380
427
  # Calculate and record cost if enabled and applicable
381
- if self.config and self.config.enable_cost_tracking and self._shared_cost_counter:
428
+ logger.debug(
429
+ f"Cost tracking check: config={self.config is not None}, "
430
+ f"enable_cost_tracking={self.config.enable_cost_tracking if self.config else 'N/A'}"
431
+ )
432
+ if self.config and self.config.enable_cost_tracking:
382
433
  try:
383
434
  model = span.attributes.get("gen_ai.request.model", "unknown")
384
435
  # Assuming 'chat' as a default call_type for generic base instrumentor tests.
385
436
  # Specific instrumentors will provide the actual call_type.
386
437
  call_type = span.attributes.get("gen_ai.request.type", "chat")
387
438
 
439
+ logger.debug(
440
+ f"Calculating cost for model={model}, call_type={call_type}, "
441
+ f"prompt_tokens={usage.get('prompt_tokens')}, "
442
+ f"completion_tokens={usage.get('completion_tokens')}"
443
+ )
444
+
388
445
  # Use granular cost calculation for chat requests
389
446
  if call_type == "chat":
390
447
  costs = self.cost_calculator.calculate_granular_cost(
@@ -394,45 +451,59 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
394
451
 
395
452
  # Record total cost
396
453
  if total_cost > 0:
397
- self._shared_cost_counter.add(total_cost, {"model": str(model)})
398
- # Set span attributes for granular costs
454
+ if self.cost_counter:
455
+ self.cost_counter.add(total_cost, {"model": str(model)})
456
+ # Always set span attributes (needed for cost tracking)
399
457
  span.set_attribute("gen_ai.usage.cost.total", total_cost)
458
+ logger.debug(
459
+ f"Set cost attribute: gen_ai.usage.cost.total={total_cost}"
460
+ )
461
+ else:
462
+ logger.debug(
463
+ f"Cost is zero, not setting attributes. Costs: {costs}"
464
+ )
400
465
 
401
466
  # Record and set attributes for granular costs
402
- if costs["prompt"] > 0 and self._shared_prompt_cost_counter:
403
- self._shared_prompt_cost_counter.add(
404
- costs["prompt"], {"model": str(model)}
405
- )
467
+ # Note: Metrics recording is optional, span attributes are always set
468
+ if costs["prompt"] > 0:
469
+ if self.prompt_cost_counter:
470
+ self.prompt_cost_counter.add(
471
+ costs["prompt"], {"model": str(model)}
472
+ )
406
473
  span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
407
474
 
408
- if costs["completion"] > 0 and self._shared_completion_cost_counter:
409
- self._shared_completion_cost_counter.add(
410
- costs["completion"], {"model": str(model)}
411
- )
475
+ if costs["completion"] > 0:
476
+ if self.completion_cost_counter:
477
+ self.completion_cost_counter.add(
478
+ costs["completion"], {"model": str(model)}
479
+ )
412
480
  span.set_attribute(
413
481
  "gen_ai.usage.cost.completion", costs["completion"]
414
482
  )
415
483
 
416
- if costs["reasoning"] > 0 and self._shared_reasoning_cost_counter:
417
- self._shared_reasoning_cost_counter.add(
418
- costs["reasoning"], {"model": str(model)}
419
- )
484
+ if costs["reasoning"] > 0:
485
+ if self.reasoning_cost_counter:
486
+ self.reasoning_cost_counter.add(
487
+ costs["reasoning"], {"model": str(model)}
488
+ )
420
489
  span.set_attribute(
421
490
  "gen_ai.usage.cost.reasoning", costs["reasoning"]
422
491
  )
423
492
 
424
- if costs["cache_read"] > 0 and self._shared_cache_read_cost_counter:
425
- self._shared_cache_read_cost_counter.add(
426
- costs["cache_read"], {"model": str(model)}
427
- )
493
+ if costs["cache_read"] > 0:
494
+ if self.cache_read_cost_counter:
495
+ self.cache_read_cost_counter.add(
496
+ costs["cache_read"], {"model": str(model)}
497
+ )
428
498
  span.set_attribute(
429
499
  "gen_ai.usage.cost.cache_read", costs["cache_read"]
430
500
  )
431
501
 
432
- if costs["cache_write"] > 0 and self._shared_cache_write_cost_counter:
433
- self._shared_cache_write_cost_counter.add(
434
- costs["cache_write"], {"model": str(model)}
435
- )
502
+ if costs["cache_write"] > 0:
503
+ if self.cache_write_cost_counter:
504
+ self.cache_write_cost_counter.add(
505
+ costs["cache_write"], {"model": str(model)}
506
+ )
436
507
  span.set_attribute(
437
508
  "gen_ai.usage.cost.cache_write", costs["cache_write"]
438
509
  )
@@ -440,7 +511,8 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
440
511
  # For non-chat requests, use simple cost calculation
441
512
  cost = self.cost_calculator.calculate_cost(model, usage, call_type)
442
513
  if cost and cost > 0:
443
- self._shared_cost_counter.add(cost, {"model": str(model)})
514
+ if self.cost_counter:
515
+ self.cost_counter.add(cost, {"model": str(model)})
444
516
  except Exception as e:
445
517
  logger.warning("Failed to calculate cost for span '%s': %s", span.name, e)
446
518
 
@@ -470,6 +542,7 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
470
542
  first_token = True
471
543
  last_token_time = start_time
472
544
  token_count = 0
545
+ last_chunk = None # Store last chunk to extract usage
473
546
 
474
547
  try:
475
548
  for chunk in stream:
@@ -491,6 +564,7 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
491
564
  self.tbt_histogram.record(tbt, {"model": model, "operation": span.name})
492
565
 
493
566
  last_token_time = current_time
567
+ last_chunk = chunk # Keep track of last chunk for usage extraction
494
568
  yield chunk
495
569
 
496
570
  # Stream completed successfully
@@ -498,6 +572,123 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
498
572
  if self.latency_histogram:
499
573
  self.latency_histogram.record(duration, {"operation": span.name})
500
574
  span.set_attribute("gen_ai.streaming.token_count", token_count)
575
+
576
+ # Extract usage from last chunk and calculate cost
577
+ # Many providers (OpenAI, Anthropic, etc.) include usage in the final chunk
578
+ try:
579
+ if last_chunk is not None:
580
+ usage = self._extract_usage(last_chunk)
581
+ if usage and isinstance(usage, dict):
582
+ # Record token usage metrics and calculate cost
583
+ # This will set span attributes and record cost metrics
584
+ prompt_tokens = usage.get("prompt_tokens", 0)
585
+ completion_tokens = usage.get("completion_tokens", 0)
586
+ total_tokens = usage.get("total_tokens", 0)
587
+
588
+ # Record token counts
589
+ if isinstance(prompt_tokens, (int, float)) and prompt_tokens > 0:
590
+ if self.token_counter:
591
+ self.token_counter.add(
592
+ prompt_tokens, {"token_type": "prompt", "operation": span.name}
593
+ )
594
+ span.set_attribute("gen_ai.usage.prompt_tokens", int(prompt_tokens))
595
+
596
+ if isinstance(completion_tokens, (int, float)) and completion_tokens > 0:
597
+ if self.token_counter:
598
+ self.token_counter.add(
599
+ completion_tokens,
600
+ {"token_type": "completion", "operation": span.name},
601
+ )
602
+ span.set_attribute(
603
+ "gen_ai.usage.completion_tokens", int(completion_tokens)
604
+ )
605
+
606
+ if isinstance(total_tokens, (int, float)) and total_tokens > 0:
607
+ span.set_attribute("gen_ai.usage.total_tokens", int(total_tokens))
608
+
609
+ # Calculate and record cost if enabled
610
+ if self.config and self.config.enable_cost_tracking:
611
+ try:
612
+ # Get call_type from span attributes or default to "chat"
613
+ call_type = span.attributes.get("gen_ai.request.type", "chat")
614
+
615
+ # Use granular cost calculation for chat requests
616
+ if call_type == "chat":
617
+ costs = self.cost_calculator.calculate_granular_cost(
618
+ model, usage, call_type
619
+ )
620
+ total_cost = costs["total"]
621
+
622
+ # Record total cost
623
+ if total_cost > 0:
624
+ if self.cost_counter:
625
+ self.cost_counter.add(total_cost, {"model": str(model)})
626
+ span.set_attribute("gen_ai.usage.cost.total", total_cost)
627
+ logger.debug(f"Streaming cost: {total_cost} USD")
628
+
629
+ # Record granular costs
630
+ if costs["prompt"] > 0:
631
+ if self.prompt_cost_counter:
632
+ self.prompt_cost_counter.add(
633
+ costs["prompt"], {"model": str(model)}
634
+ )
635
+ span.set_attribute(
636
+ "gen_ai.usage.cost.prompt", costs["prompt"]
637
+ )
638
+
639
+ if costs["completion"] > 0:
640
+ if self.completion_cost_counter:
641
+ self.completion_cost_counter.add(
642
+ costs["completion"], {"model": str(model)}
643
+ )
644
+ span.set_attribute(
645
+ "gen_ai.usage.cost.completion", costs["completion"]
646
+ )
647
+
648
+ if costs["reasoning"] > 0:
649
+ if self.reasoning_cost_counter:
650
+ self.reasoning_cost_counter.add(
651
+ costs["reasoning"], {"model": str(model)}
652
+ )
653
+ span.set_attribute(
654
+ "gen_ai.usage.cost.reasoning", costs["reasoning"]
655
+ )
656
+
657
+ if costs["cache_read"] > 0:
658
+ if self.cache_read_cost_counter:
659
+ self.cache_read_cost_counter.add(
660
+ costs["cache_read"], {"model": str(model)}
661
+ )
662
+ span.set_attribute(
663
+ "gen_ai.usage.cost.cache_read", costs["cache_read"]
664
+ )
665
+
666
+ if costs["cache_write"] > 0:
667
+ if self.cache_write_cost_counter:
668
+ self.cache_write_cost_counter.add(
669
+ costs["cache_write"], {"model": str(model)}
670
+ )
671
+ span.set_attribute(
672
+ "gen_ai.usage.cost.cache_write", costs["cache_write"]
673
+ )
674
+ else:
675
+ # For non-chat requests, use simple cost calculation
676
+ cost = self.cost_calculator.calculate_cost(
677
+ model, usage, call_type
678
+ )
679
+ if cost and cost > 0:
680
+ if self.cost_counter:
681
+ self.cost_counter.add(cost, {"model": str(model)})
682
+ span.set_attribute("gen_ai.usage.cost.total", cost)
683
+ except Exception as e:
684
+ logger.warning(
685
+ "Failed to calculate cost for streaming response: %s", e
686
+ )
687
+ else:
688
+ logger.debug("No usage information found in streaming response")
689
+ except Exception as e:
690
+ logger.warning("Failed to extract usage from streaming response: %s", e)
691
+
501
692
  span.set_status(Status(StatusCode.OK))
502
693
  span.end() # Close the span when streaming completes
503
694
  logger.debug(f"Streaming completed: {token_count} chunks in {duration:.3f}s")
@@ -512,6 +703,71 @@ class BaseInstrumentor(ABC): # pylint: disable=R0902
512
703
  logger.warning(f"Error in streaming wrapper: {e}")
513
704
  raise
514
705
 
706
+ # Phase 4.2: RAG/Embedding Helper Methods
707
+ def add_embedding_attributes(
708
+ self, span, model: str, input_text: str, vector: Optional[List[float]] = None
709
+ ):
710
+ """Add embedding-specific attributes to a span.
711
+
712
+ Args:
713
+ span: The OpenTelemetry span
714
+ model: The embedding model name
715
+ input_text: The text being embedded (will be truncated to 500 chars)
716
+ vector: Optional embedding vector (use with caution - can be large!)
717
+ """
718
+ span.set_attribute("embedding.model_name", model)
719
+ span.set_attribute("embedding.text", input_text[:500]) # Truncate to avoid large spans
720
+
721
+ if vector and self.config and hasattr(self.config, "capture_embedding_vectors"):
722
+ # Only capture vectors if explicitly enabled (they can be very large)
723
+ span.set_attribute("embedding.vector", json.dumps(vector))
724
+ span.set_attribute("embedding.vector.dimension", len(vector))
725
+
726
+ def add_retrieval_attributes(
727
+ self,
728
+ span,
729
+ documents: List[Dict[str, Any]],
730
+ query: Optional[str] = None,
731
+ max_docs: int = 5,
732
+ ):
733
+ """Add retrieval/RAG-specific attributes to a span.
734
+
735
+ Args:
736
+ span: The OpenTelemetry span
737
+ documents: List of retrieved documents. Each dict should have:
738
+ - id: Document identifier
739
+ - score: Relevance score
740
+ - content: Document content
741
+ - metadata: Optional metadata dict
742
+ query: Optional query string
743
+ max_docs: Maximum number of documents to include in attributes (default: 5)
744
+ """
745
+ if query:
746
+ span.set_attribute("retrieval.query", query[:500]) # Truncate
747
+
748
+ # Limit to first N documents to avoid attribute explosion
749
+ for i, doc in enumerate(documents[:max_docs]):
750
+ prefix = f"retrieval.documents.{i}.document"
751
+
752
+ if "id" in doc:
753
+ span.set_attribute(f"{prefix}.id", str(doc["id"]))
754
+ if "score" in doc:
755
+ span.set_attribute(f"{prefix}.score", float(doc["score"]))
756
+ if "content" in doc:
757
+ # Truncate content to avoid large attributes
758
+ content = str(doc["content"])[:500]
759
+ span.set_attribute(f"{prefix}.content", content)
760
+
761
+ # Add metadata if present
762
+ if "metadata" in doc and isinstance(doc["metadata"], dict):
763
+ for key, value in doc["metadata"].items():
764
+ # Flatten metadata, limit key names to avoid explosion
765
+ safe_key = str(key)[:50] # Limit key length
766
+ safe_value = str(value)[:200] # Limit value length
767
+ span.set_attribute(f"{prefix}.metadata.{safe_key}", safe_value)
768
+
769
+ span.set_attribute("retrieval.document_count", len(documents))
770
+
515
771
  @abstractmethod
516
772
  def _extract_usage(self, result) -> Optional[Dict[str, int]]:
517
773
  """Abstract method to extract token usage information from a function result.