lucidicai 2.0.2__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (35)
  1. lucidicai/__init__.py +350 -899
  2. lucidicai/api/__init__.py +1 -0
  3. lucidicai/api/client.py +218 -0
  4. lucidicai/api/resources/__init__.py +1 -0
  5. lucidicai/api/resources/dataset.py +192 -0
  6. lucidicai/api/resources/event.py +88 -0
  7. lucidicai/api/resources/session.py +126 -0
  8. lucidicai/core/__init__.py +1 -0
  9. lucidicai/core/config.py +223 -0
  10. lucidicai/core/errors.py +60 -0
  11. lucidicai/core/types.py +35 -0
  12. lucidicai/sdk/__init__.py +1 -0
  13. lucidicai/sdk/context.py +144 -0
  14. lucidicai/sdk/decorators.py +187 -0
  15. lucidicai/sdk/error_boundary.py +299 -0
  16. lucidicai/sdk/event.py +122 -0
  17. lucidicai/sdk/event_builder.py +304 -0
  18. lucidicai/sdk/features/__init__.py +1 -0
  19. lucidicai/sdk/features/dataset.py +605 -0
  20. lucidicai/sdk/features/feature_flag.py +383 -0
  21. lucidicai/sdk/init.py +271 -0
  22. lucidicai/sdk/shutdown_manager.py +302 -0
  23. lucidicai/telemetry/context_bridge.py +82 -0
  24. lucidicai/telemetry/context_capture_processor.py +25 -9
  25. lucidicai/telemetry/litellm_bridge.py +18 -24
  26. lucidicai/telemetry/lucidic_exporter.py +51 -36
  27. lucidicai/telemetry/utils/model_pricing.py +278 -0
  28. lucidicai/utils/__init__.py +1 -0
  29. lucidicai/utils/images.py +337 -0
  30. lucidicai/utils/logger.py +168 -0
  31. lucidicai/utils/queue.py +393 -0
  32. {lucidicai-2.0.2.dist-info → lucidicai-2.1.0.dist-info}/METADATA +1 -1
  33. {lucidicai-2.0.2.dist-info → lucidicai-2.1.0.dist-info}/RECORD +35 -8
  34. {lucidicai-2.0.2.dist-info → lucidicai-2.1.0.dist-info}/WHEEL +0 -0
  35. {lucidicai-2.0.2.dist-info → lucidicai-2.1.0.dist-info}/top_level.txt +0 -0
lucidicai/telemetry/lucidic_exporter.py
@@ -4,7 +4,6 @@ Converts completed spans into immutable typed LLM events via Client.create_event
  which enqueues non-blocking delivery through the EventQueue.
  """
  import json
- import logging
  from typing import Sequence, Optional, Dict, Any, List
  from datetime import datetime, timezone
  from opentelemetry.sdk.trace import ReadableSpan
@@ -12,16 +11,12 @@ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
  from opentelemetry.trace import StatusCode
  from opentelemetry.semconv_ai import SpanAttributes
 
- from lucidicai.client import Client
- from lucidicai.context import current_session_id, current_parent_event_id
- from lucidicai.model_pricing import calculate_cost
+ from ..sdk.event import create_event
+ from ..sdk.init import get_session_id
+ from ..sdk.context import current_session_id, current_parent_event_id
+ from ..telemetry.utils.model_pricing import calculate_cost
  from .extract import detect_is_llm_span, extract_images, extract_prompts, extract_completions, extract_model
-
- logger = logging.getLogger("Lucidic")
- import os
-
- DEBUG = os.getenv("LUCIDIC_DEBUG", "False") == "True"
- VERBOSE = os.getenv("LUCIDIC_VERBOSE", "False") == "True"
+ from ..utils.logger import debug, info, warning, error, verbose, truncate_id
 
 
  class LucidicSpanExporter(SpanExporter):
@@ -29,23 +24,25 @@ class LucidicSpanExporter(SpanExporter):
 
  def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
  try:
- client = Client()
- if DEBUG and spans:
- logger.debug(f"[LucidicSpanExporter] Processing {len(spans)} spans")
+ if spans:
+ debug(f"[Telemetry] Processing {len(spans)} OpenTelemetry spans")
  for span in spans:
- self._process_span(span, client)
- if DEBUG and spans:
- logger.debug(f"[LucidicSpanExporter] Successfully exported {len(spans)} spans")
+ self._process_span(span)
+ if spans:
+ debug(f"[Telemetry] Successfully exported {len(spans)} spans")
  return SpanExportResult.SUCCESS
  except Exception as e:
- logger.error(f"Failed to export spans: {e}")
+ error(f"[Telemetry] Failed to export spans: {e}")
  return SpanExportResult.FAILURE
 
- def _process_span(self, span: ReadableSpan, client: Client) -> None:
+ def _process_span(self, span: ReadableSpan) -> None:
  """Convert a single LLM span into a typed, immutable event."""
  try:
  if not detect_is_llm_span(span):
+ verbose(f"[Telemetry] Skipping non-LLM span: {span.name}")
  return
+
+ debug(f"[Telemetry] Processing LLM span: {span.name}")
 
  attributes = dict(span.attributes or {})
 
@@ -56,22 +53,30 @@ class LucidicSpanExporter(SpanExporter):
  target_session_id = current_session_id.get(None)
  except Exception:
  target_session_id = None
- if not target_session_id and getattr(client, 'session', None) and getattr(client.session, 'session_id', None):
- target_session_id = client.session.session_id
  if not target_session_id:
+ target_session_id = get_session_id()
+ if not target_session_id:
+ debug(f"[Telemetry] No session ID for span {span.name}, skipping")
  return
 
  # Parent nesting - get from span attributes (captured at span creation)
  parent_id = attributes.get('lucidic.parent_event_id')
+ debug(f"[Telemetry] Span {span.name} has parent_id from attributes: {truncate_id(parent_id)}")
  if not parent_id:
  # Fallback to trying context (may work if same thread)
  try:
  parent_id = current_parent_event_id.get(None)
+ if parent_id:
+ debug(f"[Telemetry] Got parent_id from context for span {span.name}: {truncate_id(parent_id)}")
  except Exception:
  parent_id = None
+
+ if not parent_id:
+ debug(f"[Telemetry] No parent_id available for span {span.name}")
 
  # Timing
- occurred_at = datetime.fromtimestamp(span.start_time / 1_000_000_000, tz=timezone.utc) if span.start_time else datetime.now(tz=timezone.utc)
+ occurred_at_dt = datetime.fromtimestamp(span.start_time / 1_000_000_000, tz=timezone.utc) if span.start_time else datetime.now(tz=timezone.utc)
+ occurred_at = occurred_at_dt.isoformat() # Convert to ISO string for JSON serialization
  duration_seconds = ((span.end_time - span.start_time) / 1_000_000_000) if (span.start_time and span.end_time) else None
 
  # Typed fields using extract utilities
@@ -85,11 +90,18 @@ class LucidicSpanExporter(SpanExporter):
  cost = self._calculate_cost(attributes)
  images = extract_images(attributes)
 
- # Create immutable event via non-blocking queue
- event_id = client.create_event(
+ # Set context for parent if needed
+ from ..sdk.context import current_parent_event_id as parent_context
+ if parent_id:
+ token = parent_context.set(parent_id)
+ else:
+ token = None
+
+ try:
+ # Create immutable event via non-blocking queue
+ debug(f"[Telemetry] Creating LLM event with parent_id: {truncate_id(parent_id)}")
+ event_id = create_event(
  type="llm_generation",
- session_id=target_session_id,
- parent_event_id=parent_id,
  occurred_at=occurred_at,
  duration=duration_seconds,
  provider=provider,
@@ -101,16 +113,20 @@ class LucidicSpanExporter(SpanExporter):
  output_tokens=output_tokens,
  cost=cost,
  raw={"images": images} if images else None,
+ parent_event_id=parent_id, # Pass the parent_id explicitly
  )
+ finally:
+ # Reset parent context
+ if token:
+ parent_context.reset(token)
 
- if DEBUG:
- logger.debug(f"[LucidicSpanExporter] Created LLM event {event_id} for session {target_session_id[:8]}...")
+ debug(f"[Telemetry] Created LLM event {truncate_id(event_id)} from span {span.name} for session {truncate_id(target_session_id)}")
 
  except Exception as e:
- logger.error(f"Failed to process span {span.name}: {e}")
+ error(f"[Telemetry] Failed to process span {span.name}: {e}")
 
 
- def _create_event_from_span(self, span: ReadableSpan, attributes: Dict[str, Any], client: Client) -> Optional[str]:
+ def _create_event_from_span(self, span: ReadableSpan, attributes: Dict[str, Any]) -> Optional[str]:
  """Create a Lucidic event from span start"""
  try:
  # Extract description from prompts/messages
@@ -132,9 +148,9 @@ class LucidicSpanExporter(SpanExporter):
  except Exception:
  target_session_id = None
  if not target_session_id:
- if getattr(client, 'session', None) and getattr(client.session, 'session_id', None):
- target_session_id = client.session.session_id
+ target_session_id = get_session_id()
  if not target_session_id:
+ debug(f"[Telemetry] No session ID for span {span.name}, skipping")
  return None
 
  # Create event
@@ -147,13 +163,13 @@ class LucidicSpanExporter(SpanExporter):
  if images:
  event_kwargs['screenshots'] = images
 
- return client.create_event_for_session(target_session_id, **event_kwargs)
+ return create_event(**event_kwargs)
 
  except Exception as e:
- logger.error(f"Failed to create event from span: {e}")
+ error(f"[Telemetry] Failed to create event from span: {e}")
  return None
 
- def _update_event_from_span(self, span: ReadableSpan, attributes: Dict[str, Any], event_id: str, client: Client) -> None:
+ def _update_event_from_span(self, span: ReadableSpan, attributes: Dict[str, Any], event_id: str) -> None:
  """Deprecated: events are immutable; no updates performed."""
  return
 
@@ -163,8 +179,7 @@ class LucidicSpanExporter(SpanExporter):
  prompts = attributes.get(SpanAttributes.LLM_PROMPTS) or \
  attributes.get('gen_ai.prompt')
 
- if VERBOSE:
- logger.info(f"[SpaneExporter -- DEBUG] Extracting Description attributes: {attributes}, prompts: {prompts}")
+ verbose(f"[Telemetry] Extracting description from attributes: {attributes}, prompts: {prompts}")
 
  if prompts:
  if isinstance(prompts, list) and prompts:
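The parent-nesting change in `_process_span` above relies on the standard `contextvars` token pattern: set the value before the call, restore it in `finally` so concurrent exports don't leak state into each other. A minimal, self-contained sketch of that pattern (only `current_parent_event_id` and the set/reset flow come from the diff; `with_parent` is an illustrative helper, not SDK API):

```python
from contextvars import ContextVar
from typing import Callable, Optional, TypeVar

T = TypeVar("T")

# Same shape as the SDK's context variable for parent events.
current_parent_event_id: ContextVar[Optional[str]] = ContextVar(
    "current_parent_event_id", default=None
)

def with_parent(parent_id: Optional[str], fn: Callable[[], T]) -> T:
    """Run fn with parent_id visible through the context variable, then restore."""
    token = current_parent_event_id.set(parent_id) if parent_id else None
    try:
        return fn()
    finally:
        # reset() only accepts the token returned by the matching set()
        if token is not None:
            current_parent_event_id.reset(token)
```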
lucidicai/telemetry/utils/model_pricing.py
@@ -0,0 +1,278 @@
+ import logging
+
+ logger = logging.getLogger("Lucidic")
+
+ MODEL_PRICING = {
+
+ # OpenAI GPT-5 Series (Verified 2025)
+ "gpt-5": {"input": 10.0, "output": 10.0},
+ "gpt-5-mini": {"input": 0.250, "output": 2.0},
+ "gpt-5-nano": {"input": 0.05, "output": 0.4},
+
+ # OpenAI GPT-4o Series (Verified 2025)
+ "gpt-4o": {"input": 2.5, "output": 10.0},
+ "gpt-4o-mini": {"input": 0.15, "output": 0.6},
+ "gpt-4o-realtime-preview": {"input": 5.0, "output": 20.0}, # Text pricing
+ "gpt-4o-audio-preview": {"input": 100.0, "output": 200.0}, # Audio pricing per 1M tokens
+
+ # OpenAI GPT-4.1 Series (2025)
+ "gpt-4.1": {"input": 2.00, "output": 8.0},
+ "gpt-4.1-mini": {"input": 0.4, "output": 1.6},
+ "gpt-4.1-nano": {"input": 0.2, "output": 0.8},
+
+ # OpenAI GPT-4 Series
+ "gpt-4": {"input": 30.0, "output": 60.0},
+ "gpt-4-turbo": {"input": 10.0, "output": 30.0},
+ "gpt-4-turbo-preview": {"input": 10.0, "output": 30.0},
+ "gpt-4-vision-preview": {"input": 10.0, "output": 30.0},
+ "gpt-4-32k": {"input": 60.0, "output": 120.0},
+
+ # OpenAI GPT-3.5 Series
+ "gpt-3.5-turbo": {"input": 0.5, "output": 1.5},
+ "gpt-3.5-turbo-16k": {"input": 3.0, "output": 4.0},
+ "gpt-3.5-turbo-instruct": {"input": 1.5, "output": 2.0},
+
+ # OpenAI o-Series (Reasoning Models) - Verified 2025
+ "o1": {"input": 15.0, "output": 60.0},
+ "o1-preview": {"input": 15.0, "output": 60.0},
+ "o1-mini": {"input": 3.0, "output": 15.0},
+ "o3": {"input": 15.0, "output": 60.0},
+ "o3-mini": {"input": 1.1, "output": 4.4},
+ "o4-mini": {"input": 4.00, "output": 16.0},
+
+ # OpenAI Legacy Models
+ "text-davinci-003": {"input": 20.0, "output": 20.0},
+ "text-davinci-002": {"input": 20.0, "output": 20.0},
+ "code-davinci-002": {"input": 20.0, "output": 20.0},
+
+ # Claude 4 Models (2025) - Verified
+ "claude-4-opus": {"input": 15.0, "output": 75.0},
+ "claude-opus-4": {"input": 15.0, "output": 75.0},
+ "claude-4-sonnet": {"input": 3.0, "output": 15.0},
+ "claude-sonnet-4": {"input": 3.0, "output": 15.0},
+
+ # Claude 3.5 Models - Verified 2025
+ "claude-3-5-sonnet": {"input": 3.0, "output": 15.0},
+ "claude-3-5-sonnet-latest": {"input": 3.0, "output": 15.0},
+ "claude-3-5-haiku": {"input": 1.0, "output": 5.0},
+ "claude-3-5-haiku-latest": {"input": 1.0, "output": 5.0},
+ "claude-3-7-sonnet": {"input": 3.0, "output": 15.0}, # Same as 3.5 sonnet
+ "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0},
+
+ # Claude 3 Models
+ "claude-3-opus": {"input": 15.0, "output": 75.0},
+ "claude-3-opus-latest": {"input": 15.0, "output": 75.0},
+ "claude-3-sonnet": {"input": 3.0, "output": 15.0},
+ "claude-3-haiku": {"input": 0.25, "output": 1.25},
+
+ # Claude 2 Models
+ "claude-2": {"input": 8.0, "output": 24.0},
+ "claude-2.1": {"input": 8.0, "output": 24.0},
+ "claude-2.0": {"input": 8.0, "output": 24.0},
+
+ # Claude Instant
+ "claude-instant": {"input": 0.8, "output": 2.4},
+ "claude-instant-1": {"input": 0.8, "output": 2.4},
+ "claude-instant-1.2": {"input": 0.8, "output": 2.4},
+
+ # Google Gemini 2.5 Series (2025) - Verified
+ "gemini-2.5-pro": {"input": 1.25, "output": 10.0}, # Up to 200k tokens
+ "gemini-2.5-pro-preview": {"input": 1.25, "output": 10.0},
+ "gemini-2.5-flash": {"input": 0.15, "output": 0.6}, # Non-thinking
+ "gemini-2.5-flash-preview": {"input": 0.15, "output": 0.6},
+
+ # Google Gemini 2.0 Series - Verified
+ "gemini-2.0-flash": {"input": 0.1, "output": 0.4},
+ "gemini-2.0-flash-exp": {"input": 0.0, "output": 0.0}, # Free experimental
+ "gemini-2.0-flash-experimental": {"input": 0.0, "output": 0.0},
+
+ # Google Gemini 1.5 Series - Verified
+ "gemini-1.5-pro": {"input": 1.25, "output": 5.0}, # Up to 128k tokens
+ "gemini-1.5-pro-preview": {"input": 1.25, "output": 5.0},
+ "gemini-1.5-flash": {"input": 0.075, "output": 0.3}, # Up to 128k tokens
+ "gemini-1.5-flash-8b": {"input": 0.0375, "output": 0.15},
+
+ # Google Gemini 1.0 Series
+ "gemini-pro": {"input": 0.5, "output": 1.5},
+ "gemini-pro-vision": {"input": 0.25, "output": 0.5},
+ "gemini-1.0-pro": {"input": 0.5, "output": 1.5},
+
+ # Google PaLM Series
+ "text-bison": {"input": 1.0, "output": 1.0},
+ "text-bison-32k": {"input": 1.0, "output": 1.0},
+ "chat-bison": {"input": 1.0, "output": 1.0},
+ "chat-bison-32k": {"input": 1.0, "output": 1.0},
+
+ # Meta Llama 4 Series (2025)
+ "llama-4-maverick-17b": {"input": 0.2, "output": 0.6},
+ "llama-4-scout-17b": {"input": 0.11, "output": 0.34},
+ "llama-guard-4-12b": {"input": 0.20, "output": 0.20},
+ "meta-llama/llama-4-maverick-17b-128e-instruct": {"input": 0.2, "output": 0.6},
+ "meta-llama/llama-4-scout-17b-16e-instruct": {"input": 0.11, "output": 0.34},
+ "meta-llama/llama-guard-4-12b-128k": {"input": 0.20, "output": 0.20},
+
+ # Meta Llama 3.x Series - Verified 2025 (Together AI pricing)
+ "llama-3.3-70b": {"input": 0.54, "output": 0.88},
+ "llama-3.1-405b": {"input": 6.0, "output": 12.0},
+ "llama-3.1-70b": {"input": 0.54, "output": 0.88},
+ "llama-3.1-8b": {"input": 0.10, "output": 0.18},
+ "llama-3-70b": {"input": 0.54, "output": 0.88},
+ "llama-3-8b": {"input": 0.10, "output": 0.18},
+ "llama-guard-3-8b": {"input": 0.20, "output": 0.20},
+ "meta-llama/llama-3.3-70b-versatile-128k": {"input": 0.54, "output": 0.88},
+ "meta-llama/llama-3.1-8b-instant-128k": {"input": 0.10, "output": 0.18},
+ "meta-llama/llama-3-70b-8k": {"input": 0.54, "output": 0.88},
+ "meta-llama/llama-3-8b-8k": {"input": 0.10, "output": 0.18},
+ "meta-llama/llama-guard-3-8b-8k": {"input": 0.20, "output": 0.20},
+
+ # Mistral Models
+ "mistral-large": {"input": 2.0, "output": 6.0},
+ "mistral-medium": {"input": 2.7, "output": 8.1},
+ "mistral-small": {"input": 0.1, "output": 0.3},
+ "mistral-tiny": {"input": 0.14, "output": 0.42},
+ "mistral-7b-instruct": {"input": 0.15, "output": 0.15},
+ "mistral-8x7b-instruct": {"input": 0.24, "output": 0.24},
+ "mistral-saba-24b": {"input": 0.79, "output": 0.79},
+ "mistral/mistral-saba-24b": {"input": 0.79, "output": 0.79},
+
+ # Cohere Models
+ "command": {"input": 1.0, "output": 2.0},
+ "command-light": {"input": 0.3, "output": 0.6},
+ "command-nightly": {"input": 1.0, "output": 2.0},
+ "command-r": {"input": 0.5, "output": 1.5},
+ "command-r-plus": {"input": 3.0, "output": 15.0},
+
+ # DeepSeek Models
+ "deepseek-r1-distill-llama-70b": {"input": 0.75, "output": 0.99},
+ "deepseek-ai/deepseek-r1-distill-llama-70b": {"input": 0.75, "output": 0.99},
+ "deepseek-coder": {"input": 0.14, "output": 0.28},
+ "deepseek-chat": {"input": 0.14, "output": 0.28},
+ "deepseek/deepseek-v3-0324": {"input": 0.14, "output": 0.28},
+
+ # Qwen Models
+ "qwen-qwq-32b": {"input": 0.29, "output": 0.39},
+ "qwen/qwen-qwq-32b-preview-128k": {"input": 0.29, "output": 0.39},
+ "qwen-turbo": {"input": 0.3, "output": 0.6},
+ "qwen-plus": {"input": 0.5, "output": 2.0},
+ "qwen-max": {"input": 2.0, "output": 6.0},
+ "qwen2.5-32b-instruct": {"input": 0.7, "output": 2.8},
+ "qwen2.5-max": {"input": 1.6, "output": 6.4},
+
+ # Google Gemma Models
+ "gemma-2-9b": {"input": 0.20, "output": 0.20},
+ "gemma-2-27b": {"input": 0.27, "output": 0.27},
+ "gemma-7b-it": {"input": 0.07, "output": 0.07},
+ "google/gemma-2-9b-8k": {"input": 0.20, "output": 0.20},
+
+ # Together AI Models
+ "together-ai/redpajama-incite-7b-chat": {"input": 0.2, "output": 0.2},
+ "together-ai/redpajama-incite-base-3b-v1": {"input": 0.1, "output": 0.1},
+
+ # Perplexity Models
+ "pplx-7b-chat": {"input": 0.07, "output": 0.28},
+ "pplx-70b-chat": {"input": 0.7, "output": 2.8},
+ "pplx-7b-online": {"input": 0.07, "output": 0.28},
+ "pplx-70b-online": {"input": 0.7, "output": 2.8},
+
+ # Grok Models
+ "grok-3-latest": {"input": 3, "output": 15},
+ "grok-3": {"input": 3, "output": 15},
+ "grok-3-fast": {"input": 5, "output": 25},
+ "grok-3-mini": {"input": 0.3, "output": 0.5},
+ "grok-3-mini-fast": {"input": 0.6, "output": 4},
+
+ }
+
+ # Provider average pricing fallbacks
+ PROVIDER_AVERAGES = {
+ "anthropic": {"input": 3.0, "output": 15.0}, # Average of Claude 3.5 Sonnet
+ "openai": {"input": 2.5, "output": 10.0}, # GPT-4o pricing
+ "google": {"input": 0.5, "output": 1.5}, # Gemini Pro average
+ "meta": {"input": 0.3, "output": 0.5}, # Llama average
+ "mistral": {"input": 0.5, "output": 1.5}, # Mistral average
+ "cohere": {"input": 1.0, "output": 2.0}, # Command model average
+ "deepseek": {"input": 0.3, "output": 0.5}, # DeepSeek average
+ "qwen": {"input": 0.5, "output": 1.0}, # Qwen average
+ "together": {"input": 0.15, "output": 0.15}, # Together AI average
+ "perplexity": {"input": 0.4, "output": 1.5}, # Perplexity average
+ "grok": {"input": 2.4, "output": 12}, # Grok average
+ "groq": {"input": 0.3, "output": 0.6}, # Groq average (placeholder)
+ }
+
+ def get_provider_from_model(model: str) -> str:
+ """Extract provider name from model string"""
+ model_lower = model.lower()
+
+ if any(claude in model_lower for claude in ["claude", "anthropic"]):
+ return "anthropic"
+ elif any(gpt in model_lower for gpt in ["gpt", "openai", "o1", "o3", "o4", "text-davinci", "code-davinci"]):
+ return "openai"
+ elif any(gemini in model_lower for gemini in ["gemini", "google", "gemma", "palm", "bison"]):
+ return "google"
+ elif any(llama in model_lower for llama in ["llama", "meta"]):
+ return "meta"
+ elif "mistral" in model_lower:
+ return "mistral"
+ elif any(cohere in model_lower for cohere in ["command", "cohere"]):
+ return "cohere"
+ elif "deepseek" in model_lower:
+ return "deepseek"
+ elif any(qwen in model_lower for qwen in ["qwen", "qwq"]):
+ return "qwen"
+ elif any(together in model_lower for together in ["together", "redpajama"]):
+ return "together"
+ elif any(pplx in model_lower for pplx in ["pplx", "perplexity"]):
+ return "perplexity"
+ elif any(grok in model_lower for grok in ["grok", "xAI"]):
+ return "grok"
+ elif "groq" in model_lower:
+ return "groq"
+ else:
+ return "unknown"
+
+ def normalize_model_name(model: str) -> str:
+ """Normalize model name by stripping dates and provider prefixes"""
+ import re
+
+ model_lower = model.lower()
+ # Remove provider prefixes (generalizable pattern: any_provider/)
+ model_lower = re.sub(r'^[^/]+/', '', model_lower)
+ # Strip Google/Vertex prefixes
+ model_lower = model_lower.replace('publishers/google/models/', '').replace('models/', '')
+
+ # Strip date suffixes (20240229, 20241022, etc.) but preserve model versions like o1-mini, o3-mini
+ # Pattern: remove -YYYYMMDD or -YYYY-MM-DD at the end
+ date_pattern = r'-\d{8}$|_\d{8}$|-\d{4}-\d{2}-\d{2}$'
+ model_lower = re.sub(date_pattern, '', model_lower)
+
+ return model_lower
+
+ def calculate_cost(model: str, token_usage: dict) -> float:
+ model_lower = normalize_model_name(model)
+
+ # Try exact match first, then longest prefix match
+ pricing = (
+ MODEL_PRICING.get(model_lower) or
+ MODEL_PRICING.get(
+ next((prefix for prefix in sorted(MODEL_PRICING.keys(), key=len, reverse=True)
+ if model_lower.startswith(prefix)), None)
+ ) or
+ PROVIDER_AVERAGES.get(
+ get_provider_from_model(model),
+ {"input": 2.5, "output": 10.0}
+ )
+ )
+
+ # Print warning only if using fallback pricing
+ if model_lower not in MODEL_PRICING:
+ provider = get_provider_from_model(model)
+ if provider in PROVIDER_AVERAGES:
+ logger.warning(f"No pricing found for model: {model}, using {provider} average pricing")
+ else:
+ logger.warning(f"No pricing found for model: {model}, using default pricing")
+
+ input_tokens = token_usage.get("prompt_tokens", token_usage.get("input_tokens", 0))
+ output_tokens = token_usage.get("completion_tokens", token_usage.get("output_tokens", 0))
+
+ cost = ((input_tokens * pricing["input"]) + (output_tokens * pricing["output"])) / 1_000_000
+ return cost
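The table above stores USD per million tokens, so `calculate_cost` weights input and output tokens by their rates and divides by 1,000,000. A quick sanity check against the table (model name and token counts are illustrative):

```python
from lucidicai.telemetry.utils.model_pricing import calculate_cost

# "gpt-4o-2024-08-06" normalizes to "gpt-4o" (the -YYYY-MM-DD suffix is stripped),
# which prices at $2.50 input / $10.00 output per 1M tokens:
#   (1000 * 2.5 + 500 * 10.0) / 1_000_000 = 0.0075
cost = calculate_cost("gpt-4o-2024-08-06",
                      {"prompt_tokens": 1000, "completion_tokens": 500})
assert abs(cost - 0.0075) < 1e-12
```

Unknown models fall back first to the longest matching prefix in MODEL_PRICING, then to the PROVIDER_AVERAGES entry for the detected provider, logging a warning in the fallback cases.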
lucidicai/utils/__init__.py
@@ -0,0 +1 @@
+ """Utility modules."""