lucidicai 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- lucidicai/__init__.py +351 -876
- lucidicai/api/__init__.py +1 -0
- lucidicai/api/client.py +218 -0
- lucidicai/api/resources/__init__.py +1 -0
- lucidicai/api/resources/dataset.py +192 -0
- lucidicai/api/resources/event.py +88 -0
- lucidicai/api/resources/session.py +126 -0
- lucidicai/client.py +4 -1
- lucidicai/core/__init__.py +1 -0
- lucidicai/core/config.py +223 -0
- lucidicai/core/errors.py +60 -0
- lucidicai/core/types.py +35 -0
- lucidicai/dataset.py +2 -0
- lucidicai/errors.py +6 -0
- lucidicai/feature_flag.py +8 -0
- lucidicai/sdk/__init__.py +1 -0
- lucidicai/sdk/context.py +144 -0
- lucidicai/sdk/decorators.py +187 -0
- lucidicai/sdk/error_boundary.py +299 -0
- lucidicai/sdk/event.py +122 -0
- lucidicai/sdk/event_builder.py +304 -0
- lucidicai/sdk/features/__init__.py +1 -0
- lucidicai/sdk/features/dataset.py +605 -0
- lucidicai/sdk/features/feature_flag.py +383 -0
- lucidicai/sdk/init.py +271 -0
- lucidicai/sdk/shutdown_manager.py +302 -0
- lucidicai/telemetry/context_bridge.py +82 -0
- lucidicai/telemetry/context_capture_processor.py +25 -9
- lucidicai/telemetry/litellm_bridge.py +18 -24
- lucidicai/telemetry/lucidic_exporter.py +51 -36
- lucidicai/telemetry/utils/model_pricing.py +278 -0
- lucidicai/utils/__init__.py +1 -0
- lucidicai/utils/images.py +337 -0
- lucidicai/utils/logger.py +168 -0
- lucidicai/utils/queue.py +393 -0
- {lucidicai-2.0.1.dist-info → lucidicai-2.1.0.dist-info}/METADATA +1 -1
- {lucidicai-2.0.1.dist-info → lucidicai-2.1.0.dist-info}/RECORD +39 -12
- {lucidicai-2.0.1.dist-info → lucidicai-2.1.0.dist-info}/WHEEL +0 -0
- {lucidicai-2.0.1.dist-info → lucidicai-2.1.0.dist-info}/top_level.txt +0 -0
```diff
--- a/lucidicai/telemetry/lucidic_exporter.py
+++ b/lucidicai/telemetry/lucidic_exporter.py
@@ -4,7 +4,6 @@ Converts completed spans into immutable typed LLM events via Client.create_event
 which enqueues non-blocking delivery through the EventQueue.
 """
 import json
-import logging
 from typing import Sequence, Optional, Dict, Any, List
 from datetime import datetime, timezone
 from opentelemetry.sdk.trace import ReadableSpan
@@ -12,16 +11,12 @@ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
 from opentelemetry.trace import StatusCode
 from opentelemetry.semconv_ai import SpanAttributes

-from
-from
-from
+from ..sdk.event import create_event
+from ..sdk.init import get_session_id
+from ..sdk.context import current_session_id, current_parent_event_id
+from ..telemetry.utils.model_pricing import calculate_cost
 from .extract import detect_is_llm_span, extract_images, extract_prompts, extract_completions, extract_model
-
-logger = logging.getLogger("Lucidic")
-import os
-
-DEBUG = os.getenv("LUCIDIC_DEBUG", "False") == "True"
-VERBOSE = os.getenv("LUCIDIC_VERBOSE", "False") == "True"
+from ..utils.logger import debug, info, warning, error, verbose, truncate_id


 class LucidicSpanExporter(SpanExporter):
@@ -29,23 +24,25 @@ class LucidicSpanExporter(SpanExporter):

     def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
         try:
-
-
-            logger.debug(f"[LucidicSpanExporter] Processing {len(spans)} spans")
+            if spans:
+                debug(f"[Telemetry] Processing {len(spans)} OpenTelemetry spans")
             for span in spans:
-                self._process_span(span
-                if
-
+                self._process_span(span)
+            if spans:
+                debug(f"[Telemetry] Successfully exported {len(spans)} spans")
             return SpanExportResult.SUCCESS
         except Exception as e:
-
+            error(f"[Telemetry] Failed to export spans: {e}")
             return SpanExportResult.FAILURE

-    def _process_span(self, span: ReadableSpan
+    def _process_span(self, span: ReadableSpan) -> None:
         """Convert a single LLM span into a typed, immutable event."""
         try:
             if not detect_is_llm_span(span):
+                verbose(f"[Telemetry] Skipping non-LLM span: {span.name}")
                 return
+
+            debug(f"[Telemetry] Processing LLM span: {span.name}")

             attributes = dict(span.attributes or {})

@@ -56,22 +53,30 @@ class LucidicSpanExporter(SpanExporter):
                 target_session_id = current_session_id.get(None)
             except Exception:
                 target_session_id = None
-            if not target_session_id and getattr(client, 'session', None) and getattr(client.session, 'session_id', None):
-                target_session_id = client.session.session_id
             if not target_session_id:
+                target_session_id = get_session_id()
+            if not target_session_id:
+                debug(f"[Telemetry] No session ID for span {span.name}, skipping")
                 return

             # Parent nesting - get from span attributes (captured at span creation)
             parent_id = attributes.get('lucidic.parent_event_id')
+            debug(f"[Telemetry] Span {span.name} has parent_id from attributes: {truncate_id(parent_id)}")
             if not parent_id:
                 # Fallback to trying context (may work if same thread)
                 try:
                     parent_id = current_parent_event_id.get(None)
+                    if parent_id:
+                        debug(f"[Telemetry] Got parent_id from context for span {span.name}: {truncate_id(parent_id)}")
                 except Exception:
                     parent_id = None
+
+            if not parent_id:
+                debug(f"[Telemetry] No parent_id available for span {span.name}")

             # Timing
-
+            occurred_at_dt = datetime.fromtimestamp(span.start_time / 1_000_000_000, tz=timezone.utc) if span.start_time else datetime.now(tz=timezone.utc)
+            occurred_at = occurred_at_dt.isoformat()  # Convert to ISO string for JSON serialization
             duration_seconds = ((span.end_time - span.start_time) / 1_000_000_000) if (span.start_time and span.end_time) else None

             # Typed fields using extract utilities
@@ -85,11 +90,18 @@ class LucidicSpanExporter(SpanExporter):
             cost = self._calculate_cost(attributes)
             images = extract_images(attributes)

-            #
-
+            # Set context for parent if needed
+            from ..sdk.context import current_parent_event_id as parent_context
+            if parent_id:
+                token = parent_context.set(parent_id)
+            else:
+                token = None
+
+            try:
+                # Create immutable event via non-blocking queue
+                debug(f"[Telemetry] Creating LLM event with parent_id: {truncate_id(parent_id)}")
+                event_id = create_event(
                 type="llm_generation",
-                session_id=target_session_id,
-                parent_event_id=parent_id,
                 occurred_at=occurred_at,
                 duration=duration_seconds,
                 provider=provider,
@@ -101,16 +113,20 @@
                 output_tokens=output_tokens,
                 cost=cost,
                 raw={"images": images} if images else None,
+                parent_event_id=parent_id,  # Pass the parent_id explicitly
             )
+            finally:
+                # Reset parent context
+                if token:
+                    parent_context.reset(token)

-
-            logger.debug(f"[LucidicSpanExporter] Created LLM event {event_id} for session {target_session_id[:8]}...")
+            debug(f"[Telemetry] Created LLM event {truncate_id(event_id)} from span {span.name} for session {truncate_id(target_session_id)}")

         except Exception as e:
-
+            error(f"[Telemetry] Failed to process span {span.name}: {e}")


-    def _create_event_from_span(self, span: ReadableSpan, attributes: Dict[str, Any]
+    def _create_event_from_span(self, span: ReadableSpan, attributes: Dict[str, Any]) -> Optional[str]:
         """Create a Lucidic event from span start"""
         try:
             # Extract description from prompts/messages
@@ -132,9 +148,9 @@
             except Exception:
                 target_session_id = None
             if not target_session_id:
-
-                target_session_id = client.session.session_id
+                target_session_id = get_session_id()
             if not target_session_id:
+                debug(f"[Telemetry] No session ID for span {span.name}, skipping")
                 return None

             # Create event
@@ -147,13 +163,13 @@
             if images:
                 event_kwargs['screenshots'] = images

-            return
+            return create_event(**event_kwargs)

         except Exception as e:
-
+            error(f"[Telemetry] Failed to create event from span: {e}")
             return None

-    def _update_event_from_span(self, span: ReadableSpan, attributes: Dict[str, Any], event_id: str
+    def _update_event_from_span(self, span: ReadableSpan, attributes: Dict[str, Any], event_id: str) -> None:
         """Deprecated: events are immutable; no updates performed."""
         return

@@ -163,8 +179,7 @@
         prompts = attributes.get(SpanAttributes.LLM_PROMPTS) or \
                   attributes.get('gen_ai.prompt')

-
-        logger.info(f"[SpaneExporter -- DEBUG] Extracting Description attributes: {attributes}, prompts: {prompts}")
+        verbose(f"[Telemetry] Extracting description from attributes: {attributes}, prompts: {prompts}")

         if prompts:
             if isinstance(prompts, list) and prompts:
```
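For orientation, a `SpanExporter` like the one above only receives spans once it is attached to a span processor on an OpenTelemetry `TracerProvider`. The sketch below shows that wiring with the standard `opentelemetry-sdk` API; it is illustrative only — the span name and attribute value are made up, and this is not taken from how the package configures telemetry internally.

```python
# Minimal sketch: registering a custom SpanExporter with OpenTelemetry.
# BatchSpanProcessor batches finished spans and calls exporter.export()
# on a background thread; the exporter above then turns LLM spans into events.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from lucidicai.telemetry.lucidic_exporter import LucidicSpanExporter

provider = TracerProvider()
provider.add_span_processor(BatchSpanProcessor(LucidicSpanExporter()))
trace.set_tracer_provider(provider)

tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("chat.completion") as span:
    # Hypothetical attribute value; the exporter reads 'lucidic.parent_event_id'
    # from span attributes to decide event nesting.
    span.set_attribute("lucidic.parent_event_id", "parent-1234")
```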
```diff
--- /dev/null
+++ b/lucidicai/telemetry/utils/model_pricing.py
@@ -0,0 +1,278 @@
+import logging
+
+logger = logging.getLogger("Lucidic")
+
+MODEL_PRICING = {
+
+    # OpenAI GPT-5 Series (Verified 2025)
+    "gpt-5": {"input": 10.0, "output": 10.0},
+    "gpt-5-mini": {"input": 0.250, "output": 2.0},
+    "gpt-5-nano": {"input": 0.05, "output": 0.4},
+
+    # OpenAI GPT-4o Series (Verified 2025)
+    "gpt-4o": {"input": 2.5, "output": 10.0},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.6},
+    "gpt-4o-realtime-preview": {"input": 5.0, "output": 20.0},  # Text pricing
+    "gpt-4o-audio-preview": {"input": 100.0, "output": 200.0},  # Audio pricing per 1M tokens
+
+    # OpenAI GPT-4.1 Series (2025)
+    "gpt-4.1": {"input": 2.00, "output": 8.0},
+    "gpt-4.1-mini": {"input": 0.4, "output": 1.6},
+    "gpt-4.1-nano": {"input": 0.2, "output": 0.8},
+
+    # OpenAI GPT-4 Series
+    "gpt-4": {"input": 30.0, "output": 60.0},
+    "gpt-4-turbo": {"input": 10.0, "output": 30.0},
+    "gpt-4-turbo-preview": {"input": 10.0, "output": 30.0},
+    "gpt-4-vision-preview": {"input": 10.0, "output": 30.0},
+    "gpt-4-32k": {"input": 60.0, "output": 120.0},
+
+    # OpenAI GPT-3.5 Series
+    "gpt-3.5-turbo": {"input": 0.5, "output": 1.5},
+    "gpt-3.5-turbo-16k": {"input": 3.0, "output": 4.0},
+    "gpt-3.5-turbo-instruct": {"input": 1.5, "output": 2.0},
+
+    # OpenAI o-Series (Reasoning Models) - Verified 2025
+    "o1": {"input": 15.0, "output": 60.0},
+    "o1-preview": {"input": 15.0, "output": 60.0},
+    "o1-mini": {"input": 3.0, "output": 15.0},
+    "o3": {"input": 15.0, "output": 60.0},
+    "o3-mini": {"input": 1.1, "output": 4.4},
+    "o4-mini": {"input": 4.00, "output": 16.0},
+
+    # OpenAI Legacy Models
+    "text-davinci-003": {"input": 20.0, "output": 20.0},
+    "text-davinci-002": {"input": 20.0, "output": 20.0},
+    "code-davinci-002": {"input": 20.0, "output": 20.0},
+
+    # Claude 4 Models (2025) - Verified
+    "claude-4-opus": {"input": 15.0, "output": 75.0},
+    "claude-opus-4": {"input": 15.0, "output": 75.0},
+    "claude-4-sonnet": {"input": 3.0, "output": 15.0},
+    "claude-sonnet-4": {"input": 3.0, "output": 15.0},
+
+    # Claude 3.5 Models - Verified 2025
+    "claude-3-5-sonnet": {"input": 3.0, "output": 15.0},
+    "claude-3-5-sonnet-latest": {"input": 3.0, "output": 15.0},
+    "claude-3-5-haiku": {"input": 1.0, "output": 5.0},
+    "claude-3-5-haiku-latest": {"input": 1.0, "output": 5.0},
+    "claude-3-7-sonnet": {"input": 3.0, "output": 15.0},  # Same as 3.5 sonnet
+    "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0},
+
+    # Claude 3 Models
+    "claude-3-opus": {"input": 15.0, "output": 75.0},
+    "claude-3-opus-latest": {"input": 15.0, "output": 75.0},
+    "claude-3-sonnet": {"input": 3.0, "output": 15.0},
+    "claude-3-haiku": {"input": 0.25, "output": 1.25},
+
+    # Claude 2 Models
+    "claude-2": {"input": 8.0, "output": 24.0},
+    "claude-2.1": {"input": 8.0, "output": 24.0},
+    "claude-2.0": {"input": 8.0, "output": 24.0},
+
+    # Claude Instant
+    "claude-instant": {"input": 0.8, "output": 2.4},
+    "claude-instant-1": {"input": 0.8, "output": 2.4},
+    "claude-instant-1.2": {"input": 0.8, "output": 2.4},
+
+    # Google Gemini 2.5 Series (2025) - Verified
+    "gemini-2.5-pro": {"input": 1.25, "output": 10.0},  # Up to 200k tokens
+    "gemini-2.5-pro-preview": {"input": 1.25, "output": 10.0},
+    "gemini-2.5-flash": {"input": 0.15, "output": 0.6},  # Non-thinking
+    "gemini-2.5-flash-preview": {"input": 0.15, "output": 0.6},
+
+    # Google Gemini 2.0 Series - Verified
+    "gemini-2.0-flash": {"input": 0.1, "output": 0.4},
+    "gemini-2.0-flash-exp": {"input": 0.0, "output": 0.0},  # Free experimental
+    "gemini-2.0-flash-experimental": {"input": 0.0, "output": 0.0},
+
+    # Google Gemini 1.5 Series - Verified
+    "gemini-1.5-pro": {"input": 1.25, "output": 5.0},  # Up to 128k tokens
+    "gemini-1.5-pro-preview": {"input": 1.25, "output": 5.0},
+    "gemini-1.5-flash": {"input": 0.075, "output": 0.3},  # Up to 128k tokens
+    "gemini-1.5-flash-8b": {"input": 0.0375, "output": 0.15},
+
+    # Google Gemini 1.0 Series
+    "gemini-pro": {"input": 0.5, "output": 1.5},
+    "gemini-pro-vision": {"input": 0.25, "output": 0.5},
+    "gemini-1.0-pro": {"input": 0.5, "output": 1.5},
+
+    # Google PaLM Series
+    "text-bison": {"input": 1.0, "output": 1.0},
+    "text-bison-32k": {"input": 1.0, "output": 1.0},
+    "chat-bison": {"input": 1.0, "output": 1.0},
+    "chat-bison-32k": {"input": 1.0, "output": 1.0},
+
+    # Meta Llama 4 Series (2025)
+    "llama-4-maverick-17b": {"input": 0.2, "output": 0.6},
+    "llama-4-scout-17b": {"input": 0.11, "output": 0.34},
+    "llama-guard-4-12b": {"input": 0.20, "output": 0.20},
+    "meta-llama/llama-4-maverick-17b-128e-instruct": {"input": 0.2, "output": 0.6},
+    "meta-llama/llama-4-scout-17b-16e-instruct": {"input": 0.11, "output": 0.34},
+    "meta-llama/llama-guard-4-12b-128k": {"input": 0.20, "output": 0.20},
+
+    # Meta Llama 3.x Series - Verified 2025 (Together AI pricing)
+    "llama-3.3-70b": {"input": 0.54, "output": 0.88},
+    "llama-3.1-405b": {"input": 6.0, "output": 12.0},
+    "llama-3.1-70b": {"input": 0.54, "output": 0.88},
+    "llama-3.1-8b": {"input": 0.10, "output": 0.18},
+    "llama-3-70b": {"input": 0.54, "output": 0.88},
+    "llama-3-8b": {"input": 0.10, "output": 0.18},
+    "llama-guard-3-8b": {"input": 0.20, "output": 0.20},
+    "meta-llama/llama-3.3-70b-versatile-128k": {"input": 0.54, "output": 0.88},
+    "meta-llama/llama-3.1-8b-instant-128k": {"input": 0.10, "output": 0.18},
+    "meta-llama/llama-3-70b-8k": {"input": 0.54, "output": 0.88},
+    "meta-llama/llama-3-8b-8k": {"input": 0.10, "output": 0.18},
+    "meta-llama/llama-guard-3-8b-8k": {"input": 0.20, "output": 0.20},
+
+    # Mistral Models
+    "mistral-large": {"input": 2.0, "output": 6.0},
+    "mistral-medium": {"input": 2.7, "output": 8.1},
+    "mistral-small": {"input": 0.1, "output": 0.3},
+    "mistral-tiny": {"input": 0.14, "output": 0.42},
+    "mistral-7b-instruct": {"input": 0.15, "output": 0.15},
+    "mistral-8x7b-instruct": {"input": 0.24, "output": 0.24},
+    "mistral-saba-24b": {"input": 0.79, "output": 0.79},
+    "mistral/mistral-saba-24b": {"input": 0.79, "output": 0.79},
+
+    # Cohere Models
+    "command": {"input": 1.0, "output": 2.0},
+    "command-light": {"input": 0.3, "output": 0.6},
+    "command-nightly": {"input": 1.0, "output": 2.0},
+    "command-r": {"input": 0.5, "output": 1.5},
+    "command-r-plus": {"input": 3.0, "output": 15.0},
+
+    # DeepSeek Models
+    "deepseek-r1-distill-llama-70b": {"input": 0.75, "output": 0.99},
+    "deepseek-ai/deepseek-r1-distill-llama-70b": {"input": 0.75, "output": 0.99},
+    "deepseek-coder": {"input": 0.14, "output": 0.28},
+    "deepseek-chat": {"input": 0.14, "output": 0.28},
+    "deepseek/deepseek-v3-0324": {"input": 0.14, "output": 0.28},
+
+    # Qwen Models
+    "qwen-qwq-32b": {"input": 0.29, "output": 0.39},
+    "qwen/qwen-qwq-32b-preview-128k": {"input": 0.29, "output": 0.39},
+    "qwen-turbo": {"input": 0.3, "output": 0.6},
+    "qwen-plus": {"input": 0.5, "output": 2.0},
+    "qwen-max": {"input": 2.0, "output": 6.0},
+    "qwen2.5-32b-instruct": {"input": 0.7, "output": 2.8},
+    "qwen2.5-max": {"input": 1.6, "output": 6.4},
+
+    # Google Gemma Models
+    "gemma-2-9b": {"input": 0.20, "output": 0.20},
+    "gemma-2-27b": {"input": 0.27, "output": 0.27},
+    "gemma-7b-it": {"input": 0.07, "output": 0.07},
+    "google/gemma-2-9b-8k": {"input": 0.20, "output": 0.20},
+
+    # Together AI Models
+    "together-ai/redpajama-incite-7b-chat": {"input": 0.2, "output": 0.2},
+    "together-ai/redpajama-incite-base-3b-v1": {"input": 0.1, "output": 0.1},
+
+    # Perplexity Models
+    "pplx-7b-chat": {"input": 0.07, "output": 0.28},
+    "pplx-70b-chat": {"input": 0.7, "output": 2.8},
+    "pplx-7b-online": {"input": 0.07, "output": 0.28},
+    "pplx-70b-online": {"input": 0.7, "output": 2.8},
+
+    # Grok Models
+    "grok-3-latest": {"input": 3, "output": 15},
+    "grok-3": {"input": 3, "output": 15},
+    "grok-3-fast": {"input": 5, "output": 25},
+    "grok-3-mini": {"input": 0.3, "output": 0.5},
+    "grok-3-mini-fast": {"input": 0.6, "output": 4},
+
+}
+
+# Provider average pricing fallbacks
+PROVIDER_AVERAGES = {
+    "anthropic": {"input": 3.0, "output": 15.0},  # Average of Claude 3.5 Sonnet
+    "openai": {"input": 2.5, "output": 10.0},  # GPT-4o pricing
+    "google": {"input": 0.5, "output": 1.5},  # Gemini Pro average
+    "meta": {"input": 0.3, "output": 0.5},  # Llama average
+    "mistral": {"input": 0.5, "output": 1.5},  # Mistral average
+    "cohere": {"input": 1.0, "output": 2.0},  # Command model average
+    "deepseek": {"input": 0.3, "output": 0.5},  # DeepSeek average
+    "qwen": {"input": 0.5, "output": 1.0},  # Qwen average
+    "together": {"input": 0.15, "output": 0.15},  # Together AI average
+    "perplexity": {"input": 0.4, "output": 1.5},  # Perplexity average
+    "grok": {"input": 2.4, "output": 12},  # Grok average
+    "groq": {"input": 0.3, "output": 0.6},  # Groq average (placeholder)
+}
+
+def get_provider_from_model(model: str) -> str:
+    """Extract provider name from model string"""
+    model_lower = model.lower()
+
+    if any(claude in model_lower for claude in ["claude", "anthropic"]):
+        return "anthropic"
+    elif any(gpt in model_lower for gpt in ["gpt", "openai", "o1", "o3", "o4", "text-davinci", "code-davinci"]):
+        return "openai"
+    elif any(gemini in model_lower for gemini in ["gemini", "google", "gemma", "palm", "bison"]):
+        return "google"
+    elif any(llama in model_lower for llama in ["llama", "meta"]):
+        return "meta"
+    elif "mistral" in model_lower:
+        return "mistral"
+    elif any(cohere in model_lower for cohere in ["command", "cohere"]):
+        return "cohere"
+    elif "deepseek" in model_lower:
+        return "deepseek"
+    elif any(qwen in model_lower for qwen in ["qwen", "qwq"]):
+        return "qwen"
+    elif any(together in model_lower for together in ["together", "redpajama"]):
+        return "together"
+    elif any(pplx in model_lower for pplx in ["pplx", "perplexity"]):
+        return "perplexity"
+    elif any(grok in model_lower for grok in ["grok", "xAI"]):
+        return "grok"
+    elif "groq" in model_lower:
+        return "groq"
+    else:
+        return "unknown"
+
+def normalize_model_name(model: str) -> str:
+    """Normalize model name by stripping dates and provider prefixes"""
+    import re
+
+    model_lower = model.lower()
+    # Remove provider prefixes (generalizable pattern: any_provider/)
+    model_lower = re.sub(r'^[^/]+/', '', model_lower)
+    # Strip Google/Vertex prefixes
+    model_lower = model_lower.replace('publishers/google/models/', '').replace('models/', '')
+
+    # Strip date suffixes (20240229, 20241022, etc.) but preserve model versions like o1-mini, o3-mini
+    # Pattern: remove -YYYYMMDD or -YYYY-MM-DD at the end
+    date_pattern = r'-\d{8}$|_\d{8}$|-\d{4}-\d{2}-\d{2}$'
+    model_lower = re.sub(date_pattern, '', model_lower)
+
+    return model_lower
+
+def calculate_cost(model: str, token_usage: dict) -> float:
+    model_lower = normalize_model_name(model)
+
+    # Try exact match first, then longest prefix match
+    pricing = (
+        MODEL_PRICING.get(model_lower) or
+        MODEL_PRICING.get(
+            next((prefix for prefix in sorted(MODEL_PRICING.keys(), key=len, reverse=True)
+                  if model_lower.startswith(prefix)), None)
+        ) or
+        PROVIDER_AVERAGES.get(
+            get_provider_from_model(model),
+            {"input": 2.5, "output": 10.0}
+        )
+    )
+
+    # Print warning only if using fallback pricing
+    if model_lower not in MODEL_PRICING:
+        provider = get_provider_from_model(model)
+        if provider in PROVIDER_AVERAGES:
+            logger.warning(f"No pricing found for model: {model}, using {provider} average pricing")
+        else:
+            logger.warning(f"No pricing found for model: {model}, using default pricing")
+
+    input_tokens = token_usage.get("prompt_tokens", token_usage.get("input_tokens", 0))
+    output_tokens = token_usage.get("completion_tokens", token_usage.get("output_tokens", 0))
+
+    cost = ((input_tokens * pricing["input"]) + (output_tokens * pricing["output"])) / 1_000_000
+    return cost
```
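As a quick sanity check on the pricing logic added above (prices are USD per million tokens), here is a small usage sketch. The token counts are invented for illustration, and the import path simply mirrors the new module's location in this wheel.

```python
from lucidicai.telemetry.utils.model_pricing import calculate_cost, normalize_model_name

# Date suffixes and provider prefixes are stripped before the pricing lookup.
assert normalize_model_name("claude-3-5-sonnet-20241022") == "claude-3-5-sonnet"
assert normalize_model_name("meta-llama/llama-3-8b-8k") == "llama-3-8b-8k"

# gpt-4o is priced at $2.50 / $10.00 per 1M input/output tokens, so
# 1,000 prompt + 500 completion tokens cost (1000*2.5 + 500*10) / 1_000_000 = $0.0075.
usage = {"prompt_tokens": 1000, "completion_tokens": 500}
print(calculate_cost("gpt-4o-2024-08-06", usage))  # ~0.0075

# Unknown models fall back to provider-average (or default) pricing and log a warning.
print(calculate_cost("some-unreleased-model", usage))
```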
```diff
--- /dev/null
+++ b/lucidicai/utils/__init__.py
@@ -0,0 +1 @@
+"""Utility modules."""
```