lucidicai 2.0.2__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lucidicai/__init__.py +367 -899
- lucidicai/api/__init__.py +1 -0
- lucidicai/api/client.py +218 -0
- lucidicai/api/resources/__init__.py +1 -0
- lucidicai/api/resources/dataset.py +192 -0
- lucidicai/api/resources/event.py +88 -0
- lucidicai/api/resources/session.py +126 -0
- lucidicai/core/__init__.py +1 -0
- lucidicai/core/config.py +223 -0
- lucidicai/core/errors.py +60 -0
- lucidicai/core/types.py +35 -0
- lucidicai/sdk/__init__.py +1 -0
- lucidicai/sdk/context.py +231 -0
- lucidicai/sdk/decorators.py +187 -0
- lucidicai/sdk/error_boundary.py +299 -0
- lucidicai/sdk/event.py +126 -0
- lucidicai/sdk/event_builder.py +304 -0
- lucidicai/sdk/features/__init__.py +1 -0
- lucidicai/sdk/features/dataset.py +605 -0
- lucidicai/sdk/features/feature_flag.py +383 -0
- lucidicai/sdk/init.py +361 -0
- lucidicai/sdk/shutdown_manager.py +302 -0
- lucidicai/telemetry/context_bridge.py +82 -0
- lucidicai/telemetry/context_capture_processor.py +25 -9
- lucidicai/telemetry/litellm_bridge.py +20 -24
- lucidicai/telemetry/lucidic_exporter.py +99 -60
- lucidicai/telemetry/openai_patch.py +295 -0
- lucidicai/telemetry/openai_uninstrument.py +87 -0
- lucidicai/telemetry/telemetry_init.py +16 -1
- lucidicai/telemetry/utils/model_pricing.py +278 -0
- lucidicai/utils/__init__.py +1 -0
- lucidicai/utils/images.py +337 -0
- lucidicai/utils/logger.py +168 -0
- lucidicai/utils/queue.py +393 -0
- {lucidicai-2.0.2.dist-info → lucidicai-2.1.1.dist-info}/METADATA +1 -1
- {lucidicai-2.0.2.dist-info → lucidicai-2.1.1.dist-info}/RECORD +38 -9
- {lucidicai-2.0.2.dist-info → lucidicai-2.1.1.dist-info}/WHEEL +0 -0
- {lucidicai-2.0.2.dist-info → lucidicai-2.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
# Shared SDK logger; "Lucidic" matches the logger name used across the package.
logger = logging.getLogger("Lucidic")

# Pricing table keyed by normalized model name.
# All prices are USD per 1,000,000 tokens (calculate_cost divides the
# token-weighted sum by 1_000_000). "input" = prompt tokens, "output" =
# completion tokens. Keys are matched exact-first, then by longest prefix.
MODEL_PRICING = {

    # OpenAI GPT-5 Series (Verified 2025)
    "gpt-5": {"input": 10.0, "output": 10.0},
    "gpt-5-mini": {"input": 0.250, "output": 2.0},
    "gpt-5-nano": {"input": 0.05, "output": 0.4},

    # OpenAI GPT-4o Series (Verified 2025)
    "gpt-4o": {"input": 2.5, "output": 10.0},
    "gpt-4o-mini": {"input": 0.15, "output": 0.6},
    "gpt-4o-realtime-preview": {"input": 5.0, "output": 20.0},  # Text pricing
    "gpt-4o-audio-preview": {"input": 100.0, "output": 200.0},  # Audio pricing per 1M tokens

    # OpenAI GPT-4.1 Series (2025)
    "gpt-4.1": {"input": 2.00, "output": 8.0},
    "gpt-4.1-mini": {"input": 0.4, "output": 1.6},
    "gpt-4.1-nano": {"input": 0.2, "output": 0.8},

    # OpenAI GPT-4 Series
    "gpt-4": {"input": 30.0, "output": 60.0},
    "gpt-4-turbo": {"input": 10.0, "output": 30.0},
    "gpt-4-turbo-preview": {"input": 10.0, "output": 30.0},
    "gpt-4-vision-preview": {"input": 10.0, "output": 30.0},
    "gpt-4-32k": {"input": 60.0, "output": 120.0},

    # OpenAI GPT-3.5 Series
    "gpt-3.5-turbo": {"input": 0.5, "output": 1.5},
    "gpt-3.5-turbo-16k": {"input": 3.0, "output": 4.0},
    "gpt-3.5-turbo-instruct": {"input": 1.5, "output": 2.0},

    # OpenAI o-Series (Reasoning Models) - Verified 2025
    "o1": {"input": 15.0, "output": 60.0},
    "o1-preview": {"input": 15.0, "output": 60.0},
    "o1-mini": {"input": 3.0, "output": 15.0},
    "o3": {"input": 15.0, "output": 60.0},
    "o3-mini": {"input": 1.1, "output": 4.4},
    "o4-mini": {"input": 4.00, "output": 16.0},

    # OpenAI Legacy Models
    "text-davinci-003": {"input": 20.0, "output": 20.0},
    "text-davinci-002": {"input": 20.0, "output": 20.0},
    "code-davinci-002": {"input": 20.0, "output": 20.0},

    # Claude 4 Models (2025) - Verified
    # Both orderings are listed because providers disagree on the naming.
    "claude-4-opus": {"input": 15.0, "output": 75.0},
    "claude-opus-4": {"input": 15.0, "output": 75.0},
    "claude-4-sonnet": {"input": 3.0, "output": 15.0},
    "claude-sonnet-4": {"input": 3.0, "output": 15.0},

    # Claude 3.5 Models - Verified 2025
    "claude-3-5-sonnet": {"input": 3.0, "output": 15.0},
    "claude-3-5-sonnet-latest": {"input": 3.0, "output": 15.0},
    "claude-3-5-haiku": {"input": 1.0, "output": 5.0},
    "claude-3-5-haiku-latest": {"input": 1.0, "output": 5.0},
    "claude-3-7-sonnet": {"input": 3.0, "output": 15.0},  # Same as 3.5 sonnet
    "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0},

    # Claude 3 Models
    "claude-3-opus": {"input": 15.0, "output": 75.0},
    "claude-3-opus-latest": {"input": 15.0, "output": 75.0},
    "claude-3-sonnet": {"input": 3.0, "output": 15.0},
    "claude-3-haiku": {"input": 0.25, "output": 1.25},

    # Claude 2 Models
    "claude-2": {"input": 8.0, "output": 24.0},
    "claude-2.1": {"input": 8.0, "output": 24.0},
    "claude-2.0": {"input": 8.0, "output": 24.0},

    # Claude Instant
    "claude-instant": {"input": 0.8, "output": 2.4},
    "claude-instant-1": {"input": 0.8, "output": 2.4},
    "claude-instant-1.2": {"input": 0.8, "output": 2.4},

    # Google Gemini 2.5 Series (2025) - Verified
    "gemini-2.5-pro": {"input": 1.25, "output": 10.0},  # Up to 200k tokens
    "gemini-2.5-pro-preview": {"input": 1.25, "output": 10.0},
    "gemini-2.5-flash": {"input": 0.15, "output": 0.6},  # Non-thinking
    "gemini-2.5-flash-preview": {"input": 0.15, "output": 0.6},

    # Google Gemini 2.0 Series - Verified
    "gemini-2.0-flash": {"input": 0.1, "output": 0.4},
    "gemini-2.0-flash-exp": {"input": 0.0, "output": 0.0},  # Free experimental
    "gemini-2.0-flash-experimental": {"input": 0.0, "output": 0.0},

    # Google Gemini 1.5 Series - Verified
    "gemini-1.5-pro": {"input": 1.25, "output": 5.0},  # Up to 128k tokens
    "gemini-1.5-pro-preview": {"input": 1.25, "output": 5.0},
    "gemini-1.5-flash": {"input": 0.075, "output": 0.3},  # Up to 128k tokens
    "gemini-1.5-flash-8b": {"input": 0.0375, "output": 0.15},

    # Google Gemini 1.0 Series
    "gemini-pro": {"input": 0.5, "output": 1.5},
    "gemini-pro-vision": {"input": 0.25, "output": 0.5},
    "gemini-1.0-pro": {"input": 0.5, "output": 1.5},

    # Google PaLM Series
    "text-bison": {"input": 1.0, "output": 1.0},
    "text-bison-32k": {"input": 1.0, "output": 1.0},
    "chat-bison": {"input": 1.0, "output": 1.0},
    "chat-bison-32k": {"input": 1.0, "output": 1.0},

    # Meta Llama 4 Series (2025)
    # Bare names and "meta-llama/"-prefixed variants are both listed; the
    # prefixed forms survive normalization only when the prefix differs
    # from the leading "<provider>/" segment that gets stripped.
    "llama-4-maverick-17b": {"input": 0.2, "output": 0.6},
    "llama-4-scout-17b": {"input": 0.11, "output": 0.34},
    "llama-guard-4-12b": {"input": 0.20, "output": 0.20},
    "meta-llama/llama-4-maverick-17b-128e-instruct": {"input": 0.2, "output": 0.6},
    "meta-llama/llama-4-scout-17b-16e-instruct": {"input": 0.11, "output": 0.34},
    "meta-llama/llama-guard-4-12b-128k": {"input": 0.20, "output": 0.20},

    # Meta Llama 3.x Series - Verified 2025 (Together AI pricing)
    "llama-3.3-70b": {"input": 0.54, "output": 0.88},
    "llama-3.1-405b": {"input": 6.0, "output": 12.0},
    "llama-3.1-70b": {"input": 0.54, "output": 0.88},
    "llama-3.1-8b": {"input": 0.10, "output": 0.18},
    "llama-3-70b": {"input": 0.54, "output": 0.88},
    "llama-3-8b": {"input": 0.10, "output": 0.18},
    "llama-guard-3-8b": {"input": 0.20, "output": 0.20},
    "meta-llama/llama-3.3-70b-versatile-128k": {"input": 0.54, "output": 0.88},
    "meta-llama/llama-3.1-8b-instant-128k": {"input": 0.10, "output": 0.18},
    "meta-llama/llama-3-70b-8k": {"input": 0.54, "output": 0.88},
    "meta-llama/llama-3-8b-8k": {"input": 0.10, "output": 0.18},
    "meta-llama/llama-guard-3-8b-8k": {"input": 0.20, "output": 0.20},

    # Mistral Models
    "mistral-large": {"input": 2.0, "output": 6.0},
    "mistral-medium": {"input": 2.7, "output": 8.1},
    "mistral-small": {"input": 0.1, "output": 0.3},
    "mistral-tiny": {"input": 0.14, "output": 0.42},
    "mistral-7b-instruct": {"input": 0.15, "output": 0.15},
    "mistral-8x7b-instruct": {"input": 0.24, "output": 0.24},
    "mistral-saba-24b": {"input": 0.79, "output": 0.79},
    "mistral/mistral-saba-24b": {"input": 0.79, "output": 0.79},

    # Cohere Models
    "command": {"input": 1.0, "output": 2.0},
    "command-light": {"input": 0.3, "output": 0.6},
    "command-nightly": {"input": 1.0, "output": 2.0},
    "command-r": {"input": 0.5, "output": 1.5},
    "command-r-plus": {"input": 3.0, "output": 15.0},

    # DeepSeek Models
    "deepseek-r1-distill-llama-70b": {"input": 0.75, "output": 0.99},
    "deepseek-ai/deepseek-r1-distill-llama-70b": {"input": 0.75, "output": 0.99},
    "deepseek-coder": {"input": 0.14, "output": 0.28},
    "deepseek-chat": {"input": 0.14, "output": 0.28},
    "deepseek/deepseek-v3-0324": {"input": 0.14, "output": 0.28},

    # Qwen Models
    "qwen-qwq-32b": {"input": 0.29, "output": 0.39},
    "qwen/qwen-qwq-32b-preview-128k": {"input": 0.29, "output": 0.39},
    "qwen-turbo": {"input": 0.3, "output": 0.6},
    "qwen-plus": {"input": 0.5, "output": 2.0},
    "qwen-max": {"input": 2.0, "output": 6.0},
    "qwen2.5-32b-instruct": {"input": 0.7, "output": 2.8},
    "qwen2.5-max": {"input": 1.6, "output": 6.4},

    # Google Gemma Models
    "gemma-2-9b": {"input": 0.20, "output": 0.20},
    "gemma-2-27b": {"input": 0.27, "output": 0.27},
    "gemma-7b-it": {"input": 0.07, "output": 0.07},
    "google/gemma-2-9b-8k": {"input": 0.20, "output": 0.20},

    # Together AI Models
    "together-ai/redpajama-incite-7b-chat": {"input": 0.2, "output": 0.2},
    "together-ai/redpajama-incite-base-3b-v1": {"input": 0.1, "output": 0.1},

    # Perplexity Models
    "pplx-7b-chat": {"input": 0.07, "output": 0.28},
    "pplx-70b-chat": {"input": 0.7, "output": 2.8},
    "pplx-7b-online": {"input": 0.07, "output": 0.28},
    "pplx-70b-online": {"input": 0.7, "output": 2.8},

    # Grok Models
    "grok-3-latest": {"input": 3, "output": 15},
    "grok-3": {"input": 3, "output": 15},
    "grok-3-fast": {"input": 5, "output": 25},
    "grok-3-mini": {"input": 0.3, "output": 0.5},
    "grok-3-mini-fast": {"input": 0.6, "output": 4},

}
|
|
185
|
+
|
|
186
|
+
# Provider average pricing fallbacks.
# Used by calculate_cost when a model has no exact or prefix entry in
# MODEL_PRICING. Values are USD per 1,000,000 tokens, same units as
# MODEL_PRICING; keys must match the names returned by
# get_provider_from_model.
PROVIDER_AVERAGES = {
    "anthropic": {"input": 3.0, "output": 15.0},  # Average of Claude 3.5 Sonnet
    "openai": {"input": 2.5, "output": 10.0},  # GPT-4o pricing
    "google": {"input": 0.5, "output": 1.5},  # Gemini Pro average
    "meta": {"input": 0.3, "output": 0.5},  # Llama average
    "mistral": {"input": 0.5, "output": 1.5},  # Mistral average
    "cohere": {"input": 1.0, "output": 2.0},  # Command model average
    "deepseek": {"input": 0.3, "output": 0.5},  # DeepSeek average
    "qwen": {"input": 0.5, "output": 1.0},  # Qwen average
    "together": {"input": 0.15, "output": 0.15},  # Together AI average
    "perplexity": {"input": 0.4, "output": 1.5},  # Perplexity average
    "grok": {"input": 2.4, "output": 12},  # Grok average
    "groq": {"input": 0.3, "output": 0.6},  # Groq average (placeholder)
}
|
|
201
|
+
|
|
202
|
+
def get_provider_from_model(model: str) -> str:
    """Infer the provider name from a model identifier string.

    Matching is case-insensitive substring matching, checked in a fixed
    order (Anthropic before OpenAI, etc.) so that e.g. "claude" wins even
    if the string also happens to contain "o1".

    Args:
        model: Raw model identifier (may include a provider prefix).

    Returns:
        Provider key suitable for PROVIDER_AVERAGES lookup, or "unknown".
    """
    model_lower = model.lower()

    if any(marker in model_lower for marker in ("claude", "anthropic")):
        return "anthropic"
    elif any(marker in model_lower for marker in ("gpt", "openai", "o1", "o3", "o4", "text-davinci", "code-davinci")):
        return "openai"
    elif any(marker in model_lower for marker in ("gemini", "google", "gemma", "palm", "bison")):
        return "google"
    elif any(marker in model_lower for marker in ("llama", "meta")):
        return "meta"
    elif "mistral" in model_lower:
        return "mistral"
    elif any(marker in model_lower for marker in ("command", "cohere")):
        return "cohere"
    elif "deepseek" in model_lower:
        return "deepseek"
    elif any(marker in model_lower for marker in ("qwen", "qwq")):
        return "qwen"
    elif any(marker in model_lower for marker in ("together", "redpajama")):
        return "together"
    elif any(marker in model_lower for marker in ("pplx", "perplexity")):
        return "perplexity"
    # BUG FIX: the original checked for "xAI" (mixed case) against the
    # lowercased string, so the xAI marker could never match.
    elif any(marker in model_lower for marker in ("grok", "xai")):
        return "grok"
    elif "groq" in model_lower:
        return "groq"
    else:
        return "unknown"
|
|
232
|
+
|
|
233
|
+
def normalize_model_name(model: str) -> str:
    """Canonicalize a model identifier for pricing-table lookups.

    Lowercases the name, drops a single leading "<provider>/" namespace,
    strips Google/Vertex resource prefixes, and trims a trailing release
    date (-YYYYMMDD, _YYYYMMDD, or -YYYY-MM-DD) while leaving version-like
    names such as "o1-mini" untouched.
    """
    import re

    name = model.lower()

    # Drop a single leading "<provider>/" namespace, if present.
    name = re.sub(r'^[^/]+/', '', name)

    # Vertex-style resource paths can survive the namespace strip.
    for resource_prefix in ('publishers/google/models/', 'models/'):
        name = name.replace(resource_prefix, '')

    # Trim trailing date suffixes (e.g. -20241022, -2024-10-22).
    return re.sub(r'-\d{8}$|_\d{8}$|-\d{4}-\d{2}-\d{2}$', '', name)
|
|
249
|
+
|
|
250
|
+
def calculate_cost(model: str, token_usage: dict) -> float:
    """Estimate the USD cost of a completion.

    Args:
        model: Raw model identifier; normalized before lookup.
        token_usage: Usage dict; reads "prompt_tokens"/"input_tokens" and
            "completion_tokens"/"output_tokens", defaulting to 0.

    Returns:
        Estimated cost in USD (prices are per 1M tokens).
    """
    model_lower = normalize_model_name(model)

    # Resolution order: exact match, then longest-prefix match.
    pricing = MODEL_PRICING.get(model_lower)
    if pricing is None:
        prefix = next(
            (p for p in sorted(MODEL_PRICING, key=len, reverse=True)
             if model_lower.startswith(p)),
            None,
        )
        if prefix is not None:
            pricing = MODEL_PRICING[prefix]

    # Fall back to provider averages / default pricing.
    # BUG FIX: the warning used to fire whenever there was no *exact* match,
    # even when a prefix match had already supplied real pricing; it now
    # fires only when we truly fall back.
    if pricing is None:
        provider = get_provider_from_model(model)
        if provider in PROVIDER_AVERAGES:
            logger.warning(f"No pricing found for model: {model}, using {provider} average pricing")
            pricing = PROVIDER_AVERAGES[provider]
        else:
            logger.warning(f"No pricing found for model: {model}, using default pricing")
            pricing = {"input": 2.5, "output": 10.0}

    input_tokens = token_usage.get("prompt_tokens", token_usage.get("input_tokens", 0))
    output_tokens = token_usage.get("completion_tokens", token_usage.get("output_tokens", 0))

    cost = ((input_tokens * pricing["input"]) + (output_tokens * pricing["output"])) / 1_000_000
    return cost
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utility modules."""
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"""Consolidated image handling utilities.
|
|
2
|
+
|
|
3
|
+
This module unifies all image-related functionality from:
|
|
4
|
+
- image_upload.py
|
|
5
|
+
- telemetry/utils/image_storage.py
|
|
6
|
+
- Various extraction functions scattered across the codebase
|
|
7
|
+
"""
|
|
8
|
+
import base64
|
|
9
|
+
import io
|
|
10
|
+
import logging
|
|
11
|
+
import threading
|
|
12
|
+
from typing import List, Dict, Any, Optional, Tuple, Union
|
|
13
|
+
from PIL import Image
|
|
14
|
+
import requests
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger("Lucidic")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ImageHandler:
    """Centralized image handling for the SDK.

    Groups three previously scattered concerns: extracting base64 images
    from LLM payloads, thread-local image staging (a workaround for
    OpenTelemetry attribute size limits), and PIL-based conversion.
    """

    # Thread-local storage for images (from telemetry). Each thread gets
    # its own ``images`` list, created lazily on first store.
    _thread_local = threading.local()

    @classmethod
    def extract_base64_images(cls, data: Any) -> List[str]:
        """Extract base64 image URLs from various data structures.

        Args:
            data: Can be a string, dict, list, or nested structure containing image data

        Returns:
            List of base64 image data URLs (data:image/...), in traversal order
        """
        images = []

        if isinstance(data, str):
            if data.startswith('data:image'):
                images.append(data)
        elif isinstance(data, dict):
            # Keys whose image content has already been collected; skipped
            # during the generic recursion below.
            handled_keys = set()

            # Direct "image" field holding a data URL.
            image_value = data.get('image')
            if isinstance(image_value, str) and image_value.startswith('data:image'):
                images.append(image_value)
                handled_keys.add('image')

            # OpenAI-style {"type": "image_url", "image_url": {"url": ...}}.
            if data.get('type') == 'image_url':
                image_url = data.get('image_url', {})
                if isinstance(image_url, dict) and 'url' in image_url:
                    url = image_url['url']
                    if url.startswith('data:image'):
                        images.append(url)
                        handled_keys.add('image_url')

            # Recursively check remaining values.
            # BUG FIX: the original also recursed into the values captured
            # above, returning each such image twice.
            for key, value in data.items():
                if key not in handled_keys:
                    images.extend(cls.extract_base64_images(value))

        elif isinstance(data, list):
            for item in data:
                images.extend(cls.extract_base64_images(item))

        return images

    @classmethod
    def extract_images_from_messages(cls, messages: List[Dict[str, Any]]) -> List[str]:
        """Extract images from chat messages (OpenAI/Anthropic format).

        Args:
            messages: List of message dictionaries; non-dict entries are skipped

        Returns:
            List of base64 image data URLs
        """
        images = []

        for message in messages:
            if not isinstance(message, dict):
                continue

            content = message.get('content', '')

            # Only multimodal (list-valued) content can carry images;
            # plain-string content is ignored.
            if isinstance(content, list):
                for item in content:
                    if isinstance(item, dict):
                        if item.get('type') == 'image_url':
                            # OpenAI format: {"image_url": {"url": "data:image/..."}}
                            image_url = item.get('image_url', {})
                            if isinstance(image_url, dict):
                                url = image_url.get('url', '')
                                if url.startswith('data:image'):
                                    images.append(url)
                        elif item.get('type') == 'image':
                            # Anthropic format: {"source": {"media_type": ..., "data": ...}}
                            source = item.get('source', {})
                            if isinstance(source, dict):
                                data = source.get('data', '')
                                if data:
                                    media_type = source.get('media_type', 'image/jpeg')
                                    images.append(f"data:{media_type};base64,{data}")

        return images

    @classmethod
    def store_image_thread_local(cls, image_base64: str) -> str:
        """Store image in thread-local storage and return placeholder.

        Used for working around OpenTelemetry attribute size limits.

        Args:
            image_base64: Base64 encoded image data

        Returns:
            Placeholder string of the form "lucidic_image_<index>"
        """
        if not hasattr(cls._thread_local, 'images'):
            cls._thread_local.images = []

        cls._thread_local.images.append(image_base64)
        # Index of the just-appended image forms the placeholder suffix.
        placeholder = f"lucidic_image_{len(cls._thread_local.images) - 1}"

        logger.debug(f"[ImageHandler] Stored image in thread-local, placeholder: {placeholder}")
        return placeholder

    @classmethod
    def get_stored_images(cls) -> List[str]:
        """Get all images stored in thread-local storage.

        NOTE(review): returns the live internal list, not a copy; callers
        that mutate it will affect subsequent placeholder indices.
        """
        if hasattr(cls._thread_local, 'images'):
            return cls._thread_local.images
        return []

    @classmethod
    def clear_stored_images(cls) -> None:
        """Clear thread-local image storage (placeholder indices restart at 0)."""
        if hasattr(cls._thread_local, 'images'):
            cls._thread_local.images.clear()

    @classmethod
    def get_image_by_placeholder(cls, placeholder: str) -> Optional[str]:
        """Retrieve image by its placeholder from thread-local storage.

        Returns None for unknown or malformed placeholders and for
        out-of-range indices.
        """
        if hasattr(cls._thread_local, 'images') and placeholder.startswith('lucidic_image_'):
            try:
                index = int(placeholder.split('_')[-1])
                if 0 <= index < len(cls._thread_local.images):
                    return cls._thread_local.images[index]
            except (ValueError, IndexError):
                pass
        return None

    @classmethod
    def path_to_base64(cls, image_path: str, format: str = "JPEG") -> str:
        """Convert image file to base64 string.

        Args:
            image_path: Path to the image file
            format: Output format (JPEG or PNG)

        Returns:
            Base64 encoded image string (no data-URI prefix)
        """
        # BUG FIX: open via context manager so the underlying file handle
        # is released (PIL keeps files open lazily otherwise).
        with Image.open(image_path) as img:
            if format == "JPEG":
                # JPEG has no alpha channel: composite transparent images
                # onto a white background instead of dropping alpha.
                if img.mode in ("RGBA", "LA", "P"):
                    if img.mode == "P":
                        # BUG FIX: convert palette images to RGBA first so
                        # palette transparency is honored by the alpha-masked
                        # paste (the original pasted P images without a mask).
                        img = img.convert("RGBA")
                    background = Image.new("RGB", img.size, (255, 255, 255))
                    alpha = img.split()[-1]
                    background.paste(img, mask=alpha)
                    img = background
                else:
                    img = img.convert("RGB")

            buffered = io.BytesIO()
            img.save(buffered, format=format)
            img_bytes = buffered.getvalue()

        return base64.b64encode(img_bytes).decode('utf-8')

    @classmethod
    def base64_to_pil(cls, base64_str: str) -> Image.Image:
        """Convert base64 string to PIL Image.

        Args:
            base64_str: Base64 encoded image (with or without data URI prefix)

        Returns:
            PIL Image object (lazily loaded from an in-memory buffer)
        """
        # Remove data URI prefix ("data:<mime>;base64,") if present.
        if base64_str.startswith('data:'):
            base64_str = base64_str.split(',')[1] if ',' in base64_str else base64_str

        image_data = base64.b64decode(base64_str)
        image_stream = io.BytesIO(image_data)
        return Image.open(image_stream)

    @classmethod
    def prepare_for_upload(cls, image_data: Union[str, bytes], format: str = "JPEG") -> Tuple[io.BytesIO, str]:
        """Prepare image data for upload to S3.

        Args:
            image_data: Base64 string or raw bytes
            format: Target format (JPEG or GIF)

        Returns:
            Tuple of (BytesIO positioned at 0, content-type)

        Raises:
            ValueError: For formats other than JPEG or GIF.
        """
        if format == "JPEG":
            # Handle base64 string or raw bytes.
            if isinstance(image_data, str):
                pil_image = cls.base64_to_pil(image_data)
            else:
                pil_image = Image.open(io.BytesIO(image_data))

            # Composite alpha onto a white background; JPEG has no alpha.
            if pil_image.mode in ("RGBA", "LA"):
                background = Image.new("RGB", pil_image.size, (255, 255, 255))
                alpha = pil_image.split()[-1]
                background.paste(pil_image, mask=alpha)
                pil_image = background
            else:
                pil_image = pil_image.convert("RGB")

            image_obj = io.BytesIO()
            pil_image.save(image_obj, format="JPEG")
            image_obj.seek(0)
            content_type = "image/jpeg"

        elif format == "GIF":
            # GIFs are passed through without re-encoding; only base64
            # decoding (dropping any data-URI prefix) is performed.
            if isinstance(image_data, str):
                image_data = base64.b64decode(image_data.split(',')[1] if ',' in image_data else image_data)
            image_obj = io.BytesIO(image_data)
            content_type = "image/gif"

        else:
            raise ValueError(f"Unsupported format: {format}")

        return image_obj, content_type
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class ImageUploader:
    """S3 upload helpers built around backend-issued presigned URLs."""

    @staticmethod
    def get_presigned_url(
        agent_id: str,
        session_id: Optional[str] = None,
        event_id: Optional[str] = None,
        nthscreenshot: Optional[int] = None
    ) -> Tuple[str, str, str]:
        """Request a presigned S3 upload URL from the backend.

        Args:
            agent_id: The ID of the agent.
            session_id: Optional session the image belongs to.
            event_id: Optional event the image belongs to; requires
                ``nthscreenshot`` as well.
            nthscreenshot: Ordinal of the screenshot within the event.

        Returns:
            Tuple of (presigned_url, bucket_name, object_key).

        Raises:
            RuntimeError: If the SDK has not been initialized.
            ValueError: If ``event_id`` is given without ``nthscreenshot``.
        """
        # Imported lazily to avoid a circular dependency with sdk.init.
        from ..sdk.init import get_http

        http = get_http()
        if not http:
            raise RuntimeError("SDK not initialized")

        payload = {"agent_id": agent_id}
        if session_id:
            payload["session_id"] = session_id
        if event_id:
            payload["event_id"] = event_id
            if nthscreenshot is None:
                raise ValueError("nth_screenshot is required when event_id is provided")
            payload["nth_screenshot"] = nthscreenshot

        resp = http.get('getpresigneduploadurl', params=payload)
        return resp['presigned_url'], resp['bucket_name'], resp['object_key']

    @staticmethod
    def upload_to_s3(url: str, image_data: Union[str, bytes, io.BytesIO], format: str = "JPEG") -> None:
        """PUT an image to S3 through a presigned URL.

        Args:
            url: The presigned URL for the upload.
            image_data: Image data (base64 string, bytes, or BytesIO).
            format: Format of the image (JPEG or GIF).
        """
        if isinstance(image_data, io.BytesIO):
            # Caller already prepared the buffer; just pick the content type.
            buffer = image_data
            content_type = "image/jpeg" if format == "JPEG" else "image/gif"
        else:
            buffer, content_type = ImageHandler.prepare_for_upload(image_data, format)

        body = buffer.getvalue() if hasattr(buffer, 'getvalue') else buffer
        response = requests.put(
            url,
            data=body,
            headers={"Content-Type": content_type}
        )
        response.raise_for_status()

        logger.debug("[ImageUploader] Successfully uploaded image to S3")
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# Convenience functions for backward compatibility
|
|
313
|
+
def extract_base64_images(data: Any) -> List[str]:
    """Module-level alias for ImageHandler.extract_base64_images.

    Kept for backward compatibility with pre-consolidation call sites.
    """
    return ImageHandler.extract_base64_images(data)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def screenshot_path_to_jpeg(screenshot_path: str) -> str:
    """Read the file at ``screenshot_path`` and return it as base64 JPEG.

    Legacy alias for ImageHandler.path_to_base64 with JPEG output.
    """
    return ImageHandler.path_to_base64(screenshot_path, format="JPEG")
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def upload_image_to_s3(url: str, image: Union[str, bytes], format: str) -> None:
    """Legacy alias for ImageUploader.upload_to_s3 (returns nothing)."""
    ImageUploader.upload_to_s3(url, image, format)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def get_presigned_url(
    agent_id: str,
    step_id: Optional[str] = None,
    session_id: Optional[str] = None,
    event_id: Optional[str] = None,
    nthscreenshot: Optional[int] = None
) -> Tuple[str, str, str]:
    """Legacy module-level wrapper around ImageUploader.get_presigned_url.

    ``step_id`` is accepted for backward compatibility but ignored.
    """
    return ImageUploader.get_presigned_url(agent_id, session_id, event_id, nthscreenshot)
|