posthoganalytics 6.7.0__py3-none-any.whl → 7.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- posthoganalytics/__init__.py +84 -7
- posthoganalytics/ai/anthropic/__init__.py +10 -0
- posthoganalytics/ai/anthropic/anthropic.py +95 -65
- posthoganalytics/ai/anthropic/anthropic_async.py +95 -65
- posthoganalytics/ai/anthropic/anthropic_converter.py +443 -0
- posthoganalytics/ai/gemini/__init__.py +15 -1
- posthoganalytics/ai/gemini/gemini.py +66 -71
- posthoganalytics/ai/gemini/gemini_async.py +423 -0
- posthoganalytics/ai/gemini/gemini_converter.py +652 -0
- posthoganalytics/ai/langchain/callbacks.py +58 -13
- posthoganalytics/ai/openai/__init__.py +16 -1
- posthoganalytics/ai/openai/openai.py +140 -149
- posthoganalytics/ai/openai/openai_async.py +127 -82
- posthoganalytics/ai/openai/openai_converter.py +741 -0
- posthoganalytics/ai/sanitization.py +248 -0
- posthoganalytics/ai/types.py +125 -0
- posthoganalytics/ai/utils.py +339 -356
- posthoganalytics/client.py +345 -97
- posthoganalytics/contexts.py +81 -0
- posthoganalytics/exception_utils.py +250 -2
- posthoganalytics/feature_flags.py +26 -10
- posthoganalytics/flag_definition_cache.py +127 -0
- posthoganalytics/integrations/django.py +157 -19
- posthoganalytics/request.py +203 -23
- posthoganalytics/test/test_client.py +250 -22
- posthoganalytics/test/test_exception_capture.py +418 -0
- posthoganalytics/test/test_feature_flag_result.py +441 -2
- posthoganalytics/test/test_feature_flags.py +308 -104
- posthoganalytics/test/test_flag_definition_cache.py +612 -0
- posthoganalytics/test/test_module.py +0 -8
- posthoganalytics/test/test_request.py +536 -0
- posthoganalytics/test/test_utils.py +4 -1
- posthoganalytics/types.py +40 -0
- posthoganalytics/version.py +1 -1
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/METADATA +12 -12
- posthoganalytics-7.4.3.dist-info/RECORD +57 -0
- posthoganalytics-6.7.0.dist-info/RECORD +0 -49
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/WHEEL +0 -0
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/licenses/LICENSE +0 -0
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,652 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gemini-specific conversion utilities.
|
|
3
|
+
|
|
4
|
+
This module handles the conversion of Gemini API responses and inputs
|
|
5
|
+
into standardized formats for PostHog tracking.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict, List, Optional, TypedDict, Union
|
|
9
|
+
|
|
10
|
+
from posthoganalytics.ai.types import (
|
|
11
|
+
FormattedContentItem,
|
|
12
|
+
FormattedMessage,
|
|
13
|
+
TokenUsage,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class GeminiPart(TypedDict, total=False):
    """Represents a part in a Gemini message."""

    # Plain-text payload of the part. Other Gemini part kinds (e.g.
    # inline_data) are handled as loose dicts by the helpers below.
    text: str
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GeminiMessage(TypedDict, total=False):
    """Represents a Gemini message with various possible fields."""

    # Speaker role string; formatters fall back to "user" when absent.
    role: str
    # Gemini-native shape: a list of parts (text, inline_data, ...).
    parts: List[Union[GeminiPart, Dict[str, Any]]]
    # Alternative shape: a plain string or a list of content blocks.
    content: Union[str, List[Any]]
    # Bare-text shorthand shape.
    text: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _format_parts_as_content_blocks(parts: List[Any]) -> List[FormattedContentItem]:
|
|
33
|
+
"""
|
|
34
|
+
Format Gemini parts array into structured content blocks.
|
|
35
|
+
|
|
36
|
+
Preserves structure for multimodal content (text + images) instead of
|
|
37
|
+
concatenating everything into a string.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
parts: List of parts that may contain text, inline_data, etc.
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
List of formatted content blocks
|
|
44
|
+
"""
|
|
45
|
+
content_blocks: List[FormattedContentItem] = []
|
|
46
|
+
|
|
47
|
+
for part in parts:
|
|
48
|
+
# Handle dict with text field
|
|
49
|
+
if isinstance(part, dict) and "text" in part:
|
|
50
|
+
content_blocks.append({"type": "text", "text": part["text"]})
|
|
51
|
+
|
|
52
|
+
# Handle string parts
|
|
53
|
+
elif isinstance(part, str):
|
|
54
|
+
content_blocks.append({"type": "text", "text": part})
|
|
55
|
+
|
|
56
|
+
# Handle dict with inline_data (images, documents, etc.)
|
|
57
|
+
elif isinstance(part, dict) and "inline_data" in part:
|
|
58
|
+
inline_data = part["inline_data"]
|
|
59
|
+
mime_type = inline_data.get("mime_type", "")
|
|
60
|
+
content_type = "image" if mime_type.startswith("image/") else "document"
|
|
61
|
+
|
|
62
|
+
content_blocks.append(
|
|
63
|
+
{
|
|
64
|
+
"type": content_type,
|
|
65
|
+
"inline_data": inline_data,
|
|
66
|
+
}
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# Handle object with text attribute
|
|
70
|
+
elif hasattr(part, "text"):
|
|
71
|
+
text_value = getattr(part, "text", "")
|
|
72
|
+
if text_value:
|
|
73
|
+
content_blocks.append({"type": "text", "text": text_value})
|
|
74
|
+
|
|
75
|
+
# Handle object with inline_data attribute
|
|
76
|
+
elif hasattr(part, "inline_data"):
|
|
77
|
+
inline_data = part.inline_data
|
|
78
|
+
# Convert to dict if needed
|
|
79
|
+
if hasattr(inline_data, "mime_type") and hasattr(inline_data, "data"):
|
|
80
|
+
# Determine type based on mime_type
|
|
81
|
+
mime_type = inline_data.mime_type
|
|
82
|
+
content_type = "image" if mime_type.startswith("image/") else "document"
|
|
83
|
+
|
|
84
|
+
content_blocks.append(
|
|
85
|
+
{
|
|
86
|
+
"type": content_type,
|
|
87
|
+
"inline_data": {
|
|
88
|
+
"mime_type": mime_type,
|
|
89
|
+
"data": inline_data.data,
|
|
90
|
+
},
|
|
91
|
+
}
|
|
92
|
+
)
|
|
93
|
+
else:
|
|
94
|
+
content_blocks.append(
|
|
95
|
+
{
|
|
96
|
+
"type": "image",
|
|
97
|
+
"inline_data": inline_data,
|
|
98
|
+
}
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
return content_blocks
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _format_dict_message(item: Dict[str, Any]) -> FormattedMessage:
|
|
105
|
+
"""
|
|
106
|
+
Format a dictionary message into standardized format.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
item: Dictionary containing message data
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
Formatted message with role and content
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
# Handle dict format with parts array (Gemini-specific format)
|
|
116
|
+
if "parts" in item and isinstance(item["parts"], list):
|
|
117
|
+
content_blocks = _format_parts_as_content_blocks(item["parts"])
|
|
118
|
+
return {"role": item.get("role", "user"), "content": content_blocks}
|
|
119
|
+
|
|
120
|
+
# Handle dict with content field
|
|
121
|
+
if "content" in item:
|
|
122
|
+
content = item["content"]
|
|
123
|
+
|
|
124
|
+
if isinstance(content, list):
|
|
125
|
+
# If content is a list, format it as content blocks
|
|
126
|
+
content_blocks = _format_parts_as_content_blocks(content)
|
|
127
|
+
return {"role": item.get("role", "user"), "content": content_blocks}
|
|
128
|
+
|
|
129
|
+
elif not isinstance(content, str):
|
|
130
|
+
content = str(content)
|
|
131
|
+
|
|
132
|
+
return {"role": item.get("role", "user"), "content": content}
|
|
133
|
+
|
|
134
|
+
# Handle dict with text field
|
|
135
|
+
if "text" in item:
|
|
136
|
+
return {"role": item.get("role", "user"), "content": item["text"]}
|
|
137
|
+
|
|
138
|
+
# Fallback to string representation
|
|
139
|
+
return {"role": "user", "content": str(item)}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _format_object_message(item: Any) -> FormattedMessage:
|
|
143
|
+
"""
|
|
144
|
+
Format an object (with attributes) into standardized format.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
item: Object that may have text or parts attributes
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
Formatted message with role and content
|
|
151
|
+
"""
|
|
152
|
+
|
|
153
|
+
# Handle object with parts attribute
|
|
154
|
+
if hasattr(item, "parts") and hasattr(item.parts, "__iter__"):
|
|
155
|
+
content_blocks = _format_parts_as_content_blocks(list(item.parts))
|
|
156
|
+
role = getattr(item, "role", "user") if hasattr(item, "role") else "user"
|
|
157
|
+
|
|
158
|
+
# Ensure role is a string
|
|
159
|
+
if not isinstance(role, str):
|
|
160
|
+
role = "user"
|
|
161
|
+
|
|
162
|
+
return {"role": role, "content": content_blocks}
|
|
163
|
+
|
|
164
|
+
# Handle object with text attribute
|
|
165
|
+
if hasattr(item, "text"):
|
|
166
|
+
role = getattr(item, "role", "user") if hasattr(item, "role") else "user"
|
|
167
|
+
|
|
168
|
+
# Ensure role is a string
|
|
169
|
+
if not isinstance(role, str):
|
|
170
|
+
role = "user"
|
|
171
|
+
|
|
172
|
+
return {"role": role, "content": item.text}
|
|
173
|
+
|
|
174
|
+
# Handle object with content attribute
|
|
175
|
+
if hasattr(item, "content"):
|
|
176
|
+
role = getattr(item, "role", "user") if hasattr(item, "role") else "user"
|
|
177
|
+
|
|
178
|
+
# Ensure role is a string
|
|
179
|
+
if not isinstance(role, str):
|
|
180
|
+
role = "user"
|
|
181
|
+
|
|
182
|
+
content = item.content
|
|
183
|
+
|
|
184
|
+
if isinstance(content, list):
|
|
185
|
+
content_blocks = _format_parts_as_content_blocks(content)
|
|
186
|
+
return {"role": role, "content": content_blocks}
|
|
187
|
+
|
|
188
|
+
elif not isinstance(content, str):
|
|
189
|
+
content = str(content)
|
|
190
|
+
return {"role": role, "content": content}
|
|
191
|
+
|
|
192
|
+
# Fallback to string representation
|
|
193
|
+
return {"role": "user", "content": str(item)}
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def format_gemini_response(response: Any) -> List[FormattedMessage]:
    """
    Convert a Gemini API response into the standardized message format.

    Walks ``response.candidates`` (or a bare ``response.text``) and emits
    assistant messages whose content preserves text, function-call, and
    inline media (e.g. audio) blocks.

    Args:
        response: The response object from the Gemini API.

    Returns:
        List of formatted messages with role and content (empty when the
        response is None or carries no recognizable content).
    """
    import base64

    messages: List[FormattedMessage] = []

    if response is None:
        return messages

    def _part_to_block(part: Any) -> Optional[FormattedContentItem]:
        # Text part.
        if hasattr(part, "text") and part.text:
            return {"type": "text", "text": part.text}

        # Tool/function-call part.
        if hasattr(part, "function_call") and part.function_call:
            call = part.function_call
            return {
                "type": "function",
                "function": {"name": call.name, "arguments": call.args},
            }

        # Inline media (audio etc.); binary payloads are base64-encoded so
        # the result stays JSON-serializable.
        if hasattr(part, "inline_data") and part.inline_data:
            blob = part.inline_data
            mime = getattr(blob, "mime_type", "audio/pcm")
            payload = getattr(blob, "data", b"")
            if isinstance(payload, bytes):
                payload = base64.b64encode(payload).decode("utf-8")
            return {"type": "audio", "mime_type": mime, "data": payload}

        return None

    candidates = getattr(response, "candidates", None)
    if candidates:
        for candidate in candidates:
            if hasattr(candidate, "content") and candidate.content:
                blocks: List[FormattedContentItem] = []
                parts = getattr(candidate.content, "parts", None)
                if parts:
                    for part in parts:
                        block = _part_to_block(part)
                        if block is not None:
                            blocks.append(block)
                if blocks:
                    messages.append({"role": "assistant", "content": blocks})
            elif hasattr(candidate, "text") and candidate.text:
                messages.append(
                    {
                        "role": "assistant",
                        "content": [{"type": "text", "text": candidate.text}],
                    }
                )
    elif hasattr(response, "text") and response.text:
        messages.append(
            {
                "role": "assistant",
                "content": [{"type": "text", "text": response.text}],
            }
        )

    return messages
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def extract_gemini_system_instruction(config: Any) -> Optional[str]:
    """
    Pull the system instruction out of a Gemini config parameter.

    Supports attribute-based config objects plus dicts using either the
    snake_case or camelCase key.

    Args:
        config: Config object or dict that may contain a system instruction.

    Returns:
        The system instruction if present, None otherwise.
    """
    if config is None:
        return None

    # Attribute-based config objects take precedence.
    if hasattr(config, "system_instruction"):
        return config.system_instruction

    # Dict configs may use either naming convention.
    if isinstance(config, dict):
        for key in ("system_instruction", "systemInstruction"):
            if key in config:
                return config[key]

    return None
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def extract_gemini_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Pull tool definitions out of Gemini API kwargs.

    Args:
        kwargs: Keyword arguments passed to the Gemini API.

    Returns:
        The ``config.tools`` value if present, None otherwise.
    """
    config = kwargs.get("config")

    # Tools live on the config object, when one was supplied.
    if config is not None and hasattr(config, "tools"):
        return config.tools

    return None
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def format_gemini_input_with_system(
    contents: Any, config: Any = None
) -> List[FormattedMessage]:
    """
    Format Gemini input contents into standardized messages, prepending a
    system message when the config carries a system instruction.

    Args:
        contents: Input contents in various possible formats.
        config: Config object or dict that may contain a system instruction.

    Returns:
        List of formatted messages with role and content fields, with a
        system message prepended when the config provides one and the
        input does not already include a system-role message.
    """
    formatted_messages = format_gemini_input(contents)

    system_instruction = extract_gemini_system_instruction(config)
    if system_instruction is None:
        return formatted_messages

    # Don't duplicate a system message the caller already supplied.
    if any(msg.get("role") == "system" for msg in formatted_messages):
        return formatted_messages

    # Note: FormattedMessage is already imported at module level; the
    # original re-imported it locally, shadowing the module import.
    system_message: FormattedMessage = {
        "role": "system",
        "content": system_instruction,
    }
    return [system_message] + list(formatted_messages)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def format_gemini_input(contents: Any) -> List[FormattedMessage]:
    """
    Format Gemini input contents into standardized messages for PostHog tracking.

    Accepted shapes:
    - a plain string
    - a list of strings, dicts, or objects
    - a single dict or object (including the Gemini parts-array shape)

    Args:
        contents: Input contents in various possible formats.

    Returns:
        List of formatted messages with role and content fields.
    """
    # Plain-string prompt.
    if isinstance(contents, str):
        return [{"role": "user", "content": contents}]

    def _convert(entry: Any) -> FormattedMessage:
        # Dispatch a single entry based on its concrete shape.
        if isinstance(entry, str):
            return {"role": "user", "content": entry}
        if isinstance(entry, dict):
            return _format_dict_message(entry)
        return _format_object_message(entry)

    if isinstance(contents, list):
        return [_convert(entry) for entry in contents]

    # Single dict/object input.
    return [_convert(contents)]
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def extract_gemini_web_search_count(response: Any) -> int:
    """
    Detect whether a Gemini response used web search / grounding.

    Gemini bills per request that uses grounding, not per query, so this
    returns at most 1.

    Args:
        response: The response from the Gemini API.

    Returns:
        1 if web search/grounding was used, 0 otherwise.
    """

    def _grounding_used(metadata: Any) -> bool:
        # Non-empty search queries or grounding chunks both count as usage.
        for field in ("web_search_queries", "grounding_chunks"):
            if hasattr(metadata, field):
                items = getattr(metadata, field)
                if items is not None and len(items) > 0:
                    return True
        return False

    def _search_function_called(candidate: Any) -> bool:
        # A function call whose name mentions google_search/grounding also
        # indicates search usage.
        if not (hasattr(candidate, "content") and candidate.content):
            return False
        if not (hasattr(candidate.content, "parts") and candidate.content.parts):
            return False
        for part in candidate.content.parts:
            if hasattr(part, "function_call") and part.function_call:
                name = getattr(part.function_call, "name", "").lower()
                if "google_search" in name or "grounding" in name:
                    return True
        return False

    if hasattr(response, "candidates"):
        for candidate in response.candidates:
            if (
                hasattr(candidate, "grounding_metadata")
                and candidate.grounding_metadata
                and _grounding_used(candidate.grounding_metadata)
            ):
                return 1
            if _search_function_called(candidate):
                return 1

    return 0
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _extract_usage_from_metadata(metadata: Any) -> TokenUsage:
    """
    Build a TokenUsage from Gemini usage metadata.

    Shared by the streaming and non-streaming extraction paths.

    Args:
        metadata: ``usage_metadata`` from a Gemini response or chunk.

    Returns:
        TokenUsage with standardized usage counters.
    """
    usage = TokenUsage(
        input_tokens=getattr(metadata, "prompt_token_count", 0),
        output_tokens=getattr(metadata, "candidates_token_count", 0),
    )

    # Optional counters are recorded only when strictly positive, keeping
    # the payload free of zero/absent values.
    cached = getattr(metadata, "cached_content_token_count", None)
    if cached and cached > 0:
        usage["cache_read_input_tokens"] = cached

    thoughts = getattr(metadata, "thoughts_token_count", None)
    if thoughts and thoughts > 0:
        usage["reasoning_tokens"] = thoughts

    return usage
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def extract_gemini_usage_from_response(response: Any) -> TokenUsage:
    """
    Extract usage statistics from a full (non-streaming) Gemini response.

    Args:
        response: The complete response from the Gemini API.

    Returns:
        TokenUsage with standardized usage statistics (zeroed when the
        response carries no usage metadata).
    """
    metadata = getattr(response, "usage_metadata", None)
    if not metadata:
        return TokenUsage(input_tokens=0, output_tokens=0)

    usage = _extract_usage_from_metadata(metadata)

    # Record grounding/web-search usage only when it actually occurred.
    searches = extract_gemini_web_search_count(response)
    if searches > 0:
        usage["web_search_count"] = searches

    return usage
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
    """
    Extract usage statistics from a Gemini streaming chunk.

    Args:
        chunk: Streaming chunk from the Gemini API.

    Returns:
        TokenUsage with standardized usage statistics (possibly containing
        only a web-search count when the chunk has no usage metadata).
    """
    usage: TokenUsage = TokenUsage()

    # Grounding/search indicators can appear on any chunk, not only the
    # ones carrying usage data, so capture them before the metadata check.
    searches = extract_gemini_web_search_count(chunk)
    if searches > 0:
        usage["web_search_count"] = searches

    metadata = getattr(chunk, "usage_metadata", None)
    if not metadata:
        return usage

    # Merge token counters on top of any web-search count found above.
    usage.update(_extract_usage_from_metadata(metadata))

    return usage
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def extract_gemini_content_from_chunk(chunk: Any) -> Optional[Dict[str, Any]]:
    """
    Extract content (text or function call) from a Gemini streaming chunk.

    Args:
        chunk: Streaming chunk from the Gemini API.

    Returns:
        A single content-block dict if present, None otherwise.
    """
    # Fast path: chunk-level text.
    text = getattr(chunk, "text", None)
    if text:
        return {"type": "text", "text": text}

    # Otherwise scan candidate parts for a function call or text.
    candidates = getattr(chunk, "candidates", None)
    if candidates:
        for candidate in candidates:
            content = getattr(candidate, "content", None)
            if not content:
                continue
            parts = getattr(content, "parts", None)
            if not parts:
                continue
            for part in parts:
                call = getattr(part, "function_call", None)
                if call:
                    return {
                        "type": "function",
                        "function": {"name": call.name, "arguments": call.args},
                    }
                part_text = getattr(part, "text", None)
                if part_text:
                    return {"type": "text", "text": part_text}

    return None
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def format_gemini_streaming_output(
    accumulated_content: Union[str, List[Any]],
) -> List[FormattedMessage]:
    """
    Build the final assistant message from accumulated Gemini stream content.

    Consecutive text fragments are merged into single text blocks; function
    call blocks break the run and are kept in order.

    Args:
        accumulated_content: Accumulated content from streaming — a plain
            string (legacy), or a list of strings and/or content blocks.

    Returns:
        A single-element list holding the formatted assistant message.
    """
    # Legacy string input (backward compatibility).
    if isinstance(accumulated_content, str):
        return [
            {
                "role": "assistant",
                "content": [{"type": "text", "text": accumulated_content}],
            }
        ]

    if isinstance(accumulated_content, list):
        blocks: List[FormattedContentItem] = []
        pending_text: List[str] = []

        def _flush_text() -> None:
            # Collapse the current run of text fragments into one block.
            if pending_text:
                blocks.append({"type": "text", "text": "".join(pending_text)})
                pending_text.clear()

        for item in accumulated_content:
            if isinstance(item, str):
                # Legacy support: bare string fragments.
                pending_text.append(item)
            elif isinstance(item, dict):
                if item.get("type") == "text":
                    pending_text.append(item.get("text", ""))
                elif item.get("type") == "function":
                    # A function call terminates the current text run.
                    _flush_text()
                    blocks.append(
                        {
                            "type": "function",
                            "function": item.get("function", {}),
                        }
                    )

        _flush_text()

        if blocks:
            return [{"role": "assistant", "content": blocks}]

    # Fallback for empty or unexpected input.
    return [{"role": "assistant", "content": [{"type": "text", "text": ""}]}]
|