posthoganalytics 6.7.1__py3-none-any.whl → 6.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,460 @@
1
+ """
2
+ Gemini-specific conversion utilities.
3
+
4
+ This module handles the conversion of Gemini API responses and inputs
5
+ into standardized formats for PostHog tracking.
6
+ """
7
+
8
+ from typing import Any, Dict, List, Optional, TypedDict, Union
9
+
10
+ from posthoganalytics.ai.types import (
11
+ FormattedContentItem,
12
+ FormattedMessage,
13
+ TokenUsage,
14
+ )
15
+
16
+
17
class GeminiPart(TypedDict, total=False):
    """Represents a part in a Gemini message.

    All keys are optional (``total=False``); only text parts are modeled here.
    """

    # Text payload of the part, when present.
    text: str
21
+
22
+
23
class GeminiMessage(TypedDict, total=False):
    """Represents a Gemini message with various possible fields.

    All keys are optional (``total=False``); inbound messages may arrive in
    several shapes (a ``parts`` array, a ``content`` field, or bare ``text``).
    """

    # Message author role; formatters in this module default it to "user".
    role: str
    # Gemini-native parts array.
    parts: List[Union[GeminiPart, Dict[str, Any]]]
    # Alternative content field: a plain string or a list of parts.
    content: Union[str, List[Any]]
    # Bare text field.
    text: str
30
+
31
+
32
+ def _extract_text_from_parts(parts: List[Any]) -> str:
33
+ """
34
+ Extract and concatenate text from a parts array.
35
+
36
+ Args:
37
+ parts: List of parts that may contain text content
38
+
39
+ Returns:
40
+ Concatenated text from all parts
41
+ """
42
+
43
+ content_parts = []
44
+
45
+ for part in parts:
46
+ if isinstance(part, dict) and "text" in part:
47
+ content_parts.append(part["text"])
48
+
49
+ elif isinstance(part, str):
50
+ content_parts.append(part)
51
+
52
+ elif hasattr(part, "text"):
53
+ # Get the text attribute value
54
+ text_value = getattr(part, "text", "")
55
+ content_parts.append(text_value if text_value else str(part))
56
+
57
+ else:
58
+ content_parts.append(str(part))
59
+
60
+ return "".join(content_parts)
61
+
62
+
63
def _format_dict_message(item: Dict[str, Any]) -> FormattedMessage:
    """
    Convert a dict-shaped message into the standardized role/content format.

    Args:
        item: Dictionary containing message data

    Returns:
        Formatted message with role and content
    """

    role = item.get("role", "user")

    # Gemini-native shape: {"role": ..., "parts": [...]}
    if isinstance(item.get("parts"), list):
        return {"role": role, "content": _extract_text_from_parts(item["parts"])}

    # Generic shape with an explicit content field.
    if "content" in item:
        content = item["content"]
        if isinstance(content, list):
            # List content is flattened to its concatenated text.
            content = _extract_text_from_parts(content)
        elif not isinstance(content, str):
            content = str(content)
        return {"role": role, "content": content}

    # Bare text shape: {"text": ...}
    if "text" in item:
        return {"role": role, "content": item["text"]}

    # Unknown shape: stringify the whole dict.
    return {"role": "user", "content": str(item)}
98
+
99
+
100
def _coerce_role(item: Any) -> str:
    """Return ``item.role`` when it is a string; default to "user" otherwise."""
    role = getattr(item, "role", "user")
    return role if isinstance(role, str) else "user"


def _format_object_message(item: Any) -> FormattedMessage:
    """
    Format an object (with attributes) into standardized format.

    Checks, in order: an iterable ``parts`` attribute, a ``text`` attribute,
    a ``content`` attribute, then falls back to ``str(item)``.

    Args:
        item: Object that may have parts, text, or content attributes

    Returns:
        Formatted message with role and content
    """

    # Gemini-native objects carry an iterable `parts` attribute.
    if hasattr(item, "parts") and hasattr(item.parts, "__iter__"):
        return {
            "role": _coerce_role(item),
            "content": _extract_text_from_parts(item.parts),
        }

    # Plain text attribute.
    if hasattr(item, "text"):
        return {"role": _coerce_role(item), "content": item.text}

    # Content attribute: may itself be a list of parts.
    if hasattr(item, "content"):
        content = item.content
        if isinstance(content, list):
            content = _extract_text_from_parts(content)
        elif not isinstance(content, str):
            content = str(content)
        return {"role": _coerce_role(item), "content": content}

    # Fallback to string representation.
    return {"role": "user", "content": str(item)}
151
+
152
+
153
def format_gemini_response(response: Any) -> List[FormattedMessage]:
    """
    Format a Gemini response into standardized message format.

    Args:
        response: The response object from Gemini API

    Returns:
        List of formatted messages with role and content
    """

    def _blocks_from(candidate: Any) -> List[FormattedContentItem]:
        # Translate candidate.content.parts into text/function content blocks.
        blocks: List[FormattedContentItem] = []
        for part in getattr(candidate.content, "parts", None) or []:
            if getattr(part, "text", None):
                blocks.append({"type": "text", "text": part.text})
            elif getattr(part, "function_call", None):
                call = part.function_call
                blocks.append(
                    {
                        "type": "function",
                        "function": {"name": call.name, "arguments": call.args},
                    }
                )
        return blocks

    if response is None:
        return []

    messages: List[FormattedMessage] = []
    candidates = getattr(response, "candidates", None)

    if candidates:
        for candidate in candidates:
            if getattr(candidate, "content", None):
                blocks = _blocks_from(candidate)
                # Only emit a message when at least one block was extracted.
                if blocks:
                    messages.append({"role": "assistant", "content": blocks})
            elif getattr(candidate, "text", None):
                messages.append(
                    {
                        "role": "assistant",
                        "content": [{"type": "text", "text": candidate.text}],
                    }
                )
    elif getattr(response, "text", None):
        # No candidates: fall back to the response-level text shortcut.
        messages.append(
            {
                "role": "assistant",
                "content": [{"type": "text", "text": response.text}],
            }
        )

    return messages
221
+
222
+
223
def extract_gemini_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Extract tool definitions from Gemini API kwargs.

    Args:
        kwargs: Keyword arguments passed to Gemini API

    Returns:
        Tool definitions if present, None otherwise
    """

    # Tool definitions live on the `tools` attribute of the config object.
    config = kwargs.get("config")
    return getattr(config, "tools", None) if config is not None else None
238
+
239
+
240
def format_gemini_input(contents: Any) -> List[FormattedMessage]:
    """
    Format Gemini input contents into standardized messages for PostHog tracking.

    Accepts a plain string, a list (of strings, dicts, or objects), a single
    dict, or a single object.

    Args:
        contents: Input contents in various possible formats

    Returns:
        List of formatted messages with role and content fields
    """

    def _format_one(entry: Any) -> FormattedMessage:
        # Dispatch a single entry to the matching formatter by type.
        if isinstance(entry, str):
            return {"role": "user", "content": entry}
        if isinstance(entry, dict):
            return _format_dict_message(entry)
        return _format_object_message(entry)

    if isinstance(contents, list):
        return [_format_one(entry) for entry in contents]

    # Strings, single dicts, and single objects all go through the dispatcher.
    return [_format_one(contents)]
283
+
284
+
285
def _extract_usage_from_metadata(metadata: Any) -> TokenUsage:
    """
    Extract standardized token usage from Gemini usage metadata.

    Shared by both the streaming and non-streaming extraction paths.

    Args:
        metadata: usage_metadata from a Gemini response or chunk

    Returns:
        TokenUsage with standardized usage counts
    """

    # The token-count fields may be absent OR present-but-None on the
    # metadata object (they are Optional ints in the Gemini SDK), so coerce
    # falsy values to 0 instead of propagating None into TokenUsage.
    usage = TokenUsage(
        input_tokens=getattr(metadata, "prompt_token_count", 0) or 0,
        output_tokens=getattr(metadata, "candidates_token_count", 0) or 0,
    )

    # Only record cache tokens when present and positive (don't add if 0).
    cache_tokens = getattr(metadata, "cached_content_token_count", None)
    if cache_tokens and cache_tokens > 0:
        usage["cache_read_input_tokens"] = cache_tokens

    # Only record reasoning tokens when present and positive (don't add if 0).
    reasoning_tokens = getattr(metadata, "thoughts_token_count", None)
    if reasoning_tokens and reasoning_tokens > 0:
        usage["reasoning_tokens"] = reasoning_tokens

    return usage
314
+
315
+
316
def extract_gemini_usage_from_response(response: Any) -> TokenUsage:
    """
    Extract usage statistics from a full (non-streaming) Gemini response.

    Args:
        response: The complete response from Gemini API

    Returns:
        TokenUsage with standardized usage statistics
    """

    metadata = getattr(response, "usage_metadata", None)
    if not metadata:
        # No metadata: report explicit zero usage rather than omitting keys.
        return TokenUsage(input_tokens=0, output_tokens=0)
    return _extract_usage_from_metadata(metadata)
330
+
331
+
332
def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
    """
    Extract usage statistics from a Gemini streaming chunk.

    Args:
        chunk: Streaming chunk from Gemini API

    Returns:
        TokenUsage with standardized usage statistics; empty when the chunk
        carries no usage_metadata
    """

    metadata = getattr(chunk, "usage_metadata", None)
    if not metadata:
        # Chunk has no usage info: return an empty TokenUsage.
        return TokenUsage()
    # Delegate to the shared helper used by the non-streaming path.
    return _extract_usage_from_metadata(metadata)
352
+
353
+
354
def extract_gemini_content_from_chunk(chunk: Any) -> Optional[Dict[str, Any]]:
    """
    Extract content (text or a function call) from a Gemini streaming chunk.

    Args:
        chunk: Streaming chunk from Gemini API

    Returns:
        Content block dictionary if present, None otherwise
    """

    # Prefer the chunk-level text shortcut when it is present and non-empty.
    text = getattr(chunk, "text", None)
    if text:
        return {"type": "text", "text": text}

    # Otherwise walk candidates -> content -> parts for the first usable block.
    for candidate in getattr(chunk, "candidates", None) or []:
        content = getattr(candidate, "content", None)
        if not content:
            continue
        for part in getattr(content, "parts", None) or []:
            # Function calls take precedence over text within a part.
            call = getattr(part, "function_call", None)
            if call:
                return {
                    "type": "function",
                    "function": {"name": call.name, "arguments": call.args},
                }
            part_text = getattr(part, "text", None)
            if part_text:
                return {"type": "text", "text": part_text}

    return None
390
+
391
+
392
def format_gemini_streaming_output(
    accumulated_content: Union[str, List[Any]],
) -> List[FormattedMessage]:
    """
    Format the final output accumulated from a Gemini stream.

    Args:
        accumulated_content: Accumulated content from streaming — a plain
            string, a list of strings, or a list of content-block dicts

    Returns:
        List of formatted messages
    """

    # Legacy path: a single accumulated string (backward compatibility).
    if isinstance(accumulated_content, str):
        return [
            {
                "role": "assistant",
                "content": [{"type": "text", "text": accumulated_content}],
            }
        ]

    if isinstance(accumulated_content, list):
        blocks: List[FormattedContentItem] = []
        pending_text: List[str] = []

        def _flush_text() -> None:
            # Merge buffered text fragments into one text block.
            if pending_text:
                blocks.append({"type": "text", "text": "".join(pending_text)})
                pending_text.clear()

        for item in accumulated_content:
            if isinstance(item, str):
                # Legacy support: bare strings are text fragments.
                pending_text.append(item)
            elif isinstance(item, dict):
                kind = item.get("type")
                if kind == "text":
                    pending_text.append(item.get("text", ""))
                elif kind == "function":
                    # Text collected so far must precede the function call.
                    _flush_text()
                    blocks.append(
                        {"type": "function", "function": item.get("function", {})}
                    )
                # Other dict types are ignored, matching the original contract.

        _flush_text()

        if blocks:
            return [{"role": "assistant", "content": blocks}]

    # Fallback for empty or unexpected input.
    return [{"role": "assistant", "content": [{"type": "text", "text": ""}]}]
@@ -1,5 +1,20 @@
1
1
  from .openai import OpenAI
2
2
  from .openai_async import AsyncOpenAI
3
3
  from .openai_providers import AsyncAzureOpenAI, AzureOpenAI
4
+ from .openai_converter import (
5
+ format_openai_response,
6
+ format_openai_input,
7
+ extract_openai_tools,
8
+ format_openai_streaming_content,
9
+ )
4
10
 
5
- __all__ = ["OpenAI", "AsyncOpenAI", "AzureOpenAI", "AsyncAzureOpenAI"]
11
+ __all__ = [
12
+ "OpenAI",
13
+ "AsyncOpenAI",
14
+ "AzureOpenAI",
15
+ "AsyncAzureOpenAI",
16
+ "format_openai_response",
17
+ "format_openai_input",
18
+ "extract_openai_tools",
19
+ "format_openai_streaming_content",
20
+ ]