posthoganalytics 6.7.0__py3-none-any.whl → 6.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,438 @@
1
+ """
2
+ Gemini-specific conversion utilities.
3
+
4
+ This module handles the conversion of Gemini API responses and inputs
5
+ into standardized formats for PostHog tracking.
6
+ """
7
+
8
+ from typing import Any, Dict, List, Optional, TypedDict, Union
9
+
10
+ from posthoganalytics.ai.types import (
11
+ FormattedContentItem,
12
+ FormattedMessage,
13
+ StreamingUsageStats,
14
+ TokenUsage,
15
+ )
16
+
17
+
18
class GeminiPart(TypedDict, total=False):
    """A single part of a Gemini message; only text parts are modeled here.

    ``total=False`` makes every key optional.
    """

    # Plain-text payload of the part.
    text: str
22
+
23
+
24
class GeminiMessage(TypedDict, total=False):
    """Represents a Gemini message with various possible fields.

    ``total=False`` makes every key optional; a message may carry any one of
    the content shapes (``parts``, ``content``, or bare ``text``).
    """

    # Message author, e.g. "user" or "model".
    role: str
    # Gemini-native content: a list of part dicts (or GeminiPart).
    parts: List[Union[GeminiPart, Dict[str, Any]]]
    # Generic content: a plain string or a list of parts.
    content: Union[str, List[Any]]
    # Bare text shorthand.
    text: str
31
+
32
+
33
+ def _extract_text_from_parts(parts: List[Any]) -> str:
34
+ """
35
+ Extract and concatenate text from a parts array.
36
+
37
+ Args:
38
+ parts: List of parts that may contain text content
39
+
40
+ Returns:
41
+ Concatenated text from all parts
42
+ """
43
+
44
+ content_parts = []
45
+
46
+ for part in parts:
47
+ if isinstance(part, dict) and "text" in part:
48
+ content_parts.append(part["text"])
49
+
50
+ elif isinstance(part, str):
51
+ content_parts.append(part)
52
+
53
+ elif hasattr(part, "text"):
54
+ # Get the text attribute value
55
+ text_value = getattr(part, "text", "")
56
+ content_parts.append(text_value if text_value else str(part))
57
+
58
+ else:
59
+ content_parts.append(str(part))
60
+
61
+ return "".join(content_parts)
62
+
63
+
64
+ def _format_dict_message(item: Dict[str, Any]) -> FormattedMessage:
65
+ """
66
+ Format a dictionary message into standardized format.
67
+
68
+ Args:
69
+ item: Dictionary containing message data
70
+
71
+ Returns:
72
+ Formatted message with role and content
73
+ """
74
+
75
+ # Handle dict format with parts array (Gemini-specific format)
76
+ if "parts" in item and isinstance(item["parts"], list):
77
+ content = _extract_text_from_parts(item["parts"])
78
+ return {"role": item.get("role", "user"), "content": content}
79
+
80
+ # Handle dict with content field
81
+ if "content" in item:
82
+ content = item["content"]
83
+
84
+ if isinstance(content, list):
85
+ # If content is a list, extract text from it
86
+ content = _extract_text_from_parts(content)
87
+
88
+ elif not isinstance(content, str):
89
+ content = str(content)
90
+
91
+ return {"role": item.get("role", "user"), "content": content}
92
+
93
+ # Handle dict with text field
94
+ if "text" in item:
95
+ return {"role": item.get("role", "user"), "content": item["text"]}
96
+
97
+ # Fallback to string representation
98
+ return {"role": "user", "content": str(item)}
99
+
100
+
101
+ def _format_object_message(item: Any) -> FormattedMessage:
102
+ """
103
+ Format an object (with attributes) into standardized format.
104
+
105
+ Args:
106
+ item: Object that may have text or parts attributes
107
+
108
+ Returns:
109
+ Formatted message with role and content
110
+ """
111
+
112
+ # Handle object with parts attribute
113
+ if hasattr(item, "parts") and hasattr(item.parts, "__iter__"):
114
+ content = _extract_text_from_parts(item.parts)
115
+ role = getattr(item, "role", "user") if hasattr(item, "role") else "user"
116
+
117
+ # Ensure role is a string
118
+ if not isinstance(role, str):
119
+ role = "user"
120
+
121
+ return {"role": role, "content": content}
122
+
123
+ # Handle object with text attribute
124
+ if hasattr(item, "text"):
125
+ role = getattr(item, "role", "user") if hasattr(item, "role") else "user"
126
+
127
+ # Ensure role is a string
128
+ if not isinstance(role, str):
129
+ role = "user"
130
+
131
+ return {"role": role, "content": item.text}
132
+
133
+ # Handle object with content attribute
134
+ if hasattr(item, "content"):
135
+ role = getattr(item, "role", "user") if hasattr(item, "role") else "user"
136
+
137
+ # Ensure role is a string
138
+ if not isinstance(role, str):
139
+ role = "user"
140
+
141
+ content = item.content
142
+
143
+ if isinstance(content, list):
144
+ content = _extract_text_from_parts(content)
145
+
146
+ elif not isinstance(content, str):
147
+ content = str(content)
148
+ return {"role": role, "content": content}
149
+
150
+ # Fallback to string representation
151
+ return {"role": "user", "content": str(item)}
152
+
153
+
154
def format_gemini_response(response: Any) -> List[FormattedMessage]:
    """
    Convert a Gemini API response into standardized assistant messages.

    Args:
        response: The response object from the Gemini API.

    Returns:
        List of formatted messages with role and content.
    """

    messages: List[FormattedMessage] = []

    if response is None:
        return messages

    candidates = getattr(response, "candidates", None)
    if candidates:
        for candidate in candidates:
            cand_content = getattr(candidate, "content", None)
            if cand_content:
                blocks: List[FormattedContentItem] = []

                # Collect text and function-call blocks from the parts array.
                for part in getattr(cand_content, "parts", None) or []:
                    part_text = getattr(part, "text", None)
                    if part_text:
                        blocks.append({"type": "text", "text": part_text})
                        continue
                    call = getattr(part, "function_call", None)
                    if call:
                        blocks.append(
                            {
                                "type": "function",
                                "function": {
                                    "name": call.name,
                                    "arguments": call.args,
                                },
                            }
                        )

                if blocks:
                    messages.append({"role": "assistant", "content": blocks})
            elif getattr(candidate, "text", None):
                # Candidate without structured content but with direct text.
                messages.append(
                    {
                        "role": "assistant",
                        "content": [{"type": "text", "text": candidate.text}],
                    }
                )
    elif getattr(response, "text", None):
        # Simple response shape: top-level aggregated text only.
        messages.append(
            {
                "role": "assistant",
                "content": [{"type": "text", "text": response.text}],
            }
        )

    return messages
222
+
223
+
224
def extract_gemini_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Pull tool definitions out of Gemini API keyword arguments.

    Args:
        kwargs: Keyword arguments passed to the Gemini API.

    Returns:
        The ``tools`` attribute of the ``config`` entry when available,
        otherwise ``None``.
    """

    config = kwargs.get("config")
    if config is not None and hasattr(config, "tools"):
        return config.tools

    return None
239
+
240
+
241
def format_gemini_input(contents: Any) -> List[FormattedMessage]:
    """
    Normalize Gemini input contents into standardized messages for PostHog.

    Accepted shapes:
    - a plain string
    - a list mixing strings, dicts, and message-like objects
    - a single dict or message-like object
    - Gemini's native format with a ``parts`` array

    Args:
        contents: Input contents in any of the supported shapes.

    Returns:
        List of formatted messages with role and content fields.
    """

    # Plain string becomes a single user message.
    if isinstance(contents, str):
        return [{"role": "user", "content": contents}]

    # Single dict message.
    if isinstance(contents, dict):
        return [_format_dict_message(contents)]

    # Anything that is not a list is treated as one message-like object.
    if not isinstance(contents, list):
        return [_format_object_message(contents)]

    def _convert(entry: Any) -> FormattedMessage:
        # Dispatch each list element by its concrete shape.
        if isinstance(entry, str):
            return {"role": "user", "content": entry}
        if isinstance(entry, dict):
            return _format_dict_message(entry)
        return _format_object_message(entry)

    return [_convert(entry) for entry in contents]
284
+
285
+
286
def extract_gemini_usage_from_chunk(chunk: Any) -> StreamingUsageStats:
    """
    Extract token usage statistics from a Gemini streaming chunk.

    Args:
        chunk: Streaming chunk from the Gemini API.

    Returns:
        Usage stats with ``input_tokens``, ``output_tokens``, and
        ``total_tokens``; empty when the chunk carries no usage metadata.
    """

    usage: StreamingUsageStats = {}

    metadata = getattr(chunk, "usage_metadata", None)
    if not metadata:
        return usage

    # Gemini reports prompt_token_count and candidates_token_count.
    usage["input_tokens"] = getattr(metadata, "prompt_token_count", 0)
    usage["output_tokens"] = getattr(metadata, "candidates_token_count", 0)
    # Both keys were just assigned unconditionally, so the total is always
    # computable here (the previous "if both keys present" guard was dead code).
    usage["total_tokens"] = usage["input_tokens"] + usage["output_tokens"]

    return usage
311
+
312
+
313
def extract_gemini_content_from_chunk(chunk: Any) -> Optional[Dict[str, Any]]:
    """
    Pull the first content block (text or function call) from a streaming chunk.

    Args:
        chunk: Streaming chunk from the Gemini API.

    Returns:
        A content-block dict when the chunk carries content, else ``None``.
    """

    # Fast path: chunk-level aggregated text.
    chunk_text = getattr(chunk, "text", None)
    if chunk_text:
        return {"type": "text", "text": chunk_text}

    # Otherwise walk candidates looking for function calls or part-level text.
    for candidate in getattr(chunk, "candidates", None) or []:
        cand_content = getattr(candidate, "content", None)
        if not cand_content:
            continue
        for part in getattr(cand_content, "parts", None) or []:
            call = getattr(part, "function_call", None)
            if call:
                return {
                    "type": "function",
                    "function": {"name": call.name, "arguments": call.args},
                }
            part_text = getattr(part, "text", None)
            if part_text:
                return {"type": "text", "text": part_text}

    return None
349
+
350
+
351
def format_gemini_streaming_output(
    accumulated_content: Union[str, List[Any]],
) -> List[FormattedMessage]:
    """
    Build the final assistant message from accumulated streaming content.

    Args:
        accumulated_content: Either a single string (legacy), or a list of
            strings and/or content-block dicts gathered during streaming.

    Returns:
        A single-element list holding the formatted assistant message.
    """

    # Legacy path: a plain string becomes one text block.
    if isinstance(accumulated_content, str):
        return [
            {
                "role": "assistant",
                "content": [{"type": "text", "text": accumulated_content}],
            }
        ]

    if isinstance(accumulated_content, list):
        blocks: List[FormattedContentItem] = []
        pending: List[str] = []

        def _flush() -> None:
            # Merge buffered text fragments into a single text block.
            if pending:
                blocks.append({"type": "text", "text": "".join(pending)})
                pending.clear()

        for piece in accumulated_content:
            if isinstance(piece, str):
                # Legacy support: bare strings are buffered as text.
                pending.append(piece)
            elif isinstance(piece, dict):
                kind = piece.get("type")
                if kind == "text":
                    pending.append(piece.get("text", ""))
                elif kind == "function":
                    # A function call terminates the current text run.
                    _flush()
                    blocks.append(
                        {
                            "type": "function",
                            "function": piece.get("function", {}),
                        }
                    )

        # Emit any trailing text after the last function call.
        _flush()

        if blocks:
            return [{"role": "assistant", "content": blocks}]

    # Empty or unrecognized input: emit an empty text block.
    return [{"role": "assistant", "content": [{"type": "text", "text": ""}]}]
420
+
421
+
422
def standardize_gemini_usage(usage: Dict[str, Any]) -> TokenUsage:
    """
    Convert raw Gemini usage statistics into the common TokenUsage shape.

    Gemini already reports the standard input_tokens/output_tokens names,
    so this only fills in zero defaults for missing counts.

    Args:
        usage: Raw usage statistics from Gemini.

    Returns:
        Standardized TokenUsage dict.
    """

    input_tokens = usage.get("input_tokens", 0)
    output_tokens = usage.get("output_tokens", 0)

    # Gemini doesn't currently support cache or reasoning tokens.
    return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens)
@@ -37,6 +37,7 @@ from pydantic import BaseModel
37
37
 
38
38
  from posthoganalytics import setup
39
39
  from posthoganalytics.ai.utils import get_model_params, with_privacy_mode
40
+ from posthoganalytics.ai.sanitization import sanitize_langchain
40
41
  from posthoganalytics.client import Client
41
42
 
42
43
  log = logging.getLogger("posthog")
@@ -480,7 +481,7 @@ class CallbackHandler(BaseCallbackHandler):
480
481
  event_properties = {
481
482
  "$ai_trace_id": trace_id,
482
483
  "$ai_input_state": with_privacy_mode(
483
- self._ph_client, self._privacy_mode, run.input
484
+ self._ph_client, self._privacy_mode, sanitize_langchain(run.input)
484
485
  ),
485
486
  "$ai_latency": run.latency,
486
487
  "$ai_span_name": run.name,
@@ -550,7 +551,7 @@ class CallbackHandler(BaseCallbackHandler):
550
551
  "$ai_model": run.model,
551
552
  "$ai_model_parameters": run.model_params,
552
553
  "$ai_input": with_privacy_mode(
553
- self._ph_client, self._privacy_mode, run.input
554
+ self._ph_client, self._privacy_mode, sanitize_langchain(run.input)
554
555
  ),
555
556
  "$ai_http_status": 200,
556
557
  "$ai_latency": run.latency,
@@ -1,5 +1,20 @@
1
1
  from .openai import OpenAI
2
2
  from .openai_async import AsyncOpenAI
3
3
  from .openai_providers import AsyncAzureOpenAI, AzureOpenAI
4
+ from .openai_converter import (
5
+ format_openai_response,
6
+ format_openai_input,
7
+ extract_openai_tools,
8
+ format_openai_streaming_content,
9
+ )
4
10
 
5
- __all__ = ["OpenAI", "AsyncOpenAI", "AzureOpenAI", "AsyncAzureOpenAI"]
11
+ __all__ = [
12
+ "OpenAI",
13
+ "AsyncOpenAI",
14
+ "AzureOpenAI",
15
+ "AsyncAzureOpenAI",
16
+ "format_openai_response",
17
+ "format_openai_input",
18
+ "extract_openai_tools",
19
+ "format_openai_streaming_content",
20
+ ]