posthoganalytics 6.7.0__py3-none-any.whl → 7.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. posthoganalytics/__init__.py +84 -7
  2. posthoganalytics/ai/anthropic/__init__.py +10 -0
  3. posthoganalytics/ai/anthropic/anthropic.py +95 -65
  4. posthoganalytics/ai/anthropic/anthropic_async.py +95 -65
  5. posthoganalytics/ai/anthropic/anthropic_converter.py +443 -0
  6. posthoganalytics/ai/gemini/__init__.py +15 -1
  7. posthoganalytics/ai/gemini/gemini.py +66 -71
  8. posthoganalytics/ai/gemini/gemini_async.py +423 -0
  9. posthoganalytics/ai/gemini/gemini_converter.py +652 -0
  10. posthoganalytics/ai/langchain/callbacks.py +58 -13
  11. posthoganalytics/ai/openai/__init__.py +16 -1
  12. posthoganalytics/ai/openai/openai.py +140 -149
  13. posthoganalytics/ai/openai/openai_async.py +127 -82
  14. posthoganalytics/ai/openai/openai_converter.py +741 -0
  15. posthoganalytics/ai/sanitization.py +248 -0
  16. posthoganalytics/ai/types.py +125 -0
  17. posthoganalytics/ai/utils.py +339 -356
  18. posthoganalytics/client.py +345 -97
  19. posthoganalytics/contexts.py +81 -0
  20. posthoganalytics/exception_utils.py +250 -2
  21. posthoganalytics/feature_flags.py +26 -10
  22. posthoganalytics/flag_definition_cache.py +127 -0
  23. posthoganalytics/integrations/django.py +157 -19
  24. posthoganalytics/request.py +203 -23
  25. posthoganalytics/test/test_client.py +250 -22
  26. posthoganalytics/test/test_exception_capture.py +418 -0
  27. posthoganalytics/test/test_feature_flag_result.py +441 -2
  28. posthoganalytics/test/test_feature_flags.py +308 -104
  29. posthoganalytics/test/test_flag_definition_cache.py +612 -0
  30. posthoganalytics/test/test_module.py +0 -8
  31. posthoganalytics/test/test_request.py +536 -0
  32. posthoganalytics/test/test_utils.py +4 -1
  33. posthoganalytics/types.py +40 -0
  34. posthoganalytics/version.py +1 -1
  35. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/METADATA +12 -12
  36. posthoganalytics-7.4.3.dist-info/RECORD +57 -0
  37. posthoganalytics-6.7.0.dist-info/RECORD +0 -49
  38. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/WHEEL +0 -0
  39. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/licenses/LICENSE +0 -0
  40. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,652 @@
1
+ """
2
+ Gemini-specific conversion utilities.
3
+
4
+ This module handles the conversion of Gemini API responses and inputs
5
+ into standardized formats for PostHog tracking.
6
+ """
7
+
8
+ from typing import Any, Dict, List, Optional, TypedDict, Union
9
+
10
+ from posthoganalytics.ai.types import (
11
+ FormattedContentItem,
12
+ FormattedMessage,
13
+ TokenUsage,
14
+ )
15
+
16
+
17
class GeminiPart(TypedDict, total=False):
    """One entry of a Gemini message's ``parts`` list.

    The type is declared with ``total=False``, so every key is optional.
    """

    # Text carried by this part.
    text: str
21
+
22
+
23
class GeminiMessage(TypedDict, total=False):
    """Loose schema for a Gemini message.

    The type is declared with ``total=False``: a message may carry any
    subset of the keys below.
    """

    # Role associated with the message.
    role: str
    # List of parts: either ``GeminiPart`` mappings or arbitrary dicts.
    parts: List[Union[GeminiPart, Dict[str, Any]]]
    # Message content given as a plain string or as a list of items.
    content: Union[str, List[Any]]
    # Message content given as bare text.
    text: str
30
+
31
+
32
+ def _format_parts_as_content_blocks(parts: List[Any]) -> List[FormattedContentItem]:
33
+ """
34
+ Format Gemini parts array into structured content blocks.
35
+
36
+ Preserves structure for multimodal content (text + images) instead of
37
+ concatenating everything into a string.
38
+
39
+ Args:
40
+ parts: List of parts that may contain text, inline_data, etc.
41
+
42
+ Returns:
43
+ List of formatted content blocks
44
+ """
45
+ content_blocks: List[FormattedContentItem] = []
46
+
47
+ for part in parts:
48
+ # Handle dict with text field
49
+ if isinstance(part, dict) and "text" in part:
50
+ content_blocks.append({"type": "text", "text": part["text"]})
51
+
52
+ # Handle string parts
53
+ elif isinstance(part, str):
54
+ content_blocks.append({"type": "text", "text": part})
55
+
56
+ # Handle dict with inline_data (images, documents, etc.)
57
+ elif isinstance(part, dict) and "inline_data" in part:
58
+ inline_data = part["inline_data"]
59
+ mime_type = inline_data.get("mime_type", "")
60
+ content_type = "image" if mime_type.startswith("image/") else "document"
61
+
62
+ content_blocks.append(
63
+ {
64
+ "type": content_type,
65
+ "inline_data": inline_data,
66
+ }
67
+ )
68
+
69
+ # Handle object with text attribute
70
+ elif hasattr(part, "text"):
71
+ text_value = getattr(part, "text", "")
72
+ if text_value:
73
+ content_blocks.append({"type": "text", "text": text_value})
74
+
75
+ # Handle object with inline_data attribute
76
+ elif hasattr(part, "inline_data"):
77
+ inline_data = part.inline_data
78
+ # Convert to dict if needed
79
+ if hasattr(inline_data, "mime_type") and hasattr(inline_data, "data"):
80
+ # Determine type based on mime_type
81
+ mime_type = inline_data.mime_type
82
+ content_type = "image" if mime_type.startswith("image/") else "document"
83
+
84
+ content_blocks.append(
85
+ {
86
+ "type": content_type,
87
+ "inline_data": {
88
+ "mime_type": mime_type,
89
+ "data": inline_data.data,
90
+ },
91
+ }
92
+ )
93
+ else:
94
+ content_blocks.append(
95
+ {
96
+ "type": "image",
97
+ "inline_data": inline_data,
98
+ }
99
+ )
100
+
101
+ return content_blocks
102
+
103
+
104
+ def _format_dict_message(item: Dict[str, Any]) -> FormattedMessage:
105
+ """
106
+ Format a dictionary message into standardized format.
107
+
108
+ Args:
109
+ item: Dictionary containing message data
110
+
111
+ Returns:
112
+ Formatted message with role and content
113
+ """
114
+
115
+ # Handle dict format with parts array (Gemini-specific format)
116
+ if "parts" in item and isinstance(item["parts"], list):
117
+ content_blocks = _format_parts_as_content_blocks(item["parts"])
118
+ return {"role": item.get("role", "user"), "content": content_blocks}
119
+
120
+ # Handle dict with content field
121
+ if "content" in item:
122
+ content = item["content"]
123
+
124
+ if isinstance(content, list):
125
+ # If content is a list, format it as content blocks
126
+ content_blocks = _format_parts_as_content_blocks(content)
127
+ return {"role": item.get("role", "user"), "content": content_blocks}
128
+
129
+ elif not isinstance(content, str):
130
+ content = str(content)
131
+
132
+ return {"role": item.get("role", "user"), "content": content}
133
+
134
+ # Handle dict with text field
135
+ if "text" in item:
136
+ return {"role": item.get("role", "user"), "content": item["text"]}
137
+
138
+ # Fallback to string representation
139
+ return {"role": "user", "content": str(item)}
140
+
141
+
142
+ def _format_object_message(item: Any) -> FormattedMessage:
143
+ """
144
+ Format an object (with attributes) into standardized format.
145
+
146
+ Args:
147
+ item: Object that may have text or parts attributes
148
+
149
+ Returns:
150
+ Formatted message with role and content
151
+ """
152
+
153
+ # Handle object with parts attribute
154
+ if hasattr(item, "parts") and hasattr(item.parts, "__iter__"):
155
+ content_blocks = _format_parts_as_content_blocks(list(item.parts))
156
+ role = getattr(item, "role", "user") if hasattr(item, "role") else "user"
157
+
158
+ # Ensure role is a string
159
+ if not isinstance(role, str):
160
+ role = "user"
161
+
162
+ return {"role": role, "content": content_blocks}
163
+
164
+ # Handle object with text attribute
165
+ if hasattr(item, "text"):
166
+ role = getattr(item, "role", "user") if hasattr(item, "role") else "user"
167
+
168
+ # Ensure role is a string
169
+ if not isinstance(role, str):
170
+ role = "user"
171
+
172
+ return {"role": role, "content": item.text}
173
+
174
+ # Handle object with content attribute
175
+ if hasattr(item, "content"):
176
+ role = getattr(item, "role", "user") if hasattr(item, "role") else "user"
177
+
178
+ # Ensure role is a string
179
+ if not isinstance(role, str):
180
+ role = "user"
181
+
182
+ content = item.content
183
+
184
+ if isinstance(content, list):
185
+ content_blocks = _format_parts_as_content_blocks(content)
186
+ return {"role": role, "content": content_blocks}
187
+
188
+ elif not isinstance(content, str):
189
+ content = str(content)
190
+ return {"role": role, "content": content}
191
+
192
+ # Fallback to string representation
193
+ return {"role": "user", "content": str(item)}
194
+
195
+
196
def format_gemini_response(response: Any) -> List[FormattedMessage]:
    """
    Convert a Gemini response into a list of assistant messages.

    Each candidate with content yields one message whose content lists the
    candidate's text, function-call and inline-data parts in order. A
    candidate without content but with ``text`` yields a single text block.
    When the response has no candidates, ``response.text`` is used instead.

    Args:
        response: The response object from Gemini API

    Returns:
        List of formatted messages with role and content
    """

    def _block_from_part(part: Any) -> Optional[Dict[str, Any]]:
        # First matching, non-empty attribute wins: text, function_call, inline_data
        text = getattr(part, "text", None)
        if text:
            return {"type": "text", "text": text}

        call = getattr(part, "function_call", None)
        if call:
            return {
                "type": "function",
                "function": {"name": call.name, "arguments": call.args},
            }

        media = getattr(part, "inline_data", None)
        if media:
            import base64

            payload = getattr(media, "data", b"")
            # Raw bytes are base64-encoded so the block is JSON serializable;
            # anything else is passed through unchanged.
            if isinstance(payload, bytes):
                payload = base64.b64encode(payload).decode("utf-8")

            return {
                "type": "audio",
                "mime_type": getattr(media, "mime_type", "audio/pcm"),
                "data": payload,
            }

        return None

    messages: List[FormattedMessage] = []

    if response is None:
        return messages

    candidates = getattr(response, "candidates", None)

    if candidates:
        for candidate in candidates:
            body = getattr(candidate, "content", None)

            if body:
                blocks: List[FormattedContentItem] = []
                for part in getattr(body, "parts", None) or ():
                    block = _block_from_part(part)
                    if block is not None:
                        blocks.append(block)

                # A candidate whose parts produced nothing is skipped entirely
                if blocks:
                    messages.append({"role": "assistant", "content": blocks})

            elif getattr(candidate, "text", None):
                messages.append(
                    {
                        "role": "assistant",
                        "content": [{"type": "text", "text": candidate.text}],
                    }
                )

    elif getattr(response, "text", None):
        messages.append(
            {
                "role": "assistant",
                "content": [{"type": "text", "text": response.text}],
            }
        )

    return messages
287
+
288
+
289
def extract_gemini_system_instruction(config: Any) -> Optional[str]:
    """
    Pull the system instruction out of a Gemini config.

    An object's ``system_instruction`` attribute is checked first; for dicts
    the ``"system_instruction"`` key is tried before ``"systemInstruction"``.
    The stored value is returned unchanged.

    Args:
        config: Config object or dict that may contain system instruction

    Returns:
        System instruction string if present, None otherwise
    """
    if config is None:
        return None

    # Attribute access takes precedence over dict keys
    if hasattr(config, "system_instruction"):
        return config.system_instruction

    if isinstance(config, dict):
        for key in ("system_instruction", "systemInstruction"):
            if key in config:
                return config[key]

    return None
311
+
312
+
313
def extract_gemini_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Extract tool definitions from Gemini API kwargs.

    The ``config`` entry may be an object exposing a ``tools`` attribute or a
    plain dict carrying a ``"tools"`` key; dict configs are supported so this
    helper accepts the same config shapes as
    ``extract_gemini_system_instruction``.

    Args:
        kwargs: Keyword arguments passed to Gemini API

    Returns:
        Tool definitions if present, None otherwise
    """
    config = kwargs.get("config")

    if config is None:
        return None

    # Attribute access first, so object-style configs behave as before
    if hasattr(config, "tools"):
        return config.tools

    if isinstance(config, dict):
        return config.get("tools")

    return None
328
+
329
+
330
def format_gemini_input_with_system(
    contents: Any, config: Any = None
) -> List[FormattedMessage]:
    """
    Format Gemini input contents into standardized message format, including system instruction handling.

    The contents are formatted with ``format_gemini_input``. If ``config``
    carries a system instruction and none of the formatted messages already
    has the ``"system"`` role, a system message is prepended.

    Args:
        contents: Input contents in various possible formats
        config: Config object or dict that may contain system instruction

    Returns:
        List of formatted messages with role and content fields, with system message prepended if needed
    """
    formatted_messages = format_gemini_input(contents)

    system_instruction = extract_gemini_system_instruction(config)
    if system_instruction is None:
        return formatted_messages

    # Do not add a second system message when the input already has one
    if any(msg.get("role") == "system" for msg in formatted_messages):
        return formatted_messages

    system_message: FormattedMessage = {
        "role": "system",
        "content": system_instruction,
    }
    return [system_message, *formatted_messages]
360
+
361
+
362
def format_gemini_input(contents: Any) -> List[FormattedMessage]:
    """
    Normalize Gemini input contents into a list of role/content messages.

    A list is converted element by element; any other value is treated as a
    single message. Each element is converted by type:
    - strings become ``"user"`` messages
    - dicts go through ``_format_dict_message``
    - everything else goes through ``_format_object_message``

    Args:
        contents: Input contents in various possible formats

    Returns:
        List of formatted messages with role and content fields
    """

    def _to_message(entry: Any) -> FormattedMessage:
        if isinstance(entry, str):
            return {"role": "user", "content": entry}
        if isinstance(entry, dict):
            return _format_dict_message(entry)
        return _format_object_message(entry)

    if isinstance(contents, list):
        return [_to_message(entry) for entry in contents]

    # Single string, dict or object
    return [_to_message(contents)]
405
+
406
+
407
def extract_gemini_web_search_count(response: Any) -> int:
    """
    Extract web search count from Gemini response.

    Gemini bills per request that uses grounding, not per query.
    Returns 1 if grounding_metadata is present with actual search data, 0 otherwise.

    A candidate counts as grounded when its ``grounding_metadata`` has a
    non-empty ``web_search_queries`` or ``grounding_chunks``, or when one of
    its parts carries a function call whose name contains ``google_search``
    or ``grounding`` (case-insensitive).

    Args:
        response: The response from Gemini API

    Returns:
        1 if web search/grounding was used, 0 otherwise
    """

    # ``candidates`` may be missing *or* present-but-None; neither can be iterated
    candidates = getattr(response, "candidates", None)
    if not candidates:
        return 0

    for candidate in candidates:
        grounding_metadata = getattr(candidate, "grounding_metadata", None)

        if grounding_metadata:
            for field in ("web_search_queries", "grounding_chunks"):
                entries = getattr(grounding_metadata, field, None)
                if entries is not None and len(entries) > 0:
                    return 1

        # Also check for google_search or grounding in function call names
        content = getattr(candidate, "content", None)
        parts = getattr(content, "parts", None) if content else None

        for part in parts or ():
            function_call = getattr(part, "function_call", None)
            if not function_call:
                continue

            # The name attribute can exist and still be None
            raw_name = getattr(function_call, "name", None)
            function_name = raw_name.lower() if isinstance(raw_name, str) else ""

            if "google_search" in function_name or "grounding" in function_name:
                return 1

    return 0
460
+
461
+
462
def _extract_usage_from_metadata(metadata: Any) -> TokenUsage:
    """
    Build a ``TokenUsage`` from Gemini usage metadata.

    Shared by the streaming and non-streaming extraction paths.
    ``prompt_token_count`` and ``candidates_token_count`` are copied as-is
    (0 only when the attribute is missing). Cache and reasoning counts are
    added only when they are greater than zero.

    Args:
        metadata: usage_metadata from Gemini response or chunk

    Returns:
        TokenUsage with standardized usage
    """
    usage = TokenUsage(
        input_tokens=getattr(metadata, "prompt_token_count", 0),
        output_tokens=getattr(metadata, "candidates_token_count", 0),
    )

    # Optional counters are left out entirely when absent, None or zero
    cached = getattr(metadata, "cached_content_token_count", None)
    if cached and cached > 0:
        usage["cache_read_input_tokens"] = cached

    thoughts = getattr(metadata, "thoughts_token_count", None)
    if thoughts and thoughts > 0:
        usage["reasoning_tokens"] = thoughts

    return usage
491
+
492
+
493
def extract_gemini_usage_from_response(response: Any) -> TokenUsage:
    """
    Compute usage statistics for a complete (non-streaming) Gemini response.

    Without usage metadata, zero input/output tokens are reported and the
    web search count is not looked at.

    Args:
        response: The complete response from Gemini API

    Returns:
        TokenUsage with standardized usage statistics
    """
    metadata = getattr(response, "usage_metadata", None)
    if not metadata:
        return TokenUsage(input_tokens=0, output_tokens=0)

    usage = _extract_usage_from_metadata(metadata)

    # Record the web search count only when grounding was detected
    searches = extract_gemini_web_search_count(response)
    if searches > 0:
        usage["web_search_count"] = searches

    return usage
514
+
515
+
516
def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
    """
    Compute usage statistics for a single Gemini streaming chunk.

    The web search count is evaluated before the usage metadata check, so it
    is reported even for chunks that carry no usage metadata.

    Args:
        chunk: Streaming chunk from Gemini API

    Returns:
        TokenUsage with standardized usage statistics
    """

    usage: TokenUsage = TokenUsage()

    searches = extract_gemini_web_search_count(chunk)
    if searches > 0:
        usage["web_search_count"] = searches

    metadata = getattr(chunk, "usage_metadata", None)
    if metadata:
        # Token counts are merged on top of any web search count found above
        usage.update(_extract_usage_from_metadata(metadata))

    return usage
544
+
545
+
546
def extract_gemini_content_from_chunk(chunk: Any) -> Optional[Dict[str, Any]]:
    """
    Return the first content block found in a Gemini streaming chunk.

    A non-empty ``chunk.text`` wins. Otherwise the candidates' parts are
    scanned in order and the first part with a function call or non-empty
    text is returned (the function call is checked first on each part).

    Args:
        chunk: Streaming chunk from Gemini API

    Returns:
        Content block dictionary if present, None otherwise
    """

    chunk_text = getattr(chunk, "text", None)
    if chunk_text:
        return {"type": "text", "text": chunk_text}

    for candidate in getattr(chunk, "candidates", None) or ():
        body = getattr(candidate, "content", None)
        if not body:
            continue

        for part in getattr(body, "parts", None) or ():
            call = getattr(part, "function_call", None)
            if call:
                return {
                    "type": "function",
                    "function": {"name": call.name, "arguments": call.args},
                }

            part_text = getattr(part, "text", None)
            if part_text:
                return {"type": "text", "text": part_text}

    return None
582
+
583
+
584
def format_gemini_streaming_output(
    accumulated_content: Union[str, List[Any]],
) -> List[FormattedMessage]:
    """
    Build the final assistant message from content accumulated while streaming.

    Consecutive text pieces (bare strings or ``"text"`` blocks) are merged
    into one text block; each ``"function"`` block closes the text gathered
    so far and is emitted on its own. Other list items are ignored.

    Args:
        accumulated_content: Accumulated content from streaming (string, list of strings, or list of content blocks)

    Returns:
        List of formatted messages
    """

    # Legacy callers pass the whole output as one string
    if isinstance(accumulated_content, str):
        return [
            {
                "role": "assistant",
                "content": [{"type": "text", "text": accumulated_content}],
            }
        ]

    blocks: List[FormattedContentItem] = []

    if isinstance(accumulated_content, list):
        pending_text: List[Any] = []

        def _flush_text() -> None:
            # Emit the text collected so far as a single block, then reset
            if pending_text:
                blocks.append({"type": "text", "text": "".join(pending_text)})
                pending_text.clear()

        for entry in accumulated_content:
            if isinstance(entry, str):
                pending_text.append(entry)
                continue

            if not isinstance(entry, dict):
                continue

            kind = entry.get("type")
            if kind == "text":
                pending_text.append(entry.get("text", ""))
            elif kind == "function":
                _flush_text()
                blocks.append(
                    {
                        "type": "function",
                        "function": entry.get("function", {}),
                    }
                )

        _flush_text()

    if blocks:
        return [{"role": "assistant", "content": blocks}]

    # Empty or unexpected input: a single empty text block
    return [{"role": "assistant", "content": [{"type": "text", "text": ""}]}]