posthog 6.7.1__py3-none-any.whl → 6.7.2__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
posthog/ai/types.py ADDED
@@ -0,0 +1,142 @@
+ """
+ Common type definitions for PostHog AI SDK.
+
+ These types are used for formatting messages and responses across different AI providers
+ (Anthropic, OpenAI, Gemini, etc.) to ensure consistency in tracking and data structure.
+ """
+
+ from typing import Any, Dict, List, Optional, TypedDict, Union
+
+
+ class FormattedTextContent(TypedDict):
+     """Formatted text content item."""
+
+     type: str  # Literal["text"]
+     text: str
+
+
+ class FormattedFunctionCall(TypedDict, total=False):
+     """Formatted function/tool call content item."""
+
+     type: str  # Literal["function"]
+     id: Optional[str]
+     function: Dict[str, Any]  # Contains 'name' and 'arguments'
+
+
+ class FormattedImageContent(TypedDict):
+     """Formatted image content item."""
+
+     type: str  # Literal["image"]
+     image: str
+
+
+ # Union type for all formatted content items
+ FormattedContentItem = Union[
+     FormattedTextContent,
+     FormattedFunctionCall,
+     FormattedImageContent,
+     Dict[str, Any],  # Fallback for unknown content types
+ ]
+
+
+ class FormattedMessage(TypedDict):
+     """
+     Standardized message format for PostHog tracking.
+
+     Used across all providers to ensure consistent message structure
+     when sending events to PostHog.
+     """
+
+     role: str
+     content: Union[str, List[FormattedContentItem], Any]
+
+
+ class TokenUsage(TypedDict, total=False):
+     """
+     Token usage information for AI model responses.
+
+     Different providers may populate different fields.
+     """
+
+     input_tokens: int
+     output_tokens: int
+     cache_read_input_tokens: Optional[int]
+     cache_creation_input_tokens: Optional[int]
+     reasoning_tokens: Optional[int]
+
+
+ class ProviderResponse(TypedDict, total=False):
+     """
+     Standardized provider response format.
+
+     Used for consistent response formatting across all providers.
+     """
+
+     messages: List[FormattedMessage]
+     usage: TokenUsage
+     error: Optional[str]
+
+
+ class StreamingUsageStats(TypedDict, total=False):
+     """
+     Usage statistics collected during streaming.
+
+     Different providers populate different fields during streaming.
+     """
+
+     input_tokens: int
+     output_tokens: int
+     cache_read_input_tokens: Optional[int]
+     cache_creation_input_tokens: Optional[int]
+     reasoning_tokens: Optional[int]
+     # OpenAI-specific names
+     prompt_tokens: Optional[int]
+     completion_tokens: Optional[int]
+     total_tokens: Optional[int]
+
+
+ class StreamingContentBlock(TypedDict, total=False):
+     """
+     Content block used during streaming to accumulate content.
+
+     Used for tracking text and function calls as they stream in.
+     """
+
+     type: str  # "text" or "function"
+     text: Optional[str]
+     id: Optional[str]
+     function: Optional[Dict[str, Any]]
+
+
+ class ToolInProgress(TypedDict):
+     """
+     Tracks a tool/function call being accumulated during streaming.
+
+     Used by Anthropic to accumulate JSON input for tools.
+     """
+
+     block: StreamingContentBlock
+     input_string: str
+
+
+ class StreamingEventData(TypedDict):
+     """
+     Standardized data for streaming events across all providers.
+
+     This type ensures consistent data structure when capturing streaming events,
+     with all provider-specific formatting already completed.
+     """
+
+     provider: str  # "openai", "anthropic", "gemini"
+     model: str
+     base_url: str
+     kwargs: Dict[str, Any]  # Original kwargs for tool extraction and special handling
+     formatted_input: Any  # Provider-formatted input ready for tracking
+     formatted_output: Any  # Provider-formatted output ready for tracking
+     usage_stats: TokenUsage  # Standardized token counts
+     latency: float
+     distinct_id: Optional[str]
+     trace_id: Optional[str]
+     properties: Optional[Dict[str, Any]]
+     privacy_mode: bool
+     groups: Optional[Dict[str, Any]]
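Note: these are plain `TypedDict`s, so at runtime they are ordinary dicts. A minimal sketch of how the shapes compose (illustrative values only, assuming just the definitions above):

```python
from posthog.ai.types import FormattedMessage, TokenUsage

# An assistant turn carrying one text item and one tool call, in the
# provider-agnostic shape defined above.
message: FormattedMessage = {
    "role": "assistant",
    "content": [
        {"type": "text", "text": "Checking the weather now."},
        {
            "type": "function",
            "id": "call_123",  # illustrative id
            "function": {"name": "get_weather", "arguments": {"city": "Berlin"}},
        },
    ],
}

# total=False on TokenUsage means any subset of fields is valid.
usage: TokenUsage = {"input_tokens": 42, "output_tokens": 7}
```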
posthog/ai/utils.py CHANGED
@@ -1,10 +1,10 @@
  import time
  import uuid
- from typing import Any, Callable, Dict, List, Optional
+ from typing import Any, Callable, Dict, Optional

- from httpx import URL

  from posthog.client import Client as PostHogClient
+ from posthog.ai.types import StreamingEventData, StreamingUsageStats
  from posthog.ai.sanitization import (
      sanitize_openai,
      sanitize_anthropic,
@@ -13,6 +13,35 @@ from posthog.ai.sanitization import (
  )


+ def merge_usage_stats(
+     target: Dict[str, int], source: StreamingUsageStats, mode: str = "incremental"
+ ) -> None:
+     """
+     Merge streaming usage statistics into target dict, handling None values.
+
+     Supports two modes:
+     - "incremental": Add source values to target (for APIs that report new tokens)
+     - "cumulative": Replace target with source values (for APIs that report totals)
+
+     Args:
+         target: Dictionary to update with usage stats
+         source: StreamingUsageStats that may contain None values
+         mode: Either "incremental" or "cumulative"
+     """
+     if mode == "incremental":
+         # Add new values to existing totals
+         for key, value in source.items():
+             if value is not None and isinstance(value, int):
+                 target[key] = target.get(key, 0) + value
+     elif mode == "cumulative":
+         # Replace with latest values (already cumulative)
+         for key, value in source.items():
+             if value is not None and isinstance(value, int):
+                 target[key] = value
+     else:
+         raise ValueError(f"Invalid mode: {mode}. Must be 'incremental' or 'cumulative'")
+
+
  def get_model_params(kwargs: Dict[str, Any]) -> Dict[str, Any]:
      """
      Extracts model parameters from the kwargs dictionary.
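Note: the two modes of `merge_usage_stats` exist because some streaming APIs emit per-chunk token deltas while others emit running totals. A minimal sketch of the difference (made-up numbers, using only the function added above):

```python
totals: dict = {}

# "incremental": each update carries only the new tokens, so values add up.
merge_usage_stats(totals, {"output_tokens": 5}, mode="incremental")
merge_usage_stats(totals, {"output_tokens": 3}, mode="incremental")
assert totals["output_tokens"] == 8

# "cumulative": each update is already a running total, so the latest wins.
merge_usage_stats(totals, {"output_tokens": 12}, mode="cumulative")
assert totals["output_tokens"] == 12

# None values are skipped in both modes.
merge_usage_stats(totals, {"reasoning_tokens": None}, mode="incremental")
assert "reasoning_tokens" not in totals
```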
@@ -109,275 +138,96 @@ def format_response(response, provider: str):
      """
      Format a regular (non-streaming) response.
      """
-     output = []
-     if response is None:
-         return output
      if provider == "anthropic":
-         return format_response_anthropic(response)
-     elif provider == "openai":
-         return format_response_openai(response)
-     elif provider == "gemini":
-         return format_response_gemini(response)
-     return output
-
+         from posthog.ai.anthropic.anthropic_converter import format_anthropic_response

- def format_response_anthropic(response):
-     output = []
-     content = []
-
-     for choice in response.content:
-         if (
-             hasattr(choice, "type")
-             and choice.type == "text"
-             and hasattr(choice, "text")
-             and choice.text
-         ):
-             content.append({"type": "text", "text": choice.text})
-         elif (
-             hasattr(choice, "type")
-             and choice.type == "tool_use"
-             and hasattr(choice, "name")
-             and hasattr(choice, "id")
-         ):
-             tool_call = {
-                 "type": "function",
-                 "id": choice.id,
-                 "function": {
-                     "name": choice.name,
-                     "arguments": getattr(choice, "input", {}),
-                 },
-             }
-             content.append(tool_call)
-
-     if content:
-         message = {
-             "role": "assistant",
-             "content": content,
-         }
-         output.append(message)
-
-     return output
-
-
- def format_response_openai(response):
-     output = []
-
-     if hasattr(response, "choices"):
-         content = []
-         role = "assistant"
-
-         for choice in response.choices:
-             # Handle Chat Completions response format
-             if hasattr(choice, "message") and choice.message:
-                 if choice.message.role:
-                     role = choice.message.role
-
-                 if choice.message.content:
-                     content.append({"type": "text", "text": choice.message.content})
-
-                 if hasattr(choice.message, "tool_calls") and choice.message.tool_calls:
-                     for tool_call in choice.message.tool_calls:
-                         content.append(
-                             {
-                                 "type": "function",
-                                 "id": tool_call.id,
-                                 "function": {
-                                     "name": tool_call.function.name,
-                                     "arguments": tool_call.function.arguments,
-                                 },
-                             }
-                         )
-
-         if content:
-             message = {
-                 "role": role,
-                 "content": content,
-             }
-             output.append(message)
-
-     # Handle Responses API format
-     if hasattr(response, "output"):
-         content = []
-         role = "assistant"
-
-         for item in response.output:
-             if item.type == "message":
-                 role = item.role
-
-                 if hasattr(item, "content") and isinstance(item.content, list):
-                     for content_item in item.content:
-                         if (
-                             hasattr(content_item, "type")
-                             and content_item.type == "output_text"
-                             and hasattr(content_item, "text")
-                         ):
-                             content.append({"type": "text", "text": content_item.text})
-                         elif hasattr(content_item, "text"):
-                             content.append({"type": "text", "text": content_item.text})
-                         elif (
-                             hasattr(content_item, "type")
-                             and content_item.type == "input_image"
-                             and hasattr(content_item, "image_url")
-                         ):
-                             content.append(
-                                 {
-                                     "type": "image",
-                                     "image": content_item.image_url,
-                                 }
-                             )
-                 elif hasattr(item, "content"):
-                     content.append({"type": "text", "text": str(item.content)})
-
-             elif hasattr(item, "type") and item.type == "function_call":
-                 content.append(
-                     {
-                         "type": "function",
-                         "id": getattr(item, "call_id", getattr(item, "id", "")),
-                         "function": {
-                             "name": item.name,
-                             "arguments": getattr(item, "arguments", {}),
-                         },
-                     }
-                 )
+         return format_anthropic_response(response)
+     elif provider == "openai":
+         from posthog.ai.openai.openai_converter import format_openai_response

-         if content:
-             message = {
-                 "role": role,
-                 "content": content,
-             }
-             output.append(message)
-
-     return output
-
-
- def format_response_gemini(response):
-     output = []
-
-     if hasattr(response, "candidates") and response.candidates:
-         for candidate in response.candidates:
-             if hasattr(candidate, "content") and candidate.content:
-                 content = []
-
-                 if hasattr(candidate.content, "parts") and candidate.content.parts:
-                     for part in candidate.content.parts:
-                         if hasattr(part, "text") and part.text:
-                             content.append({"type": "text", "text": part.text})
-                         elif hasattr(part, "function_call") and part.function_call:
-                             function_call = part.function_call
-                             content.append(
-                                 {
-                                     "type": "function",
-                                     "function": {
-                                         "name": function_call.name,
-                                         "arguments": function_call.args,
-                                     },
-                                 }
-                             )
-
-                 if content:
-                     message = {
-                         "role": "assistant",
-                         "content": content,
-                     }
-                     output.append(message)
-
-             elif hasattr(candidate, "text") and candidate.text:
-                 output.append(
-                     {
-                         "role": "assistant",
-                         "content": [{"type": "text", "text": candidate.text}],
-                     }
-                 )
-     elif hasattr(response, "text") and response.text:
-         output.append(
-             {
-                 "role": "assistant",
-                 "content": [{"type": "text", "text": response.text}],
-             }
-         )
+         return format_openai_response(response)
+     elif provider == "gemini":
+         from posthog.ai.gemini.gemini_converter import format_gemini_response

-     return output
+         return format_gemini_response(response)
+     return []


  def extract_available_tool_calls(provider: str, kwargs: Dict[str, Any]):
+     """
+     Extract available tool calls for the given provider.
+     """
      if provider == "anthropic":
-         if "tools" in kwargs:
-             return kwargs["tools"]
+         from posthog.ai.anthropic.anthropic_converter import extract_anthropic_tools

-         return None
+         return extract_anthropic_tools(kwargs)
      elif provider == "gemini":
-         if "config" in kwargs and hasattr(kwargs["config"], "tools"):
-             return kwargs["config"].tools
+         from posthog.ai.gemini.gemini_converter import extract_gemini_tools

-         return None
+         return extract_gemini_tools(kwargs)
      elif provider == "openai":
-         if "tools" in kwargs:
-             return kwargs["tools"]
+         from posthog.ai.openai.openai_converter import extract_openai_tools

-         return None
+         return extract_openai_tools(kwargs)


  def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
-     messages: List[Dict[str, Any]] = []
+     """
+     Merge system prompts and format messages for the given provider.
+     """
      if provider == "anthropic":
+         from posthog.ai.anthropic.anthropic_converter import format_anthropic_input
+
          messages = kwargs.get("messages") or []
-         if kwargs.get("system") is None:
-             return messages
-         return [{"role": "system", "content": kwargs.get("system")}] + messages
+         system = kwargs.get("system")
+         return format_anthropic_input(messages, system)
      elif provider == "gemini":
-         contents = kwargs.get("contents", [])
-         if isinstance(contents, str):
-             return [{"role": "user", "content": contents}]
-         elif isinstance(contents, list):
-             formatted = []
-             for item in contents:
-                 if isinstance(item, str):
-                     formatted.append({"role": "user", "content": item})
-                 elif hasattr(item, "text"):
-                     formatted.append({"role": "user", "content": item.text})
-                 else:
-                     formatted.append({"role": "user", "content": str(item)})
-             return formatted
-         else:
-             return [{"role": "user", "content": str(contents)}]
-
-     # For OpenAI, handle both Chat Completions and Responses API
-     if kwargs.get("messages") is not None:
-         messages = list(kwargs.get("messages", []))
-
-     if kwargs.get("input") is not None:
-         input_data = kwargs.get("input")
-         if isinstance(input_data, list):
-             messages.extend(input_data)
-         else:
-             messages.append({"role": "user", "content": input_data})
-
-     # Check if system prompt is provided as a separate parameter
-     if kwargs.get("system") is not None:
-         has_system = any(msg.get("role") == "system" for msg in messages)
-         if not has_system:
-             messages = [{"role": "system", "content": kwargs.get("system")}] + messages
-
-     # For Responses API, add instructions to the system prompt if provided
-     if kwargs.get("instructions") is not None:
-         # Find the system message if it exists
-         system_idx = next(
-             (i for i, msg in enumerate(messages) if msg.get("role") == "system"), None
-         )
+         from posthog.ai.gemini.gemini_converter import format_gemini_input

-         if system_idx is not None:
-             # Append instructions to existing system message
-             system_content = messages[system_idx].get("content", "")
-             messages[system_idx]["content"] = (
-                 f"{system_content}\n\n{kwargs.get('instructions')}"
+         contents = kwargs.get("contents", [])
+         return format_gemini_input(contents)
+     elif provider == "openai":
+         # For OpenAI, handle both Chat Completions and Responses API
+         from posthog.ai.openai.openai_converter import format_openai_input
+
+         messages_param = kwargs.get("messages")
+         input_param = kwargs.get("input")
+
+         # Get base formatted messages
+         messages = format_openai_input(messages_param, input_param)
+
+         # Check if system prompt is provided as a separate parameter
+         if kwargs.get("system") is not None:
+             has_system = any(msg.get("role") == "system" for msg in messages)
+             if not has_system:
+                 messages = [
+                     {"role": "system", "content": kwargs.get("system")}
+                 ] + messages
+
+         # For Responses API, add instructions to the system prompt if provided
+         if kwargs.get("instructions") is not None:
+             # Find the system message if it exists
+             system_idx = next(
+                 (i for i, msg in enumerate(messages) if msg.get("role") == "system"),
+                 None,
              )
-         else:
-             # Create a new system message with instructions
-             messages = [
-                 {"role": "system", "content": kwargs.get("instructions")}
-             ] + messages

-     return messages
+             if system_idx is not None:
+                 # Append instructions to existing system message
+                 system_content = messages[system_idx].get("content", "")
+                 messages[system_idx]["content"] = (
+                     f"{system_content}\n\n{kwargs.get('instructions')}"
+                 )
+             else:
+                 # Create a new system message with instructions
+                 messages = [
+                     {"role": "system", "content": kwargs.get("instructions")}
+                 ] + messages
+
+         return messages
+
+     # Default case - return empty list
+     return []


  def call_llm_and_track_usage(
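Note: the `instructions` handling in the new OpenAI branch is easiest to see with an example. A sketch (assuming `format_openai_input` passes a plain list of message dicts through unchanged; its actual behavior lives in `posthog.ai.openai.openai_converter`):

```python
messages = merge_system_prompt(
    {
        "messages": [{"role": "user", "content": "Hi"}],
        "instructions": "Answer briefly.",  # Responses API parameter
    },
    provider="openai",
)
# No system message was present, so `instructions` becomes a new leading
# system message:
# [{"role": "system", "content": "Answer briefly."},
#  {"role": "user", "content": "Hi"}]
```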
@@ -388,7 +238,7 @@ def call_llm_and_track_usage(
      posthog_properties: Optional[Dict[str, Any]],
      posthog_privacy_mode: bool,
      posthog_groups: Optional[Dict[str, Any]],
-     base_url: URL,
+     base_url: str,
      call_method: Callable[..., Any],
      **kwargs: Any,
  ) -> Any:
@@ -401,7 +251,7 @@ def call_llm_and_track_usage(
      error = None
      http_status = 200
      usage: Dict[str, Any] = {}
-     error_params: Dict[str, any] = {}
+     error_params: Dict[str, Any] = {}

      try:
          response = call_method(**kwargs)
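Note: the `error_params` change here (and its async twin below) fixes a typing bug rather than behavior: lowercase `any` is the builtin function, not a type, so `Dict[str, any]` is rejected by static checkers. For illustration:

```python
from typing import Any, Dict

# Before: Dict[str, any] referenced builtins.any(), which is not a type,
# so tools like mypy flag the annotation.
# After: the intended typing.Any.
error_params: Dict[str, Any] = {}
```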
@@ -509,7 +359,7 @@ async def call_llm_and_track_usage_async(
      posthog_properties: Optional[Dict[str, Any]],
      posthog_privacy_mode: bool,
      posthog_groups: Optional[Dict[str, Any]],
-     base_url: URL,
+     base_url: str,
      call_async_method: Callable[..., Any],
      **kwargs: Any,
  ) -> Any:
@@ -518,7 +368,7 @@ async def call_llm_and_track_usage_async(
      error = None
      http_status = 200
      usage: Dict[str, Any] = {}
-     error_params: Dict[str, any] = {}
+     error_params: Dict[str, Any] = {}

      try:
          response = await call_async_method(**kwargs)
@@ -629,3 +479,105 @@ def with_privacy_mode(ph_client: PostHogClient, privacy_mode: bool, value: Any):
      if ph_client.privacy_mode or privacy_mode:
          return None
      return value
+
+
+ def capture_streaming_event(
+     ph_client: PostHogClient,
+     event_data: StreamingEventData,
+ ):
+     """
+     Unified streaming event capture for all LLM providers.
+
+     This function handles the common logic for capturing streaming events across all providers.
+     All provider-specific formatting should be done BEFORE calling this function.
+
+     The function handles:
+     - Building PostHog event properties
+     - Extracting and adding tools based on provider
+     - Applying privacy mode
+     - Adding special token fields (cache, reasoning)
+     - Provider-specific fields (e.g., OpenAI instructions)
+     - Sending the event to PostHog
+
+     Args:
+         ph_client: PostHog client instance
+         event_data: Standardized streaming event data containing all necessary information
+     """
+     trace_id = event_data.get("trace_id") or str(uuid.uuid4())
+
+     # Build base event properties
+     event_properties = {
+         "$ai_provider": event_data["provider"],
+         "$ai_model": event_data["model"],
+         "$ai_model_parameters": get_model_params(event_data["kwargs"]),
+         "$ai_input": with_privacy_mode(
+             ph_client,
+             event_data["privacy_mode"],
+             event_data["formatted_input"],
+         ),
+         "$ai_output_choices": with_privacy_mode(
+             ph_client,
+             event_data["privacy_mode"],
+             event_data["formatted_output"],
+         ),
+         "$ai_http_status": 200,
+         "$ai_input_tokens": event_data["usage_stats"].get("input_tokens", 0),
+         "$ai_output_tokens": event_data["usage_stats"].get("output_tokens", 0),
+         "$ai_latency": event_data["latency"],
+         "$ai_trace_id": trace_id,
+         "$ai_base_url": str(event_data["base_url"]),
+         **(event_data.get("properties") or {}),
+     }
+
+     # Extract and add tools based on provider
+     available_tools = extract_available_tool_calls(
+         event_data["provider"],
+         event_data["kwargs"],
+     )
+     if available_tools:
+         event_properties["$ai_tools"] = available_tools
+
+     # Add optional token fields
+     # For Anthropic, always include cache fields even if 0 (backward compatibility)
+     # For others, only include if present and non-zero
+     if event_data["provider"] == "anthropic":
+         # Anthropic always includes cache fields
+         cache_read = event_data["usage_stats"].get("cache_read_input_tokens", 0)
+         cache_creation = event_data["usage_stats"].get("cache_creation_input_tokens", 0)
+         event_properties["$ai_cache_read_input_tokens"] = cache_read
+         event_properties["$ai_cache_creation_input_tokens"] = cache_creation
+     else:
+         # Other providers only include if non-zero
+         optional_token_fields = [
+             "cache_read_input_tokens",
+             "cache_creation_input_tokens",
+             "reasoning_tokens",
+         ]
+
+         for field in optional_token_fields:
+             value = event_data["usage_stats"].get(field)
+             if value is not None and isinstance(value, int) and value > 0:
+                 event_properties[f"$ai_{field}"] = value
+
+     # Handle provider-specific fields
+     if (
+         event_data["provider"] == "openai"
+         and event_data["kwargs"].get("instructions") is not None
+     ):
+         event_properties["$ai_instructions"] = with_privacy_mode(
+             ph_client,
+             event_data["privacy_mode"],
+             event_data["kwargs"]["instructions"],
+         )
+
+     if event_data.get("distinct_id") is None:
+         event_properties["$process_person_profile"] = False
+
+     # Send event to PostHog
+     if hasattr(ph_client, "capture"):
+         ph_client.capture(
+             distinct_id=event_data.get("distinct_id") or trace_id,
+             event="$ai_generation",
+             properties=event_properties,
+             groups=event_data.get("groups"),
+         )
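Note: a sketch of how a provider wrapper might call the new unified capture path once a stream has been consumed. All values are illustrative, and `StreamingEventData` is a total TypedDict, so every key must be present (use `None` where there is no value):

```python
import time

from posthog import Posthog
from posthog.ai.types import StreamingEventData
from posthog.ai.utils import capture_streaming_event

ph_client = Posthog("<project_api_key>", host="https://us.i.posthog.com")

start = time.time()
# ... consume the provider stream, accumulating output and usage ...

event_data: StreamingEventData = {
    "provider": "openai",
    "model": "gpt-4o-mini",  # illustrative model name
    "base_url": "https://api.openai.com/v1",
    "kwargs": {},  # the original call kwargs, used for tool extraction
    "formatted_input": [{"role": "user", "content": "Hi"}],
    "formatted_output": [{"role": "assistant", "content": "Hello!"}],
    "usage_stats": {"input_tokens": 3, "output_tokens": 2},
    "latency": time.time() - start,
    "distinct_id": "user_123",
    "trace_id": None,  # a UUID is generated when None
    "properties": None,
    "privacy_mode": False,
    "groups": None,
}

capture_streaming_event(ph_client, event_data)
```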