posthoganalytics 6.7.1__py3-none-any.whl → 6.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,403 @@
1
+ """
2
+ Anthropic-specific conversion utilities.
3
+
4
+ This module handles the conversion of Anthropic API responses and inputs
5
+ into standardized formats for PostHog tracking.
6
+ """
7
+
8
+ import json
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ from posthoganalytics.ai.types import (
12
+ FormattedContentItem,
13
+ FormattedFunctionCall,
14
+ FormattedMessage,
15
+ FormattedTextContent,
16
+ StreamingContentBlock,
17
+ TokenUsage,
18
+ ToolInProgress,
19
+ )
20
+
21
+
22
def format_anthropic_response(response: Any) -> List[FormattedMessage]:
    """
    Convert an Anthropic API response into the standardized message format.

    Walks the response's content blocks, mapping ``text`` blocks to text
    items and ``tool_use`` blocks to function-call items.

    Args:
        response: The response object from the Anthropic API (may be None)

    Returns:
        A list containing at most one assistant message; empty when the
        response is None or yields no recognizable content.
    """
    if response is None:
        return []

    items: List[FormattedContentItem] = []

    if hasattr(response, "content"):
        for block in response.content:
            block_type = getattr(block, "type", None)

            if block_type == "text" and getattr(block, "text", None):
                items.append({"type": "text", "text": block.text})

            elif (
                block_type == "tool_use"
                and hasattr(block, "name")
                and hasattr(block, "id")
            ):
                items.append(
                    {
                        "type": "function",
                        "id": block.id,
                        "function": {
                            "name": block.name,
                            # Tool arguments live on the block's `input` field.
                            "arguments": getattr(block, "input", {}),
                        },
                    }
                )

    if not items:
        return []

    return [{"role": "assistant", "content": items}]
79
+
80
+
81
def format_anthropic_input(
    messages: List[Dict[str, Any]], system: Optional[str] = None
) -> List[FormattedMessage]:
    """
    Build the standardized message list for an Anthropic request.

    Args:
        messages: Message dictionaries as passed to the Anthropic API
        system: Optional system prompt; when given it becomes the first message

    Returns:
        Formatted messages with the system prompt (if any) first.
    """
    result: List[FormattedMessage] = []

    if system is not None:
        result.append({"role": "system", "content": system})

    # Messages are already shaped correctly; re-emit them with defaults so the
    # result is type-safe even when keys are missing.
    for msg in messages or []:
        result.append(
            {
                "role": msg.get("role", "user"),
                "content": msg.get("content", ""),
            }
        )

    return result
112
+
113
+
114
def extract_anthropic_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Pull the tool definitions out of Anthropic API keyword arguments.

    Args:
        kwargs: Keyword arguments passed to the Anthropic API

    Returns:
        The value of the ``tools`` argument, or None when absent.
    """
    return kwargs.get("tools")
126
+
127
+
128
def format_anthropic_streaming_content(
    content_blocks: List[StreamingContentBlock],
) -> List[FormattedContentItem]:
    """
    Convert accumulated streaming content blocks into formatted content items.

    Text blocks keep their text (empty string when missing); function blocks
    keep their id and function payload (empty dict when missing).

    Args:
        content_blocks: Content block dictionaries gathered while streaming

    Returns:
        List of formatted content items.
    """
    result: List[FormattedContentItem] = []

    for block in content_blocks:
        kind = block.get("type")

        if kind == "text":
            result.append({"type": "text", "text": block.get("text") or ""})
        elif kind == "function":
            result.append(
                {
                    "type": "function",
                    "id": block.get("id"),
                    "function": block.get("function") or {},
                }
            )

    return result
164
+
165
+
166
def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
    """
    Extract token usage from a complete (non-streaming) Anthropic response.

    Args:
        response: The full response object from the Anthropic API

    Returns:
        TokenUsage with input/output counts; cache token counters are
        included only when present with a positive value.
    """
    if not hasattr(response, "usage"):
        return TokenUsage(input_tokens=0, output_tokens=0)

    usage = response.usage
    result = TokenUsage(
        input_tokens=getattr(usage, "input_tokens", 0),
        output_tokens=getattr(usage, "output_tokens", 0),
    )

    # Cache counters are optional: report each one only when it is a
    # positive value.
    for field in ("cache_read_input_tokens", "cache_creation_input_tokens"):
        if hasattr(usage, field):
            value = getattr(usage, field)
            if value and value > 0:
                result[field] = value

    return result
195
+
196
+
197
def extract_anthropic_usage_from_event(event: Any) -> TokenUsage:
    """
    Extract token usage from a single Anthropic streaming event.

    ``message_start`` events carry the input-side counts (including the
    cache-token counters); events exposing a truthy ``usage`` attribute
    carry the output count.

    Args:
        event: A streaming event from the Anthropic API

    Returns:
        TokenUsage populated with whatever counts the event provides.
    """
    usage: TokenUsage = TokenUsage()

    # Input-side counts arrive on the message_start event.
    if getattr(event, "type", None) == "message_start":
        if hasattr(event, "message") and hasattr(event.message, "usage"):
            source = event.message.usage
            usage["input_tokens"] = getattr(source, "input_tokens", 0)
            usage["cache_creation_input_tokens"] = getattr(
                source, "cache_creation_input_tokens", 0
            )
            usage["cache_read_input_tokens"] = getattr(
                source, "cache_read_input_tokens", 0
            )

    # Output-side counts arrive on events that expose a usage attribute
    # (e.g. message_delta).
    if getattr(event, "usage", None):
        usage["output_tokens"] = getattr(event.usage, "output_tokens", 0)

    return usage
226
+
227
+
228
def handle_anthropic_content_block_start(
    event: Any,
) -> Tuple[Optional[StreamingContentBlock], Optional[ToolInProgress]]:
    """
    Initialize tracking state for a streaming content_block_start event.

    Args:
        event: A streaming event from the Anthropic API

    Returns:
        Tuple of (content_block, tool_in_progress). Both are None unless the
        event is a recognized ``content_block_start``; tool_in_progress is
        populated only for ``tool_use`` blocks.
    """
    if getattr(event, "type", None) != "content_block_start":
        return None, None

    block = getattr(event, "content_block", None)
    block_type = getattr(block, "type", None)

    if block_type == "text":
        started: StreamingContentBlock = {"type": "text", "text": ""}
        return started, None

    if block_type == "tool_use":
        fn_block: StreamingContentBlock = {
            "type": "function",
            "id": getattr(block, "id", ""),
            "function": {"name": getattr(block, "name", ""), "arguments": {}},
        }
        # The accumulator keeps a reference to the same block so the parsed
        # arguments can be written back into it later.
        pending: ToolInProgress = {"block": fn_block, "input_string": ""}
        return fn_block, pending

    return None, None
266
+
267
+
268
def handle_anthropic_text_delta(
    event: Any, current_block: Optional[StreamingContentBlock]
) -> Optional[str]:
    """
    Apply a streaming text delta to the block currently being accumulated.

    Args:
        event: A streaming delta event from the Anthropic API
        current_block: The text block being built up, if any

    Returns:
        The delta text when the event carried one, otherwise None.
    """
    delta = getattr(event, "delta", None)
    if delta is None or not hasattr(delta, "text"):
        return None

    piece = delta.text or ""

    # Append in place to the in-flight text block when one is active.
    if current_block is not None and current_block.get("type") == "text":
        existing = current_block.get("text")
        current_block["text"] = piece if existing is None else existing + piece

    return piece
295
+
296
+
297
def handle_anthropic_tool_delta(
    event: Any,
    content_blocks: List[StreamingContentBlock],
    tools_in_progress: Dict[str, ToolInProgress],
) -> None:
    """
    Accumulate partial JSON tool input from a streaming delta event.

    Acts only on ``content_block_delta`` events whose delta type is
    ``input_json_delta`` and whose index points at a tracked function block.

    Args:
        event: A streaming event from the Anthropic API
        content_blocks: Content blocks accumulated so far
        tools_in_progress: In-flight tool accumulators keyed by block id
    """
    if getattr(event, "type", None) != "content_block_delta":
        return

    delta = getattr(event, "delta", None)
    if delta is None or getattr(delta, "type", None) != "input_json_delta":
        return

    if not hasattr(event, "index") or event.index >= len(content_blocks):
        return

    target = content_blocks[event.index]

    if target.get("type") == "function" and target.get("id") in tools_in_progress:
        # Arguments stream as JSON fragments; stash them until finalization.
        fragment = getattr(delta, "partial_json", "")
        tools_in_progress[target["id"]]["input_string"] += fragment
328
+
329
+
330
def finalize_anthropic_tool_input(
    event: Any,
    content_blocks: List[StreamingContentBlock],
    tools_in_progress: Dict[str, ToolInProgress],
) -> None:
    """
    Finalize a tool's accumulated JSON input when its content block stops.

    On a ``content_block_stop`` event, parses the JSON string accumulated for
    the matching function block, stores it as the call's arguments, and drops
    the in-progress tracker. Parse failures leave the arguments untouched
    (i.e. the empty dict set at block start).

    Args:
        event: Content block stop event from the Anthropic API
        content_blocks: List of content blocks accumulated during streaming
        tools_in_progress: In-flight tool accumulators keyed by block id
    """

    if not (hasattr(event, "type") and event.type == "content_block_stop"):
        return

    if hasattr(event, "index") and event.index < len(content_blocks):
        block = content_blocks[event.index]

        if block.get("type") == "function" and block.get("id") in tools_in_progress:
            tool = tools_in_progress[block["id"]]

            try:
                block["function"]["arguments"] = json.loads(tool["input_string"])
            except (ValueError, TypeError, KeyError):
                # Was `except (json.JSONDecodeError, Exception)`, which is
                # redundant (Exception subsumes JSONDecodeError) and swallowed
                # every error. Catch only what this statement can raise:
                # ValueError (incl. JSONDecodeError) / TypeError from
                # json.loads, KeyError from a block missing "function".
                pass

            del tools_in_progress[block["id"]]
360
+
361
+
362
def format_anthropic_streaming_input(kwargs: Dict[str, Any]) -> Any:
    """
    Format Anthropic streaming input for PostHog tracking.

    Delegates to the shared system-prompt merging helper.

    Args:
        kwargs: Keyword arguments passed to the Anthropic API

    Returns:
        Formatted input ready for PostHog tracking.
    """
    # Function-scope import mirrors the original; presumably avoids a
    # circular import at module load time — TODO confirm.
    from posthoganalytics.ai.utils import merge_system_prompt

    merged = merge_system_prompt(kwargs, "anthropic")
    return merged
375
+
376
+
377
def format_anthropic_streaming_output_complete(
    content_blocks: List[StreamingContentBlock], accumulated_content: str
) -> List[FormattedMessage]:
    """
    Build the final assistant message from a completed Anthropic stream.

    Prefers the structured content blocks; when they produce no formatted
    content, falls back to wrapping the raw accumulated text.

    Args:
        content_blocks: Content blocks accumulated during streaming
        accumulated_content: Raw accumulated text content used as fallback

    Returns:
        Formatted messages ready for PostHog tracking.
    """
    content = format_anthropic_streaming_content(content_blocks)

    if not content:
        # No structured blocks were captured; wrap the plain text instead.
        content = [{"type": "text", "text": accumulated_content}]

    return [{"role": "assistant", "content": content}]
@@ -1,4 +1,9 @@
1
1
  from .gemini import Client
2
+ from .gemini_converter import (
3
+ format_gemini_input,
4
+ format_gemini_response,
5
+ extract_gemini_tools,
6
+ )
2
7
 
3
8
 
4
9
  # Create a genai-like module for perfect drop-in replacement
@@ -8,4 +13,10 @@ class _GenAI:
8
13
 
9
14
  genai = _GenAI()
10
15
 
11
- __all__ = ["Client", "genai"]
16
+ __all__ = [
17
+ "Client",
18
+ "genai",
19
+ "format_gemini_input",
20
+ "format_gemini_response",
21
+ "extract_gemini_tools",
22
+ ]
@@ -3,6 +3,8 @@ import time
3
3
  import uuid
4
4
  from typing import Any, Dict, Optional
5
5
 
6
+ from posthoganalytics.ai.types import TokenUsage
7
+
6
8
  try:
7
9
  from google import genai
8
10
  except ImportError:
@@ -13,8 +15,14 @@ except ImportError:
13
15
  from posthoganalytics import setup
14
16
  from posthoganalytics.ai.utils import (
15
17
  call_llm_and_track_usage,
16
- get_model_params,
17
- with_privacy_mode,
18
+ capture_streaming_event,
19
+ merge_usage_stats,
20
+ )
21
+ from posthoganalytics.ai.gemini.gemini_converter import (
22
+ format_gemini_input,
23
+ extract_gemini_usage_from_chunk,
24
+ extract_gemini_content_from_chunk,
25
+ format_gemini_streaming_output,
18
26
  )
19
27
  from posthoganalytics.ai.sanitization import sanitize_gemini
20
28
  from posthoganalytics.client import Client as PostHogClient
@@ -72,6 +80,7 @@ class Client:
72
80
  posthog_groups: Default groups for all calls (can be overridden per call)
73
81
  **kwargs: Additional arguments (for future compatibility)
74
82
  """
83
+
75
84
  self._ph_client = posthog_client or setup()
76
85
 
77
86
  if self._ph_client is None:
@@ -133,6 +142,7 @@ class Models:
133
142
  posthog_groups: Default groups for all calls
134
143
  **kwargs: Additional arguments (for future compatibility)
135
144
  """
145
+
136
146
  self._ph_client = posthog_client or setup()
137
147
 
138
148
  if self._ph_client is None:
@@ -150,14 +160,19 @@ class Models:
150
160
  # Add Vertex AI parameters if provided
151
161
  if vertexai is not None:
152
162
  client_args["vertexai"] = vertexai
163
+
153
164
  if credentials is not None:
154
165
  client_args["credentials"] = credentials
166
+
155
167
  if project is not None:
156
168
  client_args["project"] = project
169
+
157
170
  if location is not None:
158
171
  client_args["location"] = location
172
+
159
173
  if debug_config is not None:
160
174
  client_args["debug_config"] = debug_config
175
+
161
176
  if http_options is not None:
162
177
  client_args["http_options"] = http_options
163
178
 
@@ -175,6 +190,7 @@ class Models:
175
190
  raise ValueError(
176
191
  "API key must be provided either as parameter or via GOOGLE_API_KEY/API_KEY environment variable"
177
192
  )
193
+
178
194
  client_args["api_key"] = api_key
179
195
 
180
196
  self._client = genai.Client(**client_args)
@@ -189,6 +205,7 @@ class Models:
189
205
  call_groups: Optional[Dict[str, Any]],
190
206
  ):
191
207
  """Merge call-level PostHog parameters with client defaults."""
208
+
192
209
  # Use call-level values if provided, otherwise fall back to defaults
193
210
  distinct_id = (
194
211
  call_distinct_id
@@ -204,6 +221,7 @@ class Models:
204
221
 
205
222
  # Merge properties: default properties + call properties (call properties override)
206
223
  properties = dict(self._default_properties)
224
+
207
225
  if call_properties:
208
226
  properties.update(call_properties)
209
227
 
@@ -239,6 +257,7 @@ class Models:
239
257
  posthog_groups: Group analytics properties (overrides client default)
240
258
  **kwargs: Arguments passed to Gemini's generate_content
241
259
  """
260
+
242
261
  # Merge PostHog parameters
243
262
  distinct_id, trace_id, properties, privacy_mode, groups = (
244
263
  self._merge_posthog_params(
@@ -277,7 +296,7 @@ class Models:
277
296
  **kwargs: Any,
278
297
  ):
279
298
  start_time = time.time()
280
- usage_stats: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0}
299
+ usage_stats: TokenUsage = TokenUsage(input_tokens=0, output_tokens=0)
281
300
  accumulated_content = []
282
301
 
283
302
  kwargs_without_stream = {"model": model, "contents": contents, **kwargs}
@@ -288,25 +307,24 @@ class Models:
288
307
  nonlocal accumulated_content # noqa: F824
289
308
  try:
290
309
  for chunk in response:
291
- if hasattr(chunk, "usage_metadata") and chunk.usage_metadata:
292
- usage_stats = {
293
- "input_tokens": getattr(
294
- chunk.usage_metadata, "prompt_token_count", 0
295
- ),
296
- "output_tokens": getattr(
297
- chunk.usage_metadata, "candidates_token_count", 0
298
- ),
299
- }
300
-
301
- if hasattr(chunk, "text") and chunk.text:
302
- accumulated_content.append(chunk.text)
310
+ # Extract usage stats from chunk
311
+ chunk_usage = extract_gemini_usage_from_chunk(chunk)
312
+
313
+ if chunk_usage:
314
+ # Gemini reports cumulative totals, not incremental values
315
+ merge_usage_stats(usage_stats, chunk_usage, mode="cumulative")
316
+
317
+ # Extract content from chunk (now returns content blocks)
318
+ content_block = extract_gemini_content_from_chunk(chunk)
319
+
320
+ if content_block is not None:
321
+ accumulated_content.append(content_block)
303
322
 
304
323
  yield chunk
305
324
 
306
325
  finally:
307
326
  end_time = time.time()
308
327
  latency = end_time - start_time
309
- output = "".join(accumulated_content)
310
328
 
311
329
  self._capture_streaming_event(
312
330
  model,
@@ -319,7 +337,7 @@ class Models:
319
337
  kwargs,
320
338
  usage_stats,
321
339
  latency,
322
- output,
340
+ accumulated_content,
323
341
  )
324
342
 
325
343
  return generator()
@@ -334,63 +352,39 @@ class Models:
334
352
  privacy_mode: bool,
335
353
  groups: Optional[Dict[str, Any]],
336
354
  kwargs: Dict[str, Any],
337
- usage_stats: Dict[str, int],
355
+ usage_stats: TokenUsage,
338
356
  latency: float,
339
- output: str,
357
+ output: Any,
340
358
  ):
341
- if trace_id is None:
342
- trace_id = str(uuid.uuid4())
343
-
344
- event_properties = {
345
- "$ai_provider": "gemini",
346
- "$ai_model": model,
347
- "$ai_model_parameters": get_model_params(kwargs),
348
- "$ai_input": with_privacy_mode(
349
- self._ph_client,
350
- privacy_mode,
351
- sanitize_gemini(self._format_input(contents)),
352
- ),
353
- "$ai_output_choices": with_privacy_mode(
354
- self._ph_client,
355
- privacy_mode,
356
- [{"content": output, "role": "assistant"}],
357
- ),
358
- "$ai_http_status": 200,
359
- "$ai_input_tokens": usage_stats.get("input_tokens", 0),
360
- "$ai_output_tokens": usage_stats.get("output_tokens", 0),
361
- "$ai_latency": latency,
362
- "$ai_trace_id": trace_id,
363
- "$ai_base_url": self._base_url,
364
- **(properties or {}),
365
- }
366
-
367
- if distinct_id is None:
368
- event_properties["$process_person_profile"] = False
369
-
370
- if hasattr(self._ph_client, "capture"):
371
- self._ph_client.capture(
372
- distinct_id=distinct_id,
373
- event="$ai_generation",
374
- properties=event_properties,
375
- groups=groups,
376
- )
359
+ from posthoganalytics.ai.types import StreamingEventData
360
+
361
+ # Prepare standardized event data
362
+ formatted_input = self._format_input(contents)
363
+ sanitized_input = sanitize_gemini(formatted_input)
364
+
365
+ event_data = StreamingEventData(
366
+ provider="gemini",
367
+ model=model,
368
+ base_url=self._base_url,
369
+ kwargs=kwargs,
370
+ formatted_input=sanitized_input,
371
+ formatted_output=format_gemini_streaming_output(output),
372
+ usage_stats=usage_stats,
373
+ latency=latency,
374
+ distinct_id=distinct_id,
375
+ trace_id=trace_id,
376
+ properties=properties,
377
+ privacy_mode=privacy_mode,
378
+ groups=groups,
379
+ )
380
+
381
+ # Use the common capture function
382
+ capture_streaming_event(self._ph_client, event_data)
377
383
 
378
384
  def _format_input(self, contents):
379
385
  """Format input contents for PostHog tracking"""
380
- if isinstance(contents, str):
381
- return [{"role": "user", "content": contents}]
382
- elif isinstance(contents, list):
383
- formatted = []
384
- for item in contents:
385
- if isinstance(item, str):
386
- formatted.append({"role": "user", "content": item})
387
- elif hasattr(item, "text"):
388
- formatted.append({"role": "user", "content": item.text})
389
- else:
390
- formatted.append({"role": "user", "content": str(item)})
391
- return formatted
392
- else:
393
- return [{"role": "user", "content": str(contents)}]
386
+
387
+ return format_gemini_input(contents)
394
388
 
395
389
  def generate_content_stream(
396
390
  self,