posthoganalytics 6.7.1__py3-none-any.whl → 6.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,393 @@
1
+ """
2
+ Anthropic-specific conversion utilities.
3
+
4
+ This module handles the conversion of Anthropic API responses and inputs
5
+ into standardized formats for PostHog tracking.
6
+ """
7
+
8
+ import json
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ from posthoganalytics.ai.types import (
12
+ FormattedContentItem,
13
+ FormattedFunctionCall,
14
+ FormattedMessage,
15
+ FormattedTextContent,
16
+ StreamingContentBlock,
17
+ StreamingUsageStats,
18
+ TokenUsage,
19
+ ToolInProgress,
20
+ )
21
+
22
+
23
def format_anthropic_response(response: Any) -> List[FormattedMessage]:
    """
    Convert an Anthropic response object into standardized messages.

    Only two kinds of content blocks are kept: ``text`` blocks whose text is
    non-empty, and ``tool_use`` blocks that expose both ``name`` and ``id``.
    Every other block is ignored.

    Args:
        response: The response object from Anthropic API (may be None)

    Returns:
        An empty list when nothing was collected, otherwise a one-element
        list holding a single assistant message with the collected items
    """

    if response is None:
        return []

    items: List[FormattedContentItem] = []

    # A response without a ``content`` attribute contributes no items.
    for block in getattr(response, "content", ()):
        block_type = getattr(block, "type", None)

        if block_type == "text":
            text = getattr(block, "text", None)
            # Empty or missing text is dropped rather than recorded.
            if text:
                items.append({"type": "text", "text": text})

        elif (
            block_type == "tool_use"
            and hasattr(block, "name")
            and hasattr(block, "id")
        ):
            items.append(
                {
                    "type": "function",
                    "id": block.id,
                    "function": {
                        "name": block.name,
                        # A tool call without an ``input`` gets empty arguments.
                        "arguments": getattr(block, "input", {}),
                    },
                }
            )

    if not items:
        return []

    return [{"role": "assistant", "content": items}]
80
+
81
+
82
def format_anthropic_input(
    messages: List[Dict[str, Any]], system: Optional[str] = None
) -> List[FormattedMessage]:
    """
    Build the tracked input message list, optionally led by a system prompt.

    Args:
        messages: List of message dictionaries; a falsy value yields no messages
        system: Optional system prompt; when not None it becomes the first
            entry with role "system" (an empty string is still included)

    Returns:
        List of formatted messages, each reduced to "role" and "content"
    """

    leading: List[FormattedMessage] = (
        [] if system is None else [{"role": "system", "content": system}]
    )

    # Keep only role/content, defaulting to a "user" role and empty content.
    converted: List[FormattedMessage] = [
        {
            "role": entry.get("role", "user"),
            "content": entry.get("content", ""),
        }
        for entry in (messages or [])
    ]

    return leading + converted
113
+
114
+
115
def extract_anthropic_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Look up the "tools" entry in the Anthropic API keyword arguments.

    Args:
        kwargs: Keyword arguments passed to Anthropic API

    Returns:
        The value stored under "tools", or None when the key is absent
    """

    tools = kwargs.get("tools")
    return tools
127
+
128
+
129
def format_anthropic_streaming_content(
    content_blocks: List[StreamingContentBlock],
) -> List[FormattedContentItem]:
    """
    Turn accumulated streaming content blocks into formatted content items.

    Blocks whose "type" is neither "text" nor "function" are skipped.

    Args:
        content_blocks: List of content block dictionaries from streaming

    Returns:
        List of formatted content items, in the order the blocks were given
    """

    items: List[FormattedContentItem] = []

    for entry in content_blocks:
        kind = entry.get("type")

        if kind == "text":
            # Missing or None text is normalized to an empty string.
            items.append({"type": "text", "text": entry.get("text") or ""})

        elif kind == "function":
            # A missing/empty function payload is normalized to an empty dict.
            items.append(
                {
                    "type": "function",
                    "id": entry.get("id"),
                    "function": entry.get("function") or {},
                }
            )

    return items
165
+
166
+
167
def extract_anthropic_usage_from_event(event: Any) -> StreamingUsageStats:
    """
    Collect whatever usage counters a single streaming event carries.

    A "message_start" event whose message has a ``usage`` attribute supplies
    the input-side counters; any event with a truthy ``usage`` attribute
    supplies the output token count.

    Args:
        event: Streaming event from Anthropic API

    Returns:
        Dictionary of usage statistics; empty when the event carries none
    """

    stats: StreamingUsageStats = {}

    # Input-side counters arrive nested under the started message.
    if getattr(event, "type", None) == "message_start":
        message = getattr(event, "message", None)

        if hasattr(message, "usage"):
            start_usage = message.usage
            stats["input_tokens"] = getattr(start_usage, "input_tokens", 0)
            stats["cache_creation_input_tokens"] = getattr(
                start_usage, "cache_creation_input_tokens", 0
            )
            stats["cache_read_input_tokens"] = getattr(
                start_usage, "cache_read_input_tokens", 0
            )

    # Output counters sit directly on the event (e.g. message_delta).
    event_usage = getattr(event, "usage", None)
    if event_usage:
        stats["output_tokens"] = getattr(event_usage, "output_tokens", 0)

    return stats
196
+
197
+
198
def handle_anthropic_content_block_start(
    event: Any,
) -> Tuple[Optional[StreamingContentBlock], Optional[ToolInProgress]]:
    """
    Create the accumulator for a content block that has just started.

    Args:
        event: Content block start event

    Returns:
        Tuple of (content_block, tool_in_progress):
        - ``(text_block, None)`` for a "text" block
        - ``(tool_block, tracker)`` for a "tool_use" block, where
          ``tracker["block"]`` is the very same dict as ``tool_block``
        - ``(None, None)`` for any other event or block type
    """

    if getattr(event, "type", None) != "content_block_start":
        return None, None

    # A missing content block, or one without a type, yields nothing.
    started = getattr(event, "content_block", None)
    if not hasattr(started, "type"):
        return None, None

    kind = started.type

    if kind == "text":
        text_block: StreamingContentBlock = {"type": "text", "text": ""}
        return text_block, None

    if kind == "tool_use":
        tool_block: StreamingContentBlock = {
            "type": "function",
            "id": getattr(started, "id", ""),
            "function": {"name": getattr(started, "name", ""), "arguments": {}},
        }
        # The tracker references the same dict that is returned as the block.
        tracker: ToolInProgress = {"block": tool_block, "input_string": ""}
        return tool_block, tracker

    return None, None
236
+
237
+
238
def handle_anthropic_text_delta(
    event: Any, current_block: Optional[StreamingContentBlock]
) -> Optional[str]:
    """
    Apply a streamed text delta to the text block being accumulated.

    Args:
        event: Delta event
        current_block: Current text block being accumulated; it is mutated in
            place only when it is not None and its "type" is "text"

    Returns:
        The delta text (an empty string when the delta's text is falsy), or
        None when the event carries no ``delta.text`` attribute
    """

    delta = getattr(event, "delta", None)
    if not hasattr(delta, "text"):
        return None

    piece = delta.text or ""

    if current_block is not None and current_block.get("type") == "text":
        existing = current_block.get("text")
        # A block whose text is still None starts from the delta itself.
        current_block["text"] = piece if existing is None else existing + piece

    return piece
265
+
266
+
267
def handle_anthropic_tool_delta(
    event: Any,
    content_blocks: List[StreamingContentBlock],
    tools_in_progress: Dict[str, ToolInProgress],
) -> None:
    """
    Append a streamed JSON fragment to the matching in-progress tool call.

    The event is ignored unless it is a "content_block_delta" whose delta has
    type "input_json_delta", its index addresses an existing entry of
    ``content_blocks``, and that entry is a "function" block whose id is
    tracked in ``tools_in_progress``.

    Args:
        event: Tool delta event
        content_blocks: List of content blocks
        tools_in_progress: Dictionary tracking tools being accumulated; the
            matching entry's "input_string" is extended in place
    """

    if getattr(event, "type", None) != "content_block_delta":
        return

    delta = getattr(event, "delta", None)
    if getattr(delta, "type", None) != "input_json_delta":
        return

    # Reject missing, non-integer and negative indexes: a negative index would
    # otherwise silently address a block counted from the end of the list.
    index = getattr(event, "index", None)
    if not isinstance(index, int) or not 0 <= index < len(content_blocks):
        return

    block = content_blocks[index]
    tool_id = block.get("id")

    if block.get("type") != "function" or tool_id not in tools_in_progress:
        return

    # A missing or None fragment contributes nothing instead of raising on
    # string concatenation (same normalization the text-delta handler uses).
    fragment = getattr(delta, "partial_json", "") or ""
    tools_in_progress[tool_id]["input_string"] += fragment
298
+
299
+
300
def finalize_anthropic_tool_input(
    event: Any,
    content_blocks: List[StreamingContentBlock],
    tools_in_progress: Dict[str, ToolInProgress],
) -> None:
    """
    Parse the accumulated tool input once its content block stops.

    The event is ignored unless it is a "content_block_stop" whose index
    addresses an existing "function" block that is tracked in
    ``tools_in_progress``. For a matching block the tracker entry is always
    removed; the block's "arguments" are replaced only when the accumulated
    string parses as JSON, otherwise they are left as they were.

    Args:
        event: Content block stop event
        content_blocks: List of content blocks
        tools_in_progress: Dictionary tracking tools being accumulated
    """

    if getattr(event, "type", None) != "content_block_stop":
        return

    # Reject missing, non-integer and negative indexes: a negative index would
    # otherwise silently address a block counted from the end of the list.
    index = getattr(event, "index", None)
    if not isinstance(index, int) or not 0 <= index < len(content_blocks):
        return

    block = content_blocks[index]
    tool_id = block.get("id")

    if block.get("type") != "function" or tool_id not in tools_in_progress:
        return

    # Remove the tracker first so it is released whatever parsing does.
    tool = tools_in_progress.pop(tool_id)
    raw_input = tool.get("input_string")
    function = block.get("function")

    # Nothing accumulated (e.g. a tool called without arguments) or nowhere to
    # store the result: keep the existing arguments without attempting a parse.
    if not raw_input or not isinstance(function, dict):
        return

    try:
        function["arguments"] = json.loads(raw_input)
    except (TypeError, ValueError, RecursionError):
        # Best effort: malformed or truncated JSON (JSONDecodeError is a
        # ValueError) leaves the arguments untouched.
        pass
330
+
331
+
332
def standardize_anthropic_usage(usage: Dict[str, Any]) -> TokenUsage:
    """
    Repackage raw Anthropic usage statistics as a TokenUsage.

    The field names are carried over unchanged; only the defaults differ:
    the input/output token counts fall back to 0 when absent, while the two
    cache counters fall back to None.

    Args:
        usage: Raw usage statistics from Anthropic

    Returns:
        Standardized TokenUsage built from the four counters
    """
    prompt_tokens = usage.get("input_tokens", 0)
    completion_tokens = usage.get("output_tokens", 0)
    cache_read_tokens = usage.get("cache_read_input_tokens")
    cache_creation_tokens = usage.get("cache_creation_input_tokens")

    return TokenUsage(
        input_tokens=prompt_tokens,
        output_tokens=completion_tokens,
        cache_read_input_tokens=cache_read_tokens,
        cache_creation_input_tokens=cache_creation_tokens,
    )
350
+
351
+
352
def format_anthropic_streaming_input(kwargs: Dict[str, Any]) -> Any:
    """
    Format Anthropic streaming input by delegating to merge_system_prompt.

    Args:
        kwargs: Keyword arguments passed to Anthropic API

    Returns:
        Whatever ``merge_system_prompt(kwargs, "anthropic")`` returns
    """
    # Imported at call time rather than at module import time
    # (NOTE(review): presumably to avoid a circular import - confirm).
    from posthoganalytics.ai.utils import merge_system_prompt

    merged_input = merge_system_prompt(kwargs, "anthropic")
    return merged_input
365
+
366
+
367
def format_anthropic_streaming_output_complete(
    content_blocks: List[StreamingContentBlock], accumulated_content: str
) -> List[FormattedMessage]:
    """
    Build the final assistant message for a finished Anthropic stream.

    The content blocks are formatted with format_anthropic_streaming_content;
    when that produces nothing, the raw accumulated text is used instead as a
    single text item.

    Args:
        content_blocks: List of content blocks accumulated during streaming
        accumulated_content: Raw accumulated text content as fallback

    Returns:
        A one-element list holding the assistant message
    """
    items = format_anthropic_streaming_content(content_blocks)

    if not items:
        # No usable blocks: fall back to the raw accumulated text.
        items = [{"type": "text", "text": accumulated_content}]

    return [{"role": "assistant", "content": items}]
@@ -1,4 +1,9 @@
1
1
  from .gemini import Client
2
+ from .gemini_converter import (
3
+ format_gemini_input,
4
+ format_gemini_response,
5
+ extract_gemini_tools,
6
+ )
2
7
 
3
8
 
4
9
  # Create a genai-like module for perfect drop-in replacement
@@ -8,4 +13,10 @@ class _GenAI:
8
13
 
9
14
  genai = _GenAI()
10
15
 
11
- __all__ = ["Client", "genai"]
16
+ __all__ = [
17
+ "Client",
18
+ "genai",
19
+ "format_gemini_input",
20
+ "format_gemini_response",
21
+ "extract_gemini_tools",
22
+ ]
@@ -13,8 +13,14 @@ except ImportError:
13
13
  from posthoganalytics import setup
14
14
  from posthoganalytics.ai.utils import (
15
15
  call_llm_and_track_usage,
16
- get_model_params,
17
- with_privacy_mode,
16
+ capture_streaming_event,
17
+ merge_usage_stats,
18
+ )
19
+ from posthoganalytics.ai.gemini.gemini_converter import (
20
+ format_gemini_input,
21
+ extract_gemini_usage_from_chunk,
22
+ extract_gemini_content_from_chunk,
23
+ format_gemini_streaming_output,
18
24
  )
19
25
  from posthoganalytics.ai.sanitization import sanitize_gemini
20
26
  from posthoganalytics.client import Client as PostHogClient
@@ -72,6 +78,7 @@ class Client:
72
78
  posthog_groups: Default groups for all calls (can be overridden per call)
73
79
  **kwargs: Additional arguments (for future compatibility)
74
80
  """
81
+
75
82
  self._ph_client = posthog_client or setup()
76
83
 
77
84
  if self._ph_client is None:
@@ -133,6 +140,7 @@ class Models:
133
140
  posthog_groups: Default groups for all calls
134
141
  **kwargs: Additional arguments (for future compatibility)
135
142
  """
143
+
136
144
  self._ph_client = posthog_client or setup()
137
145
 
138
146
  if self._ph_client is None:
@@ -150,14 +158,19 @@ class Models:
150
158
  # Add Vertex AI parameters if provided
151
159
  if vertexai is not None:
152
160
  client_args["vertexai"] = vertexai
161
+
153
162
  if credentials is not None:
154
163
  client_args["credentials"] = credentials
164
+
155
165
  if project is not None:
156
166
  client_args["project"] = project
167
+
157
168
  if location is not None:
158
169
  client_args["location"] = location
170
+
159
171
  if debug_config is not None:
160
172
  client_args["debug_config"] = debug_config
173
+
161
174
  if http_options is not None:
162
175
  client_args["http_options"] = http_options
163
176
 
@@ -175,6 +188,7 @@ class Models:
175
188
  raise ValueError(
176
189
  "API key must be provided either as parameter or via GOOGLE_API_KEY/API_KEY environment variable"
177
190
  )
191
+
178
192
  client_args["api_key"] = api_key
179
193
 
180
194
  self._client = genai.Client(**client_args)
@@ -189,6 +203,7 @@ class Models:
189
203
  call_groups: Optional[Dict[str, Any]],
190
204
  ):
191
205
  """Merge call-level PostHog parameters with client defaults."""
206
+
192
207
  # Use call-level values if provided, otherwise fall back to defaults
193
208
  distinct_id = (
194
209
  call_distinct_id
@@ -204,6 +219,7 @@ class Models:
204
219
 
205
220
  # Merge properties: default properties + call properties (call properties override)
206
221
  properties = dict(self._default_properties)
222
+
207
223
  if call_properties:
208
224
  properties.update(call_properties)
209
225
 
@@ -239,6 +255,7 @@ class Models:
239
255
  posthog_groups: Group analytics properties (overrides client default)
240
256
  **kwargs: Arguments passed to Gemini's generate_content
241
257
  """
258
+
242
259
  # Merge PostHog parameters
243
260
  distinct_id, trace_id, properties, privacy_mode, groups = (
244
261
  self._merge_posthog_params(
@@ -288,25 +305,24 @@ class Models:
288
305
  nonlocal accumulated_content # noqa: F824
289
306
  try:
290
307
  for chunk in response:
291
- if hasattr(chunk, "usage_metadata") and chunk.usage_metadata:
292
- usage_stats = {
293
- "input_tokens": getattr(
294
- chunk.usage_metadata, "prompt_token_count", 0
295
- ),
296
- "output_tokens": getattr(
297
- chunk.usage_metadata, "candidates_token_count", 0
298
- ),
299
- }
300
-
301
- if hasattr(chunk, "text") and chunk.text:
302
- accumulated_content.append(chunk.text)
308
+ # Extract usage stats from chunk
309
+ chunk_usage = extract_gemini_usage_from_chunk(chunk)
310
+
311
+ if chunk_usage:
312
+ # Gemini reports cumulative totals, not incremental values
313
+ merge_usage_stats(usage_stats, chunk_usage, mode="cumulative")
314
+
315
+ # Extract content from chunk (now returns content blocks)
316
+ content_block = extract_gemini_content_from_chunk(chunk)
317
+
318
+ if content_block is not None:
319
+ accumulated_content.append(content_block)
303
320
 
304
321
  yield chunk
305
322
 
306
323
  finally:
307
324
  end_time = time.time()
308
325
  latency = end_time - start_time
309
- output = "".join(accumulated_content)
310
326
 
311
327
  self._capture_streaming_event(
312
328
  model,
@@ -319,7 +335,7 @@ class Models:
319
335
  kwargs,
320
336
  usage_stats,
321
337
  latency,
322
- output,
338
+ accumulated_content,
323
339
  )
324
340
 
325
341
  return generator()
@@ -336,61 +352,38 @@ class Models:
336
352
  kwargs: Dict[str, Any],
337
353
  usage_stats: Dict[str, int],
338
354
  latency: float,
339
- output: str,
355
+ output: Any,
340
356
  ):
341
- if trace_id is None:
342
- trace_id = str(uuid.uuid4())
343
-
344
- event_properties = {
345
- "$ai_provider": "gemini",
346
- "$ai_model": model,
347
- "$ai_model_parameters": get_model_params(kwargs),
348
- "$ai_input": with_privacy_mode(
349
- self._ph_client,
350
- privacy_mode,
351
- sanitize_gemini(self._format_input(contents)),
352
- ),
353
- "$ai_output_choices": with_privacy_mode(
354
- self._ph_client,
355
- privacy_mode,
356
- [{"content": output, "role": "assistant"}],
357
- ),
358
- "$ai_http_status": 200,
359
- "$ai_input_tokens": usage_stats.get("input_tokens", 0),
360
- "$ai_output_tokens": usage_stats.get("output_tokens", 0),
361
- "$ai_latency": latency,
362
- "$ai_trace_id": trace_id,
363
- "$ai_base_url": self._base_url,
364
- **(properties or {}),
365
- }
366
-
367
- if distinct_id is None:
368
- event_properties["$process_person_profile"] = False
369
-
370
- if hasattr(self._ph_client, "capture"):
371
- self._ph_client.capture(
372
- distinct_id=distinct_id,
373
- event="$ai_generation",
374
- properties=event_properties,
375
- groups=groups,
376
- )
357
+ from posthoganalytics.ai.types import StreamingEventData
358
+ from posthoganalytics.ai.gemini.gemini_converter import standardize_gemini_usage
359
+
360
+ # Prepare standardized event data
361
+ formatted_input = self._format_input(contents)
362
+ sanitized_input = sanitize_gemini(formatted_input)
363
+
364
+ event_data = StreamingEventData(
365
+ provider="gemini",
366
+ model=model,
367
+ base_url=self._base_url,
368
+ kwargs=kwargs,
369
+ formatted_input=sanitized_input,
370
+ formatted_output=format_gemini_streaming_output(output),
371
+ usage_stats=standardize_gemini_usage(usage_stats),
372
+ latency=latency,
373
+ distinct_id=distinct_id,
374
+ trace_id=trace_id,
375
+ properties=properties,
376
+ privacy_mode=privacy_mode,
377
+ groups=groups,
378
+ )
379
+
380
+ # Use the common capture function
381
+ capture_streaming_event(self._ph_client, event_data)
377
382
 
378
383
  def _format_input(self, contents):
379
384
  """Format input contents for PostHog tracking"""
380
- if isinstance(contents, str):
381
- return [{"role": "user", "content": contents}]
382
- elif isinstance(contents, list):
383
- formatted = []
384
- for item in contents:
385
- if isinstance(item, str):
386
- formatted.append({"role": "user", "content": item})
387
- elif hasattr(item, "text"):
388
- formatted.append({"role": "user", "content": item.text})
389
- else:
390
- formatted.append({"role": "user", "content": str(item)})
391
- return formatted
392
- else:
393
- return [{"role": "user", "content": str(contents)}]
385
+
386
+ return format_gemini_input(contents)
394
387
 
395
388
  def generate_content_stream(
396
389
  self,