posthoganalytics 6.7.0__py3-none-any.whl → 6.7.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
@@ -0,0 +1,393 @@
+ """
+ Anthropic-specific conversion utilities.
+
+ This module handles the conversion of Anthropic API responses and inputs
+ into standardized formats for PostHog tracking.
+ """
+
+ import json
+ from typing import Any, Dict, List, Optional, Tuple
+
+ from posthoganalytics.ai.types import (
+     FormattedContentItem,
+     FormattedFunctionCall,
+     FormattedMessage,
+     FormattedTextContent,
+     StreamingContentBlock,
+     StreamingUsageStats,
+     TokenUsage,
+     ToolInProgress,
+ )
+
+
+ def format_anthropic_response(response: Any) -> List[FormattedMessage]:
+     """
+     Format an Anthropic response into standardized message format.
+
+     Args:
+         response: The response object from the Anthropic API
+
+     Returns:
+         List of formatted messages with role and content
+     """
+
+     output: List[FormattedMessage] = []
+
+     if response is None:
+         return output
+
+     content: List[FormattedContentItem] = []
+
+     # Process content blocks from the response
+     if hasattr(response, "content"):
+         for choice in response.content:
+             if (
+                 hasattr(choice, "type")
+                 and choice.type == "text"
+                 and hasattr(choice, "text")
+                 and choice.text
+             ):
+                 text_content: FormattedTextContent = {
+                     "type": "text",
+                     "text": choice.text,
+                 }
+                 content.append(text_content)
+
+             elif (
+                 hasattr(choice, "type")
+                 and choice.type == "tool_use"
+                 and hasattr(choice, "name")
+                 and hasattr(choice, "id")
+             ):
+                 function_call: FormattedFunctionCall = {
+                     "type": "function",
+                     "id": choice.id,
+                     "function": {
+                         "name": choice.name,
+                         "arguments": getattr(choice, "input", {}),
+                     },
+                 }
+                 content.append(function_call)
+
+     if content:
+         message: FormattedMessage = {
+             "role": "assistant",
+             "content": content,
+         }
+         output.append(message)
+
+     return output
+
+
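
A minimal usage sketch (editorial illustration, not part of the package; the SimpleNamespace mocks stand in for Anthropic response objects):

    from types import SimpleNamespace

    resp = SimpleNamespace(
        content=[
            SimpleNamespace(type="text", text="Hello!"),
            SimpleNamespace(
                type="tool_use", id="toolu_1", name="get_weather",
                input={"city": "Paris"},
            ),
        ]
    )
    format_anthropic_response(resp)
    # [{"role": "assistant", "content": [
    #     {"type": "text", "text": "Hello!"},
    #     {"type": "function", "id": "toolu_1",
    #      "function": {"name": "get_weather", "arguments": {"city": "Paris"}}}]}]
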
+ def format_anthropic_input(
+     messages: List[Dict[str, Any]], system: Optional[str] = None
+ ) -> List[FormattedMessage]:
+     """
+     Format Anthropic input messages with optional system prompt.
+
+     Args:
+         messages: List of message dictionaries
+         system: Optional system prompt to prepend
+
+     Returns:
+         List of formatted messages
+     """
+
+     formatted_messages: List[FormattedMessage] = []
+
+     # Add system message if provided
+     if system is not None:
+         formatted_messages.append({"role": "system", "content": system})
+
+     # Add user messages
+     if messages:
+         for msg in messages:
+             # Messages are already in the correct format, just ensure type safety
+             formatted_msg: FormattedMessage = {
+                 "role": msg.get("role", "user"),
+                 "content": msg.get("content", ""),
+             }
+             formatted_messages.append(formatted_msg)
+
+     return formatted_messages
+
+
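
Sketch of the input formatter (illustrative only):

    format_anthropic_input(
        [{"role": "user", "content": "What's 2 + 2?"}],
        system="Answer concisely.",
    )
    # [{"role": "system", "content": "Answer concisely."},
    #  {"role": "user", "content": "What's 2 + 2?"}]
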
+ def extract_anthropic_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
+     """
+     Extract tool definitions from Anthropic API kwargs.
+
+     Args:
+         kwargs: Keyword arguments passed to Anthropic API
+
+     Returns:
+         Tool definitions if present, None otherwise
+     """
+
+     return kwargs.get("tools", None)
+
+
+ def format_anthropic_streaming_content(
+     content_blocks: List[StreamingContentBlock],
+ ) -> List[FormattedContentItem]:
+     """
+     Format content blocks from Anthropic streaming response.
+
+     Used by streaming handlers to format accumulated content blocks.
+
+     Args:
+         content_blocks: List of content block dictionaries from streaming
+
+     Returns:
+         List of formatted content items
+     """
+
+     formatted: List[FormattedContentItem] = []
+
+     for block in content_blocks:
+         if block.get("type") == "text":
+             formatted.append(
+                 {
+                     "type": "text",
+                     "text": block.get("text") or "",
+                 }
+             )
+
+         elif block.get("type") == "function":
+             formatted.append(
+                 {
+                     "type": "function",
+                     "id": block.get("id"),
+                     "function": block.get("function") or {},
+                 }
+             )
+
+     return formatted
+
+
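
Accumulated blocks pass through mostly unchanged, with None fields defaulted (illustrative):

    format_anthropic_streaming_content([{"type": "text", "text": None}])
    # [{"type": "text", "text": ""}]
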
+ def extract_anthropic_usage_from_event(event: Any) -> StreamingUsageStats:
+     """
+     Extract usage statistics from an Anthropic streaming event.
+
+     Args:
+         event: Streaming event from Anthropic API
+
+     Returns:
+         Dictionary of usage statistics
+     """
+
+     usage: StreamingUsageStats = {}
+
+     # Handle usage stats from message_start event
+     if hasattr(event, "type") and event.type == "message_start":
+         if hasattr(event, "message") and hasattr(event.message, "usage"):
+             usage["input_tokens"] = getattr(event.message.usage, "input_tokens", 0)
+             usage["cache_creation_input_tokens"] = getattr(
+                 event.message.usage, "cache_creation_input_tokens", 0
+             )
+             usage["cache_read_input_tokens"] = getattr(
+                 event.message.usage, "cache_read_input_tokens", 0
+             )
+
+     # Handle usage stats from message_delta event
+     if hasattr(event, "usage") and event.usage:
+         usage["output_tokens"] = getattr(event.usage, "output_tokens", 0)
+
+     return usage
+
+
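
Illustration with a mocked message_start event (the shape mirrors the attribute checks above; not part of the package):

    from types import SimpleNamespace

    start = SimpleNamespace(
        type="message_start",
        message=SimpleNamespace(
            usage=SimpleNamespace(
                input_tokens=12,
                cache_creation_input_tokens=0,
                cache_read_input_tokens=0,
            )
        ),
    )
    extract_anthropic_usage_from_event(start)
    # {"input_tokens": 12, "cache_creation_input_tokens": 0,
    #  "cache_read_input_tokens": 0}
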
+ def handle_anthropic_content_block_start(
+     event: Any,
+ ) -> Tuple[Optional[StreamingContentBlock], Optional[ToolInProgress]]:
+     """
+     Handle content block start event from Anthropic streaming.
+
+     Args:
+         event: Content block start event
+
+     Returns:
+         Tuple of (content_block, tool_in_progress)
+     """
+
+     if not (hasattr(event, "type") and event.type == "content_block_start"):
+         return None, None
+
+     if not hasattr(event, "content_block"):
+         return None, None
+
+     block = event.content_block
+
+     if not hasattr(block, "type"):
+         return None, None
+
+     if block.type == "text":
+         content_block: StreamingContentBlock = {"type": "text", "text": ""}
+         return content_block, None
+
+     elif block.type == "tool_use":
+         tool_block: StreamingContentBlock = {
+             "type": "function",
+             "id": getattr(block, "id", ""),
+             "function": {"name": getattr(block, "name", ""), "arguments": {}},
+         }
+         tool_in_progress: ToolInProgress = {"block": tool_block, "input_string": ""}
+         return tool_block, tool_in_progress
+
+     return None, None
+
+
+ def handle_anthropic_text_delta(
+     event: Any, current_block: Optional[StreamingContentBlock]
+ ) -> Optional[str]:
+     """
+     Handle text delta event from Anthropic streaming.
+
+     Args:
+         event: Delta event
+         current_block: Current text block being accumulated
+
+     Returns:
+         Text delta if present
+     """
+
+     if hasattr(event, "delta") and hasattr(event.delta, "text"):
+         delta_text = event.delta.text or ""
+
+         if current_block is not None and current_block.get("type") == "text":
+             text_val = current_block.get("text")
+             if text_val is not None:
+                 current_block["text"] = text_val + delta_text
+             else:
+                 current_block["text"] = delta_text
+
+         return delta_text
+
+     return None
+
+
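
Text deltas are appended to the current block in place (illustrative mock):

    from types import SimpleNamespace

    block = {"type": "text", "text": "Hel"}
    handle_anthropic_text_delta(SimpleNamespace(delta=SimpleNamespace(text="lo")), block)
    # returns "lo"; block is now {"type": "text", "text": "Hello"}
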
+ def handle_anthropic_tool_delta(
+     event: Any,
+     content_blocks: List[StreamingContentBlock],
+     tools_in_progress: Dict[str, ToolInProgress],
+ ) -> None:
+     """
+     Handle tool input delta event from Anthropic streaming.
+
+     Args:
+         event: Tool delta event
+         content_blocks: List of content blocks
+         tools_in_progress: Dictionary tracking tools being accumulated
+     """
+
+     if not (hasattr(event, "type") and event.type == "content_block_delta"):
+         return
+
+     if not (
+         hasattr(event, "delta")
+         and hasattr(event.delta, "type")
+         and event.delta.type == "input_json_delta"
+     ):
+         return
+
+     if hasattr(event, "index") and event.index < len(content_blocks):
+         block = content_blocks[event.index]
+
+         if block.get("type") == "function" and block.get("id") in tools_in_progress:
+             tool = tools_in_progress[block["id"]]
+             partial_json = getattr(event.delta, "partial_json", "")
+             tool["input_string"] += partial_json
+
+
+ def finalize_anthropic_tool_input(
+     event: Any,
+     content_blocks: List[StreamingContentBlock],
+     tools_in_progress: Dict[str, ToolInProgress],
+ ) -> None:
+     """
+     Finalize tool input when content block stops.
+
+     Args:
+         event: Content block stop event
+         content_blocks: List of content blocks
+         tools_in_progress: Dictionary tracking tools being accumulated
+     """
+
+     if not (hasattr(event, "type") and event.type == "content_block_stop"):
+         return
+
+     if hasattr(event, "index") and event.index < len(content_blocks):
+         block = content_blocks[event.index]
+
+         if block.get("type") == "function" and block.get("id") in tools_in_progress:
+             tool = tools_in_progress[block["id"]]
+
+             try:
+                 block["function"]["arguments"] = json.loads(tool["input_string"])
+             except Exception:
+                 # Keep empty dict if parsing fails
+                 pass
+
+             del tools_in_progress[block["id"]]
+
+
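
The three streaming handlers above cooperate over a tool call's lifetime. A sketch with mocked events (shapes assumed from the attribute checks above; not part of the package):

    from types import SimpleNamespace

    start = SimpleNamespace(
        type="content_block_start",
        content_block=SimpleNamespace(type="tool_use", id="toolu_1", name="get_weather"),
    )
    block, in_progress = handle_anthropic_content_block_start(start)
    content_blocks, tools = [block], {"toolu_1": in_progress}

    delta = SimpleNamespace(
        type="content_block_delta",
        index=0,
        delta=SimpleNamespace(type="input_json_delta", partial_json='{"city": "Paris"}'),
    )
    handle_anthropic_tool_delta(delta, content_blocks, tools)

    stop = SimpleNamespace(type="content_block_stop", index=0)
    finalize_anthropic_tool_input(stop, content_blocks, tools)
    # block["function"]["arguments"] == {"city": "Paris"}; tools is empty again
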
+ def standardize_anthropic_usage(usage: Dict[str, Any]) -> TokenUsage:
+     """
+     Standardize Anthropic usage statistics to common TokenUsage format.
+
+     Anthropic already uses standard field names, so this mainly structures the data.
+
+     Args:
+         usage: Raw usage statistics from Anthropic
+
+     Returns:
+         Standardized TokenUsage dict
+     """
+     return TokenUsage(
+         input_tokens=usage.get("input_tokens", 0),
+         output_tokens=usage.get("output_tokens", 0),
+         cache_read_input_tokens=usage.get("cache_read_input_tokens"),
+         cache_creation_input_tokens=usage.get("cache_creation_input_tokens"),
+     )
+
+
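
Missing cache counters pass through as None (illustrative; assumes TokenUsage is a TypedDict, so construction yields a plain dict):

    standardize_anthropic_usage({"input_tokens": 12, "output_tokens": 34})
    # {"input_tokens": 12, "output_tokens": 34,
    #  "cache_read_input_tokens": None, "cache_creation_input_tokens": None}
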
+ def format_anthropic_streaming_input(kwargs: Dict[str, Any]) -> Any:
+     """
+     Format Anthropic streaming input using system prompt merging.
+
+     Args:
+         kwargs: Keyword arguments passed to Anthropic API
+
+     Returns:
+         Formatted input ready for PostHog tracking
+     """
+     from posthoganalytics.ai.utils import merge_system_prompt
+
+     return merge_system_prompt(kwargs, "anthropic")
+
+
+ def format_anthropic_streaming_output_complete(
+     content_blocks: List[StreamingContentBlock], accumulated_content: str
+ ) -> List[FormattedMessage]:
+     """
+     Format complete Anthropic streaming output.
+
+     Combines existing logic for formatting content blocks with fallback to accumulated content.
+
+     Args:
+         content_blocks: List of content blocks accumulated during streaming
+         accumulated_content: Raw accumulated text content as fallback
+
+     Returns:
+         Formatted messages ready for PostHog tracking
+     """
+     formatted_content = format_anthropic_streaming_content(content_blocks)
+
+     if formatted_content:
+         return [{"role": "assistant", "content": formatted_content}]
+     else:
+         # Fallback to accumulated content if no blocks
+         return [
+             {
+                 "role": "assistant",
+                 "content": [{"type": "text", "text": accumulated_content}],
+             }
+         ]
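
End-of-stream formatting falls back to the raw accumulated text when no blocks were collected (illustrative):

    format_anthropic_streaming_output_complete([], "Hello!")
    # [{"role": "assistant",
    #   "content": [{"type": "text", "text": "Hello!"}]}]
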
@@ -1,4 +1,9 @@
  from .gemini import Client
+ from .gemini_converter import (
+     format_gemini_input,
+     format_gemini_response,
+     extract_gemini_tools,
+ )


  # Create a genai-like module for perfect drop-in replacement
@@ -8,4 +13,10 @@ class _GenAI:

  genai = _GenAI()

- __all__ = ["Client", "genai"]
+ __all__ = [
+     "Client",
+     "genai",
+     "format_gemini_input",
+     "format_gemini_response",
+     "extract_gemini_tools",
+ ]
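
With these exports in place, the converter helpers are importable straight from the subpackage (illustrative):

    from posthoganalytics.ai.gemini import (
        format_gemini_input,
        format_gemini_response,
        extract_gemini_tools,
    )
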
@@ -13,9 +13,16 @@ except ImportError:
  from posthoganalytics import setup
  from posthoganalytics.ai.utils import (
      call_llm_and_track_usage,
-     get_model_params,
-     with_privacy_mode,
+     capture_streaming_event,
+     merge_usage_stats,
  )
+ from posthoganalytics.ai.gemini.gemini_converter import (
+     format_gemini_input,
+     extract_gemini_usage_from_chunk,
+     extract_gemini_content_from_chunk,
+     format_gemini_streaming_output,
+ )
+ from posthoganalytics.ai.sanitization import sanitize_gemini
  from posthoganalytics.client import Client as PostHogClient


@@ -71,6 +78,7 @@ class Client:
              posthog_groups: Default groups for all calls (can be overridden per call)
              **kwargs: Additional arguments (for future compatibility)
          """
+
          self._ph_client = posthog_client or setup()

          if self._ph_client is None:
@@ -132,6 +140,7 @@ class Models:
              posthog_groups: Default groups for all calls
              **kwargs: Additional arguments (for future compatibility)
          """
+
          self._ph_client = posthog_client or setup()

          if self._ph_client is None:
@@ -149,14 +158,19 @@ class Models:
          # Add Vertex AI parameters if provided
          if vertexai is not None:
              client_args["vertexai"] = vertexai
+
          if credentials is not None:
              client_args["credentials"] = credentials
+
          if project is not None:
              client_args["project"] = project
+
          if location is not None:
              client_args["location"] = location
+
          if debug_config is not None:
              client_args["debug_config"] = debug_config
+
          if http_options is not None:
              client_args["http_options"] = http_options

@@ -174,6 +188,7 @@ class Models:
              raise ValueError(
                  "API key must be provided either as parameter or via GOOGLE_API_KEY/API_KEY environment variable"
              )
+
          client_args["api_key"] = api_key

          self._client = genai.Client(**client_args)
@@ -188,6 +203,7 @@ class Models:
          call_groups: Optional[Dict[str, Any]],
      ):
          """Merge call-level PostHog parameters with client defaults."""
+
          # Use call-level values if provided, otherwise fall back to defaults
          distinct_id = (
              call_distinct_id
@@ -203,6 +219,7 @@ class Models:

          # Merge properties: default properties + call properties (call properties override)
          properties = dict(self._default_properties)
+
          if call_properties:
              properties.update(call_properties)

@@ -238,6 +255,7 @@ class Models:
              posthog_groups: Group analytics properties (overrides client default)
              **kwargs: Arguments passed to Gemini's generate_content
          """
+
          # Merge PostHog parameters
          distinct_id, trace_id, properties, privacy_mode, groups = (
              self._merge_posthog_params(
@@ -287,25 +305,24 @@ class Models:
              nonlocal accumulated_content  # noqa: F824
              try:
                  for chunk in response:
-                     if hasattr(chunk, "usage_metadata") and chunk.usage_metadata:
-                         usage_stats = {
-                             "input_tokens": getattr(
-                                 chunk.usage_metadata, "prompt_token_count", 0
-                             ),
-                             "output_tokens": getattr(
-                                 chunk.usage_metadata, "candidates_token_count", 0
-                             ),
-                         }
-
-                     if hasattr(chunk, "text") and chunk.text:
-                         accumulated_content.append(chunk.text)
+                     # Extract usage stats from chunk
+                     chunk_usage = extract_gemini_usage_from_chunk(chunk)
+
+                     if chunk_usage:
+                         # Gemini reports cumulative totals, not incremental values
+                         merge_usage_stats(usage_stats, chunk_usage, mode="cumulative")
+
+                     # Extract content from chunk (now returns content blocks)
+                     content_block = extract_gemini_content_from_chunk(chunk)
+
+                     if content_block is not None:
+                         accumulated_content.append(content_block)

                      yield chunk

              finally:
                  end_time = time.time()
                  latency = end_time - start_time
-                 output = "".join(accumulated_content)

                  self._capture_streaming_event(
                      model,
@@ -318,7 +335,7 @@ class Models:
                      kwargs,
                      usage_stats,
                      latency,
-                     output,
+                     accumulated_content,
                  )

          return generator()
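
The "cumulative" mode matters because Gemini chunks carry running totals rather than per-chunk increments. A sketch of the assumed merge semantics (merge_usage_stats itself is not shown in this diff):

    def merge_cumulative(totals: dict, chunk: dict) -> None:
        # Later chunks report the running total, so replace rather than add;
        # summing would double-count tokens across chunks.
        for key, value in chunk.items():
            totals[key] = value
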
@@ -335,61 +352,38 @@ class Models:
          kwargs: Dict[str, Any],
          usage_stats: Dict[str, int],
          latency: float,
-         output: str,
+         output: Any,
      ):
-         if trace_id is None:
-             trace_id = str(uuid.uuid4())
-
-         event_properties = {
-             "$ai_provider": "gemini",
-             "$ai_model": model,
-             "$ai_model_parameters": get_model_params(kwargs),
-             "$ai_input": with_privacy_mode(
-                 self._ph_client,
-                 privacy_mode,
-                 self._format_input(contents),
-             ),
-             "$ai_output_choices": with_privacy_mode(
-                 self._ph_client,
-                 privacy_mode,
-                 [{"content": output, "role": "assistant"}],
-             ),
-             "$ai_http_status": 200,
-             "$ai_input_tokens": usage_stats.get("input_tokens", 0),
-             "$ai_output_tokens": usage_stats.get("output_tokens", 0),
-             "$ai_latency": latency,
-             "$ai_trace_id": trace_id,
-             "$ai_base_url": self._base_url,
-             **(properties or {}),
-         }
-
-         if distinct_id is None:
-             event_properties["$process_person_profile"] = False
-
-         if hasattr(self._ph_client, "capture"):
-             self._ph_client.capture(
-                 distinct_id=distinct_id,
-                 event="$ai_generation",
-                 properties=event_properties,
-                 groups=groups,
-             )
+         from posthoganalytics.ai.types import StreamingEventData
+         from posthoganalytics.ai.gemini.gemini_converter import standardize_gemini_usage
+
+         # Prepare standardized event data
+         formatted_input = self._format_input(contents)
+         sanitized_input = sanitize_gemini(formatted_input)
+
+         event_data = StreamingEventData(
+             provider="gemini",
+             model=model,
+             base_url=self._base_url,
+             kwargs=kwargs,
+             formatted_input=sanitized_input,
+             formatted_output=format_gemini_streaming_output(output),
+             usage_stats=standardize_gemini_usage(usage_stats),
+             latency=latency,
+             distinct_id=distinct_id,
+             trace_id=trace_id,
+             properties=properties,
+             privacy_mode=privacy_mode,
+             groups=groups,
+         )
+
+         # Use the common capture function
+         capture_streaming_event(self._ph_client, event_data)

      def _format_input(self, contents):
          """Format input contents for PostHog tracking"""
-         if isinstance(contents, str):
-             return [{"role": "user", "content": contents}]
-         elif isinstance(contents, list):
-             formatted = []
-             for item in contents:
-                 if isinstance(item, str):
-                     formatted.append({"role": "user", "content": item})
-                 elif hasattr(item, "text"):
-                     formatted.append({"role": "user", "content": item.text})
-                 else:
-                     formatted.append({"role": "user", "content": str(item)})
-             return formatted
-         else:
-             return [{"role": "user", "content": str(contents)}]
+
+         return format_gemini_input(contents)

      def generate_content_stream(
          self,