posthoganalytics 6.6.1__py3-none-any.whl → 6.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ except ImportError:
8
8
 
9
9
  import time
10
10
  import uuid
11
- from typing import Any, Dict, Optional, cast
11
+ from typing import Any, Dict, Optional
12
12
 
13
13
  from posthoganalytics.ai.utils import (
14
14
  call_llm_and_track_usage,
@@ -16,6 +16,7 @@ from posthoganalytics.ai.utils import (
16
16
  merge_system_prompt,
17
17
  with_privacy_mode,
18
18
  )
19
+ from posthoganalytics.ai.sanitization import sanitize_anthropic
19
20
  from posthoganalytics.client import Client as PostHogClient
20
21
  from posthoganalytics import setup
21
22
 
@@ -184,7 +185,7 @@ class WrappedMessages(Messages):
184
185
  "$ai_input": with_privacy_mode(
185
186
  self._client._ph_client,
186
187
  posthog_privacy_mode,
187
- merge_system_prompt(kwargs, "anthropic"),
188
+ sanitize_anthropic(merge_system_prompt(kwargs, "anthropic")),
188
189
  ),
189
190
  "$ai_output_choices": with_privacy_mode(
190
191
  self._client._ph_client,
@@ -17,6 +17,7 @@ from posthoganalytics.ai.utils import (
17
17
  merge_system_prompt,
18
18
  with_privacy_mode,
19
19
  )
20
+ from posthoganalytics.ai.sanitization import sanitize_anthropic
20
21
  from posthoganalytics.client import Client as PostHogClient
21
22
 
22
23
 
@@ -184,7 +185,7 @@ class AsyncWrappedMessages(AsyncMessages):
184
185
  "$ai_input": with_privacy_mode(
185
186
  self._client._ph_client,
186
187
  posthog_privacy_mode,
187
- merge_system_prompt(kwargs, "anthropic"),
188
+ sanitize_anthropic(merge_system_prompt(kwargs, "anthropic")),
188
189
  ),
189
190
  "$ai_output_choices": with_privacy_mode(
190
191
  self._client._ph_client,
@@ -16,6 +16,7 @@ from posthoganalytics.ai.utils import (
16
16
  get_model_params,
17
17
  with_privacy_mode,
18
18
  )
19
+ from posthoganalytics.ai.sanitization import sanitize_gemini
19
20
  from posthoganalytics.client import Client as PostHogClient
20
21
 
21
22
 
@@ -347,7 +348,7 @@ class Models:
347
348
  "$ai_input": with_privacy_mode(
348
349
  self._ph_client,
349
350
  privacy_mode,
350
- self._format_input(contents),
351
+ sanitize_gemini(self._format_input(contents)),
351
352
  ),
352
353
  "$ai_output_choices": with_privacy_mode(
353
354
  self._ph_client,
@@ -37,6 +37,7 @@ from pydantic import BaseModel
37
37
 
38
38
  from posthoganalytics import setup
39
39
  from posthoganalytics.ai.utils import get_model_params, with_privacy_mode
40
+ from posthoganalytics.ai.sanitization import sanitize_langchain
40
41
  from posthoganalytics.client import Client
41
42
 
42
43
  log = logging.getLogger("posthog")
@@ -480,7 +481,7 @@ class CallbackHandler(BaseCallbackHandler):
480
481
  event_properties = {
481
482
  "$ai_trace_id": trace_id,
482
483
  "$ai_input_state": with_privacy_mode(
483
- self._ph_client, self._privacy_mode, run.input
484
+ self._ph_client, self._privacy_mode, sanitize_langchain(run.input)
484
485
  ),
485
486
  "$ai_latency": run.latency,
486
487
  "$ai_span_name": run.name,
@@ -550,7 +551,7 @@ class CallbackHandler(BaseCallbackHandler):
550
551
  "$ai_model": run.model,
551
552
  "$ai_model_parameters": run.model_params,
552
553
  "$ai_input": with_privacy_mode(
553
- self._ph_client, self._privacy_mode, run.input
554
+ self._ph_client, self._privacy_mode, sanitize_langchain(run.input)
554
555
  ),
555
556
  "$ai_http_status": 200,
556
557
  "$ai_latency": run.latency,
@@ -15,6 +15,7 @@ from posthoganalytics.ai.utils import (
15
15
  get_model_params,
16
16
  with_privacy_mode,
17
17
  )
18
+ from posthoganalytics.ai.sanitization import sanitize_openai, sanitize_openai_response
18
19
  from posthoganalytics.client import Client as PostHogClient
19
20
  from posthoganalytics import setup
20
21
 
@@ -194,7 +195,9 @@ class WrappedResponses:
194
195
  "$ai_model": kwargs.get("model"),
195
196
  "$ai_model_parameters": get_model_params(kwargs),
196
197
  "$ai_input": with_privacy_mode(
197
- self._client._ph_client, posthog_privacy_mode, kwargs.get("input")
198
+ self._client._ph_client,
199
+ posthog_privacy_mode,
200
+ sanitize_openai_response(kwargs.get("input")),
198
201
  ),
199
202
  "$ai_output_choices": with_privacy_mode(
200
203
  self._client._ph_client,
@@ -427,7 +430,9 @@ class WrappedCompletions:
427
430
  "$ai_model": kwargs.get("model"),
428
431
  "$ai_model_parameters": get_model_params(kwargs),
429
432
  "$ai_input": with_privacy_mode(
430
- self._client._ph_client, posthog_privacy_mode, kwargs.get("messages")
433
+ self._client._ph_client,
434
+ posthog_privacy_mode,
435
+ sanitize_openai(kwargs.get("messages")),
431
436
  ),
432
437
  "$ai_output_choices": with_privacy_mode(
433
438
  self._client._ph_client,
@@ -518,7 +523,9 @@ class WrappedEmbeddings:
518
523
  "$ai_provider": "openai",
519
524
  "$ai_model": kwargs.get("model"),
520
525
  "$ai_input": with_privacy_mode(
521
- self._client._ph_client, posthog_privacy_mode, kwargs.get("input")
526
+ self._client._ph_client,
527
+ posthog_privacy_mode,
528
+ sanitize_openai_response(kwargs.get("input")),
522
529
  ),
523
530
  "$ai_http_status": 200,
524
531
  "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
@@ -1,6 +1,6 @@
1
1
  import time
2
2
  import uuid
3
- from typing import Any, Dict, List, Optional, cast
3
+ from typing import Any, Dict, List, Optional
4
4
 
5
5
  try:
6
6
  import openai
@@ -16,6 +16,7 @@ from posthoganalytics.ai.utils import (
16
16
  get_model_params,
17
17
  with_privacy_mode,
18
18
  )
19
+ from posthoganalytics.ai.sanitization import sanitize_openai, sanitize_openai_response
19
20
  from posthoganalytics.client import Client as PostHogClient
20
21
 
21
22
 
@@ -195,7 +196,9 @@ class WrappedResponses:
195
196
  "$ai_model": kwargs.get("model"),
196
197
  "$ai_model_parameters": get_model_params(kwargs),
197
198
  "$ai_input": with_privacy_mode(
198
- self._client._ph_client, posthog_privacy_mode, kwargs.get("input")
199
+ self._client._ph_client,
200
+ posthog_privacy_mode,
201
+ sanitize_openai_response(kwargs.get("input")),
199
202
  ),
200
203
  "$ai_output_choices": with_privacy_mode(
201
204
  self._client._ph_client,
@@ -431,7 +434,9 @@ class WrappedCompletions:
431
434
  "$ai_model": kwargs.get("model"),
432
435
  "$ai_model_parameters": get_model_params(kwargs),
433
436
  "$ai_input": with_privacy_mode(
434
- self._client._ph_client, posthog_privacy_mode, kwargs.get("messages")
437
+ self._client._ph_client,
438
+ posthog_privacy_mode,
439
+ sanitize_openai(kwargs.get("messages")),
435
440
  ),
436
441
  "$ai_output_choices": with_privacy_mode(
437
442
  self._client._ph_client,
@@ -522,7 +527,9 @@ class WrappedEmbeddings:
522
527
  "$ai_provider": "openai",
523
528
  "$ai_model": kwargs.get("model"),
524
529
  "$ai_input": with_privacy_mode(
525
- self._client._ph_client, posthog_privacy_mode, kwargs.get("input")
530
+ self._client._ph_client,
531
+ posthog_privacy_mode,
532
+ sanitize_openai_response(kwargs.get("input")),
526
533
  ),
527
534
  "$ai_http_status": 200,
528
535
  "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
@@ -0,0 +1,226 @@
1
+ import re
2
+ from typing import Any
3
+ from urllib.parse import urlparse
4
+
5
+ REDACTED_IMAGE_PLACEHOLDER = "[base64 image redacted]"
6
+
7
+
8
def is_base64_data_url(text: str) -> bool:
    """Return True if *text* starts with a ``data:<mime>;base64,`` prefix."""
    prefix_pattern = r"^data:([^;]+);base64,"
    return bool(re.match(prefix_pattern, text))
10
+
11
+
12
def is_valid_url(text: str) -> bool:
    """Return True when *text* parses as an absolute URL (scheme + netloc).

    NOTE(review): the path-prefix fallback below is only reached when
    urlparse raises (rare). Making it unconditional would mis-classify raw
    JPEG base64 — which starts with "/9j/" — as a filesystem path and skip
    redacting it, so the original control flow is preserved exactly.
    """
    try:
        parsed = urlparse(text)
        return bool(parsed.scheme and parsed.netloc)
    except Exception:
        pass

    return text.startswith(("/", "./", "../"))
20
+
21
+
22
def is_raw_base64(text: str) -> bool:
    """Heuristic: True for strings that look like bare base64 payloads.

    URLs are excluded first; anything else longer than 20 characters made up
    exclusively of the base64 alphabet (with optional '=' padding) counts.
    NOTE(review): long plain alphanumeric tokens also match — the heuristic
    deliberately errs on the side of redaction.
    """
    if is_valid_url(text):
        return False

    looks_like_b64 = re.match(r"^[A-Za-z0-9+/]+=*$", text)
    return looks_like_b64 is not None and len(text) > 20
27
+
28
+
29
def redact_base64_data_url(value: Any) -> Any:
    """Replace base64 image payloads with a placeholder; pass everything else through.

    Non-string values are returned untouched. A string is redacted when it is
    either a ``data:...;base64,`` URL or looks like a bare base64 blob.
    """
    if not isinstance(value, str):
        return value

    if is_base64_data_url(value) or is_raw_base64(value):
        return REDACTED_IMAGE_PLACEHOLDER

    return value
40
+
41
+
42
def process_messages(messages: Any, transform_content_func) -> Any:
    """Apply *transform_content_func* to the "content" of each message.

    Accepts either a single message dict or a list of them; anything that is
    not a dict with a "content" key passes through unchanged. String (and
    falsy) content is left alone, list content is transformed element-wise,
    and any other truthy content is transformed directly.
    """
    if not messages:
        return messages

    def _transform(content: Any) -> Any:
        # Plain strings and falsy content never carry structured image parts.
        if isinstance(content, str) or not content:
            return content
        if isinstance(content, list):
            return [transform_content_func(part) for part in content]
        return transform_content_func(content)

    def _handle(message: Any) -> Any:
        if isinstance(message, dict) and "content" in message:
            return {**message, "content": _transform(message["content"])}
        return message

    if isinstance(messages, list):
        return [_handle(m) for m in messages]

    return _handle(messages)
67
+
68
+
69
def sanitize_openai_image(item: Any) -> Any:
    """Redact base64 payloads in an OpenAI chat-completions image part.

    Targets parts shaped like ``{"type": "image_url", "image_url": {"url": ...}}``;
    everything else passes through unchanged.
    """
    if not isinstance(item, dict):
        return item

    image_url = item.get("image_url")
    if (
        item.get("type") == "image_url"
        and isinstance(image_url, dict)
        and "url" in image_url
    ):
        redacted = {**image_url, "url": redact_base64_data_url(image_url["url"])}
        return {**item, "image_url": redacted}

    return item
87
+
88
+
89
def sanitize_openai_response_image(item: Any) -> Any:
    """Redact base64 payloads in an OpenAI Responses-API input_image part.

    Here ``image_url`` is a plain string rather than a nested dict.
    """
    if not isinstance(item, dict):
        return item

    if item.get("type") != "input_image" or "image_url" not in item:
        return item

    return {**item, "image_url": redact_base64_data_url(item["image_url"])}
100
+
101
+
102
def sanitize_anthropic_image(item: Any) -> Any:
    """Redact the data of an Anthropic base64 image content block.

    When the block's source declares ``"type": "base64"`` the provider is
    explicitly telling us the payload is base64, so the data is always
    replaced with the placeholder — no content sniffing needed.
    """
    if not isinstance(item, dict):
        return item

    source = item.get("source")
    is_base64_image = (
        item.get("type") == "image"
        and isinstance(source, dict)
        and source.get("type") == "base64"
        and "data" in source
    )
    if is_base64_image:
        return {**item, "source": {**source, "data": REDACTED_IMAGE_PLACEHOLDER}}

    return item
123
+
124
+
125
def sanitize_gemini_part(part: Any) -> Any:
    """Redact base64 payloads in a Gemini content part.

    A dict-shaped ``inline_data`` entry carries base64 data by contract in
    the Gemini format, so its "data" field is unconditionally replaced.
    """
    if not isinstance(part, dict):
        return part

    inline = part.get("inline_data")
    if isinstance(inline, dict) and "data" in inline:
        return {**part, "inline_data": {**inline, "data": REDACTED_IMAGE_PLACEHOLDER}}

    return part
145
+
146
+
147
def process_gemini_item(item: Any) -> Any:
    """Sanitize the "parts" of a single Gemini content item, if present.

    Handles both a list of parts and a single bare part; items without a
    truthy "parts" entry pass through unchanged.
    """
    if not isinstance(item, dict):
        return item

    parts = item.get("parts")
    if not parts:
        return item

    if isinstance(parts, list):
        sanitized = [sanitize_gemini_part(p) for p in parts]
    else:
        sanitized = sanitize_gemini_part(parts)

    return {**item, "parts": sanitized}
161
+
162
+
163
def sanitize_langchain_image(item: Any) -> Any:
    """Redact base64 image payloads in a LangChain message part.

    LangChain normalizes several provider formats, so all are handled:
    OpenAI-style nested ``image_url`` dicts, flat ``{"type": "image",
    "data": ...}`` items, the Anthropic-style structured ``source`` dict
    (always redacted — that format is base64 by contract), and generic
    ``media`` items.
    """
    if not isinstance(item, dict):
        return item

    part_type = item.get("type")

    # OpenAI style: {"type": "image_url", "image_url": {"url": ...}}
    if part_type == "image_url":
        image_url = item.get("image_url")
        if isinstance(image_url, dict) and "url" in image_url:
            redacted = {**image_url, "url": redact_base64_data_url(image_url["url"])}
            return {**item, "image_url": redacted}

    if part_type == "image":
        # Flat style: {"type": "image", "data": ...} — checked before the
        # structured source to keep the original precedence.
        if "data" in item:
            return {**item, "data": redact_base64_data_url(item["data"])}
        # Anthropic style: raw base64 in a structured source dict — always redact.
        source = item.get("source")
        if isinstance(source, dict) and "data" in source:
            return {**item, "source": {**source, "data": REDACTED_IMAGE_PLACEHOLDER}}

    if part_type == "media" and "data" in item:
        return {**item, "data": redact_base64_data_url(item["data"])}

    return item
201
+
202
+
203
def sanitize_openai(data: Any) -> Any:
    """Sanitize OpenAI chat-completions messages (base64 image redaction)."""
    return process_messages(data, sanitize_openai_image)
205
+
206
+
207
def sanitize_openai_response(data: Any) -> Any:
    """Sanitize OpenAI Responses-API input (base64 image redaction)."""
    return process_messages(data, sanitize_openai_response_image)
209
+
210
+
211
def sanitize_anthropic(data: Any) -> Any:
    """Sanitize Anthropic messages (base64 image redaction)."""
    return process_messages(data, sanitize_anthropic_image)
213
+
214
+
215
def sanitize_gemini(data: Any) -> Any:
    """Sanitize Gemini contents — a single item or a list — for base64 redaction.

    Gemini contents carry "parts" rather than "content", so this does not go
    through process_messages.
    """
    if not data:
        return data

    if isinstance(data, list):
        return [process_gemini_item(entry) for entry in data]

    return process_gemini_item(data)
223
+
224
+
225
def sanitize_langchain(data: Any) -> Any:
    """Sanitize LangChain messages (base64 image redaction)."""
    return process_messages(data, sanitize_langchain_image)
@@ -5,6 +5,12 @@ from typing import Any, Callable, Dict, List, Optional
5
5
  from httpx import URL
6
6
 
7
7
  from posthoganalytics.client import Client as PostHogClient
8
+ from posthoganalytics.ai.sanitization import (
9
+ sanitize_openai,
10
+ sanitize_anthropic,
11
+ sanitize_gemini,
12
+ sanitize_langchain,
13
+ )
8
14
 
9
15
 
10
16
  def get_model_params(kwargs: Dict[str, Any]) -> Dict[str, Any]:
@@ -422,12 +428,15 @@ def call_llm_and_track_usage(
422
428
  usage = get_usage(response, provider)
423
429
 
424
430
  messages = merge_system_prompt(kwargs, provider)
431
+ sanitized_messages = sanitize_messages(messages, provider)
425
432
 
426
433
  event_properties = {
427
434
  "$ai_provider": provider,
428
435
  "$ai_model": kwargs.get("model"),
429
436
  "$ai_model_parameters": get_model_params(kwargs),
430
- "$ai_input": with_privacy_mode(ph_client, posthog_privacy_mode, messages),
437
+ "$ai_input": with_privacy_mode(
438
+ ph_client, posthog_privacy_mode, sanitized_messages
439
+ ),
431
440
  "$ai_output_choices": with_privacy_mode(
432
441
  ph_client, posthog_privacy_mode, format_response(response, provider)
433
442
  ),
@@ -536,12 +545,15 @@ async def call_llm_and_track_usage_async(
536
545
  usage = get_usage(response, provider)
537
546
 
538
547
  messages = merge_system_prompt(kwargs, provider)
548
+ sanitized_messages = sanitize_messages(messages, provider)
539
549
 
540
550
  event_properties = {
541
551
  "$ai_provider": provider,
542
552
  "$ai_model": kwargs.get("model"),
543
553
  "$ai_model_parameters": get_model_params(kwargs),
544
- "$ai_input": with_privacy_mode(ph_client, posthog_privacy_mode, messages),
554
+ "$ai_input": with_privacy_mode(
555
+ ph_client, posthog_privacy_mode, sanitized_messages
556
+ ),
545
557
  "$ai_output_choices": with_privacy_mode(
546
558
  ph_client, posthog_privacy_mode, format_response(response, provider)
547
559
  ),
@@ -600,6 +612,19 @@ async def call_llm_and_track_usage_async(
600
612
  return response
601
613
 
602
614
 
615
def sanitize_messages(data: Any, provider: str) -> Any:
    """Route *data* to the sanitizer matching *provider*.

    Unknown providers pass through untouched so new providers fail open
    rather than crashing tracking.
    """
    if provider == "anthropic":
        return sanitize_anthropic(data)
    if provider == "openai":
        return sanitize_openai(data)
    if provider == "gemini":
        return sanitize_gemini(data)
    if provider == "langchain":
        return sanitize_langchain(data)
    return data
626
+
627
+
603
628
  def with_privacy_mode(ph_client: PostHogClient, privacy_mode: bool, value: Any):
604
629
  if ph_client.privacy_mode or privacy_mode:
605
630
  return None
@@ -329,7 +329,7 @@ class Client(object):
329
329
  only these flags will be evaluated, improving performance.
330
330
 
331
331
  Category:
332
- Feature Flags
332
+ Feature flags
333
333
  """
334
334
  resp_data = self.get_flags_decision(
335
335
  distinct_id,
@@ -368,7 +368,7 @@ class Client(object):
368
368
  ```
369
369
 
370
370
  Category:
371
- Feature Flags
371
+ Feature flags
372
372
  """
373
373
  resp_data = self.get_flags_decision(
374
374
  distinct_id,
@@ -407,7 +407,7 @@ class Client(object):
407
407
  ```
408
408
 
409
409
  Category:
410
- Feature Flags
410
+ Feature flags
411
411
  """
412
412
  resp = self.get_flags_decision(
413
413
  distinct_id,
@@ -446,7 +446,7 @@ class Client(object):
446
446
  ```
447
447
 
448
448
  Category:
449
- Feature Flags
449
+ Feature flags
450
450
  """
451
451
  groups = groups or {}
452
452
  person_properties = person_properties or {}
@@ -1169,7 +1169,7 @@ class Client(object):
1169
1169
  ```
1170
1170
 
1171
1171
  Category:
1172
- Feature Flags
1172
+ Feature flags
1173
1173
  """
1174
1174
  if not self.personal_api_key:
1175
1175
  self.log.warning(
@@ -1291,7 +1291,7 @@ class Client(object):
1291
1291
  ```
1292
1292
 
1293
1293
  Category:
1294
- Feature Flags
1294
+ Feature flags
1295
1295
  """
1296
1296
  response = self.get_feature_flag(
1297
1297
  key,
@@ -1499,7 +1499,7 @@ class Client(object):
1499
1499
  ```
1500
1500
 
1501
1501
  Category:
1502
- Feature Flags
1502
+ Feature flags
1503
1503
  """
1504
1504
  feature_flag_result = self.get_feature_flag_result(
1505
1505
  key,
@@ -1589,7 +1589,7 @@ class Client(object):
1589
1589
  ```
1590
1590
 
1591
1591
  Category:
1592
- Feature Flags
1592
+ Feature flags
1593
1593
  """
1594
1594
  feature_flag_result = self._get_feature_flag_result(
1595
1595
  key,
@@ -1759,7 +1759,7 @@ class Client(object):
1759
1759
  ```
1760
1760
 
1761
1761
  Category:
1762
- Feature Flags
1762
+ Feature flags
1763
1763
  """
1764
1764
  response = self.get_all_flags_and_payloads(
1765
1765
  distinct_id,
@@ -1803,7 +1803,7 @@ class Client(object):
1803
1803
  ```
1804
1804
 
1805
1805
  Category:
1806
- Feature Flags
1806
+ Feature flags
1807
1807
  """
1808
1808
  if self.disabled:
1809
1809
  return {"featureFlags": None, "featureFlagPayloads": None}
@@ -139,9 +139,70 @@ def evaluate_flag_dependency(
139
139
  # Definitive False result - dependency failed
140
140
  return False
141
141
 
142
+ # All dependencies in the chain have been evaluated successfully
143
+ # Now check if the final flag value matches the expected value in the property
144
+ flag_key = property.get("key")
145
+ expected_value = property.get("value")
146
+ operator = property.get("operator", "exact")
147
+
148
+ if flag_key and expected_value is not None:
149
+ # Get the actual value of the flag we're checking
150
+ actual_value = evaluation_cache.get(flag_key)
151
+
152
+ if actual_value is None:
153
+ # Flag wasn't evaluated - this shouldn't happen if dependency chain is correct
154
+ raise InconclusiveMatchError(
155
+ f"Flag '{flag_key}' was not evaluated despite being in dependency chain"
156
+ )
157
+
158
+ # For flag dependencies, we need to compare the actual flag result with expected value
159
+ # using the flag_evaluates_to operator logic
160
+ if operator == "flag_evaluates_to":
161
+ return matches_dependency_value(expected_value, actual_value)
162
+ else:
163
+ # This should never happen, but just to be defensive.
164
+ raise InconclusiveMatchError(
165
+ f"Flag dependency property for '{property.get('key', 'unknown')}' has invalid operator '{operator}'"
166
+ )
167
+
168
+ # If no value check needed, return True (all dependencies passed)
142
169
  return True
143
170
 
144
171
 
172
+ def matches_dependency_value(expected_value, actual_value):
173
+ """
174
+ Check if the actual flag value matches the expected dependency value.
175
+
176
+ This follows the same logic as the C# MatchesDependencyValue function:
177
+ - String variant case: check for exact match or boolean true
178
+ - Boolean case: must match expected boolean value
179
+
180
+ Args:
181
+ expected_value: The expected value from the property
182
+ actual_value: The actual value returned by the flag evaluation
183
+
184
+ Returns:
185
+ bool: True if the values match according to flag dependency rules
186
+ """
187
+ # String variant case - check for exact match or boolean true
188
+ if isinstance(actual_value, str) and len(actual_value) > 0:
189
+ if isinstance(expected_value, bool):
190
+ # Any variant matches boolean true
191
+ return expected_value
192
+ elif isinstance(expected_value, str):
193
+ # variants are case-sensitive, hence our comparison is too
194
+ return actual_value == expected_value
195
+ else:
196
+ return False
197
+
198
+ # Boolean case - must match expected boolean value
199
+ elif isinstance(actual_value, bool) and isinstance(expected_value, bool):
200
+ return actual_value == expected_value
201
+
202
+ # Default case
203
+ return False
204
+
205
+
145
206
  def match_feature_flag_properties(
146
207
  flag,
147
208
  distinct_id,