opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592)
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
opik/__init__.py CHANGED
@@ -6,22 +6,31 @@ from .api_objects.experiment.experiment_item import (
6
6
  ExperimentItemReferences,
7
7
  )
8
8
  from .api_objects.opik_client import Opik
9
- from .api_objects.prompt import Prompt
9
+ from .api_objects.prompt import Prompt, ChatPrompt
10
10
  from .api_objects.prompt.types import PromptType
11
11
  from .api_objects.span import Span
12
12
  from .api_objects.trace import Trace
13
13
  from .configurator.configure import configure
14
14
  from .decorator.tracker import flush_tracker, track
15
- from .evaluation import evaluate, evaluate_experiment, evaluate_prompt
15
+ from .evaluation import (
16
+ evaluate,
17
+ evaluate_experiment,
18
+ evaluate_on_dict_items,
19
+ evaluate_prompt,
20
+ )
16
21
  from .integrations.sagemaker import auth as sagemaker_auth
17
22
  from .plugins.pytest.decorator import llm_unit
18
23
  from .types import LLMProvider
19
24
  from . import opik_context
20
- from .decorator.tracing_runtime_config import (
25
+ from .tracing_runtime_config import (
21
26
  is_tracing_active,
22
27
  reset_tracing_to_config_default,
23
28
  set_tracing_active,
24
29
  )
30
+ from .decorator.context_manager.span_context_manager import start_as_current_span
31
+ from .decorator.context_manager.trace_context_manager import start_as_current_trace
32
+ from .simulation import SimulatedUser, run_simulation
33
+ from .api_objects.local_recording import record_traces_locally
25
34
 
26
35
 
27
36
  _logging.setup()
@@ -33,6 +42,7 @@ __all__ = [
33
42
  "evaluate",
34
43
  "evaluate_prompt",
35
44
  "evaluate_experiment",
45
+ "evaluate_on_dict_items",
36
46
  "ExperimentItemContent",
37
47
  "ExperimentItemReferences",
38
48
  "track",
@@ -45,11 +55,17 @@ __all__ = [
45
55
  "llm_unit",
46
56
  "configure",
47
57
  "Prompt",
58
+ "ChatPrompt",
48
59
  "PromptType",
49
60
  "LLMProvider",
50
61
  "reset_tracing_to_config_default",
51
62
  "set_tracing_active",
52
63
  "is_tracing_active",
64
+ "start_as_current_span",
65
+ "start_as_current_trace",
66
+ "SimulatedUser",
67
+ "run_simulation",
68
+ "record_traces_locally",
53
69
  ]
54
70
 
55
71
  sagemaker_auth.setup_aws_sagemaker_session_hook()
@@ -0,0 +1,5 @@
1
+ from .anonymizer import Anonymizer
2
+ from .factory import create_anonymizer
3
+ from .recursive_anonymizer import RecursiveAnonymizer
4
+
5
+ __all__ = ["Anonymizer", "create_anonymizer", "RecursiveAnonymizer"]
@@ -0,0 +1,12 @@
1
+ import abc
2
+ from typing import Dict, Any, Union, List
3
+
4
+ AnonymizerDataType = Union[Dict[str, Any], str, List[Any]]
5
+
6
+
7
class Anonymizer(abc.ABC):
    """Interface for objects that scrub sensitive content out of data.

    The supported payloads are described by ``AnonymizerDataType``:
    a dictionary, a string, or a list.
    """

    @abc.abstractmethod
    def anonymize(self, data: AnonymizerDataType, **kwargs: Any) -> AnonymizerDataType:
        """Anonymize ``data``; concrete subclasses provide the actual logic.

        Args:
            data: The payload to anonymize.
            **kwargs: Implementation-specific options.
        """
@@ -0,0 +1,80 @@
1
+ from typing import Union, List, Dict, Callable, Tuple
2
+
3
+ from . import anonymizer, rules_anonymizer, rules
4
+
5
# Every shape accepted for the ``anonymizer_rules`` argument of
# ``create_anonymizer``.
RulesType = Union[
    # Lists holding a single kind of rule.
    List[Dict[str, str]],
    List[Tuple[str, str]],
    List[Callable[[str], str]],
    # A list mixing the three kinds of rule.
    List[Union[Dict[str, str], Tuple[str, str], Callable[[str], str]]],
    # A single rule: a dict with "regex" and "replace" keys, a
    # (regex, replacement) tuple, or a callable mapping str -> str.
    Dict[str, str],
    Tuple[str, str],
    Callable[[str], str],
]
14
+
15
+
16
def create_anonymizer(
    anonymizer_rules: RulesType, max_depth: int = 10
) -> anonymizer.Anonymizer:
    """Build a rules-based anonymizer from a flexible rules specification.

    Args:
        anonymizer_rules: One rule or a list of rules. Each rule may be:
            - a dict with "regex" and "replace" keys (regex rule),
            - a ``(regex, replacement)`` tuple (regex rule),
            - a callable taking a string and returning the anonymized string.
        max_depth: Maximum recursion depth for nested data structures.

    Returns:
        An Anonymizer instance configured with the converted rules.

    Raises:
        ValueError: If the specification, or an entry of a list
            specification, has an unsupported type or an invalid shape.
    """
    compiled: List[rules.Rule] = []

    if callable(anonymizer_rules):
        # A top-level callable is always treated as a function rule.
        compiled.append(rules.FunctionRule(anonymizer_rules))
    elif isinstance(anonymizer_rules, (dict, tuple)):
        compiled.append(_regex_rule_from(anonymizer_rules))
    elif isinstance(anonymizer_rules, list):
        for entry in anonymizer_rules:
            # Inside a list, dict/tuple shapes win over callability.
            if isinstance(entry, (dict, tuple)):
                compiled.append(_regex_rule_from(entry))
            elif callable(entry):
                compiled.append(rules.FunctionRule(entry))
            else:
                raise ValueError(f"Unsupported rule type in list: {type(entry)}")
    else:
        raise ValueError(f"Unsupported rules type: {type(anonymizer_rules)}")

    return rules_anonymizer.RulesAnonymizer(compiled, max_depth=max_depth)


def _regex_rule_from(spec: Union[Dict[str, str], Tuple[str, str]]) -> rules.Rule:
    """Validate a dict or tuple rule specification and turn it into a RegexRule."""
    if isinstance(spec, dict):
        _check_dictionary_rule(spec)
        return rules.RegexRule(spec["regex"], spec["replace"])

    _check_tuple_rule(spec)
    pattern, substitute = spec
    return rules.RegexRule(pattern, substitute)
69
+
70
+
71
+ def _check_dictionary_rule(rule: Dict[str, str]) -> None:
72
+ if "regex" not in rule or "replace" not in rule:
73
+ raise ValueError("Dictionary rule must have 'regex' and 'replace' keys")
74
+
75
+
76
+ def _check_tuple_rule(rule: Tuple[str, str]) -> None:
77
+ if len(rule) != 2:
78
+ raise ValueError(
79
+ "Tuple rule must have exactly 2 elements: (regex, replacement)"
80
+ )
@@ -0,0 +1,64 @@
1
+ import abc
2
+ from typing import Any, Optional
3
+
4
+ from . import anonymizer
5
+
6
+
7
class RecursiveAnonymizer(anonymizer.Anonymizer):
    """Base class that walks nested data and anonymizes every string it finds.

    Dictionaries and lists are traversed recursively; strings are handed to
    anonymize_text(), which subclasses must implement. Values of any other
    type are returned untouched.
    """

    def __init__(self, max_depth: int = 10):
        """Store the recursion limit.

        Args:
            max_depth: Maximum recursion depth. Data reached at this depth or
                deeper is returned as-is, which bounds the traversal of deeply
                nested or circular structures. Defaults to 10.
        """
        self.max_depth = max_depth

    def anonymize(
        self, data: anonymizer.AnonymizerDataType, **kwargs: Any
    ) -> anonymizer.AnonymizerDataType:
        """Anonymize ``data``, forwarding ``kwargs`` to the recursive walk."""
        return self._recursive_anonymize(data, **kwargs)

    @abc.abstractmethod
    def anonymize_text(self, data: str, **kwargs: Any) -> str:
        """Anonymize a single string; called with a ``field_name`` keyword."""

    def _recursive_anonymize(
        self,
        data: anonymizer.AnonymizerDataType,
        depth: int = 0,
        field_name: Optional[str] = None,
        **kwargs: Any,
    ) -> anonymizer.AnonymizerDataType:
        """Walk ``data`` and return a copy with all reachable strings anonymized.

        ``field_name`` accumulates a dotted path made of dictionary keys and
        list indexes leading to the current value.
        """
        # Depth limit reached: stop descending and hand the value back as-is.
        if depth >= self.max_depth:
            return data

        path = "" if field_name is None else field_name

        if isinstance(data, str):
            return self.anonymize_text(data, field_name=path, **kwargs)

        if isinstance(data, dict):
            scrubbed_mapping = {}
            for key, value in data.items():
                scrubbed_mapping[key] = self._recursive_anonymize(
                    value, depth + 1, field_name=f"{path}.{key}", **kwargs
                )
            return scrubbed_mapping

        if isinstance(data, list):
            scrubbed_items = []
            for position, element in enumerate(data):
                scrubbed_items.append(
                    self._recursive_anonymize(
                        element, depth + 1, field_name=f"{path}.{position}", **kwargs
                    )
                )
            return scrubbed_items

        return data
@@ -0,0 +1,56 @@
1
+ import abc
2
+ import re
3
+ from typing import Callable
4
+
5
+
6
class Rule(abc.ABC):
    """Interface for a single text anonymization rule.

    A rule encapsulates one pattern or condition used to anonymize sensitive
    information in text. Concrete subclasses implement apply().
    """

    @abc.abstractmethod
    def apply(self, text: str) -> str:
        """Return the anonymized form of ``text``; implemented by subclasses."""
17
+
18
+
19
class RegexRule(Rule):
    """Rule that substitutes every regular-expression match in the text.

    The pattern is compiled once at construction time and every match found
    in the input is replaced with the configured replacement string.
    """

    def __init__(self, regex: str, replacement: str):
        """Compile the pattern and remember the replacement.

        Args:
            regex: Regular expression pattern to match sensitive data.
            replacement: String to replace matched patterns with.
        """
        self.replacement = replacement
        self.pattern = re.compile(regex)

    def apply(self, text: str) -> str:
        """Return ``text`` with all pattern matches replaced."""
        return re.sub(self.pattern, self.replacement, text)
38
+
39
+
40
class FunctionRule(Rule):
    """Rule that delegates anonymization to a user-supplied callable.

    Any callable that accepts a string and returns the anonymized string can
    be wrapped, which allows arbitrary custom logic.
    """

    def __init__(self, func: Callable[[str], str]):
        """Store the anonymization callable.

        Args:
            func: A callable that takes a string and returns an anonymized version.
        """
        self.func = func

    def apply(self, text: str) -> str:
        """Return whatever the wrapped callable produces for ``text``."""
        anonymize = self.func
        return anonymize(text)
@@ -0,0 +1,35 @@
1
+ from typing import List, Any
2
+
3
+ from . import recursive_anonymizer, rules
4
+
5
+
6
class RulesAnonymizer(recursive_anonymizer.RecursiveAnonymizer):
    """Anonymizer that runs text through an ordered list of rules.

    Each Rule receives the output of the previous one, so the rules form a
    pipeline applied to every string encountered.
    """

    def __init__(self, anonymizer_rules: List[rules.Rule], max_depth: int = 10):
        """Set up the anonymizer.

        Args:
            anonymizer_rules: List of Rule objects to apply for anonymization.
            max_depth: Maximum recursion depth for nested data structures.
        """
        super().__init__(max_depth=max_depth)
        self.rules = anonymizer_rules

    def anonymize_text(self, data: str, **kwargs: Any) -> str:
        """Run ``data`` through every rule, in order.

        Args:
            data: The text to anonymize.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The text produced after the last rule has been applied.
        """
        text = data
        for current_rule in self.rules:
            text = current_rule.apply(text)
        return text
@@ -0,0 +1,36 @@
1
+ import dataclasses
2
+ from typing import Literal
3
+
4
+ from . import attachment
5
+
6
+
7
+ @dataclasses.dataclass
8
+ class AttachmentWithContext:
9
+ """
10
+ Represents an attachment along with its associated context.
11
+
12
+ This class is used to pair an attachment with additional contextual
13
+ information such as the entity type, entity ID, project name, and
14
+ context description. It is specifically useful when dealing with
15
+ attachments related to entities like spans or traces. The context
16
+ can help provide further insights or classification of the
17
+ attachment's purpose.
18
+
19
+ Attributes:
20
+ attachment_data: The actual attachment
21
+ object containing the associated data.
22
+ entity_type: The type of entity the
23
+ attachment is associated with. It must be either "span"
24
+ or "trace".
25
+ entity_id: The unique identifier of the related entity.
26
+ project_name: The name of the project to which the
27
+ attachment and its entity belong.
28
+ context: A brief context description for the attachment,
29
+ explaining its purpose or relevance.
30
+ """
31
+
32
+ attachment_data: attachment.Attachment
33
+ entity_type: Literal["span", "trace"]
34
+ entity_id: str
35
+ project_name: str
36
+ context: str
@@ -0,0 +1,153 @@
1
+ import re
2
+ from typing import Dict, Any, Literal, List, NamedTuple
3
+
4
+ from . import attachment, attachment_context, decoder_base64
5
+
6
+
7
class ExtractionResult(NamedTuple):
    """Outcome of scanning one value for embedded attachments."""

    # Attachments decoded from the scanned value (empty when none were found).
    attachments: List[attachment.Attachment]
    # The scanned value, with each extracted payload replaced by a placeholder.
    sanitized_data: Any
10
+
11
+
12
class AttachmentsExtractor:
    """
    Extracts and processes attachments embedded as Base64 strings within data structures.

    The extractor searches strings (including strings nested in dictionaries and
    lists) for Base64 runs of at least a configured minimum length. Every run the
    decoder recognizes as an attachment is collected, and its text is replaced by
    a ``[file_name]`` placeholder in the sanitized data.
    """

    def __init__(self, min_attachment_size: int):
        """
        Configure the minimum attachment size and compile the Base64 pattern.

        Args:
            min_attachment_size: The minimum size of the attachment in characters
                for it to be considered valid. This ensures that only large enough
                base64 strings are matched to minimize false positives.
        """
        self._min_attachment_size = min_attachment_size
        self.decoder = decoder_base64.Base64AttachmentDecoder()

        # Base64 text comes in groups of 4 characters. Require at least one
        # group so the pattern never produces zero-width matches, which would
        # otherwise trigger a decode attempt at every position of the string.
        min_base64_groups = max(1, int(min_attachment_size) // 4)
        base64_pattern = (
            r"(?:[A-Za-z0-9+/]{4}){"
            + str(min_base64_groups)
            + ",}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"
        )
        self.pattern = re.compile(base64_pattern)

    def extract_and_replace(
        self,
        data: Dict[str, Any],
        entity_type: Literal["span", "trace"],
        entity_id: str,
        project_name: str,
        context: Literal["input", "output", "metadata"],
    ) -> List[attachment_context.AttachmentWithContext]:
        """
        Extract attachments from every value of ``data``, sanitizing it in place.

        Values in which at least one attachment was found are overwritten in
        ``data`` with their sanitized version; other values are left untouched.

        Args:
            data: Mapping to scan. It is mutated in place.
            entity_type: Type of the entity the data belongs to.
            entity_id: Identifier of that entity.
            project_name: Name of the project the entity belongs to.
            context: Where the data comes from (input, output, or metadata).

        Returns:
            The extracted attachments, each paired with the entity context.
        """
        attachments: List[attachment_context.AttachmentWithContext] = []
        for key, value in data.items():
            extraction_result = self._try_extract_attachments(value, context)
            if not extraction_result.attachments:
                continue

            # Only existing keys are reassigned, so mutating during iteration is safe.
            data[key] = extraction_result.sanitized_data
            attachments.extend(
                attachment_context.AttachmentWithContext(
                    attachment_data=extracted_attachment,
                    entity_type=entity_type,
                    entity_id=entity_id,
                    project_name=project_name,
                    context=context,
                )
                for extracted_attachment in extraction_result.attachments
            )

        return attachments

    def _try_extract_attachments(
        self, data: Any, context: Literal["input", "output", "metadata"]
    ) -> ExtractionResult:
        """
        Recursively extract attachments from data that can be a string, dict, list, or other type.

        Args:
            data: The data to process (can be str, dict, list, or other types)
            context: The context where the data is located (input, output, or metadata)

        Returns:
            ExtractionResult with extracted attachments and sanitized data
        """
        if isinstance(data, str):
            return self._extract_from_string(data, context)
        if isinstance(data, dict):
            return self._extract_from_dict(data, context)
        if isinstance(data, list):
            return self._extract_from_list(data, context)

        # Other types (int, bool, None, etc.) cannot carry attachments.
        return ExtractionResult(attachments=[], sanitized_data=data)

    def _extract_from_string(
        self, data: str, context: Literal["input", "output", "metadata"]
    ) -> ExtractionResult:
        """
        Extract attachments from a string value.

        Each Base64 match that decodes to an attachment is replaced, at its own
        position, by a ``[file_name]`` placeholder for that attachment. Matches
        the decoder rejects are left in the text unchanged.
        """
        if len(data) < self._min_attachment_size:
            # skip short strings
            return ExtractionResult(attachments=[], sanitized_data=data)

        attachments: List[attachment.Attachment] = []
        pieces: List[str] = []
        cursor = 0
        for match in self.pattern.finditer(data):
            decoded_attachment = self.decoder.decode(match.group(), context)
            if decoded_attachment is None:
                continue

            attachments.append(decoded_attachment)
            # Rebuild from match spans so that exactly this occurrence is
            # replaced; str.replace() would also rewrite identical text
            # elsewhere and detach later attachments from their placeholders.
            pieces.append(data[cursor : match.start()])
            pieces.append(f"[{decoded_attachment.file_name}]")
            cursor = match.end()

        if not attachments:
            return ExtractionResult(attachments=[], sanitized_data=data)

        pieces.append(data[cursor:])
        return ExtractionResult(
            attachments=attachments, sanitized_data="".join(pieces)
        )

    def _extract_from_dict(
        self, data: Dict[str, Any], context: Literal["input", "output", "metadata"]
    ) -> ExtractionResult:
        """Recursively extract attachments from a dictionary, building a new dict."""
        all_attachments: List[attachment.Attachment] = []
        sanitized_dict = {}

        for key, value in data.items():
            result = self._try_extract_attachments(value, context)
            sanitized_dict[key] = result.sanitized_data
            all_attachments.extend(result.attachments)

        return ExtractionResult(
            attachments=all_attachments, sanitized_data=sanitized_dict
        )

    def _extract_from_list(
        self, data: List[Any], context: Literal["input", "output", "metadata"]
    ) -> ExtractionResult:
        """Recursively extract attachments from a list, building a new list."""
        all_attachments: List[attachment.Attachment] = []
        sanitized_list = []

        for item in data:
            result = self._try_extract_attachments(item, context)
            sanitized_list.append(result.sanitized_data)
            all_attachments.extend(result.attachments)

        return ExtractionResult(
            attachments=all_attachments, sanitized_data=sanitized_list
        )
@@ -206,6 +206,7 @@ class AttachmentClient:
206
206
  entity_id=entity_id,
207
207
  project_name=project_name,
208
208
  encoded_url_override=encoded_url_override,
209
+ delete_after_upload=False,
209
210
  )
210
211
 
211
212
  file_uploader.upload_attachment(
@@ -13,6 +13,7 @@ def attachment_to_message(
13
13
  entity_id: str,
14
14
  project_name: str,
15
15
  url_override: str,
16
+ delete_after_upload: bool = False,
16
17
  ) -> messages.CreateAttachmentMessage:
17
18
  if attachment_data.data is None:
18
19
  raise ValueError("Attachment data cannot be None")
@@ -32,6 +33,7 @@ def attachment_to_message(
32
33
  entity_id=entity_id,
33
34
  project_name=project_name,
34
35
  encoded_url_override=base_url_path,
36
+ delete_after_upload=delete_after_upload,
35
37
  )
36
38
 
37
39
 
@@ -0,0 +1,18 @@
1
+ import abc
2
+ from typing import Any, Optional
3
+
4
+ from . import attachment
5
+
6
+
7
class AttachmentDecoder(abc.ABC):
    """
    Interface for turning raw attachment data into an `Attachment` object.

    Concrete decoders implement decode() with the logic required by the
    specific encoding they support.
    """

    @abc.abstractmethod
    def decode(self, raw_data: str, **kwargs: Any) -> Optional[attachment.Attachment]:
        """Decode ``raw_data``; implemented by subclasses.

        Args:
            raw_data: The raw attachment data.
            **kwargs: Decoder-specific options.
        """
@@ -0,0 +1,83 @@
1
+ import base64
2
+ import binascii
3
+ import logging
4
+ import tempfile
5
+ from typing import Any, Optional, Literal
6
+
7
+ from . import attachment, decoder, decoder_helpers
8
+
9
+ LOGGER = logging.getLogger(__name__)
10
+
11
+
12
class Base64AttachmentDecoder(decoder.AttachmentDecoder):
    """Decodes base64 encoded attachment data.

    This decoder decodes base64 strings, detects MIME types from content, and
    creates Attachment objects backed by a temporary file.
    """

    def decode(
        self,
        raw_data: str,
        context: Literal["input", "output", "metadata"] = "input",
        **kwargs: Any,
    ) -> Optional[attachment.Attachment]:
        """Decode base64 encoded data into an Attachment object.

        The decoded bytes are written to a temporary file that is NOT deleted
        automatically; the returned Attachment references it by path.

        Args:
            raw_data: Base64 encoded string data
            context: Context string for filename generation.

        Returns:
            Attachment object with decoded data, or None if decoding fails or type is not recognizable
        """
        if not isinstance(raw_data, str):
            LOGGER.warning("Attachment data is not a string, skipping.")
            return None

        try:
            # Strict decoding: characters outside the base64 alphabet raise an error
            decoded_bytes = base64.b64decode(raw_data, validate=True)

            # Detect MIME type from content
            mime_type = decoder_helpers.detect_mime_type(decoded_bytes)

            # Skip if not a recognizable file type
            if not mime_type or mime_type in ("application/octet-stream", "text/plain"):
                LOGGER.debug("Attachment type is not recognized, skipping.")
                return None

            # Get file extension from the MIME type
            extension = decoder_helpers.get_file_extension(mime_type)

            # Generate filename
            file_name = decoder_helpers.create_attachment_filename(
                context, extension=extension
            )

            # Save decoded bytes to a temporary file
            temp_path = self._write_temp_file(decoded_bytes, extension)

            # Return Attachment object with a file path
            return attachment.Attachment(
                data=temp_path, file_name=file_name, content_type=mime_type
            )

        except (ValueError, binascii.Error) as e:
            LOGGER.debug(
                "Failed to decode attachment data, reason: invalid base64. Reason: %s",
                e,
                exc_info=True,
            )
            # Not valid base64, return None
            return None
        except Exception as ex:
            LOGGER.warning(
                "Failed to decode attachment data, reason: %s", ex, exc_info=True
            )
            # Unexpected error, return None to avoid crashing the pipeline
            return None

    @staticmethod
    def _write_temp_file(payload: bytes, suffix: str) -> str:
        """Write ``payload`` to a persistent temporary file and return its path.

        The handle is always closed. If writing fails, the partially written
        file is removed (best effort) and the original error is re-raised.
        """
        import os

        temp_file = tempfile.NamedTemporaryFile(
            mode="wb", delete=False, suffix=suffix
        )
        try:
            # The context manager closes (and thereby flushes) the handle,
            # even when write() raises.
            with temp_file:
                temp_file.write(payload)
        except Exception:
            # delete=False means nothing else would clean this file up.
            try:
                os.unlink(temp_file.name)
            except OSError:
                pass
            raise

        return temp_file.name