opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +19 -3
- opik/anonymizer/__init__.py +5 -0
- opik/anonymizer/anonymizer.py +12 -0
- opik/anonymizer/factory.py +80 -0
- opik/anonymizer/recursive_anonymizer.py +64 -0
- opik/anonymizer/rules.py +56 -0
- opik/anonymizer/rules_anonymizer.py +35 -0
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/data_helpers.py +79 -0
- opik/api_objects/dataset/dataset.py +64 -4
- opik/api_objects/dataset/rest_operations.py +11 -2
- opik/api_objects/experiment/experiment.py +57 -57
- opik/api_objects/experiment/experiment_item.py +2 -1
- opik/api_objects/experiment/experiments_client.py +64 -0
- opik/api_objects/experiment/helpers.py +35 -11
- opik/api_objects/experiment/rest_operations.py +65 -5
- opik/api_objects/helpers.py +8 -5
- opik/api_objects/local_recording.py +81 -0
- opik/api_objects/opik_client.py +600 -108
- opik/api_objects/opik_query_language.py +39 -5
- opik/api_objects/prompt/__init__.py +12 -2
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +210 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
- opik/api_objects/prompt/client.py +189 -47
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/text/prompt.py +174 -0
- opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
- opik/api_objects/prompt/types.py +23 -0
- opik/api_objects/search_helpers.py +89 -0
- opik/api_objects/span/span_data.py +35 -25
- opik/api_objects/threads/threads_client.py +39 -5
- opik/api_objects/trace/trace_client.py +52 -2
- opik/api_objects/trace/trace_data.py +15 -24
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/__init__.py +5 -0
- opik/cli/__main__.py +6 -0
- opik/cli/configure.py +66 -0
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/healthcheck.py +21 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +49 -0
- opik/cli/proxy.py +93 -0
- opik/cli/usage_report/__init__.py +16 -0
- opik/cli/usage_report/charts.py +783 -0
- opik/cli/usage_report/cli.py +274 -0
- opik/cli/usage_report/constants.py +9 -0
- opik/cli/usage_report/extraction.py +749 -0
- opik/cli/usage_report/pdf.py +244 -0
- opik/cli/usage_report/statistics.py +78 -0
- opik/cli/usage_report/utils.py +235 -0
- opik/config.py +13 -7
- opik/configurator/configure.py +17 -0
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +9 -1
- opik/decorator/base_track_decorator.py +205 -133
- opik/decorator/context_manager/span_context_manager.py +123 -0
- opik/decorator/context_manager/trace_context_manager.py +84 -0
- opik/decorator/opik_args/__init__.py +13 -0
- opik/decorator/opik_args/api_classes.py +71 -0
- opik/decorator/opik_args/helpers.py +120 -0
- opik/decorator/span_creation_handler.py +25 -6
- opik/dict_utils.py +3 -3
- opik/evaluation/__init__.py +13 -2
- opik/evaluation/engine/engine.py +272 -75
- opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
- opik/evaluation/engine/helpers.py +31 -6
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/evaluation_result.py +168 -2
- opik/evaluation/evaluator.py +533 -62
- opik/evaluation/metrics/__init__.py +103 -4
- opik/evaluation/metrics/aggregated_metric.py +35 -6
- opik/evaluation/metrics/base_metric.py +1 -1
- opik/evaluation/metrics/conversation/__init__.py +48 -0
- opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
- opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
- opik/evaluation/metrics/conversation/helpers.py +14 -15
- opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
- opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
- opik/evaluation/metrics/conversation/types.py +4 -5
- opik/evaluation/metrics/conversation_types.py +9 -0
- opik/evaluation/metrics/heuristics/bertscore.py +107 -0
- opik/evaluation/metrics/heuristics/bleu.py +35 -15
- opik/evaluation/metrics/heuristics/chrf.py +127 -0
- opik/evaluation/metrics/heuristics/contains.py +47 -11
- opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
- opik/evaluation/metrics/heuristics/gleu.py +113 -0
- opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
- opik/evaluation/metrics/heuristics/meteor.py +119 -0
- opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
- opik/evaluation/metrics/heuristics/readability.py +129 -0
- opik/evaluation/metrics/heuristics/rouge.py +26 -9
- opik/evaluation/metrics/heuristics/spearman.py +88 -0
- opik/evaluation/metrics/heuristics/tone.py +155 -0
- opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
- opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
- opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
- opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
- opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
- opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
- opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
- opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
- opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/__init__.py +8 -0
- opik/evaluation/models/base_model.py +107 -1
- opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
- opik/evaluation/models/langchain/message_converters.py +97 -15
- opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
- opik/evaluation/models/litellm/util.py +125 -0
- opik/evaluation/models/litellm/warning_filters.py +16 -4
- opik/evaluation/models/model_capabilities.py +187 -0
- opik/evaluation/models/models_factory.py +25 -3
- opik/evaluation/preprocessing.py +92 -0
- opik/evaluation/report.py +70 -12
- opik/evaluation/rest_operations.py +49 -45
- opik/evaluation/samplers/__init__.py +4 -0
- opik/evaluation/samplers/base_dataset_sampler.py +40 -0
- opik/evaluation/samplers/random_dataset_sampler.py +48 -0
- opik/evaluation/score_statistics.py +66 -0
- opik/evaluation/scorers/__init__.py +4 -0
- opik/evaluation/scorers/scorer_function.py +55 -0
- opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
- opik/evaluation/test_case.py +3 -2
- opik/evaluation/test_result.py +1 -0
- opik/evaluation/threads/evaluator.py +31 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +33 -0
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/hooks/__init__.py +23 -0
- opik/hooks/anonymizer_hook.py +36 -0
- opik/hooks/httpx_client_hook.py +112 -0
- opik/httpx_client.py +12 -9
- opik/id_helpers.py +18 -0
- opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
- opik/integrations/adk/helpers.py +16 -7
- opik/integrations/adk/legacy_opik_tracer.py +7 -4
- opik/integrations/adk/opik_tracer.py +14 -1
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
- opik/integrations/adk/recursive_callback_injector.py +4 -7
- opik/integrations/bedrock/converse/__init__.py +0 -0
- opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
- opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
- opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
- opik/integrations/bedrock/invoke_model/__init__.py +0 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
- opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
- opik/integrations/bedrock/invoke_model/response_types.py +34 -0
- opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
- opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
- opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
- opik/integrations/bedrock/opik_tracker.py +42 -4
- opik/integrations/bedrock/types.py +19 -0
- opik/integrations/crewai/crewai_decorator.py +8 -51
- opik/integrations/crewai/opik_tracker.py +31 -10
- opik/integrations/crewai/patchers/__init__.py +5 -0
- opik/integrations/crewai/patchers/flow.py +118 -0
- opik/integrations/crewai/patchers/litellm_completion.py +30 -0
- opik/integrations/crewai/patchers/llm_client.py +207 -0
- opik/integrations/dspy/callback.py +80 -17
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_connector.py +2 -2
- opik/integrations/haystack/opik_tracer.py +3 -7
- opik/integrations/langchain/__init__.py +3 -1
- opik/integrations/langchain/helpers.py +96 -0
- opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_encoder_extension.py +1 -1
- opik/integrations/langchain/opik_tracer.py +474 -229
- opik/integrations/litellm/__init__.py +5 -0
- opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
- opik/integrations/litellm/litellm_completion_decorator.py +242 -0
- opik/integrations/litellm/opik_tracker.py +43 -0
- opik/integrations/litellm/stream_patchers.py +151 -0
- opik/integrations/llama_index/callback.py +146 -107
- opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
- opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
- opik/integrations/openai/opik_tracker.py +1 -1
- opik/integrations/sagemaker/auth.py +5 -1
- opik/llm_usage/google_usage.py +3 -1
- opik/llm_usage/opik_usage.py +7 -8
- opik/llm_usage/opik_usage_factory.py +4 -2
- opik/logging_messages.py +6 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batch_manager_constuctors.py +10 -0
- opik/message_processing/batching/batchers.py +59 -27
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/__init__.py +0 -0
- opik/message_processing/emulation/emulator_message_processor.py +578 -0
- opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
- opik/message_processing/emulation/models.py +162 -0
- opik/message_processing/encoder_helpers.py +79 -0
- opik/message_processing/messages.py +56 -1
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/processors/message_processors.py +92 -0
- opik/message_processing/processors/message_processors_chain.py +96 -0
- opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
- opik/message_processing/queue_consumer.py +9 -3
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +43 -10
- opik/opik_context.py +16 -4
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +346 -15
- opik/rest_api/alerts/__init__.py +7 -0
- opik/rest_api/alerts/client.py +667 -0
- opik/rest_api/alerts/raw_client.py +1015 -0
- opik/rest_api/alerts/types/__init__.py +7 -0
- opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
- opik/rest_api/annotation_queues/__init__.py +4 -0
- opik/rest_api/annotation_queues/client.py +668 -0
- opik/rest_api/annotation_queues/raw_client.py +1019 -0
- opik/rest_api/automation_rule_evaluators/client.py +34 -2
- opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
- opik/rest_api/client.py +15 -0
- opik/rest_api/dashboards/__init__.py +4 -0
- opik/rest_api/dashboards/client.py +462 -0
- opik/rest_api/dashboards/raw_client.py +648 -0
- opik/rest_api/datasets/client.py +1310 -44
- opik/rest_api/datasets/raw_client.py +2269 -358
- opik/rest_api/experiments/__init__.py +2 -2
- opik/rest_api/experiments/client.py +191 -5
- opik/rest_api/experiments/raw_client.py +301 -7
- opik/rest_api/experiments/types/__init__.py +4 -1
- opik/rest_api/experiments/types/experiment_update_status.py +5 -0
- opik/rest_api/experiments/types/experiment_update_type.py +5 -0
- opik/rest_api/experiments/types/experiment_write_status.py +5 -0
- opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
- opik/rest_api/llm_provider_key/client.py +20 -0
- opik/rest_api/llm_provider_key/raw_client.py +20 -0
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
- opik/rest_api/manual_evaluation/__init__.py +4 -0
- opik/rest_api/manual_evaluation/client.py +347 -0
- opik/rest_api/manual_evaluation/raw_client.py +543 -0
- opik/rest_api/optimizations/client.py +145 -9
- opik/rest_api/optimizations/raw_client.py +237 -13
- opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
- opik/rest_api/prompts/__init__.py +2 -2
- opik/rest_api/prompts/client.py +227 -6
- opik/rest_api/prompts/raw_client.py +331 -2
- opik/rest_api/prompts/types/__init__.py +3 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/spans/__init__.py +0 -2
- opik/rest_api/spans/client.py +238 -76
- opik/rest_api/spans/raw_client.py +307 -95
- opik/rest_api/spans/types/__init__.py +0 -2
- opik/rest_api/traces/client.py +572 -161
- opik/rest_api/traces/raw_client.py +736 -229
- opik/rest_api/types/__init__.py +352 -17
- opik/rest_api/types/aggregation_data.py +1 -0
- opik/rest_api/types/alert.py +33 -0
- opik/rest_api/types/alert_alert_type.py +5 -0
- opik/rest_api/types/alert_page_public.py +24 -0
- opik/rest_api/types/alert_public.py +33 -0
- opik/rest_api/types/alert_public_alert_type.py +5 -0
- opik/rest_api/types/alert_trigger.py +27 -0
- opik/rest_api/types/alert_trigger_config.py +28 -0
- opik/rest_api/types/alert_trigger_config_public.py +28 -0
- opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_write.py +22 -0
- opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
- opik/rest_api/types/alert_trigger_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_public.py +27 -0
- opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_write.py +23 -0
- opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
- opik/rest_api/types/alert_write.py +28 -0
- opik/rest_api/types/alert_write_alert_type.py +5 -0
- opik/rest_api/types/annotation_queue.py +42 -0
- opik/rest_api/types/annotation_queue_batch.py +27 -0
- opik/rest_api/types/annotation_queue_item_ids.py +19 -0
- opik/rest_api/types/annotation_queue_page_public.py +28 -0
- opik/rest_api/types/annotation_queue_public.py +38 -0
- opik/rest_api/types/annotation_queue_public_scope.py +5 -0
- opik/rest_api/types/annotation_queue_reviewer.py +20 -0
- opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
- opik/rest_api/types/annotation_queue_scope.py +5 -0
- opik/rest_api/types/annotation_queue_write.py +31 -0
- opik/rest_api/types/annotation_queue_write_scope.py +5 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +62 -2
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
- opik/rest_api/types/boolean_feedback_definition.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
- opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
- opik/rest_api/types/boolean_feedback_detail.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
- opik/rest_api/types/dashboard_page_public.py +24 -0
- opik/rest_api/types/dashboard_public.py +30 -0
- opik/rest_api/types/dataset.py +4 -0
- opik/rest_api/types/dataset_expansion.py +42 -0
- opik/rest_api/types/dataset_expansion_response.py +39 -0
- opik/rest_api/types/dataset_item.py +2 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +2 -0
- opik/rest_api/types/dataset_item_filter.py +27 -0
- opik/rest_api/types/dataset_item_filter_operator.py +21 -0
- opik/rest_api/types/dataset_item_page_compare.py +5 -0
- opik/rest_api/types/dataset_item_page_public.py +5 -0
- opik/rest_api/types/dataset_item_public.py +2 -0
- opik/rest_api/types/dataset_item_update.py +39 -0
- opik/rest_api/types/dataset_item_write.py +1 -0
- opik/rest_api/types/dataset_public.py +4 -0
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +59 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +7 -2
- opik/rest_api/types/experiment_group_response.py +2 -0
- opik/rest_api/types/experiment_public.py +7 -2
- opik/rest_api/types/experiment_public_status.py +5 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/experiment_status.py +5 -0
- opik/rest_api/types/feedback.py +25 -1
- opik/rest_api/types/feedback_create.py +20 -1
- opik/rest_api/types/feedback_object_public.py +27 -1
- opik/rest_api/types/feedback_public.py +25 -1
- opik/rest_api/types/feedback_score_batch_item.py +2 -1
- opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
- opik/rest_api/types/feedback_score_public.py +4 -0
- opik/rest_api/types/feedback_update.py +20 -1
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/group_detail.py +19 -0
- opik/rest_api/types/group_details.py +20 -0
- opik/rest_api/types/guardrail.py +1 -0
- opik/rest_api/types/guardrail_write.py +1 -0
- opik/rest_api/types/ids_holder.py +19 -0
- opik/rest_api/types/image_url.py +20 -0
- opik/rest_api/types/image_url_public.py +20 -0
- opik/rest_api/types/image_url_write.py +20 -0
- opik/rest_api/types/llm_as_judge_message.py +5 -1
- opik/rest_api/types/llm_as_judge_message_content.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
- opik/rest_api/types/llm_as_judge_message_public.py +5 -1
- opik/rest_api/types/llm_as_judge_message_write.py +5 -1
- opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
- opik/rest_api/types/manual_evaluation_request.py +38 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
- opik/rest_api/types/manual_evaluation_response.py +27 -0
- opik/rest_api/types/optimization.py +4 -2
- opik/rest_api/types/optimization_public.py +4 -2
- opik/rest_api/types/optimization_public_status.py +3 -1
- opik/rest_api/types/optimization_status.py +3 -1
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +4 -2
- opik/rest_api/types/optimization_write_status.py +3 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt.py +6 -0
- opik/rest_api/types/prompt_detail.py +6 -0
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_public.py +6 -0
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_version.py +3 -0
- opik/rest_api/types/prompt_version_detail.py +3 -0
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_link.py +1 -0
- opik/rest_api/types/prompt_version_link_public.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +3 -0
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +9 -0
- opik/rest_api/types/provider_api_key_provider.py +1 -1
- opik/rest_api/types/provider_api_key_public.py +9 -0
- opik/rest_api/types/provider_api_key_public_provider.py +1 -1
- opik/rest_api/types/score_name.py +1 -0
- opik/rest_api/types/service_toggles_config.py +18 -0
- opik/rest_api/types/span.py +1 -2
- opik/rest_api/types/span_enrichment_options.py +31 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/span_public.py +1 -2
- opik/rest_api/types/span_update.py +46 -0
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/rest_api/types/span_write.py +1 -2
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/trace.py +11 -2
- opik/rest_api/types/trace_enrichment_options.py +32 -0
- opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/trace_filter.py +23 -0
- opik/rest_api/types/trace_filter_operator.py +21 -0
- opik/rest_api/types/trace_filter_write.py +23 -0
- opik/rest_api/types/trace_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_public.py +11 -2
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_thread_identifier.py +1 -0
- opik/rest_api/types/trace_update.py +39 -0
- opik/rest_api/types/trace_write.py +1 -2
- opik/rest_api/types/value_entry.py +2 -0
- opik/rest_api/types/value_entry_compare.py +2 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
- opik/rest_api/types/value_entry_public.py +2 -0
- opik/rest_api/types/video_url.py +19 -0
- opik/rest_api/types/video_url_public.py +19 -0
- opik/rest_api/types/video_url_write.py +19 -0
- opik/rest_api/types/webhook.py +28 -0
- opik/rest_api/types/webhook_examples.py +19 -0
- opik/rest_api/types/webhook_public.py +28 -0
- opik/rest_api/types/webhook_test_result.py +23 -0
- opik/rest_api/types/webhook_test_result_status.py +5 -0
- opik/rest_api/types/webhook_write.py +23 -0
- opik/rest_api/types/welcome_wizard_tracking.py +22 -0
- opik/rest_api/types/workspace_configuration.py +5 -0
- opik/rest_api/welcome_wizard/__init__.py +4 -0
- opik/rest_api/welcome_wizard/client.py +195 -0
- opik/rest_api/welcome_wizard/raw_client.py +208 -0
- opik/rest_api/workspaces/client.py +14 -2
- opik/rest_api/workspaces/raw_client.py +10 -0
- opik/s3_httpx_client.py +14 -1
- opik/simulation/__init__.py +6 -0
- opik/simulation/simulated_user.py +99 -0
- opik/simulation/simulator.py +108 -0
- opik/synchronization.py +5 -6
- opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- opik-1.9.71.dist-info/METADATA +370 -0
- opik-1.9.71.dist-info/RECORD +1110 -0
- opik/api_objects/prompt/prompt.py +0 -112
- opik/cli.py +0 -193
- opik/hooks.py +0 -13
- opik/integrations/bedrock/chunks_aggregator.py +0 -55
- opik/integrations/bedrock/helpers.py +0 -8
- opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
- opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
- opik-1.8.39.dist-info/METADATA +0 -339
- opik-1.8.39.dist-info/RECORD +0 -790
- /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
- /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
- /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
opik/__init__.py
CHANGED
|
@@ -6,22 +6,31 @@ from .api_objects.experiment.experiment_item import (
|
|
|
6
6
|
ExperimentItemReferences,
|
|
7
7
|
)
|
|
8
8
|
from .api_objects.opik_client import Opik
|
|
9
|
-
from .api_objects.prompt import Prompt
|
|
9
|
+
from .api_objects.prompt import Prompt, ChatPrompt
|
|
10
10
|
from .api_objects.prompt.types import PromptType
|
|
11
11
|
from .api_objects.span import Span
|
|
12
12
|
from .api_objects.trace import Trace
|
|
13
13
|
from .configurator.configure import configure
|
|
14
14
|
from .decorator.tracker import flush_tracker, track
|
|
15
|
-
from .evaluation import
|
|
15
|
+
from .evaluation import (
|
|
16
|
+
evaluate,
|
|
17
|
+
evaluate_experiment,
|
|
18
|
+
evaluate_on_dict_items,
|
|
19
|
+
evaluate_prompt,
|
|
20
|
+
)
|
|
16
21
|
from .integrations.sagemaker import auth as sagemaker_auth
|
|
17
22
|
from .plugins.pytest.decorator import llm_unit
|
|
18
23
|
from .types import LLMProvider
|
|
19
24
|
from . import opik_context
|
|
20
|
-
from .
|
|
25
|
+
from .tracing_runtime_config import (
|
|
21
26
|
is_tracing_active,
|
|
22
27
|
reset_tracing_to_config_default,
|
|
23
28
|
set_tracing_active,
|
|
24
29
|
)
|
|
30
|
+
from .decorator.context_manager.span_context_manager import start_as_current_span
|
|
31
|
+
from .decorator.context_manager.trace_context_manager import start_as_current_trace
|
|
32
|
+
from .simulation import SimulatedUser, run_simulation
|
|
33
|
+
from .api_objects.local_recording import record_traces_locally
|
|
25
34
|
|
|
26
35
|
|
|
27
36
|
_logging.setup()
|
|
@@ -33,6 +42,7 @@ __all__ = [
|
|
|
33
42
|
"evaluate",
|
|
34
43
|
"evaluate_prompt",
|
|
35
44
|
"evaluate_experiment",
|
|
45
|
+
"evaluate_on_dict_items",
|
|
36
46
|
"ExperimentItemContent",
|
|
37
47
|
"ExperimentItemReferences",
|
|
38
48
|
"track",
|
|
@@ -45,11 +55,17 @@ __all__ = [
|
|
|
45
55
|
"llm_unit",
|
|
46
56
|
"configure",
|
|
47
57
|
"Prompt",
|
|
58
|
+
"ChatPrompt",
|
|
48
59
|
"PromptType",
|
|
49
60
|
"LLMProvider",
|
|
50
61
|
"reset_tracing_to_config_default",
|
|
51
62
|
"set_tracing_active",
|
|
52
63
|
"is_tracing_active",
|
|
64
|
+
"start_as_current_span",
|
|
65
|
+
"start_as_current_trace",
|
|
66
|
+
"SimulatedUser",
|
|
67
|
+
"run_simulation",
|
|
68
|
+
"record_traces_locally",
|
|
53
69
|
]
|
|
54
70
|
|
|
55
71
|
sagemaker_auth.setup_aws_sagemaker_session_hook()
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import Dict, Any, Union, List
|
|
3
|
+
|
|
4
|
+
# Payload shapes an anonymizer accepts and hands back: a string-keyed mapping,
# a bare string, or a list of arbitrary items.
AnonymizerDataType = Union[Dict[str, Any], str, List[Any]]


class Anonymizer(abc.ABC):
    """Common interface for objects that scrub sensitive content from payloads.

    Concrete anonymizers implement :meth:`anonymize`; this base class carries
    no state and no default behaviour of its own.
    """

    @abc.abstractmethod
    def anonymize(self, data: AnonymizerDataType, **kwargs: Any) -> AnonymizerDataType:
        """Return the anonymized counterpart of ``data``.

        Args:
            data: A dict, string or list to process.
            **kwargs: Implementation-specific options.

        Returns:
            The anonymized payload. The abstract placeholder itself returns
            ``None``; subclasses provide the real result.
        """
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from typing import Union, List, Dict, Callable, Tuple
|
|
2
|
+
|
|
3
|
+
from . import anonymizer, rules_anonymizer, rules
|
|
4
|
+
|
|
5
|
+
# Every accepted way of spelling anonymizer rules: one dict / tuple / callable,
# or a list holding any mix of them.
RulesType = Union[
    List[Dict[str, str]],
    List[Tuple[str, str]],
    List[Callable[[str], str]],
    List[Union[Dict[str, str], Tuple[str, str], Callable[[str], str]]],
    Dict[str, str],
    Tuple[str, str],
    Callable[[str], str],
]


def create_anonymizer(
    anonymizer_rules: RulesType, max_depth: int = 10
) -> anonymizer.Anonymizer:
    """Build a rules-based anonymizer from a loosely typed rule specification.

    Args:
        anonymizer_rules: A single rule or a list of rules. A rule is one of:
            - a dict carrying "regex" and "replace" keys,
            - a ``(regex, replacement)`` tuple,
            - a callable taking a string and returning the anonymized string.
        max_depth: Passed through to the created anonymizer as ``max_depth``.

    Returns:
        A ``RulesAnonymizer`` holding one rule object per specification, in
        the order the specifications were given.

    Raises:
        ValueError: If the specification (or an item of a list specification)
            is of an unsupported type, a dict rule lacks a required key, or a
            tuple rule does not have exactly two elements.
    """
    compiled_rules: List[rules.Rule]

    if callable(anonymizer_rules):
        # A lone function becomes a single function rule.
        compiled_rules = [rules.FunctionRule(anonymizer_rules)]
    elif isinstance(anonymizer_rules, dict):
        # A lone dict describes a single regex rule.
        _check_dictionary_rule(anonymizer_rules)
        compiled_rules = [
            rules.RegexRule(anonymizer_rules["regex"], anonymizer_rules["replace"])
        ]
    elif isinstance(anonymizer_rules, tuple):
        # A lone tuple describes a single regex rule.
        _check_tuple_rule(anonymizer_rules)
        pattern, substitute = anonymizer_rules
        compiled_rules = [rules.RegexRule(pattern, substitute)]
    elif isinstance(anonymizer_rules, list):
        compiled_rules = []
        for spec in anonymizer_rules:
            # Dicts and tuples are recognised before callables, so a rule is
            # only treated as a function when it is neither of the two.
            if isinstance(spec, dict):
                _check_dictionary_rule(spec)
                compiled_rules.append(rules.RegexRule(spec["regex"], spec["replace"]))
            elif isinstance(spec, tuple):
                _check_tuple_rule(spec)
                pattern, substitute = spec
                compiled_rules.append(rules.RegexRule(pattern, substitute))
            elif callable(spec):
                compiled_rules.append(rules.FunctionRule(spec))
            else:
                raise ValueError(f"Unsupported rule type in list: {type(spec)}")
    else:
        raise ValueError(f"Unsupported rules type: {type(anonymizer_rules)}")

    return rules_anonymizer.RulesAnonymizer(compiled_rules, max_depth=max_depth)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _check_dictionary_rule(rule: Dict[str, str]) -> None:
    """Ensure a dict rule carries both mandatory keys.

    Args:
        rule: Candidate rule mapping.

    Raises:
        ValueError: If "regex" or "replace" is missing from ``rule``.
    """
    has_required_keys = "regex" in rule and "replace" in rule
    if has_required_keys:
        return
    raise ValueError("Dictionary rule must have 'regex' and 'replace' keys")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _check_tuple_rule(rule: Tuple[str, str]) -> None:
    """Ensure a tuple rule is a two-element ``(regex, replacement)`` pair.

    Args:
        rule: Candidate rule tuple.

    Raises:
        ValueError: If ``rule`` does not contain exactly two elements.
    """
    if len(rule) == 2:
        return
    raise ValueError("Tuple rule must have exactly 2 elements: (regex, replacement)")
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import Any, Optional
|
|
3
|
+
|
|
4
|
+
from . import anonymizer
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RecursiveAnonymizer(anonymizer.Anonymizer):
    """Anonymizer that walks nested dicts and lists and rewrites their strings.

    The traversal is implemented here; subclasses only supply
    :meth:`anonymize_text`, which is invoked for every string encountered.
    Values that are not strings, dicts or lists are passed through unchanged.
    """

    def __init__(self, max_depth: int = 10):
        """Store the traversal depth limit.

        Args:
            max_depth: Number of nesting levels that are processed. Data
                reached at this depth or deeper is returned as-is, which
                bounds recursion on deeply nested or circular structures.
                Defaults to 10.
        """
        self.max_depth = max_depth

    def anonymize(
        self, data: anonymizer.AnonymizerDataType, **kwargs: Any
    ) -> anonymizer.AnonymizerDataType:
        """Anonymize ``data`` by delegating to the recursive walker.

        All keyword arguments are forwarded untouched to
        :meth:`_recursive_anonymize`.
        """
        return self._recursive_anonymize(data, **kwargs)

    @abc.abstractmethod
    def anonymize_text(self, data: str, **kwargs: Any) -> str:
        """Return the anonymized form of a single string.

        The walker calls this with a ``field_name`` keyword argument holding
        the dotted path of the string within the payload, plus any extra
        keyword arguments given to :meth:`anonymize`.
        """

    def _recursive_anonymize(
        self,
        data: anonymizer.AnonymizerDataType,
        depth: int = 0,
        field_name: Optional[str] = None,
        **kwargs: Any,
    ) -> anonymizer.AnonymizerDataType:
        """Walk ``data`` and anonymize every string found within the depth limit.

        Args:
            data: Current node of the payload.
            depth: Nesting level of ``data``; 0 for the root.
            field_name: Dotted path of ``data`` inside the payload; ``None``
                is treated as the empty string.
            **kwargs: Forwarded to :meth:`anonymize_text`.

        Returns:
            A new dict/list with anonymized contents, the anonymized string,
            or ``data`` itself when it is of another type or too deep.
        """
        # Past the depth budget the node is handed back untouched.
        if depth >= self.max_depth:
            return data

        path = "" if field_name is None else field_name

        if isinstance(data, str):
            return self.anonymize_text(data, field_name=path, **kwargs)

        if isinstance(data, dict):
            anonymized_mapping = {}
            for key, value in data.items():
                # Child path is "<parent>.<key>".
                anonymized_mapping[key] = self._recursive_anonymize(
                    value, depth + 1, field_name=f"{path}.{key}", **kwargs
                )
            return anonymized_mapping

        if isinstance(data, list):
            anonymized_items = []
            for index, item in enumerate(data):
                # Child path is "<parent>.<position>".
                anonymized_items.append(
                    self._recursive_anonymize(
                        item, depth + 1, field_name=f"{path}.{index}", **kwargs
                    )
                )
            return anonymized_items

        return data
|
opik/anonymizer/rules.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import re
|
|
3
|
+
from typing import Callable
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Rule(abc.ABC):
    """Interface for a single text anonymization step.

    A rule turns one string into another via :meth:`apply`; how sensitive
    content is recognised and replaced is up to each subclass.
    """

    @abc.abstractmethod
    def apply(self, text: str) -> str:
        """Return ``text`` with this rule's anonymization applied."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RegexRule(Rule):
    """Rule that substitutes every match of a regular expression.

    The pattern is compiled once at construction time and reused for each
    call to :meth:`apply`.
    """

    def __init__(self, regex: str, replacement: str):
        """Compile the pattern and remember the replacement.

        Args:
            regex: Regular expression describing the text to replace.
            replacement: Substitution passed to the regex engine for each
                match.
        """
        compiled = re.compile(regex)
        self.pattern = compiled
        self.replacement = replacement

    def apply(self, text: str) -> str:
        """Return ``text`` with all pattern matches substituted."""
        return re.sub(self.pattern, self.replacement, text)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class FunctionRule(Rule):
    """Rule that delegates anonymization to a user-supplied callable.

    Any callable that accepts a string and returns a string can be wrapped.
    """

    def __init__(self, func: Callable[[str], str]):
        """Remember the callable to invoke.

        Args:
            func: Function receiving the original text and returning its
                anonymized version.
        """
        self.func = func

    def apply(self, text: str) -> str:
        """Return whatever the wrapped callable produces for ``text``."""
        transform = self.func
        return transform(text)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from typing import List, Any
|
|
2
|
+
|
|
3
|
+
from . import recursive_anonymizer, rules
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class RulesAnonymizer(recursive_anonymizer.RecursiveAnonymizer):
    """Recursive anonymizer whose text handling is a chain of rules.

    Each string found in the payload is passed through every configured
    ``Rule`` in order; the output of one rule is the input of the next.
    """

    def __init__(self, anonymizer_rules: List[rules.Rule], max_depth: int = 10):
        """Set up the rule chain and the traversal depth limit.

        Args:
            anonymizer_rules: Rules to apply, in application order.
            max_depth: Maximum recursion depth for nested data structures,
                handed to the base class.
        """
        super().__init__(max_depth=max_depth)
        self.rules = anonymizer_rules

    def anonymize_text(self, data: str, **kwargs: Any) -> str:
        """Run ``data`` through every rule in sequence.

        Args:
            data: The text to anonymize.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The text produced by the last rule, or ``data`` unchanged when
            there are no rules.
        """
        anonymized = data
        for current_rule in self.rules:
            anonymized = current_rule.apply(anonymized)
        return anonymized
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
from . import attachment
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclasses.dataclass
class AttachmentWithContext:
    """
    An attachment bundled with the details of where it belongs.

    Instances tie an ``Attachment`` to the span or trace it is associated
    with, the project that entity lives in, and a short label describing
    the attachment's context.

    Attributes:
        attachment_data: The attachment object itself.
        entity_type: Kind of entity the attachment is associated with;
            either "span" or "trace".
        entity_id: Identifier of that entity.
        project_name: Name of the project the attachment and its entity
            belong to.
        context: Context label for the attachment.
    """

    attachment_data: attachment.Attachment
    entity_type: Literal["span", "trace"]
    entity_id: str
    project_name: str
    context: str
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, Any, Literal, List, NamedTuple
|
|
3
|
+
|
|
4
|
+
from . import attachment, attachment_context, decoder_base64
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ExtractionResult(NamedTuple):
    """Outcome of scanning one value for embedded attachments."""

    # Attachments decoded from the scanned value (empty when none were found).
    attachments: List[attachment.Attachment]
    # The scanned value with decoded payloads swapped for placeholders.
    sanitized_data: Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AttachmentsExtractor:
    """
    Extracts and processes attachments embedded as Base64 strings within data structures.

    Strings are scanned with a regular expression for long Base64 runs. Each
    run is handed to a ``Base64AttachmentDecoder``; when the decoder returns
    an attachment, that occurrence is replaced in the text by a
    ``[<file_name>]`` placeholder. Dicts and lists are traversed recursively;
    other value types are left untouched.
    """

    def __init__(self, min_attachment_size: int):
        """
        Initializes the extractor and compiles the Base64 matching pattern.

        Args:
            min_attachment_size: The minimum size of the attachment in characters
                for it to be considered valid. Shorter strings are skipped, and
                the pattern only matches runs of at least
                ``min_attachment_size / 4`` (truncated) four-character Base64
                groups, to minimize false positives.
        """
        self._min_attachment_size = min_attachment_size
        self.decoder = decoder_base64.Base64AttachmentDecoder()

        # Pattern to match base64 strings (can be embedded in text): a minimum
        # number of 4-character groups followed by an optional padded group.
        min_base64_groups = int(min_attachment_size / 4)
        base64_pattern = (
            r"(?:[A-Za-z0-9+/]{4}){"
            + str(min_base64_groups)
            + ",}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"
        )
        self.pattern = re.compile(base64_pattern)

    def extract_and_replace(
        self,
        data: Dict[str, Any],
        entity_type: Literal["span", "trace"],
        entity_id: str,
        project_name: str,
        context: Literal["input", "output", "metadata"],
    ) -> List[attachment_context.AttachmentWithContext]:
        """
        Extract attachments from every value of ``data``, sanitizing it in place.

        Args:
            data: Mapping to scan. Values that yielded attachments are
                overwritten with their sanitized version; other values are
                left as they are.
            entity_type: Entity kind recorded on each returned attachment.
            entity_id: Entity identifier recorded on each returned attachment.
            project_name: Project name recorded on each returned attachment.
            context: Where the data comes from; passed to the decoder and
                recorded on each returned attachment.

        Returns:
            All extracted attachments wrapped with the given context details.
        """
        attachments: List[attachment_context.AttachmentWithContext] = []
        # Only existing keys are reassigned, so mutating while iterating is safe.
        for key, value in data.items():
            extraction_result = self._try_extract_attachments(value, context)
            if not extraction_result.attachments:
                continue

            # replace the original value with the sanitized one and collect attachments
            data[key] = extraction_result.sanitized_data
            attachments.extend(
                attachment_context.AttachmentWithContext(
                    attachment_data=extracted_attachment,
                    entity_type=entity_type,
                    entity_id=entity_id,
                    project_name=project_name,
                    context=context,
                )
                for extracted_attachment in extraction_result.attachments
            )

        return attachments

    def _try_extract_attachments(
        self, data: Any, context: Literal["input", "output", "metadata"]
    ) -> ExtractionResult:
        """
        Recursively extract attachments from data that can be a string, dict, list, or other type.

        Args:
            data: The data to process (can be str, dict, list, or other types)
            context: The context where the data is located (input, output, or metadata)

        Returns:
            ExtractionResult with extracted attachments and sanitized data
        """
        # Handle string data - check for base64 attachments
        if isinstance(data, str):
            return self._extract_from_string(data, context)

        # Handle dictionary data - recursively process each value
        if isinstance(data, dict):
            return self._extract_from_dict(data, context)

        # Handle list data - recursively process each element
        if isinstance(data, list):
            return self._extract_from_list(data, context)

        # For other types (int, bool, None, etc.), return as-is
        return ExtractionResult(attachments=[], sanitized_data=data)

    def _extract_from_string(
        self, data: str, context: Literal["input", "output", "metadata"]
    ) -> ExtractionResult:
        """
        Extract attachments from a string value.

        Every Base64 run the decoder accepts is replaced, at its own position,
        by ``[<file_name>]`` of the attachment decoded from it. Runs the
        decoder rejects stay in the text unchanged.
        """
        if len(data) < self._min_attachment_size:
            # skip short strings
            return ExtractionResult(attachments=[], sanitized_data=data)

        attachments: List[attachment.Attachment] = []

        def _replace_match(match: "re.Match[str]") -> str:
            candidate = match.group()
            decoded_attachment = self.decoder.decode(candidate, context)
            if decoded_attachment is None:
                return candidate
            attachments.append(decoded_attachment)
            return f"[{decoded_attachment.file_name}]"

        # Substituting per match (instead of a global str.replace per match)
        # keeps a one-to-one link between placeholders and attachments even
        # when the same payload occurs several times, and never rewrites text
        # inside a placeholder that was inserted earlier.
        sanitized_data = self.pattern.sub(_replace_match, data)

        return ExtractionResult(attachments=attachments, sanitized_data=sanitized_data)

    def _extract_from_dict(
        self, data: Dict[str, Any], context: Literal["input", "output", "metadata"]
    ) -> ExtractionResult:
        """Recursively extract attachments from a dictionary, building a sanitized copy."""
        all_attachments: List[attachment.Attachment] = []
        sanitized_dict = {}

        for key, value in data.items():
            result = self._try_extract_attachments(value, context)
            sanitized_dict[key] = result.sanitized_data
            all_attachments.extend(result.attachments)

        return ExtractionResult(
            attachments=all_attachments, sanitized_data=sanitized_dict
        )

    def _extract_from_list(
        self, data: List[Any], context: Literal["input", "output", "metadata"]
    ) -> ExtractionResult:
        """Recursively extract attachments from a list, building a sanitized copy."""
        all_attachments: List[attachment.Attachment] = []
        sanitized_list = []

        for item in data:
            result = self._try_extract_attachments(item, context)
            sanitized_list.append(result.sanitized_data)
            all_attachments.extend(result.attachments)

        return ExtractionResult(
            attachments=all_attachments, sanitized_data=sanitized_list
        )
|
|
@@ -13,6 +13,7 @@ def attachment_to_message(
|
|
|
13
13
|
entity_id: str,
|
|
14
14
|
project_name: str,
|
|
15
15
|
url_override: str,
|
|
16
|
+
delete_after_upload: bool = False,
|
|
16
17
|
) -> messages.CreateAttachmentMessage:
|
|
17
18
|
if attachment_data.data is None:
|
|
18
19
|
raise ValueError("Attachment data cannot be None")
|
|
@@ -32,6 +33,7 @@ def attachment_to_message(
|
|
|
32
33
|
entity_id=entity_id,
|
|
33
34
|
project_name=project_name,
|
|
34
35
|
encoded_url_override=base_url_path,
|
|
36
|
+
delete_after_upload=delete_after_upload,
|
|
35
37
|
)
|
|
36
38
|
|
|
37
39
|
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import Any, Optional
|
|
3
|
+
|
|
4
|
+
from . import attachment
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AttachmentDecoder(abc.ABC):
    """
    Interface for turning raw attachment data into an ``Attachment``.

    Each concrete decoder handles one encoding format and implements
    :meth:`decode` accordingly.
    """

    @abc.abstractmethod
    def decode(self, raw_data: str, **kwargs: Any) -> Optional[attachment.Attachment]:
        """Decode ``raw_data`` into an ``Attachment``.

        Args:
            raw_data: The encoded attachment as a string.
            **kwargs: Decoder-specific options.

        Returns:
            The decoded attachment, or ``None`` when no attachment is
            produced. The abstract placeholder itself returns ``None``.
        """
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import binascii
|
|
3
|
+
import logging
|
|
4
|
+
import tempfile
|
|
5
|
+
from typing import Any, Optional, Literal
|
|
6
|
+
|
|
7
|
+
from . import attachment, decoder, decoder_helpers
|
|
8
|
+
|
|
9
|
+
LOGGER = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Base64AttachmentDecoder(decoder.AttachmentDecoder):
    """Decodes base64 encoded attachment data.

    This decoder decodes base64 strings, detects MIME types from content, and
    creates Attachment objects backed by a temporary file.
    """

    def decode(
        self,
        raw_data: str,
        context: Literal["input", "output", "metadata"] = "input",
        **kwargs: Any,
    ) -> Optional[attachment.Attachment]:
        """Decode base64 encoded data into an Attachment object.

        Args:
            raw_data: Base64 encoded string data
            context: Context string for filename generation.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Attachment object whose ``data`` is the path of a temporary file
            holding the decoded bytes, or None if ``raw_data`` is not a
            string, decoding fails, or the type is not recognizable.
            No exception is propagated to the caller.
        """
        if not isinstance(raw_data, str):
            LOGGER.warning("Attachment data is not a string, skipping.")
            return None

        try:
            # Decode base64 string to bytes (strict: rejects foreign characters)
            decoded_bytes = base64.b64decode(raw_data, validate=True)

            # Detect MIME type from content
            mime_type = decoder_helpers.detect_mime_type(decoded_bytes)

            # Skip if not a recognizable file type
            if not mime_type or mime_type in ("application/octet-stream", "text/plain"):
                LOGGER.debug("Attachment type is not recognized, skipping.")
                return None

            # Get file extension from the MIME type
            extension = decoder_helpers.get_file_extension(mime_type)

            # Generate filename
            file_name = decoder_helpers.create_attachment_filename(
                context, extension=extension
            )

            # Save decoded bytes to a temporary file. The context manager
            # guarantees the handle is closed even if the write fails;
            # delete=False keeps the file on disk after closing so the
            # returned Attachment can reference it by path.
            with tempfile.NamedTemporaryFile(
                mode="wb", delete=False, suffix=extension
            ) as temp_file:
                temp_file.write(decoded_bytes)

            # Return Attachment object with a file path
            return attachment.Attachment(
                data=temp_file.name, file_name=file_name, content_type=mime_type
            )

        except (ValueError, binascii.Error) as e:
            LOGGER.debug(
                "Failed to decode attachment data, reason: invalid base64. Reason: %s",
                e,
                exc_info=True,
            )
            # Not valid base64, return None
            return None
        except Exception as ex:
            LOGGER.warning(
                "Failed to decode attachment data, reason: %s", ex, exc_info=True
            )
            # Unexpected error, return None to avoid crashing the pipeline
            return None
|