opik 1.6.4__py3-none-any.whl → 1.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +33 -2
- opik/anonymizer/__init__.py +5 -0
- opik/anonymizer/anonymizer.py +12 -0
- opik/anonymizer/factory.py +80 -0
- opik/anonymizer/recursive_anonymizer.py +64 -0
- opik/anonymizer/rules.py +56 -0
- opik/anonymizer/rules_anonymizer.py +35 -0
- opik/api_objects/attachment/__init__.py +5 -0
- opik/api_objects/attachment/attachment.py +20 -0
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +220 -0
- opik/api_objects/attachment/converters.py +51 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/conversation/__init__.py +0 -0
- opik/api_objects/conversation/conversation_factory.py +43 -0
- opik/api_objects/conversation/conversation_thread.py +49 -0
- opik/api_objects/data_helpers.py +79 -0
- opik/api_objects/dataset/dataset.py +107 -45
- opik/api_objects/dataset/rest_operations.py +12 -3
- opik/api_objects/experiment/experiment.py +81 -45
- opik/api_objects/experiment/experiment_item.py +2 -1
- opik/api_objects/experiment/experiments_client.py +64 -0
- opik/api_objects/experiment/helpers.py +35 -11
- opik/api_objects/experiment/rest_operations.py +88 -19
- opik/api_objects/helpers.py +104 -7
- opik/api_objects/local_recording.py +81 -0
- opik/api_objects/opik_client.py +872 -174
- opik/api_objects/opik_query_language.py +136 -18
- opik/api_objects/optimization/__init__.py +3 -0
- opik/api_objects/optimization/optimization.py +39 -0
- opik/api_objects/prompt/__init__.py +13 -1
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +210 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
- opik/api_objects/prompt/client.py +193 -41
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/text/prompt.py +174 -0
- opik/api_objects/prompt/text/prompt_template.py +55 -0
- opik/api_objects/prompt/types.py +29 -0
- opik/api_objects/rest_stream_parser.py +98 -0
- opik/api_objects/search_helpers.py +89 -0
- opik/api_objects/span/span_client.py +165 -45
- opik/api_objects/span/span_data.py +136 -25
- opik/api_objects/threads/__init__.py +0 -0
- opik/api_objects/threads/threads_client.py +185 -0
- opik/api_objects/trace/trace_client.py +72 -36
- opik/api_objects/trace/trace_data.py +112 -26
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/__init__.py +5 -0
- opik/cli/__main__.py +6 -0
- opik/cli/configure.py +66 -0
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/healthcheck.py +21 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +49 -0
- opik/cli/proxy.py +93 -0
- opik/cli/usage_report/__init__.py +16 -0
- opik/cli/usage_report/charts.py +783 -0
- opik/cli/usage_report/cli.py +274 -0
- opik/cli/usage_report/constants.py +9 -0
- opik/cli/usage_report/extraction.py +749 -0
- opik/cli/usage_report/pdf.py +244 -0
- opik/cli/usage_report/statistics.py +78 -0
- opik/cli/usage_report/utils.py +235 -0
- opik/config.py +62 -4
- opik/configurator/configure.py +45 -6
- opik/configurator/opik_rest_helpers.py +4 -1
- opik/context_storage.py +164 -65
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +9 -1
- opik/decorator/base_track_decorator.py +298 -146
- opik/decorator/context_manager/__init__.py +0 -0
- opik/decorator/context_manager/span_context_manager.py +123 -0
- opik/decorator/context_manager/trace_context_manager.py +84 -0
- opik/decorator/generator_wrappers.py +3 -2
- opik/decorator/inspect_helpers.py +11 -0
- opik/decorator/opik_args/__init__.py +13 -0
- opik/decorator/opik_args/api_classes.py +71 -0
- opik/decorator/opik_args/helpers.py +120 -0
- opik/decorator/span_creation_handler.py +49 -21
- opik/decorator/tracker.py +9 -1
- opik/dict_utils.py +3 -3
- opik/environment.py +13 -1
- opik/error_tracking/api.py +1 -1
- opik/error_tracking/before_send.py +6 -5
- opik/error_tracking/environment_details.py +29 -7
- opik/error_tracking/error_filtering/filter_by_response_status_code.py +42 -0
- opik/error_tracking/error_filtering/filter_chain_builder.py +14 -3
- opik/evaluation/__init__.py +14 -2
- opik/evaluation/engine/engine.py +280 -82
- opik/evaluation/engine/evaluation_tasks_executor.py +15 -10
- opik/evaluation/engine/helpers.py +34 -9
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/engine/types.py +5 -4
- opik/evaluation/evaluation_result.py +169 -2
- opik/evaluation/evaluator.py +659 -58
- opik/evaluation/metrics/__init__.py +121 -6
- opik/evaluation/metrics/aggregated_metric.py +92 -0
- opik/evaluation/metrics/arguments_helpers.py +15 -21
- opik/evaluation/metrics/arguments_validator.py +38 -0
- opik/evaluation/metrics/base_metric.py +20 -10
- opik/evaluation/metrics/conversation/__init__.py +48 -0
- opik/evaluation/metrics/conversation/conversation_thread_metric.py +79 -0
- opik/evaluation/metrics/conversation/conversation_turns_factory.py +39 -0
- opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
- opik/evaluation/metrics/conversation/helpers.py +84 -0
- opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
- opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/__init__.py +0 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +274 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/schema.py +16 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/templates.py +95 -0
- opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/__init__.py +0 -0
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +295 -0
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/schema.py +22 -0
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/templates.py +139 -0
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +277 -0
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/schema.py +16 -0
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/templates.py +135 -0
- opik/evaluation/metrics/conversation/types.py +34 -0
- opik/evaluation/metrics/conversation_types.py +9 -0
- opik/evaluation/metrics/heuristics/bertscore.py +107 -0
- opik/evaluation/metrics/heuristics/bleu.py +43 -16
- opik/evaluation/metrics/heuristics/chrf.py +127 -0
- opik/evaluation/metrics/heuristics/contains.py +50 -11
- opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
- opik/evaluation/metrics/heuristics/equals.py +4 -1
- opik/evaluation/metrics/heuristics/gleu.py +113 -0
- opik/evaluation/metrics/heuristics/is_json.py +9 -3
- opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
- opik/evaluation/metrics/heuristics/levenshtein_ratio.py +6 -5
- opik/evaluation/metrics/heuristics/meteor.py +119 -0
- opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
- opik/evaluation/metrics/heuristics/readability.py +129 -0
- opik/evaluation/metrics/heuristics/regex_match.py +4 -1
- opik/evaluation/metrics/heuristics/rouge.py +148 -0
- opik/evaluation/metrics/heuristics/sentiment.py +98 -0
- opik/evaluation/metrics/heuristics/spearman.py +88 -0
- opik/evaluation/metrics/heuristics/tone.py +155 -0
- opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +27 -30
- opik/evaluation/metrics/llm_judges/answer_relevance/parser.py +27 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/templates.py +10 -10
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +28 -31
- opik/evaluation/metrics/llm_judges/context_precision/parser.py +27 -0
- opik/evaluation/metrics/llm_judges/context_precision/template.py +7 -7
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +27 -31
- opik/evaluation/metrics/llm_judges/context_recall/parser.py +27 -0
- opik/evaluation/metrics/llm_judges/context_recall/template.py +7 -7
- opik/evaluation/metrics/llm_judges/factuality/metric.py +7 -26
- opik/evaluation/metrics/llm_judges/factuality/parser.py +35 -0
- opik/evaluation/metrics/llm_judges/factuality/template.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +244 -113
- opik/evaluation/metrics/llm_judges/g_eval/parser.py +161 -0
- opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +23 -27
- opik/evaluation/metrics/llm_judges/hallucination/parser.py +29 -0
- opik/evaluation/metrics/llm_judges/hallucination/template.py +2 -4
- opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
- opik/evaluation/metrics/llm_judges/moderation/metric.py +23 -28
- opik/evaluation/metrics/llm_judges/moderation/parser.py +27 -0
- opik/evaluation/metrics/llm_judges/moderation/template.py +2 -2
- opik/evaluation/metrics/llm_judges/parsing_helpers.py +26 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
- opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
- opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
- opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +171 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/parser.py +38 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/templates.py +65 -0
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +23 -32
- opik/evaluation/metrics/llm_judges/usefulness/parser.py +28 -0
- opik/evaluation/metrics/ragas_metric.py +112 -0
- opik/evaluation/models/__init__.py +10 -0
- opik/evaluation/models/base_model.py +140 -18
- opik/evaluation/models/langchain/__init__.py +3 -0
- opik/evaluation/models/langchain/langchain_chat_model.py +166 -0
- opik/evaluation/models/langchain/message_converters.py +106 -0
- opik/evaluation/models/langchain/opik_monitoring.py +23 -0
- opik/evaluation/models/litellm/litellm_chat_model.py +186 -40
- opik/evaluation/models/litellm/opik_monitor.py +24 -21
- opik/evaluation/models/litellm/util.py +125 -0
- opik/evaluation/models/litellm/warning_filters.py +16 -4
- opik/evaluation/models/model_capabilities.py +187 -0
- opik/evaluation/models/models_factory.py +25 -3
- opik/evaluation/preprocessing.py +92 -0
- opik/evaluation/report.py +70 -12
- opik/evaluation/rest_operations.py +49 -45
- opik/evaluation/samplers/__init__.py +4 -0
- opik/evaluation/samplers/base_dataset_sampler.py +40 -0
- opik/evaluation/samplers/random_dataset_sampler.py +48 -0
- opik/evaluation/score_statistics.py +66 -0
- opik/evaluation/scorers/__init__.py +4 -0
- opik/evaluation/scorers/scorer_function.py +55 -0
- opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
- opik/evaluation/test_case.py +3 -2
- opik/evaluation/test_result.py +1 -0
- opik/evaluation/threads/__init__.py +0 -0
- opik/evaluation/threads/context_helper.py +32 -0
- opik/evaluation/threads/evaluation_engine.py +181 -0
- opik/evaluation/threads/evaluation_result.py +18 -0
- opik/evaluation/threads/evaluator.py +120 -0
- opik/evaluation/threads/helpers.py +51 -0
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +116 -3
- opik/file_upload/__init__.py +0 -0
- opik/file_upload/base_upload_manager.py +39 -0
- opik/file_upload/file_upload_monitor.py +14 -0
- opik/file_upload/file_uploader.py +141 -0
- opik/file_upload/mime_type.py +9 -0
- opik/file_upload/s3_multipart_upload/__init__.py +0 -0
- opik/file_upload/s3_multipart_upload/file_parts_strategy.py +89 -0
- opik/file_upload/s3_multipart_upload/s3_file_uploader.py +86 -0
- opik/file_upload/s3_multipart_upload/s3_upload_error.py +29 -0
- opik/file_upload/thread_pool.py +17 -0
- opik/file_upload/upload_client.py +114 -0
- opik/file_upload/upload_manager.py +255 -0
- opik/file_upload/upload_options.py +37 -0
- opik/format_helpers.py +17 -0
- opik/guardrails/__init__.py +4 -0
- opik/guardrails/guardrail.py +157 -0
- opik/guardrails/guards/__init__.py +5 -0
- opik/guardrails/guards/guard.py +17 -0
- opik/guardrails/guards/pii.py +47 -0
- opik/guardrails/guards/topic.py +76 -0
- opik/guardrails/rest_api_client.py +34 -0
- opik/guardrails/schemas.py +24 -0
- opik/guardrails/tracing.py +61 -0
- opik/healthcheck/__init__.py +2 -1
- opik/healthcheck/checks.py +2 -2
- opik/healthcheck/rich_representation.py +1 -1
- opik/hooks/__init__.py +23 -0
- opik/hooks/anonymizer_hook.py +36 -0
- opik/hooks/httpx_client_hook.py +112 -0
- opik/httpx_client.py +75 -4
- opik/id_helpers.py +18 -0
- opik/integrations/adk/__init__.py +14 -0
- opik/integrations/adk/callback_context_info_extractors.py +32 -0
- opik/integrations/adk/graph/__init__.py +0 -0
- opik/integrations/adk/graph/mermaid_graph_builder.py +128 -0
- opik/integrations/adk/graph/nodes.py +101 -0
- opik/integrations/adk/graph/subgraph_edges_builders.py +41 -0
- opik/integrations/adk/helpers.py +48 -0
- opik/integrations/adk/legacy_opik_tracer.py +381 -0
- opik/integrations/adk/opik_tracer.py +370 -0
- opik/integrations/adk/patchers/__init__.py +4 -0
- opik/integrations/adk/patchers/adk_otel_tracer/__init__.py +0 -0
- opik/integrations/adk/patchers/adk_otel_tracer/llm_span_helpers.py +30 -0
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +201 -0
- opik/integrations/adk/patchers/litellm_wrappers.py +91 -0
- opik/integrations/adk/patchers/llm_response_wrapper.py +105 -0
- opik/integrations/adk/patchers/patchers.py +64 -0
- opik/integrations/adk/recursive_callback_injector.py +126 -0
- opik/integrations/aisuite/aisuite_decorator.py +8 -3
- opik/integrations/aisuite/opik_tracker.py +1 -0
- opik/integrations/anthropic/messages_create_decorator.py +8 -3
- opik/integrations/anthropic/opik_tracker.py +0 -1
- opik/integrations/bedrock/converse/__init__.py +0 -0
- opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
- opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +18 -8
- opik/integrations/bedrock/invoke_agent_decorator.py +12 -7
- opik/integrations/bedrock/invoke_model/__init__.py +0 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
- opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
- opik/integrations/bedrock/invoke_model/response_types.py +34 -0
- opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
- opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
- opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
- opik/integrations/bedrock/opik_tracker.py +43 -4
- opik/integrations/bedrock/types.py +19 -0
- opik/integrations/crewai/crewai_decorator.py +34 -56
- opik/integrations/crewai/opik_tracker.py +31 -10
- opik/integrations/crewai/patchers/__init__.py +5 -0
- opik/integrations/crewai/patchers/flow.py +118 -0
- opik/integrations/crewai/patchers/litellm_completion.py +30 -0
- opik/integrations/crewai/patchers/llm_client.py +207 -0
- opik/integrations/dspy/callback.py +246 -84
- opik/integrations/dspy/graph.py +88 -0
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/genai/encoder_extension.py +2 -6
- opik/integrations/genai/generate_content_decorator.py +20 -13
- opik/integrations/guardrails/guardrails_decorator.py +4 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/constants.py +35 -0
- opik/integrations/haystack/converters.py +1 -2
- opik/integrations/haystack/opik_connector.py +28 -6
- opik/integrations/haystack/opik_span_bridge.py +284 -0
- opik/integrations/haystack/opik_tracer.py +124 -222
- opik/integrations/langchain/__init__.py +3 -1
- opik/integrations/langchain/helpers.py +96 -0
- opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_encoder_extension.py +2 -2
- opik/integrations/langchain/opik_tracer.py +641 -206
- opik/integrations/langchain/provider_usage_extractors/__init__.py +5 -0
- opik/integrations/langchain/provider_usage_extractors/anthropic_usage_extractor.py +101 -0
- opik/integrations/langchain/provider_usage_extractors/anthropic_vertexai_usage_extractor.py +67 -0
- opik/integrations/langchain/provider_usage_extractors/bedrock_usage_extractor.py +94 -0
- opik/integrations/langchain/provider_usage_extractors/google_generative_ai_usage_extractor.py +109 -0
- opik/integrations/langchain/provider_usage_extractors/groq_usage_extractor.py +92 -0
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/__init__.py +15 -0
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +134 -0
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/langchain_usage.py +163 -0
- opik/integrations/langchain/provider_usage_extractors/openai_usage_extractor.py +124 -0
- opik/integrations/langchain/provider_usage_extractors/provider_usage_extractor_protocol.py +29 -0
- opik/integrations/langchain/provider_usage_extractors/usage_extractor.py +48 -0
- opik/integrations/langchain/provider_usage_extractors/vertexai_usage_extractor.py +109 -0
- opik/integrations/litellm/__init__.py +5 -0
- opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
- opik/integrations/litellm/litellm_completion_decorator.py +242 -0
- opik/integrations/litellm/opik_tracker.py +43 -0
- opik/integrations/litellm/stream_patchers.py +151 -0
- opik/integrations/llama_index/callback.py +179 -78
- opik/integrations/llama_index/event_parsing_utils.py +29 -9
- opik/integrations/openai/agents/opik_tracing_processor.py +204 -32
- opik/integrations/openai/agents/span_data_parsers.py +15 -6
- opik/integrations/openai/chat_completion_chunks_aggregator.py +1 -1
- opik/integrations/openai/{openai_decorator.py → openai_chat_completions_decorator.py} +45 -35
- opik/integrations/openai/openai_responses_decorator.py +158 -0
- opik/integrations/openai/opik_tracker.py +94 -13
- opik/integrations/openai/response_events_aggregator.py +36 -0
- opik/integrations/openai/stream_patchers.py +125 -15
- opik/integrations/sagemaker/auth.py +5 -1
- opik/jsonable_encoder.py +29 -1
- opik/llm_usage/base_original_provider_usage.py +15 -8
- opik/llm_usage/bedrock_usage.py +8 -2
- opik/llm_usage/google_usage.py +6 -1
- opik/llm_usage/llm_usage_info.py +6 -0
- opik/llm_usage/{openai_usage.py → openai_chat_completions_usage.py} +2 -12
- opik/llm_usage/{openai_agent_usage.py → openai_responses_usage.py} +7 -15
- opik/llm_usage/opik_usage.py +36 -10
- opik/llm_usage/opik_usage_factory.py +35 -19
- opik/logging_messages.py +19 -7
- opik/message_processing/arguments_utils.py +22 -0
- opik/message_processing/batching/base_batcher.py +45 -17
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batch_manager_constuctors.py +36 -11
- opik/message_processing/batching/batchers.py +167 -44
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/batching/sequence_splitter.py +50 -5
- opik/message_processing/emulation/__init__.py +0 -0
- opik/message_processing/emulation/emulator_message_processor.py +578 -0
- opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
- opik/message_processing/emulation/models.py +162 -0
- opik/message_processing/encoder_helpers.py +79 -0
- opik/message_processing/message_queue.py +79 -0
- opik/message_processing/messages.py +154 -12
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/processors/message_processors.py +92 -0
- opik/message_processing/processors/message_processors_chain.py +96 -0
- opik/message_processing/processors/online_message_processor.py +324 -0
- opik/message_processing/queue_consumer.py +61 -13
- opik/message_processing/streamer.py +102 -31
- opik/message_processing/streamer_constructors.py +67 -12
- opik/opik_context.py +103 -11
- opik/plugins/pytest/decorator.py +2 -2
- opik/plugins/pytest/experiment_runner.py +3 -2
- opik/plugins/pytest/hooks.py +6 -4
- opik/rate_limit/__init__.py +0 -0
- opik/rate_limit/rate_limit.py +25 -0
- opik/rest_api/__init__.py +643 -11
- opik/rest_api/alerts/__init__.py +7 -0
- opik/rest_api/alerts/client.py +667 -0
- opik/rest_api/alerts/raw_client.py +1015 -0
- opik/rest_api/alerts/types/__init__.py +7 -0
- opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
- opik/rest_api/annotation_queues/__init__.py +4 -0
- opik/rest_api/annotation_queues/client.py +668 -0
- opik/rest_api/annotation_queues/raw_client.py +1019 -0
- opik/rest_api/attachments/__init__.py +17 -0
- opik/rest_api/attachments/client.py +752 -0
- opik/rest_api/attachments/raw_client.py +1125 -0
- opik/rest_api/attachments/types/__init__.py +15 -0
- opik/rest_api/attachments/types/attachment_list_request_entity_type.py +5 -0
- opik/rest_api/attachments/types/download_attachment_request_entity_type.py +5 -0
- opik/rest_api/attachments/types/start_multipart_upload_request_entity_type.py +5 -0
- opik/rest_api/attachments/types/upload_attachment_request_entity_type.py +5 -0
- opik/rest_api/automation_rule_evaluators/__init__.py +2 -0
- opik/rest_api/automation_rule_evaluators/client.py +182 -1162
- opik/rest_api/automation_rule_evaluators/raw_client.py +598 -0
- opik/rest_api/chat_completions/__init__.py +2 -0
- opik/rest_api/chat_completions/client.py +115 -149
- opik/rest_api/chat_completions/raw_client.py +339 -0
- opik/rest_api/check/__init__.py +2 -0
- opik/rest_api/check/client.py +88 -106
- opik/rest_api/check/raw_client.py +258 -0
- opik/rest_api/client.py +112 -212
- opik/rest_api/core/__init__.py +5 -0
- opik/rest_api/core/api_error.py +12 -6
- opik/rest_api/core/client_wrapper.py +4 -14
- opik/rest_api/core/datetime_utils.py +1 -3
- opik/rest_api/core/file.py +2 -5
- opik/rest_api/core/http_client.py +42 -120
- opik/rest_api/core/http_response.py +55 -0
- opik/rest_api/core/jsonable_encoder.py +1 -4
- opik/rest_api/core/pydantic_utilities.py +79 -147
- opik/rest_api/core/query_encoder.py +1 -3
- opik/rest_api/core/serialization.py +10 -10
- opik/rest_api/dashboards/__init__.py +4 -0
- opik/rest_api/dashboards/client.py +462 -0
- opik/rest_api/dashboards/raw_client.py +648 -0
- opik/rest_api/datasets/__init__.py +5 -0
- opik/rest_api/datasets/client.py +1638 -1091
- opik/rest_api/datasets/raw_client.py +3389 -0
- opik/rest_api/datasets/types/__init__.py +8 -0
- opik/rest_api/datasets/types/dataset_update_visibility.py +5 -0
- opik/rest_api/datasets/types/dataset_write_visibility.py +5 -0
- opik/rest_api/errors/__init__.py +2 -0
- opik/rest_api/errors/bad_request_error.py +4 -3
- opik/rest_api/errors/conflict_error.py +4 -3
- opik/rest_api/errors/forbidden_error.py +4 -2
- opik/rest_api/errors/not_found_error.py +4 -3
- opik/rest_api/errors/not_implemented_error.py +4 -3
- opik/rest_api/errors/unauthorized_error.py +4 -3
- opik/rest_api/errors/unprocessable_entity_error.py +4 -3
- opik/rest_api/experiments/__init__.py +5 -0
- opik/rest_api/experiments/client.py +676 -752
- opik/rest_api/experiments/raw_client.py +1872 -0
- opik/rest_api/experiments/types/__init__.py +10 -0
- opik/rest_api/experiments/types/experiment_update_status.py +5 -0
- opik/rest_api/experiments/types/experiment_update_type.py +5 -0
- opik/rest_api/experiments/types/experiment_write_status.py +5 -0
- opik/rest_api/experiments/types/experiment_write_type.py +5 -0
- opik/rest_api/feedback_definitions/__init__.py +2 -0
- opik/rest_api/feedback_definitions/client.py +96 -370
- opik/rest_api/feedback_definitions/raw_client.py +541 -0
- opik/rest_api/feedback_definitions/types/__init__.py +2 -0
- opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -3
- opik/rest_api/guardrails/__init__.py +4 -0
- opik/rest_api/guardrails/client.py +104 -0
- opik/rest_api/guardrails/raw_client.py +102 -0
- opik/rest_api/llm_provider_key/__init__.py +2 -0
- opik/rest_api/llm_provider_key/client.py +166 -440
- opik/rest_api/llm_provider_key/raw_client.py +643 -0
- opik/rest_api/llm_provider_key/types/__init__.py +2 -0
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
- opik/rest_api/manual_evaluation/__init__.py +4 -0
- opik/rest_api/manual_evaluation/client.py +347 -0
- opik/rest_api/manual_evaluation/raw_client.py +543 -0
- opik/rest_api/open_telemetry_ingestion/__init__.py +2 -0
- opik/rest_api/open_telemetry_ingestion/client.py +38 -63
- opik/rest_api/open_telemetry_ingestion/raw_client.py +88 -0
- opik/rest_api/optimizations/__init__.py +7 -0
- opik/rest_api/optimizations/client.py +704 -0
- opik/rest_api/optimizations/raw_client.py +920 -0
- opik/rest_api/optimizations/types/__init__.py +7 -0
- opik/rest_api/optimizations/types/optimization_update_status.py +7 -0
- opik/rest_api/projects/__init__.py +10 -1
- opik/rest_api/projects/client.py +180 -855
- opik/rest_api/projects/raw_client.py +1216 -0
- opik/rest_api/projects/types/__init__.py +11 -4
- opik/rest_api/projects/types/project_metric_request_public_interval.py +1 -3
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +11 -1
- opik/rest_api/projects/types/project_update_visibility.py +5 -0
- opik/rest_api/projects/types/project_write_visibility.py +5 -0
- opik/rest_api/prompts/__init__.py +4 -2
- opik/rest_api/prompts/client.py +381 -970
- opik/rest_api/prompts/raw_client.py +1634 -0
- opik/rest_api/prompts/types/__init__.py +5 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/raw_client.py +156 -0
- opik/rest_api/redirect/__init__.py +4 -0
- opik/rest_api/redirect/client.py +375 -0
- opik/rest_api/redirect/raw_client.py +566 -0
- opik/rest_api/service_toggles/__init__.py +4 -0
- opik/rest_api/service_toggles/client.py +91 -0
- opik/rest_api/service_toggles/raw_client.py +93 -0
- opik/rest_api/spans/__init__.py +2 -0
- opik/rest_api/spans/client.py +659 -1354
- opik/rest_api/spans/raw_client.py +2383 -0
- opik/rest_api/spans/types/__init__.py +2 -0
- opik/rest_api/spans/types/find_feedback_score_names_1_request_type.py +1 -3
- opik/rest_api/spans/types/get_span_stats_request_type.py +1 -3
- opik/rest_api/spans/types/get_spans_by_project_request_type.py +1 -3
- opik/rest_api/spans/types/span_search_stream_request_public_type.py +1 -3
- opik/rest_api/system_usage/__init__.py +2 -0
- opik/rest_api/system_usage/client.py +157 -216
- opik/rest_api/system_usage/raw_client.py +455 -0
- opik/rest_api/traces/__init__.py +2 -0
- opik/rest_api/traces/client.py +2102 -1625
- opik/rest_api/traces/raw_client.py +4144 -0
- opik/rest_api/types/__init__.py +629 -24
- opik/rest_api/types/aggregation_data.py +27 -0
- opik/rest_api/types/alert.py +33 -0
- opik/rest_api/types/alert_alert_type.py +5 -0
- opik/rest_api/types/alert_page_public.py +24 -0
- opik/rest_api/types/alert_public.py +33 -0
- opik/rest_api/types/alert_public_alert_type.py +5 -0
- opik/rest_api/types/alert_trigger.py +27 -0
- opik/rest_api/types/alert_trigger_config.py +28 -0
- opik/rest_api/types/alert_trigger_config_public.py +28 -0
- opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_write.py +22 -0
- opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
- opik/rest_api/types/alert_trigger_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_public.py +27 -0
- opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_write.py +23 -0
- opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
- opik/rest_api/types/alert_write.py +28 -0
- opik/rest_api/types/alert_write_alert_type.py +5 -0
- opik/rest_api/types/annotation_queue.py +42 -0
- opik/rest_api/types/annotation_queue_batch.py +27 -0
- opik/rest_api/types/{json_schema_element.py → annotation_queue_item_ids.py} +5 -7
- opik/rest_api/types/annotation_queue_page_public.py +28 -0
- opik/rest_api/types/annotation_queue_public.py +38 -0
- opik/rest_api/types/annotation_queue_public_scope.py +5 -0
- opik/rest_api/types/{workspace_metadata.py → annotation_queue_reviewer.py} +6 -7
- opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
- opik/rest_api/types/annotation_queue_scope.py +5 -0
- opik/rest_api/types/annotation_queue_write.py +31 -0
- opik/rest_api/types/annotation_queue_write_scope.py +5 -0
- opik/rest_api/types/assistant_message.py +7 -8
- opik/rest_api/types/assistant_message_role.py +1 -3
- opik/rest_api/types/attachment.py +22 -0
- opik/rest_api/types/attachment_page.py +28 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +160 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_page_public.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +143 -0
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_write.py +143 -0
- opik/rest_api/types/avg_value_stat_public.py +3 -5
- opik/rest_api/types/batch_delete.py +3 -5
- opik/rest_api/types/batch_delete_by_project.py +20 -0
- opik/rest_api/types/bi_information.py +3 -5
- opik/rest_api/types/bi_information_response.py +4 -6
- opik/rest_api/types/boolean_feedback_definition.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
- opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
- opik/rest_api/types/boolean_feedback_detail.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
- opik/rest_api/types/categorical_feedback_definition.py +5 -7
- opik/rest_api/types/categorical_feedback_definition_create.py +4 -6
- opik/rest_api/types/categorical_feedback_definition_public.py +5 -7
- opik/rest_api/types/categorical_feedback_definition_update.py +4 -6
- opik/rest_api/types/categorical_feedback_detail.py +3 -5
- opik/rest_api/types/categorical_feedback_detail_create.py +3 -5
- opik/rest_api/types/categorical_feedback_detail_public.py +3 -5
- opik/rest_api/types/categorical_feedback_detail_update.py +3 -5
- opik/rest_api/types/chat_completion_choice.py +4 -6
- opik/rest_api/types/chat_completion_response.py +5 -6
- opik/rest_api/types/check.py +22 -0
- opik/rest_api/types/{json_node_compare.py → check_name.py} +1 -1
- opik/rest_api/types/check_public.py +22 -0
- opik/rest_api/types/check_public_name.py +5 -0
- opik/rest_api/types/check_public_result.py +5 -0
- opik/rest_api/types/check_result.py +5 -0
- opik/rest_api/types/chunked_output_json_node.py +4 -6
- opik/rest_api/types/chunked_output_json_node_public.py +4 -6
- opik/rest_api/types/chunked_output_json_node_public_type.py +6 -10
- opik/rest_api/types/chunked_output_json_node_type.py +6 -10
- opik/rest_api/types/column.py +8 -10
- opik/rest_api/types/column_compare.py +8 -10
- opik/rest_api/types/column_public.py +8 -10
- opik/rest_api/types/column_types_item.py +1 -3
- opik/rest_api/types/comment.py +4 -6
- opik/rest_api/types/comment_compare.py +4 -6
- opik/rest_api/types/comment_public.py +4 -6
- opik/rest_api/types/complete_multipart_upload_request.py +33 -0
- opik/rest_api/types/complete_multipart_upload_request_entity_type.py +5 -0
- opik/rest_api/types/completion_tokens_details.py +3 -5
- opik/rest_api/types/count_value_stat_public.py +3 -5
- opik/rest_api/types/dashboard_page_public.py +24 -0
- opik/rest_api/types/dashboard_public.py +30 -0
- opik/rest_api/types/data_point_double.py +21 -0
- opik/rest_api/types/data_point_number_public.py +3 -5
- opik/rest_api/types/dataset.py +14 -6
- opik/rest_api/types/dataset_expansion.py +42 -0
- opik/rest_api/types/dataset_expansion_response.py +39 -0
- opik/rest_api/types/dataset_item.py +9 -8
- opik/rest_api/types/dataset_item_batch.py +3 -5
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +9 -8
- opik/rest_api/types/dataset_item_compare_source.py +1 -3
- opik/rest_api/types/dataset_item_filter.py +27 -0
- opik/rest_api/types/dataset_item_filter_operator.py +21 -0
- opik/rest_api/types/dataset_item_page_compare.py +10 -7
- opik/rest_api/types/dataset_item_page_public.py +10 -7
- opik/rest_api/types/dataset_item_public.py +9 -8
- opik/rest_api/types/dataset_item_public_source.py +1 -3
- opik/rest_api/types/dataset_item_source.py +1 -3
- opik/rest_api/types/dataset_item_update.py +39 -0
- opik/rest_api/types/dataset_item_write.py +5 -6
- opik/rest_api/types/dataset_item_write_source.py +1 -3
- opik/rest_api/types/dataset_page_public.py +9 -6
- opik/rest_api/types/dataset_public.py +14 -6
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_public_visibility.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +59 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/dataset_visibility.py +5 -0
- opik/rest_api/types/delete_attachments_request.py +23 -0
- opik/rest_api/types/delete_attachments_request_entity_type.py +5 -0
- opik/rest_api/types/delete_feedback_score.py +4 -5
- opik/rest_api/types/delete_ids_holder.py +19 -0
- opik/rest_api/types/delta.py +7 -9
- opik/rest_api/types/error_count_with_deviation.py +21 -0
- opik/rest_api/types/error_count_with_deviation_detailed.py +21 -0
- opik/rest_api/types/error_info.py +3 -5
- opik/rest_api/types/error_info_experiment_item_bulk_write_view.py +21 -0
- opik/rest_api/types/error_info_public.py +3 -5
- opik/rest_api/types/error_info_write.py +3 -5
- opik/rest_api/types/error_message.py +3 -5
- opik/rest_api/types/error_message_detail.py +3 -5
- opik/rest_api/types/error_message_detailed.py +3 -5
- opik/rest_api/types/error_message_public.py +3 -5
- opik/rest_api/types/experiment.py +21 -10
- opik/rest_api/types/experiment_group_aggregations_response.py +20 -0
- opik/rest_api/types/experiment_group_response.py +22 -0
- opik/rest_api/types/experiment_item.py +14 -11
- opik/rest_api/types/experiment_item_bulk_record.py +27 -0
- opik/rest_api/types/experiment_item_bulk_record_experiment_item_bulk_write_view.py +27 -0
- opik/rest_api/types/experiment_item_bulk_upload.py +27 -0
- opik/rest_api/types/experiment_item_compare.py +14 -11
- opik/rest_api/types/experiment_item_compare_trace_visibility_mode.py +5 -0
- opik/rest_api/types/experiment_item_public.py +6 -6
- opik/rest_api/types/experiment_item_public_trace_visibility_mode.py +5 -0
- opik/rest_api/types/experiment_item_trace_visibility_mode.py +5 -0
- opik/rest_api/types/experiment_page_public.py +9 -6
- opik/rest_api/types/experiment_public.py +21 -10
- opik/rest_api/types/experiment_public_status.py +5 -0
- opik/rest_api/types/experiment_public_type.py +5 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/experiment_status.py +5 -0
- opik/rest_api/types/experiment_type.py +5 -0
- opik/rest_api/types/export_trace_service_request.py +5 -0
- opik/rest_api/types/feedback.py +40 -27
- opik/rest_api/types/feedback_create.py +27 -13
- opik/rest_api/types/feedback_definition_page_public.py +4 -6
- opik/rest_api/types/feedback_object_public.py +40 -27
- opik/rest_api/types/feedback_public.py +40 -27
- opik/rest_api/types/feedback_score.py +7 -7
- opik/rest_api/types/feedback_score_average.py +3 -5
- opik/rest_api/types/feedback_score_average_detailed.py +3 -5
- opik/rest_api/types/feedback_score_average_public.py +3 -5
- opik/rest_api/types/feedback_score_batch.py +4 -6
- opik/rest_api/types/feedback_score_batch_item.py +6 -6
- opik/rest_api/types/feedback_score_batch_item_source.py +1 -3
- opik/rest_api/types/feedback_score_batch_item_thread.py +32 -0
- opik/rest_api/types/feedback_score_batch_item_thread_source.py +5 -0
- opik/rest_api/types/feedback_score_compare.py +7 -7
- opik/rest_api/types/feedback_score_compare_source.py +1 -3
- opik/rest_api/types/feedback_score_experiment_item_bulk_write_view.py +31 -0
- opik/rest_api/types/feedback_score_experiment_item_bulk_write_view_source.py +5 -0
- opik/rest_api/types/feedback_score_names.py +4 -6
- opik/rest_api/types/feedback_score_public.py +11 -7
- opik/rest_api/types/feedback_score_public_source.py +1 -3
- opik/rest_api/types/feedback_score_source.py +1 -3
- opik/rest_api/types/feedback_update.py +27 -13
- opik/rest_api/types/function.py +4 -7
- opik/rest_api/types/function_call.py +3 -5
- opik/rest_api/types/group_content.py +19 -0
- opik/rest_api/types/group_content_with_aggregations.py +21 -0
- opik/rest_api/types/group_detail.py +19 -0
- opik/rest_api/types/group_details.py +20 -0
- opik/rest_api/types/guardrail.py +34 -0
- opik/rest_api/types/guardrail_batch.py +20 -0
- opik/rest_api/types/guardrail_name.py +5 -0
- opik/rest_api/types/guardrail_result.py +5 -0
- opik/rest_api/types/guardrail_write.py +33 -0
- opik/rest_api/types/guardrail_write_name.py +5 -0
- opik/rest_api/types/guardrail_write_result.py +5 -0
- opik/rest_api/types/guardrails_validation.py +21 -0
- opik/rest_api/types/guardrails_validation_public.py +21 -0
- opik/rest_api/types/ids_holder.py +19 -0
- opik/rest_api/types/image_url.py +20 -0
- opik/rest_api/types/image_url_public.py +20 -0
- opik/rest_api/types/image_url_write.py +20 -0
- opik/rest_api/types/json_list_string.py +7 -0
- opik/rest_api/types/json_list_string_compare.py +7 -0
- opik/rest_api/types/json_list_string_experiment_item_bulk_write_view.py +7 -0
- opik/rest_api/types/json_list_string_public.py +7 -0
- opik/rest_api/types/json_list_string_write.py +7 -0
- opik/rest_api/types/json_schema.py +5 -8
- opik/rest_api/types/llm_as_judge_code.py +8 -12
- opik/rest_api/types/llm_as_judge_code_public.py +8 -12
- opik/rest_api/types/llm_as_judge_code_write.py +8 -12
- opik/rest_api/types/llm_as_judge_message.py +9 -7
- opik/rest_api/types/llm_as_judge_message_content.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
- opik/rest_api/types/llm_as_judge_message_public.py +9 -7
- opik/rest_api/types/llm_as_judge_message_public_role.py +1 -1
- opik/rest_api/types/llm_as_judge_message_role.py +1 -1
- opik/rest_api/types/llm_as_judge_message_write.py +9 -7
- opik/rest_api/types/llm_as_judge_message_write_role.py +1 -1
- opik/rest_api/types/llm_as_judge_model_parameters.py +6 -5
- opik/rest_api/types/llm_as_judge_model_parameters_public.py +6 -5
- opik/rest_api/types/llm_as_judge_model_parameters_write.py +6 -5
- opik/rest_api/types/llm_as_judge_output_schema.py +4 -6
- opik/rest_api/types/llm_as_judge_output_schema_public.py +4 -6
- opik/rest_api/types/llm_as_judge_output_schema_public_type.py +1 -3
- opik/rest_api/types/llm_as_judge_output_schema_type.py +1 -3
- opik/rest_api/types/llm_as_judge_output_schema_write.py +4 -6
- opik/rest_api/types/llm_as_judge_output_schema_write_type.py +1 -3
- opik/rest_api/types/log_item.py +5 -7
- opik/rest_api/types/log_item_level.py +1 -3
- opik/rest_api/types/log_page.py +4 -6
- opik/rest_api/types/manual_evaluation_request.py +38 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
- opik/rest_api/types/manual_evaluation_response.py +27 -0
- opik/rest_api/types/multipart_upload_part.py +20 -0
- opik/rest_api/types/numerical_feedback_definition.py +5 -7
- opik/rest_api/types/numerical_feedback_definition_create.py +4 -6
- opik/rest_api/types/numerical_feedback_definition_public.py +5 -7
- opik/rest_api/types/numerical_feedback_definition_update.py +4 -6
- opik/rest_api/types/numerical_feedback_detail.py +3 -5
- opik/rest_api/types/numerical_feedback_detail_create.py +3 -5
- opik/rest_api/types/numerical_feedback_detail_public.py +3 -5
- opik/rest_api/types/numerical_feedback_detail_update.py +3 -5
- opik/rest_api/types/optimization.py +37 -0
- opik/rest_api/types/optimization_page_public.py +28 -0
- opik/rest_api/types/optimization_public.py +37 -0
- opik/rest_api/types/optimization_public_status.py +7 -0
- opik/rest_api/types/optimization_status.py +7 -0
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +30 -0
- opik/rest_api/types/optimization_write_status.py +7 -0
- opik/rest_api/types/page_columns.py +4 -6
- opik/rest_api/types/percentage_value_stat_public.py +4 -6
- opik/rest_api/types/percentage_values.py +8 -16
- opik/rest_api/types/percentage_values_detailed.py +8 -16
- opik/rest_api/types/percentage_values_public.py +8 -16
- opik/rest_api/types/project.py +12 -7
- opik/rest_api/types/project_detailed.py +12 -7
- opik/rest_api/types/project_detailed_visibility.py +5 -0
- opik/rest_api/types/project_metric_response_public.py +5 -9
- opik/rest_api/types/project_metric_response_public_interval.py +1 -3
- opik/rest_api/types/project_metric_response_public_metric_type.py +11 -1
- opik/rest_api/types/project_page_public.py +8 -10
- opik/rest_api/types/project_public.py +6 -6
- opik/rest_api/types/project_public_visibility.py +5 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stat_item_object_public.py +8 -17
- opik/rest_api/types/project_stats_public.py +4 -6
- opik/rest_api/types/project_stats_summary.py +4 -6
- opik/rest_api/types/project_stats_summary_item.py +9 -6
- opik/rest_api/types/project_visibility.py +5 -0
- opik/rest_api/types/prompt.py +12 -7
- opik/rest_api/types/prompt_detail.py +12 -7
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_page_public.py +9 -6
- opik/rest_api/types/prompt_public.py +11 -6
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_tokens_details.py +19 -0
- opik/rest_api/types/prompt_version.py +7 -6
- opik/rest_api/types/prompt_version_detail.py +7 -6
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_link.py +4 -5
- opik/rest_api/types/prompt_version_link_public.py +4 -5
- opik/rest_api/types/prompt_version_link_write.py +3 -5
- opik/rest_api/types/prompt_version_page_public.py +9 -6
- opik/rest_api/types/prompt_version_public.py +7 -6
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +18 -8
- opik/rest_api/types/provider_api_key_page_public.py +27 -0
- opik/rest_api/types/provider_api_key_provider.py +1 -1
- opik/rest_api/types/provider_api_key_public.py +18 -8
- opik/rest_api/types/provider_api_key_public_provider.py +1 -1
- opik/rest_api/types/response_format.py +5 -7
- opik/rest_api/types/response_format_type.py +1 -3
- opik/rest_api/types/result.py +21 -0
- opik/rest_api/types/results_number_public.py +4 -6
- opik/rest_api/types/score_name.py +4 -5
- opik/rest_api/types/service_toggles_config.py +44 -0
- opik/rest_api/types/span.py +13 -15
- opik/rest_api/types/span_batch.py +4 -6
- opik/rest_api/types/span_enrichment_options.py +31 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view.py +39 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view_type.py +5 -0
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_public.py +4 -6
- opik/rest_api/types/span_filter_public_operator.py +2 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/span_page_public.py +9 -6
- opik/rest_api/types/span_public.py +19 -16
- opik/rest_api/types/span_public_type.py +1 -1
- opik/rest_api/types/span_type.py +1 -1
- opik/rest_api/types/span_update.py +46 -0
- opik/rest_api/types/span_update_type.py +5 -0
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/rest_api/types/span_write.py +13 -14
- opik/rest_api/types/span_write_type.py +1 -1
- opik/rest_api/types/spans_count_response.py +20 -0
- opik/rest_api/types/start_multipart_upload_response.py +20 -0
- opik/rest_api/types/stream_options.py +3 -5
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/tool.py +4 -6
- opik/rest_api/types/tool_call.py +4 -6
- opik/rest_api/types/trace.py +26 -12
- opik/rest_api/types/trace_batch.py +4 -6
- opik/rest_api/types/trace_count_response.py +4 -6
- opik/rest_api/types/trace_enrichment_options.py +32 -0
- opik/rest_api/types/trace_experiment_item_bulk_write_view.py +41 -0
- opik/rest_api/types/trace_filter.py +23 -0
- opik/rest_api/types/trace_filter_operator.py +21 -0
- opik/rest_api/types/trace_filter_public.py +23 -0
- opik/rest_api/types/trace_filter_public_operator.py +21 -0
- opik/rest_api/types/trace_filter_write.py +23 -0
- opik/rest_api/types/trace_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_page_public.py +8 -10
- opik/rest_api/types/trace_public.py +27 -13
- opik/rest_api/types/trace_public_visibility_mode.py +5 -0
- opik/rest_api/types/trace_thread.py +18 -9
- opik/rest_api/types/trace_thread_filter.py +23 -0
- opik/rest_api/types/trace_thread_filter_operator.py +21 -0
- opik/rest_api/types/trace_thread_filter_public.py +23 -0
- opik/rest_api/types/trace_thread_filter_public_operator.py +21 -0
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_thread_identifier.py +22 -0
- opik/rest_api/types/trace_thread_llm_as_judge_code.py +26 -0
- opik/rest_api/types/trace_thread_llm_as_judge_code_public.py +26 -0
- opik/rest_api/types/trace_thread_llm_as_judge_code_write.py +26 -0
- opik/rest_api/types/trace_thread_page.py +9 -6
- opik/rest_api/types/trace_thread_status.py +5 -0
- opik/rest_api/types/trace_thread_update.py +19 -0
- opik/rest_api/types/trace_thread_user_defined_metric_python_code.py +19 -0
- opik/rest_api/types/trace_thread_user_defined_metric_python_code_public.py +19 -0
- opik/rest_api/types/trace_thread_user_defined_metric_python_code_write.py +19 -0
- opik/rest_api/types/trace_update.py +39 -0
- opik/rest_api/types/trace_visibility_mode.py +5 -0
- opik/rest_api/types/trace_write.py +10 -11
- opik/rest_api/types/usage.py +6 -6
- opik/rest_api/types/user_defined_metric_python_code.py +3 -5
- opik/rest_api/types/user_defined_metric_python_code_public.py +3 -5
- opik/rest_api/types/user_defined_metric_python_code_write.py +3 -5
- opik/rest_api/types/value_entry.py +27 -0
- opik/rest_api/types/value_entry_compare.py +27 -0
- opik/rest_api/types/value_entry_compare_source.py +5 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +27 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view_source.py +5 -0
- opik/rest_api/types/value_entry_public.py +27 -0
- opik/rest_api/types/value_entry_public_source.py +5 -0
- opik/rest_api/types/value_entry_source.py +5 -0
- opik/rest_api/types/video_url.py +19 -0
- opik/rest_api/types/video_url_public.py +19 -0
- opik/rest_api/types/video_url_write.py +19 -0
- opik/rest_api/types/webhook.py +28 -0
- opik/rest_api/types/webhook_examples.py +19 -0
- opik/rest_api/types/webhook_public.py +28 -0
- opik/rest_api/types/webhook_test_result.py +23 -0
- opik/rest_api/types/webhook_test_result_status.py +5 -0
- opik/rest_api/types/webhook_write.py +23 -0
- opik/rest_api/types/welcome_wizard_tracking.py +22 -0
- opik/rest_api/types/workspace_configuration.py +27 -0
- opik/rest_api/types/workspace_metric_request.py +24 -0
- opik/rest_api/types/workspace_metric_response.py +20 -0
- opik/rest_api/types/workspace_metrics_summary_request.py +23 -0
- opik/rest_api/types/workspace_metrics_summary_response.py +20 -0
- opik/rest_api/types/workspace_name_holder.py +19 -0
- opik/rest_api/types/workspace_spans_count.py +20 -0
- opik/rest_api/types/workspace_trace_count.py +3 -5
- opik/rest_api/welcome_wizard/__init__.py +4 -0
- opik/rest_api/welcome_wizard/client.py +195 -0
- opik/rest_api/welcome_wizard/raw_client.py +208 -0
- opik/rest_api/workspaces/__init__.py +2 -0
- opik/rest_api/workspaces/client.py +550 -77
- opik/rest_api/workspaces/raw_client.py +923 -0
- opik/rest_client_configurator/api.py +1 -0
- opik/rest_client_configurator/retry_decorator.py +1 -0
- opik/s3_httpx_client.py +67 -0
- opik/simulation/__init__.py +6 -0
- opik/simulation/simulated_user.py +99 -0
- opik/simulation/simulator.py +108 -0
- opik/synchronization.py +11 -24
- opik/tracing_runtime_config.py +48 -0
- opik/types.py +48 -2
- opik/url_helpers.py +13 -3
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +4 -5
- opik/validation/parameter.py +122 -0
- opik/validation/parameters_validator.py +175 -0
- opik/validation/validator.py +30 -2
- opik/validation/validator_helpers.py +147 -0
- opik-1.9.71.dist-info/METADATA +370 -0
- opik-1.9.71.dist-info/RECORD +1110 -0
- {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/WHEEL +1 -1
- opik-1.9.71.dist-info/licenses/LICENSE +203 -0
- opik/api_objects/prompt/prompt.py +0 -107
- opik/api_objects/prompt/prompt_template.py +0 -35
- opik/cli.py +0 -193
- opik/evaluation/metrics/models.py +0 -8
- opik/hooks.py +0 -13
- opik/integrations/bedrock/chunks_aggregator.py +0 -55
- opik/integrations/bedrock/helpers.py +0 -8
- opik/integrations/langchain/google_run_helpers.py +0 -75
- opik/integrations/langchain/openai_run_helpers.py +0 -122
- opik/message_processing/message_processors.py +0 -203
- opik/rest_api/types/delta_role.py +0 -7
- opik/rest_api/types/json_object_schema.py +0 -34
- opik-1.6.4.dist-info/METADATA +0 -270
- opik-1.6.4.dist-info/RECORD +0 -507
- /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
- {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
- {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
import warnings
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
from typing import Any, Iterator, List, Optional, Tuple, Union
|
|
2
4
|
|
|
3
5
|
from opik.exceptions import MetricComputationError
|
|
4
6
|
from opik.evaluation.metrics import base_metric, score_result
|
|
@@ -15,8 +17,12 @@ class BaseBLEU(base_metric.BaseMetric):
|
|
|
15
17
|
and weights initialization. This class is not intended to be used directly.
|
|
16
18
|
|
|
17
19
|
References:
|
|
18
|
-
-
|
|
19
|
-
https://
|
|
20
|
+
- BLEU: Papineni et al., "BLEU: a Method for Automatic Evaluation of Machine Translation" (ACL 2002)
|
|
21
|
+
https://aclanthology.org/P02-1040/
|
|
22
|
+
- NLTK BLEU documentation
|
|
23
|
+
https://www.nltk.org/api/nltk.translate.bleu_score.html
|
|
24
|
+
- Hugging Face Evaluate: BLEU metric overview
|
|
25
|
+
https://huggingface.co/spaces/evaluate-metric/bleu
|
|
20
26
|
|
|
21
27
|
Args:
|
|
22
28
|
name: The name of the metric (e.g. "sentence_bleu_metric" or "corpus_bleu_metric").
|
|
@@ -25,6 +31,8 @@ class BaseBLEU(base_metric.BaseMetric):
|
|
|
25
31
|
smoothing_method: One of NLTK's SmoothingFunction methods (e.g. "method0", "method1", etc.).
|
|
26
32
|
weights: Optional custom weights for n-gram orders. Must sum to 1.0. If None,
|
|
27
33
|
defaults to uniform distribution across `n_grams`.
|
|
34
|
+
project_name: Optional project name to track the metric in for the cases when
|
|
35
|
+
there are no parent span/trace to inherit project name from.
|
|
28
36
|
"""
|
|
29
37
|
|
|
30
38
|
def __init__(
|
|
@@ -34,8 +42,9 @@ class BaseBLEU(base_metric.BaseMetric):
|
|
|
34
42
|
n_grams: int,
|
|
35
43
|
smoothing_method: str,
|
|
36
44
|
weights: Optional[List[float]],
|
|
45
|
+
project_name: Optional[str],
|
|
37
46
|
):
|
|
38
|
-
super().__init__(name=name, track=track)
|
|
47
|
+
super().__init__(name=name, track=track, project_name=project_name)
|
|
39
48
|
|
|
40
49
|
if nltk_bleu_score is None:
|
|
41
50
|
raise ImportError(
|
|
@@ -70,6 +79,18 @@ class BaseBLEU(base_metric.BaseMetric):
|
|
|
70
79
|
return tuple(normalized)
|
|
71
80
|
|
|
72
81
|
|
|
82
|
+
@contextmanager
|
|
83
|
+
def _suppress_bleu_warnings() -> Iterator[None]:
|
|
84
|
+
with warnings.catch_warnings():
|
|
85
|
+
warnings.filterwarnings(
|
|
86
|
+
"ignore",
|
|
87
|
+
message=r"The hypothesis contains 0 counts of 2-gram overlaps\.",
|
|
88
|
+
category=UserWarning,
|
|
89
|
+
module="nltk\\.translate\\.bleu_score",
|
|
90
|
+
)
|
|
91
|
+
yield
|
|
92
|
+
|
|
93
|
+
|
|
73
94
|
class SentenceBLEU(BaseBLEU):
|
|
74
95
|
"""
|
|
75
96
|
Computes sentence-level BLEU for a single candidate string vs. one or more references.
|
|
@@ -89,6 +110,7 @@ class SentenceBLEU(BaseBLEU):
|
|
|
89
110
|
n_grams: int = 4,
|
|
90
111
|
smoothing_method: str = "method1",
|
|
91
112
|
weights: Optional[List[float]] = None,
|
|
113
|
+
project_name: Optional[str] = None,
|
|
92
114
|
):
|
|
93
115
|
super().__init__(
|
|
94
116
|
name=name,
|
|
@@ -96,6 +118,7 @@ class SentenceBLEU(BaseBLEU):
|
|
|
96
118
|
n_grams=n_grams,
|
|
97
119
|
smoothing_method=smoothing_method,
|
|
98
120
|
weights=weights,
|
|
121
|
+
project_name=project_name,
|
|
99
122
|
)
|
|
100
123
|
|
|
101
124
|
def score(
|
|
@@ -146,12 +169,13 @@ class SentenceBLEU(BaseBLEU):
|
|
|
146
169
|
smoothing_func = self._get_smoothing_func()
|
|
147
170
|
|
|
148
171
|
try:
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
172
|
+
with _suppress_bleu_warnings():
|
|
173
|
+
bleu_val = nltk_bleu_score.sentence_bleu(
|
|
174
|
+
ref_lists,
|
|
175
|
+
candidate_tokens,
|
|
176
|
+
weights=used_weights,
|
|
177
|
+
smoothing_function=smoothing_func,
|
|
178
|
+
)
|
|
155
179
|
except ZeroDivisionError:
|
|
156
180
|
bleu_val = 0.0
|
|
157
181
|
|
|
@@ -190,6 +214,7 @@ class CorpusBLEU(BaseBLEU):
|
|
|
190
214
|
n_grams: int = 4,
|
|
191
215
|
smoothing_method: str = "method1",
|
|
192
216
|
weights: Optional[List[float]] = None,
|
|
217
|
+
project_name: Optional[str] = None,
|
|
193
218
|
):
|
|
194
219
|
super().__init__(
|
|
195
220
|
name=name,
|
|
@@ -197,6 +222,7 @@ class CorpusBLEU(BaseBLEU):
|
|
|
197
222
|
n_grams=n_grams,
|
|
198
223
|
smoothing_method=smoothing_method,
|
|
199
224
|
weights=weights,
|
|
225
|
+
project_name=project_name,
|
|
200
226
|
)
|
|
201
227
|
|
|
202
228
|
def score(
|
|
@@ -261,12 +287,13 @@ class CorpusBLEU(BaseBLEU):
|
|
|
261
287
|
smoothing_func = self._get_smoothing_func()
|
|
262
288
|
|
|
263
289
|
try:
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
290
|
+
with _suppress_bleu_warnings():
|
|
291
|
+
bleu_val = nltk_bleu_score.corpus_bleu(
|
|
292
|
+
all_references,
|
|
293
|
+
all_candidates,
|
|
294
|
+
weights=used_weights,
|
|
295
|
+
smoothing_function=smoothing_func,
|
|
296
|
+
)
|
|
270
297
|
except ZeroDivisionError:
|
|
271
298
|
bleu_val = 0.0
|
|
272
299
|
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Character n-gram F-score (chrF/chrF++) metric wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Callable, Optional, Sequence, Union
|
|
6
|
+
|
|
7
|
+
from opik.evaluation.metrics.base_metric import BaseMetric
|
|
8
|
+
from opik.evaluation.metrics.score_result import ScoreResult
|
|
9
|
+
from opik.exceptions import MetricComputationError
|
|
10
|
+
|
|
11
|
+
try: # pragma: no cover - optional dependency
|
|
12
|
+
from nltk.translate import chrf_score as nltk_chrf_score
|
|
13
|
+
except ImportError: # pragma: no cover - optional dependency
|
|
14
|
+
nltk_chrf_score = None
|
|
15
|
+
|
|
16
|
+
ChrFFn = Callable[[Sequence[str], Sequence[str]], float]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ChrF(BaseMetric):
    """
    Compute chrF scores between a candidate string and one or more references.

    By default the implementation delegates to
    ``nltk.translate.chrf_score.sentence_chrf``. Each reference is scored
    separately against the candidate and the best (highest) score is reported.
    Scores range from `0.0` (no overlap) to `1.0` (perfect match).

    References:
        - Popović, "chrF: character n-gram F-score for automatic MT evaluation" (WMT 2015)
          https://aclanthology.org/W15-3049/
        - NLTK chrf_score module documentation
          https://www.nltk.org/api/nltk.translate.chrf_score.html
        - Hugging Face Evaluate: chrF metric overview
          https://huggingface.co/spaces/evaluate-metric/chrf

    Args:
        name: Display name for the metric result. Defaults to ``"chrf_metric"``.
        track: Whether to automatically track metric results. Defaults to ``True``.
        project_name: Optional tracking project name. Defaults to ``None``.
        beta: Weighting between precision and recall (``beta = 2`` is standard).
        ignore_whitespace: Whether whitespace is ignored before scoring.
        char_order: Maximum character n-gram order.
        word_order: Maximum word n-gram order requested for chrF++. The value is
            stored on the instance but is not forwarded to the default NLTK
            call; provide ``chrf_fn`` to plug in a chrF++ implementation.
        lowercase: Whether to lowercase candidate and references prior to scoring.
        chrf_fn: Optional custom scoring callable for testing or offline usage.
            It is called as ``chrf_fn(candidate, references)`` where
            ``references`` is a list of strings.

    Raises:
        ImportError: If ``chrf_fn`` is not given and ``nltk`` is not installed.

    Example:
        >>> from opik.evaluation.metrics import ChrF
        >>> metric = ChrF(beta=2.0, char_order=6, lowercase=True)
        >>> result = metric.score(
        ...     output="The quick brown fox",
        ...     reference="The quick brown fox jumps",
        ... )
        >>> 0.0 <= result.value <= 1.0  # doctest: +SKIP
        True
    """

    def __init__(
        self,
        name: str = "chrf_metric",
        track: bool = True,
        project_name: Optional[str] = None,
        beta: float = 2.0,
        ignore_whitespace: bool = False,
        char_order: int = 6,
        word_order: int = 0,
        lowercase: bool = False,
        chrf_fn: Optional[ChrFFn] = None,
    ) -> None:
        super().__init__(name=name, track=track, project_name=project_name)
        self._beta = beta
        self._ignore_whitespace = ignore_whitespace
        self._char_order = char_order
        self._word_order = word_order
        self._lowercase = lowercase

        if chrf_fn is not None:
            self._chrf_fn = chrf_fn
            return

        if nltk_chrf_score is None:  # pragma: no cover - optional dependency
            raise ImportError(
                "chrF metric requires the optional 'nltk' package. Install via"
                " `pip install nltk` or provide `chrf_fn`."
            )
        self._chrf_fn = self._score_with_nltk

    def _score_with_nltk(
        self, candidate: Sequence[str], references: Sequence[str]
    ) -> float:
        """Return the best NLTK chrF score of ``candidate`` over ``references``."""
        # Scoring reference-by-reference keeps each reference intact; handing
        # the whole list to NLTK at once would treat it as a single sentence.
        return max(
            self._nltk_sentence_chrf(reference, candidate)
            for reference in references
        )

    def _nltk_sentence_chrf(self, reference: str, candidate: Sequence[str]) -> float:
        """Score one reference, forwarding the configured options to NLTK."""
        keyword_sets = (
            {
                "max_len": self._char_order,
                "beta": self._beta,
                "ignore_whitespace": self._ignore_whitespace,
            },
            {"beta": self._beta},
        )
        for keywords in keyword_sets:
            try:
                return float(
                    nltk_chrf_score.sentence_chrf(reference, candidate, **keywords)
                )
            except TypeError:
                # Older NLTK versions expose the helper with fewer keyword arguments.
                continue
        return float(nltk_chrf_score.sentence_chrf(reference, candidate))

    def score(
        self,
        output: str,
        reference: Union[str, Sequence[str]],
        **ignored_kwargs: Any,
    ) -> ScoreResult:
        """
        Score ``output`` against one reference string or a sequence of them.

        Args:
            output: Candidate text to evaluate.
            reference: A single reference string or a sequence of reference strings.
            **ignored_kwargs: Additional keyword arguments that are ignored.

        Returns:
            ScoreResult: Result carrying the chrF value and a short reason string.

        Raises:
            MetricComputationError: If the candidate is blank, no reference is
                given, or any reference is blank.
        """
        if not output.strip():
            raise MetricComputationError("Candidate is empty (chrF metric).")
        if isinstance(reference, str):
            references = [reference]
        else:
            references = list(reference)
        if not references or any(not ref.strip() for ref in references):
            raise MetricComputationError("Reference is empty (chrF metric).")

        if self._lowercase:
            output_text = output.lower()
            references = [ref.lower() for ref in references]
        else:
            output_text = output

        value = float(self._chrf_fn(output_text, references))

        return ScoreResult(
            value=value,
            name=self.name,
            reason=f"chrF score: {value:.4f}",
        )
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any, Optional
|
|
2
2
|
|
|
3
3
|
from .. import base_metric, score_result
|
|
4
4
|
|
|
@@ -12,52 +12,91 @@ class Contains(base_metric.BaseMetric):
|
|
|
12
12
|
|
|
13
13
|
Args:
|
|
14
14
|
case_sensitive: Whether the comparison should be case-sensitive. Defaults to False.
|
|
15
|
+
reference: Optional default reference string. If provided, it will be used unless
|
|
16
|
+
a reference is explicitly passed to `score()`.
|
|
15
17
|
name: The name of the metric. Defaults to "contains_metric".
|
|
16
18
|
track: Whether to track the metric. Defaults to True.
|
|
19
|
+
project_name: Optional project name to track the metric in for the cases when there are
|
|
20
|
+
no parent span/trace to inherit project name from.
|
|
17
21
|
|
|
18
|
-
|
|
19
|
-
>>>
|
|
20
|
-
>>> contains_metric = Contains(
|
|
21
|
-
>>> result = contains_metric.score("Hello, World!"
|
|
22
|
+
Examples:
|
|
23
|
+
>>> # Using a default reference at initialization
|
|
24
|
+
>>> contains_metric = Contains(reference="world")
|
|
25
|
+
>>> result = contains_metric.score("Hello, World!")
|
|
22
26
|
>>> print(result.value)
|
|
23
27
|
1.0
|
|
24
|
-
|
|
28
|
+
|
|
29
|
+
>>> # Overriding the default reference at score time
|
|
30
|
+
>>> result = contains_metric.score("Hello, World!", reference="there")
|
|
25
31
|
>>> print(result.value)
|
|
26
32
|
0.0
|
|
33
|
+
|
|
34
|
+
>>> # If no reference is set at all, score() raises an error
|
|
35
|
+
>>> contains_metric = Contains()
|
|
36
|
+
>>> contains_metric.score("Hello")
|
|
37
|
+
Traceback (most recent call last):
|
|
38
|
+
...
|
|
39
|
+
ValueError: No reference string provided. Either pass `reference` to `score()` or set a default reference when creating the metric.
|
|
40
|
+
|
|
41
|
+
>>> # Empty reference string is invalid
|
|
42
|
+
>>> contains_metric = Contains(reference="")
|
|
43
|
+
>>> contains_metric.score("Hello")
|
|
44
|
+
Traceback (most recent call last):
|
|
45
|
+
...
|
|
46
|
+
ValueError: Invalid reference string provided. Reference must be a non-empty string.
|
|
27
47
|
"""
|
|
28
48
|
|
|
29
49
|
def __init__(
    self,
    case_sensitive: bool = False,
    reference: Optional[str] = None,
    name: str = "contains_metric",
    track: bool = True,
    project_name: Optional[str] = None,
):
    """
    Configure the metric.

    Args:
        case_sensitive: Whether the comparison should be case-sensitive.
        reference: Optional default reference used when `score()` receives none.
        name: The name of the metric.
        track: Whether to track the metric.
        project_name: Optional project name forwarded to the base metric.
    """
    super().__init__(name=name, track=track, project_name=project_name)
    # Remember the fallback reference alongside the comparison mode.
    self._default_reference = reference
    self._case_sensitive = case_sensitive
|
|
41
64
|
|
|
42
65
|
def score(
    self, output: str, reference: Optional[str] = None, **ignored_kwargs: Any
) -> score_result.ScoreResult:
    """
    Check whether the reference string occurs inside the output string.

    Args:
        output: The output string to search in.
        reference: The string to look for. When None, the default reference
            given at initialization is used instead.
        **ignored_kwargs: Additional keyword arguments that are ignored.

    Returns:
        score_result.ScoreResult: Value 1.0 when the reference is found in the
            output, 0.0 otherwise.

    Raises:
        ValueError: If no reference is available, or the reference is an
            empty string.
    """
    # An explicit argument wins over the default configured on the instance.
    needle = self._default_reference if reference is None else reference

    if needle is None:
        raise ValueError(
            "No reference string provided. Either pass `reference` to `score()` or set a default reference when creating the metric."
        )
    if needle == "":
        raise ValueError(
            "Invalid reference string provided. Reference must be a non-empty string."
        )

    haystack = output
    if not self._case_sensitive:
        # Case-insensitive mode compares lowercased copies of both strings.
        haystack = haystack.lower()
        needle = needle.lower()

    found = needle in haystack
    return score_result.ScoreResult(value=1.0 if found else 0.0, name=self.name)
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from collections import Counter
|
|
5
|
+
from typing import Any, Callable, Dict, Iterable, List, Optional, Protocol, Sequence
|
|
6
|
+
|
|
7
|
+
from opik.exceptions import MetricComputationError
|
|
8
|
+
from opik.evaluation.metrics import base_metric, score_result
|
|
9
|
+
|
|
10
|
+
TokenizeFn = Callable[[str], Iterable[str]]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class _JSDistanceFn(Protocol):
|
|
14
|
+
def __call__(
|
|
15
|
+
self,
|
|
16
|
+
p: Sequence[float],
|
|
17
|
+
q: Sequence[float],
|
|
18
|
+
base: Optional[
|
|
19
|
+
float
|
|
20
|
+
] = ..., # matches scipy signature allowing positional or keyword use
|
|
21
|
+
) -> float: ...
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_jensen_shannon_distance() -> _JSDistanceFn:
|
|
25
|
+
try:
|
|
26
|
+
from scipy.spatial.distance import jensenshannon
|
|
27
|
+
except ImportError as error: # pragma: no cover - optional dependency
|
|
28
|
+
raise ImportError(
|
|
29
|
+
"Install scipy via `pip install scipy` to use Jensen-Shannon metrics."
|
|
30
|
+
) from error
|
|
31
|
+
|
|
32
|
+
return jensenshannon
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _default_tokenizer(text: str) -> Iterable[str]:
|
|
36
|
+
return text.lower().split()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class _DistributionMetricBase(base_metric.BaseMetric):
    """
    Shared base for metrics that compare token distributions of two texts.

    Args:
        tokenizer: Optional callable turning text into an iterable of tokens;
            a falsy value selects the module's default tokenizer.
        name: Display name for the metric.
        track: Whether to automatically track metric results.
        project_name: Optional tracking project.
        normalize: When ``True`` token counts are converted to probabilities.
        smoothing: Additive constant applied by ``_smooth``; negative values
            are clamped to ``0.0``.
    """

    def __init__(
        self,
        tokenizer: Optional[TokenizeFn],
        name: str,
        track: bool,
        project_name: Optional[str],
        normalize: bool,
        smoothing: float = 0.0,
    ) -> None:
        super().__init__(name=name, track=track, project_name=project_name)
        self._tokenizer = tokenizer if tokenizer else _default_tokenizer
        self._normalize = normalize
        self._smoothing = max(0.0, smoothing)

    def _build_distribution(self, text: str) -> Dict[str, float]:
        """
        Tokenize ``text`` and return a token -> weight mapping.

        Weights are relative frequencies when normalization is enabled and
        raw counts (as floats) otherwise.

        Raises:
            MetricComputationError: If the tokenizer yields no tokens.
        """
        token_list = list(self._tokenizer(text))
        if not token_list:
            raise MetricComputationError(
                "Tokenized text is empty (distribution-based metric)."
            )

        frequencies = Counter(token_list)
        if self._normalize:
            total = float(sum(frequencies.values()))
            return {token: count / total for token, count in frequencies.items()}

        return {token: float(count) for token, count in frequencies.items()}

    def _smooth(self, value: float) -> float:
        """Return ``value`` plus the smoothing constant (unchanged when it is zero)."""
        return value if self._smoothing == 0.0 else value + self._smoothing
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class JSDivergence(_DistributionMetricBase):
    """
    Compute Jensen–Shannon similarity (``1 - JSD``) between two texts.

    The divergence is obtained by squaring the distance returned by scipy's
    ``jensenshannon`` over the union vocabulary of both texts.

    Args:
        tokenizer: Optional tokenizer function. Defaults to whitespace split.
        base: Logarithm base used when computing divergence (> ``1.0``).
        normalize: Whether to normalise token counts to probabilities first.
        name: Display name for the metric result.
        track: Whether to automatically track metric results.
        project_name: Optional tracking project name.

    Raises:
        ValueError: If ``base`` is not greater than ``1.0``.

    Note:
        Requires :mod:`scipy` to be installed.

    Example:
        >>> from opik.evaluation.metrics import JSDivergence
        >>> metric = JSDivergence()
        >>> result = metric.score(
        ...     output="cat cat sat",
        ...     reference="cat sat on mat",
        ... )
        >>> round(result.value, 3)  # doctest: +SKIP
        0.812
    """

    def __init__(
        self,
        tokenizer: Optional[TokenizeFn] = None,
        base: float = 2.0,
        normalize: bool = True,
        name: str = "js_divergence_metric",
        track: bool = True,
        project_name: Optional[str] = None,
    ) -> None:
        # Validate before touching the base class so invalid input fails fast.
        if base <= 1.0:
            raise ValueError("base must be greater than 1.0")
        super().__init__(
            tokenizer=tokenizer,
            name=name,
            track=track,
            project_name=project_name,
            normalize=normalize,
        )
        self._base = base
        self._js_distance_fn = _load_jensen_shannon_distance()

    def score(
        self,
        output: str,
        reference: str,
        **ignored_kwargs: Any,
    ) -> score_result.ScoreResult:
        """
        Return the similarity ``1 - divergence`` clamped to ``[0.0, 1.0]``.

        The result metadata carries the divergence, its square root
        (``"distance"``) and the logarithm base.

        Raises:
            MetricComputationError: If ``output`` or ``reference`` is blank.
        """
        if not output.strip():
            raise MetricComputationError(
                "Candidate is empty (Jensen-Shannon divergence)."
            )
        if not reference.strip():
            raise MetricComputationError(
                "Reference is empty (Jensen-Shannon divergence)."
            )

        candidate_dist = self._build_distribution(output)
        target_dist = self._build_distribution(reference)
        divergence = self._js_divergence(candidate_dist, target_dist)

        similarity = max(0.0, min(1.0, 1.0 - divergence))
        reason = (
            f"Jensen-Shannon similarity (base={self._base:g}): {similarity:.4f} "
            f"(divergence={divergence:.4f})"
        )
        details = {
            "divergence": divergence,
            "distance": math.sqrt(divergence),
            "base": self._base,
        }
        return score_result.ScoreResult(
            value=similarity,
            name=self.name,
            reason=reason,
            metadata=details,
        )

    def _js_divergence(
        self,
        p_dist: Dict[str, float],
        q_dist: Dict[str, float],
    ) -> float:
        """Return the squared Jensen-Shannon distance between two token distributions."""
        vocabulary = sorted(set(p_dist).union(q_dist))
        if not vocabulary:
            return 0.0

        # Align both distributions on the shared vocabulary; absent tokens weigh 0.
        p_probs = self._ensure_probability_vector(
            [p_dist.get(token, 0.0) for token in vocabulary]
        )
        q_probs = self._ensure_probability_vector(
            [q_dist.get(token, 0.0) for token in vocabulary]
        )

        distance = float(self._js_distance_fn(p_probs, q_probs, base=self._base))
        return distance**2

    def _ensure_probability_vector(self, values: Sequence[float]) -> List[float]:
        """
        Rescale ``values`` so they sum to one.

        Raises:
            MetricComputationError: If the values do not sum to a positive number.
        """
        total = sum(values)
        if total <= 0.0:
            raise MetricComputationError(
                "Distribution is empty after tokenisation (Jensen-Shannon metric)."
            )
        return [value / total for value in values]
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class JSDistance(JSDivergence):
    """
    Return the raw Jensen–Shannon divergence instead of similarity.

    The reported value is the divergence computed by the parent class; the
    square-root distance is exposed in the result metadata under ``"distance"``.

    Args:
        tokenizer: Optional tokenizer function.
        base: Logarithm base used for the divergence calculation.
        normalize: Whether to normalise counts into probabilities.
        name: Display name for the metric result.
        track: Whether to automatically track metric results.
        project_name: Optional tracking project name.

    Example:
        >>> from opik.evaluation.metrics import JSDistance
        >>> metric = JSDistance()
        >>> result = metric.score("a a b", reference="a b b")
        >>> round(result.value, 3)  # doctest: +SKIP
        0.188
    """

    def __init__(
        self,
        tokenizer: Optional[TokenizeFn] = None,
        base: float = 2.0,
        normalize: bool = True,
        name: str = "js_distance_metric",
        track: bool = True,
        project_name: Optional[str] = None,
    ) -> None:
        super().__init__(
            tokenizer=tokenizer,
            base=base,
            normalize=normalize,
            name=name,
            track=track,
            project_name=project_name,
        )

    def score(
        self,
        output: str,
        reference: str,
        **ignored_kwargs: Any,
    ) -> score_result.ScoreResult:
        """
        Score via the parent class and re-report its divergence as the value.

        Missing metadata entries fall back to a divergence of ``0.0`` and to
        the square root of the divergence for the distance.
        """
        parent_result = super().score(output=output, reference=reference)
        details = parent_result.metadata or {}

        divergence = float(details.get("divergence", 0.0))
        distance = float(details.get("distance", math.sqrt(divergence)))
        base = details.get("base", self._base)

        return score_result.ScoreResult(
            value=divergence,
            name=self.name,
            reason=f"Jensen-Shannon divergence (base={self._base:g}): {divergence:.4f}",
            metadata={"distance": distance, "base": base},
        )
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class KLDivergence(_DistributionMetricBase):
    """
    Compute the (optionally symmetric) KL divergence between token distributions.

    ``p`` is the distribution of the candidate (``output``) and ``q`` the
    distribution of the ``reference``.

    Args:
        tokenizer: Optional tokenizer function. Defaults to whitespace split.
        direction: Direction to compute (``"pq"``, ``"qp"``, or ``"avg"`` for
            symmetric).
        normalize: Whether to normalise token counts to probabilities first.
        smoothing: Additive smoothing constant to avoid divide-by-zero. With a
            value of ``0`` (or a negative value, which is clamped to ``0``) the
            divergence is undefined whenever a token is missing from the second
            distribution, and scoring raises ``MetricComputationError``.
        name: Display name for the metric result.
        track: Whether to automatically track metric results.
        project_name: Optional tracking project name.

    Raises:
        ValueError: If ``direction`` is not one of ``"pq"``, ``"qp"``, ``"avg"``.

    Example:
        >>> from opik.evaluation.metrics import KLDivergence
        >>> metric = KLDivergence(direction="avg")
        >>> result = metric.score("hello hello world", reference="hello world")
        >>> round(result.value, 4)  # doctest: +SKIP
        0.0583
    """

    def __init__(
        self,
        tokenizer: Optional[TokenizeFn] = None,
        direction: str = "pq",
        normalize: bool = True,
        smoothing: float = 1e-12,
        name: str = "kl_divergence_metric",
        track: bool = True,
        project_name: Optional[str] = None,
    ) -> None:
        if direction not in {"pq", "qp", "avg"}:
            raise ValueError("direction must be one of {'pq', 'qp', 'avg'}")
        super().__init__(
            tokenizer=tokenizer,
            name=name,
            track=track,
            project_name=project_name,
            normalize=normalize,
            smoothing=smoothing,
        )
        self._direction = direction

    def score(
        self,
        output: str,
        reference: str,
        **ignored_kwargs: Any,
    ) -> score_result.ScoreResult:
        """
        Return the KL divergence between ``output`` and ``reference`` tokens.

        Args:
            output: Candidate text (distribution ``p``).
            reference: Reference text (distribution ``q``).
            **ignored_kwargs: Additional keyword arguments that are ignored.

        Raises:
            MetricComputationError: If either text is blank, or if the
                divergence is undefined because smoothing is disabled and a
                token has zero mass in the second distribution.
        """
        if not output.strip():
            raise MetricComputationError("Candidate is empty (KL divergence metric).")
        if not reference.strip():
            raise MetricComputationError("Reference is empty (KL divergence metric).")

        p_dist = self._build_distribution(output)
        q_dist = self._build_distribution(reference)

        if self._direction == "pq":
            divergence = self._kl(p_dist, q_dist)
        elif self._direction == "qp":
            divergence = self._kl(q_dist, p_dist)
        else:
            # "avg": symmetric mean of both directions.
            divergence = 0.5 * (self._kl(p_dist, q_dist) + self._kl(q_dist, p_dist))

        return score_result.ScoreResult(
            value=divergence,
            name=self.name,
            reason=f"KL divergence ({self._direction}): {divergence:.4f}",
        )

    def _kl(self, p_dist: Dict[str, float], q_dist: Dict[str, float]) -> float:
        """
        Sum ``p * log(p / q)`` over the tokens of ``p_dist`` after smoothing.

        Raises:
            MetricComputationError: If a token of ``p_dist`` has zero smoothed
                mass in ``q_dist`` (only possible when smoothing is ``0``).
        """
        divergence = 0.0
        for token, p_val in p_dist.items():
            p_val = self._smooth(p_val)
            q_val = self._smooth(q_dist.get(token, 0.0))
            if q_val == 0.0:
                # Without smoothing the ratio p/q has no finite value; report a
                # metric error instead of leaking a bare ZeroDivisionError.
                raise MetricComputationError(
                    "KL divergence is undefined: a token has zero probability in "
                    "the second distribution. Use a positive `smoothing` value."
                )
            divergence += p_val * math.log(p_val / q_val)
        return divergence
|