opik 1.6.4__py3-none-any.whl → 1.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +33 -2
- opik/anonymizer/__init__.py +5 -0
- opik/anonymizer/anonymizer.py +12 -0
- opik/anonymizer/factory.py +80 -0
- opik/anonymizer/recursive_anonymizer.py +64 -0
- opik/anonymizer/rules.py +56 -0
- opik/anonymizer/rules_anonymizer.py +35 -0
- opik/api_objects/attachment/__init__.py +5 -0
- opik/api_objects/attachment/attachment.py +20 -0
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +220 -0
- opik/api_objects/attachment/converters.py +51 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/conversation/__init__.py +0 -0
- opik/api_objects/conversation/conversation_factory.py +43 -0
- opik/api_objects/conversation/conversation_thread.py +49 -0
- opik/api_objects/data_helpers.py +79 -0
- opik/api_objects/dataset/dataset.py +107 -45
- opik/api_objects/dataset/rest_operations.py +12 -3
- opik/api_objects/experiment/experiment.py +81 -45
- opik/api_objects/experiment/experiment_item.py +2 -1
- opik/api_objects/experiment/experiments_client.py +64 -0
- opik/api_objects/experiment/helpers.py +35 -11
- opik/api_objects/experiment/rest_operations.py +88 -19
- opik/api_objects/helpers.py +104 -7
- opik/api_objects/local_recording.py +81 -0
- opik/api_objects/opik_client.py +872 -174
- opik/api_objects/opik_query_language.py +136 -18
- opik/api_objects/optimization/__init__.py +3 -0
- opik/api_objects/optimization/optimization.py +39 -0
- opik/api_objects/prompt/__init__.py +13 -1
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +210 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
- opik/api_objects/prompt/client.py +193 -41
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/text/prompt.py +174 -0
- opik/api_objects/prompt/text/prompt_template.py +55 -0
- opik/api_objects/prompt/types.py +29 -0
- opik/api_objects/rest_stream_parser.py +98 -0
- opik/api_objects/search_helpers.py +89 -0
- opik/api_objects/span/span_client.py +165 -45
- opik/api_objects/span/span_data.py +136 -25
- opik/api_objects/threads/__init__.py +0 -0
- opik/api_objects/threads/threads_client.py +185 -0
- opik/api_objects/trace/trace_client.py +72 -36
- opik/api_objects/trace/trace_data.py +112 -26
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/__init__.py +5 -0
- opik/cli/__main__.py +6 -0
- opik/cli/configure.py +66 -0
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/healthcheck.py +21 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +49 -0
- opik/cli/proxy.py +93 -0
- opik/cli/usage_report/__init__.py +16 -0
- opik/cli/usage_report/charts.py +783 -0
- opik/cli/usage_report/cli.py +274 -0
- opik/cli/usage_report/constants.py +9 -0
- opik/cli/usage_report/extraction.py +749 -0
- opik/cli/usage_report/pdf.py +244 -0
- opik/cli/usage_report/statistics.py +78 -0
- opik/cli/usage_report/utils.py +235 -0
- opik/config.py +62 -4
- opik/configurator/configure.py +45 -6
- opik/configurator/opik_rest_helpers.py +4 -1
- opik/context_storage.py +164 -65
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +9 -1
- opik/decorator/base_track_decorator.py +298 -146
- opik/decorator/context_manager/__init__.py +0 -0
- opik/decorator/context_manager/span_context_manager.py +123 -0
- opik/decorator/context_manager/trace_context_manager.py +84 -0
- opik/decorator/generator_wrappers.py +3 -2
- opik/decorator/inspect_helpers.py +11 -0
- opik/decorator/opik_args/__init__.py +13 -0
- opik/decorator/opik_args/api_classes.py +71 -0
- opik/decorator/opik_args/helpers.py +120 -0
- opik/decorator/span_creation_handler.py +49 -21
- opik/decorator/tracker.py +9 -1
- opik/dict_utils.py +3 -3
- opik/environment.py +13 -1
- opik/error_tracking/api.py +1 -1
- opik/error_tracking/before_send.py +6 -5
- opik/error_tracking/environment_details.py +29 -7
- opik/error_tracking/error_filtering/filter_by_response_status_code.py +42 -0
- opik/error_tracking/error_filtering/filter_chain_builder.py +14 -3
- opik/evaluation/__init__.py +14 -2
- opik/evaluation/engine/engine.py +280 -82
- opik/evaluation/engine/evaluation_tasks_executor.py +15 -10
- opik/evaluation/engine/helpers.py +34 -9
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/engine/types.py +5 -4
- opik/evaluation/evaluation_result.py +169 -2
- opik/evaluation/evaluator.py +659 -58
- opik/evaluation/metrics/__init__.py +121 -6
- opik/evaluation/metrics/aggregated_metric.py +92 -0
- opik/evaluation/metrics/arguments_helpers.py +15 -21
- opik/evaluation/metrics/arguments_validator.py +38 -0
- opik/evaluation/metrics/base_metric.py +20 -10
- opik/evaluation/metrics/conversation/__init__.py +48 -0
- opik/evaluation/metrics/conversation/conversation_thread_metric.py +79 -0
- opik/evaluation/metrics/conversation/conversation_turns_factory.py +39 -0
- opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
- opik/evaluation/metrics/conversation/helpers.py +84 -0
- opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
- opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/__init__.py +0 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +274 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/schema.py +16 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/templates.py +95 -0
- opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/__init__.py +0 -0
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +295 -0
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/schema.py +22 -0
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/templates.py +139 -0
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +277 -0
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/schema.py +16 -0
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/templates.py +135 -0
- opik/evaluation/metrics/conversation/types.py +34 -0
- opik/evaluation/metrics/conversation_types.py +9 -0
- opik/evaluation/metrics/heuristics/bertscore.py +107 -0
- opik/evaluation/metrics/heuristics/bleu.py +43 -16
- opik/evaluation/metrics/heuristics/chrf.py +127 -0
- opik/evaluation/metrics/heuristics/contains.py +50 -11
- opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
- opik/evaluation/metrics/heuristics/equals.py +4 -1
- opik/evaluation/metrics/heuristics/gleu.py +113 -0
- opik/evaluation/metrics/heuristics/is_json.py +9 -3
- opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
- opik/evaluation/metrics/heuristics/levenshtein_ratio.py +6 -5
- opik/evaluation/metrics/heuristics/meteor.py +119 -0
- opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
- opik/evaluation/metrics/heuristics/readability.py +129 -0
- opik/evaluation/metrics/heuristics/regex_match.py +4 -1
- opik/evaluation/metrics/heuristics/rouge.py +148 -0
- opik/evaluation/metrics/heuristics/sentiment.py +98 -0
- opik/evaluation/metrics/heuristics/spearman.py +88 -0
- opik/evaluation/metrics/heuristics/tone.py +155 -0
- opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +27 -30
- opik/evaluation/metrics/llm_judges/answer_relevance/parser.py +27 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/templates.py +10 -10
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +28 -31
- opik/evaluation/metrics/llm_judges/context_precision/parser.py +27 -0
- opik/evaluation/metrics/llm_judges/context_precision/template.py +7 -7
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +27 -31
- opik/evaluation/metrics/llm_judges/context_recall/parser.py +27 -0
- opik/evaluation/metrics/llm_judges/context_recall/template.py +7 -7
- opik/evaluation/metrics/llm_judges/factuality/metric.py +7 -26
- opik/evaluation/metrics/llm_judges/factuality/parser.py +35 -0
- opik/evaluation/metrics/llm_judges/factuality/template.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +244 -113
- opik/evaluation/metrics/llm_judges/g_eval/parser.py +161 -0
- opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +23 -27
- opik/evaluation/metrics/llm_judges/hallucination/parser.py +29 -0
- opik/evaluation/metrics/llm_judges/hallucination/template.py +2 -4
- opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
- opik/evaluation/metrics/llm_judges/moderation/metric.py +23 -28
- opik/evaluation/metrics/llm_judges/moderation/parser.py +27 -0
- opik/evaluation/metrics/llm_judges/moderation/template.py +2 -2
- opik/evaluation/metrics/llm_judges/parsing_helpers.py +26 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
- opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
- opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
- opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +171 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/parser.py +38 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/templates.py +65 -0
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +23 -32
- opik/evaluation/metrics/llm_judges/usefulness/parser.py +28 -0
- opik/evaluation/metrics/ragas_metric.py +112 -0
- opik/evaluation/models/__init__.py +10 -0
- opik/evaluation/models/base_model.py +140 -18
- opik/evaluation/models/langchain/__init__.py +3 -0
- opik/evaluation/models/langchain/langchain_chat_model.py +166 -0
- opik/evaluation/models/langchain/message_converters.py +106 -0
- opik/evaluation/models/langchain/opik_monitoring.py +23 -0
- opik/evaluation/models/litellm/litellm_chat_model.py +186 -40
- opik/evaluation/models/litellm/opik_monitor.py +24 -21
- opik/evaluation/models/litellm/util.py +125 -0
- opik/evaluation/models/litellm/warning_filters.py +16 -4
- opik/evaluation/models/model_capabilities.py +187 -0
- opik/evaluation/models/models_factory.py +25 -3
- opik/evaluation/preprocessing.py +92 -0
- opik/evaluation/report.py +70 -12
- opik/evaluation/rest_operations.py +49 -45
- opik/evaluation/samplers/__init__.py +4 -0
- opik/evaluation/samplers/base_dataset_sampler.py +40 -0
- opik/evaluation/samplers/random_dataset_sampler.py +48 -0
- opik/evaluation/score_statistics.py +66 -0
- opik/evaluation/scorers/__init__.py +4 -0
- opik/evaluation/scorers/scorer_function.py +55 -0
- opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
- opik/evaluation/test_case.py +3 -2
- opik/evaluation/test_result.py +1 -0
- opik/evaluation/threads/__init__.py +0 -0
- opik/evaluation/threads/context_helper.py +32 -0
- opik/evaluation/threads/evaluation_engine.py +181 -0
- opik/evaluation/threads/evaluation_result.py +18 -0
- opik/evaluation/threads/evaluator.py +120 -0
- opik/evaluation/threads/helpers.py +51 -0
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +116 -3
- opik/file_upload/__init__.py +0 -0
- opik/file_upload/base_upload_manager.py +39 -0
- opik/file_upload/file_upload_monitor.py +14 -0
- opik/file_upload/file_uploader.py +141 -0
- opik/file_upload/mime_type.py +9 -0
- opik/file_upload/s3_multipart_upload/__init__.py +0 -0
- opik/file_upload/s3_multipart_upload/file_parts_strategy.py +89 -0
- opik/file_upload/s3_multipart_upload/s3_file_uploader.py +86 -0
- opik/file_upload/s3_multipart_upload/s3_upload_error.py +29 -0
- opik/file_upload/thread_pool.py +17 -0
- opik/file_upload/upload_client.py +114 -0
- opik/file_upload/upload_manager.py +255 -0
- opik/file_upload/upload_options.py +37 -0
- opik/format_helpers.py +17 -0
- opik/guardrails/__init__.py +4 -0
- opik/guardrails/guardrail.py +157 -0
- opik/guardrails/guards/__init__.py +5 -0
- opik/guardrails/guards/guard.py +17 -0
- opik/guardrails/guards/pii.py +47 -0
- opik/guardrails/guards/topic.py +76 -0
- opik/guardrails/rest_api_client.py +34 -0
- opik/guardrails/schemas.py +24 -0
- opik/guardrails/tracing.py +61 -0
- opik/healthcheck/__init__.py +2 -1
- opik/healthcheck/checks.py +2 -2
- opik/healthcheck/rich_representation.py +1 -1
- opik/hooks/__init__.py +23 -0
- opik/hooks/anonymizer_hook.py +36 -0
- opik/hooks/httpx_client_hook.py +112 -0
- opik/httpx_client.py +75 -4
- opik/id_helpers.py +18 -0
- opik/integrations/adk/__init__.py +14 -0
- opik/integrations/adk/callback_context_info_extractors.py +32 -0
- opik/integrations/adk/graph/__init__.py +0 -0
- opik/integrations/adk/graph/mermaid_graph_builder.py +128 -0
- opik/integrations/adk/graph/nodes.py +101 -0
- opik/integrations/adk/graph/subgraph_edges_builders.py +41 -0
- opik/integrations/adk/helpers.py +48 -0
- opik/integrations/adk/legacy_opik_tracer.py +381 -0
- opik/integrations/adk/opik_tracer.py +370 -0
- opik/integrations/adk/patchers/__init__.py +4 -0
- opik/integrations/adk/patchers/adk_otel_tracer/__init__.py +0 -0
- opik/integrations/adk/patchers/adk_otel_tracer/llm_span_helpers.py +30 -0
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +201 -0
- opik/integrations/adk/patchers/litellm_wrappers.py +91 -0
- opik/integrations/adk/patchers/llm_response_wrapper.py +105 -0
- opik/integrations/adk/patchers/patchers.py +64 -0
- opik/integrations/adk/recursive_callback_injector.py +126 -0
- opik/integrations/aisuite/aisuite_decorator.py +8 -3
- opik/integrations/aisuite/opik_tracker.py +1 -0
- opik/integrations/anthropic/messages_create_decorator.py +8 -3
- opik/integrations/anthropic/opik_tracker.py +0 -1
- opik/integrations/bedrock/converse/__init__.py +0 -0
- opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
- opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +18 -8
- opik/integrations/bedrock/invoke_agent_decorator.py +12 -7
- opik/integrations/bedrock/invoke_model/__init__.py +0 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
- opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
- opik/integrations/bedrock/invoke_model/response_types.py +34 -0
- opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
- opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
- opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
- opik/integrations/bedrock/opik_tracker.py +43 -4
- opik/integrations/bedrock/types.py +19 -0
- opik/integrations/crewai/crewai_decorator.py +34 -56
- opik/integrations/crewai/opik_tracker.py +31 -10
- opik/integrations/crewai/patchers/__init__.py +5 -0
- opik/integrations/crewai/patchers/flow.py +118 -0
- opik/integrations/crewai/patchers/litellm_completion.py +30 -0
- opik/integrations/crewai/patchers/llm_client.py +207 -0
- opik/integrations/dspy/callback.py +246 -84
- opik/integrations/dspy/graph.py +88 -0
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/genai/encoder_extension.py +2 -6
- opik/integrations/genai/generate_content_decorator.py +20 -13
- opik/integrations/guardrails/guardrails_decorator.py +4 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/constants.py +35 -0
- opik/integrations/haystack/converters.py +1 -2
- opik/integrations/haystack/opik_connector.py +28 -6
- opik/integrations/haystack/opik_span_bridge.py +284 -0
- opik/integrations/haystack/opik_tracer.py +124 -222
- opik/integrations/langchain/__init__.py +3 -1
- opik/integrations/langchain/helpers.py +96 -0
- opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_encoder_extension.py +2 -2
- opik/integrations/langchain/opik_tracer.py +641 -206
- opik/integrations/langchain/provider_usage_extractors/__init__.py +5 -0
- opik/integrations/langchain/provider_usage_extractors/anthropic_usage_extractor.py +101 -0
- opik/integrations/langchain/provider_usage_extractors/anthropic_vertexai_usage_extractor.py +67 -0
- opik/integrations/langchain/provider_usage_extractors/bedrock_usage_extractor.py +94 -0
- opik/integrations/langchain/provider_usage_extractors/google_generative_ai_usage_extractor.py +109 -0
- opik/integrations/langchain/provider_usage_extractors/groq_usage_extractor.py +92 -0
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/__init__.py +15 -0
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +134 -0
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/langchain_usage.py +163 -0
- opik/integrations/langchain/provider_usage_extractors/openai_usage_extractor.py +124 -0
- opik/integrations/langchain/provider_usage_extractors/provider_usage_extractor_protocol.py +29 -0
- opik/integrations/langchain/provider_usage_extractors/usage_extractor.py +48 -0
- opik/integrations/langchain/provider_usage_extractors/vertexai_usage_extractor.py +109 -0
- opik/integrations/litellm/__init__.py +5 -0
- opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
- opik/integrations/litellm/litellm_completion_decorator.py +242 -0
- opik/integrations/litellm/opik_tracker.py +43 -0
- opik/integrations/litellm/stream_patchers.py +151 -0
- opik/integrations/llama_index/callback.py +179 -78
- opik/integrations/llama_index/event_parsing_utils.py +29 -9
- opik/integrations/openai/agents/opik_tracing_processor.py +204 -32
- opik/integrations/openai/agents/span_data_parsers.py +15 -6
- opik/integrations/openai/chat_completion_chunks_aggregator.py +1 -1
- opik/integrations/openai/{openai_decorator.py → openai_chat_completions_decorator.py} +45 -35
- opik/integrations/openai/openai_responses_decorator.py +158 -0
- opik/integrations/openai/opik_tracker.py +94 -13
- opik/integrations/openai/response_events_aggregator.py +36 -0
- opik/integrations/openai/stream_patchers.py +125 -15
- opik/integrations/sagemaker/auth.py +5 -1
- opik/jsonable_encoder.py +29 -1
- opik/llm_usage/base_original_provider_usage.py +15 -8
- opik/llm_usage/bedrock_usage.py +8 -2
- opik/llm_usage/google_usage.py +6 -1
- opik/llm_usage/llm_usage_info.py +6 -0
- opik/llm_usage/{openai_usage.py → openai_chat_completions_usage.py} +2 -12
- opik/llm_usage/{openai_agent_usage.py → openai_responses_usage.py} +7 -15
- opik/llm_usage/opik_usage.py +36 -10
- opik/llm_usage/opik_usage_factory.py +35 -19
- opik/logging_messages.py +19 -7
- opik/message_processing/arguments_utils.py +22 -0
- opik/message_processing/batching/base_batcher.py +45 -17
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batch_manager_constuctors.py +36 -11
- opik/message_processing/batching/batchers.py +167 -44
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/batching/sequence_splitter.py +50 -5
- opik/message_processing/emulation/__init__.py +0 -0
- opik/message_processing/emulation/emulator_message_processor.py +578 -0
- opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
- opik/message_processing/emulation/models.py +162 -0
- opik/message_processing/encoder_helpers.py +79 -0
- opik/message_processing/message_queue.py +79 -0
- opik/message_processing/messages.py +154 -12
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/processors/message_processors.py +92 -0
- opik/message_processing/processors/message_processors_chain.py +96 -0
- opik/message_processing/processors/online_message_processor.py +324 -0
- opik/message_processing/queue_consumer.py +61 -13
- opik/message_processing/streamer.py +102 -31
- opik/message_processing/streamer_constructors.py +67 -12
- opik/opik_context.py +103 -11
- opik/plugins/pytest/decorator.py +2 -2
- opik/plugins/pytest/experiment_runner.py +3 -2
- opik/plugins/pytest/hooks.py +6 -4
- opik/rate_limit/__init__.py +0 -0
- opik/rate_limit/rate_limit.py +25 -0
- opik/rest_api/__init__.py +643 -11
- opik/rest_api/alerts/__init__.py +7 -0
- opik/rest_api/alerts/client.py +667 -0
- opik/rest_api/alerts/raw_client.py +1015 -0
- opik/rest_api/alerts/types/__init__.py +7 -0
- opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
- opik/rest_api/annotation_queues/__init__.py +4 -0
- opik/rest_api/annotation_queues/client.py +668 -0
- opik/rest_api/annotation_queues/raw_client.py +1019 -0
- opik/rest_api/attachments/__init__.py +17 -0
- opik/rest_api/attachments/client.py +752 -0
- opik/rest_api/attachments/raw_client.py +1125 -0
- opik/rest_api/attachments/types/__init__.py +15 -0
- opik/rest_api/attachments/types/attachment_list_request_entity_type.py +5 -0
- opik/rest_api/attachments/types/download_attachment_request_entity_type.py +5 -0
- opik/rest_api/attachments/types/start_multipart_upload_request_entity_type.py +5 -0
- opik/rest_api/attachments/types/upload_attachment_request_entity_type.py +5 -0
- opik/rest_api/automation_rule_evaluators/__init__.py +2 -0
- opik/rest_api/automation_rule_evaluators/client.py +182 -1162
- opik/rest_api/automation_rule_evaluators/raw_client.py +598 -0
- opik/rest_api/chat_completions/__init__.py +2 -0
- opik/rest_api/chat_completions/client.py +115 -149
- opik/rest_api/chat_completions/raw_client.py +339 -0
- opik/rest_api/check/__init__.py +2 -0
- opik/rest_api/check/client.py +88 -106
- opik/rest_api/check/raw_client.py +258 -0
- opik/rest_api/client.py +112 -212
- opik/rest_api/core/__init__.py +5 -0
- opik/rest_api/core/api_error.py +12 -6
- opik/rest_api/core/client_wrapper.py +4 -14
- opik/rest_api/core/datetime_utils.py +1 -3
- opik/rest_api/core/file.py +2 -5
- opik/rest_api/core/http_client.py +42 -120
- opik/rest_api/core/http_response.py +55 -0
- opik/rest_api/core/jsonable_encoder.py +1 -4
- opik/rest_api/core/pydantic_utilities.py +79 -147
- opik/rest_api/core/query_encoder.py +1 -3
- opik/rest_api/core/serialization.py +10 -10
- opik/rest_api/dashboards/__init__.py +4 -0
- opik/rest_api/dashboards/client.py +462 -0
- opik/rest_api/dashboards/raw_client.py +648 -0
- opik/rest_api/datasets/__init__.py +5 -0
- opik/rest_api/datasets/client.py +1638 -1091
- opik/rest_api/datasets/raw_client.py +3389 -0
- opik/rest_api/datasets/types/__init__.py +8 -0
- opik/rest_api/datasets/types/dataset_update_visibility.py +5 -0
- opik/rest_api/datasets/types/dataset_write_visibility.py +5 -0
- opik/rest_api/errors/__init__.py +2 -0
- opik/rest_api/errors/bad_request_error.py +4 -3
- opik/rest_api/errors/conflict_error.py +4 -3
- opik/rest_api/errors/forbidden_error.py +4 -2
- opik/rest_api/errors/not_found_error.py +4 -3
- opik/rest_api/errors/not_implemented_error.py +4 -3
- opik/rest_api/errors/unauthorized_error.py +4 -3
- opik/rest_api/errors/unprocessable_entity_error.py +4 -3
- opik/rest_api/experiments/__init__.py +5 -0
- opik/rest_api/experiments/client.py +676 -752
- opik/rest_api/experiments/raw_client.py +1872 -0
- opik/rest_api/experiments/types/__init__.py +10 -0
- opik/rest_api/experiments/types/experiment_update_status.py +5 -0
- opik/rest_api/experiments/types/experiment_update_type.py +5 -0
- opik/rest_api/experiments/types/experiment_write_status.py +5 -0
- opik/rest_api/experiments/types/experiment_write_type.py +5 -0
- opik/rest_api/feedback_definitions/__init__.py +2 -0
- opik/rest_api/feedback_definitions/client.py +96 -370
- opik/rest_api/feedback_definitions/raw_client.py +541 -0
- opik/rest_api/feedback_definitions/types/__init__.py +2 -0
- opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -3
- opik/rest_api/guardrails/__init__.py +4 -0
- opik/rest_api/guardrails/client.py +104 -0
- opik/rest_api/guardrails/raw_client.py +102 -0
- opik/rest_api/llm_provider_key/__init__.py +2 -0
- opik/rest_api/llm_provider_key/client.py +166 -440
- opik/rest_api/llm_provider_key/raw_client.py +643 -0
- opik/rest_api/llm_provider_key/types/__init__.py +2 -0
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
- opik/rest_api/manual_evaluation/__init__.py +4 -0
- opik/rest_api/manual_evaluation/client.py +347 -0
- opik/rest_api/manual_evaluation/raw_client.py +543 -0
- opik/rest_api/open_telemetry_ingestion/__init__.py +2 -0
- opik/rest_api/open_telemetry_ingestion/client.py +38 -63
- opik/rest_api/open_telemetry_ingestion/raw_client.py +88 -0
- opik/rest_api/optimizations/__init__.py +7 -0
- opik/rest_api/optimizations/client.py +704 -0
- opik/rest_api/optimizations/raw_client.py +920 -0
- opik/rest_api/optimizations/types/__init__.py +7 -0
- opik/rest_api/optimizations/types/optimization_update_status.py +7 -0
- opik/rest_api/projects/__init__.py +10 -1
- opik/rest_api/projects/client.py +180 -855
- opik/rest_api/projects/raw_client.py +1216 -0
- opik/rest_api/projects/types/__init__.py +11 -4
- opik/rest_api/projects/types/project_metric_request_public_interval.py +1 -3
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +11 -1
- opik/rest_api/projects/types/project_update_visibility.py +5 -0
- opik/rest_api/projects/types/project_write_visibility.py +5 -0
- opik/rest_api/prompts/__init__.py +4 -2
- opik/rest_api/prompts/client.py +381 -970
- opik/rest_api/prompts/raw_client.py +1634 -0
- opik/rest_api/prompts/types/__init__.py +5 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/raw_client.py +156 -0
- opik/rest_api/redirect/__init__.py +4 -0
- opik/rest_api/redirect/client.py +375 -0
- opik/rest_api/redirect/raw_client.py +566 -0
- opik/rest_api/service_toggles/__init__.py +4 -0
- opik/rest_api/service_toggles/client.py +91 -0
- opik/rest_api/service_toggles/raw_client.py +93 -0
- opik/rest_api/spans/__init__.py +2 -0
- opik/rest_api/spans/client.py +659 -1354
- opik/rest_api/spans/raw_client.py +2383 -0
- opik/rest_api/spans/types/__init__.py +2 -0
- opik/rest_api/spans/types/find_feedback_score_names_1_request_type.py +1 -3
- opik/rest_api/spans/types/get_span_stats_request_type.py +1 -3
- opik/rest_api/spans/types/get_spans_by_project_request_type.py +1 -3
- opik/rest_api/spans/types/span_search_stream_request_public_type.py +1 -3
- opik/rest_api/system_usage/__init__.py +2 -0
- opik/rest_api/system_usage/client.py +157 -216
- opik/rest_api/system_usage/raw_client.py +455 -0
- opik/rest_api/traces/__init__.py +2 -0
- opik/rest_api/traces/client.py +2102 -1625
- opik/rest_api/traces/raw_client.py +4144 -0
- opik/rest_api/types/__init__.py +629 -24
- opik/rest_api/types/aggregation_data.py +27 -0
- opik/rest_api/types/alert.py +33 -0
- opik/rest_api/types/alert_alert_type.py +5 -0
- opik/rest_api/types/alert_page_public.py +24 -0
- opik/rest_api/types/alert_public.py +33 -0
- opik/rest_api/types/alert_public_alert_type.py +5 -0
- opik/rest_api/types/alert_trigger.py +27 -0
- opik/rest_api/types/alert_trigger_config.py +28 -0
- opik/rest_api/types/alert_trigger_config_public.py +28 -0
- opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_write.py +22 -0
- opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
- opik/rest_api/types/alert_trigger_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_public.py +27 -0
- opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_write.py +23 -0
- opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
- opik/rest_api/types/alert_write.py +28 -0
- opik/rest_api/types/alert_write_alert_type.py +5 -0
- opik/rest_api/types/annotation_queue.py +42 -0
- opik/rest_api/types/annotation_queue_batch.py +27 -0
- opik/rest_api/types/{json_schema_element.py → annotation_queue_item_ids.py} +5 -7
- opik/rest_api/types/annotation_queue_page_public.py +28 -0
- opik/rest_api/types/annotation_queue_public.py +38 -0
- opik/rest_api/types/annotation_queue_public_scope.py +5 -0
- opik/rest_api/types/{workspace_metadata.py → annotation_queue_reviewer.py} +6 -7
- opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
- opik/rest_api/types/annotation_queue_scope.py +5 -0
- opik/rest_api/types/annotation_queue_write.py +31 -0
- opik/rest_api/types/annotation_queue_write_scope.py +5 -0
- opik/rest_api/types/assistant_message.py +7 -8
- opik/rest_api/types/assistant_message_role.py +1 -3
- opik/rest_api/types/attachment.py +22 -0
- opik/rest_api/types/attachment_page.py +28 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +160 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_page_public.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +143 -0
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +6 -6
- opik/rest_api/types/automation_rule_evaluator_write.py +143 -0
- opik/rest_api/types/avg_value_stat_public.py +3 -5
- opik/rest_api/types/batch_delete.py +3 -5
- opik/rest_api/types/batch_delete_by_project.py +20 -0
- opik/rest_api/types/bi_information.py +3 -5
- opik/rest_api/types/bi_information_response.py +4 -6
- opik/rest_api/types/boolean_feedback_definition.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
- opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
- opik/rest_api/types/boolean_feedback_detail.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
- opik/rest_api/types/categorical_feedback_definition.py +5 -7
- opik/rest_api/types/categorical_feedback_definition_create.py +4 -6
- opik/rest_api/types/categorical_feedback_definition_public.py +5 -7
- opik/rest_api/types/categorical_feedback_definition_update.py +4 -6
- opik/rest_api/types/categorical_feedback_detail.py +3 -5
- opik/rest_api/types/categorical_feedback_detail_create.py +3 -5
- opik/rest_api/types/categorical_feedback_detail_public.py +3 -5
- opik/rest_api/types/categorical_feedback_detail_update.py +3 -5
- opik/rest_api/types/chat_completion_choice.py +4 -6
- opik/rest_api/types/chat_completion_response.py +5 -6
- opik/rest_api/types/check.py +22 -0
- opik/rest_api/types/{json_node_compare.py → check_name.py} +1 -1
- opik/rest_api/types/check_public.py +22 -0
- opik/rest_api/types/check_public_name.py +5 -0
- opik/rest_api/types/check_public_result.py +5 -0
- opik/rest_api/types/check_result.py +5 -0
- opik/rest_api/types/chunked_output_json_node.py +4 -6
- opik/rest_api/types/chunked_output_json_node_public.py +4 -6
- opik/rest_api/types/chunked_output_json_node_public_type.py +6 -10
- opik/rest_api/types/chunked_output_json_node_type.py +6 -10
- opik/rest_api/types/column.py +8 -10
- opik/rest_api/types/column_compare.py +8 -10
- opik/rest_api/types/column_public.py +8 -10
- opik/rest_api/types/column_types_item.py +1 -3
- opik/rest_api/types/comment.py +4 -6
- opik/rest_api/types/comment_compare.py +4 -6
- opik/rest_api/types/comment_public.py +4 -6
- opik/rest_api/types/complete_multipart_upload_request.py +33 -0
- opik/rest_api/types/complete_multipart_upload_request_entity_type.py +5 -0
- opik/rest_api/types/completion_tokens_details.py +3 -5
- opik/rest_api/types/count_value_stat_public.py +3 -5
- opik/rest_api/types/dashboard_page_public.py +24 -0
- opik/rest_api/types/dashboard_public.py +30 -0
- opik/rest_api/types/data_point_double.py +21 -0
- opik/rest_api/types/data_point_number_public.py +3 -5
- opik/rest_api/types/dataset.py +14 -6
- opik/rest_api/types/dataset_expansion.py +42 -0
- opik/rest_api/types/dataset_expansion_response.py +39 -0
- opik/rest_api/types/dataset_item.py +9 -8
- opik/rest_api/types/dataset_item_batch.py +3 -5
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +9 -8
- opik/rest_api/types/dataset_item_compare_source.py +1 -3
- opik/rest_api/types/dataset_item_filter.py +27 -0
- opik/rest_api/types/dataset_item_filter_operator.py +21 -0
- opik/rest_api/types/dataset_item_page_compare.py +10 -7
- opik/rest_api/types/dataset_item_page_public.py +10 -7
- opik/rest_api/types/dataset_item_public.py +9 -8
- opik/rest_api/types/dataset_item_public_source.py +1 -3
- opik/rest_api/types/dataset_item_source.py +1 -3
- opik/rest_api/types/dataset_item_update.py +39 -0
- opik/rest_api/types/dataset_item_write.py +5 -6
- opik/rest_api/types/dataset_item_write_source.py +1 -3
- opik/rest_api/types/dataset_page_public.py +9 -6
- opik/rest_api/types/dataset_public.py +14 -6
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_public_visibility.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +59 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/dataset_visibility.py +5 -0
- opik/rest_api/types/delete_attachments_request.py +23 -0
- opik/rest_api/types/delete_attachments_request_entity_type.py +5 -0
- opik/rest_api/types/delete_feedback_score.py +4 -5
- opik/rest_api/types/delete_ids_holder.py +19 -0
- opik/rest_api/types/delta.py +7 -9
- opik/rest_api/types/error_count_with_deviation.py +21 -0
- opik/rest_api/types/error_count_with_deviation_detailed.py +21 -0
- opik/rest_api/types/error_info.py +3 -5
- opik/rest_api/types/error_info_experiment_item_bulk_write_view.py +21 -0
- opik/rest_api/types/error_info_public.py +3 -5
- opik/rest_api/types/error_info_write.py +3 -5
- opik/rest_api/types/error_message.py +3 -5
- opik/rest_api/types/error_message_detail.py +3 -5
- opik/rest_api/types/error_message_detailed.py +3 -5
- opik/rest_api/types/error_message_public.py +3 -5
- opik/rest_api/types/experiment.py +21 -10
- opik/rest_api/types/experiment_group_aggregations_response.py +20 -0
- opik/rest_api/types/experiment_group_response.py +22 -0
- opik/rest_api/types/experiment_item.py +14 -11
- opik/rest_api/types/experiment_item_bulk_record.py +27 -0
- opik/rest_api/types/experiment_item_bulk_record_experiment_item_bulk_write_view.py +27 -0
- opik/rest_api/types/experiment_item_bulk_upload.py +27 -0
- opik/rest_api/types/experiment_item_compare.py +14 -11
- opik/rest_api/types/experiment_item_compare_trace_visibility_mode.py +5 -0
- opik/rest_api/types/experiment_item_public.py +6 -6
- opik/rest_api/types/experiment_item_public_trace_visibility_mode.py +5 -0
- opik/rest_api/types/experiment_item_trace_visibility_mode.py +5 -0
- opik/rest_api/types/experiment_page_public.py +9 -6
- opik/rest_api/types/experiment_public.py +21 -10
- opik/rest_api/types/experiment_public_status.py +5 -0
- opik/rest_api/types/experiment_public_type.py +5 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/experiment_status.py +5 -0
- opik/rest_api/types/experiment_type.py +5 -0
- opik/rest_api/types/export_trace_service_request.py +5 -0
- opik/rest_api/types/feedback.py +40 -27
- opik/rest_api/types/feedback_create.py +27 -13
- opik/rest_api/types/feedback_definition_page_public.py +4 -6
- opik/rest_api/types/feedback_object_public.py +40 -27
- opik/rest_api/types/feedback_public.py +40 -27
- opik/rest_api/types/feedback_score.py +7 -7
- opik/rest_api/types/feedback_score_average.py +3 -5
- opik/rest_api/types/feedback_score_average_detailed.py +3 -5
- opik/rest_api/types/feedback_score_average_public.py +3 -5
- opik/rest_api/types/feedback_score_batch.py +4 -6
- opik/rest_api/types/feedback_score_batch_item.py +6 -6
- opik/rest_api/types/feedback_score_batch_item_source.py +1 -3
- opik/rest_api/types/feedback_score_batch_item_thread.py +32 -0
- opik/rest_api/types/feedback_score_batch_item_thread_source.py +5 -0
- opik/rest_api/types/feedback_score_compare.py +7 -7
- opik/rest_api/types/feedback_score_compare_source.py +1 -3
- opik/rest_api/types/feedback_score_experiment_item_bulk_write_view.py +31 -0
- opik/rest_api/types/feedback_score_experiment_item_bulk_write_view_source.py +5 -0
- opik/rest_api/types/feedback_score_names.py +4 -6
- opik/rest_api/types/feedback_score_public.py +11 -7
- opik/rest_api/types/feedback_score_public_source.py +1 -3
- opik/rest_api/types/feedback_score_source.py +1 -3
- opik/rest_api/types/feedback_update.py +27 -13
- opik/rest_api/types/function.py +4 -7
- opik/rest_api/types/function_call.py +3 -5
- opik/rest_api/types/group_content.py +19 -0
- opik/rest_api/types/group_content_with_aggregations.py +21 -0
- opik/rest_api/types/group_detail.py +19 -0
- opik/rest_api/types/group_details.py +20 -0
- opik/rest_api/types/guardrail.py +34 -0
- opik/rest_api/types/guardrail_batch.py +20 -0
- opik/rest_api/types/guardrail_name.py +5 -0
- opik/rest_api/types/guardrail_result.py +5 -0
- opik/rest_api/types/guardrail_write.py +33 -0
- opik/rest_api/types/guardrail_write_name.py +5 -0
- opik/rest_api/types/guardrail_write_result.py +5 -0
- opik/rest_api/types/guardrails_validation.py +21 -0
- opik/rest_api/types/guardrails_validation_public.py +21 -0
- opik/rest_api/types/ids_holder.py +19 -0
- opik/rest_api/types/image_url.py +20 -0
- opik/rest_api/types/image_url_public.py +20 -0
- opik/rest_api/types/image_url_write.py +20 -0
- opik/rest_api/types/json_list_string.py +7 -0
- opik/rest_api/types/json_list_string_compare.py +7 -0
- opik/rest_api/types/json_list_string_experiment_item_bulk_write_view.py +7 -0
- opik/rest_api/types/json_list_string_public.py +7 -0
- opik/rest_api/types/json_list_string_write.py +7 -0
- opik/rest_api/types/json_schema.py +5 -8
- opik/rest_api/types/llm_as_judge_code.py +8 -12
- opik/rest_api/types/llm_as_judge_code_public.py +8 -12
- opik/rest_api/types/llm_as_judge_code_write.py +8 -12
- opik/rest_api/types/llm_as_judge_message.py +9 -7
- opik/rest_api/types/llm_as_judge_message_content.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
- opik/rest_api/types/llm_as_judge_message_public.py +9 -7
- opik/rest_api/types/llm_as_judge_message_public_role.py +1 -1
- opik/rest_api/types/llm_as_judge_message_role.py +1 -1
- opik/rest_api/types/llm_as_judge_message_write.py +9 -7
- opik/rest_api/types/llm_as_judge_message_write_role.py +1 -1
- opik/rest_api/types/llm_as_judge_model_parameters.py +6 -5
- opik/rest_api/types/llm_as_judge_model_parameters_public.py +6 -5
- opik/rest_api/types/llm_as_judge_model_parameters_write.py +6 -5
- opik/rest_api/types/llm_as_judge_output_schema.py +4 -6
- opik/rest_api/types/llm_as_judge_output_schema_public.py +4 -6
- opik/rest_api/types/llm_as_judge_output_schema_public_type.py +1 -3
- opik/rest_api/types/llm_as_judge_output_schema_type.py +1 -3
- opik/rest_api/types/llm_as_judge_output_schema_write.py +4 -6
- opik/rest_api/types/llm_as_judge_output_schema_write_type.py +1 -3
- opik/rest_api/types/log_item.py +5 -7
- opik/rest_api/types/log_item_level.py +1 -3
- opik/rest_api/types/log_page.py +4 -6
- opik/rest_api/types/manual_evaluation_request.py +38 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
- opik/rest_api/types/manual_evaluation_response.py +27 -0
- opik/rest_api/types/multipart_upload_part.py +20 -0
- opik/rest_api/types/numerical_feedback_definition.py +5 -7
- opik/rest_api/types/numerical_feedback_definition_create.py +4 -6
- opik/rest_api/types/numerical_feedback_definition_public.py +5 -7
- opik/rest_api/types/numerical_feedback_definition_update.py +4 -6
- opik/rest_api/types/numerical_feedback_detail.py +3 -5
- opik/rest_api/types/numerical_feedback_detail_create.py +3 -5
- opik/rest_api/types/numerical_feedback_detail_public.py +3 -5
- opik/rest_api/types/numerical_feedback_detail_update.py +3 -5
- opik/rest_api/types/optimization.py +37 -0
- opik/rest_api/types/optimization_page_public.py +28 -0
- opik/rest_api/types/optimization_public.py +37 -0
- opik/rest_api/types/optimization_public_status.py +7 -0
- opik/rest_api/types/optimization_status.py +7 -0
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +30 -0
- opik/rest_api/types/optimization_write_status.py +7 -0
- opik/rest_api/types/page_columns.py +4 -6
- opik/rest_api/types/percentage_value_stat_public.py +4 -6
- opik/rest_api/types/percentage_values.py +8 -16
- opik/rest_api/types/percentage_values_detailed.py +8 -16
- opik/rest_api/types/percentage_values_public.py +8 -16
- opik/rest_api/types/project.py +12 -7
- opik/rest_api/types/project_detailed.py +12 -7
- opik/rest_api/types/project_detailed_visibility.py +5 -0
- opik/rest_api/types/project_metric_response_public.py +5 -9
- opik/rest_api/types/project_metric_response_public_interval.py +1 -3
- opik/rest_api/types/project_metric_response_public_metric_type.py +11 -1
- opik/rest_api/types/project_page_public.py +8 -10
- opik/rest_api/types/project_public.py +6 -6
- opik/rest_api/types/project_public_visibility.py +5 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stat_item_object_public.py +8 -17
- opik/rest_api/types/project_stats_public.py +4 -6
- opik/rest_api/types/project_stats_summary.py +4 -6
- opik/rest_api/types/project_stats_summary_item.py +9 -6
- opik/rest_api/types/project_visibility.py +5 -0
- opik/rest_api/types/prompt.py +12 -7
- opik/rest_api/types/prompt_detail.py +12 -7
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_page_public.py +9 -6
- opik/rest_api/types/prompt_public.py +11 -6
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_tokens_details.py +19 -0
- opik/rest_api/types/prompt_version.py +7 -6
- opik/rest_api/types/prompt_version_detail.py +7 -6
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_link.py +4 -5
- opik/rest_api/types/prompt_version_link_public.py +4 -5
- opik/rest_api/types/prompt_version_link_write.py +3 -5
- opik/rest_api/types/prompt_version_page_public.py +9 -6
- opik/rest_api/types/prompt_version_public.py +7 -6
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +18 -8
- opik/rest_api/types/provider_api_key_page_public.py +27 -0
- opik/rest_api/types/provider_api_key_provider.py +1 -1
- opik/rest_api/types/provider_api_key_public.py +18 -8
- opik/rest_api/types/provider_api_key_public_provider.py +1 -1
- opik/rest_api/types/response_format.py +5 -7
- opik/rest_api/types/response_format_type.py +1 -3
- opik/rest_api/types/result.py +21 -0
- opik/rest_api/types/results_number_public.py +4 -6
- opik/rest_api/types/score_name.py +4 -5
- opik/rest_api/types/service_toggles_config.py +44 -0
- opik/rest_api/types/span.py +13 -15
- opik/rest_api/types/span_batch.py +4 -6
- opik/rest_api/types/span_enrichment_options.py +31 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view.py +39 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view_type.py +5 -0
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_public.py +4 -6
- opik/rest_api/types/span_filter_public_operator.py +2 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/span_page_public.py +9 -6
- opik/rest_api/types/span_public.py +19 -16
- opik/rest_api/types/span_public_type.py +1 -1
- opik/rest_api/types/span_type.py +1 -1
- opik/rest_api/types/span_update.py +46 -0
- opik/rest_api/types/span_update_type.py +5 -0
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/rest_api/types/span_write.py +13 -14
- opik/rest_api/types/span_write_type.py +1 -1
- opik/rest_api/types/spans_count_response.py +20 -0
- opik/rest_api/types/start_multipart_upload_response.py +20 -0
- opik/rest_api/types/stream_options.py +3 -5
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/tool.py +4 -6
- opik/rest_api/types/tool_call.py +4 -6
- opik/rest_api/types/trace.py +26 -12
- opik/rest_api/types/trace_batch.py +4 -6
- opik/rest_api/types/trace_count_response.py +4 -6
- opik/rest_api/types/trace_enrichment_options.py +32 -0
- opik/rest_api/types/trace_experiment_item_bulk_write_view.py +41 -0
- opik/rest_api/types/trace_filter.py +23 -0
- opik/rest_api/types/trace_filter_operator.py +21 -0
- opik/rest_api/types/trace_filter_public.py +23 -0
- opik/rest_api/types/trace_filter_public_operator.py +21 -0
- opik/rest_api/types/trace_filter_write.py +23 -0
- opik/rest_api/types/trace_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_page_public.py +8 -10
- opik/rest_api/types/trace_public.py +27 -13
- opik/rest_api/types/trace_public_visibility_mode.py +5 -0
- opik/rest_api/types/trace_thread.py +18 -9
- opik/rest_api/types/trace_thread_filter.py +23 -0
- opik/rest_api/types/trace_thread_filter_operator.py +21 -0
- opik/rest_api/types/trace_thread_filter_public.py +23 -0
- opik/rest_api/types/trace_thread_filter_public_operator.py +21 -0
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_thread_identifier.py +22 -0
- opik/rest_api/types/trace_thread_llm_as_judge_code.py +26 -0
- opik/rest_api/types/trace_thread_llm_as_judge_code_public.py +26 -0
- opik/rest_api/types/trace_thread_llm_as_judge_code_write.py +26 -0
- opik/rest_api/types/trace_thread_page.py +9 -6
- opik/rest_api/types/trace_thread_status.py +5 -0
- opik/rest_api/types/trace_thread_update.py +19 -0
- opik/rest_api/types/trace_thread_user_defined_metric_python_code.py +19 -0
- opik/rest_api/types/trace_thread_user_defined_metric_python_code_public.py +19 -0
- opik/rest_api/types/trace_thread_user_defined_metric_python_code_write.py +19 -0
- opik/rest_api/types/trace_update.py +39 -0
- opik/rest_api/types/trace_visibility_mode.py +5 -0
- opik/rest_api/types/trace_write.py +10 -11
- opik/rest_api/types/usage.py +6 -6
- opik/rest_api/types/user_defined_metric_python_code.py +3 -5
- opik/rest_api/types/user_defined_metric_python_code_public.py +3 -5
- opik/rest_api/types/user_defined_metric_python_code_write.py +3 -5
- opik/rest_api/types/value_entry.py +27 -0
- opik/rest_api/types/value_entry_compare.py +27 -0
- opik/rest_api/types/value_entry_compare_source.py +5 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +27 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view_source.py +5 -0
- opik/rest_api/types/value_entry_public.py +27 -0
- opik/rest_api/types/value_entry_public_source.py +5 -0
- opik/rest_api/types/value_entry_source.py +5 -0
- opik/rest_api/types/video_url.py +19 -0
- opik/rest_api/types/video_url_public.py +19 -0
- opik/rest_api/types/video_url_write.py +19 -0
- opik/rest_api/types/webhook.py +28 -0
- opik/rest_api/types/webhook_examples.py +19 -0
- opik/rest_api/types/webhook_public.py +28 -0
- opik/rest_api/types/webhook_test_result.py +23 -0
- opik/rest_api/types/webhook_test_result_status.py +5 -0
- opik/rest_api/types/webhook_write.py +23 -0
- opik/rest_api/types/welcome_wizard_tracking.py +22 -0
- opik/rest_api/types/workspace_configuration.py +27 -0
- opik/rest_api/types/workspace_metric_request.py +24 -0
- opik/rest_api/types/workspace_metric_response.py +20 -0
- opik/rest_api/types/workspace_metrics_summary_request.py +23 -0
- opik/rest_api/types/workspace_metrics_summary_response.py +20 -0
- opik/rest_api/types/workspace_name_holder.py +19 -0
- opik/rest_api/types/workspace_spans_count.py +20 -0
- opik/rest_api/types/workspace_trace_count.py +3 -5
- opik/rest_api/welcome_wizard/__init__.py +4 -0
- opik/rest_api/welcome_wizard/client.py +195 -0
- opik/rest_api/welcome_wizard/raw_client.py +208 -0
- opik/rest_api/workspaces/__init__.py +2 -0
- opik/rest_api/workspaces/client.py +550 -77
- opik/rest_api/workspaces/raw_client.py +923 -0
- opik/rest_client_configurator/api.py +1 -0
- opik/rest_client_configurator/retry_decorator.py +1 -0
- opik/s3_httpx_client.py +67 -0
- opik/simulation/__init__.py +6 -0
- opik/simulation/simulated_user.py +99 -0
- opik/simulation/simulator.py +108 -0
- opik/synchronization.py +11 -24
- opik/tracing_runtime_config.py +48 -0
- opik/types.py +48 -2
- opik/url_helpers.py +13 -3
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +4 -5
- opik/validation/parameter.py +122 -0
- opik/validation/parameters_validator.py +175 -0
- opik/validation/validator.py +30 -2
- opik/validation/validator_helpers.py +147 -0
- opik-1.9.71.dist-info/METADATA +370 -0
- opik-1.9.71.dist-info/RECORD +1110 -0
- {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/WHEEL +1 -1
- opik-1.9.71.dist-info/licenses/LICENSE +203 -0
- opik/api_objects/prompt/prompt.py +0 -107
- opik/api_objects/prompt/prompt_template.py +0 -35
- opik/cli.py +0 -193
- opik/evaluation/metrics/models.py +0 -8
- opik/hooks.py +0 -13
- opik/integrations/bedrock/chunks_aggregator.py +0 -55
- opik/integrations/bedrock/helpers.py +0 -8
- opik/integrations/langchain/google_run_helpers.py +0 -75
- opik/integrations/langchain/openai_run_helpers.py +0 -122
- opik/message_processing/message_processors.py +0 -203
- opik/rest_api/types/delta_role.py +0 -7
- opik/rest_api/types/json_object_schema.py +0 -34
- opik-1.6.4.dist-info/METADATA +0 -270
- opik-1.6.4.dist-info/RECORD +0 -507
- /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
- {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
- {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Readability heuristics backed by the ``textstat`` library."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
from opik.evaluation.metrics.base_metric import BaseMetric
|
|
8
|
+
from opik.evaluation.metrics.score_result import ScoreResult
|
|
9
|
+
from opik.exceptions import MetricComputationError
|
|
10
|
+
|
|
11
|
+
try: # pragma: no cover - optional dependency
|
|
12
|
+
import textstat as _textstat_lib
|
|
13
|
+
except ImportError: # pragma: no cover - optional dependency
|
|
14
|
+
_textstat_lib = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Readability(BaseMetric):
    """Compute common readability statistics using ``textstat``.

    The metric reports the Flesch Reading Ease (0–100) alongside the Flesch–Kincaid
    grade level. By default the score value is the reading-ease score clamped to
    ``[0, 100]`` and normalised to ``[0, 1]``. You can optionally enforce grade
    bounds to turn the metric into a guardrail.

    Args:
        name: Display name for the metric result.
        track: Whether to automatically track metric results.
        project_name: Optional tracking project name.
        min_grade: Inclusive lower bound for the acceptable grade.
        max_grade: Inclusive upper bound for the acceptable grade.
        language: Locale forwarded to ``textstat`` when counting syllables.
        textstat_module: Optional ``textstat``-compatible module for dependency
            injection (mainly used in tests).
        enforce_bounds: When ``True`` the metric returns ``1.0`` if the grade lies
            within bounds and ``0.0`` otherwise, effectively acting as a guardrail.

    Raises:
        ImportError: If ``textstat_module`` is not supplied and the optional
            ``textstat`` package could not be imported.
    """

    def __init__(
        self,
        *,
        name: str = "readability_metric",
        track: bool = True,
        project_name: Optional[str] = None,
        min_grade: Optional[float] = None,
        max_grade: Optional[float] = None,
        language: str = "en_US",
        textstat_module: Optional[Any] = None,
        enforce_bounds: bool = False,
    ) -> None:
        super().__init__(name=name, track=track, project_name=project_name)
        if textstat_module is not None:
            # An injected module always wins, so tests never need the real package.
            self._textstat = textstat_module
        else:
            if _textstat_lib is None:  # pragma: no cover - optional dependency
                raise ImportError(
                    "Readability metric requires the optional 'textstat' package. "
                    "Install via `pip install textstat`."
                )
            self._textstat = _textstat_lib

        self._min_grade = min_grade
        self._max_grade = max_grade
        self._language = language
        self._enforce_bounds = enforce_bounds

    def score(
        self,
        output: str,
        **ignored_kwargs: Any,
    ) -> ScoreResult:
        """Score the readability of ``output``.

        Args:
            output: Text to analyse; surrounding whitespace is stripped first.
            **ignored_kwargs: Accepted for call compatibility and not used.

        Returns:
            A ``ScoreResult`` whose value is either the normalised reading-ease
            score (default) or ``1.0``/``0.0`` depending on whether the grade is
            within bounds (``enforce_bounds=True``). The raw statistics, the
            configured bounds and the bounds verdict are attached as metadata
            in both modes.

        Raises:
            MetricComputationError: If ``output`` is empty or whitespace-only,
                or if ``textstat`` reports a non-positive sentence or word count.
        """
        if not output or not output.strip():
            raise MetricComputationError("Text is empty (Readability metric).")

        cleaned = output.strip()
        sentence_count = self._textstat.sentence_count(cleaned)
        word_count = self._textstat.lexicon_count(cleaned, removepunct=True)
        if sentence_count <= 0 or word_count <= 0:
            raise MetricComputationError(
                "Unable to parse text for readability metrics."
            )

        # NOTE(review): only syllable_count receives the configured language; the
        # reading-ease and grade calls below are made without it - confirm that
        # this matches the intended locale handling of the textstat module in use.
        syllable_count = self._textstat.syllable_count(cleaned, lang=self._language)
        reading_ease = float(self._textstat.flesch_reading_ease(cleaned))
        fk_grade = float(self._textstat.flesch_kincaid_grade(cleaned))

        # Both counts are strictly positive here (guarded above), so the
        # divisions are safe without any zero fallback.
        words_per_sentence = word_count / sentence_count
        syllables_per_word = syllable_count / word_count
        within_bounds = self._is_within_grade_bounds(fk_grade)

        if self._enforce_bounds:
            # Guardrail mode: binary pass/fail on the grade level.
            value = 1.0 if within_bounds else 0.0
            reason = (
                "Text meets readability targets"
                if within_bounds
                else "Text falls outside readability targets"
            )
        else:
            # The raw reading-ease value is clamped to [0, 100] before scaling
            # so the reported value always stays within [0, 1].
            normalised = max(0.0, min(100.0, reading_ease)) / 100.0
            value = normalised
            reason = (
                f"Flesch Reading Ease: {reading_ease:.2f} | "
                f"Flesch-Kincaid Grade: {fk_grade:.2f}"
            )

        metadata = {
            "flesch_reading_ease": reading_ease,
            "flesch_kincaid_grade": fk_grade,
            "words_per_sentence": words_per_sentence,
            "syllables_per_word": syllables_per_word,
            "sentence_count": sentence_count,
            "word_count": word_count,
            "syllable_count": syllable_count,
            "min_grade": self._min_grade,
            "max_grade": self._max_grade,
            "within_grade_bounds": within_bounds,
        }

        return ScoreResult(
            value=value,
            name=self.name,
            reason=reason,
            metadata=metadata,
        )

    def _is_within_grade_bounds(self, grade: float) -> bool:
        """Return whether ``grade`` satisfies the configured inclusive bounds.

        A bound that is ``None`` is not checked, so with neither bound set
        every grade is considered within bounds.
        """
        if self._min_grade is not None and grade < self._min_grade:
            return False
        if self._max_grade is not None and grade > self._max_grade:
            return False
        return True
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Any, Union
|
|
2
|
+
from typing import Any, Union, Optional
|
|
3
3
|
|
|
4
4
|
from .. import base_metric, score_result
|
|
5
5
|
|
|
@@ -15,6 +15,7 @@ class RegexMatch(base_metric.BaseMetric):
|
|
|
15
15
|
regex: The regular expression pattern to match against. Can be a string or a compiled regex pattern.
|
|
16
16
|
name: The name of the metric. Defaults to "regex_match_metric".
|
|
17
17
|
track: Whether to track the metric. Defaults to True.
|
|
18
|
+
project_name: Optional project name to track the metric in for the cases when there are no parent span/trace to inherit project name from.
|
|
18
19
|
|
|
19
20
|
Example:
|
|
20
21
|
>>> from opik.evaluation.metrics import RegexMatch
|
|
@@ -32,10 +33,12 @@ class RegexMatch(base_metric.BaseMetric):
|
|
|
32
33
|
regex: Union[str, re.Pattern],
|
|
33
34
|
name: str = "regex_match_metric",
|
|
34
35
|
track: bool = True,
|
|
36
|
+
project_name: Optional[str] = None,
|
|
35
37
|
):
|
|
36
38
|
super().__init__(
|
|
37
39
|
name=name,
|
|
38
40
|
track=track,
|
|
41
|
+
project_name=project_name,
|
|
39
42
|
)
|
|
40
43
|
|
|
41
44
|
self._regex_pattern: re.Pattern = (
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
from typing import Any, List, Optional, Union
|
|
2
|
+
from opik.exceptions import MetricComputationError
|
|
3
|
+
from opik.evaluation.metrics import base_metric, score_result
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from rouge_score import rouge_scorer
|
|
7
|
+
except ImportError:
|
|
8
|
+
rouge_scorer = None
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ROUGE(base_metric.BaseMetric):
    """
    A metric that computes the ROUGE (Recall-Oriented Understudy for Gisting
    Evaluation) score between an output and one or more reference strings. It is
    mainly used for evaluating text summarization.

    ROUGE is case insensitive, meaning that upper case letters are treated the same
    way as lower case letters. This metric is a wrapper around the Google Research
    reimplementation of ROUGE, provided by the `rouge-score` library.

    References:
        - https://github.com/google-research/google-research/tree/master/rouge
        - https://huggingface.co/spaces/evaluate-metric/rouge

    Args:
        name: The name of the metric. Defaults to "rouge_metric".
        track: Whether to track the metric. Defaults to True.
        rouge_type: Type of ROUGE score to compute. Defaults to "rouge1". Must be one
            of the following (names are case sensitive):
            - "rouge1": unigram (1-gram) based scoring
            - "rouge2": bigram (2-gram) based scoring
            - "rougeL": longest common subsequence based scoring
            - "rougeLsum": summary-level variant that splits text using '\\n'
        use_stemmer: Whether to use stemming when computing ROUGE. Defaults to False.
        split_summaries: Forwarded to the `rouge-score` scorer; controls whether
            summaries are split into sentences. Defaults to False.
        tokenizer: Optional custom tokenizer forwarded unchanged to the
            `rouge-score` scorer. Defaults to None (library default tokenizer).
        project_name: Optional project name to track the metric in for the cases when
            there are no parent span/trace to inherit project name from.

    Raises:
        MetricComputationError: If ``rouge_type`` is not one of the supported values.
        ImportError: If the `rouge-score` package is not installed.

    Example:
        >>> from opik.evaluation.metrics import ROUGE
        >>> rouge_metric = ROUGE()
        >>> result = rouge_metric.score(
        ...     output="The quick brown fox jumps over the lazy dog.",
        ...     reference="The quick brown fox jumps over the lazy dog."
        ... )
        >>> print(result.value)
        1.0
    """

    def __init__(
        self,
        name: str = "rouge_metric",
        track: bool = True,
        rouge_type: str = "rouge1",
        use_stemmer: bool = False,
        split_summaries: bool = False,
        tokenizer: Optional[Any] = None,
        project_name: Optional[str] = None,
    ):
        super().__init__(name=name, track=track, project_name=project_name)

        valid_rouge_types = {"rouge1", "rouge2", "rougeL", "rougeLsum"}
        if rouge_type not in valid_rouge_types:
            raise MetricComputationError(
                f"Invalid rouge_type '{rouge_type}'. Must be one of {valid_rouge_types}."
            )

        self._rouge_type = rouge_type
        self._rouge = _build_rouge_backend(
            rouge_type=rouge_type,
            use_stemmer=use_stemmer,
            split_summaries=split_summaries,
            tokenizer=tokenizer,
        )

    def score(
        self,
        output: str,
        reference: Union[str, List[str]],
        **ignored_kwargs: Any,
    ) -> score_result.ScoreResult:
        """
        Compute the ROUGE score based on the given rouge_type between the output and
        reference strings.

        Args:
            output: The output string to score.
            reference: The reference string or list of reference strings. A tuple of
                strings is accepted as well and is validated like a list.
            **ignored_kwargs: Additional keyword arguments that are ignored.

        Returns:
            score_result.ScoreResult with:
                - `value`: The ROUGE F-measure for the configured rouge_type (float).
                - `name`: The metric name.
                - `reason`: A short explanation (e.g. "rouge1 score: 0.9100").

        Raises:
            MetricComputationError:
                - If the candidate or any reference is empty.
                - If the reference is not a string or a list of strings.
        """
        if not output.strip():
            raise MetricComputationError("Candidate is empty.")

        if isinstance(reference, str):
            if not reference.strip():
                raise MetricComputationError("Reference is empty.")

            references = [reference]
        elif isinstance(reference, (list, tuple)):
            if len(reference) == 0:
                raise MetricComputationError("Reference is empty.")

            if not all(isinstance(item, str) for item in reference):
                raise MetricComputationError(
                    "Reference must be a string or a list of strings."
                )

            for ref_str in reference:
                if not ref_str.strip():
                    raise MetricComputationError("Encountered empty reference.")

            references = list(reference)
        else:
            # Anything else (None, numbers, dicts, ...) used to fall through to the
            # backend and fail there with an unrelated error; reject it up front as
            # the documented contract promises.
            raise MetricComputationError(
                "Reference must be a string or a list of strings."
            )

        rouge_score_type = self._rouge_type
        if self._rouge is None:
            raise MetricComputationError("ROUGE backend is not initialized.")

        # score_multi takes the references first and the prediction second.
        results = self._rouge.score_multi(references, output)
        rouge_f1_value = results[rouge_score_type].fmeasure

        return score_result.ScoreResult(
            value=rouge_f1_value,
            name=self.name,
            reason=f"{rouge_score_type} score: {rouge_f1_value:.4f}",
        )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _build_rouge_backend(
    *,
    rouge_type: str,
    use_stemmer: bool,
    split_summaries: bool,
    tokenizer: Optional[Any],
) -> Optional[Any]:
    """
    Create the `rouge-score` scorer used by the ROUGE metric.

    Args:
        rouge_type: The single ROUGE variant the scorer should compute.
        use_stemmer: Forwarded to ``RougeScorer``.
        split_summaries: Forwarded to ``RougeScorer``.
        tokenizer: Forwarded to ``RougeScorer``; ``None`` keeps the library default.

    Returns:
        A ``rouge_scorer.RougeScorer`` configured for ``rouge_type``.

    Raises:
        ImportError: If the optional `rouge-score` dependency could not be imported.
    """
    if rouge_scorer is not None:
        scorer_options = {
            "use_stemmer": use_stemmer,
            "split_summaries": split_summaries,
            "tokenizer": tokenizer,
        }
        return rouge_scorer.RougeScorer([rouge_type], **scorer_options)

    # The module-level import fell back to None: the dependency is missing.
    install_hint = (
        "`rouge-score` libraries are required for ROUGE score calculation. "
        "Install via `pip install rouge-score`."
    )
    raise ImportError(install_hint)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
from opik.evaluation.metrics import base_metric, score_result
|
|
4
|
+
from opik.exceptions import MetricComputationError
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
import nltk
|
|
8
|
+
from nltk.sentiment import vader
|
|
9
|
+
except ImportError:
|
|
10
|
+
nltk = None
|
|
11
|
+
vader = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Sentiment(base_metric.BaseMetric):
    """
    Score the sentiment of a text with NLTK's VADER sentiment analyzer.

    The result carries the positive, neutral, negative and compound scores
    produced by the analyzer. The compound score, used as the metric value, is a
    normalized score between -1.0 (extremely negative) and 1.0 (extremely
    positive).

    Args:
        name: The name of the metric. Defaults to "sentiment_metric".
        track: Whether to track the metric. Defaults to True.
        project_name: Optional project name to track the metric in for the cases when
            there are no parent span/trace to inherit project name from.

    Raises:
        ImportError: If `nltk` (or its VADER module) could not be imported.

    Example:
        >>> from opik.evaluation.metrics import Sentiment
        >>> sentiment_metric = Sentiment()
        >>> result = sentiment_metric.score("I love this product! It's amazing.")
        >>> print(result.value)  # Compound score (e.g., 0.8802)
        >>> print(result.metadata)  # All sentiment scores
    """

    def __init__(
        self,
        name: str = "sentiment_metric",
        track: bool = True,
        project_name: Optional[str] = None,
    ):
        super().__init__(name=name, track=track, project_name=project_name)

        nltk_available = nltk is not None and vader is not None
        if not nltk_available:
            raise ImportError(
                "`nltk` library is required for sentiment analysis. "
                "Install via `pip install nltk` and then download the vader_lexicon: "
                "`python -m nltk.downloader vader_lexicon`."
            )

        try:
            analyzer = vader.SentimentIntensityAnalyzer()
        except LookupError:
            # The vader_lexicon resource is missing: fetch it once and retry.
            nltk.download("vader_lexicon")
            analyzer = vader.SentimentIntensityAnalyzer()

        self._analyzer = analyzer

    def score(self, output: str, **ignored_kwargs: Any) -> score_result.ScoreResult:
        """
        Run VADER on ``output`` and report its compound sentiment score.

        Args:
            output: The text to analyze for sentiment.
            **ignored_kwargs: Additional keyword arguments that are ignored.

        Returns:
            score_result.ScoreResult: A ScoreResult object with:
                - value: The compound sentiment score (-1.0 to 1.0)
                - name: The metric name
                - reason: The sentiment category (positive / negative / neutral)
                  together with the compound score
                - metadata: The full score dictionary returned by the analyzer

        Raises:
            MetricComputationError: If the input text is empty or whitespace only.
        """
        stripped = output.strip()
        if not stripped:
            raise MetricComputationError("Empty text provided for sentiment analysis.")

        scores = self._analyzer.polarity_scores(output)
        compound = scores["compound"]

        # Scores strictly between -0.05 and 0.05 are reported as neutral.
        if compound <= -0.05:
            label = "negative"
        elif compound >= 0.05:
            label = "positive"
        else:
            label = "neutral"

        return score_result.ScoreResult(
            value=compound,
            name=self.name,
            reason=f"Text sentiment analysis: {label} (compound score: {compound:.4f})",
            metadata=scores,
        )
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Spearman rank correlation between reference and predicted rankings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Sequence
|
|
6
|
+
|
|
7
|
+
from opik.evaluation.metrics.base_metric import BaseMetric
|
|
8
|
+
from opik.evaluation.metrics.score_result import ScoreResult
|
|
9
|
+
from opik.exceptions import MetricComputationError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SpearmanRanking(BaseMetric):
    """
    Compute Spearman's rank correlation for two rankings of the same items.

    Each argument is an ordered sequence of distinct, hashable items; an item's
    rank is its position in the sequence. Scores are normalised to ``[0.0, 1.0]``
    where `1.0` indicates perfect rank agreement and `0.0` indicates complete
    disagreement (``rho = -1``).

    References:
        - Spearman's rank correlation coefficient (Wikipedia overview)
          https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
        - SciPy documentation: ``scipy.stats.spearmanr``
          https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html

    Args:
        name: Display name for the metric result. Defaults to
            ``"spearman_ranking_metric"``.
        track: Whether to automatically track metric results. Defaults to ``True``.
        project_name: Optional tracking project name. Defaults to ``None``.

    Example:
        >>> from opik.evaluation.metrics import SpearmanRanking
        >>> metric = SpearmanRanking()
        >>> result = metric.score(
        ...     output=["b", "a", "c"],
        ...     reference=["a", "b", "c"],
        ... )
        >>> round(result.metadata["rho"], 2)  # doctest: +SKIP
        0.5
    """

    def __init__(
        self,
        name: str = "spearman_ranking_metric",
        track: bool = True,
        project_name: str | None = None,
    ) -> None:
        super().__init__(name=name, track=track, project_name=project_name)

    def score(
        self,
        output: Sequence[Any],
        reference: Sequence[Any],
        **ignored_kwargs: Any,
    ) -> ScoreResult:
        """
        Compare the predicted ranking against the reference ranking.

        Args:
            output: Predicted ordering of the items (best first or worst first,
                as long as it matches the convention used by ``reference``).
            reference: Expected ordering of the same items.
            **ignored_kwargs: Additional keyword arguments that are ignored.

        Returns:
            ScoreResult whose ``value`` is ``(rho + 1) / 2`` and whose
            ``metadata["rho"]`` holds the raw coefficient. A single-item ranking
            yields ``rho = 1.0``.

        Raises:
            MetricComputationError: If the rankings differ in length, are empty,
                contain duplicate items, or do not contain the same items.
        """
        if len(output) != len(reference):
            raise MetricComputationError(
                "output and reference rankings must have the same length."
            )
        if len(output) == 0:
            raise MetricComputationError(
                "Rankings cannot be empty for Spearman correlation."
            )

        ref_ranks = {item: idx for idx, item in enumerate(reference)}
        # The dict collapses repeated items. Without this check duplicates slip
        # past the set comparison below and the d^2 formula (valid only for
        # untied ranks) can return rho outside [-1, 1].
        if len(ref_ranks) != len(reference):
            raise MetricComputationError("Rankings must not contain duplicate items.")
        # Lengths match and reference items are distinct, so equal sets also
        # guarantee that output has no duplicates.
        if set(output) != set(reference):
            raise MetricComputationError("Rankings must contain the same items.")

        diffs_sq = sum((idx - ref_ranks[item]) ** 2 for idx, item in enumerate(output))

        n = len(output)
        if n == 1:
            # The formula divides by n * (n^2 - 1), which is zero for n == 1.
            rho = 1.0
        else:
            rho = 1 - (6 * diffs_sq) / (n * (n * n - 1))

        # normalize to [0, 1] for convenience
        normalized = (rho + 1) / 2

        return ScoreResult(
            value=normalized,
            name=self.name,
            reason=f"Spearman correlation (normalized): {normalized:.4f}",
            metadata={"rho": rho},
        )
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Rule-based tone metric for assistant responses."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any, Iterable, Optional, Sequence
|
|
7
|
+
|
|
8
|
+
from opik.exceptions import MetricComputationError
|
|
9
|
+
from opik.evaluation.metrics.base_metric import BaseMetric
|
|
10
|
+
from opik.evaluation.metrics.score_result import ScoreResult
|
|
11
|
+
|
|
12
|
+
# Default tone lexicons/phrases kept inline for easier discoverability.
# All entries are lowercase; Tone lowercases them again on construction and
# matches them against lowercased input.

# Tokens that count as one positive hit each when computing the sentiment score.
_POSITIVE_LEXICON = {
    "appreciate",
    "assist",
    "glad",
    "helpful",
    "please",
    "thank",
    "welcome",
    "happy",
    "support",
    "great",
    "excellent",
    "wonderful",
}

# Tokens that count as one negative hit each when computing the sentiment score.
_NEGATIVE_LEXICON = {
    "angry",
    "awful",
    "bad",
    "complain",
    "frustrated",
    "hate",
    "incompetent",
    "terrible",
    "useless",
    "stupid",
    "idiot",
}

# Phrases whose presence anywhere in the text (substring match) fails the check.
_FORBIDDEN_PHRASES = {
    "shut up",
    "this is pointless",
    "not my problem",
    "i refuse to assist",
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class Tone(BaseMetric):
    """
    Flag tone issues like excessive negativity, shouting, or forbidden phrases.

    The metric is a pass/fail guardrail: the score is ``1.0`` only when every
    check passes, otherwise ``0.0``. The individual measurements are reported in
    the result metadata.

    Args:
        name: Display name for the metric result. Defaults to ``"tone_metric"``.
        track: Whether to automatically track results. Defaults to ``True``.
        project_name: Optional tracking project name. Defaults to ``None``.
        min_sentiment: Minimum sentiment score required (``-1.0`` to ``1.0`` scale).
        max_upper_ratio: Maximum allowed ratio of uppercase characters.
        max_exclamations: Cap on the number of exclamation marks.
        positive_lexicon: Optional iterable of positive tokens counted for sentiment.
            ``None`` selects the built-in lexicon; an empty iterable means no
            positive tokens.
        negative_lexicon: Optional iterable of negative tokens counted for sentiment.
            ``None`` selects the built-in lexicon; an empty iterable means no
            negative tokens.
        forbidden_phrases: Optional sequence of phrases that immediately fail the
            check. ``None`` selects the built-in phrases; an empty sequence
            disables the check.

    Example:
        >>> from opik.evaluation.metrics import Tone
        >>> metric = Tone(max_exclamations=2)
        >>> result = metric.score("THANK YOU for your patience!!!")
        >>> result.value  # doctest: +SKIP
        0.0
    """

    def __init__(
        self,
        name: str = "tone_metric",
        track: bool = True,
        project_name: Optional[str] = None,
        min_sentiment: float = -0.2,
        max_upper_ratio: float = 0.3,
        max_exclamations: int = 3,
        positive_lexicon: Optional[Iterable[str]] = None,
        negative_lexicon: Optional[Iterable[str]] = None,
        forbidden_phrases: Optional[Sequence[str]] = None,
    ) -> None:
        super().__init__(name=name, track=track, project_name=project_name)
        self._min_sentiment = min_sentiment
        self._max_upper_ratio = max_upper_ratio
        self._max_exclamations = max_exclamations

        # Compare against None rather than relying on truthiness: `x or DEFAULT`
        # would silently swap an explicitly empty collection for the defaults,
        # making it impossible to switch a lexicon or the phrase check off.
        positive = _POSITIVE_LEXICON if positive_lexicon is None else positive_lexicon
        negative = _NEGATIVE_LEXICON if negative_lexicon is None else negative_lexicon
        phrases = _FORBIDDEN_PHRASES if forbidden_phrases is None else forbidden_phrases

        # Everything is lowercased so matching against lowercased text is
        # case-insensitive.
        self._positive = {word.lower() for word in positive}
        self._negative = {word.lower() for word in negative}
        self._forbidden = [phrase.lower() for phrase in phrases]

    def score(self, output: str, **ignored_kwargs: Any) -> ScoreResult:
        """
        Check ``output`` against the configured tone guardrails.

        Args:
            output: The text to inspect.
            **ignored_kwargs: Additional keyword arguments that are ignored.

        Returns:
            ScoreResult with ``value`` ``1.0`` when the sentiment score, uppercase
            ratio, exclamation count and forbidden-phrase checks all pass, and
            ``0.0`` otherwise. ``metadata`` contains each measurement plus the
            thresholds used.

        Raises:
            MetricComputationError: If the text is empty / whitespace only, or
                contains no word tokens.
        """
        if not output or not output.strip():
            raise MetricComputationError("Text is empty (Tone metric).")

        # Lowercase once and reuse for both tokenization and phrase matching.
        lowered = output.lower()
        tokens = re.findall(r"\b\w+\b", lowered)
        if not tokens:
            raise MetricComputationError("Unable to tokenize text for Tone metric.")

        sentiment_score = self._compute_sentiment(tokens)
        upper_ratio = _uppercase_ratio(output)
        exclamation_count = output.count("!")
        # Substring match: a phrase counts wherever it appears in the text.
        forbidden_hit = any(phrase in lowered for phrase in self._forbidden)

        passes = (
            sentiment_score >= self._min_sentiment
            and upper_ratio <= self._max_upper_ratio
            and exclamation_count <= self._max_exclamations
            and not forbidden_hit
        )

        metadata = {
            "sentiment_score": sentiment_score,
            "uppercase_ratio": upper_ratio,
            "exclamation_count": exclamation_count,
            "forbidden_hit": forbidden_hit,
            "thresholds": {
                "min_sentiment": self._min_sentiment,
                "max_upper_ratio": self._max_upper_ratio,
                "max_exclamations": self._max_exclamations,
            },
        }

        reason = (
            "Tone is within configured guardrails"
            if passes
            else "Tone violates guardrails"
        )
        value = 1.0 if passes else 0.0
        return ScoreResult(
            value=value, name=self.name, reason=reason, metadata=metadata
        )

    def _compute_sentiment(self, tokens: Sequence[str]) -> float:
        """
        Return a lexicon-based sentiment score in ``[-1.0, 1.0]``.

        The score is ``(positive hits - negative hits) / (positive hits +
        negative hits)``; text with no lexicon hits at all scores ``0.0``.
        """
        pos_hits = sum(token in self._positive for token in tokens)
        neg_hits = sum(token in self._negative for token in tokens)
        total = pos_hits + neg_hits
        if total == 0:
            return 0.0
        return (pos_hits - neg_hits) / total
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _uppercase_ratio(text: str) -> float:
    """
    Return the share of alphabetic characters in ``text`` that are uppercase.

    Non-alphabetic characters (digits, punctuation, whitespace) are ignored.
    Text without any alphabetic character yields ``0.0``.
    """
    alpha_total = 0
    upper_total = 0
    for char in text:
        if not char.isalpha():
            continue
        alpha_total += 1
        if char.isupper():
            upper_total += 1

    if alpha_total == 0:
        return 0.0
    return upper_total / alpha_total
|