PyPI - opik - Versions diffs - 1.6.4__py3-none-any.whl → 1.9.71__py3-none-any.whl - Mend

opik 1.6.4__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1021)

opik/__init__.py +33 -2
opik/anonymizer/__init__.py +5 -0
opik/anonymizer/anonymizer.py +12 -0
opik/anonymizer/factory.py +80 -0
opik/anonymizer/recursive_anonymizer.py +64 -0
opik/anonymizer/rules.py +56 -0
opik/anonymizer/rules_anonymizer.py +35 -0
opik/api_objects/attachment/__init__.py +5 -0
opik/api_objects/attachment/attachment.py +20 -0
opik/api_objects/attachment/attachment_context.py +36 -0
opik/api_objects/attachment/attachments_extractor.py +153 -0
opik/api_objects/attachment/client.py +220 -0
opik/api_objects/attachment/converters.py +51 -0
opik/api_objects/attachment/decoder.py +18 -0
opik/api_objects/attachment/decoder_base64.py +83 -0
opik/api_objects/attachment/decoder_helpers.py +137 -0
opik/api_objects/conversation/__init__.py +0 -0
opik/api_objects/conversation/conversation_factory.py +43 -0
opik/api_objects/conversation/conversation_thread.py +49 -0
opik/api_objects/data_helpers.py +79 -0
opik/api_objects/dataset/dataset.py +107 -45
opik/api_objects/dataset/rest_operations.py +12 -3
opik/api_objects/experiment/experiment.py +81 -45
opik/api_objects/experiment/experiment_item.py +2 -1
opik/api_objects/experiment/experiments_client.py +64 -0
opik/api_objects/experiment/helpers.py +35 -11
opik/api_objects/experiment/rest_operations.py +88 -19
opik/api_objects/helpers.py +104 -7
opik/api_objects/local_recording.py +81 -0
opik/api_objects/opik_client.py +872 -174
opik/api_objects/opik_query_language.py +136 -18
opik/api_objects/optimization/__init__.py +3 -0
opik/api_objects/optimization/optimization.py +39 -0
opik/api_objects/prompt/__init__.py +13 -1
opik/api_objects/prompt/base_prompt.py +69 -0
opik/api_objects/prompt/base_prompt_template.py +29 -0
opik/api_objects/prompt/chat/__init__.py +1 -0
opik/api_objects/prompt/chat/chat_prompt.py +210 -0
opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
opik/api_objects/prompt/client.py +193 -41
opik/api_objects/prompt/text/__init__.py +1 -0
opik/api_objects/prompt/text/prompt.py +174 -0
opik/api_objects/prompt/text/prompt_template.py +55 -0
opik/api_objects/prompt/types.py +29 -0
opik/api_objects/rest_stream_parser.py +98 -0
opik/api_objects/search_helpers.py +89 -0
opik/api_objects/span/span_client.py +165 -45
opik/api_objects/span/span_data.py +136 -25
opik/api_objects/threads/__init__.py +0 -0
opik/api_objects/threads/threads_client.py +185 -0
opik/api_objects/trace/trace_client.py +72 -36
opik/api_objects/trace/trace_data.py +112 -26
opik/api_objects/validation_helpers.py +3 -3
opik/cli/__init__.py +5 -0
opik/cli/__main__.py +6 -0
opik/cli/configure.py +66 -0
opik/cli/exports/__init__.py +131 -0
opik/cli/exports/dataset.py +278 -0
opik/cli/exports/experiment.py +784 -0
opik/cli/exports/project.py +685 -0
opik/cli/exports/prompt.py +578 -0
opik/cli/exports/utils.py +406 -0
opik/cli/harbor.py +39 -0
opik/cli/healthcheck.py +21 -0
opik/cli/imports/__init__.py +439 -0
opik/cli/imports/dataset.py +143 -0
opik/cli/imports/experiment.py +1192 -0
opik/cli/imports/project.py +262 -0
opik/cli/imports/prompt.py +177 -0
opik/cli/imports/utils.py +280 -0
opik/cli/main.py +49 -0
opik/cli/proxy.py +93 -0
opik/cli/usage_report/__init__.py +16 -0
opik/cli/usage_report/charts.py +783 -0
opik/cli/usage_report/cli.py +274 -0
opik/cli/usage_report/constants.py +9 -0
opik/cli/usage_report/extraction.py +749 -0
opik/cli/usage_report/pdf.py +244 -0
opik/cli/usage_report/statistics.py +78 -0
opik/cli/usage_report/utils.py +235 -0
opik/config.py +62 -4
opik/configurator/configure.py +45 -6
opik/configurator/opik_rest_helpers.py +4 -1
opik/context_storage.py +164 -65
opik/datetime_helpers.py +12 -0
opik/decorator/arguments_helpers.py +9 -1
opik/decorator/base_track_decorator.py +298 -146
opik/decorator/context_manager/__init__.py +0 -0
opik/decorator/context_manager/span_context_manager.py +123 -0
opik/decorator/context_manager/trace_context_manager.py +84 -0
opik/decorator/generator_wrappers.py +3 -2
opik/decorator/inspect_helpers.py +11 -0
opik/decorator/opik_args/__init__.py +13 -0
opik/decorator/opik_args/api_classes.py +71 -0
opik/decorator/opik_args/helpers.py +120 -0
opik/decorator/span_creation_handler.py +49 -21
opik/decorator/tracker.py +9 -1
opik/dict_utils.py +3 -3
opik/environment.py +13 -1
opik/error_tracking/api.py +1 -1
opik/error_tracking/before_send.py +6 -5
opik/error_tracking/environment_details.py +29 -7
opik/error_tracking/error_filtering/filter_by_response_status_code.py +42 -0
opik/error_tracking/error_filtering/filter_chain_builder.py +14 -3
opik/evaluation/__init__.py +14 -2
opik/evaluation/engine/engine.py +280 -82
opik/evaluation/engine/evaluation_tasks_executor.py +15 -10
opik/evaluation/engine/helpers.py +34 -9
opik/evaluation/engine/metrics_evaluator.py +237 -0
opik/evaluation/engine/types.py +5 -4
opik/evaluation/evaluation_result.py +169 -2
opik/evaluation/evaluator.py +659 -58
opik/evaluation/metrics/__init__.py +121 -6
opik/evaluation/metrics/aggregated_metric.py +92 -0
opik/evaluation/metrics/arguments_helpers.py +15 -21
opik/evaluation/metrics/arguments_validator.py +38 -0
opik/evaluation/metrics/base_metric.py +20 -10
opik/evaluation/metrics/conversation/__init__.py +48 -0
opik/evaluation/metrics/conversation/conversation_thread_metric.py +79 -0
opik/evaluation/metrics/conversation/conversation_turns_factory.py +39 -0
opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
opik/evaluation/metrics/conversation/helpers.py +84 -0
opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/__init__.py +0 -0
opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +274 -0
opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/schema.py +16 -0
opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/templates.py +95 -0
opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
opik/evaluation/metrics/conversation/llm_judges/session_completeness/__init__.py +0 -0
opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +295 -0
opik/evaluation/metrics/conversation/llm_judges/session_completeness/schema.py +22 -0
opik/evaluation/metrics/conversation/llm_judges/session_completeness/templates.py +139 -0
opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +277 -0
opik/evaluation/metrics/conversation/llm_judges/user_frustration/schema.py +16 -0
opik/evaluation/metrics/conversation/llm_judges/user_frustration/templates.py +135 -0
opik/evaluation/metrics/conversation/types.py +34 -0
opik/evaluation/metrics/conversation_types.py +9 -0
opik/evaluation/metrics/heuristics/bertscore.py +107 -0
opik/evaluation/metrics/heuristics/bleu.py +43 -16
opik/evaluation/metrics/heuristics/chrf.py +127 -0
opik/evaluation/metrics/heuristics/contains.py +50 -11
opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
opik/evaluation/metrics/heuristics/equals.py +4 -1
opik/evaluation/metrics/heuristics/gleu.py +113 -0
opik/evaluation/metrics/heuristics/is_json.py +9 -3
opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
opik/evaluation/metrics/heuristics/levenshtein_ratio.py +6 -5
opik/evaluation/metrics/heuristics/meteor.py +119 -0
opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
opik/evaluation/metrics/heuristics/readability.py +129 -0
opik/evaluation/metrics/heuristics/regex_match.py +4 -1
opik/evaluation/metrics/heuristics/rouge.py +148 -0
opik/evaluation/metrics/heuristics/sentiment.py +98 -0
opik/evaluation/metrics/heuristics/spearman.py +88 -0
opik/evaluation/metrics/heuristics/tone.py +155 -0
opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +27 -30
opik/evaluation/metrics/llm_judges/answer_relevance/parser.py +27 -0
opik/evaluation/metrics/llm_judges/answer_relevance/templates.py +10 -10
opik/evaluation/metrics/llm_judges/context_precision/metric.py +28 -31
opik/evaluation/metrics/llm_judges/context_precision/parser.py +27 -0
opik/evaluation/metrics/llm_judges/context_precision/template.py +7 -7
opik/evaluation/metrics/llm_judges/context_recall/metric.py +27 -31
opik/evaluation/metrics/llm_judges/context_recall/parser.py +27 -0
opik/evaluation/metrics/llm_judges/context_recall/template.py +7 -7
opik/evaluation/metrics/llm_judges/factuality/metric.py +7 -26
opik/evaluation/metrics/llm_judges/factuality/parser.py +35 -0
opik/evaluation/metrics/llm_judges/factuality/template.py +1 -1
opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
opik/evaluation/metrics/llm_judges/g_eval/metric.py +244 -113
opik/evaluation/metrics/llm_judges/g_eval/parser.py +161 -0
opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
opik/evaluation/metrics/llm_judges/hallucination/metric.py +23 -27
opik/evaluation/metrics/llm_judges/hallucination/parser.py +29 -0
opik/evaluation/metrics/llm_judges/hallucination/template.py +2 -4
opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
opik/evaluation/metrics/llm_judges/moderation/metric.py +23 -28
opik/evaluation/metrics/llm_judges/moderation/parser.py +27 -0
opik/evaluation/metrics/llm_judges/moderation/template.py +2 -2
opik/evaluation/metrics/llm_judges/parsing_helpers.py +26 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
opik/evaluation/metrics/llm_judges/trajectory_accuracy/__init__.py +3 -0
opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +171 -0
opik/evaluation/metrics/llm_judges/trajectory_accuracy/parser.py +38 -0
opik/evaluation/metrics/llm_judges/trajectory_accuracy/templates.py +65 -0
opik/evaluation/metrics/llm_judges/usefulness/metric.py +23 -32
opik/evaluation/metrics/llm_judges/usefulness/parser.py +28 -0
opik/evaluation/metrics/ragas_metric.py +112 -0
opik/evaluation/models/__init__.py +10 -0
opik/evaluation/models/base_model.py +140 -18
opik/evaluation/models/langchain/__init__.py +3 -0
opik/evaluation/models/langchain/langchain_chat_model.py +166 -0
opik/evaluation/models/langchain/message_converters.py +106 -0
opik/evaluation/models/langchain/opik_monitoring.py +23 -0
opik/evaluation/models/litellm/litellm_chat_model.py +186 -40
opik/evaluation/models/litellm/opik_monitor.py +24 -21
opik/evaluation/models/litellm/util.py +125 -0
opik/evaluation/models/litellm/warning_filters.py +16 -4
opik/evaluation/models/model_capabilities.py +187 -0
opik/evaluation/models/models_factory.py +25 -3
opik/evaluation/preprocessing.py +92 -0
opik/evaluation/report.py +70 -12
opik/evaluation/rest_operations.py +49 -45
opik/evaluation/samplers/__init__.py +4 -0
opik/evaluation/samplers/base_dataset_sampler.py +40 -0
opik/evaluation/samplers/random_dataset_sampler.py +48 -0
opik/evaluation/score_statistics.py +66 -0
opik/evaluation/scorers/__init__.py +4 -0
opik/evaluation/scorers/scorer_function.py +55 -0
opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
opik/evaluation/test_case.py +3 -2
opik/evaluation/test_result.py +1 -0
opik/evaluation/threads/__init__.py +0 -0
opik/evaluation/threads/context_helper.py +32 -0
opik/evaluation/threads/evaluation_engine.py +181 -0
opik/evaluation/threads/evaluation_result.py +18 -0
opik/evaluation/threads/evaluator.py +120 -0
opik/evaluation/threads/helpers.py +51 -0
opik/evaluation/types.py +9 -1
opik/exceptions.py +116 -3
opik/file_upload/__init__.py +0 -0
opik/file_upload/base_upload_manager.py +39 -0
opik/file_upload/file_upload_monitor.py +14 -0
opik/file_upload/file_uploader.py +141 -0
opik/file_upload/mime_type.py +9 -0
opik/file_upload/s3_multipart_upload/__init__.py +0 -0
opik/file_upload/s3_multipart_upload/file_parts_strategy.py +89 -0
opik/file_upload/s3_multipart_upload/s3_file_uploader.py +86 -0
opik/file_upload/s3_multipart_upload/s3_upload_error.py +29 -0
opik/file_upload/thread_pool.py +17 -0
opik/file_upload/upload_client.py +114 -0
opik/file_upload/upload_manager.py +255 -0
opik/file_upload/upload_options.py +37 -0
opik/format_helpers.py +17 -0
opik/guardrails/__init__.py +4 -0
opik/guardrails/guardrail.py +157 -0
opik/guardrails/guards/__init__.py +5 -0
opik/guardrails/guards/guard.py +17 -0
opik/guardrails/guards/pii.py +47 -0
opik/guardrails/guards/topic.py +76 -0
opik/guardrails/rest_api_client.py +34 -0
opik/guardrails/schemas.py +24 -0
opik/guardrails/tracing.py +61 -0
opik/healthcheck/__init__.py +2 -1
opik/healthcheck/checks.py +2 -2
opik/healthcheck/rich_representation.py +1 -1
opik/hooks/__init__.py +23 -0
opik/hooks/anonymizer_hook.py +36 -0
opik/hooks/httpx_client_hook.py +112 -0
opik/httpx_client.py +75 -4
opik/id_helpers.py +18 -0
opik/integrations/adk/__init__.py +14 -0
opik/integrations/adk/callback_context_info_extractors.py +32 -0
opik/integrations/adk/graph/__init__.py +0 -0
opik/integrations/adk/graph/mermaid_graph_builder.py +128 -0
opik/integrations/adk/graph/nodes.py +101 -0
opik/integrations/adk/graph/subgraph_edges_builders.py +41 -0
opik/integrations/adk/helpers.py +48 -0
opik/integrations/adk/legacy_opik_tracer.py +381 -0
opik/integrations/adk/opik_tracer.py +370 -0
opik/integrations/adk/patchers/__init__.py +4 -0
opik/integrations/adk/patchers/adk_otel_tracer/__init__.py +0 -0
opik/integrations/adk/patchers/adk_otel_tracer/llm_span_helpers.py +30 -0
opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +201 -0
opik/integrations/adk/patchers/litellm_wrappers.py +91 -0
opik/integrations/adk/patchers/llm_response_wrapper.py +105 -0
opik/integrations/adk/patchers/patchers.py +64 -0
opik/integrations/adk/recursive_callback_injector.py +126 -0
opik/integrations/aisuite/aisuite_decorator.py +8 -3
opik/integrations/aisuite/opik_tracker.py +1 -0
opik/integrations/anthropic/messages_create_decorator.py +8 -3
opik/integrations/anthropic/opik_tracker.py +0 -1
opik/integrations/bedrock/converse/__init__.py +0 -0
opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +18 -8
opik/integrations/bedrock/invoke_agent_decorator.py +12 -7
opik/integrations/bedrock/invoke_model/__init__.py +0 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
opik/integrations/bedrock/invoke_model/response_types.py +34 -0
opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
opik/integrations/bedrock/opik_tracker.py +43 -4
opik/integrations/bedrock/types.py +19 -0
opik/integrations/crewai/crewai_decorator.py +34 -56
opik/integrations/crewai/opik_tracker.py +31 -10
opik/integrations/crewai/patchers/__init__.py +5 -0
opik/integrations/crewai/patchers/flow.py +118 -0
opik/integrations/crewai/patchers/litellm_completion.py +30 -0
opik/integrations/crewai/patchers/llm_client.py +207 -0
opik/integrations/dspy/callback.py +246 -84
opik/integrations/dspy/graph.py +88 -0
opik/integrations/dspy/parsers.py +168 -0
opik/integrations/genai/encoder_extension.py +2 -6
opik/integrations/genai/generate_content_decorator.py +20 -13
opik/integrations/guardrails/guardrails_decorator.py +4 -0
opik/integrations/harbor/__init__.py +17 -0
opik/integrations/harbor/experiment_service.py +269 -0
opik/integrations/harbor/opik_tracker.py +528 -0
opik/integrations/haystack/constants.py +35 -0
opik/integrations/haystack/converters.py +1 -2
opik/integrations/haystack/opik_connector.py +28 -6
opik/integrations/haystack/opik_span_bridge.py +284 -0
opik/integrations/haystack/opik_tracer.py +124 -222
opik/integrations/langchain/__init__.py +3 -1
opik/integrations/langchain/helpers.py +96 -0
opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
opik/integrations/langchain/opik_encoder_extension.py +2 -2
opik/integrations/langchain/opik_tracer.py +641 -206
opik/integrations/langchain/provider_usage_extractors/__init__.py +5 -0
opik/integrations/langchain/provider_usage_extractors/anthropic_usage_extractor.py +101 -0
opik/integrations/langchain/provider_usage_extractors/anthropic_vertexai_usage_extractor.py +67 -0
opik/integrations/langchain/provider_usage_extractors/bedrock_usage_extractor.py +94 -0
opik/integrations/langchain/provider_usage_extractors/google_generative_ai_usage_extractor.py +109 -0
opik/integrations/langchain/provider_usage_extractors/groq_usage_extractor.py +92 -0
opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/__init__.py +15 -0
opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +134 -0
opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/langchain_usage.py +163 -0
opik/integrations/langchain/provider_usage_extractors/openai_usage_extractor.py +124 -0
opik/integrations/langchain/provider_usage_extractors/provider_usage_extractor_protocol.py +29 -0
opik/integrations/langchain/provider_usage_extractors/usage_extractor.py +48 -0
opik/integrations/langchain/provider_usage_extractors/vertexai_usage_extractor.py +109 -0
opik/integrations/litellm/__init__.py +5 -0
opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
opik/integrations/litellm/litellm_completion_decorator.py +242 -0
opik/integrations/litellm/opik_tracker.py +43 -0
opik/integrations/litellm/stream_patchers.py +151 -0
opik/integrations/llama_index/callback.py +179 -78
opik/integrations/llama_index/event_parsing_utils.py +29 -9
opik/integrations/openai/agents/opik_tracing_processor.py +204 -32
opik/integrations/openai/agents/span_data_parsers.py +15 -6
opik/integrations/openai/chat_completion_chunks_aggregator.py +1 -1
opik/integrations/openai/{openai_decorator.py → openai_chat_completions_decorator.py} +45 -35
opik/integrations/openai/openai_responses_decorator.py +158 -0
opik/integrations/openai/opik_tracker.py +94 -13
opik/integrations/openai/response_events_aggregator.py +36 -0
opik/integrations/openai/stream_patchers.py +125 -15
opik/integrations/sagemaker/auth.py +5 -1
opik/jsonable_encoder.py +29 -1
opik/llm_usage/base_original_provider_usage.py +15 -8
opik/llm_usage/bedrock_usage.py +8 -2
opik/llm_usage/google_usage.py +6 -1
opik/llm_usage/llm_usage_info.py +6 -0
opik/llm_usage/{openai_usage.py → openai_chat_completions_usage.py} +2 -12
opik/llm_usage/{openai_agent_usage.py → openai_responses_usage.py} +7 -15
opik/llm_usage/opik_usage.py +36 -10
opik/llm_usage/opik_usage_factory.py +35 -19
opik/logging_messages.py +19 -7
opik/message_processing/arguments_utils.py +22 -0
opik/message_processing/batching/base_batcher.py +45 -17
opik/message_processing/batching/batch_manager.py +22 -10
opik/message_processing/batching/batch_manager_constuctors.py +36 -11
opik/message_processing/batching/batchers.py +167 -44
opik/message_processing/batching/flushing_thread.py +0 -3
opik/message_processing/batching/sequence_splitter.py +50 -5
opik/message_processing/emulation/__init__.py +0 -0
opik/message_processing/emulation/emulator_message_processor.py +578 -0
opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
opik/message_processing/emulation/models.py +162 -0
opik/message_processing/encoder_helpers.py +79 -0
opik/message_processing/message_queue.py +79 -0
opik/message_processing/messages.py +154 -12
opik/message_processing/preprocessing/__init__.py +0 -0
opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
opik/message_processing/preprocessing/constants.py +1 -0
opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
opik/message_processing/preprocessing/preprocessor.py +36 -0
opik/message_processing/processors/__init__.py +0 -0
opik/message_processing/processors/attachments_extraction_processor.py +146 -0
opik/message_processing/processors/message_processors.py +92 -0
opik/message_processing/processors/message_processors_chain.py +96 -0
opik/message_processing/processors/online_message_processor.py +324 -0
opik/message_processing/queue_consumer.py +61 -13
opik/message_processing/streamer.py +102 -31
opik/message_processing/streamer_constructors.py +67 -12
opik/opik_context.py +103 -11
opik/plugins/pytest/decorator.py +2 -2
opik/plugins/pytest/experiment_runner.py +3 -2
opik/plugins/pytest/hooks.py +6 -4
opik/rate_limit/__init__.py +0 -0
opik/rate_limit/rate_limit.py +25 -0
opik/rest_api/__init__.py +643 -11
opik/rest_api/alerts/__init__.py +7 -0
opik/rest_api/alerts/client.py +667 -0
opik/rest_api/alerts/raw_client.py +1015 -0
opik/rest_api/alerts/types/__init__.py +7 -0
opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
opik/rest_api/annotation_queues/__init__.py +4 -0
opik/rest_api/annotation_queues/client.py +668 -0
opik/rest_api/annotation_queues/raw_client.py +1019 -0
opik/rest_api/attachments/__init__.py +17 -0
opik/rest_api/attachments/client.py +752 -0
opik/rest_api/attachments/raw_client.py +1125 -0
opik/rest_api/attachments/types/__init__.py +15 -0
opik/rest_api/attachments/types/attachment_list_request_entity_type.py +5 -0
opik/rest_api/attachments/types/download_attachment_request_entity_type.py +5 -0
opik/rest_api/attachments/types/start_multipart_upload_request_entity_type.py +5 -0
opik/rest_api/attachments/types/upload_attachment_request_entity_type.py +5 -0
opik/rest_api/automation_rule_evaluators/__init__.py +2 -0
opik/rest_api/automation_rule_evaluators/client.py +182 -1162
opik/rest_api/automation_rule_evaluators/raw_client.py +598 -0
opik/rest_api/chat_completions/__init__.py +2 -0
opik/rest_api/chat_completions/client.py +115 -149
opik/rest_api/chat_completions/raw_client.py +339 -0
opik/rest_api/check/__init__.py +2 -0
opik/rest_api/check/client.py +88 -106
opik/rest_api/check/raw_client.py +258 -0
opik/rest_api/client.py +112 -212
opik/rest_api/core/__init__.py +5 -0
opik/rest_api/core/api_error.py +12 -6
opik/rest_api/core/client_wrapper.py +4 -14
opik/rest_api/core/datetime_utils.py +1 -3
opik/rest_api/core/file.py +2 -5
opik/rest_api/core/http_client.py +42 -120
opik/rest_api/core/http_response.py +55 -0
opik/rest_api/core/jsonable_encoder.py +1 -4
opik/rest_api/core/pydantic_utilities.py +79 -147
opik/rest_api/core/query_encoder.py +1 -3
opik/rest_api/core/serialization.py +10 -10
opik/rest_api/dashboards/__init__.py +4 -0
opik/rest_api/dashboards/client.py +462 -0
opik/rest_api/dashboards/raw_client.py +648 -0
opik/rest_api/datasets/__init__.py +5 -0
opik/rest_api/datasets/client.py +1638 -1091
opik/rest_api/datasets/raw_client.py +3389 -0
opik/rest_api/datasets/types/__init__.py +8 -0
opik/rest_api/datasets/types/dataset_update_visibility.py +5 -0
opik/rest_api/datasets/types/dataset_write_visibility.py +5 -0
opik/rest_api/errors/__init__.py +2 -0
opik/rest_api/errors/bad_request_error.py +4 -3
opik/rest_api/errors/conflict_error.py +4 -3
opik/rest_api/errors/forbidden_error.py +4 -2
opik/rest_api/errors/not_found_error.py +4 -3
opik/rest_api/errors/not_implemented_error.py +4 -3
opik/rest_api/errors/unauthorized_error.py +4 -3
opik/rest_api/errors/unprocessable_entity_error.py +4 -3
opik/rest_api/experiments/__init__.py +5 -0
opik/rest_api/experiments/client.py +676 -752
opik/rest_api/experiments/raw_client.py +1872 -0
opik/rest_api/experiments/types/__init__.py +10 -0
opik/rest_api/experiments/types/experiment_update_status.py +5 -0
opik/rest_api/experiments/types/experiment_update_type.py +5 -0
opik/rest_api/experiments/types/experiment_write_status.py +5 -0
opik/rest_api/experiments/types/experiment_write_type.py +5 -0
opik/rest_api/feedback_definitions/__init__.py +2 -0
opik/rest_api/feedback_definitions/client.py +96 -370
opik/rest_api/feedback_definitions/raw_client.py +541 -0
opik/rest_api/feedback_definitions/types/__init__.py +2 -0
opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -3
opik/rest_api/guardrails/__init__.py +4 -0
opik/rest_api/guardrails/client.py +104 -0
opik/rest_api/guardrails/raw_client.py +102 -0
opik/rest_api/llm_provider_key/__init__.py +2 -0
opik/rest_api/llm_provider_key/client.py +166 -440
opik/rest_api/llm_provider_key/raw_client.py +643 -0
opik/rest_api/llm_provider_key/types/__init__.py +2 -0
opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
opik/rest_api/manual_evaluation/__init__.py +4 -0
opik/rest_api/manual_evaluation/client.py +347 -0
opik/rest_api/manual_evaluation/raw_client.py +543 -0
opik/rest_api/open_telemetry_ingestion/__init__.py +2 -0
opik/rest_api/open_telemetry_ingestion/client.py +38 -63
opik/rest_api/open_telemetry_ingestion/raw_client.py +88 -0
opik/rest_api/optimizations/__init__.py +7 -0
opik/rest_api/optimizations/client.py +704 -0
opik/rest_api/optimizations/raw_client.py +920 -0
opik/rest_api/optimizations/types/__init__.py +7 -0
opik/rest_api/optimizations/types/optimization_update_status.py +7 -0
opik/rest_api/projects/__init__.py +10 -1
opik/rest_api/projects/client.py +180 -855
opik/rest_api/projects/raw_client.py +1216 -0
opik/rest_api/projects/types/__init__.py +11 -4
opik/rest_api/projects/types/project_metric_request_public_interval.py +1 -3
opik/rest_api/projects/types/project_metric_request_public_metric_type.py +11 -1
opik/rest_api/projects/types/project_update_visibility.py +5 -0
opik/rest_api/projects/types/project_write_visibility.py +5 -0
opik/rest_api/prompts/__init__.py +4 -2
opik/rest_api/prompts/client.py +381 -970
opik/rest_api/prompts/raw_client.py +1634 -0
opik/rest_api/prompts/types/__init__.py +5 -1
opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
opik/rest_api/raw_client.py +156 -0
opik/rest_api/redirect/__init__.py +4 -0
opik/rest_api/redirect/client.py +375 -0
opik/rest_api/redirect/raw_client.py +566 -0
opik/rest_api/service_toggles/__init__.py +4 -0
opik/rest_api/service_toggles/client.py +91 -0
opik/rest_api/service_toggles/raw_client.py +93 -0
opik/rest_api/spans/__init__.py +2 -0
opik/rest_api/spans/client.py +659 -1354
opik/rest_api/spans/raw_client.py +2383 -0
opik/rest_api/spans/types/__init__.py +2 -0
opik/rest_api/spans/types/find_feedback_score_names_1_request_type.py +1 -3
opik/rest_api/spans/types/get_span_stats_request_type.py +1 -3
opik/rest_api/spans/types/get_spans_by_project_request_type.py +1 -3
opik/rest_api/spans/types/span_search_stream_request_public_type.py +1 -3
opik/rest_api/system_usage/__init__.py +2 -0
opik/rest_api/system_usage/client.py +157 -216
opik/rest_api/system_usage/raw_client.py +455 -0
opik/rest_api/traces/__init__.py +2 -0
opik/rest_api/traces/client.py +2102 -1625
opik/rest_api/traces/raw_client.py +4144 -0
opik/rest_api/types/__init__.py +629 -24
opik/rest_api/types/aggregation_data.py +27 -0
opik/rest_api/types/alert.py +33 -0
opik/rest_api/types/alert_alert_type.py +5 -0
opik/rest_api/types/alert_page_public.py +24 -0
opik/rest_api/types/alert_public.py +33 -0
opik/rest_api/types/alert_public_alert_type.py +5 -0
opik/rest_api/types/alert_trigger.py +27 -0
opik/rest_api/types/alert_trigger_config.py +28 -0
opik/rest_api/types/alert_trigger_config_public.py +28 -0
opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
opik/rest_api/types/alert_trigger_config_type.py +10 -0
opik/rest_api/types/alert_trigger_config_write.py +22 -0
opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
opik/rest_api/types/alert_trigger_event_type.py +19 -0
opik/rest_api/types/alert_trigger_public.py +27 -0
opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
opik/rest_api/types/alert_trigger_write.py +23 -0
opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
opik/rest_api/types/alert_write.py +28 -0
opik/rest_api/types/alert_write_alert_type.py +5 -0
opik/rest_api/types/annotation_queue.py +42 -0
opik/rest_api/types/annotation_queue_batch.py +27 -0
opik/rest_api/types/{json_schema_element.py → annotation_queue_item_ids.py} +5 -7
opik/rest_api/types/annotation_queue_page_public.py +28 -0
opik/rest_api/types/annotation_queue_public.py +38 -0
opik/rest_api/types/annotation_queue_public_scope.py +5 -0
opik/rest_api/types/{workspace_metadata.py → annotation_queue_reviewer.py} +6 -7
opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
opik/rest_api/types/annotation_queue_scope.py +5 -0
opik/rest_api/types/annotation_queue_write.py +31 -0
opik/rest_api/types/annotation_queue_write_scope.py +5 -0
opik/rest_api/types/assistant_message.py +7 -8
opik/rest_api/types/assistant_message_role.py +1 -3
opik/rest_api/types/attachment.py +22 -0
opik/rest_api/types/attachment_page.py +28 -0
opik/rest_api/types/audio_url.py +19 -0
opik/rest_api/types/audio_url_public.py +19 -0
opik/rest_api/types/audio_url_write.py +19 -0
opik/rest_api/types/automation_rule_evaluator.py +160 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +6 -6
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +6 -6
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +6 -6
opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
opik/rest_api/types/automation_rule_evaluator_page_public.py +6 -6
opik/rest_api/types/automation_rule_evaluator_public.py +155 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update.py +143 -0
opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +6 -6
opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +6 -6
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +6 -6
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +6 -6
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +6 -6
opik/rest_api/types/automation_rule_evaluator_write.py +143 -0
opik/rest_api/types/avg_value_stat_public.py +3 -5
opik/rest_api/types/batch_delete.py +3 -5
opik/rest_api/types/batch_delete_by_project.py +20 -0
opik/rest_api/types/bi_information.py +3 -5
opik/rest_api/types/bi_information_response.py +4 -6
opik/rest_api/types/boolean_feedback_definition.py +25 -0
opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
opik/rest_api/types/boolean_feedback_detail.py +29 -0
opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
opik/rest_api/types/categorical_feedback_definition.py +5 -7
opik/rest_api/types/categorical_feedback_definition_create.py +4 -6
opik/rest_api/types/categorical_feedback_definition_public.py +5 -7
opik/rest_api/types/categorical_feedback_definition_update.py +4 -6
opik/rest_api/types/categorical_feedback_detail.py +3 -5
opik/rest_api/types/categorical_feedback_detail_create.py +3 -5
opik/rest_api/types/categorical_feedback_detail_public.py +3 -5
opik/rest_api/types/categorical_feedback_detail_update.py +3 -5
opik/rest_api/types/chat_completion_choice.py +4 -6
opik/rest_api/types/chat_completion_response.py +5 -6
opik/rest_api/types/check.py +22 -0
opik/rest_api/types/{json_node_compare.py → check_name.py} +1 -1
opik/rest_api/types/check_public.py +22 -0
opik/rest_api/types/check_public_name.py +5 -0
opik/rest_api/types/check_public_result.py +5 -0
opik/rest_api/types/check_result.py +5 -0
opik/rest_api/types/chunked_output_json_node.py +4 -6
opik/rest_api/types/chunked_output_json_node_public.py +4 -6
opik/rest_api/types/chunked_output_json_node_public_type.py +6 -10
opik/rest_api/types/chunked_output_json_node_type.py +6 -10
opik/rest_api/types/column.py +8 -10
opik/rest_api/types/column_compare.py +8 -10
opik/rest_api/types/column_public.py +8 -10
opik/rest_api/types/column_types_item.py +1 -3
opik/rest_api/types/comment.py +4 -6
opik/rest_api/types/comment_compare.py +4 -6
opik/rest_api/types/comment_public.py +4 -6
opik/rest_api/types/complete_multipart_upload_request.py +33 -0
opik/rest_api/types/complete_multipart_upload_request_entity_type.py +5 -0
opik/rest_api/types/completion_tokens_details.py +3 -5
opik/rest_api/types/count_value_stat_public.py +3 -5
opik/rest_api/types/dashboard_page_public.py +24 -0
opik/rest_api/types/dashboard_public.py +30 -0
opik/rest_api/types/data_point_double.py +21 -0
opik/rest_api/types/data_point_number_public.py +3 -5
opik/rest_api/types/dataset.py +14 -6
opik/rest_api/types/dataset_expansion.py +42 -0
opik/rest_api/types/dataset_expansion_response.py +39 -0
opik/rest_api/types/dataset_item.py +9 -8
opik/rest_api/types/dataset_item_batch.py +3 -5
opik/rest_api/types/dataset_item_changes_public.py +5 -0
opik/rest_api/types/dataset_item_compare.py +9 -8
opik/rest_api/types/dataset_item_compare_source.py +1 -3
opik/rest_api/types/dataset_item_filter.py +27 -0
opik/rest_api/types/dataset_item_filter_operator.py +21 -0
opik/rest_api/types/dataset_item_page_compare.py +10 -7
opik/rest_api/types/dataset_item_page_public.py +10 -7
opik/rest_api/types/dataset_item_public.py +9 -8
opik/rest_api/types/dataset_item_public_source.py +1 -3
opik/rest_api/types/dataset_item_source.py +1 -3
opik/rest_api/types/dataset_item_update.py +39 -0
opik/rest_api/types/dataset_item_write.py +5 -6
opik/rest_api/types/dataset_item_write_source.py +1 -3
opik/rest_api/types/dataset_page_public.py +9 -6
opik/rest_api/types/dataset_public.py +14 -6
opik/rest_api/types/dataset_public_status.py +5 -0
opik/rest_api/types/dataset_public_visibility.py +5 -0
opik/rest_api/types/dataset_status.py +5 -0
opik/rest_api/types/dataset_version_diff.py +22 -0
opik/rest_api/types/dataset_version_diff_stats.py +24 -0
opik/rest_api/types/dataset_version_page_public.py +23 -0
opik/rest_api/types/dataset_version_public.py +59 -0
opik/rest_api/types/dataset_version_summary.py +46 -0
opik/rest_api/types/dataset_version_summary_public.py +46 -0
opik/rest_api/types/dataset_visibility.py +5 -0
opik/rest_api/types/delete_attachments_request.py +23 -0
opik/rest_api/types/delete_attachments_request_entity_type.py +5 -0
opik/rest_api/types/delete_feedback_score.py +4 -5
opik/rest_api/types/delete_ids_holder.py +19 -0
opik/rest_api/types/delta.py +7 -9
opik/rest_api/types/error_count_with_deviation.py +21 -0
opik/rest_api/types/error_count_with_deviation_detailed.py +21 -0
opik/rest_api/types/error_info.py +3 -5
opik/rest_api/types/error_info_experiment_item_bulk_write_view.py +21 -0
opik/rest_api/types/error_info_public.py +3 -5
opik/rest_api/types/error_info_write.py +3 -5
opik/rest_api/types/error_message.py +3 -5
opik/rest_api/types/error_message_detail.py +3 -5
opik/rest_api/types/error_message_detailed.py +3 -5
opik/rest_api/types/error_message_public.py +3 -5
opik/rest_api/types/experiment.py +21 -10
opik/rest_api/types/experiment_group_aggregations_response.py +20 -0
opik/rest_api/types/experiment_group_response.py +22 -0
opik/rest_api/types/experiment_item.py +14 -11
opik/rest_api/types/experiment_item_bulk_record.py +27 -0
opik/rest_api/types/experiment_item_bulk_record_experiment_item_bulk_write_view.py +27 -0
opik/rest_api/types/experiment_item_bulk_upload.py +27 -0
opik/rest_api/types/experiment_item_compare.py +14 -11
opik/rest_api/types/experiment_item_compare_trace_visibility_mode.py +5 -0
opik/rest_api/types/experiment_item_public.py +6 -6
opik/rest_api/types/experiment_item_public_trace_visibility_mode.py +5 -0
opik/rest_api/types/experiment_item_trace_visibility_mode.py +5 -0
opik/rest_api/types/experiment_page_public.py +9 -6
opik/rest_api/types/experiment_public.py +21 -10
opik/rest_api/types/experiment_public_status.py +5 -0
opik/rest_api/types/experiment_public_type.py +5 -0
opik/rest_api/types/experiment_score.py +20 -0
opik/rest_api/types/experiment_score_public.py +20 -0
opik/rest_api/types/experiment_score_write.py +20 -0
opik/rest_api/types/experiment_status.py +5 -0
opik/rest_api/types/experiment_type.py +5 -0
opik/rest_api/types/export_trace_service_request.py +5 -0
opik/rest_api/types/feedback.py +40 -27
opik/rest_api/types/feedback_create.py +27 -13
opik/rest_api/types/feedback_definition_page_public.py +4 -6
opik/rest_api/types/feedback_object_public.py +40 -27
opik/rest_api/types/feedback_public.py +40 -27
opik/rest_api/types/feedback_score.py +7 -7
opik/rest_api/types/feedback_score_average.py +3 -5
opik/rest_api/types/feedback_score_average_detailed.py +3 -5
opik/rest_api/types/feedback_score_average_public.py +3 -5
opik/rest_api/types/feedback_score_batch.py +4 -6
opik/rest_api/types/feedback_score_batch_item.py +6 -6
opik/rest_api/types/feedback_score_batch_item_source.py +1 -3
opik/rest_api/types/feedback_score_batch_item_thread.py +32 -0
opik/rest_api/types/feedback_score_batch_item_thread_source.py +5 -0
opik/rest_api/types/feedback_score_compare.py +7 -7
opik/rest_api/types/feedback_score_compare_source.py +1 -3
opik/rest_api/types/feedback_score_experiment_item_bulk_write_view.py +31 -0
opik/rest_api/types/feedback_score_experiment_item_bulk_write_view_source.py +5 -0
opik/rest_api/types/feedback_score_names.py +4 -6
opik/rest_api/types/feedback_score_public.py +11 -7
opik/rest_api/types/feedback_score_public_source.py +1 -3
opik/rest_api/types/feedback_score_source.py +1 -3
opik/rest_api/types/feedback_update.py +27 -13
opik/rest_api/types/function.py +4 -7
opik/rest_api/types/function_call.py +3 -5
opik/rest_api/types/group_content.py +19 -0
opik/rest_api/types/group_content_with_aggregations.py +21 -0
opik/rest_api/types/group_detail.py +19 -0
opik/rest_api/types/group_details.py +20 -0
opik/rest_api/types/guardrail.py +34 -0
opik/rest_api/types/guardrail_batch.py +20 -0
opik/rest_api/types/guardrail_name.py +5 -0
opik/rest_api/types/guardrail_result.py +5 -0
opik/rest_api/types/guardrail_write.py +33 -0
opik/rest_api/types/guardrail_write_name.py +5 -0
opik/rest_api/types/guardrail_write_result.py +5 -0
opik/rest_api/types/guardrails_validation.py +21 -0
opik/rest_api/types/guardrails_validation_public.py +21 -0
opik/rest_api/types/ids_holder.py +19 -0
opik/rest_api/types/image_url.py +20 -0
opik/rest_api/types/image_url_public.py +20 -0
opik/rest_api/types/image_url_write.py +20 -0
opik/rest_api/types/json_list_string.py +7 -0
opik/rest_api/types/json_list_string_compare.py +7 -0
opik/rest_api/types/json_list_string_experiment_item_bulk_write_view.py +7 -0
opik/rest_api/types/json_list_string_public.py +7 -0
opik/rest_api/types/json_list_string_write.py +7 -0
opik/rest_api/types/json_schema.py +5 -8
opik/rest_api/types/llm_as_judge_code.py +8 -12
opik/rest_api/types/llm_as_judge_code_public.py +8 -12
opik/rest_api/types/llm_as_judge_code_write.py +8 -12
opik/rest_api/types/llm_as_judge_message.py +9 -7
opik/rest_api/types/llm_as_judge_message_content.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
opik/rest_api/types/llm_as_judge_message_public.py +9 -7
opik/rest_api/types/llm_as_judge_message_public_role.py +1 -1
opik/rest_api/types/llm_as_judge_message_role.py +1 -1
opik/rest_api/types/llm_as_judge_message_write.py +9 -7
opik/rest_api/types/llm_as_judge_message_write_role.py +1 -1
opik/rest_api/types/llm_as_judge_model_parameters.py +6 -5
opik/rest_api/types/llm_as_judge_model_parameters_public.py +6 -5
opik/rest_api/types/llm_as_judge_model_parameters_write.py +6 -5
opik/rest_api/types/llm_as_judge_output_schema.py +4 -6
opik/rest_api/types/llm_as_judge_output_schema_public.py +4 -6
opik/rest_api/types/llm_as_judge_output_schema_public_type.py +1 -3
opik/rest_api/types/llm_as_judge_output_schema_type.py +1 -3
opik/rest_api/types/llm_as_judge_output_schema_write.py +4 -6
opik/rest_api/types/llm_as_judge_output_schema_write_type.py +1 -3
opik/rest_api/types/log_item.py +5 -7
opik/rest_api/types/log_item_level.py +1 -3
opik/rest_api/types/log_page.py +4 -6
opik/rest_api/types/manual_evaluation_request.py +38 -0
opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
opik/rest_api/types/manual_evaluation_response.py +27 -0
opik/rest_api/types/multipart_upload_part.py +20 -0
opik/rest_api/types/numerical_feedback_definition.py +5 -7
opik/rest_api/types/numerical_feedback_definition_create.py +4 -6
opik/rest_api/types/numerical_feedback_definition_public.py +5 -7
opik/rest_api/types/numerical_feedback_definition_update.py +4 -6
opik/rest_api/types/numerical_feedback_detail.py +3 -5
opik/rest_api/types/numerical_feedback_detail_create.py +3 -5
opik/rest_api/types/numerical_feedback_detail_public.py +3 -5
opik/rest_api/types/numerical_feedback_detail_update.py +3 -5
opik/rest_api/types/optimization.py +37 -0
opik/rest_api/types/optimization_page_public.py +28 -0
opik/rest_api/types/optimization_public.py +37 -0
opik/rest_api/types/optimization_public_status.py +7 -0
opik/rest_api/types/optimization_status.py +7 -0
opik/rest_api/types/optimization_studio_config.py +27 -0
opik/rest_api/types/optimization_studio_config_public.py +27 -0
opik/rest_api/types/optimization_studio_config_write.py +27 -0
opik/rest_api/types/optimization_studio_log.py +22 -0
opik/rest_api/types/optimization_write.py +30 -0
opik/rest_api/types/optimization_write_status.py +7 -0
opik/rest_api/types/page_columns.py +4 -6
opik/rest_api/types/percentage_value_stat_public.py +4 -6
opik/rest_api/types/percentage_values.py +8 -16
opik/rest_api/types/percentage_values_detailed.py +8 -16
opik/rest_api/types/percentage_values_public.py +8 -16
opik/rest_api/types/project.py +12 -7
opik/rest_api/types/project_detailed.py +12 -7
opik/rest_api/types/project_detailed_visibility.py +5 -0
opik/rest_api/types/project_metric_response_public.py +5 -9
opik/rest_api/types/project_metric_response_public_interval.py +1 -3
opik/rest_api/types/project_metric_response_public_metric_type.py +11 -1
opik/rest_api/types/project_page_public.py +8 -10
opik/rest_api/types/project_public.py +6 -6
opik/rest_api/types/project_public_visibility.py +5 -0
opik/rest_api/types/project_reference.py +31 -0
opik/rest_api/types/project_reference_public.py +31 -0
opik/rest_api/types/project_stat_item_object_public.py +8 -17
opik/rest_api/types/project_stats_public.py +4 -6
opik/rest_api/types/project_stats_summary.py +4 -6
opik/rest_api/types/project_stats_summary_item.py +9 -6
opik/rest_api/types/project_visibility.py +5 -0
opik/rest_api/types/prompt.py +12 -7
opik/rest_api/types/prompt_detail.py +12 -7
opik/rest_api/types/prompt_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_page_public.py +9 -6
opik/rest_api/types/prompt_public.py +11 -6
opik/rest_api/types/prompt_public_template_structure.py +5 -0
opik/rest_api/types/prompt_template_structure.py +5 -0
opik/rest_api/types/prompt_tokens_details.py +19 -0
opik/rest_api/types/prompt_version.py +7 -6
opik/rest_api/types/prompt_version_detail.py +7 -6
opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_version_link.py +4 -5
opik/rest_api/types/prompt_version_link_public.py +4 -5
opik/rest_api/types/prompt_version_link_write.py +3 -5
opik/rest_api/types/prompt_version_page_public.py +9 -6
opik/rest_api/types/prompt_version_public.py +7 -6
opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
opik/rest_api/types/prompt_version_template_structure.py +5 -0
opik/rest_api/types/prompt_version_update.py +33 -0
opik/rest_api/types/provider_api_key.py +18 -8
opik/rest_api/types/provider_api_key_page_public.py +27 -0
opik/rest_api/types/provider_api_key_provider.py +1 -1
opik/rest_api/types/provider_api_key_public.py +18 -8
opik/rest_api/types/provider_api_key_public_provider.py +1 -1
opik/rest_api/types/response_format.py +5 -7
opik/rest_api/types/response_format_type.py +1 -3
opik/rest_api/types/result.py +21 -0
opik/rest_api/types/results_number_public.py +4 -6
opik/rest_api/types/score_name.py +4 -5
opik/rest_api/types/service_toggles_config.py +44 -0
opik/rest_api/types/span.py +13 -15
opik/rest_api/types/span_batch.py +4 -6
opik/rest_api/types/span_enrichment_options.py +31 -0
opik/rest_api/types/span_experiment_item_bulk_write_view.py +39 -0
opik/rest_api/types/span_experiment_item_bulk_write_view_type.py +5 -0
opik/rest_api/types/span_filter.py +23 -0
opik/rest_api/types/span_filter_operator.py +21 -0
opik/rest_api/types/span_filter_public.py +4 -6
opik/rest_api/types/span_filter_public_operator.py +2 -0
opik/rest_api/types/span_filter_write.py +23 -0
opik/rest_api/types/span_filter_write_operator.py +21 -0
opik/rest_api/types/span_llm_as_judge_code.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
opik/rest_api/types/span_page_public.py +9 -6
opik/rest_api/types/span_public.py +19 -16
opik/rest_api/types/span_public_type.py +1 -1
opik/rest_api/types/span_type.py +1 -1
opik/rest_api/types/span_update.py +46 -0
opik/rest_api/types/span_update_type.py +5 -0
opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
opik/rest_api/types/span_write.py +13 -14
opik/rest_api/types/span_write_type.py +1 -1
opik/rest_api/types/spans_count_response.py +20 -0
opik/rest_api/types/start_multipart_upload_response.py +20 -0
opik/rest_api/types/stream_options.py +3 -5
opik/rest_api/types/studio_evaluation.py +20 -0
opik/rest_api/types/studio_evaluation_public.py +20 -0
opik/rest_api/types/studio_evaluation_write.py +20 -0
opik/rest_api/types/studio_llm_model.py +21 -0
opik/rest_api/types/studio_llm_model_public.py +21 -0
opik/rest_api/types/studio_llm_model_write.py +21 -0
opik/rest_api/types/studio_message.py +20 -0
opik/rest_api/types/studio_message_public.py +20 -0
opik/rest_api/types/studio_message_write.py +20 -0
opik/rest_api/types/studio_metric.py +21 -0
opik/rest_api/types/studio_metric_public.py +21 -0
opik/rest_api/types/studio_metric_write.py +21 -0
opik/rest_api/types/studio_optimizer.py +21 -0
opik/rest_api/types/studio_optimizer_public.py +21 -0
opik/rest_api/types/studio_optimizer_write.py +21 -0
opik/rest_api/types/studio_prompt.py +20 -0
opik/rest_api/types/studio_prompt_public.py +20 -0
opik/rest_api/types/studio_prompt_write.py +20 -0
opik/rest_api/types/tool.py +4 -6
opik/rest_api/types/tool_call.py +4 -6
opik/rest_api/types/trace.py +26 -12
opik/rest_api/types/trace_batch.py +4 -6
opik/rest_api/types/trace_count_response.py +4 -6
opik/rest_api/types/trace_enrichment_options.py +32 -0
opik/rest_api/types/trace_experiment_item_bulk_write_view.py +41 -0
opik/rest_api/types/trace_filter.py +23 -0
opik/rest_api/types/trace_filter_operator.py +21 -0
opik/rest_api/types/trace_filter_public.py +23 -0
opik/rest_api/types/trace_filter_public_operator.py +21 -0
opik/rest_api/types/trace_filter_write.py +23 -0
opik/rest_api/types/trace_filter_write_operator.py +21 -0
opik/rest_api/types/trace_page_public.py +8 -10
opik/rest_api/types/trace_public.py +27 -13
opik/rest_api/types/trace_public_visibility_mode.py +5 -0
opik/rest_api/types/trace_thread.py +18 -9
opik/rest_api/types/trace_thread_filter.py +23 -0
opik/rest_api/types/trace_thread_filter_operator.py +21 -0
opik/rest_api/types/trace_thread_filter_public.py +23 -0
opik/rest_api/types/trace_thread_filter_public_operator.py +21 -0
opik/rest_api/types/trace_thread_filter_write.py +23 -0
opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
opik/rest_api/types/trace_thread_identifier.py +22 -0
opik/rest_api/types/trace_thread_llm_as_judge_code.py +26 -0
opik/rest_api/types/trace_thread_llm_as_judge_code_public.py +26 -0
opik/rest_api/types/trace_thread_llm_as_judge_code_write.py +26 -0
opik/rest_api/types/trace_thread_page.py +9 -6
opik/rest_api/types/trace_thread_status.py +5 -0
opik/rest_api/types/trace_thread_update.py +19 -0
opik/rest_api/types/trace_thread_user_defined_metric_python_code.py +19 -0
opik/rest_api/types/trace_thread_user_defined_metric_python_code_public.py +19 -0
opik/rest_api/types/trace_thread_user_defined_metric_python_code_write.py +19 -0
opik/rest_api/types/trace_update.py +39 -0
opik/rest_api/types/trace_visibility_mode.py +5 -0
opik/rest_api/types/trace_write.py +10 -11
opik/rest_api/types/usage.py +6 -6
opik/rest_api/types/user_defined_metric_python_code.py +3 -5
opik/rest_api/types/user_defined_metric_python_code_public.py +3 -5
opik/rest_api/types/user_defined_metric_python_code_write.py +3 -5
opik/rest_api/types/value_entry.py +27 -0
opik/rest_api/types/value_entry_compare.py +27 -0
opik/rest_api/types/value_entry_compare_source.py +5 -0
opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +27 -0
opik/rest_api/types/value_entry_experiment_item_bulk_write_view_source.py +5 -0
opik/rest_api/types/value_entry_public.py +27 -0
opik/rest_api/types/value_entry_public_source.py +5 -0
opik/rest_api/types/value_entry_source.py +5 -0
opik/rest_api/types/video_url.py +19 -0
opik/rest_api/types/video_url_public.py +19 -0
opik/rest_api/types/video_url_write.py +19 -0
opik/rest_api/types/webhook.py +28 -0
opik/rest_api/types/webhook_examples.py +19 -0
opik/rest_api/types/webhook_public.py +28 -0
opik/rest_api/types/webhook_test_result.py +23 -0
opik/rest_api/types/webhook_test_result_status.py +5 -0
opik/rest_api/types/webhook_write.py +23 -0
opik/rest_api/types/welcome_wizard_tracking.py +22 -0
opik/rest_api/types/workspace_configuration.py +27 -0
opik/rest_api/types/workspace_metric_request.py +24 -0
opik/rest_api/types/workspace_metric_response.py +20 -0
opik/rest_api/types/workspace_metrics_summary_request.py +23 -0
opik/rest_api/types/workspace_metrics_summary_response.py +20 -0
opik/rest_api/types/workspace_name_holder.py +19 -0
opik/rest_api/types/workspace_spans_count.py +20 -0
opik/rest_api/types/workspace_trace_count.py +3 -5
opik/rest_api/welcome_wizard/__init__.py +4 -0
opik/rest_api/welcome_wizard/client.py +195 -0
opik/rest_api/welcome_wizard/raw_client.py +208 -0
opik/rest_api/workspaces/__init__.py +2 -0
opik/rest_api/workspaces/client.py +550 -77
opik/rest_api/workspaces/raw_client.py +923 -0
opik/rest_client_configurator/api.py +1 -0
opik/rest_client_configurator/retry_decorator.py +1 -0
opik/s3_httpx_client.py +67 -0
opik/simulation/__init__.py +6 -0
opik/simulation/simulated_user.py +99 -0
opik/simulation/simulator.py +108 -0
opik/synchronization.py +11 -24
opik/tracing_runtime_config.py +48 -0
opik/types.py +48 -2
opik/url_helpers.py +13 -3
opik/validation/chat_prompt_messages.py +241 -0
opik/validation/feedback_score.py +4 -5
opik/validation/parameter.py +122 -0
opik/validation/parameters_validator.py +175 -0
opik/validation/validator.py +30 -2
opik/validation/validator_helpers.py +147 -0
opik-1.9.71.dist-info/METADATA +370 -0
opik-1.9.71.dist-info/RECORD +1110 -0
{opik-1.6.4.dist-info → opik-1.9.71.dist-info}/WHEEL +1 -1
opik-1.9.71.dist-info/licenses/LICENSE +203 -0
opik/api_objects/prompt/prompt.py +0 -107
opik/api_objects/prompt/prompt_template.py +0 -35
opik/cli.py +0 -193
opik/evaluation/metrics/models.py +0 -8
opik/hooks.py +0 -13
opik/integrations/bedrock/chunks_aggregator.py +0 -55
opik/integrations/bedrock/helpers.py +0 -8
opik/integrations/langchain/google_run_helpers.py +0 -75
opik/integrations/langchain/openai_run_helpers.py +0 -122
opik/message_processing/message_processors.py +0 -203
opik/rest_api/types/delta_role.py +0 -7
opik/rest_api/types/json_object_schema.py +0 -34
opik-1.6.4.dist-info/METADATA +0 -270
opik-1.6.4.dist-info/RECORD +0 -507
/opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
{opik-1.6.4.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
{opik-1.6.4.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0

opik/evaluation/score_statistics.py ADDED Viewed

@@ -0,0 +1,66 @@
+import dataclasses
+import math
+import statistics
+from collections import defaultdict
+from typing import List, Optional, Dict
+from opik.evaluation import test_result
+@dataclasses.dataclass
+class ScoreStatistics:
+    """Statistics for a single score metric across multiple trials.
+    Instances are built by calculate_aggregated_statistics, one per score name.
+    """
+    # Arithmetic mean of `values`.
+    mean: float
+    # Largest entry of `values`.
+    max: float
+    # Smallest entry of `values`.
+    min: float
+    # The individual score values the statistics were computed from.
+    values: List[float]
+    std: Optional[float] = None  # Standard deviation (None if count < 2)
+def calculate_aggregated_statistics(
+    evaluation_results: List[test_result.TestResult],
+) -> Dict[str, ScoreStatistics]:
+    """
+    Aggregate score values per score name over all evaluation test results.
+    For every score name the mean, max, min and (when at least two values
+    exist) the standard deviation are computed. Scores flagged as failed and
+    scores whose value is not a finite number are left out.
+    Args:
+        evaluation_results: List of TestResult objects to be aggregated
+    Returns:
+        Dict mapping score names to their aggregated statistics; empty when
+        there is nothing to aggregate.
+    """
+    if not evaluation_results:
+        return {}
+    # Collect the usable values of every score name, in first-seen order.
+    collected: Dict[str, List[float]] = {}
+    for single_result in evaluation_results:
+        for score in single_result.score_results:
+            if score.scoring_failed:
+                continue
+            if not _is_valid_score_value(score.value):
+                continue
+            collected.setdefault(score.name, []).append(score.value)
+    # Every list in `collected` holds at least one value by construction.
+    summary: Dict[str, ScoreStatistics] = {}
+    for name, score_values in collected.items():
+        spread = None
+        if len(score_values) >= 2:
+            spread = statistics.stdev(score_values)
+        summary[name] = ScoreStatistics(
+            mean=statistics.mean(score_values),
+            max=max(score_values),
+            min=min(score_values),
+            values=list(score_values),  # independent copy of the values used
+            std=spread,
+        )
+    return summary
+def _is_valid_score_value(value: float) -> bool:
+    """Tell whether a score value can take part in the statistics.
+    Only int/float instances that are finite (neither NaN nor infinite) qualify.
+    """
+    if not isinstance(value, (int, float)):
+        return False
+    return math.isfinite(value)

opik/evaluation/scorers/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .scorer_function import ScorerFunction
+from .scorer_wrapper_metric import ScorerWrapperMetric
+__all__ = ["ScorerFunction", "ScorerWrapperMetric"]

opik/evaluation/scorers/scorer_function.py ADDED Viewed

@@ -0,0 +1,55 @@
+import inspect
+from typing import Any, Dict, Optional, Protocol, Union, List
+from opik.evaluation.metrics import score_result
+from opik.message_processing.emulation import models
+class ScorerFunctionProtocol(Protocol):
+    """
+    Represents a protocol defining the structure for a scorer function.
+    This protocol serves as a contract for implementing scorer functions used in
+    evaluating tasks. A scorer function adhering to this protocol should take
+    dataset item data, task outputs, and optionally a task span model as input
+    parameters and return a scoring result.
+    Args:
+        dataset_item: The dataset item data, as a dictionary.
+        task_outputs: The task output, as a dictionary.
+        task_span: Optional span model of the task; defaults to None.
+    Returns:
+        A single ScoreResult or a list of ScoreResult objects.
+    """
+    def __call__(
+        self,
+        dataset_item: Dict[str, Any],
+        task_outputs: Dict[str, Any],
+        task_span: Optional[models.SpanModel] = None,
+    ) -> Union[score_result.ScoreResult, List[score_result.ScoreResult]]: ...
+# Alias for the protocol; this is the name used in the annotations of this module.
+ScorerFunction = ScorerFunctionProtocol
+# Parameter names a scorer must both declare when it does not declare 'task_span'.
+EXPECTED_SCORER_FUNCTION_PARAMETERS = ["dataset_item", "task_outputs"]
+def validate_scorer_function(scorer_function: ScorerFunction) -> None:
+    """
+    Check that a scorer function declares the parameters it needs.
+    A scorer is accepted when it declares both 'dataset_item' and
+    'task_outputs', or when it declares 'task_span'.
+    Args:
+        scorer_function: The candidate scorer to validate.
+    Raises:
+        ValueError: If the object is not callable, if its signature cannot be
+            inspected, or if it declares none of the accepted parameter sets.
+    """
+    if not callable(scorer_function):
+        raise ValueError("scorer_function must be a callable function")
+    try:
+        parameters = inspect.signature(scorer_function).parameters
+    except (TypeError, ValueError) as error:
+        # inspect.signature cannot describe every callable (e.g. some builtins);
+        # report that as a validation failure instead of a bare introspection error.
+        raise ValueError(
+            f"scorer_function signature cannot be inspected: {error}"
+        ) from error
+    # Keep declaration order so the error message below is deterministic.
+    names = list(parameters)
+    # Check if it has both dataset_item and task_outputs
+    has_dataset_item_and_task_outputs = all(
+        param in names for param in EXPECTED_SCORER_FUNCTION_PARAMETERS
+    )
+    # Check if it has at least one task_span parameter
+    has_task_span = "task_span" in names
+    if not (has_dataset_item_and_task_outputs or has_task_span):
+        raise ValueError(
+            f"scorer_function must have either both 'dataset_item' and 'task_outputs' parameters "
+            f"or at least one 'task_span' parameter. Found parameters: {names}"
+        )
+def has_task_span_in_parameters(scorer_function: ScorerFunction) -> bool:
+    """Return True when the scorer's signature declares a 'task_span' parameter."""
+    declared_parameters = inspect.signature(scorer_function).parameters
+    return "task_span" in declared_parameters

opik/evaluation/scorers/scorer_wrapper_metric.py ADDED Viewed

@@ -0,0 +1,130 @@
+from typing import Any, Callable, Dict, Optional, List, Union
+from opik.evaluation.metrics import base_metric, score_result
+from . import scorer_function
+from ...message_processing.emulation import models
+class ScorerWrapperMetric(base_metric.BaseMetric):
+    """
+    A wrapper metric that adapts a ScorerFunction to the BaseMetric interface.
+    This class allows using ScorerFunction instances as BaseMetric instances,
+    providing compatibility between the two interfaces.
+    Args:
+        scorer: The ScorerFunction to wrap.
+        name: Name for the metric (required).
+        track: Whether to track the metric. Defaults to True.
+        project_name: Optional project name for tracking.
+    Raises:
+        ValueError: If the scorer function is invalid.
+    Example:
+        >>> def my_scorer(dataset_item: Dict[str, Any], task_outputs: Dict[str, Any]) -> score_result.ScoreResult:
+        ...     return score_result.ScoreResult(name="my_metric", value=1.0)
+        >>>
+        >>> wrapper = ScorerWrapperMetric(scorer=my_scorer, name="wrapped_scorer")
+        >>> result = wrapper.score(dataset_item={"text": "hello"}, task_outputs={"text": "hello"})
+    """
+    def __init__(
+        self,
+        scorer: scorer_function.ScorerFunction,
+        name: str,
+        track: bool = True,
+        project_name: Optional[str] = None,
+    ) -> None:
+        # Validate first so an invalid scorer is rejected before the base
+        # metric is initialised.
+        scorer_function.validate_scorer_function(scorer)
+        super().__init__(name=name, track=track, project_name=project_name)
+        self.scorer = scorer
+    def score(
+        self,
+        dataset_item: Dict[str, Any],
+        task_outputs: Dict[str, Any],
+        **kwargs: Any,
+    ) -> Union[score_result.ScoreResult, List[score_result.ScoreResult]]:
+        """
+        Score using the wrapped ScorerFunction.
+        Args:
+            dataset_item: The dataset item data to score against
+            task_outputs: The output dictionary to be scored - can be the output of LLM task, etc.
+            **kwargs: Additional keyword arguments (accepted but not forwarded to the scorer function)
+        Returns:
+            ScoreResult, or list of ScoreResult, from the wrapped scorer function
+        """
+        return self.scorer(dataset_item=dataset_item, task_outputs=task_outputs)
+class ScorerWrapperMetricTaskSpan(ScorerWrapperMetric):
+    """
+    Variant of ScorerWrapperMetric for scorer functions that declare 'task_span'.
+    The scorer is called only with the arguments its signature can receive, so
+    a scorer declaring just 'task_span' (which validation accepts) works too.
+    """
+    def __init__(
+        self,
+        scorer: scorer_function.ScorerFunction,
+        name: str,
+        track: bool = True,
+        project_name: Optional[str] = None,
+    ) -> None:
+        super().__init__(
+            scorer=scorer, name=name, track=track, project_name=project_name
+        )
+    def score(
+        self,
+        dataset_item: Dict[str, Any],
+        task_outputs: Dict[str, Any],
+        task_span: Optional[models.SpanModel] = None,
+        **kwargs: Any,
+    ) -> Union[score_result.ScoreResult, List[score_result.ScoreResult]]:
+        """
+        Score using the wrapped ScorerFunction.
+        Args:
+            dataset_item: The dataset item data to score against
+            task_outputs: The output dictionary to be scored - can be the output of LLM task, etc.
+            task_span: The collected task span data.
+            **kwargs: Additional keyword arguments (accepted but not forwarded to the scorer function)
+        Returns:
+            ScoreResult, or list of ScoreResult, from the wrapped scorer function
+        """
+        # Local import: this module does not import inspect at the top level.
+        import inspect
+        parameters = inspect.signature(self.scorer).parameters
+        accepts_any_keyword = any(
+            parameter.kind is inspect.Parameter.VAR_KEYWORD
+            for parameter in parameters.values()
+        )
+        call_arguments: Dict[str, Any] = {}
+        # Forward dataset_item/task_outputs only when the scorer can receive
+        # them; a scorer that declares only 'task_span' would otherwise fail
+        # with an unexpected-keyword TypeError.
+        for argument_name, argument_value in (
+            ("dataset_item", dataset_item),
+            ("task_outputs", task_outputs),
+        ):
+            if accepts_any_keyword or argument_name in parameters:
+                call_arguments[argument_name] = argument_value
+        # task_span is forwarded only when it was collected and explicitly declared.
+        if task_span is not None and "task_span" in parameters:
+            call_arguments["task_span"] = task_span
+        return self.scorer(**call_arguments)
+def _scorer_name(scorer: Callable) -> str:
+    """
+    Derive a metric name from a scorer callable.
+    Plain functions report their own name. Callables without a __name__
+    fall back to the name of the callable they wrap through a 'func'
+    attribute (as functools.partial objects do), and finally to the name
+    of their class.
+    """
+    own_name = getattr(scorer, "__name__", None)
+    if own_name is not None:
+        return own_name
+    # functools.partial objects have no __name__ but expose the target as .func
+    wrapped_name = getattr(getattr(scorer, "func", None), "__name__", None)
+    if wrapped_name is not None:
+        return wrapped_name
+    return type(scorer).__name__
+def wrap_scorer_functions(
+    scorer_functions: List[scorer_function.ScorerFunction], project_name: Optional[str]
+) -> List[base_metric.BaseMetric]:
+    """
+    Wrap each scorer function in a metric object named after the function.
+    Scorers that declare a 'task_span' parameter are wrapped in
+    ScorerWrapperMetricTaskSpan, all others in ScorerWrapperMetric.
+    Args:
+        scorer_functions: The scorer functions to wrap.
+        project_name: Project name handed to every created metric.
+    Returns:
+        The created metrics, in the order of the given scorer functions.
+    """
+    wrapped: List[base_metric.BaseMetric] = []
+    for scorer in scorer_functions:
+        metric_name = _scorer_name(scorer)
+        wrapper_class = (
+            ScorerWrapperMetricTaskSpan
+            if scorer_function.has_task_span_in_parameters(scorer)
+            else ScorerWrapperMetric
+        )
+        wrapped.append(
+            wrapper_class(scorer=scorer, project_name=project_name, name=metric_name)
+        )
+    return wrapped

opik/evaluation/test_case.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Dict, Any
+from typing import Dict, Any, Optional
 import dataclasses
@@ -6,5 +6,6 @@ import dataclasses
 class TestCase:
     trace_id: str
     dataset_item_id: str
-    scoring_inputs: Dict[str, Any]
     task_output: Dict[str, Any]
+    dataset_item_content: Dict[str, Any] = dataclasses.field(default_factory=dict)
+    mapped_scoring_inputs: Optional[Dict[str, Any]] = None

opik/evaluation/test_result.py CHANGED Viewed

@@ -10,3 +10,4 @@ from .metrics import score_result
 class TestResult:
     test_case: test_case.TestCase
     score_results: List[score_result.ScoreResult]
+    trial_id: int

opik/evaluation/threads/__init__.py ADDED Viewed

File without changes

opik/evaluation/threads/context_helper.py ADDED Viewed

@@ -0,0 +1,32 @@
+import contextlib
+from typing import Iterator, Optional
+import opik.context_storage as context_storage
+from opik.api_objects import trace, opik_client
+from opik.decorator import error_info_collector
+from opik.types import ErrorInfoDict
+@contextlib.contextmanager
+def evaluate_llm_conversation_context(
+    trace_data: trace.TraceData,
+    client: opik_client.Opik,
+) -> Iterator[None]:
+    """Context manager that keeps ``trace_data`` in the context storage while the body runs.
+    On exit (normal or exceptional) the stored trace data is popped, its end
+    time is initialised and it is passed to ``client.trace``. If the body
+    raised, the collected error info is attached to the trace data first and
+    the exception is re-raised to the caller.
+    Args:
+        trace_data: The trace data to store for the duration of the body.
+        client: The Opik client whose ``trace`` method receives the finished trace.
+    """
+    error_info: Optional[ErrorInfoDict] = None
+    try:
+        context_storage.set_trace_data(trace_data)
+        yield
+    except Exception as exception:
+        # Remember the failure so it can be attached to the trace in `finally`.
+        error_info = error_info_collector.collect(exception)
+        raise
+    finally:
+        # Rebinds the parameter to whatever the context storage currently holds.
+        trace_data = context_storage.pop_trace_data()  # type: ignore
+        assert trace_data is not None
+        if error_info is not None:
+            trace_data.error_info = error_info
+        trace_data.init_end_time()
+        client.trace(**trace_data.as_parameters)

opik/evaluation/threads/evaluation_engine.py ADDED Viewed

@@ -0,0 +1,181 @@
+import functools
+import logging
+from typing import Optional, List, Callable, Dict, Literal
+import opik
+import opik.exceptions as exceptions
+import opik.opik_context as opik_context
+from opik.evaluation.metrics.conversation import conversation_thread_metric
+from opik.rest_api import JsonListStringPublic, TraceThread
+from . import evaluation_result, helpers
+from ..engine import evaluation_tasks_executor
+from ..engine import types as engine_types
+from ..metrics import score_result
+from ...api_objects import trace
+from ...api_objects.threads import threads_client
+LOGGER = logging.getLogger(__name__)
+class ThreadsEvaluationEngine:
+    """Evaluates the conversation threads of a project with conversation thread metrics.
+    The engine searches threads through the given ThreadsClient, evaluates the
+    inactive ones as independent tasks and logs the resulting scores as thread
+    feedback scores.
+    """
+    def __init__(
+        self,
+        client: threads_client.ThreadsClient,
+        project_name: Optional[str],
+        number_of_workers: int,
+        verbose: int,
+    ) -> None:
+        """Store the evaluation configuration.
+        Args:
+            client: Threads client used to search threads and log feedback scores;
+                its ``opik_client`` attribute is used to load and create traces.
+            project_name: Project whose threads are evaluated.
+            number_of_workers: Passed as ``workers`` to the evaluation tasks executor.
+            verbose: Passed as ``verbose`` to the evaluation tasks executor.
+        """
+        self._client = client
+        self._project_name = project_name
+        self._number_of_workers = number_of_workers
+        self._verbose = verbose
+        # Same object as self._client; both names are used by the methods below.
+        self._threads_client = client
+    def evaluate_threads(
+        self,
+        filter_string: Optional[str],
+        eval_project_name: Optional[str],
+        metrics: List[conversation_thread_metric.ConversationThreadMetric],
+        trace_input_transform: Callable[[JsonListStringPublic], str],
+        trace_output_transform: Callable[[JsonListStringPublic], str],
+        max_traces_per_thread: int = 1000,
+    ) -> evaluation_result.ThreadsEvaluationResult:
+        """Evaluate all inactive threads matching ``filter_string`` and log their scores.
+        Args:
+            filter_string: Filter passed to the threads search.
+            eval_project_name: Project for the evaluation traces; each thread
+                evaluation falls back to the engine's project when it is None.
+            metrics: Metrics applied to every thread; must not be empty.
+            trace_input_transform: Converts a trace input to the message string.
+            trace_output_transform: Converts a trace output to the message string.
+            max_traces_per_thread: Maximum number of traces loaded per thread.
+        Returns:
+            A ThreadsEvaluationResult holding one result per inactive thread.
+        Raises:
+            ValueError: If ``metrics`` is empty.
+            exceptions.EvaluationError: If no thread matches the filter, or none
+                of the matching threads is inactive.
+        """
+        if len(metrics) == 0:
+            raise ValueError("No metrics provided")
+        threads = self._threads_client.search_threads(
+            project_name=self._project_name,
+            filter_string=filter_string,
+        )
+        if len(threads) == 0:
+            raise exceptions.EvaluationError(
+                f"No threads found with filter_string: {filter_string}"
+            )
+        # Only threads whose status is "inactive" (i.e. closed) are evaluated.
+        inactive_threads = [thread for thread in threads if thread.status == "inactive"]
+        if len(inactive_threads) == 0:
+            raise exceptions.EvaluationError(
+                f"No closed threads found with filter_string: {filter_string}. Only closed threads can be evaluated."
+            )
+        elif len(inactive_threads) < len(threads):
+            # Some matched threads are not inactive: warn and continue with the rest.
+            active_threads_ids = [
+                thread.id for thread in threads if thread.status == "active"
+            ]
+            inactive_threads_ids = [thread.id for thread in inactive_threads]
+            LOGGER.warning(
+                f"Some threads are active: {active_threads_ids} with filter_string: {filter_string}. Only closed threads will be evaluated: {inactive_threads_ids}."
+            )
+        # One zero-argument task per inactive thread, handed to the shared executor.
+        evaluation_tasks: List[
+            engine_types.EvaluationTask[evaluation_result.ThreadEvaluationResult]
+        ] = [
+            functools.partial(
+                self.evaluate_thread,
+                thread=thread,
+                eval_project_name=eval_project_name,
+                metrics=metrics,
+                trace_input_transform=trace_input_transform,
+                trace_output_transform=trace_output_transform,
+                max_traces_per_thread=max_traces_per_thread,
+            )
+            for thread in inactive_threads
+        ]
+        results = evaluation_tasks_executor.execute(
+            evaluation_tasks, workers=self._number_of_workers, verbose=self._verbose
+        )
+        # Scores are logged against the evaluated project, not eval_project_name.
+        helpers.log_feedback_scores(
+            results, project_name=self._project_name, client=self._threads_client
+        )
+        return evaluation_result.ThreadsEvaluationResult(results=results)
+    def evaluate_thread(
+        self,
+        thread: TraceThread,
+        eval_project_name: Optional[str],
+        metrics: List[conversation_thread_metric.ConversationThreadMetric],
+        trace_input_transform: Callable[[JsonListStringPublic], str],
+        trace_output_transform: Callable[[JsonListStringPublic], str],
+        max_traces_per_thread: int,
+    ) -> evaluation_result.ThreadEvaluationResult:
+        """Load one thread's conversation, score it and return the thread's result.
+        Args:
+            thread: The thread to evaluate.
+            eval_project_name: Project for the evaluation trace; the engine's
+                project is used when it is None.
+            metrics: Metrics applied to the conversation.
+            trace_input_transform: Converts a trace input to the message string.
+            trace_output_transform: Converts a trace output to the message string.
+            max_traces_per_thread: Maximum number of traces loaded for the thread.
+        Returns:
+            A ThreadEvaluationResult with the thread id and its scores; the
+            scores list is empty when the thread has no conversation traces.
+        """
+        conversation_dict = helpers.load_conversation_thread(
+            thread=thread,
+            trace_input_transform=trace_input_transform,
+            trace_output_transform=trace_output_transform,
+            max_results=max_traces_per_thread,
+            project_name=self._project_name,
+            client=self._client.opik_client,
+        ).model_dump()
+        conversation = conversation_dict["discussion"]
+        if len(conversation) == 0:
+            LOGGER.warning(
+                f"Thread '{thread.id}' has no conversation traces. Skipping evaluation."
+            )
+            return evaluation_result.ThreadEvaluationResult(
+                thread_id=thread.id, scores=[]
+            )
+        if eval_project_name is None:
+            eval_project_name = self._project_name
+        # Create a new trace for the evaluation
+        # NOTE(review): the metric objects themselves are placed in the trace
+        # input - confirm they are serialized as intended.
+        trace_data = trace.TraceData(
+            input={"conversation": conversation, "metrics": metrics},
+            name="evaluation_task",
+            created_by="evaluation",
+            project_name=eval_project_name,
+        )
+        with opik_context.trace_context(
+            trace_data=trace_data,
+            client=self._client.opik_client,
+        ):
+            results = self._evaluate_conversation(conversation, metrics)
+            # Update the current trace with the evaluation results
+            outputs = [result.__dict__ for result in results]
+            opik_context.update_current_trace(output={"evaluation_results": outputs})
+        return evaluation_result.ThreadEvaluationResult(
+            thread_id=thread.id,
+            scores=results,
+        )
+    @opik.track(name="metrics_calculation")  # type: ignore[attr-defined,has-type]
+    def _evaluate_conversation(
+        self,
+        conversation: List[Dict[Literal["role", "content"], str]],
+        metrics: List[conversation_thread_metric.ConversationThreadMetric],
+    ) -> List[score_result.ScoreResult]:
+        """Apply every metric to the conversation and collect the score results.
+        A metric may return a single ScoreResult or a list of them; lists are
+        flattened into the returned list. A metric that raises does not abort
+        the evaluation: the error is logged and a ScoreResult with value 0.0,
+        the exception text as reason and ``scoring_failed=True`` is recorded.
+        """
+        score_results: List[score_result.ScoreResult] = []
+        for metric in metrics:
+            try:
+                LOGGER.debug("Metric %s score started", metric.name)
+                result = metric.score(conversation)
+                LOGGER.debug("Metric %s score ended", metric.name)
+                if isinstance(result, list):
+                    score_results.extend(result)
+                else:
+                    score_results.append(result)
+            except Exception as e:
+                # Keep going with the remaining metrics; record a failed placeholder.
+                LOGGER.error(
+                    "Failed to compute metric %s. Score result will be marked as failed.",
+                    metric.name,
+                    exc_info=True,
+                )
+                score_results.append(
+                    score_result.ScoreResult(
+                        name=metric.name,
+                        value=0.0,
+                        reason=str(e),
+                        scoring_failed=True,
+                    )
+                )
+        return score_results

opik/evaluation/threads/evaluation_result.py ADDED Viewed

@@ -0,0 +1,18 @@
+from typing import List
+import pydantic
+from ..metrics import score_result
+class ThreadEvaluationResult(pydantic.BaseModel):
+    """Evaluation result for a single thread: its id and the scores computed for it."""
+    # Identifier of the evaluated thread.
+    thread_id: str
+    # Score results for the thread; defaults to a fresh empty list per instance.
+    scores: List[score_result.ScoreResult] = pydantic.Field(default_factory=list)
+class ThreadsEvaluationResult(pydantic.BaseModel):
+    """Aggregated evaluation results for a set of threads."""
+    # One ThreadEvaluationResult per thread; defaults to a fresh empty list per instance.
+    results: List[ThreadEvaluationResult] = pydantic.Field(default_factory=list)

opik/evaluation/threads/evaluator.py ADDED Viewed

@@ -0,0 +1,120 @@
+from typing import Optional, List, Callable
+from .. import asyncio_support
+from ...api_objects import opik_client
+from ...api_objects.threads import threads_client
+from ..metrics.conversation import conversation_thread_metric
+from . import evaluation_engine, evaluation_result
+from opik.rest_api import JsonListStringPublic
+def evaluate_threads(
+    project_name: str,
+    filter_string: Optional[str],
+    eval_project_name: Optional[str],
+    metrics: List[conversation_thread_metric.ConversationThreadMetric],
+    trace_input_transform: Callable[[JsonListStringPublic], str],
+    trace_output_transform: Callable[[JsonListStringPublic], str],
+    verbose: int = 1,
+    num_workers: int = 8,
+    max_traces_per_thread: int = 1000,
+) -> evaluation_result.ThreadsEvaluationResult:
+    """Evaluate conversation threads using specified metrics.
+    This function evaluates conversation threads from a project using the provided metrics.
+    It creates a ThreadsEvaluationEngine to fetch threads matching the filter string,
+    converts them to conversation threads, applies the metrics, and logs feedback scores.
+    Args:
+        project_name: The name of the project containing the threads to evaluate.
+        filter_string: Optional filter string to select specific threads for evaluation using Opik Query Language (OQL).
+            The format is: "<COLUMN> <OPERATOR> <VALUE> [AND <COLUMN> <OPERATOR> <VALUE>]*"
+            Supported columns include:
+            - `id`, `name`, `created_by`, `thread_id`, `type`, `model`, `provider`: String fields with full operator support
+            - `status`: String field (=, contains, not_contains only)
+            - `start_time`, `end_time`: DateTime fields (use ISO 8601 format, e.g., "2024-01-01T00:00:00Z")
+            - `input`, `output`: String fields for content (=, contains, not_contains only)
+            - `metadata`: Dictionary field (use dot notation, e.g., "metadata.model")
+            - `feedback_scores`: Numeric field (use dot notation, e.g., "feedback_scores.accuracy")
+            - `tags`: List field (use "contains" operator only)
+            - `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`: Numeric usage fields
+            - `duration`, `number_of_messages`, `total_estimated_cost`: Numeric fields
+            Examples: 'status = "inactive"', 'id = "thread_123"', 'duration > 300'
+            If None, all threads in the project are considered.
+            Only closed (inactive) threads are actually evaluated; when some of the
+            matching threads are still active, a warning is logged and they are skipped.
+        eval_project_name: Optional name for the evaluation project where evaluation traces will be stored.
+            If None, the same project_name will be used.
+        metrics: List of ConversationThreadMetric instances to apply to each thread.
+            Must contain at least one metric.
+        trace_input_transform: Function to transform trace input JSON to string representation.
+            This function extracts the relevant user message from your trace's input structure.
+            The function receives the raw trace input as a dictionary and should return a string.
+            Example: If your trace input is {"content": {"user_question": "Hello"}},
+            use: lambda x: x["content"]["user_question"]
+            This transformation is essential because trace inputs vary by framework, but metrics
+            expect a standardized string format representing the user's message.
+        trace_output_transform: Function to transform trace output JSON to string representation.
+            This function extracts the relevant agent response from your trace's output structure.
+            The function receives the raw trace output as a dictionary and should return a string.
+            Example: If your trace output is {"response": {"text": "Hi there"}},
+            use: lambda x: x["response"]["text"]
+            This transformation is essential because trace outputs vary by framework, but metrics
+            expect a standardized string format representing the agent's response.
+        verbose: Verbosity level for progress reporting (0=silent, 1=progress).
+            Default is 1.
+        num_workers: Number of concurrent workers for thread evaluation.
+            Default is 8.
+        max_traces_per_thread: Maximum number of traces to fetch per thread.
+            Default is 1000.
+    Returns:
+        ThreadsEvaluationResult containing evaluation scores for each thread.
+    Raises:
+        ValueError: If no metrics are provided.
+        EvaluationError: If no threads match the filter, or if none of the
+            matching threads is closed (inactive).
+    Example:
+        >>> from opik.evaluation import evaluate_threads
+        >>> from opik.evaluation.metrics import ConversationalCoherenceMetric, UserFrustrationMetric
+        >>>
+        >>> # Initialize the evaluation metrics
+        >>> conversation_coherence_metric = ConversationalCoherenceMetric()
+        >>> user_frustration_metric = UserFrustrationMetric()
+        >>>
+        >>> # Run the threads evaluation
+        >>> results = evaluate_threads(
+        ...     project_name="ai_team",
+        ...     filter_string='thread_id = "0197ad2a-cf5c-75af-be8b-20e8a23304fe"',
+        ...     eval_project_name="ai_team_evaluation",
+        ...     metrics=[
+        ...         conversation_coherence_metric,
+        ...         user_frustration_metric,
+        ...     ],
+        ...     trace_input_transform=lambda x: x["input"],
+        ...     trace_output_transform=lambda x: x["output"],
+        ... )
+    """
+    # The threads client is built on top of the cached Opik client.
+    client = opik_client.get_client_cached()
+    threads_client_ = threads_client.ThreadsClient(client)
+    # NOTE(review): presumably makes async HTTP connections expire immediately while
+    # the evaluation runs - confirm against asyncio_support.
+    with asyncio_support.async_http_connections_expire_immediately():
+        engine = evaluation_engine.ThreadsEvaluationEngine(
+            client=threads_client_,
+            project_name=project_name,
+            number_of_workers=num_workers,
+            verbose=verbose,
+        )
+        return engine.evaluate_threads(
+            filter_string=filter_string,
+            eval_project_name=eval_project_name,
+            metrics=metrics,
+            trace_input_transform=trace_input_transform,
+            trace_output_transform=trace_output_transform,
+            max_traces_per_thread=max_traces_per_thread,
+        )

opik/evaluation/threads/helpers.py ADDED Viewed

@@ -0,0 +1,51 @@
+from typing import List, Callable, Optional
+from . import evaluation_result
+from ...api_objects import opik_client
+from ...api_objects.conversation import conversation_thread, conversation_factory
+from ...rest_api import TraceThread, JsonListStringPublic
+from ...types import BatchFeedbackScoreDict
+from ...api_objects.threads import threads_client
+def log_feedback_scores(
+    results: List[evaluation_result.ThreadEvaluationResult],
+    project_name: Optional[str],
+    client: threads_client.ThreadsClient,
+) -> None:
+    for result in results:
+        feedback_scores = [
+            BatchFeedbackScoreDict(
+                id=result.thread_id,
+                name=score.name,
+                value=score.value,
+                reason=score.reason,
+            )
+            for score in result.scores
+            if not score.scoring_failed
+        ]
+        client.log_threads_feedback_scores(
+            scores=feedback_scores,
+            project_name=project_name,
+        )
+def load_conversation_thread(
+    thread: TraceThread,
+    trace_input_transform: Callable[[JsonListStringPublic], str],
+    trace_output_transform: Callable[[JsonListStringPublic], str],
+    max_results: int,
+    project_name: Optional[str],
+    client: opik_client.Opik,
+) -> conversation_thread.ConversationThread:
+    """Load the traces of a thread and build a ConversationThread from them.
+    Args:
+        thread: The thread whose traces are loaded (selected by ``thread.id``).
+        trace_input_transform: Passed to the conversation factory as ``input_transform``.
+        trace_output_transform: Passed to the conversation factory as ``output_transform``.
+        max_results: Maximum number of traces requested from the trace search.
+        project_name: Project in which the traces are searched.
+        client: Opik client used to search the traces.
+    Returns:
+        The ConversationThread created from the found traces.
+    """
+    # Request untruncated traces that belong to this thread.
+    traces = client.search_traces(
+        project_name=project_name,
+        filter_string=f'thread_id = "{thread.id}"',
+        max_results=max_results,
+        truncate=False,
+    )
+    return conversation_factory.create_conversation_from_traces(
+        traces=traces,
+        input_transform=trace_input_transform,
+        output_transform=trace_output_transform,
+    )

opik 1.6.4__py3-none-any.whl → 1.9.71__py3-none-any.whl

opik 1.6.4py3-none-any.whl → 1.9.71py3-none-any.whl