PyPI - opik - Versions diffs - 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl - Mend

opik-1.8.39-py3-none-any.whl → opik-1.9.71-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (592)

opik/__init__.py +19 -3
opik/anonymizer/__init__.py +5 -0
opik/anonymizer/anonymizer.py +12 -0
opik/anonymizer/factory.py +80 -0
opik/anonymizer/recursive_anonymizer.py +64 -0
opik/anonymizer/rules.py +56 -0
opik/anonymizer/rules_anonymizer.py +35 -0
opik/api_objects/attachment/attachment_context.py +36 -0
opik/api_objects/attachment/attachments_extractor.py +153 -0
opik/api_objects/attachment/client.py +1 -0
opik/api_objects/attachment/converters.py +2 -0
opik/api_objects/attachment/decoder.py +18 -0
opik/api_objects/attachment/decoder_base64.py +83 -0
opik/api_objects/attachment/decoder_helpers.py +137 -0
opik/api_objects/data_helpers.py +79 -0
opik/api_objects/dataset/dataset.py +64 -4
opik/api_objects/dataset/rest_operations.py +11 -2
opik/api_objects/experiment/experiment.py +57 -57
opik/api_objects/experiment/experiment_item.py +2 -1
opik/api_objects/experiment/experiments_client.py +64 -0
opik/api_objects/experiment/helpers.py +35 -11
opik/api_objects/experiment/rest_operations.py +65 -5
opik/api_objects/helpers.py +8 -5
opik/api_objects/local_recording.py +81 -0
opik/api_objects/opik_client.py +600 -108
opik/api_objects/opik_query_language.py +39 -5
opik/api_objects/prompt/__init__.py +12 -2
opik/api_objects/prompt/base_prompt.py +69 -0
opik/api_objects/prompt/base_prompt_template.py +29 -0
opik/api_objects/prompt/chat/__init__.py +1 -0
opik/api_objects/prompt/chat/chat_prompt.py +210 -0
opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
opik/api_objects/prompt/client.py +189 -47
opik/api_objects/prompt/text/__init__.py +1 -0
opik/api_objects/prompt/text/prompt.py +174 -0
opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
opik/api_objects/prompt/types.py +23 -0
opik/api_objects/search_helpers.py +89 -0
opik/api_objects/span/span_data.py +35 -25
opik/api_objects/threads/threads_client.py +39 -5
opik/api_objects/trace/trace_client.py +52 -2
opik/api_objects/trace/trace_data.py +15 -24
opik/api_objects/validation_helpers.py +3 -3
opik/cli/__init__.py +5 -0
opik/cli/__main__.py +6 -0
opik/cli/configure.py +66 -0
opik/cli/exports/__init__.py +131 -0
opik/cli/exports/dataset.py +278 -0
opik/cli/exports/experiment.py +784 -0
opik/cli/exports/project.py +685 -0
opik/cli/exports/prompt.py +578 -0
opik/cli/exports/utils.py +406 -0
opik/cli/harbor.py +39 -0
opik/cli/healthcheck.py +21 -0
opik/cli/imports/__init__.py +439 -0
opik/cli/imports/dataset.py +143 -0
opik/cli/imports/experiment.py +1192 -0
opik/cli/imports/project.py +262 -0
opik/cli/imports/prompt.py +177 -0
opik/cli/imports/utils.py +280 -0
opik/cli/main.py +49 -0
opik/cli/proxy.py +93 -0
opik/cli/usage_report/__init__.py +16 -0
opik/cli/usage_report/charts.py +783 -0
opik/cli/usage_report/cli.py +274 -0
opik/cli/usage_report/constants.py +9 -0
opik/cli/usage_report/extraction.py +749 -0
opik/cli/usage_report/pdf.py +244 -0
opik/cli/usage_report/statistics.py +78 -0
opik/cli/usage_report/utils.py +235 -0
opik/config.py +13 -7
opik/configurator/configure.py +17 -0
opik/datetime_helpers.py +12 -0
opik/decorator/arguments_helpers.py +9 -1
opik/decorator/base_track_decorator.py +205 -133
opik/decorator/context_manager/span_context_manager.py +123 -0
opik/decorator/context_manager/trace_context_manager.py +84 -0
opik/decorator/opik_args/__init__.py +13 -0
opik/decorator/opik_args/api_classes.py +71 -0
opik/decorator/opik_args/helpers.py +120 -0
opik/decorator/span_creation_handler.py +25 -6
opik/dict_utils.py +3 -3
opik/evaluation/__init__.py +13 -2
opik/evaluation/engine/engine.py +272 -75
opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
opik/evaluation/engine/helpers.py +31 -6
opik/evaluation/engine/metrics_evaluator.py +237 -0
opik/evaluation/evaluation_result.py +168 -2
opik/evaluation/evaluator.py +533 -62
opik/evaluation/metrics/__init__.py +103 -4
opik/evaluation/metrics/aggregated_metric.py +35 -6
opik/evaluation/metrics/base_metric.py +1 -1
opik/evaluation/metrics/conversation/__init__.py +48 -0
opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
opik/evaluation/metrics/conversation/helpers.py +14 -15
opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
opik/evaluation/metrics/conversation/types.py +4 -5
opik/evaluation/metrics/conversation_types.py +9 -0
opik/evaluation/metrics/heuristics/bertscore.py +107 -0
opik/evaluation/metrics/heuristics/bleu.py +35 -15
opik/evaluation/metrics/heuristics/chrf.py +127 -0
opik/evaluation/metrics/heuristics/contains.py +47 -11
opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
opik/evaluation/metrics/heuristics/gleu.py +113 -0
opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
opik/evaluation/metrics/heuristics/meteor.py +119 -0
opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
opik/evaluation/metrics/heuristics/readability.py +129 -0
opik/evaluation/metrics/heuristics/rouge.py +26 -9
opik/evaluation/metrics/heuristics/spearman.py +88 -0
opik/evaluation/metrics/heuristics/tone.py +155 -0
opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
opik/evaluation/metrics/ragas_metric.py +43 -23
opik/evaluation/models/__init__.py +8 -0
opik/evaluation/models/base_model.py +107 -1
opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
opik/evaluation/models/langchain/message_converters.py +97 -15
opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
opik/evaluation/models/litellm/util.py +125 -0
opik/evaluation/models/litellm/warning_filters.py +16 -4
opik/evaluation/models/model_capabilities.py +187 -0
opik/evaluation/models/models_factory.py +25 -3
opik/evaluation/preprocessing.py +92 -0
opik/evaluation/report.py +70 -12
opik/evaluation/rest_operations.py +49 -45
opik/evaluation/samplers/__init__.py +4 -0
opik/evaluation/samplers/base_dataset_sampler.py +40 -0
opik/evaluation/samplers/random_dataset_sampler.py +48 -0
opik/evaluation/score_statistics.py +66 -0
opik/evaluation/scorers/__init__.py +4 -0
opik/evaluation/scorers/scorer_function.py +55 -0
opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
opik/evaluation/test_case.py +3 -2
opik/evaluation/test_result.py +1 -0
opik/evaluation/threads/evaluator.py +31 -3
opik/evaluation/threads/helpers.py +3 -2
opik/evaluation/types.py +9 -1
opik/exceptions.py +33 -0
opik/file_upload/file_uploader.py +13 -0
opik/file_upload/upload_options.py +2 -0
opik/hooks/__init__.py +23 -0
opik/hooks/anonymizer_hook.py +36 -0
opik/hooks/httpx_client_hook.py +112 -0
opik/httpx_client.py +12 -9
opik/id_helpers.py +18 -0
opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
opik/integrations/adk/helpers.py +16 -7
opik/integrations/adk/legacy_opik_tracer.py +7 -4
opik/integrations/adk/opik_tracer.py +14 -1
opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
opik/integrations/adk/recursive_callback_injector.py +4 -7
opik/integrations/bedrock/converse/__init__.py +0 -0
opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
opik/integrations/bedrock/invoke_model/__init__.py +0 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
opik/integrations/bedrock/invoke_model/response_types.py +34 -0
opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
opik/integrations/bedrock/opik_tracker.py +42 -4
opik/integrations/bedrock/types.py +19 -0
opik/integrations/crewai/crewai_decorator.py +8 -51
opik/integrations/crewai/opik_tracker.py +31 -10
opik/integrations/crewai/patchers/__init__.py +5 -0
opik/integrations/crewai/patchers/flow.py +118 -0
opik/integrations/crewai/patchers/litellm_completion.py +30 -0
opik/integrations/crewai/patchers/llm_client.py +207 -0
opik/integrations/dspy/callback.py +80 -17
opik/integrations/dspy/parsers.py +168 -0
opik/integrations/harbor/__init__.py +17 -0
opik/integrations/harbor/experiment_service.py +269 -0
opik/integrations/harbor/opik_tracker.py +528 -0
opik/integrations/haystack/opik_connector.py +2 -2
opik/integrations/haystack/opik_tracer.py +3 -7
opik/integrations/langchain/__init__.py +3 -1
opik/integrations/langchain/helpers.py +96 -0
opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
opik/integrations/langchain/opik_encoder_extension.py +1 -1
opik/integrations/langchain/opik_tracer.py +474 -229
opik/integrations/litellm/__init__.py +5 -0
opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
opik/integrations/litellm/litellm_completion_decorator.py +242 -0
opik/integrations/litellm/opik_tracker.py +43 -0
opik/integrations/litellm/stream_patchers.py +151 -0
opik/integrations/llama_index/callback.py +146 -107
opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
opik/integrations/openai/opik_tracker.py +1 -1
opik/integrations/sagemaker/auth.py +5 -1
opik/llm_usage/google_usage.py +3 -1
opik/llm_usage/opik_usage.py +7 -8
opik/llm_usage/opik_usage_factory.py +4 -2
opik/logging_messages.py +6 -0
opik/message_processing/batching/base_batcher.py +14 -21
opik/message_processing/batching/batch_manager.py +22 -10
opik/message_processing/batching/batch_manager_constuctors.py +10 -0
opik/message_processing/batching/batchers.py +59 -27
opik/message_processing/batching/flushing_thread.py +0 -3
opik/message_processing/emulation/__init__.py +0 -0
opik/message_processing/emulation/emulator_message_processor.py +578 -0
opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
opik/message_processing/emulation/models.py +162 -0
opik/message_processing/encoder_helpers.py +79 -0
opik/message_processing/messages.py +56 -1
opik/message_processing/preprocessing/__init__.py +0 -0
opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
opik/message_processing/preprocessing/constants.py +1 -0
opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
opik/message_processing/preprocessing/preprocessor.py +36 -0
opik/message_processing/processors/__init__.py +0 -0
opik/message_processing/processors/attachments_extraction_processor.py +146 -0
opik/message_processing/processors/message_processors.py +92 -0
opik/message_processing/processors/message_processors_chain.py +96 -0
opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
opik/message_processing/queue_consumer.py +9 -3
opik/message_processing/streamer.py +71 -33
opik/message_processing/streamer_constructors.py +43 -10
opik/opik_context.py +16 -4
opik/plugins/pytest/hooks.py +5 -3
opik/rest_api/__init__.py +346 -15
opik/rest_api/alerts/__init__.py +7 -0
opik/rest_api/alerts/client.py +667 -0
opik/rest_api/alerts/raw_client.py +1015 -0
opik/rest_api/alerts/types/__init__.py +7 -0
opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
opik/rest_api/annotation_queues/__init__.py +4 -0
opik/rest_api/annotation_queues/client.py +668 -0
opik/rest_api/annotation_queues/raw_client.py +1019 -0
opik/rest_api/automation_rule_evaluators/client.py +34 -2
opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
opik/rest_api/client.py +15 -0
opik/rest_api/dashboards/__init__.py +4 -0
opik/rest_api/dashboards/client.py +462 -0
opik/rest_api/dashboards/raw_client.py +648 -0
opik/rest_api/datasets/client.py +1310 -44
opik/rest_api/datasets/raw_client.py +2269 -358
opik/rest_api/experiments/__init__.py +2 -2
opik/rest_api/experiments/client.py +191 -5
opik/rest_api/experiments/raw_client.py +301 -7
opik/rest_api/experiments/types/__init__.py +4 -1
opik/rest_api/experiments/types/experiment_update_status.py +5 -0
opik/rest_api/experiments/types/experiment_update_type.py +5 -0
opik/rest_api/experiments/types/experiment_write_status.py +5 -0
opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
opik/rest_api/llm_provider_key/client.py +20 -0
opik/rest_api/llm_provider_key/raw_client.py +20 -0
opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
opik/rest_api/manual_evaluation/__init__.py +4 -0
opik/rest_api/manual_evaluation/client.py +347 -0
opik/rest_api/manual_evaluation/raw_client.py +543 -0
opik/rest_api/optimizations/client.py +145 -9
opik/rest_api/optimizations/raw_client.py +237 -13
opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
opik/rest_api/prompts/__init__.py +2 -2
opik/rest_api/prompts/client.py +227 -6
opik/rest_api/prompts/raw_client.py +331 -2
opik/rest_api/prompts/types/__init__.py +3 -1
opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
opik/rest_api/spans/__init__.py +0 -2
opik/rest_api/spans/client.py +238 -76
opik/rest_api/spans/raw_client.py +307 -95
opik/rest_api/spans/types/__init__.py +0 -2
opik/rest_api/traces/client.py +572 -161
opik/rest_api/traces/raw_client.py +736 -229
opik/rest_api/types/__init__.py +352 -17
opik/rest_api/types/aggregation_data.py +1 -0
opik/rest_api/types/alert.py +33 -0
opik/rest_api/types/alert_alert_type.py +5 -0
opik/rest_api/types/alert_page_public.py +24 -0
opik/rest_api/types/alert_public.py +33 -0
opik/rest_api/types/alert_public_alert_type.py +5 -0
opik/rest_api/types/alert_trigger.py +27 -0
opik/rest_api/types/alert_trigger_config.py +28 -0
opik/rest_api/types/alert_trigger_config_public.py +28 -0
opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
opik/rest_api/types/alert_trigger_config_type.py +10 -0
opik/rest_api/types/alert_trigger_config_write.py +22 -0
opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
opik/rest_api/types/alert_trigger_event_type.py +19 -0
opik/rest_api/types/alert_trigger_public.py +27 -0
opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
opik/rest_api/types/alert_trigger_write.py +23 -0
opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
opik/rest_api/types/alert_write.py +28 -0
opik/rest_api/types/alert_write_alert_type.py +5 -0
opik/rest_api/types/annotation_queue.py +42 -0
opik/rest_api/types/annotation_queue_batch.py +27 -0
opik/rest_api/types/annotation_queue_item_ids.py +19 -0
opik/rest_api/types/annotation_queue_page_public.py +28 -0
opik/rest_api/types/annotation_queue_public.py +38 -0
opik/rest_api/types/annotation_queue_public_scope.py +5 -0
opik/rest_api/types/annotation_queue_reviewer.py +20 -0
opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
opik/rest_api/types/annotation_queue_scope.py +5 -0
opik/rest_api/types/annotation_queue_write.py +31 -0
opik/rest_api/types/annotation_queue_write_scope.py +5 -0
opik/rest_api/types/audio_url.py +19 -0
opik/rest_api/types/audio_url_public.py +19 -0
opik/rest_api/types/audio_url_write.py +19 -0
opik/rest_api/types/automation_rule_evaluator.py +62 -2
opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
opik/rest_api/types/boolean_feedback_definition.py +25 -0
opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
opik/rest_api/types/boolean_feedback_detail.py +29 -0
opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
opik/rest_api/types/dashboard_page_public.py +24 -0
opik/rest_api/types/dashboard_public.py +30 -0
opik/rest_api/types/dataset.py +4 -0
opik/rest_api/types/dataset_expansion.py +42 -0
opik/rest_api/types/dataset_expansion_response.py +39 -0
opik/rest_api/types/dataset_item.py +2 -0
opik/rest_api/types/dataset_item_changes_public.py +5 -0
opik/rest_api/types/dataset_item_compare.py +2 -0
opik/rest_api/types/dataset_item_filter.py +27 -0
opik/rest_api/types/dataset_item_filter_operator.py +21 -0
opik/rest_api/types/dataset_item_page_compare.py +5 -0
opik/rest_api/types/dataset_item_page_public.py +5 -0
opik/rest_api/types/dataset_item_public.py +2 -0
opik/rest_api/types/dataset_item_update.py +39 -0
opik/rest_api/types/dataset_item_write.py +1 -0
opik/rest_api/types/dataset_public.py +4 -0
opik/rest_api/types/dataset_public_status.py +5 -0
opik/rest_api/types/dataset_status.py +5 -0
opik/rest_api/types/dataset_version_diff.py +22 -0
opik/rest_api/types/dataset_version_diff_stats.py +24 -0
opik/rest_api/types/dataset_version_page_public.py +23 -0
opik/rest_api/types/dataset_version_public.py +59 -0
opik/rest_api/types/dataset_version_summary.py +46 -0
opik/rest_api/types/dataset_version_summary_public.py +46 -0
opik/rest_api/types/experiment.py +7 -2
opik/rest_api/types/experiment_group_response.py +2 -0
opik/rest_api/types/experiment_public.py +7 -2
opik/rest_api/types/experiment_public_status.py +5 -0
opik/rest_api/types/experiment_score.py +20 -0
opik/rest_api/types/experiment_score_public.py +20 -0
opik/rest_api/types/experiment_score_write.py +20 -0
opik/rest_api/types/experiment_status.py +5 -0
opik/rest_api/types/feedback.py +25 -1
opik/rest_api/types/feedback_create.py +20 -1
opik/rest_api/types/feedback_object_public.py +27 -1
opik/rest_api/types/feedback_public.py +25 -1
opik/rest_api/types/feedback_score_batch_item.py +2 -1
opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
opik/rest_api/types/feedback_score_public.py +4 -0
opik/rest_api/types/feedback_update.py +20 -1
opik/rest_api/types/group_content_with_aggregations.py +1 -0
opik/rest_api/types/group_detail.py +19 -0
opik/rest_api/types/group_details.py +20 -0
opik/rest_api/types/guardrail.py +1 -0
opik/rest_api/types/guardrail_write.py +1 -0
opik/rest_api/types/ids_holder.py +19 -0
opik/rest_api/types/image_url.py +20 -0
opik/rest_api/types/image_url_public.py +20 -0
opik/rest_api/types/image_url_write.py +20 -0
opik/rest_api/types/llm_as_judge_message.py +5 -1
opik/rest_api/types/llm_as_judge_message_content.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
opik/rest_api/types/llm_as_judge_message_public.py +5 -1
opik/rest_api/types/llm_as_judge_message_write.py +5 -1
opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
opik/rest_api/types/manual_evaluation_request.py +38 -0
opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
opik/rest_api/types/manual_evaluation_response.py +27 -0
opik/rest_api/types/optimization.py +4 -2
opik/rest_api/types/optimization_public.py +4 -2
opik/rest_api/types/optimization_public_status.py +3 -1
opik/rest_api/types/optimization_status.py +3 -1
opik/rest_api/types/optimization_studio_config.py +27 -0
opik/rest_api/types/optimization_studio_config_public.py +27 -0
opik/rest_api/types/optimization_studio_config_write.py +27 -0
opik/rest_api/types/optimization_studio_log.py +22 -0
opik/rest_api/types/optimization_write.py +4 -2
opik/rest_api/types/optimization_write_status.py +3 -1
opik/rest_api/types/project.py +1 -0
opik/rest_api/types/project_detailed.py +1 -0
opik/rest_api/types/project_reference.py +31 -0
opik/rest_api/types/project_reference_public.py +31 -0
opik/rest_api/types/project_stats_summary_item.py +1 -0
opik/rest_api/types/prompt.py +6 -0
opik/rest_api/types/prompt_detail.py +6 -0
opik/rest_api/types/prompt_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_public.py +6 -0
opik/rest_api/types/prompt_public_template_structure.py +5 -0
opik/rest_api/types/prompt_template_structure.py +5 -0
opik/rest_api/types/prompt_version.py +3 -0
opik/rest_api/types/prompt_version_detail.py +3 -0
opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_version_link.py +1 -0
opik/rest_api/types/prompt_version_link_public.py +1 -0
opik/rest_api/types/prompt_version_page_public.py +5 -0
opik/rest_api/types/prompt_version_public.py +3 -0
opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
opik/rest_api/types/prompt_version_template_structure.py +5 -0
opik/rest_api/types/prompt_version_update.py +33 -0
opik/rest_api/types/provider_api_key.py +9 -0
opik/rest_api/types/provider_api_key_provider.py +1 -1
opik/rest_api/types/provider_api_key_public.py +9 -0
opik/rest_api/types/provider_api_key_public_provider.py +1 -1
opik/rest_api/types/score_name.py +1 -0
opik/rest_api/types/service_toggles_config.py +18 -0
opik/rest_api/types/span.py +1 -2
opik/rest_api/types/span_enrichment_options.py +31 -0
opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
opik/rest_api/types/span_filter.py +23 -0
opik/rest_api/types/span_filter_operator.py +21 -0
opik/rest_api/types/span_filter_write.py +23 -0
opik/rest_api/types/span_filter_write_operator.py +21 -0
opik/rest_api/types/span_llm_as_judge_code.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
opik/rest_api/types/span_public.py +1 -2
opik/rest_api/types/span_update.py +46 -0
opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
opik/rest_api/types/span_write.py +1 -2
opik/rest_api/types/studio_evaluation.py +20 -0
opik/rest_api/types/studio_evaluation_public.py +20 -0
opik/rest_api/types/studio_evaluation_write.py +20 -0
opik/rest_api/types/studio_llm_model.py +21 -0
opik/rest_api/types/studio_llm_model_public.py +21 -0
opik/rest_api/types/studio_llm_model_write.py +21 -0
opik/rest_api/types/studio_message.py +20 -0
opik/rest_api/types/studio_message_public.py +20 -0
opik/rest_api/types/studio_message_write.py +20 -0
opik/rest_api/types/studio_metric.py +21 -0
opik/rest_api/types/studio_metric_public.py +21 -0
opik/rest_api/types/studio_metric_write.py +21 -0
opik/rest_api/types/studio_optimizer.py +21 -0
opik/rest_api/types/studio_optimizer_public.py +21 -0
opik/rest_api/types/studio_optimizer_write.py +21 -0
opik/rest_api/types/studio_prompt.py +20 -0
opik/rest_api/types/studio_prompt_public.py +20 -0
opik/rest_api/types/studio_prompt_write.py +20 -0
opik/rest_api/types/trace.py +11 -2
opik/rest_api/types/trace_enrichment_options.py +32 -0
opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
opik/rest_api/types/trace_filter.py +23 -0
opik/rest_api/types/trace_filter_operator.py +21 -0
opik/rest_api/types/trace_filter_write.py +23 -0
opik/rest_api/types/trace_filter_write_operator.py +21 -0
opik/rest_api/types/trace_public.py +11 -2
opik/rest_api/types/trace_thread_filter_write.py +23 -0
opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
opik/rest_api/types/trace_thread_identifier.py +1 -0
opik/rest_api/types/trace_update.py +39 -0
opik/rest_api/types/trace_write.py +1 -2
opik/rest_api/types/value_entry.py +2 -0
opik/rest_api/types/value_entry_compare.py +2 -0
opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
opik/rest_api/types/value_entry_public.py +2 -0
opik/rest_api/types/video_url.py +19 -0
opik/rest_api/types/video_url_public.py +19 -0
opik/rest_api/types/video_url_write.py +19 -0
opik/rest_api/types/webhook.py +28 -0
opik/rest_api/types/webhook_examples.py +19 -0
opik/rest_api/types/webhook_public.py +28 -0
opik/rest_api/types/webhook_test_result.py +23 -0
opik/rest_api/types/webhook_test_result_status.py +5 -0
opik/rest_api/types/webhook_write.py +23 -0
opik/rest_api/types/welcome_wizard_tracking.py +22 -0
opik/rest_api/types/workspace_configuration.py +5 -0
opik/rest_api/welcome_wizard/__init__.py +4 -0
opik/rest_api/welcome_wizard/client.py +195 -0
opik/rest_api/welcome_wizard/raw_client.py +208 -0
opik/rest_api/workspaces/client.py +14 -2
opik/rest_api/workspaces/raw_client.py +10 -0
opik/s3_httpx_client.py +14 -1
opik/simulation/__init__.py +6 -0
opik/simulation/simulated_user.py +99 -0
opik/simulation/simulator.py +108 -0
opik/synchronization.py +5 -6
opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
opik/types.py +36 -0
opik/validation/chat_prompt_messages.py +241 -0
opik/validation/feedback_score.py +3 -3
opik/validation/validator.py +28 -0
opik-1.9.71.dist-info/METADATA +370 -0
opik-1.9.71.dist-info/RECORD +1110 -0
opik/api_objects/prompt/prompt.py +0 -112
opik/cli.py +0 -193
opik/hooks.py +0 -13
opik/integrations/bedrock/chunks_aggregator.py +0 -55
opik/integrations/bedrock/helpers.py +0 -8
opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
opik-1.8.39.dist-info/METADATA +0 -339
opik-1.8.39.dist-info/RECORD +0 -790
/opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
/opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
/opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
/opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
/opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0

opik/evaluation/models/litellm/litellm_chat_model.py (changed)

@@ -4,28 +4,67 @@ import warnings
 from functools import cached_property
 from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING, Type
 import pydantic
+import tenacity
 if TYPE_CHECKING:
     from litellm.types.utils import ModelResponse
 import opik.semantic_version as semantic_version
+import opik.integrations.litellm as litellm_integration
+import opik.config as opik_config
 from .. import base_model
-from . import opik_monitor, warning_filters
+from . import warning_filters, util
+from opik import exceptions
 LOGGER = logging.getLogger(__name__)
+def _log_warning(message: str, *args: Any) -> None:
+    """Emit a warning to both this module logger and the root logger.
+    pytest's logging capture hooks into the root logger, while production runs use
+    the module-level logger. Logging to both keeps warnings visible in tests and at
+    runtime without duplicating call sites.
+    """
+    LOGGER.warning(message, *args)
+    root_logger = logging.getLogger()
+    if root_logger is not LOGGER:
+        root_logger.log(logging.WARNING, message, *args)
+def _extract_message_content(choice: Dict[str, Any]) -> Optional[str]:
+    message = choice.get("message")
+    if isinstance(message, dict):
+        content = message.get("content")
+    else:
+        content = getattr(message, "content", None)
+    if content is not None and not isinstance(content, str):
+        raise ValueError("LLM choice contains non-text content")
+    return content
+def _first_choice(response: Any) -> Dict[str, Any]:
+    choices = getattr(response, "choices", None)
+    if not isinstance(choices, list) or not choices:
+        raise exceptions.BaseLLMError(
+            "LLM response did not contain any choices to parse."
+        )
+    return util.normalise_choice(choices[0])
 class LiteLLMChatModel(base_model.OpikBaseModel):
     def __init__(
         self,
-        model_name: str = "gpt-4o",
+        model_name: str = "gpt-5-nano",
         must_support_arguments: Optional[List[str]] = None,
         **completion_kwargs: Any,
     ) -> None:
+        import litellm
         """
         Initializes the base model with a given model name.
-        Wraps `litellm.completion` function.
         You can find all possible completion_kwargs parameters here: https://docs.litellm.ai/docs/completion/input.
         Args:
@@ -39,12 +78,13 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
             completion_kwargs: key-value arguments to always pass additionally into `litellm.completion` function.
         """
         super().__init__(model_name=model_name)
         self._check_model_name()
         self._check_must_support_arguments(must_support_arguments)
+        self._unsupported_warned: Set[str] = set()
         self._completion_kwargs: Dict[str, Any] = (
             self._remove_unnecessary_not_supported_params(completion_kwargs)
         )
@@ -55,11 +95,21 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
             # Litellm has already fixed that, but it is not released yet, so this filter
             # should be removed from here soon.
             warnings.simplefilter("ignore")
-            import litellm
         warning_filters.add_warning_filters()
-        self._engine = litellm
+        config = opik_config.OpikConfig()
+        if config.enable_litellm_models_monitoring:
+            self._litellm_completion = litellm_integration.track_completion()(
+                litellm.completion
+            )
+            self._litellm_acompletion = litellm_integration.track_completion()(
+                litellm.acompletion
+            )
+        else:
+            self._litellm_completion = litellm.completion
+            self._litellm_acompletion = litellm.acompletion
     @cached_property
     def supported_params(self) -> Set[str]:
@@ -114,6 +164,7 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
     ) -> Dict[str, Any]:
         filtered_params = {**params}
+        # Fix for impacted providers like Groq and OpenAI
         if (
             "response_format" in params
             and "response_format" not in self.supported_params
@@ -122,9 +173,54 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
             LOGGER.debug(
                 "This model does not support the response_format parameter and it will be ignored."
             )
+        # NOTE: Filtering based on `supported_params` has been disabled temporarily
+        # because LiteLLM does not surface provider-specific connection fields via
+        # `get_supported_openai_params`. Dropping those kwargs breaks Azure/Groq
+        # users who rely on parameters such as `api_version` and `azure_endpoint`.
+        # The old logic is kept here commented for future restoration.
+        #
+        # for key in list(filtered_params.keys()):
+        #     if (
+        #         key not in self.supported_params
+        #         and not util.should_preserve_provider_param(key)
+        #     ):
+        #         filtered_params.pop(key)
+        #         if key not in self._unsupported_warned:
+        #             _log_warning(
+        #                 "Parameter '%s' is not supported by model %s and will be ignored.",
+        #                 key,
+        #                 self.model_name,
+        #             )
+        #             self._unsupported_warned.add(key)
+        util.apply_model_specific_filters(
+            model_name=self.model_name,
+            params=filtered_params,
+            already_warned=self._unsupported_warned,
+            warn=self._warn_about_unsupported_param,
+        )
         return filtered_params
+    def _warn_about_unsupported_param(self, param: str, value: Any) -> None:
+        if param in {"logprobs", "top_logprobs"}:
+            # LiteLLM warns noisily when models like gpt-5-nano don't support these
+            # fields. We already drop them gracefully, so skip logging to avoid
+            # spamming GEval users with repeated warnings.
+            return
+        if param == "temperature":
+            _log_warning(
+                "Model %s only supports temperature=1. Dropping temperature=%s.",
+                self.model_name,
+                value,
+            )
+        else:
+            _log_warning(
+                "Model %s does not support %s. Dropping the parameter.",
+                self.model_name,
+                param,
+            )
     def generate_string(
         self,
         input: str,
@@ -155,10 +251,14 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
             },
         ]
-        response = self.generate_provider_response(
-            messages=request, **valid_litellm_params
-        )
-        return response.choices[0].message.content
+        with base_model.get_provider_response(
+            model_provider=self,
+            messages=request,
+            **valid_litellm_params,
+        ) as response:
+            choice = _first_choice(response)
+            content = _extract_message_content(choice)
+            return base_model.check_model_output_string(content)
     def generate_provider_response(
         self,
@@ -166,6 +266,8 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
         **kwargs: Any,
     ) -> "ModelResponse":
         """
+        Do not use this method directly. It is intended to be used within `base_model.get_provider_response()` method.
         Generate a provider-specific response. Can be used to interface with
         the underlying model provider (e.g., OpenAI, Anthropic) and get raw output.
         You can find all possible input parameters here: https://docs.litellm.ai/docs/completion/input
@@ -179,21 +281,29 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
             Any: The response from the model provider, which can be of any type depending on the use case and LLM.
         """
+        # Extract retry configuration before filtering params
+        retries = kwargs.pop("__opik_retries", 3)
+        try:
+            max_attempts = max(1, int(retries))
+        except (TypeError, ValueError):
+            max_attempts = 1
         # we need to pop messages first, and after we will check the rest params
         valid_litellm_params = self._remove_unnecessary_not_supported_params(kwargs)
         all_kwargs = {**self._completion_kwargs, **valid_litellm_params}
-        if (
-            opik_monitor.enabled_in_config()
-            and not opik_monitor.opik_is_misconfigured()
-        ):
-            all_kwargs = opik_monitor.try_add_opik_monitoring_to_params(all_kwargs)
-        response = self._engine.completion(
-            model=self.model_name, messages=messages, **all_kwargs
+        retrying = tenacity.Retrying(
+            reraise=True,
+            stop=tenacity.stop_after_attempt(max_attempts),
+            wait=tenacity.wait_exponential(multiplier=0.5, min=0.5, max=8.0),
         )
-        return response
+        return retrying(
+            self._litellm_completion,
+            model=self.model_name,
+            messages=messages,
+            **all_kwargs,
+        )
     async def agenerate_string(
         self,
@@ -225,15 +335,19 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
             },
         ]
-        response = await self.agenerate_provider_response(
-            messages=request, **valid_litellm_params
-        )
-        return response.choices[0].message.content
+        async with base_model.aget_provider_response(
+            model_provider=self, messages=request, **valid_litellm_params
+        ) as response:
+            choice = _first_choice(response)
+            content = _extract_message_content(choice)
+            return base_model.check_model_output_string(content)
     async def agenerate_provider_response(
         self, messages: List[Dict[str, Any]], **kwargs: Any
     ) -> "ModelResponse":
         """
+        Do not use this method directly. It is intended to be used within `base_model.aget_provider_response()` method.
         Generate a provider-specific response. Can be used to interface with
         the underlying model provider (e.g., OpenAI, Anthropic) and get raw output. Async version.
         You can find all possible input parameters here: https://docs.litellm.ai/docs/completion/input
@@ -247,14 +361,27 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
             Any: The response from the model provider, which can be of any type depending on the use case and LLM.
         """
+        retries = kwargs.pop("__opik_retries", 3)
+        try:
+            max_attempts = max(1, int(retries))
+        except (TypeError, ValueError):
+            max_attempts = 1
         valid_litellm_params = self._remove_unnecessary_not_supported_params(kwargs)
         all_kwargs = {**self._completion_kwargs, **valid_litellm_params}
-        if opik_monitor.enabled_in_config():
-            all_kwargs = opik_monitor.try_add_opik_monitoring_to_params(all_kwargs)
-        response = await self._engine.acompletion(
-            model=self.model_name, messages=messages, **all_kwargs
+        retrying = tenacity.AsyncRetrying(
+            reraise=True,
+            stop=tenacity.stop_after_attempt(max_attempts),
+            wait=tenacity.wait_exponential(multiplier=0.5, min=0.5, max=8.0),
         )
-        return response
+        async for attempt in retrying:
+            with attempt:
+                return await self._litellm_acompletion(
+                    model=self.model_name, messages=messages, **all_kwargs
+                )
+        raise exceptions.BaseLLMError(
+            "Async LLM completion failed without raising an exception"
+        )  # pragma: no cover

opik/evaluation/models/litellm/util.py ADDED Viewed

@@ -0,0 +1,125 @@
+"""Utility helpers shared across LiteLLM models."""
+from __future__ import annotations
+from typing import Any, Callable, Dict, Set
+def normalise_choice(choice: Any) -> Dict[str, Any]:
+    """Produce a dict view of a LiteLLM choice regardless of response type.
+    LiteLLM may return raw dicts, Pydantic models, or dataclasses. Normalising to a
+    dict here keeps downstream parsing logic consistent and backwards compatible with
+    older client versions.
+    """
+    if isinstance(choice, dict):
+        return choice
+    if hasattr(choice, "model_dump") and callable(choice.model_dump):
+        try:
+            return choice.model_dump()
+        except TypeError:
+            pass
+    normalised: Dict[str, Any] = {}
+    message = getattr(choice, "message", None)
+    if message is not None:
+        normalised["message"] = message
+    logprobs = getattr(choice, "logprobs", None)
+    if logprobs is not None:
+        normalised["logprobs"] = logprobs
+    return normalised
+def apply_model_specific_filters(
+    model_name: str,
+    params: Dict[str, Any],
+    already_warned: Set[str],
+    warn: Callable[[str, Any], None],
+) -> None:
+    """Adjust/drop params for specific model families before calling LiteLLM.
+    Currently handles:
+    - GPT-5: only honours temperature=1 and does not return log probabilities.
+    - DashScope Qwen: enforces constraints for logprobs / top_logprobs
+    """
+    if model_name.startswith("gpt-5"):
+        _apply_gpt5_filters(params, already_warned, warn)
+        return
+    if model_name.startswith("dashscope/"):
+        _apply_qwen_dashscope_filters(params, already_warned, warn)
+        return
+def _apply_gpt5_filters(
+    params: Dict[str, Any],
+    already_warned: Set[str],
+    warn: Callable[[str, Any], None],
+) -> None:
+    """Apply GPT-5 specific parameter filters.
+    Only honours temperature=1 and does not return log probabilities.
+    Removing those eagerly avoids provider errors while the callback surfaces a
+    one-time warning to the caller.
+    """
+    unsupported: list[tuple[str, Any]] = []
+    if "temperature" in params:
+        value = params["temperature"]
+        try:
+            numeric_value = float(value)
+        except (TypeError, ValueError):
+            numeric_value = None
+        if numeric_value is None or abs(numeric_value - 1.0) > 1e-6:
+            unsupported.append(("temperature", value))
+    for param in ("logprobs", "top_logprobs"):
+        if param in params:
+            unsupported.append((param, params[param]))
+    _drop_unsupported_params_with_warning(
+        params,
+        unsupported,
+        already_warned,
+        warn,
+    )
+def _apply_qwen_dashscope_filters(
+    params: Dict[str, Any],
+    already_warned: Set[str],
+    warn: Callable[[str, Any], None],
+) -> None:
+    """Apply Qwen/DashScope specific parameter filters.
+    Does not return log probabilities.
+    """
+    unsupported: list[tuple[str, Any]] = []
+    for param in ("logprobs", "top_logprobs"):
+        if param in params:
+            unsupported.append((param, params[param]))
+    _drop_unsupported_params_with_warning(
+        params,
+        unsupported,
+        already_warned,
+        warn,
+    )
+def _drop_unsupported_params_with_warning(
+    params: Dict[str, Any],
+    unsupported_params: list[tuple[str, Any]],
+    already_warned: Set[str],
+    warn: Callable[[str, Any], None],
+) -> None:
+    """Remove unsupported params and emit warnings once per param name."""
+    for param, value in unsupported_params:
+        params.pop(param, None)
+        if param in already_warned:
+            continue
+        warn(param, value)
+        already_warned.add(param)

opik/evaluation/models/litellm/warning_filters.py CHANGED Viewed

@@ -3,8 +3,14 @@ import warnings
 from typing import Any
+_FILTERS_INSTALLED = False
 def add_warning_filters() -> None:
-    # TODO: This should be removed when we have fixed the error messages in the LiteLLM library
+    global _FILTERS_INSTALLED
+    if _FILTERS_INSTALLED:
+        return
     warnings.filterwarnings("ignore", message="coroutine '.*' was never awaited")
     warnings.filterwarnings(
         "ignore",
@@ -19,9 +25,15 @@ def add_warning_filters() -> None:
             )
     # Add filter to multiple possible loggers
-    filter = NoEventLoopFilterLiteLLM()
-    logging.getLogger("LiteLLM").addFilter(filter)
+    lite_logger = logging.getLogger("LiteLLM")
+    has_filter = any(
+        isinstance(f, NoEventLoopFilterLiteLLM) for f in lite_logger.filters
+    )
+    if not has_filter:
+        lite_logger.addFilter(NoEventLoopFilterLiteLLM())
     import litellm
-    litellm.suppress_debug_info = True  # to disable colorized prints with links to litellm whenever an LLM provider raises an error
+    litellm.suppress_debug_info = True
+    _FILTERS_INSTALLED = True

opik/evaluation/models/model_capabilities.py ADDED Viewed

@@ -0,0 +1,187 @@
+"""
+Capability registry for evaluation models.
+The registry is designed to grow beyond vision support (e.g. audio in the future).
+"""
+from __future__ import annotations
+from typing import Callable, Dict, Iterable, Optional, Set
+CapabilityDetector = Callable[[str], bool]
+VISION_MODEL_PREFIXES: Set[str] = {
+    # OpenAI
+    "gpt-4-vision",
+    "gpt-4o",
+    "gpt-4o-mini",
+    "gpt-4-turbo",
+    "chatgpt-4o-latest",
+    "gpt-5-mini",
+    "gpt-4.1",
+    "gpt-4.1-mini",
+    "gpt-4.1-nano",
+    "gpt-4.1-preview",
+    # Anthropic
+    "claude-3",
+    "claude-3-5",
+    # Google
+    "gemini-1.5-pro",
+    "gemini-1.5-flash",
+    "gemini-pro-vision",
+    "gemini-2.0-flash",
+    # Meta
+    "llama-3.2-11b-vision",
+    "llama-3.2-90b-vision",
+    # Mistral
+    "pixtral",
+    # Misc
+    "qwen-vl",
+    "qwen2-vl",
+    "phi-3-vision",
+    "phi-3.5-vision",
+    "llava",
+    "cogvlm",
+    "yi-vl",
+}
+VISION_MODEL_PREFIXES = {prefix.lower() for prefix in VISION_MODEL_PREFIXES}
+VISION_MODEL_SUFFIXES: Set[str] = {"-vision", "-vl"}
+def _strip_provider_prefix(model_name: str) -> str:
+    if "/" not in model_name:
+        return model_name
+    _, suffix = model_name.split("/", 1)
+    return suffix
+def _litellm_supports_vision(model_name: str) -> bool:
+    try:
+        import litellm  # type: ignore
+        return litellm.supports_vision(model=model_name)
+    except Exception:
+        return False
+def vision_capability_detector(model_name: str) -> bool:
+    stripped = _strip_provider_prefix(model_name)
+    candidates = {model_name, stripped}
+    for candidate in candidates:
+        if _litellm_supports_vision(candidate):
+            return True
+        normalized = candidate.lower()
+        if any(normalized.startswith(prefix) for prefix in VISION_MODEL_PREFIXES):
+            return True
+        if any(normalized.endswith(suffix) for suffix in VISION_MODEL_SUFFIXES):
+            return True
+    return False
+def video_capability_detector(model_name: str) -> bool:
+    """
+    Heuristically determine whether a model accepts video inputs.
+    Providers rarely expose structured metadata for video support, so we fall back
+    to naming conventions (e.g. models whose names contain ``video`` or ``qwen``
+    + ``vl``). When those heuristics fail we delegate to the vision detector since
+    current SDK integrations treat video as an extension of multimodal/vision APIs.
+    """
+    stripped = _strip_provider_prefix(model_name)
+    candidates = {model_name, stripped}
+    for candidate in candidates:
+        normalized = candidate.lower()
+        if "video" in normalized:
+            return True
+        if "qwen" in normalized and "vl" in normalized:
+            return True
+    # TODO(opik): litellm/model metadata still treats video + image inputs the same.
+    # Fall back to the vision heuristic so we can keep this dedicated capability
+    # and tighten detection once providers expose richer metadata.
+    return vision_capability_detector(model_name)
+class ModelCapabilitiesRegistry:
+    """
+    Central registry for model capability detection.
+    """
+    def __init__(self) -> None:
+        self._capability_detectors: Dict[str, CapabilityDetector] = {}
+    def register_capability_detector(
+        self, capability: str, detector: CapabilityDetector
+    ) -> None:
+        """
+        Register a detector callable for a capability name.
+        """
+        self._capability_detectors[capability] = detector
+    def supports(self, capability: str, model_name: Optional[str]) -> bool:
+        """
+        Return True when the supplied model name supports the requested capability.
+        """
+        if not model_name:
+            return False
+        detector = self._capability_detectors.get(capability)
+        if detector is None:
+            return False
+        try:
+            return detector(model_name)
+        except Exception:
+            return False
+    def supports_vision(self, model_name: Optional[str]) -> bool:
+        """
+        Convenience wrapper for vision-capable detection.
+        """
+        return self.supports("vision", model_name)
+    def supports_video(self, model_name: Optional[str]) -> bool:
+        """
+        Convenience wrapper for video-capable detection.
+        """
+        return self.supports("video", model_name)
+    def add_vision_model(self, model_name: str) -> None:
+        # Extend the module-level registry used by vision_capability_detector
+        VISION_MODEL_PREFIXES.add(self._strip_provider_prefix(model_name).lower())
+    def add_vision_models(self, model_names: Iterable[str]) -> None:
+        for model_name in model_names:
+            self.add_vision_model(model_name)
+    def _supports_vision(self, model_name: str) -> bool:
+        return vision_capability_detector(model_name)
+    @staticmethod
+    def _strip_provider_prefix(model_name: str) -> str:
+        return _strip_provider_prefix(model_name)
+    @staticmethod
+    def _litellm_supports_vision(model_name: str) -> bool:
+        return _litellm_supports_vision(model_name)
+MODEL_CAPABILITIES_REGISTRY = ModelCapabilitiesRegistry()
+MODEL_CAPABILITIES_REGISTRY.register_capability_detector(
+    "vision", vision_capability_detector
+)
+MODEL_CAPABILITIES_REGISTRY.register_capability_detector(
+    "video", video_capability_detector
+)
+# Backwards compatibility shim for previous API which exposed a class with classmethods.
+ModelCapabilities = MODEL_CAPABILITIES_REGISTRY
+__all__ = [
+    "ModelCapabilitiesRegistry",
+    "MODEL_CAPABILITIES_REGISTRY",
+    "ModelCapabilities",
+    "vision_capability_detector",
+    "video_capability_detector",
+]

opik/evaluation/models/models_factory.py CHANGED Viewed

@@ -1,13 +1,35 @@
-from typing import Optional, Any
+from typing import Optional, Any, Dict
 from .litellm import litellm_chat_model
 from . import base_model
-DEFAULT_GPT_MODEL_NAME = "gpt-4o"
+DEFAULT_GPT_MODEL_NAME = "gpt-5-nano"
+_MODEL_CACHE: Dict[Any, base_model.OpikBaseModel] = {}
+def _freeze(value: Any) -> Any:
+    if isinstance(value, dict):
+        return frozenset((k, _freeze(v)) for k, v in value.items())
+    if isinstance(value, (list, tuple)):
+        return tuple(_freeze(v) for v in value)
+    if isinstance(value, set):
+        return frozenset(_freeze(v) for v in value)
+    return value
+def _make_cache_key(model_name: str, model_kwargs: Dict[str, Any]) -> Any:
+    frozen_kwargs = frozenset((k, _freeze(v)) for k, v in model_kwargs.items())
+    return (model_name, frozen_kwargs)
 def get(model_name: Optional[str], **model_kwargs: Any) -> base_model.OpikBaseModel:
     if model_name is None:
         model_name = DEFAULT_GPT_MODEL_NAME
-    return litellm_chat_model.LiteLLMChatModel(model_name=model_name, **model_kwargs)
+    cache_key = _make_cache_key(model_name, model_kwargs)
+    if cache_key not in _MODEL_CACHE:
+        _MODEL_CACHE[cache_key] = litellm_chat_model.LiteLLMChatModel(
+            model_name=model_name, **model_kwargs
+        )
+    return _MODEL_CACHE[cache_key]

opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

opik 1.8.39py3-none-any.whl → 1.9.71py3-none-any.whl