opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +19 -3
- opik/anonymizer/__init__.py +5 -0
- opik/anonymizer/anonymizer.py +12 -0
- opik/anonymizer/factory.py +80 -0
- opik/anonymizer/recursive_anonymizer.py +64 -0
- opik/anonymizer/rules.py +56 -0
- opik/anonymizer/rules_anonymizer.py +35 -0
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/data_helpers.py +79 -0
- opik/api_objects/dataset/dataset.py +64 -4
- opik/api_objects/dataset/rest_operations.py +11 -2
- opik/api_objects/experiment/experiment.py +57 -57
- opik/api_objects/experiment/experiment_item.py +2 -1
- opik/api_objects/experiment/experiments_client.py +64 -0
- opik/api_objects/experiment/helpers.py +35 -11
- opik/api_objects/experiment/rest_operations.py +65 -5
- opik/api_objects/helpers.py +8 -5
- opik/api_objects/local_recording.py +81 -0
- opik/api_objects/opik_client.py +600 -108
- opik/api_objects/opik_query_language.py +39 -5
- opik/api_objects/prompt/__init__.py +12 -2
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +210 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
- opik/api_objects/prompt/client.py +189 -47
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/text/prompt.py +174 -0
- opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
- opik/api_objects/prompt/types.py +23 -0
- opik/api_objects/search_helpers.py +89 -0
- opik/api_objects/span/span_data.py +35 -25
- opik/api_objects/threads/threads_client.py +39 -5
- opik/api_objects/trace/trace_client.py +52 -2
- opik/api_objects/trace/trace_data.py +15 -24
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/__init__.py +5 -0
- opik/cli/__main__.py +6 -0
- opik/cli/configure.py +66 -0
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/healthcheck.py +21 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +49 -0
- opik/cli/proxy.py +93 -0
- opik/cli/usage_report/__init__.py +16 -0
- opik/cli/usage_report/charts.py +783 -0
- opik/cli/usage_report/cli.py +274 -0
- opik/cli/usage_report/constants.py +9 -0
- opik/cli/usage_report/extraction.py +749 -0
- opik/cli/usage_report/pdf.py +244 -0
- opik/cli/usage_report/statistics.py +78 -0
- opik/cli/usage_report/utils.py +235 -0
- opik/config.py +13 -7
- opik/configurator/configure.py +17 -0
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +9 -1
- opik/decorator/base_track_decorator.py +205 -133
- opik/decorator/context_manager/span_context_manager.py +123 -0
- opik/decorator/context_manager/trace_context_manager.py +84 -0
- opik/decorator/opik_args/__init__.py +13 -0
- opik/decorator/opik_args/api_classes.py +71 -0
- opik/decorator/opik_args/helpers.py +120 -0
- opik/decorator/span_creation_handler.py +25 -6
- opik/dict_utils.py +3 -3
- opik/evaluation/__init__.py +13 -2
- opik/evaluation/engine/engine.py +272 -75
- opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
- opik/evaluation/engine/helpers.py +31 -6
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/evaluation_result.py +168 -2
- opik/evaluation/evaluator.py +533 -62
- opik/evaluation/metrics/__init__.py +103 -4
- opik/evaluation/metrics/aggregated_metric.py +35 -6
- opik/evaluation/metrics/base_metric.py +1 -1
- opik/evaluation/metrics/conversation/__init__.py +48 -0
- opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
- opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
- opik/evaluation/metrics/conversation/helpers.py +14 -15
- opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
- opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
- opik/evaluation/metrics/conversation/types.py +4 -5
- opik/evaluation/metrics/conversation_types.py +9 -0
- opik/evaluation/metrics/heuristics/bertscore.py +107 -0
- opik/evaluation/metrics/heuristics/bleu.py +35 -15
- opik/evaluation/metrics/heuristics/chrf.py +127 -0
- opik/evaluation/metrics/heuristics/contains.py +47 -11
- opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
- opik/evaluation/metrics/heuristics/gleu.py +113 -0
- opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
- opik/evaluation/metrics/heuristics/meteor.py +119 -0
- opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
- opik/evaluation/metrics/heuristics/readability.py +129 -0
- opik/evaluation/metrics/heuristics/rouge.py +26 -9
- opik/evaluation/metrics/heuristics/spearman.py +88 -0
- opik/evaluation/metrics/heuristics/tone.py +155 -0
- opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
- opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
- opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
- opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
- opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
- opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
- opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
- opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
- opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/__init__.py +8 -0
- opik/evaluation/models/base_model.py +107 -1
- opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
- opik/evaluation/models/langchain/message_converters.py +97 -15
- opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
- opik/evaluation/models/litellm/util.py +125 -0
- opik/evaluation/models/litellm/warning_filters.py +16 -4
- opik/evaluation/models/model_capabilities.py +187 -0
- opik/evaluation/models/models_factory.py +25 -3
- opik/evaluation/preprocessing.py +92 -0
- opik/evaluation/report.py +70 -12
- opik/evaluation/rest_operations.py +49 -45
- opik/evaluation/samplers/__init__.py +4 -0
- opik/evaluation/samplers/base_dataset_sampler.py +40 -0
- opik/evaluation/samplers/random_dataset_sampler.py +48 -0
- opik/evaluation/score_statistics.py +66 -0
- opik/evaluation/scorers/__init__.py +4 -0
- opik/evaluation/scorers/scorer_function.py +55 -0
- opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
- opik/evaluation/test_case.py +3 -2
- opik/evaluation/test_result.py +1 -0
- opik/evaluation/threads/evaluator.py +31 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +33 -0
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/hooks/__init__.py +23 -0
- opik/hooks/anonymizer_hook.py +36 -0
- opik/hooks/httpx_client_hook.py +112 -0
- opik/httpx_client.py +12 -9
- opik/id_helpers.py +18 -0
- opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
- opik/integrations/adk/helpers.py +16 -7
- opik/integrations/adk/legacy_opik_tracer.py +7 -4
- opik/integrations/adk/opik_tracer.py +14 -1
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
- opik/integrations/adk/recursive_callback_injector.py +4 -7
- opik/integrations/bedrock/converse/__init__.py +0 -0
- opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
- opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
- opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
- opik/integrations/bedrock/invoke_model/__init__.py +0 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
- opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
- opik/integrations/bedrock/invoke_model/response_types.py +34 -0
- opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
- opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
- opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
- opik/integrations/bedrock/opik_tracker.py +42 -4
- opik/integrations/bedrock/types.py +19 -0
- opik/integrations/crewai/crewai_decorator.py +8 -51
- opik/integrations/crewai/opik_tracker.py +31 -10
- opik/integrations/crewai/patchers/__init__.py +5 -0
- opik/integrations/crewai/patchers/flow.py +118 -0
- opik/integrations/crewai/patchers/litellm_completion.py +30 -0
- opik/integrations/crewai/patchers/llm_client.py +207 -0
- opik/integrations/dspy/callback.py +80 -17
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_connector.py +2 -2
- opik/integrations/haystack/opik_tracer.py +3 -7
- opik/integrations/langchain/__init__.py +3 -1
- opik/integrations/langchain/helpers.py +96 -0
- opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_encoder_extension.py +1 -1
- opik/integrations/langchain/opik_tracer.py +474 -229
- opik/integrations/litellm/__init__.py +5 -0
- opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
- opik/integrations/litellm/litellm_completion_decorator.py +242 -0
- opik/integrations/litellm/opik_tracker.py +43 -0
- opik/integrations/litellm/stream_patchers.py +151 -0
- opik/integrations/llama_index/callback.py +146 -107
- opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
- opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
- opik/integrations/openai/opik_tracker.py +1 -1
- opik/integrations/sagemaker/auth.py +5 -1
- opik/llm_usage/google_usage.py +3 -1
- opik/llm_usage/opik_usage.py +7 -8
- opik/llm_usage/opik_usage_factory.py +4 -2
- opik/logging_messages.py +6 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batch_manager_constuctors.py +10 -0
- opik/message_processing/batching/batchers.py +59 -27
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/__init__.py +0 -0
- opik/message_processing/emulation/emulator_message_processor.py +578 -0
- opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
- opik/message_processing/emulation/models.py +162 -0
- opik/message_processing/encoder_helpers.py +79 -0
- opik/message_processing/messages.py +56 -1
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/processors/message_processors.py +92 -0
- opik/message_processing/processors/message_processors_chain.py +96 -0
- opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
- opik/message_processing/queue_consumer.py +9 -3
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +43 -10
- opik/opik_context.py +16 -4
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +346 -15
- opik/rest_api/alerts/__init__.py +7 -0
- opik/rest_api/alerts/client.py +667 -0
- opik/rest_api/alerts/raw_client.py +1015 -0
- opik/rest_api/alerts/types/__init__.py +7 -0
- opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
- opik/rest_api/annotation_queues/__init__.py +4 -0
- opik/rest_api/annotation_queues/client.py +668 -0
- opik/rest_api/annotation_queues/raw_client.py +1019 -0
- opik/rest_api/automation_rule_evaluators/client.py +34 -2
- opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
- opik/rest_api/client.py +15 -0
- opik/rest_api/dashboards/__init__.py +4 -0
- opik/rest_api/dashboards/client.py +462 -0
- opik/rest_api/dashboards/raw_client.py +648 -0
- opik/rest_api/datasets/client.py +1310 -44
- opik/rest_api/datasets/raw_client.py +2269 -358
- opik/rest_api/experiments/__init__.py +2 -2
- opik/rest_api/experiments/client.py +191 -5
- opik/rest_api/experiments/raw_client.py +301 -7
- opik/rest_api/experiments/types/__init__.py +4 -1
- opik/rest_api/experiments/types/experiment_update_status.py +5 -0
- opik/rest_api/experiments/types/experiment_update_type.py +5 -0
- opik/rest_api/experiments/types/experiment_write_status.py +5 -0
- opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
- opik/rest_api/llm_provider_key/client.py +20 -0
- opik/rest_api/llm_provider_key/raw_client.py +20 -0
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
- opik/rest_api/manual_evaluation/__init__.py +4 -0
- opik/rest_api/manual_evaluation/client.py +347 -0
- opik/rest_api/manual_evaluation/raw_client.py +543 -0
- opik/rest_api/optimizations/client.py +145 -9
- opik/rest_api/optimizations/raw_client.py +237 -13
- opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
- opik/rest_api/prompts/__init__.py +2 -2
- opik/rest_api/prompts/client.py +227 -6
- opik/rest_api/prompts/raw_client.py +331 -2
- opik/rest_api/prompts/types/__init__.py +3 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/spans/__init__.py +0 -2
- opik/rest_api/spans/client.py +238 -76
- opik/rest_api/spans/raw_client.py +307 -95
- opik/rest_api/spans/types/__init__.py +0 -2
- opik/rest_api/traces/client.py +572 -161
- opik/rest_api/traces/raw_client.py +736 -229
- opik/rest_api/types/__init__.py +352 -17
- opik/rest_api/types/aggregation_data.py +1 -0
- opik/rest_api/types/alert.py +33 -0
- opik/rest_api/types/alert_alert_type.py +5 -0
- opik/rest_api/types/alert_page_public.py +24 -0
- opik/rest_api/types/alert_public.py +33 -0
- opik/rest_api/types/alert_public_alert_type.py +5 -0
- opik/rest_api/types/alert_trigger.py +27 -0
- opik/rest_api/types/alert_trigger_config.py +28 -0
- opik/rest_api/types/alert_trigger_config_public.py +28 -0
- opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_write.py +22 -0
- opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
- opik/rest_api/types/alert_trigger_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_public.py +27 -0
- opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_write.py +23 -0
- opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
- opik/rest_api/types/alert_write.py +28 -0
- opik/rest_api/types/alert_write_alert_type.py +5 -0
- opik/rest_api/types/annotation_queue.py +42 -0
- opik/rest_api/types/annotation_queue_batch.py +27 -0
- opik/rest_api/types/annotation_queue_item_ids.py +19 -0
- opik/rest_api/types/annotation_queue_page_public.py +28 -0
- opik/rest_api/types/annotation_queue_public.py +38 -0
- opik/rest_api/types/annotation_queue_public_scope.py +5 -0
- opik/rest_api/types/annotation_queue_reviewer.py +20 -0
- opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
- opik/rest_api/types/annotation_queue_scope.py +5 -0
- opik/rest_api/types/annotation_queue_write.py +31 -0
- opik/rest_api/types/annotation_queue_write_scope.py +5 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +62 -2
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
- opik/rest_api/types/boolean_feedback_definition.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
- opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
- opik/rest_api/types/boolean_feedback_detail.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
- opik/rest_api/types/dashboard_page_public.py +24 -0
- opik/rest_api/types/dashboard_public.py +30 -0
- opik/rest_api/types/dataset.py +4 -0
- opik/rest_api/types/dataset_expansion.py +42 -0
- opik/rest_api/types/dataset_expansion_response.py +39 -0
- opik/rest_api/types/dataset_item.py +2 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +2 -0
- opik/rest_api/types/dataset_item_filter.py +27 -0
- opik/rest_api/types/dataset_item_filter_operator.py +21 -0
- opik/rest_api/types/dataset_item_page_compare.py +5 -0
- opik/rest_api/types/dataset_item_page_public.py +5 -0
- opik/rest_api/types/dataset_item_public.py +2 -0
- opik/rest_api/types/dataset_item_update.py +39 -0
- opik/rest_api/types/dataset_item_write.py +1 -0
- opik/rest_api/types/dataset_public.py +4 -0
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +59 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +7 -2
- opik/rest_api/types/experiment_group_response.py +2 -0
- opik/rest_api/types/experiment_public.py +7 -2
- opik/rest_api/types/experiment_public_status.py +5 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/experiment_status.py +5 -0
- opik/rest_api/types/feedback.py +25 -1
- opik/rest_api/types/feedback_create.py +20 -1
- opik/rest_api/types/feedback_object_public.py +27 -1
- opik/rest_api/types/feedback_public.py +25 -1
- opik/rest_api/types/feedback_score_batch_item.py +2 -1
- opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
- opik/rest_api/types/feedback_score_public.py +4 -0
- opik/rest_api/types/feedback_update.py +20 -1
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/group_detail.py +19 -0
- opik/rest_api/types/group_details.py +20 -0
- opik/rest_api/types/guardrail.py +1 -0
- opik/rest_api/types/guardrail_write.py +1 -0
- opik/rest_api/types/ids_holder.py +19 -0
- opik/rest_api/types/image_url.py +20 -0
- opik/rest_api/types/image_url_public.py +20 -0
- opik/rest_api/types/image_url_write.py +20 -0
- opik/rest_api/types/llm_as_judge_message.py +5 -1
- opik/rest_api/types/llm_as_judge_message_content.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
- opik/rest_api/types/llm_as_judge_message_public.py +5 -1
- opik/rest_api/types/llm_as_judge_message_write.py +5 -1
- opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
- opik/rest_api/types/manual_evaluation_request.py +38 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
- opik/rest_api/types/manual_evaluation_response.py +27 -0
- opik/rest_api/types/optimization.py +4 -2
- opik/rest_api/types/optimization_public.py +4 -2
- opik/rest_api/types/optimization_public_status.py +3 -1
- opik/rest_api/types/optimization_status.py +3 -1
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +4 -2
- opik/rest_api/types/optimization_write_status.py +3 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt.py +6 -0
- opik/rest_api/types/prompt_detail.py +6 -0
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_public.py +6 -0
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_version.py +3 -0
- opik/rest_api/types/prompt_version_detail.py +3 -0
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_link.py +1 -0
- opik/rest_api/types/prompt_version_link_public.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +3 -0
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +9 -0
- opik/rest_api/types/provider_api_key_provider.py +1 -1
- opik/rest_api/types/provider_api_key_public.py +9 -0
- opik/rest_api/types/provider_api_key_public_provider.py +1 -1
- opik/rest_api/types/score_name.py +1 -0
- opik/rest_api/types/service_toggles_config.py +18 -0
- opik/rest_api/types/span.py +1 -2
- opik/rest_api/types/span_enrichment_options.py +31 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/span_public.py +1 -2
- opik/rest_api/types/span_update.py +46 -0
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/rest_api/types/span_write.py +1 -2
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/trace.py +11 -2
- opik/rest_api/types/trace_enrichment_options.py +32 -0
- opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/trace_filter.py +23 -0
- opik/rest_api/types/trace_filter_operator.py +21 -0
- opik/rest_api/types/trace_filter_write.py +23 -0
- opik/rest_api/types/trace_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_public.py +11 -2
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_thread_identifier.py +1 -0
- opik/rest_api/types/trace_update.py +39 -0
- opik/rest_api/types/trace_write.py +1 -2
- opik/rest_api/types/value_entry.py +2 -0
- opik/rest_api/types/value_entry_compare.py +2 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
- opik/rest_api/types/value_entry_public.py +2 -0
- opik/rest_api/types/video_url.py +19 -0
- opik/rest_api/types/video_url_public.py +19 -0
- opik/rest_api/types/video_url_write.py +19 -0
- opik/rest_api/types/webhook.py +28 -0
- opik/rest_api/types/webhook_examples.py +19 -0
- opik/rest_api/types/webhook_public.py +28 -0
- opik/rest_api/types/webhook_test_result.py +23 -0
- opik/rest_api/types/webhook_test_result_status.py +5 -0
- opik/rest_api/types/webhook_write.py +23 -0
- opik/rest_api/types/welcome_wizard_tracking.py +22 -0
- opik/rest_api/types/workspace_configuration.py +5 -0
- opik/rest_api/welcome_wizard/__init__.py +4 -0
- opik/rest_api/welcome_wizard/client.py +195 -0
- opik/rest_api/welcome_wizard/raw_client.py +208 -0
- opik/rest_api/workspaces/client.py +14 -2
- opik/rest_api/workspaces/raw_client.py +10 -0
- opik/s3_httpx_client.py +14 -1
- opik/simulation/__init__.py +6 -0
- opik/simulation/simulated_user.py +99 -0
- opik/simulation/simulator.py +108 -0
- opik/synchronization.py +5 -6
- opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- opik-1.9.71.dist-info/METADATA +370 -0
- opik-1.9.71.dist-info/RECORD +1110 -0
- opik/api_objects/prompt/prompt.py +0 -112
- opik/cli.py +0 -193
- opik/hooks.py +0 -13
- opik/integrations/bedrock/chunks_aggregator.py +0 -55
- opik/integrations/bedrock/helpers.py +0 -8
- opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
- opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
- opik-1.8.39.dist-info/METADATA +0 -339
- opik-1.8.39.dist-info/RECORD +0 -790
- /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
- /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
- /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1192 @@
|
|
|
1
|
+
"""Experiment import functionality.
|
|
2
|
+
|
|
3
|
+
Note: Experiment import copies traces but not their full span trees (LLM task and
|
|
4
|
+
metrics calculation spans). This is sufficient for experiment representation but
|
|
5
|
+
not a "full & honest" migration. Spans are imported as part of trace import,
|
|
6
|
+
not experiment recreation.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import json
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
import opik
|
|
17
|
+
from opik.api_objects.dataset import dataset_item as dataset_item_module # noqa: F401
|
|
18
|
+
import opik.id_helpers as id_helpers_module # type: ignore
|
|
19
|
+
from opik.rest_api.types.experiment_item import ExperimentItem
|
|
20
|
+
|
|
21
|
+
# Note: dataset_item_module is imported for test compatibility.
|
|
22
|
+
# Tests patch and import this module, so it must be available even though
|
|
23
|
+
# it's not directly used in this module's code.
|
|
24
|
+
from rich.console import Console
|
|
25
|
+
|
|
26
|
+
from .utils import (
|
|
27
|
+
handle_trace_reference,
|
|
28
|
+
translate_trace_id,
|
|
29
|
+
matches_name_pattern,
|
|
30
|
+
clean_feedback_scores,
|
|
31
|
+
debug_print,
|
|
32
|
+
)
|
|
33
|
+
from .prompt import import_prompts_from_directory
|
|
34
|
+
from .dataset import import_datasets_from_directory
|
|
35
|
+
|
|
36
|
+
console = Console()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class ExperimentData:
    """Container for one experiment loaded from an export file.

    The field layout mirrors what the exporter writes via
    ``create_experiment_data_structure``: the experiment's own metadata,
    the list of experiment items, and the optional export timestamp.
    """

    experiment: Dict[str, Any]
    items: List[Dict[str, Any]]
    downloaded_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ExperimentData":
        """Build an instance from a plain dict (typically parsed JSON).

        Missing keys fall back to an empty experiment dict, an empty item
        list, and ``None`` for the timestamp.
        """
        experiment = data.get("experiment", {})
        items = data.get("items", [])
        downloaded_at = data.get("downloaded_at")
        return cls(
            experiment=experiment,
            items=items,
            downloaded_at=downloaded_at,
        )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def find_experiment_files(data_dir: Path) -> list[Path]:
    """Return every ``experiment_*.json`` file directly inside *data_dir*."""
    pattern = "experiment_*.json"
    return list(data_dir.glob(pattern))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def load_experiment_data(experiment_file: Path) -> ExperimentData:
    """Parse an exported experiment JSON file into an ``ExperimentData``."""
    raw_text = experiment_file.read_text(encoding="utf-8")
    parsed = json.loads(raw_text)
    return ExperimentData.from_dict(parsed)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _build_dataset_item_id_map(
    client: opik.Opik,
    experiment_files: List[Path],
    datasets_dir: Path,
    dry_run: bool,
    debug: bool,
) -> tuple[Dict[str, str], Dict[str, int]]:
    """Build a mapping from original dataset_item_id to new dataset_item_id.

    This function:
    1. Collects all dataset_item_id and dataset_item_data from experiment files
    2. Imports datasets from the datasets directory
    3. Matches imported dataset items by content to build the mapping

    Matching is purely content-based: items are keyed by the SHA-256 of the
    sorted-key JSON serialization of their data (with any 'id' field removed),
    so the mapping survives the ID change that happens on import.

    Args:
        client: Opik client instance
        experiment_files: List of experiment JSON files
        datasets_dir: Directory containing dataset exports
        dry_run: If True, only simulate without making changes
        debug: If True, print debug messages

    Returns:
        Tuple of (dataset_item_id_map, dataset_stats) where:
        - dataset_item_id_map: Dictionary mapping original dataset_item_id to new dataset_item_id
        - dataset_stats: Dictionary with 'datasets', 'datasets_skipped', 'datasets_errors' keys
    """
    dataset_item_id_map: Dict[str, str] = {}
    dataset_stats: Dict[str, int] = {
        "datasets": 0,
        "datasets_skipped": 0,
        "datasets_errors": 0,
    }

    # In dry-run mode nothing is imported, so there is nothing to map;
    # callers receive empty maps and zeroed stats.
    if dry_run:
        return dataset_item_id_map, dataset_stats

    # Step 1: Collect all dataset_item_id and dataset_item_data from experiment files
    # Map: content_hash -> list of (original_dataset_item_id, dataset_item_data)
    # Multiple original IDs can have the same content (they should all map to the same new item)
    content_to_original_ids: Dict[str, List[tuple[str, Dict[str, Any]]]] = {}

    for experiment_file in experiment_files:
        try:
            experiment_data = load_experiment_data(experiment_file)
            items_data = experiment_data.items

            for item_data in items_data:
                original_dataset_item_id = item_data.get("dataset_item_id")
                dataset_item_data = item_data.get("dataset_item_data")

                # Fallback to input for older exports
                if not dataset_item_data:
                    dataset_item_data = item_data.get("input")

                if not original_dataset_item_id or not dataset_item_data:
                    continue

                # Remove 'id' field from dataset_item_data for consistent hashing
                # (imported items don't have 'id' field, so we need to match without it)
                if isinstance(dataset_item_data, dict):
                    dataset_item_data_for_hash = {
                        k: v for k, v in dataset_item_data.items() if k != "id"
                    }
                else:
                    dataset_item_data_for_hash = dataset_item_data

                # Create a hash of the content for matching (without 'id' field)
                # Sort keys to ensure consistent hashing
                content_str = json.dumps(dataset_item_data_for_hash, sort_keys=True)
                content_hash = hashlib.sha256(content_str.encode()).hexdigest()

                # Store the mapping (content_hash -> list of (original_id, data))
                # Store the original data without 'id' for matching
                if content_hash not in content_to_original_ids:
                    content_to_original_ids[content_hash] = []
                content_to_original_ids[content_hash].append(
                    (original_dataset_item_id, dataset_item_data_for_hash)
                )

        except Exception as e:
            # Best-effort: a malformed experiment file must not abort mapping
            # for the remaining files.
            debug_print(
                f"Warning: Failed to process experiment file {experiment_file} for dataset mapping: {e}",
                debug,
            )
            continue

    if not content_to_original_ids:
        debug_print("No dataset items found in experiment files", debug)
        return dataset_item_id_map, dataset_stats

    # Count total unique original IDs
    total_original_ids = sum(len(ids) for ids in content_to_original_ids.values())
    console.print(
        f"[blue]Found {total_original_ids} dataset item reference(s) ({len(content_to_original_ids)} unique content(s)) in experiment files[/blue]"
    )
    debug_print(
        f"Found {total_original_ids} dataset item reference(s) ({len(content_to_original_ids)} unique content(s)) in experiment files",
        debug,
    )

    # Step 2: Import datasets
    # Fallback: if the given path does not exist, try a sibling "datasets"
    # directory next to it (handles exports laid out one level up).
    datasets_dir = (
        datasets_dir if datasets_dir.exists() else datasets_dir.parent / "datasets"
    )
    if not datasets_dir.exists():
        console.print(
            f"[yellow]Warning: No datasets directory found at {datasets_dir}, skipping dataset import[/yellow]"
        )
        debug_print(
            f"No datasets directory found at {datasets_dir}, skipping dataset import",
            debug,
        )
        return dataset_item_id_map, dataset_stats

    console.print(
        f"[blue]Importing datasets from {datasets_dir} to build dataset item ID mapping...[/blue]"
    )

    # Import datasets (this will create dataset items with new IDs)
    # NOTE(review): the third positional argument is always False here by
    # construction (dry_run returned above); the fourth (None) is presumably
    # a name-pattern filter — confirm against the .dataset module.
    dataset_import_stats = import_datasets_from_directory(
        client, datasets_dir, dry_run, None, debug
    )

    # Update dataset_stats with import results
    dataset_stats["datasets"] = dataset_import_stats.get("datasets", 0)
    dataset_stats["datasets_skipped"] = dataset_import_stats.get("datasets_skipped", 0)
    dataset_stats["datasets_errors"] = dataset_import_stats.get("datasets_errors", 0)

    if dataset_import_stats.get("datasets", 0) == 0:
        console.print(
            f"[yellow]Warning: No datasets were imported from {datasets_dir}[/yellow]"
        )
        dataset_files = list(datasets_dir.glob("dataset_*.json"))
        if dataset_files:
            console.print(
                f"[yellow]Found {len(dataset_files)} dataset file(s) but none were imported[/yellow]"
            )
        else:
            console.print(f"[yellow]No dataset files found in {datasets_dir}[/yellow]")

    # Flush to ensure datasets are persisted
    # (always true here — dry_run already returned early above)
    if not dry_run:
        client.flush()
        console.print(
            f"[green]Imported {dataset_import_stats.get('datasets', 0)} dataset(s)[/green]"
        )

    # Step 3: Get all imported dataset items and match by content
    # Re-glob the dataset files: the export files tell us which dataset
    # names to fetch back from the server.
    dataset_files = list(datasets_dir.glob("dataset_*.json"))

    if not dataset_files:
        console.print(
            f"[yellow]Warning: No dataset files found in {datasets_dir}[/yellow]"
        )
        return dataset_item_id_map, dataset_stats

    console.print(
        f"[blue]Processing {len(dataset_files)} dataset file(s) to build item ID mapping...[/blue]"
    )

    for dataset_file in dataset_files:
        try:
            with open(dataset_file, "r", encoding="utf-8") as f:
                dataset_data = json.load(f)

            # The export format stores the name either at the top level or
            # nested under a "dataset" key.
            dataset_name = dataset_data.get("name") or (
                dataset_data.get("dataset", {}).get("name")
                if dataset_data.get("dataset")
                else None
            )

            if not dataset_name:
                continue

            # Get the imported dataset
            try:
                dataset = client.get_dataset(dataset_name)
            except Exception:
                debug_print(
                    f"Warning: Could not get dataset '{dataset_name}' after import",
                    debug,
                )
                continue

            # Get all items from the imported dataset (with their new IDs)
            try:
                imported_items = dataset.get_items()
                console.print(
                    f"[blue]Dataset '{dataset_name}' has {len(imported_items)} item(s)[/blue]"
                )
            except Exception as e:
                console.print(
                    f"[yellow]Warning: Could not get items from dataset '{dataset_name}': {e}[/yellow]"
                )
                continue

            # Match imported items to original items by content
            matched_count = 0
            for imported_item in imported_items:
                imported_item_id = imported_item.get("id")
                if not imported_item_id:
                    continue

                # Remove 'id' from content for comparison
                imported_content = {k: v for k, v in imported_item.items() if k != "id"}

                # Create hash of imported content
                imported_content_str = json.dumps(imported_content, sort_keys=True)
                imported_content_hash = hashlib.sha256(
                    imported_content_str.encode()
                ).hexdigest()

                # Match to original - map all original IDs with this content to the same new item
                if imported_content_hash in content_to_original_ids:
                    original_ids_list = content_to_original_ids[imported_content_hash]
                    for original_id, _ in original_ids_list:
                        dataset_item_id_map[original_id] = imported_item_id
                        matched_count += 1
                        debug_print(
                            f"Mapped dataset item {original_id} -> {imported_item_id}",
                            debug,
                        )
                    # Remove from dict to avoid rematching (though duplicates are fine)
                    # We keep it in case the same content appears in multiple datasets

            if matched_count > 0:
                console.print(
                    f"[green]Matched {matched_count} dataset item(s) from dataset '{dataset_name}'[/green]"
                )
            elif imported_items:
                # Show why items weren't matched
                unmatched_hashes = set(content_to_original_ids.keys())
                imported_hashes = set()
                for item in imported_items:
                    item_content = {k: v for k, v in item.items() if k != "id"}
                    content_str = json.dumps(item_content, sort_keys=True)
                    imported_hashes.add(
                        hashlib.sha256(content_str.encode()).hexdigest()
                    )

                if unmatched_hashes and imported_hashes:
                    console.print(
                        f"[yellow]Warning: Could not match any items from dataset '{dataset_name}'. Content hashes don't match.[/yellow]"
                    )
                    if debug:
                        # Show sample content from both sides
                        sample_original = list(content_to_original_ids.values())[0][0][
                            1
                        ]
                        sample_imported = imported_items[0] if imported_items else {}
                        console.print(
                            f"[yellow]Sample original content: {json.dumps(sample_original, sort_keys=True)[:200]}...[/yellow]"
                        )
                        imported_sample = {
                            k: v for k, v in sample_imported.items() if k != "id"
                        }
                        console.print(
                            f"[yellow]Sample imported content: {json.dumps(imported_sample, sort_keys=True)[:200]}...[/yellow]"
                        )

        except Exception as e:
            console.print(
                f"[yellow]Warning: Failed to process dataset file {dataset_file.name} for mapping: {e}[/yellow]"
            )
            if debug:
                import traceback

                console.print(f"[yellow]Traceback: {traceback.format_exc()}[/yellow]")
            continue

    if dataset_item_id_map:
        console.print(
            f"[green]Built dataset item ID mapping with {len(dataset_item_id_map)} item(s)[/green]"
        )
        debug_print(
            f"Dataset item ID mapping has {len(dataset_item_id_map)} entries",
            debug,
        )
    else:
        console.print(
            "[yellow]Warning: Dataset item ID mapping is empty. This may cause experiment items to be skipped.[/yellow]"
        )
        if content_to_original_ids:
            console.print(
                f"[yellow]Found {total_original_ids} dataset item reference(s) in experiment files but couldn't match them to imported items.[/yellow]"
            )
            console.print(
                "[yellow]This usually means the dataset items weren't imported correctly or the content structure doesn't match.[/yellow]"
            )

    return dataset_item_id_map, dataset_stats
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def recreate_experiment(
    client: opik.Opik,
    experiment_data: ExperimentData,
    project_name: str,
    trace_id_map: Dict[str, str],
    dataset_item_id_map: Optional[Dict[str, str]] = None,
    dry_run: bool = False,
    debug: bool = False,
) -> bool:
    """Recreate a single experiment from exported data.

    Args:
        client: Opik client instance
        experiment_data: Experiment data structure from export
        project_name: Name of the project to create the experiment in
        trace_id_map: Mapping from original trace IDs to new trace IDs (required, can be empty dict)
        dataset_item_id_map: Mapping from original dataset_item_id to new dataset_item_id (optional)
        dry_run: If True, only simulate the import without making changes
        debug: If True, print debug messages

    Returns:
        True on success (including dry runs and the "no valid items" case),
        False when experiment creation or item insertion raised.

    Note: This function expects that traces and datasets have already been imported into the target workspace.
    When traces are imported, they receive new IDs. The trace_id_map maps original trace IDs
    to the newly created trace IDs. Items referencing traces not in trace_id_map will be skipped.
    When datasets are imported, their items receive new IDs. The dataset_item_id_map maps original
    dataset_item_id to the newly created dataset_item_id. Items referencing dataset items not in
    dataset_item_id_map will be skipped. An empty trace_id_map or dataset_item_id_map is valid and
    will result in items being skipped.
    """
    experiment_info = experiment_data.experiment
    items_data = experiment_data.items

    # Fall back to a synthetic name when the export carries none.
    # NOTE(review): experiment_info['id'] and ['dataset_name'] raise KeyError
    # if absent; the outer except below turns that into a False return.
    experiment_name = (
        experiment_info.get("name") or f"recreated-{experiment_info['id']}"
    )
    dataset_name = experiment_info["dataset_name"]

    console.print(f"[blue]Recreating experiment: {experiment_name}[/blue]")

    if dry_run:
        console.print(
            f"[yellow]Would create experiment '{experiment_name}' with {len(items_data)} items[/yellow]"
        )
        return True

    try:
        # Get or create the dataset
        # Ensure dataset is in the same workspace as the client
        # (creates an empty placeholder dataset if the import didn't run)
        _ = client.get_or_create_dataset(
            name=dataset_name,
            description=f"Recreated dataset for experiment {experiment_name}",
        )

        debug_print(
            f"Using dataset '{dataset_name}' for experiment '{experiment_name}'",
            debug,
        )

        # Ensure project_name is in metadata for future imports
        # (copy before mutating so the caller's dict is left untouched)
        experiment_metadata = experiment_info.get("metadata") or {}
        if project_name and "project_name" not in experiment_metadata:
            experiment_metadata = experiment_metadata.copy()
            experiment_metadata["project_name"] = project_name
            debug_print(
                f"Adding project_name '{project_name}' to experiment metadata",
                debug,
            )

        # Create the experiment
        experiment = client.create_experiment(
            dataset_name=dataset_name,
            name=experiment_name,
            experiment_config=experiment_metadata,
            type=experiment_info.get("type", "regular"),
        )

        debug_print(
            f"Created experiment '{experiment_name}' with ID: {experiment.id}",
            debug,
        )

        # Process experiment items using dataset_item_id_map
        # Per-reason skip counters feed the breakdown printed after insertion.
        rest_experiment_items = []
        successful_items = 0
        skipped_items = 0
        skipped_no_trace_id = 0
        skipped_no_trace_mapping = 0
        skipped_no_dataset_item_id = 0
        skipped_no_dataset_item_mapping = 0

        for item_data in items_data:
            # Handle trace reference (from deduplicated exports)
            trace_id = handle_trace_reference(item_data)
            if not trace_id:
                debug_print(
                    f"Warning: No trace ID found in item {item_data.get('id', 'unknown')}, skipping item",
                    debug,
                )
                skipped_items += 1
                skipped_no_trace_id += 1
                continue

            # Translate trace id from source (workspace A) to newly created trace id (workspace B)
            new_trace_id = translate_trace_id(trace_id, trace_id_map)
            if not new_trace_id:
                debug_print(
                    f"Warning: No mapping for trace {trace_id}. "
                    f"Trace ID map has {len(trace_id_map)} entries. Skipping item.",
                    debug,
                )
                if trace_id_map:
                    # Show first few trace IDs in map for debugging
                    sample_ids = list(trace_id_map.keys())[:3]
                    debug_print(f"Sample trace IDs in map: {sample_ids}", debug)
                skipped_items += 1
                skipped_no_trace_mapping += 1
                continue

            debug_print(f"Mapped trace {trace_id} -> {new_trace_id}", debug)

            # Translate dataset_item_id using dataset_item_id_map
            original_dataset_item_id = item_data.get("dataset_item_id")
            if not original_dataset_item_id:
                debug_print(
                    f"Warning: No dataset_item_id found in item {item_data.get('id', 'unknown')}, skipping item",
                    debug,
                )
                skipped_items += 1
                skipped_no_dataset_item_id += 1
                continue

            # Use dataset_item_id_map to get the new dataset_item_id
            new_dataset_item_id = None
            if dataset_item_id_map:
                new_dataset_item_id = dataset_item_id_map.get(original_dataset_item_id)

            if not new_dataset_item_id:
                debug_print(
                    f"Warning: No mapping for dataset_item_id {original_dataset_item_id}. "
                    f"Dataset item ID map has {len(dataset_item_id_map) if dataset_item_id_map else 0} entries. Skipping item.",
                    debug,
                )
                if dataset_item_id_map:
                    # Show first few dataset item IDs in map for debugging
                    sample_ids = list(dataset_item_id_map.keys())[:3]
                    debug_print(f"Sample dataset item IDs in map: {sample_ids}", debug)
                skipped_items += 1
                skipped_no_dataset_item_mapping += 1
                continue

            debug_print(
                f"Mapped dataset_item_id {original_dataset_item_id} -> {new_dataset_item_id}",
                debug,
            )

            # Create experiment item with mapped IDs
            try:
                experiment_item_id = id_helpers_module.generate_id()
                rest_experiment_items.append(
                    ExperimentItem(
                        id=experiment_item_id,
                        experiment_id=experiment.id,
                        dataset_item_id=new_dataset_item_id,
                        trace_id=new_trace_id,
                    )
                )
                successful_items += 1
                debug_print(
                    f"Prepared experiment item: dataset_item_id={new_dataset_item_id}, trace_id={new_trace_id}",
                    debug,
                )
            except Exception as e:
                # NOTE(review): this skip reason has no dedicated counter, so
                # the per-reason breakdown below can sum to less than
                # skipped_items when this branch fires.
                console.print(
                    f"[yellow]Warning: Failed to create experiment item: {e}[/yellow]"
                )
                skipped_items += 1
                continue

        # Insert experiment items using REST API directly (more reliable than streamer)
        if rest_experiment_items:
            debug_print(
                f"Inserting {len(rest_experiment_items)} experiment items via REST API...",
                debug,
            )
            try:
                # Use REST API directly instead of streamer
                # NOTE(review): _rest_client is private client API — may break
                # across opik versions.
                client._rest_client.experiments.create_experiment_items(
                    experiment_items=rest_experiment_items
                )
                console.print(
                    f"[green]Created experiment '{experiment_name}' with {successful_items} items[/green]"
                )
                if skipped_items > 0:
                    console.print(
                        f"[yellow]Skipped {skipped_items} items due to missing data:[/yellow]"
                    )
                    if skipped_no_trace_id > 0:
                        console.print(
                            f"  - {skipped_no_trace_id} items missing trace_id"
                        )
                    if skipped_no_trace_mapping > 0:
                        console.print(
                            f"  - {skipped_no_trace_mapping} items with trace_id not found in trace_id_map (map has {len(trace_id_map)} entries)"
                        )
                    if skipped_no_dataset_item_id > 0:
                        console.print(
                            f"  - {skipped_no_dataset_item_id} items missing dataset_item_id"
                        )
                    if skipped_no_dataset_item_mapping > 0:
                        console.print(
                            f"  - {skipped_no_dataset_item_mapping} items with dataset_item_id not found in dataset_item_id_map (map has {len(dataset_item_id_map) if dataset_item_id_map else 0} entries)"
                        )
            except Exception as e:
                console.print(f"[red]Error inserting experiment items: {e}[/red]")
                if debug:
                    import traceback

                    console.print(f"[red]Traceback: {traceback.format_exc()}[/red]")
                # Re-raised into the outer handler below, which logs again
                # and converts the failure into a False return.
                raise
        else:
            # No insertable items: print diagnostics explaining which side of
            # the mapping (traces or dataset items) is missing. Still returns
            # True — the (empty) experiment was created.
            console.print(
                f"[yellow]No valid items found for experiment '{experiment_name}'[/yellow]"
            )
            console.print(
                f"[yellow]Total items in experiment: {len(items_data)}[/yellow]"
            )
            if trace_id_map:
                console.print(
                    f"[yellow]Trace ID map has {len(trace_id_map)} entries[/yellow]"
                )
                # Show sample trace IDs from experiment items vs map
                experiment_trace_ids = [
                    handle_trace_reference(item) for item in items_data
                ]
                experiment_trace_ids = [tid for tid in experiment_trace_ids if tid]
                if experiment_trace_ids:
                    matched = sum(
                        1 for tid in experiment_trace_ids if tid in trace_id_map
                    )
                    console.print(
                        f"[yellow]Experiment references {len(experiment_trace_ids)} trace(s), {matched} found in trace_id_map[/yellow]"
                    )
                    if matched < len(experiment_trace_ids):
                        missing = [
                            tid
                            for tid in experiment_trace_ids
                            if tid not in trace_id_map
                        ]
                        console.print(
                            f"[yellow]Missing trace IDs (first 5): {missing[:5]}[/yellow]"
                        )
            else:
                console.print(
                    "[yellow]No trace ID map available - traces may not have been imported[/yellow]"
                )
            if dataset_item_id_map:
                console.print(
                    f"[yellow]Dataset item ID map has {len(dataset_item_id_map)} entries[/yellow]"
                )
            else:
                console.print(
                    "[yellow]No dataset item ID map available - datasets may not have been imported[/yellow]"
                )

        return True

    except Exception as e:
        console.print(
            f"[red]Error recreating experiment '{experiment_name}': {e}[/red]"
        )
        return False
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def recreate_experiments(
    client: opik.Opik,
    project_dir: Path,
    project_name: str,
    dry_run: bool = False,
    name_pattern: Optional[str] = None,
    trace_id_map: Optional[Dict[str, str]] = None,
    dataset_item_id_map: Optional[Dict[str, str]] = None,
    debug: bool = False,
) -> int:
    """Recreate every exported experiment found in *project_dir*.

    Args:
        trace_id_map: Maps original trace IDs to the IDs of the re-imported
            traces. ``None`` behaves like an empty mapping, which causes all
            experiment items to be skipped.
        dataset_item_id_map: Maps original dataset item IDs to the IDs of the
            re-imported dataset items. ``None`` behaves like an empty mapping,
            which causes all experiment items to be skipped.

    Returns:
        The number of experiments recreated successfully.
    """
    candidate_files = find_experiment_files(project_dir)

    if not candidate_files:
        console.print(f"[yellow]No experiment files found in {project_dir}[/yellow]")
        return 0

    console.print(f"[green]Found {len(candidate_files)} experiment files[/green]")

    # Optionally narrow the candidates to experiments whose name matches
    # the requested pattern; unreadable files are silently skipped here.
    if name_pattern:
        matching_files = []
        for candidate in candidate_files:
            try:
                candidate_data = load_experiment_data(candidate)
                candidate_name = candidate_data.experiment.get("name", "")
                if candidate_name and matches_name_pattern(
                    candidate_name, name_pattern
                ):
                    matching_files.append(candidate)
            except Exception:
                continue

        if not matching_files:
            console.print(
                f"[yellow]No experiments found matching pattern '{name_pattern}'[/yellow]"
            )
            return 0

        console.print(
            f"[blue]Filtered to {len(matching_files)} experiments matching pattern '{name_pattern}'[/blue]"
        )
        candidate_files = matching_files

    successful = 0
    failed = 0

    for candidate in candidate_files:
        try:
            loaded = load_experiment_data(candidate)
            recreated = recreate_experiment(
                client,
                loaded,
                project_name,
                trace_id_map or {},
                dataset_item_id_map or {},
                dry_run,
                debug,
            )
        except Exception as e:
            console.print(f"[red]Error processing {candidate.name}: {e}[/red]")
            failed += 1
            continue

        if recreated:
            successful += 1
        else:
            failed += 1

    return successful
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _import_traces_from_projects_directory(
    client: opik.Opik,
    workspace_root: Path,
    dry_run: bool,
    debug: bool,
) -> tuple[Dict[str, str], Dict[str, int]]:
    """Import traces from projects directory and return trace_id_map and statistics.

    Walks ``<workspace_root>/projects/<project>/trace_*.json``, recreates each
    trace (and its spans) through ``client``, and records the mapping from the
    original trace IDs to the newly assigned ones so experiments imported later
    can rewrite their trace references.

    Args:
        client: Opik client used to create traces and spans.
        workspace_root: Workspace directory expected to contain a ``projects`` subdir.
        dry_run: When True, only report what would be imported; nothing is created.
        debug: When True, emit verbose progress via ``debug_print``.

    Returns:
        Tuple of (trace_id_map, stats_dict) where:
        - trace_id_map: mapping from original trace ID to new trace ID
        - stats_dict: dictionary with 'traces' and 'traces_errors' keys
    """
    trace_id_map: Dict[str, str] = {}
    traces_imported = 0
    traces_errors = 0
    projects_dir = workspace_root / "projects"

    if not projects_dir.exists():
        debug_print(
            f"No projects directory found at {projects_dir}, skipping trace import",
            debug,
        )
        return trace_id_map, {"traces": 0, "traces_errors": 0}

    project_dirs = [d for d in projects_dir.iterdir() if d.is_dir()]

    if not project_dirs:
        debug_print("No project directories found, skipping trace import", debug)
        return trace_id_map, {"traces": 0, "traces_errors": 0}

    debug_print(
        f"Importing traces from {len(project_dirs)} project(s) to build trace ID mapping...",
        debug,
    )

    for project_dir in project_dirs:
        project_name = project_dir.name
        trace_files = list(project_dir.glob("trace_*.json"))

        if not trace_files:
            debug_print(f"No trace files found in project '{project_name}'", debug)
            continue

        debug_print(
            f"Importing {len(trace_files)} trace(s) from project '{project_name}'...",
            debug,
        )

        for trace_file in trace_files:
            try:
                with open(trace_file, "r", encoding="utf-8") as f:
                    trace_data = json.load(f)

                trace_info = trace_data.get("trace", {})
                spans_info = trace_data.get("spans", [])
                original_trace_id = trace_info.get("id")

                if not original_trace_id:
                    debug_print(
                        f"Warning: Trace file {trace_file.name} has no trace ID, skipping",
                        debug,
                    )
                    traces_errors += 1
                    continue

                if dry_run:
                    debug_print(
                        f"Would import trace {original_trace_id} from project '{project_name}'",
                        debug,
                    )
                    continue

                # Create trace with full data.
                # Clean feedback scores to remove read-only fields first.
                feedback_scores = clean_feedback_scores(
                    trace_info.get("feedback_scores")
                )

                trace = client.trace(
                    name=trace_info.get("name", "imported_trace"),
                    # Exported timestamps are ISO-8601; normalize a trailing
                    # "Z" to "+00:00" so datetime.fromisoformat accepts it.
                    start_time=(
                        datetime.fromisoformat(
                            trace_info["start_time"].replace("Z", "+00:00")
                        )
                        if trace_info.get("start_time")
                        else None
                    ),
                    end_time=(
                        datetime.fromisoformat(
                            trace_info["end_time"].replace("Z", "+00:00")
                        )
                        if trace_info.get("end_time")
                        else None
                    ),
                    input=trace_info.get("input", {}),
                    output=trace_info.get("output", {}),
                    metadata=trace_info.get("metadata"),
                    tags=trace_info.get("tags"),
                    feedback_scores=feedback_scores,
                    error_info=trace_info.get("error_info"),
                    thread_id=trace_info.get("thread_id"),
                    project_name=project_name,
                )

                # Map original trace ID to new trace ID
                trace_id_map[original_trace_id] = trace.id
                traces_imported += 1
                debug_print(
                    f"Mapped trace {original_trace_id} -> {trace.id} (project: {project_name})",
                    debug,
                )

                # Create spans with full data, preserving parent-child relationships.
                # span_id_map translates original span IDs to the newly created ones
                # so parent_span_id references can be rewritten.
                span_id_map: Dict[str, str] = {}

                # Spans must be created parents-first so a child's new parent ID is
                # already in span_id_map when the child is created. Sorting by
                # ancestor depth handles hierarchies of ANY depth; a plain
                # root-vs-child split would silently drop the parent link of a
                # grandchild that happened to precede its parent in the file.
                span_by_id = {s.get("id"): s for s in spans_info if s.get("id")}

                def _span_depth(span) -> int:
                    # Number of ancestors of `span` within this trace; guarded
                    # against cyclic/unknown parent references.
                    depth = 0
                    seen = set()
                    parent_id = span.get("parent_span_id")
                    while (
                        parent_id
                        and parent_id in span_by_id
                        and parent_id not in seen
                    ):
                        seen.add(parent_id)
                        depth += 1
                        parent_id = span_by_id[parent_id].get("parent_span_id")
                    return depth

                # sorted() is stable, so same-depth spans keep their file order.
                sorted_spans = sorted(spans_info, key=_span_depth)

                for span_info in sorted_spans:
                    # Clean feedback scores to remove read-only fields
                    span_feedback_scores = clean_feedback_scores(
                        span_info.get("feedback_scores")
                    )

                    original_span_id = span_info.get("id")
                    original_parent_span_id = span_info.get("parent_span_id")

                    # Translate parent_span_id if it exists
                    new_parent_span_id = None
                    if (
                        original_parent_span_id
                        and original_parent_span_id in span_id_map
                    ):
                        new_parent_span_id = span_id_map[original_parent_span_id]

                    # Create span with parent_span_id if available
                    span = client.span(
                        name=span_info.get("name", "imported_span"),
                        start_time=(
                            datetime.fromisoformat(
                                span_info["start_time"].replace("Z", "+00:00")
                            )
                            if span_info.get("start_time")
                            else None
                        ),
                        end_time=(
                            datetime.fromisoformat(
                                span_info["end_time"].replace("Z", "+00:00")
                            )
                            if span_info.get("end_time")
                            else None
                        ),
                        input=span_info.get("input", {}),
                        output=span_info.get("output", {}),
                        metadata=span_info.get("metadata"),
                        tags=span_info.get("tags"),
                        usage=span_info.get("usage"),
                        feedback_scores=span_feedback_scores,
                        model=span_info.get("model"),
                        provider=span_info.get("provider"),
                        error_info=span_info.get("error_info"),
                        total_cost=span_info.get("total_cost"),
                        trace_id=trace.id,
                        parent_span_id=new_parent_span_id,
                        project_name=project_name,
                    )

                    # Map original span ID to new span ID for parent relationship mapping
                    if original_span_id and span.id:
                        span_id_map[original_span_id] = span.id

            except Exception as e:
                # Best-effort import: a single bad file must not abort the run.
                console.print(
                    f"[yellow]Warning: Failed to import trace from {trace_file}: {e}[/yellow]"
                )
                traces_errors += 1
                continue

    if not dry_run and trace_id_map:
        # Flush client to ensure traces are persisted before recreating experiments
        client.flush()
        console.print(
            f"[green]Imported {len(trace_id_map)} trace(s) and built trace ID mapping[/green]"
        )
        debug_print(
            f"Trace ID mapping has {len(trace_id_map)} entries",
            debug,
        )
    elif not dry_run:
        console.print(
            "[yellow]Warning: No traces were imported. Trace ID map is empty.[/yellow]"
        )
        if traces_imported == 0 and traces_errors == 0:
            console.print(
                f"[yellow]No trace files were found in {projects_dir}[/yellow]"
            )

    return trace_id_map, {"traces": traces_imported, "traces_errors": traces_errors}
|
|
918
|
+
|
|
919
|
+
|
|
920
|
+
def import_experiments_from_directory(
    client: opik.Opik,
    source_dir: Path,
    dry_run: bool,
    name_pattern: Optional[str],
    debug: bool,
) -> Dict[str, int]:
    """Import experiments from a directory.

    This function will first import prompts, datasets and traces from their
    respective directories (if they exist) to build the dataset item and trace
    ID maps, then use those maps when recreating experiments.

    Args:
        client: Opik client used to recreate the exported entities.
        source_dir: Directory holding ``experiment_*.json`` files (typically
            ``<workspace>/experiments``; its parent is treated as the workspace root).
        dry_run: When True, only report what would be imported; nothing is created.
        name_pattern: Optional pattern; only experiments whose name matches are imported.
        debug: When True, emit verbose progress via ``debug_print``.

    Returns:
        Dictionary with keys: 'experiments', 'experiments_skipped', 'experiments_errors',
        'datasets', 'datasets_skipped', 'datasets_errors',
        'prompts', 'prompts_skipped', 'prompts_errors', 'traces', 'traces_errors'
    """
    try:
        experiment_files = list(source_dir.glob("experiment_*.json"))

        if not experiment_files:
            console.print("[yellow]No experiment files found in the directory[/yellow]")
            return {
                "experiments": 0,
                "experiments_skipped": 0,
                "experiments_errors": 0,
                "datasets": 0,
                "datasets_skipped": 0,
                "datasets_errors": 0,
                "prompts": 0,
                "prompts_skipped": 0,
                "prompts_errors": 0,
                "traces": 0,
                "traces_errors": 0,
            }

        # source_dir is typically workspace/experiments, so parent is workspace root
        workspace_root = source_dir.parent

        # Import prompts first (they may be referenced by experiments)
        prompts_stats = {"prompts": 0, "prompts_skipped": 0, "prompts_errors": 0}
        prompts_dir = workspace_root / "prompts"
        if prompts_dir.exists():
            debug_print("Importing prompts from prompts directory...", debug)
            prompts_stats = import_prompts_from_directory(
                client, prompts_dir, dry_run, name_pattern, debug
            )
            if prompts_stats.get("prompts", 0) > 0 and not dry_run:
                # Flush client to ensure prompts are persisted
                client.flush()
                debug_print(
                    f"Imported {prompts_stats.get('prompts', 0)} prompt(s)",
                    debug,
                )
        else:
            debug_print("No prompts directory found, skipping prompt import", debug)

        # Import datasets first to build dataset_item_id_map
        datasets_dir = workspace_root / "datasets"
        dataset_item_id_map: Dict[str, str] = {}
        datasets_stats: Dict[str, int] = {
            "datasets": 0,
            "datasets_skipped": 0,
            "datasets_errors": 0,
        }
        if datasets_dir.exists():
            debug_print(
                "Importing datasets and building dataset item ID mapping...",
                debug,
            )
            dataset_item_id_map, datasets_stats = _build_dataset_item_id_map(
                client, experiment_files, datasets_dir, dry_run, debug
            )
        else:
            debug_print(
                f"No datasets directory found at {datasets_dir}, skipping dataset import",
                debug,
            )

        # Import traces first to build trace_id_map
        trace_id_map, traces_stats = _import_traces_from_projects_directory(
            client, workspace_root, dry_run, debug
        )

        if not trace_id_map and not dry_run:
            console.print(
                "[yellow]Warning: No traces were imported. Experiment items may be skipped if they reference traces.[/yellow]"
            )
            # Try to diagnose why traces weren't imported
            projects_dir = workspace_root / "projects"
            if projects_dir.exists():
                project_dirs = [d for d in projects_dir.iterdir() if d.is_dir()]
                debug_print(
                    f"Found {len(project_dirs)} project directory(ies): {[d.name for d in project_dirs]}",
                    debug,
                )
                for project_dir in project_dirs:
                    trace_files = list(project_dir.glob("trace_*.json"))
                    debug_print(
                        f"Project '{project_dir.name}' has {len(trace_files)} trace file(s)",
                        debug,
                    )
            else:
                debug_print(
                    f"Projects directory not found at {projects_dir}",
                    debug,
                )
        elif trace_id_map:
            debug_print(
                f"Built trace ID mapping with {len(trace_id_map)} trace(s)",
                debug,
            )
            # Show sample trace IDs for debugging
            sample_original_ids = list(trace_id_map.keys())[:3]
            debug_print(
                f"Sample original trace IDs in map: {sample_original_ids}", debug
            )

        # Build a map of trace_id -> project_name from trace files for project inference
        trace_to_project_map: Dict[str, str] = {}
        projects_dir = workspace_root / "projects"
        if projects_dir.exists():
            for project_dir in projects_dir.iterdir():
                if not project_dir.is_dir():
                    continue
                project_name = project_dir.name
                for trace_file in project_dir.glob("trace_*.json"):
                    try:
                        with open(trace_file, "r", encoding="utf-8") as f:
                            trace_data = json.load(f)
                        original_trace_id = trace_data.get("trace", {}).get("id")
                        if original_trace_id:
                            trace_to_project_map[original_trace_id] = project_name
                    except Exception:
                        # Unreadable trace file: project inference just loses
                        # one entry; the trace import loop reports real errors.
                        continue

        imported_count = 0
        skipped_count = 0
        error_count = 0
        for experiment_file in experiment_files:
            try:
                experiment_data = load_experiment_data(experiment_file)

                experiment_info = experiment_data.experiment
                experiment_name = experiment_info.get("name", "")

                # Debug: Check trace IDs in experiment items vs trace_id_map
                if debug and trace_id_map:
                    items_data = experiment_data.items
                    experiment_trace_ids = []
                    for item_data in items_data:
                        trace_id = handle_trace_reference(item_data)
                        if trace_id:
                            experiment_trace_ids.append(trace_id)

                    if experiment_trace_ids:
                        console.print(
                            f"[blue]Experiment '{experiment_name}' references {len(experiment_trace_ids)} trace(s)[/blue]"
                        )
                        matched = sum(
                            1 for tid in experiment_trace_ids if tid in trace_id_map
                        )
                        console.print(
                            f"[blue]{matched}/{len(experiment_trace_ids)} trace IDs found in trace_id_map[/blue]"
                        )
                        if matched < len(experiment_trace_ids):
                            missing = [
                                tid
                                for tid in experiment_trace_ids
                                if tid not in trace_id_map
                            ]
                            console.print(
                                f"[yellow]Missing trace IDs: {missing[:5]}[/yellow]"
                            )

                # Filter by name pattern if specified
                if name_pattern and not matches_name_pattern(
                    experiment_name, name_pattern
                ):
                    debug_print(
                        f"Skipping experiment {experiment_name} (doesn't match pattern)",
                        debug,
                    )
                    skipped_count += 1
                    continue

                if dry_run:
                    console.print(
                        f"[blue]Would import experiment: {experiment_name}[/blue]"
                    )
                    imported_count += 1
                    continue

                debug_print(f"Importing experiment: {experiment_name}", debug)

                # Determine project name: try metadata first, then infer from trace files
                project_for_logs = (experiment_info.get("metadata") or {}).get(
                    "project_name"
                )

                # If no project in metadata, try to infer from trace files
                if not project_for_logs and trace_to_project_map:
                    items_data = experiment_data.items
                    # Find the first trace_id in items and use its project
                    for item_data in items_data:
                        trace_id = handle_trace_reference(item_data)
                        if trace_id and trace_id in trace_to_project_map:
                            project_for_logs = trace_to_project_map[trace_id]
                            debug_print(
                                f"Inferred project name '{project_for_logs}' from trace files",
                                debug,
                            )
                            break

                # Default to "default" if still not found
                if not project_for_logs:
                    project_for_logs = "default"
                    debug_print(
                        "Using default project name (no project found in metadata or trace files)",
                        debug,
                    )

                # Use trace_id_map and dataset_item_id_map to translate IDs (empty dicts if None)
                # Note: dataset_item_id_map is already a dict (not None) from _build_dataset_item_id_map
                success = recreate_experiment(
                    client,
                    experiment_data,
                    project_for_logs,
                    trace_id_map or {},
                    dataset_item_id_map,
                    dry_run,
                    debug,
                )

                if success:
                    imported_count += 1
                    debug_print(f"Imported experiment: {experiment_name}", debug)
                else:
                    # A False return means recreation failed; count it so the
                    # returned stats stay accurate (consistent with the
                    # per-project import path, which tracks such failures).
                    error_count += 1

            except Exception as e:
                console.print(
                    f"[red]Error importing experiment from {experiment_file}: {e}[/red]"
                )
                error_count += 1
                continue

        return {
            "experiments": imported_count,
            "experiments_skipped": skipped_count,
            "experiments_errors": error_count,
            "datasets": datasets_stats.get("datasets", 0),
            "datasets_skipped": datasets_stats.get("datasets_skipped", 0),
            "datasets_errors": datasets_stats.get("datasets_errors", 0),
            "prompts": prompts_stats.get("prompts", 0),
            "prompts_skipped": prompts_stats.get("prompts_skipped", 0),
            "prompts_errors": prompts_stats.get("prompts_errors", 0),
            "traces": traces_stats.get("traces", 0),
            "traces_errors": traces_stats.get("traces_errors", 0),
        }

    except Exception as e:
        # Top-level guard: report and return an all-zero stats dict (with one
        # experiment error) rather than propagate into the CLI.
        console.print(f"[red]Error importing experiments: {e}[/red]")
        return {
            "experiments": 0,
            "experiments_skipped": 0,
            "experiments_errors": 1,
            "datasets": 0,
            "datasets_skipped": 0,
            "datasets_errors": 0,
            "prompts": 0,
            "prompts_skipped": 0,
            "prompts_errors": 0,
            "traces": 0,
            "traces_errors": 0,
        }
|