opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/constants.py +2 -0
- opik/api_objects/dataset/dataset.py +133 -40
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +6 -0
- opik/api_objects/helpers.py +8 -4
- opik/api_objects/local_recording.py +6 -5
- opik/api_objects/observation_data.py +101 -0
- opik/api_objects/opik_client.py +78 -45
- opik/api_objects/opik_query_language.py +9 -3
- opik/api_objects/prompt/chat/chat_prompt.py +18 -1
- opik/api_objects/prompt/client.py +8 -1
- opik/api_objects/span/span_data.py +3 -88
- opik/api_objects/threads/threads_client.py +7 -4
- opik/api_objects/trace/trace_data.py +3 -74
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +14 -12
- opik/config.py +12 -1
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +4 -1
- opik/decorator/base_track_decorator.py +111 -37
- opik/decorator/context_manager/span_context_manager.py +5 -1
- opik/decorator/generator_wrappers.py +5 -4
- opik/decorator/span_creation_handler.py +13 -4
- opik/evaluation/engine/engine.py +111 -28
- opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
- opik/evaluation/evaluator.py +12 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
- opik/evaluation/metrics/heuristics/equals.py +11 -7
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
- opik/evaluation/models/litellm/util.py +4 -20
- opik/evaluation/models/models_factory.py +19 -5
- opik/evaluation/rest_operations.py +3 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/integrations/adk/legacy_opik_tracer.py +9 -11
- opik/integrations/adk/opik_tracer.py +2 -2
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
- opik/integrations/dspy/callback.py +100 -14
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_tracer.py +2 -2
- opik/integrations/langchain/__init__.py +15 -2
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_tracer.py +258 -160
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
- opik/integrations/llama_index/callback.py +43 -6
- opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
- opik/integrations/openai/opik_tracker.py +99 -4
- opik/integrations/openai/videos/__init__.py +9 -0
- opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
- opik/integrations/openai/videos/videos_create_decorator.py +159 -0
- opik/integrations/openai/videos/videos_download_decorator.py +110 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batchers.py +32 -40
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/emulator_message_processor.py +36 -1
- opik/message_processing/emulation/models.py +21 -0
- opik/message_processing/messages.py +9 -0
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
- opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
- opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
- opik/message_processing/queue_consumer.py +4 -2
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +36 -8
- opik/plugins/pytest/experiment_runner.py +1 -1
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +38 -0
- opik/rest_api/datasets/client.py +249 -148
- opik/rest_api/datasets/raw_client.py +356 -217
- opik/rest_api/experiments/client.py +26 -0
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/llm_provider_key/client.py +4 -4
- opik/rest_api/llm_provider_key/raw_client.py +4 -4
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
- opik/rest_api/manual_evaluation/client.py +101 -0
- opik/rest_api/manual_evaluation/raw_client.py +172 -0
- opik/rest_api/optimizations/client.py +0 -166
- opik/rest_api/optimizations/raw_client.py +0 -248
- opik/rest_api/projects/client.py +9 -0
- opik/rest_api/projects/raw_client.py +13 -0
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
- opik/rest_api/prompts/client.py +130 -2
- opik/rest_api/prompts/raw_client.py +175 -0
- opik/rest_api/traces/client.py +101 -0
- opik/rest_api/traces/raw_client.py +120 -0
- opik/rest_api/types/__init__.py +46 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +38 -2
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
- opik/rest_api/types/dataset_item.py +1 -1
- opik/rest_api/types/dataset_item_batch.py +4 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +1 -1
- opik/rest_api/types/dataset_item_filter.py +4 -0
- opik/rest_api/types/dataset_item_page_compare.py +0 -1
- opik/rest_api/types/dataset_item_page_public.py +0 -1
- opik/rest_api/types/dataset_item_public.py +1 -1
- opik/rest_api/types/dataset_version_public.py +5 -0
- opik/rest_api/types/dataset_version_summary.py +5 -0
- opik/rest_api/types/dataset_version_summary_public.py +5 -0
- opik/rest_api/types/experiment.py +9 -0
- opik/rest_api/types/experiment_public.py +9 -0
- opik/rest_api/types/llm_as_judge_message_content.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt_version.py +1 -0
- opik/rest_api/types/prompt_version_detail.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +1 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +5 -1
- opik/rest_api/types/provider_api_key_provider.py +2 -1
- opik/rest_api/types/provider_api_key_public.py +5 -1
- opik/rest_api/types/provider_api_key_public_provider.py +2 -1
- opik/rest_api/types/service_toggles_config.py +11 -1
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/METADATA +5 -5
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/RECORD +190 -141
- opik/cli/export.py +0 -791
- opik/cli/import_command.py +0 -575
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import datetime
|
|
3
|
-
import logging
|
|
4
3
|
from typing import Any, Dict, List, Optional, Union
|
|
5
4
|
|
|
6
5
|
import opik.api_objects.attachment as attachment
|
|
@@ -14,40 +13,19 @@ from opik.types import (
|
|
|
14
13
|
LLMProvider,
|
|
15
14
|
SpanType,
|
|
16
15
|
)
|
|
17
|
-
from .. import span
|
|
16
|
+
from .. import span
|
|
17
|
+
from ..observation_data import ObservationData
|
|
18
18
|
|
|
19
|
-
LOGGER = logging.getLogger(__name__)
|
|
20
19
|
|
|
21
|
-
|
|
22
|
-
# Engineer note:
|
|
23
|
-
#
|
|
24
|
-
# After moving to minimal python version 3.10, a lot of common content
|
|
25
|
-
# from SpanData and TraceData can be moved to ObservationData parent dataclass.
|
|
26
|
-
# Before that it's impossible because of the dataclasses limitation to have optional arguments
|
|
27
|
-
# strictly after positional ones (including the attributes from the parent class).
|
|
28
|
-
# In python 3.10 @dataclass(kw_only=True) should help.
|
|
29
20
|
@dataclasses.dataclass
|
|
30
|
-
class TraceData:
|
|
21
|
+
class TraceData(ObservationData):
|
|
31
22
|
"""
|
|
32
23
|
The TraceData object is returned when calling :func:`opik.opik_context.get_current_trace_data` from a tracked function.
|
|
33
24
|
"""
|
|
34
25
|
|
|
35
26
|
id: str = dataclasses.field(default_factory=id_helpers.generate_id)
|
|
36
|
-
name: Optional[str] = None
|
|
37
|
-
start_time: Optional[datetime.datetime] = dataclasses.field(
|
|
38
|
-
default_factory=datetime_helpers.local_timestamp
|
|
39
|
-
)
|
|
40
|
-
end_time: Optional[datetime.datetime] = None
|
|
41
|
-
metadata: Optional[Dict[str, Any]] = None
|
|
42
|
-
input: Optional[Dict[str, Any]] = None
|
|
43
|
-
output: Optional[Dict[str, Any]] = None
|
|
44
|
-
tags: Optional[List[str]] = None
|
|
45
|
-
feedback_scores: Optional[List[FeedbackScoreDict]] = None
|
|
46
|
-
project_name: Optional[str] = None
|
|
47
27
|
created_by: Optional[CreatedByType] = None
|
|
48
|
-
error_info: Optional[ErrorInfoDict] = None
|
|
49
28
|
thread_id: Optional[str] = None
|
|
50
|
-
attachments: Optional[List[attachment.Attachment]] = None
|
|
51
29
|
|
|
52
30
|
def create_child_span_data(
|
|
53
31
|
self,
|
|
@@ -91,55 +69,6 @@ class TraceData:
|
|
|
91
69
|
attachments=attachments,
|
|
92
70
|
)
|
|
93
71
|
|
|
94
|
-
def update(self, **new_data: Any) -> "TraceData":
|
|
95
|
-
for key, value in new_data.items():
|
|
96
|
-
if value is None:
|
|
97
|
-
continue
|
|
98
|
-
|
|
99
|
-
if key not in self.__dict__ and key != "prompts":
|
|
100
|
-
LOGGER.debug(
|
|
101
|
-
"An attempt to update span with parameter name it doesn't have: %s",
|
|
102
|
-
key,
|
|
103
|
-
)
|
|
104
|
-
continue
|
|
105
|
-
|
|
106
|
-
if key == "metadata":
|
|
107
|
-
self.metadata = data_helpers.merge_metadata(
|
|
108
|
-
self.metadata, new_metadata=value
|
|
109
|
-
)
|
|
110
|
-
continue
|
|
111
|
-
elif key == "output":
|
|
112
|
-
self.output = data_helpers.merge_outputs(self.output, new_outputs=value)
|
|
113
|
-
continue
|
|
114
|
-
elif key == "input":
|
|
115
|
-
self.input = data_helpers.merge_inputs(self.input, new_inputs=value)
|
|
116
|
-
continue
|
|
117
|
-
elif key == "attachments":
|
|
118
|
-
self._update_attachments(value)
|
|
119
|
-
continue
|
|
120
|
-
elif key == "tags":
|
|
121
|
-
self.tags = data_helpers.merge_tags(self.tags, new_tags=value)
|
|
122
|
-
continue
|
|
123
|
-
elif key == "prompts":
|
|
124
|
-
self.metadata = data_helpers.merge_metadata(
|
|
125
|
-
self.metadata, new_metadata=new_data.get("metadata"), prompts=value
|
|
126
|
-
)
|
|
127
|
-
continue
|
|
128
|
-
|
|
129
|
-
self.__dict__[key] = value
|
|
130
|
-
|
|
131
|
-
return self
|
|
132
|
-
|
|
133
|
-
def init_end_time(self) -> "TraceData":
|
|
134
|
-
self.end_time = datetime_helpers.local_timestamp()
|
|
135
|
-
return self
|
|
136
|
-
|
|
137
|
-
def _update_attachments(self, attachments: List[attachment.Attachment]) -> None:
|
|
138
|
-
if self.attachments is None:
|
|
139
|
-
self.attachments = attachments
|
|
140
|
-
else:
|
|
141
|
-
self.attachments.extend(attachments)
|
|
142
|
-
|
|
143
72
|
@property
|
|
144
73
|
def as_start_parameters(self) -> Dict[str, Any]:
|
|
145
74
|
"""Returns parameters of this trace to be sent to the server when starting a new trace."""
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Any, Optional, cast, Union, Dict
|
|
3
3
|
|
|
4
|
-
from ..types import
|
|
4
|
+
from ..types import BatchFeedbackScoreDict
|
|
5
5
|
from ..validation import feedback_score as feedback_score_validator
|
|
6
6
|
from .. import logging_messages, llm_usage
|
|
7
7
|
from opik.types import LLMProvider
|
|
@@ -38,7 +38,7 @@ def validate_and_parse_usage(
|
|
|
38
38
|
|
|
39
39
|
def validate_feedback_score(
|
|
40
40
|
feedback_score: Any, logger: logging.Logger
|
|
41
|
-
) -> Optional[
|
|
41
|
+
) -> Optional[BatchFeedbackScoreDict]:
|
|
42
42
|
feedback_score_validator_ = feedback_score_validator.FeedbackScoreValidator(
|
|
43
43
|
feedback_score
|
|
44
44
|
)
|
|
@@ -51,4 +51,4 @@ def validate_feedback_score(
|
|
|
51
51
|
)
|
|
52
52
|
return None
|
|
53
53
|
|
|
54
|
-
return cast(
|
|
54
|
+
return cast(BatchFeedbackScoreDict, feedback_score)
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Download command for Opik CLI."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from .dataset import export_dataset_command
|
|
8
|
+
from .experiment import export_experiment_command
|
|
9
|
+
from .prompt import export_prompt_command
|
|
10
|
+
from .project import export_project_command
|
|
11
|
+
|
|
12
|
+
EXPORT_CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.group(
|
|
16
|
+
name="export", context_settings=EXPORT_CONTEXT_SETTINGS, invoke_without_command=True
|
|
17
|
+
)
|
|
18
|
+
@click.argument("workspace", type=str)
|
|
19
|
+
@click.option(
|
|
20
|
+
"--api-key",
|
|
21
|
+
type=str,
|
|
22
|
+
help="Opik API key. If not provided, will use OPIK_API_KEY environment variable or configuration.",
|
|
23
|
+
)
|
|
24
|
+
@click.pass_context
|
|
25
|
+
def export_group(ctx: click.Context, workspace: str, api_key: Optional[str]) -> None:
|
|
26
|
+
"""Export data from Opik workspace.
|
|
27
|
+
|
|
28
|
+
This command allows you to export specific data from an Opik workspace to local files.
|
|
29
|
+
Supported data types include datasets, projects, experiments, and prompts.
|
|
30
|
+
|
|
31
|
+
\b
|
|
32
|
+
General Usage:
|
|
33
|
+
opik export WORKSPACE ITEM NAME [OPTIONS]
|
|
34
|
+
|
|
35
|
+
\b
|
|
36
|
+
Data Types (ITEM):
|
|
37
|
+
dataset Export a dataset by exact name (exports dataset definition and items)
|
|
38
|
+
project Export a project by name or ID (exports project traces and metadata)
|
|
39
|
+
experiment Export an experiment by name or ID (exports experiment configuration and results)
|
|
40
|
+
prompt Export a prompt by exact name (exports prompt templates and versions)
|
|
41
|
+
|
|
42
|
+
\b
|
|
43
|
+
Common Options:
|
|
44
|
+
--path, -p Directory to save exported data (default: opik_exports)
|
|
45
|
+
--format Export format: json or csv (default: json)
|
|
46
|
+
--max-results Maximum number of items to export (varies by data type)
|
|
47
|
+
--force Re-download items even if they already exist locally
|
|
48
|
+
--debug Show detailed information about the export process
|
|
49
|
+
|
|
50
|
+
\b
|
|
51
|
+
Examples:
|
|
52
|
+
# Export a specific dataset
|
|
53
|
+
opik export my-workspace dataset "my-dataset"
|
|
54
|
+
|
|
55
|
+
# Export a project with OQL filter
|
|
56
|
+
opik export my-workspace project "my-project" --filter "status:completed"
|
|
57
|
+
|
|
58
|
+
# Export an experiment with dataset filter (by name or ID)
|
|
59
|
+
opik export my-workspace experiment "my-experiment" --dataset "my-dataset"
|
|
60
|
+
opik export my-workspace experiment "01234567-89ab-cdef-0123-456789abcdef" --dataset "my-dataset"
|
|
61
|
+
|
|
62
|
+
# Export in CSV format to a specific directory
|
|
63
|
+
opik export my-workspace prompt "my-template" --format csv --path ./custom-exports
|
|
64
|
+
"""
|
|
65
|
+
ctx.ensure_object(dict)
|
|
66
|
+
ctx.obj["workspace"] = workspace
|
|
67
|
+
# Use API key from this command or from parent context
|
|
68
|
+
ctx.obj["api_key"] = api_key or (
|
|
69
|
+
ctx.parent.obj.get("api_key") if ctx.parent and ctx.parent.obj else None
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# If no subcommand was invoked, show helpful error
|
|
73
|
+
if ctx.invoked_subcommand is None:
|
|
74
|
+
available_items = ", ".join(
|
|
75
|
+
sorted(["dataset", "experiment", "prompt", "project"])
|
|
76
|
+
)
|
|
77
|
+
click.echo(
|
|
78
|
+
f"Error: Missing ITEM.\n\n"
|
|
79
|
+
f"Available items: {available_items}\n\n"
|
|
80
|
+
f"Usage: opik export {workspace} ITEM NAME [OPTIONS]\n\n"
|
|
81
|
+
f"Examples:\n"
|
|
82
|
+
f' opik export {workspace} dataset "my-dataset"\n'
|
|
83
|
+
f' opik export {workspace} project "my-project"\n'
|
|
84
|
+
f' opik export {workspace} experiment "my-experiment"\n'
|
|
85
|
+
f' opik export {workspace} prompt "my-template"\n\n'
|
|
86
|
+
f"Run 'opik export {workspace} --help' for more information.",
|
|
87
|
+
err=True,
|
|
88
|
+
)
|
|
89
|
+
ctx.exit(2)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Set subcommand metavar to ITEM instead of COMMAND
|
|
93
|
+
export_group.subcommand_metavar = "ITEM [ARGS]..."
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def format_commands(
|
|
97
|
+
self: click.Group, ctx: click.Context, formatter: click.HelpFormatter
|
|
98
|
+
) -> None:
|
|
99
|
+
"""Override to change 'Commands' heading to 'Items'."""
|
|
100
|
+
commands = []
|
|
101
|
+
for subcommand in self.list_commands(ctx):
|
|
102
|
+
cmd = self.get_command(ctx, subcommand)
|
|
103
|
+
if cmd is None or cmd.hidden:
|
|
104
|
+
continue
|
|
105
|
+
commands.append((subcommand, cmd))
|
|
106
|
+
|
|
107
|
+
if len(commands):
|
|
108
|
+
limit = formatter.width - 6 - max(len(cmd[0]) for cmd in commands)
|
|
109
|
+
rows = []
|
|
110
|
+
for subcommand, cmd in commands:
|
|
111
|
+
help = cmd.get_short_help_str(limit)
|
|
112
|
+
rows.append((subcommand, help))
|
|
113
|
+
|
|
114
|
+
if rows:
|
|
115
|
+
with formatter.section("Items"):
|
|
116
|
+
formatter.write_dl(rows)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# Override format_commands method
|
|
120
|
+
setattr(
|
|
121
|
+
export_group,
|
|
122
|
+
"format_commands",
|
|
123
|
+
format_commands.__get__(export_group, type(export_group)),
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Add the subcommands
|
|
128
|
+
export_group.add_command(export_dataset_command)
|
|
129
|
+
export_group.add_command(export_experiment_command)
|
|
130
|
+
export_group.add_command(export_prompt_command)
|
|
131
|
+
export_group.add_command(export_project_command)
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""Dataset export functionality."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
import opik
|
|
12
|
+
from .utils import (
|
|
13
|
+
debug_print,
|
|
14
|
+
dataset_to_csv_rows,
|
|
15
|
+
should_skip_file,
|
|
16
|
+
write_csv_data,
|
|
17
|
+
write_json_data,
|
|
18
|
+
print_export_summary,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
console = Console()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def export_single_dataset(
|
|
25
|
+
dataset: opik.Dataset,
|
|
26
|
+
output_dir: Path,
|
|
27
|
+
max_results: Optional[int],
|
|
28
|
+
force: bool,
|
|
29
|
+
debug: bool,
|
|
30
|
+
format: str,
|
|
31
|
+
) -> int:
|
|
32
|
+
"""Export a single dataset."""
|
|
33
|
+
try:
|
|
34
|
+
# Check if already exists and force is not set
|
|
35
|
+
if format.lower() == "csv":
|
|
36
|
+
dataset_file = output_dir / f"dataset_{dataset.name}.csv"
|
|
37
|
+
else:
|
|
38
|
+
dataset_file = output_dir / f"dataset_{dataset.name}.json"
|
|
39
|
+
|
|
40
|
+
if should_skip_file(dataset_file, force):
|
|
41
|
+
if debug:
|
|
42
|
+
debug_print(f"Skipping {dataset.name} (already exists)", debug)
|
|
43
|
+
return 0
|
|
44
|
+
|
|
45
|
+
# Get dataset items
|
|
46
|
+
if debug:
|
|
47
|
+
debug_print(f"Getting items for dataset: {dataset.name}", debug)
|
|
48
|
+
dataset_items = dataset.get_items()
|
|
49
|
+
|
|
50
|
+
# Format items for export
|
|
51
|
+
# Use all fields from each item (datasets can have any user-defined keys/values)
|
|
52
|
+
formatted_items = []
|
|
53
|
+
for item in dataset_items:
|
|
54
|
+
# Create a copy of the item, excluding the 'id' field if present
|
|
55
|
+
# (id is internal and not part of the dataset item content)
|
|
56
|
+
formatted_item = {k: v for k, v in item.items() if k != "id"}
|
|
57
|
+
formatted_items.append(formatted_item)
|
|
58
|
+
|
|
59
|
+
# Create dataset data structure
|
|
60
|
+
dataset_data = {
|
|
61
|
+
"name": dataset.name,
|
|
62
|
+
"description": dataset.description,
|
|
63
|
+
"items": formatted_items,
|
|
64
|
+
"downloaded_at": datetime.now().isoformat(),
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
# Save to file using the appropriate format
|
|
68
|
+
if format.lower() == "csv":
|
|
69
|
+
write_csv_data(dataset_data, dataset_file, dataset_to_csv_rows)
|
|
70
|
+
else:
|
|
71
|
+
write_json_data(dataset_data, dataset_file)
|
|
72
|
+
|
|
73
|
+
if debug:
|
|
74
|
+
debug_print(f"Exported dataset: {dataset.name}", debug)
|
|
75
|
+
return 1
|
|
76
|
+
|
|
77
|
+
except Exception as e:
|
|
78
|
+
console.print(f"[red]Error exporting dataset {dataset.name}: {e}[/red]")
|
|
79
|
+
return 0
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def export_dataset_by_name(
|
|
83
|
+
name: str,
|
|
84
|
+
workspace: str,
|
|
85
|
+
output_path: str,
|
|
86
|
+
max_results: Optional[int],
|
|
87
|
+
force: bool,
|
|
88
|
+
debug: bool,
|
|
89
|
+
format: str,
|
|
90
|
+
api_key: Optional[str] = None,
|
|
91
|
+
) -> None:
|
|
92
|
+
"""Export a dataset by exact name."""
|
|
93
|
+
try:
|
|
94
|
+
if debug:
|
|
95
|
+
debug_print(f"Exporting dataset: {name}", debug)
|
|
96
|
+
|
|
97
|
+
# Initialize client
|
|
98
|
+
if api_key:
|
|
99
|
+
client = opik.Opik(api_key=api_key, workspace=workspace)
|
|
100
|
+
else:
|
|
101
|
+
client = opik.Opik(workspace=workspace)
|
|
102
|
+
|
|
103
|
+
# Create output directory
|
|
104
|
+
output_dir = Path(output_path) / workspace / "datasets"
|
|
105
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
106
|
+
|
|
107
|
+
if debug:
|
|
108
|
+
debug_print(f"Target directory: {output_dir}", debug)
|
|
109
|
+
|
|
110
|
+
# Try to get dataset by exact name
|
|
111
|
+
try:
|
|
112
|
+
dataset = client.get_dataset(name)
|
|
113
|
+
if debug:
|
|
114
|
+
debug_print(f"Found dataset by direct lookup: {dataset.name}", debug)
|
|
115
|
+
except Exception as e:
|
|
116
|
+
console.print(f"[red]Dataset '{name}' not found: {e}[/red]")
|
|
117
|
+
sys.exit(1)
|
|
118
|
+
|
|
119
|
+
# Export the dataset
|
|
120
|
+
exported_count = export_single_dataset(
|
|
121
|
+
dataset, output_dir, max_results, force, debug, format
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# Collect statistics for summary
|
|
125
|
+
stats = {
|
|
126
|
+
"datasets": 1 if exported_count > 0 else 0,
|
|
127
|
+
"datasets_skipped": 0 if exported_count > 0 else 1,
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
# Show export summary
|
|
131
|
+
print_export_summary(stats, format)
|
|
132
|
+
|
|
133
|
+
if exported_count > 0:
|
|
134
|
+
console.print(
|
|
135
|
+
f"[green]Successfully exported dataset '{name}' to {output_dir}[/green]"
|
|
136
|
+
)
|
|
137
|
+
else:
|
|
138
|
+
console.print(
|
|
139
|
+
f"[yellow]Dataset '{name}' already exists (use --force to re-download)[/yellow]"
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
except Exception as e:
|
|
143
|
+
console.print(f"[red]Error exporting dataset: {e}[/red]")
|
|
144
|
+
sys.exit(1)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def export_experiment_datasets(
|
|
148
|
+
client: opik.Opik,
|
|
149
|
+
datasets_to_export: set[str],
|
|
150
|
+
datasets_dir: Path,
|
|
151
|
+
format: str,
|
|
152
|
+
debug: bool,
|
|
153
|
+
force: bool,
|
|
154
|
+
) -> tuple[int, int]:
|
|
155
|
+
"""Export datasets related to an experiment.
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
client: Opik client instance
|
|
159
|
+
datasets_to_export: Set of dataset names to export
|
|
160
|
+
datasets_dir: Directory to save datasets
|
|
161
|
+
format: Export format ('json' or 'csv')
|
|
162
|
+
debug: Enable debug output
|
|
163
|
+
force: Re-download datasets even if they already exist locally
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
Tuple of (exported_count, skipped_count)
|
|
167
|
+
"""
|
|
168
|
+
exported_count = 0
|
|
169
|
+
skipped_count = 0
|
|
170
|
+
|
|
171
|
+
for dataset_name in datasets_to_export:
|
|
172
|
+
try:
|
|
173
|
+
# Use format parameter to determine file extension
|
|
174
|
+
if format.lower() == "csv":
|
|
175
|
+
dataset_file = datasets_dir / f"dataset_{dataset_name}.csv"
|
|
176
|
+
else:
|
|
177
|
+
dataset_file = datasets_dir / f"dataset_{dataset_name}.json"
|
|
178
|
+
datasets_dir.mkdir(parents=True, exist_ok=True)
|
|
179
|
+
|
|
180
|
+
# Check if file already exists and should be skipped
|
|
181
|
+
if should_skip_file(dataset_file, force):
|
|
182
|
+
if debug:
|
|
183
|
+
debug_print(
|
|
184
|
+
f"Skipping dataset {dataset_name} (already exists)", debug
|
|
185
|
+
)
|
|
186
|
+
else:
|
|
187
|
+
console.print(
|
|
188
|
+
f"[yellow]Skipping dataset: {dataset_name} (already exists)[/yellow]"
|
|
189
|
+
)
|
|
190
|
+
skipped_count += 1
|
|
191
|
+
continue
|
|
192
|
+
|
|
193
|
+
dataset_obj = opik.Dataset(
|
|
194
|
+
name=dataset_name,
|
|
195
|
+
description=None, # Description not available from experiment
|
|
196
|
+
rest_client=client.rest_client,
|
|
197
|
+
)
|
|
198
|
+
dataset_items = dataset_obj.get_items()
|
|
199
|
+
|
|
200
|
+
dataset_data = {
|
|
201
|
+
"dataset": {
|
|
202
|
+
"name": dataset_name,
|
|
203
|
+
"id": getattr(dataset_obj, "id", None),
|
|
204
|
+
},
|
|
205
|
+
# Use all fields from each item, excluding 'id' (internal field)
|
|
206
|
+
"items": [
|
|
207
|
+
{k: v for k, v in item.items() if k != "id"}
|
|
208
|
+
for item in dataset_items
|
|
209
|
+
],
|
|
210
|
+
"downloaded_at": datetime.now().isoformat(),
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
# Save to file using the appropriate format
|
|
214
|
+
if format.lower() == "csv":
|
|
215
|
+
write_csv_data(dataset_data, dataset_file, dataset_to_csv_rows)
|
|
216
|
+
else:
|
|
217
|
+
write_json_data(dataset_data, dataset_file)
|
|
218
|
+
|
|
219
|
+
console.print(f"[green]Exported dataset: {dataset_name}[/green]")
|
|
220
|
+
exported_count += 1
|
|
221
|
+
except Exception as e:
|
|
222
|
+
if debug:
|
|
223
|
+
console.print(
|
|
224
|
+
f"[yellow]Warning: Could not export dataset {dataset_name}: {e}[/yellow]"
|
|
225
|
+
)
|
|
226
|
+
else:
|
|
227
|
+
console.print(f"[red]Error exporting dataset {dataset_name}: {e}[/red]")
|
|
228
|
+
|
|
229
|
+
return exported_count, skipped_count
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
@click.command(name="dataset")
|
|
233
|
+
@click.argument("name", type=str)
|
|
234
|
+
@click.option(
|
|
235
|
+
"--max-results",
|
|
236
|
+
type=int,
|
|
237
|
+
help="Maximum number of datasets to export. Limits the total number of datasets downloaded.",
|
|
238
|
+
)
|
|
239
|
+
@click.option(
|
|
240
|
+
"--path",
|
|
241
|
+
"-p",
|
|
242
|
+
type=click.Path(file_okay=False, dir_okay=True, writable=True),
|
|
243
|
+
default="opik_exports",
|
|
244
|
+
help="Directory to save exported data. Defaults to opik_exports.",
|
|
245
|
+
)
|
|
246
|
+
@click.option(
|
|
247
|
+
"--force",
|
|
248
|
+
is_flag=True,
|
|
249
|
+
help="Re-download items even if they already exist locally.",
|
|
250
|
+
)
|
|
251
|
+
@click.option(
|
|
252
|
+
"--debug",
|
|
253
|
+
is_flag=True,
|
|
254
|
+
help="Enable debug output to show detailed information about the export process.",
|
|
255
|
+
)
|
|
256
|
+
@click.option(
|
|
257
|
+
"--format",
|
|
258
|
+
type=click.Choice(["json", "csv"], case_sensitive=False),
|
|
259
|
+
default="json",
|
|
260
|
+
help="Format for exporting data. Defaults to json.",
|
|
261
|
+
)
|
|
262
|
+
@click.pass_context
|
|
263
|
+
def export_dataset_command(
|
|
264
|
+
ctx: click.Context,
|
|
265
|
+
name: str,
|
|
266
|
+
max_results: Optional[int],
|
|
267
|
+
path: str,
|
|
268
|
+
force: bool,
|
|
269
|
+
debug: bool,
|
|
270
|
+
format: str,
|
|
271
|
+
) -> None:
|
|
272
|
+
"""Export a dataset by exact name to workspace/datasets."""
|
|
273
|
+
# Get workspace and API key from context
|
|
274
|
+
workspace = ctx.obj["workspace"]
|
|
275
|
+
api_key = ctx.obj.get("api_key") if ctx.obj else None
|
|
276
|
+
export_dataset_by_name(
|
|
277
|
+
name, workspace, path, max_results, force, debug, format, api_key
|
|
278
|
+
)
|