opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. opik/api_objects/attachment/attachment_context.py +36 -0
  2. opik/api_objects/attachment/attachments_extractor.py +153 -0
  3. opik/api_objects/attachment/client.py +1 -0
  4. opik/api_objects/attachment/converters.py +2 -0
  5. opik/api_objects/attachment/decoder.py +18 -0
  6. opik/api_objects/attachment/decoder_base64.py +83 -0
  7. opik/api_objects/attachment/decoder_helpers.py +137 -0
  8. opik/api_objects/constants.py +2 -0
  9. opik/api_objects/dataset/dataset.py +133 -40
  10. opik/api_objects/dataset/rest_operations.py +2 -0
  11. opik/api_objects/experiment/experiment.py +6 -0
  12. opik/api_objects/helpers.py +8 -4
  13. opik/api_objects/local_recording.py +6 -5
  14. opik/api_objects/observation_data.py +101 -0
  15. opik/api_objects/opik_client.py +78 -45
  16. opik/api_objects/opik_query_language.py +9 -3
  17. opik/api_objects/prompt/chat/chat_prompt.py +18 -1
  18. opik/api_objects/prompt/client.py +8 -1
  19. opik/api_objects/span/span_data.py +3 -88
  20. opik/api_objects/threads/threads_client.py +7 -4
  21. opik/api_objects/trace/trace_data.py +3 -74
  22. opik/api_objects/validation_helpers.py +3 -3
  23. opik/cli/exports/__init__.py +131 -0
  24. opik/cli/exports/dataset.py +278 -0
  25. opik/cli/exports/experiment.py +784 -0
  26. opik/cli/exports/project.py +685 -0
  27. opik/cli/exports/prompt.py +578 -0
  28. opik/cli/exports/utils.py +406 -0
  29. opik/cli/harbor.py +39 -0
  30. opik/cli/imports/__init__.py +439 -0
  31. opik/cli/imports/dataset.py +143 -0
  32. opik/cli/imports/experiment.py +1192 -0
  33. opik/cli/imports/project.py +262 -0
  34. opik/cli/imports/prompt.py +177 -0
  35. opik/cli/imports/utils.py +280 -0
  36. opik/cli/main.py +14 -12
  37. opik/config.py +12 -1
  38. opik/datetime_helpers.py +12 -0
  39. opik/decorator/arguments_helpers.py +4 -1
  40. opik/decorator/base_track_decorator.py +111 -37
  41. opik/decorator/context_manager/span_context_manager.py +5 -1
  42. opik/decorator/generator_wrappers.py +5 -4
  43. opik/decorator/span_creation_handler.py +13 -4
  44. opik/evaluation/engine/engine.py +111 -28
  45. opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
  46. opik/evaluation/evaluator.py +12 -0
  47. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
  48. opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
  49. opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
  50. opik/evaluation/metrics/heuristics/equals.py +11 -7
  51. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
  52. opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
  53. opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
  54. opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
  55. opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
  56. opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
  57. opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
  58. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
  59. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
  60. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
  61. opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
  62. opik/evaluation/metrics/ragas_metric.py +43 -23
  63. opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
  64. opik/evaluation/models/litellm/util.py +4 -20
  65. opik/evaluation/models/models_factory.py +19 -5
  66. opik/evaluation/rest_operations.py +3 -3
  67. opik/evaluation/threads/helpers.py +3 -2
  68. opik/file_upload/file_uploader.py +13 -0
  69. opik/file_upload/upload_options.py +2 -0
  70. opik/integrations/adk/legacy_opik_tracer.py +9 -11
  71. opik/integrations/adk/opik_tracer.py +2 -2
  72. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
  73. opik/integrations/dspy/callback.py +100 -14
  74. opik/integrations/dspy/parsers.py +168 -0
  75. opik/integrations/harbor/__init__.py +17 -0
  76. opik/integrations/harbor/experiment_service.py +269 -0
  77. opik/integrations/harbor/opik_tracker.py +528 -0
  78. opik/integrations/haystack/opik_tracer.py +2 -2
  79. opik/integrations/langchain/__init__.py +15 -2
  80. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  81. opik/integrations/langchain/opik_tracer.py +258 -160
  82. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
  83. opik/integrations/llama_index/callback.py +43 -6
  84. opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
  85. opik/integrations/openai/opik_tracker.py +99 -4
  86. opik/integrations/openai/videos/__init__.py +9 -0
  87. opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
  88. opik/integrations/openai/videos/videos_create_decorator.py +159 -0
  89. opik/integrations/openai/videos/videos_download_decorator.py +110 -0
  90. opik/message_processing/batching/base_batcher.py +14 -21
  91. opik/message_processing/batching/batch_manager.py +22 -10
  92. opik/message_processing/batching/batchers.py +32 -40
  93. opik/message_processing/batching/flushing_thread.py +0 -3
  94. opik/message_processing/emulation/emulator_message_processor.py +36 -1
  95. opik/message_processing/emulation/models.py +21 -0
  96. opik/message_processing/messages.py +9 -0
  97. opik/message_processing/preprocessing/__init__.py +0 -0
  98. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  99. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  100. opik/message_processing/preprocessing/constants.py +1 -0
  101. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  102. opik/message_processing/preprocessing/preprocessor.py +36 -0
  103. opik/message_processing/processors/__init__.py +0 -0
  104. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  105. opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
  106. opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
  107. opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
  108. opik/message_processing/queue_consumer.py +4 -2
  109. opik/message_processing/streamer.py +71 -33
  110. opik/message_processing/streamer_constructors.py +36 -8
  111. opik/plugins/pytest/experiment_runner.py +1 -1
  112. opik/plugins/pytest/hooks.py +5 -3
  113. opik/rest_api/__init__.py +38 -0
  114. opik/rest_api/datasets/client.py +249 -148
  115. opik/rest_api/datasets/raw_client.py +356 -217
  116. opik/rest_api/experiments/client.py +26 -0
  117. opik/rest_api/experiments/raw_client.py +26 -0
  118. opik/rest_api/llm_provider_key/client.py +4 -4
  119. opik/rest_api/llm_provider_key/raw_client.py +4 -4
  120. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
  121. opik/rest_api/manual_evaluation/client.py +101 -0
  122. opik/rest_api/manual_evaluation/raw_client.py +172 -0
  123. opik/rest_api/optimizations/client.py +0 -166
  124. opik/rest_api/optimizations/raw_client.py +0 -248
  125. opik/rest_api/projects/client.py +9 -0
  126. opik/rest_api/projects/raw_client.py +13 -0
  127. opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
  128. opik/rest_api/prompts/client.py +130 -2
  129. opik/rest_api/prompts/raw_client.py +175 -0
  130. opik/rest_api/traces/client.py +101 -0
  131. opik/rest_api/traces/raw_client.py +120 -0
  132. opik/rest_api/types/__init__.py +46 -0
  133. opik/rest_api/types/audio_url.py +19 -0
  134. opik/rest_api/types/audio_url_public.py +19 -0
  135. opik/rest_api/types/audio_url_write.py +19 -0
  136. opik/rest_api/types/automation_rule_evaluator.py +38 -2
  137. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
  138. opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
  139. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  140. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  141. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  142. opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
  143. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  144. opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
  145. opik/rest_api/types/dataset_item.py +1 -1
  146. opik/rest_api/types/dataset_item_batch.py +4 -0
  147. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  148. opik/rest_api/types/dataset_item_compare.py +1 -1
  149. opik/rest_api/types/dataset_item_filter.py +4 -0
  150. opik/rest_api/types/dataset_item_page_compare.py +0 -1
  151. opik/rest_api/types/dataset_item_page_public.py +0 -1
  152. opik/rest_api/types/dataset_item_public.py +1 -1
  153. opik/rest_api/types/dataset_version_public.py +5 -0
  154. opik/rest_api/types/dataset_version_summary.py +5 -0
  155. opik/rest_api/types/dataset_version_summary_public.py +5 -0
  156. opik/rest_api/types/experiment.py +9 -0
  157. opik/rest_api/types/experiment_public.py +9 -0
  158. opik/rest_api/types/llm_as_judge_message_content.py +2 -0
  159. opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
  160. opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
  161. opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
  162. opik/rest_api/types/project.py +1 -0
  163. opik/rest_api/types/project_detailed.py +1 -0
  164. opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
  165. opik/rest_api/types/project_reference.py +31 -0
  166. opik/rest_api/types/project_reference_public.py +31 -0
  167. opik/rest_api/types/project_stats_summary_item.py +1 -0
  168. opik/rest_api/types/prompt_version.py +1 -0
  169. opik/rest_api/types/prompt_version_detail.py +1 -0
  170. opik/rest_api/types/prompt_version_page_public.py +5 -0
  171. opik/rest_api/types/prompt_version_public.py +1 -0
  172. opik/rest_api/types/prompt_version_update.py +33 -0
  173. opik/rest_api/types/provider_api_key.py +5 -1
  174. opik/rest_api/types/provider_api_key_provider.py +2 -1
  175. opik/rest_api/types/provider_api_key_public.py +5 -1
  176. opik/rest_api/types/provider_api_key_public_provider.py +2 -1
  177. opik/rest_api/types/service_toggles_config.py +11 -1
  178. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  179. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  180. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  181. opik/types.py +36 -0
  182. opik/validation/chat_prompt_messages.py +241 -0
  183. opik/validation/feedback_score.py +3 -3
  184. opik/validation/validator.py +28 -0
  185. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/METADATA +5 -5
  186. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/RECORD +190 -141
  187. opik/cli/export.py +0 -791
  188. opik/cli/import_command.py +0 -575
  189. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
  190. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
  191. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
  192. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,5 @@
1
1
  import dataclasses
2
2
  import datetime
3
- import logging
4
3
  from typing import Any, Dict, List, Optional, Union
5
4
 
6
5
  import opik.api_objects.attachment as attachment
@@ -14,40 +13,19 @@ from opik.types import (
14
13
  LLMProvider,
15
14
  SpanType,
16
15
  )
17
- from .. import span, data_helpers
16
+ from .. import span
17
+ from ..observation_data import ObservationData
18
18
 
19
- LOGGER = logging.getLogger(__name__)
20
19
 
21
-
22
- # Engineer note:
23
- #
24
- # After moving to minimal python version 3.10, a lot of common content
25
- # from SpanData and TraceData can be moved to ObservationData parent dataclass.
26
- # Before that it's impossible because of the dataclasses limitation to have optional arguments
27
- # strictly after positional ones (including the attributes from the parent class).
28
- # In python 3.10 @dataclass(kw_only=True) should help.
29
20
  @dataclasses.dataclass
30
- class TraceData:
21
+ class TraceData(ObservationData):
31
22
  """
32
23
  The TraceData object is returned when calling :func:`opik.opik_context.get_current_trace_data` from a tracked function.
33
24
  """
34
25
 
35
26
  id: str = dataclasses.field(default_factory=id_helpers.generate_id)
36
- name: Optional[str] = None
37
- start_time: Optional[datetime.datetime] = dataclasses.field(
38
- default_factory=datetime_helpers.local_timestamp
39
- )
40
- end_time: Optional[datetime.datetime] = None
41
- metadata: Optional[Dict[str, Any]] = None
42
- input: Optional[Dict[str, Any]] = None
43
- output: Optional[Dict[str, Any]] = None
44
- tags: Optional[List[str]] = None
45
- feedback_scores: Optional[List[FeedbackScoreDict]] = None
46
- project_name: Optional[str] = None
47
27
  created_by: Optional[CreatedByType] = None
48
- error_info: Optional[ErrorInfoDict] = None
49
28
  thread_id: Optional[str] = None
50
- attachments: Optional[List[attachment.Attachment]] = None
51
29
 
52
30
  def create_child_span_data(
53
31
  self,
@@ -91,55 +69,6 @@ class TraceData:
91
69
  attachments=attachments,
92
70
  )
93
71
 
94
- def update(self, **new_data: Any) -> "TraceData":
95
- for key, value in new_data.items():
96
- if value is None:
97
- continue
98
-
99
- if key not in self.__dict__ and key != "prompts":
100
- LOGGER.debug(
101
- "An attempt to update span with parameter name it doesn't have: %s",
102
- key,
103
- )
104
- continue
105
-
106
- if key == "metadata":
107
- self.metadata = data_helpers.merge_metadata(
108
- self.metadata, new_metadata=value
109
- )
110
- continue
111
- elif key == "output":
112
- self.output = data_helpers.merge_outputs(self.output, new_outputs=value)
113
- continue
114
- elif key == "input":
115
- self.input = data_helpers.merge_inputs(self.input, new_inputs=value)
116
- continue
117
- elif key == "attachments":
118
- self._update_attachments(value)
119
- continue
120
- elif key == "tags":
121
- self.tags = data_helpers.merge_tags(self.tags, new_tags=value)
122
- continue
123
- elif key == "prompts":
124
- self.metadata = data_helpers.merge_metadata(
125
- self.metadata, new_metadata=new_data.get("metadata"), prompts=value
126
- )
127
- continue
128
-
129
- self.__dict__[key] = value
130
-
131
- return self
132
-
133
- def init_end_time(self) -> "TraceData":
134
- self.end_time = datetime_helpers.local_timestamp()
135
- return self
136
-
137
- def _update_attachments(self, attachments: List[attachment.Attachment]) -> None:
138
- if self.attachments is None:
139
- self.attachments = attachments
140
- else:
141
- self.attachments.extend(attachments)
142
-
143
72
  @property
144
73
  def as_start_parameters(self) -> Dict[str, Any]:
145
74
  """Returns parameters of this trace to be sent to the server when starting a new trace."""
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  from typing import Any, Optional, cast, Union, Dict
3
3
 
4
- from ..types import FeedbackScoreDict
4
+ from ..types import BatchFeedbackScoreDict
5
5
  from ..validation import feedback_score as feedback_score_validator
6
6
  from .. import logging_messages, llm_usage
7
7
  from opik.types import LLMProvider
@@ -38,7 +38,7 @@ def validate_and_parse_usage(
38
38
 
39
39
  def validate_feedback_score(
40
40
  feedback_score: Any, logger: logging.Logger
41
- ) -> Optional[FeedbackScoreDict]:
41
+ ) -> Optional[BatchFeedbackScoreDict]:
42
42
  feedback_score_validator_ = feedback_score_validator.FeedbackScoreValidator(
43
43
  feedback_score
44
44
  )
@@ -51,4 +51,4 @@ def validate_feedback_score(
51
51
  )
52
52
  return None
53
53
 
54
- return cast(FeedbackScoreDict, feedback_score)
54
+ return cast(BatchFeedbackScoreDict, feedback_score)
@@ -0,0 +1,131 @@
1
+ """Download command for Opik CLI."""
2
+
3
+ from typing import Optional
4
+
5
+ import click
6
+
7
+ from .dataset import export_dataset_command
8
+ from .experiment import export_experiment_command
9
+ from .prompt import export_prompt_command
10
+ from .project import export_project_command
11
+
12
+ EXPORT_CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
13
+
14
+
15
@click.group(
    name="export", context_settings=EXPORT_CONTEXT_SETTINGS, invoke_without_command=True
)
@click.argument("workspace", type=str)
@click.option(
    "--api-key",
    type=str,
    help="Opik API key. If not provided, will use OPIK_API_KEY environment variable or configuration.",
)
@click.pass_context
def export_group(ctx: click.Context, workspace: str, api_key: Optional[str]) -> None:
    """Export data from Opik workspace.

    This command allows you to export specific data from an Opik workspace to local files.
    Supported data types include datasets, projects, experiments, and prompts.

    \b
    General Usage:
    opik export WORKSPACE ITEM NAME [OPTIONS]

    \b
    Data Types (ITEM):
    dataset Export a dataset by exact name (exports dataset definition and items)
    project Export a project by name or ID (exports project traces and metadata)
    experiment Export an experiment by name or ID (exports experiment configuration and results)
    prompt Export a prompt by exact name (exports prompt templates and versions)

    \b
    Common Options:
    --path, -p Directory to save exported data (default: opik_exports)
    --format Export format: json or csv (default: json)
    --max-results Maximum number of items to export (varies by data type)
    --force Re-download items even if they already exist locally
    --debug Show detailed information about the export process

    \b
    Examples:
    # Export a specific dataset
    opik export my-workspace dataset "my-dataset"

    # Export a project with OQL filter
    opik export my-workspace project "my-project" --filter "status:completed"

    # Export an experiment with dataset filter (by name or ID)
    opik export my-workspace experiment "my-experiment" --dataset "my-dataset"
    opik export my-workspace experiment "01234567-89ab-cdef-0123-456789abcdef" --dataset "my-dataset"

    # Export in CSV format to a specific directory
    opik export my-workspace prompt "my-template" --format csv --path ./custom-exports
    """
    # Subcommands read the workspace and API key back out of ctx.obj.
    ctx.ensure_object(dict)
    ctx.obj["workspace"] = workspace

    # An explicit --api-key wins; otherwise fall back to the parent context.
    if not api_key:
        parent = ctx.parent
        api_key = parent.obj.get("api_key") if parent and parent.obj else None
    ctx.obj["api_key"] = api_key

    if ctx.invoked_subcommand is not None:
        return

    # The group was invoked without an ITEM: explain the usage and fail.
    available_items = ", ".join(sorted(("dataset", "experiment", "prompt", "project")))
    message_lines = [
        "Error: Missing ITEM.",
        "",
        f"Available items: {available_items}",
        "",
        f"Usage: opik export {workspace} ITEM NAME [OPTIONS]",
        "",
        "Examples:",
        f' opik export {workspace} dataset "my-dataset"',
        f' opik export {workspace} project "my-project"',
        f' opik export {workspace} experiment "my-experiment"',
        f' opik export {workspace} prompt "my-template"',
        "",
        f"Run 'opik export {workspace} --help' for more information.",
    ]
    click.echo("\n".join(message_lines), err=True)
    ctx.exit(2)
90
+
91
+
92
+ # Set subcommand metavar to ITEM instead of COMMAND
93
+ export_group.subcommand_metavar = "ITEM [ARGS]..."
94
+
95
+
96
def format_commands(
    self: click.Group, ctx: click.Context, formatter: click.HelpFormatter
) -> None:
    """Write the group's visible subcommands under an 'Items' heading.

    Subcommands that cannot be resolved or are marked hidden are left out.
    Nothing is written when no subcommand remains.
    """
    resolved = (
        (item_name, self.get_command(ctx, item_name))
        for item_name in self.list_commands(ctx)
    )
    visible = [
        (item_name, command)
        for item_name, command in resolved
        if command is not None and not command.hidden
    ]
    if not visible:
        return

    # Budget for the short help text: total width minus padding and the
    # longest subcommand name.
    longest_name = max(len(item_name) for item_name, _ in visible)
    limit = formatter.width - 6 - longest_name

    rows = [
        (item_name, command.get_short_help_str(limit))
        for item_name, command in visible
    ]
    with formatter.section("Items"):
        formatter.write_dl(rows)
117
+
118
+
119
# Bind the custom formatter to this group instance so its help output uses
# the "Items" heading.
setattr(
    export_group,
    "format_commands",
    format_commands.__get__(export_group, type(export_group)),
)


# Register one subcommand per exportable item type.
for _item_command in (
    export_dataset_command,
    export_experiment_command,
    export_prompt_command,
    export_project_command,
):
    export_group.add_command(_item_command)
del _item_command
@@ -0,0 +1,278 @@
1
+ """Dataset export functionality."""
2
+
3
+ import sys
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ import click
9
+ from rich.console import Console
10
+
11
+ import opik
12
+ from .utils import (
13
+ debug_print,
14
+ dataset_to_csv_rows,
15
+ should_skip_file,
16
+ write_csv_data,
17
+ write_json_data,
18
+ print_export_summary,
19
+ )
20
+
21
+ console = Console()
22
+
23
+
24
def export_single_dataset(
    dataset: opik.Dataset,
    output_dir: Path,
    max_results: Optional[int],
    force: bool,
    debug: bool,
    format: str,
) -> int:
    """Write one dataset to a file in ``output_dir``.

    Args:
        dataset: Dataset whose name, description and items are exported.
        output_dir: Directory that receives ``dataset_<name>.csv`` or
            ``dataset_<name>.json``.
        max_results: Accepted but not used by this function.
        force: Passed to ``should_skip_file`` together with the target path.
        debug: When true, progress messages go through ``debug_print``.
        format: ``"csv"`` (case-insensitive) selects CSV output; any other
            value selects JSON.

    Returns:
        1 when the file was written; 0 when the export was skipped or an
        exception was caught (the error is printed to the console).
    """
    try:
        wants_csv = format.lower() == "csv"
        suffix = "csv" if wants_csv else "json"
        dataset_file = output_dir / f"dataset_{dataset.name}.{suffix}"

        if should_skip_file(dataset_file, force):
            if debug:
                debug_print(f"Skipping {dataset.name} (already exists)", debug)
            return 0

        if debug:
            debug_print(f"Getting items for dataset: {dataset.name}", debug)

        # Items may carry arbitrary user-defined keys; only the internal
        # 'id' field is dropped from the exported content.
        exported_items = [
            {key: value for key, value in item.items() if key != "id"}
            for item in dataset.get_items()
        ]

        payload = {
            "name": dataset.name,
            "description": dataset.description,
            "items": exported_items,
            "downloaded_at": datetime.now().isoformat(),
        }

        if wants_csv:
            write_csv_data(payload, dataset_file, dataset_to_csv_rows)
        else:
            write_json_data(payload, dataset_file)

        if debug:
            debug_print(f"Exported dataset: {dataset.name}", debug)
        return 1

    except Exception as e:
        console.print(f"[red]Error exporting dataset {dataset.name}: {e}[/red]")
        return 0
80
+
81
+
82
def export_dataset_by_name(
    name: str,
    workspace: str,
    output_path: str,
    max_results: Optional[int],
    force: bool,
    debug: bool,
    format: str,
    api_key: Optional[str] = None,
) -> None:
    """Export a dataset by exact name.

    Looks the dataset up in ``workspace``, writes it under
    ``<output_path>/<workspace>/datasets`` and prints a summary.

    Args:
        name: Exact dataset name to look up.
        workspace: Workspace name; also used as a directory component.
        output_path: Root directory for exported data.
        max_results: Forwarded unchanged to ``export_single_dataset``.
        force: Forwarded unchanged to ``export_single_dataset``.
        debug: When true, progress messages go through ``debug_print``.
        format: Export format, forwarded to the writer and the summary.
        api_key: Optional API key; when falsy the client is created without one.

    Raises:
        SystemExit: With status 1 when the dataset cannot be found, when the
            export itself fails, or when any other exception is caught.
    """
    try:
        if debug:
            debug_print(f"Exporting dataset: {name}", debug)

        # Initialize client
        if api_key:
            client = opik.Opik(api_key=api_key, workspace=workspace)
        else:
            client = opik.Opik(workspace=workspace)

        # Create output directory
        output_dir = Path(output_path) / workspace / "datasets"
        output_dir.mkdir(parents=True, exist_ok=True)

        if debug:
            debug_print(f"Target directory: {output_dir}", debug)

        # Try to get dataset by exact name
        try:
            dataset = client.get_dataset(name)
            if debug:
                debug_print(f"Found dataset by direct lookup: {dataset.name}", debug)
        except Exception as e:
            console.print(f"[red]Dataset '{name}' not found: {e}[/red]")
            sys.exit(1)

        # export_single_dataset returns 0 both for "skipped" and for "failed".
        # Record whether the target file was already present so the two can
        # be told apart afterwards. The path mirrors the naming used by
        # export_single_dataset.
        extension = "csv" if format.lower() == "csv" else "json"
        target_file = output_dir / f"dataset_{dataset.name}.{extension}"
        existed_before = target_file.exists()

        # Export the dataset
        exported_count = export_single_dataset(
            dataset, output_dir, max_results, force, debug, format
        )

        # NOTE(review): assumes the helper only skips when the file already
        # exists and --force is not set; confirm against should_skip_file.
        was_skipped = existed_before and not force
        if exported_count == 0 and not was_skipped:
            # The helper has already printed the error; do not misreport the
            # failure as "already exists" and do not exit with status 0.
            sys.exit(1)

        # Collect statistics for summary
        stats = {
            "datasets": 1 if exported_count > 0 else 0,
            "datasets_skipped": 0 if exported_count > 0 else 1,
        }

        # Show export summary
        print_export_summary(stats, format)

        if exported_count > 0:
            console.print(
                f"[green]Successfully exported dataset '{name}' to {output_dir}[/green]"
            )
        else:
            console.print(
                f"[yellow]Dataset '{name}' already exists (use --force to re-download)[/yellow]"
            )

    except Exception as e:
        console.print(f"[red]Error exporting dataset: {e}[/red]")
        sys.exit(1)
145
+
146
+
147
def export_experiment_datasets(
    client: opik.Opik,
    datasets_to_export: set[str],
    datasets_dir: Path,
    format: str,
    debug: bool,
    force: bool,
) -> tuple[int, int]:
    """Export the datasets referenced by an experiment.

    Each dataset is handled independently: an exception raised while
    exporting one dataset is printed and the loop moves on to the next.

    Args:
        client: Opik client instance; its ``rest_client`` is used to build
            the dataset objects.
        datasets_to_export: Set of dataset names to export.
        datasets_dir: Directory to save datasets (created if missing).
        format: Export format ('json' or 'csv').
        debug: Enable debug output.
        force: Re-download datasets even if they already exist locally.

    Returns:
        Tuple of (exported_count, skipped_count).
    """
    exported_total = 0
    skipped_total = 0

    for dataset_name in datasets_to_export:
        try:
            # The format decides both the file extension and the writer.
            wants_csv = format.lower() == "csv"
            suffix = "csv" if wants_csv else "json"
            dataset_file = datasets_dir / f"dataset_{dataset_name}.{suffix}"
            datasets_dir.mkdir(parents=True, exist_ok=True)

            if should_skip_file(dataset_file, force):
                if debug:
                    debug_print(
                        f"Skipping dataset {dataset_name} (already exists)", debug
                    )
                else:
                    console.print(
                        f"[yellow]Skipping dataset: {dataset_name} (already exists)[/yellow]"
                    )
                skipped_total += 1
                continue

            # Only the name is known here; the experiment does not carry
            # the dataset description.
            dataset_obj = opik.Dataset(
                name=dataset_name,
                description=None,
                rest_client=client.rest_client,
            )
            raw_items = dataset_obj.get_items()

            payload = {
                "dataset": {
                    "name": dataset_name,
                    "id": getattr(dataset_obj, "id", None),
                },
                # Keep every user-defined field; drop the internal 'id'.
                "items": [
                    {key: value for key, value in item.items() if key != "id"}
                    for item in raw_items
                ],
                "downloaded_at": datetime.now().isoformat(),
            }

            if wants_csv:
                write_csv_data(payload, dataset_file, dataset_to_csv_rows)
            else:
                write_json_data(payload, dataset_file)

            console.print(f"[green]Exported dataset: {dataset_name}[/green]")
            exported_total += 1
        except Exception as e:
            if debug:
                failure_message = f"[yellow]Warning: Could not export dataset {dataset_name}: {e}[/yellow]"
            else:
                failure_message = f"[red]Error exporting dataset {dataset_name}: {e}[/red]"
            console.print(failure_message)

    return exported_total, skipped_total
230
+
231
+
232
@click.command(name="dataset")
@click.argument("name", type=str)
@click.option(
    "--max-results",
    type=int,
    help="Maximum number of datasets to export. Limits the total number of datasets downloaded.",
)
@click.option(
    "--path",
    "-p",
    type=click.Path(file_okay=False, dir_okay=True, writable=True),
    default="opik_exports",
    help="Directory to save exported data. Defaults to opik_exports.",
)
@click.option(
    "--force",
    is_flag=True,
    help="Re-download items even if they already exist locally.",
)
@click.option(
    "--debug",
    is_flag=True,
    help="Enable debug output to show detailed information about the export process.",
)
@click.option(
    "--format",
    type=click.Choice(["json", "csv"], case_sensitive=False),
    default="json",
    help="Format for exporting data. Defaults to json.",
)
@click.pass_context
def export_dataset_command(
    ctx: click.Context,
    name: str,
    max_results: Optional[int],
    path: str,
    force: bool,
    debug: bool,
    format: str,
) -> None:
    """Export a dataset by exact name to workspace/datasets."""
    # The workspace and API key are read from the shared context object.
    shared = ctx.obj
    workspace = shared["workspace"]
    api_key = shared.get("api_key") if shared else None

    export_dataset_by_name(
        name=name,
        workspace=workspace,
        output_path=path,
        max_results=max_results,
        force=force,
        debug=debug,
        format=format,
        api_key=api_key,
    )