opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. opik/api_objects/attachment/attachment_context.py +36 -0
  2. opik/api_objects/attachment/attachments_extractor.py +153 -0
  3. opik/api_objects/attachment/client.py +1 -0
  4. opik/api_objects/attachment/converters.py +2 -0
  5. opik/api_objects/attachment/decoder.py +18 -0
  6. opik/api_objects/attachment/decoder_base64.py +83 -0
  7. opik/api_objects/attachment/decoder_helpers.py +137 -0
  8. opik/api_objects/constants.py +2 -0
  9. opik/api_objects/dataset/dataset.py +133 -40
  10. opik/api_objects/dataset/rest_operations.py +2 -0
  11. opik/api_objects/experiment/experiment.py +6 -0
  12. opik/api_objects/helpers.py +8 -4
  13. opik/api_objects/local_recording.py +6 -5
  14. opik/api_objects/observation_data.py +101 -0
  15. opik/api_objects/opik_client.py +78 -45
  16. opik/api_objects/opik_query_language.py +9 -3
  17. opik/api_objects/prompt/chat/chat_prompt.py +18 -1
  18. opik/api_objects/prompt/client.py +8 -1
  19. opik/api_objects/span/span_data.py +3 -88
  20. opik/api_objects/threads/threads_client.py +7 -4
  21. opik/api_objects/trace/trace_data.py +3 -74
  22. opik/api_objects/validation_helpers.py +3 -3
  23. opik/cli/exports/__init__.py +131 -0
  24. opik/cli/exports/dataset.py +278 -0
  25. opik/cli/exports/experiment.py +784 -0
  26. opik/cli/exports/project.py +685 -0
  27. opik/cli/exports/prompt.py +578 -0
  28. opik/cli/exports/utils.py +406 -0
  29. opik/cli/harbor.py +39 -0
  30. opik/cli/imports/__init__.py +439 -0
  31. opik/cli/imports/dataset.py +143 -0
  32. opik/cli/imports/experiment.py +1192 -0
  33. opik/cli/imports/project.py +262 -0
  34. opik/cli/imports/prompt.py +177 -0
  35. opik/cli/imports/utils.py +280 -0
  36. opik/cli/main.py +14 -12
  37. opik/config.py +12 -1
  38. opik/datetime_helpers.py +12 -0
  39. opik/decorator/arguments_helpers.py +4 -1
  40. opik/decorator/base_track_decorator.py +111 -37
  41. opik/decorator/context_manager/span_context_manager.py +5 -1
  42. opik/decorator/generator_wrappers.py +5 -4
  43. opik/decorator/span_creation_handler.py +13 -4
  44. opik/evaluation/engine/engine.py +111 -28
  45. opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
  46. opik/evaluation/evaluator.py +12 -0
  47. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
  48. opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
  49. opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
  50. opik/evaluation/metrics/heuristics/equals.py +11 -7
  51. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
  52. opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
  53. opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
  54. opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
  55. opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
  56. opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
  57. opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
  58. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
  59. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
  60. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
  61. opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
  62. opik/evaluation/metrics/ragas_metric.py +43 -23
  63. opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
  64. opik/evaluation/models/litellm/util.py +4 -20
  65. opik/evaluation/models/models_factory.py +19 -5
  66. opik/evaluation/rest_operations.py +3 -3
  67. opik/evaluation/threads/helpers.py +3 -2
  68. opik/file_upload/file_uploader.py +13 -0
  69. opik/file_upload/upload_options.py +2 -0
  70. opik/integrations/adk/legacy_opik_tracer.py +9 -11
  71. opik/integrations/adk/opik_tracer.py +2 -2
  72. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
  73. opik/integrations/dspy/callback.py +100 -14
  74. opik/integrations/dspy/parsers.py +168 -0
  75. opik/integrations/harbor/__init__.py +17 -0
  76. opik/integrations/harbor/experiment_service.py +269 -0
  77. opik/integrations/harbor/opik_tracker.py +528 -0
  78. opik/integrations/haystack/opik_tracer.py +2 -2
  79. opik/integrations/langchain/__init__.py +15 -2
  80. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  81. opik/integrations/langchain/opik_tracer.py +258 -160
  82. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
  83. opik/integrations/llama_index/callback.py +43 -6
  84. opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
  85. opik/integrations/openai/opik_tracker.py +99 -4
  86. opik/integrations/openai/videos/__init__.py +9 -0
  87. opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
  88. opik/integrations/openai/videos/videos_create_decorator.py +159 -0
  89. opik/integrations/openai/videos/videos_download_decorator.py +110 -0
  90. opik/message_processing/batching/base_batcher.py +14 -21
  91. opik/message_processing/batching/batch_manager.py +22 -10
  92. opik/message_processing/batching/batchers.py +32 -40
  93. opik/message_processing/batching/flushing_thread.py +0 -3
  94. opik/message_processing/emulation/emulator_message_processor.py +36 -1
  95. opik/message_processing/emulation/models.py +21 -0
  96. opik/message_processing/messages.py +9 -0
  97. opik/message_processing/preprocessing/__init__.py +0 -0
  98. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  99. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  100. opik/message_processing/preprocessing/constants.py +1 -0
  101. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  102. opik/message_processing/preprocessing/preprocessor.py +36 -0
  103. opik/message_processing/processors/__init__.py +0 -0
  104. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  105. opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
  106. opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
  107. opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
  108. opik/message_processing/queue_consumer.py +4 -2
  109. opik/message_processing/streamer.py +71 -33
  110. opik/message_processing/streamer_constructors.py +36 -8
  111. opik/plugins/pytest/experiment_runner.py +1 -1
  112. opik/plugins/pytest/hooks.py +5 -3
  113. opik/rest_api/__init__.py +42 -0
  114. opik/rest_api/datasets/client.py +321 -123
  115. opik/rest_api/datasets/raw_client.py +470 -145
  116. opik/rest_api/experiments/client.py +26 -0
  117. opik/rest_api/experiments/raw_client.py +26 -0
  118. opik/rest_api/llm_provider_key/client.py +4 -4
  119. opik/rest_api/llm_provider_key/raw_client.py +4 -4
  120. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
  121. opik/rest_api/manual_evaluation/client.py +101 -0
  122. opik/rest_api/manual_evaluation/raw_client.py +172 -0
  123. opik/rest_api/optimizations/client.py +0 -166
  124. opik/rest_api/optimizations/raw_client.py +0 -248
  125. opik/rest_api/projects/client.py +9 -0
  126. opik/rest_api/projects/raw_client.py +13 -0
  127. opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
  128. opik/rest_api/prompts/client.py +130 -2
  129. opik/rest_api/prompts/raw_client.py +175 -0
  130. opik/rest_api/traces/client.py +101 -0
  131. opik/rest_api/traces/raw_client.py +120 -0
  132. opik/rest_api/types/__init__.py +50 -0
  133. opik/rest_api/types/audio_url.py +19 -0
  134. opik/rest_api/types/audio_url_public.py +19 -0
  135. opik/rest_api/types/audio_url_write.py +19 -0
  136. opik/rest_api/types/automation_rule_evaluator.py +38 -2
  137. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
  138. opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
  139. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  140. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  141. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  142. opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
  143. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  144. opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
  145. opik/rest_api/types/dataset.py +2 -0
  146. opik/rest_api/types/dataset_item.py +1 -1
  147. opik/rest_api/types/dataset_item_batch.py +4 -0
  148. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  149. opik/rest_api/types/dataset_item_compare.py +1 -1
  150. opik/rest_api/types/dataset_item_filter.py +4 -0
  151. opik/rest_api/types/dataset_item_page_compare.py +0 -1
  152. opik/rest_api/types/dataset_item_page_public.py +0 -1
  153. opik/rest_api/types/dataset_item_public.py +1 -1
  154. opik/rest_api/types/dataset_public.py +2 -0
  155. opik/rest_api/types/dataset_version_public.py +10 -0
  156. opik/rest_api/types/dataset_version_summary.py +46 -0
  157. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  158. opik/rest_api/types/experiment.py +9 -0
  159. opik/rest_api/types/experiment_public.py +9 -0
  160. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  161. opik/rest_api/types/llm_as_judge_message_content.py +2 -0
  162. opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
  163. opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
  164. opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
  165. opik/rest_api/types/project.py +1 -0
  166. opik/rest_api/types/project_detailed.py +1 -0
  167. opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
  168. opik/rest_api/types/project_reference.py +31 -0
  169. opik/rest_api/types/project_reference_public.py +31 -0
  170. opik/rest_api/types/project_stats_summary_item.py +1 -0
  171. opik/rest_api/types/prompt_version.py +1 -0
  172. opik/rest_api/types/prompt_version_detail.py +1 -0
  173. opik/rest_api/types/prompt_version_page_public.py +5 -0
  174. opik/rest_api/types/prompt_version_public.py +1 -0
  175. opik/rest_api/types/prompt_version_update.py +33 -0
  176. opik/rest_api/types/provider_api_key.py +5 -1
  177. opik/rest_api/types/provider_api_key_provider.py +2 -1
  178. opik/rest_api/types/provider_api_key_public.py +5 -1
  179. opik/rest_api/types/provider_api_key_public_provider.py +2 -1
  180. opik/rest_api/types/service_toggles_config.py +11 -1
  181. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  182. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  183. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  184. opik/types.py +36 -0
  185. opik/validation/chat_prompt_messages.py +241 -0
  186. opik/validation/feedback_score.py +3 -3
  187. opik/validation/validator.py +28 -0
  188. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
  189. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
  190. opik/cli/export.py +0 -791
  191. opik/cli/import_command.py +0 -575
  192. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
  193. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
  194. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
  195. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
opik/cli/exports/utils.py ADDED
@@ -0,0 +1,406 @@
+ """Common utilities for export functionality."""
+
+ import csv
+ import dataclasses
+ import json
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, Callable, Dict, List, Optional
+
+ from rich.console import Console
+ from rich.table import Table
+
+ import opik.dict_utils as dict_utils
+ from opik.api_objects.experiment.experiment_item import ExperimentItemContent
+
+ console = Console()
+
+
+ def matches_name_pattern(name: str, pattern: Optional[str]) -> bool:
+     """Check if a name matches the given pattern using simple string matching."""
+     if pattern is None:
+         return True
+     # Simple string matching - check if pattern is contained in name (case-insensitive)
+     return pattern.lower() in name.lower()
+
+
+ def serialize_experiment_item(item: ExperimentItemContent) -> Dict[str, Any]:
+     """Serialize an ExperimentItemContent dataclass to a dictionary."""
+     return dataclasses.asdict(item)
+
+
+ def should_skip_file(file_path: Path, force: bool) -> bool:
+     """Check if a file should be skipped based on existence and force flag."""
+     return file_path.exists() and not force
+
+
+ def write_csv_data(
+     data: Dict[str, Any],
+     file_path: Path,
+     csv_row_converter_func: Callable[[Dict[str, Any]], List[Dict]],
+ ) -> None:
+     """Write data to CSV file using the provided row converter function."""
+     try:
+         csv_rows = csv_row_converter_func(data)
+         if not csv_rows:
+             raise ValueError(f"CSV row converter returned empty list for {file_path}")
+         with open(file_path, "w", newline="", encoding="utf-8") as csv_file_handle:
+             csv_fieldnames = list(csv_rows[0].keys())
+             csv_writer = csv.DictWriter(csv_file_handle, fieldnames=csv_fieldnames)
+             csv_writer.writeheader()
+             csv_writer.writerows(csv_rows)
+     except Exception as e:
+         # Re-raise with more context
+         raise RuntimeError(f"Failed to write CSV file {file_path}: {e}") from e
+
+
+ def write_json_data(data: Dict[str, Any], file_path: Path) -> None:
+     """Write data to JSON file."""
+     with open(file_path, "w", encoding="utf-8") as f:
+         json.dump(data, f, indent=2, default=str)
+
+
+ def debug_print(message: str, debug: bool) -> None:
+     """Print debug message only if debug is enabled."""
+     if debug:
+         console.print(f"[blue]{message}[/blue]")
+
+
+ def create_experiment_data_structure(
+     experiment: Any, experiment_items: List[ExperimentItemContent]
+ ) -> Dict[str, Any]:
+     """Create a comprehensive experiment data structure for export."""
+     # Get the full experiment data which contains all fields
+     experiment_data_obj = experiment.get_experiment_data()
+
+     return {
+         "experiment": {
+             "id": experiment.id,
+             "name": experiment.name,
+             "dataset_name": experiment.dataset_name,
+             "metadata": getattr(experiment_data_obj, "metadata", None),
+             "type": getattr(experiment_data_obj, "type", None),
+             "status": getattr(experiment_data_obj, "status", None),
+             "created_at": getattr(experiment_data_obj, "created_at", None),
+             "last_updated_at": getattr(experiment_data_obj, "last_updated_at", None),
+             "created_by": getattr(experiment_data_obj, "created_by", None),
+             "last_updated_by": getattr(experiment_data_obj, "last_updated_by", None),
+             "trace_count": getattr(experiment_data_obj, "trace_count", None),
+             "total_estimated_cost": getattr(
+                 experiment_data_obj, "total_estimated_cost", None
+             ),
+             "total_estimated_cost_avg": getattr(
+                 experiment_data_obj, "total_estimated_cost_avg", None
+             ),
+             "usage": getattr(experiment_data_obj, "usage", None),
+             "feedback_scores": getattr(experiment_data_obj, "feedback_scores", None),
+             "comments": getattr(experiment_data_obj, "comments", None),
+             "duration": getattr(experiment_data_obj, "duration", None),
+             "prompt_version": getattr(experiment_data_obj, "prompt_version", None),
+             "prompt_versions": getattr(experiment_data_obj, "prompt_versions", None),
+         },
+         "items": [serialize_experiment_item(item) for item in experiment_items],
+         "downloaded_at": datetime.now().isoformat(),
+     }
+
+
+ def dump_to_file(
+     data: dict,
+     file_path: Path,
+     file_format: str,
+     csv_writer: Optional[csv.DictWriter] = None,
+     csv_fieldnames: Optional[List[str]] = None,
+     data_type: str = "trace",
+ ) -> tuple:
+     """
+     Helper function to dump data to file in the specified format.
+
+     Args:
+         data: The data to dump
+         file_path: Path where to save the file
+         file_format: Format to use ("json" or "csv")
+         csv_writer: Existing CSV writer (for CSV format)
+         csv_fieldnames: Existing CSV fieldnames (for CSV format)
+         data_type: Type of data ("trace", "dataset", "prompt", "experiment")
+
+     Returns:
+         Tuple of (csv_writer, csv_fieldnames) for CSV format, or (None, None) for JSON
+     """
+     if file_format.lower() == "csv":
+         # Convert to CSV rows based on data type
+         if data_type == "trace":
+             csv_rows = trace_to_csv_rows(data)
+         elif data_type == "dataset":
+             csv_rows = dataset_to_csv_rows(data)
+         elif data_type == "prompt":
+             csv_rows = prompt_to_csv_rows(data)
+         elif data_type == "experiment":
+             csv_rows = experiment_to_csv_rows(data)
+         else:
+             # Fallback to trace format for unknown types
+             csv_rows = trace_to_csv_rows(data)
+
+         # Initialize CSV writer if not already done
+         if csv_writer is None and csv_rows:
+             # Use context manager to ensure file is properly closed
+             with open(file_path, "w", newline="", encoding="utf-8") as csv_file_handle:
+                 csv_fieldnames = list(csv_rows[0].keys())
+                 csv_writer = csv.DictWriter(csv_file_handle, fieldnames=csv_fieldnames)
+                 csv_writer.writeheader()
+                 # Write rows while file is open
+                 csv_writer.writerows(csv_rows)
+
+             # File is closed, return None, None
+             return None, None
+
+         # Write rows to existing writer (caller manages file lifecycle)
+         if csv_writer and csv_rows:
+             csv_writer.writerows(csv_rows)
+
+         return csv_writer, csv_fieldnames
+     else:
+         # Save to JSON file
+         with open(file_path, "w", encoding="utf-8") as f:
+             json.dump(data, f, indent=2, default=str)
+
+         return None, None
+
+
+ def trace_to_csv_rows(trace_data: dict) -> List[Dict]:
+     """Convert trace data to CSV rows format."""
+     trace = trace_data["trace"]
+     spans = trace_data.get("spans", [])
+
+     # Flatten trace data with "trace" prefix
+     trace_flat = dict_utils.flatten_dict(trace, parent_key="trace", delim="_")
+
+     # If no spans, create a single row for the trace
+     if not spans:
+         # Create empty span fields to maintain consistent structure
+         span_flat = {f"span_{key}": "" for key in trace.keys()}
+         span_flat["span_parent_span_id"] = ""  # Special case for parent_span_id
+
+         # Combine trace and empty span data
+         row = {**trace_flat, **span_flat}
+         return [row]
+
+     # Create rows for each span
+     rows = []
+     for span in spans:
+         # Flatten span data with "span" prefix
+         span_flat = dict_utils.flatten_dict(span, parent_key="span", delim="_")
+
+         # Combine trace and span data
+         row = {**trace_flat, **span_flat}
+         rows.append(row)
+
+     return rows
+
+
+ def dataset_to_csv_rows(dataset_data: dict) -> List[Dict]:
+     """Convert dataset data to CSV rows format."""
+     rows = []
+
+     # Create a row for each dataset item
+     items = dataset_data.get("items", [])
+     for i, item in enumerate(items):
+         # Flatten item data - use all fields from the item
+         # (datasets can have any user-defined keys/values)
+         item_flat = dict_utils.flatten_dict(
+             item,  # Use the entire item dict, not just hardcoded fields
+             parent_key="item",
+             delim="_",
+         )
+
+         # Create row with item data and index
+         row = {**item_flat}
+         row["item_index"] = i  # Add index for ordering
+         rows.append(row)
+
+     return rows
+
+
+ def prompt_to_csv_rows(prompt_data: dict) -> List[Dict]:
+     """Convert prompt data to CSV rows format."""
+     # Flatten prompt data
+     prompt_flat = dict_utils.flatten_dict(prompt_data, parent_key="prompt", delim="_")
+
+     # Create a single row for the prompt
+     return [prompt_flat]
+
+
+ def experiment_to_csv_rows(experiment_data: dict) -> List[Dict]:
+     """Convert experiment data to CSV rows format."""
+     rows = []
+
+     # Flatten experiment metadata
+     experiment_flat = dict_utils.flatten_dict(
+         {
+             "id": experiment_data.get("experiment", {}).get("id"),
+             "name": experiment_data.get("experiment", {}).get("name"),
+             "dataset_name": experiment_data.get("experiment", {}).get("dataset_name"),
+             "type": experiment_data.get("experiment", {}).get("type"),
+             "status": experiment_data.get("experiment", {}).get("status"),
+             "created_at": experiment_data.get("experiment", {}).get("created_at"),
+             "last_updated_at": experiment_data.get("experiment", {}).get(
+                 "last_updated_at"
+             ),
+             "created_by": experiment_data.get("experiment", {}).get("created_by"),
+             "last_updated_by": experiment_data.get("experiment", {}).get(
+                 "last_updated_by"
+             ),
+             "trace_count": experiment_data.get("experiment", {}).get("trace_count"),
+             "total_estimated_cost": experiment_data.get("experiment", {}).get(
+                 "total_estimated_cost"
+             ),
+             "downloaded_at": experiment_data.get("downloaded_at"),
+         },
+         parent_key="experiment",
+         delim="_",
+     )
+
+     # Create a row for each experiment item
+     items = experiment_data.get("items", [])
+     for i, item in enumerate(items):
+         # Flatten item data
+         item_flat = dict_utils.flatten_dict(
+             {
+                 "id": item.get("id"),
+                 "experiment_id": item.get("experiment_id"),
+                 "dataset_item_id": item.get("dataset_item_id"),
+                 "trace_id": item.get("trace_id"),
+                 "input": item.get("input"),
+                 "output": item.get("output"),
+                 "feedback_scores": item.get("feedback_scores"),
+                 "comments": item.get("comments"),
+                 "total_estimated_cost": item.get("total_estimated_cost"),
+                 "duration": item.get("duration"),
+                 "usage": item.get("usage"),
+                 "created_at": item.get("created_at"),
+                 "last_updated_at": item.get("last_updated_at"),
+                 "created_by": item.get("created_by"),
+                 "last_updated_by": item.get("last_updated_by"),
+                 "trace_visibility_mode": item.get("trace_visibility_mode"),
+             },
+             parent_key="item",
+             delim="_",
+         )
+
+         # Combine experiment and item data
+         row = {**experiment_flat, **item_flat}
+         row["item_index"] = i  # Add index for ordering
+         rows.append(row)
+
+     # If no items, return just the experiment metadata
+     if not items:
+         rows.append(experiment_flat)
+
+     return rows
+
+
+ def print_export_summary(stats: Dict[str, int], format: str = "json") -> None:
+     """Print a nice summary table of export statistics."""
+     table = Table(
+         title="📊 Export Summary", show_header=True, header_style="bold magenta"
+     )
+     table.add_column("Type", style="cyan", no_wrap=True)
+     table.add_column("Exported", justify="right", style="green")
+     table.add_column("Skipped", justify="right", style="yellow")
+     table.add_column("Files", style="blue")
+
+     # Add rows for each type
+     if stats.get("experiments", 0) > 0 or stats.get("experiments_skipped", 0) > 0:
+         exported = stats.get("experiments", 0)
+         skipped = stats.get("experiments_skipped", 0)
+         experiment_file_pattern = (
+             "experiments_*.csv" if format.lower() == "csv" else "experiment_*.json"
+         )
+         table.add_row(
+             "🧪 Experiments",
+             str(exported),
+             str(skipped) if skipped > 0 else "",
+             experiment_file_pattern,
+         )
+
+     if stats.get("datasets", 0) > 0 or stats.get("datasets_skipped", 0) > 0:
+         exported = stats.get("datasets", 0)
+         skipped = stats.get("datasets_skipped", 0)
+         dataset_file_pattern = (
+             "dataset_*.csv" if format.lower() == "csv" else "dataset_*.json"
+         )
+         table.add_row(
+             "📊 Datasets",
+             str(exported),
+             str(skipped) if skipped > 0 else "",
+             dataset_file_pattern,
+         )
+
+     if stats.get("traces", 0) > 0 or stats.get("traces_skipped", 0) > 0:
+         exported = stats.get("traces", 0)
+         skipped = stats.get("traces_skipped", 0)
+         trace_file_pattern = (
+             "trace_*.csv" if format.lower() == "csv" else "trace_*.json"
+         )
+         table.add_row(
+             "🔍 Traces",
+             str(exported),
+             str(skipped) if skipped > 0 else "",
+             trace_file_pattern,
+         )
+
+     if stats.get("prompts", 0) > 0 or stats.get("prompts_skipped", 0) > 0:
+         exported = stats.get("prompts", 0)
+         skipped = stats.get("prompts_skipped", 0)
+         prompt_file_pattern = (
+             "prompts_*.csv" if format.lower() == "csv" else "prompt_*.json"
+         )
+         table.add_row(
+             "💬 Prompts",
+             str(exported),
+             str(skipped) if skipped > 0 else "",
+             prompt_file_pattern,
+         )
+
+     if stats.get("projects", 0) > 0 or stats.get("projects_skipped", 0) > 0:
+         exported = stats.get("projects", 0)
+         skipped = stats.get("projects_skipped", 0)
+         table.add_row(
+             "📁 Projects",
+             str(exported),
+             str(skipped) if skipped > 0 else "",
+             "project directories",
+         )
+
+     # Calculate totals
+     total_exported = sum(
+         [
+             stats.get(key, 0)
+             for key in ["experiments", "datasets", "traces", "prompts", "projects"]
+         ]
+     )
+     total_skipped = sum(
+         [
+             stats.get(key, 0)
+             for key in [
+                 "experiments_skipped",
+                 "datasets_skipped",
+                 "traces_skipped",
+                 "prompts_skipped",
+                 "projects_skipped",
+             ]
+         ]
+     )
+     total_files = total_exported + total_skipped
+
+     table.add_row("", "", "", "", style="bold")
+     table.add_row(
+         "📦 Total",
+         str(total_exported),
+         str(total_skipped) if total_skipped > 0 else "",
+         f"{total_files} files",
+         style="bold green",
+     )
+
+     console.print()
+     console.print(table)
+     console.print()
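
Note on the new export utilities above: the *_to_csv_rows converters flatten an export payload into rows, and write_csv_data accepts any of them as its row-converter callback. The sketch below is illustrative only; it assumes a trace payload shaped like {"trace": {...}, "spans": [...]} (the shape trace_to_csv_rows reads), and the payload values and file names are made up. The real wiring in opik/cli/exports/*.py may differ.

    from pathlib import Path

    from opik.cli.exports.utils import (
        debug_print,
        should_skip_file,
        trace_to_csv_rows,
        write_csv_data,
        write_json_data,
    )

    # Hypothetical trace payload; trace_to_csv_rows emits one flattened row per span.
    trace_payload = {
        "trace": {"id": "trace-1", "name": "demo", "input": {"q": "hi"}},
        "spans": [{"id": "span-1", "name": "llm-call", "parent_span_id": None}],
    }

    json_path = Path("trace_trace-1.json")
    csv_path = Path("trace_trace-1.csv")

    if not should_skip_file(json_path, force=False):
        # Plain JSON dump (indent=2, non-serializable values coerced via str).
        write_json_data(trace_payload, json_path)

    if not should_skip_file(csv_path, force=False):
        # write_csv_data calls the converter, then writes the header and rows.
        write_csv_data(trace_payload, csv_path, trace_to_csv_rows)

    debug_print(f"Exported {json_path} and {csv_path}", debug=True)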
opik/cli/harbor.py ADDED
@@ -0,0 +1,39 @@
+ """
+ Harbor CLI integration with Opik tracking.
+
+ Usage:
+     opik harbor run -d terminal-bench@head -a terminus_2 -m gpt-4.1
+     opik harbor jobs start -c config.yaml
+ """
+
+ import sys
+
+ import click
+
+
+ @click.command(
+     name="harbor",
+     context_settings={
+         "ignore_unknown_options": True,
+         "allow_extra_args": True,
+         "allow_interspersed_args": False,
+     },
+ )
+ @click.pass_context
+ def harbor(ctx: click.Context) -> None:
+     """Run Harbor benchmarks with Opik tracking enabled."""
+     try:
+         import harbor  # noqa: F401
+     except ImportError:
+         raise click.ClickException(
+             "Harbor is not installed. Install with: pip install harbor"
+         )
+
+     from opik.integrations.harbor import track_harbor
+
+     track_harbor()
+
+     from harbor.cli.main import app
+
+     sys.argv = ["harbor"] + ctx.args
+     app()
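
Note on opik/cli/harbor.py above: the command only checks that the harbor package is importable, calls track_harbor() to enable Opik tracking, and then delegates the remaining arguments to Harbor's own CLI entry point. A rough script-level equivalent, using only the calls visible in this diff (harbor itself is an optional dependency and must be installed separately; the arguments are taken from the docstring example):

    import sys

    from opik.integrations.harbor import track_harbor

    # Enable Opik tracking for Harbor runs (same call the CLI command makes).
    track_harbor()

    # Delegate to Harbor's CLI with the desired arguments.
    from harbor.cli.main import app

    sys.argv = ["harbor", "run", "-d", "terminal-bench@head", "-a", "terminus_2", "-m", "gpt-4.1"]
    app()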