opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/constants.py +2 -0
- opik/api_objects/dataset/dataset.py +133 -40
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +6 -0
- opik/api_objects/helpers.py +8 -4
- opik/api_objects/local_recording.py +6 -5
- opik/api_objects/observation_data.py +101 -0
- opik/api_objects/opik_client.py +78 -45
- opik/api_objects/opik_query_language.py +9 -3
- opik/api_objects/prompt/chat/chat_prompt.py +18 -1
- opik/api_objects/prompt/client.py +8 -1
- opik/api_objects/span/span_data.py +3 -88
- opik/api_objects/threads/threads_client.py +7 -4
- opik/api_objects/trace/trace_data.py +3 -74
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +14 -12
- opik/config.py +12 -1
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +4 -1
- opik/decorator/base_track_decorator.py +111 -37
- opik/decorator/context_manager/span_context_manager.py +5 -1
- opik/decorator/generator_wrappers.py +5 -4
- opik/decorator/span_creation_handler.py +13 -4
- opik/evaluation/engine/engine.py +111 -28
- opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
- opik/evaluation/evaluator.py +12 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
- opik/evaluation/metrics/heuristics/equals.py +11 -7
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
- opik/evaluation/models/litellm/util.py +4 -20
- opik/evaluation/models/models_factory.py +19 -5
- opik/evaluation/rest_operations.py +3 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/integrations/adk/legacy_opik_tracer.py +9 -11
- opik/integrations/adk/opik_tracer.py +2 -2
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
- opik/integrations/dspy/callback.py +100 -14
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_tracer.py +2 -2
- opik/integrations/langchain/__init__.py +15 -2
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_tracer.py +258 -160
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
- opik/integrations/llama_index/callback.py +43 -6
- opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
- opik/integrations/openai/opik_tracker.py +99 -4
- opik/integrations/openai/videos/__init__.py +9 -0
- opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
- opik/integrations/openai/videos/videos_create_decorator.py +159 -0
- opik/integrations/openai/videos/videos_download_decorator.py +110 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batchers.py +32 -40
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/emulator_message_processor.py +36 -1
- opik/message_processing/emulation/models.py +21 -0
- opik/message_processing/messages.py +9 -0
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
- opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
- opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
- opik/message_processing/queue_consumer.py +4 -2
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +36 -8
- opik/plugins/pytest/experiment_runner.py +1 -1
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +38 -0
- opik/rest_api/datasets/client.py +249 -148
- opik/rest_api/datasets/raw_client.py +356 -217
- opik/rest_api/experiments/client.py +26 -0
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/llm_provider_key/client.py +4 -4
- opik/rest_api/llm_provider_key/raw_client.py +4 -4
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
- opik/rest_api/manual_evaluation/client.py +101 -0
- opik/rest_api/manual_evaluation/raw_client.py +172 -0
- opik/rest_api/optimizations/client.py +0 -166
- opik/rest_api/optimizations/raw_client.py +0 -248
- opik/rest_api/projects/client.py +9 -0
- opik/rest_api/projects/raw_client.py +13 -0
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
- opik/rest_api/prompts/client.py +130 -2
- opik/rest_api/prompts/raw_client.py +175 -0
- opik/rest_api/traces/client.py +101 -0
- opik/rest_api/traces/raw_client.py +120 -0
- opik/rest_api/types/__init__.py +46 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +38 -2
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
- opik/rest_api/types/dataset_item.py +1 -1
- opik/rest_api/types/dataset_item_batch.py +4 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +1 -1
- opik/rest_api/types/dataset_item_filter.py +4 -0
- opik/rest_api/types/dataset_item_page_compare.py +0 -1
- opik/rest_api/types/dataset_item_page_public.py +0 -1
- opik/rest_api/types/dataset_item_public.py +1 -1
- opik/rest_api/types/dataset_version_public.py +5 -0
- opik/rest_api/types/dataset_version_summary.py +5 -0
- opik/rest_api/types/dataset_version_summary_public.py +5 -0
- opik/rest_api/types/experiment.py +9 -0
- opik/rest_api/types/experiment_public.py +9 -0
- opik/rest_api/types/llm_as_judge_message_content.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt_version.py +1 -0
- opik/rest_api/types/prompt_version_detail.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +1 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +5 -1
- opik/rest_api/types/provider_api_key_provider.py +2 -1
- opik/rest_api/types/provider_api_key_public.py +5 -1
- opik/rest_api/types/provider_api_key_public_provider.py +2 -1
- opik/rest_api/types/service_toggles_config.py +11 -1
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/METADATA +5 -5
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/RECORD +190 -141
- opik/cli/export.py +0 -791
- opik/cli/import_command.py +0 -575
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""Common utilities for export functionality."""
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import dataclasses
|
|
5
|
+
import json
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
|
|
13
|
+
import opik.dict_utils as dict_utils
|
|
14
|
+
from opik.api_objects.experiment.experiment_item import ExperimentItemContent
|
|
15
|
+
|
|
16
|
+
console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def matches_name_pattern(name: str, pattern: Optional[str]) -> bool:
    """Return True when *pattern* is absent or occurs within *name*.

    Matching is plain case-insensitive substring containment — no globbing
    or regex. A ``None`` pattern matches every name.
    """
    return pattern is None or pattern.lower() in name.lower()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def serialize_experiment_item(item: ExperimentItemContent) -> Dict[str, Any]:
    """Return *item* (an ``ExperimentItemContent`` dataclass) as a plain dict.

    Nested dataclasses are converted recursively by ``dataclasses.asdict``.
    """
    return dataclasses.asdict(item)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def should_skip_file(file_path: Path, force: bool) -> bool:
    """Return True when *file_path* already exists and overwrite is not forced."""
    if force:
        # Forced mode always (re)writes, regardless of existence.
        return False
    return file_path.exists()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def write_csv_data(
    data: Dict[str, Any],
    file_path: Path,
    csv_row_converter_func: Callable[[Dict[str, Any]], List[Dict]],
) -> None:
    """Convert *data* into rows via *csv_row_converter_func* and write a CSV file.

    The keys of the first converted row define the header. Any failure —
    including a converter that yields no rows — is re-raised as a
    ``RuntimeError`` naming the target file.
    """
    try:
        rows = csv_row_converter_func(data)
        if not rows:
            raise ValueError(f"CSV row converter returned empty list for {file_path}")
        with open(file_path, "w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=list(rows[0].keys()))
            writer.writeheader()
            writer.writerows(rows)
    except Exception as e:
        # Surface the failing file in the error so callers can report it.
        raise RuntimeError(f"Failed to write CSV file {file_path}: {e}") from e
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def write_json_data(data: Dict[str, Any], file_path: Path) -> None:
    """Write *data* to *file_path* as indented JSON.

    Values JSON can't encode natively are stringified via ``default=str``.
    """
    serialized = json.dumps(data, indent=2, default=str)
    with open(file_path, "w", encoding="utf-8") as out:
        out.write(serialized)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def debug_print(message: str, debug: bool) -> None:
    """Print *message* in blue on the shared console, but only in debug mode."""
    if not debug:
        return
    console.print(f"[blue]{message}[/blue]")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def create_experiment_data_structure(
    experiment: Any, experiment_items: List[ExperimentItemContent]
) -> Dict[str, Any]:
    """Create a comprehensive experiment data structure for export.

    Combines the experiment's identity fields, the optional fields exposed by
    ``experiment.get_experiment_data()`` (missing ones become ``None``), the
    serialized items, and a download timestamp.
    """
    # The full experiment record carries the optional metadata fields.
    record = experiment.get_experiment_data()

    # Optional fields read off the full record, in export order.
    optional_fields = (
        "metadata",
        "type",
        "status",
        "created_at",
        "last_updated_at",
        "created_by",
        "last_updated_by",
        "trace_count",
        "total_estimated_cost",
        "total_estimated_cost_avg",
        "usage",
        "feedback_scores",
        "comments",
        "duration",
        "prompt_version",
        "prompt_versions",
    )

    experiment_section: Dict[str, Any] = {
        "id": experiment.id,
        "name": experiment.name,
        "dataset_name": experiment.dataset_name,
    }
    for field in optional_fields:
        experiment_section[field] = getattr(record, field, None)

    return {
        "experiment": experiment_section,
        "items": [serialize_experiment_item(item) for item in experiment_items],
        "downloaded_at": datetime.now().isoformat(),
    }
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def dump_to_file(
    data: dict,
    file_path: Path,
    file_format: str,
    csv_writer: Optional[csv.DictWriter] = None,
    csv_fieldnames: Optional[List[str]] = None,
    data_type: str = "trace",
) -> tuple:
    """
    Helper function to dump data to file in the specified format.

    Args:
        data: The data to dump
        file_path: Path where to save the file
        file_format: Format to use ("json" or "csv")
        csv_writer: Existing CSV writer (for CSV format)
        csv_fieldnames: Existing CSV fieldnames (for CSV format)
        data_type: Type of data ("trace", "dataset", "prompt", "experiment")

    Returns:
        Tuple of (csv_writer, csv_fieldnames) for CSV format, or (None, None) for JSON
    """
    if file_format.lower() != "csv":
        # JSON: each call produces one self-contained file.
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, default=str)
        return None, None

    # Pick the row converter; unknown data types fall back to the trace layout.
    converters = {
        "trace": trace_to_csv_rows,
        "dataset": dataset_to_csv_rows,
        "prompt": prompt_to_csv_rows,
        "experiment": experiment_to_csv_rows,
    }
    csv_rows = converters.get(data_type, trace_to_csv_rows)(data)

    if csv_writer is None and csv_rows:
        # First write: create the file, emit header and rows, and close it
        # again — the writer is not handed back once its file is closed.
        with open(file_path, "w", newline="", encoding="utf-8") as handle:
            csv_fieldnames = list(csv_rows[0].keys())
            csv_writer = csv.DictWriter(handle, fieldnames=csv_fieldnames)
            csv_writer.writeheader()
            csv_writer.writerows(csv_rows)
        return None, None

    # Caller-managed writer: append rows; the caller owns the file lifecycle.
    if csv_writer and csv_rows:
        csv_writer.writerows(csv_rows)
    return csv_writer, csv_fieldnames
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def trace_to_csv_rows(trace_data: dict) -> List[Dict]:
    """Convert trace data to CSV rows format.

    Emits one row per span, each combining the flattened trace fields
    (``trace_*`` columns) with the flattened span fields (``span_*``
    columns). A span-less trace yields a single row with blank span columns.
    """
    trace = trace_data["trace"]
    spans = trace_data.get("spans", [])

    trace_flat = dict_utils.flatten_dict(trace, parent_key="trace", delim="_")

    if not spans:
        # Keep the column layout consistent with span-bearing traces by
        # emitting blank span_* columns mirroring the trace's own keys.
        empty_span = {f"span_{key}": "" for key in trace.keys()}
        empty_span["span_parent_span_id"] = ""  # spans also carry parent_span_id
        return [{**trace_flat, **empty_span}]

    return [
        {**trace_flat, **dict_utils.flatten_dict(span, parent_key="span", delim="_")}
        for span in spans
    ]
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def dataset_to_csv_rows(dataset_data: dict) -> List[Dict]:
    """Convert dataset data to CSV rows format.

    Each dataset item becomes one row; since items are free-form user dicts,
    every key they carry is flattened under an ``item_`` prefix, and an
    ``item_index`` column preserves ordering.
    """
    rows: List[Dict] = []
    for index, item in enumerate(dataset_data.get("items", [])):
        row = dict_utils.flatten_dict(item, parent_key="item", delim="_")
        row["item_index"] = index
        rows.append(row)
    return rows
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def prompt_to_csv_rows(prompt_data: dict) -> List[Dict]:
    """Convert prompt data to CSV rows format: one flattened ``prompt_*`` row."""
    flattened = dict_utils.flatten_dict(prompt_data, parent_key="prompt", delim="_")
    return [flattened]
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def experiment_to_csv_rows(experiment_data: dict) -> List[Dict]:
    """Convert experiment data to CSV rows format.

    Emits one row per experiment item, each combining the flattened
    experiment metadata (``experiment_*`` columns) with the flattened item
    fields (``item_*`` columns) and an ``item_index``. An item-less
    experiment yields a single metadata-only row.
    """
    rows: List[Dict] = []

    # Hoist the repeated lookup: every metadata field reads the same sub-dict.
    experiment = experiment_data.get("experiment", {})

    # Flatten experiment metadata (key order matters for CSV column order).
    experiment_flat = dict_utils.flatten_dict(
        {
            "id": experiment.get("id"),
            "name": experiment.get("name"),
            "dataset_name": experiment.get("dataset_name"),
            "type": experiment.get("type"),
            "status": experiment.get("status"),
            "created_at": experiment.get("created_at"),
            "last_updated_at": experiment.get("last_updated_at"),
            "created_by": experiment.get("created_by"),
            "last_updated_by": experiment.get("last_updated_by"),
            "trace_count": experiment.get("trace_count"),
            "total_estimated_cost": experiment.get("total_estimated_cost"),
            "downloaded_at": experiment_data.get("downloaded_at"),
        },
        parent_key="experiment",
        delim="_",
    )

    # Item fields exported per row, in column order.
    item_fields = (
        "id",
        "experiment_id",
        "dataset_item_id",
        "trace_id",
        "input",
        "output",
        "feedback_scores",
        "comments",
        "total_estimated_cost",
        "duration",
        "usage",
        "created_at",
        "last_updated_at",
        "created_by",
        "last_updated_by",
        "trace_visibility_mode",
    )

    # Create a row for each experiment item.
    items = experiment_data.get("items", [])
    for i, item in enumerate(items):
        item_flat = dict_utils.flatten_dict(
            {field: item.get(field) for field in item_fields},
            parent_key="item",
            delim="_",
        )
        # Combine experiment and item data.
        row = {**experiment_flat, **item_flat}
        row["item_index"] = i  # Add index for ordering
        rows.append(row)

    # If no items, return just the experiment metadata.
    if not items:
        rows.append(experiment_flat)

    return rows
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _add_summary_row(
    table: "Table",
    stats: Dict[str, int],
    key: str,
    label: str,
    files_text: str,
) -> None:
    """Add one entity-type row to *table*, but only when *key* has any activity.

    Reads ``stats[key]`` (exported count) and ``stats[f"{key}_skipped"]``;
    a type with zero exported and zero skipped is omitted entirely.
    """
    exported = stats.get(key, 0)
    skipped = stats.get(f"{key}_skipped", 0)
    if exported > 0 or skipped > 0:
        table.add_row(
            label,
            str(exported),
            str(skipped) if skipped > 0 else "",
            files_text,
        )


def print_export_summary(stats: Dict[str, int], format: str = "json") -> None:
    """Print a nice summary table of export statistics.

    Args:
        stats: Per-type counters; ``"<type>"`` holds the exported count and
            ``"<type>_skipped"`` the skipped count for each of experiments,
            datasets, traces, prompts and projects.
        format: Export file format ("json" or "csv"); only changes the
            file-pattern hints shown in the "Files" column.
    """
    table = Table(
        title="๐ Export Summary", show_header=True, header_style="bold magenta"
    )
    table.add_column("Type", style="cyan", no_wrap=True)
    table.add_column("Exported", justify="right", style="green")
    table.add_column("Skipped", justify="right", style="yellow")
    table.add_column("Files", style="blue")

    is_csv = format.lower() == "csv"

    # (stats key, row label, files-column text) for each entity type.
    # A single spec list keeps the per-type rows and the totals in sync.
    row_specs = [
        (
            "experiments",
            "๐งช Experiments",
            "experiments_*.csv" if is_csv else "experiment_*.json",
        ),
        (
            "datasets",
            "๐ Datasets",
            "dataset_*.csv" if is_csv else "dataset_*.json",
        ),
        (
            "traces",
            "๐ Traces",
            "trace_*.csv" if is_csv else "trace_*.json",
        ),
        (
            "prompts",
            "๐ฌ Prompts",
            "prompts_*.csv" if is_csv else "prompt_*.json",
        ),
        (
            "projects",
            "๐ Projects",
            "project directories",
        ),
    ]
    for key, label, files_text in row_specs:
        _add_summary_row(table, stats, key, label, files_text)

    # Grand totals across all entity types.
    total_exported = sum(stats.get(key, 0) for key, _, _ in row_specs)
    total_skipped = sum(stats.get(f"{key}_skipped", 0) for key, _, _ in row_specs)
    total_files = total_exported + total_skipped

    table.add_row("", "", "", "", style="bold")
    table.add_row(
        "๐ฆ Total",
        str(total_exported),
        str(total_skipped) if total_skipped > 0 else "",
        f"{total_files} files",
        style="bold green",
    )

    console.print()
    console.print(table)
    console.print()
|
opik/cli/harbor.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Harbor CLI integration with Opik tracking.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
opik harbor run -d terminal-bench@head -a terminus_2 -m gpt-4.1
|
|
6
|
+
opik harbor jobs start -c config.yaml
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
import click
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@click.command(
    name="harbor",
    context_settings={
        "ignore_unknown_options": True,
        "allow_extra_args": True,
        "allow_interspersed_args": False,
    },
)
@click.pass_context
def harbor(ctx: click.Context) -> None:
    """Run Harbor benchmarks with Opik tracking enabled.

    All arguments after ``harbor`` are passed through untouched to Harbor's
    own CLI (click options above disable parsing of unknown options).

    Raises:
        click.ClickException: If the optional ``harbor`` package is not
            installed.
    """
    # Probe for the optional dependency up front so the user gets a clear
    # install hint instead of a raw ImportError.
    try:
        import harbor  # noqa: F401
    except ImportError:
        raise click.ClickException(
            "Harbor is not installed. Install with: pip install harbor"
        )

    from opik.integrations.harbor import track_harbor

    # Enable Opik tracking before importing Harbor's CLI entry point — the
    # ordering here looks intentional (instrumentation first), so keep it.
    track_harbor()

    from harbor.cli.main import app

    # Rewrite argv so Harbor's CLI parses the pass-through args exactly as
    # if it had been invoked directly as `harbor ...`.
    sys.argv = ["harbor"] + ctx.args

    app()
|