opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/constants.py +2 -0
- opik/api_objects/dataset/dataset.py +133 -40
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +6 -0
- opik/api_objects/helpers.py +8 -4
- opik/api_objects/local_recording.py +6 -5
- opik/api_objects/observation_data.py +101 -0
- opik/api_objects/opik_client.py +78 -45
- opik/api_objects/opik_query_language.py +9 -3
- opik/api_objects/prompt/chat/chat_prompt.py +18 -1
- opik/api_objects/prompt/client.py +8 -1
- opik/api_objects/span/span_data.py +3 -88
- opik/api_objects/threads/threads_client.py +7 -4
- opik/api_objects/trace/trace_data.py +3 -74
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +14 -12
- opik/config.py +12 -1
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +4 -1
- opik/decorator/base_track_decorator.py +111 -37
- opik/decorator/context_manager/span_context_manager.py +5 -1
- opik/decorator/generator_wrappers.py +5 -4
- opik/decorator/span_creation_handler.py +13 -4
- opik/evaluation/engine/engine.py +111 -28
- opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
- opik/evaluation/evaluator.py +12 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
- opik/evaluation/metrics/heuristics/equals.py +11 -7
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
- opik/evaluation/models/litellm/util.py +4 -20
- opik/evaluation/models/models_factory.py +19 -5
- opik/evaluation/rest_operations.py +3 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/integrations/adk/legacy_opik_tracer.py +9 -11
- opik/integrations/adk/opik_tracer.py +2 -2
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
- opik/integrations/dspy/callback.py +100 -14
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_tracer.py +2 -2
- opik/integrations/langchain/__init__.py +15 -2
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_tracer.py +258 -160
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
- opik/integrations/llama_index/callback.py +43 -6
- opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
- opik/integrations/openai/opik_tracker.py +99 -4
- opik/integrations/openai/videos/__init__.py +9 -0
- opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
- opik/integrations/openai/videos/videos_create_decorator.py +159 -0
- opik/integrations/openai/videos/videos_download_decorator.py +110 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batchers.py +32 -40
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/emulator_message_processor.py +36 -1
- opik/message_processing/emulation/models.py +21 -0
- opik/message_processing/messages.py +9 -0
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
- opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
- opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
- opik/message_processing/queue_consumer.py +4 -2
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +36 -8
- opik/plugins/pytest/experiment_runner.py +1 -1
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +42 -0
- opik/rest_api/datasets/client.py +321 -123
- opik/rest_api/datasets/raw_client.py +470 -145
- opik/rest_api/experiments/client.py +26 -0
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/llm_provider_key/client.py +4 -4
- opik/rest_api/llm_provider_key/raw_client.py +4 -4
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
- opik/rest_api/manual_evaluation/client.py +101 -0
- opik/rest_api/manual_evaluation/raw_client.py +172 -0
- opik/rest_api/optimizations/client.py +0 -166
- opik/rest_api/optimizations/raw_client.py +0 -248
- opik/rest_api/projects/client.py +9 -0
- opik/rest_api/projects/raw_client.py +13 -0
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
- opik/rest_api/prompts/client.py +130 -2
- opik/rest_api/prompts/raw_client.py +175 -0
- opik/rest_api/traces/client.py +101 -0
- opik/rest_api/traces/raw_client.py +120 -0
- opik/rest_api/types/__init__.py +50 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +38 -2
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
- opik/rest_api/types/dataset.py +2 -0
- opik/rest_api/types/dataset_item.py +1 -1
- opik/rest_api/types/dataset_item_batch.py +4 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +1 -1
- opik/rest_api/types/dataset_item_filter.py +4 -0
- opik/rest_api/types/dataset_item_page_compare.py +0 -1
- opik/rest_api/types/dataset_item_page_public.py +0 -1
- opik/rest_api/types/dataset_item_public.py +1 -1
- opik/rest_api/types/dataset_public.py +2 -0
- opik/rest_api/types/dataset_version_public.py +10 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +9 -0
- opik/rest_api/types/experiment_public.py +9 -0
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/llm_as_judge_message_content.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt_version.py +1 -0
- opik/rest_api/types/prompt_version_detail.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +1 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +5 -1
- opik/rest_api/types/provider_api_key_provider.py +2 -1
- opik/rest_api/types/provider_api_key_public.py +5 -1
- opik/rest_api/types/provider_api_key_public_provider.py +2 -1
- opik/rest_api/types/service_toggles_config.py +11 -1
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
- opik/cli/export.py +0 -791
- opik/cli/import_command.py +0 -575
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
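The headline change in this release is the CLI restructuring: the monolithic `opik/cli/export.py` and `opik/cli/import_command.py` modules were removed in favor of the new `opik/cli/exports/` and `opik/cli/imports/` packages. The removed `import` command is reproduced in full below; as a reference for its interface, here is a minimal sketch of invoking it through click's test runner against 1.9.39 (the export folder and workspace/project names are hypothetical placeholders):

```python
# Minimal sketch (opik 1.9.39): exercising the removed `import` command via
# click's test runner. Folder and workspace/project names are hypothetical.
from click.testing import CliRunner

from opik.cli.import_command import import_data

runner = CliRunner()
result = runner.invoke(
    import_data,
    [
        "./exported-workspace",     # WORKSPACE_FOLDER: directory of JSON files
        "my-workspace/my-project",  # WORKSPACE_NAME: workspace or workspace/project
        "--include", "traces",      # repeatable; defaults to traces only
        "--dry-run",                # report what would be imported, change nothing
    ],
)
print(result.exit_code, result.output)
```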
opik/cli/import_command.py
DELETED
@@ -1,575 +0,0 @@

"""Upload command for Opik CLI."""

import json
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional

import click
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn

import opik
from opik.rest_api.core.api_error import ApiError
from opik.api_objects.trace import trace_data
from opik.api_objects.span import span_data
from opik.api_objects.trace.migration import prepare_traces_and_spans_for_copy

console = Console()


def _matches_name_pattern(name: str, pattern: Optional[str]) -> bool:
    """Check if a name matches the given regex pattern."""
    if pattern is None:
        return True
    try:
        return bool(re.search(pattern, name))
    except re.error as e:
        console.print(f"[red]Invalid regex pattern '{pattern}': {e}[/red]")
        return False


def _json_to_trace_data(
    trace_info: Dict[str, Any], project_name: str
) -> trace_data.TraceData:
    """Convert JSON trace data to TraceData object."""
    return trace_data.TraceData(
        id=trace_info.get("id", ""),
        name=trace_info.get("name"),
        start_time=(
            datetime.fromisoformat(trace_info["start_time"].replace("Z", "+00:00"))
            if trace_info.get("start_time")
            else None
        ),
        end_time=(
            datetime.fromisoformat(trace_info["end_time"].replace("Z", "+00:00"))
            if trace_info.get("end_time")
            else None
        ),
        metadata=trace_info.get("metadata"),
        input=trace_info.get("input"),
        output=trace_info.get("output"),
        tags=trace_info.get("tags"),
        feedback_scores=trace_info.get("feedback_scores"),
        project_name=project_name,
        created_by=trace_info.get("created_by"),
        error_info=trace_info.get("error_info"),
        thread_id=trace_info.get("thread_id"),
    )


def _json_to_span_data(
    span_info: Dict[str, Any], project_name: str
) -> span_data.SpanData:
    """Convert JSON span data to SpanData object."""
    return span_data.SpanData(
        trace_id=span_info.get("trace_id", ""),
        id=span_info.get("id", ""),
        parent_span_id=span_info.get("parent_span_id"),
        name=span_info.get("name"),
        type=span_info.get("type", "general"),
        start_time=(
            datetime.fromisoformat(span_info["start_time"].replace("Z", "+00:00"))
            if span_info.get("start_time")
            else None
        ),
        end_time=(
            datetime.fromisoformat(span_info["end_time"].replace("Z", "+00:00"))
            if span_info.get("end_time")
            else None
        ),
        metadata=span_info.get("metadata"),
        input=span_info.get("input"),
        output=span_info.get("output"),
        tags=span_info.get("tags"),
        usage=span_info.get("usage"),
        feedback_scores=span_info.get("feedback_scores"),
        project_name=project_name,
        model=span_info.get("model"),
        provider=span_info.get("provider"),
        error_info=span_info.get("error_info"),
        total_cost=span_info.get("total_cost"),
    )


def _import_traces(
    client: opik.Opik,
    project_dir: Path,
    dry_run: bool,
    name_pattern: Optional[str] = None,
) -> int:
    """Import traces from JSON files."""
    trace_files = list(project_dir.glob("trace_*.json"))

    if not trace_files:
        console.print(f"[yellow]No trace files found in {project_dir}[/yellow]")
        return 0

    imported_count = 0
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        task = progress.add_task("Uploading traces...", total=len(trace_files))

        for trace_file in trace_files:
            try:
                with open(trace_file, "r", encoding="utf-8") as f:
                    trace_data = json.load(f)

                # Filter by name pattern if specified
                trace_name = trace_data.get("trace", {}).get("name", "")
                if name_pattern and not _matches_name_pattern(trace_name, name_pattern):
                    continue

                if dry_run:
                    print(f"Would upload trace: {trace_data['trace']['id']}")
                    imported_count += 1
                    progress.update(
                        task,
                        description=f"Imported {imported_count}/{len(trace_files)} traces",
                    )
                    continue

                # Extract trace information
                trace_info = trace_data["trace"]
                spans_info = trace_data.get("spans", [])

                # Convert JSON data to TraceData and SpanData objects
                # Use a temporary project name for the migration logic
                temp_project_name = "temp_import"
                trace_data_obj = _json_to_trace_data(trace_info, temp_project_name)

                # Convert spans to SpanData objects, setting the correct trace_id
                span_data_objects = []
                for span_info in spans_info:
                    span_info["trace_id"] = trace_data_obj.id  # Ensure trace_id is set
                    span_data_obj = _json_to_span_data(span_info, temp_project_name)
                    span_data_objects.append(span_data_obj)

                # Use the migration logic to prepare traces and spans with new IDs
                # This handles orphan spans, validates parent relationships, and logs issues
                new_trace_data, new_span_data = prepare_traces_and_spans_for_copy(
                    destination_project_name=client.project_name or "default",
                    traces_data=[trace_data_obj],
                    spans_data=span_data_objects,
                )

                # Create the trace using the prepared data
                new_trace = new_trace_data[0]
                trace_obj = client.trace(
                    name=new_trace.name,
                    start_time=new_trace.start_time,
                    end_time=new_trace.end_time,
                    input=new_trace.input,
                    output=new_trace.output,
                    metadata=new_trace.metadata,
                    tags=new_trace.tags,
                    thread_id=new_trace.thread_id,
                    error_info=new_trace.error_info,
                )

                # Create spans using the prepared data
                for span_data_obj in new_span_data:
                    client.span(
                        trace_id=trace_obj.id,
                        parent_span_id=span_data_obj.parent_span_id,
                        name=span_data_obj.name,
                        type=span_data_obj.type,
                        start_time=span_data_obj.start_time,
                        end_time=span_data_obj.end_time,
                        input=span_data_obj.input,
                        output=span_data_obj.output,
                        metadata=span_data_obj.metadata,
                        tags=span_data_obj.tags,
                        usage=span_data_obj.usage,
                        model=span_data_obj.model,
                        provider=span_data_obj.provider,
                        error_info=span_data_obj.error_info,
                    )

                imported_count += 1
                progress.update(
                    task,
                    description=f"Imported {imported_count}/{len(trace_files)} traces",
                )

            except Exception as e:
                console.print(
                    f"[red]Error importing trace from {trace_file.name}: {e}[/red]"
                )
                continue

    return imported_count


def _import_datasets(
    client: opik.Opik,
    project_dir: Path,
    dry_run: bool,
    name_pattern: Optional[str] = None,
) -> int:
    """Import datasets from JSON files."""
    dataset_files = list(project_dir.glob("dataset_*.json"))

    if not dataset_files:
        console.print(f"[yellow]No dataset files found in {project_dir}[/yellow]")
        return 0

    imported_count = 0
    for dataset_file in dataset_files:
        try:
            with open(dataset_file, "r", encoding="utf-8") as f:
                dataset_data = json.load(f)

            # Filter by name pattern if specified
            dataset_name = dataset_data.get("name", "")
            if name_pattern and not _matches_name_pattern(dataset_name, name_pattern):
                continue

            if dry_run:
                print(f"Would upload dataset: {dataset_data['name']}")
                imported_count += 1
                continue

            # Check if dataset already exists
            try:
                client.get_dataset(dataset_data["name"])
                console.print(
                    f"[yellow]Dataset '{dataset_data['name']}' already exists, skipping...[/yellow]"
                )
                imported_count += 1
                continue
            except ApiError as e:
                if e.status_code == 404:
                    # Dataset doesn't exist, create it
                    pass
                else:
                    # Re-raise other API errors (network, auth, etc.)
                    raise

            # Create dataset
            dataset = client.create_dataset(
                name=dataset_data["name"], description=dataset_data.get("description")
            )

            # Insert dataset items
            for item in dataset_data.get("items", []):
                dataset.insert(
                    [
                        {
                            "input": item["input"],
                            "expected_output": item["expected_output"],
                            "metadata": item.get("metadata"),
                        }
                    ]
                )

            imported_count += 1

        except Exception as e:
            console.print(
                f"[red]Error importing dataset from {dataset_file.name}: {e}[/red]"
            )
            continue

    return imported_count


def _import_prompts(
    client: opik.Opik,
    project_dir: Path,
    dry_run: bool,
    name_pattern: Optional[str] = None,
) -> int:
    """Import prompts from JSON files."""
    prompt_files = list(project_dir.glob("prompt_*.json"))

    if not prompt_files:
        console.print(f"[yellow]No prompt files found in {project_dir}[/yellow]")
        return 0

    imported_count = 0
    for prompt_file in prompt_files:
        try:
            with open(prompt_file, "r", encoding="utf-8") as f:
                prompt_data = json.load(f)

            # Filter by name pattern if specified
            prompt_name = prompt_data.get("name", "")
            if name_pattern and not _matches_name_pattern(prompt_name, name_pattern):
                continue

            if dry_run:
                print(f"Would upload prompt: {prompt_data['name']}")
                imported_count += 1
                continue

            # Create prompt
            client.create_prompt(
                name=prompt_data["name"],
                prompt=prompt_data["current_version"]["prompt"],
                metadata=prompt_data["current_version"].get("metadata"),
            )

            imported_count += 1

        except Exception as e:
            console.print(
                f"[red]Error importing prompt from {prompt_file.name}: {e}[/red]"
            )
            continue

    return imported_count


@click.command(name="import")
@click.argument(
    "workspace_folder",
    type=click.Path(file_okay=False, dir_okay=True, readable=True),
)
@click.argument("workspace_name", type=str)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show what would be imported without actually importing.",
)
@click.option(
    "--all",
    is_flag=True,
    help="Include all data types (traces, datasets, prompts).",
)
@click.option(
    "--include",
    type=click.Choice(["traces", "datasets", "prompts"], case_sensitive=False),
    multiple=True,
    default=["traces"],
    help="Data types to include in upload. Can be specified multiple times. Defaults to traces only.",
)
@click.option(
    "--exclude",
    type=click.Choice(["traces", "datasets", "prompts"], case_sensitive=False),
    multiple=True,
    help="Data types to exclude from upload. Can be specified multiple times.",
)
@click.option(
    "--name",
    type=str,
    help="Filter items by name using Python regex patterns. Matches against trace names, dataset names, or prompt names.",
)
@click.option(
    "--debug",
    is_flag=True,
    help="Enable debug output to show detailed information about the import process.",
)
def import_data(
    workspace_folder: str,
    workspace_name: str,
    dry_run: bool,
    all: bool,
    include: tuple,
    exclude: tuple,
    name: Optional[str],
    debug: bool,
) -> None:
    """
    Upload data from local files to a workspace or workspace/project.

    This command reads data from JSON files in the specified workspace folder
    and imports them to the specified workspace or project.

    Note: Thread metadata is automatically calculated from traces with the same thread_id,
    so threads don't need to be imported separately.

    WORKSPACE_FOLDER: Directory containing JSON files to import.
    WORKSPACE_NAME: Either a workspace name (e.g., "my-workspace") to import to all projects,
    or workspace/project (e.g., "my-workspace/my-project") to import to a specific project.
    """
    try:
        if debug:
            console.print("[blue]DEBUG: Starting import with parameters:[/blue]")
            console.print(f"[blue]  workspace_folder: {workspace_folder}[/blue]")
            console.print(f"[blue]  workspace_name: {workspace_name}[/blue]")
            console.print(f"[blue]  include: {include}[/blue]")
            console.print(f"[blue]  debug: {debug}[/blue]")

        # Parse workspace/project from the argument
        if "/" in workspace_name:
            workspace, project_name = workspace_name.split("/", 1)
            import_to_specific_project = True
            if debug:
                console.print(
                    f"[blue]DEBUG: Parsed workspace: {workspace}, project: {project_name}[/blue]"
                )
        else:
            # Only workspace specified - upload to all projects
            workspace = workspace_name
            project_name = None
            import_to_specific_project = False
            if debug:
                console.print(f"[blue]DEBUG: Workspace only: {workspace}[/blue]")

        # Initialize Opik client with workspace
        if debug:
            console.print(
                f"[blue]DEBUG: Initializing Opik client with workspace: {workspace}[/blue]"
            )
        client = opik.Opik(workspace=workspace)

        # Use the specified workspace folder directly
        project_dir = Path(workspace_folder)

        # Determine which data types to upload
        if all:
            # If --all is specified, include all data types
            include_set = {"traces", "datasets", "prompts"}
        else:
            include_set = set(item.lower() for item in include)

        exclude_set = set(item.lower() for item in exclude)

        # Apply exclusions
        data_types = include_set - exclude_set

        if not project_dir.exists():
            console.print(f"[red]Error: Directory not found: {project_dir}[/red]")
            console.print("[yellow]Make sure the path is correct.[/yellow]")
            sys.exit(1)

        console.print(f"[green]Uploading data from {project_dir}[/green]")

        if import_to_specific_project:
            console.print(
                f"[blue]Uploading to workspace: {workspace}, project: {project_name}[/blue]"
            )
        else:
            console.print(
                f"[blue]Uploading to workspace: {workspace} (all projects)[/blue]"
            )

        if debug:
            console.print(f"[blue]Data types: {', '.join(sorted(data_types))}[/blue]")

        # Note about workspace vs project-specific data
        project_specific = [dt for dt in data_types if dt in ["traces"]]
        workspace_data = [dt for dt in data_types if dt in ["datasets", "prompts"]]

        if project_specific and workspace_data:
            if import_to_specific_project:
                console.print(
                    f"[yellow]Note: {', '.join(project_specific)} will be imported to project '{project_name}', {', '.join(workspace_data)} belong to workspace '{workspace}'[/yellow]"
                )
            else:
                console.print(
                    f"[yellow]Note: {', '.join(project_specific)} will be imported to all projects, {', '.join(workspace_data)} belong to workspace '{workspace}'[/yellow]"
                )
        elif workspace_data:
            console.print(
                f"[yellow]Note: {', '.join(workspace_data)} belong to workspace '{workspace}'[/yellow]"
            )

        if dry_run:
            console.print("[yellow]Dry run mode - no data will be imported[/yellow]")

        if import_to_specific_project:
            # Upload to specific project
            # Create a new client instance with the specific project name
            assert project_name is not None  # Type narrowing for mypy
            client = opik.Opik(workspace=workspace, project_name=project_name)

            # Upload each data type
            total_imported = 0

            # Upload traces
            if "traces" in data_types:
                if debug:
                    console.print("[blue]Uploading traces...[/blue]")
                traces_imported = _import_traces(client, project_dir, dry_run, name)
                total_imported += traces_imported

            # Upload datasets
            if "datasets" in data_types:
                if debug:
                    console.print("[blue]Uploading datasets...[/blue]")
                datasets_imported = _import_datasets(client, project_dir, dry_run, name)
                total_imported += datasets_imported

            # Upload prompts
            if "prompts" in data_types:
                if debug:
                    console.print("[blue]Uploading prompts...[/blue]")
                prompts_imported = _import_prompts(client, project_dir, dry_run, name)
                total_imported += prompts_imported

            if dry_run:
                console.print(
                    f"[green]Dry run complete: Would import {total_imported} items[/green]"
                )
            else:
                console.print(
                    f"[green]Successfully imported {total_imported} items to project '{project_name}'[/green]"
                )
        else:
            # Upload to all projects in workspace
            # Get all projects in the workspace
            try:
                projects_response = client.rest_client.projects.find_projects()
                projects = projects_response.content or []

                if not projects:
                    console.print(
                        f"[yellow]No projects found in workspace '{workspace}'[/yellow]"
                    )
                    return

                console.print(
                    f"[blue]Found {len(projects)} projects in workspace[/blue]"
                )

                # Upload workspace-level data once (datasets, experiments, prompts)
                total_imported = 0

                # Upload datasets
                if "datasets" in data_types:
                    if debug:
                        console.print("[blue]Uploading datasets...[/blue]")
                    datasets_imported = _import_datasets(
                        client, project_dir, dry_run, name
                    )
                    total_imported += datasets_imported

                # Upload prompts
                if "prompts" in data_types:
                    if debug:
                        console.print("[blue]Uploading prompts...[/blue]")
                    prompts_imported = _import_prompts(
                        client, project_dir, dry_run, name
                    )
                    total_imported += prompts_imported

                # Note: Traces are project-specific and should be imported to a specific project
                # rather than being uploaded to all projects in a workspace
                if "traces" in data_types:
                    console.print(
                        "[yellow]Note: Traces are project-specific. Use workspace/project format to import traces to a specific project.[/yellow]"
                    )

                if dry_run:
                    console.print(
                        f"[green]Dry run complete: Would import {total_imported} items to workspace '{workspace}'[/green]"
                    )
                else:
                    console.print(
                        f"[green]Successfully imported {total_imported} items to workspace '{workspace}'[/green]"
                    )

            except Exception as e:
                console.print(f"[red]Error getting projects from workspace: {e}[/red]")
                sys.exit(1)

    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")
        sys.exit(1)
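One detail worth calling out in the removed module: `datetime.fromisoformat` only accepts a trailing `Z` UTC suffix on Python 3.11+, which is why every timestamp above is first passed through `.replace("Z", "+00:00")`. A minimal, self-contained sketch of that pattern (the helper name is hypothetical):

```python
# Minimal sketch of the timestamp handling used above: fromisoformat()
# rejects a trailing "Z" on Python < 3.11, so it is rewritten as "+00:00".
from datetime import datetime
from typing import Optional


def parse_iso_timestamp(value: Optional[str]) -> Optional[datetime]:
    """Hypothetical helper mirroring the inline parsing in import_command.py."""
    if not value:
        return None
    return datetime.fromisoformat(value.replace("Z", "+00:00"))


print(parse_iso_timestamp("2024-05-01T12:30:00Z"))  # 2024-05-01 12:30:00+00:00
```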