opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/constants.py +2 -0
- opik/api_objects/dataset/dataset.py +133 -40
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +6 -0
- opik/api_objects/helpers.py +8 -4
- opik/api_objects/local_recording.py +6 -5
- opik/api_objects/observation_data.py +101 -0
- opik/api_objects/opik_client.py +78 -45
- opik/api_objects/opik_query_language.py +9 -3
- opik/api_objects/prompt/chat/chat_prompt.py +18 -1
- opik/api_objects/prompt/client.py +8 -1
- opik/api_objects/span/span_data.py +3 -88
- opik/api_objects/threads/threads_client.py +7 -4
- opik/api_objects/trace/trace_data.py +3 -74
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +14 -12
- opik/config.py +12 -1
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +4 -1
- opik/decorator/base_track_decorator.py +111 -37
- opik/decorator/context_manager/span_context_manager.py +5 -1
- opik/decorator/generator_wrappers.py +5 -4
- opik/decorator/span_creation_handler.py +13 -4
- opik/evaluation/engine/engine.py +111 -28
- opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
- opik/evaluation/evaluator.py +12 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
- opik/evaluation/metrics/heuristics/equals.py +11 -7
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
- opik/evaluation/models/litellm/util.py +4 -20
- opik/evaluation/models/models_factory.py +19 -5
- opik/evaluation/rest_operations.py +3 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/integrations/adk/legacy_opik_tracer.py +9 -11
- opik/integrations/adk/opik_tracer.py +2 -2
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
- opik/integrations/dspy/callback.py +100 -14
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_tracer.py +2 -2
- opik/integrations/langchain/__init__.py +15 -2
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_tracer.py +258 -160
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
- opik/integrations/llama_index/callback.py +43 -6
- opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
- opik/integrations/openai/opik_tracker.py +99 -4
- opik/integrations/openai/videos/__init__.py +9 -0
- opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
- opik/integrations/openai/videos/videos_create_decorator.py +159 -0
- opik/integrations/openai/videos/videos_download_decorator.py +110 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batchers.py +32 -40
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/emulator_message_processor.py +36 -1
- opik/message_processing/emulation/models.py +21 -0
- opik/message_processing/messages.py +9 -0
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
- opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
- opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
- opik/message_processing/queue_consumer.py +4 -2
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +36 -8
- opik/plugins/pytest/experiment_runner.py +1 -1
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +42 -0
- opik/rest_api/datasets/client.py +321 -123
- opik/rest_api/datasets/raw_client.py +470 -145
- opik/rest_api/experiments/client.py +26 -0
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/llm_provider_key/client.py +4 -4
- opik/rest_api/llm_provider_key/raw_client.py +4 -4
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
- opik/rest_api/manual_evaluation/client.py +101 -0
- opik/rest_api/manual_evaluation/raw_client.py +172 -0
- opik/rest_api/optimizations/client.py +0 -166
- opik/rest_api/optimizations/raw_client.py +0 -248
- opik/rest_api/projects/client.py +9 -0
- opik/rest_api/projects/raw_client.py +13 -0
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
- opik/rest_api/prompts/client.py +130 -2
- opik/rest_api/prompts/raw_client.py +175 -0
- opik/rest_api/traces/client.py +101 -0
- opik/rest_api/traces/raw_client.py +120 -0
- opik/rest_api/types/__init__.py +50 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +38 -2
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
- opik/rest_api/types/dataset.py +2 -0
- opik/rest_api/types/dataset_item.py +1 -1
- opik/rest_api/types/dataset_item_batch.py +4 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +1 -1
- opik/rest_api/types/dataset_item_filter.py +4 -0
- opik/rest_api/types/dataset_item_page_compare.py +0 -1
- opik/rest_api/types/dataset_item_page_public.py +0 -1
- opik/rest_api/types/dataset_item_public.py +1 -1
- opik/rest_api/types/dataset_public.py +2 -0
- opik/rest_api/types/dataset_version_public.py +10 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +9 -0
- opik/rest_api/types/experiment_public.py +9 -0
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/llm_as_judge_message_content.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt_version.py +1 -0
- opik/rest_api/types/prompt_version_detail.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +1 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +5 -1
- opik/rest_api/types/provider_api_key_provider.py +2 -1
- opik/rest_api/types/provider_api_key_public.py +5 -1
- opik/rest_api/types/provider_api_key_public_provider.py +2 -1
- opik/rest_api/types/service_toggles_config.py +11 -1
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
- opik/cli/export.py +0 -791
- opik/cli/import_command.py +0 -575
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Opik tracking integration for Harbor benchmark evaluation framework.
|
|
3
|
+
|
|
4
|
+
This module provides the `track_harbor` function to add Opik tracing to Harbor Jobs,
|
|
5
|
+
enabling real-time streaming of trial results to Opik for visualization and analysis.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
>>> from opik.integrations.harbor import track_harbor
|
|
9
|
+
>>> from harbor.job import Job
|
|
10
|
+
>>> import os
|
|
11
|
+
>>>
|
|
12
|
+
>>> os.environ["OPIK_PROJECT_NAME"] = "swebench-evaluation"
|
|
13
|
+
>>>
|
|
14
|
+
>>> job = Job(config)
|
|
15
|
+
>>> tracked_job = track_harbor(job)
|
|
16
|
+
>>> result = await tracked_job.run()
|
|
17
|
+
|
|
18
|
+
Or enable tracking globally (for CLI usage):
|
|
19
|
+
>>> from opik.integrations.harbor import track_harbor
|
|
20
|
+
>>> track_harbor()
|
|
21
|
+
>>> # Now run Harbor code - it will be traced
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import functools
|
|
25
|
+
import logging
|
|
26
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
27
|
+
from typing_extensions import override
|
|
28
|
+
|
|
29
|
+
from harbor.job import Job
|
|
30
|
+
from harbor.models.trajectories.step import Step
|
|
31
|
+
from harbor.models.trial.result import TrialResult
|
|
32
|
+
from harbor.models.verifier.result import VerifierResult
|
|
33
|
+
from harbor.trial.trial import Trial
|
|
34
|
+
from harbor.verifier.verifier import Verifier
|
|
35
|
+
|
|
36
|
+
from opik import datetime_helpers, id_helpers, opik_context, track
|
|
37
|
+
from opik.api_objects import opik_client, span
|
|
38
|
+
from opik.decorator import arguments_helpers, base_track_decorator
|
|
39
|
+
from opik.types import FeedbackScoreDict, SpanType
|
|
40
|
+
|
|
41
|
+
from . import experiment_service
|
|
42
|
+
|
|
43
|
+
LOGGER = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class HarborTrialRunDecorator(base_track_decorator.BaseTrackDecorator):
    """
    Track decorator specialised for Harbor's ``Trial.run``.

    The start-span hook derives the trace name, input, metadata and tags
    from the trial configuration, so they are in place before the
    span/trace is sent to the backend.
    """

    @override
    def _start_span_inputs_preprocessor(
        self,
        func: Callable,
        track_options: arguments_helpers.TrackOptions,
        args: Tuple,
        kwargs: Dict[str, Any],
    ) -> arguments_helpers.StartSpanParameters:
        """Build the start-span parameters from the Trial found in ``args[0]``.

        Args:
            func: The wrapped callable; its ``__name__`` is the fallback span name.
            track_options: Options given to ``track``; its tags and metadata are
                copied, never mutated.
            args: Positional call arguments; ``args[0]`` is taken to be the Trial.
            kwargs: Keyword call arguments (not used).

        Returns:
            Parameters named ``<agent name>/<trial name>`` whose input describes
            the trial, task and agent. Without positional args, a generic set of
            parameters built from ``track_options`` alone is returned.
        """
        if args:
            # Trial.run is an instance method, so the Trial is the first argument.
            trial: Trial = args[0]
            config = trial.config
            agent = config.agent
            task = config.task

            span_name = f"{agent.name}/{config.trial_name}"

            if hasattr(task, "name"):
                task_name = task.name
            else:
                task_name = str(task.path)

            span_input: Dict[str, Any] = {
                "trial_name": config.trial_name,
                "task": {
                    "name": task_name,
                    "source": getattr(task, "source", None),
                },
                "agent": {
                    "name": agent.name,
                    "model": getattr(agent, "model_name", None),
                },
            }

            # Work on copies so the caller's track options stay untouched.
            if track_options.metadata is not None:
                span_metadata = track_options.metadata.copy()
            else:
                span_metadata = {}
            span_metadata["created_from"] = "harbor"

            span_tags = list(track_options.tags) if track_options.tags is not None else []
            for extra_tag in ("harbor", agent.name):
                if extra_tag not in span_tags:
                    span_tags.append(extra_tag)

            return arguments_helpers.StartSpanParameters(
                name=span_name,
                input=span_input,
                type=track_options.type,
                tags=span_tags,
                metadata=span_metadata,
                project_name=track_options.project_name,
            )

        # No positional args: should not happen for an instance method, but
        # fall back to the plain track options rather than failing.
        fallback_name = track_options.name
        if fallback_name is None:
            fallback_name = func.__name__
        return arguments_helpers.StartSpanParameters(
            name=fallback_name,
            input=None,
            type=track_options.type,
            tags=track_options.tags,
            metadata=track_options.metadata,
            project_name=track_options.project_name,
        )

    @override
    def _end_span_inputs_preprocessor(
        self,
        output: Any,
        capture_output: bool,
        current_span_data: span.SpanData,
    ) -> arguments_helpers.EndSpanParameters:
        """Return end-span parameters with no output.

        The trial output is attached in ``_wrap_trial_run`` through
        ``opik_context.update_current_trace``, so nothing is processed here.
        """
        return arguments_helpers.EndSpanParameters(output=None)

    @override
    def _streams_handler(
        self,
        output: Any,
        capture_output: bool,
        generations_aggregator: Optional[Callable[[List[Any]], Any]],
    ) -> Optional[Any]:
        """Always return None: Trial.run needs no stream handling."""
        return None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _rewards_to_feedback_scores(
|
|
146
|
+
rewards: Optional[Dict[str, Any]],
|
|
147
|
+
error: Optional[str] = None,
|
|
148
|
+
) -> List[FeedbackScoreDict]:
|
|
149
|
+
"""Convert Harbor verifier rewards to Opik feedback scores."""
|
|
150
|
+
if rewards is None:
|
|
151
|
+
return []
|
|
152
|
+
|
|
153
|
+
feedback_scores: List[FeedbackScoreDict] = []
|
|
154
|
+
for name, value in rewards.items():
|
|
155
|
+
try:
|
|
156
|
+
float_value = float(value)
|
|
157
|
+
|
|
158
|
+
score = FeedbackScoreDict(name=name, value=float_value, reason=error)
|
|
159
|
+
|
|
160
|
+
feedback_scores.append(score)
|
|
161
|
+
except (ValueError, TypeError):
|
|
162
|
+
LOGGER.warning(
|
|
163
|
+
"Could not convert reward value to float: %s=%s", name, value
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
return feedback_scores
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _source_to_span_type(source: str) -> SpanType:
|
|
170
|
+
"""Convert ATIF step source to Opik span type."""
|
|
171
|
+
if source == "agent":
|
|
172
|
+
return "llm"
|
|
173
|
+
return "general"
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _patch_step_class() -> None:
    """Patch Harbor's ``Step.__init__`` so each new step is logged as an Opik span.

    The patch is applied at most once: a ``_patched`` attribute stored on this
    function marks it as done, and later calls return immediately.

    After the original ``__init__`` has run, the wrapper looks up the current
    Opik trace and does nothing when there is none. Otherwise it logs a span
    named ``step_<step_id>`` under the current span (if any), carrying the
    step's message and tool calls as input, its observation results as
    output, and its token usage and cost. Span creation is best-effort: any
    exception is logged at DEBUG level and never propagates to the code that
    constructed the step.
    """
    # Check if already patched
    if hasattr(_patch_step_class, "_patched"):
        return

    original_init = Step.__init__

    @functools.wraps(original_init)
    def patched_init(self: Step, *args: Any, **kwargs: Any) -> None:
        original_init(self, *args, **kwargs)

        # Steps created outside of a tracked trial are not logged.
        trace_data = opik_context.get_current_trace_data()
        if trace_data is None:
            return

        parent_span = opik_context.get_current_span_data()
        parent_span_id = parent_span.id if parent_span else None

        try:
            client = opik_client.get_client_cached()

            input_dict: Dict[str, Any] = {}
            if self.message:
                input_dict["message"] = self.message
            if self.tool_calls:
                input_dict["tool_calls"] = [
                    {
                        "tool_call_id": tc.tool_call_id,
                        "function_name": tc.function_name,
                        "arguments": tc.arguments,
                    }
                    for tc in self.tool_calls
                ]

            output_dict: Optional[Dict[str, Any]] = None
            if self.observation and self.observation.results:
                output_dict = {
                    "results": [
                        {"content": r.content} for r in self.observation.results
                    ]
                }

            metadata: Dict[str, Any] = {
                "source": self.source,
                "created_from": "harbor",
            }
            if self.reasoning_content:
                metadata["reasoning"] = self.reasoning_content

            usage: Optional[Dict[str, Any]] = None
            total_cost: Optional[float] = None
            if self.metrics:
                prompt_tokens = self.metrics.prompt_tokens
                completion_tokens = self.metrics.completion_tokens

                usage = {}
                if prompt_tokens is not None:
                    usage["prompt_tokens"] = prompt_tokens
                if completion_tokens is not None:
                    usage["completion_tokens"] = completion_tokens
                # Compare against None rather than truthiness: a count of 0
                # is a valid value and must still produce a total.
                if prompt_tokens is not None and completion_tokens is not None:
                    usage["total_tokens"] = prompt_tokens + completion_tokens
                if not usage:
                    usage = None
                total_cost = getattr(self.metrics, "cost_usd", None)

            # NOTE(review): assumes self.timestamp is always a parseable ISO
            # string; if it is not, the failure is caught below and the span
            # is dropped - confirm against Harbor's Step model.
            client.span(
                id=id_helpers.generate_id(),
                trace_id=trace_data.id,
                parent_span_id=parent_span_id,
                name=f"step_{self.step_id}",
                type=_source_to_span_type(self.source),
                start_time=datetime_helpers.parse_iso_timestamp(self.timestamp),
                input=input_dict if input_dict else None,
                output=output_dict,
                metadata=metadata,
                usage=usage,
                total_cost=total_cost,
                model=self.model_name if self.source == "agent" else None,
                tags=["harbor", self.source],
            )

        except Exception as e:
            # Tracing must never break step construction; keep the traceback
            # available for anyone who enables DEBUG logging.
            LOGGER.debug("Failed to create span for step: %s", e, exc_info=True)

    Step.__init__ = patched_init  # type: ignore
    setattr(_patch_step_class, "_patched", True)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _enable_harbor_tracking(project_name: Optional[str] = None) -> None:
    """Internal: switch on Opik tracing for Harbor by patching its classes.

    Wraps the Trial lifecycle methods and ``Verifier.verify`` with tracing
    wrappers, then patches the Step class. A method that already carries an
    ``opik_tracked`` attribute is left as it is, so repeated calls do not
    wrap twice.

    Args:
        project_name: Opik project name. If None, uses OPIK_PROJECT_NAME env var.
    """
    # (owning class, method name, wrapper factory), in patching order.
    patch_plan = (
        (Trial, "run", _wrap_trial_run),
        (Trial, "_setup_environment", _wrap_setup_environment),
        (Trial, "_setup_agent", _wrap_setup_agent),
        (Trial, "_execute_agent", _wrap_execute_agent),
        (Trial, "_run_verification", _wrap_run_verification),
        (Verifier, "verify", _wrap_verify),
    )

    for owner, method_name, make_wrapper in patch_plan:
        current = getattr(owner, method_name)
        if hasattr(current, "opik_tracked"):
            continue
        setattr(owner, method_name, make_wrapper(current, project_name))

    # Patch Step class for real-time step tracking
    _patch_step_class()

    LOGGER.info("Opik tracking enabled for Harbor")
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def track_harbor(
    job: Optional["Job"] = None,
    project_name: Optional[str] = None,
) -> Optional["Job"]:
    """Turn on Opik tracking for Harbor.

    Two call styles are supported:
        - ``track_harbor()`` enables tracking globally (CLI usage).
        - ``track_harbor(job)`` enables tracking and hands the job back
          (SDK usage).

    Tracking is enabled by patching Harbor classes, so it is not limited to
    the job passed in; the job itself is returned unmodified.

    Args:
        job: Optional Harbor Job instance, returned as-is.
        project_name: Opik project name. If None, uses OPIK_PROJECT_NAME env var.

    Returns:
        The ``job`` argument, or None when no job was given.

    Example:
        >>> from opik.integrations.harbor import track_harbor
        >>> job = Job(config)
        >>> tracked_job = track_harbor(job)
        >>> result = await tracked_job.run()
    """
    _enable_harbor_tracking(project_name)
    return job
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _wrap_trial_run(original: Callable, project_name: Optional[str]) -> Callable:
    """Wrap ``Trial.run`` with tracing, feedback scores and experiment linking.

    Args:
        original: The unpatched ``Trial.run`` coroutine function.
        project_name: Opik project name passed on to the track decorator.

    Returns:
        A tracked coroutine function that runs the trial, writes its output
        and reward-based feedback scores to the current trace, and links the
        trace to the experiment when an experiment service is available.
    """
    trial_decorator = HarborTrialRunDecorator()
    tracked = trial_decorator.track(
        tags=["harbor"],
        project_name=project_name,
        capture_output=False,
    )

    @functools.wraps(original)
    async def wrapped(self: Trial) -> TrialResult:
        config = self.config

        # Set the experiment service up on first use, so experiment tracking
        # works in both SDK and CLI modes.
        if experiment_service.get_service() is None:
            try:
                # The job id keeps experiment naming consistent.
                if config.job_id:
                    experiment_name = f"harbor-job-{str(config.job_id)[:8]}"
                else:
                    experiment_name = None

                # Experiment config carries the agent/model info.
                experiment_config: Dict[str, Any] = {
                    "agent_name": config.agent.name,
                }
                model_name = getattr(config.agent, "model_name", None)
                if model_name:
                    experiment_config["model_name"] = model_name

                LOGGER.debug(
                    "Lazily setting up experiment service: experiment_name=%s",
                    experiment_name,
                )
                experiment_service.setup_lazy(
                    experiment_name=experiment_name,
                    experiment_config=experiment_config,
                )
            except Exception as e:
                LOGGER.debug("Failed to lazily setup experiment service: %s", e)

        result: TrialResult = await original(self)

        # Record output and feedback scores on the trace.
        trace_output: Dict[str, Any] = {
            "trial_name": result.trial_name,
            "task_name": result.task_name,
        }
        feedback_scores = None
        verifier_result = result.verifier_result
        if verifier_result and verifier_result.rewards:
            trace_output["rewards"] = verifier_result.rewards
            # Prefer the verifier's error message, then the trial's.
            error_msg = getattr(verifier_result, "error", None) or getattr(
                result, "error", None
            )
            feedback_scores = _rewards_to_feedback_scores(
                verifier_result.rewards, error=error_msg
            )

        opik_context.update_current_trace(
            output=trace_output,
            feedback_scores=feedback_scores,
        )

        # Link the trace to the experiment.
        trace_data = opik_context.get_current_trace_data()
        if trace_data is None:
            return result

        service = experiment_service.get_service()
        LOGGER.debug(
            "Linking trial to experiment: trial=%s, trace_id=%s, service=%s",
            config.trial_name,
            trace_data.id,
            service,
        )
        if service is None:
            LOGGER.debug(
                "No experiment service available, skipping experiment linking"
            )
            return result

        source = getattr(config.task, "source", None)
        if hasattr(config.task, "name"):
            task_name = config.task.name
        else:
            task_name = str(config.task.path)
        service.link_trial_to_experiment(
            trial_name=config.trial_name,
            trace_id=trace_data.id,
            source=source,
            task_name=task_name,
        )

        return result

    return tracked(wrapped)
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def _wrap_setup_environment(
    original: Callable, project_name: Optional[str]
) -> Callable:
    """Return ``Trial._setup_environment`` wrapped in a ``setup_environment`` span.

    The span input and metadata are set before the original coroutine runs;
    a completed status is written as output once it has returned.
    """
    tracked = track(
        name="setup_environment", tags=["harbor"], project_name=project_name
    )

    @functools.wraps(original)
    async def wrapped(self: Trial) -> None:
        span_input = {"phase": "environment_setup"}
        span_metadata = {"created_from": "harbor"}
        opik_context.update_current_span(input=span_input, metadata=span_metadata)

        await original(self)

        opik_context.update_current_span(output={"status": "completed"})

    return tracked(wrapped)
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _wrap_setup_agent(original: Callable, project_name: Optional[str]) -> Callable:
    """Return ``Trial._setup_agent`` wrapped in a ``setup_agent`` span.

    The span input and metadata are set before the original coroutine runs;
    a completed status is written as output once it has returned.
    """
    tracked = track(name="setup_agent", tags=["harbor"], project_name=project_name)

    @functools.wraps(original)
    async def wrapped(self: Trial) -> None:
        span_input = {"phase": "agent_setup"}
        span_metadata = {"created_from": "harbor"}
        opik_context.update_current_span(input=span_input, metadata=span_metadata)

        await original(self)

        opik_context.update_current_span(output={"status": "completed"})

    return tracked(wrapped)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _wrap_execute_agent(original: Callable, project_name: Optional[str]) -> Callable:
    """Return ``Trial._execute_agent`` wrapped in an ``execute_agent`` span.

    When the trial has a truthy ``_task`` attribute, its instruction is
    recorded as the span input; otherwise the input is an empty dict. A
    completed status is written as output once the original has returned.
    """
    tracked = track(name="execute_agent", tags=["harbor"], project_name=project_name)

    @functools.wraps(original)
    async def wrapped(self: Trial) -> None:
        span_input = {}
        task = getattr(self, "_task", None)
        if task:
            span_input["instruction"] = task.instruction

        opik_context.update_current_span(
            input=span_input,
            metadata={"created_from": "harbor"},
        )

        await original(self)

        opik_context.update_current_span(output={"status": "completed"})

    return tracked(wrapped)
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def _wrap_run_verification(original: Callable, project_name: Optional[str]) -> Callable:
    """Return ``Trial._run_verification`` wrapped in a ``run_verification`` span.

    The span input and metadata are set before the original coroutine runs;
    a completed status is written as output once it has returned.
    """
    tracked = track(
        name="run_verification", tags=["harbor"], project_name=project_name
    )

    @functools.wraps(original)
    async def wrapped(self: Trial) -> None:
        span_input = {"phase": "verification"}
        span_metadata = {"created_from": "harbor"}
        opik_context.update_current_span(input=span_input, metadata=span_metadata)

        await original(self)

        opik_context.update_current_span(output={"status": "completed"})

    return tracked(wrapped)
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _wrap_verify(original: Callable, project_name: Optional[str]) -> Callable:
    """Return ``Verifier.verify`` wrapped in a ``verify`` span.

    The span output is the verifier's rewards when it reports any, and a
    completed status otherwise. The verifier result is returned unchanged.
    """
    tracked = track(name="verify", tags=["harbor"], project_name=project_name)

    @functools.wraps(original)
    async def wrapped(self: Verifier) -> VerifierResult:
        opik_context.update_current_span(
            input={"phase": "verify"},
            metadata={"created_from": "harbor"},
        )

        result: VerifierResult = await original(self)

        if result.rewards:
            span_output: Dict[str, Any] = {"rewards": result.rewards}
        else:
            span_output = {"status": "completed"}
        opik_context.update_current_span(output=span_output)

        return result

    return tracked(wrapped)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def reset_harbor_tracking() -> None:
    """Reset Harbor tracking state; intended for tests.

    Only the experiment service is reset. Patches already applied to Harbor
    methods stay in place (the ``opik_tracked`` marker prevents them from
    being applied a second time).
    """
    experiment_service.reset()
|
|
@@ -91,12 +91,12 @@ class OpikTracer(tracing.Tracer):
|
|
|
91
91
|
project_name=self._project_name,
|
|
92
92
|
)
|
|
93
93
|
|
|
94
|
-
|
|
94
|
+
result = span_creation_handler.create_span_respecting_context(
|
|
95
95
|
start_span_arguments=start_span_parameters,
|
|
96
96
|
distributed_trace_headers=None,
|
|
97
97
|
)
|
|
98
98
|
final_span_or_trace_data: Union[opik_span.SpanData, opik_trace.TraceData] = (
|
|
99
|
-
trace_data if trace_data is not None else span_data
|
|
99
|
+
result.trace_data if result.trace_data is not None else result.span_data
|
|
100
100
|
)
|
|
101
101
|
|
|
102
102
|
return opik_span_bridge.OpikSpanBridge(final_span_or_trace_data)
|
|
@@ -1,4 +1,17 @@
|
|
|
1
|
-
from .opik_tracer import
|
|
1
|
+
from .opik_tracer import (
|
|
2
|
+
OpikTracer,
|
|
3
|
+
LANGGRAPH_INTERRUPT_OUTPUT_KEY,
|
|
4
|
+
LANGGRAPH_RESUME_INPUT_KEY,
|
|
5
|
+
LANGGRAPH_INTERRUPT_METADATA_KEY,
|
|
6
|
+
)
|
|
2
7
|
from .langgraph_async_context_bridge import extract_current_langgraph_span_data
|
|
8
|
+
from .langgraph_tracer_injector import track_langgraph
|
|
3
9
|
|
|
4
|
-
__all__ = [
|
|
10
|
+
__all__ = [
|
|
11
|
+
"OpikTracer",
|
|
12
|
+
"extract_current_langgraph_span_data",
|
|
13
|
+
"track_langgraph",
|
|
14
|
+
"LANGGRAPH_INTERRUPT_OUTPUT_KEY",
|
|
15
|
+
"LANGGRAPH_RESUME_INPUT_KEY",
|
|
16
|
+
"LANGGRAPH_INTERRUPT_METADATA_KEY",
|
|
17
|
+
]
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Dict, List, TypeVar
|
|
3
|
+
|
|
4
|
+
from langchain_core.runnables import base as runnables_base
|
|
5
|
+
|
|
6
|
+
from . import opik_tracer as opik_tracer_module
|
|
7
|
+
|
|
8
|
+
LOGGER = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
CompiledGraphType = TypeVar("CompiledGraphType", bound=runnables_base.Runnable)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def track_langgraph(
    graph: CompiledGraphType,
    opik_tracer: opik_tracer_module.OpikTracer,
) -> CompiledGraphType:
    """
    Adds Opik tracking to a compiled LangGraph graph by injecting OpikTracer into its default config.

    After calling this function, subsequent invocations of the graph use the injected
    OpikTracer without it having to be passed in the ``config`` parameter.

    The graph structure is obtained with ``graph.get_graph(xray=True)`` and handed to
    ``opik_tracer.set_graph()`` on every call of this function.

    Args:
        graph: A compiled LangGraph graph (result of StateGraph.compile()).
        opik_tracer: An OpikTracer instance to use for tracking the graph.

    Returns:
        The same graph object, with Opik tracking enabled.

    Example:
        ```python
        from langgraph.graph import StateGraph, START, END
        from opik.integrations.langchain import OpikTracer, track_langgraph

        # Build your graph
        builder = StateGraph(State)
        builder.add_node("my_node", my_node_function)
        builder.add_edge(START, "my_node")
        builder.add_edge("my_node", END)

        # Compile the graph
        graph = builder.compile()

        # Create OpikTracer and track the graph once
        # No need to manually extract the graph - it's done automatically!
        opik_tracer = OpikTracer(
            tags=["production"],
            metadata={"version": "1.0"}
        )
        graph = track_langgraph(graph, opik_tracer)

        # Now all invocations are tracked automatically
        result = graph.invoke({"message": "Hello"})
        # No need to pass config={"callbacks": [opik_tracer]} anymore!
        ```

    Note:
        - The graph object is modified in-place and also returned for convenience.
        - If the graph's default callbacks already contain an OpikTracer, a warning
          is logged and no second tracer is added.
        - The default callbacks may be missing, ``None``, a list of handlers, or a
          callback manager (an object exposing ``handlers`` and ``add_handler``);
          all four are supported.
        - For async invocations using `ainvoke()`, you may still need to use
          `extract_current_langgraph_span_data()` to propagate context to @track-decorated
          functions within async nodes.
    """
    graph_structure = graph.get_graph(xray=True)
    opik_tracer.set_graph(graph_structure)

    # Inject the callback into the graph's default config
    config: Dict[str, Any] = getattr(graph, "config", None) or {}
    graph.config = config  # type: ignore[attr-defined]

    callbacks: Any = config.get("callbacks")
    if callbacks is None:
        # Covers both a missing key and an explicit ``"callbacks": None``;
        # ``setdefault`` alone would hand back the None and break iteration.
        callbacks = []
        config["callbacks"] = callbacks

    # A callback manager is not iterable and has no ``append``; its handlers
    # live in ``.handlers`` and new ones are registered via ``add_handler``.
    is_manager = hasattr(callbacks, "add_handler")
    handlers: List[Any] = callbacks.handlers if is_manager else callbacks

    if any(isinstance(cb, opik_tracer_module.OpikTracer) for cb in handlers):
        LOGGER.warning(
            "Graph already has an OpikTracer callback injected. "
            "Skipping re-tracking to avoid duplicate callbacks."
        )
        return graph

    if is_manager:
        callbacks.add_handler(opik_tracer)
    else:
        callbacks.append(opik_tracer)

    return graph
|