opik 1.9.26__py3-none-any.whl → 1.9.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +10 -3
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +31 -5
- opik/api_objects/experiment/helpers.py +34 -10
- opik/api_objects/local_recording.py +8 -3
- opik/api_objects/opik_client.py +218 -46
- opik/api_objects/opik_query_language.py +9 -0
- opik/api_objects/prompt/__init__.py +11 -3
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +193 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/{chat_content_renderer_registry.py → chat/content_renderer_registry.py} +31 -34
- opik/api_objects/prompt/client.py +101 -30
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/{prompt.py → text/prompt.py} +55 -32
- opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +8 -5
- opik/cli/export.py +6 -2
- opik/config.py +0 -5
- opik/decorator/base_track_decorator.py +37 -40
- opik/evaluation/__init__.py +13 -2
- opik/evaluation/engine/engine.py +195 -223
- opik/evaluation/engine/helpers.py +8 -7
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/evaluation_result.py +35 -1
- opik/evaluation/evaluator.py +309 -23
- opik/evaluation/models/litellm/util.py +78 -6
- opik/evaluation/report.py +14 -2
- opik/evaluation/rest_operations.py +6 -9
- opik/evaluation/test_case.py +2 -2
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +17 -0
- opik/id_helpers.py +18 -0
- opik/integrations/adk/helpers.py +16 -7
- opik/integrations/adk/legacy_opik_tracer.py +7 -4
- opik/integrations/adk/opik_tracer.py +3 -1
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
- opik/integrations/dspy/callback.py +1 -4
- opik/integrations/haystack/opik_connector.py +2 -2
- opik/integrations/haystack/opik_tracer.py +2 -4
- opik/integrations/langchain/opik_tracer.py +1 -4
- opik/integrations/llama_index/callback.py +2 -4
- opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
- opik/integrations/openai/opik_tracker.py +1 -1
- opik/opik_context.py +7 -7
- opik/rest_api/__init__.py +123 -11
- opik/rest_api/dashboards/client.py +65 -2
- opik/rest_api/dashboards/raw_client.py +82 -0
- opik/rest_api/datasets/client.py +441 -2
- opik/rest_api/datasets/raw_client.py +1225 -505
- opik/rest_api/experiments/client.py +30 -2
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/optimizations/client.py +302 -0
- opik/rest_api/optimizations/raw_client.py +463 -0
- opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
- opik/rest_api/prompts/__init__.py +2 -2
- opik/rest_api/prompts/client.py +34 -4
- opik/rest_api/prompts/raw_client.py +32 -2
- opik/rest_api/prompts/types/__init__.py +3 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/traces/client.py +6 -6
- opik/rest_api/traces/raw_client.py +4 -4
- opik/rest_api/types/__init__.py +121 -11
- opik/rest_api/types/aggregation_data.py +1 -0
- opik/rest_api/types/automation_rule_evaluator.py +23 -1
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
- opik/rest_api/types/{automation_rule_evaluator_object_public.py → automation_rule_evaluator_object_object_public.py} +32 -10
- opik/rest_api/types/automation_rule_evaluator_page_public.py +2 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +23 -1
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +23 -1
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +23 -1
- opik/rest_api/types/dashboard_page_public.py +1 -0
- opik/rest_api/types/dataset.py +2 -0
- opik/rest_api/types/dataset_item.py +1 -0
- opik/rest_api/types/dataset_item_compare.py +1 -0
- opik/rest_api/types/dataset_item_page_compare.py +1 -0
- opik/rest_api/types/dataset_item_page_public.py +1 -0
- opik/rest_api/types/dataset_item_public.py +1 -0
- opik/rest_api/types/dataset_public.py +2 -0
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +49 -0
- opik/rest_api/types/experiment.py +2 -0
- opik/rest_api/types/experiment_public.py +2 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/feedback_score_public.py +4 -0
- opik/rest_api/types/optimization.py +2 -0
- opik/rest_api/types/optimization_public.py +2 -0
- opik/rest_api/types/optimization_public_status.py +3 -1
- opik/rest_api/types/optimization_status.py +3 -1
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +2 -0
- opik/rest_api/types/optimization_write_status.py +3 -1
- opik/rest_api/types/prompt.py +6 -0
- opik/rest_api/types/prompt_detail.py +6 -0
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_public.py +6 -0
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_version.py +2 -0
- opik/rest_api/types/prompt_version_detail.py +2 -0
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_public.py +2 -0
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/score_name.py +1 -0
- opik/rest_api/types/service_toggles_config.py +5 -0
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/trace.py +6 -0
- opik/rest_api/types/trace_public.py +6 -0
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/value_entry.py +2 -0
- opik/rest_api/types/value_entry_compare.py +2 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
- opik/rest_api/types/value_entry_public.py +2 -0
- opik/synchronization.py +5 -6
- opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
- {opik-1.9.26.dist-info → opik-1.9.39.dist-info}/METADATA +2 -1
- {opik-1.9.26.dist-info → opik-1.9.39.dist-info}/RECORD +177 -119
- opik/api_objects/prompt/chat_prompt_template.py +0 -200
- {opik-1.9.26.dist-info → opik-1.9.39.dist-info}/WHEEL +0 -0
- {opik-1.9.26.dist-info → opik-1.9.39.dist-info}/entry_points.txt +0 -0
- {opik-1.9.26.dist-info → opik-1.9.39.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.26.dist-info → opik-1.9.39.dist-info}/top_level.txt +0 -0
|
@@ -36,16 +36,33 @@ def apply_model_specific_filters(
|
|
|
36
36
|
already_warned: Set[str],
|
|
37
37
|
warn: Callable[[str, Any], None],
|
|
38
38
|
) -> None:
|
|
39
|
-
"""
|
|
39
|
+
"""Adjust/drop params for specific model families before calling LiteLLM.
|
|
40
40
|
|
|
41
|
-
Currently handles
|
|
42
|
-
|
|
43
|
-
|
|
41
|
+
Currently handles:
|
|
42
|
+
- GPT-5: only honours temperature=1 and does not return log probabilities.
|
|
43
|
+
- DashScope Qwen: enforces constraints for logprobs / top_logprobs
|
|
44
44
|
"""
|
|
45
|
+
if model_name.startswith("gpt-5"):
|
|
46
|
+
_apply_gpt5_filters(params, already_warned, warn)
|
|
47
|
+
return
|
|
45
48
|
|
|
46
|
-
if
|
|
49
|
+
if model_name.startswith("dashscope/"):
|
|
50
|
+
_apply_qwen_dashscope_filters(params, already_warned, warn)
|
|
47
51
|
return
|
|
48
52
|
|
|
53
|
+
|
|
54
|
+
def _apply_gpt5_filters(
|
|
55
|
+
params: Dict[str, Any],
|
|
56
|
+
already_warned: Set[str],
|
|
57
|
+
warn: Callable[[str, Any], None],
|
|
58
|
+
) -> None:
|
|
59
|
+
"""Apply GPT-5 specific parameter filters.
|
|
60
|
+
|
|
61
|
+
Only honours temperature=1 and does not return log probabilities.
|
|
62
|
+
Removing those eagerly avoids provider errors while the callback surfaces a
|
|
63
|
+
one-time warning to the caller.
|
|
64
|
+
"""
|
|
65
|
+
|
|
49
66
|
unsupported: list[tuple[str, Any]] = []
|
|
50
67
|
|
|
51
68
|
if "temperature" in params:
|
|
@@ -61,7 +78,62 @@ def apply_model_specific_filters(
|
|
|
61
78
|
if param in params:
|
|
62
79
|
unsupported.append((param, params[param]))
|
|
63
80
|
|
|
64
|
-
|
|
81
|
+
_drop_unsupported_params_with_warning(
|
|
82
|
+
params,
|
|
83
|
+
unsupported,
|
|
84
|
+
already_warned,
|
|
85
|
+
warn,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _apply_qwen_dashscope_filters(
|
|
90
|
+
params: Dict[str, Any],
|
|
91
|
+
already_warned: Set[str],
|
|
92
|
+
warn: Callable[[str, Any], None],
|
|
93
|
+
) -> None:
|
|
94
|
+
"""Apply Qwen/DashScope specific parameter filters.
|
|
95
|
+
|
|
96
|
+
top_logprobs is only meaningful if logprobs is true and must be an int
|
|
97
|
+
in [0, 5]. When logprobs is false, drops top_logprobs; when logprobs is
|
|
98
|
+
true, clamps top_logprobs into [0, 5].
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
unsupported: list[tuple[str, Any]] = []
|
|
102
|
+
|
|
103
|
+
logprobs_value = params.get("logprobs")
|
|
104
|
+
if not logprobs_value:
|
|
105
|
+
if "top_logprobs" in params:
|
|
106
|
+
unsupported.append(("top_logprobs", params["top_logprobs"]))
|
|
107
|
+
else:
|
|
108
|
+
if "top_logprobs" in params:
|
|
109
|
+
raw_top_logprobs = params["top_logprobs"]
|
|
110
|
+
try:
|
|
111
|
+
top_logprobs = int(raw_top_logprobs)
|
|
112
|
+
except (TypeError, ValueError):
|
|
113
|
+
unsupported.append(("top_logprobs", raw_top_logprobs))
|
|
114
|
+
else:
|
|
115
|
+
if top_logprobs < 0:
|
|
116
|
+
top_logprobs = 0
|
|
117
|
+
elif top_logprobs > 5:
|
|
118
|
+
top_logprobs = 5
|
|
119
|
+
params["top_logprobs"] = top_logprobs
|
|
120
|
+
|
|
121
|
+
_drop_unsupported_params_with_warning(
|
|
122
|
+
params,
|
|
123
|
+
unsupported,
|
|
124
|
+
already_warned,
|
|
125
|
+
warn,
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _drop_unsupported_params_with_warning(
|
|
130
|
+
params: Dict[str, Any],
|
|
131
|
+
unsupported_params: list[tuple[str, Any]],
|
|
132
|
+
already_warned: Set[str],
|
|
133
|
+
warn: Callable[[str, Any], None],
|
|
134
|
+
) -> None:
|
|
135
|
+
"""Remove unsupported params and emit warnings once per param name."""
|
|
136
|
+
for param, value in unsupported_params:
|
|
65
137
|
params.pop(param, None)
|
|
66
138
|
if param in already_warned:
|
|
67
139
|
continue
|
opik/evaluation/report.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
|
-
from typing import Dict, List, Tuple
|
|
2
|
+
from typing import Dict, List, Optional, Tuple
|
|
3
3
|
|
|
4
4
|
from rich import align, console, panel, table, text
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
from . import test_result, evaluation_result
|
|
8
|
+
from .metrics import score_result
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
def _format_time(seconds: float) -> str:
|
|
@@ -41,7 +42,10 @@ def _compute_average_scores(
|
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
def display_experiment_results(
|
|
44
|
-
dataset_name: str,
|
|
45
|
+
dataset_name: str,
|
|
46
|
+
total_time: float,
|
|
47
|
+
test_results: List[test_result.TestResult],
|
|
48
|
+
experiment_scores: Optional[List[score_result.ScoreResult]] = None,
|
|
45
49
|
) -> None:
|
|
46
50
|
average_scores, failed_scores = _compute_average_scores(test_results)
|
|
47
51
|
nb_items = len(test_results)
|
|
@@ -62,6 +66,14 @@ def display_experiment_results(
|
|
|
62
66
|
score_strings += text.Text(f" - {failed_scores[name]} failed", style="red")
|
|
63
67
|
score_strings += text.Text("\n")
|
|
64
68
|
|
|
69
|
+
# Add experiment scores if available
|
|
70
|
+
if experiment_scores:
|
|
71
|
+
for score in experiment_scores:
|
|
72
|
+
score_strings += text.Text(
|
|
73
|
+
f"{score.name}: {score.value:.4f}", style="green bold"
|
|
74
|
+
)
|
|
75
|
+
score_strings += text.Text("\n")
|
|
76
|
+
|
|
65
77
|
aligned_test_results = align.Align.left(score_strings)
|
|
66
78
|
|
|
67
79
|
# Combine table, time text, and test results
|
|
@@ -4,7 +4,7 @@ from typing import List, Optional
|
|
|
4
4
|
from opik.api_objects import dataset, experiment, opik_client
|
|
5
5
|
from opik.types import FeedbackScoreDict
|
|
6
6
|
from . import test_case
|
|
7
|
-
from .metrics import
|
|
7
|
+
from .metrics import score_result
|
|
8
8
|
from .types import ScoringKeyMappingType
|
|
9
9
|
|
|
10
10
|
LOGGER = logging.getLogger(__name__)
|
|
@@ -67,11 +67,7 @@ def get_experiment_test_cases(
|
|
|
67
67
|
trace_id=item.trace_id,
|
|
68
68
|
dataset_item_id=item.dataset_item_id,
|
|
69
69
|
task_output=item.evaluation_task_output,
|
|
70
|
-
|
|
71
|
-
dataset_item=dataset_item_data,
|
|
72
|
-
task_output=item.evaluation_task_output,
|
|
73
|
-
scoring_key_mapping=scoring_key_mapping,
|
|
74
|
-
),
|
|
70
|
+
dataset_item_content=dataset_item_data,
|
|
75
71
|
)
|
|
76
72
|
)
|
|
77
73
|
|
|
@@ -98,6 +94,7 @@ def log_test_result_feedback_scores(
|
|
|
98
94
|
)
|
|
99
95
|
all_trace_scores.append(trace_score)
|
|
100
96
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
97
|
+
if len(all_trace_scores) > 0:
|
|
98
|
+
client.log_traces_feedback_scores(
|
|
99
|
+
scores=all_trace_scores, project_name=project_name
|
|
100
|
+
)
|
opik/evaluation/test_case.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Dict, Any
|
|
1
|
+
from typing import Dict, Any, Optional
|
|
2
2
|
import dataclasses
|
|
3
3
|
|
|
4
4
|
|
|
@@ -6,6 +6,6 @@ import dataclasses
|
|
|
6
6
|
class TestCase:
|
|
7
7
|
trace_id: str
|
|
8
8
|
dataset_item_id: str
|
|
9
|
-
scoring_inputs: Dict[str, Any]
|
|
10
9
|
task_output: Dict[str, Any]
|
|
11
10
|
dataset_item_content: Dict[str, Any] = dataclasses.field(default_factory=dict)
|
|
11
|
+
mapped_scoring_inputs: Optional[Dict[str, Any]] = None
|
opik/evaluation/types.py
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
|
-
from typing import Any, Callable, Dict, Union
|
|
1
|
+
from typing import Any, Callable, Dict, List, Union
|
|
2
|
+
|
|
3
|
+
from . import test_result
|
|
4
|
+
from .metrics import score_result
|
|
2
5
|
|
|
3
6
|
LLMTask = Callable[[Dict[str, Any]], Dict[str, Any]]
|
|
4
7
|
|
|
5
8
|
ScoringKeyMappingType = Dict[str, Union[str, Callable[[Dict[str, Any]], Any]]]
|
|
9
|
+
|
|
10
|
+
ExperimentScoreFunction = Callable[
|
|
11
|
+
[List[test_result.TestResult]],
|
|
12
|
+
Union[score_result.ScoreResult, List[score_result.ScoreResult]],
|
|
13
|
+
]
|
opik/exceptions.py
CHANGED
|
@@ -81,6 +81,23 @@ class PromptPlaceholdersDontMatchFormatArguments(OpikException):
|
|
|
81
81
|
)
|
|
82
82
|
|
|
83
83
|
|
|
84
|
+
class PromptTemplateStructureMismatch(OpikException):
|
|
85
|
+
"""Exception raised when attempting to create a prompt version with a different template structure than the existing prompt."""
|
|
86
|
+
|
|
87
|
+
def __init__(
|
|
88
|
+
self, prompt_name: str, existing_structure: str, attempted_structure: str
|
|
89
|
+
):
|
|
90
|
+
self.prompt_name = prompt_name
|
|
91
|
+
self.existing_structure = existing_structure
|
|
92
|
+
self.attempted_structure = attempted_structure
|
|
93
|
+
|
|
94
|
+
def __str__(self) -> str:
|
|
95
|
+
return (
|
|
96
|
+
f"Prompt with name '{self.prompt_name}' already exists and has immutable "
|
|
97
|
+
f"'{self.existing_structure}' template structure, not '{self.attempted_structure}'. "
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
|
|
84
101
|
class ExperimentNotFound(OpikException):
|
|
85
102
|
pass
|
|
86
103
|
|
opik/id_helpers.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from typing import Optional
|
|
3
|
+
import random
|
|
4
|
+
import string
|
|
3
5
|
import uuid
|
|
4
6
|
import uuid6
|
|
5
7
|
|
|
@@ -12,6 +14,22 @@ def generate_id(timestamp: Optional[datetime] = None) -> str:
|
|
|
12
14
|
return str(uuid6.uuid7())
|
|
13
15
|
|
|
14
16
|
|
|
17
|
+
def generate_random_alphanumeric_string(length: int) -> str:
|
|
18
|
+
"""Generate a random alphanumeric string of the specified length.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
length: The length of the string to generate.
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
A random string containing only alphanumeric characters (a-z, A-Z, 0-9).
|
|
25
|
+
"""
|
|
26
|
+
if length < 0:
|
|
27
|
+
raise ValueError("Length must be non-negative")
|
|
28
|
+
|
|
29
|
+
characters = string.ascii_letters + string.digits
|
|
30
|
+
return "".join(random.choice(characters) for _ in range(length))
|
|
31
|
+
|
|
32
|
+
|
|
15
33
|
def uuid4_to_uuid7(user_datetime: datetime, user_uuid: str) -> uuid.UUID:
|
|
16
34
|
"""Convert a UUID v4 into a UUID v7 following RFC draft specification."""
|
|
17
35
|
# Get Unix timestamp in milliseconds
|
opik/integrations/adk/helpers.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import os
|
|
2
3
|
from typing import (
|
|
3
4
|
Any,
|
|
@@ -8,6 +9,8 @@ from google.adk.models import LlmResponse
|
|
|
8
9
|
import opik.types as opik_types
|
|
9
10
|
import pydantic
|
|
10
11
|
|
|
12
|
+
LOGGER = logging.getLogger(__name__)
|
|
13
|
+
|
|
11
14
|
|
|
12
15
|
def convert_adk_base_model_to_dict(value: pydantic.BaseModel) -> Dict[str, Any]:
|
|
13
16
|
"""Most ADK objects are Pydantic Base Models"""
|
|
@@ -27,13 +30,19 @@ def get_adk_provider() -> opik_types.LLMProvider:
|
|
|
27
30
|
|
|
28
31
|
|
|
29
32
|
def has_empty_text_part_content(llm_response: LlmResponse) -> bool:
|
|
30
|
-
|
|
31
|
-
|
|
33
|
+
try:
|
|
34
|
+
if llm_response.content is None:
|
|
35
|
+
return True
|
|
32
36
|
|
|
33
|
-
|
|
34
|
-
if len(llm_response.content.parts) == 1:
|
|
35
|
-
part = llm_response.content.parts[0]
|
|
36
|
-
if part.text is not None and len(part.text) == 0:
|
|
37
|
+
if not llm_response.content.parts:
|
|
37
38
|
return True
|
|
38
39
|
|
|
39
|
-
|
|
40
|
+
# to filter out something like this: {"candidates":[{"content":{"parts":[{"text":""}],"role":"model"}}],...}}
|
|
41
|
+
if len(llm_response.content.parts) == 1:
|
|
42
|
+
part = llm_response.content.parts[0]
|
|
43
|
+
if part.text is not None and len(part.text) == 0:
|
|
44
|
+
return True
|
|
45
|
+
return False
|
|
46
|
+
except Exception as e:
|
|
47
|
+
LOGGER.warning(f"Exception in has_empty_text_part_content {e}", exc_info=True)
|
|
48
|
+
return True
|
|
@@ -8,6 +8,7 @@ from google.adk import models
|
|
|
8
8
|
from google.adk.tools import base_tool
|
|
9
9
|
from google.adk.tools import tool_context
|
|
10
10
|
|
|
11
|
+
import opik
|
|
11
12
|
from opik import context_storage
|
|
12
13
|
from opik.decorator import arguments_helpers, span_creation_handler
|
|
13
14
|
from opik.api_objects import opik_client, span, trace
|
|
@@ -77,7 +78,8 @@ class LegacyOpikTracer:
|
|
|
77
78
|
trace_data = self._context_storage.pop_trace_data()
|
|
78
79
|
assert trace_data is not None
|
|
79
80
|
trace_data.init_end_time()
|
|
80
|
-
|
|
81
|
+
if opik.is_tracing_active():
|
|
82
|
+
self._opik_client.trace(**trace_data.as_parameters)
|
|
81
83
|
|
|
82
84
|
def _end_current_span(
|
|
83
85
|
self,
|
|
@@ -85,20 +87,21 @@ class LegacyOpikTracer:
|
|
|
85
87
|
span_data = self._context_storage.pop_span_data()
|
|
86
88
|
assert span_data is not None
|
|
87
89
|
span_data.init_end_time()
|
|
88
|
-
|
|
90
|
+
if opik.is_tracing_active():
|
|
91
|
+
self._opik_client.span(**span_data.as_parameters)
|
|
89
92
|
|
|
90
93
|
def _start_span(self, span_data: span.SpanData) -> None:
|
|
91
94
|
self._context_storage.add_span_data(span_data)
|
|
92
95
|
self._opik_created_spans.add(span_data.id)
|
|
93
96
|
|
|
94
|
-
if self._opik_client.config.log_start_trace_span:
|
|
97
|
+
if self._opik_client.config.log_start_trace_span and opik.is_tracing_active():
|
|
95
98
|
self._opik_client.span(**span_data.as_start_parameters)
|
|
96
99
|
|
|
97
100
|
def _start_trace(self, trace_data: trace.TraceData) -> None:
|
|
98
101
|
self._context_storage.set_trace_data(trace_data)
|
|
99
102
|
self._current_trace_created_by_opik_tracer.set(trace_data.id)
|
|
100
103
|
|
|
101
|
-
if self._opik_client.config.log_start_trace_span:
|
|
104
|
+
if self._opik_client.config.log_start_trace_span and opik.is_tracing_active():
|
|
102
105
|
self._opik_client.trace(**trace_data.as_start_parameters)
|
|
103
106
|
|
|
104
107
|
def _set_current_context_data(self, value: SpanOrTraceData) -> None:
|
|
@@ -7,6 +7,7 @@ from google.adk import models
|
|
|
7
7
|
from google.adk.tools import base_tool
|
|
8
8
|
from google.adk.tools import tool_context
|
|
9
9
|
|
|
10
|
+
import opik
|
|
10
11
|
from opik import context_storage
|
|
11
12
|
from opik.api_objects import opik_client, span, trace
|
|
12
13
|
from opik.types import DistributedTraceHeadersDict
|
|
@@ -253,7 +254,8 @@ class OpikTracer:
|
|
|
253
254
|
current_span.init_end_time()
|
|
254
255
|
# We close this span manually because otherwise ADK will close it too late,
|
|
255
256
|
# and it will also add tool spans inside of it, which we want to avoid.
|
|
256
|
-
|
|
257
|
+
if opik.is_tracing_active():
|
|
258
|
+
self._opik_client.span(**current_span.as_parameters)
|
|
257
259
|
self._last_model_output = output
|
|
258
260
|
|
|
259
261
|
except Exception as e:
|
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
from typing import Iterator, Optional, Tuple
|
|
3
3
|
|
|
4
4
|
import opentelemetry.trace
|
|
5
|
+
import opik
|
|
5
6
|
import opik.context_storage
|
|
6
7
|
from opik.api_objects import trace, span
|
|
7
8
|
from opik.decorator import (
|
|
@@ -100,7 +101,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
|
|
|
100
101
|
# so we manually finalize it here to avoid incorrect span nesting.
|
|
101
102
|
opik.context_storage.pop_span_data(ensure_id=current_span_data.id)
|
|
102
103
|
current_span_data.init_end_time()
|
|
103
|
-
|
|
104
|
+
if opik.is_tracing_active():
|
|
105
|
+
self.opik_client.span(**current_span_data.as_parameters)
|
|
104
106
|
current_span_data = opik.context_storage.top_span_data()
|
|
105
107
|
|
|
106
108
|
try:
|
|
@@ -145,7 +147,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
|
|
|
145
147
|
trace_data = opik.context_storage.pop_trace_data(ensure_id=trace_id)
|
|
146
148
|
if trace_data is not None:
|
|
147
149
|
trace_data.init_end_time()
|
|
148
|
-
|
|
150
|
+
if opik.is_tracing_active():
|
|
151
|
+
self.opik_client.trace(**trace_data.as_parameters)
|
|
149
152
|
|
|
150
153
|
def _ensure_span_is_finalized(self, span_id: str) -> None:
|
|
151
154
|
opik.context_storage.trim_span_data_stack_to_certain_span(span_id)
|
|
@@ -153,7 +156,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
|
|
|
153
156
|
span_data = opik.context_storage.pop_span_data(ensure_id=span_id)
|
|
154
157
|
if span_data is not None:
|
|
155
158
|
span_data.init_end_time()
|
|
156
|
-
|
|
159
|
+
if opik.is_tracing_active():
|
|
160
|
+
self.opik_client.span(**span_data.as_parameters)
|
|
157
161
|
|
|
158
162
|
|
|
159
163
|
def _prepare_trace_and_span_to_be_finalized(
|
|
@@ -4,11 +4,8 @@ import logging
|
|
|
4
4
|
import dspy
|
|
5
5
|
from dspy.utils import callback as dspy_callback
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
import opik.opik_context as opik_context
|
|
9
|
-
import opik.context_storage as context_storage
|
|
7
|
+
from opik import context_storage, opik_context, tracing_runtime_config, types
|
|
10
8
|
from opik.api_objects import helpers, span, trace, opik_client
|
|
11
|
-
import opik.decorator.tracing_runtime_config as tracing_runtime_config
|
|
12
9
|
from opik.decorator import error_info_collector
|
|
13
10
|
|
|
14
11
|
from .graph import build_mermaid_graph_from_module
|
|
@@ -4,8 +4,8 @@ from typing import Any, Dict, Optional
|
|
|
4
4
|
import haystack
|
|
5
5
|
from haystack import tracing
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
from opik import tracing_runtime_config
|
|
8
|
+
from opik.api_objects import opik_client
|
|
9
9
|
from . import opik_tracer
|
|
10
10
|
|
|
11
11
|
LOGGER = logging.getLogger(__name__)
|
|
@@ -5,10 +5,8 @@ from typing import Any, Dict, Iterator, List, Optional, Union
|
|
|
5
5
|
|
|
6
6
|
from haystack import tracing
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
import opik.decorator.span_creation_handler as span_creation_handler
|
|
11
|
-
import opik.decorator.arguments_helpers as arguments_helpers
|
|
8
|
+
from opik import tracing_runtime_config, url_helpers
|
|
9
|
+
from opik.decorator import arguments_helpers, span_creation_handler
|
|
12
10
|
from opik.api_objects import opik_client
|
|
13
11
|
from opik.api_objects import span as opik_span
|
|
14
12
|
from opik.api_objects import trace as opik_trace
|
|
@@ -19,8 +19,7 @@ from langchain_core import language_models
|
|
|
19
19
|
from langchain_core.tracers import BaseTracer
|
|
20
20
|
from langchain_core.tracers.schemas import Run
|
|
21
21
|
|
|
22
|
-
import
|
|
23
|
-
import opik.llm_usage as llm_usage
|
|
22
|
+
from opik import context_storage, dict_utils, llm_usage, tracing_runtime_config
|
|
24
23
|
from opik.api_objects import span, trace
|
|
25
24
|
from opik.types import DistributedTraceHeadersDict, ErrorInfoDict
|
|
26
25
|
from opik.validation import parameters_validator
|
|
@@ -32,8 +31,6 @@ from . import (
|
|
|
32
31
|
)
|
|
33
32
|
|
|
34
33
|
from ...api_objects import helpers, opik_client
|
|
35
|
-
import opik.context_storage as context_storage
|
|
36
|
-
import opik.decorator.tracing_runtime_config as tracing_runtime_config
|
|
37
34
|
|
|
38
35
|
if TYPE_CHECKING:
|
|
39
36
|
from langchain_core.runnables.graph import Graph
|
|
@@ -6,10 +6,8 @@ import uuid
|
|
|
6
6
|
from llama_index.core.callbacks import schema as llama_index_schema
|
|
7
7
|
from llama_index.core.callbacks import base_handler
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
import opik.decorator.arguments_helpers as arguments_helpers
|
|
12
|
-
import opik.decorator.span_creation_handler as span_creation_handler
|
|
9
|
+
from opik import context_storage, tracing_runtime_config
|
|
10
|
+
from opik.decorator import arguments_helpers, span_creation_handler
|
|
13
11
|
from opik.api_objects import opik_client, span, trace
|
|
14
12
|
|
|
15
13
|
from . import event_parsing_utils
|
|
@@ -3,12 +3,11 @@ from agents import tracing
|
|
|
3
3
|
|
|
4
4
|
import logging
|
|
5
5
|
|
|
6
|
+
from opik import context_storage, tracing_runtime_config
|
|
6
7
|
from opik.api_objects.span import span_data
|
|
7
8
|
from opik.api_objects.trace import trace_data
|
|
8
9
|
from opik.api_objects import opik_client
|
|
9
10
|
from opik.decorator import span_creation_handler, arguments_helpers
|
|
10
|
-
import opik.decorator.tracing_runtime_config as tracing_runtime_config
|
|
11
|
-
import opik.context_storage as context_storage
|
|
12
11
|
|
|
13
12
|
from . import span_data_parsers
|
|
14
13
|
|
|
@@ -18,7 +18,7 @@ def track_openai(
|
|
|
18
18
|
"""Adds Opik tracking wrappers to an OpenAI client.
|
|
19
19
|
|
|
20
20
|
The client is always patched; however every wrapped call checks
|
|
21
|
-
`opik.
|
|
21
|
+
`opik.is_tracing_active()` before emitting
|
|
22
22
|
any telemetry. If tracing is disabled at call time, the wrapped function
|
|
23
23
|
executes normally but no span/trace is sent.
|
|
24
24
|
|
opik/opik_context.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import contextlib
|
|
2
|
-
from typing import Any, Dict, List, Optional,
|
|
2
|
+
from typing import Any, Dict, List, Optional, Iterator, Union
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
from opik import llm_usage
|
|
5
5
|
from opik.api_objects import span, trace, opik_client, prompt
|
|
6
6
|
from opik.api_objects.attachment import Attachment
|
|
7
7
|
from opik.types import (
|
|
@@ -11,7 +11,7 @@ from opik.types import (
|
|
|
11
11
|
ErrorInfoDict,
|
|
12
12
|
)
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
from opik import tracing_runtime_config
|
|
15
15
|
|
|
16
16
|
from . import context_storage, exceptions
|
|
17
17
|
from .decorator import error_info_collector
|
|
@@ -68,7 +68,7 @@ def update_current_span(
|
|
|
68
68
|
total_cost: Optional[float] = None,
|
|
69
69
|
attachments: Optional[List[Attachment]] = None,
|
|
70
70
|
error_info: Optional[ErrorInfoDict] = None,
|
|
71
|
-
prompts: Optional[List[prompt.
|
|
71
|
+
prompts: Optional[List[prompt.BasePrompt]] = None,
|
|
72
72
|
) -> None:
|
|
73
73
|
"""
|
|
74
74
|
Update the current span with the provided parameters. This method is usually called within a tracked function.
|
|
@@ -97,7 +97,7 @@ def update_current_span(
|
|
|
97
97
|
return
|
|
98
98
|
|
|
99
99
|
if prompts is not None:
|
|
100
|
-
prompts = [
|
|
100
|
+
prompts = [p.__internal_api__to_info_dict__() for p in prompts]
|
|
101
101
|
|
|
102
102
|
new_params = {
|
|
103
103
|
"name": name,
|
|
@@ -130,7 +130,7 @@ def update_current_trace(
|
|
|
130
130
|
feedback_scores: Optional[List[FeedbackScoreDict]] = None,
|
|
131
131
|
thread_id: Optional[str] = None,
|
|
132
132
|
attachments: Optional[List[Attachment]] = None,
|
|
133
|
-
prompts: Optional[List[prompt.
|
|
133
|
+
prompts: Optional[List[prompt.BasePrompt]] = None,
|
|
134
134
|
) -> None:
|
|
135
135
|
"""
|
|
136
136
|
Update the current trace with the provided parameters. This method is usually called within a tracked function.
|
|
@@ -151,7 +151,7 @@ def update_current_trace(
|
|
|
151
151
|
return
|
|
152
152
|
|
|
153
153
|
if prompts is not None:
|
|
154
|
-
prompts = [
|
|
154
|
+
prompts = [p.__internal_api__to_info_dict__() for p in prompts]
|
|
155
155
|
|
|
156
156
|
new_params = {
|
|
157
157
|
"name": name,
|