opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/constants.py +2 -0
- opik/api_objects/dataset/dataset.py +133 -40
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +6 -0
- opik/api_objects/helpers.py +8 -4
- opik/api_objects/local_recording.py +6 -5
- opik/api_objects/observation_data.py +101 -0
- opik/api_objects/opik_client.py +78 -45
- opik/api_objects/opik_query_language.py +9 -3
- opik/api_objects/prompt/chat/chat_prompt.py +18 -1
- opik/api_objects/prompt/client.py +8 -1
- opik/api_objects/span/span_data.py +3 -88
- opik/api_objects/threads/threads_client.py +7 -4
- opik/api_objects/trace/trace_data.py +3 -74
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +14 -12
- opik/config.py +12 -1
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +4 -1
- opik/decorator/base_track_decorator.py +111 -37
- opik/decorator/context_manager/span_context_manager.py +5 -1
- opik/decorator/generator_wrappers.py +5 -4
- opik/decorator/span_creation_handler.py +13 -4
- opik/evaluation/engine/engine.py +111 -28
- opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
- opik/evaluation/evaluator.py +12 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
- opik/evaluation/metrics/heuristics/equals.py +11 -7
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
- opik/evaluation/models/litellm/util.py +4 -20
- opik/evaluation/models/models_factory.py +19 -5
- opik/evaluation/rest_operations.py +3 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/integrations/adk/legacy_opik_tracer.py +9 -11
- opik/integrations/adk/opik_tracer.py +2 -2
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
- opik/integrations/dspy/callback.py +100 -14
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_tracer.py +2 -2
- opik/integrations/langchain/__init__.py +15 -2
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_tracer.py +258 -160
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
- opik/integrations/llama_index/callback.py +43 -6
- opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
- opik/integrations/openai/opik_tracker.py +99 -4
- opik/integrations/openai/videos/__init__.py +9 -0
- opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
- opik/integrations/openai/videos/videos_create_decorator.py +159 -0
- opik/integrations/openai/videos/videos_download_decorator.py +110 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batchers.py +32 -40
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/emulator_message_processor.py +36 -1
- opik/message_processing/emulation/models.py +21 -0
- opik/message_processing/messages.py +9 -0
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
- opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
- opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
- opik/message_processing/queue_consumer.py +4 -2
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +36 -8
- opik/plugins/pytest/experiment_runner.py +1 -1
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +38 -0
- opik/rest_api/datasets/client.py +249 -148
- opik/rest_api/datasets/raw_client.py +356 -217
- opik/rest_api/experiments/client.py +26 -0
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/llm_provider_key/client.py +4 -4
- opik/rest_api/llm_provider_key/raw_client.py +4 -4
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
- opik/rest_api/manual_evaluation/client.py +101 -0
- opik/rest_api/manual_evaluation/raw_client.py +172 -0
- opik/rest_api/optimizations/client.py +0 -166
- opik/rest_api/optimizations/raw_client.py +0 -248
- opik/rest_api/projects/client.py +9 -0
- opik/rest_api/projects/raw_client.py +13 -0
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
- opik/rest_api/prompts/client.py +130 -2
- opik/rest_api/prompts/raw_client.py +175 -0
- opik/rest_api/traces/client.py +101 -0
- opik/rest_api/traces/raw_client.py +120 -0
- opik/rest_api/types/__init__.py +46 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +38 -2
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
- opik/rest_api/types/dataset_item.py +1 -1
- opik/rest_api/types/dataset_item_batch.py +4 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +1 -1
- opik/rest_api/types/dataset_item_filter.py +4 -0
- opik/rest_api/types/dataset_item_page_compare.py +0 -1
- opik/rest_api/types/dataset_item_page_public.py +0 -1
- opik/rest_api/types/dataset_item_public.py +1 -1
- opik/rest_api/types/dataset_version_public.py +5 -0
- opik/rest_api/types/dataset_version_summary.py +5 -0
- opik/rest_api/types/dataset_version_summary_public.py +5 -0
- opik/rest_api/types/experiment.py +9 -0
- opik/rest_api/types/experiment_public.py +9 -0
- opik/rest_api/types/llm_as_judge_message_content.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt_version.py +1 -0
- opik/rest_api/types/prompt_version_detail.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +1 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +5 -1
- opik/rest_api/types/provider_api_key_provider.py +2 -1
- opik/rest_api/types/provider_api_key_public.py +5 -1
- opik/rest_api/types/provider_api_key_public_provider.py +2 -1
- opik/rest_api/types/service_toggles_config.py +11 -1
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/METADATA +5 -5
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/RECORD +190 -141
- opik/cli/export.py +0 -791
- opik/cli/import_command.py +0 -575
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
opik/api_objects/opik_client.py
CHANGED
|
@@ -2,7 +2,7 @@ import atexit
|
|
|
2
2
|
import datetime
|
|
3
3
|
import functools
|
|
4
4
|
import logging
|
|
5
|
-
from typing import Any, Dict, List, Optional, TypeVar, Union, Literal
|
|
5
|
+
from typing import Any, Dict, List, Optional, TypeVar, Union, Literal, cast
|
|
6
6
|
|
|
7
7
|
import httpx
|
|
8
8
|
|
|
@@ -42,9 +42,9 @@ from ..message_processing import (
|
|
|
42
42
|
messages,
|
|
43
43
|
streamer_constructors,
|
|
44
44
|
message_queue,
|
|
45
|
-
message_processors_chain,
|
|
46
45
|
)
|
|
47
46
|
from ..message_processing.batching import sequence_splitter
|
|
47
|
+
from ..message_processing.processors import message_processors_chain
|
|
48
48
|
from ..rest_api import client as rest_api_client
|
|
49
49
|
from ..rest_api.core.api_error import ApiError
|
|
50
50
|
from ..rest_api.types import (
|
|
@@ -55,7 +55,13 @@ from ..rest_api.types import (
|
|
|
55
55
|
span_filter_public,
|
|
56
56
|
trace_filter_public,
|
|
57
57
|
)
|
|
58
|
-
from ..types import
|
|
58
|
+
from ..types import (
|
|
59
|
+
BatchFeedbackScoreDict,
|
|
60
|
+
ErrorInfoDict,
|
|
61
|
+
FeedbackScoreDict,
|
|
62
|
+
LLMProvider,
|
|
63
|
+
SpanType,
|
|
64
|
+
)
|
|
59
65
|
|
|
60
66
|
LOGGER = logging.getLogger(__name__)
|
|
61
67
|
|
|
@@ -107,13 +113,7 @@ class Opik:
|
|
|
107
113
|
self._use_batching = _use_batching
|
|
108
114
|
|
|
109
115
|
self._initialize_streamer(
|
|
110
|
-
url_override=config_.url_override,
|
|
111
|
-
workers=config_.background_workers,
|
|
112
|
-
file_upload_worker_count=config_.file_upload_background_workers,
|
|
113
|
-
api_key=config_.api_key,
|
|
114
|
-
check_tls_certificate=config_.check_tls_certificate,
|
|
115
116
|
use_batching=_use_batching,
|
|
116
|
-
enable_json_request_compression=config_.enable_json_request_compression,
|
|
117
117
|
)
|
|
118
118
|
atexit.register(self.end, timeout=self._flush_timeout)
|
|
119
119
|
|
|
@@ -152,24 +152,17 @@ class Opik:
|
|
|
152
152
|
|
|
153
153
|
def _initialize_streamer(
|
|
154
154
|
self,
|
|
155
|
-
url_override: str,
|
|
156
|
-
workers: int,
|
|
157
|
-
file_upload_worker_count: int,
|
|
158
|
-
api_key: Optional[str],
|
|
159
|
-
check_tls_certificate: bool,
|
|
160
155
|
use_batching: bool,
|
|
161
|
-
enable_json_request_compression: bool,
|
|
162
156
|
) -> None:
|
|
163
|
-
|
|
157
|
+
self._httpx_client = httpx_client.get(
|
|
164
158
|
workspace=self._workspace,
|
|
165
|
-
api_key=api_key,
|
|
166
|
-
check_tls_certificate=check_tls_certificate,
|
|
167
|
-
compress_json_requests=enable_json_request_compression,
|
|
159
|
+
api_key=self._config.api_key,
|
|
160
|
+
check_tls_certificate=self._config.check_tls_certificate,
|
|
161
|
+
compress_json_requests=self._config.enable_json_request_compression,
|
|
168
162
|
)
|
|
169
|
-
self._httpx_client = httpx_client_
|
|
170
163
|
self._rest_client = rest_api_client.OpikApi(
|
|
171
|
-
base_url=url_override,
|
|
172
|
-
httpx_client=
|
|
164
|
+
base_url=self._config.url_override,
|
|
165
|
+
httpx_client=self._httpx_client,
|
|
173
166
|
)
|
|
174
167
|
self._rest_client._client_wrapper._timeout = (
|
|
175
168
|
httpx.USE_CLIENT_DEFAULT
|
|
@@ -181,19 +174,22 @@ class Opik:
|
|
|
181
174
|
batch_factor=self._config.maximal_queue_size_batch_factor,
|
|
182
175
|
)
|
|
183
176
|
|
|
184
|
-
self.
|
|
177
|
+
self.__internal_api__message_processor__ = (
|
|
185
178
|
message_processors_chain.create_message_processors_chain(
|
|
186
179
|
rest_client=self._rest_client
|
|
187
180
|
)
|
|
188
181
|
)
|
|
189
182
|
self._streamer = streamer_constructors.construct_online_streamer(
|
|
190
|
-
n_consumers=
|
|
183
|
+
n_consumers=self._config.background_workers,
|
|
191
184
|
rest_client=self._rest_client,
|
|
192
|
-
httpx_client=
|
|
185
|
+
httpx_client=self._httpx_client,
|
|
193
186
|
use_batching=use_batching,
|
|
194
|
-
|
|
187
|
+
use_attachment_extraction=self._config.is_attachment_extraction_active,
|
|
188
|
+
min_base64_embedded_attachment_size=self._config.min_base64_embedded_attachment_size,
|
|
189
|
+
file_upload_worker_count=self._config.file_upload_background_workers,
|
|
195
190
|
max_queue_size=max_queue_size,
|
|
196
|
-
message_processor=self.
|
|
191
|
+
message_processor=self.__internal_api__message_processor__,
|
|
192
|
+
url_override=self._config.url_override,
|
|
197
193
|
)
|
|
198
194
|
|
|
199
195
|
def _display_trace_url(self, trace_id: str, project_name: str) -> None:
|
|
@@ -295,7 +291,9 @@ class Opik:
|
|
|
295
291
|
for feedback_score in feedback_scores:
|
|
296
292
|
feedback_score["id"] = id
|
|
297
293
|
|
|
298
|
-
self.log_traces_feedback_scores(
|
|
294
|
+
self.log_traces_feedback_scores(
|
|
295
|
+
cast(List[BatchFeedbackScoreDict], feedback_scores), project_name
|
|
296
|
+
)
|
|
299
297
|
|
|
300
298
|
if attachments is not None:
|
|
301
299
|
for attachment_data in attachments:
|
|
@@ -470,7 +468,9 @@ class Opik:
|
|
|
470
468
|
for feedback_score in feedback_scores:
|
|
471
469
|
feedback_score["id"] = id
|
|
472
470
|
|
|
473
|
-
self.log_spans_feedback_scores(
|
|
471
|
+
self.log_spans_feedback_scores(
|
|
472
|
+
cast(List[BatchFeedbackScoreDict], feedback_scores), project_name
|
|
473
|
+
)
|
|
474
474
|
|
|
475
475
|
return span.span_client.create_span(
|
|
476
476
|
trace_id=trace_id,
|
|
@@ -639,23 +639,34 @@ class Opik:
|
|
|
639
639
|
)
|
|
640
640
|
|
|
641
641
|
def log_spans_feedback_scores(
|
|
642
|
-
self, scores: List[
|
|
642
|
+
self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
|
|
643
643
|
) -> None:
|
|
644
644
|
"""
|
|
645
645
|
Log feedback scores for spans.
|
|
646
646
|
|
|
647
647
|
Args:
|
|
648
|
-
scores (List[
|
|
648
|
+
scores (List[BatchFeedbackScoreDict]): A list of feedback score dictionaries.
|
|
649
649
|
Specifying a span id via `id` key for each score is mandatory.
|
|
650
650
|
project_name: The name of the project in which the spans are logged. If not set, the project name
|
|
651
651
|
which was configured when the Opik instance was created will be used.
|
|
652
|
+
Deprecated: use `project_name` in the feedback score dictionary that's listed in the `scores` parameter.
|
|
652
653
|
|
|
653
654
|
Returns:
|
|
654
655
|
None
|
|
656
|
+
|
|
657
|
+
Example:
|
|
658
|
+
>>> from opik import Opik
|
|
659
|
+
>>> client = Opik()
|
|
660
|
+
>>> # Batch logging across multiple projects
|
|
661
|
+
>>> scores = [
|
|
662
|
+
>>> {"id": span1_id, "name": "accuracy", "value": 0.95, "project_name": "project-A"},
|
|
663
|
+
>>> {"id": span2_id, "name": "accuracy", "value": 0.88, "project_name": "project-B"},
|
|
664
|
+
>>> ]
|
|
665
|
+
>>> client.log_spans_feedback_scores(scores=scores)
|
|
655
666
|
"""
|
|
656
667
|
score_messages = helpers.parse_feedback_score_messages(
|
|
657
668
|
scores=scores,
|
|
658
|
-
project_name=project_name or self.
|
|
669
|
+
project_name=project_name or self.project_name,
|
|
659
670
|
parsed_item_class=messages.FeedbackScoreMessage,
|
|
660
671
|
logger=LOGGER,
|
|
661
672
|
)
|
|
@@ -677,23 +688,34 @@ class Opik:
|
|
|
677
688
|
self._streamer.put(add_span_feedback_scores_batch_message)
|
|
678
689
|
|
|
679
690
|
def log_traces_feedback_scores(
|
|
680
|
-
self, scores: List[
|
|
691
|
+
self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
|
|
681
692
|
) -> None:
|
|
682
693
|
"""
|
|
683
694
|
Log feedback scores for traces.
|
|
684
695
|
|
|
685
696
|
Args:
|
|
686
|
-
scores (List[
|
|
697
|
+
scores (List[BatchFeedbackScoreDict]): A list of feedback score dictionaries.
|
|
687
698
|
Specifying a trace id via `id` key for each score is mandatory.
|
|
688
699
|
project_name: The name of the project in which the traces are logged. If not set, the project name
|
|
689
700
|
which was configured when the Opik instance was created will be used.
|
|
701
|
+
Deprecated: use `project_name` in the feedback score dictionary that's listed in the `scores` parameter.
|
|
690
702
|
|
|
691
703
|
Returns:
|
|
692
704
|
None
|
|
705
|
+
|
|
706
|
+
Example:
|
|
707
|
+
>>> from opik import Opik
|
|
708
|
+
>>> client = Opik()
|
|
709
|
+
>>> # Batch logging across multiple projects
|
|
710
|
+
>>> scores = [
|
|
711
|
+
>>> {"id": trace1_id, "name": "accuracy", "value": 0.95, "project_name": "project-A"},
|
|
712
|
+
>>> {"id": trace2_id, "name": "accuracy", "value": 0.88, "project_name": "project-B"},
|
|
713
|
+
>>> ]
|
|
714
|
+
>>> client.log_traces_feedback_scores(scores=scores)
|
|
693
715
|
"""
|
|
694
716
|
score_messages = helpers.parse_feedback_score_messages(
|
|
695
717
|
scores=scores,
|
|
696
|
-
project_name=project_name or self.
|
|
718
|
+
project_name=project_name or self.project_name,
|
|
697
719
|
parsed_item_class=messages.FeedbackScoreMessage,
|
|
698
720
|
logger=LOGGER,
|
|
699
721
|
)
|
|
@@ -716,16 +738,17 @@ class Opik:
|
|
|
716
738
|
self._streamer.put(add_trace_feedback_scores_batch_message)
|
|
717
739
|
|
|
718
740
|
def log_threads_feedback_scores(
|
|
719
|
-
self, scores: List[
|
|
741
|
+
self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
|
|
720
742
|
) -> None:
|
|
721
743
|
"""
|
|
722
744
|
Log feedback scores for threads.
|
|
723
745
|
|
|
724
746
|
Args:
|
|
725
|
-
scores (List[
|
|
747
|
+
scores (List[BatchFeedbackScoreDict]): A list of feedback score dictionaries.
|
|
726
748
|
Specifying a thread id via `id` key for each score is mandatory.
|
|
727
749
|
project_name: The name of the project in which the threads are logged. If not set, the project name
|
|
728
750
|
which was configured when the Opik instance was created will be used.
|
|
751
|
+
Deprecated: use `project_name` in the feedback score dictionary that's listed in the `scores` parameter.
|
|
729
752
|
|
|
730
753
|
Returns:
|
|
731
754
|
None
|
|
@@ -733,13 +756,10 @@ class Opik:
|
|
|
733
756
|
Example:
|
|
734
757
|
>>> from opik import Opik
|
|
735
758
|
>>> client = Opik()
|
|
759
|
+
>>> # Batch logging across multiple projects
|
|
736
760
|
>>> scores = [
|
|
737
|
-
>>> {
|
|
738
|
-
>>>
|
|
739
|
-
>>> "name": "user_satisfaction",
|
|
740
|
-
>>> "value": 0.85,
|
|
741
|
-
>>> "reason": "User seemed satisfied with the conversation"
|
|
742
|
-
>>> }
|
|
761
|
+
>>> {"id": "thread_123", "name": "user_satisfaction", "value": 0.85, "project_name": "project-A"},
|
|
762
|
+
>>> {"id": "thread_456", "name": "user_satisfaction", "value": 0.92, "project_name": "project-B"},
|
|
743
763
|
>>> ]
|
|
744
764
|
>>> client.log_threads_feedback_scores(scores=scores)
|
|
745
765
|
"""
|
|
@@ -801,6 +821,7 @@ class Opik:
|
|
|
801
821
|
name=name,
|
|
802
822
|
description=dataset_fern.description,
|
|
803
823
|
rest_client=self._rest_client,
|
|
824
|
+
dataset_items_count=dataset_fern.dataset_items_count,
|
|
804
825
|
)
|
|
805
826
|
|
|
806
827
|
dataset_.__internal_api__sync_hashes__()
|
|
@@ -886,6 +907,7 @@ class Opik:
|
|
|
886
907
|
name=name,
|
|
887
908
|
description=description,
|
|
888
909
|
rest_client=self._rest_client,
|
|
910
|
+
dataset_items_count=0,
|
|
889
911
|
)
|
|
890
912
|
|
|
891
913
|
self._display_created_dataset_url(dataset_name=name, dataset_id=result.id)
|
|
@@ -921,6 +943,7 @@ class Opik:
|
|
|
921
943
|
prompts: Optional[List[prompt_module.base_prompt.BasePrompt]] = None,
|
|
922
944
|
type: Literal["regular", "trial", "mini-batch"] = "regular",
|
|
923
945
|
optimization_id: Optional[str] = None,
|
|
946
|
+
tags: Optional[List[str]] = None,
|
|
924
947
|
) -> experiment.Experiment:
|
|
925
948
|
"""
|
|
926
949
|
Creates a new experiment using the given dataset name and optional parameters.
|
|
@@ -934,6 +957,7 @@ class Opik:
|
|
|
934
957
|
type: The type of the experiment. Can be "regular", "trial", or "mini-batch".
|
|
935
958
|
Defaults to "regular". "trial" and "mini-batch" are only relevant for prompt optimization experiments.
|
|
936
959
|
optimization_id: Optional ID of the optimization associated with the experiment.
|
|
960
|
+
tags: Optional list of tags to associate with the experiment.
|
|
937
961
|
|
|
938
962
|
Returns:
|
|
939
963
|
experiment.Experiment: The newly created experiment object.
|
|
@@ -958,6 +982,7 @@ class Opik:
|
|
|
958
982
|
prompt_versions=prompt_versions,
|
|
959
983
|
type=type,
|
|
960
984
|
optimization_id=optimization_id,
|
|
985
|
+
tags=tags,
|
|
961
986
|
)
|
|
962
987
|
|
|
963
988
|
experiment_ = experiment.Experiment(
|
|
@@ -968,6 +993,7 @@ class Opik:
|
|
|
968
993
|
streamer=self._streamer,
|
|
969
994
|
experiments_client=self.get_experiments_client(),
|
|
970
995
|
prompts=checked_prompts,
|
|
996
|
+
tags=tags,
|
|
971
997
|
)
|
|
972
998
|
|
|
973
999
|
return experiment_
|
|
@@ -1032,6 +1058,7 @@ class Opik:
|
|
|
1032
1058
|
rest_client=self._rest_client,
|
|
1033
1059
|
streamer=self._streamer,
|
|
1034
1060
|
experiments_client=self.get_experiments_client(),
|
|
1061
|
+
tags=experiment_public.tags,
|
|
1035
1062
|
)
|
|
1036
1063
|
|
|
1037
1064
|
def get_experiments_by_name(self, name: str) -> List[experiment.Experiment]:
|
|
@@ -1058,6 +1085,7 @@ class Opik:
|
|
|
1058
1085
|
rest_client=self._rest_client,
|
|
1059
1086
|
streamer=self._streamer,
|
|
1060
1087
|
experiments_client=self.get_experiments_client(),
|
|
1088
|
+
tags=public_experiment.tags,
|
|
1061
1089
|
)
|
|
1062
1090
|
result.append(experiment_)
|
|
1063
1091
|
|
|
@@ -1091,6 +1119,7 @@ class Opik:
|
|
|
1091
1119
|
rest_client=self._rest_client,
|
|
1092
1120
|
streamer=self._streamer,
|
|
1093
1121
|
experiments_client=self.get_experiments_client(),
|
|
1122
|
+
tags=experiment_public.tags,
|
|
1094
1123
|
)
|
|
1095
1124
|
|
|
1096
1125
|
def end(self, timeout: Optional[int] = None) -> None:
|
|
@@ -1155,7 +1184,7 @@ class Opik:
|
|
|
1155
1184
|
- `start_time`, `end_time`: =, >, <, >=, <=
|
|
1156
1185
|
- `input`, `output`: =, contains, not_contains
|
|
1157
1186
|
- `metadata`: =, contains, >, <
|
|
1158
|
-
- `feedback_scores`: =, >, <, >=,
|
|
1187
|
+
- `feedback_scores`: =, >, <, >=, <=, is_empty, is_not_empty
|
|
1159
1188
|
- `tags`: contains (only)
|
|
1160
1189
|
- `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`, `duration`, `number_of_messages`, `total_estimated_cost`: =, !=, >, <, >=, <=
|
|
1161
1190
|
|
|
@@ -1165,6 +1194,8 @@ class Opik:
|
|
|
1165
1194
|
- `input contains "question"` - Filter by input content
|
|
1166
1195
|
- `usage.total_tokens > 1000` - Filter by token usage
|
|
1167
1196
|
- `feedback_scores.accuracy > 0.8` - Filter by feedback score
|
|
1197
|
+
- `feedback_scores.my_metric is_empty` - Filter traces with empty feedback score
|
|
1198
|
+
- `feedback_scores.my_metric is_not_empty` - Filter traces with non-empty feedback score
|
|
1168
1199
|
- `tags contains "production"` - Filter by tag
|
|
1169
1200
|
- `metadata.model = "gpt-4"` - Filter by metadata field
|
|
1170
1201
|
- `thread_id = "thread_123"` - Filter by thread ID
|
|
@@ -1247,7 +1278,7 @@ class Opik:
|
|
|
1247
1278
|
- `start_time`, `end_time`: =, >, <, >=, <=
|
|
1248
1279
|
- `input`, `output`: =, contains, not_contains
|
|
1249
1280
|
- `metadata`: =, contains, >, <
|
|
1250
|
-
- `feedback_scores`: =, >, <, >=,
|
|
1281
|
+
- `feedback_scores`: =, >, <, >=, <=, is_empty, is_not_empty
|
|
1251
1282
|
- `tags`: contains (only)
|
|
1252
1283
|
- `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`, `duration`, `number_of_messages`, `total_estimated_cost`: =, !=, >, <, >=, <=
|
|
1253
1284
|
|
|
@@ -1257,6 +1288,8 @@ class Opik:
|
|
|
1257
1288
|
- `input contains "question"` - Filter by input content
|
|
1258
1289
|
- `usage.total_tokens > 1000` - Filter by token usage
|
|
1259
1290
|
- `feedback_scores.accuracy > 0.8` - Filter by feedback score
|
|
1291
|
+
- `feedback_scores.my_metric is_empty` - Filter spans with empty feedback score
|
|
1292
|
+
- `feedback_scores.my_metric is_not_empty` - Filter spans with non-empty feedback score
|
|
1260
1293
|
- `tags contains "production"` - Filter by tag
|
|
1261
1294
|
- `metadata.model = "gpt-4"` - Filter by metadata field
|
|
1262
1295
|
- `thread_id = "thread_123"` - Filter by thread ID
|
|
@@ -54,7 +54,7 @@ SUPPORTED_OPERATORS = {
|
|
|
54
54
|
],
|
|
55
55
|
"output": ["=", "contains", "not_contains"],
|
|
56
56
|
"metadata": ["=", "contains", ">", "<"],
|
|
57
|
-
"feedback_scores": ["=", ">", "<", ">=", "<="],
|
|
57
|
+
"feedback_scores": ["=", ">", "<", ">=", "<=", "is_empty", "is_not_empty"],
|
|
58
58
|
"tags": ["contains"],
|
|
59
59
|
"usage.total_tokens": ["=", "!=", ">", "<", ">=", "<="],
|
|
60
60
|
"usage.prompt_tokens": ["=", "!=", ">", "<", ">=", "<="],
|
|
@@ -132,6 +132,8 @@ SUPPORTED_OPERATORS = {
|
|
|
132
132
|
],
|
|
133
133
|
}
|
|
134
134
|
|
|
135
|
+
OPERATORS_WITHOUT_VALUES = {"is_empty", "is_not_empty"}
|
|
136
|
+
|
|
135
137
|
|
|
136
138
|
class OpikQueryLanguage:
|
|
137
139
|
"""
|
|
@@ -384,8 +386,12 @@ class OpikQueryLanguage:
|
|
|
384
386
|
# Parse operators
|
|
385
387
|
parsed_operator = self._parse_operator(parsed_field["field"])
|
|
386
388
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
+
operator_name = parsed_operator.get("operator", "")
|
|
390
|
+
if operator_name in OPERATORS_WITHOUT_VALUES:
|
|
391
|
+
# For operators without values, use empty string as value
|
|
392
|
+
parsed_value = {"value": ""}
|
|
393
|
+
else:
|
|
394
|
+
parsed_value = self._parse_value()
|
|
389
395
|
|
|
390
396
|
expressions.append({**parsed_field, **parsed_operator, **parsed_value})
|
|
391
397
|
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
import json
|
|
3
|
-
from typing import Any, Dict, List, Optional
|
|
3
|
+
from typing import Any, Dict, List, Optional, Tuple, Type
|
|
4
|
+
|
|
4
5
|
from typing_extensions import override
|
|
5
6
|
|
|
6
7
|
from opik.rest_api import types as rest_api_types
|
|
8
|
+
from opik.validation import chat_prompt_messages, validator
|
|
7
9
|
from . import chat_prompt_template
|
|
8
10
|
from .. import client as prompt_client
|
|
9
11
|
from .. import types as prompt_types
|
|
@@ -16,6 +18,10 @@ class ChatPrompt(base_prompt.BasePrompt):
|
|
|
16
18
|
Similar to Prompt but uses a list of chat messages instead of a string template.
|
|
17
19
|
"""
|
|
18
20
|
|
|
21
|
+
_parameter_validators: List[Tuple[str, Type[validator.RaisableValidator]]] = [
|
|
22
|
+
("messages", chat_prompt_messages.ChatPromptMessagesValidator),
|
|
23
|
+
]
|
|
24
|
+
|
|
19
25
|
def __init__(
|
|
20
26
|
self,
|
|
21
27
|
name: str,
|
|
@@ -37,8 +43,12 @@ class ChatPrompt(base_prompt.BasePrompt):
|
|
|
37
43
|
|
|
38
44
|
Raises:
|
|
39
45
|
PromptTemplateStructureMismatch: If a text prompt with the same name already exists (template structure is immutable).
|
|
46
|
+
ValidationError: If messages structure is invalid.
|
|
40
47
|
"""
|
|
41
48
|
|
|
49
|
+
# Validate messages structure
|
|
50
|
+
self._validate_inputs(messages=messages)
|
|
51
|
+
|
|
42
52
|
self._chat_template = chat_prompt_template.ChatPromptTemplate(
|
|
43
53
|
messages=messages,
|
|
44
54
|
template_type=type,
|
|
@@ -54,6 +64,13 @@ class ChatPrompt(base_prompt.BasePrompt):
|
|
|
54
64
|
|
|
55
65
|
self._sync_with_backend()
|
|
56
66
|
|
|
67
|
+
def _validate_inputs(self, **kwargs: Any) -> None:
|
|
68
|
+
for parameter, validator_class in self._parameter_validators:
|
|
69
|
+
if parameter in kwargs:
|
|
70
|
+
validator_instance = validator_class(kwargs[parameter])
|
|
71
|
+
validator_instance.validate()
|
|
72
|
+
validator_instance.raise_if_validation_failed()
|
|
73
|
+
|
|
57
74
|
def _sync_with_backend(self) -> None:
|
|
58
75
|
from opik.api_objects import opik_client
|
|
59
76
|
|
|
@@ -143,7 +143,14 @@ class PromptClient:
|
|
|
143
143
|
commit=commit,
|
|
144
144
|
)
|
|
145
145
|
|
|
146
|
-
|
|
146
|
+
should_skip_validation = (
|
|
147
|
+
prompt_version.template_structure is None
|
|
148
|
+
and raise_if_not_template_structure == "text"
|
|
149
|
+
)
|
|
150
|
+
if should_skip_validation:
|
|
151
|
+
return prompt_version
|
|
152
|
+
|
|
153
|
+
# Client-side validation for template_structure if requested and not skipped
|
|
147
154
|
if (
|
|
148
155
|
raise_if_not_template_structure is not None
|
|
149
156
|
and prompt_version.template_structure != raise_if_not_template_structure
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import datetime
|
|
3
|
-
import logging
|
|
4
3
|
from typing import Any, Dict, List, Optional, Union
|
|
5
4
|
|
|
6
5
|
import opik.api_objects.attachment as attachment
|
|
@@ -13,20 +12,12 @@ from opik.types import (
|
|
|
13
12
|
LLMProvider,
|
|
14
13
|
SpanType,
|
|
15
14
|
)
|
|
16
|
-
from .. import helpers
|
|
15
|
+
from .. import helpers
|
|
16
|
+
from ..observation_data import ObservationData
|
|
17
17
|
|
|
18
|
-
LOGGER = logging.getLogger(__name__)
|
|
19
18
|
|
|
20
|
-
|
|
21
|
-
# Engineer note:
|
|
22
|
-
#
|
|
23
|
-
# After moving to minimal python version 3.10, a lot of common content
|
|
24
|
-
# from SpanData and TraceData can be moved to ObservationData parent dataclass.
|
|
25
|
-
# Before that it's impossible because of the dataclasses limitation to have optional arguments
|
|
26
|
-
# strictly after positional ones (including the attributes from the parent class).
|
|
27
|
-
# In python 3.10 @dataclass(kw_only=True) should help.
|
|
28
19
|
@dataclasses.dataclass
|
|
29
|
-
class SpanData:
|
|
20
|
+
class SpanData(ObservationData):
|
|
30
21
|
"""
|
|
31
22
|
The SpanData object is returned when calling :func:`opik.opik_context.get_current_span_data` from a tracked function.
|
|
32
23
|
"""
|
|
@@ -34,24 +25,11 @@ class SpanData:
|
|
|
34
25
|
trace_id: str
|
|
35
26
|
id: str = dataclasses.field(default_factory=helpers.generate_id)
|
|
36
27
|
parent_span_id: Optional[str] = None
|
|
37
|
-
name: Optional[str] = None
|
|
38
28
|
type: SpanType = "general"
|
|
39
|
-
start_time: Optional[datetime.datetime] = dataclasses.field(
|
|
40
|
-
default_factory=datetime_helpers.local_timestamp
|
|
41
|
-
)
|
|
42
|
-
end_time: Optional[datetime.datetime] = None
|
|
43
|
-
metadata: Optional[Dict[str, Any]] = None
|
|
44
|
-
input: Optional[Dict[str, Any]] = None
|
|
45
|
-
output: Optional[Dict[str, Any]] = None
|
|
46
|
-
tags: Optional[List[str]] = None
|
|
47
29
|
usage: Optional[Union[Dict[str, Any], llm_usage.OpikUsage]] = None
|
|
48
|
-
feedback_scores: Optional[List[FeedbackScoreDict]] = None
|
|
49
|
-
project_name: Optional[str] = None
|
|
50
30
|
model: Optional[str] = None
|
|
51
31
|
provider: Optional[Union[str, LLMProvider]] = None
|
|
52
|
-
error_info: Optional[ErrorInfoDict] = None
|
|
53
32
|
total_cost: Optional[float] = None
|
|
54
|
-
attachments: Optional[List[attachment.Attachment]] = None
|
|
55
33
|
|
|
56
34
|
def create_child_span_data(
|
|
57
35
|
self,
|
|
@@ -95,69 +73,6 @@ class SpanData:
|
|
|
95
73
|
attachments=attachments,
|
|
96
74
|
)
|
|
97
75
|
|
|
98
|
-
def update(self, **new_data: Any) -> "SpanData":
|
|
99
|
-
"""
|
|
100
|
-
Updates the attributes of the object with the provided key-value pairs. This method checks if
|
|
101
|
-
an attribute exists before updating it and merges the data appropriately for specific
|
|
102
|
-
keywords like metadata, output, input, attachments, and tags. If a key doesn't correspond
|
|
103
|
-
to an attribute of the object or the provided value is None, the update is skipped.
|
|
104
|
-
|
|
105
|
-
Args:
|
|
106
|
-
**new_data: Key-value pairs of attributes to update. Keys should match existing
|
|
107
|
-
attributes on the object, and values that are None will not update.
|
|
108
|
-
|
|
109
|
-
Returns:
|
|
110
|
-
SpanData: The updated object instance.
|
|
111
|
-
"""
|
|
112
|
-
for key, value in new_data.items():
|
|
113
|
-
if value is None:
|
|
114
|
-
continue
|
|
115
|
-
|
|
116
|
-
if key not in self.__dict__ and key != "prompts":
|
|
117
|
-
LOGGER.debug(
|
|
118
|
-
"An attempt to update span with parameter name it doesn't have: %s",
|
|
119
|
-
key,
|
|
120
|
-
)
|
|
121
|
-
continue
|
|
122
|
-
|
|
123
|
-
if key == "metadata":
|
|
124
|
-
self.metadata = data_helpers.merge_metadata(
|
|
125
|
-
self.metadata, new_metadata=value
|
|
126
|
-
)
|
|
127
|
-
continue
|
|
128
|
-
elif key == "output":
|
|
129
|
-
self.output = data_helpers.merge_outputs(self.output, new_outputs=value)
|
|
130
|
-
continue
|
|
131
|
-
elif key == "input":
|
|
132
|
-
self.input = data_helpers.merge_inputs(self.input, new_inputs=value)
|
|
133
|
-
continue
|
|
134
|
-
elif key == "attachments":
|
|
135
|
-
self._update_attachments(value)
|
|
136
|
-
continue
|
|
137
|
-
elif key == "tags":
|
|
138
|
-
self.tags = data_helpers.merge_tags(self.tags, new_tags=value)
|
|
139
|
-
continue
|
|
140
|
-
elif key == "prompts":
|
|
141
|
-
self.metadata = data_helpers.merge_metadata(
|
|
142
|
-
self.metadata, new_metadata=new_data.get("metadata"), prompts=value
|
|
143
|
-
)
|
|
144
|
-
continue
|
|
145
|
-
|
|
146
|
-
self.__dict__[key] = value
|
|
147
|
-
|
|
148
|
-
return self
|
|
149
|
-
|
|
150
|
-
def init_end_time(self) -> "SpanData":
|
|
151
|
-
self.end_time = datetime_helpers.local_timestamp()
|
|
152
|
-
|
|
153
|
-
return self
|
|
154
|
-
|
|
155
|
-
def _update_attachments(self, attachments: List[attachment.Attachment]) -> None:
|
|
156
|
-
if self.attachments is None:
|
|
157
|
-
self.attachments = attachments
|
|
158
|
-
else:
|
|
159
|
-
self.attachments.extend(attachments)
|
|
160
|
-
|
|
161
76
|
@property
|
|
162
77
|
def as_start_parameters(self) -> Dict[str, Any]:
|
|
163
78
|
"""Returns parameters of this span to be sent to the server when starting a new span."""
|
|
@@ -3,7 +3,7 @@ from typing import List, Optional
|
|
|
3
3
|
|
|
4
4
|
import opik
|
|
5
5
|
from opik.rest_api import TraceThread
|
|
6
|
-
from opik.types import
|
|
6
|
+
from opik.types import BatchFeedbackScoreDict
|
|
7
7
|
|
|
8
8
|
from .. import helpers, rest_stream_parser, constants
|
|
9
9
|
from ... import config
|
|
@@ -74,7 +74,7 @@ class ThreadsClient:
|
|
|
74
74
|
- `start_time`, `end_time`: =, >, <, >=, <=
|
|
75
75
|
- `input`, `output`: =, contains, not_contains
|
|
76
76
|
- `metadata`: =, contains, >, <
|
|
77
|
-
- `feedback_scores`: =, >, <, >=,
|
|
77
|
+
- `feedback_scores`: =, >, <, >=, <=, is_empty, is_not_empty
|
|
78
78
|
- `tags`: contains (only)
|
|
79
79
|
- `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`, `duration`, `number_of_messages`, `total_estimated_cost`: =, !=, >, <, >=, <=
|
|
80
80
|
|
|
@@ -84,6 +84,8 @@ class ThreadsClient:
|
|
|
84
84
|
- `duration > 300` - Filter by thread duration (seconds)
|
|
85
85
|
- `number_of_messages >= 5` - Filter by message count
|
|
86
86
|
- `feedback_scores.user_frustration > 0.5` - Filter by feedback score
|
|
87
|
+
- `feedback_scores.my_metric is_empty` - Filter threads with empty feedback score
|
|
88
|
+
- `feedback_scores.my_metric is_not_empty` - Filter threads with non-empty feedback score
|
|
87
89
|
- `tags contains "important"` - Filter by tag
|
|
88
90
|
|
|
89
91
|
If not provided, all threads in the project will be returned up to the limit.
|
|
@@ -127,7 +129,7 @@ class ThreadsClient:
|
|
|
127
129
|
return threads
|
|
128
130
|
|
|
129
131
|
def log_threads_feedback_scores(
|
|
130
|
-
self, scores: List[
|
|
132
|
+
self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
|
|
131
133
|
) -> None:
|
|
132
134
|
"""
|
|
133
135
|
Logs feedback scores for threads in a specific project. This method processes the given
|
|
@@ -138,7 +140,8 @@ class ThreadsClient:
|
|
|
138
140
|
scores: A list of dictionaries containing feedback scores
|
|
139
141
|
for threads to be logged. Specifying a thread id via `id` key for each score is mandatory.
|
|
140
142
|
project_name: The name of the project to associate with the logged
|
|
141
|
-
scores. If not provided, the
|
|
143
|
+
scores. If not provided, the project name configured in the Opik client will be used.
|
|
144
|
+
This parameter is used as a fallback if `project_name` is not specified in the score dictionary.
|
|
142
145
|
"""
|
|
143
146
|
project_name = project_name or self._opik_client.project_name
|
|
144
147
|
|