opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/constants.py +2 -0
- opik/api_objects/dataset/dataset.py +133 -40
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +6 -0
- opik/api_objects/helpers.py +8 -4
- opik/api_objects/local_recording.py +6 -5
- opik/api_objects/observation_data.py +101 -0
- opik/api_objects/opik_client.py +78 -45
- opik/api_objects/opik_query_language.py +9 -3
- opik/api_objects/prompt/chat/chat_prompt.py +18 -1
- opik/api_objects/prompt/client.py +8 -1
- opik/api_objects/span/span_data.py +3 -88
- opik/api_objects/threads/threads_client.py +7 -4
- opik/api_objects/trace/trace_data.py +3 -74
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +14 -12
- opik/config.py +12 -1
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +4 -1
- opik/decorator/base_track_decorator.py +111 -37
- opik/decorator/context_manager/span_context_manager.py +5 -1
- opik/decorator/generator_wrappers.py +5 -4
- opik/decorator/span_creation_handler.py +13 -4
- opik/evaluation/engine/engine.py +111 -28
- opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
- opik/evaluation/evaluator.py +12 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
- opik/evaluation/metrics/heuristics/equals.py +11 -7
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
- opik/evaluation/models/litellm/util.py +4 -20
- opik/evaluation/models/models_factory.py +19 -5
- opik/evaluation/rest_operations.py +3 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/integrations/adk/legacy_opik_tracer.py +9 -11
- opik/integrations/adk/opik_tracer.py +2 -2
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
- opik/integrations/dspy/callback.py +100 -14
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_tracer.py +2 -2
- opik/integrations/langchain/__init__.py +15 -2
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_tracer.py +258 -160
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
- opik/integrations/llama_index/callback.py +43 -6
- opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
- opik/integrations/openai/opik_tracker.py +99 -4
- opik/integrations/openai/videos/__init__.py +9 -0
- opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
- opik/integrations/openai/videos/videos_create_decorator.py +159 -0
- opik/integrations/openai/videos/videos_download_decorator.py +110 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batchers.py +32 -40
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/emulator_message_processor.py +36 -1
- opik/message_processing/emulation/models.py +21 -0
- opik/message_processing/messages.py +9 -0
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
- opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
- opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
- opik/message_processing/queue_consumer.py +4 -2
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +36 -8
- opik/plugins/pytest/experiment_runner.py +1 -1
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +42 -0
- opik/rest_api/datasets/client.py +321 -123
- opik/rest_api/datasets/raw_client.py +470 -145
- opik/rest_api/experiments/client.py +26 -0
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/llm_provider_key/client.py +4 -4
- opik/rest_api/llm_provider_key/raw_client.py +4 -4
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
- opik/rest_api/manual_evaluation/client.py +101 -0
- opik/rest_api/manual_evaluation/raw_client.py +172 -0
- opik/rest_api/optimizations/client.py +0 -166
- opik/rest_api/optimizations/raw_client.py +0 -248
- opik/rest_api/projects/client.py +9 -0
- opik/rest_api/projects/raw_client.py +13 -0
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
- opik/rest_api/prompts/client.py +130 -2
- opik/rest_api/prompts/raw_client.py +175 -0
- opik/rest_api/traces/client.py +101 -0
- opik/rest_api/traces/raw_client.py +120 -0
- opik/rest_api/types/__init__.py +50 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +38 -2
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
- opik/rest_api/types/dataset.py +2 -0
- opik/rest_api/types/dataset_item.py +1 -1
- opik/rest_api/types/dataset_item_batch.py +4 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +1 -1
- opik/rest_api/types/dataset_item_filter.py +4 -0
- opik/rest_api/types/dataset_item_page_compare.py +0 -1
- opik/rest_api/types/dataset_item_page_public.py +0 -1
- opik/rest_api/types/dataset_item_public.py +1 -1
- opik/rest_api/types/dataset_public.py +2 -0
- opik/rest_api/types/dataset_version_public.py +10 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +9 -0
- opik/rest_api/types/experiment_public.py +9 -0
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/llm_as_judge_message_content.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt_version.py +1 -0
- opik/rest_api/types/prompt_version_detail.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +1 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +5 -1
- opik/rest_api/types/provider_api_key_provider.py +2 -1
- opik/rest_api/types/provider_api_key_public.py +5 -1
- opik/rest_api/types/provider_api_key_public_provider.py +2 -1
- opik/rest_api/types/service_toggles_config.py +11 -1
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
- opik/cli/export.py +0 -791
- opik/cli/import_command.py +0 -575
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import pydantic
|
|
6
|
+
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SpanUserDefinedMetricPythonCodePublic(UniversalBaseModel):
    # Declared fields, kept in their original order.
    metric: str
    arguments: typing.Dict[str, str]

    # Both branches request the same settings (extra fields allowed, frozen
    # instances); only the declaration mechanism differs between the pydantic
    # major versions.
    if not IS_PYDANTIC_V2:

        class Config:
            extra = pydantic.Extra.allow
            smart_union = True
            frozen = True

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(  # type: ignore # Pydantic v2
            frozen=True,
            extra="allow",
        )
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import pydantic
|
|
6
|
+
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SpanUserDefinedMetricPythonCodeWrite(UniversalBaseModel):
    # Declared fields, kept in their original order.
    metric: str
    arguments: typing.Dict[str, str]

    # Both branches request the same settings (extra fields allowed, frozen
    # instances); only the declaration mechanism differs between the pydantic
    # major versions.
    if not IS_PYDANTIC_V2:

        class Config:
            extra = pydantic.Extra.allow
            smart_union = True
            frozen = True

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(  # type: ignore # Pydantic v2
            frozen=True,
            extra="allow",
        )
|
opik/types.py
CHANGED
|
@@ -2,6 +2,7 @@ import enum
|
|
|
2
2
|
import sys
|
|
3
3
|
from typing import Literal, Optional
|
|
4
4
|
|
|
5
|
+
from pydantic import StrictStr
|
|
5
6
|
from typing_extensions import TypedDict
|
|
6
7
|
|
|
7
8
|
if sys.version_info < (3, 11):
|
|
@@ -79,6 +80,41 @@ class FeedbackScoreDict(TypedDict):
|
|
|
79
80
|
"""An optional explanation or justification for the given score."""
|
|
80
81
|
|
|
81
82
|
|
|
83
|
+
class BatchFeedbackScoreDict(TypedDict):
    """
    Feedback score payload for batch logging operations.

    The ``id`` field is required, and each score may optionally name its own
    project through ``project_name``.
    """

    id: Required[str]
    """
    Identifier of the object the score is assigned to: a trace_id, span_id or
    thread_id, depending on how the score is logged. Required for batch
    operations.
    """

    name: Required[str]
    """Name of the feedback metric or criterion."""

    value: Required[float]
    """Numerical value of the feedback score."""

    project_name: NotRequired[Optional[StrictStr]]
    """
    Project name for this specific score. When omitted, the project_name
    parameter of the method call is used, or else the default project name
    configured in the Opik instance.
    """

    category_name: NotRequired[Optional[str]]
    """Optional category name for the score."""

    reason: NotRequired[Optional[str]]
    """Optional explanation or justification for the score."""
|
|
116
|
+
|
|
117
|
+
|
|
82
118
|
class ErrorInfoDict(TypedDict):
|
|
83
119
|
"""
|
|
84
120
|
A TypedDict representing the information about the error occurred.
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
from typing import Any, List, Optional
|
|
2
|
+
|
|
3
|
+
import opik.exceptions as exceptions
|
|
4
|
+
from . import validator, result
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ChatPromptMessagesValidator(validator.RaisableValidator):
    """
    Validator for ChatPrompt messages list.

    Validates that messages is a list of dicts where each dict has:
    - exactly the keys "role" and "content"
    - a "role" value of "system", "user", or "assistant"
    - a "content" value that is either a string or a list of dicts
    - if content is a list of dicts, each dict must have a "type" key;
      "text" parts must carry a string under "text", and URL-based parts
      ("image_url", "video_url", "audio_url") must carry, under the key named
      after the type, a dict holding a string "url"

    All problems found are collected as human-readable strings; call
    ``validate()`` first and then ``raise_if_validation_failed()`` to turn
    them into an exception.
    """

    VALID_ROLES = {"system", "user", "assistant"}
    URL_BASED_CONTENT_TYPES = {"image_url", "video_url", "audio_url"}

    def __init__(self, messages: Any):
        """Store the object to validate; no validation happens here."""
        self.messages = messages
        self.validation_result: Optional[result.ValidationResult] = None

    def validate(self) -> result.ValidationResult:
        """Run all checks, store the outcome on the instance and return it."""
        failure_reasons: List[str] = []

        # Individual messages can only be inspected when the container is a
        # list; otherwise the single "not a list" reason is the whole result.
        if self._validate_messages_is_list(failure_reasons):
            for idx, message in enumerate(self.messages):
                self._validate_message(f"messages[{idx}]", message, failure_reasons)

        if failure_reasons:
            self.validation_result = result.ValidationResult(
                failed=True, failure_reasons=failure_reasons
            )
        else:
            self.validation_result = result.ValidationResult(failed=False)

        return self.validation_result

    def _validate_messages_is_list(self, failure_reasons: List[str]) -> bool:
        """Validate that messages is a list. Returns False if validation fails."""
        if isinstance(self.messages, list):
            return True

        failure_reasons.append(
            f"messages must be a list but {type(self.messages).__name__} was given"
        )
        return False

    def _validate_message(
        self, prefix: str, message: Any, failure_reasons: List[str]
    ) -> None:
        """Validate a single message structure, role, and content."""
        # Role/content checks assume the dict shape is right, so they are
        # skipped when the structural check fails.
        if not self._validate_message_structure(prefix, message, failure_reasons):
            return

        self._validate_role(prefix, message, failure_reasons)
        self._validate_content(prefix, message, failure_reasons)

    def _validate_message_structure(
        self, prefix: str, message: Any, failure_reasons: List[str]
    ) -> bool:
        """
        Validate that message is a dict with exactly 'role' and 'content' keys.

        Appends one reason for missing keys and/or one for unexpected keys.
        Returns False if validation fails.
        """
        if not isinstance(message, dict):
            failure_reasons.append(
                f"{prefix}: must be a dict but {type(message).__name__} was given"
            )
            return False

        message_keys = set(message.keys())
        expected_keys = {"role", "content"}

        # Compute both differences directly. The earlier subset checks were
        # swapped, so a message with only missing keys was reported as
        # "unexpected keys: []" and one with only extra keys as
        # "missing required keys: []".
        missing_keys = expected_keys - message_keys
        extra_keys = message_keys - expected_keys

        if missing_keys:
            failure_reasons.append(
                f"{prefix}: missing required keys: {sorted(missing_keys)}"
            )
        if extra_keys:
            # key=str keeps the usual ordering for string keys while avoiding
            # a TypeError when keys of different types are mixed.
            failure_reasons.append(
                f"{prefix}: unexpected keys: {sorted(extra_keys, key=str)}. "
                f"Expected only: {sorted(expected_keys)}"
            )

        return not missing_keys and not extra_keys

    def _validate_role(
        self, prefix: str, message: dict, failure_reasons: List[str]
    ) -> None:
        """Validate the role field of a message."""
        role = message.get("role")
        # The isinstance guard prevents a TypeError from the set membership
        # test when role is an unhashable value such as a list or dict.
        if isinstance(role, str) and role in self.VALID_ROLES:
            return

        valid_roles_str = ", ".join(f"'{r}'" for r in sorted(self.VALID_ROLES))
        failure_reasons.append(
            f"{prefix}.role: must be one of [{valid_roles_str}] "
            f"but {repr(role)} was given"
        )

    def _validate_content(
        self, prefix: str, message: dict, failure_reasons: List[str]
    ) -> None:
        """Validate the content field of a message."""
        content = message.get("content")
        if content is None:
            failure_reasons.append(f"{prefix}.content: must not be None")
        elif isinstance(content, list):
            self._validate_content_list(prefix, content, failure_reasons)
        elif not isinstance(content, str):
            failure_reasons.append(
                f"{prefix}.content: must be either str or list of dicts "
                f"but {type(content).__name__} was given"
            )

    def _validate_content_list(
        self, prefix: str, content: list, failure_reasons: List[str]
    ) -> None:
        """Validate content when it is a list of content parts."""
        for content_idx, content_part in enumerate(content):
            content_prefix = f"{prefix}.content[{content_idx}]"
            self._validate_content_part(content_prefix, content_part, failure_reasons)

    def _validate_content_part(
        self, content_prefix: str, content_part: Any, failure_reasons: List[str]
    ) -> None:
        """Validate a single content part in the content list."""
        if not isinstance(content_part, dict):
            failure_reasons.append(
                f"{content_prefix}: must be a dict "
                f"but {type(content_part).__name__} was given"
            )
            return

        if "type" not in content_part:
            failure_reasons.append(f"{content_prefix}: must have 'type' key")
            return

        self._validate_content_type_specific(
            content_prefix, content_part.get("type"), content_part, failure_reasons
        )

    def _validate_content_type_specific(
        self,
        content_prefix: str,
        content_type: Any,
        content_part: dict,
        failure_reasons: List[str],
    ) -> None:
        """
        Validate type-specific requirements for content parts.

        Only "text" and the URL-based types have extra requirements; any
        other type value is accepted without further checks.
        """
        # Non-string type values have no type-specific rules. Returning early
        # also avoids a TypeError from the set membership test below when the
        # value is unhashable.
        if not isinstance(content_type, str):
            return

        if content_type in self.URL_BASED_CONTENT_TYPES:
            # For URL-based parts the payload key has the same name as the
            # type, hence content_type is passed as both key and type name.
            self._validate_required_url_object(
                content_prefix,
                content_part,
                content_type,
                content_type,
                failure_reasons,
            )
        elif content_type == "text":
            self._validate_required_string_key(
                content_prefix, content_part, "text", "text", failure_reasons
            )

    def _validate_required_string_key(
        self,
        prefix: str,
        content_part: dict,
        key_name: str,
        type_name: str,
        failure_reasons: List[str],
    ) -> None:
        """Validate that a required key exists and is a string."""
        if key_name not in content_part:
            failure_reasons.append(
                f"{prefix}: must have '{key_name}' key when type is '{type_name}'"
            )
            return

        value = content_part[key_name]
        if not isinstance(value, str):
            failure_reasons.append(
                f"{prefix}.{key_name}: must be a string "
                f"but {type(value).__name__} was given"
            )

    def _validate_required_url_object(
        self,
        prefix: str,
        content_part: dict,
        key_name: str,
        type_name: str,
        failure_reasons: List[str],
    ) -> None:
        """Validate that a required key exists and is a dict with a 'url' key that is a string."""
        if key_name not in content_part:
            failure_reasons.append(
                f"{prefix}: must have '{key_name}' key when type is '{type_name}'"
            )
            return

        url_object = content_part[key_name]
        if not isinstance(url_object, dict):
            failure_reasons.append(
                f"{prefix}.{key_name}: must be a dict "
                f"but {type(url_object).__name__} was given"
            )
            return

        if "url" not in url_object:
            failure_reasons.append(f"{prefix}.{key_name}: must have 'url' key")
            return

        url = url_object["url"]
        if not isinstance(url, str):
            failure_reasons.append(
                f"{prefix}.{key_name}.url: must be a string "
                f"but {type(url).__name__} was given"
            )

    def raise_if_validation_failed(self) -> None:
        """
        Raise ``exceptions.ValidationError`` if the last ``validate()`` call
        recorded failure reasons.

        Does nothing when ``validate()`` has not been called yet or when no
        reasons were recorded.
        """
        if (
            self.validation_result is not None
            and len(self.validation_result.failure_reasons) > 0
        ):
            raise exceptions.ValidationError(
                prefix="ChatPrompt.__init__",
                failure_reasons=self.validation_result.failure_reasons,
            )
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import pydantic
|
|
2
2
|
|
|
3
3
|
from typing import Any
|
|
4
|
-
from ..types import FeedbackScoreDict
|
|
4
|
+
from ..types import BatchFeedbackScoreDict
|
|
5
5
|
from . import validator, result
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class PydanticWrapper(pydantic.BaseModel):
|
|
9
9
|
model_config = pydantic.ConfigDict(extra="forbid")
|
|
10
|
-
feedback_score: FeedbackScoreDict
|
|
10
|
+
feedback_score: BatchFeedbackScoreDict
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
EXPECTED_TYPES = "{'id': str, 'name': str, 'value': float, 'reason': NotRequired[str], 'category_name': NotRequired[str]}"
|
|
13
|
+
EXPECTED_TYPES = "{'id': str, 'name': str, 'value': float, 'reason': NotRequired[str], 'category_name': NotRequired[str], 'project_name': NotRequired[str]}"
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class FeedbackScoreValidator(validator.Validator):
|
opik/validation/validator.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import abc
|
|
2
|
+
from typing import Any
|
|
2
3
|
|
|
3
4
|
from . import result
|
|
4
5
|
|
|
@@ -7,3 +8,30 @@ class Validator(abc.ABC):
|
|
|
7
8
|
@abc.abstractmethod
|
|
8
9
|
def validate(self) -> result.ValidationResult:
|
|
9
10
|
pass
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class RaisableValidator(Validator):
    """
    Abstract validator that adds a ``raise_if_validation_failed`` method on
    top of the ``Validator`` interface.

    Intended for validators that must raise ValidationError exceptions when
    validation fails, typically during class initialization.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Accept any arguments and do nothing with them.

        Subclasses can override this method with their own initialization
        signature.
        """
        return None

    @abc.abstractmethod
    def raise_if_validation_failed(self) -> None:
        """
        Raise a ValidationError if validation failed.

        Implementations should inspect the validation result and raise an
        appropriate ValidationError exception when it indicates a failure.
        """
        ...
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: opik
|
|
3
|
-
Version: 1.9.39
|
|
3
|
+
Version: 1.9.86
|
|
4
4
|
Summary: Comet tool for logging and evaluating LLM traces
|
|
5
5
|
Home-page: https://www.comet.com
|
|
6
6
|
Author: Comet ML Inc.
|
|
@@ -14,13 +14,12 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
14
14
|
Classifier: Natural Language :: English
|
|
15
15
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
16
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.12
|
|
21
20
|
Classifier: Programming Language :: Python :: 3.13
|
|
22
21
|
Classifier: Programming Language :: Python :: 3.14
|
|
23
|
-
Requires-Python: >=3.9
|
|
22
|
+
Requires-Python: >=3.10
|
|
24
23
|
Description-Content-Type: text/markdown
|
|
25
24
|
License-File: LICENSE
|
|
26
25
|
Requires-Dist: boto3-stubs[bedrock-runtime]>=1.34.110
|
|
@@ -70,9 +69,9 @@ Dynamic: summary
|
|
|
70
69
|
Opik
|
|
71
70
|
</div>
|
|
72
71
|
</h1>
|
|
73
|
-
<h2 align="center" style="border-bottom: none">Open-source
|
|
72
|
+
<h2 align="center" style="border-bottom: none">Open-source AI Observability, Evaluation, and Optimization</h2>
|
|
74
73
|
<p align="center">
|
|
75
|
-
Opik helps you build,
|
|
74
|
+
Opik helps you build, test, and optimize generative AI applications that run better, from prototype to production. From RAG chatbots to code assistants to complex agentic systems, Opik provides comprehensive tracing, evaluation, and automatic prompt and tool optimization to take the guesswork out of AI development.
|
|
76
75
|
</p>
|
|
77
76
|
|
|
78
77
|
<div align="center">
|
|
@@ -215,7 +214,7 @@ For production or larger-scale self-hosted deployments, Opik can be installed on
|
|
|
215
214
|
|
|
216
215
|
## 💻 Opik Client SDK
|
|
217
216
|
|
|
218
|
-
Opik provides a suite of client libraries and a REST API to interact with the Opik server. This includes SDKs for Python, TypeScript, and Ruby (via OpenTelemetry), allowing for seamless integration into your workflows. For detailed API and SDK references, see the [Opik Client Reference Documentation](
|
|
217
|
+
Opik provides a suite of client libraries and a REST API to interact with the Opik server. This includes SDKs for Python, TypeScript, and Ruby (via OpenTelemetry), allowing for seamless integration into your workflows. For detailed API and SDK references, see the [Opik Client Reference Documentation](https://www.comet.com/docs/opik/reference/overview?from=llm&utm_source=opik&utm_medium=github&utm_content=reference_link&utm_campaign=opik).
|
|
219
218
|
|
|
220
219
|
### Python SDK Quick Start
|
|
221
220
|
|
|
@@ -238,7 +237,7 @@ opik configure
|
|
|
238
237
|
```
|
|
239
238
|
|
|
240
239
|
> [!TIP]
|
|
241
|
-
> You can also call `opik.configure(use_local=True)` from your Python code to configure the SDK to run on a local self-hosted installation, or provide API key and workspace details directly for Comet.com. Refer to the [Python SDK documentation](
|
|
240
|
+
> You can also call `opik.configure(use_local=True)` from your Python code to configure the SDK to run on a local self-hosted installation, or provide API key and workspace details directly for Comet.com. Refer to the [Python SDK documentation](https://www.comet.com/docs/opik/python-sdk-reference/?from=llm&utm_source=opik&utm_medium=github&utm_content=python_sdk_docs_link&utm_campaign=opik) for more configuration options.
|
|
242
241
|
|
|
243
242
|
You are now ready to start logging traces using the [Python SDK](https://www.comet.com/docs/opik/python-sdk-reference/?from=llm&utm_source=opik&utm_medium=github&utm_content=sdk_link2&utm_campaign=opik).
|
|
244
243
|
|
|
@@ -272,6 +271,7 @@ The easiest way to log traces is to use one of our direct integrations. Opik sup
|
|
|
272
271
|
| Groq | Log traces for Groq LLM calls | [Documentation](https://www.comet.com/docs/opik/integrations/groq?utm_source=opik&utm_medium=github&utm_content=groq_link&utm_campaign=opik) |
|
|
273
272
|
| Guardrails | Log traces for Guardrails AI validations | [Documentation](https://www.comet.com/docs/opik/integrations/guardrails-ai?utm_source=opik&utm_medium=github&utm_content=guardrails_link&utm_campaign=opik) |
|
|
274
273
|
| Haystack | Log traces for Haystack calls | [Documentation](https://www.comet.com/docs/opik/integrations/haystack?utm_source=opik&utm_medium=github&utm_content=haystack_link&utm_campaign=opik) |
|
|
274
|
+
| Harbor | Log traces for Harbor benchmark evaluation trials | [Documentation](https://www.comet.com/docs/opik/integrations/harbor?utm_source=opik&utm_medium=github&utm_content=harbor_link&utm_campaign=opik) |
|
|
275
275
|
| Instructor | Log traces for LLM calls made with Instructor | [Documentation](https://www.comet.com/docs/opik/integrations/instructor?utm_source=opik&utm_medium=github&utm_content=instructor_link&utm_campaign=opik) |
|
|
276
276
|
| LangChain (Python) | Log traces for LangChain LLM calls | [Documentation](https://www.comet.com/docs/opik/integrations/langchain?utm_source=opik&utm_medium=github&utm_content=langchain_link&utm_campaign=opik) |
|
|
277
277
|
| LangChain (JS/TS) | Log traces for LangChain JavaScript/TypeScript calls | [Documentation](https://www.comet.com/docs/opik/integrations/langchainjs?utm_source=opik&utm_medium=github&utm_content=langchainjs_link&utm_campaign=opik) |
|