opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/constants.py +2 -0
- opik/api_objects/dataset/dataset.py +133 -40
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +6 -0
- opik/api_objects/helpers.py +8 -4
- opik/api_objects/local_recording.py +6 -5
- opik/api_objects/observation_data.py +101 -0
- opik/api_objects/opik_client.py +78 -45
- opik/api_objects/opik_query_language.py +9 -3
- opik/api_objects/prompt/chat/chat_prompt.py +18 -1
- opik/api_objects/prompt/client.py +8 -1
- opik/api_objects/span/span_data.py +3 -88
- opik/api_objects/threads/threads_client.py +7 -4
- opik/api_objects/trace/trace_data.py +3 -74
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +14 -12
- opik/config.py +12 -1
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +4 -1
- opik/decorator/base_track_decorator.py +111 -37
- opik/decorator/context_manager/span_context_manager.py +5 -1
- opik/decorator/generator_wrappers.py +5 -4
- opik/decorator/span_creation_handler.py +13 -4
- opik/evaluation/engine/engine.py +111 -28
- opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
- opik/evaluation/evaluator.py +12 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
- opik/evaluation/metrics/heuristics/equals.py +11 -7
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
- opik/evaluation/models/litellm/util.py +4 -20
- opik/evaluation/models/models_factory.py +19 -5
- opik/evaluation/rest_operations.py +3 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/integrations/adk/legacy_opik_tracer.py +9 -11
- opik/integrations/adk/opik_tracer.py +2 -2
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
- opik/integrations/dspy/callback.py +100 -14
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_tracer.py +2 -2
- opik/integrations/langchain/__init__.py +15 -2
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_tracer.py +258 -160
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
- opik/integrations/llama_index/callback.py +43 -6
- opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
- opik/integrations/openai/opik_tracker.py +99 -4
- opik/integrations/openai/videos/__init__.py +9 -0
- opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
- opik/integrations/openai/videos/videos_create_decorator.py +159 -0
- opik/integrations/openai/videos/videos_download_decorator.py +110 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batchers.py +32 -40
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/emulator_message_processor.py +36 -1
- opik/message_processing/emulation/models.py +21 -0
- opik/message_processing/messages.py +9 -0
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
- opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
- opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
- opik/message_processing/queue_consumer.py +4 -2
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +36 -8
- opik/plugins/pytest/experiment_runner.py +1 -1
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +38 -0
- opik/rest_api/datasets/client.py +249 -148
- opik/rest_api/datasets/raw_client.py +356 -217
- opik/rest_api/experiments/client.py +26 -0
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/llm_provider_key/client.py +4 -4
- opik/rest_api/llm_provider_key/raw_client.py +4 -4
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
- opik/rest_api/manual_evaluation/client.py +101 -0
- opik/rest_api/manual_evaluation/raw_client.py +172 -0
- opik/rest_api/optimizations/client.py +0 -166
- opik/rest_api/optimizations/raw_client.py +0 -248
- opik/rest_api/projects/client.py +9 -0
- opik/rest_api/projects/raw_client.py +13 -0
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
- opik/rest_api/prompts/client.py +130 -2
- opik/rest_api/prompts/raw_client.py +175 -0
- opik/rest_api/traces/client.py +101 -0
- opik/rest_api/traces/raw_client.py +120 -0
- opik/rest_api/types/__init__.py +46 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +38 -2
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
- opik/rest_api/types/dataset_item.py +1 -1
- opik/rest_api/types/dataset_item_batch.py +4 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +1 -1
- opik/rest_api/types/dataset_item_filter.py +4 -0
- opik/rest_api/types/dataset_item_page_compare.py +0 -1
- opik/rest_api/types/dataset_item_page_public.py +0 -1
- opik/rest_api/types/dataset_item_public.py +1 -1
- opik/rest_api/types/dataset_version_public.py +5 -0
- opik/rest_api/types/dataset_version_summary.py +5 -0
- opik/rest_api/types/dataset_version_summary_public.py +5 -0
- opik/rest_api/types/experiment.py +9 -0
- opik/rest_api/types/experiment_public.py +9 -0
- opik/rest_api/types/llm_as_judge_message_content.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt_version.py +1 -0
- opik/rest_api/types/prompt_version_detail.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +1 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +5 -1
- opik/rest_api/types/provider_api_key_provider.py +2 -1
- opik/rest_api/types/provider_api_key_public.py +5 -1
- opik/rest_api/types/provider_api_key_public_provider.py +2 -1
- opik/rest_api/types/service_toggles_config.py +11 -1
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/METADATA +5 -5
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/RECORD +190 -141
- opik/cli/export.py +0 -791
- opik/cli/import_command.py +0 -575
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,7 @@ import abc
|
|
|
2
2
|
import logging
|
|
3
3
|
from typing import List, Optional, TypeVar, Type
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from .. import messages
|
|
6
6
|
|
|
7
7
|
import opik.exceptions
|
|
8
8
|
|
|
@@ -76,3 +76,17 @@ class ChainedMessageProcessor(BaseMessageProcessor):
|
|
|
76
76
|
if isinstance(processor, processor_type):
|
|
77
77
|
return processor
|
|
78
78
|
return None
|
|
79
|
+
|
|
80
|
+
def add_first(self, processor: BaseMessageProcessor) -> None:
|
|
81
|
+
"""
|
|
82
|
+
Inserts a processor at the first position in the list of processors.
|
|
83
|
+
|
|
84
|
+
This method allows prioritizing a given processor by placing it
|
|
85
|
+
at the beginning of the internal processor list. As a result, the
|
|
86
|
+
provided processor will be executed before others.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
processor: The message processor to be
|
|
90
|
+
added to the front of the processor list.
|
|
91
|
+
"""
|
|
92
|
+
self._processors.insert(0, processor)
|
opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py}
RENAMED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Optional
|
|
3
3
|
|
|
4
|
+
from opik.rest_api import client as rest_api_client
|
|
5
|
+
|
|
4
6
|
from . import (
|
|
5
7
|
message_processors,
|
|
6
8
|
online_message_processor,
|
|
7
9
|
)
|
|
8
|
-
from
|
|
9
|
-
from ..rest_api import client as rest_api_client
|
|
10
|
+
from ..emulation import local_emulator_message_processor
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
LOGGER = logging.getLogger(__name__)
|
opik/message_processing/{online_message_processor.py → processors/online_message_processor.py}
RENAMED
|
@@ -4,21 +4,18 @@ from typing import Callable, Dict, Type, Any
|
|
|
4
4
|
import pydantic
|
|
5
5
|
import tenacity
|
|
6
6
|
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
dict_utils,
|
|
12
|
-
)
|
|
13
|
-
from ..rate_limit import rate_limit
|
|
14
|
-
from ..rest_api import client as rest_api_client, core as rest_api_core
|
|
15
|
-
from ..rest_api.types import (
|
|
7
|
+
from opik import dict_utils, exceptions, logging_messages
|
|
8
|
+
from opik.rate_limit import rate_limit
|
|
9
|
+
from opik.rest_api import client as rest_api_client, core as rest_api_core
|
|
10
|
+
from opik.rest_api.types import (
|
|
16
11
|
feedback_score_batch_item,
|
|
17
12
|
feedback_score_batch_item_thread,
|
|
18
13
|
guardrail,
|
|
19
14
|
experiment_item,
|
|
20
15
|
)
|
|
21
16
|
|
|
17
|
+
from . import message_processors
|
|
18
|
+
from .. import encoder_helpers, messages
|
|
22
19
|
|
|
23
20
|
LOGGER = logging.getLogger(__name__)
|
|
24
21
|
|
|
@@ -49,6 +46,7 @@ class OpikMessageProcessor(message_processors.BaseMessageProcessor):
|
|
|
49
46
|
messages.CreateTraceBatchMessage: self._process_create_traces_batch_message, # type: ignore
|
|
50
47
|
messages.GuardrailBatchMessage: self._process_guardrail_batch_message, # type: ignore
|
|
51
48
|
messages.CreateExperimentItemsBatchMessage: self._process_create_experiment_items_batch_message, # type: ignore
|
|
49
|
+
messages.AttachmentSupportingMessage: self._noop_handler, # type: ignore
|
|
52
50
|
}
|
|
53
51
|
|
|
54
52
|
def is_active(self) -> bool:
|
|
@@ -304,6 +302,10 @@ class OpikMessageProcessor(message_processors.BaseMessageProcessor):
|
|
|
304
302
|
"Sent experiment items batch of size %d", len(experiment_items_batch)
|
|
305
303
|
)
|
|
306
304
|
|
|
305
|
+
def _noop_handler(self, message: messages.BaseMessage) -> None:
|
|
306
|
+
# just ignore the message
|
|
307
|
+
pass
|
|
308
|
+
|
|
307
309
|
|
|
308
310
|
def _generate_error_tracking_extra(
|
|
309
311
|
exception: Exception, message: messages.BaseMessage
|
|
@@ -4,7 +4,8 @@ import time
|
|
|
4
4
|
from queue import Empty
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
-
from . import
|
|
7
|
+
from . import message_queue, messages
|
|
8
|
+
from .processors import message_processors
|
|
8
9
|
from .. import exceptions, _logging
|
|
9
10
|
|
|
10
11
|
LOGGER = logging.getLogger(__name__)
|
|
@@ -49,7 +50,8 @@ class QueueConsumer(threading.Thread):
|
|
|
49
50
|
|
|
50
51
|
if message is None:
|
|
51
52
|
return
|
|
52
|
-
|
|
53
|
+
|
|
54
|
+
if message.delivery_time <= now:
|
|
53
55
|
self._process_message(message)
|
|
54
56
|
else:
|
|
55
57
|
# put a message back to keep an order in the queue
|
|
@@ -4,10 +4,14 @@ import time
|
|
|
4
4
|
from typing import List, Optional
|
|
5
5
|
|
|
6
6
|
from . import messages, message_queue, queue_consumer
|
|
7
|
-
from .. import synchronization
|
|
8
|
-
from .batching import batch_manager
|
|
9
|
-
from ..file_upload import base_upload_manager
|
|
10
7
|
from .. import _logging
|
|
8
|
+
from .. import synchronization
|
|
9
|
+
from .preprocessing import (
|
|
10
|
+
attachments_preprocessor,
|
|
11
|
+
batching_preprocessor,
|
|
12
|
+
file_upload_preprocessor,
|
|
13
|
+
)
|
|
14
|
+
|
|
11
15
|
|
|
12
16
|
LOGGER = logging.getLogger(__name__)
|
|
13
17
|
|
|
@@ -17,52 +21,74 @@ class Streamer:
|
|
|
17
21
|
self,
|
|
18
22
|
queue: message_queue.MessageQueue[messages.BaseMessage],
|
|
19
23
|
queue_consumers: List[queue_consumer.QueueConsumer],
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
attachments_preprocessor: attachments_preprocessor.AttachmentsPreprocessor,
|
|
25
|
+
batch_preprocessor: batching_preprocessor.BatchingPreprocessor,
|
|
26
|
+
upload_preprocessor: file_upload_preprocessor.FileUploadPreprocessor,
|
|
22
27
|
) -> None:
|
|
23
28
|
self._lock = threading.RLock()
|
|
24
29
|
self._message_queue = queue
|
|
25
30
|
self._queue_consumers = queue_consumers
|
|
26
|
-
self.
|
|
27
|
-
self.
|
|
31
|
+
self._attachments_preprocessor = attachments_preprocessor
|
|
32
|
+
self._batch_preprocessor = batch_preprocessor
|
|
33
|
+
self._upload_preprocessor = upload_preprocessor
|
|
28
34
|
|
|
29
35
|
self._drain = False
|
|
30
36
|
|
|
31
|
-
self.
|
|
37
|
+
self._idle = True
|
|
32
38
|
|
|
33
|
-
|
|
34
|
-
|
|
39
|
+
self._start_queue_consumers()
|
|
40
|
+
self._batch_preprocessor.start()
|
|
35
41
|
|
|
36
42
|
def put(self, message: messages.BaseMessage) -> None:
|
|
37
43
|
with self._lock:
|
|
38
44
|
if self._drain:
|
|
39
45
|
return
|
|
40
46
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
47
|
+
self._idle = False
|
|
48
|
+
try:
|
|
49
|
+
# do embedded attachments pre-processing first (MUST ALWAYS BE DONE FIRST)
|
|
50
|
+
preprocessed_message = self._attachments_preprocessor.preprocess(
|
|
51
|
+
message
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# do file uploads pre-processing second
|
|
55
|
+
preprocessed_message = self._upload_preprocessor.preprocess(
|
|
56
|
+
preprocessed_message
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# do batching pre-processing third
|
|
60
|
+
preprocessed_message = self._batch_preprocessor.preprocess(
|
|
61
|
+
preprocessed_message
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# work with resulting message if not fully consumed by preprocessors
|
|
65
|
+
if preprocessed_message is not None:
|
|
66
|
+
if self._message_queue.accept_put_without_discarding() is False:
|
|
67
|
+
_logging.log_once_at_level(
|
|
68
|
+
logging.WARNING,
|
|
69
|
+
"The message queue size limit has been reached. The new message has been added to the queue, and the oldest message has been discarded.",
|
|
70
|
+
logger=LOGGER,
|
|
71
|
+
)
|
|
72
|
+
self._message_queue.put(preprocessed_message)
|
|
73
|
+
except Exception as ex:
|
|
74
|
+
LOGGER.error(
|
|
75
|
+
"Failed to process message by streamer: %s", ex, exc_info=ex
|
|
76
|
+
)
|
|
77
|
+
self._idle = True
|
|
56
78
|
|
|
57
79
|
def close(self, timeout: Optional[int]) -> bool:
|
|
58
80
|
"""
|
|
59
|
-
Stops data
|
|
81
|
+
Stops data processing threads
|
|
60
82
|
"""
|
|
61
83
|
with self._lock:
|
|
84
|
+
synchronization.wait_for_done(
|
|
85
|
+
check_function=lambda: self._idle,
|
|
86
|
+
timeout=timeout,
|
|
87
|
+
sleep_time=0.1,
|
|
88
|
+
)
|
|
62
89
|
self._drain = True
|
|
63
90
|
|
|
64
|
-
|
|
65
|
-
self._batch_manager.stop() # stopping causes adding remaining batch messages to the queue
|
|
91
|
+
self._batch_preprocessor.stop() # stopping causes adding remaining batch messages to the queue
|
|
66
92
|
|
|
67
93
|
self.flush(timeout)
|
|
68
94
|
self._close_queue_consumers()
|
|
@@ -70,8 +96,17 @@ class Streamer:
|
|
|
70
96
|
return self._message_queue.empty()
|
|
71
97
|
|
|
72
98
|
def flush(self, timeout: Optional[float], upload_sleep_time: int = 5) -> bool:
|
|
73
|
-
|
|
74
|
-
|
|
99
|
+
# wait for current pending messages processing to be completed
|
|
100
|
+
# this should be done before flushing batch preprocessor because some
|
|
101
|
+
# batch messages may be added to the queue during processing
|
|
102
|
+
with self._lock:
|
|
103
|
+
synchronization.wait_for_done(
|
|
104
|
+
check_function=lambda: self._idle,
|
|
105
|
+
timeout=timeout,
|
|
106
|
+
sleep_time=0.1,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
self._batch_preprocessor.flush()
|
|
75
110
|
|
|
76
111
|
start_time = time.time()
|
|
77
112
|
|
|
@@ -88,17 +123,20 @@ class Streamer:
|
|
|
88
123
|
timeout = 1.0
|
|
89
124
|
|
|
90
125
|
# flushing upload manager is blocking operation
|
|
91
|
-
upload_flushed = self.
|
|
126
|
+
upload_flushed = self._upload_preprocessor.flush(
|
|
92
127
|
timeout=timeout, sleep_time=upload_sleep_time
|
|
93
128
|
)
|
|
94
129
|
|
|
95
|
-
|
|
130
|
+
flushed = upload_flushed and self._all_done()
|
|
131
|
+
LOGGER.debug(f"Streamer flushed completely: {flushed}")
|
|
132
|
+
|
|
133
|
+
return flushed
|
|
96
134
|
|
|
97
135
|
def _all_done(self) -> bool:
|
|
98
136
|
return (
|
|
99
137
|
self.workers_idling()
|
|
100
138
|
and self._message_queue.empty()
|
|
101
|
-
and
|
|
139
|
+
and self._batch_preprocessor.is_empty()
|
|
102
140
|
)
|
|
103
141
|
|
|
104
142
|
def workers_idling(self) -> bool:
|
|
@@ -5,12 +5,17 @@ import httpx
|
|
|
5
5
|
from . import (
|
|
6
6
|
queue_consumer,
|
|
7
7
|
messages,
|
|
8
|
-
message_processors,
|
|
9
8
|
message_queue,
|
|
10
9
|
streamer,
|
|
11
10
|
)
|
|
12
11
|
from .batching import batch_manager_constuctors
|
|
13
|
-
from
|
|
12
|
+
from .preprocessing import (
|
|
13
|
+
attachments_preprocessor,
|
|
14
|
+
batching_preprocessor,
|
|
15
|
+
file_upload_preprocessor,
|
|
16
|
+
)
|
|
17
|
+
from .processors import attachments_extraction_processor, message_processors
|
|
18
|
+
from ..file_upload import upload_manager
|
|
14
19
|
from ..rest_api import client as rest_api_client
|
|
15
20
|
|
|
16
21
|
|
|
@@ -18,10 +23,13 @@ def construct_online_streamer(
|
|
|
18
23
|
rest_client: rest_api_client.OpikApi,
|
|
19
24
|
httpx_client: httpx.Client,
|
|
20
25
|
use_batching: bool,
|
|
26
|
+
use_attachment_extraction: bool,
|
|
27
|
+
min_base64_embedded_attachment_size: int,
|
|
21
28
|
file_upload_worker_count: int,
|
|
22
29
|
n_consumers: int,
|
|
23
30
|
max_queue_size: int,
|
|
24
31
|
message_processor: message_processors.ChainedMessageProcessor,
|
|
32
|
+
url_override: str,
|
|
25
33
|
) -> streamer.Streamer:
|
|
26
34
|
file_uploader = upload_manager.FileUploadManager(
|
|
27
35
|
rest_client=rest_client,
|
|
@@ -29,20 +37,37 @@ def construct_online_streamer(
|
|
|
29
37
|
worker_count=file_upload_worker_count,
|
|
30
38
|
)
|
|
31
39
|
|
|
32
|
-
|
|
40
|
+
streamer = construct_streamer(
|
|
33
41
|
message_processor=message_processor,
|
|
34
|
-
|
|
42
|
+
upload_preprocessor=file_upload_preprocessor.FileUploadPreprocessor(
|
|
43
|
+
file_uploader
|
|
44
|
+
),
|
|
35
45
|
n_consumers=n_consumers,
|
|
36
46
|
use_batching=use_batching,
|
|
47
|
+
use_attachment_extraction=use_attachment_extraction,
|
|
37
48
|
max_queue_size=max_queue_size,
|
|
38
49
|
)
|
|
39
50
|
|
|
51
|
+
# add attachment extraction processor to the beginning of the processing chain
|
|
52
|
+
attachment_extraction = (
|
|
53
|
+
attachments_extraction_processor.AttachmentsExtractionProcessor(
|
|
54
|
+
messages_streamer=streamer,
|
|
55
|
+
min_attachment_size=min_base64_embedded_attachment_size,
|
|
56
|
+
url_override=url_override,
|
|
57
|
+
is_active=use_attachment_extraction,
|
|
58
|
+
)
|
|
59
|
+
)
|
|
60
|
+
message_processor.add_first(attachment_extraction)
|
|
61
|
+
|
|
62
|
+
return streamer
|
|
63
|
+
|
|
40
64
|
|
|
41
65
|
def construct_streamer(
|
|
42
|
-
message_processor: message_processors.
|
|
43
|
-
|
|
66
|
+
message_processor: message_processors.BaseMessageProcessor,
|
|
67
|
+
upload_preprocessor: file_upload_preprocessor.FileUploadPreprocessor,
|
|
44
68
|
n_consumers: int,
|
|
45
69
|
use_batching: bool,
|
|
70
|
+
use_attachment_extraction: bool,
|
|
46
71
|
max_queue_size: Optional[int],
|
|
47
72
|
) -> streamer.Streamer:
|
|
48
73
|
message_queue_: message_queue.MessageQueue[messages.BaseMessage] = (
|
|
@@ -67,8 +92,11 @@ def construct_streamer(
|
|
|
67
92
|
streamer_ = streamer.Streamer(
|
|
68
93
|
queue=message_queue_,
|
|
69
94
|
queue_consumers=queue_consumers,
|
|
70
|
-
|
|
71
|
-
|
|
95
|
+
batch_preprocessor=batching_preprocessor.BatchingPreprocessor(batch_manager),
|
|
96
|
+
upload_preprocessor=upload_preprocessor,
|
|
97
|
+
attachments_preprocessor=attachments_preprocessor.AttachmentsPreprocessor(
|
|
98
|
+
use_attachment_extraction
|
|
99
|
+
),
|
|
72
100
|
)
|
|
73
101
|
|
|
74
102
|
return streamer_
|
|
@@ -41,7 +41,7 @@ def run(client: opik_client.Opik, test_items: List[Item]) -> None:
|
|
|
41
41
|
except Exception:
|
|
42
42
|
dataset = client.create_dataset("tests")
|
|
43
43
|
|
|
44
|
-
dataset_items = dataset.
|
|
44
|
+
dataset_items = list(dataset.__internal_api__stream_items_as_dataclasses__())
|
|
45
45
|
dataset_item_id_finder = get_dataset_item_id_finder(
|
|
46
46
|
existing_dataset_items=dataset_items
|
|
47
47
|
)
|
opik/plugins/pytest/hooks.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import opik._logging as _logging
|
|
3
3
|
from typing import List, Any, Generator, TYPE_CHECKING
|
|
4
|
-
from opik.types import
|
|
4
|
+
from opik.types import BatchFeedbackScoreDict
|
|
5
5
|
|
|
6
6
|
from opik.api_objects import opik_client
|
|
7
7
|
from . import test_runs_storage, experiment_runner, summary
|
|
@@ -48,13 +48,15 @@ def pytest_sessionfinish(session: "pytest.Session", exitstatus: Any) -> None:
|
|
|
48
48
|
return
|
|
49
49
|
|
|
50
50
|
try:
|
|
51
|
-
traces_feedback_scores: List[
|
|
51
|
+
traces_feedback_scores: List[BatchFeedbackScoreDict] = []
|
|
52
52
|
|
|
53
53
|
for item in llm_test_items:
|
|
54
54
|
report: "pytest.TestReport" = item.report
|
|
55
55
|
trace_id = test_runs_storage.TEST_RUNS_TO_TRACE_DATA[item.nodeid].id
|
|
56
56
|
traces_feedback_scores.append(
|
|
57
|
-
|
|
57
|
+
BatchFeedbackScoreDict(
|
|
58
|
+
id=trace_id, name="Passed", value=float(report.passed)
|
|
59
|
+
)
|
|
58
60
|
)
|
|
59
61
|
|
|
60
62
|
client = opik_client.get_client_cached()
|
opik/rest_api/__init__.py
CHANGED
|
@@ -38,6 +38,9 @@ from .types import (
|
|
|
38
38
|
AssistantMessageRole,
|
|
39
39
|
Attachment,
|
|
40
40
|
AttachmentPage,
|
|
41
|
+
AudioUrl,
|
|
42
|
+
AudioUrlPublic,
|
|
43
|
+
AudioUrlWrite,
|
|
41
44
|
AuthDetailsHolder,
|
|
42
45
|
AutomationRuleEvaluator,
|
|
43
46
|
AutomationRuleEvaluatorLlmAsJudge,
|
|
@@ -46,6 +49,7 @@ from .types import (
|
|
|
46
49
|
AutomationRuleEvaluatorObjectObjectPublic,
|
|
47
50
|
AutomationRuleEvaluatorObjectObjectPublic_LlmAsJudge,
|
|
48
51
|
AutomationRuleEvaluatorObjectObjectPublic_SpanLlmAsJudge,
|
|
52
|
+
AutomationRuleEvaluatorObjectObjectPublic_SpanUserDefinedMetricPython,
|
|
49
53
|
AutomationRuleEvaluatorObjectObjectPublic_TraceThreadLlmAsJudge,
|
|
50
54
|
AutomationRuleEvaluatorObjectObjectPublic_TraceThreadUserDefinedMetricPython,
|
|
51
55
|
AutomationRuleEvaluatorObjectObjectPublic_UserDefinedMetricPython,
|
|
@@ -53,12 +57,16 @@ from .types import (
|
|
|
53
57
|
AutomationRuleEvaluatorPublic,
|
|
54
58
|
AutomationRuleEvaluatorPublic_LlmAsJudge,
|
|
55
59
|
AutomationRuleEvaluatorPublic_SpanLlmAsJudge,
|
|
60
|
+
AutomationRuleEvaluatorPublic_SpanUserDefinedMetricPython,
|
|
56
61
|
AutomationRuleEvaluatorPublic_TraceThreadLlmAsJudge,
|
|
57
62
|
AutomationRuleEvaluatorPublic_TraceThreadUserDefinedMetricPython,
|
|
58
63
|
AutomationRuleEvaluatorPublic_UserDefinedMetricPython,
|
|
59
64
|
AutomationRuleEvaluatorSpanLlmAsJudge,
|
|
60
65
|
AutomationRuleEvaluatorSpanLlmAsJudgePublic,
|
|
61
66
|
AutomationRuleEvaluatorSpanLlmAsJudgeWrite,
|
|
67
|
+
AutomationRuleEvaluatorSpanUserDefinedMetricPython,
|
|
68
|
+
AutomationRuleEvaluatorSpanUserDefinedMetricPythonPublic,
|
|
69
|
+
AutomationRuleEvaluatorSpanUserDefinedMetricPythonWrite,
|
|
62
70
|
AutomationRuleEvaluatorTraceThreadLlmAsJudge,
|
|
63
71
|
AutomationRuleEvaluatorTraceThreadLlmAsJudgePublic,
|
|
64
72
|
AutomationRuleEvaluatorTraceThreadLlmAsJudgeWrite,
|
|
@@ -68,11 +76,13 @@ from .types import (
|
|
|
68
76
|
AutomationRuleEvaluatorUpdate,
|
|
69
77
|
AutomationRuleEvaluatorUpdateLlmAsJudge,
|
|
70
78
|
AutomationRuleEvaluatorUpdateSpanLlmAsJudge,
|
|
79
|
+
AutomationRuleEvaluatorUpdateSpanUserDefinedMetricPython,
|
|
71
80
|
AutomationRuleEvaluatorUpdateTraceThreadLlmAsJudge,
|
|
72
81
|
AutomationRuleEvaluatorUpdateTraceThreadUserDefinedMetricPython,
|
|
73
82
|
AutomationRuleEvaluatorUpdateUserDefinedMetricPython,
|
|
74
83
|
AutomationRuleEvaluatorUpdate_LlmAsJudge,
|
|
75
84
|
AutomationRuleEvaluatorUpdate_SpanLlmAsJudge,
|
|
85
|
+
AutomationRuleEvaluatorUpdate_SpanUserDefinedMetricPython,
|
|
76
86
|
AutomationRuleEvaluatorUpdate_TraceThreadLlmAsJudge,
|
|
77
87
|
AutomationRuleEvaluatorUpdate_TraceThreadUserDefinedMetricPython,
|
|
78
88
|
AutomationRuleEvaluatorUpdate_UserDefinedMetricPython,
|
|
@@ -82,11 +92,13 @@ from .types import (
|
|
|
82
92
|
AutomationRuleEvaluatorWrite,
|
|
83
93
|
AutomationRuleEvaluatorWrite_LlmAsJudge,
|
|
84
94
|
AutomationRuleEvaluatorWrite_SpanLlmAsJudge,
|
|
95
|
+
AutomationRuleEvaluatorWrite_SpanUserDefinedMetricPython,
|
|
85
96
|
AutomationRuleEvaluatorWrite_TraceThreadLlmAsJudge,
|
|
86
97
|
AutomationRuleEvaluatorWrite_TraceThreadUserDefinedMetricPython,
|
|
87
98
|
AutomationRuleEvaluatorWrite_UserDefinedMetricPython,
|
|
88
99
|
AutomationRuleEvaluator_LlmAsJudge,
|
|
89
100
|
AutomationRuleEvaluator_SpanLlmAsJudge,
|
|
101
|
+
AutomationRuleEvaluator_SpanUserDefinedMetricPython,
|
|
90
102
|
AutomationRuleEvaluator_TraceThreadLlmAsJudge,
|
|
91
103
|
AutomationRuleEvaluator_TraceThreadUserDefinedMetricPython,
|
|
92
104
|
AutomationRuleEvaluator_UserDefinedMetricPython,
|
|
@@ -145,6 +157,7 @@ from .types import (
|
|
|
145
157
|
DatasetExpansionResponse,
|
|
146
158
|
DatasetItem,
|
|
147
159
|
DatasetItemBatch,
|
|
160
|
+
DatasetItemChangesPublic,
|
|
148
161
|
DatasetItemCompare,
|
|
149
162
|
DatasetItemCompareSource,
|
|
150
163
|
DatasetItemFilter,
|
|
@@ -335,6 +348,8 @@ from .types import (
|
|
|
335
348
|
ProjectPagePublic,
|
|
336
349
|
ProjectPublic,
|
|
337
350
|
ProjectPublicVisibility,
|
|
351
|
+
ProjectReference,
|
|
352
|
+
ProjectReferencePublic,
|
|
338
353
|
ProjectStatItemObjectPublic,
|
|
339
354
|
ProjectStatItemObjectPublic_Avg,
|
|
340
355
|
ProjectStatItemObjectPublic_Count,
|
|
@@ -365,6 +380,7 @@ from .types import (
|
|
|
365
380
|
PromptVersionPublicType,
|
|
366
381
|
PromptVersionTemplateStructure,
|
|
367
382
|
PromptVersionType,
|
|
383
|
+
PromptVersionUpdate,
|
|
368
384
|
ProviderApiKey,
|
|
369
385
|
ProviderApiKeyPagePublic,
|
|
370
386
|
ProviderApiKeyProvider,
|
|
@@ -396,6 +412,9 @@ from .types import (
|
|
|
396
412
|
SpanType,
|
|
397
413
|
SpanUpdate,
|
|
398
414
|
SpanUpdateType,
|
|
415
|
+
SpanUserDefinedMetricPythonCode,
|
|
416
|
+
SpanUserDefinedMetricPythonCodePublic,
|
|
417
|
+
SpanUserDefinedMetricPythonCodeWrite,
|
|
399
418
|
SpanWrite,
|
|
400
419
|
SpanWriteType,
|
|
401
420
|
SpansCountResponse,
|
|
@@ -587,6 +606,9 @@ __all__ = [
|
|
|
587
606
|
"Attachment",
|
|
588
607
|
"AttachmentListRequestEntityType",
|
|
589
608
|
"AttachmentPage",
|
|
609
|
+
"AudioUrl",
|
|
610
|
+
"AudioUrlPublic",
|
|
611
|
+
"AudioUrlWrite",
|
|
590
612
|
"AuthDetailsHolder",
|
|
591
613
|
"AutomationRuleEvaluator",
|
|
592
614
|
"AutomationRuleEvaluatorLlmAsJudge",
|
|
@@ -595,6 +617,7 @@ __all__ = [
|
|
|
595
617
|
"AutomationRuleEvaluatorObjectObjectPublic",
|
|
596
618
|
"AutomationRuleEvaluatorObjectObjectPublic_LlmAsJudge",
|
|
597
619
|
"AutomationRuleEvaluatorObjectObjectPublic_SpanLlmAsJudge",
|
|
620
|
+
"AutomationRuleEvaluatorObjectObjectPublic_SpanUserDefinedMetricPython",
|
|
598
621
|
"AutomationRuleEvaluatorObjectObjectPublic_TraceThreadLlmAsJudge",
|
|
599
622
|
"AutomationRuleEvaluatorObjectObjectPublic_TraceThreadUserDefinedMetricPython",
|
|
600
623
|
"AutomationRuleEvaluatorObjectObjectPublic_UserDefinedMetricPython",
|
|
@@ -602,12 +625,16 @@ __all__ = [
|
|
|
602
625
|
"AutomationRuleEvaluatorPublic",
|
|
603
626
|
"AutomationRuleEvaluatorPublic_LlmAsJudge",
|
|
604
627
|
"AutomationRuleEvaluatorPublic_SpanLlmAsJudge",
|
|
628
|
+
"AutomationRuleEvaluatorPublic_SpanUserDefinedMetricPython",
|
|
605
629
|
"AutomationRuleEvaluatorPublic_TraceThreadLlmAsJudge",
|
|
606
630
|
"AutomationRuleEvaluatorPublic_TraceThreadUserDefinedMetricPython",
|
|
607
631
|
"AutomationRuleEvaluatorPublic_UserDefinedMetricPython",
|
|
608
632
|
"AutomationRuleEvaluatorSpanLlmAsJudge",
|
|
609
633
|
"AutomationRuleEvaluatorSpanLlmAsJudgePublic",
|
|
610
634
|
"AutomationRuleEvaluatorSpanLlmAsJudgeWrite",
|
|
635
|
+
"AutomationRuleEvaluatorSpanUserDefinedMetricPython",
|
|
636
|
+
"AutomationRuleEvaluatorSpanUserDefinedMetricPythonPublic",
|
|
637
|
+
"AutomationRuleEvaluatorSpanUserDefinedMetricPythonWrite",
|
|
611
638
|
"AutomationRuleEvaluatorTraceThreadLlmAsJudge",
|
|
612
639
|
"AutomationRuleEvaluatorTraceThreadLlmAsJudgePublic",
|
|
613
640
|
"AutomationRuleEvaluatorTraceThreadLlmAsJudgeWrite",
|
|
@@ -617,11 +644,13 @@ __all__ = [
|
|
|
617
644
|
"AutomationRuleEvaluatorUpdate",
|
|
618
645
|
"AutomationRuleEvaluatorUpdateLlmAsJudge",
|
|
619
646
|
"AutomationRuleEvaluatorUpdateSpanLlmAsJudge",
|
|
647
|
+
"AutomationRuleEvaluatorUpdateSpanUserDefinedMetricPython",
|
|
620
648
|
"AutomationRuleEvaluatorUpdateTraceThreadLlmAsJudge",
|
|
621
649
|
"AutomationRuleEvaluatorUpdateTraceThreadUserDefinedMetricPython",
|
|
622
650
|
"AutomationRuleEvaluatorUpdateUserDefinedMetricPython",
|
|
623
651
|
"AutomationRuleEvaluatorUpdate_LlmAsJudge",
|
|
624
652
|
"AutomationRuleEvaluatorUpdate_SpanLlmAsJudge",
|
|
653
|
+
"AutomationRuleEvaluatorUpdate_SpanUserDefinedMetricPython",
|
|
625
654
|
"AutomationRuleEvaluatorUpdate_TraceThreadLlmAsJudge",
|
|
626
655
|
"AutomationRuleEvaluatorUpdate_TraceThreadUserDefinedMetricPython",
|
|
627
656
|
"AutomationRuleEvaluatorUpdate_UserDefinedMetricPython",
|
|
@@ -631,11 +660,13 @@ __all__ = [
|
|
|
631
660
|
"AutomationRuleEvaluatorWrite",
|
|
632
661
|
"AutomationRuleEvaluatorWrite_LlmAsJudge",
|
|
633
662
|
"AutomationRuleEvaluatorWrite_SpanLlmAsJudge",
|
|
663
|
+
"AutomationRuleEvaluatorWrite_SpanUserDefinedMetricPython",
|
|
634
664
|
"AutomationRuleEvaluatorWrite_TraceThreadLlmAsJudge",
|
|
635
665
|
"AutomationRuleEvaluatorWrite_TraceThreadUserDefinedMetricPython",
|
|
636
666
|
"AutomationRuleEvaluatorWrite_UserDefinedMetricPython",
|
|
637
667
|
"AutomationRuleEvaluator_LlmAsJudge",
|
|
638
668
|
"AutomationRuleEvaluator_SpanLlmAsJudge",
|
|
669
|
+
"AutomationRuleEvaluator_SpanUserDefinedMetricPython",
|
|
639
670
|
"AutomationRuleEvaluator_TraceThreadLlmAsJudge",
|
|
640
671
|
"AutomationRuleEvaluator_TraceThreadUserDefinedMetricPython",
|
|
641
672
|
"AutomationRuleEvaluator_UserDefinedMetricPython",
|
|
@@ -697,6 +728,7 @@ __all__ = [
|
|
|
697
728
|
"DatasetExpansionResponse",
|
|
698
729
|
"DatasetItem",
|
|
699
730
|
"DatasetItemBatch",
|
|
731
|
+
"DatasetItemChangesPublic",
|
|
700
732
|
"DatasetItemCompare",
|
|
701
733
|
"DatasetItemCompareSource",
|
|
702
734
|
"DatasetItemFilter",
|
|
@@ -907,6 +939,8 @@ __all__ = [
|
|
|
907
939
|
"ProjectPagePublic",
|
|
908
940
|
"ProjectPublic",
|
|
909
941
|
"ProjectPublicVisibility",
|
|
942
|
+
"ProjectReference",
|
|
943
|
+
"ProjectReferencePublic",
|
|
910
944
|
"ProjectStatItemObjectPublic",
|
|
911
945
|
"ProjectStatItemObjectPublic_Avg",
|
|
912
946
|
"ProjectStatItemObjectPublic_Count",
|
|
@@ -939,6 +973,7 @@ __all__ = [
|
|
|
939
973
|
"PromptVersionPublicType",
|
|
940
974
|
"PromptVersionTemplateStructure",
|
|
941
975
|
"PromptVersionType",
|
|
976
|
+
"PromptVersionUpdate",
|
|
942
977
|
"PromptWriteTemplateStructure",
|
|
943
978
|
"PromptWriteType",
|
|
944
979
|
"ProviderApiKey",
|
|
@@ -974,6 +1009,9 @@ __all__ = [
|
|
|
974
1009
|
"SpanType",
|
|
975
1010
|
"SpanUpdate",
|
|
976
1011
|
"SpanUpdateType",
|
|
1012
|
+
"SpanUserDefinedMetricPythonCode",
|
|
1013
|
+
"SpanUserDefinedMetricPythonCodePublic",
|
|
1014
|
+
"SpanUserDefinedMetricPythonCodeWrite",
|
|
977
1015
|
"SpanWrite",
|
|
978
1016
|
"SpanWriteType",
|
|
979
1017
|
"SpansCountResponse",
|