opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. opik/api_objects/attachment/attachment_context.py +36 -0
  2. opik/api_objects/attachment/attachments_extractor.py +153 -0
  3. opik/api_objects/attachment/client.py +1 -0
  4. opik/api_objects/attachment/converters.py +2 -0
  5. opik/api_objects/attachment/decoder.py +18 -0
  6. opik/api_objects/attachment/decoder_base64.py +83 -0
  7. opik/api_objects/attachment/decoder_helpers.py +137 -0
  8. opik/api_objects/constants.py +2 -0
  9. opik/api_objects/dataset/dataset.py +133 -40
  10. opik/api_objects/dataset/rest_operations.py +2 -0
  11. opik/api_objects/experiment/experiment.py +6 -0
  12. opik/api_objects/helpers.py +8 -4
  13. opik/api_objects/local_recording.py +6 -5
  14. opik/api_objects/observation_data.py +101 -0
  15. opik/api_objects/opik_client.py +78 -45
  16. opik/api_objects/opik_query_language.py +9 -3
  17. opik/api_objects/prompt/chat/chat_prompt.py +18 -1
  18. opik/api_objects/prompt/client.py +8 -1
  19. opik/api_objects/span/span_data.py +3 -88
  20. opik/api_objects/threads/threads_client.py +7 -4
  21. opik/api_objects/trace/trace_data.py +3 -74
  22. opik/api_objects/validation_helpers.py +3 -3
  23. opik/cli/exports/__init__.py +131 -0
  24. opik/cli/exports/dataset.py +278 -0
  25. opik/cli/exports/experiment.py +784 -0
  26. opik/cli/exports/project.py +685 -0
  27. opik/cli/exports/prompt.py +578 -0
  28. opik/cli/exports/utils.py +406 -0
  29. opik/cli/harbor.py +39 -0
  30. opik/cli/imports/__init__.py +439 -0
  31. opik/cli/imports/dataset.py +143 -0
  32. opik/cli/imports/experiment.py +1192 -0
  33. opik/cli/imports/project.py +262 -0
  34. opik/cli/imports/prompt.py +177 -0
  35. opik/cli/imports/utils.py +280 -0
  36. opik/cli/main.py +14 -12
  37. opik/config.py +12 -1
  38. opik/datetime_helpers.py +12 -0
  39. opik/decorator/arguments_helpers.py +4 -1
  40. opik/decorator/base_track_decorator.py +111 -37
  41. opik/decorator/context_manager/span_context_manager.py +5 -1
  42. opik/decorator/generator_wrappers.py +5 -4
  43. opik/decorator/span_creation_handler.py +13 -4
  44. opik/evaluation/engine/engine.py +111 -28
  45. opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
  46. opik/evaluation/evaluator.py +12 -0
  47. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
  48. opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
  49. opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
  50. opik/evaluation/metrics/heuristics/equals.py +11 -7
  51. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
  52. opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
  53. opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
  54. opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
  55. opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
  56. opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
  57. opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
  58. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
  59. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
  60. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
  61. opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
  62. opik/evaluation/metrics/ragas_metric.py +43 -23
  63. opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
  64. opik/evaluation/models/litellm/util.py +4 -20
  65. opik/evaluation/models/models_factory.py +19 -5
  66. opik/evaluation/rest_operations.py +3 -3
  67. opik/evaluation/threads/helpers.py +3 -2
  68. opik/file_upload/file_uploader.py +13 -0
  69. opik/file_upload/upload_options.py +2 -0
  70. opik/integrations/adk/legacy_opik_tracer.py +9 -11
  71. opik/integrations/adk/opik_tracer.py +2 -2
  72. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
  73. opik/integrations/dspy/callback.py +100 -14
  74. opik/integrations/dspy/parsers.py +168 -0
  75. opik/integrations/harbor/__init__.py +17 -0
  76. opik/integrations/harbor/experiment_service.py +269 -0
  77. opik/integrations/harbor/opik_tracker.py +528 -0
  78. opik/integrations/haystack/opik_tracer.py +2 -2
  79. opik/integrations/langchain/__init__.py +15 -2
  80. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  81. opik/integrations/langchain/opik_tracer.py +258 -160
  82. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
  83. opik/integrations/llama_index/callback.py +43 -6
  84. opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
  85. opik/integrations/openai/opik_tracker.py +99 -4
  86. opik/integrations/openai/videos/__init__.py +9 -0
  87. opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
  88. opik/integrations/openai/videos/videos_create_decorator.py +159 -0
  89. opik/integrations/openai/videos/videos_download_decorator.py +110 -0
  90. opik/message_processing/batching/base_batcher.py +14 -21
  91. opik/message_processing/batching/batch_manager.py +22 -10
  92. opik/message_processing/batching/batchers.py +32 -40
  93. opik/message_processing/batching/flushing_thread.py +0 -3
  94. opik/message_processing/emulation/emulator_message_processor.py +36 -1
  95. opik/message_processing/emulation/models.py +21 -0
  96. opik/message_processing/messages.py +9 -0
  97. opik/message_processing/preprocessing/__init__.py +0 -0
  98. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  99. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  100. opik/message_processing/preprocessing/constants.py +1 -0
  101. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  102. opik/message_processing/preprocessing/preprocessor.py +36 -0
  103. opik/message_processing/processors/__init__.py +0 -0
  104. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  105. opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
  106. opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
  107. opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
  108. opik/message_processing/queue_consumer.py +4 -2
  109. opik/message_processing/streamer.py +71 -33
  110. opik/message_processing/streamer_constructors.py +36 -8
  111. opik/plugins/pytest/experiment_runner.py +1 -1
  112. opik/plugins/pytest/hooks.py +5 -3
  113. opik/rest_api/__init__.py +42 -0
  114. opik/rest_api/datasets/client.py +321 -123
  115. opik/rest_api/datasets/raw_client.py +470 -145
  116. opik/rest_api/experiments/client.py +26 -0
  117. opik/rest_api/experiments/raw_client.py +26 -0
  118. opik/rest_api/llm_provider_key/client.py +4 -4
  119. opik/rest_api/llm_provider_key/raw_client.py +4 -4
  120. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
  121. opik/rest_api/manual_evaluation/client.py +101 -0
  122. opik/rest_api/manual_evaluation/raw_client.py +172 -0
  123. opik/rest_api/optimizations/client.py +0 -166
  124. opik/rest_api/optimizations/raw_client.py +0 -248
  125. opik/rest_api/projects/client.py +9 -0
  126. opik/rest_api/projects/raw_client.py +13 -0
  127. opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
  128. opik/rest_api/prompts/client.py +130 -2
  129. opik/rest_api/prompts/raw_client.py +175 -0
  130. opik/rest_api/traces/client.py +101 -0
  131. opik/rest_api/traces/raw_client.py +120 -0
  132. opik/rest_api/types/__init__.py +50 -0
  133. opik/rest_api/types/audio_url.py +19 -0
  134. opik/rest_api/types/audio_url_public.py +19 -0
  135. opik/rest_api/types/audio_url_write.py +19 -0
  136. opik/rest_api/types/automation_rule_evaluator.py +38 -2
  137. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
  138. opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
  139. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  140. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  141. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  142. opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
  143. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  144. opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
  145. opik/rest_api/types/dataset.py +2 -0
  146. opik/rest_api/types/dataset_item.py +1 -1
  147. opik/rest_api/types/dataset_item_batch.py +4 -0
  148. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  149. opik/rest_api/types/dataset_item_compare.py +1 -1
  150. opik/rest_api/types/dataset_item_filter.py +4 -0
  151. opik/rest_api/types/dataset_item_page_compare.py +0 -1
  152. opik/rest_api/types/dataset_item_page_public.py +0 -1
  153. opik/rest_api/types/dataset_item_public.py +1 -1
  154. opik/rest_api/types/dataset_public.py +2 -0
  155. opik/rest_api/types/dataset_version_public.py +10 -0
  156. opik/rest_api/types/dataset_version_summary.py +46 -0
  157. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  158. opik/rest_api/types/experiment.py +9 -0
  159. opik/rest_api/types/experiment_public.py +9 -0
  160. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  161. opik/rest_api/types/llm_as_judge_message_content.py +2 -0
  162. opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
  163. opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
  164. opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
  165. opik/rest_api/types/project.py +1 -0
  166. opik/rest_api/types/project_detailed.py +1 -0
  167. opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
  168. opik/rest_api/types/project_reference.py +31 -0
  169. opik/rest_api/types/project_reference_public.py +31 -0
  170. opik/rest_api/types/project_stats_summary_item.py +1 -0
  171. opik/rest_api/types/prompt_version.py +1 -0
  172. opik/rest_api/types/prompt_version_detail.py +1 -0
  173. opik/rest_api/types/prompt_version_page_public.py +5 -0
  174. opik/rest_api/types/prompt_version_public.py +1 -0
  175. opik/rest_api/types/prompt_version_update.py +33 -0
  176. opik/rest_api/types/provider_api_key.py +5 -1
  177. opik/rest_api/types/provider_api_key_provider.py +2 -1
  178. opik/rest_api/types/provider_api_key_public.py +5 -1
  179. opik/rest_api/types/provider_api_key_public_provider.py +2 -1
  180. opik/rest_api/types/service_toggles_config.py +11 -1
  181. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  182. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  183. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  184. opik/types.py +36 -0
  185. opik/validation/chat_prompt_messages.py +241 -0
  186. opik/validation/feedback_score.py +3 -3
  187. opik/validation/validator.py +28 -0
  188. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
  189. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
  190. opik/cli/export.py +0 -791
  191. opik/cli/import_command.py +0 -575
  192. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
  193. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
  194. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
  195. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
--- a/opik/api_objects/opik_client.py
+++ b/opik/api_objects/opik_client.py
@@ -2,7 +2,7 @@ import atexit
 import datetime
 import functools
 import logging
-from typing import Any, Dict, List, Optional, TypeVar, Union, Literal
+from typing import Any, Dict, List, Optional, TypeVar, Union, Literal, cast
 
 import httpx
 
@@ -42,9 +42,9 @@ from ..message_processing import (
     messages,
     streamer_constructors,
     message_queue,
-    message_processors_chain,
 )
 from ..message_processing.batching import sequence_splitter
+from ..message_processing.processors import message_processors_chain
 from ..rest_api import client as rest_api_client
 from ..rest_api.core.api_error import ApiError
 from ..rest_api.types import (
@@ -55,7 +55,13 @@ from ..rest_api.types import (
     span_filter_public,
     trace_filter_public,
 )
-from ..types import ErrorInfoDict, FeedbackScoreDict, LLMProvider, SpanType
+from ..types import (
+    BatchFeedbackScoreDict,
+    ErrorInfoDict,
+    FeedbackScoreDict,
+    LLMProvider,
+    SpanType,
+)
 
 LOGGER = logging.getLogger(__name__)
 
@@ -107,13 +113,7 @@ class Opik:
         self._use_batching = _use_batching
 
         self._initialize_streamer(
-            url_override=config_.url_override,
-            workers=config_.background_workers,
-            file_upload_worker_count=config_.file_upload_background_workers,
-            api_key=config_.api_key,
-            check_tls_certificate=config_.check_tls_certificate,
             use_batching=_use_batching,
-            enable_json_request_compression=config_.enable_json_request_compression,
         )
         atexit.register(self.end, timeout=self._flush_timeout)
 
@@ -152,24 +152,17 @@
 
     def _initialize_streamer(
         self,
-        url_override: str,
-        workers: int,
-        file_upload_worker_count: int,
-        api_key: Optional[str],
-        check_tls_certificate: bool,
         use_batching: bool,
-        enable_json_request_compression: bool,
     ) -> None:
-        httpx_client_ = httpx_client.get(
+        self._httpx_client = httpx_client.get(
             workspace=self._workspace,
-            api_key=api_key,
-            check_tls_certificate=check_tls_certificate,
-            compress_json_requests=enable_json_request_compression,
+            api_key=self._config.api_key,
+            check_tls_certificate=self._config.check_tls_certificate,
+            compress_json_requests=self._config.enable_json_request_compression,
         )
-        self._httpx_client = httpx_client_
         self._rest_client = rest_api_client.OpikApi(
-            base_url=url_override,
-            httpx_client=httpx_client_,
+            base_url=self._config.url_override,
+            httpx_client=self._httpx_client,
         )
         self._rest_client._client_wrapper._timeout = (
             httpx.USE_CLIENT_DEFAULT
@@ -181,19 +174,22 @@
             batch_factor=self._config.maximal_queue_size_batch_factor,
         )
 
-        self._message_processor = (
+        self.__internal_api__message_processor__ = (
             message_processors_chain.create_message_processors_chain(
                 rest_client=self._rest_client
             )
         )
         self._streamer = streamer_constructors.construct_online_streamer(
-            n_consumers=workers,
+            n_consumers=self._config.background_workers,
             rest_client=self._rest_client,
-            httpx_client=httpx_client_,
+            httpx_client=self._httpx_client,
             use_batching=use_batching,
-            file_upload_worker_count=file_upload_worker_count,
+            use_attachment_extraction=self._config.is_attachment_extraction_active,
+            min_base64_embedded_attachment_size=self._config.min_base64_embedded_attachment_size,
+            file_upload_worker_count=self._config.file_upload_background_workers,
             max_queue_size=max_queue_size,
-            message_processor=self._message_processor,
+            message_processor=self.__internal_api__message_processor__,
+            url_override=self._config.url_override,
         )
 
     def _display_trace_url(self, trace_id: str, project_name: str) -> None:
@@ -295,7 +291,9 @@
             for feedback_score in feedback_scores:
                 feedback_score["id"] = id
 
-            self.log_traces_feedback_scores(feedback_scores, project_name)
+            self.log_traces_feedback_scores(
+                cast(List[BatchFeedbackScoreDict], feedback_scores), project_name
+            )
 
         if attachments is not None:
             for attachment_data in attachments:
@@ -470,7 +468,9 @@
             for feedback_score in feedback_scores:
                 feedback_score["id"] = id
 
-            self.log_spans_feedback_scores(feedback_scores, project_name)
+            self.log_spans_feedback_scores(
+                cast(List[BatchFeedbackScoreDict], feedback_scores), project_name
+            )
 
         return span.span_client.create_span(
             trace_id=trace_id,
@@ -639,23 +639,34 @@
         )
 
     def log_spans_feedback_scores(
-        self, scores: List[FeedbackScoreDict], project_name: Optional[str] = None
+        self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
     ) -> None:
         """
         Log feedback scores for spans.
 
         Args:
-            scores (List[FeedbackScoreDict]): A list of feedback score dictionaries.
+            scores (List[BatchFeedbackScoreDict]): A list of feedback score dictionaries.
                 Specifying a span id via `id` key for each score is mandatory.
             project_name: The name of the project in which the spans are logged. If not set, the project name
                 which was configured when the Opik instance was created will be used.
+                Deprecated: use `project_name` in the feedback score dictionary that's listed in the `scores` parameter.
 
         Returns:
             None
+
+        Example:
+            >>> from opik import Opik
+            >>> client = Opik()
+            >>> # Batch logging across multiple projects
+            >>> scores = [
+            >>>     {"id": span1_id, "name": "accuracy", "value": 0.95, "project_name": "project-A"},
+            >>>     {"id": span2_id, "name": "accuracy", "value": 0.88, "project_name": "project-B"},
+            >>> ]
+            >>> client.log_spans_feedback_scores(scores=scores)
         """
         score_messages = helpers.parse_feedback_score_messages(
             scores=scores,
-            project_name=project_name or self._project_name,
+            project_name=project_name or self.project_name,
            parsed_item_class=messages.FeedbackScoreMessage,
            logger=LOGGER,
        )
@@ -677,23 +688,34 @@
         self._streamer.put(add_span_feedback_scores_batch_message)
 
     def log_traces_feedback_scores(
-        self, scores: List[FeedbackScoreDict], project_name: Optional[str] = None
+        self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
     ) -> None:
         """
         Log feedback scores for traces.
 
         Args:
-            scores (List[FeedbackScoreDict]): A list of feedback score dictionaries.
+            scores (List[BatchFeedbackScoreDict]): A list of feedback score dictionaries.
                 Specifying a trace id via `id` key for each score is mandatory.
             project_name: The name of the project in which the traces are logged. If not set, the project name
                 which was configured when the Opik instance was created will be used.
+                Deprecated: use `project_name` in the feedback score dictionary that's listed in the `scores` parameter.
 
         Returns:
             None
+
+        Example:
+            >>> from opik import Opik
+            >>> client = Opik()
+            >>> # Batch logging across multiple projects
+            >>> scores = [
+            >>>     {"id": trace1_id, "name": "accuracy", "value": 0.95, "project_name": "project-A"},
+            >>>     {"id": trace2_id, "name": "accuracy", "value": 0.88, "project_name": "project-B"},
+            >>> ]
+            >>> client.log_traces_feedback_scores(scores=scores)
         """
         score_messages = helpers.parse_feedback_score_messages(
             scores=scores,
-            project_name=project_name or self._project_name,
+            project_name=project_name or self.project_name,
            parsed_item_class=messages.FeedbackScoreMessage,
            logger=LOGGER,
        )
@@ -716,16 +738,17 @@
         self._streamer.put(add_trace_feedback_scores_batch_message)
 
     def log_threads_feedback_scores(
-        self, scores: List[FeedbackScoreDict], project_name: Optional[str] = None
+        self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
     ) -> None:
         """
         Log feedback scores for threads.
 
         Args:
-            scores (List[FeedbackScoreDict]): A list of feedback score dictionaries.
+            scores (List[BatchFeedbackScoreDict]): A list of feedback score dictionaries.
                 Specifying a thread id via `id` key for each score is mandatory.
             project_name: The name of the project in which the threads are logged. If not set, the project name
                 which was configured when the Opik instance was created will be used.
+                Deprecated: use `project_name` in the feedback score dictionary that's listed in the `scores` parameter.
 
         Returns:
             None
@@ -733,13 +756,10 @@
         Example:
             >>> from opik import Opik
             >>> client = Opik()
+            >>> # Batch logging across multiple projects
             >>> scores = [
-            >>>     {
-            >>>         "id": "thread_123",
-            >>>         "name": "user_satisfaction",
-            >>>         "value": 0.85,
-            >>>         "reason": "User seemed satisfied with the conversation"
-            >>>     }
+            >>>     {"id": "thread_123", "name": "user_satisfaction", "value": 0.85, "project_name": "project-A"},
+            >>>     {"id": "thread_456", "name": "user_satisfaction", "value": 0.92, "project_name": "project-B"},
             >>> ]
             >>> client.log_threads_feedback_scores(scores=scores)
         """
@@ -801,6 +821,7 @@
             name=name,
             description=dataset_fern.description,
             rest_client=self._rest_client,
+            dataset_items_count=dataset_fern.dataset_items_count,
         )
 
         dataset_.__internal_api__sync_hashes__()
@@ -886,6 +907,7 @@
             name=name,
             description=description,
             rest_client=self._rest_client,
+            dataset_items_count=0,
         )
 
         self._display_created_dataset_url(dataset_name=name, dataset_id=result.id)
@@ -921,6 +943,7 @@
         prompts: Optional[List[prompt_module.base_prompt.BasePrompt]] = None,
         type: Literal["regular", "trial", "mini-batch"] = "regular",
         optimization_id: Optional[str] = None,
+        tags: Optional[List[str]] = None,
     ) -> experiment.Experiment:
         """
         Creates a new experiment using the given dataset name and optional parameters.
@@ -934,6 +957,7 @@
             type: The type of the experiment. Can be "regular", "trial", or "mini-batch".
                 Defaults to "regular". "trial" and "mini-batch" are only relevant for prompt optimization experiments.
             optimization_id: Optional ID of the optimization associated with the experiment.
+            tags: Optional list of tags to associate with the experiment.
 
         Returns:
             experiment.Experiment: The newly created experiment object.
@@ -958,6 +982,7 @@
             prompt_versions=prompt_versions,
             type=type,
             optimization_id=optimization_id,
+            tags=tags,
         )
 
         experiment_ = experiment.Experiment(
@@ -968,6 +993,7 @@
             streamer=self._streamer,
             experiments_client=self.get_experiments_client(),
             prompts=checked_prompts,
+            tags=tags,
         )
 
         return experiment_
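
With `tags` now threaded from `create_experiment` through the REST call and into the `Experiment` object, experiments can be labeled at creation time. A minimal usage sketch (the dataset and experiment names are placeholders):

    from opik import Opik

    client = Opik()
    experiment = client.create_experiment(
        dataset_name="my-dataset",     # placeholder: must name an existing dataset
        name="baseline-run",
        tags=["baseline", "nightly"],  # new in this release
    )

The hunks below mirror the same field when experiments are fetched back from the server.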
@@ -1032,6 +1058,7 @@
             rest_client=self._rest_client,
             streamer=self._streamer,
             experiments_client=self.get_experiments_client(),
+            tags=experiment_public.tags,
         )
 
     def get_experiments_by_name(self, name: str) -> List[experiment.Experiment]:
@@ -1058,6 +1085,7 @@
                 rest_client=self._rest_client,
                 streamer=self._streamer,
                 experiments_client=self.get_experiments_client(),
+                tags=public_experiment.tags,
             )
             result.append(experiment_)
 
@@ -1091,6 +1119,7 @@
             rest_client=self._rest_client,
             streamer=self._streamer,
             experiments_client=self.get_experiments_client(),
+            tags=experiment_public.tags,
         )
 
     def end(self, timeout: Optional[int] = None) -> None:
@@ -1155,7 +1184,7 @@
             - `start_time`, `end_time`: =, >, <, >=, <=
             - `input`, `output`: =, contains, not_contains
             - `metadata`: =, contains, >, <
-            - `feedback_scores`: =, >, <, >=, <=
+            - `feedback_scores`: =, >, <, >=, <=, is_empty, is_not_empty
             - `tags`: contains (only)
             - `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`, `duration`, `number_of_messages`, `total_estimated_cost`: =, !=, >, <, >=, <=
 
@@ -1165,6 +1194,8 @@
             - `input contains "question"` - Filter by input content
             - `usage.total_tokens > 1000` - Filter by token usage
             - `feedback_scores.accuracy > 0.8` - Filter by feedback score
+            - `feedback_scores.my_metric is_empty` - Filter traces with empty feedback score
+            - `feedback_scores.my_metric is_not_empty` - Filter traces with non-empty feedback score
             - `tags contains "production"` - Filter by tag
             - `metadata.model = "gpt-4"` - Filter by metadata field
             - `thread_id = "thread_123"` - Filter by thread ID
@@ -1247,7 +1278,7 @@
             - `start_time`, `end_time`: =, >, <, >=, <=
             - `input`, `output`: =, contains, not_contains
             - `metadata`: =, contains, >, <
-            - `feedback_scores`: =, >, <, >=, <=
+            - `feedback_scores`: =, >, <, >=, <=, is_empty, is_not_empty
             - `tags`: contains (only)
             - `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`, `duration`, `number_of_messages`, `total_estimated_cost`: =, !=, >, <, >=, <=
 
@@ -1257,6 +1288,8 @@
             - `input contains "question"` - Filter by input content
             - `usage.total_tokens > 1000` - Filter by token usage
             - `feedback_scores.accuracy > 0.8` - Filter by feedback score
+            - `feedback_scores.my_metric is_empty` - Filter spans with empty feedback score
+            - `feedback_scores.my_metric is_not_empty` - Filter spans with non-empty feedback score
             - `tags contains "production"` - Filter by tag
             - `metadata.model = "gpt-4"` - Filter by metadata field
             - `thread_id = "thread_123"` - Filter by thread ID
--- a/opik/api_objects/opik_query_language.py
+++ b/opik/api_objects/opik_query_language.py
@@ -54,7 +54,7 @@ SUPPORTED_OPERATORS = {
     ],
     "output": ["=", "contains", "not_contains"],
     "metadata": ["=", "contains", ">", "<"],
-    "feedback_scores": ["=", ">", "<", ">=", "<="],
+    "feedback_scores": ["=", ">", "<", ">=", "<=", "is_empty", "is_not_empty"],
     "tags": ["contains"],
     "usage.total_tokens": ["=", "!=", ">", "<", ">=", "<="],
     "usage.prompt_tokens": ["=", "!=", ">", "<", ">=", "<="],
@@ -132,6 +132,8 @@ SUPPORTED_OPERATORS = {
     ],
 }
 
+OPERATORS_WITHOUT_VALUES = {"is_empty", "is_not_empty"}
+
 
 class OpikQueryLanguage:
     """
@@ -384,8 +386,12 @@
             # Parse operators
             parsed_operator = self._parse_operator(parsed_field["field"])
 
-            # Parse values
-            parsed_value = self._parse_value()
+            operator_name = parsed_operator.get("operator", "")
+            if operator_name in OPERATORS_WITHOUT_VALUES:
+                # For operators without values, use empty string as value
+                parsed_value = {"value": ""}
+            else:
+                parsed_value = self._parse_value()
 
             expressions.append({**parsed_field, **parsed_operator, **parsed_value})
 
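Because `is_empty` and `is_not_empty` take no right-hand value, the parser substitutes an empty string rather than calling `_parse_value()`. The new operators surface through the `filter_string` parameter of the search APIs documented above; a short sketch (the project and metric names are placeholders):

    from opik import Opik

    client = Opik()
    # Traces that do not yet have an "accuracy" feedback score
    unscored = client.search_traces(
        project_name="my-project",
        filter_string="feedback_scores.accuracy is_empty",
    )
    # Traces that already carry one
    scored = client.search_traces(
        project_name="my-project",
        filter_string="feedback_scores.accuracy is_not_empty",
    )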
--- a/opik/api_objects/prompt/chat/chat_prompt.py
+++ b/opik/api_objects/prompt/chat/chat_prompt.py
@@ -1,9 +1,11 @@
 import copy
 import json
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple, Type
+
 from typing_extensions import override
 
 from opik.rest_api import types as rest_api_types
+from opik.validation import chat_prompt_messages, validator
 from . import chat_prompt_template
 from .. import client as prompt_client
 from .. import types as prompt_types
@@ -16,6 +18,10 @@ class ChatPrompt(base_prompt.BasePrompt):
     Similar to Prompt but uses a list of chat messages instead of a string template.
     """
 
+    _parameter_validators: List[Tuple[str, Type[validator.RaisableValidator]]] = [
+        ("messages", chat_prompt_messages.ChatPromptMessagesValidator),
+    ]
+
     def __init__(
         self,
         name: str,
@@ -37,8 +43,12 @@
 
         Raises:
             PromptTemplateStructureMismatch: If a text prompt with the same name already exists (template structure is immutable).
+            ValidationError: If messages structure is invalid.
         """
 
+        # Validate messages structure
+        self._validate_inputs(messages=messages)
+
         self._chat_template = chat_prompt_template.ChatPromptTemplate(
             messages=messages,
             template_type=type,
@@ -54,6 +64,13 @@
 
         self._sync_with_backend()
 
+    def _validate_inputs(self, **kwargs: Any) -> None:
+        for parameter, validator_class in self._parameter_validators:
+            if parameter in kwargs:
+                validator_instance = validator_class(kwargs[parameter])
+                validator_instance.validate()
+                validator_instance.raise_if_validation_failed()
+
     def _sync_with_backend(self) -> None:
         from opik.api_objects import opik_client
 
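The validation is table-driven: `_parameter_validators` maps constructor parameters to validator classes, so future parameters can be validated by appending to the list. A usage sketch, assuming the conventional role/content message shape (all names and values are placeholders):

    from opik.api_objects.prompt.chat.chat_prompt import ChatPrompt

    # Well-formed messages pass validation before the backend sync runs
    prompt = ChatPrompt(
        name="greeting-prompt",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello, {{name}}!"},
        ],
    )

    # A malformed structure, e.g. messages=["not a message dict"], is rejected
    # by ChatPromptMessagesValidator with a ValidationError before any network call.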
--- a/opik/api_objects/prompt/client.py
+++ b/opik/api_objects/prompt/client.py
@@ -143,7 +143,14 @@ class PromptClient:
             commit=commit,
         )
 
-        # Client-side validation for template_structure if requested
+        should_skip_validation = (
+            prompt_version.template_structure is None
+            and raise_if_not_template_structure == "text"
+        )
+        if should_skip_validation:
+            return prompt_version
+
+        # Client-side validation for template_structure if requested and not skipped
         if (
             raise_if_not_template_structure is not None
             and prompt_version.template_structure != raise_if_not_template_structure
--- a/opik/api_objects/span/span_data.py
+++ b/opik/api_objects/span/span_data.py
@@ -1,6 +1,5 @@
 import dataclasses
 import datetime
-import logging
 from typing import Any, Dict, List, Optional, Union
 
 import opik.api_objects.attachment as attachment
@@ -13,20 +12,12 @@ from opik.types import (
     LLMProvider,
     SpanType,
 )
-from .. import helpers, data_helpers
+from .. import helpers
+from ..observation_data import ObservationData
 
-LOGGER = logging.getLogger(__name__)
 
-
-# Engineer note:
-#
-# After moving to minimal python version 3.10, a lot of common content
-# from SpanData and TraceData can be moved to ObservationData parent dataclass.
-# Before that it's impossible because of the dataclasses limitation to have optional arguments
-# strictly after positional ones (including the attributes from the parent class).
-# In python 3.10 @dataclass(kw_only=True) should help.
 @dataclasses.dataclass
-class SpanData:
+class SpanData(ObservationData):
     """
     The SpanData object is returned when calling :func:`opik.opik_context.get_current_span_data` from a tracked function.
     """
@@ -34,24 +25,11 @@ class SpanData:
     trace_id: str
     id: str = dataclasses.field(default_factory=helpers.generate_id)
     parent_span_id: Optional[str] = None
-    name: Optional[str] = None
     type: SpanType = "general"
-    start_time: Optional[datetime.datetime] = dataclasses.field(
-        default_factory=datetime_helpers.local_timestamp
-    )
-    end_time: Optional[datetime.datetime] = None
-    metadata: Optional[Dict[str, Any]] = None
-    input: Optional[Dict[str, Any]] = None
-    output: Optional[Dict[str, Any]] = None
-    tags: Optional[List[str]] = None
     usage: Optional[Union[Dict[str, Any], llm_usage.OpikUsage]] = None
-    feedback_scores: Optional[List[FeedbackScoreDict]] = None
-    project_name: Optional[str] = None
     model: Optional[str] = None
     provider: Optional[Union[str, LLMProvider]] = None
-    error_info: Optional[ErrorInfoDict] = None
     total_cost: Optional[float] = None
-    attachments: Optional[List[attachment.Attachment]] = None
 
     def create_child_span_data(
         self,
@@ -95,69 +73,6 @@
             attachments=attachments,
         )
 
-    def update(self, **new_data: Any) -> "SpanData":
-        """
-        Updates the attributes of the object with the provided key-value pairs. This method checks if
-        an attribute exists before updating it and merges the data appropriately for specific
-        keywords like metadata, output, input, attachments, and tags. If a key doesn't correspond
-        to an attribute of the object or the provided value is None, the update is skipped.
-
-        Args:
-            **new_data: Key-value pairs of attributes to update. Keys should match existing
-                attributes on the object, and values that are None will not update.
-
-        Returns:
-            SpanData: The updated object instance.
-        """
-        for key, value in new_data.items():
-            if value is None:
-                continue
-
-            if key not in self.__dict__ and key != "prompts":
-                LOGGER.debug(
-                    "An attempt to update span with parameter name it doesn't have: %s",
-                    key,
-                )
-                continue
-
-            if key == "metadata":
-                self.metadata = data_helpers.merge_metadata(
-                    self.metadata, new_metadata=value
-                )
-                continue
-            elif key == "output":
-                self.output = data_helpers.merge_outputs(self.output, new_outputs=value)
-                continue
-            elif key == "input":
-                self.input = data_helpers.merge_inputs(self.input, new_inputs=value)
-                continue
-            elif key == "attachments":
-                self._update_attachments(value)
-                continue
-            elif key == "tags":
-                self.tags = data_helpers.merge_tags(self.tags, new_tags=value)
-                continue
-            elif key == "prompts":
-                self.metadata = data_helpers.merge_metadata(
-                    self.metadata, new_metadata=new_data.get("metadata"), prompts=value
-                )
-                continue
-
-            self.__dict__[key] = value
-
-        return self
-
-    def init_end_time(self) -> "SpanData":
-        self.end_time = datetime_helpers.local_timestamp()
-
-        return self
-
-    def _update_attachments(self, attachments: List[attachment.Attachment]) -> None:
-        if self.attachments is None:
-            self.attachments = attachments
-        else:
-            self.attachments.extend(attachments)
-
     @property
     def as_start_parameters(self) -> Dict[str, Any]:
         """Returns parameters of this span to be sent to the server when starting a new span."""
--- a/opik/api_objects/threads/threads_client.py
+++ b/opik/api_objects/threads/threads_client.py
@@ -3,7 +3,7 @@ from typing import List, Optional
 
 import opik
 from opik.rest_api import TraceThread
-from opik.types import FeedbackScoreDict
+from opik.types import BatchFeedbackScoreDict
 
 from .. import helpers, rest_stream_parser, constants
 from ... import config
@@ -74,7 +74,7 @@ class ThreadsClient:
             - `start_time`, `end_time`: =, >, <, >=, <=
             - `input`, `output`: =, contains, not_contains
             - `metadata`: =, contains, >, <
-            - `feedback_scores`: =, >, <, >=, <=
+            - `feedback_scores`: =, >, <, >=, <=, is_empty, is_not_empty
             - `tags`: contains (only)
             - `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`, `duration`, `number_of_messages`, `total_estimated_cost`: =, !=, >, <, >=, <=
 
@@ -84,6 +84,8 @@
             - `duration > 300` - Filter by thread duration (seconds)
             - `number_of_messages >= 5` - Filter by message count
             - `feedback_scores.user_frustration > 0.5` - Filter by feedback score
+            - `feedback_scores.my_metric is_empty` - Filter threads with empty feedback score
+            - `feedback_scores.my_metric is_not_empty` - Filter threads with non-empty feedback score
             - `tags contains "important"` - Filter by tag
 
             If not provided, all threads in the project will be returned up to the limit.
@@ -127,7 +129,7 @@
         return threads
 
     def log_threads_feedback_scores(
-        self, scores: List[FeedbackScoreDict], project_name: Optional[str] = None
+        self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
     ) -> None:
         """
         Logs feedback scores for threads in a specific project. This method processes the given
@@ -138,7 +140,8 @@
             scores: A list of dictionaries containing feedback scores
                 for threads to be logged. Specifying a thread id via `id` key for each score is mandatory.
             project_name: The name of the project to associate with the logged
-                scores. If not provided, the scores won't be associated with any specific project.
+                scores. If not provided, the project name configured in the Opik client will be used.
+                This parameter is used as a fallback if `project_name` is not specified in the score dictionary.
         """
         project_name = project_name or self._opik_client.project_name