opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. opik/api_objects/attachment/attachment_context.py +36 -0
  2. opik/api_objects/attachment/attachments_extractor.py +153 -0
  3. opik/api_objects/attachment/client.py +1 -0
  4. opik/api_objects/attachment/converters.py +2 -0
  5. opik/api_objects/attachment/decoder.py +18 -0
  6. opik/api_objects/attachment/decoder_base64.py +83 -0
  7. opik/api_objects/attachment/decoder_helpers.py +137 -0
  8. opik/api_objects/constants.py +2 -0
  9. opik/api_objects/dataset/dataset.py +133 -40
  10. opik/api_objects/dataset/rest_operations.py +2 -0
  11. opik/api_objects/experiment/experiment.py +6 -0
  12. opik/api_objects/helpers.py +8 -4
  13. opik/api_objects/local_recording.py +6 -5
  14. opik/api_objects/observation_data.py +101 -0
  15. opik/api_objects/opik_client.py +78 -45
  16. opik/api_objects/opik_query_language.py +9 -3
  17. opik/api_objects/prompt/chat/chat_prompt.py +18 -1
  18. opik/api_objects/prompt/client.py +8 -1
  19. opik/api_objects/span/span_data.py +3 -88
  20. opik/api_objects/threads/threads_client.py +7 -4
  21. opik/api_objects/trace/trace_data.py +3 -74
  22. opik/api_objects/validation_helpers.py +3 -3
  23. opik/cli/exports/__init__.py +131 -0
  24. opik/cli/exports/dataset.py +278 -0
  25. opik/cli/exports/experiment.py +784 -0
  26. opik/cli/exports/project.py +685 -0
  27. opik/cli/exports/prompt.py +578 -0
  28. opik/cli/exports/utils.py +406 -0
  29. opik/cli/harbor.py +39 -0
  30. opik/cli/imports/__init__.py +439 -0
  31. opik/cli/imports/dataset.py +143 -0
  32. opik/cli/imports/experiment.py +1192 -0
  33. opik/cli/imports/project.py +262 -0
  34. opik/cli/imports/prompt.py +177 -0
  35. opik/cli/imports/utils.py +280 -0
  36. opik/cli/main.py +14 -12
  37. opik/config.py +12 -1
  38. opik/datetime_helpers.py +12 -0
  39. opik/decorator/arguments_helpers.py +4 -1
  40. opik/decorator/base_track_decorator.py +111 -37
  41. opik/decorator/context_manager/span_context_manager.py +5 -1
  42. opik/decorator/generator_wrappers.py +5 -4
  43. opik/decorator/span_creation_handler.py +13 -4
  44. opik/evaluation/engine/engine.py +111 -28
  45. opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
  46. opik/evaluation/evaluator.py +12 -0
  47. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
  48. opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
  49. opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
  50. opik/evaluation/metrics/heuristics/equals.py +11 -7
  51. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
  52. opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
  53. opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
  54. opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
  55. opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
  56. opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
  57. opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
  58. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
  59. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
  60. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
  61. opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
  62. opik/evaluation/metrics/ragas_metric.py +43 -23
  63. opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
  64. opik/evaluation/models/litellm/util.py +4 -20
  65. opik/evaluation/models/models_factory.py +19 -5
  66. opik/evaluation/rest_operations.py +3 -3
  67. opik/evaluation/threads/helpers.py +3 -2
  68. opik/file_upload/file_uploader.py +13 -0
  69. opik/file_upload/upload_options.py +2 -0
  70. opik/integrations/adk/legacy_opik_tracer.py +9 -11
  71. opik/integrations/adk/opik_tracer.py +2 -2
  72. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
  73. opik/integrations/dspy/callback.py +100 -14
  74. opik/integrations/dspy/parsers.py +168 -0
  75. opik/integrations/harbor/__init__.py +17 -0
  76. opik/integrations/harbor/experiment_service.py +269 -0
  77. opik/integrations/harbor/opik_tracker.py +528 -0
  78. opik/integrations/haystack/opik_tracer.py +2 -2
  79. opik/integrations/langchain/__init__.py +15 -2
  80. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  81. opik/integrations/langchain/opik_tracer.py +258 -160
  82. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
  83. opik/integrations/llama_index/callback.py +43 -6
  84. opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
  85. opik/integrations/openai/opik_tracker.py +99 -4
  86. opik/integrations/openai/videos/__init__.py +9 -0
  87. opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
  88. opik/integrations/openai/videos/videos_create_decorator.py +159 -0
  89. opik/integrations/openai/videos/videos_download_decorator.py +110 -0
  90. opik/message_processing/batching/base_batcher.py +14 -21
  91. opik/message_processing/batching/batch_manager.py +22 -10
  92. opik/message_processing/batching/batchers.py +32 -40
  93. opik/message_processing/batching/flushing_thread.py +0 -3
  94. opik/message_processing/emulation/emulator_message_processor.py +36 -1
  95. opik/message_processing/emulation/models.py +21 -0
  96. opik/message_processing/messages.py +9 -0
  97. opik/message_processing/preprocessing/__init__.py +0 -0
  98. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  99. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  100. opik/message_processing/preprocessing/constants.py +1 -0
  101. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  102. opik/message_processing/preprocessing/preprocessor.py +36 -0
  103. opik/message_processing/processors/__init__.py +0 -0
  104. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  105. opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
  106. opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
  107. opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
  108. opik/message_processing/queue_consumer.py +4 -2
  109. opik/message_processing/streamer.py +71 -33
  110. opik/message_processing/streamer_constructors.py +36 -8
  111. opik/plugins/pytest/experiment_runner.py +1 -1
  112. opik/plugins/pytest/hooks.py +5 -3
  113. opik/rest_api/__init__.py +38 -0
  114. opik/rest_api/datasets/client.py +249 -148
  115. opik/rest_api/datasets/raw_client.py +356 -217
  116. opik/rest_api/experiments/client.py +26 -0
  117. opik/rest_api/experiments/raw_client.py +26 -0
  118. opik/rest_api/llm_provider_key/client.py +4 -4
  119. opik/rest_api/llm_provider_key/raw_client.py +4 -4
  120. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
  121. opik/rest_api/manual_evaluation/client.py +101 -0
  122. opik/rest_api/manual_evaluation/raw_client.py +172 -0
  123. opik/rest_api/optimizations/client.py +0 -166
  124. opik/rest_api/optimizations/raw_client.py +0 -248
  125. opik/rest_api/projects/client.py +9 -0
  126. opik/rest_api/projects/raw_client.py +13 -0
  127. opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
  128. opik/rest_api/prompts/client.py +130 -2
  129. opik/rest_api/prompts/raw_client.py +175 -0
  130. opik/rest_api/traces/client.py +101 -0
  131. opik/rest_api/traces/raw_client.py +120 -0
  132. opik/rest_api/types/__init__.py +46 -0
  133. opik/rest_api/types/audio_url.py +19 -0
  134. opik/rest_api/types/audio_url_public.py +19 -0
  135. opik/rest_api/types/audio_url_write.py +19 -0
  136. opik/rest_api/types/automation_rule_evaluator.py +38 -2
  137. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
  138. opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
  139. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  140. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  141. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  142. opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
  143. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  144. opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
  145. opik/rest_api/types/dataset_item.py +1 -1
  146. opik/rest_api/types/dataset_item_batch.py +4 -0
  147. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  148. opik/rest_api/types/dataset_item_compare.py +1 -1
  149. opik/rest_api/types/dataset_item_filter.py +4 -0
  150. opik/rest_api/types/dataset_item_page_compare.py +0 -1
  151. opik/rest_api/types/dataset_item_page_public.py +0 -1
  152. opik/rest_api/types/dataset_item_public.py +1 -1
  153. opik/rest_api/types/dataset_version_public.py +5 -0
  154. opik/rest_api/types/dataset_version_summary.py +5 -0
  155. opik/rest_api/types/dataset_version_summary_public.py +5 -0
  156. opik/rest_api/types/experiment.py +9 -0
  157. opik/rest_api/types/experiment_public.py +9 -0
  158. opik/rest_api/types/llm_as_judge_message_content.py +2 -0
  159. opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
  160. opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
  161. opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
  162. opik/rest_api/types/project.py +1 -0
  163. opik/rest_api/types/project_detailed.py +1 -0
  164. opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
  165. opik/rest_api/types/project_reference.py +31 -0
  166. opik/rest_api/types/project_reference_public.py +31 -0
  167. opik/rest_api/types/project_stats_summary_item.py +1 -0
  168. opik/rest_api/types/prompt_version.py +1 -0
  169. opik/rest_api/types/prompt_version_detail.py +1 -0
  170. opik/rest_api/types/prompt_version_page_public.py +5 -0
  171. opik/rest_api/types/prompt_version_public.py +1 -0
  172. opik/rest_api/types/prompt_version_update.py +33 -0
  173. opik/rest_api/types/provider_api_key.py +5 -1
  174. opik/rest_api/types/provider_api_key_provider.py +2 -1
  175. opik/rest_api/types/provider_api_key_public.py +5 -1
  176. opik/rest_api/types/provider_api_key_public_provider.py +2 -1
  177. opik/rest_api/types/service_toggles_config.py +11 -1
  178. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  179. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  180. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  181. opik/types.py +36 -0
  182. opik/validation/chat_prompt_messages.py +241 -0
  183. opik/validation/feedback_score.py +3 -3
  184. opik/validation/validator.py +28 -0
  185. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/METADATA +5 -5
  186. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/RECORD +190 -141
  187. opik/cli/export.py +0 -791
  188. opik/cli/import_command.py +0 -575
  189. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
  190. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
  191. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
  192. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import datetime
3
+ import re
3
4
  from typing import (
4
5
  Any,
5
6
  Dict,
@@ -21,6 +22,7 @@ from langchain_core.tracers.schemas import Run
21
22
 
22
23
  from opik import context_storage, dict_utils, llm_usage, tracing_runtime_config
23
24
  from opik.api_objects import span, trace
25
+ from opik.decorator import arguments_helpers, span_creation_handler
24
26
  from opik.types import DistributedTraceHeadersDict, ErrorInfoDict
25
27
  from opik.validation import parameters_validator
26
28
  from . import (
@@ -53,6 +55,11 @@ SkipErrorCallback = Callable[[str], bool]
53
55
  # due to a handled/ignored error during execution.
54
56
  ERROR_SKIPPED_OUTPUTS = {"warning": "Error output skipped by skip_error_callback."}
55
57
 
58
+ # Constants for LangGraph interrupt/resume functionality
59
+ LANGGRAPH_INTERRUPT_OUTPUT_KEY = "__interrupt__"
60
+ LANGGRAPH_RESUME_INPUT_KEY = "__resume__"
61
+ LANGGRAPH_INTERRUPT_METADATA_KEY = "_langgraph_interrupt"
62
+
56
63
 
57
64
  class TrackRootRunResult(NamedTuple):
58
65
  new_trace_data: Optional[trace.TraceData]
@@ -77,6 +84,117 @@ def _get_run_metadata(run_dict: Dict[str, Any]) -> Dict[str, Any]:
77
84
  return run_dict["extra"].get("metadata", {})
78
85
 
79
86
 
87
+ def _parse_graph_interrupt_value(error_traceback: str) -> Optional[str]:
88
+ """
89
+ Parse GraphInterrupt error traceback to extract the interrupt value as a string.
90
+
91
+ The function extracts the value from the Interrupt object representation in the traceback.
92
+ It handles both string values (with quotes) and non-string values, including nested structures.
93
+ For string values, escape sequences are decoded (e.g., \\n becomes a newline character).
94
+
95
+ Args:
96
+ error_traceback: The error traceback string containing GraphInterrupt information.
97
+
98
+ Returns:
99
+ The interrupt value as a string if found, None otherwise.
100
+ """
101
+ # Search for GraphInterrupt( anywhere in the traceback
102
+ match = re.search(
103
+ r"GraphInterrupt\(.*?Interrupt\(value=",
104
+ error_traceback,
105
+ re.DOTALL,
106
+ )
107
+ if not match:
108
+ return None
109
+
110
+ # Start parsing from after "value="
111
+ start_pos = match.end()
112
+ value_str = error_traceback[start_pos:]
113
+
114
+ # Extract the value, handling nested parentheses and brackets
115
+ paren_depth = 0
116
+ bracket_depth = 0
117
+ brace_depth = 0
118
+ in_string = False
119
+ string_char = None
120
+ i = 0
121
+
122
+ for i, char in enumerate(value_str):
123
+ # Handle string boundaries
124
+ if char in ('"', "'") and (i == 0 or value_str[i - 1] != "\\"):
125
+ if not in_string:
126
+ in_string = True
127
+ string_char = char
128
+ elif char == string_char:
129
+ in_string = False
130
+ string_char = None
131
+
132
+ # Skip counting brackets/parens inside strings
133
+ if in_string:
134
+ continue
135
+
136
+ # Track nesting depth
137
+ if char == "(":
138
+ paren_depth += 1
139
+ elif char == ")":
140
+ if paren_depth > 0:
141
+ paren_depth -= 1
142
+ else:
143
+ # Found the closing paren of Interrupt(...), stop here
144
+ break
145
+ elif char == "[":
146
+ bracket_depth += 1
147
+ elif char == "]":
148
+ bracket_depth -= 1
149
+ elif char == "{":
150
+ brace_depth += 1
151
+ elif char == "}":
152
+ brace_depth -= 1
153
+ elif (
154
+ char == "," and paren_depth == 0 and bracket_depth == 0 and brace_depth == 0
155
+ ):
156
+ # Found a comma at the top level, stop here
157
+ break
158
+
159
+ # Extract and clean the value
160
+ value = value_str[:i].strip()
161
+
162
+ # Check if the value was originally a quoted string
163
+ was_quoted_string = False
164
+ if len(value) >= 2 and value[0] in ('"', "'") and value[-1] == value[0]:
165
+ was_quoted_string = True
166
+ value = value[1:-1]
167
+
168
+ # Decode escape sequences for string values
169
+ if was_quoted_string:
170
+ try:
171
+ value = value.encode("utf-8").decode("unicode_escape")
172
+ except (UnicodeDecodeError, AttributeError):
173
+ # If decoding fails, return the original value
174
+ pass
175
+
176
+ return value
177
+
178
+
179
+ def _extract_resume_value_from_command(obj: Any) -> Optional[str]:
180
+ """
181
+ Extract the resume value from a LangGraph Command object or serialized Command dict.
182
+
183
+ Args:
184
+ obj: A Command object or dict representing a serialized Command object (from run.dict()).
185
+
186
+ Returns:
187
+ The resume value as a string if found, None otherwise.
188
+ """
189
+ # Check if it's a Command object (has a resume attribute)
190
+ if hasattr(obj, "resume") and obj.resume is not None:
191
+ return str(obj.resume)
192
+ # Check if it's a serialized Command dict
193
+ if obj is not None and isinstance(obj, dict) and "resume" in obj:
194
+ return str(obj["resume"])
195
+ return None
196
+
197
+
80
198
  class OpikTracer(BaseTracer):
81
199
  """Langchain Opik Tracer."""
82
200
 
@@ -89,6 +207,7 @@ class OpikTracer(BaseTracer):
89
207
  distributed_headers: Optional[DistributedTraceHeadersDict] = None,
90
208
  thread_id: Optional[str] = None,
91
209
  skip_error_callback: Optional[SkipErrorCallback] = None,
210
+ opik_context_read_only_mode: bool = False,
92
211
  **kwargs: Any,
93
212
  ) -> None:
94
213
  """
@@ -108,6 +227,12 @@ class OpikTracer(BaseTracer):
108
227
  Please note that in traces/spans where errors are intentionally skipped,
109
228
  the output will be replaced with `ERROR_SKIPPED_OUTPUTS`. You can provide
110
229
  the output manually using `opik_context.get_current_span_data().update(output=...)`.
230
+ opik_context_read_only_mode: Whether to skip adding/popping spans/traces to/from the context storage.
231
+ * If False (default), OpikTracer will add created spans/traces to the opik context, so if there is a @track-decorated
232
+ function called inside the LangChain runnable, it will be attached to its parent span from LangChain automatically.
233
+ * If True, OpikTracer will not modify the context storage and only create spans/traces from LangChain's Run objects.
234
+ This might be useful when the environment doesn't support proper context isolation for concurrent operations and you
235
+ want to avoid modifying the Opik context stack because doing so would be unsafe.
111
236
  **kwargs: Additional arguments passed to the parent class constructor.
112
237
  """
113
238
  validator = parameters_validator.create_validator(
@@ -125,10 +250,7 @@ class OpikTracer(BaseTracer):
125
250
  self._trace_default_metadata["created_from"] = "langchain"
126
251
 
127
252
  if graph:
128
- self._trace_default_metadata["_opik_graph_definition"] = {
129
- "format": "mermaid",
130
- "data": graph.draw_mermaid(),
131
- }
253
+ self.set_graph(graph)
132
254
 
133
255
  self._trace_default_tags = tags
134
256
 
@@ -164,6 +286,23 @@ class OpikTracer(BaseTracer):
164
286
 
165
287
  self._skip_error_callback = skip_error_callback
166
288
 
289
+ self._opik_context_read_only_mode = opik_context_read_only_mode
290
+
291
+ def set_graph(self, graph: "Graph") -> None:
292
+ """
293
+ Set the LangGraph graph structure for visualization in Opik traces.
294
+
295
+ This method extracts the graph structure and stores it in trace metadata,
296
+ allowing the graph to be visualized in the Opik UI.
297
+
298
+ Args:
299
+ graph: A LangGraph Graph object (typically obtained via graph.get_graph(xray=True)).
300
+ """
301
+ self._trace_default_metadata["_opik_graph_definition"] = {
302
+ "format": "mermaid",
303
+ "data": graph.draw_mermaid(),
304
+ }
305
+
167
306
  def _is_opik_span_created_by_this_tracer(self, span_id: str) -> bool:
168
307
  return any(span_.id == span_id for span_ in self._span_data_map.values())
169
308
 
@@ -179,11 +318,16 @@ class OpikTracer(BaseTracer):
179
318
  trace_additional_metadata: Dict[str, Any] = {}
180
319
 
181
320
  error_str = run_dict.get("error")
182
- outputs = None
321
+ outputs: Optional[Dict[str, Any]] = None
183
322
  error_info = None
184
323
 
185
324
  if error_str is not None:
186
- if not self._should_skip_error(error_str):
325
+ # GraphInterrupt is not an error - it's a normal control flow for LangGraph
326
+ if interrupt_value := _parse_graph_interrupt_value(error_str):
327
+ outputs = {LANGGRAPH_INTERRUPT_OUTPUT_KEY: interrupt_value}
328
+ trace_additional_metadata[LANGGRAPH_INTERRUPT_METADATA_KEY] = True
329
+ # Don't set error_info - this is not an error
330
+ elif not self._should_skip_error(error_str):
187
331
  error_info = ErrorInfoDict(
188
332
  exception_type="Exception",
189
333
  traceback=error_str,
@@ -195,7 +339,8 @@ class OpikTracer(BaseTracer):
195
339
  langchain_helpers.split_big_langgraph_outputs(outputs)
196
340
  )
197
341
 
198
- self._ensure_no_hanging_opik_tracer_spans()
342
+ if not self._opik_context_read_only_mode:
343
+ self._ensure_no_hanging_opik_tracer_spans()
199
344
 
200
345
  span_data = self._span_data_map.get(run.id)
201
346
  if (
@@ -228,6 +373,25 @@ class OpikTracer(BaseTracer):
228
373
  # workaround for `.astream()` method usage
229
374
  if trace_data.input == {"input": ""}:
230
375
  trace_data.input = run_dict["inputs"]
376
+ elif isinstance(trace_data.input, dict) and "input" in trace_data.input:
377
+ input_value = trace_data.input.get("input")
378
+ if resume_value := _extract_resume_value_from_command(input_value):
379
+ trace_data.input = {LANGGRAPH_RESUME_INPUT_KEY: resume_value}
380
+
381
+ # Check if any child span has a GraphInterrupt output and use it for trace output
382
+ for _, span_data in self._span_data_map.items():
383
+ if (
384
+ span_data.trace_id == trace_data.id
385
+ and span_data.metadata is not None
386
+ and span_data.metadata.get(LANGGRAPH_INTERRUPT_METADATA_KEY) is True
387
+ ):
388
+ # Use the interrupt output from the child span
389
+ outputs = span_data.output
390
+ # Also propagate the interrupt metadata to trace
391
+ if trace_additional_metadata is None:
392
+ trace_additional_metadata = {}
393
+ trace_additional_metadata[LANGGRAPH_INTERRUPT_METADATA_KEY] = True
394
+ break
231
395
 
232
396
  if trace_additional_metadata:
233
397
  trace_data.update(metadata=trace_additional_metadata)
@@ -237,7 +401,8 @@ class OpikTracer(BaseTracer):
237
401
 
238
402
  assert trace_ is not None
239
403
  self._created_traces.append(trace_)
240
- self._opik_context_storage.pop_trace_data(ensure_id=trace_data.id)
404
+ if not self._opik_context_read_only_mode:
405
+ self._opik_context_storage.pop_trace_data(ensure_id=trace_data.id)
241
406
 
242
407
  def _ensure_no_hanging_opik_tracer_spans(self) -> None:
243
408
  root_run_external_parent_span_id = self._root_run_external_parent_span_id.get()
@@ -260,19 +425,7 @@ class OpikTracer(BaseTracer):
260
425
  root_metadata = dict_utils.deepmerge(self._trace_default_metadata, run_metadata)
261
426
  self._update_thread_id_from_metadata(run_dict)
262
427
 
263
- # Skip creating a span for root runs only when creating a new trace
264
- # Keep the span when invoked from a tracked function, existing trace or distributed headers
265
-
266
- if self._distributed_headers:
267
- new_span_data = self._attach_span_to_distributed_headers(
268
- run_dict=run_dict,
269
- metadata=root_metadata,
270
- )
271
- return TrackRootRunResult(
272
- new_trace_data=None,
273
- new_span_data=new_span_data,
274
- )
275
-
428
+ # Track the parent span ID for LangGraph cleanup later
276
429
  current_span_data = self._opik_context_storage.top_span_data()
277
430
  parent_span_id_when_langgraph_started = (
278
431
  current_span_data.id if current_span_data is not None else None
@@ -280,146 +433,49 @@ class OpikTracer(BaseTracer):
280
433
  self._root_run_external_parent_span_id.set(
281
434
  parent_span_id_when_langgraph_started
282
435
  )
283
- if current_span_data is not None:
284
- # When invoked from a tracked function, keep the root span
285
- # and attach it to the parent span (don't skip it)
286
- new_span_data = self._attach_span_to_external_span(
287
- run_dict=run_dict,
288
- current_span_data=current_span_data,
289
- root_metadata=root_metadata,
290
- )
291
- return TrackRootRunResult(
292
- new_trace_data=None,
293
- new_span_data=new_span_data,
294
- )
295
-
296
- current_trace_data = self._opik_context_storage.get_trace_data()
297
- if current_trace_data is not None:
298
- # When invoked under an existing trace, keep the root span
299
- # and attach it to the parent trace (don't skip it)
300
- new_span_data = self._attach_span_to_external_trace(
301
- run_dict=run_dict,
302
- current_trace_data=current_trace_data,
303
- root_metadata=root_metadata,
304
- )
305
- return TrackRootRunResult(
306
- new_trace_data=None,
307
- new_span_data=new_span_data,
308
- )
309
-
310
- return self._initialize_span_and_trace_from_scratch(
311
- run_dict=run_dict,
312
- root_metadata=root_metadata,
313
- allow_duplicating_root_span=allow_duplicating_root_span,
314
- )
315
-
316
- def _initialize_span_and_trace_from_scratch(
317
- self,
318
- run_dict: Dict[str, Any],
319
- root_metadata: Dict[str, Any],
320
- allow_duplicating_root_span: bool,
321
- ) -> TrackRootRunResult:
322
- trace_data = trace.TraceData(
323
- name=run_dict["name"],
324
- input=run_dict["inputs"],
325
- metadata=root_metadata,
326
- tags=self._trace_default_tags,
327
- project_name=self._project_name,
328
- thread_id=self._thread_id,
329
- )
330
436
 
331
- # Skip creating a span for LangGraph root runs - children will be attached directly to trace
332
- if _is_root_run(run_dict) and not allow_duplicating_root_span:
333
- return TrackRootRunResult(
334
- new_trace_data=trace_data,
335
- new_span_data=None,
336
- )
337
-
338
- span_data = span.SpanData(
339
- trace_id=trace_data.id,
340
- parent_span_id=None,
437
+ start_span_arguments = arguments_helpers.StartSpanParameters(
341
438
  name=run_dict["name"],
342
439
  input=run_dict["inputs"],
343
440
  type=_get_span_type(run_dict),
344
- metadata=root_metadata,
345
441
  tags=self._trace_default_tags,
442
+ metadata=root_metadata,
346
443
  project_name=self._project_name,
347
- )
348
- return TrackRootRunResult(new_trace_data=trace_data, new_span_data=span_data)
349
-
350
- def _attach_span_to_external_span(
351
- self,
352
- run_dict: Dict[str, Any],
353
- current_span_data: span.SpanData,
354
- root_metadata: Dict[str, Any],
355
- ) -> span.SpanData:
356
- project_name = helpers.resolve_child_span_project_name(
357
- current_span_data.project_name,
358
- self._project_name,
444
+ thread_id=self._thread_id,
359
445
  )
360
446
 
361
- span_data = span.SpanData(
362
- trace_id=current_span_data.trace_id,
363
- parent_span_id=current_span_data.id,
364
- name=run_dict["name"],
365
- input=run_dict["inputs"],
366
- metadata=root_metadata,
367
- tags=self._trace_default_tags,
368
- project_name=project_name,
369
- type=_get_span_type(run_dict),
447
+ span_creation_result = span_creation_handler.create_span_respecting_context(
448
+ start_span_arguments=start_span_arguments,
449
+ distributed_trace_headers=self._distributed_headers,
450
+ opik_context_storage=self._opik_context_storage,
370
451
  )
371
- if not self._is_opik_trace_created_by_this_tracer(span_data.trace_id):
372
- self._externally_created_traces_ids.add(span_data.trace_id)
373
-
374
- return span_data
375
452
 
376
- def _attach_span_to_external_trace(
377
- self,
378
- run_dict: Dict[str, Any],
379
- current_trace_data: trace.TraceData,
380
- root_metadata: Dict[str, Any],
381
- ) -> span.SpanData:
382
- project_name = helpers.resolve_child_span_project_name(
383
- current_trace_data.project_name,
384
- self._project_name,
453
+ trace_created_externally = (
454
+ span_creation_result.trace_data is None
455
+ and not self._is_opik_trace_created_by_this_tracer(
456
+ span_creation_result.span_data.trace_id
457
+ )
385
458
  )
459
+ if trace_created_externally:
460
+ self._externally_created_traces_ids.add(
461
+ span_creation_result.span_data.trace_id
462
+ )
386
463
 
387
- span_data = span.SpanData(
388
- trace_id=current_trace_data.id,
389
- parent_span_id=None,
390
- name=run_dict["name"],
391
- input=run_dict["inputs"],
392
- metadata=root_metadata,
393
- tags=self._trace_default_tags,
394
- project_name=project_name,
395
- type=_get_span_type(run_dict),
464
+ should_skip_root_span_creation = (
465
+ span_creation_result.trace_data is not None
466
+ and _is_root_run(run_dict)
467
+ and not allow_duplicating_root_span
396
468
  )
397
- span_data.update(metadata={"created_from": "langchain"})
398
-
399
- if not self._is_opik_trace_created_by_this_tracer(current_trace_data.id):
400
- self._externally_created_traces_ids.add(current_trace_data.id)
401
- return span_data
469
+ if should_skip_root_span_creation:
470
+ return TrackRootRunResult(
471
+ new_trace_data=span_creation_result.trace_data,
472
+ new_span_data=None,
473
+ )
402
474
 
403
- def _attach_span_to_distributed_headers(
404
- self,
405
- run_dict: Dict[str, Any],
406
- metadata: Dict[str, Any],
407
- ) -> span.SpanData:
408
- if self._distributed_headers is None:
409
- raise ValueError("Distributed headers are not set")
410
-
411
- span_data = span.SpanData(
412
- trace_id=self._distributed_headers["opik_trace_id"],
413
- parent_span_id=self._distributed_headers["opik_parent_span_id"],
414
- name=run_dict["name"],
415
- input=run_dict["inputs"],
416
- metadata=metadata,
417
- tags=self._trace_default_tags,
418
- project_name=self._project_name,
419
- type=_get_span_type(run_dict),
475
+ return TrackRootRunResult(
476
+ new_trace_data=span_creation_result.trace_data,
477
+ new_span_data=span_creation_result.span_data,
420
478
  )
421
- self._externally_created_traces_ids.add(span_data.trace_id)
422
- return span_data
423
479
 
424
480
  def _process_start_span(self, run: Run, allow_duplicating_root_span: bool) -> None:
425
481
  try:
@@ -468,7 +524,11 @@ class OpikTracer(BaseTracer):
468
524
  # This is the first run for the chain.
469
525
  root_run_result = self._track_root_run(run_dict, allow_duplicating_root_span)
470
526
  if root_run_result.new_trace_data is not None:
471
- self._opik_context_storage.set_trace_data(root_run_result.new_trace_data)
527
+ if not self._opik_context_read_only_mode:
528
+ self._opik_context_storage.set_trace_data(
529
+ root_run_result.new_trace_data
530
+ )
531
+
472
532
  if (
473
533
  self._opik_client.config.log_start_trace_span
474
534
  and tracing_runtime_config.is_tracing_active()
@@ -501,7 +561,9 @@ class OpikTracer(BaseTracer):
501
561
  trace_data=root_run_result.new_trace_data,
502
562
  )
503
563
 
504
- self._opik_context_storage.add_span_data(root_run_result.new_span_data)
564
+ if not self._opik_context_read_only_mode:
565
+ self._opik_context_storage.add_span_data(root_run_result.new_span_data)
566
+
505
567
  if (
506
568
  self._opik_client.config.log_start_trace_span
507
569
  and tracing_runtime_config.is_tracing_active()
@@ -549,7 +611,9 @@ class OpikTracer(BaseTracer):
549
611
  parent_run_id
550
612
  ]
551
613
 
552
- self._opik_context_storage.add_span_data(new_span_data)
614
+ if not self._opik_context_read_only_mode:
615
+ self._opik_context_storage.add_span_data(new_span_data)
616
+
553
617
  if (
554
618
  self._opik_client.config.log_start_trace_span
555
619
  and tracing_runtime_config.is_tracing_active()
@@ -586,19 +650,40 @@ class OpikTracer(BaseTracer):
586
650
 
587
651
  elif self._distributed_headers:
588
652
  # LangGraph with distributed headers - attach to distributed trace
589
- new_span_data = self._attach_span_to_distributed_headers(
590
- run_dict=run_dict,
653
+ new_span_data = span.SpanData(
654
+ trace_id=self._distributed_headers["opik_trace_id"],
655
+ parent_span_id=self._distributed_headers["opik_parent_span_id"],
656
+ name=run_dict["name"],
657
+ input=run_dict["inputs"],
591
658
  metadata=_get_run_metadata(run_dict),
659
+ tags=self._trace_default_tags,
660
+ project_name=self._project_name,
661
+ type=_get_span_type(run_dict),
592
662
  )
663
+ self._externally_created_traces_ids.add(new_span_data.trace_id)
664
+
593
665
  elif (
594
666
  current_trace_data := self._opik_context_storage.get_trace_data()
595
667
  ) is not None:
596
668
  # LangGraph attached to existing trace - attach children directly to trace
597
- new_span_data = self._attach_span_to_external_trace(
598
- run_dict=run_dict,
599
- current_trace_data=current_trace_data,
600
- root_metadata=_get_run_metadata(run_dict),
669
+ project_name = helpers.resolve_child_span_project_name(
670
+ current_trace_data.project_name,
671
+ self._project_name,
601
672
  )
673
+
674
+ new_span_data = span.SpanData(
675
+ trace_id=current_trace_data.id,
676
+ parent_span_id=None,
677
+ name=run_dict["name"],
678
+ input=run_dict["inputs"],
679
+ metadata=_get_run_metadata(run_dict),
680
+ tags=self._trace_default_tags,
681
+ project_name=project_name,
682
+ type=_get_span_type(run_dict),
683
+ )
684
+
685
+ if not self._is_opik_trace_created_by_this_tracer(current_trace_data.id):
686
+ self._externally_created_traces_ids.add(current_trace_data.id)
602
687
  else:
603
688
  LOGGER.warning(
604
689
  f"Cannot find trace data or distributed headers for LangGraph child run '{run_id}'"
@@ -612,7 +697,9 @@ class OpikTracer(BaseTracer):
612
697
  trace_data=None,
613
698
  )
614
699
 
615
- self._opik_context_storage.add_span_data(new_span_data)
700
+ if not self._opik_context_read_only_mode:
701
+ self._opik_context_storage.add_span_data(new_span_data)
702
+
616
703
  if (
617
704
  self._opik_client.config.log_start_trace_span
618
705
  and tracing_runtime_config.is_tracing_active()
@@ -641,8 +728,12 @@ class OpikTracer(BaseTracer):
641
728
  usage_info = llm_usage.LLMUsageInfo()
642
729
 
643
730
  # workaround for `.astream()` method usage
644
- if span_data.input == {"input": ""}:
731
+ if span_data.input == {"input": ""} or span_data.input == {"input": {}}:
645
732
  span_data.input = run_dict["inputs"]
733
+ elif isinstance(span_data.input, dict):
734
+ input_value = span_data.input.get("input")
735
+ if resume_value := _extract_resume_value_from_command(input_value):
736
+ span_data.input = {LANGGRAPH_RESUME_INPUT_KEY: resume_value}
646
737
 
647
738
  filtered_output, additional_metadata = (
648
739
  langchain_helpers.split_big_langgraph_outputs(run_dict["outputs"])
@@ -667,7 +758,7 @@ class OpikTracer(BaseTracer):
667
758
  except Exception as e:
668
759
  LOGGER.error(f"Failed during _process_end_span: {e}", exc_info=True)
669
760
  finally:
670
- if span_data is not None:
761
+ if span_data is not None and not self._opik_context_read_only_mode:
671
762
  self._opik_context_storage.trim_span_data_stack_to_certain_span(
672
763
  span_id=span_data.id
673
764
  )
@@ -696,7 +787,14 @@ class OpikTracer(BaseTracer):
696
787
  span_data = self._span_data_map[run.id]
697
788
  error_str = run_dict["error"]
698
789
 
699
- if self._should_skip_error(error_str):
790
+ # GraphInterrupt is not an error - it's a normal control flow for LangGraph
791
+ if interrupt_value := _parse_graph_interrupt_value(error_str):
792
+ span_data.init_end_time().update(
793
+ metadata={LANGGRAPH_INTERRUPT_METADATA_KEY: True},
794
+ output={LANGGRAPH_INTERRUPT_OUTPUT_KEY: interrupt_value},
795
+ )
796
+ # Don't set error_info - this is not an error
797
+ elif self._should_skip_error(error_str):
700
798
  span_data.init_end_time().update(output=ERROR_SKIPPED_OUTPUTS)
701
799
  else:
702
800
  error_info = ErrorInfoDict(
@@ -713,7 +811,7 @@ class OpikTracer(BaseTracer):
713
811
  except Exception as e:
714
812
  LOGGER.debug(f"Failed during _process_end_span_with_error: {e}")
715
813
  finally:
716
- if span_data is not None:
814
+ if span_data is not None and not self._opik_context_read_only_mode:
717
815
  self._opik_context_storage.trim_span_data_stack_to_certain_span(
718
816
  span_id=span_data.id
719
817
  )
@@ -99,6 +99,9 @@ def find_token_usage_dict(
99
99
  that includes one or more keys from the specified candidate keys and returns it.
100
100
  If no such dictionary is found, the function returns None.
101
101
 
102
+ Searches in reverse order to prioritize more recent data (e.g., in multi-turn conversations,
103
+ the most recent turn's usage data comes last in the structure).
104
+
102
105
  Args:
103
106
  all_keys_should_match: if True, all candidate keys must be present in the dictionary.
104
107
  data: A nested data structure containing dictionaries, lists, or tuples to search through.
@@ -117,15 +120,15 @@ def find_token_usage_dict(
117
120
  elif not all_keys_should_match and len(matched_keys) > 0:
118
121
  return data
119
122
 
120
- # Recursively search through dictionary values
121
- for value in data.values():
123
+ # Recursively search through dictionary values in reverse order
124
+ for value in list(data.values())[::-1]:
122
125
  result = find_token_usage_dict(value, candidate_keys, all_keys_should_match)
123
126
  if result is not None:
124
127
  return result
125
128
 
126
- # Handle list and tuple cases
129
+ # Handle list and tuple cases - search in reverse order
127
130
  elif isinstance(data, (list, tuple)):
128
- for item in data:
131
+ for item in reversed(data):
129
132
  result = find_token_usage_dict(item, candidate_keys, all_keys_should_match)
130
133
  if result is not None:
131
134
  return result