opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592)
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,528 @@
1
+ """
2
+ Opik tracking integration for Harbor benchmark evaluation framework.
3
+
4
+ This module provides the `track_harbor` function to add Opik tracing to Harbor Jobs,
5
+ enabling real-time streaming of trial results to Opik for visualization and analysis.
6
+
7
+ Example:
8
+ >>> from opik.integrations.harbor import track_harbor
9
+ >>> from harbor.job import Job
10
+ >>> import os
11
+ >>>
12
+ >>> os.environ["OPIK_PROJECT_NAME"] = "swebench-evaluation"
13
+ >>>
14
+ >>> job = Job(config)
15
+ >>> tracked_job = track_harbor(job)
16
+ >>> result = await tracked_job.run()
17
+
18
+ Or enable tracking globally (for CLI usage):
19
+ >>> from opik.integrations.harbor import track_harbor
20
+ >>> track_harbor()
21
+ >>> # Now run Harbor code - it will be traced
22
+ """
23
+
24
+ import functools
25
+ import logging
26
+ from typing import Any, Callable, Dict, List, Optional, Tuple
27
+ from typing_extensions import override
28
+
29
+ from harbor.job import Job
30
+ from harbor.models.trajectories.step import Step
31
+ from harbor.models.trial.result import TrialResult
32
+ from harbor.models.verifier.result import VerifierResult
33
+ from harbor.trial.trial import Trial
34
+ from harbor.verifier.verifier import Verifier
35
+
36
+ from opik import datetime_helpers, id_helpers, opik_context, track
37
+ from opik.api_objects import opik_client, span
38
+ from opik.decorator import arguments_helpers, base_track_decorator
39
+ from opik.types import FeedbackScoreDict, SpanType
40
+
41
+ from . import experiment_service
42
+
43
+ LOGGER = logging.getLogger(__name__)
44
+
45
+
46
class HarborTrialRunDecorator(base_track_decorator.BaseTrackDecorator):
    """
    Track decorator specialised for Harbor's ``Trial.run`` method.

    The start-span hook derives the trace name, input, metadata and tags from
    the trial configuration, so they are already set when the span/trace is
    sent to the backend.
    """

    @override
    def _start_span_inputs_preprocessor(
        self,
        func: Callable,
        track_options: arguments_helpers.TrackOptions,
        args: Tuple,
        kwargs: Dict[str, Any],
    ) -> arguments_helpers.StartSpanParameters:
        """Build the start-span parameters from the Trial found in ``args[0]``."""
        if not args:
            # Not expected for an instance method; fall back to the plain
            # track options without any trial-specific enrichment.
            fallback_name = track_options.name
            if fallback_name is None:
                fallback_name = func.__name__
            return arguments_helpers.StartSpanParameters(
                name=fallback_name,
                input=None,
                type=track_options.type,
                tags=track_options.tags,
                metadata=track_options.metadata,
                project_name=track_options.project_name,
            )

        # Trial.run is an instance method, so the Trial is the first argument.
        trial: Trial = args[0]
        config = trial.config

        trace_name = f"{config.agent.name}/{config.trial_name}"

        # Input payload describing the trial, its task and its agent.
        trial_name = config.trial_name
        if hasattr(config.task, "name"):
            task_label = config.task.name
        else:
            task_label = str(config.task.path)
        task_info = {
            "name": task_label,
            "source": getattr(config.task, "source", None),
        }
        agent_info = {
            "name": config.agent.name,
            "model": getattr(config.agent, "model_name", None),
        }
        input_dict: Dict[str, Any] = {
            "trial_name": trial_name,
            "task": task_info,
            "agent": agent_info,
        }

        # Metadata: work on a copy so the caller's options are not mutated.
        if track_options.metadata is not None:
            metadata = track_options.metadata.copy()
        else:
            metadata = {}
        metadata["created_from"] = "harbor"

        # Tags: copy as well, then make sure "harbor" and the agent name are present.
        tags = list(track_options.tags if track_options.tags is not None else [])
        if "harbor" not in tags:
            tags.append("harbor")
        if config.agent.name not in tags:
            tags.append(config.agent.name)

        return arguments_helpers.StartSpanParameters(
            name=trace_name,
            input=input_dict,
            type=track_options.type,
            tags=tags,
            metadata=metadata,
            project_name=track_options.project_name,
        )

    @override
    def _end_span_inputs_preprocessor(
        self,
        output: Any,
        capture_output: bool,
        current_span_data: span.SpanData,
    ) -> arguments_helpers.EndSpanParameters:
        """Return empty end-span parameters.

        The trial output is written by ``_wrap_trial_run`` through
        ``opik_context.update_current_trace``, so nothing is processed here.
        """
        return arguments_helpers.EndSpanParameters(output=None)

    @override
    def _streams_handler(
        self,
        output: Any,
        capture_output: bool,
        generations_aggregator: Optional[Callable[[List[Any]], Any]],
    ) -> Optional[Any]:
        """Trial.run needs no stream handling; always return ``None``."""
        return None
143
+
144
+
145
+ def _rewards_to_feedback_scores(
146
+ rewards: Optional[Dict[str, Any]],
147
+ error: Optional[str] = None,
148
+ ) -> List[FeedbackScoreDict]:
149
+ """Convert Harbor verifier rewards to Opik feedback scores."""
150
+ if rewards is None:
151
+ return []
152
+
153
+ feedback_scores: List[FeedbackScoreDict] = []
154
+ for name, value in rewards.items():
155
+ try:
156
+ float_value = float(value)
157
+
158
+ score = FeedbackScoreDict(name=name, value=float_value, reason=error)
159
+
160
+ feedback_scores.append(score)
161
+ except (ValueError, TypeError):
162
+ LOGGER.warning(
163
+ "Could not convert reward value to float: %s=%s", name, value
164
+ )
165
+
166
+ return feedback_scores
167
+
168
+
169
+ def _source_to_span_type(source: str) -> SpanType:
170
+ """Convert ATIF step source to Opik span type."""
171
+ if source == "agent":
172
+ return "llm"
173
+ return "general"
174
+
175
+
176
def _patch_step_class() -> None:
    """Patch the Harbor ``Step`` class to create an Opik span on instantiation.

    The replacement ``__init__`` first runs the original initializer and then,
    if Opik trace data is currently available, logs the step as a span under
    the current trace (and under the current span, when there is one).

    Span creation is best-effort: any exception raised while building or
    sending the span is logged at DEBUG level and never propagates to the code
    that constructs the ``Step``.

    The patch is installed at most once; a ``_patched`` attribute stored on
    this function records that it has already been applied.
    """
    # Check if already patched
    if hasattr(_patch_step_class, "_patched"):
        return

    original_init = Step.__init__

    @functools.wraps(original_init)
    def patched_init(self: Step, *args: Any, **kwargs: Any) -> None:
        original_init(self, *args, **kwargs)

        # Without current trace data there is nothing to attach the span to.
        trace_data = opik_context.get_current_trace_data()
        if trace_data is None:
            return

        parent_span = opik_context.get_current_span_data()
        parent_span_id = parent_span.id if parent_span else None

        try:
            client = opik_client.get_client_cached()

            # Span input: the step message and any tool calls it requested.
            input_dict: Dict[str, Any] = {}
            if self.message:
                input_dict["message"] = self.message
            if self.tool_calls:
                input_dict["tool_calls"] = [
                    {
                        "tool_call_id": tc.tool_call_id,
                        "function_name": tc.function_name,
                        "arguments": tc.arguments,
                    }
                    for tc in self.tool_calls
                ]

            # Span output: contents of the observation results, when present.
            output_dict: Optional[Dict[str, Any]] = None
            if self.observation and self.observation.results:
                output_dict = {
                    "results": [
                        {"content": r.content} for r in self.observation.results
                    ]
                }

            metadata: Dict[str, Any] = {
                "source": self.source,
                "created_from": "harbor",
            }
            if self.reasoning_content:
                metadata["reasoning"] = self.reasoning_content

            usage: Optional[Dict[str, Any]] = None
            total_cost: Optional[float] = None
            if self.metrics:
                prompt_tokens = self.metrics.prompt_tokens
                completion_tokens = self.metrics.completion_tokens

                usage = {}
                if prompt_tokens is not None:
                    usage["prompt_tokens"] = prompt_tokens
                if completion_tokens is not None:
                    usage["completion_tokens"] = completion_tokens
                # Compare against None rather than truthiness: a legitimate
                # count of 0 must not suppress total_tokens.
                if prompt_tokens is not None and completion_tokens is not None:
                    usage["total_tokens"] = prompt_tokens + completion_tokens
                if not usage:
                    usage = None
                total_cost = getattr(self.metrics, "cost_usd", None)

            client.span(
                id=id_helpers.generate_id(),
                trace_id=trace_data.id,
                parent_span_id=parent_span_id,
                name=f"step_{self.step_id}",
                type=_source_to_span_type(self.source),
                start_time=datetime_helpers.parse_iso_timestamp(self.timestamp),
                input=input_dict if input_dict else None,
                output=output_dict,
                metadata=metadata,
                usage=usage,
                total_cost=total_cost,
                # The model name is only reported for steps whose source is "agent".
                model=self.model_name if self.source == "agent" else None,
                tags=["harbor", self.source],
            )

        except Exception as e:
            # Deliberately best-effort: tracing must not break Step construction.
            LOGGER.debug("Failed to create span for step: %s", e)

    Step.__init__ = patched_init  # type: ignore
    setattr(_patch_step_class, "_patched", True)
263
+
264
+
265
def _enable_harbor_tracking(project_name: Optional[str] = None) -> None:
    """Internal: install Opik tracing on Harbor's classes.

    Wraps the relevant ``Trial`` methods and ``Verifier.verify`` with tracing
    wrappers, then patches the ``Step`` class. A method that already carries
    an ``opik_tracked`` attribute is left untouched, so calling this function
    repeatedly does not wrap anything twice.

    Args:
        project_name: Opik project name handed to every wrapper (may be None).
    """
    # (owner class, attribute name, wrapper factory), in patch order.
    patch_plan = (
        (Trial, "run", _wrap_trial_run),
        (Trial, "_setup_environment", _wrap_setup_environment),
        (Trial, "_setup_agent", _wrap_setup_agent),
        (Trial, "_execute_agent", _wrap_execute_agent),
        (Trial, "_run_verification", _wrap_run_verification),
        (Verifier, "verify", _wrap_verify),
    )

    for owner, attr_name, make_wrapper in patch_plan:
        current = getattr(owner, attr_name)
        if hasattr(current, "opik_tracked"):
            # Already wrapped by a previous call.
            continue
        setattr(owner, attr_name, make_wrapper(current, project_name))

    # Patch Step class for real-time step tracking
    _patch_step_class()

    LOGGER.info("Opik tracking enabled for Harbor")
301
+
302
+
303
def track_harbor(
    job: Optional["Job"] = None,
    project_name: Optional[str] = None,
) -> Optional["Job"]:
    """Turn on Opik tracking for Harbor.

    Two calling styles are supported:
        - ``track_harbor()`` enables tracking globally (for CLI usage).
        - ``track_harbor(job)`` enables tracking and hands the job back
          (for SDK usage).

    Tracking is installed by patching Harbor classes, so it is not limited to
    the job passed in.

    Args:
        job: Optional Harbor Job instance; returned unchanged when given.
        project_name: Opik project name forwarded to the tracking setup.

    Returns:
        The same ``job`` object that was passed in, or ``None`` if none was.

    Example:
        >>> from opik.integrations.harbor import track_harbor
        >>> job = Job(config)
        >>> tracked_job = track_harbor(job)
        >>> result = await tracked_job.run()
    """
    _enable_harbor_tracking(project_name=project_name)
    return job
328
+
329
+
330
def _wrap_trial_run(original: Callable, project_name: Optional[str]) -> Callable:
    """Return a traced replacement for ``Trial.run``.

    The replacement lazily sets up the experiment service, awaits the original
    method, writes the trial output and feedback scores to the current trace,
    and finally links the trace to the experiment when a service is available.

    Args:
        original: The unwrapped ``Trial.run`` coroutine function.
        project_name: Opik project name passed to the track decorator.

    Returns:
        The tracked coroutine function to install in place of ``Trial.run``.
    """

    decorator = HarborTrialRunDecorator()

    @decorator.track(
        tags=["harbor"],
        project_name=project_name,
        capture_output=False,
    )
    @functools.wraps(original)
    async def wrapped(self: Trial) -> TrialResult:
        config = self.config

        # Set the experiment service up on first use, so experiment tracking
        # works for both SDK and CLI modes. Failures here are only logged.
        if experiment_service.get_service() is None:
            try:
                # Use job_id for consistent experiment naming
                if config.job_id:
                    experiment_name = f"harbor-job-{str(config.job_id)[:8]}"
                else:
                    experiment_name = None

                # Experiment config carries agent/model info
                experiment_config: Dict[str, Any] = {
                    "agent_name": config.agent.name,
                }
                agent_model = getattr(config.agent, "model_name", None)
                if agent_model:
                    experiment_config["model_name"] = agent_model

                LOGGER.debug(
                    "Lazily setting up experiment service: experiment_name=%s",
                    experiment_name,
                )
                experiment_service.setup_lazy(
                    experiment_name=experiment_name,
                    experiment_config=experiment_config,
                )
            except Exception as e:
                LOGGER.debug("Failed to lazily setup experiment service: %s", e)

        result: TrialResult = await original(self)

        # Trace output, plus rewards and feedback scores when the verifier
        # produced any.
        output_dict: Dict[str, Any] = {
            "trial_name": result.trial_name,
            "task_name": result.task_name,
        }
        feedback_scores = None
        if result.verifier_result and result.verifier_result.rewards:
            output_dict["rewards"] = result.verifier_result.rewards

            # Prefer the verifier's error text, then the trial's, as the reason.
            error_msg = getattr(result.verifier_result, "error", None)
            if not error_msg:
                error_msg = getattr(result, "error", None)
            feedback_scores = _rewards_to_feedback_scores(
                result.verifier_result.rewards, error=error_msg
            )

        opik_context.update_current_trace(
            output=output_dict,
            feedback_scores=feedback_scores,
        )

        # Link to experiment
        trace_data = opik_context.get_current_trace_data()
        if trace_data is None:
            return result

        service = experiment_service.get_service()
        LOGGER.debug(
            "Linking trial to experiment: trial=%s, trace_id=%s, service=%s",
            config.trial_name,
            trace_data.id,
            service,
        )
        if service is None:
            LOGGER.debug(
                "No experiment service available, skipping experiment linking"
            )
            return result

        source = getattr(config.task, "source", None)
        if hasattr(config.task, "name"):
            task_name = config.task.name
        else:
            task_name = str(config.task.path)
        service.link_trial_to_experiment(
            trial_name=config.trial_name,
            trace_id=trace_data.id,
            source=source,
            task_name=task_name,
        )

        return result

    return wrapped
427
+
428
+
429
def _wrap_setup_environment(
    original: Callable, project_name: Optional[str]
) -> Callable:
    """Return a traced replacement for ``Trial._setup_environment``.

    The span is named ``setup_environment``; its input and metadata are set
    before the original coroutine runs and a completion status is written
    as output afterwards.
    """

    async def traced_setup_environment(self: Trial) -> None:
        opik_context.update_current_span(
            input={"phase": "environment_setup"},
            metadata={"created_from": "harbor"},
        )
        await original(self)
        opik_context.update_current_span(output={"status": "completed"})

    # Copy the original's metadata first, then add tracking on top.
    renamed = functools.wraps(original)(traced_setup_environment)
    tracker = track(
        name="setup_environment", tags=["harbor"], project_name=project_name
    )
    return tracker(renamed)
445
+
446
+
447
def _wrap_setup_agent(original: Callable, project_name: Optional[str]) -> Callable:
    """Return a traced replacement for ``Trial._setup_agent``.

    The span is named ``setup_agent``; its input and metadata are set before
    the original coroutine runs and a completion status is written as output
    afterwards.
    """

    async def traced_setup_agent(self: Trial) -> None:
        opik_context.update_current_span(
            input={"phase": "agent_setup"},
            metadata={"created_from": "harbor"},
        )
        await original(self)
        opik_context.update_current_span(output={"status": "completed"})

    # Copy the original's metadata first, then add tracking on top.
    renamed = functools.wraps(original)(traced_setup_agent)
    tracker = track(name="setup_agent", tags=["harbor"], project_name=project_name)
    return tracker(renamed)
461
+
462
+
463
def _wrap_execute_agent(original: Callable, project_name: Optional[str]) -> Callable:
    """Return a traced replacement for ``Trial._execute_agent``.

    The span is named ``execute_agent``. When the trial has a truthy ``_task``
    attribute, the task instruction is recorded as span input; otherwise the
    input is an empty dict.
    """

    async def traced_execute_agent(self: Trial) -> None:
        task = getattr(self, "_task", None)
        span_input = {"instruction": task.instruction} if task else {}
        opik_context.update_current_span(
            input=span_input,
            metadata={"created_from": "harbor"},
        )
        await original(self)
        opik_context.update_current_span(output={"status": "completed"})

    # Copy the original's metadata first, then add tracking on top.
    renamed = functools.wraps(original)(traced_execute_agent)
    tracker = track(name="execute_agent", tags=["harbor"], project_name=project_name)
    return tracker(renamed)
480
+
481
+
482
def _wrap_run_verification(original: Callable, project_name: Optional[str]) -> Callable:
    """Return a traced replacement for ``Trial._run_verification``.

    The span is named ``run_verification``; its input and metadata are set
    before the original coroutine runs and a completion status is written as
    output afterwards.
    """

    async def traced_run_verification(self: Trial) -> None:
        opik_context.update_current_span(
            input={"phase": "verification"},
            metadata={"created_from": "harbor"},
        )
        await original(self)
        opik_context.update_current_span(output={"status": "completed"})

    # Copy the original's metadata first, then add tracking on top.
    renamed = functools.wraps(original)(traced_run_verification)
    tracker = track(
        name="run_verification", tags=["harbor"], project_name=project_name
    )
    return tracker(renamed)
496
+
497
+
498
def _wrap_verify(original: Callable, project_name: Optional[str]) -> Callable:
    """Return a traced replacement for ``Verifier.verify``.

    The span is named ``verify``. Its output is the verifier's rewards when
    the result has any, and a plain completion status otherwise. The verifier
    result itself is returned unchanged.
    """

    async def traced_verify(self: Verifier) -> VerifierResult:
        opik_context.update_current_span(
            input={"phase": "verify"},
            metadata={"created_from": "harbor"},
        )
        result: VerifierResult = await original(self)

        span_output: Dict[str, Any]
        if result.rewards:
            span_output = {"rewards": result.rewards}
        else:
            span_output = {"status": "completed"}
        opik_context.update_current_span(output=span_output)

        return result

    # Copy the original's metadata first, then add tracking on top.
    renamed = functools.wraps(original)(traced_verify)
    tracker = track(name="verify", tags=["harbor"], project_name=project_name)
    return tracker(renamed)
520
+
521
+
522
def reset_harbor_tracking() -> None:
    """Reset Harbor tracking state (intended for tests).

    Only the experiment service is reset. Patched methods stay in place;
    the `opik_tracked` marker keeps them from being wrapped a second time.
    """
    experiment_service.reset()
@@ -4,8 +4,8 @@ from typing import Any, Dict, Optional
4
4
  import haystack
5
5
  from haystack import tracing
6
6
 
7
- import opik.api_objects.opik_client as opik_client
8
- import opik.decorator.tracing_runtime_config as tracing_runtime_config
7
+ from opik import tracing_runtime_config
8
+ from opik.api_objects import opik_client
9
9
  from . import opik_tracer
10
10
 
11
11
  LOGGER = logging.getLogger(__name__)
@@ -5,10 +5,8 @@ from typing import Any, Dict, Iterator, List, Optional, Union
5
5
 
6
6
  from haystack import tracing
7
7
 
8
- import opik.url_helpers as url_helpers
9
- import opik.decorator.tracing_runtime_config as tracing_runtime_config
10
- import opik.decorator.span_creation_handler as span_creation_handler
11
- import opik.decorator.arguments_helpers as arguments_helpers
8
+ from opik import tracing_runtime_config, url_helpers
9
+ from opik.decorator import arguments_helpers, span_creation_handler
12
10
  from opik.api_objects import opik_client
13
11
  from opik.api_objects import span as opik_span
14
12
  from opik.api_objects import trace as opik_trace
@@ -82,9 +80,7 @@ class OpikTracer(tracing.Tracer):
82
80
  ) -> opik_span_bridge.OpikSpanBridge:
83
81
  """Create a span or trace based on existing context using span_creation_handler."""
84
82
  # For pipeline operations, use the pipeline name, otherwise use component name
85
- final_name = (
86
- self._name if operation_name == constants.PIPELINE_RUN_KEY else span_name
87
- )
83
+ final_name = self._name if "pipeline.run" in operation_name else span_name
88
84
  metadata = {"created_from": "haystack", "operation": operation_name}
89
85
 
90
86
  # Always use span_creation_handler - it handles existing context properly
@@ -1,3 +1,5 @@
1
1
  from .opik_tracer import OpikTracer
2
+ from .langgraph_async_context_bridge import extract_current_langgraph_span_data
3
+ from .langgraph_tracer_injector import track_langgraph
2
4
 
3
- __all__ = ["OpikTracer"]
5
+ __all__ = ["OpikTracer", "extract_current_langgraph_span_data", "track_langgraph"]
@@ -0,0 +1,96 @@
1
+ import logging
2
+ from typing import Dict, Any, Tuple
3
+
4
+ from ... import _logging
5
+
6
+
7
+ LOGGER = logging.getLogger(__name__)
8
+ LANGGRAPH_OUTPUT_SIZE_THRESHOLD = 5000
9
+
10
+
11
+ def _extract_command_update(outputs: Dict[str, Any]) -> Dict[str, Any]:
12
+ """Extract state updates from LangGraph Command objects.
13
+
14
+ When a LangGraph node returns a Command, LangChain wraps it in {"output": Command(...)}.
15
+ This function detects Command objects and extracts the update dict to properly log state changes.
16
+
17
+ Args:
18
+ outputs: The outputs dict from a LangChain Run.
19
+
20
+ Returns:
21
+ The extracted update dict if a Command is found, otherwise the original outputs.
22
+ """
23
+ if "output" in outputs and len(outputs) == 1:
24
+ output_value = outputs["output"]
25
+ # Duck-type check for Command object
26
+ if hasattr(output_value, "update") and hasattr(output_value, "goto"):
27
+ try:
28
+ update_dict = output_value.update
29
+ if isinstance(update_dict, dict):
30
+ _logging.log_once_at_level(
31
+ logging.DEBUG,
32
+ "Extracted state update from LangGraph Command object",
33
+ LOGGER,
34
+ )
35
+ return update_dict
36
+ except Exception as e:
37
+ LOGGER.warning(
38
+ f"Failed to extract update from Command-like object: {e}",
39
+ exc_info=True,
40
+ )
41
+
42
+ return outputs
43
+
44
+
45
def split_big_langgraph_outputs(
    outputs: Dict[str, Any],
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Reduce an oversized LangGraph output to its messages for thread display.

    An output is filtered only when it is a dict that contains "messages"
    alongside at least one other key and whose string form is longer than
    LANGGRAPH_OUTPUT_SIZE_THRESHOLD characters. In that case the displayed
    output keeps "messages" (and "thread_id" when present), while the full
    output and a description of the filtering go into the metadata.

    Returns:
        tuple: (filtered_output_for_display, additional_metadata_for_span).
        When nothing is filtered, the output is returned as is together with
        an empty metadata dict.
    """
    if not isinstance(outputs, dict):
        return outputs, {}

    outputs = _extract_command_update(outputs)

    # Only outputs that look like LangGraph state are candidates.
    if "messages" not in outputs or len(outputs) <= 1:
        return outputs, {}

    output_size = len(str(outputs))
    if output_size <= LANGGRAPH_OUTPUT_SIZE_THRESHOLD:
        return outputs, {}

    _logging.log_once_at_level(
        logging.WARNING,
        f"Filtering large LangGraph output ({output_size} chars) for thread display",
        LOGGER,
    )

    display_output = {"messages": outputs["messages"]}
    if "thread_id" in outputs:
        display_output["thread_id"] = outputs["thread_id"]

    # Every key that is dropped from the displayed output.
    dropped_keys = []
    for key in outputs.keys():
        if key not in ["messages", "thread_id"]:
            dropped_keys.append(key)

    filtering_info = {
        "filtered": True,
        "original_size_chars": output_size,
        "filtered_keys": dropped_keys,
        "reason": "Large LangGraph output filtered for better thread display",
    }
    span_metadata = {
        "_opik_langgraph_full_output": outputs,
        "_opik_output_filtering": filtering_info,
    }

    return display_output, span_metadata