lmnr 0.4.53.dev0__py3-none-any.whl → 0.7.26__py3-none-any.whl

This diff represents the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (133)
  1. lmnr/__init__.py +32 -11
  2. lmnr/cli/__init__.py +270 -0
  3. lmnr/cli/datasets.py +371 -0
  4. lmnr/cli/evals.py +111 -0
  5. lmnr/cli/rules.py +42 -0
  6. lmnr/opentelemetry_lib/__init__.py +70 -0
  7. lmnr/opentelemetry_lib/decorators/__init__.py +337 -0
  8. lmnr/opentelemetry_lib/litellm/__init__.py +685 -0
  9. lmnr/opentelemetry_lib/litellm/utils.py +100 -0
  10. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +849 -0
  11. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/config.py +13 -0
  12. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_emitter.py +211 -0
  13. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_models.py +41 -0
  14. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/span_utils.py +401 -0
  15. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py +425 -0
  16. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py +332 -0
  17. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/version.py +1 -0
  18. lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/__init__.py +451 -0
  19. lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/proxy.py +144 -0
  20. lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_agent/__init__.py +100 -0
  21. lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/__init__.py +476 -0
  22. lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/utils.py +12 -0
  23. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +599 -0
  24. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/config.py +9 -0
  25. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +26 -0
  26. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/utils.py +330 -0
  27. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +488 -0
  28. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/config.py +8 -0
  29. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_emitter.py +143 -0
  30. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_models.py +41 -0
  31. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/span_utils.py +229 -0
  32. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/utils.py +92 -0
  33. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/version.py +1 -0
  34. lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/__init__.py +381 -0
  35. lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/utils.py +36 -0
  36. lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +121 -0
  37. lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/utils.py +60 -0
  38. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
  39. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +472 -0
  40. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1185 -0
  41. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +305 -0
  42. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
  43. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +312 -0
  44. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
  45. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
  46. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
  47. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +197 -0
  48. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
  49. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +368 -0
  50. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +325 -0
  51. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +135 -0
  52. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +786 -0
  53. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
  54. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openhands_ai/__init__.py +388 -0
  55. lmnr/opentelemetry_lib/opentelemetry/instrumentation/opentelemetry/__init__.py +69 -0
  56. lmnr/opentelemetry_lib/opentelemetry/instrumentation/skyvern/__init__.py +191 -0
  57. lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +197 -0
  58. lmnr/opentelemetry_lib/tracing/__init__.py +263 -0
  59. lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +516 -0
  60. lmnr/{openllmetry_sdk → opentelemetry_lib}/tracing/attributes.py +21 -8
  61. lmnr/opentelemetry_lib/tracing/context.py +200 -0
  62. lmnr/opentelemetry_lib/tracing/exporter.py +153 -0
  63. lmnr/opentelemetry_lib/tracing/instruments.py +140 -0
  64. lmnr/opentelemetry_lib/tracing/processor.py +193 -0
  65. lmnr/opentelemetry_lib/tracing/span.py +398 -0
  66. lmnr/opentelemetry_lib/tracing/tracer.py +57 -0
  67. lmnr/opentelemetry_lib/tracing/utils.py +62 -0
  68. lmnr/opentelemetry_lib/utils/package_check.py +18 -0
  69. lmnr/opentelemetry_lib/utils/wrappers.py +11 -0
  70. lmnr/sdk/browser/__init__.py +0 -0
  71. lmnr/sdk/browser/background_send_events.py +158 -0
  72. lmnr/sdk/browser/browser_use_cdp_otel.py +100 -0
  73. lmnr/sdk/browser/browser_use_otel.py +142 -0
  74. lmnr/sdk/browser/bubus_otel.py +71 -0
  75. lmnr/sdk/browser/cdp_utils.py +518 -0
  76. lmnr/sdk/browser/inject_script.js +514 -0
  77. lmnr/sdk/browser/patchright_otel.py +151 -0
  78. lmnr/sdk/browser/playwright_otel.py +322 -0
  79. lmnr/sdk/browser/pw_utils.py +363 -0
  80. lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
  81. lmnr/sdk/browser/utils.py +70 -0
  82. lmnr/sdk/client/asynchronous/async_client.py +180 -0
  83. lmnr/sdk/client/asynchronous/resources/__init__.py +6 -0
  84. lmnr/sdk/client/asynchronous/resources/base.py +32 -0
  85. lmnr/sdk/client/asynchronous/resources/browser_events.py +41 -0
  86. lmnr/sdk/client/asynchronous/resources/datasets.py +131 -0
  87. lmnr/sdk/client/asynchronous/resources/evals.py +266 -0
  88. lmnr/sdk/client/asynchronous/resources/evaluators.py +85 -0
  89. lmnr/sdk/client/asynchronous/resources/tags.py +83 -0
  90. lmnr/sdk/client/synchronous/resources/__init__.py +6 -0
  91. lmnr/sdk/client/synchronous/resources/base.py +32 -0
  92. lmnr/sdk/client/synchronous/resources/browser_events.py +40 -0
  93. lmnr/sdk/client/synchronous/resources/datasets.py +131 -0
  94. lmnr/sdk/client/synchronous/resources/evals.py +263 -0
  95. lmnr/sdk/client/synchronous/resources/evaluators.py +85 -0
  96. lmnr/sdk/client/synchronous/resources/tags.py +83 -0
  97. lmnr/sdk/client/synchronous/sync_client.py +191 -0
  98. lmnr/sdk/datasets/__init__.py +94 -0
  99. lmnr/sdk/datasets/file_utils.py +91 -0
  100. lmnr/sdk/decorators.py +163 -26
  101. lmnr/sdk/eval_control.py +3 -2
  102. lmnr/sdk/evaluations.py +403 -191
  103. lmnr/sdk/laminar.py +1080 -549
  104. lmnr/sdk/log.py +7 -2
  105. lmnr/sdk/types.py +246 -134
  106. lmnr/sdk/utils.py +151 -7
  107. lmnr/version.py +46 -0
  108. {lmnr-0.4.53.dev0.dist-info → lmnr-0.7.26.dist-info}/METADATA +152 -106
  109. lmnr-0.7.26.dist-info/RECORD +116 -0
  110. lmnr-0.7.26.dist-info/WHEEL +4 -0
  111. lmnr-0.7.26.dist-info/entry_points.txt +3 -0
  112. lmnr/cli.py +0 -101
  113. lmnr/openllmetry_sdk/.python-version +0 -1
  114. lmnr/openllmetry_sdk/__init__.py +0 -72
  115. lmnr/openllmetry_sdk/config/__init__.py +0 -9
  116. lmnr/openllmetry_sdk/decorators/base.py +0 -185
  117. lmnr/openllmetry_sdk/instruments.py +0 -38
  118. lmnr/openllmetry_sdk/tracing/__init__.py +0 -1
  119. lmnr/openllmetry_sdk/tracing/content_allow_list.py +0 -24
  120. lmnr/openllmetry_sdk/tracing/context_manager.py +0 -13
  121. lmnr/openllmetry_sdk/tracing/tracing.py +0 -884
  122. lmnr/openllmetry_sdk/utils/in_memory_span_exporter.py +0 -61
  123. lmnr/openllmetry_sdk/utils/package_check.py +0 -7
  124. lmnr/openllmetry_sdk/version.py +0 -1
  125. lmnr/sdk/datasets.py +0 -55
  126. lmnr-0.4.53.dev0.dist-info/LICENSE +0 -75
  127. lmnr-0.4.53.dev0.dist-info/RECORD +0 -33
  128. lmnr-0.4.53.dev0.dist-info/WHEEL +0 -4
  129. lmnr-0.4.53.dev0.dist-info/entry_points.txt +0 -3
  130. /lmnr/{openllmetry_sdk → opentelemetry_lib}/.flake8 +0 -0
  131. /lmnr/{openllmetry_sdk → opentelemetry_lib}/utils/__init__.py +0 -0
  132. /lmnr/{openllmetry_sdk → opentelemetry_lib}/utils/json_encoder.py +0 -0
  133. /lmnr/{openllmetry_sdk/decorators/__init__.py → py.typed} +0 -0
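Much of this list reflects the replacement of the internal openllmetry_sdk package by the new opentelemetry_lib package (files removed under lmnr/openllmetry_sdk/ and added under lmnr/opentelemetry_lib/, plus the moves in items 60 and 130-133). As a purely hypothetical illustration — these modules are internal, so user code should rarely import them directly — any such import would change like this:

    # Hypothetical example of the internal package rename; not public API.
    import lmnr.openllmetry_sdk.tracing.attributes    # 0.4.x path (removed)
    import lmnr.opentelemetry_lib.tracing.attributes  # 0.7.x path (added)

The single file diff reproduced below is item 52, the new OpenAI Responses API wrapper module.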
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py (new file)
@@ -0,0 +1,786 @@
+ import json
+ import pydantic
+ import re
+ import time
+
+ from openai import AsyncStream, Stream
+
+ # Conditional imports for backward compatibility
+ try:
+     from openai.types.responses import (
+         FunctionToolParam,
+         Response,
+         ResponseInputItemParam,
+         ResponseInputParam,
+         ResponseOutputItem,
+         ResponseUsage,
+         ToolParam,
+     )
+     from openai.types.responses.response_output_message_param import (
+         ResponseOutputMessageParam,
+     )
+
+     RESPONSES_AVAILABLE = True
+ except ImportError:
+     # Fallback types for older OpenAI SDK versions
+     from typing import Any, Dict, List, Union
+
+     # Create basic fallback types
+     FunctionToolParam = Dict[str, Any]
+     Response = Any
+     ResponseInputItemParam = Dict[str, Any]
+     ResponseInputParam = Union[str, List[Dict[str, Any]]]
+     ResponseOutputItem = Dict[str, Any]
+     ResponseUsage = Dict[str, Any]
+     ToolParam = Dict[str, Any]
+     ResponseOutputMessageParam = Dict[str, Any]
+     RESPONSES_AVAILABLE = False
+
+ from lmnr.opentelemetry_lib.tracing.context import (
+     get_current_context,
+     get_event_attributes_from_context,
+ )
+ from lmnr.sdk.utils import json_dumps
+ from openai._legacy_response import LegacyAPIResponse
+ from opentelemetry import context as context_api
+ from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+ from opentelemetry.semconv_ai import SpanAttributes
+ from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
+ from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
+     GEN_AI_COMPLETION,
+     GEN_AI_PROMPT,
+     GEN_AI_USAGE_INPUT_TOKENS,
+     GEN_AI_USAGE_OUTPUT_TOKENS,
+     GEN_AI_RESPONSE_ID,
+     GEN_AI_REQUEST_MODEL,
+     GEN_AI_RESPONSE_MODEL,
+     GEN_AI_SYSTEM,
+ )
+ from opentelemetry.trace import SpanKind, Span, StatusCode, Tracer
+ from typing import Any, Optional, Union
+ from typing_extensions import NotRequired
+
+ from ..shared import (
+     _set_span_attribute,
+     model_as_dict,
+ )
+
+ from ..utils import (
+     _with_tracer_wrapper,
+     dont_throw,
+     should_send_prompts,
+ )
+
+ SPAN_NAME = "openai.response"
+
+
+ def prepare_input_param(input_param: ResponseInputItemParam) -> ResponseInputItemParam:
+     """
+     Looks like the OpenAI API infers the type "message" if the shape is correct,
+     but the type is not specified.
+     It is marked as required on the message types. We add this to our
+     traced data to make it work.
+     """
+     try:
+         d = model_as_dict(input_param)
+         if "type" not in d:
+             d["type"] = "message"
+         if RESPONSES_AVAILABLE:
+             return ResponseInputItemParam(**d)
+         else:
+             return d
+     except Exception:
+         return input_param
+
+
+ def process_input(inp: ResponseInputParam) -> ResponseInputParam:
+     if not isinstance(inp, list):
+         return inp
+     return [prepare_input_param(item) for item in inp]
+
+
+ def is_validator_iterator(content):
+     """
+     Some OpenAI objects contain fields typed as Iterable, which pydantic
+     internally converts to a ValidatorIterator, and they cannot be trivially
+     serialized without consuming the iterator to, for example, a list.
+
+     See: https://github.com/pydantic/pydantic/issues/9541#issuecomment-2189045051
+     """
+     return re.search(r"pydantic.*ValidatorIterator'>$", str(type(content)))
+
+
+ # OpenAI API accepts output messages without an ID in its inputs, but
+ # the ID is marked as required in the output type.
+ if RESPONSES_AVAILABLE:
+
+     class ResponseOutputMessageParamWithoutId(ResponseOutputMessageParam):
+         id: NotRequired[str]
+
+ else:
+     # Fallback for older SDK versions
+     ResponseOutputMessageParamWithoutId = dict
+
+
+ class TracedData(pydantic.BaseModel):
+     start_time: float  # time.time_ns()
+     response_id: str
+     # actually Union[str, list[Union[ResponseInputItemParam, ResponseOutputMessageParamWithoutId]]],
+     # but this only works properly in Python 3.10+ / newer pydantic
+     input: Any
+     # system message
+     instructions: Optional[str] = pydantic.Field(default=None)
+     # TODO: remove Any with newer Python / pydantic
+     tools: Optional[list[Union[Any, ToolParam]]] = pydantic.Field(default=None)
+     output_blocks: Optional[dict[str, ResponseOutputItem]] = pydantic.Field(
+         default=None
+     )
+     usage: Optional[ResponseUsage] = pydantic.Field(default=None)
+     output_text: Optional[str] = pydantic.Field(default=None)
+     request_model: Optional[str] = pydantic.Field(default=None)
+     response_model: Optional[str] = pydantic.Field(default=None)
+
+     # Reasoning attributes
+     request_reasoning_summary: Optional[str] = pydantic.Field(default=None)
+     request_reasoning_effort: Optional[str] = pydantic.Field(default=None)
+
+     request_service_tier: Optional[str] = pydantic.Field(default=None)
+     response_service_tier: Optional[str] = pydantic.Field(default=None)
+
+
+ responses: dict[str, TracedData] = {}
+
+
+ def parse_response(response: Union[LegacyAPIResponse, Response]) -> Response:
+     if isinstance(response, LegacyAPIResponse):
+         return response.parse()
+     return response
+
+
+ def get_tools_from_kwargs(kwargs: dict) -> list[ToolParam]:
+     tools_input = kwargs.get("tools", [])
+     tools = []
+
+     for tool in tools_input:
+         if tool.get("type") == "function":
+             if RESPONSES_AVAILABLE:
+                 tools.append(FunctionToolParam(**tool))
+             else:
+                 tools.append(tool)
+
+     return tools
+
+
+ def process_content_block(
+     block: dict[str, Any],
+ ) -> dict[str, Any]:
+     # TODO: keep the original type once backend supports it
+     if block.get("type") in ["text", "input_text", "output_text"]:
+         return {"type": "text", "text": block.get("text")}
+     elif block.get("type") in ["image", "input_image", "output_image"]:
+         return {
+             "type": "image",
+             "image_url": block.get("image_url"),
+             "detail": block.get("detail"),
+             "file_id": block.get("file_id"),
+         }
+     elif block.get("type") in ["file", "input_file", "output_file"]:
+         return {
+             "type": "file",
+             "file_id": block.get("file_id"),
+             "filename": block.get("filename"),
+             "file_data": block.get("file_data"),
+         }
+     return block
+
+
+ @dont_throw
+ def set_data_attributes(traced_response: TracedData, span: Span):
+     _set_span_attribute(span, GEN_AI_SYSTEM, "openai")
+     _set_span_attribute(span, GEN_AI_REQUEST_MODEL, traced_response.request_model)
+     _set_span_attribute(span, GEN_AI_RESPONSE_ID, traced_response.response_id)
+     _set_span_attribute(span, GEN_AI_RESPONSE_MODEL, traced_response.response_model)
+     if usage := traced_response.usage:
+         _set_span_attribute(span, GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens)
+         _set_span_attribute(span, GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens)
+         _set_span_attribute(
+             span, SpanAttributes.LLM_USAGE_TOTAL_TOKENS, usage.total_tokens
+         )
+         if usage.input_tokens_details:
+             _set_span_attribute(
+                 span,
+                 SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS,
+                 usage.input_tokens_details.cached_tokens,
+             )
+
+         reasoning_tokens = None
+         if usage.output_tokens_details:
+             reasoning_tokens = usage.output_tokens_details.reasoning_tokens
+
+         _set_span_attribute(
+             span,
+             SpanAttributes.LLM_USAGE_REASONING_TOKENS,
+             reasoning_tokens or 0,
+         )
+
+     _set_span_attribute(
+         span,
+         f"{SpanAttributes.LLM_REQUEST_REASONING_SUMMARY}",
+         traced_response.request_reasoning_summary or (),
+     )
+
+     _set_span_attribute(
+         span,
+         f"{SpanAttributes.LLM_REQUEST_REASONING_EFFORT}",
+         traced_response.request_reasoning_effort or (),
+     )
+
+     _set_span_attribute(
+         span,
+         "openai.request.service_tier",
+         traced_response.request_service_tier,
+     )
+     _set_span_attribute(
+         span,
+         "openai.response.service_tier",
+         traced_response.response_service_tier,
+     )
+
+     if should_send_prompts():
+         prompt_index = 0
+         if traced_response.tools:
+             for i, tool_param in enumerate(traced_response.tools):
+                 tool_dict = model_as_dict(tool_param)
+                 description = tool_dict.get("description")
+                 parameters = tool_dict.get("parameters")
+                 name = tool_dict.get("name")
+                 if parameters is None:
+                     continue
+                 _set_span_attribute(
+                     span,
+                     f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.description",
+                     description,
+                 )
+                 _set_span_attribute(
+                     span,
+                     f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.parameters",
+                     json.dumps(parameters),
+                 )
+                 _set_span_attribute(
+                     span,
+                     f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.name",
+                     name,
+                 )
+         if traced_response.instructions:
+             _set_span_attribute(
+                 span,
+                 f"{GEN_AI_PROMPT}.{prompt_index}.content",
+                 traced_response.instructions,
+             )
+             _set_span_attribute(span, f"{GEN_AI_PROMPT}.{prompt_index}.role", "system")
+             prompt_index += 1
+
+         if isinstance(traced_response.input, str):
+             _set_span_attribute(
+                 span, f"{GEN_AI_PROMPT}.{prompt_index}.content", traced_response.input
+             )
+             _set_span_attribute(span, f"{GEN_AI_PROMPT}.{prompt_index}.role", "user")
+             prompt_index += 1
+         else:
+             for block in traced_response.input:
+                 block_dict = model_as_dict(block)
+                 if block_dict.get("type", "message") == "message":
+                     content = block_dict.get("content")
+                     if is_validator_iterator(content):
+                         # we're after the actual call here, so we can consume the iterator
+                         content = [process_content_block(block) for block in content]
+                     try:
+                         stringified_content = (
+                             content if isinstance(content, str) else json.dumps(content)
+                         )
+                     except Exception:
+                         stringified_content = (
+                             str(content) if content is not None else ""
+                         )
+                     _set_span_attribute(
+                         span,
+                         f"{GEN_AI_PROMPT}.{prompt_index}.content",
+                         stringified_content,
+                     )
+                     _set_span_attribute(
+                         span,
+                         f"{GEN_AI_PROMPT}.{prompt_index}.role",
+                         block_dict.get("role"),
+                     )
+                     prompt_index += 1
+                 elif block_dict.get("type") == "computer_call_output":
+                     _set_span_attribute(
+                         span,
+                         f"{GEN_AI_PROMPT}.{prompt_index}.role",
+                         "computer_call_output",
+                     )
+                     output_image_url = block_dict.get("output", {}).get("image_url")
+                     if output_image_url:
+                         _set_span_attribute(
+                             span,
+                             f"{GEN_AI_PROMPT}.{prompt_index}.content",
+                             json.dumps(
+                                 [
+                                     {
+                                         "type": "image_url",
+                                         "image_url": {"url": output_image_url},
+                                     }
+                                 ]
+                             ),
+                         )
+                     prompt_index += 1
+                 elif block_dict.get("type") == "computer_call":
+                     _set_span_attribute(
+                         span, f"{GEN_AI_PROMPT}.{prompt_index}.role", "assistant"
+                     )
+                     call_content = {}
+                     if block_dict.get("id"):
+                         call_content["id"] = block_dict.get("id")
+                     if block_dict.get("action"):
+                         call_content["action"] = block_dict.get("action")
+                     _set_span_attribute(
+                         span,
+                         f"{GEN_AI_PROMPT}.{prompt_index}.tool_calls.0.arguments",
+                         json.dumps(call_content),
+                     )
+                     _set_span_attribute(
+                         span,
+                         f"{GEN_AI_PROMPT}.{prompt_index}.tool_calls.0.id",
+                         block_dict.get("call_id"),
+                     )
+                     _set_span_attribute(
+                         span,
+                         f"{GEN_AI_PROMPT}.{prompt_index}.tool_calls.0.name",
+                         "computer_call",
+                     )
+                     prompt_index += 1
+                 elif block_dict.get("type") == "reasoning":
+                     reasoning_summary = block_dict.get("summary")
+                     if reasoning_summary and isinstance(reasoning_summary, list):
+                         processed_chunks = [
+                             {"type": "text", "text": chunk.get("text")}
+                             for chunk in reasoning_summary
+                             if isinstance(chunk, dict)
+                             and chunk.get("type") == "summary_text"
+                         ]
+                         _set_span_attribute(
+                             span,
+                             f"{GEN_AI_PROMPT}.{prompt_index}.reasoning",
+                             json_dumps(processed_chunks),
+                         )
+                         _set_span_attribute(
+                             span,
+                             f"{GEN_AI_PROMPT}.{prompt_index}.role",
+                             "assistant",
+                         )
+                     # reasoning is followed by other content parts in the same message,
+                     # so we don't increment the prompt index
+                 # TODO: handle other block types
+
+         _set_span_attribute(span, f"{GEN_AI_COMPLETION}.0.role", "assistant")
+         if traced_response.output_text:
+             _set_span_attribute(
+                 span, f"{GEN_AI_COMPLETION}.0.content", traced_response.output_text
+             )
+         tool_call_index = 0
+         for block in traced_response.output_blocks.values():
+             block_dict = model_as_dict(block)
+             if block_dict.get("type") == "message":
+                 # either a refusal or handled in output_text above
+                 continue
+             if block_dict.get("type") == "function_call":
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.id",
+                     block_dict.get("id"),
+                 )
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.name",
+                     block_dict.get("name"),
+                 )
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.arguments",
+                     block_dict.get("arguments"),
+                 )
+                 tool_call_index += 1
+             elif block_dict.get("type") == "file_search_call":
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.id",
+                     block_dict.get("id"),
+                 )
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.name",
+                     "file_search_call",
+                 )
+                 tool_call_index += 1
+             elif block_dict.get("type") == "web_search_call":
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.id",
+                     block_dict.get("id"),
+                 )
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.name",
+                     "web_search_call",
+                 )
+                 tool_call_index += 1
+             elif block_dict.get("type") == "computer_call":
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.id",
+                     block_dict.get("call_id"),
+                 )
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.name",
+                     "computer_call",
+                 )
+                 _set_span_attribute(
+                     span,
+                     f"{GEN_AI_COMPLETION}.0.tool_calls.{tool_call_index}.arguments",
+                     json.dumps(block_dict.get("action")),
+                 )
+                 tool_call_index += 1
+             elif block_dict.get("type") == "reasoning":
+                 reasoning_summary = block_dict.get("summary")
+                 if reasoning_summary and isinstance(reasoning_summary, list):
+                     processed_chunks = [
+                         {"type": "text", "text": chunk.get("text")}
+                         for chunk in reasoning_summary
+                         if isinstance(chunk, dict)
+                         and chunk.get("type") == "summary_text"
+                     ]
+                     _set_span_attribute(
+                         span,
+                         "gen_ai.completion.0.reasoning",
+                         json_dumps(processed_chunks),
+                     )
+             # TODO: handle other block types, in particular other calls
+
+
+ @dont_throw
+ @_with_tracer_wrapper
+ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwargs):
+     if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
+         return wrapped(*args, **kwargs)
+     start_time = time.time_ns()
+
+     try:
+         response = wrapped(*args, **kwargs)
+         if isinstance(response, Stream):
+             return response
+     except Exception as e:
+         response_id = kwargs.get("response_id")
+         existing_data = {}
+         if response_id and response_id in responses:
+             existing_data = responses[response_id].model_dump()
+         try:
+             traced_data = TracedData(
+                 start_time=existing_data.get("start_time", start_time),
+                 response_id=response_id or "",
+                 input=process_input(
+                     kwargs.get("input", existing_data.get("input", []))
+                 ),
+                 instructions=kwargs.get(
+                     "instructions", existing_data.get("instructions")
+                 ),
+                 tools=get_tools_from_kwargs(kwargs) or existing_data.get("tools", []),
+                 output_blocks=existing_data.get("output_blocks", {}),
+                 usage=existing_data.get("usage"),
+                 output_text=kwargs.get(
+                     "output_text", existing_data.get("output_text", "")
+                 ),
+                 request_model=kwargs.get(
+                     "model", existing_data.get("request_model", "")
+                 ),
+                 response_model=existing_data.get("response_model", ""),
+                 request_reasoning_summary=kwargs.get("reasoning", {}).get(
+                     "summary", existing_data.get("request_reasoning_summary")
+                 ),
+                 request_reasoning_effort=kwargs.get("reasoning", {}).get(
+                     "effort", existing_data.get("request_reasoning_effort")
+                 ),
+                 request_service_tier=kwargs.get(
+                     "service_tier", existing_data.get("request_service_tier")
+                 ),
+                 # response_service_tier=existing_data.get("response_service_tier"),
+             )
+         except Exception:
+             traced_data = None
+
+         span = tracer.start_span(
+             SPAN_NAME,
+             kind=SpanKind.CLIENT,
+             start_time=(
+                 start_time if traced_data is None else int(traced_data.start_time)
+             ),
+             context=get_current_context(),
+         )
+         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+         span.record_exception(e, attributes=get_event_attributes_from_context())
+         span.set_status(StatusCode.ERROR, str(e))
+         if traced_data:
+             set_data_attributes(traced_data, span)
+         span.end()
+         raise
+     parsed_response = parse_response(response)
+
+     existing_data = responses.get(parsed_response.id)
+     if existing_data is None:
+         existing_data = {}
+     else:
+         existing_data = existing_data.model_dump()
+
+     request_tools = get_tools_from_kwargs(kwargs)
+
+     merged_tools = existing_data.get("tools", []) + request_tools
+
+     try:
+         traced_data = TracedData(
+             start_time=existing_data.get("start_time", start_time),
+             response_id=parsed_response.id,
+             input=process_input(existing_data.get("input", kwargs.get("input"))),
+             instructions=existing_data.get("instructions", kwargs.get("instructions")),
+             tools=merged_tools if merged_tools else None,
+             output_blocks={block.id: block for block in parsed_response.output}
+             | existing_data.get("output_blocks", {}),
+             usage=existing_data.get("usage", parsed_response.usage),
+             output_text=existing_data.get(
+                 "output_text", _get_output_text(parsed_response)
+             ),
+             request_model=existing_data.get("request_model", kwargs.get("model")),
+             response_model=existing_data.get("response_model", parsed_response.model),
+             request_reasoning_summary=existing_data.get(
+                 "request_reasoning_summary", kwargs.get("reasoning", {}).get("summary")
+             ),
+             request_reasoning_effort=existing_data.get(
+                 "request_reasoning_effort", kwargs.get("reasoning", {}).get("effort")
+             ),
+             request_service_tier=existing_data.get(
+                 "request_service_tier", kwargs.get("service_tier")
+             ),
+             response_service_tier=existing_data.get(
+                 "response_service_tier",
+                 parsed_response.service_tier,
+             ),
+         )
+         responses[parsed_response.id] = traced_data
+     except Exception:
+         return response
+
+     if parsed_response.status == "completed":
+         span = tracer.start_span(
+             SPAN_NAME,
+             kind=SpanKind.CLIENT,
+             start_time=int(traced_data.start_time),
+             context=get_current_context(),
+         )
+         set_data_attributes(traced_data, span)
+         span.end()
+
+     return response
+
+
+ @dont_throw
+ @_with_tracer_wrapper
+ async def async_responses_get_or_create_wrapper(
+     tracer: Tracer, wrapped, instance, args, kwargs
+ ):
+     if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
+         return await wrapped(*args, **kwargs)
+     start_time = time.time_ns()
+
+     try:
+         response = await wrapped(*args, **kwargs)
+         if isinstance(response, (Stream, AsyncStream)):
+             return response
+     except Exception as e:
+         response_id = kwargs.get("response_id")
+         existing_data = {}
+         if response_id and response_id in responses:
+             existing_data = responses[response_id].model_dump()
+         try:
+             traced_data = TracedData(
+                 start_time=existing_data.get("start_time", start_time),
+                 response_id=response_id or "",
+                 input=process_input(
+                     kwargs.get("input", existing_data.get("input", []))
+                 ),
+                 instructions=kwargs.get(
+                     "instructions", existing_data.get("instructions", "")
+                 ),
+                 tools=get_tools_from_kwargs(kwargs) or existing_data.get("tools", []),
+                 output_blocks=existing_data.get("output_blocks", {}),
+                 usage=existing_data.get("usage"),
+                 output_text=kwargs.get("output_text", existing_data.get("output_text")),
+                 request_model=kwargs.get("model", existing_data.get("request_model")),
+                 response_model=existing_data.get("response_model"),
+                 request_reasoning_summary=kwargs.get("reasoning", {}).get(
+                     "summary", existing_data.get("request_reasoning_summary")
+                 ),
+                 request_reasoning_effort=kwargs.get("reasoning", {}).get(
+                     "effort", existing_data.get("request_reasoning_effort")
+                 ),
+                 request_service_tier=kwargs.get(
+                     "service_tier", existing_data.get("request_service_tier")
+                 ),
+                 # response_service_tier=existing_data.get("response_service_tier"),
+             )
+         except Exception:
+             traced_data = None
+
+         span = tracer.start_span(
+             SPAN_NAME,
+             kind=SpanKind.CLIENT,
+             start_time=(
+                 start_time if traced_data is None else int(traced_data.start_time)
+             ),
+             context=get_current_context(),
+         )
+         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+         span.record_exception(e, attributes=get_event_attributes_from_context())
+         span.set_status(StatusCode.ERROR, str(e))
+         if traced_data:
+             set_data_attributes(traced_data, span)
+         span.end()
+         raise
+     parsed_response = parse_response(response)
+
+     existing_data = responses.get(parsed_response.id)
+     if existing_data is None:
+         existing_data = {}
+     else:
+         existing_data = existing_data.model_dump()
+
+     request_tools = get_tools_from_kwargs(kwargs)
+
+     merged_tools = existing_data.get("tools", []) + request_tools
+
+     try:
+         traced_data = TracedData(
+             start_time=existing_data.get("start_time", start_time),
+             response_id=parsed_response.id,
+             input=process_input(existing_data.get("input", kwargs.get("input"))),
+             instructions=existing_data.get("instructions", kwargs.get("instructions")),
+             tools=merged_tools if merged_tools else None,
+             output_blocks={block.id: block for block in parsed_response.output}
+             | existing_data.get("output_blocks", {}),
+             usage=existing_data.get("usage", parsed_response.usage),
+             output_text=existing_data.get(
+                 "output_text", _get_output_text(parsed_response)
+             ),
+             request_model=existing_data.get("request_model", kwargs.get("model")),
+             response_model=existing_data.get("response_model", parsed_response.model),
+             request_reasoning_summary=existing_data.get(
+                 "request_reasoning_summary", kwargs.get("reasoning", {}).get("summary")
+             ),
+             request_reasoning_effort=existing_data.get(
+                 "request_reasoning_effort", kwargs.get("reasoning", {}).get("effort")
+             ),
+             request_service_tier=existing_data.get(
+                 "request_service_tier", kwargs.get("service_tier")
+             ),
+             response_service_tier=existing_data.get(
+                 "response_service_tier",
+                 parsed_response.service_tier,
+             ),
+         )
+         responses[parsed_response.id] = traced_data
+     except Exception:
+         return response
+
+     if parsed_response.status == "completed":
+         span = tracer.start_span(
+             SPAN_NAME,
+             kind=SpanKind.CLIENT,
+             start_time=int(traced_data.start_time),
+             context=get_current_context(),
+         )
+         set_data_attributes(traced_data, span)
+         span.end()
+
+     return response
+
+
+ @dont_throw
+ @_with_tracer_wrapper
+ def responses_cancel_wrapper(tracer: Tracer, wrapped, instance, args, kwargs):
+     if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
+         return wrapped(*args, **kwargs)
+
+     response = wrapped(*args, **kwargs)
+     if isinstance(response, Stream):
+         return response
+     parsed_response = parse_response(response)
+     existing_data = responses.pop(parsed_response.id, None)
+     if existing_data is not None:
+         span = tracer.start_span(
+             SPAN_NAME,
+             kind=SpanKind.CLIENT,
+             start_time=existing_data.start_time,
+             record_exception=True,
+             context=get_current_context(),
+         )
+         span.record_exception(
+             Exception("Response cancelled"),
+             attributes=get_event_attributes_from_context(),
+         )
+         set_data_attributes(existing_data, span)
+         span.end()
+     return response
+
+
+ @dont_throw
+ @_with_tracer_wrapper
+ async def async_responses_cancel_wrapper(
+     tracer: Tracer, wrapped, instance, args, kwargs
+ ):
+     if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
+         return await wrapped(*args, **kwargs)
+
+     response = await wrapped(*args, **kwargs)
+     if isinstance(response, (Stream, AsyncStream)):
+         return response
+     parsed_response = parse_response(response)
+     existing_data = responses.pop(parsed_response.id, None)
+     if existing_data is not None:
+         span = tracer.start_span(
+             SPAN_NAME,
+             kind=SpanKind.CLIENT,
+             start_time=existing_data.start_time,
+             record_exception=True,
+             context=get_current_context(),
+         )
+         span.record_exception(
+             Exception("Response cancelled"),
+             attributes=get_event_attributes_from_context(),
+         )
+         set_data_attributes(existing_data, span)
+         span.end()
+     return response
+
+
+ def _get_output_text(parsed_response: Response) -> Optional[str]:
+     output_text = None
+     if hasattr(parsed_response, "output_text"):
+         output_text = parsed_response.output_text
+     else:
+         try:
+             output_text = parsed_response.output[0].content[0].text
+         except Exception:
+             pass
+     return output_text
+
+
+ # TODO: build streaming responses
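
The wrapper functions above only take effect once they are patched over the OpenAI SDK; that wiring lives in the instrumentor (v1/__init__.py, also added in this release) and is not reproduced in this diff. A minimal sketch of what registration could look like, assuming wrapt-style patching and the OpenAI SDK's openai.resources.responses module layout — the exact call sites in lmnr may differ:

    # Hypothetical registration sketch, not taken from this diff.
    # Assumes the OpenAI SDK defines Responses/AsyncResponses with
    # create() and cancel() in openai.resources.responses, and that
    # `tracer` is an opentelemetry.trace.Tracer.
    from wrapt import wrap_function_wrapper

    def _instrument_responses(tracer):
        # _with_tracer_wrapper binds the tracer, returning the
        # wrapper(wrapped, instance, args, kwargs) callable wrapt expects.
        wrap_function_wrapper(
            "openai.resources.responses", "Responses.create",
            responses_get_or_create_wrapper(tracer),
        )
        wrap_function_wrapper(
            "openai.resources.responses", "AsyncResponses.create",
            async_responses_get_or_create_wrapper(tracer),
        )
        wrap_function_wrapper(
            "openai.resources.responses", "Responses.cancel",
            responses_cancel_wrapper(tracer),
        )
        wrap_function_wrapper(
            "openai.resources.responses", "AsyncResponses.cancel",
            async_responses_cancel_wrapper(tracer),
        )

Once patched, a successful non-streaming client.responses.create(...) call is recorded as a CLIENT span named "openai.response" carrying the gen_ai.* attributes set in set_data_attributes, while Stream/AsyncStream results are returned unwrapped — which is what the trailing TODO about streaming refers to.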