ddtrace 3.11.0rc1__cp311-cp311-win32.whl → 3.11.0rc2__cp311-cp311-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. ddtrace/_logger.py +5 -6
  2. ddtrace/_trace/product.py +1 -1
  3. ddtrace/_trace/trace_handlers.py +3 -1
  4. ddtrace/_version.py +2 -2
  5. ddtrace/appsec/_asm_request_context.py +3 -1
  6. ddtrace/appsec/_iast/_listener.py +12 -2
  7. ddtrace/contrib/integration_registry/registry.yaml +10 -0
  8. ddtrace/contrib/internal/avro/__init__.py +17 -0
  9. ddtrace/contrib/internal/azure_functions/patch.py +23 -12
  10. ddtrace/contrib/internal/azure_functions/utils.py +14 -0
  11. ddtrace/contrib/internal/botocore/__init__.py +153 -0
  12. ddtrace/contrib/{_freezegun.py → internal/freezegun/__init__.py} +1 -1
  13. ddtrace/contrib/internal/langchain/patch.py +11 -443
  14. ddtrace/contrib/internal/langchain/utils.py +0 -26
  15. ddtrace/contrib/internal/logbook/patch.py +1 -2
  16. ddtrace/contrib/internal/logging/patch.py +4 -7
  17. ddtrace/contrib/internal/loguru/patch.py +1 -3
  18. ddtrace/contrib/internal/protobuf/__init__.py +17 -0
  19. ddtrace/contrib/internal/pytest/__init__.py +62 -0
  20. ddtrace/contrib/internal/pytest/_plugin_v2.py +12 -3
  21. ddtrace/contrib/internal/pytest_bdd/__init__.py +23 -0
  22. ddtrace/contrib/internal/pytest_benchmark/__init__.py +3 -0
  23. ddtrace/contrib/internal/structlog/patch.py +2 -4
  24. ddtrace/contrib/internal/unittest/__init__.py +36 -0
  25. ddtrace/internal/_encoding.cp311-win32.pyd +0 -0
  26. ddtrace/internal/_encoding.pyi +1 -1
  27. ddtrace/internal/_rand.cp311-win32.pyd +0 -0
  28. ddtrace/internal/_tagset.cp311-win32.pyd +0 -0
  29. ddtrace/internal/_threads.cp311-win32.pyd +0 -0
  30. ddtrace/internal/ci_visibility/encoder.py +18 -12
  31. ddtrace/internal/ci_visibility/utils.py +4 -4
  32. ddtrace/internal/core/__init__.py +5 -2
  33. ddtrace/internal/datadog/profiling/dd_wrapper-unknown-amd64.dll +0 -0
  34. ddtrace/internal/datadog/profiling/dd_wrapper-unknown-amd64.lib +0 -0
  35. ddtrace/internal/datadog/profiling/ddup/_ddup.cp311-win32.pyd +0 -0
  36. ddtrace/internal/datadog/profiling/ddup/_ddup.cp311-win32.pyd.lib +0 -0
  37. ddtrace/internal/datadog/profiling/ddup/dd_wrapper-unknown-amd64.dll +0 -0
  38. ddtrace/internal/datadog/profiling/ddup/dd_wrapper-unknown-amd64.lib +0 -0
  39. ddtrace/internal/native/_native.cp311-win32.pyd +0 -0
  40. ddtrace/internal/telemetry/metrics_namespaces.cp311-win32.pyd +0 -0
  41. ddtrace/internal/test_visibility/coverage_lines.py +4 -4
  42. ddtrace/internal/writer/writer.py +24 -11
  43. ddtrace/llmobs/_constants.py +2 -0
  44. ddtrace/llmobs/_experiment.py +69 -10
  45. ddtrace/llmobs/_integrations/bedrock.py +4 -0
  46. ddtrace/llmobs/_integrations/bedrock_agents.py +5 -1
  47. ddtrace/llmobs/_integrations/langchain.py +29 -20
  48. ddtrace/llmobs/_llmobs.py +78 -13
  49. ddtrace/llmobs/_telemetry.py +20 -5
  50. ddtrace/llmobs/_utils.py +6 -0
  51. ddtrace/profiling/_threading.cp311-win32.pyd +0 -0
  52. ddtrace/profiling/collector/_memalloc.cp311-win32.pyd +0 -0
  53. ddtrace/profiling/collector/_task.cp311-win32.pyd +0 -0
  54. ddtrace/profiling/collector/_traceback.cp311-win32.pyd +0 -0
  55. ddtrace/profiling/collector/stack.cp311-win32.pyd +0 -0
  56. ddtrace/settings/_config.py +1 -2
  57. ddtrace/settings/profiling.py +0 -9
  58. ddtrace/vendor/psutil/_psutil_windows.cp311-win32.pyd +0 -0
  59. {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/METADATA +1 -1
  60. {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/RECORD +143 -150
  61. ddtrace/contrib/_avro.py +0 -17
  62. ddtrace/contrib/_botocore.py +0 -153
  63. ddtrace/contrib/_protobuf.py +0 -17
  64. ddtrace/contrib/_pytest.py +0 -62
  65. ddtrace/contrib/_pytest_bdd.py +0 -23
  66. ddtrace/contrib/_pytest_benchmark.py +0 -3
  67. ddtrace/contrib/_unittest.py +0 -36
  68. /ddtrace/contrib/{_aiobotocore.py → internal/aiobotocore/__init__.py} +0 -0
  69. /ddtrace/contrib/{_aiohttp_jinja2.py → internal/aiohttp_jinja2/__init__.py} +0 -0
  70. /ddtrace/contrib/{_aiomysql.py → internal/aiomysql/__init__.py} +0 -0
  71. /ddtrace/contrib/{_aiopg.py → internal/aiopg/__init__.py} +0 -0
  72. /ddtrace/contrib/{_aioredis.py → internal/aioredis/__init__.py} +0 -0
  73. /ddtrace/contrib/{_algoliasearch.py → internal/algoliasearch/__init__.py} +0 -0
  74. /ddtrace/contrib/{_anthropic.py → internal/anthropic/__init__.py} +0 -0
  75. /ddtrace/contrib/{_aredis.py → internal/aredis/__init__.py} +0 -0
  76. /ddtrace/contrib/{_asyncio.py → internal/asyncio/__init__.py} +0 -0
  77. /ddtrace/contrib/{_asyncpg.py → internal/asyncpg/__init__.py} +0 -0
  78. /ddtrace/contrib/{_aws_lambda.py → internal/aws_lambda/__init__.py} +0 -0
  79. /ddtrace/contrib/{_azure_functions.py → internal/azure_functions/__init__.py} +0 -0
  80. /ddtrace/contrib/{_azure_servicebus.py → internal/azure_servicebus/__init__.py} +0 -0
  81. /ddtrace/contrib/{_boto.py → internal/boto/__init__.py} +0 -0
  82. /ddtrace/contrib/{_cassandra.py → internal/cassandra/__init__.py} +0 -0
  83. /ddtrace/contrib/{_consul.py → internal/consul/__init__.py} +0 -0
  84. /ddtrace/contrib/{_coverage.py → internal/coverage/__init__.py} +0 -0
  85. /ddtrace/contrib/{_crewai.py → internal/crewai/__init__.py} +0 -0
  86. /ddtrace/contrib/{_django.py → internal/django/__init__.py} +0 -0
  87. /ddtrace/contrib/{_dogpile_cache.py → internal/dogpile_cache/__init__.py} +0 -0
  88. /ddtrace/contrib/{_dramatiq.py → internal/dramatiq/__init__.py} +0 -0
  89. /ddtrace/contrib/{_elasticsearch.py → internal/elasticsearch/__init__.py} +0 -0
  90. /ddtrace/contrib/{_fastapi.py → internal/fastapi/__init__.py} +0 -0
  91. /ddtrace/contrib/{_flask.py → internal/flask/__init__.py} +0 -0
  92. /ddtrace/contrib/{_futures.py → internal/futures/__init__.py} +0 -0
  93. /ddtrace/contrib/{_gevent.py → internal/gevent/__init__.py} +0 -0
  94. /ddtrace/contrib/{_google_genai.py → internal/google_genai/__init__.py} +0 -0
  95. /ddtrace/contrib/{_google_generativeai.py → internal/google_generativeai/__init__.py} +0 -0
  96. /ddtrace/contrib/{_graphql.py → internal/graphql/__init__.py} +0 -0
  97. /ddtrace/contrib/{_grpc.py → internal/grpc/__init__.py} +0 -0
  98. /ddtrace/contrib/{_gunicorn.py → internal/gunicorn/__init__.py} +0 -0
  99. /ddtrace/contrib/{_httplib.py → internal/httplib/__init__.py} +0 -0
  100. /ddtrace/contrib/{_httpx.py → internal/httpx/__init__.py} +0 -0
  101. /ddtrace/contrib/{_jinja2.py → internal/jinja2/__init__.py} +0 -0
  102. /ddtrace/contrib/{_kafka.py → internal/kafka/__init__.py} +0 -0
  103. /ddtrace/contrib/{_kombu.py → internal/kombu/__init__.py} +0 -0
  104. /ddtrace/contrib/{_langchain.py → internal/langchain/__init__.py} +0 -0
  105. /ddtrace/contrib/{_langgraph.py → internal/langgraph/__init__.py} +0 -0
  106. /ddtrace/contrib/{_litellm.py → internal/litellm/__init__.py} +0 -0
  107. /ddtrace/contrib/{_logbook.py → internal/logbook/__init__.py} +0 -0
  108. /ddtrace/contrib/{_logging.py → internal/logging/__init__.py} +0 -0
  109. /ddtrace/contrib/{_loguru.py → internal/loguru/__init__.py} +0 -0
  110. /ddtrace/contrib/{_mako.py → internal/mako/__init__.py} +0 -0
  111. /ddtrace/contrib/{_mariadb.py → internal/mariadb/__init__.py} +0 -0
  112. /ddtrace/contrib/{_mcp.py → internal/mcp/__init__.py} +0 -0
  113. /ddtrace/contrib/{_molten.py → internal/molten/__init__.py} +0 -0
  114. /ddtrace/contrib/{_mongoengine.py → internal/mongoengine/__init__.py} +0 -0
  115. /ddtrace/contrib/{_mysql.py → internal/mysql/__init__.py} +0 -0
  116. /ddtrace/contrib/{_mysqldb.py → internal/mysqldb/__init__.py} +0 -0
  117. /ddtrace/contrib/{_openai.py → internal/openai/__init__.py} +0 -0
  118. /ddtrace/contrib/{_openai_agents.py → internal/openai_agents/__init__.py} +0 -0
  119. /ddtrace/contrib/{_psycopg.py → internal/psycopg/__init__.py} +0 -0
  120. /ddtrace/contrib/{_pydantic_ai.py → internal/pydantic_ai/__init__.py} +0 -0
  121. /ddtrace/contrib/{_pymemcache.py → internal/pymemcache/__init__.py} +0 -0
  122. /ddtrace/contrib/{_pymongo.py → internal/pymongo/__init__.py} +0 -0
  123. /ddtrace/contrib/{_pymysql.py → internal/pymysql/__init__.py} +0 -0
  124. /ddtrace/contrib/{_pynamodb.py → internal/pynamodb/__init__.py} +0 -0
  125. /ddtrace/contrib/{_pyodbc.py → internal/pyodbc/__init__.py} +0 -0
  126. /ddtrace/contrib/{_redis.py → internal/redis/__init__.py} +0 -0
  127. /ddtrace/contrib/{_rediscluster.py → internal/rediscluster/__init__.py} +0 -0
  128. /ddtrace/contrib/{_rq.py → internal/rq/__init__.py} +0 -0
  129. /ddtrace/contrib/{_sanic.py → internal/sanic/__init__.py} +0 -0
  130. /ddtrace/contrib/{_selenium.py → internal/selenium/__init__.py} +0 -0
  131. /ddtrace/contrib/{_snowflake.py → internal/snowflake/__init__.py} +0 -0
  132. /ddtrace/contrib/{_sqlite3.py → internal/sqlite3/__init__.py} +0 -0
  133. /ddtrace/contrib/{_starlette.py → internal/starlette/__init__.py} +0 -0
  134. /ddtrace/contrib/{_structlog.py → internal/structlog/__init__.py} +0 -0
  135. /ddtrace/contrib/{_subprocess.py → internal/subprocess/__init__.py} +0 -0
  136. /ddtrace/contrib/{_urllib.py → internal/urllib/__init__.py} +0 -0
  137. /ddtrace/contrib/{_urllib3.py → internal/urllib3/__init__.py} +0 -0
  138. /ddtrace/contrib/{_vertexai.py → internal/vertexai/__init__.py} +0 -0
  139. /ddtrace/contrib/{_vertica.py → internal/vertica/__init__.py} +0 -0
  140. /ddtrace/contrib/{_webbrowser.py → internal/webbrowser/__init__.py} +0 -0
  141. /ddtrace/contrib/{_yaaredis.py → internal/yaaredis/__init__.py} +0 -0
  142. {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/WHEEL +0 -0
  143. {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/entry_points.txt +0 -0
  144. {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/licenses/LICENSE +0 -0
  145. {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/licenses/LICENSE.Apache +0 -0
  146. {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/licenses/LICENSE.BSD3 +0 -0
  147. {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/licenses/NOTICE +0 -0
  148. {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/top_level.txt +0 -0
ddtrace/llmobs/_experiment.py CHANGED
@@ -21,6 +21,7 @@ from ddtrace.constants import ERROR_STACK
  from ddtrace.constants import ERROR_TYPE
  from ddtrace.internal.logger import get_logger
  from ddtrace.llmobs._constants import EXPERIMENT_EXPECTED_OUTPUT
+ from ddtrace.llmobs._utils import convert_tags_dict_to_list


  if TYPE_CHECKING:
@@ -153,6 +154,12 @@ class Dataset:
          self._deleted_record_ids.append(record_id)
          del self._records[index]

+     @property
+     def url(self) -> str:
+         # FIXME: need to use the user's site
+         # also will not work for subdomain orgs
+         return f"https://app.datadoghq.com/llm/datasets/{self._id}"
+
      @overload
      def __getitem__(self, index: int) -> DatasetRecord:
          ...
@@ -170,6 +177,50 @@ class Dataset:
      def __iter__(self) -> Iterator[DatasetRecord]:
          return iter(self._records)

+     def as_dataframe(self) -> "pd.DataFrame":
+         try:
+             import pandas as pd
+         except ImportError as e:
+             raise ImportError(
+                 "pandas is required to convert dataset to DataFrame. Please install via `pip install pandas`"
+             ) from e
+
+         column_tuples = set()
+         data_rows = []
+         for record in self._records:
+             flat_record = {}  # type: Dict[Union[str, Tuple[str, str]], Any]
+
+             input_data = record.get("input_data", {})
+             if isinstance(input_data, dict):
+                 for input_data_col, input_data_val in input_data.items():
+                     flat_record[("input_data", input_data_col)] = input_data_val
+                     column_tuples.add(("input_data", input_data_col))
+             else:
+                 flat_record[("input_data", "")] = input_data
+                 column_tuples.add(("input_data", ""))
+
+             expected_output = record.get("expected_output", {})
+             if isinstance(expected_output, dict):
+                 for expected_output_col, expected_output_val in expected_output.items():
+                     flat_record[("expected_output", expected_output_col)] = expected_output_val
+                     column_tuples.add(("expected_output", expected_output_col))
+             else:
+                 flat_record[("expected_output", "")] = expected_output
+                 column_tuples.add(("expected_output", ""))
+
+             for metadata_col, metadata_val in record.get("metadata", {}).items():
+                 flat_record[("metadata", metadata_col)] = metadata_val
+                 column_tuples.add(("metadata", metadata_col))
+
+             data_rows.append(flat_record)
+
+         records_list = []
+         for flat_record in data_rows:
+             row = [flat_record.get(col, None) for col in column_tuples]
+             records_list.append(row)
+
+         return pd.DataFrame(data=records_list, columns=pd.MultiIndex.from_tuples(column_tuples))
+

  class Experiment:
      def __init__(
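Note: a hedged usage sketch of the new `as_dataframe()` helper. The dataset name is hypothetical, and `LLMObs.pull_dataset` is inferred from the `experiment()` docstring later in this diff ("created with LLMObs.pull/create_dataset()"):

    from ddtrace.llmobs import LLMObs

    # Assumes LLMObs.enable(...) has been called and pandas is installed.
    dataset = LLMObs.pull_dataset("capitals-of-the-world")  # hypothetical name

    # Records are flattened into MultiIndex columns grouped under
    # ("input_data", ...), ("expected_output", ...), and ("metadata", ...).
    df = dataset.as_dataframe()
    print(df.columns.tolist())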
@@ -180,7 +231,7 @@ class Experiment:
          evaluators: List[Callable[[DatasetRecordInputType, JSONType, JSONType], JSONType]],
          project_name: str,
          description: str = "",
-         tags: Optional[List[str]] = None,
+         tags: Optional[Dict[str, str]] = None,
          config: Optional[ExperimentConfigType] = None,
          _llmobs_instance: Optional["LLMObs"] = None,
      ) -> None:
@@ -189,8 +240,8 @@ class Experiment:
          self._dataset = dataset
          self._evaluators = evaluators
          self._description = description
-         self._tags: List[str] = [f"ddtrace.version:{ddtrace.__version__}"]
-         self._tags.extend(tags or [])
+         self._tags: Dict[str, str] = tags or {}
+         self._tags["ddtrace.version"] = str(ddtrace.__version__)
          self._config: Dict[str, JSONType] = config or {}
          self._llmobs_instance = _llmobs_instance

@@ -217,7 +268,8 @@
          if not self._llmobs_instance.enabled:
              logger.warning(
                  "Skipping experiment as LLMObs is not enabled. "
-                 "Ensure LLM Observability is enabled via `LLMObs.enable(...)` or set `DD_LLMOBS_ENABLED=1`."
+                 "Ensure LLM Observability is enabled via `LLMObs.enable(...)` "
+                 "or set `DD_LLMOBS_ENABLED=1` and use `ddtrace-run` to run your application."
              )
              return []

@@ -230,17 +282,19 @@
              self._project_id,
              self._dataset._version,
              self._config,
-             self._tags,
+             convert_tags_dict_to_list(self._tags),
              self._description,
          )
          self._id = experiment_id
-         self._tags.append(f"experiment_id:{experiment_id}")
+         self._tags["experiment_id"] = str(experiment_id)
          self._run_name = experiment_run_name
          task_results = self._run_task(jobs, raise_errors, sample_size)
          evaluations = self._run_evaluators(task_results, raise_errors=raise_errors)
          experiment_results = self._merge_results(task_results, evaluations)
          experiment_evals = self._generate_metrics_from_exp_results(experiment_results)
-         self._llmobs_instance._dne_client.experiment_eval_post(self._id, experiment_evals, self._tags)
+         self._llmobs_instance._dne_client.experiment_eval_post(
+             self._id, experiment_evals, convert_tags_dict_to_list(self._tags)
+         )
          return experiment_results

      def _process_record(self, idx_record: Tuple[int, DatasetRecord]) -> Optional[TaskResult]:
@@ -256,7 +310,12 @@
          span_id, trace_id = "", ""
          input_data = record["input_data"]
          record_id = record.get("record_id", "")
-         tags = {"dataset_id": self._dataset._id, "dataset_record_id": record_id, "experiment_id": self._id}
+         tags = {
+             **self._tags,
+             "dataset_id": str(self._dataset._id),
+             "dataset_record_id": str(record_id),
+             "experiment_id": str(self._id),
+         }
          output_data = None
          try:
              output_data = self._task(input_data, self._config)
@@ -342,7 +401,7 @@
          experiment_results = []
          for idx, task_result in enumerate(task_results):
              output_data = task_result["output"]
-             metadata: Dict[str, JSONType] = {"tags": cast(List[JSONType], self._tags)}
+             metadata: Dict[str, JSONType] = {"tags": cast(List[JSONType], convert_tags_dict_to_list(self._tags))}
              metadata.update(task_result.get("metadata") or {})
              record: DatasetRecord = self._dataset[idx]
              evals = evaluations[idx]["evaluations"]
@@ -383,7 +442,7 @@
                  "label": eval_name,
                  f"{metric_type}_value": eval_value,  # type: ignore
                  "error": err,
-                 "tags": self._tags,
+                 "tags": convert_tags_dict_to_list(self._tags),
                  "experiment_id": self._id,
              }

ddtrace/llmobs/_integrations/bedrock.py CHANGED
@@ -13,6 +13,7 @@ from ddtrace.llmobs._constants import CACHE_READ_INPUT_TOKENS_METRIC_KEY
  from ddtrace.llmobs._constants import CACHE_WRITE_INPUT_TOKENS_METRIC_KEY
  from ddtrace.llmobs._constants import INPUT_MESSAGES
  from ddtrace.llmobs._constants import INPUT_VALUE
+ from ddtrace.llmobs._constants import INTEGRATION
  from ddtrace.llmobs._constants import METADATA
  from ddtrace.llmobs._constants import METRICS
  from ddtrace.llmobs._constants import MODEL_NAME
@@ -30,6 +31,7 @@ from ddtrace.llmobs._integrations.bedrock_utils import normalize_input_tokens
  from ddtrace.llmobs._integrations.utils import get_final_message_converse_stream_message
  from ddtrace.llmobs._integrations.utils import get_messages_from_converse_content
  from ddtrace.llmobs._integrations.utils import update_proxy_workflow_input_output_value
+ from ddtrace.llmobs._telemetry import record_bedrock_agent_span_event_created
  from ddtrace.llmobs._writer import LLMObsSpanEvent
  from ddtrace.trace import Span

@@ -151,6 +153,7 @@ class BedrockIntegration(BaseLLMIntegration):
                  INPUT_VALUE: str(input_value),
                  TAGS: {"session_id": session_id},
                  METADATA: {"agent_id": agent_id, "agent_alias_id": agent_alias_id},
+                 INTEGRATION: "bedrock_agents",
              }
          )
          if not response:
@@ -176,6 +179,7 @@ class BedrockIntegration(BaseLLMIntegration):
          )
          for _, span_event in self._spans.items():
              LLMObs._instance._llmobs_span_writer.enqueue(span_event)
+             record_bedrock_agent_span_event_created(span_event)
          self._spans.clear()
          self._active_span_by_step_id.clear()

ddtrace/llmobs/_integrations/bedrock_agents.py CHANGED
@@ -15,6 +15,7 @@ from ddtrace.internal.utils.formats import format_trace_id
  from ddtrace.llmobs._constants import LLMOBS_TRACE_ID
  from ddtrace.llmobs._integrations.bedrock_utils import parse_model_id
  from ddtrace.llmobs._utils import _get_ml_app
+ from ddtrace.llmobs._utils import _get_session_id
  from ddtrace.llmobs._utils import safe_json


@@ -57,12 +58,15 @@ def _build_span_event(
      llmobs_trace_id = root_span._get_ctx_item(LLMOBS_TRACE_ID)
      if llmobs_trace_id is None:
          llmobs_trace_id = root_span.trace_id
+     session_id = _get_session_id(root_span)
+     ml_app = _get_ml_app(root_span)
+     tags = [f"ml_app:{ml_app}", f"session_id:{session_id}", "integration:bedrock_agents"]
      span_event = {
          "name": span_name,
          "span_id": str(span_id),
          "trace_id": format_trace_id(llmobs_trace_id),
          "parent_id": str(parent_id or root_span.span_id),
-         "tags": ["ml_app:{}".format(_get_ml_app(root_span))],
+         "tags": tags,
          "start_ns": int(start_ns or root_span.start_ns),
          "duration": int(duration_ns or DEFAULT_SPAN_DURATION),
          "status": "error" if error else "ok",
ddtrace/llmobs/_integrations/langchain.py CHANGED
@@ -163,7 +163,6 @@ class LangChainIntegration(BaseLLMIntegration):

          self._set_links(span)
          model_provider = span.get_tag(PROVIDER)
-         self._llmobs_set_metadata(span, model_provider)

          is_workflow = False

@@ -365,26 +364,37 @@ class LangChainIntegration(BaseLLMIntegration):
          if hasattr(instance, "_datadog_spans"):
              delattr(instance, "_datadog_spans")

-     def _llmobs_set_metadata(self, span: Span, model_provider: Optional[str] = None) -> None:
-         if not model_provider:
+     def _llmobs_set_metadata(self, span: Span, kwargs: Dict[str, Any]) -> None:
+         identifying_params = kwargs.pop("_dd.identifying_params", None)
+         if not identifying_params:
              return
+         metadata = self._llmobs_extract_parameters(identifying_params)
+         for val in identifying_params.values():
+             if metadata:
+                 break
+             if not isinstance(val, dict):
+                 continue
+             metadata = self._llmobs_extract_parameters(val)

-         metadata = {}
-         temperature = span.get_tag(f"langchain.request.{model_provider}.parameters.temperature") or span.get_tag(
-             f"langchain.request.{model_provider}.parameters.model_kwargs.temperature"
-         )  # huggingface
-         max_tokens = (
-             span.get_tag(f"langchain.request.{model_provider}.parameters.max_tokens")
-             or span.get_tag(f"langchain.request.{model_provider}.parameters.maxTokens")  # ai21
-             or span.get_tag(f"langchain.request.{model_provider}.parameters.model_kwargs.max_tokens")  # huggingface
-         )
+         if metadata:
+             span._set_ctx_item(METADATA, metadata)

+     def _llmobs_extract_parameters(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
+         metadata: Dict[str, Any] = {}
+         max_tokens = None
+         temperature = None
+         if "temperature" in parameters:
+             temperature = parameters["temperature"]
+         for max_token_key in ["max_tokens", "maxTokens", "max_completion_tokens"]:
+             if max_token_key in parameters:
+                 max_tokens = parameters[max_token_key]
+                 break
          if temperature is not None and temperature != "None":
              metadata["temperature"] = float(temperature)
          if max_tokens is not None and max_tokens != "None":
              metadata["max_tokens"] = int(max_tokens)
-         if metadata:
-             span._set_ctx_item(METADATA, metadata)
+
+         return metadata

      def _llmobs_set_tags_from_llm(
          self, span: Span, args: List[Any], kwargs: Dict[str, Any], completions: Any, is_workflow: bool = False
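Note: a standalone, slightly simplified mirror of the new `_llmobs_extract_parameters` logic, to show what gets pulled out of a model's identifying parameters (the sample dict is hypothetical; the integration additionally tolerates the literal string "None"):

    from typing import Any, Dict

    def extract_parameters(parameters: Dict[str, Any]) -> Dict[str, Any]:
        # Pick up temperature plus the first max-token key found among
        # the provider-specific spellings, as in the method above.
        metadata: Dict[str, Any] = {}
        if parameters.get("temperature") is not None:
            metadata["temperature"] = float(parameters["temperature"])
        for key in ("max_tokens", "maxTokens", "max_completion_tokens"):
            if parameters.get(key) is not None:
                metadata["max_tokens"] = int(parameters[key])
                break
        return metadata

    print(extract_parameters({"temperature": 0.2, "max_completion_tokens": 256}))
    # {'temperature': 0.2, 'max_tokens': 256}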
@@ -411,6 +421,8 @@ class LangChainIntegration(BaseLLMIntegration):
              }
          )

+         self._llmobs_set_metadata(span, kwargs)
+
          if span.error:
              span._set_ctx_item(output_tag_key, [{"content": ""}])
              return
@@ -444,6 +456,9 @@ class LangChainIntegration(BaseLLMIntegration):
                  MODEL_PROVIDER: span.get_tag(PROVIDER) or "",
              }
          )
+
+         self._llmobs_set_metadata(span, kwargs)
+
          input_tag_key = INPUT_VALUE if is_workflow else INPUT_MESSAGES
          output_tag_key = OUTPUT_VALUE if is_workflow else OUTPUT_MESSAGES
          stream = span.get_tag("langchain.request.stream")
@@ -700,16 +715,10 @@ class LangChainIntegration(BaseLLMIntegration):
          **kwargs,
      ) -> None:
          """Set base level tags that should be present on all LangChain spans (if they are not None)."""
-         span.set_tag_str(TYPE, interface_type)
          if provider is not None:
              span.set_tag_str(PROVIDER, provider)
          if model is not None:
              span.set_tag_str(MODEL, model)
-         if api_key is not None:
-             if len(api_key) >= 4:
-                 span.set_tag_str(API_KEY, "...%s" % str(api_key[-4:]))
-             else:
-                 span.set_tag_str(API_KEY, api_key)

      def check_token_usage_chat_or_llm_result(self, result):
          """Checks for token usage on the top-level ChatResult or LLMResult object"""
ddtrace/llmobs/_llmobs.py CHANGED
@@ -1,3 +1,4 @@
+ import csv
  from dataclasses import dataclass
  from dataclasses import field
  import inspect
@@ -45,9 +46,11 @@ from ddtrace.llmobs import _constants as constants
  from ddtrace.llmobs import _telemetry as telemetry
  from ddtrace.llmobs._constants import ANNOTATIONS_CONTEXT_ID
  from ddtrace.llmobs._constants import DECORATOR
+ from ddtrace.llmobs._constants import DEFAULT_PROJECT_NAME
  from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
  from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL
  from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
+ from ddtrace.llmobs._constants import EXPERIMENT_CSV_FIELD_MAX_SIZE
  from ddtrace.llmobs._constants import EXPERIMENT_EXPECTED_OUTPUT
  from ddtrace.llmobs._constants import EXPERIMENT_ID_KEY
  from ddtrace.llmobs._constants import INPUT_DOCUMENTS
@@ -77,8 +80,8 @@ from ddtrace.llmobs._constants import TAGS
  from ddtrace.llmobs._context import LLMObsContextProvider
  from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
  from ddtrace.llmobs._experiment import Dataset
+ from ddtrace.llmobs._experiment import DatasetRecord
  from ddtrace.llmobs._experiment import DatasetRecordInputType
- from ddtrace.llmobs._experiment import DatasetRecordRaw as DatasetRecord
  from ddtrace.llmobs._experiment import Experiment
  from ddtrace.llmobs._experiment import ExperimentConfigType
  from ddtrace.llmobs._experiment import JSONType
@@ -168,7 +171,7 @@ class LLMObs(Service):
      _instance = None  # type: LLMObs
      enabled = False
      _app_key: str = os.getenv("DD_APP_KEY", "")
-     _project_name: str = os.getenv("DD_LLMOBS_PROJECT_NAME", "")
+     _project_name: str = os.getenv("DD_LLMOBS_PROJECT_NAME", DEFAULT_PROJECT_NAME)

      def __init__(
          self,
@@ -509,12 +512,16 @@ class LLMObs(Service):
          config._dd_site = site or config._dd_site
          config._dd_api_key = api_key or config._dd_api_key
          cls._app_key = app_key or cls._app_key
-         cls._project_name = project_name or cls._project_name
+         cls._project_name = project_name or cls._project_name or DEFAULT_PROJECT_NAME
          config.env = env or config.env
          config.service = service or config.service
          config._llmobs_ml_app = ml_app or config._llmobs_ml_app
          config._llmobs_instrumented_proxy_urls = instrumented_proxy_urls or config._llmobs_instrumented_proxy_urls

+         # FIXME: workaround to prevent noisy logs when using the experiments feature
+         if config._dd_api_key and cls._app_key and os.environ.get("DD_TRACE_ENABLED", "").lower() not in ["true", "1"]:
+             ddtrace.tracer.enabled = False
+
          error = None
          start_ns = time.time_ns()
          try:
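Note: given the hunk above, a hedged sketch of how the workaround behaves; the key values are placeholders, and `api_key`, `app_key`, `site`, and `ml_app` are existing `enable()` parameters per this diff:

    import os
    from ddtrace.llmobs import LLMObs

    # With both an API key and an app key configured and DD_TRACE_ENABLED
    # unset, enable() now switches the APM tracer off to quiet experiment
    # runs. Export DD_TRACE_ENABLED=true to keep APM tracing on as well.
    LLMObs.enable(
        api_key="<api-key>",
        app_key="<app-key>",
        site="datadoghq.com",
        ml_app="my-ml-app",  # hypothetical app name
    )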
@@ -596,6 +603,67 @@
          ds.push()
          return ds

+     @classmethod
+     def create_dataset_from_csv(
+         cls,
+         csv_path: str,
+         dataset_name: str,
+         input_data_columns: List[str],
+         expected_output_columns: List[str],
+         metadata_columns: List[str] = [],
+         csv_delimiter: str = ",",
+         description="",
+     ) -> Dataset:
+         ds = cls._instance._dne_client.dataset_create(dataset_name, description)
+
+         # Store the original field size limit to restore it later
+         original_field_size_limit = csv.field_size_limit()
+
+         csv.field_size_limit(EXPERIMENT_CSV_FIELD_MAX_SIZE)  # 10mb
+
+         try:
+             with open(csv_path, mode="r") as csvfile:
+                 content = csvfile.readline().strip()
+                 if not content:
+                     raise ValueError("CSV file appears to be empty or header is missing.")
+
+                 csvfile.seek(0)
+
+                 rows = csv.DictReader(csvfile, delimiter=csv_delimiter)
+
+                 if rows.fieldnames is None:
+                     raise ValueError("CSV file appears to be empty or header is missing.")
+
+                 header_columns = rows.fieldnames
+                 missing_input_columns = [col for col in input_data_columns if col not in header_columns]
+                 missing_output_columns = [col for col in expected_output_columns if col not in header_columns]
+                 missing_metadata_columns = [col for col in metadata_columns if col not in header_columns]
+
+                 if missing_input_columns:
+                     raise ValueError(f"Input columns not found in CSV header: {missing_input_columns}")
+                 if missing_output_columns:
+                     raise ValueError(f"Expected output columns not found in CSV header: {missing_output_columns}")
+                 if missing_metadata_columns:
+                     raise ValueError(f"Metadata columns not found in CSV header: {missing_metadata_columns}")
+
+                 for row in rows:
+                     ds.append(
+                         DatasetRecord(
+                             input_data={col: row[col] for col in input_data_columns},
+                             expected_output={col: row[col] for col in expected_output_columns},
+                             metadata={col: row[col] for col in metadata_columns},
+                             record_id="",
+                         )
+                     )
+
+         finally:
+             # Always restore the original field size limit
+             csv.field_size_limit(original_field_size_limit)
+
+         if len(ds) > 0:
+             ds.push()
+         return ds
+
      @classmethod
      def _delete_dataset(cls, dataset_id: str) -> None:
          return cls._instance._dne_client.dataset_delete(dataset_id)
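Note: a hedged usage sketch of the new CSV import (file, dataset, and column names are hypothetical):

    from ddtrace.llmobs import LLMObs

    # Assumes LLMObs.enable(...) has been called and qa_pairs.csv looks like:
    #   question,answer,source
    #   "What is 2+2?","4","arithmetic"
    ds = LLMObs.create_dataset_from_csv(
        csv_path="qa_pairs.csv",
        dataset_name="qa-smoke-test",
        input_data_columns=["question"],
        expected_output_columns=["answer"],
        metadata_columns=["source"],
    )
    print(len(ds), "records pushed")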
@@ -608,21 +676,19 @@
          dataset: Dataset,
          evaluators: List[Callable[[DatasetRecordInputType, JSONType, JSONType], JSONType]],
          description: str = "",
-         project_name: Optional[str] = None,
-         tags: Optional[List[str]] = None,
+         tags: Optional[Dict[str, str]] = None,
+         config: Optional[ExperimentConfigType] = None,
      ) -> Experiment:
          """Initializes an Experiment to run a task on a Dataset and evaluators.

          :param name: The name of the experiment.
-         :param task: The task function to run. Must accept a parameter ``input_data`` and optionally ``config``.
+         :param task: The task function to run. Must accept parameters ``input_data`` and ``config``.
          :param dataset: The dataset to run the experiment on, created with LLMObs.pull/create_dataset().
          :param evaluators: A list of evaluator functions to evaluate the task output.
              Must accept parameters ``input_data``, ``output_data``, and ``expected_output``.
          :param description: A description of the experiment.
-         :param project_name: The name of the project to associate with the experiment. If not provided, defaults to the
-             configured value set via environment variable `DD_LLMOBS_PROJECT_NAME`
-             or `LLMObs.enable(project_name=...)`.
-         :param tags: A list of string tags to associate with the experiment.
+         :param tags: A dictionary of string key-value tag pairs to associate with the experiment.
+         :param config: A configuration dictionary describing the experiment.
          """
          if not callable(task):
              raise TypeError("task must be a callable function.")
@@ -640,16 +706,15 @@
          required_params = ("input_data", "output_data", "expected_output")
          if not all(param in params for param in required_params):
              raise TypeError("Evaluator function must have parameters {}.".format(required_params))
-         if project_name is None:
-             project_name = cls._project_name
          return Experiment(
              name,
              task,
              dataset,
              evaluators,
-             project_name=project_name,
+             project_name=cls._project_name,
              tags=tags,
              description=description,
+             config=config,
              _llmobs_instance=cls._instance,
          )

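Note: a hedged sketch of the updated `experiment()` call shape; the task, evaluator, and tag values are hypothetical, and `ds` stands for a dataset such as the one from the CSV sketch above:

    from ddtrace.llmobs import LLMObs

    def task(input_data, config):
        # Must accept both parameters per the revised docstring.
        return input_data["question"].upper()

    def exact_match(input_data, output_data, expected_output):
        return output_data == expected_output

    exp = LLMObs.experiment(
        name="uppercase-baseline",
        task=task,
        dataset=ds,
        evaluators=[exact_match],
        tags={"team": "ml"},       # now a dict rather than a list of strings
        config={"variant": "a"},   # forwarded to the task at run time
    )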
ddtrace/llmobs/_telemetry.py CHANGED
@@ -36,16 +36,17 @@ class LLMObsTelemetryMetrics:
      USER_PROCESSOR_CALLED = "user_processor_called"


- def _find_integration_from_tags(tags):
-     integration_tag = next((tag for tag in tags if tag.startswith("integration:")), None)
-     if not integration_tag:
+ def _find_tag_value_from_tags(tags, tag_key):
+     tag_string = next((tag for tag in tags if tag.startswith(f"{tag_key}:")), None)
+     if not tag_string:
          return None
-     return integration_tag.split("integration:")[-1]
+     return tag_string.split(f"{tag_key}:")[-1]


  def _get_tags_from_span_event(event: LLMObsSpanEvent):
      span_kind = event.get("meta", {}).get("span.kind", "")
-     integration = _find_integration_from_tags(event.get("tags", []))
+     integration = _find_tag_value_from_tags(event.get("tags", []), "integration")
+     ml_app = _find_tag_value_from_tags(event.get("tags", []), "ml_app")
      autoinstrumented = integration is not None
      error = event.get("status") == "error"
      return [
@@ -53,6 +54,7 @@
          ("autoinstrumented", str(int(autoinstrumented))),
          ("error", str(int(error))),
          ("integration", integration if integration else "N/A"),
+         ("ml_app", ml_app if ml_app else "N/A"),
      ]

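Note: a standalone sketch of the generalized tag lookup above (the sample tags are hypothetical):

    def find_tag_value(tags, tag_key):
        # Same approach as _find_tag_value_from_tags: first "key:..." match wins.
        tag_string = next((t for t in tags if t.startswith(f"{tag_key}:")), None)
        return tag_string.split(f"{tag_key}:")[-1] if tag_string else None

    tags = ["ml_app:my-app", "integration:bedrock_agents"]
    assert find_tag_value(tags, "ml_app") == "my-app"
    assert find_tag_value(tags, "session_id") is None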
@@ -125,6 +127,19 @@ def record_span_created(span: Span):
      )


+ def record_bedrock_agent_span_event_created(span_event: LLMObsSpanEvent):
+     is_root_span = span_event["parent_id"] == ROOT_PARENT_ID
+     has_session_id = any("session_id" in tag for tag in span_event["tags"])
+     tags = _get_tags_from_span_event(span_event)
+     tags.extend([("has_session_id", str(int(has_session_id))), ("is_root_span", str(int(is_root_span)))])
+     model_provider = span_event["meta"]["metadata"].get("model_provider")
+     if model_provider is not None:
+         tags.append(("model_provider", model_provider))
+     telemetry_writer.add_count_metric(
+         namespace=TELEMETRY_NAMESPACE.MLOBS, name=LLMObsTelemetryMetrics.SPAN_FINISHED, value=1, tags=tuple(tags)
+     )
+
+
  def record_span_event_raw_size(event: LLMObsSpanEvent, raw_event_size: int):
      telemetry_writer.add_distribution_metric(
          namespace=TELEMETRY_NAMESPACE.MLOBS,
ddtrace/llmobs/_utils.py CHANGED
@@ -234,6 +234,12 @@ def enforce_message_role(messages: List[Dict[str, str]]) -> List[Dict[str, str]]
      return messages


+ def convert_tags_dict_to_list(tags: Dict[str, str]) -> List[str]:
+     if not tags:
+         return []
+     return [f"{key}:{value}" for key, value in tags.items()]
+
+
  @dataclass
  class ToolCall:
      """
ddtrace/settings/_config.py CHANGED
@@ -17,7 +17,6 @@ from ddtrace.internal.telemetry import telemetry_writer
  from ddtrace.internal.telemetry import validate_otel_envs
  from ddtrace.internal.utils.cache import cachedmethod

- from .._logger import LogInjectionState
  from .._logger import get_log_injection_state
  from ..internal import gitmetadata
  from ..internal.constants import _PROPAGATION_BEHAVIOR_DEFAULT
@@ -378,7 +377,7 @@ def _default_config() -> Dict[str, _ConfigItem]:
          modifier=str,
      ),
      "_logs_injection": _ConfigItem(
-         default=LogInjectionState.STRUCTURED,
+         default=True,
          envs=["DD_LOGS_INJECTION"],
          modifier=get_log_injection_state,
      ),
ddtrace/settings/profiling.py CHANGED
@@ -240,15 +240,6 @@ class ProfilingConfig(DDConfig):
          help="Whether to enable debug assertions in the profiler code",
      )

-     _force_legacy_exporter = DDConfig.v(
-         bool,
-         "_force_legacy_exporter",
-         default=False,
-         help_type="Boolean",
-         help="Exclusively used in testing environments to force the use of the legacy exporter. This parameter is "
-         "not for general use and will be removed in the near future.",
-     )
-
      sample_pool_capacity = DDConfig.v(
          int,
{ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ddtrace
- Version: 3.11.0rc1
+ Version: 3.11.0rc2
  Summary: Datadog APM client library
  Author-email: "Datadog, Inc." <dev@datadoghq.com>
  License: LICENSE.BSD3