ddtrace 3.11.0rc1__cp312-cp312-musllinux_1_2_i686.whl → 3.11.0rc2__cp312-cp312-musllinux_1_2_i686.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddtrace/_logger.py +5 -6
- ddtrace/_trace/product.py +1 -1
- ddtrace/_trace/trace_handlers.py +3 -1
- ddtrace/_version.py +2 -2
- ddtrace/appsec/_asm_request_context.py +3 -1
- ddtrace/appsec/_iast/_listener.py +12 -2
- ddtrace/contrib/integration_registry/registry.yaml +10 -0
- ddtrace/contrib/internal/avro/__init__.py +17 -0
- ddtrace/contrib/internal/azure_functions/patch.py +23 -12
- ddtrace/contrib/internal/azure_functions/utils.py +14 -0
- ddtrace/contrib/internal/botocore/__init__.py +153 -0
- ddtrace/contrib/{_freezegun.py → internal/freezegun/__init__.py} +1 -1
- ddtrace/contrib/internal/langchain/patch.py +11 -443
- ddtrace/contrib/internal/langchain/utils.py +0 -26
- ddtrace/contrib/internal/logbook/patch.py +1 -2
- ddtrace/contrib/internal/logging/patch.py +4 -7
- ddtrace/contrib/internal/loguru/patch.py +1 -3
- ddtrace/contrib/internal/protobuf/__init__.py +17 -0
- ddtrace/contrib/internal/pytest/__init__.py +62 -0
- ddtrace/contrib/internal/pytest/_plugin_v2.py +12 -3
- ddtrace/contrib/internal/pytest_bdd/__init__.py +23 -0
- ddtrace/contrib/internal/pytest_benchmark/__init__.py +3 -0
- ddtrace/contrib/internal/structlog/patch.py +2 -4
- ddtrace/contrib/internal/unittest/__init__.py +36 -0
- ddtrace/internal/_encoding.cpython-312-i386-linux-musl.so +0 -0
- ddtrace/internal/_encoding.pyi +1 -1
- ddtrace/internal/ci_visibility/encoder.py +18 -12
- ddtrace/internal/ci_visibility/utils.py +4 -4
- ddtrace/internal/core/__init__.py +5 -2
- ddtrace/internal/test_visibility/coverage_lines.py +4 -4
- ddtrace/internal/writer/writer.py +24 -11
- ddtrace/llmobs/_constants.py +2 -0
- ddtrace/llmobs/_experiment.py +69 -10
- ddtrace/llmobs/_integrations/bedrock.py +4 -0
- ddtrace/llmobs/_integrations/bedrock_agents.py +5 -1
- ddtrace/llmobs/_integrations/langchain.py +29 -20
- ddtrace/llmobs/_llmobs.py +78 -13
- ddtrace/llmobs/_telemetry.py +20 -5
- ddtrace/llmobs/_utils.py +6 -0
- ddtrace/settings/_config.py +1 -2
- ddtrace/settings/profiling.py +0 -9
- {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/METADATA +1 -1
- {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/RECORD +126 -133
- ddtrace/contrib/_avro.py +0 -17
- ddtrace/contrib/_botocore.py +0 -153
- ddtrace/contrib/_protobuf.py +0 -17
- ddtrace/contrib/_pytest.py +0 -62
- ddtrace/contrib/_pytest_bdd.py +0 -23
- ddtrace/contrib/_pytest_benchmark.py +0 -3
- ddtrace/contrib/_unittest.py +0 -36
- /ddtrace/contrib/{_aiobotocore.py → internal/aiobotocore/__init__.py} +0 -0
- /ddtrace/contrib/{_aiohttp_jinja2.py → internal/aiohttp_jinja2/__init__.py} +0 -0
- /ddtrace/contrib/{_aiomysql.py → internal/aiomysql/__init__.py} +0 -0
- /ddtrace/contrib/{_aiopg.py → internal/aiopg/__init__.py} +0 -0
- /ddtrace/contrib/{_aioredis.py → internal/aioredis/__init__.py} +0 -0
- /ddtrace/contrib/{_algoliasearch.py → internal/algoliasearch/__init__.py} +0 -0
- /ddtrace/contrib/{_anthropic.py → internal/anthropic/__init__.py} +0 -0
- /ddtrace/contrib/{_aredis.py → internal/aredis/__init__.py} +0 -0
- /ddtrace/contrib/{_asyncio.py → internal/asyncio/__init__.py} +0 -0
- /ddtrace/contrib/{_asyncpg.py → internal/asyncpg/__init__.py} +0 -0
- /ddtrace/contrib/{_aws_lambda.py → internal/aws_lambda/__init__.py} +0 -0
- /ddtrace/contrib/{_azure_functions.py → internal/azure_functions/__init__.py} +0 -0
- /ddtrace/contrib/{_azure_servicebus.py → internal/azure_servicebus/__init__.py} +0 -0
- /ddtrace/contrib/{_boto.py → internal/boto/__init__.py} +0 -0
- /ddtrace/contrib/{_cassandra.py → internal/cassandra/__init__.py} +0 -0
- /ddtrace/contrib/{_consul.py → internal/consul/__init__.py} +0 -0
- /ddtrace/contrib/{_coverage.py → internal/coverage/__init__.py} +0 -0
- /ddtrace/contrib/{_crewai.py → internal/crewai/__init__.py} +0 -0
- /ddtrace/contrib/{_django.py → internal/django/__init__.py} +0 -0
- /ddtrace/contrib/{_dogpile_cache.py → internal/dogpile_cache/__init__.py} +0 -0
- /ddtrace/contrib/{_dramatiq.py → internal/dramatiq/__init__.py} +0 -0
- /ddtrace/contrib/{_elasticsearch.py → internal/elasticsearch/__init__.py} +0 -0
- /ddtrace/contrib/{_fastapi.py → internal/fastapi/__init__.py} +0 -0
- /ddtrace/contrib/{_flask.py → internal/flask/__init__.py} +0 -0
- /ddtrace/contrib/{_futures.py → internal/futures/__init__.py} +0 -0
- /ddtrace/contrib/{_gevent.py → internal/gevent/__init__.py} +0 -0
- /ddtrace/contrib/{_google_genai.py → internal/google_genai/__init__.py} +0 -0
- /ddtrace/contrib/{_google_generativeai.py → internal/google_generativeai/__init__.py} +0 -0
- /ddtrace/contrib/{_graphql.py → internal/graphql/__init__.py} +0 -0
- /ddtrace/contrib/{_grpc.py → internal/grpc/__init__.py} +0 -0
- /ddtrace/contrib/{_gunicorn.py → internal/gunicorn/__init__.py} +0 -0
- /ddtrace/contrib/{_httplib.py → internal/httplib/__init__.py} +0 -0
- /ddtrace/contrib/{_httpx.py → internal/httpx/__init__.py} +0 -0
- /ddtrace/contrib/{_jinja2.py → internal/jinja2/__init__.py} +0 -0
- /ddtrace/contrib/{_kafka.py → internal/kafka/__init__.py} +0 -0
- /ddtrace/contrib/{_kombu.py → internal/kombu/__init__.py} +0 -0
- /ddtrace/contrib/{_langchain.py → internal/langchain/__init__.py} +0 -0
- /ddtrace/contrib/{_langgraph.py → internal/langgraph/__init__.py} +0 -0
- /ddtrace/contrib/{_litellm.py → internal/litellm/__init__.py} +0 -0
- /ddtrace/contrib/{_logbook.py → internal/logbook/__init__.py} +0 -0
- /ddtrace/contrib/{_logging.py → internal/logging/__init__.py} +0 -0
- /ddtrace/contrib/{_loguru.py → internal/loguru/__init__.py} +0 -0
- /ddtrace/contrib/{_mako.py → internal/mako/__init__.py} +0 -0
- /ddtrace/contrib/{_mariadb.py → internal/mariadb/__init__.py} +0 -0
- /ddtrace/contrib/{_mcp.py → internal/mcp/__init__.py} +0 -0
- /ddtrace/contrib/{_molten.py → internal/molten/__init__.py} +0 -0
- /ddtrace/contrib/{_mongoengine.py → internal/mongoengine/__init__.py} +0 -0
- /ddtrace/contrib/{_mysql.py → internal/mysql/__init__.py} +0 -0
- /ddtrace/contrib/{_mysqldb.py → internal/mysqldb/__init__.py} +0 -0
- /ddtrace/contrib/{_openai.py → internal/openai/__init__.py} +0 -0
- /ddtrace/contrib/{_openai_agents.py → internal/openai_agents/__init__.py} +0 -0
- /ddtrace/contrib/{_psycopg.py → internal/psycopg/__init__.py} +0 -0
- /ddtrace/contrib/{_pydantic_ai.py → internal/pydantic_ai/__init__.py} +0 -0
- /ddtrace/contrib/{_pymemcache.py → internal/pymemcache/__init__.py} +0 -0
- /ddtrace/contrib/{_pymongo.py → internal/pymongo/__init__.py} +0 -0
- /ddtrace/contrib/{_pymysql.py → internal/pymysql/__init__.py} +0 -0
- /ddtrace/contrib/{_pynamodb.py → internal/pynamodb/__init__.py} +0 -0
- /ddtrace/contrib/{_pyodbc.py → internal/pyodbc/__init__.py} +0 -0
- /ddtrace/contrib/{_redis.py → internal/redis/__init__.py} +0 -0
- /ddtrace/contrib/{_rediscluster.py → internal/rediscluster/__init__.py} +0 -0
- /ddtrace/contrib/{_rq.py → internal/rq/__init__.py} +0 -0
- /ddtrace/contrib/{_sanic.py → internal/sanic/__init__.py} +0 -0
- /ddtrace/contrib/{_selenium.py → internal/selenium/__init__.py} +0 -0
- /ddtrace/contrib/{_snowflake.py → internal/snowflake/__init__.py} +0 -0
- /ddtrace/contrib/{_sqlite3.py → internal/sqlite3/__init__.py} +0 -0
- /ddtrace/contrib/{_starlette.py → internal/starlette/__init__.py} +0 -0
- /ddtrace/contrib/{_structlog.py → internal/structlog/__init__.py} +0 -0
- /ddtrace/contrib/{_subprocess.py → internal/subprocess/__init__.py} +0 -0
- /ddtrace/contrib/{_urllib.py → internal/urllib/__init__.py} +0 -0
- /ddtrace/contrib/{_urllib3.py → internal/urllib3/__init__.py} +0 -0
- /ddtrace/contrib/{_vertexai.py → internal/vertexai/__init__.py} +0 -0
- /ddtrace/contrib/{_vertica.py → internal/vertica/__init__.py} +0 -0
- /ddtrace/contrib/{_webbrowser.py → internal/webbrowser/__init__.py} +0 -0
- /ddtrace/contrib/{_yaaredis.py → internal/yaaredis/__init__.py} +0 -0
- {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/WHEEL +0 -0
- {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/entry_points.txt +0 -0
- {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/licenses/LICENSE +0 -0
- {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/licenses/LICENSE.Apache +0 -0
- {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/licenses/LICENSE.BSD3 +0 -0
- {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/licenses/NOTICE +0 -0
- {ddtrace-3.11.0rc1.dist-info → ddtrace-3.11.0rc2.dist-info}/top_level.txt +0 -0
ddtrace/llmobs/_experiment.py
CHANGED
@@ -21,6 +21,7 @@ from ddtrace.constants import ERROR_STACK
 from ddtrace.constants import ERROR_TYPE
 from ddtrace.internal.logger import get_logger
 from ddtrace.llmobs._constants import EXPERIMENT_EXPECTED_OUTPUT
+from ddtrace.llmobs._utils import convert_tags_dict_to_list
 
 
 if TYPE_CHECKING:
@@ -153,6 +154,12 @@ class Dataset:
         self._deleted_record_ids.append(record_id)
         del self._records[index]
 
+    @property
+    def url(self) -> str:
+        # FIXME: need to use the user's site
+        # also will not work for subdomain orgs
+        return f"https://app.datadoghq.com/llm/datasets/{self._id}"
+
     @overload
     def __getitem__(self, index: int) -> DatasetRecord:
         ...
@@ -170,6 +177,50 @@ class Dataset:
     def __iter__(self) -> Iterator[DatasetRecord]:
         return iter(self._records)
 
+    def as_dataframe(self) -> None:
+        try:
+            import pandas as pd
+        except ImportError as e:
+            raise ImportError(
+                "pandas is required to convert dataset to DataFrame. Please install via `pip install pandas`"
+            ) from e
+
+        column_tuples = set()
+        data_rows = []
+        for record in self._records:
+            flat_record = {}  # type: Dict[Union[str, Tuple[str, str]], Any]
+
+            input_data = record.get("input_data", {})
+            if isinstance(input_data, dict):
+                for input_data_col, input_data_val in input_data.items():
+                    flat_record[("input_data", input_data_col)] = input_data_val
+                    column_tuples.add(("input_data", input_data_col))
+            else:
+                flat_record[("input_data", "")] = input_data
+                column_tuples.add(("input_data", ""))
+
+            expected_output = record.get("expected_output", {})
+            if isinstance(expected_output, dict):
+                for expected_output_col, expected_output_val in expected_output.items():
+                    flat_record[("expected_output", expected_output_col)] = expected_output_val
+                    column_tuples.add(("expected_output", expected_output_col))
+            else:
+                flat_record[("expected_output", "")] = expected_output
+                column_tuples.add(("expected_output", ""))
+
+            for metadata_col, metadata_val in record.get("metadata", {}).items():
+                flat_record[("metadata", metadata_col)] = metadata_val
+                column_tuples.add(("metadata", metadata_col))
+
+            data_rows.append(flat_record)
+
+        records_list = []
+        for flat_record in data_rows:
+            row = [flat_record.get(col, None) for col in column_tuples]
+            records_list.append(row)
+
+        return pd.DataFrame(data=records_list, columns=pd.MultiIndex.from_tuples(column_tuples))
+
 
 class Experiment:
     def __init__(
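A hedged usage sketch of the new Dataset.as_dataframe() (the dataset name and record fields are illustrative, and pulling via LLMObs.pull_dataset follows the "LLMObs.pull/create_dataset()" reference in the experiment docstring further down; pandas must be installed, and columns come back as a two-level MultiIndex over input_data, expected_output, and metadata):

from ddtrace.llmobs import LLMObs

ds = LLMObs.pull_dataset("qa-dataset")  # hypothetical dataset name
df = ds.as_dataframe()
print(df.columns.tolist())
# e.g. [("input_data", "question"), ("expected_output", "answer"), ("metadata", "difficulty")]
print(df[("input_data", "question")])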
@@ -180,7 +231,7 @@ class Experiment:
         evaluators: List[Callable[[DatasetRecordInputType, JSONType, JSONType], JSONType]],
         project_name: str,
         description: str = "",
-        tags: Optional[
+        tags: Optional[Dict[str, str]] = None,
         config: Optional[ExperimentConfigType] = None,
         _llmobs_instance: Optional["LLMObs"] = None,
     ) -> None:
@@ -189,8 +240,8 @@ class Experiment:
         self._dataset = dataset
         self._evaluators = evaluators
         self._description = description
-        self._tags:
-        self._tags.
+        self._tags: Dict[str, str] = tags or {}
+        self._tags["ddtrace.version"] = str(ddtrace.__version__)
         self._config: Dict[str, JSONType] = config or {}
         self._llmobs_instance = _llmobs_instance
 
@@ -217,7 +268,8 @@ class Experiment:
         if not self._llmobs_instance.enabled:
             logger.warning(
                 "Skipping experiment as LLMObs is not enabled. "
-                "Ensure LLM Observability is enabled via `LLMObs.enable(...)`
+                "Ensure LLM Observability is enabled via `LLMObs.enable(...)` "
+                "or set `DD_LLMOBS_ENABLED=1` and use `ddtrace-run` to run your application."
             )
             return []
 
@@ -230,17 +282,19 @@ class Experiment:
             self._project_id,
             self._dataset._version,
             self._config,
-            self._tags,
+            convert_tags_dict_to_list(self._tags),
             self._description,
         )
         self._id = experiment_id
-        self._tags
+        self._tags["experiment_id"] = str(experiment_id)
         self._run_name = experiment_run_name
         task_results = self._run_task(jobs, raise_errors, sample_size)
         evaluations = self._run_evaluators(task_results, raise_errors=raise_errors)
         experiment_results = self._merge_results(task_results, evaluations)
         experiment_evals = self._generate_metrics_from_exp_results(experiment_results)
-        self._llmobs_instance._dne_client.experiment_eval_post(
+        self._llmobs_instance._dne_client.experiment_eval_post(
+            self._id, experiment_evals, convert_tags_dict_to_list(self._tags)
+        )
         return experiment_results
 
     def _process_record(self, idx_record: Tuple[int, DatasetRecord]) -> Optional[TaskResult]:
@@ -256,7 +310,12 @@ class Experiment:
         span_id, trace_id = "", ""
         input_data = record["input_data"]
         record_id = record.get("record_id", "")
-        tags = {
+        tags = {
+            **self._tags,
+            "dataset_id": str(self._dataset._id),
+            "dataset_record_id": str(record_id),
+            "experiment_id": str(self._id),
+        }
         output_data = None
         try:
             output_data = self._task(input_data, self._config)
@@ -342,7 +401,7 @@ class Experiment:
         experiment_results = []
         for idx, task_result in enumerate(task_results):
             output_data = task_result["output"]
-            metadata: Dict[str, JSONType] = {"tags": cast(List[JSONType], self._tags)}
+            metadata: Dict[str, JSONType] = {"tags": cast(List[JSONType], convert_tags_dict_to_list(self._tags))}
             metadata.update(task_result.get("metadata") or {})
             record: DatasetRecord = self._dataset[idx]
             evals = evaluations[idx]["evaluations"]
@@ -383,7 +442,7 @@ class Experiment:
             "label": eval_name,
             f"{metric_type}_value": eval_value,  # type: ignore
             "error": err,
-            "tags": self._tags,
+            "tags": convert_tags_dict_to_list(self._tags),
             "experiment_id": self._id,
         }
 
ddtrace/llmobs/_integrations/bedrock.py
CHANGED
@@ -13,6 +13,7 @@ from ddtrace.llmobs._constants import CACHE_READ_INPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import CACHE_WRITE_INPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_VALUE
+from ddtrace.llmobs._constants import INTEGRATION
 from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import METRICS
 from ddtrace.llmobs._constants import MODEL_NAME
@@ -30,6 +31,7 @@ from ddtrace.llmobs._integrations.bedrock_utils import normalize_input_tokens
 from ddtrace.llmobs._integrations.utils import get_final_message_converse_stream_message
 from ddtrace.llmobs._integrations.utils import get_messages_from_converse_content
 from ddtrace.llmobs._integrations.utils import update_proxy_workflow_input_output_value
+from ddtrace.llmobs._telemetry import record_bedrock_agent_span_event_created
 from ddtrace.llmobs._writer import LLMObsSpanEvent
 from ddtrace.trace import Span
 
@@ -151,6 +153,7 @@ class BedrockIntegration(BaseLLMIntegration):
                 INPUT_VALUE: str(input_value),
                 TAGS: {"session_id": session_id},
                 METADATA: {"agent_id": agent_id, "agent_alias_id": agent_alias_id},
+                INTEGRATION: "bedrock_agents",
             }
         )
         if not response:
@@ -176,6 +179,7 @@ class BedrockIntegration(BaseLLMIntegration):
         )
         for _, span_event in self._spans.items():
             LLMObs._instance._llmobs_span_writer.enqueue(span_event)
+            record_bedrock_agent_span_event_created(span_event)
         self._spans.clear()
         self._active_span_by_step_id.clear()
ddtrace/llmobs/_integrations/bedrock_agents.py
CHANGED
@@ -15,6 +15,7 @@ from ddtrace.internal.utils.formats import format_trace_id
 from ddtrace.llmobs._constants import LLMOBS_TRACE_ID
 from ddtrace.llmobs._integrations.bedrock_utils import parse_model_id
 from ddtrace.llmobs._utils import _get_ml_app
+from ddtrace.llmobs._utils import _get_session_id
 from ddtrace.llmobs._utils import safe_json
 
 
@@ -57,12 +58,15 @@ def _build_span_event(
     llmobs_trace_id = root_span._get_ctx_item(LLMOBS_TRACE_ID)
     if llmobs_trace_id is None:
         llmobs_trace_id = root_span.trace_id
+    session_id = _get_session_id(root_span)
+    ml_app = _get_ml_app(root_span)
+    tags = [f"ml_app:{ml_app}", f"session_id:{session_id}", "integration:bedrock_agents"]
     span_event = {
         "name": span_name,
         "span_id": str(span_id),
         "trace_id": format_trace_id(llmobs_trace_id),
         "parent_id": str(parent_id or root_span.span_id),
-        "tags":
+        "tags": tags,
         "start_ns": int(start_ns or root_span.start_ns),
         "duration": int(duration_ns or DEFAULT_SPAN_DURATION),
         "status": "error" if error else "ok",
ddtrace/llmobs/_integrations/langchain.py
CHANGED
@@ -163,7 +163,6 @@ class LangChainIntegration(BaseLLMIntegration):
 
         self._set_links(span)
         model_provider = span.get_tag(PROVIDER)
-        self._llmobs_set_metadata(span, model_provider)
 
         is_workflow = False
 
@@ -365,26 +364,37 @@ class LangChainIntegration(BaseLLMIntegration):
         if hasattr(instance, "_datadog_spans"):
             delattr(instance, "_datadog_spans")
 
-    def _llmobs_set_metadata(self, span: Span,
-
+    def _llmobs_set_metadata(self, span: Span, kwargs: Dict[str, Any]) -> None:
+        identifying_params = kwargs.pop("_dd.identifying_params", None)
+        if not identifying_params:
             return
+        metadata = self._llmobs_extract_parameters(identifying_params)
+        for val in identifying_params.values():
+            if metadata:
+                break
+            if not isinstance(val, dict):
+                continue
+            metadata = self._llmobs_extract_parameters(val)
 
-        metadata
-
-            f"langchain.request.{model_provider}.parameters.model_kwargs.temperature"
-        )  # huggingface
-        max_tokens = (
-            span.get_tag(f"langchain.request.{model_provider}.parameters.max_tokens")
-            or span.get_tag(f"langchain.request.{model_provider}.parameters.maxTokens")  # ai21
-            or span.get_tag(f"langchain.request.{model_provider}.parameters.model_kwargs.max_tokens")  # huggingface
-        )
+        if metadata:
+            span._set_ctx_item(METADATA, metadata)
 
+    def _llmobs_extract_parameters(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
+        metadata: Dict[str, Any] = {}
+        max_tokens = None
+        temperature = None
+        if "temperature" in parameters:
+            temperature = parameters["temperature"]
+        for max_token_key in ["max_tokens", "maxTokens", "max_completion_tokens"]:
+            if max_token_key in parameters:
+                max_tokens = parameters[max_token_key]
+                break
         if temperature is not None and temperature != "None":
            metadata["temperature"] = float(temperature)
         if max_tokens is not None and max_tokens != "None":
             metadata["max_tokens"] = int(max_tokens)
-
-
+
+        return metadata
 
     def _llmobs_set_tags_from_llm(
         self, span: Span, args: List[Any], kwargs: Dict[str, Any], completions: Any, is_workflow: bool = False
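The temperature/max_tokens extraction now reads from the identifying_params payload instead of span tags. A standalone sketch of the same logic (not the integration class itself; inputs are illustrative):

def extract_parameters(parameters):
    # Mirrors _llmobs_extract_parameters above.
    metadata = {}
    temperature = parameters.get("temperature")
    max_tokens = next(
        (parameters[k] for k in ("max_tokens", "maxTokens", "max_completion_tokens") if k in parameters),
        None,
    )
    if temperature is not None and temperature != "None":
        metadata["temperature"] = float(temperature)
    if max_tokens is not None and max_tokens != "None":
        metadata["max_tokens"] = int(max_tokens)
    return metadata

print(extract_parameters({"temperature": "0.7", "maxTokens": 256}))
# {'temperature': 0.7, 'max_tokens': 256}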
@@ -411,6 +421,8 @@ class LangChainIntegration(BaseLLMIntegration):
             }
         )
 
+        self._llmobs_set_metadata(span, kwargs)
+
         if span.error:
             span._set_ctx_item(output_tag_key, [{"content": ""}])
             return
@@ -444,6 +456,9 @@ class LangChainIntegration(BaseLLMIntegration):
                 MODEL_PROVIDER: span.get_tag(PROVIDER) or "",
             }
         )
+
+        self._llmobs_set_metadata(span, kwargs)
+
         input_tag_key = INPUT_VALUE if is_workflow else INPUT_MESSAGES
         output_tag_key = OUTPUT_VALUE if is_workflow else OUTPUT_MESSAGES
         stream = span.get_tag("langchain.request.stream")
@@ -700,16 +715,10 @@ class LangChainIntegration(BaseLLMIntegration):
         **kwargs,
     ) -> None:
         """Set base level tags that should be present on all LangChain spans (if they are not None)."""
-        span.set_tag_str(TYPE, interface_type)
         if provider is not None:
             span.set_tag_str(PROVIDER, provider)
         if model is not None:
             span.set_tag_str(MODEL, model)
-        if api_key is not None:
-            if len(api_key) >= 4:
-                span.set_tag_str(API_KEY, "...%s" % str(api_key[-4:]))
-            else:
-                span.set_tag_str(API_KEY, api_key)
 
     def check_token_usage_chat_or_llm_result(self, result):
         """Checks for token usage on the top-level ChatResult or LLMResult object"""
ddtrace/llmobs/_llmobs.py
CHANGED
@@ -1,3 +1,4 @@
+import csv
 from dataclasses import dataclass
 from dataclasses import field
 import inspect
@@ -45,9 +46,11 @@ from ddtrace.llmobs import _constants as constants
 from ddtrace.llmobs import _telemetry as telemetry
 from ddtrace.llmobs._constants import ANNOTATIONS_CONTEXT_ID
 from ddtrace.llmobs._constants import DECORATOR
+from ddtrace.llmobs._constants import DEFAULT_PROJECT_NAME
 from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
 from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL
 from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
+from ddtrace.llmobs._constants import EXPERIMENT_CSV_FIELD_MAX_SIZE
 from ddtrace.llmobs._constants import EXPERIMENT_EXPECTED_OUTPUT
 from ddtrace.llmobs._constants import EXPERIMENT_ID_KEY
 from ddtrace.llmobs._constants import INPUT_DOCUMENTS
@@ -77,8 +80,8 @@ from ddtrace.llmobs._constants import TAGS
 from ddtrace.llmobs._context import LLMObsContextProvider
 from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
 from ddtrace.llmobs._experiment import Dataset
+from ddtrace.llmobs._experiment import DatasetRecord
 from ddtrace.llmobs._experiment import DatasetRecordInputType
-from ddtrace.llmobs._experiment import DatasetRecordRaw as DatasetRecord
 from ddtrace.llmobs._experiment import Experiment
 from ddtrace.llmobs._experiment import ExperimentConfigType
 from ddtrace.llmobs._experiment import JSONType
@@ -168,7 +171,7 @@ class LLMObs(Service):
     _instance = None  # type: LLMObs
     enabled = False
     _app_key: str = os.getenv("DD_APP_KEY", "")
-    _project_name: str = os.getenv("DD_LLMOBS_PROJECT_NAME",
+    _project_name: str = os.getenv("DD_LLMOBS_PROJECT_NAME", DEFAULT_PROJECT_NAME)
 
     def __init__(
         self,
@@ -509,12 +512,16 @@ class LLMObs(Service):
         config._dd_site = site or config._dd_site
         config._dd_api_key = api_key or config._dd_api_key
         cls._app_key = app_key or cls._app_key
-        cls._project_name = project_name or cls._project_name
+        cls._project_name = project_name or cls._project_name or DEFAULT_PROJECT_NAME
         config.env = env or config.env
         config.service = service or config.service
         config._llmobs_ml_app = ml_app or config._llmobs_ml_app
         config._llmobs_instrumented_proxy_urls = instrumented_proxy_urls or config._llmobs_instrumented_proxy_urls
 
+        # FIXME: workaround to prevent noisy logs when using the experiments feature
+        if config._dd_api_key and cls._app_key and os.environ.get("DD_TRACE_ENABLED", "").lower() not in ["true", "1"]:
+            ddtrace.tracer.enabled = False
+
         error = None
         start_ns = time.time_ns()
         try:
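One behavioral note from the hunk above: when both an API key and an app key are configured and DD_TRACE_ENABLED is unset (the experiments-style setup), enable() now switches the tracer off. A hedged sketch of keeping APM tracing on explicitly (ml_app value is illustrative):

import os

# Opt back in before enabling LLMObs, since the workaround above disables
# the tracer when DD_TRACE_ENABLED is not "true"/"1" and both keys are set.
os.environ["DD_TRACE_ENABLED"] = "true"

from ddtrace.llmobs import LLMObs
LLMObs.enable(ml_app="my-app")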
@@ -596,6 +603,67 @@ class LLMObs(Service):
         ds.push()
         return ds
 
+    @classmethod
+    def create_dataset_from_csv(
+        cls,
+        csv_path: str,
+        dataset_name: str,
+        input_data_columns: List[str],
+        expected_output_columns: List[str],
+        metadata_columns: List[str] = [],
+        csv_delimiter: str = ",",
+        description="",
+    ) -> Dataset:
+        ds = cls._instance._dne_client.dataset_create(dataset_name, description)
+
+        # Store the original field size limit to restore it later
+        original_field_size_limit = csv.field_size_limit()
+
+        csv.field_size_limit(EXPERIMENT_CSV_FIELD_MAX_SIZE)  # 10mb
+
+        try:
+            with open(csv_path, mode="r") as csvfile:
+                content = csvfile.readline().strip()
+                if not content:
+                    raise ValueError("CSV file appears to be empty or header is missing.")
+
+                csvfile.seek(0)
+
+                rows = csv.DictReader(csvfile, delimiter=csv_delimiter)
+
+                if rows.fieldnames is None:
+                    raise ValueError("CSV file appears to be empty or header is missing.")
+
+                header_columns = rows.fieldnames
+                missing_input_columns = [col for col in input_data_columns if col not in header_columns]
+                missing_output_columns = [col for col in expected_output_columns if col not in header_columns]
+                missing_metadata_columns = [col for col in metadata_columns if col not in metadata_columns]
+
+                if any(col not in header_columns for col in input_data_columns):
+                    raise ValueError(f"Input columns not found in CSV header: {missing_input_columns}")
+                if any(col not in header_columns for col in expected_output_columns):
+                    raise ValueError(f"Expected output columns not found in CSV header: {missing_output_columns}")
+                if any(col not in header_columns for col in metadata_columns):
+                    raise ValueError(f"Metadata columns not found in CSV header: {missing_metadata_columns}")
+
+                for row in rows:
+                    ds.append(
+                        DatasetRecord(
+                            input_data={col: row[col] for col in input_data_columns},
+                            expected_output={col: row[col] for col in expected_output_columns},
+                            metadata={col: row[col] for col in metadata_columns},
+                            record_id="",
+                        )
+                    )
+
+        finally:
+            # Always restore the original field size limit
+            csv.field_size_limit(original_field_size_limit)
+
+        if len(ds) > 0:
+            ds.push()
+        return ds
+
     @classmethod
     def _delete_dataset(cls, dataset_id: str) -> None:
         return cls._instance._dne_client.dataset_delete(dataset_id)
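A hedged usage sketch for the new create_dataset_from_csv() (file and column names are illustrative):

from ddtrace.llmobs import LLMObs

# Assumes qa.csv has a header row: question,answer,difficulty
ds = LLMObs.create_dataset_from_csv(
    csv_path="qa.csv",
    dataset_name="qa-dataset",
    input_data_columns=["question"],
    expected_output_columns=["answer"],
    metadata_columns=["difficulty"],
)
print(ds.url)  # Dataset.url is also new in this release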
@@ -608,21 +676,19 @@ class LLMObs(Service):
         dataset: Dataset,
         evaluators: List[Callable[[DatasetRecordInputType, JSONType, JSONType], JSONType]],
         description: str = "",
-
-
+        tags: Optional[Dict[str, str]] = None,
+        config: Optional[ExperimentConfigType] = None,
     ) -> Experiment:
         """Initializes an Experiment to run a task on a Dataset and evaluators.
 
         :param name: The name of the experiment.
-        :param task: The task function to run. Must accept
+        :param task: The task function to run. Must accept parameters ``input_data`` and ``config``.
         :param dataset: The dataset to run the experiment on, created with LLMObs.pull/create_dataset().
         :param evaluators: A list of evaluator functions to evaluate the task output.
             Must accept parameters ``input_data``, ``output_data``, and ``expected_output``.
         :param description: A description of the experiment.
-        :param
-
-            or `LLMObs.enable(project_name=...)`.
-        :param tags: A list of string tags to associate with the experiment.
+        :param tags: A dictionary of string key-value tag pairs to associate with the experiment.
+        :param config: A configuration dictionary describing the experiment.
         """
         if not callable(task):
             raise TypeError("task must be a callable function.")
@@ -640,16 +706,15 @@ class LLMObs(Service):
             required_params = ("input_data", "output_data", "expected_output")
             if not all(param in params for param in required_params):
                 raise TypeError("Evaluator function must have parameters {}.".format(required_params))
-        if project_name is None:
-            project_name = cls._project_name
         return Experiment(
             name,
             task,
             dataset,
             evaluators,
-            project_name=
+            project_name=cls._project_name,
             tags=tags,
             description=description,
+            config=config,
             _llmobs_instance=cls._instance,
         )
 
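Putting the signature changes together, a hedged sketch of LLMObs.experiment() with the new dict-valued tags and config parameters (task and evaluator bodies are illustrative):

from ddtrace.llmobs import LLMObs

def my_task(input_data, config):
    # Must accept ``input_data`` and ``config``.
    return input_data

def my_evaluator(input_data, output_data, expected_output):
    # Must accept ``input_data``, ``output_data``, and ``expected_output``.
    return output_data == expected_output

experiment = LLMObs.experiment(
    name="qa-baseline",
    task=my_task,
    dataset=ds,  # a Dataset pulled or created earlier
    evaluators=[my_evaluator],
    tags={"team": "ml-platform"},  # previously a list of strings
    config={"model": "gpt-4o"},
)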
ddtrace/llmobs/_telemetry.py
CHANGED
@@ -36,16 +36,17 @@ class LLMObsTelemetryMetrics:
     USER_PROCESSOR_CALLED = "user_processor_called"
 
 
-def
-
-    if not
+def _find_tag_value_from_tags(tags, tag_key):
+    tag_string = next((tag for tag in tags if tag.startswith(f"{tag_key}:")), None)
+    if not tag_string:
         return None
-    return
+    return tag_string.split(f"{tag_key}:")[-1]
 
 
 def _get_tags_from_span_event(event: LLMObsSpanEvent):
     span_kind = event.get("meta", {}).get("span.kind", "")
-    integration =
+    integration = _find_tag_value_from_tags(event.get("tags", []), "integration")
+    ml_app = _find_tag_value_from_tags(event.get("tags", []), "ml_app")
     autoinstrumented = integration is not None
     error = event.get("status") == "error"
     return [
@@ -53,6 +54,7 @@ def _get_tags_from_span_event(event: LLMObsSpanEvent):
         ("autoinstrumented", str(int(autoinstrumented))),
         ("error", str(int(error))),
         ("integration", integration if integration else "N/A"),
+        ("ml_app", ml_app if ml_app else "N/A"),
     ]
 
 
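For reference, how the new tag lookup helper resolves values (inputs illustrative):

tags = ["ml_app:demo", "integration:bedrock_agents"]
_find_tag_value_from_tags(tags, "integration")  # -> "bedrock_agents"
_find_tag_value_from_tags(tags, "session_id")   # -> None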
@@ -125,6 +127,19 @@ def record_span_created(span: Span):
     )
 
 
+def record_bedrock_agent_span_event_created(span_event: LLMObsSpanEvent):
+    is_root_span = span_event["parent_id"] == ROOT_PARENT_ID
+    has_session_id = any("session_id" in tag for tag in span_event["tags"])
+    tags = _get_tags_from_span_event(span_event)
+    tags.extend([("has_session_id", str(int(has_session_id))), ("is_root_span", str(int(is_root_span)))])
+    model_provider = span_event["meta"]["metadata"].get("model_provider")
+    if model_provider is not None:
+        tags.append(("model_provider", model_provider))
+    telemetry_writer.add_count_metric(
+        namespace=TELEMETRY_NAMESPACE.MLOBS, name=LLMObsTelemetryMetrics.SPAN_FINISHED, value=1, tags=tuple(tags)
+    )
+
+
 def record_span_event_raw_size(event: LLMObsSpanEvent, raw_event_size: int):
     telemetry_writer.add_distribution_metric(
         namespace=TELEMETRY_NAMESPACE.MLOBS,
ddtrace/llmobs/_utils.py
CHANGED
@@ -234,6 +234,12 @@ def enforce_message_role(messages: List[Dict[str, str]]) -> List[Dict[str, str]]
     return messages
 
 
+def convert_tags_dict_to_list(tags: Dict[str, str]) -> List[str]:
+    if not tags:
+        return []
+    return [f"{key}:{value}" for key, value in tags.items()]
+
+
 @dataclass
 class ToolCall:
     """
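Behavior of the new helper, for reference:

convert_tags_dict_to_list({"ml_app": "demo", "experiment_id": "123"})
# -> ["ml_app:demo", "experiment_id:123"]
convert_tags_dict_to_list({})
# -> []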
ddtrace/settings/_config.py
CHANGED
@@ -17,7 +17,6 @@ from ddtrace.internal.telemetry import telemetry_writer
 from ddtrace.internal.telemetry import validate_otel_envs
 from ddtrace.internal.utils.cache import cachedmethod
 
-from .._logger import LogInjectionState
 from .._logger import get_log_injection_state
 from ..internal import gitmetadata
 from ..internal.constants import _PROPAGATION_BEHAVIOR_DEFAULT
@@ -378,7 +377,7 @@ def _default_config() -> Dict[str, _ConfigItem]:
             modifier=str,
         ),
         "_logs_injection": _ConfigItem(
-            default=
+            default=True,
             envs=["DD_LOGS_INJECTION"],
             modifier=get_log_injection_state,
        ),
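With LogInjectionState dropped, _logs_injection now defaults to enabled. A hedged sketch of opting out via the env var shown above (set before ddtrace initializes):

import os

os.environ["DD_LOGS_INJECTION"] = "false"  # parsed by get_log_injection_state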
ddtrace/settings/profiling.py
CHANGED
@@ -240,15 +240,6 @@ class ProfilingConfig(DDConfig):
         help="Whether to enable debug assertions in the profiler code",
     )
 
-    _force_legacy_exporter = DDConfig.v(
-        bool,
-        "_force_legacy_exporter",
-        default=False,
-        help_type="Boolean",
-        help="Exclusively used in testing environments to force the use of the legacy exporter. This parameter is "
-        "not for general use and will be removed in the near future.",
-    )
-
     sample_pool_capacity = DDConfig.v(
         int,
         "sample_pool_capacity",