ddtrace 3.11.0rc2__cp312-cp312-win_amd64.whl → 3.11.0rc3__cp312-cp312-win_amd64.whl
This diff compares the contents of two publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between those versions as published.
Potentially problematic release.
This version of ddtrace might be problematic.
- ddtrace/_trace/sampling_rule.py +25 -33
- ddtrace/_trace/trace_handlers.py +9 -49
- ddtrace/_trace/utils_botocore/span_tags.py +48 -0
- ddtrace/_version.py +2 -2
- ddtrace/appsec/_constants.py +7 -0
- ddtrace/appsec/_handlers.py +11 -0
- ddtrace/appsec/_processor.py +1 -1
- ddtrace/contrib/internal/aiobotocore/patch.py +8 -0
- ddtrace/contrib/internal/boto/patch.py +14 -0
- ddtrace/contrib/internal/botocore/services/bedrock.py +3 -27
- ddtrace/contrib/internal/django/patch.py +31 -8
- ddtrace/contrib/internal/google_genai/_utils.py +2 -2
- ddtrace/contrib/internal/google_genai/patch.py +7 -7
- ddtrace/contrib/internal/google_generativeai/patch.py +7 -5
- ddtrace/contrib/internal/openai_agents/patch.py +44 -1
- ddtrace/contrib/internal/pytest/_plugin_v2.py +1 -1
- ddtrace/contrib/internal/vertexai/patch.py +7 -5
- ddtrace/ext/ci.py +20 -0
- ddtrace/ext/git.py +66 -11
- ddtrace/internal/_encoding.cp312-win_amd64.pyd +0 -0
- ddtrace/internal/_rand.cp312-win_amd64.pyd +0 -0
- ddtrace/internal/_tagset.cp312-win_amd64.pyd +0 -0
- ddtrace/internal/_threads.cp312-win_amd64.pyd +0 -0
- ddtrace/internal/ci_visibility/encoder.py +126 -55
- ddtrace/internal/datadog/profiling/dd_wrapper-unknown-amd64.dll +0 -0
- ddtrace/internal/datadog/profiling/ddup/_ddup.cp312-win_amd64.pyd +0 -0
- ddtrace/internal/datadog/profiling/ddup/dd_wrapper-unknown-amd64.dll +0 -0
- ddtrace/internal/endpoints.py +76 -0
- ddtrace/internal/native/_native.cp312-win_amd64.pyd +0 -0
- ddtrace/internal/schema/processor.py +6 -2
- ddtrace/internal/telemetry/metrics_namespaces.cp312-win_amd64.pyd +0 -0
- ddtrace/internal/telemetry/writer.py +18 -0
- ddtrace/llmobs/_constants.py +1 -0
- ddtrace/llmobs/_experiment.py +6 -0
- ddtrace/llmobs/_integrations/crewai.py +52 -3
- ddtrace/llmobs/_integrations/gemini.py +7 -7
- ddtrace/llmobs/_integrations/google_genai.py +10 -10
- ddtrace/llmobs/_integrations/{google_genai_utils.py → google_utils.py} +103 -7
- ddtrace/llmobs/_integrations/openai_agents.py +145 -0
- ddtrace/llmobs/_integrations/pydantic_ai.py +67 -26
- ddtrace/llmobs/_integrations/utils.py +68 -158
- ddtrace/llmobs/_integrations/vertexai.py +8 -8
- ddtrace/llmobs/_llmobs.py +5 -1
- ddtrace/llmobs/_utils.py +21 -0
- ddtrace/profiling/_threading.cp312-win_amd64.pyd +0 -0
- ddtrace/profiling/collector/_memalloc.cp312-win_amd64.pyd +0 -0
- ddtrace/profiling/collector/_task.cp312-win_amd64.pyd +0 -0
- ddtrace/profiling/collector/_traceback.cp312-win_amd64.pyd +0 -0
- ddtrace/profiling/collector/stack.cp312-win_amd64.pyd +0 -0
- ddtrace/settings/asm.py +9 -2
- ddtrace/vendor/psutil/_psutil_windows.cp312-win_amd64.pyd +0 -0
- {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/METADATA +1 -1
- {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/RECORD +60 -59
- {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/WHEEL +0 -0
- {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/entry_points.txt +0 -0
- {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/licenses/LICENSE +0 -0
- {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/licenses/LICENSE.Apache +0 -0
- {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/licenses/LICENSE.BSD3 +0 -0
- {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/licenses/NOTICE +0 -0
- {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/top_level.txt +0 -0
ddtrace/contrib/internal/openai_agents/patch.py
CHANGED
@@ -5,6 +5,10 @@ from agents.tracing import add_trace_processor
 
 from ddtrace import config
 from ddtrace.contrib.internal.openai_agents.processor import LLMObsTraceProcessor
+from ddtrace.contrib.trace_utils import unwrap
+from ddtrace.contrib.trace_utils import with_traced_module_async
+from ddtrace.contrib.trace_utils import wrap
+from ddtrace.internal.utils.version import parse_version
 from ddtrace.llmobs._integrations.openai_agents import OpenAIAgentsIntegration
 from ddtrace.trace import Pin
 
@@ -22,6 +26,29 @@ def _supported_versions() -> Dict[str, str]:
     return {"agents": ">=0.0.2"}
 
 
+OPENAI_AGENTS_VERSION = parse_version(get_version())
+
+
+@with_traced_module_async
+async def patched_run_single_turn(agents, pin, func, instance, args, kwargs):
+    return await _patched_run_single_turn(agents, pin, func, instance, args, kwargs, agent_index=0)
+
+
+@with_traced_module_async
+async def patched_run_single_turn_streamed(agents, pin, func, instance, args, kwargs):
+    return await _patched_run_single_turn(agents, pin, func, instance, args, kwargs, agent_index=1)
+
+
+async def _patched_run_single_turn(agents, pin, func, instance, args, kwargs, agent_index=0):
+    current_span = pin.tracer.current_span()
+    result = await func(*args, **kwargs)
+
+    integration = agents._datadog_integration
+    integration.tag_agent_manifest(current_span, args, kwargs, agent_index)
+
+    return result
+
+
 def patch():
     """
     Patch the instrumented methods
@@ -33,7 +60,16 @@ def patch():
 
     Pin().onto(agents)
 
-
+    integration = OpenAIAgentsIntegration(integration_config=config.openai_agents)
+    add_trace_processor(LLMObsTraceProcessor(integration))
+    agents._datadog_integration = integration
+
+    if OPENAI_AGENTS_VERSION >= (0, 0, 19):
+        wrap(agents.run.AgentRunner, "_run_single_turn", patched_run_single_turn(agents))
+        wrap(agents.run.AgentRunner, "_run_single_turn_streamed", patched_run_single_turn_streamed(agents))
+    else:
+        wrap(agents.run.Runner, "_run_single_turn", patched_run_single_turn(agents))
+        wrap(agents.run.Runner, "_run_single_turn_streamed", patched_run_single_turn_streamed(agents))
 
 
 def unpatch():
@@ -44,3 +80,10 @@ def unpatch():
         return
 
     agents._datadog_patch = False
+
+    if OPENAI_AGENTS_VERSION >= (0, 0, 19):
+        unwrap(agents.run.AgentRunner, "_run_single_turn")
+        unwrap(agents.run.AgentRunner, "_run_single_turn_streamed")
+    else:
+        unwrap(agents.run.Runner, "_run_single_turn")
+        unwrap(agents.run.Runner, "_run_single_turn_streamed")
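For orientation, the new code gates which class gets wrapped on the installed openai-agents version: `parse_version` turns the version string into a comparable tuple, and from 0.0.19 onward the `_run_single_turn` helpers live on `agents.run.AgentRunner` rather than `agents.run.Runner` (inferred from the wrap targets above). A minimal sketch of that gating, with a local stand-in for ddtrace's `parse_version`:

```python
from typing import Tuple


def parse_version(version: str) -> Tuple[int, ...]:
    """Illustrative stand-in for ddtrace.internal.utils.version.parse_version."""
    return tuple(int(part) for part in version.split(".")[:3])


# In the patch above this value comes from get_version(), i.e. the installed agents package.
OPENAI_AGENTS_VERSION = parse_version("0.0.19")

# Pick the class that owns _run_single_turn for this release line (assumption based on the 0.0.19 gate).
target = "agents.run.AgentRunner" if OPENAI_AGENTS_VERSION >= (0, 0, 19) else "agents.run.Runner"
print(target)  # agents.run.AgentRunner
```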
ddtrace/contrib/internal/pytest/_plugin_v2.py
CHANGED
@@ -345,7 +345,7 @@ def pytest_sessionstart(session: pytest.Session) -> None:
         test_impact_analysis="1" if _pytest_version_supports_itr() else None,
         test_management_quarantine="1",
         test_management_disable="1",
-        test_management_attempt_to_fix="
+        test_management_attempt_to_fix="5" if _pytest_version_supports_attempt_to_fix() else None,
     )
 
     InternalTestSession.discover(
ddtrace/contrib/internal/vertexai/patch.py
CHANGED
@@ -14,7 +14,7 @@ from ddtrace.contrib.internal.trace_utils import wrap
 from ddtrace.contrib.internal.vertexai._utils import TracedAsyncVertexAIStreamResponse
 from ddtrace.contrib.internal.vertexai._utils import TracedVertexAIStreamResponse
 from ddtrace.llmobs._integrations import VertexAIIntegration
-from ddtrace.llmobs._integrations.
+from ddtrace.llmobs._integrations.google_utils import extract_provider_and_model_name
 from ddtrace.trace import Pin
 
 
@@ -60,11 +60,12 @@ def _traced_generate(vertexai, pin, func, instance, args, kwargs, model_instance
     integration = vertexai._datadog_integration
     stream = kwargs.get("stream", False)
     generations = None
+    provider_name, model_name = extract_provider_and_model_name(instance=model_instance, model_name_attr="_model_name")
     span = integration.trace(
         pin,
         "%s.%s" % (instance.__class__.__name__, func.__name__),
-        provider=
-        model=
+        provider=provider_name,
+        model=model_name,
         submit_to_llmobs=True,
     )
     # history must be copied since it is modified during the LLM interaction
@@ -92,11 +93,12 @@ async def _traced_agenerate(vertexai, pin, func, instance, args, kwargs, model_i
     integration = vertexai._datadog_integration
     stream = kwargs.get("stream", False)
     generations = None
+    provider_name, model_name = extract_provider_and_model_name(instance=model_instance, model_name_attr="_model_name")
     span = integration.trace(
         pin,
         "%s.%s" % (instance.__class__.__name__, func.__name__),
-        provider=
-        model=
+        provider=provider_name,
+        model=model_name,
         submit_to_llmobs=True,
     )
     # history must be copied since it is modified during the LLM interaction
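Both traced entry points now obtain the provider and model name from a shared `extract_provider_and_model_name` helper (relocated to `google_utils.py` per the file list) rather than computing them inline. The helper's body is not part of this diff; the sketch below is only a hypothetical approximation of the call pattern, assuming it reads the attribute named by `model_name_attr` and keeps the last segment of a Vertex AI resource path:

```python
def extract_provider_and_model_name(instance, model_name_attr="_model_name"):
    # Hypothetical approximation; the real helper in ddtrace/llmobs/_integrations/google_utils.py may differ.
    raw = getattr(instance, model_name_attr, "") or ""
    model_name = raw.split("/")[-1]  # e.g. "publishers/google/models/gemini-1.5-flash" -> "gemini-1.5-flash"
    return "google", model_name


class FakeGenerativeModel:
    _model_name = "publishers/google/models/gemini-1.5-flash"


provider_name, model_name = extract_provider_and_model_name(instance=FakeGenerativeModel())
print(provider_name, model_name)  # google gemini-1.5-flash
```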
ddtrace/ext/ci.py
CHANGED
@@ -105,6 +105,16 @@ def tags(env=None, cwd=None):
             break
 
     git_info = git.extract_git_metadata(cwd=cwd)
+
+    # Whenever the HEAD commit SHA is present in the tags that come from the CI provider, we assume that
+    # the CI provider added a commit on top of the user's HEAD commit (e.g., GitHub Actions add a merge
+    # commit when triggered by a pull request). In that case, we extract the metadata for that commit specifically
+    # and add it to the tags.
+    head_commit_sha = tags.get(git.COMMIT_HEAD_SHA)
+    if head_commit_sha:
+        git_head_info = git.extract_git_head_metadata(head_commit_sha=head_commit_sha, cwd=cwd)
+        git_info.update(git_head_info)
+
     try:
         git_info[WORKSPACE_PATH] = git.extract_workspace_path(cwd=cwd)
     except git.GitNotFoundError:
@@ -349,6 +359,15 @@ def extract_github_actions(env):
         github_run_id,
     )
 
+    git_commit_head_sha = None
+    if "GITHUB_EVENT_PATH" in env:
+        try:
+            with open(env["GITHUB_EVENT_PATH"]) as f:
+                github_event_data = json.load(f)
+                git_commit_head_sha = github_event_data.get("pull_request", {}).get("head", {}).get("sha")
+        except Exception as e:
+            log.error("Failed to read or parse GITHUB_EVENT_PATH: %s", e)
+
     env_vars = {
         "GITHUB_SERVER_URL": github_server_url,
         "GITHUB_REPOSITORY": github_repository,
@@ -362,6 +381,7 @@ def extract_github_actions(env):
         git.BRANCH: env.get("GITHUB_HEAD_REF") or env.get("GITHUB_REF"),
         git.COMMIT_SHA: git_commit_sha,
         git.REPOSITORY_URL: "{0}/{1}.git".format(github_server_url, github_repository),
+        git.COMMIT_HEAD_SHA: git_commit_head_sha,
         JOB_URL: "{0}/{1}/commit/{2}/checks".format(github_server_url, github_repository, git_commit_sha),
         PIPELINE_ID: github_run_id,
         PIPELINE_NAME: env.get("GITHUB_WORKFLOW"),
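The GitHub Actions branch of this change reads the workflow's event payload to recover the pull request HEAD SHA, i.e. the commit the user actually pushed rather than the synthetic merge commit the runner checks out. A self-contained sketch of that lookup, using a fabricated event file in place of the real `GITHUB_EVENT_PATH` (field names follow GitHub's webhook schema):

```python
import json
import os
import tempfile

# Fake pull_request event payload standing in for the file GitHub writes on the runner.
event = {"pull_request": {"head": {"sha": "abc1234def5678"}}}

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(event, f)
    event_path = f.name

env = {"GITHUB_EVENT_PATH": event_path}

git_commit_head_sha = None
if "GITHUB_EVENT_PATH" in env:
    try:
        with open(env["GITHUB_EVENT_PATH"]) as fh:
            github_event_data = json.load(fh)
        # Missing keys fall through to None instead of raising.
        git_commit_head_sha = github_event_data.get("pull_request", {}).get("head", {}).get("sha")
    except Exception as e:
        print("Failed to read or parse GITHUB_EVENT_PATH:", e)

print(git_commit_head_sha)  # abc1234def5678
os.unlink(event_path)
```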
ddtrace/ext/git.py
CHANGED
@@ -33,6 +33,30 @@ BRANCH = "git.branch"
 # Git Commit SHA
 COMMIT_SHA = "git.commit.sha"
 
+# Git Commit HEAD SHA
+COMMIT_HEAD_SHA = "git.commit.head.sha"
+
+# Git Commit HEAD message
+COMMIT_HEAD_MESSAGE = "git.commit.head.message"
+
+# Git Commit HEAD author date
+COMMIT_HEAD_AUTHOR_DATE = "git.commit.head.author.date"
+
+# Git Commit HEAD author email
+COMMIT_HEAD_AUTHOR_EMAIL = "git.commit.head.author.email"
+
+# Git Commit HEAD author name
+COMMIT_HEAD_AUTHOR_NAME = "git.commit.head.author.name"
+
+# Git Commit HEAD committer date
+COMMIT_HEAD_COMMITTER_DATE = "git.commit.head.committer.date"
+
+# Git Commit HEAD committer email
+COMMIT_HEAD_COMMITTER_EMAIL = "git.commit.head.committer.email"
+
+# Git Commit HEAD committer name
+COMMIT_HEAD_COMMITTER_NAME = "git.commit.head.committer.name"
+
 # Git Repository URL
 REPOSITORY_URL = "git.repository_url"
 
@@ -173,11 +197,12 @@ def _get_device_for_path(path):
     return os.stat(path).st_dev
 
 
-def _unshallow_repository_with_details(
-
+def _unshallow_repository_with_details(
+    cwd: Optional[str] = None, repo: Optional[str] = None, refspec: Optional[str] = None, parent_only: bool = False
+) -> _GitSubprocessDetails:
     cmd = [
         "fetch",
-        '--shallow-since="1 month ago"',
+        "--deepen=1" if parent_only else '--shallow-since="1 month ago"',
         "--update-shallow",
         "--filter=blob:none",
         "--recurse-submodules=no",
@@ -190,18 +215,22 @@ def _unshallow_repository_with_details(cwd=None, repo=None, refspec=None):
     return _git_subprocess_cmd_with_details(*cmd, cwd=cwd)
 
 
-def _unshallow_repository(
-
-
+def _unshallow_repository(
+    cwd: Optional[str] = None,
+    repo: Optional[str] = None,
+    refspec: Optional[str] = None,
+    parent_only: bool = False,
+) -> None:
+    _unshallow_repository_with_details(cwd, repo, refspec, parent_only)
 
 
-def extract_user_info(cwd=None):
-    # type: (Optional[str]) -> Dict[str, Tuple[str, str, str]]
+def extract_user_info(cwd: Optional[str] = None, commit_sha: Optional[str] = None) -> Dict[str, Tuple[str, str, str]]:
     """Extract commit author info from the git repository in the current directory or one specified by ``cwd``."""
     # Note: `git show -s --format... --date...` is supported since git 2.1.4 onwards
-
-
-
+    cmd = "show -s --format=%an|||%ae|||%ad|||%cn|||%ce|||%cd --date=format:%Y-%m-%dT%H:%M:%S%z"
+    if commit_sha:
+        cmd += " " + commit_sha
+    stdout = _git_subprocess_cmd(cmd=cmd, cwd=cwd)
     author_name, author_email, author_date, committer_name, committer_email, committer_date = stdout.split("|||")
     return {
         "author": (author_name, author_email, author_date),
@@ -316,6 +345,32 @@ def extract_commit_sha(cwd=None):
     return commit_sha
 
 
+def extract_git_head_metadata(head_commit_sha: str, cwd: Optional[str] = None) -> Dict[str, Optional[str]]:
+    tags: Dict[str, Optional[str]] = {}
+
+    is_shallow, *_ = _is_shallow_repository_with_details(cwd=cwd)
+    if is_shallow:
+        _unshallow_repository(cwd=cwd, repo=None, refspec=None, parent_only=True)
+
+    try:
+        users = extract_user_info(cwd=cwd, commit_sha=head_commit_sha)
+        tags[COMMIT_HEAD_AUTHOR_NAME] = users["author"][0]
+        tags[COMMIT_HEAD_AUTHOR_EMAIL] = users["author"][1]
+        tags[COMMIT_HEAD_AUTHOR_DATE] = users["author"][2]
+        tags[COMMIT_HEAD_COMMITTER_NAME] = users["committer"][0]
+        tags[COMMIT_HEAD_COMMITTER_EMAIL] = users["committer"][1]
+        tags[COMMIT_HEAD_COMMITTER_DATE] = users["committer"][2]
+        tags[COMMIT_HEAD_MESSAGE] = _git_subprocess_cmd(" ".join(("log -n 1 --format=%B", head_commit_sha)), cwd)
+    except GitNotFoundError:
+        log.error("Git executable not found, cannot extract git metadata.")
+    except ValueError as e:
+        debug_mode = log.isEnabledFor(logging.DEBUG)
+        stderr = str(e)
+        log.error("Error extracting git metadata: %s", stderr, exc_info=debug_mode)
+
+    return tags
+
+
 def extract_git_metadata(cwd=None):
     # type: (Optional[str]) -> Dict[str, Optional[str]]
     """Extract git commit metadata."""
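Two details of the git.py change are worth noting: `--deepen=1` replaces `--shallow-since` when only a commit's immediate parent is needed (so a shallow CI clone can still resolve the PR HEAD commit), and `extract_user_info` now accepts an optional commit SHA that is appended to the `git show` command, whose single-line output uses `|||` as a field separator. A small sketch of that parsing step, run against a fabricated stdout string instead of a real `git` call:

```python
# Fabricated `git show -s --format=%an|||%ae|||%ad|||%cn|||%ce|||%cd` output for illustration.
sample_stdout = (
    "Jane Doe|||jane@example.com|||2025-07-01T12:34:56+0000|||"
    "CI Bot|||ci@example.com|||2025-07-01T12:40:00+0000"
)

author_name, author_email, author_date, committer_name, committer_email, committer_date = sample_stdout.split("|||")

user_info = {
    "author": (author_name, author_email, author_date),
    "committer": (committer_name, committer_email, committer_date),
}
print(user_info["author"])     # ('Jane Doe', 'jane@example.com', '2025-07-01T12:34:56+0000')
print(user_info["committer"])  # ('CI Bot', 'ci@example.com', '2025-07-01T12:40:00+0000')
```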
4 binary files changed (no text diff shown).
ddtrace/internal/ci_visibility/encoder.py
CHANGED
@@ -1,7 +1,14 @@
+from __future__ import annotations
+
 import json
 import os
 import threading
 from typing import TYPE_CHECKING  # noqa:F401
+from typing import Any  # noqa:F401
+from typing import Dict  # noqa:F401
+from typing import List  # noqa:F401
+from typing import Optional  # noqa:F401
+from typing import Tuple  # noqa:F401
 from uuid import uuid4
 
 from ddtrace.ext import SpanTypes
@@ -28,12 +35,6 @@ from ddtrace.internal.writer.writer import NoEncodableSpansError
 log = get_logger(__name__)
 
 if TYPE_CHECKING:  # pragma: no cover
-    from typing import Any  # noqa:F401
-    from typing import Dict  # noqa:F401
-    from typing import List  # noqa:F401
-    from typing import Optional  # noqa:F401
-    from typing import Tuple  # noqa:F401
-
     from ddtrace._trace.span import Span  # noqa:F401
 
 
@@ -43,79 +44,153 @@ class CIVisibilityEncoderV01(BufferedEncoder):
     TEST_SUITE_EVENT_VERSION = 1
     TEST_EVENT_VERSION = 2
     ENDPOINT_TYPE = ENDPOINT.TEST_CYCLE
+    _MAX_PAYLOAD_SIZE = 5 * 1024 * 1024  # 5MB
 
     def __init__(self, *args):
         # DEV: args are not used here, but are used by BufferedEncoder's __cinit__() method,
         # which is called implicitly by Cython.
         super(CIVisibilityEncoderV01, self).__init__()
+        self._metadata: Dict[str, Dict[str, str]] = {}
         self._lock = threading.RLock()
-        self.
+        self._is_xdist_worker = os.getenv("PYTEST_XDIST_WORKER") is not None
         self._init_buffer()
 
     def __len__(self):
         with self._lock:
             return len(self.buffer)
 
-    def set_metadata(self, event_type, metadata):
-        # type: (str, Dict[str, str]) -> None
+    def set_metadata(self, event_type: str, metadata: Dict[str, str]):
         self._metadata.setdefault(event_type, {}).update(metadata)
 
     def _init_buffer(self):
         with self._lock:
             self.buffer = []
 
-    def put(self,
+    def put(self, item):
         with self._lock:
-            self.buffer.append(
+            self.buffer.append(item)
 
     def encode_traces(self, traces):
-
+        """
+        Only used for LogWriter, not called for CI Visibility currently
+        """
+        raise NotImplementedError()
 
-    def encode(self):
+    def encode(self) -> List[Tuple[Optional[bytes], int]]:
         with self._lock:
+            if not self.buffer:
+                return []
+            payloads = []
             with StopWatch() as sw:
-
+                payloads = self._build_payload(self.buffer)
            record_endpoint_payload_events_serialization_time(endpoint=self.ENDPOINT_TYPE, seconds=sw.elapsed())
             self._init_buffer()
-            return
+            return payloads
 
-    def _get_parent_session(self, traces):
+    def _get_parent_session(self, traces: List[List[Span]]) -> int:
         for trace in traces:
             for span in trace:
                 if span.get_tag(EVENT_TYPE) == SESSION_TYPE and span.parent_id is not None and span.parent_id != 0:
                     return span.parent_id
         return 0
 
-    def _build_payload(self, traces):
-
+    def _build_payload(self, traces: List[List[Span]]) -> List[Tuple[Optional[bytes], int]]:
+        """
+        Build multiple payloads from traces, splitting when necessary to stay under size limits.
+        Uses index-based recursive approach to avoid copying slices.
+
+        Returns a list of (payload_bytes, trace_count) tuples, where each payload contains
+        as many traces as possible without exceeding _MAX_PAYLOAD_SIZE.
+        """
+        if not traces:
+            return []
+
         new_parent_session_span_id = self._get_parent_session(traces)
-
-
-
-
-
-
-
-
+        return self._build_payloads_recursive(traces, 0, len(traces), new_parent_session_span_id)
+
+    def _build_payloads_recursive(
+        self, traces: List[List[Span]], start_idx: int, end_idx: int, new_parent_session_span_id: int
+    ) -> List[Tuple[Optional[bytes], int]]:
+        """
+        Recursively build payloads using start/end indexes to avoid slice copying.
+
+        Args:
+            traces: Full list of traces
+            start_idx: Start index (inclusive)
+            end_idx: End index (exclusive)
+            new_parent_session_span_id: Parent session span ID
+
+        Returns:
+            List of (payload_bytes, trace_count) tuples
+        """
+        if start_idx >= end_idx:
             return []
-            record_endpoint_payload_events_count(endpoint=ENDPOINT.TEST_CYCLE, count=len(normalized_spans))
 
-
-
-
-
-
-
-
-
-        ]
+        trace_count = end_idx - start_idx
+
+        # Convert traces to spans with filtering (using indexes)
+        all_spans_with_trace_info = self._convert_traces_to_spans_indexed(
+            traces, start_idx, end_idx, new_parent_session_span_id
+        )
+
+        # Get all spans (flattened)
+        all_spans = [span for _, trace_spans in all_spans_with_trace_info for span in trace_spans]
+
+        if not all_spans:
+            log.debug("No spans to encode after filtering, skipping chunk")
+            return []
+
+        # Try to create payload from all spans
+        payload = self._create_payload_from_spans(all_spans)
+
+        if len(payload) <= self._MAX_PAYLOAD_SIZE or trace_count == 1:
+            # Payload fits or we can't split further (single trace)
+            record_endpoint_payload_events_count(endpoint=self.ENDPOINT_TYPE, count=len(all_spans))
+            return [(payload, trace_count)]
+        else:
+            # Payload is too large, split in half recursively
+            mid_idx = start_idx + (trace_count + 1) // 2
+
+            # Process both halves recursively
+            left_payloads = self._build_payloads_recursive(traces, start_idx, mid_idx, new_parent_session_span_id)
+            right_payloads = self._build_payloads_recursive(traces, mid_idx, end_idx, new_parent_session_span_id)
+
+            # Combine results
+            return left_payloads + right_payloads
+
+    def _convert_traces_to_spans_indexed(
+        self, traces: List[List[Span]], start_idx: int, end_idx: int, new_parent_session_span_id: int
+    ) -> List[Tuple[int, List[Dict[str, Any]]]]:
+        """Convert traces to spans with xdist filtering applied, using indexes to avoid slicing."""
+        all_spans_with_trace_info = []
+        for trace_idx in range(start_idx, end_idx):
+            trace = traces[trace_idx]
+            trace_spans = [
+                self._convert_span(span, trace[0].context.dd_origin, new_parent_session_span_id)
+                for span in trace
+                if (not self._is_xdist_worker) or (span.get_tag(EVENT_TYPE) != SESSION_TYPE)
+            ]
+            all_spans_with_trace_info.append((trace_idx, trace_spans))
+
+        return all_spans_with_trace_info
+
+    def _create_payload_from_spans(self, spans: List[Dict[str, Any]]) -> bytes:
+        """Create a payload from the given spans."""
+        return CIVisibilityEncoderV01._pack_payload(
+            {
+                "version": self.PAYLOAD_FORMAT_VERSION,
+                "metadata": self._metadata,
+                "events": spans,
+            }
+        )
 
     @staticmethod
     def _pack_payload(payload):
         return msgpack_packb(payload)
 
-    def _convert_span(
-
+    def _convert_span(
+        self, span: Span, dd_origin: Optional[str] = None, new_parent_session_span_id: int = 0
+    ) -> Dict[str, Any]:
         sp = JSONEncoderV2._span_to_dict(span)
         sp = JSONEncoderV2._normalize_span(sp)
         sp["type"] = span.get_tag(EVENT_TYPE) or span.span_type
@@ -183,18 +258,17 @@ class CIVisibilityCoverageEncoderV02(CIVisibilityEncoderV01):
     def _set_itr_suite_skipping_mode(self, new_value):
         self.itr_suite_skipping_mode = new_value
 
-    def put(self,
+    def put(self, item):
         spans_with_coverage = [
             span
-            for span in
+            for span in item
             if COVERAGE_TAG_NAME in span.get_tags() or span.get_struct_tag(COVERAGE_TAG_NAME) is not None
         ]
         if not spans_with_coverage:
             raise NoEncodableSpansError()
         return super(CIVisibilityCoverageEncoderV02, self).put(spans_with_coverage)
 
-    def _build_coverage_attachment(self, data):
-        # type: (bytes) -> List[bytes]
+    def _build_coverage_attachment(self, data: bytes) -> List[bytes]:
         return [
             b"--%s" % self.boundary.encode("utf-8"),
             b'Content-Disposition: form-data; name="coverage1"; filename="coverage1.msgpack"',
@@ -203,8 +277,7 @@ class CIVisibilityCoverageEncoderV02(CIVisibilityEncoderV01):
             data,
         ]
 
-    def _build_event_json_attachment(self):
-        # type: () -> List[bytes]
+    def _build_event_json_attachment(self) -> List[bytes]:
         return [
             b"--%s" % self.boundary.encode("utf-8"),
             b'Content-Disposition: form-data; name="event"; filename="event.json"',
@@ -213,18 +286,16 @@ class CIVisibilityCoverageEncoderV02(CIVisibilityEncoderV01):
             b'{"dummy":true}',
         ]
 
-    def _build_body(self, data):
-        # type: (bytes) -> List[bytes]
+    def _build_body(self, data: bytes) -> List[bytes]:
         return (
             self._build_coverage_attachment(data)
             + self._build_event_json_attachment()
             + [b"--%s--" % self.boundary.encode("utf-8")]
         )
 
-    def _build_data(self, traces):
-        # type: (List[List[Span]]) -> Optional[bytes]
+    def _build_data(self, traces: List[List[Span]]) -> Optional[bytes]:
         normalized_covs = [
-            self._convert_span(span
+            self._convert_span(span)
             for trace in traces
             for span in trace
             if (COVERAGE_TAG_NAME in span.get_tags() or span.get_struct_tag(COVERAGE_TAG_NAME) is not None)
@@ -235,17 +306,17 @@ class CIVisibilityCoverageEncoderV02(CIVisibilityEncoderV01):
         # TODO: Split the events in several payloads as needed to avoid hitting the intake's maximum payload size.
         return msgpack_packb({"version": self.PAYLOAD_FORMAT_VERSION, "coverages": normalized_covs})
 
-    def _build_payload(self, traces):
-        # type: (List[List[Span]]) -> List[Tuple[Optional[bytes], int]]
+    def _build_payload(self, traces: List[List[Span]]) -> List[Tuple[Optional[bytes], int]]:
         data = self._build_data(traces)
         if not data:
             return []
-        return [(b"\r\n".join(self._build_body(data)), len(
+        return [(b"\r\n".join(self._build_body(data)), len(data))]
 
-    def _convert_span(
-
+    def _convert_span(
+        self, span: Span, dd_origin: Optional[str] = None, new_parent_session_span_id: int = 0
+    ) -> Dict[str, Any]:
         # DEV: new_parent_session_span_id is unused here, but it is used in super class
-        files:
+        files: dict[str, Any] = {}
 
         files_struct_tag_value = span.get_struct_tag(COVERAGE_TAG_NAME)
         if files_struct_tag_value is not None and "files" in files_struct_tag_value:
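The core of the encoder change is `_build_payloads_recursive`: encode the span events for a range of traces and, if the payload exceeds the 5 MiB cap while the range holds more than one trace, split the index range in half and recurse. The sketch below mirrors that control flow with a deliberately tiny cap and JSON standing in for the real msgpack payload; unlike the real method it slices the trace list for brevity instead of threading start/end indexes through the conversion step:

```python
import json
from typing import List, Tuple

MAX_PAYLOAD_SIZE = 80  # tiny limit so the example actually splits; the real cap is 5 MiB


def encode(traces: List[List[dict]]) -> bytes:
    # Stand-in for the msgpack payload the real encoder builds from converted spans.
    return json.dumps({"events": [span for trace in traces for span in trace]}).encode()


def build_payloads(traces: List[List[dict]], start: int, end: int) -> List[Tuple[bytes, int]]:
    """Halve the [start, end) range until each chunk's payload fits under the cap."""
    if start >= end:
        return []
    payload = encode(traces[start:end])
    trace_count = end - start
    if len(payload) <= MAX_PAYLOAD_SIZE or trace_count == 1:
        return [(payload, trace_count)]
    mid = start + (trace_count + 1) // 2
    return build_payloads(traces, start, mid) + build_payloads(traces, mid, end)


traces = [[{"trace": i, "name": "test"}] for i in range(8)]
for payload, count in build_payloads(traces, 0, len(traces)):
    print(count, len(payload))  # four payloads of two traces each under this toy cap
```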
3 binary files changed (no text diff shown).
ddtrace/internal/endpoints.py
ADDED
@@ -0,0 +1,76 @@
+import dataclasses
+from time import monotonic
+from typing import List
+
+
+@dataclasses.dataclass(frozen=True)
+class HttpEndPoint:
+    method: str
+    path: str
+    resource_name: str = dataclasses.field(default="")
+    operation_name: str = dataclasses.field(default="http.request")
+
+    def __post_init__(self) -> None:
+        super().__setattr__("method", self.method.upper())
+        if not self.resource_name:
+            super().__setattr__("resource_name", f"{self.method} {self.path}")
+
+
+@dataclasses.dataclass()
+class HttpEndPointsCollection:
+    """A collection of HTTP endpoints that can be modified and flushed to a telemetry payload.
+
+    The collection collects HTTP endpoints at startup and can be flushed to a telemetry payload.
+    It maintains a maximum size and drops endpoints after a certain time period in case of a hot reload of the server.
+    """
+
+    endpoints: List[HttpEndPoint] = dataclasses.field(default_factory=list, init=False)
+    is_first: bool = dataclasses.field(default=True, init=False)
+    drop_time_seconds: float = dataclasses.field(default=90.0, init=False)
+    last_modification_time: float = dataclasses.field(default_factory=monotonic, init=False)
+    max_size_length: int = dataclasses.field(default=900, init=False)
+
+    def reset(self) -> None:
+        """Reset the collection to its initial state."""
+        self.endpoints.clear()
+        self.is_first = True
+        self.last_modification_time = monotonic()
+
+    def add_endpoint(
+        self, method: str, path: str, resource_name: str = "", operation_name: str = "http.request"
+    ) -> None:
+        """
+        Add an endpoint to the collection.
+        """
+        current_time = monotonic()
+        if current_time - self.last_modification_time > self.drop_time_seconds:
+            self.reset()
+            self.endpoints.append(
+                HttpEndPoint(method=method, path=path, resource_name=resource_name, operation_name=operation_name)
+            )
+        elif len(self.endpoints) < self.max_size_length:
+            self.last_modification_time = current_time
+            self.endpoints.append(
+                HttpEndPoint(method=method, path=path, resource_name=resource_name, operation_name=operation_name)
+            )
+
+    def flush(self, max_length: int) -> dict:
+        """
+        Flush the endpoints to a payload, returning the first `max` endpoints.
+        """
+        if max_length >= len(self.endpoints):
+            res = {
+                "is_first": self.is_first,
+                "endpoints": [dataclasses.asdict(ep) for ep in self.endpoints],
+            }
+            self.reset()
+            return res
+        else:
+            res = {
+                "is_first": self.is_first,
+                "endpoints": [dataclasses.asdict(ep) for ep in self.endpoints[:max_length]],
+            }
+            self.endpoints = self.endpoints[max_length:]
+            self.is_first = False
+            self.last_modification_time = monotonic()
+            return res
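Because `ddtrace/internal/endpoints.py` is a new module, a short usage sketch may help. It assumes the classes are importable from `ddtrace.internal.endpoints` (consistent with the path above, so it needs ddtrace 3.11.0rc3 or later installed) and exercises the method upper-casing, the default resource name, and the partial-flush behaviour defined by the dataclasses:

```python
from ddtrace.internal.endpoints import HttpEndPointsCollection  # assumes ddtrace >= 3.11.0rc3

collection = HttpEndPointsCollection()
collection.add_endpoint("get", "/users/{id}", operation_name="django.request")
collection.add_endpoint("post", "/users")

# Flushing fewer endpoints than are stored keeps the remainder for the next flush.
payload = collection.flush(max_length=1)
print(payload["is_first"])                       # True
print(payload["endpoints"][0]["method"])         # GET (upper-cased by __post_init__)
print(payload["endpoints"][0]["resource_name"])  # GET /users/{id} (derived default)

# The second flush returns the remaining endpoint and is no longer marked as first.
print(collection.flush(max_length=10))
```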
1 binary file changed (no text diff shown).