ddtrace 3.11.0rc2__cp312-cp312-win_amd64.whl → 3.11.0rc3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ddtrace has been flagged as possibly problematic.

Files changed (60)
  1. ddtrace/_trace/sampling_rule.py +25 -33
  2. ddtrace/_trace/trace_handlers.py +9 -49
  3. ddtrace/_trace/utils_botocore/span_tags.py +48 -0
  4. ddtrace/_version.py +2 -2
  5. ddtrace/appsec/_constants.py +7 -0
  6. ddtrace/appsec/_handlers.py +11 -0
  7. ddtrace/appsec/_processor.py +1 -1
  8. ddtrace/contrib/internal/aiobotocore/patch.py +8 -0
  9. ddtrace/contrib/internal/boto/patch.py +14 -0
  10. ddtrace/contrib/internal/botocore/services/bedrock.py +3 -27
  11. ddtrace/contrib/internal/django/patch.py +31 -8
  12. ddtrace/contrib/internal/google_genai/_utils.py +2 -2
  13. ddtrace/contrib/internal/google_genai/patch.py +7 -7
  14. ddtrace/contrib/internal/google_generativeai/patch.py +7 -5
  15. ddtrace/contrib/internal/openai_agents/patch.py +44 -1
  16. ddtrace/contrib/internal/pytest/_plugin_v2.py +1 -1
  17. ddtrace/contrib/internal/vertexai/patch.py +7 -5
  18. ddtrace/ext/ci.py +20 -0
  19. ddtrace/ext/git.py +66 -11
  20. ddtrace/internal/_encoding.cp312-win_amd64.pyd +0 -0
  21. ddtrace/internal/_rand.cp312-win_amd64.pyd +0 -0
  22. ddtrace/internal/_tagset.cp312-win_amd64.pyd +0 -0
  23. ddtrace/internal/_threads.cp312-win_amd64.pyd +0 -0
  24. ddtrace/internal/ci_visibility/encoder.py +126 -55
  25. ddtrace/internal/datadog/profiling/dd_wrapper-unknown-amd64.dll +0 -0
  26. ddtrace/internal/datadog/profiling/ddup/_ddup.cp312-win_amd64.pyd +0 -0
  27. ddtrace/internal/datadog/profiling/ddup/dd_wrapper-unknown-amd64.dll +0 -0
  28. ddtrace/internal/endpoints.py +76 -0
  29. ddtrace/internal/native/_native.cp312-win_amd64.pyd +0 -0
  30. ddtrace/internal/schema/processor.py +6 -2
  31. ddtrace/internal/telemetry/metrics_namespaces.cp312-win_amd64.pyd +0 -0
  32. ddtrace/internal/telemetry/writer.py +18 -0
  33. ddtrace/llmobs/_constants.py +1 -0
  34. ddtrace/llmobs/_experiment.py +6 -0
  35. ddtrace/llmobs/_integrations/crewai.py +52 -3
  36. ddtrace/llmobs/_integrations/gemini.py +7 -7
  37. ddtrace/llmobs/_integrations/google_genai.py +10 -10
  38. ddtrace/llmobs/_integrations/{google_genai_utils.py → google_utils.py} +103 -7
  39. ddtrace/llmobs/_integrations/openai_agents.py +145 -0
  40. ddtrace/llmobs/_integrations/pydantic_ai.py +67 -26
  41. ddtrace/llmobs/_integrations/utils.py +68 -158
  42. ddtrace/llmobs/_integrations/vertexai.py +8 -8
  43. ddtrace/llmobs/_llmobs.py +5 -1
  44. ddtrace/llmobs/_utils.py +21 -0
  45. ddtrace/profiling/_threading.cp312-win_amd64.pyd +0 -0
  46. ddtrace/profiling/collector/_memalloc.cp312-win_amd64.pyd +0 -0
  47. ddtrace/profiling/collector/_task.cp312-win_amd64.pyd +0 -0
  48. ddtrace/profiling/collector/_traceback.cp312-win_amd64.pyd +0 -0
  49. ddtrace/profiling/collector/stack.cp312-win_amd64.pyd +0 -0
  50. ddtrace/settings/asm.py +9 -2
  51. ddtrace/vendor/psutil/_psutil_windows.cp312-win_amd64.pyd +0 -0
  52. {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/METADATA +1 -1
  53. {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/RECORD +60 -59
  54. {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/WHEEL +0 -0
  55. {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/entry_points.txt +0 -0
  56. {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/licenses/LICENSE +0 -0
  57. {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/licenses/LICENSE.Apache +0 -0
  58. {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/licenses/LICENSE.BSD3 +0 -0
  59. {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/licenses/NOTICE +0 -0
  60. {ddtrace-3.11.0rc2.dist-info → ddtrace-3.11.0rc3.dist-info}/top_level.txt +0 -0
ddtrace/contrib/internal/openai_agents/patch.py CHANGED
@@ -5,6 +5,10 @@ from agents.tracing import add_trace_processor
 
 from ddtrace import config
 from ddtrace.contrib.internal.openai_agents.processor import LLMObsTraceProcessor
+from ddtrace.contrib.trace_utils import unwrap
+from ddtrace.contrib.trace_utils import with_traced_module_async
+from ddtrace.contrib.trace_utils import wrap
+from ddtrace.internal.utils.version import parse_version
 from ddtrace.llmobs._integrations.openai_agents import OpenAIAgentsIntegration
 from ddtrace.trace import Pin
 
@@ -22,6 +26,29 @@ def _supported_versions() -> Dict[str, str]:
     return {"agents": ">=0.0.2"}
 
 
+OPENAI_AGENTS_VERSION = parse_version(get_version())
+
+
+@with_traced_module_async
+async def patched_run_single_turn(agents, pin, func, instance, args, kwargs):
+    return await _patched_run_single_turn(agents, pin, func, instance, args, kwargs, agent_index=0)
+
+
+@with_traced_module_async
+async def patched_run_single_turn_streamed(agents, pin, func, instance, args, kwargs):
+    return await _patched_run_single_turn(agents, pin, func, instance, args, kwargs, agent_index=1)
+
+
+async def _patched_run_single_turn(agents, pin, func, instance, args, kwargs, agent_index=0):
+    current_span = pin.tracer.current_span()
+    result = await func(*args, **kwargs)
+
+    integration = agents._datadog_integration
+    integration.tag_agent_manifest(current_span, args, kwargs, agent_index)
+
+    return result
+
+
 def patch():
     """
     Patch the instrumented methods
@@ -33,7 +60,16 @@ def patch():
 
     Pin().onto(agents)
 
-    add_trace_processor(LLMObsTraceProcessor(OpenAIAgentsIntegration(integration_config=config.openai_agents)))
+    integration = OpenAIAgentsIntegration(integration_config=config.openai_agents)
+    add_trace_processor(LLMObsTraceProcessor(integration))
+    agents._datadog_integration = integration
+
+    if OPENAI_AGENTS_VERSION >= (0, 0, 19):
+        wrap(agents.run.AgentRunner, "_run_single_turn", patched_run_single_turn(agents))
+        wrap(agents.run.AgentRunner, "_run_single_turn_streamed", patched_run_single_turn_streamed(agents))
+    else:
+        wrap(agents.run.Runner, "_run_single_turn", patched_run_single_turn(agents))
+        wrap(agents.run.Runner, "_run_single_turn_streamed", patched_run_single_turn_streamed(agents))
 
 
 def unpatch():
@@ -44,3 +80,10 @@ def unpatch():
         return
 
     agents._datadog_patch = False
+
+    if OPENAI_AGENTS_VERSION >= (0, 0, 19):
+        unwrap(agents.run.AgentRunner, "_run_single_turn")
+        unwrap(agents.run.AgentRunner, "_run_single_turn_streamed")
+    else:
+        unwrap(agents.run.Runner, "_run_single_turn")
+        unwrap(agents.run.Runner, "_run_single_turn_streamed")
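For context on the version gate above: it wraps agents.run.AgentRunner on agents 0.0.19 and newer and falls back to agents.run.Runner on older releases. Below is a minimal, self-contained sketch of that gating; the parse_version shown here is a simplified stand-in (split on dots, coerce to ints), not ddtrace's internal ddtrace.internal.utils.version.parse_version.

# Simplified stand-in; ddtrace's real parse_version is assumed to return a comparable int tuple.
def parse_version(version: str) -> tuple:
    parts = []
    for piece in version.split(".")[:3]:
        digits = "".join(ch for ch in piece if ch.isdigit())
        parts.append(int(digits) if digits else 0)
    return tuple(parts)

OPENAI_AGENTS_VERSION = parse_version("0.0.19")
runner_cls = "AgentRunner" if OPENAI_AGENTS_VERSION >= (0, 0, 19) else "Runner"
print(runner_cls)  # -> AgentRunner, so agents.run.AgentRunner gets wrapped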
ddtrace/contrib/internal/pytest/_plugin_v2.py CHANGED
@@ -345,7 +345,7 @@ def pytest_sessionstart(session: pytest.Session) -> None:
         test_impact_analysis="1" if _pytest_version_supports_itr() else None,
         test_management_quarantine="1",
         test_management_disable="1",
-        test_management_attempt_to_fix="4" if _pytest_version_supports_attempt_to_fix() else None,
+        test_management_attempt_to_fix="5" if _pytest_version_supports_attempt_to_fix() else None,
     )
 
     InternalTestSession.discover(
ddtrace/contrib/internal/vertexai/patch.py CHANGED
@@ -14,7 +14,7 @@ from ddtrace.contrib.internal.trace_utils import wrap
 from ddtrace.contrib.internal.vertexai._utils import TracedAsyncVertexAIStreamResponse
 from ddtrace.contrib.internal.vertexai._utils import TracedVertexAIStreamResponse
 from ddtrace.llmobs._integrations import VertexAIIntegration
-from ddtrace.llmobs._integrations.utils import extract_model_name_google
+from ddtrace.llmobs._integrations.google_utils import extract_provider_and_model_name
 from ddtrace.trace import Pin
 
 
@@ -60,11 +60,12 @@ def _traced_generate(vertexai, pin, func, instance, args, kwargs, model_instance
     integration = vertexai._datadog_integration
     stream = kwargs.get("stream", False)
     generations = None
+    provider_name, model_name = extract_provider_and_model_name(instance=model_instance, model_name_attr="_model_name")
     span = integration.trace(
         pin,
         "%s.%s" % (instance.__class__.__name__, func.__name__),
-        provider="google",
-        model=extract_model_name_google(model_instance, "_model_name"),
+        provider=provider_name,
+        model=model_name,
         submit_to_llmobs=True,
     )
     # history must be copied since it is modified during the LLM interaction
@@ -92,11 +93,12 @@ async def _traced_agenerate(vertexai, pin, func, instance, args, kwargs, model_i
     integration = vertexai._datadog_integration
     stream = kwargs.get("stream", False)
     generations = None
+    provider_name, model_name = extract_provider_and_model_name(instance=model_instance, model_name_attr="_model_name")
     span = integration.trace(
         pin,
         "%s.%s" % (instance.__class__.__name__, func.__name__),
-        provider="google",
-        model=extract_model_name_google(model_instance, "_model_name"),
+        provider=provider_name,
+        model=model_name,
         submit_to_llmobs=True,
     )
     # history must be copied since it is modified during the LLM interaction
ddtrace/ext/ci.py CHANGED
@@ -105,6 +105,16 @@ def tags(env=None, cwd=None):
             break
 
     git_info = git.extract_git_metadata(cwd=cwd)
+
+    # Whenever the HEAD commit SHA is present in the tags that come from the CI provider, we assume that
+    # the CI provider added a commit on top of the user's HEAD commit (e.g., GitHub Actions add a merge
+    # commit when triggered by a pull request). In that case, we extract the metadata for that commit specifically
+    # and add it to the tags.
+    head_commit_sha = tags.get(git.COMMIT_HEAD_SHA)
+    if head_commit_sha:
+        git_head_info = git.extract_git_head_metadata(head_commit_sha=head_commit_sha, cwd=cwd)
+        git_info.update(git_head_info)
+
     try:
         git_info[WORKSPACE_PATH] = git.extract_workspace_path(cwd=cwd)
     except git.GitNotFoundError:
@@ -349,6 +359,15 @@ def extract_github_actions(env):
         github_run_id,
     )
 
+    git_commit_head_sha = None
+    if "GITHUB_EVENT_PATH" in env:
+        try:
+            with open(env["GITHUB_EVENT_PATH"]) as f:
+                github_event_data = json.load(f)
+            git_commit_head_sha = github_event_data.get("pull_request", {}).get("head", {}).get("sha")
+        except Exception as e:
+            log.error("Failed to read or parse GITHUB_EVENT_PATH: %s", e)
+
     env_vars = {
         "GITHUB_SERVER_URL": github_server_url,
         "GITHUB_REPOSITORY": github_repository,
@@ -362,6 +381,7 @@
         git.BRANCH: env.get("GITHUB_HEAD_REF") or env.get("GITHUB_REF"),
         git.COMMIT_SHA: git_commit_sha,
         git.REPOSITORY_URL: "{0}/{1}.git".format(github_server_url, github_repository),
+        git.COMMIT_HEAD_SHA: git_commit_head_sha,
         JOB_URL: "{0}/{1}/commit/{2}/checks".format(github_server_url, github_repository, git_commit_sha),
         PIPELINE_ID: github_run_id,
         PIPELINE_NAME: env.get("GITHUB_WORKFLOW"),
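For context on the GITHUB_EVENT_PATH handling above: on pull_request triggers, GitHub Actions checks out a synthetic merge commit, while the user's real HEAD SHA travels in the event payload under pull_request.head.sha. A minimal sketch with an abbreviated, assumed event payload:

import json

# Abbreviated pull_request event, shaped like the JSON GitHub Actions writes to GITHUB_EVENT_PATH.
event = json.loads('{"pull_request": {"head": {"sha": "abc1234def5678"}}}')
head_sha = event.get("pull_request", {}).get("head", {}).get("sha")
print(head_sha)  # abc1234def5678 -- reported as the git.commit.head.sha tag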
ddtrace/ext/git.py CHANGED
@@ -33,6 +33,30 @@ BRANCH = "git.branch"
 # Git Commit SHA
 COMMIT_SHA = "git.commit.sha"
 
+# Git Commit HEAD SHA
+COMMIT_HEAD_SHA = "git.commit.head.sha"
+
+# Git Commit HEAD message
+COMMIT_HEAD_MESSAGE = "git.commit.head.message"
+
+# Git Commit HEAD author date
+COMMIT_HEAD_AUTHOR_DATE = "git.commit.head.author.date"
+
+# Git Commit HEAD author email
+COMMIT_HEAD_AUTHOR_EMAIL = "git.commit.head.author.email"
+
+# Git Commit HEAD author name
+COMMIT_HEAD_AUTHOR_NAME = "git.commit.head.author.name"
+
+# Git Commit HEAD committer date
+COMMIT_HEAD_COMMITTER_DATE = "git.commit.head.committer.date"
+
+# Git Commit HEAD committer email
+COMMIT_HEAD_COMMITTER_EMAIL = "git.commit.head.committer.email"
+
+# Git Commit HEAD committer name
+COMMIT_HEAD_COMMITTER_NAME = "git.commit.head.committer.name"
+
 # Git Repository URL
 REPOSITORY_URL = "git.repository_url"
 
@@ -173,11 +197,12 @@ def _get_device_for_path(path):
     return os.stat(path).st_dev
 
 
-def _unshallow_repository_with_details(cwd=None, repo=None, refspec=None):
-    # type (Optional[str], Optional[str], Optional[str]) -> _GitSubprocessDetails
+def _unshallow_repository_with_details(
+    cwd: Optional[str] = None, repo: Optional[str] = None, refspec: Optional[str] = None, parent_only: bool = False
+) -> _GitSubprocessDetails:
     cmd = [
         "fetch",
-        '--shallow-since="1 month ago"',
+        "--deepen=1" if parent_only else '--shallow-since="1 month ago"',
         "--update-shallow",
         "--filter=blob:none",
         "--recurse-submodules=no",
@@ -190,18 +215,22 @@ def _unshallow_repository_with_details(cwd=None, repo=None, refspec=None):
     return _git_subprocess_cmd_with_details(*cmd, cwd=cwd)
 
 
-def _unshallow_repository(cwd=None, repo=None, refspec=None):
-    # type (Optional[str], Optional[str], Optional[str]) -> None
-    _unshallow_repository_with_details(cwd, repo, refspec)
+def _unshallow_repository(
+    cwd: Optional[str] = None,
+    repo: Optional[str] = None,
+    refspec: Optional[str] = None,
+    parent_only: bool = False,
+) -> None:
+    _unshallow_repository_with_details(cwd, repo, refspec, parent_only)
 
 
-def extract_user_info(cwd=None):
-    # type: (Optional[str]) -> Dict[str, Tuple[str, str, str]]
+def extract_user_info(cwd: Optional[str] = None, commit_sha: Optional[str] = None) -> Dict[str, Tuple[str, str, str]]:
     """Extract commit author info from the git repository in the current directory or one specified by ``cwd``."""
     # Note: `git show -s --format... --date...` is supported since git 2.1.4 onwards
-    stdout = _git_subprocess_cmd(
-        "show -s --format=%an|||%ae|||%ad|||%cn|||%ce|||%cd --date=format:%Y-%m-%dT%H:%M:%S%z", cwd=cwd
-    )
+    cmd = "show -s --format=%an|||%ae|||%ad|||%cn|||%ce|||%cd --date=format:%Y-%m-%dT%H:%M:%S%z"
+    if commit_sha:
+        cmd += " " + commit_sha
+    stdout = _git_subprocess_cmd(cmd=cmd, cwd=cwd)
     author_name, author_email, author_date, committer_name, committer_email, committer_date = stdout.split("|||")
     return {
         "author": (author_name, author_email, author_date),
@@ -316,6 +345,32 @@ def extract_commit_sha(cwd=None):
     return commit_sha
 
 
+def extract_git_head_metadata(head_commit_sha: str, cwd: Optional[str] = None) -> Dict[str, Optional[str]]:
+    tags: Dict[str, Optional[str]] = {}
+
+    is_shallow, *_ = _is_shallow_repository_with_details(cwd=cwd)
+    if is_shallow:
+        _unshallow_repository(cwd=cwd, repo=None, refspec=None, parent_only=True)
+
+    try:
+        users = extract_user_info(cwd=cwd, commit_sha=head_commit_sha)
+        tags[COMMIT_HEAD_AUTHOR_NAME] = users["author"][0]
+        tags[COMMIT_HEAD_AUTHOR_EMAIL] = users["author"][1]
+        tags[COMMIT_HEAD_AUTHOR_DATE] = users["author"][2]
+        tags[COMMIT_HEAD_COMMITTER_NAME] = users["committer"][0]
+        tags[COMMIT_HEAD_COMMITTER_EMAIL] = users["committer"][1]
+        tags[COMMIT_HEAD_COMMITTER_DATE] = users["committer"][2]
+        tags[COMMIT_HEAD_MESSAGE] = _git_subprocess_cmd(" ".join(("log -n 1 --format=%B", head_commit_sha)), cwd)
+    except GitNotFoundError:
+        log.error("Git executable not found, cannot extract git metadata.")
+    except ValueError as e:
+        debug_mode = log.isEnabledFor(logging.DEBUG)
+        stderr = str(e)
+        log.error("Error extracting git metadata: %s", stderr, exc_info=debug_mode)
+
+    return tags
+
+
 def extract_git_metadata(cwd=None):
     # type: (Optional[str]) -> Dict[str, Optional[str]]
     """Extract git commit metadata."""
ddtrace/internal/ci_visibility/encoder.py CHANGED
@@ -1,7 +1,14 @@
+from __future__ import annotations
+
 import json
 import os
 import threading
 from typing import TYPE_CHECKING  # noqa:F401
+from typing import Any  # noqa:F401
+from typing import Dict  # noqa:F401
+from typing import List  # noqa:F401
+from typing import Optional  # noqa:F401
+from typing import Tuple  # noqa:F401
 from uuid import uuid4
 
 from ddtrace.ext import SpanTypes
@@ -28,12 +35,6 @@ from ddtrace.internal.writer.writer import NoEncodableSpansError
 log = get_logger(__name__)
 
 if TYPE_CHECKING:  # pragma: no cover
-    from typing import Any  # noqa:F401
-    from typing import Dict  # noqa:F401
-    from typing import List  # noqa:F401
-    from typing import Optional  # noqa:F401
-    from typing import Tuple  # noqa:F401
-
     from ddtrace._trace.span import Span  # noqa:F401
 
 
@@ -43,79 +44,153 @@ class CIVisibilityEncoderV01(BufferedEncoder):
     TEST_SUITE_EVENT_VERSION = 1
     TEST_EVENT_VERSION = 2
     ENDPOINT_TYPE = ENDPOINT.TEST_CYCLE
+    _MAX_PAYLOAD_SIZE = 5 * 1024 * 1024  # 5MB
 
     def __init__(self, *args):
         # DEV: args are not used here, but are used by BufferedEncoder's __cinit__() method,
         # which is called implicitly by Cython.
         super(CIVisibilityEncoderV01, self).__init__()
+        self._metadata: Dict[str, Dict[str, str]] = {}
         self._lock = threading.RLock()
-        self._metadata = {}
+        self._is_xdist_worker = os.getenv("PYTEST_XDIST_WORKER") is not None
         self._init_buffer()
 
     def __len__(self):
         with self._lock:
             return len(self.buffer)
 
-    def set_metadata(self, event_type, metadata):
-        # type: (str, Dict[str, str]) -> None
+    def set_metadata(self, event_type: str, metadata: Dict[str, str]):
         self._metadata.setdefault(event_type, {}).update(metadata)
 
     def _init_buffer(self):
         with self._lock:
             self.buffer = []
 
-    def put(self, spans):
+    def put(self, item):
         with self._lock:
-            self.buffer.append(spans)
+            self.buffer.append(item)
 
     def encode_traces(self, traces):
-        return self._build_payload(traces=traces)
+        """
+        Only used for LogWriter, not called for CI Visibility currently
+        """
+        raise NotImplementedError()
 
-    def encode(self):
+    def encode(self) -> List[Tuple[Optional[bytes], int]]:
         with self._lock:
+            if not self.buffer:
+                return []
+            payloads = []
             with StopWatch() as sw:
-                result_payloads = self._build_payload(self.buffer)
+                payloads = self._build_payload(self.buffer)
             record_endpoint_payload_events_serialization_time(endpoint=self.ENDPOINT_TYPE, seconds=sw.elapsed())
             self._init_buffer()
-            return result_payloads
+            return payloads
 
-    def _get_parent_session(self, traces):
+    def _get_parent_session(self, traces: List[List[Span]]) -> int:
        for trace in traces:
             for span in trace:
                 if span.get_tag(EVENT_TYPE) == SESSION_TYPE and span.parent_id is not None and span.parent_id != 0:
                     return span.parent_id
         return 0
 
-    def _build_payload(self, traces):
-        # type: (List[List[Span]]) -> List[Tuple[Optional[bytes], int]]
+    def _build_payload(self, traces: List[List[Span]]) -> List[Tuple[Optional[bytes], int]]:
+        """
+        Build multiple payloads from traces, splitting when necessary to stay under size limits.
+        Uses index-based recursive approach to avoid copying slices.
+
+        Returns a list of (payload_bytes, trace_count) tuples, where each payload contains
+        as many traces as possible without exceeding _MAX_PAYLOAD_SIZE.
+        """
+        if not traces:
+            return []
+
         new_parent_session_span_id = self._get_parent_session(traces)
-        is_not_xdist_worker = os.getenv("PYTEST_XDIST_WORKER") is None
-        normalized_spans = [
-            self._convert_span(span, trace[0].context.dd_origin, new_parent_session_span_id)
-            for trace in traces
-            for span in trace
-            if (is_not_xdist_worker or span.get_tag(EVENT_TYPE) != SESSION_TYPE)
-        ]
-        if not normalized_spans:
+        return self._build_payloads_recursive(traces, 0, len(traces), new_parent_session_span_id)
+
+    def _build_payloads_recursive(
+        self, traces: List[List[Span]], start_idx: int, end_idx: int, new_parent_session_span_id: int
+    ) -> List[Tuple[Optional[bytes], int]]:
+        """
+        Recursively build payloads using start/end indexes to avoid slice copying.
+
+        Args:
+            traces: Full list of traces
+            start_idx: Start index (inclusive)
+            end_idx: End index (exclusive)
+            new_parent_session_span_id: Parent session span ID
+
+        Returns:
+            List of (payload_bytes, trace_count) tuples
+        """
+        if start_idx >= end_idx:
             return []
-        record_endpoint_payload_events_count(endpoint=ENDPOINT.TEST_CYCLE, count=len(normalized_spans))
 
-        # TODO: Split the events in several payloads as needed to avoid hitting the intake's maximum payload size.
-        return [
-            (
-                CIVisibilityEncoderV01._pack_payload(
-                    {"version": self.PAYLOAD_FORMAT_VERSION, "metadata": self._metadata, "events": normalized_spans}
-                ),
-                len(traces),
-            )
-        ]
+        trace_count = end_idx - start_idx
+
+        # Convert traces to spans with filtering (using indexes)
+        all_spans_with_trace_info = self._convert_traces_to_spans_indexed(
+            traces, start_idx, end_idx, new_parent_session_span_id
+        )
+
+        # Get all spans (flattened)
+        all_spans = [span for _, trace_spans in all_spans_with_trace_info for span in trace_spans]
+
+        if not all_spans:
+            log.debug("No spans to encode after filtering, skipping chunk")
+            return []
+
+        # Try to create payload from all spans
+        payload = self._create_payload_from_spans(all_spans)
+
+        if len(payload) <= self._MAX_PAYLOAD_SIZE or trace_count == 1:
+            # Payload fits or we can't split further (single trace)
+            record_endpoint_payload_events_count(endpoint=self.ENDPOINT_TYPE, count=len(all_spans))
+            return [(payload, trace_count)]
+        else:
+            # Payload is too large, split in half recursively
+            mid_idx = start_idx + (trace_count + 1) // 2
+
+            # Process both halves recursively
+            left_payloads = self._build_payloads_recursive(traces, start_idx, mid_idx, new_parent_session_span_id)
+            right_payloads = self._build_payloads_recursive(traces, mid_idx, end_idx, new_parent_session_span_id)
+
+            # Combine results
+            return left_payloads + right_payloads
+
+    def _convert_traces_to_spans_indexed(
+        self, traces: List[List[Span]], start_idx: int, end_idx: int, new_parent_session_span_id: int
+    ) -> List[Tuple[int, List[Dict[str, Any]]]]:
+        """Convert traces to spans with xdist filtering applied, using indexes to avoid slicing."""
+        all_spans_with_trace_info = []
+        for trace_idx in range(start_idx, end_idx):
+            trace = traces[trace_idx]
+            trace_spans = [
+                self._convert_span(span, trace[0].context.dd_origin, new_parent_session_span_id)
+                for span in trace
+                if (not self._is_xdist_worker) or (span.get_tag(EVENT_TYPE) != SESSION_TYPE)
+            ]
+            all_spans_with_trace_info.append((trace_idx, trace_spans))
+
+        return all_spans_with_trace_info
+
+    def _create_payload_from_spans(self, spans: List[Dict[str, Any]]) -> bytes:
+        """Create a payload from the given spans."""
+        return CIVisibilityEncoderV01._pack_payload(
+            {
+                "version": self.PAYLOAD_FORMAT_VERSION,
+                "metadata": self._metadata,
+                "events": spans,
+            }
+        )
 
     @staticmethod
     def _pack_payload(payload):
         return msgpack_packb(payload)
 
-    def _convert_span(self, span, dd_origin, new_parent_session_span_id=0):
-        # type: (Span, Optional[str], Optional[int]) -> Dict[str, Any]
+    def _convert_span(
+        self, span: Span, dd_origin: Optional[str] = None, new_parent_session_span_id: int = 0
+    ) -> Dict[str, Any]:
         sp = JSONEncoderV2._span_to_dict(span)
         sp = JSONEncoderV2._normalize_span(sp)
         sp["type"] = span.get_tag(EVENT_TYPE) or span.span_type
@@ -183,18 +258,17 @@ class CIVisibilityCoverageEncoderV02(CIVisibilityEncoderV01):
     def _set_itr_suite_skipping_mode(self, new_value):
         self.itr_suite_skipping_mode = new_value
 
-    def put(self, spans):
+    def put(self, item):
         spans_with_coverage = [
             span
-            for span in spans
+            for span in item
             if COVERAGE_TAG_NAME in span.get_tags() or span.get_struct_tag(COVERAGE_TAG_NAME) is not None
         ]
         if not spans_with_coverage:
             raise NoEncodableSpansError()
         return super(CIVisibilityCoverageEncoderV02, self).put(spans_with_coverage)
 
-    def _build_coverage_attachment(self, data):
-        # type: (bytes) -> List[bytes]
+    def _build_coverage_attachment(self, data: bytes) -> List[bytes]:
         return [
             b"--%s" % self.boundary.encode("utf-8"),
             b'Content-Disposition: form-data; name="coverage1"; filename="coverage1.msgpack"',
@@ -203,8 +277,7 @@ class CIVisibilityCoverageEncoderV02(CIVisibilityEncoderV01):
             data,
         ]
 
-    def _build_event_json_attachment(self):
-        # type: () -> List[bytes]
+    def _build_event_json_attachment(self) -> List[bytes]:
         return [
             b"--%s" % self.boundary.encode("utf-8"),
             b'Content-Disposition: form-data; name="event"; filename="event.json"',
@@ -213,18 +286,16 @@ class CIVisibilityCoverageEncoderV02(CIVisibilityEncoderV01):
             b'{"dummy":true}',
         ]
 
-    def _build_body(self, data):
-        # type: (bytes) -> List[bytes]
+    def _build_body(self, data: bytes) -> List[bytes]:
         return (
             self._build_coverage_attachment(data)
             + self._build_event_json_attachment()
             + [b"--%s--" % self.boundary.encode("utf-8")]
         )
 
-    def _build_data(self, traces):
-        # type: (List[List[Span]]) -> Optional[bytes]
+    def _build_data(self, traces: List[List[Span]]) -> Optional[bytes]:
         normalized_covs = [
-            self._convert_span(span, "")
+            self._convert_span(span)
             for trace in traces
             for span in trace
             if (COVERAGE_TAG_NAME in span.get_tags() or span.get_struct_tag(COVERAGE_TAG_NAME) is not None)
@@ -235,17 +306,17 @@ class CIVisibilityCoverageEncoderV02(CIVisibilityEncoderV01):
         # TODO: Split the events in several payloads as needed to avoid hitting the intake's maximum payload size.
         return msgpack_packb({"version": self.PAYLOAD_FORMAT_VERSION, "coverages": normalized_covs})
 
-    def _build_payload(self, traces):
-        # type: (List[List[Span]]) -> List[Tuple[Optional[bytes], int]]
+    def _build_payload(self, traces: List[List[Span]]) -> List[Tuple[Optional[bytes], int]]:
         data = self._build_data(traces)
         if not data:
             return []
-        return [(b"\r\n".join(self._build_body(data)), len(traces))]
+        return [(b"\r\n".join(self._build_body(data)), len(data))]
 
-    def _convert_span(self, span, dd_origin, new_parent_session_span_id=0):
-        # type: (Span, Optional[str], Optional[int]) -> Dict[str, Any]
+    def _convert_span(
+        self, span: Span, dd_origin: Optional[str] = None, new_parent_session_span_id: int = 0
+    ) -> Dict[str, Any]:
         # DEV: new_parent_session_span_id is unused here, but it is used in super class
-        files: Dict[str, Any] = {}
+        files: dict[str, Any] = {}
 
         files_struct_tag_value = span.get_struct_tag(COVERAGE_TAG_NAME)
         if files_struct_tag_value is not None and "files" in files_struct_tag_value:
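The new _build_payloads_recursive above is a binary split over trace indexes: encode the whole range, and if the resulting msgpack payload exceeds the 5MB cap, halve the range and recurse, bottoming out at a single trace. The sketch below shows the same splitting strategy in isolation; for brevity it slices the list, whereas the encoder passes indexes around to avoid copies, and the names here are illustrative only.

MAX_PAYLOAD_SIZE = 5 * 1024 * 1024  # same 5MB cap as _MAX_PAYLOAD_SIZE

def split_payloads(items, encode, start=0, end=None):
    """Return (payload, item_count) chunks, halving the range while an encoded chunk is too big."""
    end = len(items) if end is None else end
    if start >= end:
        return []
    payload = encode(items[start:end])
    count = end - start
    if len(payload) <= MAX_PAYLOAD_SIZE or count == 1:
        return [(payload, count)]
    mid = start + (count + 1) // 2
    return split_payloads(items, encode, start, mid) + split_payloads(items, encode, mid, end)

# e.g. split_payloads(traces, msgpack_encode) -> [(payload_bytes, n_traces), ...]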
ddtrace/internal/endpoints.py ADDED
@@ -0,0 +1,76 @@
+import dataclasses
+from time import monotonic
+from typing import List
+
+
+@dataclasses.dataclass(frozen=True)
+class HttpEndPoint:
+    method: str
+    path: str
+    resource_name: str = dataclasses.field(default="")
+    operation_name: str = dataclasses.field(default="http.request")
+
+    def __post_init__(self) -> None:
+        super().__setattr__("method", self.method.upper())
+        if not self.resource_name:
+            super().__setattr__("resource_name", f"{self.method} {self.path}")
+
+
+@dataclasses.dataclass()
+class HttpEndPointsCollection:
+    """A collection of HTTP endpoints that can be modified and flushed to a telemetry payload.
+
+    The collection collects HTTP endpoints at startup and can be flushed to a telemetry payload.
+    It maintains a maximum size and drops endpoints after a certain time period in case of a hot reload of the server.
+    """
+
+    endpoints: List[HttpEndPoint] = dataclasses.field(default_factory=list, init=False)
+    is_first: bool = dataclasses.field(default=True, init=False)
+    drop_time_seconds: float = dataclasses.field(default=90.0, init=False)
+    last_modification_time: float = dataclasses.field(default_factory=monotonic, init=False)
+    max_size_length: int = dataclasses.field(default=900, init=False)
+
+    def reset(self) -> None:
+        """Reset the collection to its initial state."""
+        self.endpoints.clear()
+        self.is_first = True
+        self.last_modification_time = monotonic()
+
+    def add_endpoint(
+        self, method: str, path: str, resource_name: str = "", operation_name: str = "http.request"
+    ) -> None:
+        """
+        Add an endpoint to the collection.
+        """
+        current_time = monotonic()
+        if current_time - self.last_modification_time > self.drop_time_seconds:
+            self.reset()
+            self.endpoints.append(
+                HttpEndPoint(method=method, path=path, resource_name=resource_name, operation_name=operation_name)
+            )
+        elif len(self.endpoints) < self.max_size_length:
+            self.last_modification_time = current_time
+            self.endpoints.append(
+                HttpEndPoint(method=method, path=path, resource_name=resource_name, operation_name=operation_name)
+            )
+
+    def flush(self, max_length: int) -> dict:
+        """
+        Flush the endpoints to a payload, returning the first `max` endpoints.
+        """
+        if max_length >= len(self.endpoints):
+            res = {
+                "is_first": self.is_first,
+                "endpoints": [dataclasses.asdict(ep) for ep in self.endpoints],
+            }
+            self.reset()
+            return res
+        else:
+            res = {
+                "is_first": self.is_first,
+                "endpoints": [dataclasses.asdict(ep) for ep in self.endpoints[:max_length]],
+            }
+            self.endpoints = self.endpoints[max_length:]
+            self.is_first = False
+            self.last_modification_time = monotonic()
+            return res
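A hypothetical usage sketch of the new collection follows; how ddtrace/internal/telemetry/writer.py actually wires it up is not shown in this diff, so the calls below are illustrative only.

collection = HttpEndPointsCollection()
collection.add_endpoint("get", "/users/{id}")  # method is upper-cased, resource_name defaults to "GET /users/{id}"
collection.add_endpoint("post", "/users", operation_name="flask.request")
payload = collection.flush(max_length=100)
# payload == {"is_first": True, "endpoints": [{"method": "GET", "path": "/users/{id}", ...}, ...]}
# flushing everything resets the collection; a partial flush keeps the remainder and clears is_first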