dask-cuda 25.6.0__py3-none-any.whl → 25.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_cuda/GIT_COMMIT +1 -1
- dask_cuda/VERSION +1 -1
- dask_cuda/benchmarks/common.py +4 -1
- dask_cuda/benchmarks/local_cudf_groupby.py +4 -1
- dask_cuda/benchmarks/local_cudf_merge.py +5 -2
- dask_cuda/benchmarks/local_cudf_shuffle.py +5 -2
- dask_cuda/benchmarks/local_cupy.py +4 -1
- dask_cuda/benchmarks/local_cupy_map_overlap.py +4 -1
- dask_cuda/benchmarks/utils.py +7 -4
- dask_cuda/cli.py +21 -15
- dask_cuda/cuda_worker.py +27 -57
- dask_cuda/device_host_file.py +31 -15
- dask_cuda/disk_io.py +7 -4
- dask_cuda/explicit_comms/comms.py +11 -7
- dask_cuda/explicit_comms/dataframe/shuffle.py +23 -23
- dask_cuda/get_device_memory_objects.py +3 -3
- dask_cuda/initialize.py +80 -44
- dask_cuda/local_cuda_cluster.py +63 -66
- dask_cuda/plugins.py +17 -16
- dask_cuda/proxify_device_objects.py +12 -10
- dask_cuda/proxify_host_file.py +30 -27
- dask_cuda/proxy_object.py +20 -17
- dask_cuda/tests/conftest.py +41 -0
- dask_cuda/tests/test_dask_cuda_worker.py +109 -25
- dask_cuda/tests/test_dgx.py +10 -18
- dask_cuda/tests/test_explicit_comms.py +30 -12
- dask_cuda/tests/test_from_array.py +7 -5
- dask_cuda/tests/test_initialize.py +16 -37
- dask_cuda/tests/test_local_cuda_cluster.py +159 -52
- dask_cuda/tests/test_proxify_host_file.py +19 -3
- dask_cuda/tests/test_proxy.py +18 -16
- dask_cuda/tests/test_rdd_ucx.py +160 -0
- dask_cuda/tests/test_spill.py +7 -0
- dask_cuda/tests/test_utils.py +106 -20
- dask_cuda/tests/test_worker_spec.py +5 -2
- dask_cuda/utils.py +261 -38
- dask_cuda/utils_test.py +23 -7
- dask_cuda/worker_common.py +196 -0
- dask_cuda/worker_spec.py +12 -5
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/METADATA +2 -2
- dask_cuda-25.8.0.dist-info/RECORD +63 -0
- dask_cuda-25.8.0.dist-info/top_level.txt +6 -0
- shared-actions/check_nightly_success/check-nightly-success/check.py +148 -0
- shared-actions/telemetry-impls/summarize/bump_time.py +54 -0
- shared-actions/telemetry-impls/summarize/send_trace.py +409 -0
- dask_cuda-25.6.0.dist-info/RECORD +0 -57
- dask_cuda-25.6.0.dist-info/top_level.txt +0 -4
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/WHEEL +0 -0
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/entry_points.txt +0 -0
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/licenses/LICENSE +0 -0
shared-actions/telemetry-impls/summarize/send_trace.py (new file)
@@ -0,0 +1,409 @@
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Process a GitHub workflow log record and output OpenTelemetry span data."""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import os
+import re
+import sys
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from opentelemetry import trace
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from opentelemetry.sdk.trace.id_generator import IdGenerator
+from opentelemetry.trace.status import StatusCode
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+    from typing import Any
+
+match os.getenv("OTEL_EXPORTER_OTLP_PROTOCOL"):
+    case "http/protobuf":
+        from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
+            OTLPSpanExporter,
+        )
+    case "grpc":
+        from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
+            OTLPSpanExporter,
+        )
+    case _:
+        from opentelemetry.sdk.trace.export import (
+            ConsoleSpanExporter as OTLPSpanExporter,
+        )
+
+
+logging.basicConfig(level=logging.DEBUG)
+
+SpanProcessor = BatchSpanProcessor
+
+
+def parse_attributes(attrs: os.PathLike | str | None) -> dict[str, str]:
+    """Attempt to parse attributes in a given list `attrs`."""
+    attrs_list: list[str]
+    if not attrs:
+        return {}
+    try:
+        with Path(attrs).open() as attribute_file:
+            attrs_list = attribute_file.readlines()
+    except FileNotFoundError:
+        attrs_list = str(attrs).split(",")
+    attributes = {}
+    for attr in attrs_list:
+        try:
+            key, value = attr.split("=", 1)
+        except ValueError:
+            logging.warning("Invalid attribute: %s", attr)
+            continue
+        attributes[key] = value.strip().strip('"')
+        logging.debug("Attribute parsed: Key: %s, Value: %s", key, value)
+    return attributes
+
+
+def date_str_to_epoch(date_str: str, value_if_not_set: int | None = 0) -> int:
+    """Github logs come in RFC 3339; this converts to nanoseconds since epoch."""
+    if date_str:
+        # replace bit is to attach the UTC timezone to our datetime object, so
+        # that it doesn't "help" us by adjusting our string value, which is
+        # already in UTC
+        timestamp_ns = int(
+            datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc).timestamp() * 1e9
+        )
+    else:
+        timestamp_ns = value_if_not_set or 0
+    return timestamp_ns
+
+
+def map_conclusion_to_status_code(conclusion: str) -> StatusCode:
+    """Map string conclusion from logs to OTel enum."""
+    if conclusion == "success":
+        return StatusCode.OK
+    if conclusion == "failure":
+        return StatusCode.ERROR
+    return StatusCode.UNSET
+
+
+class RapidsSpanIdGenerator(IdGenerator):
+    """ID Generator that generates span IDs. Trace IDs come from elsewhere."""
+
+    def __init__(self, trace_id: int, job_name: str) -> None:
+        """Initialize generator with trace ID and initial job_name."""
+        self.trace_id = trace_id
+        self.job_name = job_name
+        self.step_name = None
+
+    def update_job_name(self, job_name: str) -> None:
+        """Update the job name, which feeds into computing the span name."""
+        self.job_name = job_name
+        logging.debug("Job name updated: %s", self.job_name)
+
+    def update_step_name(self, step_name: str) -> None:
+        """Update the step name, which feeds into computing the span name."""
+        self.step_name = step_name
+        logging.debug("Step name updated: %s", self.step_name)
+
+    def generate_span_id(self) -> int:
+        """Get a new span ID.
+
+        This is called by the OpenTelemetry SDK as-is, without arguments.
+        To change the generated value here, use the update methods.
+        """
+        span_id = hashlib.sha256()
+        span_id.update(str(self.trace_id).encode())
+        span_id.update(bytes(self.job_name.encode()))
+        if self.step_name:
+            span_id.update(bytes(self.step_name.encode()))
+        return int(span_id.hexdigest()[:16], 16)
+
+    def generate_trace_id(self) -> int:
+        """Return the trace ID that is stored at initialization."""
+        return self.trace_id
+
+
+@dataclass
+class Compiler:
+    hits: int = 0
+    misses: int = 0
+    errors: int = 0
+
+    @property
+    def requests(self) -> int:
+        """Calculate the total requests."""
+        return self.hits + self.misses + self.errors
+
+    @property
+    def hit_rate(self) -> float:
+        """Calculate the cache hit rate."""
+        return self.hits / self.requests if self.requests > 0 else 0
+
+    @property
+    def miss_rate(self) -> float:
+        """Calculate the cache miss rate."""
+        return self.misses / self.requests if self.requests > 0 else 0
+
+    @property
+    def error_rate(self) -> float:
+        """Calculate the cache error rate."""
+        return self.errors / self.requests if self.requests > 0 else 0
+
+
+class SccacheStats:
+    requests: int = 0
+    compilers: dict[str, Compiler]
+
+    def __init__(self, requests: int, compilers: dict[str, Compiler]) -> None:
+        self.requests = requests
+        self.compilers = compilers
+
+    @property
+    def hits(self) -> int:
+        """Calculate the total cache hits."""
+        return sum(v.hits for v in self.compilers.values())
+
+    @property
+    def misses(self) -> int:
+        """Calculate the total cache misses."""
+        return sum(v.misses for v in self.compilers.values())
+
+    @property
+    def errors(self) -> int:
+        """Calculate the total cache errors."""
+        return sum(v.errors for v in self.compilers.values())
+
+    @property
+    def hit_rate(self) -> float:
+        """Calculate the total cache hit rate."""
+        return self.hits / self.requests if self.requests > 0 else 0
+
+    @property
+    def miss_rate(self) -> float:
+        """Calculate the total cache miss rate."""
+        return self.misses / self.requests if self.requests > 0 else 0
+
+    @property
+    def error_rate(self) -> float:
+        """Calculate the total cache error rate."""
+        return self.errors / self.requests if self.requests > 0 else 0
+
+
+def get_sccache_stats(artifact_folder: Path) -> dict[str, str]:
+    """Get sccache stats from the artifact folder."""
+    stats_files = list(artifact_folder.glob("sccache-stats*.txt"))
+    logging.debug("SCCache stats files: %s", stats_files)
+    parsed_stats = {}
+    lang_line_match = re.compile(r"Cache (?P<result>\w+) \((?P<lang>\w+)[^)]*\)\s*(?P<count>\d+)")
+    for file in stats_files:
+        with file.open() as f:
+            stats = SccacheStats(requests=0, compilers={"c": Compiler(), "cpp": Compiler(), "cuda": Compiler()})
+            print("SCCache stats file: %s", file)
+            for line in f:
+                print("Line: %s", line)
+                if match := re.match(r"^Compile\srequests\s+(\d+).*", line, re.IGNORECASE):
+                    stats.requests = int(match.group(1))
+                elif match := lang_line_match.match(line):
+                    compiler = stats.compilers[match.group("lang")]
+                    setattr(compiler, match.group("result"), int(match.group("count")))
+        stats_file_name = re.findall(r"sccache-stats[-]?(?P<name>\w+).txt", file.name)
+        if stats_file_name:
+            stats_file_name = stats_file_name[0]
+        else:
+            stats_file_name = "main_process"
+        parsed_stats[stats_file_name] = stats
+    return parsed_stats
+
+
+def process_job_blob(  # noqa: PLR0913
+    trace_id: int,
+    job: Mapping[str, Any],
+    env_vars: Mapping[str, str],
+    first_timestamp: int,
+    last_timestamp: int,
+) -> int:
+    """Transform job JSON into an OTel span."""
+    # This is the top-level workflow, which we account for with the root
+    # trace above
+    if job["name"] == env_vars.get("OTEL_SERVICE_NAME"):
+        logging.debug("Job name is the same as the service name: %s", job["name"])
+        return last_timestamp
+    # this cuts off matrix info from the job name, such that grafana can group
+    # these by name
+    if "/" in job["name"]:
+        job_name, matrix_part = job["name"].split("/", 1)
+        job_name = job_name.strip()
+        matrix_part = matrix_part.strip()
+    else:
+        job_name, matrix_part = job["name"], None
+
+    job_id = job["id"]
+    logging.info("Processing job: %s", job_name)
+    job_create = date_str_to_epoch(job["created_at"], first_timestamp)
+    job_start = date_str_to_epoch(job["started_at"], first_timestamp)
+    # this may get later in time as we progress through the steps. It is
+    # common to have the job completion time be earlier than the end of
+    # the final cleanup steps
+    job_last_timestamp = date_str_to_epoch(job["completed_at"], job_start)
+
+    if job_start == 0:
+        logging.info("Job is empty (no start time) - bypassing")
+        return last_timestamp
+
+    artifact_folder = Path.cwd() / f"telemetry-artifacts/telemetry-tools-artifacts-{job_id}"
+    attributes = {}
+    if (artifact_folder / "attrs").exists():
+        logging.debug("Found attribute file for job: %s", job_id)
+        attributes = parse_attributes(artifact_folder / "attrs")
+    else:
+        logging.debug("No attribute metadata found for job: %s", job_id)
+
+    attributes["service.name"] = job_name
+
+    sccache_stats = get_sccache_stats(artifact_folder)
+    logging.debug("SCCache stats: %s", sccache_stats)
+
+    job_provider = TracerProvider(
+        resource=Resource(attributes),
+        id_generator=RapidsSpanIdGenerator(trace_id=trace_id, job_name=job["name"]),
+    )
+    job_provider.add_span_processor(span_processor=SpanProcessor(OTLPSpanExporter()))
+    job_tracer = trace.get_tracer("GitHub Actions parser", "0.0.1", tracer_provider=job_provider)
+
+    with job_tracer.start_as_current_span(
+        name=matrix_part or job["name"],
+        start_time=job_create,
+        end_on_exit=False,
+    ) as job_span:
+        logging.debug("Job span created: %s", job_span)
+        job_span.set_status(map_conclusion_to_status_code(job["conclusion"]))
+
+        job_provider.id_generator.update_step_name("Start delay time")
+        delay_span = job_tracer.start_span(name="Start delay time", start_time=job_create)
+        delay_span.end(job_start)
+        logging.debug("Delay span created: %s", delay_span)
+
+        for step in job["steps"]:
+            step_start = date_str_to_epoch(step["started_at"], job_last_timestamp)
+            step_end = date_str_to_epoch(step["completed_at"], step_start)
+            job_last_timestamp = max(step_end, job_last_timestamp)
+            job_provider.id_generator.update_step_name(step["name"])
+            # Reset attributes for each step
+            span_attributes = {}
+
+            if (step_end - step_start) / 1e9 > 1:
+                logging.debug("processing step: %s", step["name"])
+                job_provider.id_generator.update_step_name(step["name"])
+                # Only add sccache attributes if this is a build step
+                if re.match(r"(?:[\w+]+\sbuild$)|(?:Build\sand\srepair.*)", step["name"], re.IGNORECASE):
+                    logging.debug("Adding sccache attributes for step: %s", step["name"])
+                    for file_name, stats in sccache_stats.items():
+                        span_attributes[f"sccache.{file_name}.hit_rate"] = stats.hit_rate
+                        span_attributes[f"sccache.{file_name}.miss_rate"] = stats.miss_rate
+                        span_attributes[f"sccache.{file_name}.error_rate"] = stats.error_rate
+                        span_attributes[f"sccache.{file_name}.requests"] = stats.requests
+                        for lang, lang_stats in stats.compilers.items():
+                            span_attributes[f"sccache.{file_name}.{lang}.hit_rate"] = lang_stats.hit_rate
+                            span_attributes[f"sccache.{file_name}.{lang}.miss_rate"] = lang_stats.miss_rate
+                            span_attributes[f"sccache.{file_name}.{lang}.error_rate"] = lang_stats.error_rate
+                            span_attributes[f"sccache.{file_name}.{lang}.requests"] = lang_stats.requests
+
+                # TODO: Ninja log files?
+                # TODO: file sizes of packages or their contents
+
+                step_span = job_tracer.start_span(
+                    name=step["name"],
+                    start_time=step_start,
+                    attributes=span_attributes,
+                )
+                logging.debug("Step span created: %s", step_span)
+                if span_attributes:
+                    logging.debug(" Step span attributes: %s", span_attributes)
+                # TODO: use step_span.record_exception(exc) to capture errors. exc is an exception object,
+                # so if we are reading from a log file, we need to read the log file into an exception object.
+                step_span.set_status(map_conclusion_to_status_code(step["conclusion"]))
+                step_span.end(step_end)
+                logging.debug("Step span ended: %s", step_span)
+            else:
+                logging.debug("Skipping step: %s, because its duration is < 1s", step["name"])
+    job_span.end(job_last_timestamp)
+    logging.debug("Job span ended: %s", job_span)
+    return last_timestamp
+
+
+def main() -> None:
+    """Run core functionality."""
+    with Path("all_jobs.json").open() as f:
+        jobs = json.loads(f.read())
+
+    first_timestamp = date_str_to_epoch(jobs[0]["created_at"])
+    # track the latest timestamp observed and use it for any unavailable times.
+    last_timestamp = date_str_to_epoch(jobs[0]["completed_at"], 0)
+
+    attribute_folders = list(Path.cwd().glob("telemetry-artifacts/telemetry-tools-artifacts-*"))
+    logging.debug("Attribute folders: %s", attribute_folders)
+    if attribute_folders:
+        attribute_file = attribute_folders[0] / "attrs"
+        attributes = parse_attributes(attribute_file.as_posix())
+        env_vars = parse_attributes(attribute_folders[0] / "telemetry-env-vars")
+        logging.debug("Env vars parsed from first attribute folder: %s", env_vars)
+    else:
+        attributes = {}
+        try:
+            env_vars = parse_attributes(Path.cwd() / "telemetry-artifacts/telemetry-env-vars")
+        except FileNotFoundError:
+            env_vars = os.environ
+    global_attrs = {k: v for k, v in attributes.items() if k.startswith("git.")}
+    if not (service_name := os.getenv("OTEL_SERVICE_NAME", env_vars.get("OTEL_SERVICE_NAME"))):
+        logging.error("OTEL_SERVICE_NAME not found in environment or attribute files")
+        sys.exit(1)
+    if not (trace_id := os.getenv("TRACEPARENT", env_vars.get("TRACEPARENT"))):
+        logging.error("TRACEPARENT not found in environment or attribute files")
+        sys.exit(1)
+    global_attrs["service.name"] = service_name
+    trace_id = int(trace_id.split("-")[1], 16)
+
+    logging.debug("Service name: %s", global_attrs["service.name"])
+    logging.debug("Trace ID: %s", trace_id)
+
+    provider = TracerProvider(
+        resource=Resource(global_attrs),
+        id_generator=RapidsSpanIdGenerator(trace_id=trace_id, job_name=global_attrs["service.name"]),
+    )
+    provider.add_span_processor(span_processor=SpanProcessor(OTLPSpanExporter()))
+    tracer = trace.get_tracer("GitHub Actions parser", "0.0.1", tracer_provider=provider)
+
+    with tracer.start_as_current_span(
+        name="Top-level workflow root", start_time=first_timestamp, end_on_exit=False
+    ) as root_span:
+        logging.debug("Root span created: %s", root_span)
+        for job in jobs:
+            last_timestamp = process_job_blob(
+                trace_id=trace_id,
+                job=job,
+                env_vars=env_vars,
+                first_timestamp=first_timestamp,
+                last_timestamp=last_timestamp,
+            )
+    root_span.end(last_timestamp)
+    logging.debug("Root span ended: %s", root_span)


+if __name__ == "__main__":
+    main()
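
Editor's note (not part of the diff): send_trace.py reads an all_jobs.json file that appears to hold the GitHub Actions jobs payload for a workflow run, and it derives span IDs deterministically by hashing the trace ID together with job and step names, so re-running the summarizer reproduces the same IDs for the same trace. Below is a minimal sketch with hypothetical sample values; the field names mirror what the script reads, and span_id mirrors RapidsSpanIdGenerator.generate_span_id.

import hashlib

# Hypothetical excerpt of all_jobs.json: the per-job fields send_trace.py reads.
job = {
    "id": 123456789,
    "name": "conda-python-build / cuda12",
    "created_at": "2025-07-01T12:00:00Z",
    "started_at": "2025-07-01T12:01:30Z",
    "completed_at": "2025-07-01T12:10:00Z",
    "conclusion": "success",
    "steps": [
        {
            "name": "Python build",
            "started_at": "2025-07-01T12:02:00Z",
            "completed_at": "2025-07-01T12:09:00Z",
            "conclusion": "success",
        },
    ],
}

def span_id(trace_id: int, job_name: str, step_name: str | None = None) -> int:
    # sha256 over trace ID, job name, and (when set) step name, truncated to
    # the first 16 hex digits (64 bits) -- the same scheme as the script's
    # RapidsSpanIdGenerator, so identical inputs always give identical IDs.
    digest = hashlib.sha256()
    digest.update(str(trace_id).encode())
    digest.update(job_name.encode())
    if step_name:
        digest.update(step_name.encode())
    return int(digest.hexdigest()[:16], 16)

# TRACEPARENT is a W3C traceparent string; the script takes field 1 as the trace ID.
traceparent = "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"
trace_id = int(traceparent.split("-")[1], 16)
print(hex(span_id(trace_id, job["name"], "Python build")))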
dask_cuda-25.6.0.dist-info/RECORD (deleted)
@@ -1,57 +0,0 @@
-dask_cuda/GIT_COMMIT,sha256=TiWUPXNqs5gL3lxRLAbL9S16XUILnjLBQ-tX9pxEkwE,41
-dask_cuda/VERSION,sha256=mkkPLCPxib-wy79AMMpM4Bq103DbRbHiXhZFFnGa_sk,8
-dask_cuda/__init__.py,sha256=Wbc7R0voN4vsQkb7SKuVXH0YXuXtfnAxrupxfM4lT10,1933
-dask_cuda/_compat.py,sha256=AG2lKGAtZitDPBjHeFDKLTN_B5HKodrhZ2kHlk1Z-D0,498
-dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
-dask_cuda/cli.py,sha256=cScVyNiA_l9uXeDgkIcmbcR4l4cH1_1shqSqsVmuHPE,17053
-dask_cuda/cuda_worker.py,sha256=rZ1ITG_ZCbuaMA9e8uSqCjU8Km4AMphGGrxpBPQG8xU,9477
-dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
-dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
-dask_cuda/get_device_memory_objects.py,sha256=peqXY8nAOtZpo9Pk1innP0rKySB8X4647YYqrwLYPHo,4569
-dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
-dask_cuda/is_device_object.py,sha256=x9klFdeQzLcug7wZMxN3GK2AS121tlDe-LQ2uznm5yo,1179
-dask_cuda/is_spillable_object.py,sha256=8gj6QgtKcmzrpQwy8rE-pS1R8tjaJOeD-Fzr6LumjJg,1596
-dask_cuda/local_cuda_cluster.py,sha256=wqwKVRV6jT13sf9e-XsvbVBlTrnhmcbmHQBFPTFcayw,20335
-dask_cuda/plugins.py,sha256=A2aT8HA6q_JhIEx6-XKcpbWEbl7aTg1GNoZQH8_vh00,7197
-dask_cuda/proxify_device_objects.py,sha256=jWljqWddOT8NksyNKOh_9nFoV70_3P6s8P91oXdCfEk,8225
-dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
-dask_cuda/proxy_object.py,sha256=mrCCGwS-mltcY8oddJEXnPL6rV2dBpGgsFypBVbxRsA,30150
-dask_cuda/utils.py,sha256=wJ-oTj6mJHojz7JEMTh_QFnvz5igj4ULCbpI0r_XqMY,26273
-dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
-dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
-dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/benchmarks/common.py,sha256=YFhxBYkoxIV-2mddSbLwTbyg67U4zXDd2_fFq9oP3_A,6922
-dask_cuda/benchmarks/local_cudf_groupby.py,sha256=zrDiF-yBAUxVt9mWOTH5hUm-pb-XnVX-G9gvCEX7_GI,8512
-dask_cuda/benchmarks/local_cudf_merge.py,sha256=Q7lnZ87-O7j28hkS-i_5hMApTX8VsuI4ftZf2XAnp1E,12195
-dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=Ied7r_fdGuOJyikBVVkMaIX3niJIlF39C1Xk6IVwgo4,8240
-dask_cuda/benchmarks/local_cupy.py,sha256=RCxQJd88bn3vyMAJDPK3orUpxzvDZY957wOSYkfriq0,10323
-dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=YAllGFuG6MePfPL8gdZ-Ld7a44-G0eEaHZJWB4vFPdY,6017
-dask_cuda/benchmarks/read_parquet.py,sha256=spKu6RLWYngPZq9hnaoU0mz7INIaJnErfqjBG2wH8Zc,7614
-dask_cuda/benchmarks/utils.py,sha256=_x0XXL_F3W-fExpuQfTBwuK3WnrVuXQQepbnvjUqS9o,30075
-dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/explicit_comms/comms.py,sha256=uq-XPOH38dFcYS_13Vomj2ER6zxQz7DPeSM000mOVmY,11541
-dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=yG9_7BuXSswiZjFfs6kVdHBA2-mlSBKN1i6phgNTJMY,23815
-dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
-dask_cuda/tests/test_dask_cuda_worker.py,sha256=yG_RcOTF6vt-LBBVrjEQ_2vRZvVfFgFDMedPMSzkFws,20657
-dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
-dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
-dask_cuda/tests/test_explicit_comms.py,sha256=hrNrTKP-pBSohyUqn1hnXKkUttGwRLeYY2bniEXM1FM,19944
-dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
-dask_cuda/tests/test_gds.py,sha256=j1Huud6UGm1fbkyRLQEz_ysrVw__5AimwSn_M-2GEvs,1513
-dask_cuda/tests/test_initialize.py,sha256=4Ovv_ClokKibPX6wfuaoQgN4eKCohagRFoE3s3D7Huk,8119
-dask_cuda/tests/test_local_cuda_cluster.py,sha256=AiVUx3PkuIeobw1QXdr3mvom_l8DFVvRIvMQE91zAag,19811
-dask_cuda/tests/test_proxify_host_file.py,sha256=pFORynzqGpe9mz_rPwTVW6O4VoY2E0EmjsT7Ux_c920,19333
-dask_cuda/tests/test_proxy.py,sha256=U9uE-QesTwquNKzTReEKiYgoRgS_pfGW-A-gJNppHyg,23817
-dask_cuda/tests/test_spill.py,sha256=A4-pJWCfShUaEGKbUdeIpcVL8zCyyPfAjdlJ0As3LDQ,15462
-dask_cuda/tests/test_utils.py,sha256=PQI_oTONWnKSKlkQfEeK-vlmYa0-cPpDjDEbm74cNCE,9104
-dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
-dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
-dask_cuda-25.6.0.dist-info/licenses/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
-examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
-examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
-dask_cuda-25.6.0.dist-info/METADATA,sha256=Eolq3LbkRkU0ukh5enuHzLIK-YYo-Q_PX2bobo-rT1E,2345
-dask_cuda-25.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dask_cuda-25.6.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
-dask_cuda-25.6.0.dist-info/top_level.txt,sha256=S_m57qClWFTZ9rBMNTPikpBiy9vTn6_4pjGuInt0XE8,28
-dask_cuda-25.6.0.dist-info/RECORD,,
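
Editor's note on the RECORD entries above (general wheel-spec behavior, not specific to this diff): each line is path,hash,size, where the hash is the urlsafe-base64-encoded SHA-256 digest of the file with "=" padding stripped; only RECORD's own entry leaves hash and size empty (hence the trailing "dask_cuda-25.6.0.dist-info/RECORD,,"). A small sketch with hypothetical file contents:

import base64
import hashlib

def record_entry(path: str, data: bytes) -> str:
    # Wheel RECORD hash: urlsafe base64 of the sha256 digest, '=' padding stripped.
    b64 = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={b64.decode()},{len(data)}"

print(record_entry("dask_cuda/VERSION", b"25.8.00\n"))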
Files without changes: {dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/WHEEL, entry_points.txt, licenses/LICENSE