gpufl 0.0.1__tar.gz → 0.1.0.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpufl-0.1.0.dev7/.clang-format +5 -0
- gpufl-0.1.0.dev7/.github/pull_request_template.md +11 -0
- gpufl-0.1.0.dev7/.github/workflows/build.yml +119 -0
- gpufl-0.1.0.dev7/.github/workflows/release.yml +193 -0
- {gpufl-0.0.1 → gpufl-0.1.0.dev7}/.gitignore +80 -80
- gpufl-0.1.0.dev7/CMakeLists.txt +351 -0
- gpufl-0.1.0.dev7/CONTRIBUTING.md +12 -0
- {gpufl-0.0.1 → gpufl-0.1.0.dev7}/LICENSE +201 -201
- gpufl-0.1.0.dev7/PKG-INFO +192 -0
- gpufl-0.1.0.dev7/README.md +167 -0
- gpufl-0.1.0.dev7/build.sh +1 -0
- gpufl-0.1.0.dev7/example/cuda/CMakeLists.txt +241 -0
- gpufl-0.1.0.dev7/example/cuda/block_style_example.cu +99 -0
- gpufl-0.1.0.dev7/example/cuda/check_conflict.cu +81 -0
- gpufl-0.1.0.dev7/example/cuda/check_device.cu +25 -0
- gpufl-0.1.0.dev7/example/cuda/cupti_basic.cu +129 -0
- gpufl-0.1.0.dev7/example/cuda/cupti_pc_sampling.cu +263 -0
- gpufl-0.1.0.dev7/example/cuda/list_sass_metrics.cu +46 -0
- gpufl-0.1.0.dev7/example/cuda/occupancy_demo.cu +155 -0
- gpufl-0.1.0.dev7/example/cuda/system_monitor.cu +58 -0
- gpufl-0.1.0.dev7/example/cuda/test_occupancy.cu +62 -0
- gpufl-0.1.0.dev7/example/cuda/test_sass_cubin.cu +164 -0
- gpufl-0.1.0.dev7/example/cuda/test_sass_metrics.cu +85 -0
- gpufl-0.1.0.dev7/example/cuda/vector_add_benchmark.cu +103 -0
- {gpufl-0.0.1 → gpufl-0.1.0.dev7}/example/python/01_basic.py +25 -25
- {gpufl-0.0.1 → gpufl-0.1.0.dev7}/example/python/02_numba_cuda.py +76 -76
- gpufl-0.1.0.dev7/example/python/03_pytorch_benchmark.py +75 -0
- gpufl-0.1.0.dev7/example/python/analyzer/01_analyzer_sample.py +14 -0
- gpufl-0.1.0.dev7/example/python/requirements.txt +7 -0
- gpufl-0.1.0.dev7/example/python/viz/01_plot_memory_timeline.py +9 -0
- gpufl-0.1.0.dev7/example/python/viz/02_plot_stress_timeline.py +9 -0
- gpufl-0.1.0.dev7/images/Screenshot1.png +0 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/amd/rocm_collector.cpp +10 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/amd/rocm_collector.hpp +18 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/host_collector.hpp +150 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cuda_collector.cpp +43 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cuda_collector.hpp +16 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_backend.cpp +316 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_backend.hpp +116 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_common.hpp +157 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_utils.cpp +152 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_utils.hpp +65 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +395 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +66 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +73 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +479 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +53 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +221 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +44 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +327 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +26 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +237 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +26 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/nvml_collector.cpp +188 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/nvml_collector.hpp +38 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/resource_handler.cpp +62 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/resource_handler.hpp +25 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +222 -0
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +42 -0
- gpufl-0.1.0.dev7/include/gpufl/core/common.cpp +45 -0
- gpufl-0.1.0.dev7/include/gpufl/core/common.hpp +109 -0
- gpufl-0.1.0.dev7/include/gpufl/core/debug_logger.cpp +9 -0
- gpufl-0.1.0.dev7/include/gpufl/core/debug_logger.hpp +43 -0
- gpufl-0.1.0.dev7/include/gpufl/core/events.hpp +274 -0
- gpufl-0.1.0.dev7/include/gpufl/core/gpufl.cpp +398 -0
- gpufl-0.1.0.dev7/include/gpufl/core/logger/file_compressor.cpp +44 -0
- gpufl-0.1.0.dev7/include/gpufl/core/logger/file_compressor.hpp +18 -0
- gpufl-0.1.0.dev7/include/gpufl/core/logger/log_rotator.cpp +65 -0
- gpufl-0.1.0.dev7/include/gpufl/core/logger/log_rotator.hpp +32 -0
- gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.cpp +152 -0
- gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.hpp +70 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/kernel_event_model.cpp +51 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/kernel_event_model.hpp +16 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/lifecycle_model.cpp +34 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/lifecycle_model.hpp +24 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/memcpy_event_model.cpp +58 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/memcpy_event_model.hpp +24 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/model_utils.hpp +94 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/perf_metric_model.cpp +33 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/perf_metric_model.hpp +16 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/profile_sample_model.cpp +40 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/profile_sample_model.hpp +16 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/scope_event_model.cpp +43 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/scope_event_model.hpp +24 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/serializable.hpp +15 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/system_event_model.cpp +51 -0
- gpufl-0.1.0.dev7/include/gpufl/core/model/system_event_model.hpp +32 -0
- gpufl-0.1.0.dev7/include/gpufl/core/monitor.hpp +95 -0
- gpufl-0.1.0.dev7/include/gpufl/core/monitor_backend.hpp +53 -0
- gpufl-0.1.0.dev7/include/gpufl/core/ring_buffer.hpp +75 -0
- gpufl-0.1.0.dev7/include/gpufl/core/runtime.cpp +6 -0
- gpufl-0.1.0.dev7/include/gpufl/core/runtime.hpp +30 -0
- gpufl-0.1.0.dev7/include/gpufl/core/sampler.cpp +74 -0
- gpufl-0.1.0.dev7/include/gpufl/core/sampler.hpp +51 -0
- gpufl-0.1.0.dev7/include/gpufl/core/scope_registry.cpp +10 -0
- gpufl-0.1.0.dev7/include/gpufl/core/scope_registry.hpp +8 -0
- gpufl-0.1.0.dev7/include/gpufl/core/stack_registry.hpp +47 -0
- gpufl-0.1.0.dev7/include/gpufl/core/stack_trace.cpp +112 -0
- gpufl-0.1.0.dev7/include/gpufl/core/stack_trace.hpp +12 -0
- gpufl-0.1.0.dev7/include/gpufl/core/trace_type.hpp +13 -0
- gpufl-0.1.0.dev7/include/gpufl/cuda/monitor.cpp +405 -0
- gpufl-0.1.0.dev7/include/gpufl/gpufl.hpp +83 -0
- gpufl-0.1.0.dev7/include/gpufl.hpp +3 -0
- gpufl-0.1.0.dev7/pyproject.toml +63 -0
- gpufl-0.1.0.dev7/python/bindings.cpp +129 -0
- gpufl-0.1.0.dev7/python/gpufl/.gitignore +159 -0
- gpufl-0.1.0.dev7/python/gpufl/__init__.py +89 -0
- gpufl-0.1.0.dev7/python/gpufl/analyzer/__init__.py +1 -0
- gpufl-0.1.0.dev7/python/gpufl/analyzer/analyzer.py +721 -0
- gpufl-0.1.0.dev7/python/gpufl/utils.py +19 -0
- gpufl-0.1.0.dev7/python/gpufl/viz/__init__.py +27 -0
- gpufl-0.1.0.dev7/python/gpufl/viz/reader.py +48 -0
- gpufl-0.1.0.dev7/python/gpufl/viz/timeline.py +380 -0
- gpufl-0.1.0.dev7/python/gpufl/viz/visualizer.py +194 -0
- {gpufl-0.0.1 → gpufl-0.1.0.dev7}/schema/ndjson.schema.json +133 -133
- gpufl-0.1.0.dev7/tests/CMakeLists.txt +134 -0
- gpufl-0.1.0.dev7/tests/backends/nvidia/test_cuda_collector.cpp +34 -0
- gpufl-0.1.0.dev7/tests/backends/nvidia/test_nvidia_backend.cpp +127 -0
- gpufl-0.1.0.dev7/tests/backends/nvidia/test_nvml_collector.cpp +54 -0
- gpufl-0.1.0.dev7/tests/common/test_utils.hpp +31 -0
- gpufl-0.1.0.dev7/tests/core/test_analyzer.cpp +15 -0
- gpufl-0.1.0.dev7/tests/core/test_monitor.cpp +77 -0
- gpufl-0.1.0.dev7/tests/main_test_runner.cpp +6 -0
- gpufl-0.1.0.dev7/tests/python/conftest.py +69 -0
- gpufl-0.1.0.dev7/tests/python/test_analyzer.py +54 -0
- gpufl-0.1.0.dev7/tests/verify_pipeline.py +99 -0
- gpufl-0.0.1/.github/workflows/build.yml +0 -59
- gpufl-0.0.1/CMakeLists.txt +0 -74
- gpufl-0.0.1/PKG-INFO +0 -362
- gpufl-0.0.1/README.md +0 -343
- gpufl-0.0.1/example/cuda/CMakeLists.txt +0 -63
- gpufl-0.0.1/example/cuda/block_style_example.cu +0 -159
- gpufl-0.0.1/example/cuda/system_monitor.cu +0 -21
- gpufl-0.0.1/example/python/03_kernel.launch.py +0 -34
- gpufl-0.0.1/example/python/requirements.txt +0 -2
- gpufl-0.0.1/include/gpufl/backends/cuda.hpp +0 -259
- gpufl-0.0.1/include/gpufl/core/common.hpp +0 -201
- gpufl-0.0.1/include/gpufl/core/monitor.hpp +0 -261
- gpufl-0.0.1/include/gpufl/gpufl.hpp +0 -26
- gpufl-0.0.1/pyproject.toml +0 -35
- gpufl-0.0.1/python/bindings.cpp +0 -67
- gpufl-0.0.1/python/gpufl/__init__.py +0 -32
- gpufl-0.0.1/python/gpufl/utils.py +0 -35
- gpufl-0.0.1/tests/verify_pipeline.py +0 -88
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
## Description
|
|
2
|
+
## Type of Change
|
|
3
|
+
- [ ] Bug fix
|
|
4
|
+
- [ ] New feature
|
|
5
|
+
- [ ] Documentation update
|
|
6
|
+
|
|
7
|
+
## Testing
|
|
8
|
+
## Checklist
|
|
9
|
+
- [ ] My code follows the style guidelines of this project
|
|
10
|
+
- [ ] I have performed a self-review of my own code
|
|
11
|
+
- [ ] I have commented my code, particularly in hard-to-understand areas
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
name: Build GPUFl Client
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ "main" ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ "main" ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build:
|
|
11
|
+
name: Build on ${{ matrix.os }}
|
|
12
|
+
runs-on: ${{ matrix.os }}
|
|
13
|
+
strategy:
|
|
14
|
+
matrix:
|
|
15
|
+
os: [ubuntu-22.04, windows-latest]
|
|
16
|
+
python-version: ["3.12", "3.13"]
|
|
17
|
+
|
|
18
|
+
env:
|
|
19
|
+
CMAKE_ARGS: >-
|
|
20
|
+
-DGPUFL_ENABLE_NVIDIA=ON
|
|
21
|
+
-DGPUFL_ENABLE_AMD=OFF
|
|
22
|
+
-DBUILD_TESTING=OFF
|
|
23
|
+
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
|
|
27
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
28
|
+
uses: actions/setup-python@v5
|
|
29
|
+
with:
|
|
30
|
+
python-version: ${{ matrix.python-version }}
|
|
31
|
+
|
|
32
|
+
# Install CUDA Toolkit (provides CUDA_PATH)
|
|
33
|
+
- name: Install CUDA Toolkit
|
|
34
|
+
uses: Jimver/cuda-toolkit@v0.2.30
|
|
35
|
+
id: cuda-toolkit
|
|
36
|
+
with:
|
|
37
|
+
cuda: '13.1.0'
|
|
38
|
+
method: 'network'
|
|
39
|
+
use-github-cache: false
|
|
40
|
+
|
|
41
|
+
# Make sure CMake can find CUDA headers/libraries during the *pip build*.
|
|
42
|
+
# Jimver/cuda-toolkit sets CUDA_PATH; we map it to common vars CMake respects.
|
|
43
|
+
- name: Export CUDA environment for CMake
|
|
44
|
+
shell: bash
|
|
45
|
+
run: |
|
|
46
|
+
echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV
|
|
47
|
+
echo "CUDAToolkit_ROOT=${CUDA_PATH}" >> $GITHUB_ENV
|
|
48
|
+
if [ "${{ runner.os }}" == "Windows" ]; then
|
|
49
|
+
echo "${CUDA_PATH}/bin" >> $GITHUB_PATH
|
|
50
|
+
echo "${CUDA_PATH}/extras/CUPTI/lib64" >> $GITHUB_PATH
|
|
51
|
+
echo "C:/Program Files/NVIDIA Corporation/NVSMI" >> $GITHUB_PATH
|
|
52
|
+
fi
|
|
53
|
+
|
|
54
|
+
- name: Install system dependencies
|
|
55
|
+
if: runner.os == 'Linux'
|
|
56
|
+
run: |
|
|
57
|
+
sudo apt-get update
|
|
58
|
+
sudo apt-get install -y libcurl4-openssl-dev
|
|
59
|
+
|
|
60
|
+
- name: Install python dependencies
|
|
61
|
+
run: |
|
|
62
|
+
python -m pip install --upgrade pip
|
|
63
|
+
pip install scikit-build-core pybind11 cmake ninja
|
|
64
|
+
|
|
65
|
+
- name: Build and Install
|
|
66
|
+
run: |
|
|
67
|
+
pip install .[viz,analyzer] -v
|
|
68
|
+
|
|
69
|
+
- name: Run C++ Unit Tests
|
|
70
|
+
# Skip C++ tests on Windows as they require actual NVIDIA GPUs to run (CUDA/CUPTI initialization)
|
|
71
|
+
if: runner.os != 'Windows'
|
|
72
|
+
shell: bash
|
|
73
|
+
run: |
|
|
74
|
+
# 1. Prepare a local writable directory for CUDA stubs
|
|
75
|
+
# We cannot write to the system CUDA directory (Permission denied).
|
|
76
|
+
LOCAL_STUBS_DIR="${GITHUB_WORKSPACE}/local_cuda_stubs"
|
|
77
|
+
mkdir -p "${LOCAL_STUBS_DIR}"
|
|
78
|
+
|
|
79
|
+
# 2. Gather relevant library directories for the CUDA Toolkit
|
|
80
|
+
STUBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib/stubs"
|
|
81
|
+
LIBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib"
|
|
82
|
+
|
|
83
|
+
# 3. Create versioned symlinks in the LOCAL directory
|
|
84
|
+
# Many binaries expect the versioned .so.1 name, which is only created by the driver installer.
|
|
85
|
+
for lib in libcuda libnvidia-ml libnvrtc; do
|
|
86
|
+
if [ -f "${STUBS_DIR}/${lib}.so" ]; then
|
|
87
|
+
# Symlink the original stub to our local dir
|
|
88
|
+
ln -sf "${STUBS_DIR}/${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so"
|
|
89
|
+
# Create the versioned symlink in our local dir
|
|
90
|
+
ln -sf "${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so.1"
|
|
91
|
+
fi
|
|
92
|
+
done
|
|
93
|
+
|
|
94
|
+
# 4. Add local stubs and toolkit libs to LD_LIBRARY_PATH
|
|
95
|
+
export LD_LIBRARY_PATH="${LOCAL_STUBS_DIR}:${LIBS_DIR}:${LD_LIBRARY_PATH}"
|
|
96
|
+
|
|
97
|
+
# Debug: check what libraries are found
|
|
98
|
+
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
|
|
99
|
+
ls -l "${LOCAL_STUBS_DIR}" || true
|
|
100
|
+
|
|
101
|
+
cmake -B build_tests -S . \
|
|
102
|
+
-DGPUFL_ENABLE_NVIDIA=ON \
|
|
103
|
+
-DBUILD_PYTHON=OFF \
|
|
104
|
+
-DBUILD_TESTING=ON
|
|
105
|
+
|
|
106
|
+
cmake --build build_tests --target gpufl_tests
|
|
107
|
+
|
|
108
|
+
ctest --test-dir build_tests --output-on-failure --verbose --timeout 60
|
|
109
|
+
|
|
110
|
+
- name: Run Python Unit Tests
|
|
111
|
+
shell: bash
|
|
112
|
+
run: |
|
|
113
|
+
python -m pip install pytest
|
|
114
|
+
export PYTHONPATH=$PYTHONPATH:$(pwd)/python
|
|
115
|
+
python -m pytest tests/python
|
|
116
|
+
|
|
117
|
+
- name: Verify Logging Pipeline
|
|
118
|
+
run: |
|
|
119
|
+
python -u tests/verify_pipeline.py
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
name: Build and Release Wheels
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build_wheels:
|
|
11
|
+
name: Build wheels on ${{ matrix.os }}
|
|
12
|
+
runs-on: ${{ matrix.os }}
|
|
13
|
+
strategy:
|
|
14
|
+
matrix:
|
|
15
|
+
os: [ubuntu-22.04, windows-latest]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Set package version from tag
|
|
21
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
22
|
+
shell: python
|
|
23
|
+
run: |
|
|
24
|
+
import os
|
|
25
|
+
import re
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
ref_name = os.environ.get("GITHUB_REF_NAME", "")
|
|
29
|
+
if not ref_name.startswith("v"):
|
|
30
|
+
raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
|
|
31
|
+
version = ref_name[1:]
|
|
32
|
+
print(f"Using version from tag: {version}")
|
|
33
|
+
|
|
34
|
+
pyproject = Path("pyproject.toml")
|
|
35
|
+
text = pyproject.read_text(encoding="utf-8")
|
|
36
|
+
text_new, n = re.subn(
|
|
37
|
+
r'(?m)^version\s*=\s*"[^\"]+"$',
|
|
38
|
+
f'version = "{version}"',
|
|
39
|
+
text,
|
|
40
|
+
count=1,
|
|
41
|
+
)
|
|
42
|
+
if n != 1:
|
|
43
|
+
raise SystemExit("Failed to update [project].version in pyproject.toml")
|
|
44
|
+
pyproject.write_text(text_new, encoding="utf-8")
|
|
45
|
+
|
|
46
|
+
init_py = Path("python/gpufl/__init__.py")
|
|
47
|
+
if init_py.exists():
|
|
48
|
+
init_text = init_py.read_text(encoding="utf-8")
|
|
49
|
+
init_new, _ = re.subn(
|
|
50
|
+
r'(?m)^__version__\s*=\s*"[^\"]+"$',
|
|
51
|
+
f'__version__ = "{version}"',
|
|
52
|
+
init_text,
|
|
53
|
+
)
|
|
54
|
+
init_py.write_text(init_new, encoding="utf-8")
|
|
55
|
+
|
|
56
|
+
- name: Cache cibuildwheel downloads
|
|
57
|
+
uses: actions/cache@v4
|
|
58
|
+
with:
|
|
59
|
+
path: |
|
|
60
|
+
~/.cache/cibuildwheel
|
|
61
|
+
~/AppData/Local/pypa/cibuildwheel/Cache
|
|
62
|
+
key: cibw-${{ runner.os }}-${{ hashFiles('.github/workflows/release.yml') }}
|
|
63
|
+
restore-keys: |
|
|
64
|
+
cibw-${{ runner.os }}-
|
|
65
|
+
|
|
66
|
+
- name: Install CUDA (Windows)
|
|
67
|
+
if: runner.os == 'Windows'
|
|
68
|
+
uses: Jimver/cuda-toolkit@v0.2.30
|
|
69
|
+
with:
|
|
70
|
+
cuda: '13.1.0'
|
|
71
|
+
method: 'network'
|
|
72
|
+
|
|
73
|
+
- name: Prefetch virtualenv.pyz (Windows)
|
|
74
|
+
if: runner.os == 'Windows'
|
|
75
|
+
shell: pwsh
|
|
76
|
+
run: |
|
|
77
|
+
$version = "20.27.1"
|
|
78
|
+
$cacheDir = Join-Path $env:LOCALAPPDATA "pypa\cibuildwheel\Cache"
|
|
79
|
+
New-Item -ItemType Directory -Path $cacheDir -Force | Out-Null
|
|
80
|
+
$dest = Join-Path $cacheDir "virtualenv-$version.pyz"
|
|
81
|
+
if (Test-Path $dest) {
|
|
82
|
+
Write-Host "virtualenv.pyz already cached: $dest"
|
|
83
|
+
exit 0
|
|
84
|
+
}
|
|
85
|
+
$urls = @(
|
|
86
|
+
"https://raw.githubusercontent.com/pypa/get-virtualenv/$version/public/virtualenv.pyz",
|
|
87
|
+
"https://raw.githubusercontent.com/pypa/get-virtualenv/refs/tags/$version/public/virtualenv.pyz",
|
|
88
|
+
"https://bootstrap.pypa.io/virtualenv.pyz"
|
|
89
|
+
)
|
|
90
|
+
$max = 6
|
|
91
|
+
$ok = $false
|
|
92
|
+
foreach ($url in $urls) {
|
|
93
|
+
for ($i = 1; $i -le $max; $i++) {
|
|
94
|
+
try {
|
|
95
|
+
Write-Host "Downloading virtualenv.pyz from $url (attempt $i/$max)..."
|
|
96
|
+
Invoke-WebRequest -Uri $url -OutFile $dest -TimeoutSec 120 -Headers @{ "User-Agent" = "cibuildwheel-prefetch" }
|
|
97
|
+
if ((Get-Item $dest).Length -gt 0) {
|
|
98
|
+
Write-Host "Downloaded: $dest"
|
|
99
|
+
$ok = $true
|
|
100
|
+
break
|
|
101
|
+
}
|
|
102
|
+
} catch {
|
|
103
|
+
if (Test-Path $dest) { Remove-Item $dest -Force -ErrorAction SilentlyContinue }
|
|
104
|
+
if ($i -eq $max) { break }
|
|
105
|
+
Start-Sleep -Seconds (5 * $i)
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
if ($ok) { break }
|
|
109
|
+
}
|
|
110
|
+
if (-not $ok) { throw "Failed to prefetch virtualenv.pyz from all sources." }
|
|
111
|
+
|
|
112
|
+
- name: Build wheels
|
|
113
|
+
uses: pypa/cibuildwheel@v2.22.0
|
|
114
|
+
env:
|
|
115
|
+
CIBW_VIRTUALENV_VERSION: "20.27.1"
|
|
116
|
+
CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
|
|
117
|
+
CIBW_BEFORE_ALL_LINUX: >-
|
|
118
|
+
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
|
|
119
|
+
dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1
|
|
120
|
+
CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
|
|
121
|
+
CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
|
|
122
|
+
CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 -w {dest_dir} {wheel}"
|
|
123
|
+
|
|
124
|
+
- uses: actions/upload-artifact@v4
|
|
125
|
+
with:
|
|
126
|
+
name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
|
|
127
|
+
path: ./wheelhouse/*.whl
|
|
128
|
+
|
|
129
|
+
build_sdist:
|
|
130
|
+
name: Build source distribution
|
|
131
|
+
runs-on: ubuntu-latest
|
|
132
|
+
steps:
|
|
133
|
+
- uses: actions/checkout@v4
|
|
134
|
+
|
|
135
|
+
- name: Set package version from tag
|
|
136
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
137
|
+
shell: python
|
|
138
|
+
run: |
|
|
139
|
+
import os
|
|
140
|
+
import re
|
|
141
|
+
from pathlib import Path
|
|
142
|
+
|
|
143
|
+
ref_name = os.environ.get("GITHUB_REF_NAME", "")
|
|
144
|
+
if not ref_name.startswith("v"):
|
|
145
|
+
raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
|
|
146
|
+
version = ref_name[1:]
|
|
147
|
+
print(f"Using version from tag: {version}")
|
|
148
|
+
|
|
149
|
+
pyproject = Path("pyproject.toml")
|
|
150
|
+
text = pyproject.read_text(encoding="utf-8")
|
|
151
|
+
text_new, n = re.subn(
|
|
152
|
+
r'(?m)^version\s*=\s*"[^\"]+"$',
|
|
153
|
+
f'version = "{version}"',
|
|
154
|
+
text,
|
|
155
|
+
count=1,
|
|
156
|
+
)
|
|
157
|
+
if n != 1:
|
|
158
|
+
raise SystemExit("Failed to update [project].version in pyproject.toml")
|
|
159
|
+
pyproject.write_text(text_new, encoding="utf-8")
|
|
160
|
+
|
|
161
|
+
init_py = Path("python/gpufl/__init__.py")
|
|
162
|
+
if init_py.exists():
|
|
163
|
+
init_text = init_py.read_text(encoding="utf-8")
|
|
164
|
+
init_new, _ = re.subn(
|
|
165
|
+
r'(?m)^__version__\s*=\s*"[^\"]+"$',
|
|
166
|
+
f'__version__ = "{version}"',
|
|
167
|
+
init_text,
|
|
168
|
+
)
|
|
169
|
+
init_py.write_text(init_new, encoding="utf-8")
|
|
170
|
+
|
|
171
|
+
- name: Build sdist
|
|
172
|
+
run: pipx run build --sdist
|
|
173
|
+
|
|
174
|
+
- uses: actions/upload-artifact@v4
|
|
175
|
+
with:
|
|
176
|
+
name: cibw-sdist
|
|
177
|
+
path: dist/*.tar.gz
|
|
178
|
+
|
|
179
|
+
upload_pypi:
|
|
180
|
+
needs: [build_wheels, build_sdist]
|
|
181
|
+
runs-on: ubuntu-latest
|
|
182
|
+
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
|
|
183
|
+
steps:
|
|
184
|
+
- uses: actions/download-artifact@v4
|
|
185
|
+
with:
|
|
186
|
+
pattern: cibw-*
|
|
187
|
+
path: dist
|
|
188
|
+
merge-multiple: true
|
|
189
|
+
|
|
190
|
+
- name: Publish to PyPI
|
|
191
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
192
|
+
with:
|
|
193
|
+
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
@@ -1,80 +1,80 @@
|
|
|
1
|
-
### idea
|
|
2
|
-
.idea/**
|
|
3
|
-
build/
|
|
4
|
-
cmake-build-*/
|
|
5
|
-
cmake/
|
|
6
|
-
|
|
7
|
-
### C++ template
|
|
8
|
-
# Prerequisites
|
|
9
|
-
*.d
|
|
10
|
-
|
|
11
|
-
# Compiled Object files
|
|
12
|
-
*.slo
|
|
13
|
-
*.lo
|
|
14
|
-
*.o
|
|
15
|
-
*.obj
|
|
16
|
-
|
|
17
|
-
# Precompiled Headers
|
|
18
|
-
*.gch
|
|
19
|
-
*.pch
|
|
20
|
-
|
|
21
|
-
# Compiled Dynamic libraries
|
|
22
|
-
*.so
|
|
23
|
-
*.dylib
|
|
24
|
-
*.dll
|
|
25
|
-
|
|
26
|
-
# Fortran module files
|
|
27
|
-
*.mod
|
|
28
|
-
*.smod
|
|
29
|
-
|
|
30
|
-
# Compiled Static libraries
|
|
31
|
-
*.lai
|
|
32
|
-
*.la
|
|
33
|
-
*.a
|
|
34
|
-
*.lib
|
|
35
|
-
|
|
36
|
-
# Executables
|
|
37
|
-
*.exe
|
|
38
|
-
*.out
|
|
39
|
-
*.app
|
|
40
|
-
|
|
41
|
-
### C template
|
|
42
|
-
# Prerequisites
|
|
43
|
-
*.d
|
|
44
|
-
|
|
45
|
-
# Object files
|
|
46
|
-
*.o
|
|
47
|
-
*.ko
|
|
48
|
-
*.obj
|
|
49
|
-
*.elf
|
|
50
|
-
|
|
51
|
-
# Linker output
|
|
52
|
-
*.ilk
|
|
53
|
-
*.map
|
|
54
|
-
*.exp
|
|
55
|
-
|
|
56
|
-
# Precompiled Headers
|
|
57
|
-
*.gch
|
|
58
|
-
*.pch
|
|
59
|
-
|
|
60
|
-
# Libraries
|
|
61
|
-
*.lib
|
|
62
|
-
*.a
|
|
63
|
-
*.la
|
|
64
|
-
*.lo
|
|
65
|
-
|
|
66
|
-
# Shared objects (inc. Windows DLLs)
|
|
67
|
-
*.dll
|
|
68
|
-
*.so
|
|
69
|
-
*.so.*
|
|
70
|
-
*.dylib
|
|
71
|
-
|
|
72
|
-
# Executables
|
|
73
|
-
*.exe
|
|
74
|
-
*.out
|
|
75
|
-
*.app
|
|
76
|
-
*.i*86
|
|
77
|
-
*.x86_64
|
|
78
|
-
*.hex
|
|
79
|
-
|
|
80
|
-
|
|
1
|
+
### idea
|
|
2
|
+
.idea/**
|
|
3
|
+
build/
|
|
4
|
+
cmake-build-*/
|
|
5
|
+
cmake/
|
|
6
|
+
|
|
7
|
+
### C++ template
|
|
8
|
+
# Prerequisites
|
|
9
|
+
*.d
|
|
10
|
+
|
|
11
|
+
# Compiled Object files
|
|
12
|
+
*.slo
|
|
13
|
+
*.lo
|
|
14
|
+
*.o
|
|
15
|
+
*.obj
|
|
16
|
+
|
|
17
|
+
# Precompiled Headers
|
|
18
|
+
*.gch
|
|
19
|
+
*.pch
|
|
20
|
+
|
|
21
|
+
# Compiled Dynamic libraries
|
|
22
|
+
*.so
|
|
23
|
+
*.dylib
|
|
24
|
+
*.dll
|
|
25
|
+
|
|
26
|
+
# Fortran module files
|
|
27
|
+
*.mod
|
|
28
|
+
*.smod
|
|
29
|
+
|
|
30
|
+
# Compiled Static libraries
|
|
31
|
+
*.lai
|
|
32
|
+
*.la
|
|
33
|
+
*.a
|
|
34
|
+
*.lib
|
|
35
|
+
|
|
36
|
+
# Executables
|
|
37
|
+
*.exe
|
|
38
|
+
*.out
|
|
39
|
+
*.app
|
|
40
|
+
|
|
41
|
+
### C template
|
|
42
|
+
# Prerequisites
|
|
43
|
+
*.d
|
|
44
|
+
|
|
45
|
+
# Object files
|
|
46
|
+
*.o
|
|
47
|
+
*.ko
|
|
48
|
+
*.obj
|
|
49
|
+
*.elf
|
|
50
|
+
|
|
51
|
+
# Linker output
|
|
52
|
+
*.ilk
|
|
53
|
+
*.map
|
|
54
|
+
*.exp
|
|
55
|
+
|
|
56
|
+
# Precompiled Headers
|
|
57
|
+
*.gch
|
|
58
|
+
*.pch
|
|
59
|
+
|
|
60
|
+
# Libraries
|
|
61
|
+
*.lib
|
|
62
|
+
*.a
|
|
63
|
+
*.la
|
|
64
|
+
*.lo
|
|
65
|
+
|
|
66
|
+
# Shared objects (inc. Windows DLLs)
|
|
67
|
+
*.dll
|
|
68
|
+
*.so
|
|
69
|
+
*.so.*
|
|
70
|
+
*.dylib
|
|
71
|
+
|
|
72
|
+
# Executables
|
|
73
|
+
*.exe
|
|
74
|
+
*.out
|
|
75
|
+
*.app
|
|
76
|
+
*.i*86
|
|
77
|
+
*.x86_64
|
|
78
|
+
*.hex
|
|
79
|
+
|
|
80
|
+
*.log
|