gpufl 0.0.1__tar.gz → 0.1.0.dev7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. gpufl-0.1.0.dev7/.clang-format +5 -0
  2. gpufl-0.1.0.dev7/.github/pull_request_template.md +11 -0
  3. gpufl-0.1.0.dev7/.github/workflows/build.yml +119 -0
  4. gpufl-0.1.0.dev7/.github/workflows/release.yml +193 -0
  5. {gpufl-0.0.1 → gpufl-0.1.0.dev7}/.gitignore +80 -80
  6. gpufl-0.1.0.dev7/CMakeLists.txt +351 -0
  7. gpufl-0.1.0.dev7/CONTRIBUTING.md +12 -0
  8. {gpufl-0.0.1 → gpufl-0.1.0.dev7}/LICENSE +201 -201
  9. gpufl-0.1.0.dev7/PKG-INFO +192 -0
  10. gpufl-0.1.0.dev7/README.md +167 -0
  11. gpufl-0.1.0.dev7/build.sh +1 -0
  12. gpufl-0.1.0.dev7/example/cuda/CMakeLists.txt +241 -0
  13. gpufl-0.1.0.dev7/example/cuda/block_style_example.cu +99 -0
  14. gpufl-0.1.0.dev7/example/cuda/check_conflict.cu +81 -0
  15. gpufl-0.1.0.dev7/example/cuda/check_device.cu +25 -0
  16. gpufl-0.1.0.dev7/example/cuda/cupti_basic.cu +129 -0
  17. gpufl-0.1.0.dev7/example/cuda/cupti_pc_sampling.cu +263 -0
  18. gpufl-0.1.0.dev7/example/cuda/list_sass_metrics.cu +46 -0
  19. gpufl-0.1.0.dev7/example/cuda/occupancy_demo.cu +155 -0
  20. gpufl-0.1.0.dev7/example/cuda/system_monitor.cu +58 -0
  21. gpufl-0.1.0.dev7/example/cuda/test_occupancy.cu +62 -0
  22. gpufl-0.1.0.dev7/example/cuda/test_sass_cubin.cu +164 -0
  23. gpufl-0.1.0.dev7/example/cuda/test_sass_metrics.cu +85 -0
  24. gpufl-0.1.0.dev7/example/cuda/vector_add_benchmark.cu +103 -0
  25. {gpufl-0.0.1 → gpufl-0.1.0.dev7}/example/python/01_basic.py +25 -25
  26. {gpufl-0.0.1 → gpufl-0.1.0.dev7}/example/python/02_numba_cuda.py +76 -76
  27. gpufl-0.1.0.dev7/example/python/03_pytorch_benchmark.py +75 -0
  28. gpufl-0.1.0.dev7/example/python/analyzer/01_analyzer_sample.py +14 -0
  29. gpufl-0.1.0.dev7/example/python/requirements.txt +7 -0
  30. gpufl-0.1.0.dev7/example/python/viz/01_plot_memory_timeline.py +9 -0
  31. gpufl-0.1.0.dev7/example/python/viz/02_plot_stress_timeline.py +9 -0
  32. gpufl-0.1.0.dev7/images/Screenshot1.png +0 -0
  33. gpufl-0.1.0.dev7/include/gpufl/backends/amd/rocm_collector.cpp +10 -0
  34. gpufl-0.1.0.dev7/include/gpufl/backends/amd/rocm_collector.hpp +18 -0
  35. gpufl-0.1.0.dev7/include/gpufl/backends/host_collector.hpp +150 -0
  36. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cuda_collector.cpp +43 -0
  37. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cuda_collector.hpp +16 -0
  38. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_backend.cpp +316 -0
  39. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_backend.hpp +116 -0
  40. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_common.hpp +157 -0
  41. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_utils.cpp +152 -0
  42. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_utils.hpp +65 -0
  43. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +395 -0
  44. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +66 -0
  45. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +73 -0
  46. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +479 -0
  47. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +53 -0
  48. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +221 -0
  49. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +44 -0
  50. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +327 -0
  51. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +26 -0
  52. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +237 -0
  53. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +26 -0
  54. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/nvml_collector.cpp +188 -0
  55. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/nvml_collector.hpp +38 -0
  56. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/resource_handler.cpp +62 -0
  57. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/resource_handler.hpp +25 -0
  58. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +222 -0
  59. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +42 -0
  60. gpufl-0.1.0.dev7/include/gpufl/core/common.cpp +45 -0
  61. gpufl-0.1.0.dev7/include/gpufl/core/common.hpp +109 -0
  62. gpufl-0.1.0.dev7/include/gpufl/core/debug_logger.cpp +9 -0
  63. gpufl-0.1.0.dev7/include/gpufl/core/debug_logger.hpp +43 -0
  64. gpufl-0.1.0.dev7/include/gpufl/core/events.hpp +274 -0
  65. gpufl-0.1.0.dev7/include/gpufl/core/gpufl.cpp +398 -0
  66. gpufl-0.1.0.dev7/include/gpufl/core/logger/file_compressor.cpp +44 -0
  67. gpufl-0.1.0.dev7/include/gpufl/core/logger/file_compressor.hpp +18 -0
  68. gpufl-0.1.0.dev7/include/gpufl/core/logger/log_rotator.cpp +65 -0
  69. gpufl-0.1.0.dev7/include/gpufl/core/logger/log_rotator.hpp +32 -0
  70. gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.cpp +152 -0
  71. gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.hpp +70 -0
  72. gpufl-0.1.0.dev7/include/gpufl/core/model/kernel_event_model.cpp +51 -0
  73. gpufl-0.1.0.dev7/include/gpufl/core/model/kernel_event_model.hpp +16 -0
  74. gpufl-0.1.0.dev7/include/gpufl/core/model/lifecycle_model.cpp +34 -0
  75. gpufl-0.1.0.dev7/include/gpufl/core/model/lifecycle_model.hpp +24 -0
  76. gpufl-0.1.0.dev7/include/gpufl/core/model/memcpy_event_model.cpp +58 -0
  77. gpufl-0.1.0.dev7/include/gpufl/core/model/memcpy_event_model.hpp +24 -0
  78. gpufl-0.1.0.dev7/include/gpufl/core/model/model_utils.hpp +94 -0
  79. gpufl-0.1.0.dev7/include/gpufl/core/model/perf_metric_model.cpp +33 -0
  80. gpufl-0.1.0.dev7/include/gpufl/core/model/perf_metric_model.hpp +16 -0
  81. gpufl-0.1.0.dev7/include/gpufl/core/model/profile_sample_model.cpp +40 -0
  82. gpufl-0.1.0.dev7/include/gpufl/core/model/profile_sample_model.hpp +16 -0
  83. gpufl-0.1.0.dev7/include/gpufl/core/model/scope_event_model.cpp +43 -0
  84. gpufl-0.1.0.dev7/include/gpufl/core/model/scope_event_model.hpp +24 -0
  85. gpufl-0.1.0.dev7/include/gpufl/core/model/serializable.hpp +15 -0
  86. gpufl-0.1.0.dev7/include/gpufl/core/model/system_event_model.cpp +51 -0
  87. gpufl-0.1.0.dev7/include/gpufl/core/model/system_event_model.hpp +32 -0
  88. gpufl-0.1.0.dev7/include/gpufl/core/monitor.hpp +95 -0
  89. gpufl-0.1.0.dev7/include/gpufl/core/monitor_backend.hpp +53 -0
  90. gpufl-0.1.0.dev7/include/gpufl/core/ring_buffer.hpp +75 -0
  91. gpufl-0.1.0.dev7/include/gpufl/core/runtime.cpp +6 -0
  92. gpufl-0.1.0.dev7/include/gpufl/core/runtime.hpp +30 -0
  93. gpufl-0.1.0.dev7/include/gpufl/core/sampler.cpp +74 -0
  94. gpufl-0.1.0.dev7/include/gpufl/core/sampler.hpp +51 -0
  95. gpufl-0.1.0.dev7/include/gpufl/core/scope_registry.cpp +10 -0
  96. gpufl-0.1.0.dev7/include/gpufl/core/scope_registry.hpp +8 -0
  97. gpufl-0.1.0.dev7/include/gpufl/core/stack_registry.hpp +47 -0
  98. gpufl-0.1.0.dev7/include/gpufl/core/stack_trace.cpp +112 -0
  99. gpufl-0.1.0.dev7/include/gpufl/core/stack_trace.hpp +12 -0
  100. gpufl-0.1.0.dev7/include/gpufl/core/trace_type.hpp +13 -0
  101. gpufl-0.1.0.dev7/include/gpufl/cuda/monitor.cpp +405 -0
  102. gpufl-0.1.0.dev7/include/gpufl/gpufl.hpp +83 -0
  103. gpufl-0.1.0.dev7/include/gpufl.hpp +3 -0
  104. gpufl-0.1.0.dev7/pyproject.toml +63 -0
  105. gpufl-0.1.0.dev7/python/bindings.cpp +129 -0
  106. gpufl-0.1.0.dev7/python/gpufl/.gitignore +159 -0
  107. gpufl-0.1.0.dev7/python/gpufl/__init__.py +89 -0
  108. gpufl-0.1.0.dev7/python/gpufl/analyzer/__init__.py +1 -0
  109. gpufl-0.1.0.dev7/python/gpufl/analyzer/analyzer.py +721 -0
  110. gpufl-0.1.0.dev7/python/gpufl/utils.py +19 -0
  111. gpufl-0.1.0.dev7/python/gpufl/viz/__init__.py +27 -0
  112. gpufl-0.1.0.dev7/python/gpufl/viz/reader.py +48 -0
  113. gpufl-0.1.0.dev7/python/gpufl/viz/timeline.py +380 -0
  114. gpufl-0.1.0.dev7/python/gpufl/viz/visualizer.py +194 -0
  115. {gpufl-0.0.1 → gpufl-0.1.0.dev7}/schema/ndjson.schema.json +133 -133
  116. gpufl-0.1.0.dev7/tests/CMakeLists.txt +134 -0
  117. gpufl-0.1.0.dev7/tests/backends/nvidia/test_cuda_collector.cpp +34 -0
  118. gpufl-0.1.0.dev7/tests/backends/nvidia/test_nvidia_backend.cpp +127 -0
  119. gpufl-0.1.0.dev7/tests/backends/nvidia/test_nvml_collector.cpp +54 -0
  120. gpufl-0.1.0.dev7/tests/common/test_utils.hpp +31 -0
  121. gpufl-0.1.0.dev7/tests/core/test_analyzer.cpp +15 -0
  122. gpufl-0.1.0.dev7/tests/core/test_monitor.cpp +77 -0
  123. gpufl-0.1.0.dev7/tests/main_test_runner.cpp +6 -0
  124. gpufl-0.1.0.dev7/tests/python/conftest.py +69 -0
  125. gpufl-0.1.0.dev7/tests/python/test_analyzer.py +54 -0
  126. gpufl-0.1.0.dev7/tests/verify_pipeline.py +99 -0
  127. gpufl-0.0.1/.github/workflows/build.yml +0 -59
  128. gpufl-0.0.1/CMakeLists.txt +0 -74
  129. gpufl-0.0.1/PKG-INFO +0 -362
  130. gpufl-0.0.1/README.md +0 -343
  131. gpufl-0.0.1/example/cuda/CMakeLists.txt +0 -63
  132. gpufl-0.0.1/example/cuda/block_style_example.cu +0 -159
  133. gpufl-0.0.1/example/cuda/system_monitor.cu +0 -21
  134. gpufl-0.0.1/example/python/03_kernel.launch.py +0 -34
  135. gpufl-0.0.1/example/python/requirements.txt +0 -2
  136. gpufl-0.0.1/include/gpufl/backends/cuda.hpp +0 -259
  137. gpufl-0.0.1/include/gpufl/core/common.hpp +0 -201
  138. gpufl-0.0.1/include/gpufl/core/monitor.hpp +0 -261
  139. gpufl-0.0.1/include/gpufl/gpufl.hpp +0 -26
  140. gpufl-0.0.1/pyproject.toml +0 -35
  141. gpufl-0.0.1/python/bindings.cpp +0 -67
  142. gpufl-0.0.1/python/gpufl/__init__.py +0 -32
  143. gpufl-0.0.1/python/gpufl/utils.py +0 -35
  144. gpufl-0.0.1/tests/verify_pipeline.py +0 -88
@@ -0,0 +1,5 @@
1
+ ---
2
+ Language: Cpp
3
+ BasedOnStyle: Google
4
+ IndentWidth: 4
5
+ ColumnLimit: 80
@@ -0,0 +1,11 @@
1
+ ## Description
2
+ ## Type of Change
3
+ - [ ] Bug fix
4
+ - [ ] New feature
5
+ - [ ] Documentation update
6
+
7
+ ## Testing
8
+ ## Checklist
9
+ - [ ] My code follows the style guidelines of this project
10
+ - [ ] I have performed a self-review of my own code
11
+ - [ ] I have commented my code, particularly in hard-to-understand areas
@@ -0,0 +1,119 @@
1
+ name: Build GPUFl Client
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+
9
+ jobs:
10
+ build:
11
+ name: Build on ${{ matrix.os }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ matrix:
15
+ os: [ubuntu-22.04, windows-latest]
16
+ python-version: ["3.12", "3.13"]
17
+
18
+ env:
19
+ CMAKE_ARGS: >-
20
+ -DGPUFL_ENABLE_NVIDIA=ON
21
+ -DGPUFL_ENABLE_AMD=OFF
22
+ -DBUILD_TESTING=OFF
23
+
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+
27
+ - name: Set up Python ${{ matrix.python-version }}
28
+ uses: actions/setup-python@v5
29
+ with:
30
+ python-version: ${{ matrix.python-version }}
31
+
32
+ # Install CUDA Toolkit (provides CUDA_PATH)
33
+ - name: Install CUDA Toolkit
34
+ uses: Jimver/cuda-toolkit@v0.2.30
35
+ id: cuda-toolkit
36
+ with:
37
+ cuda: '13.1.0'
38
+ method: 'network'
39
+ use-github-cache: false
40
+
41
+ # Make sure CMake can find CUDA headers/libraries during the *pip build*.
42
+ # Jimver/cuda-toolkit sets CUDA_PATH; we map it to common vars CMake respects.
43
+ - name: Export CUDA environment for CMake
44
+ shell: bash
45
+ run: |
46
+ echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV
47
+ echo "CUDAToolkit_ROOT=${CUDA_PATH}" >> $GITHUB_ENV
48
+ if [ "${{ runner.os }}" == "Windows" ]; then
49
+ echo "${CUDA_PATH}/bin" >> $GITHUB_PATH
50
+ echo "${CUDA_PATH}/extras/CUPTI/lib64" >> $GITHUB_PATH
51
+ echo "C:/Program Files/NVIDIA Corporation/NVSMI" >> $GITHUB_PATH
52
+ fi
53
+
54
+ - name: Install system dependencies
55
+ if: runner.os == 'Linux'
56
+ run: |
57
+ sudo apt-get update
58
+ sudo apt-get install -y libcurl4-openssl-dev
59
+
60
+ - name: Install python dependencies
61
+ run: |
62
+ python -m pip install --upgrade pip
63
+ pip install scikit-build-core pybind11 cmake ninja
64
+
65
+ - name: Build and Install
66
+ run: |
67
+ pip install .[viz,analyzer] -v
68
+
69
+ - name: Run C++ Unit Tests
70
+ # Skip C++ tests on Windows as they require actual NVIDIA GPUs to run (CUDA/CUPTI initialization)
71
+ if: runner.os != 'Windows'
72
+ shell: bash
73
+ run: |
74
+ # 1. Prepare a local writable directory for CUDA stubs
75
+ # We cannot write to the system CUDA directory (Permission denied).
76
+ LOCAL_STUBS_DIR="${GITHUB_WORKSPACE}/local_cuda_stubs"
77
+ mkdir -p "${LOCAL_STUBS_DIR}"
78
+
79
+ # 2. Gather relevant library directories for the CUDA Toolkit
80
+ STUBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib/stubs"
81
+ LIBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib"
82
+
83
+ # 3. Create versioned symlinks in the LOCAL directory
84
+ # Many binaries expect .so.1 which is only created by the driver installer.
85
+ for lib in libcuda libnvidia-ml libnvrtc; do
86
+ if [ -f "${STUBS_DIR}/${lib}.so" ]; then
87
+ # Symlink the original stub to our local dir
88
+ ln -sf "${STUBS_DIR}/${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so"
89
+ # Create the versioned symlink in our local dir
90
+ ln -sf "${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so.1"
91
+ fi
92
+ done
93
+
94
+ # 4. Add local stubs and toolkit libs to LD_LIBRARY_PATH
95
+ export LD_LIBRARY_PATH="${LOCAL_STUBS_DIR}:${LIBS_DIR}:${LD_LIBRARY_PATH}"
96
+
97
+ # Debug: check what libraries are found
98
+ echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
99
+ ls -l "${LOCAL_STUBS_DIR}" || true
100
+
101
+ cmake -B build_tests -S . \
102
+ -DGPUFL_ENABLE_NVIDIA=ON \
103
+ -DBUILD_PYTHON=OFF \
104
+ -DBUILD_TESTING=ON
105
+
106
+ cmake --build build_tests --target gpufl_tests
107
+
108
+ ctest --test-dir build_tests --output-on-failure --verbose --timeout 60
109
+
110
+ - name: Run Python Unit Tests
111
+ shell: bash
112
+ run: |
113
+ python -m pip install pytest
114
+ export PYTHONPATH=$PYTHONPATH:$(pwd)/python
115
+ python -m pytest tests/python
116
+
117
+ - name: Verify Logging Pipeline
118
+ run: |
119
+ python -u tests/verify_pipeline.py
@@ -0,0 +1,193 @@
1
+ name: Build and Release Wheels
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build_wheels:
11
+ name: Build wheels on ${{ matrix.os }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ matrix:
15
+ os: [ubuntu-22.04, windows-latest]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set package version from tag
21
+ if: startsWith(github.ref, 'refs/tags/v')
22
+ shell: python
23
+ run: |
24
+ import os
25
+ import re
26
+ from pathlib import Path
27
+
28
+ ref_name = os.environ.get("GITHUB_REF_NAME", "")
29
+ if not ref_name.startswith("v"):
30
+ raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
31
+ version = ref_name[1:]
32
+ print(f"Using version from tag: {version}")
33
+
34
+ pyproject = Path("pyproject.toml")
35
+ text = pyproject.read_text(encoding="utf-8")
36
+ text_new, n = re.subn(
37
+ r'(?m)^version\s*=\s*"[^\"]+"$',
38
+ f'version = "{version}"',
39
+ text,
40
+ count=1,
41
+ )
42
+ if n != 1:
43
+ raise SystemExit("Failed to update [project].version in pyproject.toml")
44
+ pyproject.write_text(text_new, encoding="utf-8")
45
+
46
+ init_py = Path("python/gpufl/__init__.py")
47
+ if init_py.exists():
48
+ init_text = init_py.read_text(encoding="utf-8")
49
+ init_new, _ = re.subn(
50
+ r'(?m)^__version__\s*=\s*"[^\"]+"$',
51
+ f'__version__ = "{version}"',
52
+ init_text,
53
+ )
54
+ init_py.write_text(init_new, encoding="utf-8")
55
+
56
+ - name: Cache cibuildwheel downloads
57
+ uses: actions/cache@v4
58
+ with:
59
+ path: |
60
+ ~/.cache/cibuildwheel
61
+ ~/AppData/Local/pypa/cibuildwheel/Cache
62
+ key: cibw-${{ runner.os }}-${{ hashFiles('.github/workflows/release.yml') }}
63
+ restore-keys: |
64
+ cibw-${{ runner.os }}-
65
+
66
+ - name: Install CUDA (Windows)
67
+ if: runner.os == 'Windows'
68
+ uses: Jimver/cuda-toolkit@v0.2.30
69
+ with:
70
+ cuda: '13.1.0'
71
+ method: 'network'
72
+
73
+ - name: Prefetch virtualenv.pyz (Windows)
74
+ if: runner.os == 'Windows'
75
+ shell: pwsh
76
+ run: |
77
+ $version = "20.27.1"
78
+ $cacheDir = Join-Path $env:LOCALAPPDATA "pypa\cibuildwheel\Cache"
79
+ New-Item -ItemType Directory -Path $cacheDir -Force | Out-Null
80
+ $dest = Join-Path $cacheDir "virtualenv-$version.pyz"
81
+ if (Test-Path $dest) {
82
+ Write-Host "virtualenv.pyz already cached: $dest"
83
+ exit 0
84
+ }
85
+ $urls = @(
86
+ "https://raw.githubusercontent.com/pypa/get-virtualenv/$version/public/virtualenv.pyz",
87
+ "https://raw.githubusercontent.com/pypa/get-virtualenv/refs/tags/$version/public/virtualenv.pyz",
88
+ "https://bootstrap.pypa.io/virtualenv.pyz"
89
+ )
90
+ $max = 6
91
+ $ok = $false
92
+ foreach ($url in $urls) {
93
+ for ($i = 1; $i -le $max; $i++) {
94
+ try {
95
+ Write-Host "Downloading virtualenv.pyz from $url (attempt $i/$max)..."
96
+ Invoke-WebRequest -Uri $url -OutFile $dest -TimeoutSec 120 -Headers @{ "User-Agent" = "cibuildwheel-prefetch" }
97
+ if ((Get-Item $dest).Length -gt 0) {
98
+ Write-Host "Downloaded: $dest"
99
+ $ok = $true
100
+ break
101
+ }
102
+ } catch {
103
+ if (Test-Path $dest) { Remove-Item $dest -Force -ErrorAction SilentlyContinue }
104
+ if ($i -eq $max) { break }
105
+ Start-Sleep -Seconds (5 * $i)
106
+ }
107
+ }
108
+ if ($ok) { break }
109
+ }
110
+ if (-not $ok) { throw "Failed to prefetch virtualenv.pyz from all sources." }
111
+
112
+ - name: Build wheels
113
+ uses: pypa/cibuildwheel@v2.22.0
114
+ env:
115
+ CIBW_VIRTUALENV_VERSION: "20.27.1"
116
+ CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
117
+ CIBW_BEFORE_ALL_LINUX: >-
118
+ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
119
+ dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1
120
+ CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
121
+ CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
122
+ CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 -w {dest_dir} {wheel}"
123
+
124
+ - uses: actions/upload-artifact@v4
125
+ with:
126
+ name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
127
+ path: ./wheelhouse/*.whl
128
+
129
+ build_sdist:
130
+ name: Build source distribution
131
+ runs-on: ubuntu-latest
132
+ steps:
133
+ - uses: actions/checkout@v4
134
+
135
+ - name: Set package version from tag
136
+ if: startsWith(github.ref, 'refs/tags/v')
137
+ shell: python
138
+ run: |
139
+ import os
140
+ import re
141
+ from pathlib import Path
142
+
143
+ ref_name = os.environ.get("GITHUB_REF_NAME", "")
144
+ if not ref_name.startswith("v"):
145
+ raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
146
+ version = ref_name[1:]
147
+ print(f"Using version from tag: {version}")
148
+
149
+ pyproject = Path("pyproject.toml")
150
+ text = pyproject.read_text(encoding="utf-8")
151
+ text_new, n = re.subn(
152
+ r'(?m)^version\s*=\s*"[^\"]+"$',
153
+ f'version = "{version}"',
154
+ text,
155
+ count=1,
156
+ )
157
+ if n != 1:
158
+ raise SystemExit("Failed to update [project].version in pyproject.toml")
159
+ pyproject.write_text(text_new, encoding="utf-8")
160
+
161
+ init_py = Path("python/gpufl/__init__.py")
162
+ if init_py.exists():
163
+ init_text = init_py.read_text(encoding="utf-8")
164
+ init_new, _ = re.subn(
165
+ r'(?m)^__version__\s*=\s*"[^\"]+"$',
166
+ f'__version__ = "{version}"',
167
+ init_text,
168
+ )
169
+ init_py.write_text(init_new, encoding="utf-8")
170
+
171
+ - name: Build sdist
172
+ run: pipx run build --sdist
173
+
174
+ - uses: actions/upload-artifact@v4
175
+ with:
176
+ name: cibw-sdist
177
+ path: dist/*.tar.gz
178
+
179
+ upload_pypi:
180
+ needs: [build_wheels, build_sdist]
181
+ runs-on: ubuntu-latest
182
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
183
+ steps:
184
+ - uses: actions/download-artifact@v4
185
+ with:
186
+ pattern: cibw-*
187
+ path: dist
188
+ merge-multiple: true
189
+
190
+ - name: Publish to PyPI
191
+ uses: pypa/gh-action-pypi-publish@release/v1
192
+ with:
193
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -1,80 +1,80 @@
1
- ### idea
2
- .idea/**
3
- build/
4
- cmake-build-*/
5
- cmake/
6
-
7
- ### C++ template
8
- # Prerequisites
9
- *.d
10
-
11
- # Compiled Object files
12
- *.slo
13
- *.lo
14
- *.o
15
- *.obj
16
-
17
- # Precompiled Headers
18
- *.gch
19
- *.pch
20
-
21
- # Compiled Dynamic libraries
22
- *.so
23
- *.dylib
24
- *.dll
25
-
26
- # Fortran module files
27
- *.mod
28
- *.smod
29
-
30
- # Compiled Static libraries
31
- *.lai
32
- *.la
33
- *.a
34
- *.lib
35
-
36
- # Executables
37
- *.exe
38
- *.out
39
- *.app
40
-
41
- ### C template
42
- # Prerequisites
43
- *.d
44
-
45
- # Object files
46
- *.o
47
- *.ko
48
- *.obj
49
- *.elf
50
-
51
- # Linker output
52
- *.ilk
53
- *.map
54
- *.exp
55
-
56
- # Precompiled Headers
57
- *.gch
58
- *.pch
59
-
60
- # Libraries
61
- *.lib
62
- *.a
63
- *.la
64
- *.lo
65
-
66
- # Shared objects (inc. Windows DLLs)
67
- *.dll
68
- *.so
69
- *.so.*
70
- *.dylib
71
-
72
- # Executables
73
- *.exe
74
- *.out
75
- *.app
76
- *.i*86
77
- *.x86_64
78
- *.hex
79
-
80
-
1
+ ### idea
2
+ .idea/**
3
+ build/
4
+ cmake-build-*/
5
+ cmake/
6
+
7
+ ### C++ template
8
+ # Prerequisites
9
+ *.d
10
+
11
+ # Compiled Object files
12
+ *.slo
13
+ *.lo
14
+ *.o
15
+ *.obj
16
+
17
+ # Precompiled Headers
18
+ *.gch
19
+ *.pch
20
+
21
+ # Compiled Dynamic libraries
22
+ *.so
23
+ *.dylib
24
+ *.dll
25
+
26
+ # Fortran module files
27
+ *.mod
28
+ *.smod
29
+
30
+ # Compiled Static libraries
31
+ *.lai
32
+ *.la
33
+ *.a
34
+ *.lib
35
+
36
+ # Executables
37
+ *.exe
38
+ *.out
39
+ *.app
40
+
41
+ ### C template
42
+ # Prerequisites
43
+ *.d
44
+
45
+ # Object files
46
+ *.o
47
+ *.ko
48
+ *.obj
49
+ *.elf
50
+
51
+ # Linker output
52
+ *.ilk
53
+ *.map
54
+ *.exp
55
+
56
+ # Precompiled Headers
57
+ *.gch
58
+ *.pch
59
+
60
+ # Libraries
61
+ *.lib
62
+ *.a
63
+ *.la
64
+ *.lo
65
+
66
+ # Shared objects (inc. Windows DLLs)
67
+ *.dll
68
+ *.so
69
+ *.so.*
70
+ *.dylib
71
+
72
+ # Executables
73
+ *.exe
74
+ *.out
75
+ *.app
76
+ *.i*86
77
+ *.x86_64
78
+ *.hex
79
+
80
+ *.log