gpufl 0.0.1__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. gpufl-0.1.0/.clang-format +5 -0
  2. gpufl-0.1.0/.dockerignore +18 -0
  3. gpufl-0.1.0/.github/pull_request_template.md +11 -0
  4. gpufl-0.1.0/.github/workflows/build.yml +119 -0
  5. gpufl-0.1.0/.github/workflows/release.yml +239 -0
  6. {gpufl-0.0.1 → gpufl-0.1.0}/.gitignore +91 -80
  7. gpufl-0.1.0/CMakeLists.txt +605 -0
  8. gpufl-0.1.0/CONTRIBUTING.md +12 -0
  9. gpufl-0.1.0/Dockerfile.demo +42 -0
  10. gpufl-0.1.0/Dockerfile.monitor +85 -0
  11. gpufl-0.1.0/Dockerfile.monitor.amd +94 -0
  12. gpufl-0.1.0/Dockerfile.monitor.supervisord.conf +27 -0
  13. {gpufl-0.0.1 → gpufl-0.1.0}/LICENSE +201 -201
  14. gpufl-0.1.0/PKG-INFO +349 -0
  15. gpufl-0.1.0/README.md +304 -0
  16. gpufl-0.1.0/benchmark/README.md +71 -0
  17. gpufl-0.1.0/benchmark/cuda_gemm.py +44 -0
  18. gpufl-0.1.0/benchmark/pytorch_train.py +145 -0
  19. gpufl-0.1.0/benchmark/run_benchmark.py +263 -0
  20. gpufl-0.1.0/build.sh +1 -0
  21. gpufl-0.1.0/daemon/README.md +252 -0
  22. gpufl-0.1.0/daemon/monitor/CMakeLists.txt +44 -0
  23. gpufl-0.1.0/daemon/monitor/main.cpp +105 -0
  24. gpufl-0.1.0/docker-compose.monitor.amd.yml +43 -0
  25. gpufl-0.1.0/docker-compose.monitor.yml +71 -0
  26. gpufl-0.1.0/example/amd/CMakeLists.txt +71 -0
  27. gpufl-0.1.0/example/amd/README.md +139 -0
  28. gpufl-0.1.0/example/amd/check_device.cpp +31 -0
  29. gpufl-0.1.0/example/amd/gpufl_scope_demo.cpp +240 -0
  30. gpufl-0.1.0/example/amd/vector_add_benchmark.cpp +137 -0
  31. gpufl-0.1.0/example/cuda/CMakeLists.txt +265 -0
  32. gpufl-0.1.0/example/cuda/block_style_example.cu +100 -0
  33. gpufl-0.1.0/example/cuda/check_conflict.cu +81 -0
  34. gpufl-0.1.0/example/cuda/check_device.cu +25 -0
  35. gpufl-0.1.0/example/cuda/cupti_basic.cu +149 -0
  36. gpufl-0.1.0/example/cuda/cupti_pc_sampling.cu +263 -0
  37. gpufl-0.1.0/example/cuda/list_sass_metrics.cu +46 -0
  38. gpufl-0.1.0/example/cuda/memory_coalescing_demo.cu +134 -0
  39. gpufl-0.1.0/example/cuda/occupancy_demo.cu +155 -0
  40. gpufl-0.1.0/example/cuda/sass_divergence_demo.cu +270 -0
  41. gpufl-0.1.0/example/cuda/system_monitor.cu +58 -0
  42. gpufl-0.1.0/example/cuda/test_occupancy.cu +62 -0
  43. gpufl-0.1.0/example/cuda/vector_add_benchmark.cu +126 -0
  44. {gpufl-0.0.1 → gpufl-0.1.0}/example/python/01_basic.py +25 -25
  45. {gpufl-0.0.1 → gpufl-0.1.0}/example/python/02_numba_cuda.py +76 -76
  46. gpufl-0.1.0/example/python/03_pytorch_benchmark.py +149 -0
  47. gpufl-0.1.0/example/python/analyzer/01_analyzer_sample.py +14 -0
  48. gpufl-0.1.0/example/python/requirements.txt +7 -0
  49. gpufl-0.1.0/example/python/viz/01_plot_memory_timeline.py +9 -0
  50. gpufl-0.1.0/example/python/viz/02_plot_stress_timeline.py +9 -0
  51. gpufl-0.1.0/images/Screenshot1.png +0 -0
  52. gpufl-0.1.0/include/gpufl/backends/amd/engine/amd_profiling_engine.hpp +42 -0
  53. gpufl-0.1.0/include/gpufl/backends/amd/engine/dispatch_counter_engine.cpp +282 -0
  54. gpufl-0.1.0/include/gpufl/backends/amd/engine/dispatch_counter_engine.hpp +65 -0
  55. gpufl-0.1.0/include/gpufl/backends/amd/hip_static_collector.cpp +91 -0
  56. gpufl-0.1.0/include/gpufl/backends/amd/hip_static_collector.hpp +20 -0
  57. gpufl-0.1.0/include/gpufl/backends/amd/monitor_adapter_amd.cpp +56 -0
  58. gpufl-0.1.0/include/gpufl/backends/amd/monitor_adapter_amd.hpp +30 -0
  59. gpufl-0.1.0/include/gpufl/backends/amd/rocm_collector.cpp +522 -0
  60. gpufl-0.1.0/include/gpufl/backends/amd/rocm_collector.hpp +37 -0
  61. gpufl-0.1.0/include/gpufl/backends/amd/rocprofiler_backend.cpp +799 -0
  62. gpufl-0.1.0/include/gpufl/backends/amd/rocprofiler_backend.hpp +144 -0
  63. gpufl-0.1.0/include/gpufl/backends/host_collector.hpp +150 -0
  64. gpufl-0.1.0/include/gpufl/backends/nvidia/cuda_collector.cpp +44 -0
  65. gpufl-0.1.0/include/gpufl/backends/nvidia/cuda_collector.hpp +16 -0
  66. gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_backend.cpp +1218 -0
  67. gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_backend.hpp +159 -0
  68. gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_common.hpp +86 -0
  69. gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_utils.cpp +170 -0
  70. gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_utils.hpp +87 -0
  71. gpufl-0.1.0/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +695 -0
  72. gpufl-0.1.0/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +94 -0
  73. gpufl-0.1.0/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.cpp +70 -0
  74. gpufl-0.1.0/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.hpp +65 -0
  75. gpufl-0.1.0/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +103 -0
  76. gpufl-0.1.0/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +479 -0
  77. gpufl-0.1.0/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +53 -0
  78. gpufl-0.1.0/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +421 -0
  79. gpufl-0.1.0/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +61 -0
  80. gpufl-0.1.0/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +483 -0
  81. gpufl-0.1.0/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +35 -0
  82. gpufl-0.1.0/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +297 -0
  83. gpufl-0.1.0/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +27 -0
  84. gpufl-0.1.0/include/gpufl/backends/nvidia/monitor_adapter_nvidia.cpp +81 -0
  85. gpufl-0.1.0/include/gpufl/backends/nvidia/monitor_adapter_nvidia.hpp +32 -0
  86. gpufl-0.1.0/include/gpufl/backends/nvidia/nvml_collector.cpp +341 -0
  87. gpufl-0.1.0/include/gpufl/backends/nvidia/nvml_collector.hpp +48 -0
  88. gpufl-0.1.0/include/gpufl/backends/nvidia/resource_handler.cpp +151 -0
  89. gpufl-0.1.0/include/gpufl/backends/nvidia/resource_handler.hpp +40 -0
  90. gpufl-0.1.0/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +56 -0
  91. gpufl-0.1.0/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +19 -0
  92. gpufl-0.1.0/include/gpufl/backends/nvidia/synchronization_handler.cpp +149 -0
  93. gpufl-0.1.0/include/gpufl/backends/nvidia/synchronization_handler.hpp +60 -0
  94. gpufl-0.1.0/include/gpufl/core/activity_record.hpp +141 -0
  95. gpufl-0.1.0/include/gpufl/core/backend_factory.cpp +139 -0
  96. gpufl-0.1.0/include/gpufl/core/backend_factory.hpp +13 -0
  97. gpufl-0.1.0/include/gpufl/core/backend_interfaces.hpp +31 -0
  98. gpufl-0.1.0/include/gpufl/core/batch_buffer.hpp +23 -0
  99. gpufl-0.1.0/include/gpufl/core/common.cpp +45 -0
  100. gpufl-0.1.0/include/gpufl/core/common.hpp +111 -0
  101. gpufl-0.1.0/include/gpufl/core/config_file_loader.cpp +51 -0
  102. gpufl-0.1.0/include/gpufl/core/config_file_loader.hpp +18 -0
  103. gpufl-0.1.0/include/gpufl/core/debug_logger.cpp +9 -0
  104. gpufl-0.1.0/include/gpufl/core/debug_logger.hpp +43 -0
  105. gpufl-0.1.0/include/gpufl/core/dictionary_manager.cpp +575 -0
  106. gpufl-0.1.0/include/gpufl/core/dictionary_manager.hpp +138 -0
  107. gpufl-0.1.0/include/gpufl/core/events.hpp +601 -0
  108. gpufl-0.1.0/include/gpufl/core/gpufl.cpp +699 -0
  109. gpufl-0.1.0/include/gpufl/core/host_info.cpp +131 -0
  110. gpufl-0.1.0/include/gpufl/core/host_info.hpp +30 -0
  111. gpufl-0.1.0/include/gpufl/core/itanium_demangle.cpp +543 -0
  112. gpufl-0.1.0/include/gpufl/core/itanium_demangle.hpp +43 -0
  113. gpufl-0.1.0/include/gpufl/core/json/json.cpp +369 -0
  114. gpufl-0.1.0/include/gpufl/core/json/json.hpp +155 -0
  115. gpufl-0.1.0/include/gpufl/core/logger/file_compressor.cpp +44 -0
  116. gpufl-0.1.0/include/gpufl/core/logger/file_compressor.hpp +18 -0
  117. gpufl-0.1.0/include/gpufl/core/logger/file_log_sink.cpp +151 -0
  118. gpufl-0.1.0/include/gpufl/core/logger/file_log_sink.hpp +82 -0
  119. gpufl-0.1.0/include/gpufl/core/logger/http_log_sink.cpp +408 -0
  120. gpufl-0.1.0/include/gpufl/core/logger/http_log_sink.hpp +181 -0
  121. gpufl-0.1.0/include/gpufl/core/logger/log_rotator.cpp +65 -0
  122. gpufl-0.1.0/include/gpufl/core/logger/log_rotator.hpp +32 -0
  123. gpufl-0.1.0/include/gpufl/core/logger/log_sink.hpp +53 -0
  124. gpufl-0.1.0/include/gpufl/core/logger/logger.cpp +47 -0
  125. gpufl-0.1.0/include/gpufl/core/logger/logger.hpp +76 -0
  126. gpufl-0.1.0/include/gpufl/core/model/batch_models.cpp +316 -0
  127. gpufl-0.1.0/include/gpufl/core/model/batch_models.hpp +167 -0
  128. gpufl-0.1.0/include/gpufl/core/model/graph_launch_event_model.cpp +37 -0
  129. gpufl-0.1.0/include/gpufl/core/model/graph_launch_event_model.hpp +23 -0
  130. gpufl-0.1.0/include/gpufl/core/model/kernel_event_model.cpp +59 -0
  131. gpufl-0.1.0/include/gpufl/core/model/kernel_event_model.hpp +16 -0
  132. gpufl-0.1.0/include/gpufl/core/model/lifecycle_model.cpp +83 -0
  133. gpufl-0.1.0/include/gpufl/core/model/lifecycle_model.hpp +32 -0
  134. gpufl-0.1.0/include/gpufl/core/model/memcpy_event_model.cpp +58 -0
  135. gpufl-0.1.0/include/gpufl/core/model/memcpy_event_model.hpp +24 -0
  136. gpufl-0.1.0/include/gpufl/core/model/memory_alloc_event_model.cpp +42 -0
  137. gpufl-0.1.0/include/gpufl/core/model/memory_alloc_event_model.hpp +28 -0
  138. gpufl-0.1.0/include/gpufl/core/model/model_utils.hpp +109 -0
  139. gpufl-0.1.0/include/gpufl/core/model/nvtx_marker_model.cpp +25 -0
  140. gpufl-0.1.0/include/gpufl/core/model/nvtx_marker_model.hpp +22 -0
  141. gpufl-0.1.0/include/gpufl/core/model/perf_metric_model.cpp +33 -0
  142. gpufl-0.1.0/include/gpufl/core/model/perf_metric_model.hpp +16 -0
  143. gpufl-0.1.0/include/gpufl/core/model/profile_sample_model.cpp +40 -0
  144. gpufl-0.1.0/include/gpufl/core/model/profile_sample_model.hpp +16 -0
  145. gpufl-0.1.0/include/gpufl/core/model/scope_event_model.cpp +43 -0
  146. gpufl-0.1.0/include/gpufl/core/model/scope_event_model.hpp +24 -0
  147. gpufl-0.1.0/include/gpufl/core/model/serializable.hpp +15 -0
  148. gpufl-0.1.0/include/gpufl/core/model/synchronization_event_model.cpp +38 -0
  149. gpufl-0.1.0/include/gpufl/core/model/synchronization_event_model.hpp +30 -0
  150. gpufl-0.1.0/include/gpufl/core/model/system_event_model.cpp +51 -0
  151. gpufl-0.1.0/include/gpufl/core/model/system_event_model.hpp +32 -0
  152. gpufl-0.1.0/include/gpufl/core/monitor.cpp +594 -0
  153. gpufl-0.1.0/include/gpufl/core/monitor.hpp +204 -0
  154. gpufl-0.1.0/include/gpufl/core/monitor_adapter.cpp +41 -0
  155. gpufl-0.1.0/include/gpufl/core/monitor_adapter.hpp +31 -0
  156. gpufl-0.1.0/include/gpufl/core/monitor_backend.hpp +76 -0
  157. gpufl-0.1.0/include/gpufl/core/remote_config.cpp +279 -0
  158. gpufl-0.1.0/include/gpufl/core/remote_config.hpp +60 -0
  159. gpufl-0.1.0/include/gpufl/core/ring_buffer.hpp +96 -0
  160. gpufl-0.1.0/include/gpufl/core/runtime.cpp +6 -0
  161. gpufl-0.1.0/include/gpufl/core/runtime.hpp +32 -0
  162. gpufl-0.1.0/include/gpufl/core/sampler.cpp +131 -0
  163. gpufl-0.1.0/include/gpufl/core/sampler.hpp +63 -0
  164. gpufl-0.1.0/include/gpufl/core/sass_compressor.cpp +109 -0
  165. gpufl-0.1.0/include/gpufl/core/sass_compressor.hpp +52 -0
  166. gpufl-0.1.0/include/gpufl/core/scope_registry.cpp +10 -0
  167. gpufl-0.1.0/include/gpufl/core/scope_registry.hpp +8 -0
  168. gpufl-0.1.0/include/gpufl/core/stack_registry.hpp +47 -0
  169. gpufl-0.1.0/include/gpufl/core/stack_trace.cpp +139 -0
  170. gpufl-0.1.0/include/gpufl/core/stack_trace.hpp +19 -0
  171. gpufl-0.1.0/include/gpufl/core/stream_handle.hpp +9 -0
  172. gpufl-0.1.0/include/gpufl/core/trace_type.hpp +89 -0
  173. gpufl-0.1.0/include/gpufl/core/version.hpp +63 -0
  174. gpufl-0.1.0/include/gpufl/gpufl.hpp +240 -0
  175. gpufl-0.1.0/include/gpufl/report/hint_engine.cpp +91 -0
  176. gpufl-0.1.0/include/gpufl/report/hint_engine.hpp +28 -0
  177. gpufl-0.1.0/include/gpufl/report/text_report.cpp +1127 -0
  178. gpufl-0.1.0/include/gpufl/report/text_report.hpp +176 -0
  179. gpufl-0.1.0/include/gpufl.hpp +3 -0
  180. gpufl-0.1.0/pyproject.toml +85 -0
  181. gpufl-0.1.0/python/bindings.cpp +205 -0
  182. gpufl-0.1.0/python/gpufl/.gitignore +159 -0
  183. gpufl-0.1.0/python/gpufl/__init__.py +227 -0
  184. gpufl-0.1.0/python/gpufl/analyzer/__init__.py +1 -0
  185. gpufl-0.1.0/python/gpufl/analyzer/analyzer.py +1153 -0
  186. gpufl-0.1.0/python/gpufl/cupy/__init__.py +69 -0
  187. gpufl-0.1.0/python/gpufl/jax/__init__.py +68 -0
  188. gpufl-0.1.0/python/gpufl/numba/__init__.py +58 -0
  189. gpufl-0.1.0/python/gpufl/report/__init__.py +1 -0
  190. gpufl-0.1.0/python/gpufl/report/text_report.py +516 -0
  191. gpufl-0.1.0/python/gpufl/torch/__init__.py +59 -0
  192. gpufl-0.1.0/python/gpufl/torch/dispatch.py +184 -0
  193. gpufl-0.1.0/python/gpufl/torch/profile.py +76 -0
  194. gpufl-0.1.0/python/gpufl/torch/stack.py +62 -0
  195. gpufl-0.1.0/python/gpufl/torch/trace_import.py +125 -0
  196. gpufl-0.1.0/python/gpufl/triton/__init__.py +64 -0
  197. gpufl-0.1.0/python/gpufl/utils.py +19 -0
  198. gpufl-0.1.0/python/gpufl/viz/__init__.py +27 -0
  199. gpufl-0.1.0/python/gpufl/viz/reader.py +48 -0
  200. gpufl-0.1.0/python/gpufl/viz/timeline.py +378 -0
  201. gpufl-0.1.0/python/gpufl/viz/visualizer.py +194 -0
  202. gpufl-0.1.0/scripts/docker-demo-loop.sh +17 -0
  203. gpufl-0.1.0/scripts/windows/run-monitor-local.bat +20 -0
  204. gpufl-0.1.0/tests/CMakeLists.txt +187 -0
  205. gpufl-0.1.0/tests/backends/amd/test_rocm_collector.cpp +91 -0
  206. gpufl-0.1.0/tests/backends/nvidia/test_cuda_collector.cpp +34 -0
  207. gpufl-0.1.0/tests/backends/nvidia/test_engine_coverage.cpp +294 -0
  208. gpufl-0.1.0/tests/backends/nvidia/test_nvidia_backend.cpp +132 -0
  209. gpufl-0.1.0/tests/backends/nvidia/test_nvml_collector.cpp +54 -0
  210. gpufl-0.1.0/tests/common/log_utils.cpp +161 -0
  211. gpufl-0.1.0/tests/common/log_utils.hpp +61 -0
  212. gpufl-0.1.0/tests/common/test_kernel.cu +45 -0
  213. gpufl-0.1.0/tests/common/test_kernel.hpp +22 -0
  214. gpufl-0.1.0/tests/common/test_utils.hpp +55 -0
  215. gpufl-0.1.0/tests/core/test_analyzer.cpp +15 -0
  216. gpufl-0.1.0/tests/core/test_api_path_routing.cpp +213 -0
  217. gpufl-0.1.0/tests/core/test_batch_models.cpp +144 -0
  218. gpufl-0.1.0/tests/core/test_http_log_sink.cpp +300 -0
  219. gpufl-0.1.0/tests/core/test_itanium_demangle.cpp +146 -0
  220. gpufl-0.1.0/tests/core/test_monitor.cpp +77 -0
  221. gpufl-0.1.0/tests/core/test_wire_contract.cpp +394 -0
  222. gpufl-0.1.0/tests/main_test_runner.cpp +6 -0
  223. gpufl-0.1.0/tests/python/conftest.py +223 -0
  224. gpufl-0.1.0/tests/python/test_analyzer.py +82 -0
  225. gpufl-0.1.0/tests/python/test_bindings.py +188 -0
  226. gpufl-0.1.0/tests/python/test_remote_upload_smoke.py +185 -0
  227. gpufl-0.1.0/tests/run_engine_coverage.ps1 +86 -0
  228. gpufl-0.1.0/tests/run_engine_coverage.sh +83 -0
  229. gpufl-0.1.0/tests/verify_pipeline.py +101 -0
  230. gpufl-0.0.1/.github/workflows/build.yml +0 -59
  231. gpufl-0.0.1/CMakeLists.txt +0 -74
  232. gpufl-0.0.1/PKG-INFO +0 -362
  233. gpufl-0.0.1/README.md +0 -343
  234. gpufl-0.0.1/example/cuda/CMakeLists.txt +0 -63
  235. gpufl-0.0.1/example/cuda/block_style_example.cu +0 -159
  236. gpufl-0.0.1/example/cuda/system_monitor.cu +0 -21
  237. gpufl-0.0.1/example/python/03_kernel.launch.py +0 -34
  238. gpufl-0.0.1/example/python/requirements.txt +0 -2
  239. gpufl-0.0.1/include/gpufl/backends/cuda.hpp +0 -259
  240. gpufl-0.0.1/include/gpufl/core/common.hpp +0 -201
  241. gpufl-0.0.1/include/gpufl/core/monitor.hpp +0 -261
  242. gpufl-0.0.1/include/gpufl/gpufl.hpp +0 -26
  243. gpufl-0.0.1/pyproject.toml +0 -35
  244. gpufl-0.0.1/python/bindings.cpp +0 -67
  245. gpufl-0.0.1/python/gpufl/__init__.py +0 -32
  246. gpufl-0.0.1/python/gpufl/utils.py +0 -35
  247. gpufl-0.0.1/schema/ndjson.schema.json +0 -133
  248. gpufl-0.0.1/tests/verify_pipeline.py +0 -88
@@ -0,0 +1,5 @@
1
+ ---
2
+ Language: Cpp
3
+ BasedOnStyle: Google
4
+ IndentWidth: 4
5
+ ColumnLimit: 80
@@ -0,0 +1,18 @@
1
+ # Python / notebooks — not needed for the C++ daemon build
2
+ python/
3
+ example/python/
4
+ **/.Trash-*
5
+ **/__pycache__/
6
+ **/*.pyc
7
+
8
+ # Build artifacts
9
+ cmake-build-*/
10
+ build/
11
+ *.o
12
+ *.a
13
+
14
+ # Dev / IDE
15
+ .git/
16
+ .idea/
17
+ .vscode/
18
+ *.md
@@ -0,0 +1,11 @@
1
+ ## Description
2
+ ## Type of Change
3
+ - [ ] Bug fix
4
+ - [ ] New feature
5
+ - [ ] Documentation update
6
+
7
+ ## Testing
8
+ ## Checklist
9
+ - [ ] My code follows the style guidelines of this project
10
+ - [ ] I have performed a self-review of my own code
11
+ - [ ] I have commented my code, particularly in hard-to-understand areas
@@ -0,0 +1,119 @@
1
+ name: Build GPUFl Client
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+
9
+ jobs:
10
+ build:
11
+ name: Build on ${{ matrix.os }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ matrix:
15
+ os: [ubuntu-22.04, windows-latest]
16
+ python-version: ["3.12", "3.13"]
17
+
18
+ env:
19
+ CMAKE_ARGS: >-
20
+ -DGPUFL_ENABLE_NVIDIA=ON
21
+ -DGPUFL_ENABLE_AMD=OFF
22
+ -DBUILD_TESTING=OFF
23
+
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+
27
+ - name: Set up Python ${{ matrix.python-version }}
28
+ uses: actions/setup-python@v5
29
+ with:
30
+ python-version: ${{ matrix.python-version }}
31
+
32
+ # Install CUDA Toolkit (provides CUDA_PATH)
33
+ - name: Install CUDA Toolkit
34
+ uses: Jimver/cuda-toolkit@v0.2.30
35
+ id: cuda-toolkit
36
+ with:
37
+ cuda: '13.1.0'
38
+ method: 'network'
39
+ use-github-cache: false
40
+
41
+ # Make sure CMake can find CUDA headers/libraries during the *pip build*.
42
+ # Jimver/cuda-toolkit sets CUDA_PATH; we map it to common vars CMake respects.
43
+ - name: Export CUDA environment for CMake
44
+ shell: bash
45
+ run: |
46
+ echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV
47
+ echo "CUDAToolkit_ROOT=${CUDA_PATH}" >> $GITHUB_ENV
48
+ if [ "${{ runner.os }}" == "Windows" ]; then
49
+ echo "${CUDA_PATH}/bin" >> $GITHUB_PATH
50
+ echo "${CUDA_PATH}/extras/CUPTI/lib64" >> $GITHUB_PATH
51
+ echo "C:/Program Files/NVIDIA Corporation/NVSMI" >> $GITHUB_PATH
52
+ fi
53
+
54
+ - name: Install system dependencies
55
+ if: runner.os == 'Linux'
56
+ run: |
57
+ sudo apt-get update
58
+ sudo apt-get install -y libcurl4-openssl-dev
59
+
60
+ - name: Install python dependencies
61
+ run: |
62
+ python -m pip install --upgrade pip
63
+ pip install scikit-build-core pybind11 cmake ninja
64
+
65
+ - name: Build and Install
66
+ run: |
67
+ pip install .[viz,analyzer] -v
68
+
69
+ - name: Run C++ Unit Tests
70
+ # Skip C++ tests on Windows as they require actual NVIDIA GPUs to run (CUDA/CUPTI initialization)
71
+ if: runner.os != 'Windows'
72
+ shell: bash
73
+ run: |
74
+ # 1. Prepare a local writable directory for CUDA stubs
75
+ # We cannot write to the system CUDA directory (Permission denied).
76
+ LOCAL_STUBS_DIR="${GITHUB_WORKSPACE}/local_cuda_stubs"
77
+ mkdir -p "${LOCAL_STUBS_DIR}"
78
+
79
+ # 2. Gather relevant library directories for the CUDA Toolkit
80
+ STUBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib/stubs"
81
+ LIBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib"
82
+
83
+ # 3. Create versioned symlinks in the LOCAL directory
84
+ # Many binaries expect .so.1 which is only created by the driver installer.
85
+ for lib in libcuda libnvidia-ml libnvrtc; do
86
+ if [ -f "${STUBS_DIR}/${lib}.so" ]; then
87
+ # Symlink the original stub to our local dir
88
+ ln -sf "${STUBS_DIR}/${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so"
89
+ # Create the versioned symlink in our local dir
90
+ ln -sf "${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so.1"
91
+ fi
92
+ done
93
+
94
+ # 4. Add local stubs and toolkit libs to LD_LIBRARY_PATH
95
+ export LD_LIBRARY_PATH="${LOCAL_STUBS_DIR}:${LIBS_DIR}:${LD_LIBRARY_PATH}"
96
+
97
+ # Debug: check what libraries are found
98
+ echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
99
+ ls -l "${LOCAL_STUBS_DIR}" || true
100
+
101
+ cmake -B build_tests -S . \
102
+ -DGPUFL_ENABLE_NVIDIA=ON \
103
+ -DBUILD_PYTHON=OFF \
104
+ -DBUILD_TESTING=ON
105
+
106
+ cmake --build build_tests --target gpufl_tests
107
+
108
+ ctest --test-dir build_tests --output-on-failure --verbose --timeout 60
109
+
110
+ - name: Run Python Unit Tests
111
+ shell: bash
112
+ run: |
113
+ python -m pip install pytest
114
+ export PYTHONPATH=$PYTHONPATH:$(pwd)/python
115
+ python -m pytest tests/python
116
+
117
+ - name: Verify Logging Pipeline
118
+ run: |
119
+ python -u tests/verify_pipeline.py
@@ -0,0 +1,239 @@
1
+ name: Build and Release Wheels
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build_wheels:
11
+ name: Build wheels on ${{ matrix.os }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ matrix:
15
+ os: [ubuntu-22.04, windows-latest]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set package version from tag
21
+ if: startsWith(github.ref, 'refs/tags/v')
22
+ shell: python
23
+ run: |
24
+ import os
25
+ import re
26
+ from pathlib import Path
27
+
28
+ ref_name = os.environ.get("GITHUB_REF_NAME", "")
29
+ if not ref_name.startswith("v"):
30
+ raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
31
+ version = ref_name[1:]
32
+ print(f"Using version from tag: {version}")
33
+
34
+ pyproject = Path("pyproject.toml")
35
+ text = pyproject.read_text(encoding="utf-8")
36
+ text_new, n = re.subn(
37
+ r'(?m)^version\s*=\s*"[^\"]+"$',
38
+ f'version = "{version}"',
39
+ text,
40
+ count=1,
41
+ )
42
+ if n != 1:
43
+ raise SystemExit("Failed to update [project].version in pyproject.toml")
44
+ pyproject.write_text(text_new, encoding="utf-8")
45
+
46
+ init_py = Path("python/gpufl/__init__.py")
47
+ if init_py.exists():
48
+ init_text = init_py.read_text(encoding="utf-8")
49
+ init_new, _ = re.subn(
50
+ r'(?m)^__version__\s*=\s*"[^\"]+"$',
51
+ f'__version__ = "{version}"',
52
+ init_text,
53
+ )
54
+ init_py.write_text(init_new, encoding="utf-8")
55
+
56
+ # Keep the C++ side in lockstep. The CMake project() VERSION is the
57
+ # single source of truth for GPUFL_CLIENT_VERSION (stamped into the
58
+ # binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
59
+ # this, release wheels would ship the tag version in Python metadata
60
+ # but a stale hardcoded version in the compiled client. CMake's
61
+ # project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
62
+ # so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
63
+ # full version still lands in the wheel metadata above.
64
+ m = re.match(r"\d+(?:\.\d+){0,3}", version)
65
+ cmake_version = m.group(0) if m else version
66
+ cmakelists = Path("CMakeLists.txt")
67
+ cm_text = cmakelists.read_text(encoding="utf-8")
68
+ cm_new, cm_n = re.subn(
69
+ r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
70
+ rf'\g<1>{cmake_version}',
71
+ cm_text,
72
+ count=1,
73
+ flags=re.DOTALL,
74
+ )
75
+ if cm_n != 1:
76
+ raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
77
+ cmakelists.write_text(cm_new, encoding="utf-8")
78
+
79
+ - name: Cache cibuildwheel downloads
80
+ uses: actions/cache@v4
81
+ with:
82
+ path: |
83
+ ~/.cache/cibuildwheel
84
+ ~/AppData/Local/pypa/cibuildwheel/Cache
85
+ key: cibw-${{ runner.os }}-${{ hashFiles('.github/workflows/release.yml') }}
86
+ restore-keys: |
87
+ cibw-${{ runner.os }}-
88
+
89
+ - name: Install CUDA (Windows)
90
+ if: runner.os == 'Windows'
91
+ uses: Jimver/cuda-toolkit@v0.2.30
92
+ with:
93
+ cuda: '13.1.0'
94
+ method: 'network'
95
+
96
+ - name: Prefetch virtualenv.pyz (Windows)
97
+ if: runner.os == 'Windows'
98
+ shell: pwsh
99
+ run: |
100
+ $version = "20.27.1"
101
+ $cacheDir = Join-Path $env:LOCALAPPDATA "pypa\cibuildwheel\Cache"
102
+ New-Item -ItemType Directory -Path $cacheDir -Force | Out-Null
103
+ $dest = Join-Path $cacheDir "virtualenv-$version.pyz"
104
+ if (Test-Path $dest) {
105
+ Write-Host "virtualenv.pyz already cached: $dest"
106
+ exit 0
107
+ }
108
+ $urls = @(
109
+ "https://raw.githubusercontent.com/pypa/get-virtualenv/$version/public/virtualenv.pyz",
110
+ "https://raw.githubusercontent.com/pypa/get-virtualenv/refs/tags/$version/public/virtualenv.pyz",
111
+ "https://bootstrap.pypa.io/virtualenv.pyz"
112
+ )
113
+ $max = 6
114
+ $ok = $false
115
+ foreach ($url in $urls) {
116
+ for ($i = 1; $i -le $max; $i++) {
117
+ try {
118
+ Write-Host "Downloading virtualenv.pyz from $url (attempt $i/$max)..."
119
+ Invoke-WebRequest -Uri $url -OutFile $dest -TimeoutSec 120 -Headers @{ "User-Agent" = "cibuildwheel-prefetch" }
120
+ if ((Get-Item $dest).Length -gt 0) {
121
+ Write-Host "Downloaded: $dest"
122
+ $ok = $true
123
+ break
124
+ }
125
+ } catch {
126
+ if (Test-Path $dest) { Remove-Item $dest -Force -ErrorAction SilentlyContinue }
127
+ if ($i -eq $max) { break }
128
+ Start-Sleep -Seconds (5 * $i)
129
+ }
130
+ }
131
+ if ($ok) { break }
132
+ }
133
+ if (-not $ok) { throw "Failed to prefetch virtualenv.pyz from all sources." }
134
+
135
+ - name: Build wheels
136
+ uses: pypa/cibuildwheel@v2.22.0
137
+ env:
138
+ CIBW_VIRTUALENV_VERSION: "20.27.1"
139
+ CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
140
+ CIBW_BEFORE_ALL_LINUX: >-
141
+ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
142
+ dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1
143
+ CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
144
+ CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
145
+ CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 -w {dest_dir} {wheel}"
146
+
147
+ - uses: actions/upload-artifact@v4
148
+ with:
149
+ name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
150
+ path: ./wheelhouse/*.whl
151
+
152
+ build_sdist:
153
+ name: Build source distribution
154
+ runs-on: ubuntu-latest
155
+ steps:
156
+ - uses: actions/checkout@v4
157
+
158
+ - name: Set package version from tag
159
+ if: startsWith(github.ref, 'refs/tags/v')
160
+ shell: python
161
+ run: |
162
+ import os
163
+ import re
164
+ from pathlib import Path
165
+
166
+ ref_name = os.environ.get("GITHUB_REF_NAME", "")
167
+ if not ref_name.startswith("v"):
168
+ raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
169
+ version = ref_name[1:]
170
+ print(f"Using version from tag: {version}")
171
+
172
+ pyproject = Path("pyproject.toml")
173
+ text = pyproject.read_text(encoding="utf-8")
174
+ text_new, n = re.subn(
175
+ r'(?m)^version\s*=\s*"[^\"]+"$',
176
+ f'version = "{version}"',
177
+ text,
178
+ count=1,
179
+ )
180
+ if n != 1:
181
+ raise SystemExit("Failed to update [project].version in pyproject.toml")
182
+ pyproject.write_text(text_new, encoding="utf-8")
183
+
184
+ init_py = Path("python/gpufl/__init__.py")
185
+ if init_py.exists():
186
+ init_text = init_py.read_text(encoding="utf-8")
187
+ init_new, _ = re.subn(
188
+ r'(?m)^__version__\s*=\s*"[^\"]+"$',
189
+ f'__version__ = "{version}"',
190
+ init_text,
191
+ )
192
+ init_py.write_text(init_new, encoding="utf-8")
193
+
194
+ # Keep the C++ side in lockstep. The CMake project() VERSION is the
195
+ # single source of truth for GPUFL_CLIENT_VERSION (stamped into the
196
+ # binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
197
+ # this, release wheels would ship the tag version in Python metadata
198
+ # but a stale hardcoded version in the compiled client. CMake's
199
+ # project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
200
+ # so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
201
+ # full version still lands in the wheel metadata above.
202
+ m = re.match(r"\d+(?:\.\d+){0,3}", version)
203
+ cmake_version = m.group(0) if m else version
204
+ cmakelists = Path("CMakeLists.txt")
205
+ cm_text = cmakelists.read_text(encoding="utf-8")
206
+ cm_new, cm_n = re.subn(
207
+ r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
208
+ rf'\g<1>{cmake_version}',
209
+ cm_text,
210
+ count=1,
211
+ flags=re.DOTALL,
212
+ )
213
+ if cm_n != 1:
214
+ raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
215
+ cmakelists.write_text(cm_new, encoding="utf-8")
216
+
217
+ - name: Build sdist
218
+ run: pipx run build --sdist
219
+
220
+ - uses: actions/upload-artifact@v4
221
+ with:
222
+ name: cibw-sdist
223
+ path: dist/*.tar.gz
224
+
225
+ upload_pypi:
226
+ needs: [build_wheels, build_sdist]
227
+ runs-on: ubuntu-latest
228
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
229
+ steps:
230
+ - uses: actions/download-artifact@v4
231
+ with:
232
+ pattern: cibw-*
233
+ path: dist
234
+ merge-multiple: true
235
+
236
+ - name: Publish to PyPI
237
+ uses: pypa/gh-action-pypi-publish@release/v1
238
+ with:
239
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -1,80 +1,91 @@
1
- ### idea
2
- .idea/**
3
- build/
4
- cmake-build-*/
5
- cmake/
6
-
7
- ### C++ template
8
- # Prerequisites
9
- *.d
10
-
11
- # Compiled Object files
12
- *.slo
13
- *.lo
14
- *.o
15
- *.obj
16
-
17
- # Precompiled Headers
18
- *.gch
19
- *.pch
20
-
21
- # Compiled Dynamic libraries
22
- *.so
23
- *.dylib
24
- *.dll
25
-
26
- # Fortran module files
27
- *.mod
28
- *.smod
29
-
30
- # Compiled Static libraries
31
- *.lai
32
- *.la
33
- *.a
34
- *.lib
35
-
36
- # Executables
37
- *.exe
38
- *.out
39
- *.app
40
-
41
- ### C template
42
- # Prerequisites
43
- *.d
44
-
45
- # Object files
46
- *.o
47
- *.ko
48
- *.obj
49
- *.elf
50
-
51
- # Linker output
52
- *.ilk
53
- *.map
54
- *.exp
55
-
56
- # Precompiled Headers
57
- *.gch
58
- *.pch
59
-
60
- # Libraries
61
- *.lib
62
- *.a
63
- *.la
64
- *.lo
65
-
66
- # Shared objects (inc. Windows DLLs)
67
- *.dll
68
- *.so
69
- *.so.*
70
- *.dylib
71
-
72
- # Executables
73
- *.exe
74
- *.out
75
- *.app
76
- *.i*86
77
- *.x86_64
78
- *.hex
79
-
80
-
1
+ ### claude
2
+ .claude/
3
+
4
+ ### idea
5
+ .idea/**
6
+ build/
7
+ build-*/
8
+ build_tests/
9
+ cmake-build-*/
10
+ cmake/
11
+ CMakeFiles/
12
+ CMakeCache.txt
13
+ wget-log*
14
+
15
+ ### docker
16
+ example/python/docker/**/
17
+
18
+ ### C++ template
19
+ # Prerequisites
20
+ *.d
21
+
22
+ # Compiled Object files
23
+ *.slo
24
+ *.lo
25
+ *.o
26
+ *.obj
27
+
28
+ # Precompiled Headers
29
+ *.gch
30
+ *.pch
31
+
32
+ # Compiled Dynamic libraries
33
+ *.so
34
+ *.dylib
35
+ *.dll
36
+
37
+ # Fortran module files
38
+ *.mod
39
+ *.smod
40
+
41
+ # Compiled Static libraries
42
+ *.lai
43
+ *.la
44
+ *.a
45
+ *.lib
46
+
47
+ # Executables
48
+ *.exe
49
+ *.out
50
+ *.app
51
+
52
+ ### C template
53
+ # Prerequisites
54
+ *.d
55
+
56
+ # Object files
57
+ *.o
58
+ *.ko
59
+ *.obj
60
+ *.elf
61
+
62
+ # Linker output
63
+ *.ilk
64
+ *.map
65
+ *.exp
66
+
67
+ # Precompiled Headers
68
+ *.gch
69
+ *.pch
70
+
71
+ # Libraries
72
+ *.lib
73
+ *.a
74
+ *.la
75
+ *.lo
76
+
77
+ # Shared objects (inc. Windows DLLs)
78
+ *.dll
79
+ *.so
80
+ *.so.*
81
+ *.dylib
82
+
83
+ # Executables
84
+ *.exe
85
+ *.out
86
+ *.app
87
+ *.i*86
88
+ *.x86_64
89
+ *.hex
90
+
91
+ *.log