gpufl 0.1.0.dev0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263)
  1. gpufl-0.1.2/.dockerignore +18 -0
  2. gpufl-0.1.2/.github/workflows/release.yml +253 -0
  3. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/.gitignore +12 -1
  4. gpufl-0.1.2/CMakeLists.txt +625 -0
  5. gpufl-0.1.2/Dockerfile.demo +42 -0
  6. gpufl-0.1.2/Dockerfile.monitor +85 -0
  7. gpufl-0.1.2/Dockerfile.monitor.amd +94 -0
  8. gpufl-0.1.2/Dockerfile.monitor.supervisord.conf +27 -0
  9. gpufl-0.1.2/PKG-INFO +349 -0
  10. gpufl-0.1.2/README.md +304 -0
  11. gpufl-0.1.2/benchmark/README.md +71 -0
  12. gpufl-0.1.2/benchmark/cuda_gemm.py +44 -0
  13. gpufl-0.1.2/benchmark/pytorch_train.py +145 -0
  14. gpufl-0.1.2/benchmark/run_benchmark.py +263 -0
  15. gpufl-0.1.2/daemon/README.md +252 -0
  16. gpufl-0.1.2/daemon/monitor/CMakeLists.txt +44 -0
  17. gpufl-0.1.2/daemon/monitor/main.cpp +105 -0
  18. gpufl-0.1.2/docker-compose.monitor.amd.yml +43 -0
  19. gpufl-0.1.2/docker-compose.monitor.yml +71 -0
  20. gpufl-0.1.2/example/amd/CMakeLists.txt +71 -0
  21. gpufl-0.1.2/example/amd/README.md +139 -0
  22. gpufl-0.1.2/example/amd/check_device.cpp +31 -0
  23. gpufl-0.1.2/example/amd/gpufl_scope_demo.cpp +240 -0
  24. gpufl-0.1.2/example/amd/vector_add_benchmark.cpp +137 -0
  25. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/CMakeLists.txt +111 -87
  26. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/block_style_example.cu +13 -10
  27. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/cupti_basic.cu +73 -53
  28. gpufl-0.1.2/example/cuda/memory_coalescing_demo.cu +134 -0
  29. gpufl-0.1.2/example/cuda/sass_divergence_demo.cu +270 -0
  30. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/vector_add_benchmark.cu +23 -0
  31. gpufl-0.1.2/example/python/03_pytorch_benchmark.py +149 -0
  32. gpufl-0.1.2/example/python/analyzer/01_analyzer_sample.py +14 -0
  33. gpufl-0.1.2/include/gpufl/backends/amd/engine/amd_profiling_engine.hpp +42 -0
  34. gpufl-0.1.2/include/gpufl/backends/amd/engine/dispatch_counter_engine.cpp +282 -0
  35. gpufl-0.1.2/include/gpufl/backends/amd/engine/dispatch_counter_engine.hpp +65 -0
  36. gpufl-0.1.2/include/gpufl/backends/amd/hip_static_collector.cpp +91 -0
  37. gpufl-0.1.2/include/gpufl/backends/amd/hip_static_collector.hpp +20 -0
  38. gpufl-0.1.2/include/gpufl/backends/amd/monitor_adapter_amd.cpp +56 -0
  39. gpufl-0.1.2/include/gpufl/backends/amd/monitor_adapter_amd.hpp +30 -0
  40. gpufl-0.1.2/include/gpufl/backends/amd/rocm_collector.cpp +522 -0
  41. gpufl-0.1.2/include/gpufl/backends/amd/rocm_collector.hpp +37 -0
  42. gpufl-0.1.2/include/gpufl/backends/amd/rocprofiler_backend.cpp +799 -0
  43. gpufl-0.1.2/include/gpufl/backends/amd/rocprofiler_backend.hpp +144 -0
  44. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/host_collector.hpp +2 -2
  45. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cuda_collector.cpp +5 -4
  46. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cuda_collector.hpp +2 -2
  47. gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_backend.cpp +1218 -0
  48. gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_backend.hpp +159 -0
  49. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_common.hpp +7 -67
  50. gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_utils.cpp +170 -0
  51. gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_utils.hpp +87 -0
  52. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +695 -0
  53. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +94 -0
  54. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.cpp +70 -0
  55. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.hpp +65 -0
  56. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +103 -0
  57. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +479 -0
  58. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +53 -0
  59. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +421 -0
  60. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +61 -0
  61. gpufl-0.1.2/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +483 -0
  62. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +10 -1
  63. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +72 -12
  64. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +2 -1
  65. gpufl-0.1.2/include/gpufl/backends/nvidia/monitor_adapter_nvidia.cpp +81 -0
  66. gpufl-0.1.2/include/gpufl/backends/nvidia/monitor_adapter_nvidia.hpp +32 -0
  67. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/nvml_collector.cpp +154 -1
  68. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/nvml_collector.hpp +10 -0
  69. gpufl-0.1.2/include/gpufl/backends/nvidia/resource_handler.cpp +151 -0
  70. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/resource_handler.hpp +15 -0
  71. gpufl-0.1.2/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +56 -0
  72. gpufl-0.1.2/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +19 -0
  73. gpufl-0.1.2/include/gpufl/backends/nvidia/synchronization_handler.cpp +149 -0
  74. gpufl-0.1.2/include/gpufl/backends/nvidia/synchronization_handler.hpp +60 -0
  75. gpufl-0.1.2/include/gpufl/core/activity_record.hpp +141 -0
  76. gpufl-0.1.2/include/gpufl/core/backend_factory.cpp +139 -0
  77. gpufl-0.1.2/include/gpufl/core/backend_factory.hpp +13 -0
  78. gpufl-0.1.2/include/gpufl/core/backend_interfaces.hpp +31 -0
  79. gpufl-0.1.2/include/gpufl/core/batch_buffer.hpp +23 -0
  80. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/common.hpp +2 -0
  81. gpufl-0.1.2/include/gpufl/core/config_file_loader.cpp +51 -0
  82. gpufl-0.1.2/include/gpufl/core/config_file_loader.hpp +18 -0
  83. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/debug_logger.hpp +1 -1
  84. gpufl-0.1.2/include/gpufl/core/dictionary_manager.cpp +575 -0
  85. gpufl-0.1.2/include/gpufl/core/dictionary_manager.hpp +138 -0
  86. gpufl-0.1.2/include/gpufl/core/events.hpp +601 -0
  87. gpufl-0.1.2/include/gpufl/core/gpufl.cpp +699 -0
  88. gpufl-0.1.2/include/gpufl/core/host_info.cpp +131 -0
  89. gpufl-0.1.2/include/gpufl/core/host_info.hpp +30 -0
  90. gpufl-0.1.2/include/gpufl/core/itanium_demangle.cpp +543 -0
  91. gpufl-0.1.2/include/gpufl/core/itanium_demangle.hpp +43 -0
  92. gpufl-0.1.2/include/gpufl/core/json/json.cpp +369 -0
  93. gpufl-0.1.2/include/gpufl/core/json/json.hpp +155 -0
  94. gpufl-0.1.2/include/gpufl/core/logger/file_compressor.cpp +44 -0
  95. gpufl-0.1.2/include/gpufl/core/logger/file_compressor.hpp +18 -0
  96. gpufl-0.1.2/include/gpufl/core/logger/file_log_sink.cpp +151 -0
  97. gpufl-0.1.2/include/gpufl/core/logger/file_log_sink.hpp +82 -0
  98. gpufl-0.1.2/include/gpufl/core/logger/http_log_sink.cpp +408 -0
  99. gpufl-0.1.2/include/gpufl/core/logger/http_log_sink.hpp +181 -0
  100. gpufl-0.1.2/include/gpufl/core/logger/log_rotator.cpp +65 -0
  101. gpufl-0.1.2/include/gpufl/core/logger/log_rotator.hpp +32 -0
  102. gpufl-0.1.2/include/gpufl/core/logger/log_sink.hpp +53 -0
  103. gpufl-0.1.2/include/gpufl/core/logger/logger.cpp +47 -0
  104. gpufl-0.1.2/include/gpufl/core/logger/logger.hpp +76 -0
  105. gpufl-0.1.2/include/gpufl/core/model/batch_models.cpp +316 -0
  106. gpufl-0.1.2/include/gpufl/core/model/batch_models.hpp +167 -0
  107. gpufl-0.1.2/include/gpufl/core/model/graph_launch_event_model.cpp +37 -0
  108. gpufl-0.1.2/include/gpufl/core/model/graph_launch_event_model.hpp +23 -0
  109. gpufl-0.1.2/include/gpufl/core/model/kernel_event_model.cpp +59 -0
  110. gpufl-0.1.2/include/gpufl/core/model/kernel_event_model.hpp +16 -0
  111. gpufl-0.1.2/include/gpufl/core/model/lifecycle_model.cpp +83 -0
  112. gpufl-0.1.2/include/gpufl/core/model/lifecycle_model.hpp +32 -0
  113. gpufl-0.1.2/include/gpufl/core/model/memcpy_event_model.cpp +58 -0
  114. gpufl-0.1.2/include/gpufl/core/model/memcpy_event_model.hpp +24 -0
  115. gpufl-0.1.2/include/gpufl/core/model/memory_alloc_event_model.cpp +42 -0
  116. gpufl-0.1.2/include/gpufl/core/model/memory_alloc_event_model.hpp +28 -0
  117. gpufl-0.1.2/include/gpufl/core/model/model_utils.hpp +109 -0
  118. gpufl-0.1.2/include/gpufl/core/model/nvtx_marker_model.cpp +25 -0
  119. gpufl-0.1.2/include/gpufl/core/model/nvtx_marker_model.hpp +22 -0
  120. gpufl-0.1.2/include/gpufl/core/model/perf_metric_model.cpp +33 -0
  121. gpufl-0.1.2/include/gpufl/core/model/perf_metric_model.hpp +16 -0
  122. gpufl-0.1.2/include/gpufl/core/model/profile_sample_model.cpp +40 -0
  123. gpufl-0.1.2/include/gpufl/core/model/profile_sample_model.hpp +16 -0
  124. gpufl-0.1.2/include/gpufl/core/model/scope_event_model.cpp +43 -0
  125. gpufl-0.1.2/include/gpufl/core/model/scope_event_model.hpp +24 -0
  126. gpufl-0.1.2/include/gpufl/core/model/serializable.hpp +15 -0
  127. gpufl-0.1.2/include/gpufl/core/model/synchronization_event_model.cpp +38 -0
  128. gpufl-0.1.2/include/gpufl/core/model/synchronization_event_model.hpp +30 -0
  129. gpufl-0.1.2/include/gpufl/core/model/system_event_model.cpp +51 -0
  130. gpufl-0.1.2/include/gpufl/core/model/system_event_model.hpp +32 -0
  131. gpufl-0.1.2/include/gpufl/core/monitor.cpp +594 -0
  132. gpufl-0.1.2/include/gpufl/core/monitor.hpp +204 -0
  133. gpufl-0.1.2/include/gpufl/core/monitor_adapter.cpp +41 -0
  134. gpufl-0.1.2/include/gpufl/core/monitor_adapter.hpp +31 -0
  135. gpufl-0.1.2/include/gpufl/core/monitor_backend.hpp +76 -0
  136. gpufl-0.1.2/include/gpufl/core/remote_config.cpp +279 -0
  137. gpufl-0.1.2/include/gpufl/core/remote_config.hpp +60 -0
  138. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/ring_buffer.hpp +27 -6
  139. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/runtime.hpp +3 -1
  140. gpufl-0.1.2/include/gpufl/core/sampler.cpp +131 -0
  141. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/sampler.hpp +14 -2
  142. gpufl-0.1.2/include/gpufl/core/sass_compressor.cpp +109 -0
  143. gpufl-0.1.2/include/gpufl/core/sass_compressor.hpp +52 -0
  144. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/stack_trace.cpp +39 -12
  145. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/stack_trace.hpp +7 -0
  146. gpufl-0.1.2/include/gpufl/core/stream_handle.hpp +9 -0
  147. gpufl-0.1.2/include/gpufl/core/trace_type.hpp +89 -0
  148. gpufl-0.1.2/include/gpufl/core/version.hpp +63 -0
  149. gpufl-0.1.2/include/gpufl/gpufl.hpp +240 -0
  150. gpufl-0.1.2/include/gpufl/report/hint_engine.cpp +91 -0
  151. gpufl-0.1.2/include/gpufl/report/hint_engine.hpp +28 -0
  152. gpufl-0.1.2/include/gpufl/report/text_report.cpp +1127 -0
  153. gpufl-0.1.2/include/gpufl/report/text_report.hpp +176 -0
  154. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/pyproject.toml +23 -1
  155. gpufl-0.1.2/python/bindings.cpp +205 -0
  156. gpufl-0.1.2/python/gpufl/__init__.py +227 -0
  157. gpufl-0.1.2/python/gpufl/analyzer/analyzer.py +1153 -0
  158. gpufl-0.1.2/python/gpufl/cupy/__init__.py +69 -0
  159. gpufl-0.1.2/python/gpufl/jax/__init__.py +68 -0
  160. gpufl-0.1.2/python/gpufl/numba/__init__.py +58 -0
  161. gpufl-0.1.2/python/gpufl/report/__init__.py +1 -0
  162. gpufl-0.1.2/python/gpufl/report/text_report.py +516 -0
  163. gpufl-0.1.2/python/gpufl/torch/__init__.py +59 -0
  164. gpufl-0.1.2/python/gpufl/torch/dispatch.py +184 -0
  165. gpufl-0.1.2/python/gpufl/torch/profile.py +76 -0
  166. gpufl-0.1.2/python/gpufl/torch/stack.py +62 -0
  167. gpufl-0.1.2/python/gpufl/torch/trace_import.py +125 -0
  168. gpufl-0.1.2/python/gpufl/triton/__init__.py +64 -0
  169. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/viz/timeline.py +10 -12
  170. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/viz/visualizer.py +1 -1
  171. gpufl-0.1.2/scripts/docker-demo-loop.sh +17 -0
  172. gpufl-0.1.2/scripts/windows/run-monitor-local.bat +20 -0
  173. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/CMakeLists.txt +62 -9
  174. gpufl-0.1.2/tests/backends/amd/test_rocm_collector.cpp +91 -0
  175. gpufl-0.1.2/tests/backends/nvidia/test_engine_coverage.cpp +294 -0
  176. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/backends/nvidia/test_nvidia_backend.cpp +11 -6
  177. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/backends/nvidia/test_nvml_collector.cpp +4 -4
  178. gpufl-0.1.2/tests/common/log_utils.cpp +161 -0
  179. gpufl-0.1.2/tests/common/log_utils.hpp +61 -0
  180. gpufl-0.1.2/tests/common/test_kernel.cu +45 -0
  181. gpufl-0.1.2/tests/common/test_kernel.hpp +22 -0
  182. gpufl-0.1.2/tests/common/test_utils.hpp +55 -0
  183. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/core/test_analyzer.cpp +1 -1
  184. gpufl-0.1.2/tests/core/test_api_path_routing.cpp +213 -0
  185. gpufl-0.1.2/tests/core/test_batch_models.cpp +144 -0
  186. gpufl-0.1.2/tests/core/test_http_log_sink.cpp +300 -0
  187. gpufl-0.1.2/tests/core/test_itanium_demangle.cpp +146 -0
  188. gpufl-0.1.2/tests/core/test_wire_contract.cpp +394 -0
  189. gpufl-0.1.2/tests/python/conftest.py +223 -0
  190. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/python/test_analyzer.py +32 -4
  191. gpufl-0.1.2/tests/python/test_bindings.py +188 -0
  192. gpufl-0.1.2/tests/python/test_remote_upload_smoke.py +185 -0
  193. gpufl-0.1.2/tests/run_engine_coverage.ps1 +86 -0
  194. gpufl-0.1.2/tests/run_engine_coverage.sh +83 -0
  195. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/verify_pipeline.py +15 -5
  196. gpufl-0.1.0.dev0/.github/workflows/release.yml +0 -71
  197. gpufl-0.1.0.dev0/CMakeLists.txt +0 -277
  198. gpufl-0.1.0.dev0/PKG-INFO +0 -192
  199. gpufl-0.1.0.dev0/README.md +0 -167
  200. gpufl-0.1.0.dev0/example/cuda/test_sass_cubin.cu +0 -164
  201. gpufl-0.1.0.dev0/example/cuda/test_sass_metrics.cu +0 -85
  202. gpufl-0.1.0.dev0/example/python/03_pytorch_benchmark.py +0 -75
  203. gpufl-0.1.0.dev0/example/python/analyzer/01_analyzer_sample.py +0 -10
  204. gpufl-0.1.0.dev0/include/gpufl/backends/amd/rocm_collector.cpp +0 -10
  205. gpufl-0.1.0.dev0/include/gpufl/backends/amd/rocm_collector.hpp +0 -18
  206. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.cpp +0 -806
  207. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.hpp +0 -164
  208. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_utils.cpp +0 -73
  209. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_utils.hpp +0 -37
  210. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +0 -282
  211. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/resource_handler.cpp +0 -63
  212. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +0 -222
  213. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +0 -42
  214. gpufl-0.1.0.dev0/include/gpufl/core/events.hpp +0 -253
  215. gpufl-0.1.0.dev0/include/gpufl/core/gpufl.cpp +0 -365
  216. gpufl-0.1.0.dev0/include/gpufl/core/logger.cpp +0 -437
  217. gpufl-0.1.0.dev0/include/gpufl/core/logger.hpp +0 -88
  218. gpufl-0.1.0.dev0/include/gpufl/core/monitor.hpp +0 -100
  219. gpufl-0.1.0.dev0/include/gpufl/core/monitor_backend.hpp +0 -46
  220. gpufl-0.1.0.dev0/include/gpufl/core/sampler.cpp +0 -73
  221. gpufl-0.1.0.dev0/include/gpufl/core/trace_type.hpp +0 -13
  222. gpufl-0.1.0.dev0/include/gpufl/cuda/monitor.cpp +0 -380
  223. gpufl-0.1.0.dev0/include/gpufl/gpufl.hpp +0 -80
  224. gpufl-0.1.0.dev0/python/bindings.cpp +0 -103
  225. gpufl-0.1.0.dev0/python/gpufl/__init__.py +0 -83
  226. gpufl-0.1.0.dev0/python/gpufl/analyzer/analyzer.py +0 -359
  227. gpufl-0.1.0.dev0/schema/ndjson.schema.json +0 -133
  228. gpufl-0.1.0.dev0/tests/common/test_utils.hpp +0 -31
  229. gpufl-0.1.0.dev0/tests/python/conftest.py +0 -69
  230. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/.clang-format +0 -0
  231. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/.github/pull_request_template.md +0 -0
  232. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/.github/workflows/build.yml +0 -0
  233. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/CONTRIBUTING.md +0 -0
  234. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/LICENSE +0 -0
  235. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/build.sh +0 -0
  236. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/check_conflict.cu +0 -0
  237. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/check_device.cu +0 -0
  238. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/cupti_pc_sampling.cu +0 -0
  239. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/list_sass_metrics.cu +0 -0
  240. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/occupancy_demo.cu +0 -0
  241. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/system_monitor.cu +0 -0
  242. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/test_occupancy.cu +0 -0
  243. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/01_basic.py +0 -0
  244. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/02_numba_cuda.py +0 -0
  245. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/requirements.txt +0 -0
  246. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/viz/01_plot_memory_timeline.py +0 -0
  247. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/viz/02_plot_stress_timeline.py +0 -0
  248. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/images/Screenshot1.png +0 -0
  249. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/common.cpp +0 -0
  250. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/debug_logger.cpp +0 -0
  251. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/runtime.cpp +0 -0
  252. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/scope_registry.cpp +0 -0
  253. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/scope_registry.hpp +0 -0
  254. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/stack_registry.hpp +0 -0
  255. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl.hpp +0 -0
  256. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/.gitignore +0 -0
  257. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/analyzer/__init__.py +0 -0
  258. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/utils.py +0 -0
  259. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/viz/__init__.py +0 -0
  260. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/viz/reader.py +0 -0
  261. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/backends/nvidia/test_cuda_collector.cpp +1 -1
  262. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/core/test_monitor.cpp +0 -0
  263. {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/main_test_runner.cpp +0 -0
@@ -0,0 +1,18 @@
1
+ # Python / notebooks — not needed for the C++ daemon build
2
+ python/
3
+ example/python/
4
+ **/.Trash-*
5
+ **/__pycache__/
6
+ **/*.pyc
7
+
8
+ # Build artifacts
9
+ cmake-build-*/
10
+ build/
11
+ *.o
12
+ *.a
13
+
14
+ # Dev / IDE
15
+ .git/
16
+ .idea/
17
+ .vscode/
18
+ *.md
@@ -0,0 +1,253 @@
1
+ name: Build and Release Wheels
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build_wheels:
11
+ name: Build wheels on ${{ matrix.os }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ matrix:
15
+ os: [ubuntu-22.04, windows-latest]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set package version from tag
21
+ if: startsWith(github.ref, 'refs/tags/v')
22
+ shell: python
23
+ run: |
24
+ import os
25
+ import re
26
+ from pathlib import Path
27
+
28
+ ref_name = os.environ.get("GITHUB_REF_NAME", "")
29
+ if not ref_name.startswith("v"):
30
+ raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
31
+ version = ref_name[1:]
32
+ print(f"Using version from tag: {version}")
33
+
34
+ pyproject = Path("pyproject.toml")
35
+ text = pyproject.read_text(encoding="utf-8")
36
+ text_new, n = re.subn(
37
+ r'(?m)^version\s*=\s*"[^\"]+"$',
38
+ f'version = "{version}"',
39
+ text,
40
+ count=1,
41
+ )
42
+ if n != 1:
43
+ raise SystemExit("Failed to update [project].version in pyproject.toml")
44
+ pyproject.write_text(text_new, encoding="utf-8")
45
+
46
+ init_py = Path("python/gpufl/__init__.py")
47
+ if init_py.exists():
48
+ init_text = init_py.read_text(encoding="utf-8")
49
+ init_new, _ = re.subn(
50
+ r'(?m)^__version__\s*=\s*"[^\"]+"$',
51
+ f'__version__ = "{version}"',
52
+ init_text,
53
+ )
54
+ init_py.write_text(init_new, encoding="utf-8")
55
+
56
+ # Keep the C++ side in lockstep. The CMake project() VERSION is the
57
+ # single source of truth for GPUFL_CLIENT_VERSION (stamped into the
58
+ # binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
59
+ # this, release wheels would ship the tag version in Python metadata
60
+ # but a stale hardcoded version in the compiled client. CMake's
61
+ # project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
62
+ # so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
63
+ # full version still lands in the wheel metadata above.
64
+ m = re.match(r"\d+(?:\.\d+){0,3}", version)
65
+ cmake_version = m.group(0) if m else version
66
+ cmakelists = Path("CMakeLists.txt")
67
+ cm_text = cmakelists.read_text(encoding="utf-8")
68
+ cm_new, cm_n = re.subn(
69
+ r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
70
+ rf'\g<1>{cmake_version}',
71
+ cm_text,
72
+ count=1,
73
+ flags=re.DOTALL,
74
+ )
75
+ if cm_n != 1:
76
+ raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
77
+ cmakelists.write_text(cm_new, encoding="utf-8")
78
+
79
+ - name: Cache cibuildwheel downloads
80
+ uses: actions/cache@v4
81
+ with:
82
+ path: |
83
+ ~/.cache/cibuildwheel
84
+ ~/AppData/Local/pypa/cibuildwheel/Cache
85
+ key: cibw-${{ runner.os }}-${{ hashFiles('.github/workflows/release.yml') }}
86
+ restore-keys: |
87
+ cibw-${{ runner.os }}-
88
+
89
+ - name: Install CUDA (Windows)
90
+ if: runner.os == 'Windows'
91
+ uses: Jimver/cuda-toolkit@v0.2.30
92
+ with:
93
+ cuda: '13.1.0'
94
+ method: 'network'
95
+
96
+ - name: Prefetch virtualenv.pyz (Windows)
97
+ if: runner.os == 'Windows'
98
+ shell: pwsh
99
+ run: |
100
+ $version = "20.27.1"
101
+ $cacheDir = Join-Path $env:LOCALAPPDATA "pypa\cibuildwheel\Cache"
102
+ New-Item -ItemType Directory -Path $cacheDir -Force | Out-Null
103
+ $dest = Join-Path $cacheDir "virtualenv-$version.pyz"
104
+ if (Test-Path $dest) {
105
+ Write-Host "virtualenv.pyz already cached: $dest"
106
+ exit 0
107
+ }
108
+ $urls = @(
109
+ "https://raw.githubusercontent.com/pypa/get-virtualenv/$version/public/virtualenv.pyz",
110
+ "https://raw.githubusercontent.com/pypa/get-virtualenv/refs/tags/$version/public/virtualenv.pyz",
111
+ "https://bootstrap.pypa.io/virtualenv.pyz"
112
+ )
113
+ $max = 6
114
+ $ok = $false
115
+ foreach ($url in $urls) {
116
+ for ($i = 1; $i -le $max; $i++) {
117
+ try {
118
+ Write-Host "Downloading virtualenv.pyz from $url (attempt $i/$max)..."
119
+ Invoke-WebRequest -Uri $url -OutFile $dest -TimeoutSec 120 -Headers @{ "User-Agent" = "cibuildwheel-prefetch" }
120
+ if ((Get-Item $dest).Length -gt 0) {
121
+ Write-Host "Downloaded: $dest"
122
+ $ok = $true
123
+ break
124
+ }
125
+ } catch {
126
+ if (Test-Path $dest) { Remove-Item $dest -Force -ErrorAction SilentlyContinue }
127
+ if ($i -eq $max) { break }
128
+ Start-Sleep -Seconds (5 * $i)
129
+ }
130
+ }
131
+ if ($ok) { break }
132
+ }
133
+ if (-not $ok) { throw "Failed to prefetch virtualenv.pyz from all sources." }
134
+
135
+ - name: Build wheels
136
+ uses: pypa/cibuildwheel@v2.22.0
137
+ env:
138
+ CIBW_VIRTUALENV_VERSION: "20.27.1"
139
+ CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
140
+ # cuda-nvml-devel-13-1 ships the libnvidia-ml.so stub under
141
+ # targets/x86_64-linux/lib/stubs/ — without it CMake's NVML probe
142
+ # finds nothing and (since v0.1.1) fails the build loudly. Every
143
+ # release before v0.1.1 silently shipped wheels without NVML
144
+ # because this package was missing here.
145
+ CIBW_BEFORE_ALL_LINUX: >-
146
+ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
147
+ dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1 cuda-nvml-devel-13-1
148
+ CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
149
+ CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
150
+ # libnvidia-ml.so.1 is excluded for the same reason as libcuda.so.1:
151
+ # it ships with the NVIDIA driver, not the CUDA toolkit, and is
152
+ # not present in the manylinux build container. Auditwheel
153
+ # locates every DT_NEEDED entry on disk before deciding whether
154
+ # to bundle, so an un-excluded NVML reference fails the build
155
+ # ("Cannot repair wheel, because required library libnvidia-ml.so.1
156
+ # could not be located"). The toolkit's `libnvidia-ml.so` stub is
157
+ # only the unversioned link-time placeholder — the versioned
158
+ # `.so.1` the SONAME chains to lives on the user's machine.
159
+ CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 --exclude libnvidia-ml.so.1 -w {dest_dir} {wheel}"
160
+
161
+ - uses: actions/upload-artifact@v4
162
+ with:
163
+ name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
164
+ path: ./wheelhouse/*.whl
165
+
166
+ build_sdist:
167
+ name: Build source distribution
168
+ runs-on: ubuntu-latest
169
+ steps:
170
+ - uses: actions/checkout@v4
171
+
172
+ - name: Set package version from tag
173
+ if: startsWith(github.ref, 'refs/tags/v')
174
+ shell: python
175
+ run: |
176
+ import os
177
+ import re
178
+ from pathlib import Path
179
+
180
+ ref_name = os.environ.get("GITHUB_REF_NAME", "")
181
+ if not ref_name.startswith("v"):
182
+ raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
183
+ version = ref_name[1:]
184
+ print(f"Using version from tag: {version}")
185
+
186
+ pyproject = Path("pyproject.toml")
187
+ text = pyproject.read_text(encoding="utf-8")
188
+ text_new, n = re.subn(
189
+ r'(?m)^version\s*=\s*"[^\"]+"$',
190
+ f'version = "{version}"',
191
+ text,
192
+ count=1,
193
+ )
194
+ if n != 1:
195
+ raise SystemExit("Failed to update [project].version in pyproject.toml")
196
+ pyproject.write_text(text_new, encoding="utf-8")
197
+
198
+ init_py = Path("python/gpufl/__init__.py")
199
+ if init_py.exists():
200
+ init_text = init_py.read_text(encoding="utf-8")
201
+ init_new, _ = re.subn(
202
+ r'(?m)^__version__\s*=\s*"[^\"]+"$',
203
+ f'__version__ = "{version}"',
204
+ init_text,
205
+ )
206
+ init_py.write_text(init_new, encoding="utf-8")
207
+
208
+ # Keep the C++ side in lockstep. The CMake project() VERSION is the
209
+ # single source of truth for GPUFL_CLIENT_VERSION (stamped into the
210
+ # binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
211
+ # this, release wheels would ship the tag version in Python metadata
212
+ # but a stale hardcoded version in the compiled client. CMake's
213
+ # project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
214
+ # so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
215
+ # full version still lands in the wheel metadata above.
216
+ m = re.match(r"\d+(?:\.\d+){0,3}", version)
217
+ cmake_version = m.group(0) if m else version
218
+ cmakelists = Path("CMakeLists.txt")
219
+ cm_text = cmakelists.read_text(encoding="utf-8")
220
+ cm_new, cm_n = re.subn(
221
+ r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
222
+ rf'\g<1>{cmake_version}',
223
+ cm_text,
224
+ count=1,
225
+ flags=re.DOTALL,
226
+ )
227
+ if cm_n != 1:
228
+ raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
229
+ cmakelists.write_text(cm_new, encoding="utf-8")
230
+
231
+ - name: Build sdist
232
+ run: pipx run build --sdist
233
+
234
+ - uses: actions/upload-artifact@v4
235
+ with:
236
+ name: cibw-sdist
237
+ path: dist/*.tar.gz
238
+
239
+ upload_pypi:
240
+ needs: [build_wheels, build_sdist]
241
+ runs-on: ubuntu-latest
242
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
243
+ steps:
244
+ - uses: actions/download-artifact@v4
245
+ with:
246
+ pattern: cibw-*
247
+ path: dist
248
+ merge-multiple: true
249
+
250
+ - name: Publish to PyPI
251
+ uses: pypa/gh-action-pypi-publish@release/v1
252
+ with:
253
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -1,8 +1,19 @@
1
+ ### claude
2
+ .claude/
3
+
1
4
  ### idea
2
5
  .idea/**
3
6
  build/
7
+ build-*/
8
+ build_tests/
4
9
  cmake-build-*/
5
10
  cmake/
11
+ CMakeFiles/
12
+ CMakeCache.txt
13
+ wget-log*
14
+
15
+ ### docker
16
+ example/python/docker/**/
6
17
 
7
18
  ### C++ template
8
19
  # Prerequisites
@@ -77,4 +88,4 @@ cmake/
77
88
  *.x86_64
78
89
  *.hex
79
90
 
80
- *.log
91
+ *.log