gpufl 0.1.0.dev7__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. gpufl-0.1.2/.dockerignore +18 -0
  2. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.github/workflows/release.yml +62 -2
  3. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.gitignore +12 -1
  4. gpufl-0.1.2/CMakeLists.txt +625 -0
  5. gpufl-0.1.2/Dockerfile.demo +42 -0
  6. gpufl-0.1.2/Dockerfile.monitor +85 -0
  7. gpufl-0.1.2/Dockerfile.monitor.amd +94 -0
  8. gpufl-0.1.2/Dockerfile.monitor.supervisord.conf +27 -0
  9. gpufl-0.1.2/PKG-INFO +349 -0
  10. gpufl-0.1.2/README.md +304 -0
  11. gpufl-0.1.2/benchmark/README.md +71 -0
  12. gpufl-0.1.2/benchmark/cuda_gemm.py +44 -0
  13. gpufl-0.1.2/benchmark/pytorch_train.py +145 -0
  14. gpufl-0.1.2/benchmark/run_benchmark.py +263 -0
  15. gpufl-0.1.2/daemon/README.md +252 -0
  16. gpufl-0.1.2/daemon/monitor/CMakeLists.txt +44 -0
  17. gpufl-0.1.2/daemon/monitor/main.cpp +105 -0
  18. gpufl-0.1.2/docker-compose.monitor.amd.yml +43 -0
  19. gpufl-0.1.2/docker-compose.monitor.yml +71 -0
  20. gpufl-0.1.2/example/amd/CMakeLists.txt +71 -0
  21. gpufl-0.1.2/example/amd/README.md +139 -0
  22. gpufl-0.1.2/example/amd/check_device.cpp +31 -0
  23. gpufl-0.1.2/example/amd/gpufl_scope_demo.cpp +240 -0
  24. gpufl-0.1.2/example/amd/vector_add_benchmark.cpp +137 -0
  25. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/CMakeLists.txt +111 -87
  26. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/block_style_example.cu +11 -10
  27. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/cupti_basic.cu +73 -53
  28. gpufl-0.1.2/example/cuda/memory_coalescing_demo.cu +134 -0
  29. gpufl-0.1.2/example/cuda/sass_divergence_demo.cu +270 -0
  30. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/vector_add_benchmark.cu +23 -0
  31. gpufl-0.1.2/example/python/03_pytorch_benchmark.py +149 -0
  32. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/analyzer/01_analyzer_sample.py +2 -2
  33. gpufl-0.1.2/include/gpufl/backends/amd/engine/amd_profiling_engine.hpp +42 -0
  34. gpufl-0.1.2/include/gpufl/backends/amd/engine/dispatch_counter_engine.cpp +282 -0
  35. gpufl-0.1.2/include/gpufl/backends/amd/engine/dispatch_counter_engine.hpp +65 -0
  36. gpufl-0.1.2/include/gpufl/backends/amd/hip_static_collector.cpp +91 -0
  37. gpufl-0.1.2/include/gpufl/backends/amd/hip_static_collector.hpp +20 -0
  38. gpufl-0.1.2/include/gpufl/backends/amd/monitor_adapter_amd.cpp +56 -0
  39. gpufl-0.1.2/include/gpufl/backends/amd/monitor_adapter_amd.hpp +30 -0
  40. gpufl-0.1.2/include/gpufl/backends/amd/rocm_collector.cpp +522 -0
  41. gpufl-0.1.2/include/gpufl/backends/amd/rocm_collector.hpp +37 -0
  42. gpufl-0.1.2/include/gpufl/backends/amd/rocprofiler_backend.cpp +799 -0
  43. gpufl-0.1.2/include/gpufl/backends/amd/rocprofiler_backend.hpp +144 -0
  44. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/host_collector.hpp +2 -2
  45. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cuda_collector.cpp +5 -4
  46. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cuda_collector.hpp +2 -2
  47. gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_backend.cpp +1218 -0
  48. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_backend.hpp +44 -1
  49. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_common.hpp +2 -73
  50. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_utils.cpp +32 -14
  51. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_utils.hpp +23 -1
  52. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +695 -0
  53. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +30 -2
  54. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.cpp +70 -0
  55. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.hpp +65 -0
  56. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +30 -0
  57. gpufl-0.1.2/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +421 -0
  58. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +17 -0
  59. gpufl-0.1.2/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +483 -0
  60. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +10 -1
  61. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +72 -12
  62. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +2 -1
  63. gpufl-0.1.2/include/gpufl/backends/nvidia/monitor_adapter_nvidia.cpp +81 -0
  64. gpufl-0.1.2/include/gpufl/backends/nvidia/monitor_adapter_nvidia.hpp +32 -0
  65. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/nvml_collector.cpp +154 -1
  66. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/nvml_collector.hpp +10 -0
  67. gpufl-0.1.2/include/gpufl/backends/nvidia/resource_handler.cpp +151 -0
  68. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/resource_handler.hpp +15 -0
  69. gpufl-0.1.2/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +56 -0
  70. gpufl-0.1.2/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +19 -0
  71. gpufl-0.1.2/include/gpufl/backends/nvidia/synchronization_handler.cpp +149 -0
  72. gpufl-0.1.2/include/gpufl/backends/nvidia/synchronization_handler.hpp +60 -0
  73. gpufl-0.1.2/include/gpufl/core/activity_record.hpp +141 -0
  74. gpufl-0.1.2/include/gpufl/core/backend_factory.cpp +139 -0
  75. gpufl-0.1.2/include/gpufl/core/backend_factory.hpp +13 -0
  76. gpufl-0.1.2/include/gpufl/core/backend_interfaces.hpp +31 -0
  77. gpufl-0.1.2/include/gpufl/core/batch_buffer.hpp +23 -0
  78. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/common.hpp +2 -0
  79. gpufl-0.1.2/include/gpufl/core/config_file_loader.cpp +51 -0
  80. gpufl-0.1.2/include/gpufl/core/config_file_loader.hpp +18 -0
  81. gpufl-0.1.2/include/gpufl/core/dictionary_manager.cpp +575 -0
  82. gpufl-0.1.2/include/gpufl/core/dictionary_manager.hpp +138 -0
  83. gpufl-0.1.2/include/gpufl/core/events.hpp +601 -0
  84. gpufl-0.1.2/include/gpufl/core/gpufl.cpp +699 -0
  85. gpufl-0.1.2/include/gpufl/core/host_info.cpp +131 -0
  86. gpufl-0.1.2/include/gpufl/core/host_info.hpp +30 -0
  87. gpufl-0.1.2/include/gpufl/core/itanium_demangle.cpp +543 -0
  88. gpufl-0.1.2/include/gpufl/core/itanium_demangle.hpp +43 -0
  89. gpufl-0.1.2/include/gpufl/core/json/json.cpp +369 -0
  90. gpufl-0.1.2/include/gpufl/core/json/json.hpp +155 -0
  91. gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.cpp → gpufl-0.1.2/include/gpufl/core/logger/file_log_sink.cpp +30 -31
  92. gpufl-0.1.2/include/gpufl/core/logger/file_log_sink.hpp +82 -0
  93. gpufl-0.1.2/include/gpufl/core/logger/http_log_sink.cpp +408 -0
  94. gpufl-0.1.2/include/gpufl/core/logger/http_log_sink.hpp +181 -0
  95. gpufl-0.1.2/include/gpufl/core/logger/log_sink.hpp +53 -0
  96. gpufl-0.1.2/include/gpufl/core/logger/logger.cpp +47 -0
  97. gpufl-0.1.2/include/gpufl/core/logger/logger.hpp +76 -0
  98. gpufl-0.1.2/include/gpufl/core/model/batch_models.cpp +316 -0
  99. gpufl-0.1.2/include/gpufl/core/model/batch_models.hpp +167 -0
  100. gpufl-0.1.2/include/gpufl/core/model/graph_launch_event_model.cpp +37 -0
  101. gpufl-0.1.2/include/gpufl/core/model/graph_launch_event_model.hpp +23 -0
  102. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/kernel_event_model.cpp +13 -5
  103. gpufl-0.1.2/include/gpufl/core/model/lifecycle_model.cpp +83 -0
  104. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/lifecycle_model.hpp +8 -0
  105. gpufl-0.1.2/include/gpufl/core/model/memory_alloc_event_model.cpp +42 -0
  106. gpufl-0.1.2/include/gpufl/core/model/memory_alloc_event_model.hpp +28 -0
  107. gpufl-0.1.2/include/gpufl/core/model/model_utils.hpp +109 -0
  108. gpufl-0.1.2/include/gpufl/core/model/nvtx_marker_model.cpp +25 -0
  109. gpufl-0.1.2/include/gpufl/core/model/nvtx_marker_model.hpp +22 -0
  110. gpufl-0.1.2/include/gpufl/core/model/synchronization_event_model.cpp +38 -0
  111. gpufl-0.1.2/include/gpufl/core/model/synchronization_event_model.hpp +30 -0
  112. gpufl-0.1.2/include/gpufl/core/monitor.cpp +594 -0
  113. gpufl-0.1.2/include/gpufl/core/monitor.hpp +204 -0
  114. gpufl-0.1.2/include/gpufl/core/monitor_adapter.cpp +41 -0
  115. gpufl-0.1.2/include/gpufl/core/monitor_adapter.hpp +31 -0
  116. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/monitor_backend.hpp +23 -0
  117. gpufl-0.1.2/include/gpufl/core/remote_config.cpp +279 -0
  118. gpufl-0.1.2/include/gpufl/core/remote_config.hpp +60 -0
  119. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/ring_buffer.hpp +27 -6
  120. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/runtime.hpp +3 -1
  121. gpufl-0.1.2/include/gpufl/core/sampler.cpp +131 -0
  122. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/sampler.hpp +14 -2
  123. gpufl-0.1.2/include/gpufl/core/sass_compressor.cpp +109 -0
  124. gpufl-0.1.2/include/gpufl/core/sass_compressor.hpp +52 -0
  125. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/stack_trace.cpp +39 -12
  126. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/stack_trace.hpp +7 -0
  127. gpufl-0.1.2/include/gpufl/core/stream_handle.hpp +9 -0
  128. gpufl-0.1.2/include/gpufl/core/trace_type.hpp +89 -0
  129. gpufl-0.1.2/include/gpufl/core/version.hpp +63 -0
  130. gpufl-0.1.2/include/gpufl/gpufl.hpp +240 -0
  131. gpufl-0.1.2/include/gpufl/report/hint_engine.cpp +91 -0
  132. gpufl-0.1.2/include/gpufl/report/hint_engine.hpp +28 -0
  133. gpufl-0.1.2/include/gpufl/report/text_report.cpp +1127 -0
  134. gpufl-0.1.2/include/gpufl/report/text_report.hpp +176 -0
  135. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/pyproject.toml +23 -1
  136. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/bindings.cpp +84 -8
  137. gpufl-0.1.2/python/gpufl/__init__.py +227 -0
  138. gpufl-0.1.2/python/gpufl/analyzer/analyzer.py +1153 -0
  139. gpufl-0.1.2/python/gpufl/cupy/__init__.py +69 -0
  140. gpufl-0.1.2/python/gpufl/jax/__init__.py +68 -0
  141. gpufl-0.1.2/python/gpufl/numba/__init__.py +58 -0
  142. gpufl-0.1.2/python/gpufl/report/__init__.py +1 -0
  143. gpufl-0.1.2/python/gpufl/report/text_report.py +516 -0
  144. gpufl-0.1.2/python/gpufl/torch/__init__.py +59 -0
  145. gpufl-0.1.2/python/gpufl/torch/dispatch.py +184 -0
  146. gpufl-0.1.2/python/gpufl/torch/profile.py +76 -0
  147. gpufl-0.1.2/python/gpufl/torch/stack.py +62 -0
  148. gpufl-0.1.2/python/gpufl/torch/trace_import.py +125 -0
  149. gpufl-0.1.2/python/gpufl/triton/__init__.py +64 -0
  150. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/viz/timeline.py +10 -12
  151. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/viz/visualizer.py +1 -1
  152. gpufl-0.1.2/scripts/docker-demo-loop.sh +17 -0
  153. gpufl-0.1.2/scripts/windows/run-monitor-local.bat +20 -0
  154. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/CMakeLists.txt +62 -9
  155. gpufl-0.1.2/tests/backends/amd/test_rocm_collector.cpp +91 -0
  156. gpufl-0.1.2/tests/backends/nvidia/test_engine_coverage.cpp +294 -0
  157. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/backends/nvidia/test_nvidia_backend.cpp +10 -5
  158. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/backends/nvidia/test_nvml_collector.cpp +4 -4
  159. gpufl-0.1.2/tests/common/log_utils.cpp +161 -0
  160. gpufl-0.1.2/tests/common/log_utils.hpp +61 -0
  161. gpufl-0.1.2/tests/common/test_kernel.cu +45 -0
  162. gpufl-0.1.2/tests/common/test_kernel.hpp +22 -0
  163. gpufl-0.1.2/tests/common/test_utils.hpp +55 -0
  164. gpufl-0.1.2/tests/core/test_api_path_routing.cpp +213 -0
  165. gpufl-0.1.2/tests/core/test_batch_models.cpp +144 -0
  166. gpufl-0.1.2/tests/core/test_http_log_sink.cpp +300 -0
  167. gpufl-0.1.2/tests/core/test_itanium_demangle.cpp +146 -0
  168. gpufl-0.1.2/tests/core/test_wire_contract.cpp +394 -0
  169. gpufl-0.1.2/tests/python/conftest.py +223 -0
  170. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/python/test_analyzer.py +32 -4
  171. gpufl-0.1.2/tests/python/test_bindings.py +188 -0
  172. gpufl-0.1.2/tests/python/test_remote_upload_smoke.py +185 -0
  173. gpufl-0.1.2/tests/run_engine_coverage.ps1 +86 -0
  174. gpufl-0.1.2/tests/run_engine_coverage.sh +83 -0
  175. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/verify_pipeline.py +5 -3
  176. gpufl-0.1.0.dev7/CMakeLists.txt +0 -351
  177. gpufl-0.1.0.dev7/PKG-INFO +0 -192
  178. gpufl-0.1.0.dev7/README.md +0 -167
  179. gpufl-0.1.0.dev7/example/cuda/test_sass_cubin.cu +0 -164
  180. gpufl-0.1.0.dev7/example/cuda/test_sass_metrics.cu +0 -85
  181. gpufl-0.1.0.dev7/example/python/03_pytorch_benchmark.py +0 -75
  182. gpufl-0.1.0.dev7/include/gpufl/backends/amd/rocm_collector.cpp +0 -10
  183. gpufl-0.1.0.dev7/include/gpufl/backends/amd/rocm_collector.hpp +0 -18
  184. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_backend.cpp +0 -316
  185. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +0 -395
  186. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +0 -221
  187. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +0 -327
  188. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/resource_handler.cpp +0 -62
  189. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +0 -222
  190. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +0 -42
  191. gpufl-0.1.0.dev7/include/gpufl/core/events.hpp +0 -274
  192. gpufl-0.1.0.dev7/include/gpufl/core/gpufl.cpp +0 -398
  193. gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.hpp +0 -70
  194. gpufl-0.1.0.dev7/include/gpufl/core/model/lifecycle_model.cpp +0 -34
  195. gpufl-0.1.0.dev7/include/gpufl/core/model/model_utils.hpp +0 -94
  196. gpufl-0.1.0.dev7/include/gpufl/core/monitor.hpp +0 -95
  197. gpufl-0.1.0.dev7/include/gpufl/core/sampler.cpp +0 -74
  198. gpufl-0.1.0.dev7/include/gpufl/core/trace_type.hpp +0 -13
  199. gpufl-0.1.0.dev7/include/gpufl/cuda/monitor.cpp +0 -405
  200. gpufl-0.1.0.dev7/include/gpufl/gpufl.hpp +0 -83
  201. gpufl-0.1.0.dev7/python/gpufl/__init__.py +0 -89
  202. gpufl-0.1.0.dev7/python/gpufl/analyzer/analyzer.py +0 -721
  203. gpufl-0.1.0.dev7/schema/ndjson.schema.json +0 -133
  204. gpufl-0.1.0.dev7/tests/common/test_utils.hpp +0 -31
  205. gpufl-0.1.0.dev7/tests/python/conftest.py +0 -69
  206. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.clang-format +0 -0
  207. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.github/pull_request_template.md +0 -0
  208. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.github/workflows/build.yml +0 -0
  209. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/CONTRIBUTING.md +0 -0
  210. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/LICENSE +0 -0
  211. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/build.sh +0 -0
  212. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/check_conflict.cu +0 -0
  213. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/check_device.cu +0 -0
  214. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/cupti_pc_sampling.cu +0 -0
  215. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/list_sass_metrics.cu +0 -0
  216. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/occupancy_demo.cu +0 -0
  217. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/system_monitor.cu +0 -0
  218. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/test_occupancy.cu +0 -0
  219. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/01_basic.py +0 -0
  220. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/02_numba_cuda.py +0 -0
  221. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/requirements.txt +0 -0
  222. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/viz/01_plot_memory_timeline.py +0 -0
  223. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/viz/02_plot_stress_timeline.py +0 -0
  224. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/images/Screenshot1.png +0 -0
  225. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +0 -0
  226. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +0 -0
  227. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/common.cpp +0 -0
  228. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/debug_logger.cpp +0 -0
  229. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/debug_logger.hpp +0 -0
  230. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/logger/file_compressor.cpp +0 -0
  231. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/logger/file_compressor.hpp +0 -0
  232. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/logger/log_rotator.cpp +0 -0
  233. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/logger/log_rotator.hpp +0 -0
  234. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/kernel_event_model.hpp +0 -0
  235. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/memcpy_event_model.cpp +0 -0
  236. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/memcpy_event_model.hpp +0 -0
  237. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/perf_metric_model.cpp +0 -0
  238. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/perf_metric_model.hpp +0 -0
  239. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/profile_sample_model.cpp +0 -0
  240. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/profile_sample_model.hpp +0 -0
  241. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/scope_event_model.cpp +0 -0
  242. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/scope_event_model.hpp +0 -0
  243. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/serializable.hpp +0 -0
  244. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/system_event_model.cpp +0 -0
  245. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/system_event_model.hpp +0 -0
  246. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/runtime.cpp +0 -0
  247. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/scope_registry.cpp +0 -0
  248. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/scope_registry.hpp +0 -0
  249. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/stack_registry.hpp +0 -0
  250. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl.hpp +0 -0
  251. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/.gitignore +0 -0
  252. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/analyzer/__init__.py +0 -0
  253. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/utils.py +0 -0
  254. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/viz/__init__.py +0 -0
  255. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/viz/reader.py +0 -0
  256. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/backends/nvidia/test_cuda_collector.cpp +1 -1
  257. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/core/test_analyzer.cpp +0 -0
  258. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/core/test_monitor.cpp +0 -0
  259. {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/main_test_runner.cpp +0 -0
@@ -0,0 +1,18 @@
1
+ # Python / notebooks — not needed for the C++ daemon build
2
+ python/
3
+ example/python/
4
+ **/.Trash-*
5
+ **/__pycache__/
6
+ **/*.pyc
7
+
8
+ # Build artifacts
9
+ cmake-build-*/
10
+ build/
11
+ *.o
12
+ *.a
13
+
14
+ # Dev / IDE
15
+ .git/
16
+ .idea/
17
+ .vscode/
18
+ *.md
@@ -53,6 +53,29 @@ jobs:
53
53
  )
54
54
  init_py.write_text(init_new, encoding="utf-8")
55
55
 
56
+ # Keep the C++ side in lockstep. The CMake project() VERSION is the
57
+ # single source of truth for GPUFL_CLIENT_VERSION (stamped into the
58
+ # binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
59
+ # this, release wheels would ship the tag version in Python metadata
60
+ # but a stale hardcoded version in the compiled client. CMake's
61
+ # project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
62
+ # so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
63
+ # full version still lands in the wheel metadata above.
64
+ m = re.match(r"\d+(?:\.\d+){0,3}", version)
65
+ cmake_version = m.group(0) if m else version
66
+ cmakelists = Path("CMakeLists.txt")
67
+ cm_text = cmakelists.read_text(encoding="utf-8")
68
+ cm_new, cm_n = re.subn(
69
+ r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
70
+ rf'\g<1>{cmake_version}',
71
+ cm_text,
72
+ count=1,
73
+ flags=re.DOTALL,
74
+ )
75
+ if cm_n != 1:
76
+ raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
77
+ cmakelists.write_text(cm_new, encoding="utf-8")
78
+
56
79
  - name: Cache cibuildwheel downloads
57
80
  uses: actions/cache@v4
58
81
  with:
@@ -114,12 +137,26 @@ jobs:
114
137
  env:
115
138
  CIBW_VIRTUALENV_VERSION: "20.27.1"
116
139
  CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
140
+ # cuda-nvml-devel-13-1 ships the libnvidia-ml.so stub under
141
+ # targets/x86_64-linux/lib/stubs/ — without it CMake's NVML probe
142
+ # finds nothing and (since v0.1.1) fails the build loudly. Every
143
+ # release before v0.1.1 silently shipped wheels without NVML
144
+ # because this package was missing here.
117
145
  CIBW_BEFORE_ALL_LINUX: >-
118
146
  curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
119
- dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1
147
+ dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1 cuda-nvml-devel-13-1
120
148
  CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
121
149
  CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
122
- CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 -w {dest_dir} {wheel}"
150
+ # libnvidia-ml.so.1 is excluded for the same reason as libcuda.so.1:
151
+ # it ships with the NVIDIA driver, not the CUDA toolkit, and is
152
+ # not present in the manylinux build container. Auditwheel
153
+ # locates every DT_NEEDED entry on disk before deciding whether
154
+ # to bundle, so an un-excluded NVML reference fails the build
155
+ # ("Cannot repair wheel, because required library libnvidia-ml.so.1
156
+ # could not be located"). The toolkit's `libnvidia-ml.so` stub is
157
+ # only the unversioned link-time placeholder — the versioned
158
+ # `.so.1` the SONAME chains to lives on the user's machine.
159
+ CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 --exclude libnvidia-ml.so.1 -w {dest_dir} {wheel}"
123
160
 
124
161
  - uses: actions/upload-artifact@v4
125
162
  with:
@@ -168,6 +205,29 @@ jobs:
168
205
  )
169
206
  init_py.write_text(init_new, encoding="utf-8")
170
207
 
208
+ # Keep the C++ side in lockstep. The CMake project() VERSION is the
209
+ # single source of truth for GPUFL_CLIENT_VERSION (stamped into the
210
+ # binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
211
+ # this, release wheels would ship the tag version in Python metadata
212
+ # but a stale hardcoded version in the compiled client. CMake's
213
+ # project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
214
+ # so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
215
+ # full version still lands in the wheel metadata above.
216
+ m = re.match(r"\d+(?:\.\d+){0,3}", version)
217
+ cmake_version = m.group(0) if m else version
218
+ cmakelists = Path("CMakeLists.txt")
219
+ cm_text = cmakelists.read_text(encoding="utf-8")
220
+ cm_new, cm_n = re.subn(
221
+ r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
222
+ rf'\g<1>{cmake_version}',
223
+ cm_text,
224
+ count=1,
225
+ flags=re.DOTALL,
226
+ )
227
+ if cm_n != 1:
228
+ raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
229
+ cmakelists.write_text(cm_new, encoding="utf-8")
230
+
171
231
  - name: Build sdist
172
232
  run: pipx run build --sdist
173
233
 
@@ -1,8 +1,19 @@
1
+ ### claude
2
+ .claude/
3
+
1
4
  ### idea
2
5
  .idea/**
3
6
  build/
7
+ build-*/
8
+ build_tests/
4
9
  cmake-build-*/
5
10
  cmake/
11
+ CMakeFiles/
12
+ CMakeCache.txt
13
+ wget-log*
14
+
15
+ ### docker
16
+ example/python/docker/**/
6
17
 
7
18
  ### C++ template
8
19
  # Prerequisites
@@ -77,4 +88,4 @@ cmake/
77
88
  *.x86_64
78
89
  *.hex
79
90
 
80
- *.log
91
+ *.log