gpufl 0.1.2__tar.gz → 0.1.4__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (229)
  1. {gpufl-0.1.2 → gpufl-0.1.4}/CMakeLists.txt +1 -1
  2. {gpufl-0.1.2 → gpufl-0.1.4}/PKG-INFO +1 -1
  3. {gpufl-0.1.2 → gpufl-0.1.4}/pyproject.toml +1 -1
  4. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/__init__.py +1 -1
  5. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/analyzer/analyzer.py +14 -2
  6. {gpufl-0.1.2 → gpufl-0.1.4}/.clang-format +0 -0
  7. {gpufl-0.1.2 → gpufl-0.1.4}/.dockerignore +0 -0
  8. {gpufl-0.1.2 → gpufl-0.1.4}/.github/pull_request_template.md +0 -0
  9. {gpufl-0.1.2 → gpufl-0.1.4}/.github/workflows/build.yml +0 -0
  10. {gpufl-0.1.2 → gpufl-0.1.4}/.github/workflows/release.yml +0 -0
  11. {gpufl-0.1.2 → gpufl-0.1.4}/.gitignore +0 -0
  12. {gpufl-0.1.2 → gpufl-0.1.4}/CONTRIBUTING.md +0 -0
  13. {gpufl-0.1.2 → gpufl-0.1.4}/Dockerfile.demo +0 -0
  14. {gpufl-0.1.2 → gpufl-0.1.4}/Dockerfile.monitor +0 -0
  15. {gpufl-0.1.2 → gpufl-0.1.4}/Dockerfile.monitor.amd +0 -0
  16. {gpufl-0.1.2 → gpufl-0.1.4}/Dockerfile.monitor.supervisord.conf +0 -0
  17. {gpufl-0.1.2 → gpufl-0.1.4}/LICENSE +0 -0
  18. {gpufl-0.1.2 → gpufl-0.1.4}/README.md +0 -0
  19. {gpufl-0.1.2 → gpufl-0.1.4}/benchmark/README.md +0 -0
  20. {gpufl-0.1.2 → gpufl-0.1.4}/benchmark/cuda_gemm.py +0 -0
  21. {gpufl-0.1.2 → gpufl-0.1.4}/benchmark/pytorch_train.py +0 -0
  22. {gpufl-0.1.2 → gpufl-0.1.4}/benchmark/run_benchmark.py +0 -0
  23. {gpufl-0.1.2 → gpufl-0.1.4}/build.sh +0 -0
  24. {gpufl-0.1.2 → gpufl-0.1.4}/daemon/README.md +0 -0
  25. {gpufl-0.1.2 → gpufl-0.1.4}/daemon/monitor/CMakeLists.txt +0 -0
  26. {gpufl-0.1.2 → gpufl-0.1.4}/daemon/monitor/main.cpp +0 -0
  27. {gpufl-0.1.2 → gpufl-0.1.4}/docker-compose.monitor.amd.yml +0 -0
  28. {gpufl-0.1.2 → gpufl-0.1.4}/docker-compose.monitor.yml +0 -0
  29. {gpufl-0.1.2 → gpufl-0.1.4}/example/amd/CMakeLists.txt +0 -0
  30. {gpufl-0.1.2 → gpufl-0.1.4}/example/amd/README.md +0 -0
  31. {gpufl-0.1.2 → gpufl-0.1.4}/example/amd/check_device.cpp +0 -0
  32. {gpufl-0.1.2 → gpufl-0.1.4}/example/amd/gpufl_scope_demo.cpp +0 -0
  33. {gpufl-0.1.2 → gpufl-0.1.4}/example/amd/vector_add_benchmark.cpp +0 -0
  34. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/CMakeLists.txt +0 -0
  35. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/block_style_example.cu +0 -0
  36. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/check_conflict.cu +0 -0
  37. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/check_device.cu +0 -0
  38. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/cupti_basic.cu +0 -0
  39. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/cupti_pc_sampling.cu +0 -0
  40. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/list_sass_metrics.cu +0 -0
  41. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/memory_coalescing_demo.cu +0 -0
  42. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/occupancy_demo.cu +0 -0
  43. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/sass_divergence_demo.cu +0 -0
  44. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/system_monitor.cu +0 -0
  45. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/test_occupancy.cu +0 -0
  46. {gpufl-0.1.2 → gpufl-0.1.4}/example/cuda/vector_add_benchmark.cu +0 -0
  47. {gpufl-0.1.2 → gpufl-0.1.4}/example/python/01_basic.py +0 -0
  48. {gpufl-0.1.2 → gpufl-0.1.4}/example/python/02_numba_cuda.py +0 -0
  49. {gpufl-0.1.2 → gpufl-0.1.4}/example/python/03_pytorch_benchmark.py +0 -0
  50. {gpufl-0.1.2 → gpufl-0.1.4}/example/python/analyzer/01_analyzer_sample.py +0 -0
  51. {gpufl-0.1.2 → gpufl-0.1.4}/example/python/requirements.txt +0 -0
  52. {gpufl-0.1.2 → gpufl-0.1.4}/example/python/viz/01_plot_memory_timeline.py +0 -0
  53. {gpufl-0.1.2 → gpufl-0.1.4}/example/python/viz/02_plot_stress_timeline.py +0 -0
  54. {gpufl-0.1.2 → gpufl-0.1.4}/images/Screenshot1.png +0 -0
  55. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/engine/amd_profiling_engine.hpp +0 -0
  56. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/engine/dispatch_counter_engine.cpp +0 -0
  57. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/engine/dispatch_counter_engine.hpp +0 -0
  58. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/hip_static_collector.cpp +0 -0
  59. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/hip_static_collector.hpp +0 -0
  60. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/monitor_adapter_amd.cpp +0 -0
  61. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/monitor_adapter_amd.hpp +0 -0
  62. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/rocm_collector.cpp +0 -0
  63. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/rocm_collector.hpp +0 -0
  64. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/rocprofiler_backend.cpp +0 -0
  65. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/amd/rocprofiler_backend.hpp +0 -0
  66. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/host_collector.hpp +0 -0
  67. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/cuda_collector.cpp +0 -0
  68. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/cuda_collector.hpp +0 -0
  69. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/cupti_backend.cpp +0 -0
  70. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/cupti_backend.hpp +0 -0
  71. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/cupti_common.hpp +0 -0
  72. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/cupti_utils.cpp +0 -0
  73. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/cupti_utils.hpp +0 -0
  74. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +0 -0
  75. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +0 -0
  76. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.cpp +0 -0
  77. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.hpp +0 -0
  78. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +0 -0
  79. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +0 -0
  80. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +0 -0
  81. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +0 -0
  82. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +0 -0
  83. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +0 -0
  84. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +0 -0
  85. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +0 -0
  86. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +0 -0
  87. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/monitor_adapter_nvidia.cpp +0 -0
  88. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/monitor_adapter_nvidia.hpp +0 -0
  89. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/nvml_collector.cpp +0 -0
  90. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/nvml_collector.hpp +0 -0
  91. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/resource_handler.cpp +0 -0
  92. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/resource_handler.hpp +0 -0
  93. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +0 -0
  94. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +0 -0
  95. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/synchronization_handler.cpp +0 -0
  96. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/backends/nvidia/synchronization_handler.hpp +0 -0
  97. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/activity_record.hpp +0 -0
  98. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/backend_factory.cpp +0 -0
  99. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/backend_factory.hpp +0 -0
  100. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/backend_interfaces.hpp +0 -0
  101. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/batch_buffer.hpp +0 -0
  102. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/common.cpp +0 -0
  103. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/common.hpp +0 -0
  104. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/config_file_loader.cpp +0 -0
  105. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/config_file_loader.hpp +0 -0
  106. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/debug_logger.cpp +0 -0
  107. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/debug_logger.hpp +0 -0
  108. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/dictionary_manager.cpp +0 -0
  109. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/dictionary_manager.hpp +0 -0
  110. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/events.hpp +0 -0
  111. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/gpufl.cpp +0 -0
  112. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/host_info.cpp +0 -0
  113. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/host_info.hpp +0 -0
  114. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/itanium_demangle.cpp +0 -0
  115. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/itanium_demangle.hpp +0 -0
  116. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/json/json.cpp +0 -0
  117. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/json/json.hpp +0 -0
  118. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/file_compressor.cpp +0 -0
  119. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/file_compressor.hpp +0 -0
  120. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/file_log_sink.cpp +0 -0
  121. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/file_log_sink.hpp +0 -0
  122. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/http_log_sink.cpp +0 -0
  123. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/http_log_sink.hpp +0 -0
  124. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/log_rotator.cpp +0 -0
  125. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/log_rotator.hpp +0 -0
  126. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/log_sink.hpp +0 -0
  127. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/logger.cpp +0 -0
  128. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/logger/logger.hpp +0 -0
  129. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/batch_models.cpp +0 -0
  130. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/batch_models.hpp +0 -0
  131. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/graph_launch_event_model.cpp +0 -0
  132. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/graph_launch_event_model.hpp +0 -0
  133. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/kernel_event_model.cpp +0 -0
  134. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/kernel_event_model.hpp +0 -0
  135. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/lifecycle_model.cpp +0 -0
  136. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/lifecycle_model.hpp +0 -0
  137. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/memcpy_event_model.cpp +0 -0
  138. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/memcpy_event_model.hpp +0 -0
  139. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/memory_alloc_event_model.cpp +0 -0
  140. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/memory_alloc_event_model.hpp +0 -0
  141. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/model_utils.hpp +0 -0
  142. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/nvtx_marker_model.cpp +0 -0
  143. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/nvtx_marker_model.hpp +0 -0
  144. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/perf_metric_model.cpp +0 -0
  145. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/perf_metric_model.hpp +0 -0
  146. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/profile_sample_model.cpp +0 -0
  147. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/profile_sample_model.hpp +0 -0
  148. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/scope_event_model.cpp +0 -0
  149. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/scope_event_model.hpp +0 -0
  150. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/serializable.hpp +0 -0
  151. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/synchronization_event_model.cpp +0 -0
  152. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/synchronization_event_model.hpp +0 -0
  153. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/system_event_model.cpp +0 -0
  154. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/model/system_event_model.hpp +0 -0
  155. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/monitor.cpp +0 -0
  156. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/monitor.hpp +0 -0
  157. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/monitor_adapter.cpp +0 -0
  158. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/monitor_adapter.hpp +0 -0
  159. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/monitor_backend.hpp +0 -0
  160. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/remote_config.cpp +0 -0
  161. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/remote_config.hpp +0 -0
  162. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/ring_buffer.hpp +0 -0
  163. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/runtime.cpp +0 -0
  164. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/runtime.hpp +0 -0
  165. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/sampler.cpp +0 -0
  166. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/sampler.hpp +0 -0
  167. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/sass_compressor.cpp +0 -0
  168. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/sass_compressor.hpp +0 -0
  169. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/scope_registry.cpp +0 -0
  170. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/scope_registry.hpp +0 -0
  171. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/stack_registry.hpp +0 -0
  172. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/stack_trace.cpp +0 -0
  173. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/stack_trace.hpp +0 -0
  174. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/stream_handle.hpp +0 -0
  175. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/trace_type.hpp +0 -0
  176. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/core/version.hpp +0 -0
  177. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/gpufl.hpp +0 -0
  178. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/report/hint_engine.cpp +0 -0
  179. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/report/hint_engine.hpp +0 -0
  180. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/report/text_report.cpp +0 -0
  181. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl/report/text_report.hpp +0 -0
  182. {gpufl-0.1.2 → gpufl-0.1.4}/include/gpufl.hpp +0 -0
  183. {gpufl-0.1.2 → gpufl-0.1.4}/python/bindings.cpp +0 -0
  184. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/.gitignore +0 -0
  185. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/analyzer/__init__.py +0 -0
  186. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/cupy/__init__.py +0 -0
  187. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/jax/__init__.py +0 -0
  188. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/numba/__init__.py +0 -0
  189. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/report/__init__.py +0 -0
  190. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/report/text_report.py +0 -0
  191. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/torch/__init__.py +0 -0
  192. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/torch/dispatch.py +0 -0
  193. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/torch/profile.py +0 -0
  194. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/torch/stack.py +0 -0
  195. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/torch/trace_import.py +0 -0
  196. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/triton/__init__.py +0 -0
  197. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/utils.py +0 -0
  198. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/viz/__init__.py +0 -0
  199. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/viz/reader.py +0 -0
  200. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/viz/timeline.py +0 -0
  201. {gpufl-0.1.2 → gpufl-0.1.4}/python/gpufl/viz/visualizer.py +0 -0
  202. {gpufl-0.1.2 → gpufl-0.1.4}/scripts/docker-demo-loop.sh +0 -0
  203. {gpufl-0.1.2 → gpufl-0.1.4}/scripts/windows/run-monitor-local.bat +0 -0
  204. {gpufl-0.1.2 → gpufl-0.1.4}/tests/CMakeLists.txt +0 -0
  205. {gpufl-0.1.2 → gpufl-0.1.4}/tests/backends/amd/test_rocm_collector.cpp +0 -0
  206. {gpufl-0.1.2 → gpufl-0.1.4}/tests/backends/nvidia/test_cuda_collector.cpp +0 -0
  207. {gpufl-0.1.2 → gpufl-0.1.4}/tests/backends/nvidia/test_engine_coverage.cpp +0 -0
  208. {gpufl-0.1.2 → gpufl-0.1.4}/tests/backends/nvidia/test_nvidia_backend.cpp +0 -0
  209. {gpufl-0.1.2 → gpufl-0.1.4}/tests/backends/nvidia/test_nvml_collector.cpp +0 -0
  210. {gpufl-0.1.2 → gpufl-0.1.4}/tests/common/log_utils.cpp +0 -0
  211. {gpufl-0.1.2 → gpufl-0.1.4}/tests/common/log_utils.hpp +0 -0
  212. {gpufl-0.1.2 → gpufl-0.1.4}/tests/common/test_kernel.cu +0 -0
  213. {gpufl-0.1.2 → gpufl-0.1.4}/tests/common/test_kernel.hpp +0 -0
  214. {gpufl-0.1.2 → gpufl-0.1.4}/tests/common/test_utils.hpp +0 -0
  215. {gpufl-0.1.2 → gpufl-0.1.4}/tests/core/test_analyzer.cpp +0 -0
  216. {gpufl-0.1.2 → gpufl-0.1.4}/tests/core/test_api_path_routing.cpp +0 -0
  217. {gpufl-0.1.2 → gpufl-0.1.4}/tests/core/test_batch_models.cpp +0 -0
  218. {gpufl-0.1.2 → gpufl-0.1.4}/tests/core/test_http_log_sink.cpp +0 -0
  219. {gpufl-0.1.2 → gpufl-0.1.4}/tests/core/test_itanium_demangle.cpp +0 -0
  220. {gpufl-0.1.2 → gpufl-0.1.4}/tests/core/test_monitor.cpp +0 -0
  221. {gpufl-0.1.2 → gpufl-0.1.4}/tests/core/test_wire_contract.cpp +0 -0
  222. {gpufl-0.1.2 → gpufl-0.1.4}/tests/main_test_runner.cpp +0 -0
  223. {gpufl-0.1.2 → gpufl-0.1.4}/tests/python/conftest.py +0 -0
  224. {gpufl-0.1.2 → gpufl-0.1.4}/tests/python/test_analyzer.py +0 -0
  225. {gpufl-0.1.2 → gpufl-0.1.4}/tests/python/test_bindings.py +0 -0
  226. {gpufl-0.1.2 → gpufl-0.1.4}/tests/python/test_remote_upload_smoke.py +0 -0
  227. {gpufl-0.1.2 → gpufl-0.1.4}/tests/run_engine_coverage.ps1 +0 -0
  228. {gpufl-0.1.2 → gpufl-0.1.4}/tests/run_engine_coverage.sh +0 -0
  229. {gpufl-0.1.2 → gpufl-0.1.4}/tests/verify_pipeline.py +0 -0
@@ -1,7 +1,7 @@
1
1
  cmake_minimum_required(VERSION 3.31)
2
2
 
3
3
  project(gpufl_client
4
- VERSION 0.1.2
4
+ VERSION 0.1.4
5
5
  LANGUAGES CXX
6
6
  DESCRIPTION "Header-only GPU monitoring client library"
7
7
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gpufl
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: GPU Monitoring Client
5
5
  Author-Email: Myoungho Shin <myounghoshin84@gmail.com>
6
6
  Classifier: Development Status :: 3 - Alpha
@@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"
4
4
 
5
5
  [project]
6
6
  name = "gpufl"
7
- version = "0.1.2"
7
+ version = "0.1.4"
8
8
  description = "GPU Monitoring Client"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -128,7 +128,7 @@ except Exception as e:
128
128
  print(f"[FATAL] Unexpected error importing _gpufl_client: {e}", file=sys.stderr)
129
129
  raise e
130
130
 
131
- __version__ = "0.1.2"
131
+ __version__ = "0.1.4"
132
132
 
133
133
  # ── Remote Configuration ──────────────────────────────────────────────────────
134
134
  #
@@ -695,7 +695,7 @@ class GpuFlightSession:
695
695
  table.add_column("Total Time", justify="right", style="green")
696
696
  table.add_column("Occupancy", justify="right", style="magenta")
697
697
  table.add_column("Grid/Block", justify="center")
698
- table.add_column("Resources (Reg/SMem/DMem/LMem/CMem/Spill)", justify="left")
698
+ table.add_column("Resources (Reg/SMem/LMem/CMem/Spill)", justify="left")
699
699
 
700
700
  for (name, *rest), row in summary.iterrows():
701
701
  stack_trace = rest[0] if rest else None
@@ -743,10 +743,22 @@ class GpuFlightSession:
743
743
  grid_val = row['grid'] if 'grid' in row.index and pd.notna(row.get('grid')) else "n/a"
744
744
  block_val = row['block'] if 'block' in row.index and pd.notna(row.get('block')) else "n/a"
745
745
 
746
+ # Shared memory: `static` is compile-time __shared__ arrays;
747
+ # `dyn` is the third launch arg <<<grid,block,dyn_shared>>>.
748
+ # Both live in the same physical SM shared-memory space and
749
+ # together drive smem_occupancy — so we display the SUM as
750
+ # `SMem` (matching what the occupancy % is computed against)
751
+ # with the static/dyn breakdown in parentheses for users
752
+ # tuning either piece. Previously this row showed two
753
+ # separate `SMem`/`DMem` values which read as "static
754
+ # shared" vs "device memory" (wrong: it's dyn-shared, not
755
+ # device global memory) and didn't visually reconcile with
756
+ # the smem occupancy % above.
757
+ smem_total = (static_b or 0) + (dyn_b or 0)
746
758
  resource_str = (
747
759
  f"{num_regs} regs"
748
760
  + (f" ({occ_breakdown})" if occ_breakdown else "")
749
- + f"\nSMem {static_b} B · DMem {dyn_b} B"
761
+ + f"\nSMem {smem_total} B ({static_b} static + {dyn_b} dyn)"
750
762
  + f"\nLMem {local_b} B · CMem {const_b} B"
751
763
  + spill_str
752
764
  + bottleneck_str
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes