gpufl 0.1.0.dev7__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpufl-0.1.2/.dockerignore +18 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.github/workflows/release.yml +62 -2
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.gitignore +12 -1
- gpufl-0.1.2/CMakeLists.txt +625 -0
- gpufl-0.1.2/Dockerfile.demo +42 -0
- gpufl-0.1.2/Dockerfile.monitor +85 -0
- gpufl-0.1.2/Dockerfile.monitor.amd +94 -0
- gpufl-0.1.2/Dockerfile.monitor.supervisord.conf +27 -0
- gpufl-0.1.2/PKG-INFO +349 -0
- gpufl-0.1.2/README.md +304 -0
- gpufl-0.1.2/benchmark/README.md +71 -0
- gpufl-0.1.2/benchmark/cuda_gemm.py +44 -0
- gpufl-0.1.2/benchmark/pytorch_train.py +145 -0
- gpufl-0.1.2/benchmark/run_benchmark.py +263 -0
- gpufl-0.1.2/daemon/README.md +252 -0
- gpufl-0.1.2/daemon/monitor/CMakeLists.txt +44 -0
- gpufl-0.1.2/daemon/monitor/main.cpp +105 -0
- gpufl-0.1.2/docker-compose.monitor.amd.yml +43 -0
- gpufl-0.1.2/docker-compose.monitor.yml +71 -0
- gpufl-0.1.2/example/amd/CMakeLists.txt +71 -0
- gpufl-0.1.2/example/amd/README.md +139 -0
- gpufl-0.1.2/example/amd/check_device.cpp +31 -0
- gpufl-0.1.2/example/amd/gpufl_scope_demo.cpp +240 -0
- gpufl-0.1.2/example/amd/vector_add_benchmark.cpp +137 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/CMakeLists.txt +111 -87
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/block_style_example.cu +11 -10
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/cupti_basic.cu +73 -53
- gpufl-0.1.2/example/cuda/memory_coalescing_demo.cu +134 -0
- gpufl-0.1.2/example/cuda/sass_divergence_demo.cu +270 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/vector_add_benchmark.cu +23 -0
- gpufl-0.1.2/example/python/03_pytorch_benchmark.py +149 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/analyzer/01_analyzer_sample.py +2 -2
- gpufl-0.1.2/include/gpufl/backends/amd/engine/amd_profiling_engine.hpp +42 -0
- gpufl-0.1.2/include/gpufl/backends/amd/engine/dispatch_counter_engine.cpp +282 -0
- gpufl-0.1.2/include/gpufl/backends/amd/engine/dispatch_counter_engine.hpp +65 -0
- gpufl-0.1.2/include/gpufl/backends/amd/hip_static_collector.cpp +91 -0
- gpufl-0.1.2/include/gpufl/backends/amd/hip_static_collector.hpp +20 -0
- gpufl-0.1.2/include/gpufl/backends/amd/monitor_adapter_amd.cpp +56 -0
- gpufl-0.1.2/include/gpufl/backends/amd/monitor_adapter_amd.hpp +30 -0
- gpufl-0.1.2/include/gpufl/backends/amd/rocm_collector.cpp +522 -0
- gpufl-0.1.2/include/gpufl/backends/amd/rocm_collector.hpp +37 -0
- gpufl-0.1.2/include/gpufl/backends/amd/rocprofiler_backend.cpp +799 -0
- gpufl-0.1.2/include/gpufl/backends/amd/rocprofiler_backend.hpp +144 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/host_collector.hpp +2 -2
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cuda_collector.cpp +5 -4
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cuda_collector.hpp +2 -2
- gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_backend.cpp +1218 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_backend.hpp +44 -1
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_common.hpp +2 -73
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_utils.cpp +32 -14
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_utils.hpp +23 -1
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +695 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +30 -2
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.cpp +70 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.hpp +65 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +30 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +421 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +17 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +483 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +10 -1
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +72 -12
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +2 -1
- gpufl-0.1.2/include/gpufl/backends/nvidia/monitor_adapter_nvidia.cpp +81 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/monitor_adapter_nvidia.hpp +32 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/nvml_collector.cpp +154 -1
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/nvml_collector.hpp +10 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/resource_handler.cpp +151 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/resource_handler.hpp +15 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +56 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +19 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/synchronization_handler.cpp +149 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/synchronization_handler.hpp +60 -0
- gpufl-0.1.2/include/gpufl/core/activity_record.hpp +141 -0
- gpufl-0.1.2/include/gpufl/core/backend_factory.cpp +139 -0
- gpufl-0.1.2/include/gpufl/core/backend_factory.hpp +13 -0
- gpufl-0.1.2/include/gpufl/core/backend_interfaces.hpp +31 -0
- gpufl-0.1.2/include/gpufl/core/batch_buffer.hpp +23 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/common.hpp +2 -0
- gpufl-0.1.2/include/gpufl/core/config_file_loader.cpp +51 -0
- gpufl-0.1.2/include/gpufl/core/config_file_loader.hpp +18 -0
- gpufl-0.1.2/include/gpufl/core/dictionary_manager.cpp +575 -0
- gpufl-0.1.2/include/gpufl/core/dictionary_manager.hpp +138 -0
- gpufl-0.1.2/include/gpufl/core/events.hpp +601 -0
- gpufl-0.1.2/include/gpufl/core/gpufl.cpp +699 -0
- gpufl-0.1.2/include/gpufl/core/host_info.cpp +131 -0
- gpufl-0.1.2/include/gpufl/core/host_info.hpp +30 -0
- gpufl-0.1.2/include/gpufl/core/itanium_demangle.cpp +543 -0
- gpufl-0.1.2/include/gpufl/core/itanium_demangle.hpp +43 -0
- gpufl-0.1.2/include/gpufl/core/json/json.cpp +369 -0
- gpufl-0.1.2/include/gpufl/core/json/json.hpp +155 -0
- gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.cpp → gpufl-0.1.2/include/gpufl/core/logger/file_log_sink.cpp +30 -31
- gpufl-0.1.2/include/gpufl/core/logger/file_log_sink.hpp +82 -0
- gpufl-0.1.2/include/gpufl/core/logger/http_log_sink.cpp +408 -0
- gpufl-0.1.2/include/gpufl/core/logger/http_log_sink.hpp +181 -0
- gpufl-0.1.2/include/gpufl/core/logger/log_sink.hpp +53 -0
- gpufl-0.1.2/include/gpufl/core/logger/logger.cpp +47 -0
- gpufl-0.1.2/include/gpufl/core/logger/logger.hpp +76 -0
- gpufl-0.1.2/include/gpufl/core/model/batch_models.cpp +316 -0
- gpufl-0.1.2/include/gpufl/core/model/batch_models.hpp +167 -0
- gpufl-0.1.2/include/gpufl/core/model/graph_launch_event_model.cpp +37 -0
- gpufl-0.1.2/include/gpufl/core/model/graph_launch_event_model.hpp +23 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/kernel_event_model.cpp +13 -5
- gpufl-0.1.2/include/gpufl/core/model/lifecycle_model.cpp +83 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/lifecycle_model.hpp +8 -0
- gpufl-0.1.2/include/gpufl/core/model/memory_alloc_event_model.cpp +42 -0
- gpufl-0.1.2/include/gpufl/core/model/memory_alloc_event_model.hpp +28 -0
- gpufl-0.1.2/include/gpufl/core/model/model_utils.hpp +109 -0
- gpufl-0.1.2/include/gpufl/core/model/nvtx_marker_model.cpp +25 -0
- gpufl-0.1.2/include/gpufl/core/model/nvtx_marker_model.hpp +22 -0
- gpufl-0.1.2/include/gpufl/core/model/synchronization_event_model.cpp +38 -0
- gpufl-0.1.2/include/gpufl/core/model/synchronization_event_model.hpp +30 -0
- gpufl-0.1.2/include/gpufl/core/monitor.cpp +594 -0
- gpufl-0.1.2/include/gpufl/core/monitor.hpp +204 -0
- gpufl-0.1.2/include/gpufl/core/monitor_adapter.cpp +41 -0
- gpufl-0.1.2/include/gpufl/core/monitor_adapter.hpp +31 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/monitor_backend.hpp +23 -0
- gpufl-0.1.2/include/gpufl/core/remote_config.cpp +279 -0
- gpufl-0.1.2/include/gpufl/core/remote_config.hpp +60 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/ring_buffer.hpp +27 -6
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/runtime.hpp +3 -1
- gpufl-0.1.2/include/gpufl/core/sampler.cpp +131 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/sampler.hpp +14 -2
- gpufl-0.1.2/include/gpufl/core/sass_compressor.cpp +109 -0
- gpufl-0.1.2/include/gpufl/core/sass_compressor.hpp +52 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/stack_trace.cpp +39 -12
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/stack_trace.hpp +7 -0
- gpufl-0.1.2/include/gpufl/core/stream_handle.hpp +9 -0
- gpufl-0.1.2/include/gpufl/core/trace_type.hpp +89 -0
- gpufl-0.1.2/include/gpufl/core/version.hpp +63 -0
- gpufl-0.1.2/include/gpufl/gpufl.hpp +240 -0
- gpufl-0.1.2/include/gpufl/report/hint_engine.cpp +91 -0
- gpufl-0.1.2/include/gpufl/report/hint_engine.hpp +28 -0
- gpufl-0.1.2/include/gpufl/report/text_report.cpp +1127 -0
- gpufl-0.1.2/include/gpufl/report/text_report.hpp +176 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/pyproject.toml +23 -1
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/bindings.cpp +84 -8
- gpufl-0.1.2/python/gpufl/__init__.py +227 -0
- gpufl-0.1.2/python/gpufl/analyzer/analyzer.py +1153 -0
- gpufl-0.1.2/python/gpufl/cupy/__init__.py +69 -0
- gpufl-0.1.2/python/gpufl/jax/__init__.py +68 -0
- gpufl-0.1.2/python/gpufl/numba/__init__.py +58 -0
- gpufl-0.1.2/python/gpufl/report/__init__.py +1 -0
- gpufl-0.1.2/python/gpufl/report/text_report.py +516 -0
- gpufl-0.1.2/python/gpufl/torch/__init__.py +59 -0
- gpufl-0.1.2/python/gpufl/torch/dispatch.py +184 -0
- gpufl-0.1.2/python/gpufl/torch/profile.py +76 -0
- gpufl-0.1.2/python/gpufl/torch/stack.py +62 -0
- gpufl-0.1.2/python/gpufl/torch/trace_import.py +125 -0
- gpufl-0.1.2/python/gpufl/triton/__init__.py +64 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/viz/timeline.py +10 -12
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/viz/visualizer.py +1 -1
- gpufl-0.1.2/scripts/docker-demo-loop.sh +17 -0
- gpufl-0.1.2/scripts/windows/run-monitor-local.bat +20 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/CMakeLists.txt +62 -9
- gpufl-0.1.2/tests/backends/amd/test_rocm_collector.cpp +91 -0
- gpufl-0.1.2/tests/backends/nvidia/test_engine_coverage.cpp +294 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/backends/nvidia/test_nvidia_backend.cpp +10 -5
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/backends/nvidia/test_nvml_collector.cpp +4 -4
- gpufl-0.1.2/tests/common/log_utils.cpp +161 -0
- gpufl-0.1.2/tests/common/log_utils.hpp +61 -0
- gpufl-0.1.2/tests/common/test_kernel.cu +45 -0
- gpufl-0.1.2/tests/common/test_kernel.hpp +22 -0
- gpufl-0.1.2/tests/common/test_utils.hpp +55 -0
- gpufl-0.1.2/tests/core/test_api_path_routing.cpp +213 -0
- gpufl-0.1.2/tests/core/test_batch_models.cpp +144 -0
- gpufl-0.1.2/tests/core/test_http_log_sink.cpp +300 -0
- gpufl-0.1.2/tests/core/test_itanium_demangle.cpp +146 -0
- gpufl-0.1.2/tests/core/test_wire_contract.cpp +394 -0
- gpufl-0.1.2/tests/python/conftest.py +223 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/python/test_analyzer.py +32 -4
- gpufl-0.1.2/tests/python/test_bindings.py +188 -0
- gpufl-0.1.2/tests/python/test_remote_upload_smoke.py +185 -0
- gpufl-0.1.2/tests/run_engine_coverage.ps1 +86 -0
- gpufl-0.1.2/tests/run_engine_coverage.sh +83 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/verify_pipeline.py +5 -3
- gpufl-0.1.0.dev7/CMakeLists.txt +0 -351
- gpufl-0.1.0.dev7/PKG-INFO +0 -192
- gpufl-0.1.0.dev7/README.md +0 -167
- gpufl-0.1.0.dev7/example/cuda/test_sass_cubin.cu +0 -164
- gpufl-0.1.0.dev7/example/cuda/test_sass_metrics.cu +0 -85
- gpufl-0.1.0.dev7/example/python/03_pytorch_benchmark.py +0 -75
- gpufl-0.1.0.dev7/include/gpufl/backends/amd/rocm_collector.cpp +0 -10
- gpufl-0.1.0.dev7/include/gpufl/backends/amd/rocm_collector.hpp +0 -18
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_backend.cpp +0 -316
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +0 -395
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +0 -221
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +0 -327
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/resource_handler.cpp +0 -62
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +0 -222
- gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +0 -42
- gpufl-0.1.0.dev7/include/gpufl/core/events.hpp +0 -274
- gpufl-0.1.0.dev7/include/gpufl/core/gpufl.cpp +0 -398
- gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.hpp +0 -70
- gpufl-0.1.0.dev7/include/gpufl/core/model/lifecycle_model.cpp +0 -34
- gpufl-0.1.0.dev7/include/gpufl/core/model/model_utils.hpp +0 -94
- gpufl-0.1.0.dev7/include/gpufl/core/monitor.hpp +0 -95
- gpufl-0.1.0.dev7/include/gpufl/core/sampler.cpp +0 -74
- gpufl-0.1.0.dev7/include/gpufl/core/trace_type.hpp +0 -13
- gpufl-0.1.0.dev7/include/gpufl/cuda/monitor.cpp +0 -405
- gpufl-0.1.0.dev7/include/gpufl/gpufl.hpp +0 -83
- gpufl-0.1.0.dev7/python/gpufl/__init__.py +0 -89
- gpufl-0.1.0.dev7/python/gpufl/analyzer/analyzer.py +0 -721
- gpufl-0.1.0.dev7/schema/ndjson.schema.json +0 -133
- gpufl-0.1.0.dev7/tests/common/test_utils.hpp +0 -31
- gpufl-0.1.0.dev7/tests/python/conftest.py +0 -69
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.clang-format +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.github/pull_request_template.md +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/.github/workflows/build.yml +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/CONTRIBUTING.md +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/LICENSE +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/build.sh +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/check_conflict.cu +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/check_device.cu +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/cupti_pc_sampling.cu +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/list_sass_metrics.cu +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/occupancy_demo.cu +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/system_monitor.cu +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/cuda/test_occupancy.cu +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/01_basic.py +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/02_numba_cuda.py +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/requirements.txt +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/viz/01_plot_memory_timeline.py +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/example/python/viz/02_plot_stress_timeline.py +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/images/Screenshot1.png +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/common.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/debug_logger.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/debug_logger.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/logger/file_compressor.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/logger/file_compressor.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/logger/log_rotator.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/logger/log_rotator.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/kernel_event_model.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/memcpy_event_model.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/memcpy_event_model.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/perf_metric_model.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/perf_metric_model.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/profile_sample_model.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/profile_sample_model.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/scope_event_model.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/scope_event_model.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/serializable.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/system_event_model.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/model/system_event_model.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/runtime.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/scope_registry.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/scope_registry.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl/core/stack_registry.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/include/gpufl.hpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/.gitignore +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/analyzer/__init__.py +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/utils.py +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/viz/__init__.py +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/python/gpufl/viz/reader.py +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/backends/nvidia/test_cuda_collector.cpp +1 -1
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/core/test_analyzer.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/core/test_monitor.cpp +0 -0
- {gpufl-0.1.0.dev7 → gpufl-0.1.2}/tests/main_test_runner.cpp +0 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Python / notebooks — not needed for the C++ daemon build
|
|
2
|
+
python/
|
|
3
|
+
example/python/
|
|
4
|
+
**/.Trash-*
|
|
5
|
+
**/__pycache__/
|
|
6
|
+
**/*.pyc
|
|
7
|
+
|
|
8
|
+
# Build artifacts
|
|
9
|
+
cmake-build-*/
|
|
10
|
+
build/
|
|
11
|
+
*.o
|
|
12
|
+
*.a
|
|
13
|
+
|
|
14
|
+
# Dev / IDE
|
|
15
|
+
.git/
|
|
16
|
+
.idea/
|
|
17
|
+
.vscode/
|
|
18
|
+
*.md
|
|
@@ -53,6 +53,29 @@ jobs:
|
|
|
53
53
|
)
|
|
54
54
|
init_py.write_text(init_new, encoding="utf-8")
|
|
55
55
|
|
|
56
|
+
# Keep the C++ side in lockstep. The CMake project() VERSION is the
|
|
57
|
+
# single source of truth for GPUFL_CLIENT_VERSION (stamped into the
|
|
58
|
+
# binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
|
|
59
|
+
# this, release wheels would ship the tag version in Python metadata
|
|
60
|
+
# but a stale hardcoded version in the compiled client. CMake's
|
|
61
|
+
# project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
|
|
62
|
+
# so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
|
|
63
|
+
# full version still lands in the wheel metadata above.
|
|
64
|
+
m = re.match(r"\d+(?:\.\d+){0,3}", version)
|
|
65
|
+
cmake_version = m.group(0) if m else version
|
|
66
|
+
cmakelists = Path("CMakeLists.txt")
|
|
67
|
+
cm_text = cmakelists.read_text(encoding="utf-8")
|
|
68
|
+
cm_new, cm_n = re.subn(
|
|
69
|
+
r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
|
|
70
|
+
rf'\g<1>{cmake_version}',
|
|
71
|
+
cm_text,
|
|
72
|
+
count=1,
|
|
73
|
+
flags=re.DOTALL,
|
|
74
|
+
)
|
|
75
|
+
if cm_n != 1:
|
|
76
|
+
raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
|
|
77
|
+
cmakelists.write_text(cm_new, encoding="utf-8")
|
|
78
|
+
|
|
56
79
|
- name: Cache cibuildwheel downloads
|
|
57
80
|
uses: actions/cache@v4
|
|
58
81
|
with:
|
|
@@ -114,12 +137,26 @@ jobs:
|
|
|
114
137
|
env:
|
|
115
138
|
CIBW_VIRTUALENV_VERSION: "20.27.1"
|
|
116
139
|
CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
|
|
140
|
+
# cuda-nvml-devel-13-1 ships the libnvidia-ml.so stub under
|
|
141
|
+
# targets/x86_64-linux/lib/stubs/ — without it CMake's NVML probe
|
|
142
|
+
# finds nothing and (since v0.1.1) fails the build loudly. Every
|
|
143
|
+
# release before v0.1.1 silently shipped wheels without NVML
|
|
144
|
+
# because this package was missing here.
|
|
117
145
|
CIBW_BEFORE_ALL_LINUX: >-
|
|
118
146
|
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
|
|
119
|
-
dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1
|
|
147
|
+
dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1 cuda-nvml-devel-13-1
|
|
120
148
|
CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
|
|
121
149
|
CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
|
|
122
|
-
|
|
150
|
+
# libnvidia-ml.so.1 is excluded for the same reason as libcuda.so.1:
|
|
151
|
+
# it ships with the NVIDIA driver, not the CUDA toolkit, and is
|
|
152
|
+
# not present in the manylinux build container. Auditwheel
|
|
153
|
+
# locates every DT_NEEDED entry on disk before deciding whether
|
|
154
|
+
# to bundle, so an un-excluded NVML reference fails the build
|
|
155
|
+
# ("Cannot repair wheel, because required library libnvidia-ml.so.1
|
|
156
|
+
# could not be located"). The toolkit's `libnvidia-ml.so` stub is
|
|
157
|
+
# only the unversioned link-time placeholder — the versioned
|
|
158
|
+
# `.so.1` the SONAME chains to lives on the user's machine.
|
|
159
|
+
CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 --exclude libnvidia-ml.so.1 -w {dest_dir} {wheel}"
|
|
123
160
|
|
|
124
161
|
- uses: actions/upload-artifact@v4
|
|
125
162
|
with:
|
|
@@ -168,6 +205,29 @@ jobs:
|
|
|
168
205
|
)
|
|
169
206
|
init_py.write_text(init_new, encoding="utf-8")
|
|
170
207
|
|
|
208
|
+
# Keep the C++ side in lockstep. The CMake project() VERSION is the
|
|
209
|
+
# single source of truth for GPUFL_CLIENT_VERSION (stamped into the
|
|
210
|
+
# binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
|
|
211
|
+
# this, release wheels would ship the tag version in Python metadata
|
|
212
|
+
# but a stale hardcoded version in the compiled client. CMake's
|
|
213
|
+
# project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
|
|
214
|
+
# so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
|
|
215
|
+
# full version still lands in the wheel metadata above.
|
|
216
|
+
m = re.match(r"\d+(?:\.\d+){0,3}", version)
|
|
217
|
+
cmake_version = m.group(0) if m else version
|
|
218
|
+
cmakelists = Path("CMakeLists.txt")
|
|
219
|
+
cm_text = cmakelists.read_text(encoding="utf-8")
|
|
220
|
+
cm_new, cm_n = re.subn(
|
|
221
|
+
r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
|
|
222
|
+
rf'\g<1>{cmake_version}',
|
|
223
|
+
cm_text,
|
|
224
|
+
count=1,
|
|
225
|
+
flags=re.DOTALL,
|
|
226
|
+
)
|
|
227
|
+
if cm_n != 1:
|
|
228
|
+
raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
|
|
229
|
+
cmakelists.write_text(cm_new, encoding="utf-8")
|
|
230
|
+
|
|
171
231
|
- name: Build sdist
|
|
172
232
|
run: pipx run build --sdist
|
|
173
233
|
|
|
@@ -1,8 +1,19 @@
|
|
|
1
|
+
### claude
|
|
2
|
+
.claude/
|
|
3
|
+
|
|
1
4
|
### idea
|
|
2
5
|
.idea/**
|
|
3
6
|
build/
|
|
7
|
+
build-*/
|
|
8
|
+
build_tests/
|
|
4
9
|
cmake-build-*/
|
|
5
10
|
cmake/
|
|
11
|
+
CMakeFiles/
|
|
12
|
+
CMakeCache.txt
|
|
13
|
+
wget-log*
|
|
14
|
+
|
|
15
|
+
### docker
|
|
16
|
+
example/python/docker/**/
|
|
6
17
|
|
|
7
18
|
### C++ template
|
|
8
19
|
# Prerequisites
|
|
@@ -77,4 +88,4 @@ cmake/
|
|
|
77
88
|
*.x86_64
|
|
78
89
|
*.hex
|
|
79
90
|
|
|
80
|
-
*.log
|
|
91
|
+
*.log
|