gpufl 0.1.0.dev0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpufl-0.1.2/.dockerignore +18 -0
- gpufl-0.1.2/.github/workflows/release.yml +253 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/.gitignore +12 -1
- gpufl-0.1.2/CMakeLists.txt +625 -0
- gpufl-0.1.2/Dockerfile.demo +42 -0
- gpufl-0.1.2/Dockerfile.monitor +85 -0
- gpufl-0.1.2/Dockerfile.monitor.amd +94 -0
- gpufl-0.1.2/Dockerfile.monitor.supervisord.conf +27 -0
- gpufl-0.1.2/PKG-INFO +349 -0
- gpufl-0.1.2/README.md +304 -0
- gpufl-0.1.2/benchmark/README.md +71 -0
- gpufl-0.1.2/benchmark/cuda_gemm.py +44 -0
- gpufl-0.1.2/benchmark/pytorch_train.py +145 -0
- gpufl-0.1.2/benchmark/run_benchmark.py +263 -0
- gpufl-0.1.2/daemon/README.md +252 -0
- gpufl-0.1.2/daemon/monitor/CMakeLists.txt +44 -0
- gpufl-0.1.2/daemon/monitor/main.cpp +105 -0
- gpufl-0.1.2/docker-compose.monitor.amd.yml +43 -0
- gpufl-0.1.2/docker-compose.monitor.yml +71 -0
- gpufl-0.1.2/example/amd/CMakeLists.txt +71 -0
- gpufl-0.1.2/example/amd/README.md +139 -0
- gpufl-0.1.2/example/amd/check_device.cpp +31 -0
- gpufl-0.1.2/example/amd/gpufl_scope_demo.cpp +240 -0
- gpufl-0.1.2/example/amd/vector_add_benchmark.cpp +137 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/CMakeLists.txt +111 -87
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/block_style_example.cu +13 -10
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/cupti_basic.cu +73 -53
- gpufl-0.1.2/example/cuda/memory_coalescing_demo.cu +134 -0
- gpufl-0.1.2/example/cuda/sass_divergence_demo.cu +270 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/vector_add_benchmark.cu +23 -0
- gpufl-0.1.2/example/python/03_pytorch_benchmark.py +149 -0
- gpufl-0.1.2/example/python/analyzer/01_analyzer_sample.py +14 -0
- gpufl-0.1.2/include/gpufl/backends/amd/engine/amd_profiling_engine.hpp +42 -0
- gpufl-0.1.2/include/gpufl/backends/amd/engine/dispatch_counter_engine.cpp +282 -0
- gpufl-0.1.2/include/gpufl/backends/amd/engine/dispatch_counter_engine.hpp +65 -0
- gpufl-0.1.2/include/gpufl/backends/amd/hip_static_collector.cpp +91 -0
- gpufl-0.1.2/include/gpufl/backends/amd/hip_static_collector.hpp +20 -0
- gpufl-0.1.2/include/gpufl/backends/amd/monitor_adapter_amd.cpp +56 -0
- gpufl-0.1.2/include/gpufl/backends/amd/monitor_adapter_amd.hpp +30 -0
- gpufl-0.1.2/include/gpufl/backends/amd/rocm_collector.cpp +522 -0
- gpufl-0.1.2/include/gpufl/backends/amd/rocm_collector.hpp +37 -0
- gpufl-0.1.2/include/gpufl/backends/amd/rocprofiler_backend.cpp +799 -0
- gpufl-0.1.2/include/gpufl/backends/amd/rocprofiler_backend.hpp +144 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/host_collector.hpp +2 -2
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cuda_collector.cpp +5 -4
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cuda_collector.hpp +2 -2
- gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_backend.cpp +1218 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_backend.hpp +159 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/cupti_common.hpp +7 -67
- gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_utils.cpp +170 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/cupti_utils.hpp +87 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +695 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +94 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.cpp +70 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.hpp +65 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +103 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +479 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +53 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +421 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +61 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +483 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +10 -1
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +72 -12
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +2 -1
- gpufl-0.1.2/include/gpufl/backends/nvidia/monitor_adapter_nvidia.cpp +81 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/monitor_adapter_nvidia.hpp +32 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/nvml_collector.cpp +154 -1
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/nvml_collector.hpp +10 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/resource_handler.cpp +151 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/backends/nvidia/resource_handler.hpp +15 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +56 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +19 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/synchronization_handler.cpp +149 -0
- gpufl-0.1.2/include/gpufl/backends/nvidia/synchronization_handler.hpp +60 -0
- gpufl-0.1.2/include/gpufl/core/activity_record.hpp +141 -0
- gpufl-0.1.2/include/gpufl/core/backend_factory.cpp +139 -0
- gpufl-0.1.2/include/gpufl/core/backend_factory.hpp +13 -0
- gpufl-0.1.2/include/gpufl/core/backend_interfaces.hpp +31 -0
- gpufl-0.1.2/include/gpufl/core/batch_buffer.hpp +23 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/common.hpp +2 -0
- gpufl-0.1.2/include/gpufl/core/config_file_loader.cpp +51 -0
- gpufl-0.1.2/include/gpufl/core/config_file_loader.hpp +18 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/debug_logger.hpp +1 -1
- gpufl-0.1.2/include/gpufl/core/dictionary_manager.cpp +575 -0
- gpufl-0.1.2/include/gpufl/core/dictionary_manager.hpp +138 -0
- gpufl-0.1.2/include/gpufl/core/events.hpp +601 -0
- gpufl-0.1.2/include/gpufl/core/gpufl.cpp +699 -0
- gpufl-0.1.2/include/gpufl/core/host_info.cpp +131 -0
- gpufl-0.1.2/include/gpufl/core/host_info.hpp +30 -0
- gpufl-0.1.2/include/gpufl/core/itanium_demangle.cpp +543 -0
- gpufl-0.1.2/include/gpufl/core/itanium_demangle.hpp +43 -0
- gpufl-0.1.2/include/gpufl/core/json/json.cpp +369 -0
- gpufl-0.1.2/include/gpufl/core/json/json.hpp +155 -0
- gpufl-0.1.2/include/gpufl/core/logger/file_compressor.cpp +44 -0
- gpufl-0.1.2/include/gpufl/core/logger/file_compressor.hpp +18 -0
- gpufl-0.1.2/include/gpufl/core/logger/file_log_sink.cpp +151 -0
- gpufl-0.1.2/include/gpufl/core/logger/file_log_sink.hpp +82 -0
- gpufl-0.1.2/include/gpufl/core/logger/http_log_sink.cpp +408 -0
- gpufl-0.1.2/include/gpufl/core/logger/http_log_sink.hpp +181 -0
- gpufl-0.1.2/include/gpufl/core/logger/log_rotator.cpp +65 -0
- gpufl-0.1.2/include/gpufl/core/logger/log_rotator.hpp +32 -0
- gpufl-0.1.2/include/gpufl/core/logger/log_sink.hpp +53 -0
- gpufl-0.1.2/include/gpufl/core/logger/logger.cpp +47 -0
- gpufl-0.1.2/include/gpufl/core/logger/logger.hpp +76 -0
- gpufl-0.1.2/include/gpufl/core/model/batch_models.cpp +316 -0
- gpufl-0.1.2/include/gpufl/core/model/batch_models.hpp +167 -0
- gpufl-0.1.2/include/gpufl/core/model/graph_launch_event_model.cpp +37 -0
- gpufl-0.1.2/include/gpufl/core/model/graph_launch_event_model.hpp +23 -0
- gpufl-0.1.2/include/gpufl/core/model/kernel_event_model.cpp +59 -0
- gpufl-0.1.2/include/gpufl/core/model/kernel_event_model.hpp +16 -0
- gpufl-0.1.2/include/gpufl/core/model/lifecycle_model.cpp +83 -0
- gpufl-0.1.2/include/gpufl/core/model/lifecycle_model.hpp +32 -0
- gpufl-0.1.2/include/gpufl/core/model/memcpy_event_model.cpp +58 -0
- gpufl-0.1.2/include/gpufl/core/model/memcpy_event_model.hpp +24 -0
- gpufl-0.1.2/include/gpufl/core/model/memory_alloc_event_model.cpp +42 -0
- gpufl-0.1.2/include/gpufl/core/model/memory_alloc_event_model.hpp +28 -0
- gpufl-0.1.2/include/gpufl/core/model/model_utils.hpp +109 -0
- gpufl-0.1.2/include/gpufl/core/model/nvtx_marker_model.cpp +25 -0
- gpufl-0.1.2/include/gpufl/core/model/nvtx_marker_model.hpp +22 -0
- gpufl-0.1.2/include/gpufl/core/model/perf_metric_model.cpp +33 -0
- gpufl-0.1.2/include/gpufl/core/model/perf_metric_model.hpp +16 -0
- gpufl-0.1.2/include/gpufl/core/model/profile_sample_model.cpp +40 -0
- gpufl-0.1.2/include/gpufl/core/model/profile_sample_model.hpp +16 -0
- gpufl-0.1.2/include/gpufl/core/model/scope_event_model.cpp +43 -0
- gpufl-0.1.2/include/gpufl/core/model/scope_event_model.hpp +24 -0
- gpufl-0.1.2/include/gpufl/core/model/serializable.hpp +15 -0
- gpufl-0.1.2/include/gpufl/core/model/synchronization_event_model.cpp +38 -0
- gpufl-0.1.2/include/gpufl/core/model/synchronization_event_model.hpp +30 -0
- gpufl-0.1.2/include/gpufl/core/model/system_event_model.cpp +51 -0
- gpufl-0.1.2/include/gpufl/core/model/system_event_model.hpp +32 -0
- gpufl-0.1.2/include/gpufl/core/monitor.cpp +594 -0
- gpufl-0.1.2/include/gpufl/core/monitor.hpp +204 -0
- gpufl-0.1.2/include/gpufl/core/monitor_adapter.cpp +41 -0
- gpufl-0.1.2/include/gpufl/core/monitor_adapter.hpp +31 -0
- gpufl-0.1.2/include/gpufl/core/monitor_backend.hpp +76 -0
- gpufl-0.1.2/include/gpufl/core/remote_config.cpp +279 -0
- gpufl-0.1.2/include/gpufl/core/remote_config.hpp +60 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/ring_buffer.hpp +27 -6
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/runtime.hpp +3 -1
- gpufl-0.1.2/include/gpufl/core/sampler.cpp +131 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/sampler.hpp +14 -2
- gpufl-0.1.2/include/gpufl/core/sass_compressor.cpp +109 -0
- gpufl-0.1.2/include/gpufl/core/sass_compressor.hpp +52 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/stack_trace.cpp +39 -12
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/stack_trace.hpp +7 -0
- gpufl-0.1.2/include/gpufl/core/stream_handle.hpp +9 -0
- gpufl-0.1.2/include/gpufl/core/trace_type.hpp +89 -0
- gpufl-0.1.2/include/gpufl/core/version.hpp +63 -0
- gpufl-0.1.2/include/gpufl/gpufl.hpp +240 -0
- gpufl-0.1.2/include/gpufl/report/hint_engine.cpp +91 -0
- gpufl-0.1.2/include/gpufl/report/hint_engine.hpp +28 -0
- gpufl-0.1.2/include/gpufl/report/text_report.cpp +1127 -0
- gpufl-0.1.2/include/gpufl/report/text_report.hpp +176 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/pyproject.toml +23 -1
- gpufl-0.1.2/python/bindings.cpp +205 -0
- gpufl-0.1.2/python/gpufl/__init__.py +227 -0
- gpufl-0.1.2/python/gpufl/analyzer/analyzer.py +1153 -0
- gpufl-0.1.2/python/gpufl/cupy/__init__.py +69 -0
- gpufl-0.1.2/python/gpufl/jax/__init__.py +68 -0
- gpufl-0.1.2/python/gpufl/numba/__init__.py +58 -0
- gpufl-0.1.2/python/gpufl/report/__init__.py +1 -0
- gpufl-0.1.2/python/gpufl/report/text_report.py +516 -0
- gpufl-0.1.2/python/gpufl/torch/__init__.py +59 -0
- gpufl-0.1.2/python/gpufl/torch/dispatch.py +184 -0
- gpufl-0.1.2/python/gpufl/torch/profile.py +76 -0
- gpufl-0.1.2/python/gpufl/torch/stack.py +62 -0
- gpufl-0.1.2/python/gpufl/torch/trace_import.py +125 -0
- gpufl-0.1.2/python/gpufl/triton/__init__.py +64 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/viz/timeline.py +10 -12
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/viz/visualizer.py +1 -1
- gpufl-0.1.2/scripts/docker-demo-loop.sh +17 -0
- gpufl-0.1.2/scripts/windows/run-monitor-local.bat +20 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/CMakeLists.txt +62 -9
- gpufl-0.1.2/tests/backends/amd/test_rocm_collector.cpp +91 -0
- gpufl-0.1.2/tests/backends/nvidia/test_engine_coverage.cpp +294 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/backends/nvidia/test_nvidia_backend.cpp +11 -6
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/backends/nvidia/test_nvml_collector.cpp +4 -4
- gpufl-0.1.2/tests/common/log_utils.cpp +161 -0
- gpufl-0.1.2/tests/common/log_utils.hpp +61 -0
- gpufl-0.1.2/tests/common/test_kernel.cu +45 -0
- gpufl-0.1.2/tests/common/test_kernel.hpp +22 -0
- gpufl-0.1.2/tests/common/test_utils.hpp +55 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/core/test_analyzer.cpp +1 -1
- gpufl-0.1.2/tests/core/test_api_path_routing.cpp +213 -0
- gpufl-0.1.2/tests/core/test_batch_models.cpp +144 -0
- gpufl-0.1.2/tests/core/test_http_log_sink.cpp +300 -0
- gpufl-0.1.2/tests/core/test_itanium_demangle.cpp +146 -0
- gpufl-0.1.2/tests/core/test_wire_contract.cpp +394 -0
- gpufl-0.1.2/tests/python/conftest.py +223 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/python/test_analyzer.py +32 -4
- gpufl-0.1.2/tests/python/test_bindings.py +188 -0
- gpufl-0.1.2/tests/python/test_remote_upload_smoke.py +185 -0
- gpufl-0.1.2/tests/run_engine_coverage.ps1 +86 -0
- gpufl-0.1.2/tests/run_engine_coverage.sh +83 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/verify_pipeline.py +15 -5
- gpufl-0.1.0.dev0/.github/workflows/release.yml +0 -71
- gpufl-0.1.0.dev0/CMakeLists.txt +0 -277
- gpufl-0.1.0.dev0/PKG-INFO +0 -192
- gpufl-0.1.0.dev0/README.md +0 -167
- gpufl-0.1.0.dev0/example/cuda/test_sass_cubin.cu +0 -164
- gpufl-0.1.0.dev0/example/cuda/test_sass_metrics.cu +0 -85
- gpufl-0.1.0.dev0/example/python/03_pytorch_benchmark.py +0 -75
- gpufl-0.1.0.dev0/example/python/analyzer/01_analyzer_sample.py +0 -10
- gpufl-0.1.0.dev0/include/gpufl/backends/amd/rocm_collector.cpp +0 -10
- gpufl-0.1.0.dev0/include/gpufl/backends/amd/rocm_collector.hpp +0 -18
- gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.cpp +0 -806
- gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.hpp +0 -164
- gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_utils.cpp +0 -73
- gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_utils.hpp +0 -37
- gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +0 -282
- gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/resource_handler.cpp +0 -63
- gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +0 -222
- gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +0 -42
- gpufl-0.1.0.dev0/include/gpufl/core/events.hpp +0 -253
- gpufl-0.1.0.dev0/include/gpufl/core/gpufl.cpp +0 -365
- gpufl-0.1.0.dev0/include/gpufl/core/logger.cpp +0 -437
- gpufl-0.1.0.dev0/include/gpufl/core/logger.hpp +0 -88
- gpufl-0.1.0.dev0/include/gpufl/core/monitor.hpp +0 -100
- gpufl-0.1.0.dev0/include/gpufl/core/monitor_backend.hpp +0 -46
- gpufl-0.1.0.dev0/include/gpufl/core/sampler.cpp +0 -73
- gpufl-0.1.0.dev0/include/gpufl/core/trace_type.hpp +0 -13
- gpufl-0.1.0.dev0/include/gpufl/cuda/monitor.cpp +0 -380
- gpufl-0.1.0.dev0/include/gpufl/gpufl.hpp +0 -80
- gpufl-0.1.0.dev0/python/bindings.cpp +0 -103
- gpufl-0.1.0.dev0/python/gpufl/__init__.py +0 -83
- gpufl-0.1.0.dev0/python/gpufl/analyzer/analyzer.py +0 -359
- gpufl-0.1.0.dev0/schema/ndjson.schema.json +0 -133
- gpufl-0.1.0.dev0/tests/common/test_utils.hpp +0 -31
- gpufl-0.1.0.dev0/tests/python/conftest.py +0 -69
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/.clang-format +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/.github/pull_request_template.md +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/.github/workflows/build.yml +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/CONTRIBUTING.md +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/LICENSE +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/build.sh +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/check_conflict.cu +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/check_device.cu +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/cupti_pc_sampling.cu +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/list_sass_metrics.cu +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/occupancy_demo.cu +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/system_monitor.cu +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/cuda/test_occupancy.cu +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/01_basic.py +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/02_numba_cuda.py +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/requirements.txt +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/viz/01_plot_memory_timeline.py +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/example/python/viz/02_plot_stress_timeline.py +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/images/Screenshot1.png +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/common.cpp +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/debug_logger.cpp +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/runtime.cpp +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/scope_registry.cpp +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/scope_registry.hpp +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl/core/stack_registry.hpp +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/include/gpufl.hpp +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/.gitignore +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/analyzer/__init__.py +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/utils.py +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/viz/__init__.py +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/python/gpufl/viz/reader.py +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/backends/nvidia/test_cuda_collector.cpp +1 -1
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/core/test_monitor.cpp +0 -0
- {gpufl-0.1.0.dev0 → gpufl-0.1.2}/tests/main_test_runner.cpp +0 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Python / notebooks — not needed for the C++ daemon build
|
|
2
|
+
python/
|
|
3
|
+
example/python/
|
|
4
|
+
**/.Trash-*
|
|
5
|
+
**/__pycache__/
|
|
6
|
+
**/*.pyc
|
|
7
|
+
|
|
8
|
+
# Build artifacts
|
|
9
|
+
cmake-build-*/
|
|
10
|
+
build/
|
|
11
|
+
*.o
|
|
12
|
+
*.a
|
|
13
|
+
|
|
14
|
+
# Dev / IDE
|
|
15
|
+
.git/
|
|
16
|
+
.idea/
|
|
17
|
+
.vscode/
|
|
18
|
+
*.md
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
name: Build and Release Wheels
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build_wheels:
|
|
11
|
+
name: Build wheels on ${{ matrix.os }}
|
|
12
|
+
runs-on: ${{ matrix.os }}
|
|
13
|
+
strategy:
|
|
14
|
+
matrix:
|
|
15
|
+
os: [ubuntu-22.04, windows-latest]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Set package version from tag
|
|
21
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
22
|
+
shell: python
|
|
23
|
+
run: |
|
|
24
|
+
import os
|
|
25
|
+
import re
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
ref_name = os.environ.get("GITHUB_REF_NAME", "")
|
|
29
|
+
if not ref_name.startswith("v"):
|
|
30
|
+
raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
|
|
31
|
+
version = ref_name[1:]
|
|
32
|
+
print(f"Using version from tag: {version}")
|
|
33
|
+
|
|
34
|
+
pyproject = Path("pyproject.toml")
|
|
35
|
+
text = pyproject.read_text(encoding="utf-8")
|
|
36
|
+
text_new, n = re.subn(
|
|
37
|
+
r'(?m)^version\s*=\s*"[^\"]+"$',
|
|
38
|
+
f'version = "{version}"',
|
|
39
|
+
text,
|
|
40
|
+
count=1,
|
|
41
|
+
)
|
|
42
|
+
if n != 1:
|
|
43
|
+
raise SystemExit("Failed to update [project].version in pyproject.toml")
|
|
44
|
+
pyproject.write_text(text_new, encoding="utf-8")
|
|
45
|
+
|
|
46
|
+
init_py = Path("python/gpufl/__init__.py")
|
|
47
|
+
if init_py.exists():
|
|
48
|
+
init_text = init_py.read_text(encoding="utf-8")
|
|
49
|
+
init_new, _ = re.subn(
|
|
50
|
+
r'(?m)^__version__\s*=\s*"[^\"]+"$',
|
|
51
|
+
f'__version__ = "{version}"',
|
|
52
|
+
init_text,
|
|
53
|
+
)
|
|
54
|
+
init_py.write_text(init_new, encoding="utf-8")
|
|
55
|
+
|
|
56
|
+
# Keep the C++ side in lockstep. The CMake project() VERSION is the
|
|
57
|
+
# single source of truth for GPUFL_CLIENT_VERSION (stamped into the
|
|
58
|
+
# binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
|
|
59
|
+
# this, release wheels would ship the tag version in Python metadata
|
|
60
|
+
# but a stale hardcoded version in the compiled client. CMake's
|
|
61
|
+
# project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
|
|
62
|
+
# so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
|
|
63
|
+
# full version still lands in the wheel metadata above.
|
|
64
|
+
m = re.match(r"\d+(?:\.\d+){0,3}", version)
|
|
65
|
+
cmake_version = m.group(0) if m else version
|
|
66
|
+
cmakelists = Path("CMakeLists.txt")
|
|
67
|
+
cm_text = cmakelists.read_text(encoding="utf-8")
|
|
68
|
+
cm_new, cm_n = re.subn(
|
|
69
|
+
r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
|
|
70
|
+
rf'\g<1>{cmake_version}',
|
|
71
|
+
cm_text,
|
|
72
|
+
count=1,
|
|
73
|
+
flags=re.DOTALL,
|
|
74
|
+
)
|
|
75
|
+
if cm_n != 1:
|
|
76
|
+
raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
|
|
77
|
+
cmakelists.write_text(cm_new, encoding="utf-8")
|
|
78
|
+
|
|
79
|
+
- name: Cache cibuildwheel downloads
|
|
80
|
+
uses: actions/cache@v4
|
|
81
|
+
with:
|
|
82
|
+
path: |
|
|
83
|
+
~/.cache/cibuildwheel
|
|
84
|
+
~/AppData/Local/pypa/cibuildwheel/Cache
|
|
85
|
+
key: cibw-${{ runner.os }}-${{ hashFiles('.github/workflows/release.yml') }}
|
|
86
|
+
restore-keys: |
|
|
87
|
+
cibw-${{ runner.os }}-
|
|
88
|
+
|
|
89
|
+
- name: Install CUDA (Windows)
|
|
90
|
+
if: runner.os == 'Windows'
|
|
91
|
+
uses: Jimver/cuda-toolkit@v0.2.30
|
|
92
|
+
with:
|
|
93
|
+
cuda: '13.1.0'
|
|
94
|
+
method: 'network'
|
|
95
|
+
|
|
96
|
+
- name: Prefetch virtualenv.pyz (Windows)
|
|
97
|
+
if: runner.os == 'Windows'
|
|
98
|
+
shell: pwsh
|
|
99
|
+
run: |
|
|
100
|
+
$version = "20.27.1"
|
|
101
|
+
$cacheDir = Join-Path $env:LOCALAPPDATA "pypa\cibuildwheel\Cache"
|
|
102
|
+
New-Item -ItemType Directory -Path $cacheDir -Force | Out-Null
|
|
103
|
+
$dest = Join-Path $cacheDir "virtualenv-$version.pyz"
|
|
104
|
+
if (Test-Path $dest) {
|
|
105
|
+
Write-Host "virtualenv.pyz already cached: $dest"
|
|
106
|
+
exit 0
|
|
107
|
+
}
|
|
108
|
+
$urls = @(
|
|
109
|
+
"https://raw.githubusercontent.com/pypa/get-virtualenv/$version/public/virtualenv.pyz",
|
|
110
|
+
"https://raw.githubusercontent.com/pypa/get-virtualenv/refs/tags/$version/public/virtualenv.pyz",
|
|
111
|
+
"https://bootstrap.pypa.io/virtualenv.pyz"
|
|
112
|
+
)
|
|
113
|
+
$max = 6
|
|
114
|
+
$ok = $false
|
|
115
|
+
foreach ($url in $urls) {
|
|
116
|
+
for ($i = 1; $i -le $max; $i++) {
|
|
117
|
+
try {
|
|
118
|
+
Write-Host "Downloading virtualenv.pyz from $url (attempt $i/$max)..."
|
|
119
|
+
Invoke-WebRequest -Uri $url -OutFile $dest -TimeoutSec 120 -Headers @{ "User-Agent" = "cibuildwheel-prefetch" }
|
|
120
|
+
if ((Get-Item $dest).Length -gt 0) {
|
|
121
|
+
Write-Host "Downloaded: $dest"
|
|
122
|
+
$ok = $true
|
|
123
|
+
break
|
|
124
|
+
}
|
|
125
|
+
} catch {
|
|
126
|
+
if (Test-Path $dest) { Remove-Item $dest -Force -ErrorAction SilentlyContinue }
|
|
127
|
+
if ($i -eq $max) { break }
|
|
128
|
+
Start-Sleep -Seconds (5 * $i)
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
if ($ok) { break }
|
|
132
|
+
}
|
|
133
|
+
if (-not $ok) { throw "Failed to prefetch virtualenv.pyz from all sources." }
|
|
134
|
+
|
|
135
|
+
- name: Build wheels
|
|
136
|
+
uses: pypa/cibuildwheel@v2.22.0
|
|
137
|
+
env:
|
|
138
|
+
CIBW_VIRTUALENV_VERSION: "20.27.1"
|
|
139
|
+
CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
|
|
140
|
+
# cuda-nvml-devel-13-1 ships the libnvidia-ml.so stub under
|
|
141
|
+
# targets/x86_64-linux/lib/stubs/ — without it CMake's NVML probe
|
|
142
|
+
# finds nothing and (since v0.1.1) fails the build loudly. Every
|
|
143
|
+
# release before v0.1.1 silently shipped wheels without NVML
|
|
144
|
+
# because this package was missing here.
|
|
145
|
+
CIBW_BEFORE_ALL_LINUX: >-
|
|
146
|
+
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
|
|
147
|
+
dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1 cuda-nvml-devel-13-1
|
|
148
|
+
CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
|
|
149
|
+
CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
|
|
150
|
+
# libnvidia-ml.so.1 is excluded for the same reason as libcuda.so.1:
|
|
151
|
+
# it ships with the NVIDIA driver, not the CUDA toolkit, and is
|
|
152
|
+
# not present in the manylinux build container. Auditwheel
|
|
153
|
+
# locates every DT_NEEDED entry on disk before deciding whether
|
|
154
|
+
# to bundle, so an un-excluded NVML reference fails the build
|
|
155
|
+
# ("Cannot repair wheel, because required library libnvidia-ml.so.1
|
|
156
|
+
# could not be located"). The toolkit's `libnvidia-ml.so` stub is
|
|
157
|
+
# only the unversioned link-time placeholder — the versioned
|
|
158
|
+
# `.so.1` the SONAME chains to lives on the user's machine.
|
|
159
|
+
CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 --exclude libnvidia-ml.so.1 -w {dest_dir} {wheel}"
|
|
160
|
+
|
|
161
|
+
- uses: actions/upload-artifact@v4
|
|
162
|
+
with:
|
|
163
|
+
name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
|
|
164
|
+
path: ./wheelhouse/*.whl
|
|
165
|
+
|
|
166
|
+
build_sdist:
|
|
167
|
+
name: Build source distribution
|
|
168
|
+
runs-on: ubuntu-latest
|
|
169
|
+
steps:
|
|
170
|
+
- uses: actions/checkout@v4
|
|
171
|
+
|
|
172
|
+
- name: Set package version from tag
|
|
173
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
174
|
+
shell: python
|
|
175
|
+
run: |
|
|
176
|
+
import os
|
|
177
|
+
import re
|
|
178
|
+
from pathlib import Path
|
|
179
|
+
|
|
180
|
+
ref_name = os.environ.get("GITHUB_REF_NAME", "")
|
|
181
|
+
if not ref_name.startswith("v"):
|
|
182
|
+
raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
|
|
183
|
+
version = ref_name[1:]
|
|
184
|
+
print(f"Using version from tag: {version}")
|
|
185
|
+
|
|
186
|
+
pyproject = Path("pyproject.toml")
|
|
187
|
+
text = pyproject.read_text(encoding="utf-8")
|
|
188
|
+
text_new, n = re.subn(
|
|
189
|
+
r'(?m)^version\s*=\s*"[^\"]+"$',
|
|
190
|
+
f'version = "{version}"',
|
|
191
|
+
text,
|
|
192
|
+
count=1,
|
|
193
|
+
)
|
|
194
|
+
if n != 1:
|
|
195
|
+
raise SystemExit("Failed to update [project].version in pyproject.toml")
|
|
196
|
+
pyproject.write_text(text_new, encoding="utf-8")
|
|
197
|
+
|
|
198
|
+
init_py = Path("python/gpufl/__init__.py")
|
|
199
|
+
if init_py.exists():
|
|
200
|
+
init_text = init_py.read_text(encoding="utf-8")
|
|
201
|
+
init_new, _ = re.subn(
|
|
202
|
+
r'(?m)^__version__\s*=\s*"[^\"]+"$',
|
|
203
|
+
f'__version__ = "{version}"',
|
|
204
|
+
init_text,
|
|
205
|
+
)
|
|
206
|
+
init_py.write_text(init_new, encoding="utf-8")
|
|
207
|
+
|
|
208
|
+
# Keep the C++ side in lockstep. The CMake project() VERSION is the
|
|
209
|
+
# single source of truth for GPUFL_CLIENT_VERSION (stamped into the
|
|
210
|
+
# binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
|
|
211
|
+
# this, release wheels would ship the tag version in Python metadata
|
|
212
|
+
# but a stale hardcoded version in the compiled client. CMake's
|
|
213
|
+
# project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
|
|
214
|
+
# so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
|
|
215
|
+
# full version still lands in the wheel metadata above.
|
|
216
|
+
m = re.match(r"\d+(?:\.\d+){0,3}", version)
|
|
217
|
+
cmake_version = m.group(0) if m else version
|
|
218
|
+
cmakelists = Path("CMakeLists.txt")
|
|
219
|
+
cm_text = cmakelists.read_text(encoding="utf-8")
|
|
220
|
+
cm_new, cm_n = re.subn(
|
|
221
|
+
r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
|
|
222
|
+
rf'\g<1>{cmake_version}',
|
|
223
|
+
cm_text,
|
|
224
|
+
count=1,
|
|
225
|
+
flags=re.DOTALL,
|
|
226
|
+
)
|
|
227
|
+
if cm_n != 1:
|
|
228
|
+
raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
|
|
229
|
+
cmakelists.write_text(cm_new, encoding="utf-8")
|
|
230
|
+
|
|
231
|
+
- name: Build sdist
|
|
232
|
+
run: pipx run build --sdist
|
|
233
|
+
|
|
234
|
+
- uses: actions/upload-artifact@v4
|
|
235
|
+
with:
|
|
236
|
+
name: cibw-sdist
|
|
237
|
+
path: dist/*.tar.gz
|
|
238
|
+
|
|
239
|
+
upload_pypi:
|
|
240
|
+
needs: [build_wheels, build_sdist]
|
|
241
|
+
runs-on: ubuntu-latest
|
|
242
|
+
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
|
|
243
|
+
steps:
|
|
244
|
+
- uses: actions/download-artifact@v4
|
|
245
|
+
with:
|
|
246
|
+
pattern: cibw-*
|
|
247
|
+
path: dist
|
|
248
|
+
merge-multiple: true
|
|
249
|
+
|
|
250
|
+
- name: Publish to PyPI
|
|
251
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
252
|
+
with:
|
|
253
|
+
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
@@ -1,8 +1,19 @@
|
|
|
1
|
+
### claude
|
|
2
|
+
.claude/
|
|
3
|
+
|
|
1
4
|
### idea
|
|
2
5
|
.idea/**
|
|
3
6
|
build/
|
|
7
|
+
build-*/
|
|
8
|
+
build_tests/
|
|
4
9
|
cmake-build-*/
|
|
5
10
|
cmake/
|
|
11
|
+
CMakeFiles/
|
|
12
|
+
CMakeCache.txt
|
|
13
|
+
wget-log*
|
|
14
|
+
|
|
15
|
+
### docker
|
|
16
|
+
example/python/docker/**/
|
|
6
17
|
|
|
7
18
|
### C++ template
|
|
8
19
|
# Prerequisites
|
|
@@ -77,4 +88,4 @@ cmake/
|
|
|
77
88
|
*.x86_64
|
|
78
89
|
*.hex
|
|
79
90
|
|
|
80
|
-
*.log
|
|
91
|
+
*.log
|