PyPI - gpufl - Versions diffs - 0.0.1__tar.gz → 0.1.0__tar.gz - Mend

gpufl 0.0.1__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (248)

gpufl-0.1.0/.clang-format +5 -0
gpufl-0.1.0/.dockerignore +18 -0
gpufl-0.1.0/.github/pull_request_template.md +11 -0
gpufl-0.1.0/.github/workflows/build.yml +119 -0
gpufl-0.1.0/.github/workflows/release.yml +239 -0
{gpufl-0.0.1 → gpufl-0.1.0}/.gitignore +91 -80
gpufl-0.1.0/CMakeLists.txt +605 -0
gpufl-0.1.0/CONTRIBUTING.md +12 -0
gpufl-0.1.0/Dockerfile.demo +42 -0
gpufl-0.1.0/Dockerfile.monitor +85 -0
gpufl-0.1.0/Dockerfile.monitor.amd +94 -0
gpufl-0.1.0/Dockerfile.monitor.supervisord.conf +27 -0
{gpufl-0.0.1 → gpufl-0.1.0}/LICENSE +201 -201
gpufl-0.1.0/PKG-INFO +349 -0
gpufl-0.1.0/README.md +304 -0
gpufl-0.1.0/benchmark/README.md +71 -0
gpufl-0.1.0/benchmark/cuda_gemm.py +44 -0
gpufl-0.1.0/benchmark/pytorch_train.py +145 -0
gpufl-0.1.0/benchmark/run_benchmark.py +263 -0
gpufl-0.1.0/build.sh +1 -0
gpufl-0.1.0/daemon/README.md +252 -0
gpufl-0.1.0/daemon/monitor/CMakeLists.txt +44 -0
gpufl-0.1.0/daemon/monitor/main.cpp +105 -0
gpufl-0.1.0/docker-compose.monitor.amd.yml +43 -0
gpufl-0.1.0/docker-compose.monitor.yml +71 -0
gpufl-0.1.0/example/amd/CMakeLists.txt +71 -0
gpufl-0.1.0/example/amd/README.md +139 -0
gpufl-0.1.0/example/amd/check_device.cpp +31 -0
gpufl-0.1.0/example/amd/gpufl_scope_demo.cpp +240 -0
gpufl-0.1.0/example/amd/vector_add_benchmark.cpp +137 -0
gpufl-0.1.0/example/cuda/CMakeLists.txt +265 -0
gpufl-0.1.0/example/cuda/block_style_example.cu +100 -0
gpufl-0.1.0/example/cuda/check_conflict.cu +81 -0
gpufl-0.1.0/example/cuda/check_device.cu +25 -0
gpufl-0.1.0/example/cuda/cupti_basic.cu +149 -0
gpufl-0.1.0/example/cuda/cupti_pc_sampling.cu +263 -0
gpufl-0.1.0/example/cuda/list_sass_metrics.cu +46 -0
gpufl-0.1.0/example/cuda/memory_coalescing_demo.cu +134 -0
gpufl-0.1.0/example/cuda/occupancy_demo.cu +155 -0
gpufl-0.1.0/example/cuda/sass_divergence_demo.cu +270 -0
gpufl-0.1.0/example/cuda/system_monitor.cu +58 -0
gpufl-0.1.0/example/cuda/test_occupancy.cu +62 -0
gpufl-0.1.0/example/cuda/vector_add_benchmark.cu +126 -0
{gpufl-0.0.1 → gpufl-0.1.0}/example/python/01_basic.py +25 -25
{gpufl-0.0.1 → gpufl-0.1.0}/example/python/02_numba_cuda.py +76 -76
gpufl-0.1.0/example/python/03_pytorch_benchmark.py +149 -0
gpufl-0.1.0/example/python/analyzer/01_analyzer_sample.py +14 -0
gpufl-0.1.0/example/python/requirements.txt +7 -0
gpufl-0.1.0/example/python/viz/01_plot_memory_timeline.py +9 -0
gpufl-0.1.0/example/python/viz/02_plot_stress_timeline.py +9 -0
gpufl-0.1.0/images/Screenshot1.png +0 -0
gpufl-0.1.0/include/gpufl/backends/amd/engine/amd_profiling_engine.hpp +42 -0
gpufl-0.1.0/include/gpufl/backends/amd/engine/dispatch_counter_engine.cpp +282 -0
gpufl-0.1.0/include/gpufl/backends/amd/engine/dispatch_counter_engine.hpp +65 -0
gpufl-0.1.0/include/gpufl/backends/amd/hip_static_collector.cpp +91 -0
gpufl-0.1.0/include/gpufl/backends/amd/hip_static_collector.hpp +20 -0
gpufl-0.1.0/include/gpufl/backends/amd/monitor_adapter_amd.cpp +56 -0
gpufl-0.1.0/include/gpufl/backends/amd/monitor_adapter_amd.hpp +30 -0
gpufl-0.1.0/include/gpufl/backends/amd/rocm_collector.cpp +522 -0
gpufl-0.1.0/include/gpufl/backends/amd/rocm_collector.hpp +37 -0
gpufl-0.1.0/include/gpufl/backends/amd/rocprofiler_backend.cpp +799 -0
gpufl-0.1.0/include/gpufl/backends/amd/rocprofiler_backend.hpp +144 -0
gpufl-0.1.0/include/gpufl/backends/host_collector.hpp +150 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/cuda_collector.cpp +44 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/cuda_collector.hpp +16 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_backend.cpp +1218 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_backend.hpp +159 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_common.hpp +86 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_utils.cpp +170 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/cupti_utils.hpp +87 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +695 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +94 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.cpp +70 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/engine/pc_sampling_with_sass_engine.hpp +65 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +103 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +479 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +53 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +421 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +61 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +483 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +35 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +297 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +27 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/monitor_adapter_nvidia.cpp +81 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/monitor_adapter_nvidia.hpp +32 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/nvml_collector.cpp +341 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/nvml_collector.hpp +48 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/resource_handler.cpp +151 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/resource_handler.hpp +40 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +56 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +19 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/synchronization_handler.cpp +149 -0
gpufl-0.1.0/include/gpufl/backends/nvidia/synchronization_handler.hpp +60 -0
gpufl-0.1.0/include/gpufl/core/activity_record.hpp +141 -0
gpufl-0.1.0/include/gpufl/core/backend_factory.cpp +139 -0
gpufl-0.1.0/include/gpufl/core/backend_factory.hpp +13 -0
gpufl-0.1.0/include/gpufl/core/backend_interfaces.hpp +31 -0
gpufl-0.1.0/include/gpufl/core/batch_buffer.hpp +23 -0
gpufl-0.1.0/include/gpufl/core/common.cpp +45 -0
gpufl-0.1.0/include/gpufl/core/common.hpp +111 -0
gpufl-0.1.0/include/gpufl/core/config_file_loader.cpp +51 -0
gpufl-0.1.0/include/gpufl/core/config_file_loader.hpp +18 -0
gpufl-0.1.0/include/gpufl/core/debug_logger.cpp +9 -0
gpufl-0.1.0/include/gpufl/core/debug_logger.hpp +43 -0
gpufl-0.1.0/include/gpufl/core/dictionary_manager.cpp +575 -0
gpufl-0.1.0/include/gpufl/core/dictionary_manager.hpp +138 -0
gpufl-0.1.0/include/gpufl/core/events.hpp +601 -0
gpufl-0.1.0/include/gpufl/core/gpufl.cpp +699 -0
gpufl-0.1.0/include/gpufl/core/host_info.cpp +131 -0
gpufl-0.1.0/include/gpufl/core/host_info.hpp +30 -0
gpufl-0.1.0/include/gpufl/core/itanium_demangle.cpp +543 -0
gpufl-0.1.0/include/gpufl/core/itanium_demangle.hpp +43 -0
gpufl-0.1.0/include/gpufl/core/json/json.cpp +369 -0
gpufl-0.1.0/include/gpufl/core/json/json.hpp +155 -0
gpufl-0.1.0/include/gpufl/core/logger/file_compressor.cpp +44 -0
gpufl-0.1.0/include/gpufl/core/logger/file_compressor.hpp +18 -0
gpufl-0.1.0/include/gpufl/core/logger/file_log_sink.cpp +151 -0
gpufl-0.1.0/include/gpufl/core/logger/file_log_sink.hpp +82 -0
gpufl-0.1.0/include/gpufl/core/logger/http_log_sink.cpp +408 -0
gpufl-0.1.0/include/gpufl/core/logger/http_log_sink.hpp +181 -0
gpufl-0.1.0/include/gpufl/core/logger/log_rotator.cpp +65 -0
gpufl-0.1.0/include/gpufl/core/logger/log_rotator.hpp +32 -0
gpufl-0.1.0/include/gpufl/core/logger/log_sink.hpp +53 -0
gpufl-0.1.0/include/gpufl/core/logger/logger.cpp +47 -0
gpufl-0.1.0/include/gpufl/core/logger/logger.hpp +76 -0
gpufl-0.1.0/include/gpufl/core/model/batch_models.cpp +316 -0
gpufl-0.1.0/include/gpufl/core/model/batch_models.hpp +167 -0
gpufl-0.1.0/include/gpufl/core/model/graph_launch_event_model.cpp +37 -0
gpufl-0.1.0/include/gpufl/core/model/graph_launch_event_model.hpp +23 -0
gpufl-0.1.0/include/gpufl/core/model/kernel_event_model.cpp +59 -0
gpufl-0.1.0/include/gpufl/core/model/kernel_event_model.hpp +16 -0
gpufl-0.1.0/include/gpufl/core/model/lifecycle_model.cpp +83 -0
gpufl-0.1.0/include/gpufl/core/model/lifecycle_model.hpp +32 -0
gpufl-0.1.0/include/gpufl/core/model/memcpy_event_model.cpp +58 -0
gpufl-0.1.0/include/gpufl/core/model/memcpy_event_model.hpp +24 -0
gpufl-0.1.0/include/gpufl/core/model/memory_alloc_event_model.cpp +42 -0
gpufl-0.1.0/include/gpufl/core/model/memory_alloc_event_model.hpp +28 -0
gpufl-0.1.0/include/gpufl/core/model/model_utils.hpp +109 -0
gpufl-0.1.0/include/gpufl/core/model/nvtx_marker_model.cpp +25 -0
gpufl-0.1.0/include/gpufl/core/model/nvtx_marker_model.hpp +22 -0
gpufl-0.1.0/include/gpufl/core/model/perf_metric_model.cpp +33 -0
gpufl-0.1.0/include/gpufl/core/model/perf_metric_model.hpp +16 -0
gpufl-0.1.0/include/gpufl/core/model/profile_sample_model.cpp +40 -0
gpufl-0.1.0/include/gpufl/core/model/profile_sample_model.hpp +16 -0
gpufl-0.1.0/include/gpufl/core/model/scope_event_model.cpp +43 -0
gpufl-0.1.0/include/gpufl/core/model/scope_event_model.hpp +24 -0
gpufl-0.1.0/include/gpufl/core/model/serializable.hpp +15 -0
gpufl-0.1.0/include/gpufl/core/model/synchronization_event_model.cpp +38 -0
gpufl-0.1.0/include/gpufl/core/model/synchronization_event_model.hpp +30 -0
gpufl-0.1.0/include/gpufl/core/model/system_event_model.cpp +51 -0
gpufl-0.1.0/include/gpufl/core/model/system_event_model.hpp +32 -0
gpufl-0.1.0/include/gpufl/core/monitor.cpp +594 -0
gpufl-0.1.0/include/gpufl/core/monitor.hpp +204 -0
gpufl-0.1.0/include/gpufl/core/monitor_adapter.cpp +41 -0
gpufl-0.1.0/include/gpufl/core/monitor_adapter.hpp +31 -0
gpufl-0.1.0/include/gpufl/core/monitor_backend.hpp +76 -0
gpufl-0.1.0/include/gpufl/core/remote_config.cpp +279 -0
gpufl-0.1.0/include/gpufl/core/remote_config.hpp +60 -0
gpufl-0.1.0/include/gpufl/core/ring_buffer.hpp +96 -0
gpufl-0.1.0/include/gpufl/core/runtime.cpp +6 -0
gpufl-0.1.0/include/gpufl/core/runtime.hpp +32 -0
gpufl-0.1.0/include/gpufl/core/sampler.cpp +131 -0
gpufl-0.1.0/include/gpufl/core/sampler.hpp +63 -0
gpufl-0.1.0/include/gpufl/core/sass_compressor.cpp +109 -0
gpufl-0.1.0/include/gpufl/core/sass_compressor.hpp +52 -0
gpufl-0.1.0/include/gpufl/core/scope_registry.cpp +10 -0
gpufl-0.1.0/include/gpufl/core/scope_registry.hpp +8 -0
gpufl-0.1.0/include/gpufl/core/stack_registry.hpp +47 -0
gpufl-0.1.0/include/gpufl/core/stack_trace.cpp +139 -0
gpufl-0.1.0/include/gpufl/core/stack_trace.hpp +19 -0
gpufl-0.1.0/include/gpufl/core/stream_handle.hpp +9 -0
gpufl-0.1.0/include/gpufl/core/trace_type.hpp +89 -0
gpufl-0.1.0/include/gpufl/core/version.hpp +63 -0
gpufl-0.1.0/include/gpufl/gpufl.hpp +240 -0
gpufl-0.1.0/include/gpufl/report/hint_engine.cpp +91 -0
gpufl-0.1.0/include/gpufl/report/hint_engine.hpp +28 -0
gpufl-0.1.0/include/gpufl/report/text_report.cpp +1127 -0
gpufl-0.1.0/include/gpufl/report/text_report.hpp +176 -0
gpufl-0.1.0/include/gpufl.hpp +3 -0
gpufl-0.1.0/pyproject.toml +85 -0
gpufl-0.1.0/python/bindings.cpp +205 -0
gpufl-0.1.0/python/gpufl/.gitignore +159 -0
gpufl-0.1.0/python/gpufl/__init__.py +227 -0
gpufl-0.1.0/python/gpufl/analyzer/__init__.py +1 -0
gpufl-0.1.0/python/gpufl/analyzer/analyzer.py +1153 -0
gpufl-0.1.0/python/gpufl/cupy/__init__.py +69 -0
gpufl-0.1.0/python/gpufl/jax/__init__.py +68 -0
gpufl-0.1.0/python/gpufl/numba/__init__.py +58 -0
gpufl-0.1.0/python/gpufl/report/__init__.py +1 -0
gpufl-0.1.0/python/gpufl/report/text_report.py +516 -0
gpufl-0.1.0/python/gpufl/torch/__init__.py +59 -0
gpufl-0.1.0/python/gpufl/torch/dispatch.py +184 -0
gpufl-0.1.0/python/gpufl/torch/profile.py +76 -0
gpufl-0.1.0/python/gpufl/torch/stack.py +62 -0
gpufl-0.1.0/python/gpufl/torch/trace_import.py +125 -0
gpufl-0.1.0/python/gpufl/triton/__init__.py +64 -0
gpufl-0.1.0/python/gpufl/utils.py +19 -0
gpufl-0.1.0/python/gpufl/viz/__init__.py +27 -0
gpufl-0.1.0/python/gpufl/viz/reader.py +48 -0
gpufl-0.1.0/python/gpufl/viz/timeline.py +378 -0
gpufl-0.1.0/python/gpufl/viz/visualizer.py +194 -0
gpufl-0.1.0/scripts/docker-demo-loop.sh +17 -0
gpufl-0.1.0/scripts/windows/run-monitor-local.bat +20 -0
gpufl-0.1.0/tests/CMakeLists.txt +187 -0
gpufl-0.1.0/tests/backends/amd/test_rocm_collector.cpp +91 -0
gpufl-0.1.0/tests/backends/nvidia/test_cuda_collector.cpp +34 -0
gpufl-0.1.0/tests/backends/nvidia/test_engine_coverage.cpp +294 -0
gpufl-0.1.0/tests/backends/nvidia/test_nvidia_backend.cpp +132 -0
gpufl-0.1.0/tests/backends/nvidia/test_nvml_collector.cpp +54 -0
gpufl-0.1.0/tests/common/log_utils.cpp +161 -0
gpufl-0.1.0/tests/common/log_utils.hpp +61 -0
gpufl-0.1.0/tests/common/test_kernel.cu +45 -0
gpufl-0.1.0/tests/common/test_kernel.hpp +22 -0
gpufl-0.1.0/tests/common/test_utils.hpp +55 -0
gpufl-0.1.0/tests/core/test_analyzer.cpp +15 -0
gpufl-0.1.0/tests/core/test_api_path_routing.cpp +213 -0
gpufl-0.1.0/tests/core/test_batch_models.cpp +144 -0
gpufl-0.1.0/tests/core/test_http_log_sink.cpp +300 -0
gpufl-0.1.0/tests/core/test_itanium_demangle.cpp +146 -0
gpufl-0.1.0/tests/core/test_monitor.cpp +77 -0
gpufl-0.1.0/tests/core/test_wire_contract.cpp +394 -0
gpufl-0.1.0/tests/main_test_runner.cpp +6 -0
gpufl-0.1.0/tests/python/conftest.py +223 -0
gpufl-0.1.0/tests/python/test_analyzer.py +82 -0
gpufl-0.1.0/tests/python/test_bindings.py +188 -0
gpufl-0.1.0/tests/python/test_remote_upload_smoke.py +185 -0
gpufl-0.1.0/tests/run_engine_coverage.ps1 +86 -0
gpufl-0.1.0/tests/run_engine_coverage.sh +83 -0
gpufl-0.1.0/tests/verify_pipeline.py +101 -0
gpufl-0.0.1/.github/workflows/build.yml +0 -59
gpufl-0.0.1/CMakeLists.txt +0 -74
gpufl-0.0.1/PKG-INFO +0 -362
gpufl-0.0.1/README.md +0 -343
gpufl-0.0.1/example/cuda/CMakeLists.txt +0 -63
gpufl-0.0.1/example/cuda/block_style_example.cu +0 -159
gpufl-0.0.1/example/cuda/system_monitor.cu +0 -21
gpufl-0.0.1/example/python/03_kernel.launch.py +0 -34
gpufl-0.0.1/example/python/requirements.txt +0 -2
gpufl-0.0.1/include/gpufl/backends/cuda.hpp +0 -259
gpufl-0.0.1/include/gpufl/core/common.hpp +0 -201
gpufl-0.0.1/include/gpufl/core/monitor.hpp +0 -261
gpufl-0.0.1/include/gpufl/gpufl.hpp +0 -26
gpufl-0.0.1/pyproject.toml +0 -35
gpufl-0.0.1/python/bindings.cpp +0 -67
gpufl-0.0.1/python/gpufl/__init__.py +0 -32
gpufl-0.0.1/python/gpufl/utils.py +0 -35
gpufl-0.0.1/schema/ndjson.schema.json +0 -133
gpufl-0.0.1/tests/verify_pipeline.py +0 -88

gpufl-0.1.0/.clang-format ADDED Viewed

@@ -0,0 +1,5 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+IndentWidth: 4
+ColumnLimit: 80

gpufl-0.1.0/.dockerignore ADDED Viewed

@@ -0,0 +1,18 @@
+# Python / notebooks — not needed for the C++ daemon build
+python/
+example/python/
+**/.Trash-*
+**/__pycache__/
+**/*.pyc
+# Build artifacts
+cmake-build-*/
+build/
+*.o
+*.a
+# Dev / IDE
+.git/
+.idea/
+.vscode/
+*.md

gpufl-0.1.0/.github/pull_request_template.md ADDED Viewed

@@ -0,0 +1,11 @@
+## Description
+## Type of Change
+- [ ] Bug fix
+- [ ] New feature
+- [ ] Documentation update
+## Testing
+## Checklist
+- [ ] My code follows the style guidelines of this project
+- [ ] I have performed a self-review of my own code
+- [ ] I have commented my code, particularly in hard-to-understand areas

gpufl-0.1.0/.github/workflows/build.yml ADDED Viewed

@@ -0,0 +1,119 @@
+name: Build GPUFl Client
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+jobs:
+  build:
+    name: Build on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-22.04, windows-latest]
+        python-version: ["3.12", "3.13"]
+    env:
+      CMAKE_ARGS: >-
+        -DGPUFL_ENABLE_NVIDIA=ON
+        -DGPUFL_ENABLE_AMD=OFF
+        -DBUILD_TESTING=OFF
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      # Install CUDA Toolkit (provides CUDA_PATH)
+      - name: Install CUDA Toolkit
+        uses: Jimver/cuda-toolkit@v0.2.30
+        id: cuda-toolkit
+        with:
+          cuda: '13.1.0'
+          method: 'network'
+          use-github-cache: false
+      # Make sure CMake can find CUDA headers/libraries during the *pip build*.
+      # Jimver/cuda-toolkit sets CUDA_PATH; we map it to common vars CMake respects.
+      - name: Export CUDA environment for CMake
+        shell: bash
+        run: |
+          echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV
+          echo "CUDAToolkit_ROOT=${CUDA_PATH}" >> $GITHUB_ENV
+          if [ "${{ runner.os }}" == "Windows" ]; then
+            echo "${CUDA_PATH}/bin" >> $GITHUB_PATH
+            echo "${CUDA_PATH}/extras/CUPTI/lib64" >> $GITHUB_PATH
+            echo "C:/Program Files/NVIDIA Corporation/NVSMI" >> $GITHUB_PATH
+          fi
+      - name: Install system dependencies
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libcurl4-openssl-dev
+      - name: Install python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install scikit-build-core pybind11 cmake ninja
+      - name: Build and Install
+        run: |
+          pip install .[viz,analyzer] -v
+      - name: Run C++ Unit Tests
+        # Skip C++ tests on Windows as they require actual NVIDIA GPUs to run (CUDA/CUPTI initialization)
+        if: runner.os != 'Windows'
+        shell: bash
+        run: |
+          # 1. Prepare a local writable directory for CUDA stubs
+          # We cannot write to the system CUDA directory (Permission denied).
+          LOCAL_STUBS_DIR="${GITHUB_WORKSPACE}/local_cuda_stubs"
+          mkdir -p "${LOCAL_STUBS_DIR}"
+          # 2. Gather relevant library directories for the CUDA Toolkit
+          STUBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib/stubs"
+          LIBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib"
+          # 3. Create versioned symlinks in the LOCAL directory
+          # Many binaries expect .so.1 which is only created by the driver installer.
+          for lib in libcuda libnvidia-ml libnvrtc; do
+            if [ -f "${STUBS_DIR}/${lib}.so" ]; then
+              # Symlink the original stub to our local dir
+              ln -sf "${STUBS_DIR}/${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so"
+              # Create the versioned symlink in our local dir
+              ln -sf "${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so.1"
+            fi
+          done
+          # 4. Add local stubs and toolkit libs to LD_LIBRARY_PATH
+          export LD_LIBRARY_PATH="${LOCAL_STUBS_DIR}:${LIBS_DIR}:${LD_LIBRARY_PATH}"
+          # Debug: check what libraries are found
+          echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
+          ls -l "${LOCAL_STUBS_DIR}" || true
+          cmake -B build_tests -S . \
+            -DGPUFL_ENABLE_NVIDIA=ON \
+            -DBUILD_PYTHON=OFF \
+            -DBUILD_TESTING=ON
+          cmake --build build_tests --target gpufl_tests
+          ctest --test-dir build_tests --output-on-failure --verbose --timeout 60
+      - name: Run Python Unit Tests
+        shell: bash
+        run: |
+          python -m pip install pytest
+          export PYTHONPATH=$PYTHONPATH:$(pwd)/python
+          python -m pytest tests/python
+      - name: Verify Logging Pipeline
+        run: |
+          python -u tests/verify_pipeline.py

gpufl-0.1.0/.github/workflows/release.yml ADDED Viewed

@@ -0,0 +1,239 @@
+name: Build and Release Wheels
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:
+jobs:
+  build_wheels:
+    name: Build wheels on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-22.04, windows-latest]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set package version from tag
+        if: startsWith(github.ref, 'refs/tags/v')
+        shell: python
+        run: |
+          import os
+          import re
+          from pathlib import Path
+          ref_name = os.environ.get("GITHUB_REF_NAME", "")
+          if not ref_name.startswith("v"):
+              raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
+          version = ref_name[1:]
+          print(f"Using version from tag: {version}")
+          pyproject = Path("pyproject.toml")
+          text = pyproject.read_text(encoding="utf-8")
+          text_new, n = re.subn(
+              r'(?m)^version\s*=\s*"[^\"]+"$',
+              f'version = "{version}"',
+              text,
+              count=1,
+          )
+          if n != 1:
+              raise SystemExit("Failed to update [project].version in pyproject.toml")
+          pyproject.write_text(text_new, encoding="utf-8")
+          init_py = Path("python/gpufl/__init__.py")
+          if init_py.exists():
+              init_text = init_py.read_text(encoding="utf-8")
+              init_new, _ = re.subn(
+                  r'(?m)^__version__\s*=\s*"[^\"]+"$',
+                  f'__version__ = "{version}"',
+                  init_text,
+              )
+              init_py.write_text(init_new, encoding="utf-8")
+          # Keep the C++ side in lockstep. The CMake project() VERSION is the
+          # single source of truth for GPUFL_CLIENT_VERSION (stamped into the
+          # binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
+          # this, release wheels would ship the tag version in Python metadata
+          # but a stale hardcoded version in the compiled client. CMake's
+          # project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
+          # so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
+          # full version still lands in the wheel metadata above.
+          m = re.match(r"\d+(?:\.\d+){0,3}", version)
+          cmake_version = m.group(0) if m else version
+          cmakelists = Path("CMakeLists.txt")
+          cm_text = cmakelists.read_text(encoding="utf-8")
+          cm_new, cm_n = re.subn(
+              r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
+              rf'\g<1>{cmake_version}',
+              cm_text,
+              count=1,
+              flags=re.DOTALL,
+          )
+          if cm_n != 1:
+              raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
+          cmakelists.write_text(cm_new, encoding="utf-8")
+      - name: Cache cibuildwheel downloads
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/cibuildwheel
+            ~/AppData/Local/pypa/cibuildwheel/Cache
+          key: cibw-${{ runner.os }}-${{ hashFiles('.github/workflows/release.yml') }}
+          restore-keys: |
+            cibw-${{ runner.os }}-
+      - name: Install CUDA (Windows)
+        if: runner.os == 'Windows'
+        uses: Jimver/cuda-toolkit@v0.2.30
+        with:
+          cuda: '13.1.0'
+          method: 'network'
+      - name: Prefetch virtualenv.pyz (Windows)
+        if: runner.os == 'Windows'
+        shell: pwsh
+        run: |
+          $version = "20.27.1"
+          $cacheDir = Join-Path $env:LOCALAPPDATA "pypa\cibuildwheel\Cache"
+          New-Item -ItemType Directory -Path $cacheDir -Force | Out-Null
+          $dest = Join-Path $cacheDir "virtualenv-$version.pyz"
+          if (Test-Path $dest) {
+            Write-Host "virtualenv.pyz already cached: $dest"
+            exit 0
+          }
+          $urls = @(
+            "https://raw.githubusercontent.com/pypa/get-virtualenv/$version/public/virtualenv.pyz",
+            "https://raw.githubusercontent.com/pypa/get-virtualenv/refs/tags/$version/public/virtualenv.pyz",
+            "https://bootstrap.pypa.io/virtualenv.pyz"
+          )
+          $max = 6
+          $ok = $false
+          foreach ($url in $urls) {
+            for ($i = 1; $i -le $max; $i++) {
+              try {
+                Write-Host "Downloading virtualenv.pyz from $url (attempt $i/$max)..."
+                Invoke-WebRequest -Uri $url -OutFile $dest -TimeoutSec 120 -Headers @{ "User-Agent" = "cibuildwheel-prefetch" }
+                if ((Get-Item $dest).Length -gt 0) {
+                  Write-Host "Downloaded: $dest"
+                  $ok = $true
+                  break
+                }
+              } catch {
+                if (Test-Path $dest) { Remove-Item $dest -Force -ErrorAction SilentlyContinue }
+                if ($i -eq $max) { break }
+                Start-Sleep -Seconds (5 * $i)
+              }
+            }
+            if ($ok) { break }
+          }
+          if (-not $ok) { throw "Failed to prefetch virtualenv.pyz from all sources." }
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.22.0
+        env:
+          CIBW_VIRTUALENV_VERSION: "20.27.1"
+          CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
+          CIBW_BEFORE_ALL_LINUX: >-
+            curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
+            dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1
+          CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
+          CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
+          CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 -w {dest_dir} {wheel}"
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
+          path: ./wheelhouse/*.whl
+  build_sdist:
+    name: Build source distribution
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set package version from tag
+        if: startsWith(github.ref, 'refs/tags/v')
+        shell: python
+        run: |
+          import os
+          import re
+          from pathlib import Path
+          ref_name = os.environ.get("GITHUB_REF_NAME", "")
+          if not ref_name.startswith("v"):
+              raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
+          version = ref_name[1:]
+          print(f"Using version from tag: {version}")
+          pyproject = Path("pyproject.toml")
+          text = pyproject.read_text(encoding="utf-8")
+          text_new, n = re.subn(
+              r'(?m)^version\s*=\s*"[^\"]+"$',
+              f'version = "{version}"',
+              text,
+              count=1,
+          )
+          if n != 1:
+              raise SystemExit("Failed to update [project].version in pyproject.toml")
+          pyproject.write_text(text_new, encoding="utf-8")
+          init_py = Path("python/gpufl/__init__.py")
+          if init_py.exists():
+              init_text = init_py.read_text(encoding="utf-8")
+              init_new, _ = re.subn(
+                  r'(?m)^__version__\s*=\s*"[^\"]+"$',
+                  f'__version__ = "{version}"',
+                  init_text,
+              )
+              init_py.write_text(init_new, encoding="utf-8")
+          # Keep the C++ side in lockstep. The CMake project() VERSION is the
+          # single source of truth for GPUFL_CLIENT_VERSION (stamped into the
+          # binary, sent as User-Agent / X-GpuFlight-Client-Version). Without
+          # this, release wheels would ship the tag version in Python metadata
+          # but a stale hardcoded version in the compiled client. CMake's
+          # project(VERSION ...) only accepts numeric major.minor.patch[.tweak],
+          # so strip any PEP 440 suffix (rc/dev/post) for the CMake value — the
+          # full version still lands in the wheel metadata above.
+          m = re.match(r"\d+(?:\.\d+){0,3}", version)
+          cmake_version = m.group(0) if m else version
+          cmakelists = Path("CMakeLists.txt")
+          cm_text = cmakelists.read_text(encoding="utf-8")
+          cm_new, cm_n = re.subn(
+              r'(project\(gpufl_client\s+VERSION\s+)\d+(?:\.\d+)*',
+              rf'\g<1>{cmake_version}',
+              cm_text,
+              count=1,
+              flags=re.DOTALL,
+          )
+          if cm_n != 1:
+              raise SystemExit("Failed to update project(... VERSION) in CMakeLists.txt")
+          cmakelists.write_text(cm_new, encoding="utf-8")
+      - name: Build sdist
+        run: pipx run build --sdist
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-sdist
+          path: dist/*.tar.gz
+  upload_pypi:
+    needs: [build_wheels, build_sdist]
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          pattern: cibw-*
+          path: dist
+          merge-multiple: true
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}

{gpufl-0.0.1 → gpufl-0.1.0}/.gitignore RENAMED Viewed

@@ -1,80 +1,91 @@
-### idea
-.idea/**
-build/
-cmake-build-*/
-cmake/
-### C++ template
-# Prerequisites
-*.d
-# Compiled Object files
-*.slo
-*.lo
-*.o
-*.obj
-# Precompiled Headers
-*.gch
-*.pch
-# Compiled Dynamic libraries
-*.so
-*.dylib
-*.dll
-# Fortran module files
-*.mod
-*.smod
-# Compiled Static libraries
-*.lai
-*.la
-*.a
-*.lib
-# Executables
-*.exe
-*.out
-*.app
-### C template
-# Prerequisites
-*.d
-# Object files
-*.o
-*.ko
-*.obj
-*.elf
-# Linker output
-*.ilk
-*.map
-*.exp
-# Precompiled Headers
-*.gch
-*.pch
-# Libraries
-*.lib
-*.a
-*.la
-*.lo
-# Shared objects (inc. Windows DLLs)
-*.dll
-*.so
-*.so.*
-*.dylib
-# Executables
-*.exe
-*.out
-*.app
-*.i*86
-*.x86_64
-*.hex
+### claude
+.claude/
+### idea
+.idea/**
+build/
+build-*/
+build_tests/
+cmake-build-*/
+cmake/
+CMakeFiles/
+CMakeCache.txt
+wget-log*
+### docker
+example/python/docker/**/
+### C++ template
+# Prerequisites
+*.d
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+# Precompiled Headers
+*.gch
+*.pch
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+# Fortran module files
+*.mod
+*.smod
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+# Executables
+*.exe
+*.out
+*.app
+### C template
+# Prerequisites
+*.d
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+# Linker output
+*.ilk
+*.map
+*.exp
+# Precompiled Headers
+*.gch
+*.pch
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+*.log

gpufl 0.0.1__tar.gz → 0.1.0__tar.gz

gpufl 0.0.1__tar.gz → 0.1.0__tar.gz