PyPI - gpufl - Versions diffs - 0.0.1__tar.gz → 0.1.0.dev0__tar.gz - Mend

gpufl 0.0.1__tar.gz → 0.1.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117) hide show

gpufl-0.1.0.dev0/.clang-format +5 -0
gpufl-0.1.0.dev0/.github/pull_request_template.md +11 -0
gpufl-0.1.0.dev0/.github/workflows/build.yml +119 -0
gpufl-0.1.0.dev0/.github/workflows/release.yml +71 -0
{gpufl-0.0.1 → gpufl-0.1.0.dev0}/.gitignore +80 -80
gpufl-0.1.0.dev0/CMakeLists.txt +277 -0
gpufl-0.1.0.dev0/CONTRIBUTING.md +12 -0
{gpufl-0.0.1 → gpufl-0.1.0.dev0}/LICENSE +201 -201
gpufl-0.1.0.dev0/PKG-INFO +192 -0
gpufl-0.1.0.dev0/README.md +167 -0
gpufl-0.1.0.dev0/build.sh +1 -0
gpufl-0.1.0.dev0/example/cuda/CMakeLists.txt +241 -0
gpufl-0.1.0.dev0/example/cuda/block_style_example.cu +97 -0
gpufl-0.1.0.dev0/example/cuda/check_conflict.cu +81 -0
gpufl-0.1.0.dev0/example/cuda/check_device.cu +25 -0
gpufl-0.1.0.dev0/example/cuda/cupti_basic.cu +129 -0
gpufl-0.1.0.dev0/example/cuda/cupti_pc_sampling.cu +263 -0
gpufl-0.1.0.dev0/example/cuda/list_sass_metrics.cu +46 -0
gpufl-0.1.0.dev0/example/cuda/occupancy_demo.cu +155 -0
gpufl-0.1.0.dev0/example/cuda/system_monitor.cu +58 -0
gpufl-0.1.0.dev0/example/cuda/test_occupancy.cu +62 -0
gpufl-0.1.0.dev0/example/cuda/test_sass_cubin.cu +164 -0
gpufl-0.1.0.dev0/example/cuda/test_sass_metrics.cu +85 -0
gpufl-0.1.0.dev0/example/cuda/vector_add_benchmark.cu +103 -0
{gpufl-0.0.1 → gpufl-0.1.0.dev0}/example/python/01_basic.py +25 -25
{gpufl-0.0.1 → gpufl-0.1.0.dev0}/example/python/02_numba_cuda.py +76 -76
gpufl-0.1.0.dev0/example/python/03_pytorch_benchmark.py +75 -0
gpufl-0.1.0.dev0/example/python/analyzer/01_analyzer_sample.py +10 -0
gpufl-0.1.0.dev0/example/python/requirements.txt +7 -0
gpufl-0.1.0.dev0/example/python/viz/01_plot_memory_timeline.py +9 -0
gpufl-0.1.0.dev0/example/python/viz/02_plot_stress_timeline.py +9 -0
gpufl-0.1.0.dev0/images/Screenshot1.png +0 -0
gpufl-0.1.0.dev0/include/gpufl/backends/amd/rocm_collector.cpp +10 -0
gpufl-0.1.0.dev0/include/gpufl/backends/amd/rocm_collector.hpp +18 -0
gpufl-0.1.0.dev0/include/gpufl/backends/host_collector.hpp +150 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cuda_collector.cpp +43 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cuda_collector.hpp +16 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.cpp +806 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.hpp +164 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_common.hpp +146 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_utils.cpp +73 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_utils.hpp +37 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +282 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +26 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +237 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +26 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/nvml_collector.cpp +188 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/nvml_collector.hpp +38 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/resource_handler.cpp +63 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/resource_handler.hpp +25 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +222 -0
gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +42 -0
gpufl-0.1.0.dev0/include/gpufl/core/common.cpp +45 -0
gpufl-0.1.0.dev0/include/gpufl/core/common.hpp +109 -0
gpufl-0.1.0.dev0/include/gpufl/core/debug_logger.cpp +9 -0
gpufl-0.1.0.dev0/include/gpufl/core/debug_logger.hpp +43 -0
gpufl-0.1.0.dev0/include/gpufl/core/events.hpp +253 -0
gpufl-0.1.0.dev0/include/gpufl/core/gpufl.cpp +365 -0
gpufl-0.1.0.dev0/include/gpufl/core/logger.cpp +437 -0
gpufl-0.1.0.dev0/include/gpufl/core/logger.hpp +88 -0
gpufl-0.1.0.dev0/include/gpufl/core/monitor.hpp +100 -0
gpufl-0.1.0.dev0/include/gpufl/core/monitor_backend.hpp +46 -0
gpufl-0.1.0.dev0/include/gpufl/core/ring_buffer.hpp +75 -0
gpufl-0.1.0.dev0/include/gpufl/core/runtime.cpp +6 -0
gpufl-0.1.0.dev0/include/gpufl/core/runtime.hpp +30 -0
gpufl-0.1.0.dev0/include/gpufl/core/sampler.cpp +73 -0
gpufl-0.1.0.dev0/include/gpufl/core/sampler.hpp +51 -0
gpufl-0.1.0.dev0/include/gpufl/core/scope_registry.cpp +10 -0
gpufl-0.1.0.dev0/include/gpufl/core/scope_registry.hpp +8 -0
gpufl-0.1.0.dev0/include/gpufl/core/stack_registry.hpp +47 -0
gpufl-0.1.0.dev0/include/gpufl/core/stack_trace.cpp +112 -0
gpufl-0.1.0.dev0/include/gpufl/core/stack_trace.hpp +12 -0
gpufl-0.1.0.dev0/include/gpufl/core/trace_type.hpp +13 -0
gpufl-0.1.0.dev0/include/gpufl/cuda/monitor.cpp +380 -0
gpufl-0.1.0.dev0/include/gpufl/gpufl.hpp +80 -0
gpufl-0.1.0.dev0/include/gpufl.hpp +3 -0
gpufl-0.1.0.dev0/pyproject.toml +63 -0
gpufl-0.1.0.dev0/python/bindings.cpp +103 -0
gpufl-0.1.0.dev0/python/gpufl/.gitignore +159 -0
gpufl-0.1.0.dev0/python/gpufl/__init__.py +83 -0
gpufl-0.1.0.dev0/python/gpufl/analyzer/__init__.py +1 -0
gpufl-0.1.0.dev0/python/gpufl/analyzer/analyzer.py +359 -0
gpufl-0.1.0.dev0/python/gpufl/utils.py +19 -0
gpufl-0.1.0.dev0/python/gpufl/viz/__init__.py +27 -0
gpufl-0.1.0.dev0/python/gpufl/viz/reader.py +48 -0
gpufl-0.1.0.dev0/python/gpufl/viz/timeline.py +380 -0
gpufl-0.1.0.dev0/python/gpufl/viz/visualizer.py +194 -0
{gpufl-0.0.1 → gpufl-0.1.0.dev0}/schema/ndjson.schema.json +133 -133
gpufl-0.1.0.dev0/tests/CMakeLists.txt +134 -0
gpufl-0.1.0.dev0/tests/backends/nvidia/test_cuda_collector.cpp +34 -0
gpufl-0.1.0.dev0/tests/backends/nvidia/test_nvidia_backend.cpp +127 -0
gpufl-0.1.0.dev0/tests/backends/nvidia/test_nvml_collector.cpp +54 -0
gpufl-0.1.0.dev0/tests/common/test_utils.hpp +31 -0
gpufl-0.1.0.dev0/tests/core/test_analyzer.cpp +15 -0
gpufl-0.1.0.dev0/tests/core/test_monitor.cpp +77 -0
gpufl-0.1.0.dev0/tests/main_test_runner.cpp +6 -0
gpufl-0.1.0.dev0/tests/python/conftest.py +69 -0
gpufl-0.1.0.dev0/tests/python/test_analyzer.py +54 -0
gpufl-0.1.0.dev0/tests/verify_pipeline.py +91 -0
gpufl-0.0.1/.github/workflows/build.yml +0 -59
gpufl-0.0.1/CMakeLists.txt +0 -74
gpufl-0.0.1/PKG-INFO +0 -362
gpufl-0.0.1/README.md +0 -343
gpufl-0.0.1/example/cuda/CMakeLists.txt +0 -63
gpufl-0.0.1/example/cuda/block_style_example.cu +0 -159
gpufl-0.0.1/example/cuda/system_monitor.cu +0 -21
gpufl-0.0.1/example/python/03_kernel.launch.py +0 -34
gpufl-0.0.1/example/python/requirements.txt +0 -2
gpufl-0.0.1/include/gpufl/backends/cuda.hpp +0 -259
gpufl-0.0.1/include/gpufl/core/common.hpp +0 -201
gpufl-0.0.1/include/gpufl/core/monitor.hpp +0 -261
gpufl-0.0.1/include/gpufl/gpufl.hpp +0 -26
gpufl-0.0.1/pyproject.toml +0 -35
gpufl-0.0.1/python/bindings.cpp +0 -67
gpufl-0.0.1/python/gpufl/__init__.py +0 -32
gpufl-0.0.1/python/gpufl/utils.py +0 -35
gpufl-0.0.1/tests/verify_pipeline.py +0 -88

gpufl-0.1.0.dev0/.clang-format ADDED Viewed

@@ -0,0 +1,5 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+IndentWidth: 4
+ColumnLimit: 80

gpufl-0.1.0.dev0/.github/pull_request_template.md ADDED Viewed

@@ -0,0 +1,11 @@
+## Description
+## Type of Change
+- [ ] Bug fix
+- [ ] New feature
+- [ ] Documentation update
+## Testing
+## Checklist
+- [ ] My code follows the style guidelines of this project
+- [ ] I have performed a self-review of my own code
+- [ ] I have commented my code, particularly in hard-to-understand areas

gpufl-0.1.0.dev0/.github/workflows/build.yml ADDED Viewed

@@ -0,0 +1,119 @@
+name: Build GPUFl Client
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+jobs:
+  build:
+    name: Build on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-22.04, windows-latest]
+        python-version: ["3.12", "3.13"]
+    env:  # job-level variables, visible to every step below
+      CMAKE_ARGS: >-
+        -DGPUFL_ENABLE_NVIDIA=ON
+        -DGPUFL_ENABLE_AMD=OFF
+        -DBUILD_TESTING=OFF
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      # Install the CUDA Toolkit; the export step below reads the CUDA_PATH variable this action provides.
+      - name: Install CUDA Toolkit
+        uses: Jimver/cuda-toolkit@v0.2.30
+        id: cuda-toolkit
+        with:
+          cuda: '13.1.0'
+          method: 'network'
+          use-github-cache: false
+      # Make sure CMake can find CUDA headers/libraries during the *pip build*.
+      # Jimver/cuda-toolkit sets CUDA_PATH; we copy it into CUDA_HOME and CUDAToolkit_ROOT, and on Windows also add the toolkit bin, CUPTI and NVSMI directories to PATH.
+      - name: Export CUDA environment for CMake
+        shell: bash
+        run: |
+          echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV
+          echo "CUDAToolkit_ROOT=${CUDA_PATH}" >> $GITHUB_ENV
+          if [ "${{ runner.os }}" == "Windows" ]; then
+            echo "${CUDA_PATH}/bin" >> $GITHUB_PATH
+            echo "${CUDA_PATH}/extras/CUPTI/lib64" >> $GITHUB_PATH
+            echo "C:/Program Files/NVIDIA Corporation/NVSMI" >> $GITHUB_PATH
+          fi
+      - name: Install system dependencies
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libcurl4-openssl-dev
+      - name: Install python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install scikit-build-core pybind11 cmake ninja
+      - name: Build and Install
+        run: |
+          pip install .[viz,analyzer] -v
+      - name: Run C++ Unit Tests
+        # Skip C++ tests on Windows as they require actual NVIDIA GPUs to run (CUDA/CUPTI initialization); the script below is Linux-specific and puts the toolkit's stub libraries on LD_LIBRARY_PATH.
+        if: runner.os != 'Windows'
+        shell: bash
+        run: |
+          # 1. Prepare a local writable directory for CUDA stubs
+          # We cannot write to the system CUDA directory (Permission denied).
+          LOCAL_STUBS_DIR="${GITHUB_WORKSPACE}/local_cuda_stubs"
+          mkdir -p "${LOCAL_STUBS_DIR}"
+          # 2. Gather relevant library directories for the CUDA Toolkit
+          STUBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib/stubs"
+          LIBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib"
+          # 3. Create versioned symlinks in the LOCAL directory
+          # Many binaries expect .so.1 which is only created by the driver installer.
+          for lib in libcuda libnvidia-ml libnvrtc; do
+            if [ -f "${STUBS_DIR}/${lib}.so" ]; then
+              # Symlink the original stub to our local dir
+              ln -sf "${STUBS_DIR}/${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so"
+              # Create the versioned symlink in our local dir
+              ln -sf "${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so.1"
+            fi
+          done
+          # 4. Add local stubs and toolkit libs to LD_LIBRARY_PATH
+          export LD_LIBRARY_PATH="${LOCAL_STUBS_DIR}:${LIBS_DIR}:${LD_LIBRARY_PATH}"
+          # Debug: check what libraries are found
+          echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
+          ls -l "${LOCAL_STUBS_DIR}" || true
+          cmake -B build_tests -S . \
+            -DGPUFL_ENABLE_NVIDIA=ON \
+            -DBUILD_PYTHON=OFF \
+            -DBUILD_TESTING=ON
+          cmake --build build_tests --target gpufl_tests
+          ctest --test-dir build_tests --output-on-failure --verbose --timeout 60
+      - name: Run Python Unit Tests
+        shell: bash
+        run: |
+          python -m pip install pytest
+          export PYTHONPATH=$PYTHONPATH:$(pwd)/python
+          python -m pytest tests/python
+      - name: Verify Logging Pipeline
+        run: |
+          python -u tests/verify_pipeline.py

gpufl-0.1.0.dev0/.github/workflows/release.yml ADDED Viewed

@@ -0,0 +1,71 @@
+name: Build and Release Wheels
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:  # manual runs build the artifacts only; upload_pypi below is gated on pushed v* tags
+jobs:
+  build_wheels:
+    name: Build wheels on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-22.04, windows-latest]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install CUDA (Windows)  # Linux instead installs its CUDA packages through CIBW_BEFORE_ALL_LINUX below
+        if: runner.os == 'Windows'
+        uses: Jimver/cuda-toolkit@v0.2.30
+        with:
+          cuda: '13.1.0'
+          method: 'network'
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.22.0
+        env:
+          CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
+          CIBW_BEFORE_ALL_LINUX: >-
+            curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
+            dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1
+          CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
+          CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
+          CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 -w {dest_dir} {wheel}"  # keeps libcuda.so.1 out of the repaired wheel; NOTE(review): presumably because it ships with the NVIDIA driver - confirm
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
+          path: ./wheelhouse/*.whl
+  build_sdist:
+    name: Build source distribution
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build sdist
+        run: pipx run build --sdist
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-sdist
+          path: dist/*.tar.gz
+  upload_pypi:  # runs after both build jobs, and only for pushed v* tags (see the if: condition)
+    needs: [build_wheels, build_sdist]
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          pattern: cibw-*
+          path: dist
+          merge-multiple: true  # collect every cibw-* artifact (wheels and sdist) into dist
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}

{gpufl-0.0.1 → gpufl-0.1.0.dev0}/.gitignore RENAMED Viewed

@@ -1,80 +1,80 @@
-### idea
-.idea/**
-build/
-cmake-build-*/
-cmake/
-### C++ template
-# Prerequisites
-*.d
-# Compiled Object files
-*.slo
-*.lo
-*.o
-*.obj
-# Precompiled Headers
-*.gch
-*.pch
-# Compiled Dynamic libraries
-*.so
-*.dylib
-*.dll
-# Fortran module files
-*.mod
-*.smod
-# Compiled Static libraries
-*.lai
-*.la
-*.a
-*.lib
-# Executables
-*.exe
-*.out
-*.app
-### C template
-# Prerequisites
-*.d
-# Object files
-*.o
-*.ko
-*.obj
-*.elf
-# Linker output
-*.ilk
-*.map
-*.exp
-# Precompiled Headers
-*.gch
-*.pch
-# Libraries
-*.lib
-*.a
-*.la
-*.lo
-# Shared objects (inc. Windows DLLs)
-*.dll
-*.so
-*.so.*
-*.dylib
-# Executables
-*.exe
-*.out
-*.app
-*.i*86
-*.x86_64
-*.hex
+### idea
+.idea/**
+build/
+cmake-build-*/
+cmake/
+### C++ template
+# Prerequisites
+*.d
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+# Precompiled Headers
+*.gch
+*.pch
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+# Fortran module files
+*.mod
+*.smod
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+# Executables
+*.exe
+*.out
+*.app
+### C template
+# Prerequisites
+*.d
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+# Linker output
+*.ilk
+*.map
+*.exp
+# Precompiled Headers
+*.gch
+*.pch
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+*.log

gpufl-0.1.0.dev0/CMakeLists.txt ADDED Viewed

@@ -0,0 +1,277 @@
+cmake_minimum_required(VERSION 3.31)
+project(
+    gpufl_client
+    VERSION 0.1.0
+    DESCRIPTION "Header-only GPU monitoring client library"
+    LANGUAGES CXX)
+# NOTE(review): DESCRIPTION still says "Header-only", but the gpufl target in
+# this file is a compiled STATIC library - confirm the wording.
+# --- Build switches (each can be overridden with -D<NAME>=ON|OFF) ------------
+option(GPUFL_ENABLE_NVIDIA "Enable NVIDIA backends (CUDA + NVML when available)" ON)
+option(GPUFL_ENABLE_AMD "Enable AMD backends (ROCm when available)" OFF)
+option(BUILD_GPUFL_EXAMPLE "Build gpufl example application" ON)
+option(BUILD_PYTHON "Build Python bindings" OFF)
+# --- C++ dialect: C++17 is required, compiler extensions are disabled --------
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_STANDARD 17)
+# --- CUDA architectures (CI friendly) ----------------------------------------
+# Fall back to "all-major" only when no value has been supplied already.
+if(NOT CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES "all-major")
+endif()
+# -----------------------
+# Library target: compiled STATIC library; consumers link gpufl::gpufl
+# -----------------------
+add_library(gpufl STATIC
+        include/gpufl/backends/nvidia/sampler/cupti_sass.cpp  # NOTE(review): listed unconditionally, even with NVIDIA off - confirm it guards itself
+        include/gpufl/backends/nvidia/sampler/cupti_sass.hpp
+)
+add_library(gpufl::gpufl ALIAS gpufl)
+target_include_directories(gpufl
+    PUBLIC
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+        $<INSTALL_INTERFACE:include>
+)
+target_compile_features(gpufl PUBLIC cxx_std_17)  # PUBLIC: C++17 applies to gpufl's own sources and to its consumers
+# Enable PIC for static library (required when linking into shared libraries like Python modules)
+set_target_properties(gpufl PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_sources(gpufl PRIVATE
+    include/gpufl/core/logger.cpp
+    include/gpufl/core/sampler.cpp
+    include/gpufl/core/runtime.cpp
+    include/gpufl/core/gpufl.cpp
+    include/gpufl/core/common.cpp
+    include/gpufl/core/debug_logger.cpp
+    include/gpufl/core/stack_trace.cpp
+    include/gpufl/core/scope_registry.cpp
+)
+set(GPUFL_HAS_CUDA 0)  # capability flags start at 0; the backend sections below set them to 1
+set(GPUFL_HAS_NVML 0)
+set(GPUFL_HAS_ROCM 0)
+set(GPUFL_HAS_CUPTI 0)
+# -----------------------
+# Backends
+# -----------------------
+# Publish each backend switch to gpufl and to everything linking it as a 0/1
+# preprocessor macro. $<BOOL:...> evaluates to 1 when the option holds a true
+# value and to 0 otherwise, which yields the same two definitions that a
+# separate if()/else() per option would.
+# NVIDIA is listed first, then AMD.
+target_compile_definitions(gpufl
+    PUBLIC
+        GPUFL_ENABLE_NVIDIA=$<BOOL:${GPUFL_ENABLE_NVIDIA}>
+        GPUFL_ENABLE_AMD=$<BOOL:${GPUFL_ENABLE_AMD}>
+)
+if(GPUFL_ENABLE_NVIDIA)
+    # CUDA capability: only if a CUDA compiler is detected (check_language) and
+    # find_package(CUDAToolkit) succeeds; otherwise GPUFL_HAS_CUDA stays 0 and
+    # none of the CUDA/CUPTI sources below are added to the target.
+    include(CheckLanguage)
+    check_language(CUDA)
+    if(CMAKE_CUDA_COMPILER)
+        enable_language(CUDA)
+        find_package(CUDAToolkit QUIET)
+        if(CUDAToolkit_FOUND)
+            set(GPUFL_HAS_CUDA 1)
+            target_sources(gpufl PRIVATE
+                include/gpufl/backends/nvidia/cuda_collector.cpp
+                include/gpufl/backends/nvidia/cupti_utils.cpp
+                include/gpufl/backends/nvidia/resource_handler.cpp
+                include/gpufl/backends/nvidia/kernel_launch_handler.cpp
+                include/gpufl/backends/nvidia/mem_transfer_handler.cpp
+                include/gpufl/cuda/monitor.cpp
+                include/gpufl/backends/nvidia/cupti_backend.cpp)
+            target_link_libraries(gpufl PRIVATE CUDA::cudart CUDA::cuda_driver)
+            # --------------------------------------------------------
+            # CUPTI support: prefer the imported CUDA::cupti target, otherwise search manually.
+            # NOTE(review): the cupti_*.cpp sources above are added even if CUPTI is not found - confirm they guard on GPUFL_HAS_CUPTI.
+            if (TARGET CUDA::cupti)
+                target_link_libraries(gpufl PRIVATE CUDA::cupti)
+                set(GPUFL_HAS_CUPTI 1)
+                message(STATUS "Found CUPTI via CUDAToolkit target")
+            else()
+                # Fallback: manual search when the target is missing; CUPTI is enabled only if both the library and cupti.h are found
+                find_library(CUPTI_LIBRARY NAMES cupti
+                    HINTS "${CUDAToolkit_ROOT}/extras/CUPTI/lib64"
+                    "${CUDAToolkit_ROOT}/extras/CUPTI/lib"
+                    "$ENV{CUDA_PATH}/extras/CUPTI/lib64"
+                )
+                find_path(CUPTI_INCLUDE_DIR NAMES cupti.h
+                    HINTS "${CUDAToolkit_ROOT}/extras/CUPTI/include"
+                    "$ENV{CUDA_PATH}/extras/CUPTI/include"
+                )
+                if(CUPTI_LIBRARY AND CUPTI_INCLUDE_DIR)
+                    target_link_libraries(gpufl PRIVATE ${CUPTI_LIBRARY})
+                    target_include_directories(gpufl PRIVATE ${CUPTI_INCLUDE_DIR})
+                    set(GPUFL_HAS_CUPTI 1)
+                    message(STATUS "Found CUPTI manually: ${CUPTI_LIBRARY}")
+                endif()
+            endif()
+        endif()
+    endif()
+    # NVML capability (evaluated even when no CUDA toolkit was found above):
+    # - Non-Windows: GPUFL_HAS_NVML=1 only if find_library locates nvidia-ml, which is then linked.
+    # - Windows: GPUFL_HAS_NVML=1 as soon as nvml.dll is found; the import library is linked
+    #   only if it is also found, since many projects LoadLibrary/GetProcAddress at runtime.
+    #   If you do runtime loading, treat NVML as "capable" without link-time lib.
+    # NOTE(review): whether the NVML collector really loads NVML at runtime on Windows is not visible here - confirm.
+    if(WIN32)
+        # nvml.dll usually comes from the NVIDIA driver (NVSMI), not CUDA toolkit.
+        find_path(NVML_DLL_DIR NAMES nvml.dll
+            PATHS
+            "$ENV{ProgramFiles}/NVIDIA Corporation/NVSMI"
+            "$ENV{SystemRoot}/System32"
+        )
+        if(NVML_DLL_DIR)
+            set(GPUFL_HAS_NVML 1)
+            # If you still want link-time import lib, only do it when DLL exists:
+            find_library(NVML_LIBRARY NAMES nvml nvidia-ml
+                PATHS "$ENV{CUDA_PATH}/lib/x64"
+                PATH_SUFFIXES lib lib/x64
+            )
+            if(NVML_LIBRARY)
+                target_link_libraries(gpufl PRIVATE ${NVML_LIBRARY})
+            endif()
+        endif()
+    else()
+        find_library(NVML_LIBRARY NAMES nvidia-ml
+                HINTS
+                "${CUDAToolkit_LIBRARY_ROOT}/stubs"
+                "${CUDAToolkit_ROOT}/targets/x86_64-linux/lib/stubs"
+                "${CUDAToolkit_ROOT}/lib64/stubs"
+                "$ENV{CUDA_PATH}/targets/x86_64-linux/lib/stubs"
+                "/usr/local/cuda-13.1/targets/x86_64-linux/lib/stubs"  # NOTE(review): version-pinned fallback path - confirm it should track the toolkit version
+                "/usr/lib/wsl/lib"  # Common location for WSL2 users
+        )
+        if(NVML_LIBRARY)
+            set(GPUFL_HAS_NVML 1)
+            target_link_libraries(gpufl PRIVATE ${NVML_LIBRARY})
+            message(STATUS "Found NVML (Linux): ${NVML_LIBRARY}")
+        else()
+            message(WARNING "NVML not found! GPUFL_HAS_NVML will be 0. (Check CUDA Toolkit 'stubs' folder)")
+        endif()
+    endif()
+    if(WIN32)
+        target_link_libraries(gpufl PRIVATE dbghelp)  # NOTE(review): presumably needed by core/stack_trace.cpp, which is compiled even when NVIDIA is off - confirm placement
+    endif()
+    # Apply definitions to public interface so tests inherit them (only reached when GPUFL_ENABLE_NVIDIA is ON)
+    target_compile_definitions(gpufl PUBLIC
+        GPUFL_HAS_CUDA=${GPUFL_HAS_CUDA}
+        GPUFL_HAS_NVML=${GPUFL_HAS_NVML}
+        GPUFL_HAS_CUPTI=${GPUFL_HAS_CUPTI}
+    )
+    #
+    # Only compile NVML collector if NVML is actually available (GPUFL_HAS_NVML was set to 1 above)
+    #
+    if(GPUFL_HAS_NVML)
+        target_sources(gpufl PRIVATE
+            include/gpufl/backends/nvidia/nvml_collector.cpp
+        )
+    endif()
+    # -----------------------
+    # CUDA Example (only when CUDA is available, examples are enabled, and this is the top-level project)
+    # -----------------------
+    if(BUILD_GPUFL_EXAMPLE AND GPUFL_HAS_CUDA AND CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
+        add_subdirectory(example/cuda)
+    endif()
+    # -----------------------
+    # Unit Tests (top-level builds only; this sits inside if(GPUFL_ENABLE_NVIDIA), so tests are not added when NVIDIA is off)
+    # -----------------------
+    option(BUILD_TESTING "Build the testing tree." ON)
+    if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR AND BUILD_TESTING)
+        enable_testing()
+        add_subdirectory(tests)
+    endif()
+endif()
+# -----------------------
+# AMD backend (placeholder: ROCm is assumed present, real detection comes later)
+# -----------------------
+if(GPUFL_ENABLE_AMD)
+    # There is no detection step yet, so the capability flag is simply set to 1.
+    set(GPUFL_HAS_ROCM 1)
+    target_compile_definitions(gpufl PUBLIC
+        GPUFL_ENABLE_AMD=1
+        GPUFL_HAS_ROCM=1
+    )
+    target_sources(gpufl PRIVATE include/gpufl/backends/amd/rocm_collector.cpp)
+endif()
+# Optional Python extension module, built only when BUILD_PYTHON is ON.
+if(BUILD_PYTHON)
+    find_package(pybind11 QUIET)
+    if(NOT pybind11_FOUND)
+        # No installed pybind11 was found: fetch it from GitHub at configure time.
+        include(FetchContent)
+        FetchContent_Declare(pybind11
+            GIT_REPOSITORY https://github.com/pybind/pybind11.git
+            GIT_TAG v2.13)
+        FetchContent_MakeAvailable(pybind11)
+    endif()
+    pybind11_add_module(_gpufl_client python/bindings.cpp)
+    target_link_libraries(_gpufl_client PRIVATE gpufl::gpufl)
+    if(GPUFL_HAS_CUDA)
+        # GPUFL_HAS_CUDA is set, so the module also links the CUDA runtime.
+        target_link_libraries(_gpufl_client PRIVATE CUDA::cudart)
+    endif()
+    install(TARGETS _gpufl_client DESTINATION gpufl)
+endif()
+# -----------------------
+# Install rules
+# -----------------------
+include(GNUInstallDirs)
+# Headers: the include/ tree is copied as-is (no file-pattern filter is applied).
+install(DIRECTORY include/ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+# Library: installed and recorded in the gpufl_clientTargets export set.
+install(
+    TARGETS gpufl
+    EXPORT gpufl_clientTargets
+    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
+    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+    INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+# Export file: written with the gpufl:: namespace next to the installed library.
+install(
+    EXPORT gpufl_clientTargets
+    NAMESPACE gpufl::
+    FILE gpufl_clientTargets.cmake
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/gpufl_client")

gpufl-0.1.0.dev0/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,12 @@
+# Contributing to GPU Flight
+First off, thank you for your interest! This project is a learning-focused initiative to build high-performance GPU monitoring tools.
+### Our Current Policy
+To maintain a high pace of development and a consistent architectural style (following the **Google C++ Style Guide**), the following rules apply:
+1. **Open an Issue First:** Do not submit a Pull Request without an associated Issue. If you have an idea, let's discuss it in an Issue first to ensure it fits the current roadmap.
+2. **Focus Areas:** We are currently prioritizing stability and NVIDIA/CUDA support. Small bug fixes are welcome!
+3. **No Large Refactors:** Please do not submit large code refactors or style changes. We use `.clang-format` to handle styling automatically.
+If you have a major feature in mind, we encourage you to **fork the repo** and experiment! We can always discuss merging your findings back into the main branch later.

gpufl 0.0.1__tar.gz → 0.1.0.dev0__tar.gz

gpufl 0.0.1__tar.gz → 0.1.0.dev0__tar.gz