gpufl 0.0.1__tar.gz → 0.1.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. gpufl-0.1.0.dev0/.clang-format +5 -0
  2. gpufl-0.1.0.dev0/.github/pull_request_template.md +11 -0
  3. gpufl-0.1.0.dev0/.github/workflows/build.yml +119 -0
  4. gpufl-0.1.0.dev0/.github/workflows/release.yml +71 -0
  5. {gpufl-0.0.1 → gpufl-0.1.0.dev0}/.gitignore +80 -80
  6. gpufl-0.1.0.dev0/CMakeLists.txt +277 -0
  7. gpufl-0.1.0.dev0/CONTRIBUTING.md +12 -0
  8. {gpufl-0.0.1 → gpufl-0.1.0.dev0}/LICENSE +201 -201
  9. gpufl-0.1.0.dev0/PKG-INFO +192 -0
  10. gpufl-0.1.0.dev0/README.md +167 -0
  11. gpufl-0.1.0.dev0/build.sh +1 -0
  12. gpufl-0.1.0.dev0/example/cuda/CMakeLists.txt +241 -0
  13. gpufl-0.1.0.dev0/example/cuda/block_style_example.cu +97 -0
  14. gpufl-0.1.0.dev0/example/cuda/check_conflict.cu +81 -0
  15. gpufl-0.1.0.dev0/example/cuda/check_device.cu +25 -0
  16. gpufl-0.1.0.dev0/example/cuda/cupti_basic.cu +129 -0
  17. gpufl-0.1.0.dev0/example/cuda/cupti_pc_sampling.cu +263 -0
  18. gpufl-0.1.0.dev0/example/cuda/list_sass_metrics.cu +46 -0
  19. gpufl-0.1.0.dev0/example/cuda/occupancy_demo.cu +155 -0
  20. gpufl-0.1.0.dev0/example/cuda/system_monitor.cu +58 -0
  21. gpufl-0.1.0.dev0/example/cuda/test_occupancy.cu +62 -0
  22. gpufl-0.1.0.dev0/example/cuda/test_sass_cubin.cu +164 -0
  23. gpufl-0.1.0.dev0/example/cuda/test_sass_metrics.cu +85 -0
  24. gpufl-0.1.0.dev0/example/cuda/vector_add_benchmark.cu +103 -0
  25. {gpufl-0.0.1 → gpufl-0.1.0.dev0}/example/python/01_basic.py +25 -25
  26. {gpufl-0.0.1 → gpufl-0.1.0.dev0}/example/python/02_numba_cuda.py +76 -76
  27. gpufl-0.1.0.dev0/example/python/03_pytorch_benchmark.py +75 -0
  28. gpufl-0.1.0.dev0/example/python/analyzer/01_analyzer_sample.py +10 -0
  29. gpufl-0.1.0.dev0/example/python/requirements.txt +7 -0
  30. gpufl-0.1.0.dev0/example/python/viz/01_plot_memory_timeline.py +9 -0
  31. gpufl-0.1.0.dev0/example/python/viz/02_plot_stress_timeline.py +9 -0
  32. gpufl-0.1.0.dev0/images/Screenshot1.png +0 -0
  33. gpufl-0.1.0.dev0/include/gpufl/backends/amd/rocm_collector.cpp +10 -0
  34. gpufl-0.1.0.dev0/include/gpufl/backends/amd/rocm_collector.hpp +18 -0
  35. gpufl-0.1.0.dev0/include/gpufl/backends/host_collector.hpp +150 -0
  36. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cuda_collector.cpp +43 -0
  37. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cuda_collector.hpp +16 -0
  38. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.cpp +806 -0
  39. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.hpp +164 -0
  40. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_common.hpp +146 -0
  41. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_utils.cpp +73 -0
  42. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_utils.hpp +37 -0
  43. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +282 -0
  44. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +26 -0
  45. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +237 -0
  46. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +26 -0
  47. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/nvml_collector.cpp +188 -0
  48. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/nvml_collector.hpp +38 -0
  49. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/resource_handler.cpp +63 -0
  50. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/resource_handler.hpp +25 -0
  51. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +222 -0
  52. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +42 -0
  53. gpufl-0.1.0.dev0/include/gpufl/core/common.cpp +45 -0
  54. gpufl-0.1.0.dev0/include/gpufl/core/common.hpp +109 -0
  55. gpufl-0.1.0.dev0/include/gpufl/core/debug_logger.cpp +9 -0
  56. gpufl-0.1.0.dev0/include/gpufl/core/debug_logger.hpp +43 -0
  57. gpufl-0.1.0.dev0/include/gpufl/core/events.hpp +253 -0
  58. gpufl-0.1.0.dev0/include/gpufl/core/gpufl.cpp +365 -0
  59. gpufl-0.1.0.dev0/include/gpufl/core/logger.cpp +437 -0
  60. gpufl-0.1.0.dev0/include/gpufl/core/logger.hpp +88 -0
  61. gpufl-0.1.0.dev0/include/gpufl/core/monitor.hpp +100 -0
  62. gpufl-0.1.0.dev0/include/gpufl/core/monitor_backend.hpp +46 -0
  63. gpufl-0.1.0.dev0/include/gpufl/core/ring_buffer.hpp +75 -0
  64. gpufl-0.1.0.dev0/include/gpufl/core/runtime.cpp +6 -0
  65. gpufl-0.1.0.dev0/include/gpufl/core/runtime.hpp +30 -0
  66. gpufl-0.1.0.dev0/include/gpufl/core/sampler.cpp +73 -0
  67. gpufl-0.1.0.dev0/include/gpufl/core/sampler.hpp +51 -0
  68. gpufl-0.1.0.dev0/include/gpufl/core/scope_registry.cpp +10 -0
  69. gpufl-0.1.0.dev0/include/gpufl/core/scope_registry.hpp +8 -0
  70. gpufl-0.1.0.dev0/include/gpufl/core/stack_registry.hpp +47 -0
  71. gpufl-0.1.0.dev0/include/gpufl/core/stack_trace.cpp +112 -0
  72. gpufl-0.1.0.dev0/include/gpufl/core/stack_trace.hpp +12 -0
  73. gpufl-0.1.0.dev0/include/gpufl/core/trace_type.hpp +13 -0
  74. gpufl-0.1.0.dev0/include/gpufl/cuda/monitor.cpp +380 -0
  75. gpufl-0.1.0.dev0/include/gpufl/gpufl.hpp +80 -0
  76. gpufl-0.1.0.dev0/include/gpufl.hpp +3 -0
  77. gpufl-0.1.0.dev0/pyproject.toml +63 -0
  78. gpufl-0.1.0.dev0/python/bindings.cpp +103 -0
  79. gpufl-0.1.0.dev0/python/gpufl/.gitignore +159 -0
  80. gpufl-0.1.0.dev0/python/gpufl/__init__.py +83 -0
  81. gpufl-0.1.0.dev0/python/gpufl/analyzer/__init__.py +1 -0
  82. gpufl-0.1.0.dev0/python/gpufl/analyzer/analyzer.py +359 -0
  83. gpufl-0.1.0.dev0/python/gpufl/utils.py +19 -0
  84. gpufl-0.1.0.dev0/python/gpufl/viz/__init__.py +27 -0
  85. gpufl-0.1.0.dev0/python/gpufl/viz/reader.py +48 -0
  86. gpufl-0.1.0.dev0/python/gpufl/viz/timeline.py +380 -0
  87. gpufl-0.1.0.dev0/python/gpufl/viz/visualizer.py +194 -0
  88. {gpufl-0.0.1 → gpufl-0.1.0.dev0}/schema/ndjson.schema.json +133 -133
  89. gpufl-0.1.0.dev0/tests/CMakeLists.txt +134 -0
  90. gpufl-0.1.0.dev0/tests/backends/nvidia/test_cuda_collector.cpp +34 -0
  91. gpufl-0.1.0.dev0/tests/backends/nvidia/test_nvidia_backend.cpp +127 -0
  92. gpufl-0.1.0.dev0/tests/backends/nvidia/test_nvml_collector.cpp +54 -0
  93. gpufl-0.1.0.dev0/tests/common/test_utils.hpp +31 -0
  94. gpufl-0.1.0.dev0/tests/core/test_analyzer.cpp +15 -0
  95. gpufl-0.1.0.dev0/tests/core/test_monitor.cpp +77 -0
  96. gpufl-0.1.0.dev0/tests/main_test_runner.cpp +6 -0
  97. gpufl-0.1.0.dev0/tests/python/conftest.py +69 -0
  98. gpufl-0.1.0.dev0/tests/python/test_analyzer.py +54 -0
  99. gpufl-0.1.0.dev0/tests/verify_pipeline.py +91 -0
  100. gpufl-0.0.1/.github/workflows/build.yml +0 -59
  101. gpufl-0.0.1/CMakeLists.txt +0 -74
  102. gpufl-0.0.1/PKG-INFO +0 -362
  103. gpufl-0.0.1/README.md +0 -343
  104. gpufl-0.0.1/example/cuda/CMakeLists.txt +0 -63
  105. gpufl-0.0.1/example/cuda/block_style_example.cu +0 -159
  106. gpufl-0.0.1/example/cuda/system_monitor.cu +0 -21
  107. gpufl-0.0.1/example/python/03_kernel.launch.py +0 -34
  108. gpufl-0.0.1/example/python/requirements.txt +0 -2
  109. gpufl-0.0.1/include/gpufl/backends/cuda.hpp +0 -259
  110. gpufl-0.0.1/include/gpufl/core/common.hpp +0 -201
  111. gpufl-0.0.1/include/gpufl/core/monitor.hpp +0 -261
  112. gpufl-0.0.1/include/gpufl/gpufl.hpp +0 -26
  113. gpufl-0.0.1/pyproject.toml +0 -35
  114. gpufl-0.0.1/python/bindings.cpp +0 -67
  115. gpufl-0.0.1/python/gpufl/__init__.py +0 -32
  116. gpufl-0.0.1/python/gpufl/utils.py +0 -35
  117. gpufl-0.0.1/tests/verify_pipeline.py +0 -88
@@ -0,0 +1,5 @@
1
+ ---
2
+ Language: Cpp
3
+ BasedOnStyle: Google
4
+ IndentWidth: 4
5
+ ColumnLimit: 80
@@ -0,0 +1,11 @@
1
+ ## Description
2
+ ## Type of Change
3
+ - [ ] Bug fix
4
+ - [ ] New feature
5
+ - [ ] Documentation update
6
+
7
+ ## Testing
8
+ ## Checklist
9
+ - [ ] My code follows the style guidelines of this project
10
+ - [ ] I have performed a self-review of my own code
11
+ - [ ] I have commented my code, particularly in hard-to-understand areas
@@ -0,0 +1,119 @@
1
+ name: Build GPUFl Client
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+
9
+ jobs:
10
+ build:
11
+ name: Build on ${{ matrix.os }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ matrix:
15
+ os: [ubuntu-22.04, windows-latest]
16
+ python-version: ["3.12", "3.13"]
17
+
18
+ env:
19
+ CMAKE_ARGS: >-
20
+ -DGPUFL_ENABLE_NVIDIA=ON
21
+ -DGPUFL_ENABLE_AMD=OFF
22
+ -DBUILD_TESTING=OFF
23
+
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+
27
+ - name: Set up Python ${{ matrix.python-version }}
28
+ uses: actions/setup-python@v5
29
+ with:
30
+ python-version: ${{ matrix.python-version }}
31
+
32
+ # Install CUDA Toolkit (provides CUDA_PATH)
33
+ - name: Install CUDA Toolkit
34
+ uses: Jimver/cuda-toolkit@v0.2.30
35
+ id: cuda-toolkit
36
+ with:
37
+ cuda: '13.1.0'
38
+ method: 'network'
39
+ use-github-cache: false
40
+
41
+ # Make sure CMake can find CUDA headers/libraries during the *pip build*.
42
+ # Jimver/cuda-toolkit sets CUDA_PATH; we map it to common vars CMake respects.
43
+ - name: Export CUDA environment for CMake
44
+ shell: bash
45
+ run: |
46
+ echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV
47
+ echo "CUDAToolkit_ROOT=${CUDA_PATH}" >> $GITHUB_ENV
48
+ if [ "${{ runner.os }}" == "Windows" ]; then
49
+ echo "${CUDA_PATH}/bin" >> $GITHUB_PATH
50
+ echo "${CUDA_PATH}/extras/CUPTI/lib64" >> $GITHUB_PATH
51
+ echo "C:/Program Files/NVIDIA Corporation/NVSMI" >> $GITHUB_PATH
52
+ fi
53
+
54
+ - name: Install system dependencies
55
+ if: runner.os == 'Linux'
56
+ run: |
57
+ sudo apt-get update
58
+ sudo apt-get install -y libcurl4-openssl-dev
59
+
60
+ - name: Install python dependencies
61
+ run: |
62
+ python -m pip install --upgrade pip
63
+ pip install scikit-build-core pybind11 cmake ninja
64
+
65
+ - name: Build and Install
66
+ run: |
67
+ pip install .[viz,analyzer] -v
68
+
69
+ - name: Run C++ Unit Tests
70
+ # Skip C++ tests on Windows as they require actual NVIDIA GPUs to run (CUDA/CUPTI initialization)
71
+ if: runner.os != 'Windows'
72
+ shell: bash
73
+ run: |
74
+ # 1. Prepare a local writable directory for CUDA stubs
75
+ # We cannot write to the system CUDA directory (Permission denied).
76
+ LOCAL_STUBS_DIR="${GITHUB_WORKSPACE}/local_cuda_stubs"
77
+ mkdir -p "${LOCAL_STUBS_DIR}"
78
+
79
+ # 2. Gather relevant library directories for the CUDA Toolkit
80
+ STUBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib/stubs"
81
+ LIBS_DIR="${CUDA_HOME}/targets/x86_64-linux/lib"
82
+
83
+ # 3. Create versioned symlinks in the LOCAL directory
84
+ # Many binaries expect .so.1 which is only created by the driver installer.
85
+ for lib in libcuda libnvidia-ml libnvrtc; do
86
+ if [ -f "${STUBS_DIR}/${lib}.so" ]; then
87
+ # Symlink the original stub to our local dir
88
+ ln -sf "${STUBS_DIR}/${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so"
89
+ # Create the versioned symlink in our local dir
90
+ ln -sf "${lib}.so" "${LOCAL_STUBS_DIR}/${lib}.so.1"
91
+ fi
92
+ done
93
+
94
+ # 4. Add local stubs and toolkit libs to LD_LIBRARY_PATH
95
+ export LD_LIBRARY_PATH="${LOCAL_STUBS_DIR}:${LIBS_DIR}:${LD_LIBRARY_PATH}"
96
+
97
+ # Debug: check what libraries are found
98
+ echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
99
+ ls -l "${LOCAL_STUBS_DIR}" || true
100
+
101
+ cmake -B build_tests -S . \
102
+ -DGPUFL_ENABLE_NVIDIA=ON \
103
+ -DBUILD_PYTHON=OFF \
104
+ -DBUILD_TESTING=ON
105
+
106
+ cmake --build build_tests --target gpufl_tests
107
+
108
+ ctest --test-dir build_tests --output-on-failure --verbose --timeout 60
109
+
110
+ - name: Run Python Unit Tests
111
+ shell: bash
112
+ run: |
113
+ python -m pip install pytest
114
+ export PYTHONPATH=$PYTHONPATH:$(pwd)/python
115
+ python -m pytest tests/python
116
+
117
+ - name: Verify Logging Pipeline
118
+ run: |
119
+ python -u tests/verify_pipeline.py
@@ -0,0 +1,71 @@
1
+ name: Build and Release Wheels
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build_wheels:
11
+ name: Build wheels on ${{ matrix.os }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ matrix:
15
+ os: [ubuntu-22.04, windows-latest]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Install CUDA (Windows)
21
+ if: runner.os == 'Windows'
22
+ uses: Jimver/cuda-toolkit@v0.2.30
23
+ with:
24
+ cuda: '13.1.0'
25
+ method: 'network'
26
+
27
+ - name: Build wheels
28
+ uses: pypa/cibuildwheel@v2.22.0
29
+ env:
30
+ CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
31
+ CIBW_BEFORE_ALL_LINUX: >-
32
+ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
33
+ dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1
34
+ CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
35
+ CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
36
+ CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 -w {dest_dir} {wheel}"
37
+
38
+ - uses: actions/upload-artifact@v4
39
+ with:
40
+ name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
41
+ path: ./wheelhouse/*.whl
42
+
43
+ build_sdist:
44
+ name: Build source distribution
45
+ runs-on: ubuntu-latest
46
+ steps:
47
+ - uses: actions/checkout@v4
48
+
49
+ - name: Build sdist
50
+ run: pipx run build --sdist
51
+
52
+ - uses: actions/upload-artifact@v4
53
+ with:
54
+ name: cibw-sdist
55
+ path: dist/*.tar.gz
56
+
57
+ upload_pypi:
58
+ needs: [build_wheels, build_sdist]
59
+ runs-on: ubuntu-latest
60
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
61
+ steps:
62
+ - uses: actions/download-artifact@v4
63
+ with:
64
+ pattern: cibw-*
65
+ path: dist
66
+ merge-multiple: true
67
+
68
+ - name: Publish to PyPI
69
+ uses: pypa/gh-action-pypi-publish@release/v1
70
+ with:
71
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -1,80 +1,80 @@
1
- ### idea
2
- .idea/**
3
- build/
4
- cmake-build-*/
5
- cmake/
6
-
7
- ### C++ template
8
- # Prerequisites
9
- *.d
10
-
11
- # Compiled Object files
12
- *.slo
13
- *.lo
14
- *.o
15
- *.obj
16
-
17
- # Precompiled Headers
18
- *.gch
19
- *.pch
20
-
21
- # Compiled Dynamic libraries
22
- *.so
23
- *.dylib
24
- *.dll
25
-
26
- # Fortran module files
27
- *.mod
28
- *.smod
29
-
30
- # Compiled Static libraries
31
- *.lai
32
- *.la
33
- *.a
34
- *.lib
35
-
36
- # Executables
37
- *.exe
38
- *.out
39
- *.app
40
-
41
- ### C template
42
- # Prerequisites
43
- *.d
44
-
45
- # Object files
46
- *.o
47
- *.ko
48
- *.obj
49
- *.elf
50
-
51
- # Linker output
52
- *.ilk
53
- *.map
54
- *.exp
55
-
56
- # Precompiled Headers
57
- *.gch
58
- *.pch
59
-
60
- # Libraries
61
- *.lib
62
- *.a
63
- *.la
64
- *.lo
65
-
66
- # Shared objects (inc. Windows DLLs)
67
- *.dll
68
- *.so
69
- *.so.*
70
- *.dylib
71
-
72
- # Executables
73
- *.exe
74
- *.out
75
- *.app
76
- *.i*86
77
- *.x86_64
78
- *.hex
79
-
80
-
1
+ ### idea
2
+ .idea/**
3
+ build/
4
+ cmake-build-*/
5
+ cmake/
6
+
7
+ ### C++ template
8
+ # Prerequisites
9
+ *.d
10
+
11
+ # Compiled Object files
12
+ *.slo
13
+ *.lo
14
+ *.o
15
+ *.obj
16
+
17
+ # Precompiled Headers
18
+ *.gch
19
+ *.pch
20
+
21
+ # Compiled Dynamic libraries
22
+ *.so
23
+ *.dylib
24
+ *.dll
25
+
26
+ # Fortran module files
27
+ *.mod
28
+ *.smod
29
+
30
+ # Compiled Static libraries
31
+ *.lai
32
+ *.la
33
+ *.a
34
+ *.lib
35
+
36
+ # Executables
37
+ *.exe
38
+ *.out
39
+ *.app
40
+
41
+ ### C template
42
+ # Prerequisites
43
+ *.d
44
+
45
+ # Object files
46
+ *.o
47
+ *.ko
48
+ *.obj
49
+ *.elf
50
+
51
+ # Linker output
52
+ *.ilk
53
+ *.map
54
+ *.exp
55
+
56
+ # Precompiled Headers
57
+ *.gch
58
+ *.pch
59
+
60
+ # Libraries
61
+ *.lib
62
+ *.a
63
+ *.la
64
+ *.lo
65
+
66
+ # Shared objects (inc. Windows DLLs)
67
+ *.dll
68
+ *.so
69
+ *.so.*
70
+ *.dylib
71
+
72
+ # Executables
73
+ *.exe
74
+ *.out
75
+ *.app
76
+ *.i*86
77
+ *.x86_64
78
+ *.hex
79
+
80
+ *.log
@@ -0,0 +1,277 @@
# Top-level build script for the gpufl client library.
cmake_minimum_required(VERSION 3.31)

# Only C++ is enabled up front; CUDA is enabled later, and only when the
# NVIDIA backend is requested and a CUDA compiler is actually detected.
# The `gpufl` target defined in this file is a compiled STATIC library, so the
# description must not advertise it as header-only.
project(gpufl_client
    VERSION 0.1.0
    LANGUAGES CXX
    DESCRIPTION "GPU monitoring client library"
)

# -----------------------
# CUDA Architectures (CI Friendly)
# -----------------------
# Fall back to "all-major" only when nothing (command line, toolchain file,
# parent project) has provided a value, so CI machines without a GPU still get
# a usable architecture list.
if(NOT CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES "all-major")
endif()

# Project-wide C++17, strict (no compiler extensions).
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# -----------------------
# Options
# -----------------------
# Backend selection. Each option is mirrored into a GPUFL_ENABLE_<BACKEND>=0/1
# compile definition further down in this file.
option(GPUFL_ENABLE_NVIDIA "Enable NVIDIA backends (CUDA + NVML when available)" ON)
option(GPUFL_ENABLE_AMD "Enable AMD backends (ROCm when available)" OFF)

# Optional components: the CUDA example programs and the pybind11 module.
option(BUILD_GPUFL_EXAMPLE "Build gpufl example application" ON)
option(BUILD_PYTHON "Build Python bindings" OFF)
28
+
# -----------------------
# Library target
# -----------------------
# `gpufl` is a static library whose sources live next to its headers under
# include/. Consumers should link the namespaced alias gpufl::gpufl.
#
# NOTE(review): cupti_sass.cpp is part of the target unconditionally, even when
# the NVIDIA backend is disabled or CUPTI is not found - confirm the file guards
# its CUPTI usage with the GPUFL_HAS_* macros.
add_library(gpufl STATIC
    include/gpufl/backends/nvidia/sampler/cupti_sass.cpp
    include/gpufl/backends/nvidia/sampler/cupti_sass.hpp
)
add_library(gpufl::gpufl ALIAS gpufl)

# Headers are found in the source tree during the build and under
# <prefix>/include once installed.
target_include_directories(gpufl
    PUBLIC
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
        $<INSTALL_INTERFACE:include>
)

# PUBLIC (not INTERFACE): the library compiles its own sources as C++17 and
# its headers require C++17 from consumers as well.
target_compile_features(gpufl PUBLIC cxx_std_17)

# Enable PIC for static library (required when linking into shared libraries
# like Python modules).
set_target_properties(gpufl PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Backend-independent core sources.
target_sources(gpufl PRIVATE
    include/gpufl/core/logger.cpp
    include/gpufl/core/sampler.cpp
    include/gpufl/core/runtime.cpp
    include/gpufl/core/gpufl.cpp
    include/gpufl/core/common.cpp
    include/gpufl/core/debug_logger.cpp
    include/gpufl/core/stack_trace.cpp
    include/gpufl/core/scope_registry.cpp
)

# Capability flags. They start at 0 and are switched to 1 by the backend
# detection logic later in this file.
set(GPUFL_HAS_CUDA 0)
set(GPUFL_HAS_NVML 0)
set(GPUFL_HAS_ROCM 0)
set(GPUFL_HAS_CUPTI 0)
64
+
# -----------------------
# Backends
# -----------------------
# Publish every backend switch as a GPUFL_ENABLE_<BACKEND> macro that is always
# defined, with value 1 when the option is on and 0 when it is off.
foreach(gpufl_backend IN ITEMS NVIDIA AMD)
    if(GPUFL_ENABLE_${gpufl_backend})
        set(gpufl_backend_enabled 1)
    else()
        set(gpufl_backend_enabled 0)
    endif()
    target_compile_definitions(gpufl PUBLIC
        GPUFL_ENABLE_${gpufl_backend}=${gpufl_backend_enabled}
    )
endforeach()
unset(gpufl_backend_enabled)
79
+
if(GPUFL_ENABLE_NVIDIA)
    #
    # CUDA capability: only if a CUDA compiler and the CUDA toolkit are available.
    #
    include(CheckLanguage)
    check_language(CUDA)
    if(CMAKE_CUDA_COMPILER)
        enable_language(CUDA)
        find_package(CUDAToolkit QUIET)
        if(CUDAToolkit_FOUND)
            set(GPUFL_HAS_CUDA 1)
            target_sources(gpufl PRIVATE
                include/gpufl/backends/nvidia/cuda_collector.cpp
                include/gpufl/backends/nvidia/cupti_utils.cpp
                include/gpufl/backends/nvidia/resource_handler.cpp
                include/gpufl/backends/nvidia/kernel_launch_handler.cpp
                include/gpufl/backends/nvidia/mem_transfer_handler.cpp
                include/gpufl/cuda/monitor.cpp
                include/gpufl/backends/nvidia/cupti_backend.cpp
            )
            target_link_libraries(gpufl PRIVATE CUDA::cudart CUDA::cuda_driver)

            # --------------------------------------------------------
            # CUPTI Support
            # --------------------------------------------------------
            if(TARGET CUDA::cupti)
                # Preferred: imported target provided by FindCUDAToolkit.
                target_link_libraries(gpufl PRIVATE CUDA::cupti)
                set(GPUFL_HAS_CUPTI 1)
                message(STATUS "Found CUPTI via CUDAToolkit target")
            else()
                # Fallback: manual search if the target is missing.
                # CUDAToolkit_LIBRARY_ROOT is a result variable of
                # FindCUDAToolkit (the toolkit root); CUDAToolkit_ROOT is only
                # populated when the user passes it in, so it cannot be the
                # sole toolkit-relative hint.
                find_library(CUPTI_LIBRARY NAMES cupti
                    HINTS
                        "${CUDAToolkit_LIBRARY_ROOT}/extras/CUPTI/lib64"
                        "${CUDAToolkit_LIBRARY_ROOT}/extras/CUPTI/lib"
                        "${CUDAToolkit_ROOT}/extras/CUPTI/lib64"
                        "${CUDAToolkit_ROOT}/extras/CUPTI/lib"
                        "$ENV{CUDA_PATH}/extras/CUPTI/lib64"
                )
                find_path(CUPTI_INCLUDE_DIR NAMES cupti.h
                    HINTS
                        "${CUDAToolkit_LIBRARY_ROOT}/extras/CUPTI/include"
                        "${CUDAToolkit_ROOT}/extras/CUPTI/include"
                        "$ENV{CUDA_PATH}/extras/CUPTI/include"
                )

                if(CUPTI_LIBRARY AND CUPTI_INCLUDE_DIR)
                    target_link_libraries(gpufl PRIVATE "${CUPTI_LIBRARY}")
                    target_include_directories(gpufl PRIVATE "${CUPTI_INCLUDE_DIR}")
                    set(GPUFL_HAS_CUPTI 1)
                    message(STATUS "Found CUPTI manually: ${CUPTI_LIBRARY}")
                endif()
            endif()
        endif()
    endif()

    #
    # NVML capability:
    # - On Linux, link libnvidia-ml if present.
    # - On Windows, many projects LoadLibrary/GetProcAddress at runtime.
    #   If you do runtime loading, treat NVML as "capable" without link-time lib.
    #
    if(WIN32)
        # nvml.dll usually comes from the NVIDIA driver (NVSMI), not CUDA toolkit.
        find_path(NVML_DLL_DIR NAMES nvml.dll
            PATHS
                "$ENV{ProgramFiles}/NVIDIA Corporation/NVSMI"
                "$ENV{SystemRoot}/System32"
        )

        if(NVML_DLL_DIR)
            set(GPUFL_HAS_NVML 1)

            # The import library is optional: link it only when it is found.
            find_library(NVML_LIBRARY NAMES nvml nvidia-ml
                PATHS "$ENV{CUDA_PATH}/lib/x64"
                PATH_SUFFIXES lib lib/x64
            )
            if(NVML_LIBRARY)
                target_link_libraries(gpufl PRIVATE "${NVML_LIBRARY}")
            endif()
        endif()
    else()
        # The toolkit ships libnvidia-ml as a link-time stub in
        # <library dir>/stubs. CUDAToolkit_LIBRARY_DIR is the FindCUDAToolkit
        # result that points at the library directory; CUDAToolkit_LIBRARY_ROOT
        # is the toolkit root and has no "stubs" child, but it is kept for
        # backward compatibility with existing caches.
        find_library(NVML_LIBRARY NAMES nvidia-ml
            HINTS
                "${CUDAToolkit_LIBRARY_DIR}/stubs"
                "${CUDAToolkit_LIBRARY_ROOT}/stubs"
                "${CUDAToolkit_ROOT}/targets/x86_64-linux/lib/stubs"
                "${CUDAToolkit_ROOT}/lib64/stubs"
                "$ENV{CUDA_PATH}/targets/x86_64-linux/lib/stubs"
                "/usr/local/cuda-13.1/targets/x86_64-linux/lib/stubs"
                "/usr/lib/wsl/lib" # Common location for WSL2 users
        )

        if(NVML_LIBRARY)
            set(GPUFL_HAS_NVML 1)
            target_link_libraries(gpufl PRIVATE "${NVML_LIBRARY}")
            message(STATUS "Found NVML (Linux): ${NVML_LIBRARY}")
        else()
            message(WARNING "NVML not found! GPUFL_HAS_NVML will be 0. (Check CUDA Toolkit 'stubs' folder)")
        endif()
    endif()

    # Apply definitions to public interface so tests inherit them.
    # NOTE(review): these macros are only defined when the NVIDIA backend is
    # enabled; with GPUFL_ENABLE_NVIDIA=OFF they stay undefined - confirm the
    # sources test them with `#if`, not `#ifdef`.
    target_compile_definitions(gpufl PUBLIC
        GPUFL_HAS_CUDA=${GPUFL_HAS_CUDA}
        GPUFL_HAS_NVML=${GPUFL_HAS_NVML}
        GPUFL_HAS_CUPTI=${GPUFL_HAS_CUPTI}
    )

    #
    # Only compile NVML collector if NVML is actually available
    #
    if(GPUFL_HAS_NVML)
        target_sources(gpufl PRIVATE
            include/gpufl/backends/nvidia/nvml_collector.cpp
        )
    endif()

    # -----------------------
    # CUDA Example (only when CUDA is available and gpufl is the top-level project)
    # -----------------------
    if(BUILD_GPUFL_EXAMPLE AND GPUFL_HAS_CUDA AND PROJECT_IS_TOP_LEVEL)
        add_subdirectory(example/cuda)
    endif()

    # -----------------------
    # Unit Tests (only when gpufl is the top-level project)
    # -----------------------
    option(BUILD_TESTING "Build the testing tree." ON)
    if(PROJECT_IS_TOP_LEVEL AND BUILD_TESTING)
        enable_testing()
        add_subdirectory(tests)
    endif()
endif()

# dbghelp is linked on every Windows build, independent of the GPU backend:
# core/stack_trace.cpp is always part of the library.
# NOTE(review): presumably stack_trace.cpp is the DbgHelp user - confirm.
if(WIN32)
    target_link_libraries(gpufl PRIVATE dbghelp)
endif()
212
+
213
+
# -----------------------
# AMD backends (placeholder, auto-detect later)
# -----------------------
if(GPUFL_ENABLE_AMD)
    # No real ROCm detection yet: enabling the AMD backend unconditionally
    # marks ROCm as available and compiles the ROCm collector.
    set(GPUFL_HAS_ROCM 1)

    target_compile_definitions(gpufl PUBLIC
        GPUFL_ENABLE_AMD=1
        GPUFL_HAS_ROCM=1
    )

    target_sources(gpufl PRIVATE include/gpufl/backends/amd/rocm_collector.cpp)
endif()
228
+
# -----------------------
# Python bindings (pybind11 extension module `_gpufl_client`)
# -----------------------
if(BUILD_PYTHON)
    # Prefer an already-installed pybind11 (e.g. the one pip provides in the
    # build environment); download it only when none is found.
    find_package(pybind11 QUIET)
    if(NOT pybind11_FOUND)
        include(FetchContent)
        # Pin an immutable release tag. "v2.13" is a moving maintenance branch,
        # which makes the fetched sources depend on when the build happens.
        FetchContent_Declare(
            pybind11
            GIT_REPOSITORY https://github.com/pybind/pybind11.git
            GIT_TAG v2.13.6
        )
        FetchContent_MakeAvailable(pybind11)
    endif()

    pybind11_add_module(_gpufl_client python/bindings.cpp)

    target_link_libraries(_gpufl_client PRIVATE gpufl::gpufl)

    # If CUDA is available, link the CUDA runtime into the Python module too.
    if(GPUFL_HAS_CUDA)
        target_link_libraries(_gpufl_client PRIVATE CUDA::cudart)
    endif()

    # The extension is installed into the `gpufl` package directory.
    install(TARGETS _gpufl_client DESTINATION gpufl)
endif()
252
+
253
+
254
+
# -----------------------
# Install
# -----------------------
include(GNUInstallDirs)

# Install header files only. The library's .cpp sources live in the same
# include/ tree, so the copy is restricted to header extensions to keep
# implementation files out of the installed include directory.
install(DIRECTORY include/
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
    FILES_MATCHING
        PATTERN "*.hpp"
        PATTERN "*.h"
)

# Install the static library and record it in the gpufl_clientTargets export set.
install(TARGETS gpufl
    EXPORT gpufl_clientTargets
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

# Write the export set as gpufl_clientTargets.cmake, exposing gpufl::gpufl.
# NOTE(review): no package config file is installed next to it, so consumers
# must include this targets file themselves - confirm that is intended.
install(EXPORT gpufl_clientTargets
    FILE gpufl_clientTargets.cmake
    NAMESPACE gpufl::
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/gpufl_client
)
@@ -0,0 +1,12 @@
1
+ # Contributing to GPU Flight
2
+
3
+ First off, thank you for your interest! This project is a learning-focused initiative to build high-performance GPU monitoring tools.
4
+
5
+ ### Our Current Policy
6
+ To maintain a high pace of development and a consistent architectural style (following the **Google C++ Style Guide**), the following rules apply:
7
+
8
+ 1. **Open an Issue First:** Do not submit a Pull Request without an associated Issue. If you have an idea, let's discuss it in an Issue first to ensure it fits the current roadmap.
9
+ 2. **Focus Areas:** We are currently prioritizing stability and NVIDIA/CUDA support. Small bug fixes are welcome!
10
+ 3. **No Large Refactors:** Please do not submit large code refactors or style changes. We use `.clang-format` to handle styling automatically.
11
+
12
+ If you have a major feature in mind, we encourage you to **fork the repo** and experiment! We can always discuss merging your findings back into the main branch later.