gpufl 0.1.0.dev0__tar.gz → 0.1.0.dev7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. gpufl-0.1.0.dev7/.github/workflows/release.yml +193 -0
  2. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/CMakeLists.txt +77 -3
  3. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/PKG-INFO +1 -1
  4. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/block_style_example.cu +3 -1
  5. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/python/analyzer/01_analyzer_sample.py +5 -1
  6. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_backend.cpp +316 -0
  7. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_backend.hpp +116 -0
  8. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/cupti_common.hpp +11 -0
  9. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/cupti_utils.cpp +152 -0
  10. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/cupti_utils.hpp +28 -0
  11. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp +395 -0
  12. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/pc_sampling_engine.hpp +66 -0
  13. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/profiling_engine.hpp +73 -0
  14. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp +479 -0
  15. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/range_profiler_engine.hpp +53 -0
  16. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp +221 -0
  17. gpufl-0.1.0.dev7/include/gpufl/backends/nvidia/engine/sass_metrics_engine.hpp +44 -0
  18. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/kernel_launch_handler.cpp +46 -1
  19. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/resource_handler.cpp +2 -3
  20. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/debug_logger.hpp +1 -1
  21. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/events.hpp +29 -8
  22. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/gpufl.cpp +54 -21
  23. gpufl-0.1.0.dev7/include/gpufl/core/logger/file_compressor.cpp +44 -0
  24. gpufl-0.1.0.dev7/include/gpufl/core/logger/file_compressor.hpp +18 -0
  25. gpufl-0.1.0.dev7/include/gpufl/core/logger/log_rotator.cpp +65 -0
  26. gpufl-0.1.0.dev7/include/gpufl/core/logger/log_rotator.hpp +32 -0
  27. gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.cpp +152 -0
  28. gpufl-0.1.0.dev7/include/gpufl/core/logger/logger.hpp +70 -0
  29. gpufl-0.1.0.dev7/include/gpufl/core/model/kernel_event_model.cpp +51 -0
  30. gpufl-0.1.0.dev7/include/gpufl/core/model/kernel_event_model.hpp +16 -0
  31. gpufl-0.1.0.dev7/include/gpufl/core/model/lifecycle_model.cpp +34 -0
  32. gpufl-0.1.0.dev7/include/gpufl/core/model/lifecycle_model.hpp +24 -0
  33. gpufl-0.1.0.dev7/include/gpufl/core/model/memcpy_event_model.cpp +58 -0
  34. gpufl-0.1.0.dev7/include/gpufl/core/model/memcpy_event_model.hpp +24 -0
  35. gpufl-0.1.0.dev7/include/gpufl/core/model/model_utils.hpp +94 -0
  36. gpufl-0.1.0.dev7/include/gpufl/core/model/perf_metric_model.cpp +33 -0
  37. gpufl-0.1.0.dev7/include/gpufl/core/model/perf_metric_model.hpp +16 -0
  38. gpufl-0.1.0.dev7/include/gpufl/core/model/profile_sample_model.cpp +40 -0
  39. gpufl-0.1.0.dev7/include/gpufl/core/model/profile_sample_model.hpp +16 -0
  40. gpufl-0.1.0.dev7/include/gpufl/core/model/scope_event_model.cpp +43 -0
  41. gpufl-0.1.0.dev7/include/gpufl/core/model/scope_event_model.hpp +24 -0
  42. gpufl-0.1.0.dev7/include/gpufl/core/model/serializable.hpp +15 -0
  43. gpufl-0.1.0.dev7/include/gpufl/core/model/system_event_model.cpp +51 -0
  44. gpufl-0.1.0.dev7/include/gpufl/core/model/system_event_model.hpp +32 -0
  45. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/monitor.hpp +25 -30
  46. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/monitor_backend.hpp +7 -0
  47. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/sampler.cpp +3 -2
  48. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/cuda/monitor.cpp +33 -8
  49. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/gpufl.hpp +10 -7
  50. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/pyproject.toml +1 -1
  51. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/python/bindings.cpp +39 -13
  52. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/python/gpufl/__init__.py +10 -4
  53. gpufl-0.1.0.dev7/python/gpufl/analyzer/analyzer.py +721 -0
  54. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/backends/nvidia/test_nvidia_backend.cpp +1 -1
  55. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/core/test_analyzer.cpp +1 -1
  56. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/verify_pipeline.py +10 -2
  57. gpufl-0.1.0.dev0/.github/workflows/release.yml +0 -71
  58. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.cpp +0 -806
  59. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_backend.hpp +0 -164
  60. gpufl-0.1.0.dev0/include/gpufl/backends/nvidia/cupti_utils.cpp +0 -73
  61. gpufl-0.1.0.dev0/include/gpufl/core/logger.cpp +0 -437
  62. gpufl-0.1.0.dev0/include/gpufl/core/logger.hpp +0 -88
  63. gpufl-0.1.0.dev0/python/gpufl/analyzer/analyzer.py +0 -359
  64. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/.clang-format +0 -0
  65. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/.github/pull_request_template.md +0 -0
  66. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/.github/workflows/build.yml +0 -0
  67. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/.gitignore +0 -0
  68. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/CONTRIBUTING.md +0 -0
  69. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/LICENSE +0 -0
  70. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/README.md +0 -0
  71. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/build.sh +0 -0
  72. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/CMakeLists.txt +0 -0
  73. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/check_conflict.cu +0 -0
  74. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/check_device.cu +0 -0
  75. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/cupti_basic.cu +0 -0
  76. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/cupti_pc_sampling.cu +0 -0
  77. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/list_sass_metrics.cu +0 -0
  78. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/occupancy_demo.cu +0 -0
  79. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/system_monitor.cu +0 -0
  80. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/test_occupancy.cu +0 -0
  81. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/test_sass_cubin.cu +0 -0
  82. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/test_sass_metrics.cu +0 -0
  83. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/cuda/vector_add_benchmark.cu +0 -0
  84. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/python/01_basic.py +0 -0
  85. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/python/02_numba_cuda.py +0 -0
  86. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/python/03_pytorch_benchmark.py +0 -0
  87. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/python/requirements.txt +0 -0
  88. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/python/viz/01_plot_memory_timeline.py +0 -0
  89. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/example/python/viz/02_plot_stress_timeline.py +0 -0
  90. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/images/Screenshot1.png +0 -0
  91. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/amd/rocm_collector.cpp +0 -0
  92. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/amd/rocm_collector.hpp +0 -0
  93. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/host_collector.hpp +0 -0
  94. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/cuda_collector.cpp +0 -0
  95. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/cuda_collector.hpp +0 -0
  96. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/kernel_launch_handler.hpp +0 -0
  97. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/mem_transfer_handler.cpp +0 -0
  98. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/mem_transfer_handler.hpp +0 -0
  99. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/nvml_collector.cpp +0 -0
  100. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/nvml_collector.hpp +0 -0
  101. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/resource_handler.hpp +0 -0
  102. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +0 -0
  103. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +0 -0
  104. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/common.cpp +0 -0
  105. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/common.hpp +0 -0
  106. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/debug_logger.cpp +0 -0
  107. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/ring_buffer.hpp +0 -0
  108. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/runtime.cpp +0 -0
  109. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/runtime.hpp +0 -0
  110. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/sampler.hpp +0 -0
  111. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/scope_registry.cpp +0 -0
  112. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/scope_registry.hpp +0 -0
  113. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/stack_registry.hpp +0 -0
  114. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/stack_trace.cpp +0 -0
  115. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/stack_trace.hpp +0 -0
  116. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl/core/trace_type.hpp +0 -0
  117. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/include/gpufl.hpp +0 -0
  118. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/python/gpufl/.gitignore +0 -0
  119. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/python/gpufl/analyzer/__init__.py +0 -0
  120. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/python/gpufl/utils.py +0 -0
  121. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/python/gpufl/viz/__init__.py +0 -0
  122. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/python/gpufl/viz/reader.py +0 -0
  123. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/python/gpufl/viz/timeline.py +0 -0
  124. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/python/gpufl/viz/visualizer.py +0 -0
  125. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/schema/ndjson.schema.json +0 -0
  126. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/CMakeLists.txt +0 -0
  127. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/backends/nvidia/test_cuda_collector.cpp +0 -0
  128. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/backends/nvidia/test_nvml_collector.cpp +0 -0
  129. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/common/test_utils.hpp +0 -0
  130. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/core/test_monitor.cpp +0 -0
  131. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/main_test_runner.cpp +0 -0
  132. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/python/conftest.py +0 -0
  133. {gpufl-0.1.0.dev0 → gpufl-0.1.0.dev7}/tests/python/test_analyzer.py +0 -0
@@ -0,0 +1,193 @@
1
+ name: Build and Release Wheels
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build_wheels:
11
+ name: Build wheels on ${{ matrix.os }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ matrix:
15
+ os: [ubuntu-22.04, windows-latest]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set package version from tag
21
+ if: startsWith(github.ref, 'refs/tags/v')
22
+ shell: python
23
+ run: |
24
+ import os
25
+ import re
26
+ from pathlib import Path
27
+
28
+ ref_name = os.environ.get("GITHUB_REF_NAME", "")
29
+ if not ref_name.startswith("v"):
30
+ raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
31
+ version = ref_name[1:]
32
+ print(f"Using version from tag: {version}")
33
+
34
+ pyproject = Path("pyproject.toml")
35
+ text = pyproject.read_text(encoding="utf-8")
36
+ text_new, n = re.subn(
37
+ r'(?m)^version\s*=\s*"[^\"]+"$',
38
+ f'version = "{version}"',
39
+ text,
40
+ count=1,
41
+ )
42
+ if n != 1:
43
+ raise SystemExit("Failed to update [project].version in pyproject.toml")
44
+ pyproject.write_text(text_new, encoding="utf-8")
45
+
46
+ init_py = Path("python/gpufl/__init__.py")
47
+ if init_py.exists():
48
+ init_text = init_py.read_text(encoding="utf-8")
49
+ init_new, _ = re.subn(
50
+ r'(?m)^__version__\s*=\s*"[^\"]+"$',
51
+ f'__version__ = "{version}"',
52
+ init_text,
53
+ )
54
+ init_py.write_text(init_new, encoding="utf-8")
55
+
56
+ - name: Cache cibuildwheel downloads
57
+ uses: actions/cache@v4
58
+ with:
59
+ path: |
60
+ ~/.cache/cibuildwheel
61
+ ~/AppData/Local/pypa/cibuildwheel/Cache
62
+ key: cibw-${{ runner.os }}-${{ hashFiles('.github/workflows/release.yml') }}
63
+ restore-keys: |
64
+ cibw-${{ runner.os }}-
65
+
66
+ - name: Install CUDA (Windows)
67
+ if: runner.os == 'Windows'
68
+ uses: Jimver/cuda-toolkit@v0.2.30
69
+ with:
70
+ cuda: '13.1.0'
71
+ method: 'network'
72
+
73
+ - name: Prefetch virtualenv.pyz (Windows)
74
+ if: runner.os == 'Windows'
75
+ shell: pwsh
76
+ run: |
77
+ $version = "20.27.1"
78
+ $cacheDir = Join-Path $env:LOCALAPPDATA "pypa\cibuildwheel\Cache"
79
+ New-Item -ItemType Directory -Path $cacheDir -Force | Out-Null
80
+ $dest = Join-Path $cacheDir "virtualenv-$version.pyz"
81
+ if (Test-Path $dest) {
82
+ Write-Host "virtualenv.pyz already cached: $dest"
83
+ exit 0
84
+ }
85
+ $urls = @(
86
+ "https://raw.githubusercontent.com/pypa/get-virtualenv/$version/public/virtualenv.pyz",
87
+ "https://raw.githubusercontent.com/pypa/get-virtualenv/refs/tags/$version/public/virtualenv.pyz",
88
+ "https://bootstrap.pypa.io/virtualenv.pyz"
89
+ )
90
+ $max = 6
91
+ $ok = $false
92
+ foreach ($url in $urls) {
93
+ for ($i = 1; $i -le $max; $i++) {
94
+ try {
95
+ Write-Host "Downloading virtualenv.pyz from $url (attempt $i/$max)..."
96
+ Invoke-WebRequest -Uri $url -OutFile $dest -TimeoutSec 120 -Headers @{ "User-Agent" = "cibuildwheel-prefetch" }
97
+ if ((Get-Item $dest).Length -gt 0) {
98
+ Write-Host "Downloaded: $dest"
99
+ $ok = $true
100
+ break
101
+ }
102
+ } catch {
103
+ if (Test-Path $dest) { Remove-Item $dest -Force -ErrorAction SilentlyContinue }
104
+ if ($i -eq $max) { break }
105
+ Start-Sleep -Seconds (5 * $i)
106
+ }
107
+ }
108
+ if ($ok) { break }
109
+ }
110
+ if (-not $ok) { throw "Failed to prefetch virtualenv.pyz from all sources." }
111
+
112
+ - name: Build wheels
113
+ uses: pypa/cibuildwheel@v2.22.0
114
+ env:
115
+ CIBW_VIRTUALENV_VERSION: "20.27.1"
116
+ CIBW_ENVIRONMENT_LINUX: "CUDA_HOME=/usr/local/cuda PATH=/usr/local/cuda/bin:$PATH CMAKE_ARGS='-DGPUFL_ENABLE_NVIDIA=ON -DGPUFL_ENABLE_AMD=OFF -DBUILD_TESTING=OFF'"
117
+ CIBW_BEFORE_ALL_LINUX: >-
118
+ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo > /etc/yum.repos.d/cuda.repo &&
119
+ dnf install -y --nogpgcheck cuda-nvcc-13-1 cuda-cudart-devel-13-1 cuda-cupti-13-1 cuda-driver-devel-13-1
120
+ CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
121
+ CIBW_BUILD: "cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp312-win_amd64 cp313-win_amd64"
122
+ CIBW_REPAIR_WHEEL_COMMAND_LINUX: "auditwheel repair --plat manylinux_2_28_x86_64 --exclude libcuda.so.1 -w {dest_dir} {wheel}"
123
+
124
+ - uses: actions/upload-artifact@v4
125
+ with:
126
+ name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
127
+ path: ./wheelhouse/*.whl
128
+
129
+ build_sdist:
130
+ name: Build source distribution
131
+ runs-on: ubuntu-latest
132
+ steps:
133
+ - uses: actions/checkout@v4
134
+
135
+ - name: Set package version from tag
136
+ if: startsWith(github.ref, 'refs/tags/v')
137
+ shell: python
138
+ run: |
139
+ import os
140
+ import re
141
+ from pathlib import Path
142
+
143
+ ref_name = os.environ.get("GITHUB_REF_NAME", "")
144
+ if not ref_name.startswith("v"):
145
+ raise SystemExit(f"Expected tag starting with 'v', got: {ref_name}")
146
+ version = ref_name[1:]
147
+ print(f"Using version from tag: {version}")
148
+
149
+ pyproject = Path("pyproject.toml")
150
+ text = pyproject.read_text(encoding="utf-8")
151
+ text_new, n = re.subn(
152
+ r'(?m)^version\s*=\s*"[^\"]+"$',
153
+ f'version = "{version}"',
154
+ text,
155
+ count=1,
156
+ )
157
+ if n != 1:
158
+ raise SystemExit("Failed to update [project].version in pyproject.toml")
159
+ pyproject.write_text(text_new, encoding="utf-8")
160
+
161
+ init_py = Path("python/gpufl/__init__.py")
162
+ if init_py.exists():
163
+ init_text = init_py.read_text(encoding="utf-8")
164
+ init_new, _ = re.subn(
165
+ r'(?m)^__version__\s*=\s*"[^\"]+"$',
166
+ f'__version__ = "{version}"',
167
+ init_text,
168
+ )
169
+ init_py.write_text(init_new, encoding="utf-8")
170
+
171
+ - name: Build sdist
172
+ run: pipx run build --sdist
173
+
174
+ - uses: actions/upload-artifact@v4
175
+ with:
176
+ name: cibw-sdist
177
+ path: dist/*.tar.gz
178
+
179
+ upload_pypi:
180
+ needs: [build_wheels, build_sdist]
181
+ runs-on: ubuntu-latest
182
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
183
+ steps:
184
+ - uses: actions/download-artifact@v4
185
+ with:
186
+ pattern: cibw-*
187
+ path: dist
188
+ merge-multiple: true
189
+
190
+ - name: Publish to PyPI
191
+ uses: pypa/gh-action-pypi-publish@release/v1
192
+ with:
193
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -47,7 +47,15 @@ target_compile_features(gpufl INTERFACE cxx_std_17)
47
47
  set_target_properties(gpufl PROPERTIES POSITION_INDEPENDENT_CODE ON)
48
48
 
49
49
  target_sources(gpufl PRIVATE
50
- include/gpufl/core/logger.cpp
50
+ include/gpufl/core/logger/logger.cpp
51
+ include/gpufl/core/logger/log_rotator.cpp
52
+ include/gpufl/core/model/lifecycle_model.cpp
53
+ include/gpufl/core/model/kernel_event_model.cpp
54
+ include/gpufl/core/model/memcpy_event_model.cpp
55
+ include/gpufl/core/model/scope_event_model.cpp
56
+ include/gpufl/core/model/profile_sample_model.cpp
57
+ include/gpufl/core/model/perf_metric_model.cpp
58
+ include/gpufl/core/model/system_event_model.cpp
51
59
  include/gpufl/core/sampler.cpp
52
60
  include/gpufl/core/runtime.cpp
53
61
  include/gpufl/core/gpufl.cpp
@@ -61,6 +69,43 @@ set(GPUFL_HAS_CUDA 0)
61
69
  set(GPUFL_HAS_NVML 0)
62
70
  set(GPUFL_HAS_ROCM 0)
63
71
  set(GPUFL_HAS_CUPTI 0)
72
+ set(GPUFL_HAS_PERFWORKS 0)
73
+ # ZLIB — try system install first, fall back to FetchContent so every platform
74
+ # (including Windows CI) always gets compression support and .gz output.
75
+ find_package(ZLIB QUIET)
76
+ if(ZLIB_FOUND)
77
+ message(STATUS "Found system ZLIB: ${ZLIB_LIBRARIES}")
78
+ target_link_libraries(gpufl PRIVATE ZLIB::ZLIB)
79
+ else()
80
+ message(STATUS "ZLIB not found on system — fetching via FetchContent")
81
+ include(FetchContent)
82
+ # Suppress zlib's own example / test targets to keep the build clean
83
+ set(ZLIB_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
84
+ FetchContent_Declare(
85
+ zlib
86
+ GIT_REPOSITORY https://github.com/madler/zlib.git
87
+ GIT_TAG v1.3.1
88
+ )
89
+ FetchContent_MakeAvailable(zlib)
90
+ # zlib.h lives in the source dir; zconf.h is generated in the binary dir.
91
+ # Add both privately to gpufl — consumers never include zlib headers directly.
92
+ target_link_libraries(gpufl PRIVATE zlibstatic)
93
+ target_include_directories(gpufl PRIVATE
94
+ ${zlib_SOURCE_DIR}
95
+ ${zlib_BINARY_DIR}
96
+ )
97
+ # zlib's own CMakeLists sets INTERFACE_INCLUDE_DIRECTORIES on zlibstatic to
98
+ # build-directory paths, which CMake forbids in install exports. Clear them:
99
+ # gpufl already propagates the paths privately, so consumers don't need them.
100
+ set_target_properties(zlibstatic PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
101
+ # zlibstatic must be in the same export set as gpufl — static lib consumers
102
+ # need to link it transitively, so CMake requires it to be exported too.
103
+ install(TARGETS zlibstatic
104
+ EXPORT gpufl_clientTargets
105
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
106
+ )
107
+ endif()
108
+ target_sources(gpufl PRIVATE include/gpufl/core/logger/file_compressor.cpp)
64
109
 
65
110
  # -----------------------
66
111
  # Backends
@@ -95,7 +140,10 @@ if(GPUFL_ENABLE_NVIDIA)
95
140
  include/gpufl/backends/nvidia/kernel_launch_handler.cpp
96
141
  include/gpufl/backends/nvidia/mem_transfer_handler.cpp
97
142
  include/gpufl/cuda/monitor.cpp
98
- include/gpufl/backends/nvidia/cupti_backend.cpp)
143
+ include/gpufl/backends/nvidia/cupti_backend.cpp
144
+ include/gpufl/backends/nvidia/engine/pc_sampling_engine.cpp
145
+ include/gpufl/backends/nvidia/engine/sass_metrics_engine.cpp
146
+ include/gpufl/backends/nvidia/engine/range_profiler_engine.cpp)
99
147
  target_link_libraries(gpufl PRIVATE CUDA::cudart CUDA::cuda_driver)
100
148
 
101
149
  # --------------------------------------------------------
@@ -124,6 +172,31 @@ if(GPUFL_ENABLE_NVIDIA)
124
172
  message(STATUS "Found CUPTI manually: ${CUPTI_LIBRARY}")
125
173
  endif()
126
174
  endif()
175
+
176
+ # --------------------------------------------------------
177
+ # NVPERF Support (for GFL_PERF_SCOPE hardware counters)
178
+ # --------------------------------------------------------
179
+ find_library(NVPERF_HOST_LIBRARY NAMES nvperf_host nvperf_host_static
180
+ HINTS "${CUDAToolkit_ROOT}/extras/CUPTI/lib64"
181
+ "${CUDAToolkit_ROOT}/targets/x86_64-linux/lib"
182
+ "$ENV{CUDA_PATH}/extras/CUPTI/lib64"
183
+ "$ENV{CUDA_PATH}/targets/x86_64-linux/lib"
184
+ )
185
+ find_library(NVPERF_TARGET_LIBRARY NAMES nvperf_target
186
+ HINTS "${CUDAToolkit_ROOT}/extras/CUPTI/lib64"
187
+ "${CUDAToolkit_ROOT}/targets/x86_64-linux/lib"
188
+ "$ENV{CUDA_PATH}/extras/CUPTI/lib64"
189
+ "$ENV{CUDA_PATH}/targets/x86_64-linux/lib"
190
+ )
191
+ if(NVPERF_HOST_LIBRARY AND NVPERF_TARGET_LIBRARY)
192
+ set(GPUFL_HAS_PERFWORKS 1)
193
+ target_link_libraries(gpufl PRIVATE
194
+ ${NVPERF_HOST_LIBRARY} ${NVPERF_TARGET_LIBRARY})
195
+ message(STATUS "Found NVPERF: ${NVPERF_HOST_LIBRARY}")
196
+ else()
197
+ set(GPUFL_HAS_PERFWORKS 0)
198
+ message(WARNING "NVPERF not found — GFL_PERF_SCOPE disabled at runtime")
199
+ endif()
127
200
  endif()
128
201
  endif()
129
202
  #
@@ -178,10 +251,11 @@ if(GPUFL_ENABLE_NVIDIA)
178
251
  endif()
179
252
 
180
253
  # Apply definitions to public interface so tests inherit them
181
- target_compile_definitions(gpufl PUBLIC
254
+ target_compile_definitions(gpufl PUBLIC
182
255
  GPUFL_HAS_CUDA=${GPUFL_HAS_CUDA}
183
256
  GPUFL_HAS_NVML=${GPUFL_HAS_NVML}
184
257
  GPUFL_HAS_CUPTI=${GPUFL_HAS_CUPTI}
258
+ GPUFL_HAS_PERFWORKS=${GPUFL_HAS_PERFWORKS}
185
259
  )
186
260
 
187
261
  #
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gpufl
3
- Version: 0.1.0.dev0
3
+ Version: 0.1.0.dev7
4
4
  Summary: GPU Monitoring Client
5
5
  Author-Email: Myoungho Shin <myounghoshin84@gmail.com>
6
6
  Classifier: Development Status :: 3 - Alpha
@@ -36,12 +36,14 @@ int main() {
36
36
  // Initialize GFL
37
37
  gpufl::InitOptions opts;
38
38
  opts.app_name = "block_style_demo";
39
- opts.log_path = "gfl_block.log";
39
+ opts.log_path = "gfl_block";
40
40
  opts.system_sample_rate_ms = 50;
41
41
  opts.kernel_sample_rate_ms = 50;
42
42
  opts.enable_kernel_details = true;
43
43
  opts.sampling_auto_start = true;
44
44
  opts.enable_debug_output = true;
45
+ opts.profiling_engine = gpufl::ProfilingEngine::SassMetrics;
46
+
45
47
  if (!gpufl::init(opts)) {
46
48
  std::cerr << "Failed to initialize gpufl" << std::endl;
47
49
  return 1;
@@ -7,4 +7,8 @@ analyzer.print_summary()
7
7
 
8
8
  analyzer.inspect_scopes()
9
9
 
10
- analyzer.inspect_hotspots()
10
+ analyzer.inspect_hotspots()
11
+
12
+ analyzer.inspect_profile_samples()
13
+
14
+ analyzer.inspect_perf_metrics()
@@ -0,0 +1,316 @@
1
+ #include "gpufl/backends/nvidia/cupti_backend.hpp"
2
+
3
+ #include <cupti_pcsampling.h>
4
+ #include <cupti_profiler_target.h>
5
+ #include <cupti_sass_metrics.h>
6
+ #include <cupti_target.h>
7
+
8
+ #if GPUFL_HAS_PERFWORKS
9
+ #include <cupti_range_profiler.h>
10
+ #endif
11
+
12
+ #include <cstring>
13
+ #include <exception>
14
+ #include <set>
15
+
16
+ #include "gpufl/backends/nvidia/cupti_utils.hpp"
17
+ #include "gpufl/backends/nvidia/engine/pc_sampling_engine.hpp"
18
+ #include "gpufl/backends/nvidia/engine/range_profiler_engine.hpp"
19
+ #include "gpufl/backends/nvidia/engine/sass_metrics_engine.hpp"
20
+ #include "gpufl/backends/nvidia/kernel_launch_handler.hpp"
21
+ #include "gpufl/backends/nvidia/mem_transfer_handler.hpp"
22
+ #include "gpufl/backends/nvidia/resource_handler.hpp"
23
+ #include "gpufl/core/common.hpp"
24
+ #include "gpufl/core/debug_logger.hpp"
25
+ #include "gpufl/core/ring_buffer.hpp"
26
+ #include "gpufl/core/trace_type.hpp"
27
+
28
+ #include "gpufl/backends/nvidia/cuda_collector.hpp"
29
+ #include "gpufl/core/scope_registry.hpp"
30
+ #include "gpufl/core/stack_registry.hpp"
31
+ #include "gpufl/core/stack_trace.hpp"
32
+
33
+ namespace gpufl {
34
+ std::atomic<gpufl::CuptiBackend*> g_activeBackend{nullptr};
35
+
36
+ extern RingBuffer<ActivityRecord, 1024> g_monitorBuffer;
37
+
38
+ void CuptiBackend::initialize(const MonitorOptions& opts) {  // One-time setup: engine selection, handler registration, CUPTI subscription.
39
+ opts_ = opts;
40
+ // Debug logging is toggled from the options before anything else is logged.
41
+ DebugLogger::setEnabled(opts_.enable_debug_output);
42
+
43
+ // Create the engine (no CUDA context needed yet)
44
+ switch (opts_.profiling_engine) {
45
+ case ProfilingEngine::PcSampling:
46
+ engine_ = std::make_unique<PcSamplingEngine>();
47
+ GFL_LOG_DEBUG("[CuptiBackend] Engine: PcSampling");
48
+ break;
49
+ case ProfilingEngine::SassMetrics:
50
+ engine_ = std::make_unique<SassMetricsEngine>();
51
+ GFL_LOG_DEBUG("[CuptiBackend] Engine: SassMetrics");
52
+ break;
53
+ case ProfilingEngine::RangeProfiler:
54
+ #if GPUFL_HAS_PERFWORKS
55
+ engine_ = std::make_unique<RangeProfilerEngine>();
56
+ GFL_LOG_DEBUG("[CuptiBackend] Engine: RangeProfiler");
57
+ #else
58
+ GFL_LOG_ERROR("[CuptiBackend] RangeProfiler engine requires "
59
+ "GPUFL_HAS_PERFWORKS; falling back to None");
60
+ #endif
61
+ break;  // Without PERFWORKS no engine_ is assigned in this case.
62
+ case ProfilingEngine::None:
63
+ default:
64
+ GFL_LOG_DEBUG("[CuptiBackend] Engine: None (monitoring only)");
65
+ break;
66
+ }
67
+ // Publish this instance; the static BufferCompleted callback finds it via g_activeBackend.
68
+ g_activeBackend.store(this, std::memory_order_release);
69
+
70
+ // Internal handler registration
71
+ RegisterHandler(std::make_shared<ResourceHandler>(this));
72
+ RegisterHandler(std::make_shared<KernelLaunchHandler>(this));
73
+ RegisterHandler(std::make_shared<MemTransferHandler>(this));
74
+
75
+ GFL_LOG_DEBUG("Subscribing to CUPTI...");
76
+ CUPTI_CHECK_RETURN(
77
+ cuptiSubscribe(&subscriber_,
78
+ reinterpret_cast<CUpti_CallbackFunc>(GflCallback), this),
79
+ "[GPUFL Monitor] ERROR: Failed to subscribe to CUPTI\n"
80
+ "[GPUFL Monitor] This may indicate:\n"
81
+ " - CUPTI library not found or incompatible\n"
82
+ " - Insufficient permissions\n"
83
+ " - CUDA driver issues");
84
+ GFL_LOG_DEBUG("CUPTI subscription successful");
85
+ // Gather the union of domains/callbacks the handlers ask for; std::set removes duplicates.
86
+ std::set<CUpti_CallbackDomain> domains;
87
+ std::set<std::pair<CUpti_CallbackDomain, CUpti_CallbackId>> callbacks;
88
+ {
89
+ std::lock_guard<std::mutex> lk(handler_mu_);  // handlers_ is only read under handler_mu_
90
+ for (const auto& h : handlers_) {
91
+ for (auto d : h->requiredDomains()) domains.insert(d);
92
+ for (auto cb : h->requiredCallbacks()) callbacks.insert(cb);
93
+ }
94
+ }
95
+ for (auto d : domains) CUPTI_CHECK(cuptiEnableDomain(1, subscriber_, d));
96
+ for (auto& [domain, cbid] : callbacks)
97
+ CUPTI_CHECK(cuptiEnableCallback(1, subscriber_, domain, cbid));
98
+ // Activity buffers are supplied/consumed by the two static callbacks below.
99
+ CUptiResult resCb =
100
+ cuptiActivityRegisterCallbacks(BufferRequested, BufferCompleted);
101
+ if (resCb != CUPTI_SUCCESS) {
102
+ GFL_LOG_ERROR("FATAL: Failed to register activity callbacks.");
103
+ LogCuptiErrorIfFailed("CUPTI", "cuptiActivityRegisterCallbacks", resCb);
104
+ initialized_ = false;  // NOTE(review): subscriber_ stays subscribed and g_activeBackend stays set here, and shutdown() returns early when !initialized_ - confirm whether cleanup is needed.
105
+ return;
106
+ }
107
+
108
+ initialized_ = true;
109
+ GFL_LOG_DEBUG("Callbacks registered successfully.");
110
+ }
111
+
112
+ void CuptiBackend::shutdown() {  // Tears down the engine, flushes activity data, disables domains and unsubscribes.
113
+ if (!initialized_) return;  // Nothing to undo if initialize() did not complete.
114
+
115
+ // Delegate engine teardown first
116
+ if (engine_) {
117
+ engine_->stop();
118
+ engine_->shutdown();
119
+ engine_.reset();
120
+ }
121
+ // Flush pending activity buffers; this is a CUPTI Activity API call, so log it under the "CUPTI" tag.
122
+ LogCuptiErrorIfFailed("CUPTI", "cuptiActivityFlushAll",
123
+ cuptiActivityFlushAll(1));
124
+ // Disable every callback domain the registered handlers required.
125
+ {
126
+ std::lock_guard<std::mutex> lk(handler_mu_);
127
+ std::set<CUpti_CallbackDomain> domains;
128
+ for (const auto& h : handlers_)
129
+ for (auto d : h->requiredDomains()) domains.insert(d);
130
+ for (auto d : domains) cuptiEnableDomain(0, subscriber_, d);  // best effort: result intentionally not checked
131
+ }
132
+
133
+ cuptiUnsubscribe(subscriber_);
134
+ g_activeBackend.store(nullptr, std::memory_order_release);  // BufferCompleted now sees "no active backend"
135
+ initialized_ = false;
136
+ }
137
+
138
+ CUptiResult (*CuptiBackend::get_value())(CUpti_ActivityKind) {  // Returns a function pointer taking a CUpti_ActivityKind and returning CUptiResult.
139
+ return cuptiActivityEnable;  // Always the address of cuptiActivityEnable.
140
+ }
141
+
142
+ void CuptiBackend::start() {  // Enables activity collection and, if an engine exists, initializes and starts it.
143
+ if (!initialized_) return;
144
+ kernel_activity_seen_.store(0, std::memory_order_relaxed);  // reset the counters that stop() reports
145
+ kernel_activity_emitted_.store(0, std::memory_order_relaxed);
146
+ kernel_activity_throttled_.store(0, std::memory_order_relaxed);
147
+ // NOTE(review): SOURCE_LOCATOR is enabled here, but stop() only disables the handlers' kinds - confirm intended.
148
+ CUPTI_CHECK(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR));
149
+
150
+ // Enable activity kinds required by registered handlers (always on)
151
+ {
152
+ std::set<CUpti_ActivityKind> kinds;
153
+ {
154
+ std::lock_guard<std::mutex> lk(handler_mu_);  // collect under the lock, enable after releasing it
155
+ for (const auto& h : handlers_)
156
+ for (auto k : h->requiredActivityKinds()) kinds.insert(k);
157
+ }
158
+ for (auto k : kinds) CUPTI_CHECK(cuptiActivityEnable(k));
159
+ }
160
+
161
+ // Initialize and start the engine (requires CUDA context)
162
+ if (engine_) {
163
+ if (EnsureCudaContext(&ctx_)) {
164
+ cuptiGetDeviceId(ctx_, &device_id_);  // return code is not checked
165
+ chip_name_ = getChipName(device_id_);
166
+ cached_device_name_ = GetCurrentDeviceName();
167
+ // The engine receives the context, device info and pointers to the cubin mutex/map.
168
+ EngineContext ectx{ctx_, device_id_, chip_name_,
169
+ &cubin_mu_, &cubin_by_crc_};
170
+ engine_->initialize(opts_, ectx);
171
+ engine_->start();
172
+ } else {
173
+ GFL_LOG_ERROR("[CuptiBackend] Failed to get CUDA context; "
174
+ "engine will not start.");
175
+ }
176
+ }
177
+ // active_ is set regardless of whether the engine was started above.
178
+ active_.store(true);
179
+ GFL_LOG_DEBUG("Backend started.");
180
+ }
181
+
182
+ void CuptiBackend::stop() {  // Marks the backend inactive, flushes activity data and disables handler activity kinds.
183
+ if (!initialized_) return;
184
+ active_.store(false);
185
+ // cuptiActivityFlushAll is a CUPTI Activity API call, so failures are logged under the "CUPTI" tag.
186
+ LogCuptiErrorIfFailed("CUPTI", "cuptiActivityFlushAll",
187
+ cuptiActivityFlushAll(1));
188
+ // Disable the same activity kinds that the registered handlers requested.
189
+ {
190
+ std::set<CUpti_ActivityKind> kinds;
191
+ {
192
+ std::lock_guard<std::mutex> lk(handler_mu_);  // collect under the lock, disable after releasing it
193
+ for (const auto& h : handlers_)
194
+ for (auto k : h->requiredActivityKinds()) kinds.insert(k);
195
+ }
196
+ for (auto k : kinds) cuptiActivityDisable(k);  // best effort: result intentionally not checked
197
+ }
198
+ // Report the kernel-activity counters accumulated since start() reset them.
199
+ const uint64_t seen = kernel_activity_seen_.load(std::memory_order_relaxed);
200
+ const uint64_t emitted =
201
+ kernel_activity_emitted_.load(std::memory_order_relaxed);
202
+ const uint64_t throttled =
203
+ kernel_activity_throttled_.load(std::memory_order_relaxed);
204
+ GFL_LOG_DEBUG("[KernelLaunchHandler] activity summary seen=", seen,
205
+ " emitted=", emitted, " throttled=", throttled);
206
+ }
207
+
208
+ void CuptiBackend::RegisterHandler(  // Appends a handler to handlers_; null handlers are ignored.
209
+ const std::shared_ptr<ICuptiHandler>& handler) {
210
+ if (!handler) return;
211
+ std::lock_guard<std::mutex> lk(handler_mu_);  // handlers_ is also copied under this mutex by the CUPTI callbacks
212
+ handlers_.push_back(handler);
213
+ }
214
+
215
+ // ---- Static callbacks ------------------------------------------------------
216
+
217
+ void CUPTIAPI CuptiBackend::BufferRequested(uint8_t** buffer, size_t* size,  // CUPTI asks for an empty activity buffer.
218
+ size_t* maxNumRecords) {
219
+ *buffer = static_cast<uint8_t*>(malloc(64 * 1024));  // 64 KiB; released with free() in BufferCompleted
220
+ *size = (*buffer != nullptr) ? 64 * 1024 : 0;  // never advertise capacity for a buffer that failed to allocate
221
+ *maxNumRecords = 0;  // 0: no record-count cap (see CUPTI buffer-request callback docs)
222
+ }
223
+
224
+ void CUPTIAPI CuptiBackend::BufferCompleted(CUcontext context,  // CUPTI returns a filled activity buffer; parse it, then free it.
225
+ uint32_t streamId, uint8_t* buffer,
226
+ size_t size,
227
+ const size_t validSize) {
228
+ auto* backend = g_activeBackend.load(std::memory_order_acquire);
229
+ if (!backend) {
230
+ ::gpufl::DebugLogger::error("[CUPTI] ",
231
+ "BufferCompleted: No active backend!");
232
+ if (buffer) free(buffer);  // still release the buffer allocated in BufferRequested
233
+ return;
234
+ }
235
+ // CPU and CUPTI base timestamps are captured once and passed to every handler.
236
+ static int64_t baseCpuNs = detail::GetTimestampNs();
237
+ static uint64_t baseCuptiTs = 0;
238
+ if (baseCuptiTs == 0) cuptiGetTimestamp(&baseCuptiTs);
239
+ // Copy the handler list under the lock so records are dispatched without holding it.
240
+ std::vector<std::shared_ptr<ICuptiHandler>> handlers;
241
+ {
242
+ std::lock_guard<std::mutex> lk(backend->handler_mu_);
243
+ handlers = backend->handlers_;
244
+ }
245
+
246
+ if (validSize > 0) {
247
+ CUpti_Activity* record = nullptr;
248
+ while (true) {
249
+ const CUptiResult st =
250
+ cuptiActivityGetNextRecord(buffer, validSize, &record);
251
+ if (st == CUPTI_SUCCESS) {
252
+ bool handled = false;
253
+ for (const auto& h : handlers) {  // the first handler that accepts the record wins
254
+ if (h->handleActivityRecord(record, baseCpuNs,
255
+ baseCuptiTs)) {
256
+ handled = true;
257
+ break;
258
+ }
259
+ }
260
+ if (!handled &&
261
+ record->kind == CUPTI_ACTIVITY_KIND_PC_SAMPLING) {  // fallback for PC-sampling records no handler took
262
+ auto* pc =
263
+ reinterpret_cast<CUpti_ActivityPCSampling3*>(record);
264
+ ActivityRecord out{};
265
+ out.type = TraceType::PC_SAMPLE;
266
+ out.corr_id = pc->correlationId;
267
+ std::snprintf(out.sample_kind, sizeof(out.sample_kind),
268
+ "%s", "pc_sampling");
269
+ out.samples_count = pc->samples;
270
+ out.stall_reason = pc->stallReason;
271
+ // A PC-sampling record has no deviceId field; reading one through a
272
+ // kernel-record cast goes past the record. Use the device resolved in start().
273
+ out.device_id = backend->device_id_;
274
+ g_monitorBuffer.Push(out);
275
+ }
276
+ } else if (st == CUPTI_ERROR_MAX_LIMIT_REACHED) {  // no more records in this buffer
277
+ break;
278
+ } else {
279
+ ::gpufl::DebugLogger::error("[CUPTI] ",
280
+ "Error parsing buffer: ", st);
281
+ break;
282
+ }
283
+ }
284
+ }
285
+
286
+ free(buffer);
287
+ }
288
+
289
+ void CuptiBackend::GflCallback(void* userdata, CUpti_CallbackDomain domain,  // CUPTI callback entry point; userdata is the backend passed to cuptiSubscribe.
290
+ CUpti_CallbackId cbid, const void* cbdata) {
291
+ if (!cbdata) return;
292
+
293
+ auto* backend = static_cast<CuptiBackend*>(userdata);
294
+ if (!backend) return;
295
+ // Copy the handler list under the lock so handlers run without holding it.
296
+ std::vector<std::shared_ptr<ICuptiHandler>> handlers;
297
+ {
298
+ std::lock_guard<std::mutex> lk(backend->handler_mu_);
299
+ handlers = backend->handlers_;
300
+ }
301
+
302
+ bool apiHandled = false;  // set once a runtime/driver API callback has been dispatched
303
+
304
+ for (const auto& handler : handlers) {
305
+ if (handler->shouldHandle(domain, cbid)) {
306
+ if (domain == CUPTI_CB_DOMAIN_RUNTIME_API ||
307
+ domain == CUPTI_CB_DOMAIN_DRIVER_API) {
308
+ if (apiHandled) continue;  // at most one handler per runtime/driver API callback
309
+ apiHandled = true;
310
+ }
311
+ handler->handle(domain, cbid, cbdata);  // other domains go to every handler that accepts them
312
+ }
313
+ }
314
+ }
315
+
316
+ } // namespace gpufl