gpu-memory-profiler 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. gpu_memory_profiler-0.2.0/.editorconfig +16 -0
  2. gpu_memory_profiler-0.2.0/.flake8 +4 -0
  3. gpu_memory_profiler-0.2.0/.github/workflows/ci.yml +263 -0
  4. gpu_memory_profiler-0.2.0/.github/workflows/release.yml +36 -0
  5. gpu_memory_profiler-0.2.0/.gitignore +110 -0
  6. gpu_memory_profiler-0.2.0/.pre-commit-config.yaml +20 -0
  7. gpu_memory_profiler-0.2.0/.readthedocs.yaml +14 -0
  8. gpu_memory_profiler-0.2.0/CHANGELOG.md +188 -0
  9. gpu_memory_profiler-0.2.0/CODE_OF_CONDUCT.md +143 -0
  10. gpu_memory_profiler-0.2.0/CONTRIBUTING.md +271 -0
  11. gpu_memory_profiler-0.2.0/LICENSE +21 -0
  12. gpu_memory_profiler-0.2.0/PKG-INFO +340 -0
  13. gpu_memory_profiler-0.2.0/PROJECT_STATUS.md +70 -0
  14. gpu_memory_profiler-0.2.0/README.md +232 -0
  15. gpu_memory_profiler-0.2.0/RELEASE_CHECKLIST.md +90 -0
  16. gpu_memory_profiler-0.2.0/SECURITY.md +113 -0
  17. gpu_memory_profiler-0.2.0/docs/api.md +55 -0
  18. gpu_memory_profiler-0.2.0/docs/architecture.md +431 -0
  19. gpu_memory_profiler-0.2.0/docs/article.md +1112 -0
  20. gpu_memory_profiler-0.2.0/docs/benchmark_harness.md +53 -0
  21. gpu_memory_profiler-0.2.0/docs/benchmarks/v0.2_budgets.json +10 -0
  22. gpu_memory_profiler-0.2.0/docs/cli.md +136 -0
  23. gpu_memory_profiler-0.2.0/docs/compatibility_matrix.md +61 -0
  24. gpu_memory_profiler-0.2.0/docs/conf.py +73 -0
  25. gpu_memory_profiler-0.2.0/docs/cpu_compatibility.md +433 -0
  26. gpu_memory_profiler-0.2.0/docs/examples/test_guides/README.md +120 -0
  27. gpu_memory_profiler-0.2.0/docs/examples.md +308 -0
  28. gpu_memory_profiler-0.2.0/docs/gpu-profiler-1.png +0 -0
  29. gpu_memory_profiler-0.2.0/docs/gpu-profiler-2.png +0 -0
  30. gpu_memory_profiler-0.2.0/docs/gpu-profiler-overview.gif +0 -0
  31. gpu_memory_profiler-0.2.0/docs/gpu-profiler-overview.mp4 +0 -0
  32. gpu_memory_profiler-0.2.0/docs/gpu_setup.md +99 -0
  33. gpu_memory_profiler-0.2.0/docs/index.md +42 -0
  34. gpu_memory_profiler-0.2.0/docs/installation.md +180 -0
  35. gpu_memory_profiler-0.2.0/docs/pytorch_testing_guide.md +1337 -0
  36. gpu_memory_profiler-0.2.0/docs/reference/api.rst +16 -0
  37. gpu_memory_profiler-0.2.0/docs/reference/index.md +15 -0
  38. gpu_memory_profiler-0.2.0/docs/requirements-rtd.txt +4 -0
  39. gpu_memory_profiler-0.2.0/docs/schemas/telemetry_event_v2.schema.json +98 -0
  40. gpu_memory_profiler-0.2.0/docs/telemetry_schema.md +92 -0
  41. gpu_memory_profiler-0.2.0/docs/tensorflow_testing_guide.md +896 -0
  42. gpu_memory_profiler-0.2.0/docs/testing.md +615 -0
  43. gpu_memory_profiler-0.2.0/docs/troubleshooting.md +475 -0
  44. gpu_memory_profiler-0.2.0/docs/tui.md +115 -0
  45. gpu_memory_profiler-0.2.0/docs/usage.md +216 -0
  46. gpu_memory_profiler-0.2.0/examples/advanced/__init__.py +1 -0
  47. gpu_memory_profiler-0.2.0/examples/advanced/tracking_demo.py +175 -0
  48. gpu_memory_profiler-0.2.0/examples/basic/__init__.py +1 -0
  49. gpu_memory_profiler-0.2.0/examples/basic/pytorch_demo.py +103 -0
  50. gpu_memory_profiler-0.2.0/examples/basic/tensorflow_demo.py +83 -0
  51. gpu_memory_profiler-0.2.0/examples/cli/__init__.py +1 -0
  52. gpu_memory_profiler-0.2.0/examples/cli/benchmark_harness.py +297 -0
  53. gpu_memory_profiler-0.2.0/examples/cli/capability_matrix.py +332 -0
  54. gpu_memory_profiler-0.2.0/examples/cli/quickstart.py +71 -0
  55. gpu_memory_profiler-0.2.0/examples/common/__init__.py +65 -0
  56. gpu_memory_profiler-0.2.0/examples/common/capability_matrix_utils.py +86 -0
  57. gpu_memory_profiler-0.2.0/examples/common/cli.py +48 -0
  58. gpu_memory_profiler-0.2.0/examples/common/device.py +105 -0
  59. gpu_memory_profiler-0.2.0/examples/common/formatting.py +19 -0
  60. gpu_memory_profiler-0.2.0/examples/common/summary.py +53 -0
  61. gpu_memory_profiler-0.2.0/examples/common/tf_workflow.py +62 -0
  62. gpu_memory_profiler-0.2.0/examples/common/torch_workflow.py +81 -0
  63. gpu_memory_profiler-0.2.0/examples/scenarios/__init__.py +2 -0
  64. gpu_memory_profiler-0.2.0/examples/scenarios/cpu_telemetry_scenario.py +135 -0
  65. gpu_memory_profiler-0.2.0/examples/scenarios/mps_telemetry_scenario.py +152 -0
  66. gpu_memory_profiler-0.2.0/examples/scenarios/oom_flight_recorder_scenario.py +220 -0
  67. gpu_memory_profiler-0.2.0/examples/scenarios/tf_end_to_end_scenario.py +186 -0
  68. gpu_memory_profiler-0.2.0/examples/test_guides/README.md +17 -0
  69. gpu_memory_profiler-0.2.0/gpu_memory_profiler.egg-info/PKG-INFO +340 -0
  70. gpu_memory_profiler-0.2.0/gpu_memory_profiler.egg-info/SOURCES.txt +136 -0
  71. gpu_memory_profiler-0.2.0/gpu_memory_profiler.egg-info/dependency_links.txt +1 -0
  72. gpu_memory_profiler-0.2.0/gpu_memory_profiler.egg-info/entry_points.txt +4 -0
  73. gpu_memory_profiler-0.2.0/gpu_memory_profiler.egg-info/requires.txt +63 -0
  74. gpu_memory_profiler-0.2.0/gpu_memory_profiler.egg-info/top_level.txt +2 -0
  75. gpu_memory_profiler-0.2.0/gpumemprof/__init__.py +76 -0
  76. gpu_memory_profiler-0.2.0/gpumemprof/_version.py +34 -0
  77. gpu_memory_profiler-0.2.0/gpumemprof/analyzer.py +771 -0
  78. gpu_memory_profiler-0.2.0/gpumemprof/cli.py +551 -0
  79. gpu_memory_profiler-0.2.0/gpumemprof/context_profiler.py +322 -0
  80. gpu_memory_profiler-0.2.0/gpumemprof/cpu_profiler.py +413 -0
  81. gpu_memory_profiler-0.2.0/gpumemprof/device_collectors.py +242 -0
  82. gpu_memory_profiler-0.2.0/gpumemprof/diagnose.py +297 -0
  83. gpu_memory_profiler-0.2.0/gpumemprof/gap_analysis.py +0 -0
  84. gpu_memory_profiler-0.2.0/gpumemprof/oom_flight_recorder.py +226 -0
  85. gpu_memory_profiler-0.2.0/gpumemprof/profiler.py +478 -0
  86. gpu_memory_profiler-0.2.0/gpumemprof/telemetry.py +557 -0
  87. gpu_memory_profiler-0.2.0/gpumemprof/tracker.py +790 -0
  88. gpu_memory_profiler-0.2.0/gpumemprof/tui/__init__.py +8 -0
  89. gpu_memory_profiler-0.2.0/gpumemprof/tui/app.py +1823 -0
  90. gpu_memory_profiler-0.2.0/gpumemprof/tui/commands.py +73 -0
  91. gpu_memory_profiler-0.2.0/gpumemprof/tui/monitor.py +246 -0
  92. gpu_memory_profiler-0.2.0/gpumemprof/tui/profiles.py +129 -0
  93. gpu_memory_profiler-0.2.0/gpumemprof/utils.py +455 -0
  94. gpu_memory_profiler-0.2.0/gpumemprof/visualizer.py +645 -0
  95. gpu_memory_profiler-0.2.0/pyproject.toml +240 -0
  96. gpu_memory_profiler-0.2.0/pytest.ini +8 -0
  97. gpu_memory_profiler-0.2.0/requirements-ci-base.txt +21 -0
  98. gpu_memory_profiler-0.2.0/requirements-dev.txt +39 -0
  99. gpu_memory_profiler-0.2.0/requirements-test.txt +27 -0
  100. gpu_memory_profiler-0.2.0/requirements.txt +23 -0
  101. gpu_memory_profiler-0.2.0/setup.cfg +4 -0
  102. gpu_memory_profiler-0.2.0/setup.py +6 -0
  103. gpu_memory_profiler-0.2.0/snapshot_report.html +6950 -0
  104. gpu_memory_profiler-0.2.0/tests/conftest.py +4 -0
  105. gpu_memory_profiler-0.2.0/tests/e2e/test_tui_pty.py +90 -0
  106. gpu_memory_profiler-0.2.0/tests/gap_test_helpers.py +38 -0
  107. gpu_memory_profiler-0.2.0/tests/test_benchmark_harness.py +86 -0
  108. gpu_memory_profiler-0.2.0/tests/test_cli_diagnose.py +390 -0
  109. gpu_memory_profiler-0.2.0/tests/test_cli_info.py +255 -0
  110. gpu_memory_profiler-0.2.0/tests/test_cli_oom_flight_recorder.py +121 -0
  111. gpu_memory_profiler-0.2.0/tests/test_core_profiler.py +410 -0
  112. gpu_memory_profiler-0.2.0/tests/test_cpu_profiler.py +686 -0
  113. gpu_memory_profiler-0.2.0/tests/test_device_collectors.py +67 -0
  114. gpu_memory_profiler-0.2.0/tests/test_docs_regressions.py +41 -0
  115. gpu_memory_profiler-0.2.0/tests/test_examples_scenarios.py +73 -0
  116. gpu_memory_profiler-0.2.0/tests/test_gap_analysis.py +178 -0
  117. gpu_memory_profiler-0.2.0/tests/test_oom_flight_recorder.py +239 -0
  118. gpu_memory_profiler-0.2.0/tests/test_profiler.py +65 -0
  119. gpu_memory_profiler-0.2.0/tests/test_profiler_regressions.py +144 -0
  120. gpu_memory_profiler-0.2.0/tests/test_telemetry_v2.py +310 -0
  121. gpu_memory_profiler-0.2.0/tests/test_tf_env.py +19 -0
  122. gpu_memory_profiler-0.2.0/tests/test_tf_gap_analysis.py +183 -0
  123. gpu_memory_profiler-0.2.0/tests/test_tf_telemetry_export.py +94 -0
  124. gpu_memory_profiler-0.2.0/tests/test_tfmemprof_diagnose.py +349 -0
  125. gpu_memory_profiler-0.2.0/tests/test_utils.py +162 -0
  126. gpu_memory_profiler-0.2.0/tests/tui/test_app_pilot.py +253 -0
  127. gpu_memory_profiler-0.2.0/tests/tui/test_app_snapshots.py +162 -0
  128. gpu_memory_profiler-0.2.0/tests/tui/test_monitor.py +97 -0
  129. gpu_memory_profiler-0.2.0/tfmemprof/__init__.py +28 -0
  130. gpu_memory_profiler-0.2.0/tfmemprof/analyzer.py +390 -0
  131. gpu_memory_profiler-0.2.0/tfmemprof/cli.py +532 -0
  132. gpu_memory_profiler-0.2.0/tfmemprof/context_profiler.py +345 -0
  133. gpu_memory_profiler-0.2.0/tfmemprof/diagnose.py +276 -0
  134. gpu_memory_profiler-0.2.0/tfmemprof/profiler.py +437 -0
  135. gpu_memory_profiler-0.2.0/tfmemprof/tf_env.py +8 -0
  136. gpu_memory_profiler-0.2.0/tfmemprof/tracker.py +479 -0
  137. gpu_memory_profiler-0.2.0/tfmemprof/utils.py +582 -0
  138. gpu_memory_profiler-0.2.0/tfmemprof/visualizer.py +0 -0
@@ -0,0 +1,16 @@
1
+ # EditorConfig helps maintain consistent coding styles
2
+ root = true
3
+
4
+ [*]
5
+ charset = utf-8
6
+ end_of_line = lf
7
+ insert_final_newline = true
8
+ trim_trailing_whitespace = true
9
+ indent_style = space
10
+ indent_size = 4
11
+
12
+ [*.md]
13
+ trim_trailing_whitespace = false
14
+
15
+ [*.py]
16
+ indent_size = 4
@@ -0,0 +1,4 @@
1
+ [flake8]
2
+ max-line-length = 88
3
+ extend-ignore = E203, W503
4
+ exclude = .git,__pycache__,docs,build,dist,venv,.venv,.eggs,*.egg,*.egg-info
@@ -0,0 +1,263 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, develop]
6
+ pull_request:
7
+ branches: [main, release/v0.2-readiness]
8
+ schedule:
9
+ - cron: "0 3 * * *"
10
+
11
+ jobs:
12
+ test:
13
+ if: github.event_name != 'schedule'
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ python-version: ["3.10", "3.11", "3.12"]
19
+ framework: [pytorch, tensorflow]
20
+ exclude:
21
+ - python-version: "3.12"
22
+ framework: tensorflow
23
+
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+
27
+ - name: Set up Python ${{ matrix.python-version }}
28
+ uses: actions/setup-python@v4
29
+ with:
30
+ python-version: ${{ matrix.python-version }}
31
+
32
+ - name: Cache pip dependencies
33
+ uses: actions/cache@v3
34
+ with:
35
+ path: ~/.cache/pip
36
+ key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
37
+ restore-keys: |
38
+ ${{ runner.os }}-pip-${{ matrix.python-version }}-
39
+
40
+ - name: Install base dependencies
41
+ run: |
42
+ python3 -m pip install --upgrade pip
43
+ pip install -e . --no-deps
44
+ pip install -r requirements-ci-base.txt
45
+
46
+ - name: Install framework dependencies
47
+ run: |
48
+ if [ "${{ matrix.framework }}" = "pytorch" ]; then
49
+ pip install torch==2.2.2 --index-url https://download.pytorch.org/whl/cpu
50
+ else
51
+ pip install tensorflow-cpu==2.15.0
52
+ # Shared tests import gpumemprof modules that require torch at import time.
53
+ pip install torch==2.2.2 --index-url https://download.pytorch.org/whl/cpu
54
+ fi
55
+
56
+ - name: Run tests
57
+ run: |
58
+ python3 -m pytest tests/ -v -m "not tui_pilot and not tui_snapshot and not tui_pty" --cov=gpumemprof --cov=tfmemprof --cov-report=xml
59
+
60
+ - name: Upload coverage to Codecov
61
+ uses: codecov/codecov-action@v3
62
+ with:
63
+ file: ./coverage.xml
64
+ flags: unittests
65
+ name: codecov-umbrella
66
+
67
+ tui-pr-gate:
68
+ if: github.event_name == 'pull_request' || (github.event_name == 'push' && github.ref == 'refs/heads/develop')
69
+ runs-on: ubuntu-latest
70
+
71
+ steps:
72
+ - uses: actions/checkout@v4
73
+
74
+ - name: Set up Python
75
+ uses: actions/setup-python@v4
76
+ with:
77
+ python-version: "3.11"
78
+
79
+ - name: Cache pip dependencies
80
+ uses: actions/cache@v3
81
+ with:
82
+ path: ~/.cache/pip
83
+ key: ${{ runner.os }}-pip-tui-pr-${{ hashFiles('pyproject.toml') }}
84
+ restore-keys: |
85
+ ${{ runner.os }}-pip-tui-pr-
86
+
87
+ - name: Install TUI test dependencies
88
+ run: |
89
+ python3 -m pip install --upgrade pip
90
+ pip install -e . --no-deps
91
+ pip install -r requirements-ci-base.txt
92
+ pip install torch==2.2.2 --index-url https://download.pytorch.org/whl/cpu
93
+
94
+ - name: Run TUI pilot and snapshot suites
95
+ run: |
96
+ python3 -m pytest tests/tui/ -m "tui_pilot or tui_snapshot" -v
97
+
98
+ tui-pty-smoke:
99
+ if: github.event_name == 'schedule' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
100
+ runs-on: ubuntu-latest
101
+
102
+ steps:
103
+ - uses: actions/checkout@v4
104
+
105
+ - name: Set up Python
106
+ uses: actions/setup-python@v4
107
+ with:
108
+ python-version: "3.11"
109
+
110
+ - name: Cache pip dependencies
111
+ uses: actions/cache@v3
112
+ with:
113
+ path: ~/.cache/pip
114
+ key: ${{ runner.os }}-pip-tui-pty-${{ hashFiles('pyproject.toml') }}
115
+ restore-keys: |
116
+ ${{ runner.os }}-pip-tui-pty-
117
+
118
+ - name: Install PTY test dependencies
119
+ run: |
120
+ python3 -m pip install --upgrade pip
121
+ pip install -e . --no-deps
122
+ pip install -r requirements-ci-base.txt
123
+ pip install torch==2.2.2 --index-url https://download.pytorch.org/whl/cpu
124
+
125
+ - name: Run TUI PTY smoke suite
126
+ run: |
127
+ python3 -m pytest tests/e2e/test_tui_pty.py -m tui_pty -v
128
+
129
+ lint:
130
+ if: github.event_name != 'schedule'
131
+ runs-on: ubuntu-latest
132
+ strategy:
133
+ matrix:
134
+ python-version: ["3.10"]
135
+
136
+ steps:
137
+ - uses: actions/checkout@v4
138
+
139
+ - name: Set up Python ${{ matrix.python-version }}
140
+ uses: actions/setup-python@v4
141
+ with:
142
+ python-version: ${{ matrix.python-version }}
143
+
144
+ - name: Install dependencies
145
+ run: |
146
+ python3 -m pip install --upgrade pip
147
+ # Lint does not require heavy runtime ML dependencies.
148
+ pip install -e . --no-deps
149
+ pip install black flake8 mypy rich
150
+
151
+ - name: Run black
152
+ run: |
153
+ # Temporarily disabled; will be addressed in a separate PR.
154
+ # python3 -m black --check gpumemprof/ tfmemprof/ tests/ examples/
155
+ echo "Black formatting check temporarily disabled"
156
+
157
+ - name: Run flake8
158
+ run: |
159
+ # The repository currently carries broad legacy lint debt, so CI checks only fatal lint errors.
160
+ python3 -m flake8 gpumemprof/ tfmemprof/ tests/ examples/ --select=E9,F63,F7,F82 --show-source --statistics
161
+
162
+ - name: Run mypy
163
+ run: |
164
+ python3 -m mypy gpumemprof/ tfmemprof/
165
+
166
+ docs:
167
+ if: github.event_name != 'schedule'
168
+ runs-on: ubuntu-latest
169
+
170
+ steps:
171
+ - uses: actions/checkout@v4
172
+
173
+ - name: Set up Python
174
+ uses: actions/setup-python@v4
175
+ with:
176
+ python-version: "3.11"
177
+
178
+ - name: Cache pip dependencies
179
+ uses: actions/cache@v3
180
+ with:
181
+ path: ~/.cache/pip
182
+ key: ${{ runner.os }}-pip-docs-${{ hashFiles('pyproject.toml', 'docs/requirements-rtd.txt') }}
183
+ restore-keys: |
184
+ ${{ runner.os }}-pip-docs-
185
+
186
+ - name: Install docs dependencies
187
+ run: |
188
+ python3 -m pip install --upgrade pip
189
+ pip install -e . --no-deps
190
+ pip install -r docs/requirements-rtd.txt
191
+
192
+ - name: Build docs with warnings as errors
193
+ run: |
194
+ python3 -m sphinx -W --keep-going -b html docs docs/_build/html
195
+
196
+ build:
197
+ if: github.event_name != 'schedule'
198
+ runs-on: ubuntu-latest
199
+ needs: [test, lint, docs]
200
+
201
+ steps:
202
+ - uses: actions/checkout@v4
203
+
204
+ - name: Set up Python
205
+ uses: actions/setup-python@v4
206
+ with:
207
+ python-version: "3.10"
208
+
209
+ - name: Install build dependencies
210
+ run: |
211
+ python3 -m pip install --upgrade pip
212
+ pip install build twine
213
+
214
+ - name: Build package
215
+ run: |
216
+ python3 -m build
217
+
218
+ - name: Check package
219
+ run: |
220
+ twine check dist/*
221
+
222
+ - name: Upload build artifacts
223
+ uses: actions/upload-artifact@v4
224
+ with:
225
+ name: dist
226
+ path: dist/
227
+
228
+ cli-test:
229
+ if: github.event_name != 'schedule'
230
+ runs-on: ubuntu-latest
231
+ needs: [test, lint, docs]
232
+
233
+ steps:
234
+ - uses: actions/checkout@v4
235
+
236
+ - name: Set up Python
237
+ uses: actions/setup-python@v4
238
+ with:
239
+ python-version: "3.10"
240
+
241
+ - name: Install package
242
+ run: |
243
+ python3 -m pip install --upgrade pip
244
+ pip install -e .
245
+
246
+ - name: Test CLI tools
247
+ run: |
248
+ gpumemprof --help
249
+ tfmemprof --help
250
+ gpumemprof info
251
+ tfmemprof info
252
+
253
+ - name: Run documented CLI examples smoke test
254
+ env:
255
+ CUDA_VISIBLE_DEVICES: ""
256
+ run: |
257
+ python3 -m examples.cli.quickstart
258
+
259
+ - name: Run capability matrix smoke test
260
+ env:
261
+ CUDA_VISIBLE_DEVICES: ""
262
+ run: |
263
+ python3 -m examples.cli.capability_matrix --mode smoke --target auto --oom-mode simulated --skip-tui
@@ -0,0 +1,36 @@
1
+ name: Release
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ deploy:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v4
12
+
13
+ - name: Set up Python
14
+ uses: actions/setup-python@v4
15
+ with:
16
+ python-version: "3.10"
17
+
18
+ - name: Install build dependencies
19
+ run: |
20
+ python3 -m pip install --upgrade pip
21
+ pip install build twine
22
+
23
+ - name: Build package
24
+ run: |
25
+ python3 -m build
26
+
27
+ - name: Check package
28
+ run: |
29
+ twine check dist/*
30
+
31
+ - name: Publish to PyPI
32
+ env:
33
+ TWINE_USERNAME: __token__
34
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
35
+ run: |
36
+ twine upload dist/*
@@ -0,0 +1,110 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ env/
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # PyInstaller
28
+ # Usually these files are written by a python script from a template
29
+ # before PyInstaller builds the exe, to inject the date and other information into it.
30
+ *.manifest
31
+ *.spec
32
+
33
+ # Installer logs
34
+ debug.log
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ .hypothesis/
49
+ .pytest_cache/
50
+
51
+ # Translations
52
+ *.mo
53
+ *.pot
54
+
55
+ # Django stuff:
56
+ *.log
57
+ local_settings.py
58
+
59
+ # Flask stuff:
60
+ instance/
61
+ .webassets-cache
62
+
63
+ # Scrapy stuff:
64
+ .scrapy
65
+
66
+ # Sphinx documentation
67
+ /docs/_build/
68
+
69
+ # PyBuilder
70
+ .target/
71
+
72
+ # Jupyter Notebook
73
+ .ipynb_checkpoints
74
+
75
+ # IPython
76
+ profile_default/
77
+ ipython_config.py
78
+
79
+ # pyenv
80
+ .python-version
81
+
82
+ # pipenv
83
+ Pipfile.lock
84
+
85
+ # poetry
86
+ poetry.lock
87
+
88
+ # mypy
89
+ .mypy_cache/
90
+ .dmypy.json
91
+
92
+ # Pyre type checker
93
+ .pyre/
94
+
95
+ # VS Code
96
+ .vscode/
97
+
98
+ # macOS
99
+ .DS_Store
100
+
101
+ # Virtual environments
102
+ venv/
103
+ ENV/
104
+ env.bak/
105
+ venv.bak/
106
+
107
+ # Version files
108
+ gpumemprof/_version.py
109
+ tfmemprof/_version.py
110
+
@@ -0,0 +1,20 @@
1
+ repos:
2
+ - repo: https://github.com/psf/black
3
+ rev: 24.3.0
4
+ hooks:
5
+ - id: black
6
+ - repo: https://github.com/PyCQA/flake8
7
+ rev: 7.0.0
8
+ hooks:
9
+ - id: flake8
10
+ - repo: https://github.com/pre-commit/pre-commit-hooks
11
+ rev: v4.5.0
12
+ hooks:
13
+ - id: trailing-whitespace
14
+ - id: end-of-file-fixer
15
+ - id: check-yaml
16
+ - id: check-added-large-files
17
+ - repo: https://github.com/pre-commit/mirrors-mypy
18
+ rev: v1.8.0
19
+ hooks:
20
+ - id: mypy
@@ -0,0 +1,14 @@
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-22.04
5
+ tools:
6
+ python: "3.11"
7
+
8
+ sphinx:
9
+ configuration: docs/conf.py
10
+ fail_on_warning: true
11
+
12
+ python:
13
+ install:
14
+ - requirements: docs/requirements-rtd.txt
@@ -0,0 +1,188 @@
1
+ # Changelog
2
+
3
+ All notable changes to GPU Memory Profiler will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.2.0] - Unreleased
9
+
10
+ ### Added
11
+
12
+ - Launch QA scenario modules under `examples/scenarios/` for CPU telemetry, MPS telemetry, OOM flight recorder coverage, and TensorFlow end-to-end telemetry/diagnose checks.
13
+ - Capability matrix orchestrator (`python -m examples.cli.capability_matrix`) with smoke/full modes, target selection (`auto|cpu|mps|both`), OOM mode controls, and machine-readable reports.
14
+ - Scenario smoke tests (`tests/test_examples_scenarios.py`) and updated TUI pilot coverage for launch quick actions.
15
+ - Update TUI snapshot baselines to reflect intentional changes to the CLI & Actions tab.
16
+
17
+ ### Changed
18
+
19
+ - Drop support for Python 3.8 and 3.9; minimum supported runtime is now Python 3.10.
20
+ - Migration note: users on Python 3.8/3.9 should upgrade to Python 3.10+ or pin `gpu-memory-profiler<0.2.0`.
21
+ - Refresh docs/API examples to match current CLI and profiler behavior.
22
+ - Publish a versioned compatibility matrix for v0.2 and link it from top-level docs.
23
+ - Stabilize benchmark harness defaults (`--iterations 200`) and align benchmark/testing documentation to this baseline.
24
+ - Expand TUI CLI/Playbook guidance and quick actions to highlight diagnose, OOM scenario, and capability matrix workflows.
25
+ - Refresh release-facing docs (`README`, examples guides, `RELEASE_CHECKLIST`, `PROJECT_STATUS`) for v0.2 launch readiness.
26
+
27
+ ### Deprecated
28
+
29
+ - [Future deprecations will be listed here]
30
+
31
+ ### Removed
32
+
33
+ - [Future removals will be listed here]
34
+
35
+ ### Fixed
36
+
37
+ - Remove stale docs references to unsupported CLI options and non-existent profiler APIs.
38
+ - Fix `examples.basic.tensorflow_demo` constructor/API mismatch so the demo runs against the current TensorFlow profiler implementation.
39
+
40
+ ### Security
41
+
42
+ - [Future security fixes will be listed here]
43
+
44
+ ## [0.1.0] - 2024-12-19
45
+
46
+ ### Added
47
+
48
+ - **Core PyTorch Profiler (`gpumemprof`)**
49
+
50
+ - Real-time GPU memory monitoring with configurable sampling intervals
51
+ - Memory leak detection using statistical analysis and pattern recognition
52
+ - Interactive visualizations with matplotlib and plotly support
53
+ - Context-aware profiling with function decorators and context managers
54
+ - Command-line interface for standalone usage
55
+ - Advanced analytics including pattern detection and fragmentation analysis
56
+ - Alert system with configurable thresholds
57
+ - Data export capabilities (CSV, JSON)
58
+ - Automatic memory management with watchdog system
59
+
60
+ - **Core TensorFlow Profiler (`tfmemprof`)**
61
+
62
+ - Real-time TensorFlow GPU memory monitoring
63
+ - TensorFlow-specific memory leak detection
64
+ - Integration with TensorFlow's memory management system
65
+ - Support for TensorFlow sessions and graph execution
66
+ - Keras model profiling capabilities
67
+ - Mixed precision profiling support
68
+ - Multi-GPU strategy profiling
69
+ - Command-line interface for TensorFlow workflows
70
+
71
+ - **Visualization & Analysis**
72
+
73
+ - Memory timeline plots with interactive features
74
+ - Function comparison charts
75
+ - Memory usage heatmaps
76
+ - Interactive dashboards with Plotly
77
+ - Memory fragmentation analysis
78
+ - Performance correlation analysis
79
+ - Optimization scoring and recommendations
80
+
81
+ - **Command Line Tools**
82
+
83
+ - `gpumemprof` CLI for PyTorch profiling
84
+ - `tfmemprof` CLI for TensorFlow profiling
85
+ - System information display
86
+ - Real-time monitoring capabilities
87
+ - Background tracking with alerts
88
+ - Results analysis and visualization
89
+
90
+ - **CPU Compatibility**
91
+
92
+ - CPU memory profiling for systems without GPU
93
+ - Cross-platform compatibility
94
+ - CPU-based model training profiling
95
+ - System RAM monitoring capabilities
96
+
97
+ - **Testing & Documentation**
98
+ - Comprehensive test suite for both GPU and CPU environments
99
+ - PyTorch testing guide with examples
100
+ - TensorFlow testing guide with examples
101
+ - CPU compatibility guide
102
+ - Complete API documentation
103
+ - Usage examples and tutorials
104
+ - Troubleshooting guides
105
+
106
+ ### Technical Features
107
+
108
+ - Modular architecture with 7 core components
109
+ - Thread-safe profiling with background monitoring
110
+ - Configurable sampling intervals and alert thresholds
111
+ - Support for multiple GPU devices
112
+ - Memory snapshot capture and analysis
113
+ - Tensor lifecycle tracking (PyTorch)
114
+ - Graph execution monitoring (TensorFlow)
115
+ - Export capabilities for further analysis
116
+
117
+ ### Documentation
118
+
119
+ - Comprehensive documentation in the `docs/` directory
120
+ - Quick start guides for both PyTorch and TensorFlow
121
+ - API reference with examples
122
+ - CLI usage guide
123
+ - CPU compatibility guide
124
+ - Testing guides for both frameworks
125
+ - In-depth technical article
126
+ - Contributing guidelines and code of conduct
127
+
128
+ ### Infrastructure
129
+
130
+ - Open source project structure
131
+ - MIT License
132
+ - Contributing guidelines (CONTRIBUTING.md)
133
+ - Code of Conduct (CODE_OF_CONDUCT.md)
134
+ - Security policy (SECURITY.md)
135
+ - Changelog tracking
136
+ - Development setup instructions
137
+
138
+ ---
139
+
140
+ ## Version History
141
+
142
+ - **0.1.0** (2024-12-19): Initial release with full PyTorch and TensorFlow support
143
+
144
+ ## Release Notes
145
+
146
+ ### Version 0.1.0
147
+
148
+ This is the initial release of GPU Memory Profiler, providing comprehensive memory profiling capabilities for both PyTorch and TensorFlow deep learning frameworks. The release includes:
149
+
150
+ - Complete PyTorch profiler with real-time monitoring, leak detection, and visualization
151
+ - Complete TensorFlow profiler with TensorFlow-specific optimizations
152
+ - Command-line interfaces for both frameworks
153
+ - CPU compatibility for systems without GPU support
154
+ - Comprehensive documentation and testing guides
155
+ - Open source project structure ready for community contributions
156
+
157
+ ### Breaking Changes
158
+
159
+ None - this is the initial release.
160
+
161
+ ### Known Issues
162
+
163
+ - Some visualization features may require additional dependencies (PyQt5, tkinter)
164
+ - TensorFlow CLI may have dependency conflicts with certain typing-extensions versions
165
+ - CPU profiling is limited compared to GPU profiling capabilities
166
+
167
+ ### Migration Guide
168
+
169
+ N/A - this is the initial release.
170
+
171
+ ---
172
+
173
+ ## Contributing to the Changelog
174
+
175
+ When contributing to the project, please update this changelog by adding entries under the appropriate version section. Follow the format:
176
+
177
+ - **Added** for new features
178
+ - **Changed** for changes in existing functionality
179
+ - **Deprecated** for soon-to-be removed features
180
+ - **Removed** for now removed features
181
+ - **Fixed** for any bug fixes
182
+ - **Security** for security vulnerability fixes
183
+
184
+ Use the present tense ("Add" not "Added") and imperative mood ("Move cursor to..." not "Moves cursor to...").
185
+
186
+ ---
187
+
188
+ **For more information about this project, see the [README](README.md) and [Documentation](docs/index.md).**