mlenvdoctor 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. mlenvdoctor-0.1.2/.github/workflows/ci.yml +137 -0
  2. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/.gitignore +6 -0
  3. mlenvdoctor-0.1.2/.pre-commit-config.yaml +34 -0
  4. mlenvdoctor-0.1.2/IMPROVEMENTS.md +265 -0
  5. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/PKG-INFO +3 -2
  6. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/README.md +1 -1
  7. mlenvdoctor-0.1.2/docker/README.md +324 -0
  8. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/pyproject.toml +39 -1
  9. mlenvdoctor-0.1.2/results.html +183 -0
  10. mlenvdoctor-0.1.2/results.json +64 -0
  11. mlenvdoctor-0.1.2/scripts/test_cli_improvements.py +350 -0
  12. mlenvdoctor-0.1.2/src/mlenvdoctor/__init__.py +18 -0
  13. mlenvdoctor-0.1.2/src/mlenvdoctor/cli.py +203 -0
  14. mlenvdoctor-0.1.2/src/mlenvdoctor/config.py +169 -0
  15. mlenvdoctor-0.1.2/src/mlenvdoctor/constants.py +63 -0
  16. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/src/mlenvdoctor/diagnose.py +146 -46
  17. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/src/mlenvdoctor/dockerize.py +3 -6
  18. mlenvdoctor-0.1.2/src/mlenvdoctor/exceptions.py +51 -0
  19. mlenvdoctor-0.1.2/src/mlenvdoctor/export.py +290 -0
  20. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/src/mlenvdoctor/fix.py +19 -13
  21. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/src/mlenvdoctor/gpu.py +15 -9
  22. mlenvdoctor-0.1.2/src/mlenvdoctor/icons.py +100 -0
  23. mlenvdoctor-0.1.2/src/mlenvdoctor/logger.py +81 -0
  24. mlenvdoctor-0.1.2/src/mlenvdoctor/parallel.py +115 -0
  25. mlenvdoctor-0.1.2/src/mlenvdoctor/retry.py +92 -0
  26. mlenvdoctor-0.1.2/src/mlenvdoctor/utils.py +164 -0
  27. mlenvdoctor-0.1.2/src/mlenvdoctor/validators.py +217 -0
  28. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/tests/__init__.py +0 -1
  29. mlenvdoctor-0.1.2/tests/test_cli.py +181 -0
  30. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/tests/test_diagnose.py +0 -3
  31. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/tests/test_dockerize.py +0 -3
  32. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/tests/test_fix.py +0 -1
  33. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/tests/test_utils.py +0 -5
  34. mlenvdoctor-0.1.2/tests/test_validators.py +126 -0
  35. mlenvdoctor-0.1.0/.github/workflows/ci.yml +0 -79
  36. mlenvdoctor-0.1.0/docker/README.md +0 -32
  37. mlenvdoctor-0.1.0/src/mlenvdoctor/__init__.py +0 -4
  38. mlenvdoctor-0.1.0/src/mlenvdoctor/cli.py +0 -153
  39. mlenvdoctor-0.1.0/src/mlenvdoctor/utils.py +0 -107
  40. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/CHANGELOG.md +0 -0
  41. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/CONTRIBUTING.md +0 -0
  42. {mlenvdoctor-0.1.0 → mlenvdoctor-0.1.2}/LICENSE +0 -0
@@ -0,0 +1,137 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, master, develop]
6
+ pull_request:
7
+ branches: [main, master, develop]
8
+ release:
9
+ types: [published]
10
+
11
+ jobs:
12
+ test:
13
+ name: Test Python ${{ matrix.python-version }}
14
+ runs-on: ${{ matrix.os }}
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ os: [ubuntu-latest, windows-latest, macos-latest]
19
+ python-version: ["3.8", "3.9", "3.10", "3.11"]
20
+
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+
29
+ - name: Install dependencies
30
+ run: |
31
+ python -m pip install --upgrade pip
32
+ pip install -e ".[dev]"
33
+
34
+ - name: Run linters
35
+ run: |
36
+ black --check src/ tests/
37
+ ruff check src/ tests/
38
+
39
+ - name: Run type checker
40
+ run: |
41
+ mypy src/ || true # Optional for now
42
+
43
+ - name: Run tests
44
+ run: |
45
+ pytest --cov=mlenvdoctor --cov-report=xml --cov-report=term-missing
46
+
47
+ - name: Upload coverage
48
+ uses: codecov/codecov-action@v3
49
+ with:
50
+ file: ./coverage.xml
51
+ flags: unittests
52
+ name: codecov-umbrella
53
+ fail_ci_if_error: false
54
+
55
+ lint:
56
+ name: Lint
57
+ runs-on: ubuntu-latest
58
+ steps:
59
+ - uses: actions/checkout@v4
60
+
61
+ - name: Set up Python
62
+ uses: actions/setup-python@v5
63
+ with:
64
+ python-version: "3.11"
65
+
66
+ - name: Install dependencies
67
+ run: |
68
+ python -m pip install --upgrade pip
69
+ pip install black ruff mypy
70
+
71
+ - name: Check formatting
72
+ run: black --check src/ tests/
73
+
74
+ - name: Run ruff
75
+ run: ruff check src/ tests/
76
+
77
+ - name: Type check
78
+ run: mypy src/ || true
79
+
80
+ build:
81
+ name: Build package
82
+ runs-on: ubuntu-latest
83
+ needs: [test, lint]
84
+ steps:
85
+ - uses: actions/checkout@v4
86
+
87
+ - name: Set up Python
88
+ uses: actions/setup-python@v5
89
+ with:
90
+ python-version: "3.11"
91
+
92
+ - name: Install build dependencies
93
+ run: |
94
+ python -m pip install --upgrade pip
95
+ pip install build
96
+
97
+ - name: Build package
98
+ run: python -m build
99
+
100
+ - name: Check package
101
+ run: |
102
+ pip install twine
103
+ twine check dist/*
104
+
105
+ - name: Upload artifacts
106
+ uses: actions/upload-artifact@v3
107
+ with:
108
+ name: dist
109
+ path: dist/
110
+
111
+ publish:
112
+ name: Publish to PyPI
113
+ runs-on: ubuntu-latest
114
+ needs: [build]
115
+ if: github.event_name == 'release' && github.event.action == 'published'
116
+ steps:
117
+ - uses: actions/checkout@v4
118
+
119
+ - name: Download artifacts
120
+ uses: actions/download-artifact@v3
121
+ with:
122
+ name: dist
123
+ path: dist/
124
+
125
+ - name: Set up Python
126
+ uses: actions/setup-python@v5
127
+ with:
128
+ python-version: "3.11"
129
+
130
+ - name: Install twine
131
+ run: pip install twine
132
+
133
+ - name: Publish to PyPI
134
+ env:
135
+ TWINE_USERNAME: __token__
136
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
137
+ run: twine upload dist/*
@@ -139,3 +139,9 @@ environment-mlenvdoctor.yml
139
139
  Dockerfile.mlenvdoctor
140
140
  *.mlenvdoctor
141
141
 
142
+ # Local docs / helper guides (not for distribution)
143
+ QUICK_CLI_TEST.md
144
+ CLI_TESTING_GUIDE.md
145
+ TECHNICAL_IMPROVEMENTS.md
146
+ TECHNICAL_IMPROVEMENTS_SUMMARY.md
147
+ CHANGES_SUMMARY.md
@@ -0,0 +1,34 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.5.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-added-large-files
9
+ - id: check-json
10
+ - id: check-toml
11
+ - id: check-merge-conflict
12
+ - id: debug-statements
13
+ - id: mixed-line-ending
14
+
15
+ - repo: https://github.com/psf/black
16
+ rev: 23.12.1
17
+ hooks:
18
+ - id: black
19
+ language_version: python3
20
+ args: [--line-length=100]
21
+
22
+ - repo: https://github.com/astral-sh/ruff-pre-commit
23
+ rev: v0.1.9
24
+ hooks:
25
+ - id: ruff
26
+ args: [--fix, --exit-non-zero-on-fix]
27
+
28
+ - repo: https://github.com/pre-commit/mirrors-mypy
29
+ rev: v1.8.0
30
+ hooks:
31
+ - id: mypy
32
+ additional_dependencies: [types-all]
33
+ args: [--ignore-missing-imports]
34
+ exclude: ^tests/
@@ -0,0 +1,265 @@
1
+ # 🚀 ML Environment Doctor - Improvement Recommendations
2
+
3
+ This document outlines potential improvements for the ML Environment Doctor project, organized by priority and category.
4
+
5
+ ## 🔴 Critical Issues (Fix Immediately)
6
+
7
+ ### 1. Version Mismatch
8
+ - **Issue**: `src/mlenvdoctor/__init__.py` has version `0.1.0` but `pyproject.toml` has `0.1.1`
9
+ - **Fix**: Synchronize versions across all files
10
+ - **Impact**: Version inconsistency can cause confusion and packaging issues
11
+
12
+ ### 2. Missing CI/CD Pipeline
13
+ - **Issue**: No GitHub Actions workflow despite being mentioned in CHANGELOG
14
+ - **Fix**: Create `.github/workflows/ci.yml` with:
15
+ - Automated testing on multiple Python versions (3.8, 3.9, 3.10, 3.11)
16
+ - Linting (black, ruff, mypy)
17
+ - Test coverage reporting
18
+ - Automated PyPI publishing on tags
19
+ - **Impact**: No automated quality checks, harder to maintain
20
+
21
+ ### 3. Missing Pre-commit Configuration
22
+ - **Issue**: Pre-commit hooks mentioned in CONTRIBUTING.md but no `.pre-commit-config.yaml`
23
+ - **Fix**: Add pre-commit config with black, ruff, and other hooks
24
+ - **Impact**: Inconsistent code quality, manual checks required
25
+
26
+ ## 🟠 High Priority Improvements
27
+
28
+ ### 4. Enhanced Logging System
29
+ - **Current**: Only console output via Rich
30
+ - **Improvement**: Add proper logging with levels (DEBUG, INFO, WARNING, ERROR)
31
+ - File logging option (`--log-file`)
32
+ - Structured logging for programmatic access
33
+ - Log rotation
34
+ - **Benefits**: Better debugging, audit trails, production readiness
35
+
36
+ ### 5. Export/Report Functionality
37
+ - **Current**: Only console output
38
+ - **Improvement**: Add export options:
39
+ - `--json` flag for JSON output
40
+ - `--csv` flag for CSV export
41
+ - `--html` flag for HTML report
42
+ - `--output` flag to specify file path
43
+ - **Benefits**: Integration with CI/CD, documentation, tracking over time
44
+
45
+ ### 6. Better Error Handling
46
+ - **Current**: Basic try-except blocks, some errors not caught
47
+ - **Improvement**:
48
+ - Custom exception classes (`MLEnvDoctorError`, `DiagnosticError`, etc.)
49
+ - Better error messages with actionable suggestions
50
+ - Error recovery where possible
51
+ - Stack traces in debug mode
52
+ - **Benefits**: Better user experience, easier debugging
53
+
54
+ ### 7. Configuration File Support
55
+ - **Current**: All settings are CLI flags
56
+ - **Improvement**: Add `mlenvdoctor.toml` or `.mlenvdoctorrc` config file:
57
+ ```toml
58
+ [diagnostics]
59
+ full_scan = false
60
+ skip_checks = ["docker_gpu"]
61
+
62
+ [fix]
63
+ default_stack = "trl-peft"
64
+ auto_install = false
65
+
66
+ [docker]
67
+ default_base_image = "nvidia/cuda:12.4.0-devel-ubuntu22.04"
68
+ ```
69
+ - **Benefits**: Better UX, repeatable configurations
70
+
71
+ ### 8. Requirements Locking
72
+ - **Current**: Generates requirements with `>=` version constraints
73
+ - **Improvement**:
74
+ - Add `--lock` flag to generate exact versions using `pip-compile`
75
+ - Support for `requirements-lock.txt` with hashes
76
+ - Verify lock file integrity
77
+ - **Benefits**: Reproducible environments, security
78
+
79
+ ## 🟡 Medium Priority Improvements
80
+
81
+ ### 9. Test Coverage Expansion
82
+ - **Current**: Minimal tests, mostly smoke tests
83
+ - **Improvement**:
84
+ - Unit tests for each diagnostic check
85
+ - Mock external dependencies (nvidia-smi, docker, etc.)
86
+ - Integration tests with test fixtures
87
+ - Test coverage target: >80%
88
+ - **Benefits**: Confidence in changes, catch regressions
89
+
90
+ ### 10. Progress Indicators
91
+ - **Current**: Basic spinners for some operations
92
+ - **Improvement**:
93
+ - Progress bars for long operations (model downloads, installations)
94
+ - Estimated time remaining
95
+ - Download progress for model files
96
+ - Better visual feedback
97
+ - **Benefits**: Better UX, users know what's happening
98
+
99
+ ### 11. Caching System
100
+ - **Current**: No caching, re-runs all checks every time
101
+ - **Improvement**:
102
+ - Cache diagnostic results (with TTL)
103
+ - Cache model downloads
104
+ - Cache version checks
105
+ - `--no-cache` flag to bypass
106
+ - **Benefits**: Faster subsequent runs, reduced network usage
107
+
108
+ ### 12. Interactive Mode
109
+ - **Current**: CLI flags only
110
+ - **Improvement**: Add `--interactive` mode:
111
+ - Prompt for missing information
112
+ - Confirm before auto-fixing
113
+ - Step-by-step fix wizard
114
+ - Guided setup for beginners
115
+ - **Benefits**: Better for new users, more control
116
+
117
+ ### 13. Multi-GPU Support
118
+ - **Current**: Only checks first GPU
119
+ - **Improvement**:
120
+ - Detect all GPUs
121
+ - Show per-GPU diagnostics
122
+ - Multi-GPU memory checks
123
+ - GPU topology detection
124
+ - **Benefits**: Better for multi-GPU setups
125
+
126
+ ### 14. Windows-Specific Improvements
127
+ - **Current**: Some paths may not work well on Windows
128
+ - **Improvement**:
129
+ - Better Windows path handling
130
+ - Windows-specific CUDA detection
131
+ - PowerShell vs CMD compatibility
132
+ - Windows service detection
133
+ - **Benefits**: Better Windows support
134
+
135
+ ### 15. Model Registry System
136
+ - **Current**: Hardcoded models in `dockerize.py`
137
+ - **Improvement**:
138
+ - External model registry (JSON/YAML)
139
+ - User-defined model templates
140
+ - Model discovery from Hugging Face
141
+ - Model recommendations based on GPU
142
+ - **Benefits**: Extensibility, easier updates
143
+
144
+ ## 🟢 Nice-to-Have Features
145
+
146
+ ### 16. Plugin System
147
+ - **Current**: Monolithic codebase
148
+ - **Improvement**:
149
+ - Plugin architecture for custom checks
150
+ - Plugin registry
151
+ - Community plugins
152
+ - **Benefits**: Extensibility, community contributions
153
+
154
+ ### 17. Telemetry (Opt-in)
155
+ - **Current**: No usage tracking
156
+ - **Improvement**:
157
+ - Opt-in anonymous usage statistics
158
+ - Error reporting (with user consent)
159
+ - Feature usage analytics
160
+ - **Benefits**: Understand user needs, prioritize features
161
+
162
+ ### 18. Documentation Improvements
163
+ - **Current**: Basic README
164
+ - **Improvement**:
165
+ - API documentation (Sphinx/MkDocs)
166
+ - Video tutorials
167
+ - Example workflows
168
+ - Troubleshooting guide
169
+ - FAQ section
170
+ - **Benefits**: Better onboarding, reduced support burden
171
+
172
+ ### 19. Performance Optimizations
173
+ - **Current**: Sequential checks
174
+ - **Improvement**:
175
+ - Parallel execution of independent checks
176
+ - Async I/O for network checks
177
+ - Faster version detection
178
+ - **Benefits**: Faster diagnostics
179
+
180
+ ### 20. Additional Diagnostic Checks
181
+ - **Current**: Basic checks
182
+ - **Improvement**:
183
+ - Python version compatibility
184
+ - Virtual environment detection
185
+ - Conda environment detection
186
+ - Jupyter notebook compatibility
187
+ - VS Code / PyCharm integration
188
+ - WSL2 GPU support
189
+ - Cloud GPU detection (AWS, GCP, Azure)
190
+ - **Benefits**: More comprehensive diagnostics
191
+
192
+ ### 21. Docker Improvements
193
+ - **Current**: Basic Dockerfile generation
194
+ - **Improvement**:
195
+ - Docker Compose templates
196
+ - Multi-stage builds
197
+ - BuildKit optimizations
198
+ - Health checks
199
+ - Volume management
200
+ - **Benefits**: Production-ready containers
201
+
202
+ ### 22. Integration with ML Frameworks
203
+ - **Current**: PyTorch-focused
204
+ - **Improvement**:
205
+ - TensorFlow support
206
+ - JAX support
207
+ - ONNX Runtime checks
208
+ - MLflow integration
209
+ - **Benefits**: Broader framework support
210
+
211
+ ### 23. Benchmark Suite
212
+ - **Current**: Basic GPU benchmark
213
+ - **Improvement**:
214
+ - Comprehensive benchmark suite
215
+ - Compare against baseline
216
+ - Performance regression detection
217
+ - Benchmark history
218
+ - **Benefits**: Performance monitoring
219
+
220
+ ### 24. Environment Comparison
221
+ - **Current**: Single environment diagnostics
222
+ - **Improvement**:
223
+ - Compare two environments
224
+ - Diff diagnostics
225
+ - Environment migration guide
226
+ - **Benefits**: Easier environment management
227
+
228
+ ### 25. Automated Fixes
229
+ - **Current**: Generates files, user installs
230
+ - **Improvement**:
231
+ - Automatic installation with confirmation
232
+ - Rollback on failure
233
+ - Dry-run mode
234
+ - Fix verification
235
+ - **Benefits**: True auto-fix capability
236
+
237
+ ## 📊 Implementation Priority Matrix
238
+
239
+ | Priority | Effort | Impact | Recommendation |
240
+ |----------|--------|--------|----------------|
241
+ | Critical | Low | High | Fix version mismatch, add CI/CD |
242
+ | High | Medium | High | Add logging, export, config files |
243
+ | Medium | Medium | Medium | Expand tests, add caching, interactive mode |
244
+ | Low | High | Medium | Plugin system, telemetry, framework support |
245
+
246
+ ## 🎯 Quick Wins (Low Effort, High Impact)
247
+
248
+ 1. **Fix version mismatch** (5 min)
249
+ 2. **Add CI/CD pipeline** (1-2 hours)
250
+ 3. **Add pre-commit config** (30 min)
251
+ 4. **Add JSON export** (1-2 hours)
252
+ 5. **Improve error messages** (2-3 hours)
253
+ 6. **Add progress bars** (2-3 hours)
254
+
255
+ ## 📝 Notes
256
+
257
+ - Consider breaking into phases: Phase 1 (Critical + High Priority), Phase 2 (Medium), Phase 3 (Nice-to-have)
258
+ - Community feedback should guide priority
259
+ - Some features may require breaking changes (version 0.2.0+)
260
+ - Consider backward compatibility when adding features
261
+
262
+ ---
263
+
264
+ **Last Updated**: 2024
265
+ **Status**: Recommendations for project improvement
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mlenvdoctor
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: Diagnose & fix ML environments for LLM fine-tuning
5
5
  Author: ML Environment Doctor Contributors
6
6
  License: MIT
@@ -20,6 +20,7 @@ Requires-Python: >=3.8
20
20
  Requires-Dist: packaging>=23.0
21
21
  Requires-Dist: psutil>=5.9.0
22
22
  Requires-Dist: rich>=13.0.0
23
+ Requires-Dist: tomli>=2.0.0; python_version < '3.11'
23
24
  Requires-Dist: typer>=0.9.0
24
25
  Provides-Extra: dev
25
26
  Requires-Dist: black>=23.0.0; extra == 'dev'
@@ -34,7 +35,7 @@ Description-Content-Type: text/markdown
34
35
 
35
36
  [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
36
37
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
37
- [![PyPI version](https://badge.fury.io/py/mlenvdoctor.svg)](https://badge.fury.io/py/mlenvdoctor)
38
+ [![PyPI](https://img.shields.io/pypi/v/mlenvdoctor.svg)](https://pypi.org/project/mlenvdoctor/)
38
39
 
39
40
  > **Single command fixes 90% of "my torch.cuda.is_available() is False" issues.**
40
41
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
4
4
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
- [![PyPI version](https://badge.fury.io/py/mlenvdoctor.svg)](https://badge.fury.io/py/mlenvdoctor)
5
+ [![PyPI](https://img.shields.io/pypi/v/mlenvdoctor.svg)](https://pypi.org/project/mlenvdoctor/)
6
6
 
7
7
  > **Single command fixes 90% of "my torch.cuda.is_available() is False" issues.**
8
8