mlenvdoctor 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mlenvdoctor/__init__.py CHANGED
@@ -1,4 +1,18 @@
  """ML Environment Doctor - Diagnose and fix ML environments for LLM fine-tuning."""
  
- __version__ = "0.1.0"
+ __version__ = "0.1.2"
  
+ __all__ = [
+     "__version__",
+     "diagnose",
+     "fix",
+     "dockerize",
+     "export",
+     "exceptions",
+     "logger",
+     "config",
+     "validators",
+     "retry",
+     "parallel",
+     "constants",
+ ]
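
The version bump and the new `__all__` define the package's public surface. As a quick sanity check (a minimal sketch, assuming the 0.1.2 wheel is installed):

```python
import mlenvdoctor

# Both attributes are defined in mlenvdoctor/__init__.py as of 0.1.2.
print(mlenvdoctor.__version__)           # 0.1.2
print("export" in mlenvdoctor.__all__)   # True: export is now declared public
```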
mlenvdoctor/cli.py CHANGED
@@ -1,5 +1,6 @@
  """CLI entrypoint for ML Environment Doctor."""
  
+ from pathlib import Path
  from typing import Optional
  
  import typer
@@ -7,13 +8,16 @@ import typer
  from . import __version__
  from .diagnose import diagnose_env, print_diagnostic_table
  from .dockerize import generate_dockerfile, generate_service_template
+ from .export import export_csv, export_html, export_json
  from .fix import auto_fix
- from .gpu import benchmark_gpu_ops, smoke_test_lora, test_model
+ from .gpu import benchmark_gpu_ops, smoke_test_lora, test_model as gpu_test_model
+ from .icons import icon_check, icon_cross, icon_search, icon_test, icon_whale, icon_wrench
+ from .logger import get_default_log_file, setup_logger
  from .utils import console
  
  app = typer.Typer(
      name="mlenvdoctor",
-     help="🔍 ML Environment Doctor - Diagnose & fix ML environments for LLM fine-tuning",
+     help=f"{icon_search()} ML Environment Doctor - Diagnose & fix ML environments for LLM fine-tuning",
      add_completion=False,
  )
  
@@ -21,26 +25,56 @@ app = typer.Typer(
  def version_callback(value: bool):
      """Print version and exit."""
      if value:
-         console.print(f"[bold blue]ML Environment Doctor[/bold blue] version [cyan]{__version__}[/cyan]")
+         console.print(
+             f"[bold blue]ML Environment Doctor[/bold blue] version [cyan]{__version__}[/cyan]"
+         )
          raise typer.Exit()
  
  
  @app.callback()
  def main(
      version: Optional[bool] = typer.Option(
-         None, "--version", "-v", callback=version_callback, is_eager=True, help="Show version and exit"
+         None,
+         "--version",
+         "-v",
+         callback=version_callback,
+         is_eager=True,
+         help="Show version and exit",
+     ),
+     log_file: Optional[Path] = typer.Option(
+         None,
+         "--log-file",
+         help="Path to log file (default: ~/.mlenvdoctor/logs/mlenvdoctor.log)",
+     ),
+     log_level: str = typer.Option(
+         "INFO",
+         "--log-level",
+         help="Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL",
      ),
  ):
      """ML Environment Doctor - Diagnose & fix ML environments for LLM fine-tuning."""
-     pass
+     # Set up logging
+     log_path = log_file or get_default_log_file()
+     setup_logger(log_file=log_path, level=log_level)
  
  
  @app.command()
  def diagnose(
-     full: bool = typer.Option(False, "--full", "-f", help="Run full diagnostics including GPU benchmarks"),
+     full: bool = typer.Option(
+         False, "--full", "-f", help="Run full diagnostics including GPU benchmarks"
+     ),
+     json_output: Optional[Path] = typer.Option(
+         None, "--json", help="Export results to JSON file"
+     ),
+     csv_output: Optional[Path] = typer.Option(
+         None, "--csv", help="Export results to CSV file"
+     ),
+     html_output: Optional[Path] = typer.Option(
+         None, "--html", help="Export results to HTML file"
+     ),
  ):
-     """
-     🔍 Diagnose your ML environment.
+     f"""
+     {icon_search()} Diagnose your ML environment.
  
      Quick scan: Checks CUDA, PyTorch, and required ML libraries.
      Full scan (--full): Also checks GPU memory, disk space, Docker GPU support, and connectivity.
@@ -48,6 +82,17 @@ def diagnose(
      issues = diagnose_env(full=full)
      print_diagnostic_table(issues)
  
+     # Export to formats if requested
+     if json_output:
+         export_json(issues, json_output)
+         console.print(f"[green]{icon_check()} Exported to {json_output}[/green]")
+     if csv_output:
+         export_csv(issues, csv_output)
+         console.print(f"[green]{icon_check()} Exported to {csv_output}[/green]")
+     if html_output:
+         export_html(issues, html_output)
+         console.print(f"[green]{icon_check()} Exported to {html_output}[/green]")
+
      if full:
          console.print()
          console.print("[bold blue]Running GPU benchmark...[/bold blue]")
@@ -69,8 +114,8 @@ def fix(
      venv: bool = typer.Option(False, "--venv", "-v", help="Create virtual environment"),
      stack: str = typer.Option("trl-peft", "--stack", "-s", help="ML stack: trl-peft or minimal"),
  ):
-     """
-     🔧 Auto-fix environment issues and generate requirements.
+     f"""
+     {icon_wrench()} Auto-fix environment issues and generate requirements.
  
      Generates requirements.txt or conda environment file based on detected issues.
      Optionally creates a virtual environment and installs dependencies.
@@ -78,18 +123,22 @@ def fix(
      success = auto_fix(use_conda=conda, create_venv=venv, stack=stack)
      if success:
          console.print()
-         console.print("[bold green] Auto-fix completed![/bold green]")
+         console.print(f"[bold green]{icon_check()} Auto-fix completed![/bold green]")
          console.print("[yellow]💡 Run 'mlenvdoctor diagnose' to verify fixes[/yellow]")
  
  
  @app.command()
  def dockerize(
      model: Optional[str] = typer.Argument(None, help="Model name (mistral-7b, tinyllama, gpt2)"),
-     service: bool = typer.Option(False, "--service", "-s", help="Generate FastAPI service template"),
-     output: str = typer.Option("Dockerfile.mlenvdoctor", "--output", "-o", help="Output Dockerfile name"),
+     service: bool = typer.Option(
+         False, "--service", "-s", help="Generate FastAPI service template"
+     ),
+     output: str = typer.Option(
+         "Dockerfile.mlenvdoctor", "--output", "-o", help="Output Dockerfile name"
+     ),
  ):
-     """
-     🐳 Generate Dockerfile for ML fine-tuning.
+     f"""
+     {icon_whale()} Generate Dockerfile for ML fine-tuning.
  
      Creates a production-ready Dockerfile with CUDA support.
      Optionally generates a FastAPI service template.
@@ -102,44 +151,46 @@ def dockerize(
      generate_dockerfile(model_name=model, service=service, output_file=output)
  
      console.print()
-     console.print("[bold green] Dockerfile generated![/bold green]")
+     console.print(f"[bold green]{icon_check()} Dockerfile generated![/bold green]")
  
  
- @app.command()
- def test_model(
+ @app.command(name="test-model")
+ def test_model_cmd(
      model: str = typer.Argument("tinyllama", help="Model to test (tinyllama, gpt2, mistral-7b)"),
  ):
-     """
-     🧪 Run smoke test with a real LLM model.
+     f"""
+     {icon_test()} Run smoke test with a real LLM model.
  
      Tests model loading and forward pass to verify fine-tuning readiness.
      """
-     console.print(f"[bold blue]🧪 Testing model: {model}[/bold blue]\n")
-     success = test_model(model_name=model)
+     console.print(f"[bold blue]{icon_test()} Testing model: {model}[/bold blue]\n")
+     success = gpu_test_model(model_name=model)
      if success:
          console.print()
-         console.print("[bold green] Model test passed! Ready for fine-tuning.[/bold green]")
+         console.print(f"[bold green]{icon_check()} Model test passed! Ready for fine-tuning.[/bold green]")
      else:
          console.print()
-         console.print("[bold red] Model test failed. Check diagnostics.[/bold red]")
+         console.print(f"[bold red]{icon_cross()} Model test failed. Check diagnostics.[/bold red]")
          raise typer.Exit(1)
  
  
  @app.command()
  def smoke_test():
-     """
-     🧪 Run LoRA fine-tuning smoke test.
+     f"""
+     {icon_test()} Run LoRA fine-tuning smoke test.
  
      Performs a minimal LoRA fine-tuning test to verify environment setup.
      """
-     console.print("[bold blue]🧪 Running LoRA smoke test...[/bold blue]\n")
+     console.print(f"[bold blue]{icon_test()} Running LoRA smoke test...[/bold blue]\n")
      success = smoke_test_lora()
      if success:
          console.print()
-         console.print("[bold green] Smoke test passed! Environment is ready.[/bold green]")
+         console.print(f"[bold green]{icon_check()} Smoke test passed! Environment is ready.[/bold green]")
      else:
          console.print()
-         console.print("[bold red]❌ Smoke test failed. Run 'mlenvdoctor diagnose' for details.[/bold red]")
+         console.print(
+             f"[bold red]{icon_cross()} Smoke test failed. Run 'mlenvdoctor diagnose' for details.[/bold red]"
+         )
          raise typer.Exit(1)
  
  
@@ -150,4 +201,3 @@ def main_cli():
  
  if __name__ == "__main__":
      main_cli()
-
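
Taken together, the cli.py changes add root-level `--log-file`/`--log-level` options, export flags on `diagnose`, and rename the `test_model` command to `test-model`. Illustrative invocations inferred from the option declarations above (not taken from the package's documentation):

```bash
# Options on the root callback go before the subcommand.
mlenvdoctor --log-level DEBUG --log-file ./doctor.log diagnose --full

# The new export flags write the diagnostic results to files.
mlenvdoctor diagnose --json report.json --csv report.csv --html report.html

# The smoke-test command is now spelled test-model.
mlenvdoctor test-model tinyllama
```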
mlenvdoctor/config.py ADDED
@@ -0,0 +1,169 @@
+ """Configuration management for ML Environment Doctor."""
+ 
+ from pathlib import Path
+ from typing import Any, Dict, Optional
+ 
+ # Try tomllib (Python 3.11+)
+ try:
+     import tomllib
+ except ImportError:
+     tomllib = None  # type: ignore
+ 
+ # Fallback to tomli for older Python versions
+ try:
+     import tomli
+ except ImportError:
+     tomli = None
+ 
+ from .exceptions import ConfigurationError
+ from .utils import get_home_config_dir
+ 
+ 
+ def load_config(config_path: Optional[Path] = None) -> Dict[str, Any]:
+     """
+     Load configuration from TOML file.
+ 
+     Args:
+         config_path: Path to config file. If None, searches for:
+             1. mlenvdoctor.toml in current directory
+             2. .mlenvdoctorrc in current directory
+             3. ~/.mlenvdoctor/config.toml
+ 
+     Returns:
+         Configuration dictionary
+ 
+     Raises:
+         ConfigurationError: If config file is invalid
+     """
+     default_config: Dict[str, Any] = {
+         "diagnostics": {
+             "full_scan": False,
+             "skip_checks": [],
+         },
+         "fix": {
+             "default_stack": "trl-peft",
+             "auto_install": False,
+         },
+         "docker": {
+             "default_base_image": "nvidia/cuda:12.4.0-devel-ubuntu22.04",
+         },
+         "logging": {
+             "level": "INFO",
+             "file": None,
+         },
+     }
+ 
+     if config_path is None:
+         # Search for config files
+         search_paths = [
+             Path("mlenvdoctor.toml"),
+             Path(".mlenvdoctorrc"),
+             get_home_config_dir() / "config.toml",
+         ]
+ 
+         for path in search_paths:
+             if path.exists():
+                 config_path = path
+                 break
+     else:
+         if not config_path.exists():
+             raise ConfigurationError(
+                 f"Config file not found: {config_path}",
+                 "Create the file or use default configuration",
+             )
+ 
+     if config_path is None or not config_path.exists():
+         return default_config
+ 
+     try:
+         # Try tomllib (Python 3.11+)
+         if tomllib is not None:
+             with config_path.open("rb") as f:
+                 user_config = tomllib.load(f)
+         elif tomli is not None:
+             # Fallback to tomli for older Python
+             with config_path.open("rb") as f:
+                 user_config = tomli.load(f)
+         else:
+             raise ConfigurationError(
+                 "TOML parsing not available. Install tomli: pip install tomli",
+                 "Or upgrade to Python 3.11+",
+             )
+ 
+         # Merge with defaults
+         merged_config = default_config.copy()
+         for section, values in user_config.items():
+             if section in merged_config and isinstance(merged_config[section], dict):
+                 merged_config[section].update(values)
+             else:
+                 merged_config[section] = values
+ 
+         return merged_config
+ 
+     except Exception as e:
+         raise ConfigurationError(
+             f"Error parsing config file {config_path}: {e}",
+             "Check TOML syntax and file permissions",
+         ) from e
+ 
+ 
+ def get_config_value(config: Dict[str, Any], *keys: str, default: Any = None) -> Any:
+     """
+     Get nested config value safely.
+ 
+     Args:
+         config: Configuration dictionary
+         *keys: Nested keys to traverse
+         default: Default value if key not found
+ 
+     Returns:
+         Config value or default
+     """
+     value = config
+     for key in keys:
+         if isinstance(value, dict):
+             value = value.get(key)
+             if value is None:
+                 return default
+         else:
+             return default
+     return value if value is not None else default
+ 
+ 
+ def create_default_config(output_path: Path) -> Path:
+     """
+     Create a default configuration file.
+ 
+     Args:
+         output_path: Path where to create config file
+ 
+     Returns:
+         Path to created config file
+     """
+     default_content = """# ML Environment Doctor Configuration
+ 
+ [diagnostics]
+ # Run full scan by default
+ full_scan = false
+ # Skip specific checks (e.g., ["docker_gpu", "internet"])
+ skip_checks = []
+ 
+ [fix]
+ # Default ML stack: "trl-peft" or "minimal"
+ default_stack = "trl-peft"
+ # Automatically install dependencies without prompting
+ auto_install = false
+ 
+ [docker]
+ # Default base image for Dockerfiles
+ default_base_image = "nvidia/cuda:12.4.0-devel-ubuntu22.04"
+ 
+ [logging]
+ # Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL
+ level = "INFO"
+ # Log file path (None for default: ~/.mlenvdoctor/logs/mlenvdoctor.log)
+ file = null
+ """
+ 
+     output_path.write_text(default_content, encoding="utf-8")
+     return output_path
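
When no config file exists on the search path, `load_config` falls back to the built-in defaults, and `get_config_value` traverses nested keys without raising on missing sections. A minimal sketch of consuming this module (assuming the package is installed and no `mlenvdoctor.toml` is present):

```python
from mlenvdoctor.config import get_config_value, load_config

# No config file found -> the defaults defined in load_config are returned.
config = load_config()

stack = get_config_value(config, "fix", "default_stack", default="trl-peft")
image = get_config_value(config, "docker", "default_base_image")
print(stack)  # trl-peft
print(image)  # nvidia/cuda:12.4.0-devel-ubuntu22.04
```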
mlenvdoctor/constants.py ADDED
@@ -0,0 +1,63 @@
+ """Constants used throughout ML Environment Doctor."""
+ 
+ from typing import Final
+ 
+ # Version compatibility
+ MIN_PYTHON_VERSION: Final[tuple[int, int]] = (3, 8)
+ MIN_PYTORCH_VERSION: Final[str] = "2.4.0"
+ 
+ # CUDA versions
+ SUPPORTED_CUDA_VERSIONS: Final[list[str]] = ["12.1", "12.4"]
+ DEFAULT_CUDA_VERSION: Final[str] = "12.4"
+ 
+ # ML Library versions
+ MIN_TRANSFORMERS_VERSION: Final[str] = "4.44.0"
+ MIN_PEFT_VERSION: Final[str] = "0.12.0"
+ MIN_TRL_VERSION: Final[str] = "0.9.0"
+ MIN_DATASETS_VERSION: Final[str] = "2.20.0"
+ MIN_ACCELERATE_VERSION: Final[str] = "1.0.0"
+ 
+ # Memory requirements (GB)
+ MIN_GPU_MEMORY_GB: Final[int] = 8
+ RECOMMENDED_GPU_MEMORY_GB: Final[int] = 16
+ MIN_DISK_SPACE_GB: Final[int] = 50
+ 
+ # Timeouts (seconds)
+ DEFAULT_COMMAND_TIMEOUT: Final[int] = 30
+ DEFAULT_NETWORK_TIMEOUT: Final[int] = 10
+ DEFAULT_INSTALL_TIMEOUT: Final[int] = 600
+ 
+ # File paths
+ DEFAULT_CONFIG_FILE: Final[str] = "mlenvdoctor.toml"
+ DEFAULT_REQUIREMENTS_FILE: Final[str] = "requirements-mlenvdoctor.txt"
+ DEFAULT_DOCKERFILE: Final[str] = "Dockerfile.mlenvdoctor"
+ 
+ # Model names
+ SUPPORTED_MODELS: Final[dict[str, str]] = {
+     "tinyllama": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+     "gpt2": "gpt2",
+     "mistral-7b": "mistralai/Mistral-7B-v0.1",
+ }
+ 
+ # ML Stacks
+ ML_STACKS: Final[list[str]] = ["trl-peft", "minimal"]
+ 
+ # Diagnostic check names
+ CHECK_CUDA_DRIVER: Final[str] = "cuda_driver"
+ CHECK_PYTORCH_CUDA: Final[str] = "pytorch_cuda"
+ CHECK_ML_LIBRARIES: Final[str] = "ml_libraries"
+ CHECK_GPU_MEMORY: Final[str] = "gpu_memory"
+ CHECK_DISK_SPACE: Final[str] = "disk_space"
+ CHECK_DOCKER_GPU: Final[str] = "docker_gpu"
+ CHECK_INTERNET: Final[str] = "internet"
+ 
+ # Severity levels
+ SEVERITY_CRITICAL: Final[str] = "critical"
+ SEVERITY_WARNING: Final[str] = "warning"
+ SEVERITY_INFO: Final[str] = "info"
+ 
+ # Status values
+ STATUS_PASS: Final[str] = "PASS"
+ STATUS_FAIL: Final[str] = "FAIL"
+ STATUS_WARN: Final[str] = "WARN"
+ STATUS_INFO: Final[str] = "INFO"
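
A sketch of how these constants might back a diagnostic check; the actual checks live in modules not shown in this diff, so this is illustrative only:

```python
import sys

from mlenvdoctor.constants import (
    MIN_PYTHON_VERSION,
    STATUS_FAIL,
    STATUS_PASS,
    SUPPORTED_MODELS,
)

# Compare the running interpreter against the declared floor.
status = STATUS_PASS if sys.version_info[:2] >= MIN_PYTHON_VERSION else STATUS_FAIL
print(f"python >= {MIN_PYTHON_VERSION}: {status}")

# Short aliases resolve to full Hugging Face model ids.
print(SUPPORTED_MODELS["mistral-7b"])  # mistralai/Mistral-7B-v0.1
```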