hydraflow 0.12.4__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- hydraflow/cli.py +66 -9
- hydraflow/core/io.py +10 -25
- hydraflow/executor/job.py +132 -78
- {hydraflow-0.12.4.dist-info → hydraflow-0.13.0.dist-info}/METADATA +1 -1
- {hydraflow-0.12.4.dist-info → hydraflow-0.13.0.dist-info}/RECORD +8 -8
- {hydraflow-0.12.4.dist-info → hydraflow-0.13.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.12.4.dist-info → hydraflow-0.13.0.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.12.4.dist-info → hydraflow-0.13.0.dist-info}/licenses/LICENSE +0 -0
hydraflow/cli.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
|
+
import shlex
|
5
6
|
from typing import Annotated
|
6
7
|
|
7
8
|
import typer
|
@@ -12,30 +13,86 @@ app = typer.Typer(add_completion=False)
|
|
12
13
|
console = Console()
|
13
14
|
|
14
15
|
|
15
|
-
@app.command()
|
16
|
+
@app.command(context_settings={"ignore_unknown_options": True})
|
16
17
|
def run(
|
17
18
|
name: Annotated[str, Argument(help="Job name.", show_default=False)],
|
18
19
|
*,
|
20
|
+
args: Annotated[
|
21
|
+
list[str] | None,
|
22
|
+
Argument(help="Arguments to pass to the job.", show_default=False),
|
23
|
+
] = None,
|
19
24
|
dry_run: Annotated[
|
20
25
|
bool,
|
21
|
-
Option("--dry-run", help="Perform a dry run"),
|
26
|
+
Option("--dry-run", help="Perform a dry run."),
|
22
27
|
] = False,
|
23
28
|
) -> None:
|
24
29
|
"""Run a job."""
|
30
|
+
from hydraflow.executor.io import get_job
|
31
|
+
from hydraflow.executor.job import iter_batches, iter_calls, iter_runs
|
32
|
+
|
33
|
+
args = args or []
|
34
|
+
job = get_job(name)
|
35
|
+
|
36
|
+
if job.run:
|
37
|
+
args = [*shlex.split(job.run), *args]
|
38
|
+
it = iter_runs(args, iter_batches(job), dry_run=dry_run)
|
39
|
+
elif job.call:
|
40
|
+
args = [*shlex.split(job.call), *args]
|
41
|
+
it = iter_calls(args, iter_batches(job), dry_run=dry_run)
|
42
|
+
else:
|
43
|
+
typer.echo(f"No command found in job: {job.name}.")
|
44
|
+
raise typer.Exit(1)
|
45
|
+
|
46
|
+
if not dry_run:
|
47
|
+
import mlflow
|
48
|
+
|
49
|
+
mlflow.set_experiment(job.name)
|
50
|
+
|
51
|
+
for task in it: # jobs will be executed here
|
52
|
+
if job.run and dry_run:
|
53
|
+
typer.echo(shlex.join(task.args))
|
54
|
+
elif job.call and dry_run:
|
55
|
+
funcname, *args = task.args
|
56
|
+
arg = ", ".join(f"{arg!r}" for arg in args)
|
57
|
+
typer.echo(f"{funcname}([{arg}])")
|
58
|
+
|
25
59
|
|
60
|
+
@app.command(context_settings={"ignore_unknown_options": True})
|
61
|
+
def submit(
|
62
|
+
name: Annotated[str, Argument(help="Job name.", show_default=False)],
|
63
|
+
*,
|
64
|
+
args: Annotated[
|
65
|
+
list[str] | None,
|
66
|
+
Argument(help="Arguments to pass to the job.", show_default=False),
|
67
|
+
] = None,
|
68
|
+
dry_run: Annotated[
|
69
|
+
bool,
|
70
|
+
Option("--dry-run", help="Perform a dry run."),
|
71
|
+
] = False,
|
72
|
+
) -> None:
|
73
|
+
"""Submit a job."""
|
26
74
|
from hydraflow.executor.io import get_job
|
27
|
-
from hydraflow.executor.job import
|
75
|
+
from hydraflow.executor.job import iter_batches, submit
|
28
76
|
|
77
|
+
args = args or []
|
29
78
|
job = get_job(name)
|
30
79
|
|
31
|
-
if
|
32
|
-
typer.echo(
|
33
|
-
raise typer.Exit
|
80
|
+
if not job.run:
|
81
|
+
typer.echo(f"No run found in job: {job.name}.")
|
82
|
+
raise typer.Exit(1)
|
83
|
+
|
84
|
+
if not dry_run:
|
85
|
+
import mlflow
|
86
|
+
|
87
|
+
mlflow.set_experiment(job.name)
|
34
88
|
|
35
|
-
|
89
|
+
args = [*shlex.split(job.run), *args]
|
90
|
+
result = submit(args, iter_batches(job), dry_run=dry_run)
|
36
91
|
|
37
|
-
|
38
|
-
|
92
|
+
if dry_run and isinstance(result, tuple):
|
93
|
+
for line in result[1].splitlines():
|
94
|
+
args = shlex.split(line)
|
95
|
+
typer.echo(shlex.join([*result[0][:-1], *args]))
|
39
96
|
|
40
97
|
|
41
98
|
@app.command()
|
hydraflow/core/io.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
|
+
import fnmatch
|
5
6
|
import shutil
|
6
7
|
import urllib.parse
|
7
8
|
import urllib.request
|
@@ -152,21 +153,6 @@ def remove_run(run: Run | Iterable[Run]) -> None:
|
|
152
153
|
shutil.rmtree(get_artifact_dir(run).parent)
|
153
154
|
|
154
155
|
|
155
|
-
def get_root_dir(uri: str | Path | None = None) -> Path:
|
156
|
-
"""Get the root directory for the MLflow tracking server."""
|
157
|
-
import mlflow
|
158
|
-
|
159
|
-
if uri is not None:
|
160
|
-
return Path(uri).absolute()
|
161
|
-
|
162
|
-
uri = mlflow.get_tracking_uri()
|
163
|
-
|
164
|
-
if uri.startswith("file:"):
|
165
|
-
return file_uri_to_path(uri)
|
166
|
-
|
167
|
-
return Path(uri).absolute()
|
168
|
-
|
169
|
-
|
170
156
|
def get_experiment_name(path: Path) -> str | None:
|
171
157
|
"""Get the experiment name from the meta file."""
|
172
158
|
metafile = path / "meta.yaml"
|
@@ -195,50 +181,49 @@ def predicate_experiment_dir(
|
|
195
181
|
return True
|
196
182
|
|
197
183
|
if isinstance(experiment_names, list):
|
198
|
-
return name in experiment_names
|
184
|
+
return any(fnmatch.fnmatch(name, e) for e in experiment_names)
|
199
185
|
|
200
186
|
return experiment_names(name)
|
201
187
|
|
202
188
|
|
203
189
|
def iter_experiment_dirs(
|
190
|
+
root_dir: str | Path,
|
204
191
|
experiment_names: str | list[str] | Callable[[str], bool] | None = None,
|
205
|
-
root_dir: str | Path | None = None,
|
206
192
|
) -> Iterator[Path]:
|
207
193
|
"""Iterate over the experiment directories in the root directory."""
|
208
194
|
if isinstance(experiment_names, str):
|
209
195
|
experiment_names = [experiment_names]
|
210
196
|
|
211
|
-
|
212
|
-
for path in root_dir.iterdir():
|
197
|
+
for path in Path(root_dir).iterdir():
|
213
198
|
if predicate_experiment_dir(path, experiment_names):
|
214
199
|
yield path
|
215
200
|
|
216
201
|
|
217
202
|
def iter_run_dirs(
|
203
|
+
root_dir: str | Path,
|
218
204
|
experiment_names: str | list[str] | Callable[[str], bool] | None = None,
|
219
|
-
root_dir: str | Path | None = None,
|
220
205
|
) -> Iterator[Path]:
|
221
206
|
"""Iterate over the run directories in the root directory."""
|
222
|
-
for experiment_dir in iter_experiment_dirs(
|
207
|
+
for experiment_dir in iter_experiment_dirs(root_dir, experiment_names):
|
223
208
|
for path in experiment_dir.iterdir():
|
224
209
|
if path.is_dir() and (path / "artifacts").exists():
|
225
210
|
yield path
|
226
211
|
|
227
212
|
|
228
213
|
def iter_artifacts_dirs(
|
214
|
+
root_dir: str | Path,
|
229
215
|
experiment_names: str | list[str] | Callable[[str], bool] | None = None,
|
230
|
-
root_dir: str | Path | None = None,
|
231
216
|
) -> Iterator[Path]:
|
232
217
|
"""Iterate over the artifacts directories in the root directory."""
|
233
|
-
for path in iter_run_dirs(
|
218
|
+
for path in iter_run_dirs(root_dir, experiment_names):
|
234
219
|
yield path / "artifacts"
|
235
220
|
|
236
221
|
|
237
222
|
def iter_artifact_paths(
|
223
|
+
root_dir: str | Path,
|
238
224
|
artifact_path: str | Path,
|
239
225
|
experiment_names: str | list[str] | Callable[[str], bool] | None = None,
|
240
|
-
root_dir: str | Path | None = None,
|
241
226
|
) -> Iterator[Path]:
|
242
227
|
"""Iterate over the artifact paths in the root directory."""
|
243
|
-
for path in iter_artifacts_dirs(
|
228
|
+
for path in iter_artifacts_dirs(root_dir, experiment_names):
|
244
229
|
yield path / artifact_path
|
hydraflow/executor/job.py
CHANGED
@@ -21,15 +21,20 @@ import importlib
|
|
21
21
|
import shlex
|
22
22
|
import subprocess
|
23
23
|
import sys
|
24
|
-
from
|
25
|
-
from
|
24
|
+
from dataclasses import dataclass
|
25
|
+
from pathlib import Path
|
26
|
+
from subprocess import CompletedProcess
|
27
|
+
from tempfile import NamedTemporaryFile
|
28
|
+
from typing import TYPE_CHECKING, overload
|
26
29
|
|
27
30
|
import ulid
|
28
31
|
|
29
32
|
from .parser import collect, expand
|
30
33
|
|
31
34
|
if TYPE_CHECKING:
|
32
|
-
from collections.abc import Iterator
|
35
|
+
from collections.abc import Callable, Iterable, Iterator
|
36
|
+
from subprocess import CompletedProcess
|
37
|
+
from typing import Any
|
33
38
|
|
34
39
|
from .conf import Job
|
35
40
|
|
@@ -79,90 +84,139 @@ def iter_batches(job: Job) -> Iterator[list[str]]:
|
|
79
84
|
yield ["--multirun", *args, job_name, sweep_dir, *configs]
|
80
85
|
|
81
86
|
|
82
|
-
|
83
|
-
|
87
|
+
@dataclass
|
88
|
+
class Task:
|
89
|
+
"""An executed task."""
|
84
90
|
|
85
|
-
|
91
|
+
args: list[str]
|
92
|
+
total: int
|
93
|
+
completed: int
|
86
94
|
|
87
|
-
1. Shell command mode (job.run): Executes shell commands with the generated
|
88
|
-
arguments
|
89
|
-
2. Python function mode (job.call): Calls a Python function with the generated
|
90
|
-
arguments
|
91
95
|
|
92
|
-
|
93
|
-
|
96
|
+
@dataclass
|
97
|
+
class Run(Task):
|
98
|
+
"""An executed run."""
|
94
99
|
|
95
|
-
|
96
|
-
RuntimeError: If a shell command fails or if a function call encounters
|
97
|
-
an error.
|
98
|
-
ValueError: If the Python function path is invalid or the function cannot
|
99
|
-
be imported.
|
100
|
+
result: CompletedProcess
|
100
101
|
|
101
|
-
"""
|
102
|
-
it = iter_batches(job)
|
103
|
-
|
104
|
-
if job.run:
|
105
|
-
base_cmds = shlex.split(job.run)
|
106
|
-
if base_cmds[0] == "python" and sys.platform == "win32":
|
107
|
-
base_cmds[0] = sys.executable
|
108
|
-
|
109
|
-
for args in it:
|
110
|
-
cmds = [*base_cmds, *args]
|
111
|
-
try:
|
112
|
-
subprocess.run(cmds, check=True)
|
113
|
-
except CalledProcessError as e:
|
114
|
-
msg = f"Command failed with exit code {e.returncode}"
|
115
|
-
raise RuntimeError(msg) from e
|
116
|
-
|
117
|
-
elif job.call:
|
118
|
-
call_name, *base_args = shlex.split(job.call)
|
119
|
-
|
120
|
-
if "." not in call_name:
|
121
|
-
msg = f"Invalid function path: {call_name}."
|
122
|
-
msg += " Expected format: 'package.module.function'"
|
123
|
-
raise ValueError(msg)
|
124
|
-
|
125
|
-
try:
|
126
|
-
module_name, func_name = call_name.rsplit(".", 1)
|
127
|
-
module = importlib.import_module(module_name)
|
128
|
-
func = getattr(module, func_name)
|
129
|
-
except (ImportError, AttributeError, ModuleNotFoundError) as e:
|
130
|
-
msg = f"Failed to import or find function: {call_name}"
|
131
|
-
raise ValueError(msg) from e
|
132
|
-
|
133
|
-
for args in it:
|
134
|
-
try:
|
135
|
-
func([*base_args, *args])
|
136
|
-
except Exception as e: # noqa: PERF203
|
137
|
-
msg = f"Function call '{job.call}' failed with args: {args}"
|
138
|
-
raise RuntimeError(msg) from e
|
139
|
-
|
140
|
-
|
141
|
-
def to_text(job: Job) -> str:
|
142
|
-
"""Convert the job configuration to a string.
|
143
|
-
|
144
|
-
This function returns the job configuration for a given job.
|
145
102
|
|
146
|
-
|
147
|
-
|
103
|
+
@dataclass
|
104
|
+
class Call(Task):
|
105
|
+
"""An executed call."""
|
148
106
|
|
149
|
-
|
150
|
-
str: The job configuration.
|
107
|
+
result: Any
|
151
108
|
|
152
|
-
"""
|
153
|
-
text = ""
|
154
109
|
|
155
|
-
|
110
|
+
@overload
|
111
|
+
def iter_runs(args: list[str], iterable: Iterable[list[str]]) -> Iterator[Run]: ...
|
112
|
+
|
113
|
+
|
114
|
+
@overload
|
115
|
+
def iter_runs(
|
116
|
+
args: list[str],
|
117
|
+
iterable: Iterable[list[str]],
|
118
|
+
*,
|
119
|
+
dry_run: bool = False,
|
120
|
+
) -> Iterator[Task | Run]: ...
|
121
|
+
|
122
|
+
|
123
|
+
def iter_runs(
|
124
|
+
args: list[str],
|
125
|
+
iterable: Iterable[list[str]],
|
126
|
+
*,
|
127
|
+
dry_run: bool = False,
|
128
|
+
) -> Iterator[Task | Run]:
|
129
|
+
"""Execute multiple runs of a job using shell commands."""
|
130
|
+
executable, *args = args
|
131
|
+
if executable == "python" and sys.platform == "win32":
|
132
|
+
executable = sys.executable
|
133
|
+
|
134
|
+
iterable = list(iterable)
|
135
|
+
total = len(iterable)
|
136
|
+
|
137
|
+
for completed, args_ in enumerate(iterable, 1):
|
138
|
+
cmd = [executable, *args, *args_]
|
139
|
+
if dry_run:
|
140
|
+
yield Task(cmd, total, completed)
|
141
|
+
else:
|
142
|
+
result = subprocess.run(cmd, check=False)
|
143
|
+
yield Run(cmd, total, completed, result)
|
144
|
+
|
145
|
+
|
146
|
+
@overload
|
147
|
+
def iter_calls(args: list[str], iterable: Iterable[list[str]]) -> Iterator[Call]: ...
|
148
|
+
|
149
|
+
|
150
|
+
@overload
|
151
|
+
def iter_calls(
|
152
|
+
args: list[str],
|
153
|
+
iterable: Iterable[list[str]],
|
154
|
+
*,
|
155
|
+
dry_run: bool = False,
|
156
|
+
) -> Iterator[Task | Call]: ...
|
157
|
+
|
158
|
+
|
159
|
+
def iter_calls(
|
160
|
+
args: list[str],
|
161
|
+
iterable: Iterable[list[str]],
|
162
|
+
*,
|
163
|
+
dry_run: bool = False,
|
164
|
+
) -> Iterator[Task | Call]:
|
165
|
+
"""Execute multiple calls of a job using Python functions."""
|
166
|
+
funcname, *args = args
|
167
|
+
func = get_callable(funcname)
|
168
|
+
|
169
|
+
iterable = list(iterable)
|
170
|
+
total = len(iterable)
|
156
171
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
172
|
+
for completed, args_ in enumerate(iterable, 1):
|
173
|
+
cmd = [funcname, *args, *args_]
|
174
|
+
if dry_run:
|
175
|
+
yield Task(cmd, total, completed)
|
176
|
+
else:
|
177
|
+
result = func([*args, *args_])
|
178
|
+
yield Call(cmd, total, completed, result)
|
162
179
|
|
163
|
-
elif job.call:
|
164
|
-
text = f"call: {job.call}\n"
|
165
|
-
for args in it:
|
166
|
-
text += f"args: {args}\n"
|
167
180
|
|
168
|
-
|
181
|
+
def submit(
|
182
|
+
args: list[str],
|
183
|
+
iterable: Iterable[list[str]],
|
184
|
+
*,
|
185
|
+
dry_run: bool = False,
|
186
|
+
) -> CompletedProcess | tuple[list[str], str]:
|
187
|
+
"""Submit entire job using a shell command."""
|
188
|
+
executable, *args = args
|
189
|
+
if executable == "python" and sys.platform == "win32":
|
190
|
+
executable = sys.executable
|
191
|
+
|
192
|
+
temp = NamedTemporaryFile(dir=Path.cwd(), delete=False) # for Windows
|
193
|
+
file = Path(temp.name)
|
194
|
+
temp.close()
|
195
|
+
|
196
|
+
text = "\n".join(shlex.join(args) for args in iterable)
|
197
|
+
file.write_text(text)
|
198
|
+
cmd = [executable, *args, file.as_posix()]
|
199
|
+
|
200
|
+
try:
|
201
|
+
if dry_run:
|
202
|
+
return cmd, text
|
203
|
+
return subprocess.run(cmd, check=False)
|
204
|
+
|
205
|
+
finally:
|
206
|
+
file.unlink(missing_ok=True)
|
207
|
+
|
208
|
+
|
209
|
+
def get_callable(name: str) -> Callable:
|
210
|
+
"""Get a callable from a function name."""
|
211
|
+
if "." not in name:
|
212
|
+
msg = f"Invalid function path: {name}."
|
213
|
+
raise ValueError(msg)
|
214
|
+
|
215
|
+
try:
|
216
|
+
module_name, func_name = name.rsplit(".", 1)
|
217
|
+
module = importlib.import_module(module_name)
|
218
|
+
return getattr(module, func_name)
|
219
|
+
|
220
|
+
except (ImportError, AttributeError, ModuleNotFoundError) as e:
|
221
|
+
msg = f"Failed to import or find function: {name}"
|
222
|
+
raise ValueError(msg) from e
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.12.4
|
3
|
+
Version: 0.13.0
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -1,10 +1,10 @@
|
|
1
1
|
hydraflow/__init__.py,sha256=f2KO2iF7um-nNmayNyEr7TWG4UICOXy7YAN1d3qu0OY,936
|
2
|
-
hydraflow/cli.py,sha256=
|
2
|
+
hydraflow/cli.py,sha256=nGFrZeQnn1h7lCbVhjBJwdrcDDGoqYpVFbqqwkLMzng,3385
|
3
3
|
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
hydraflow/core/config.py,sha256=SJzjgsO_kzB78_whJ3lmy7GlZvTvwZONH1BJBn8zCuI,3817
|
6
6
|
hydraflow/core/context.py,sha256=L4OygMLbITwlWzq17Lh8VoXKKtjOJ3DBEVsBddKPSJ8,4741
|
7
|
-
hydraflow/core/io.py,sha256=
|
7
|
+
hydraflow/core/io.py,sha256=Tch85xbdRao7rG9BMbRpc2Cq0glC8a8M87QDoyQ81p8,6926
|
8
8
|
hydraflow/core/main.py,sha256=dY8uUykS_AbzverrSWkXLyj98TjBPHAiMUf_l5met1U,5162
|
9
9
|
hydraflow/core/mlflow.py,sha256=OQJ3f2wkHJRb11ZK__HF4R8FyBEje7-NOqObpoanGhU,5704
|
10
10
|
hydraflow/core/param.py,sha256=LHU9j9_7oA99igasoOyKofKClVr9FmGA3UABJ-KmyS0,4538
|
@@ -15,10 +15,10 @@ hydraflow/entities/run_info.py,sha256=FRC6ICOlzB2u_xi_33Qs-YZLt677UotuNbYqI7XSmH
|
|
15
15
|
hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
hydraflow/executor/conf.py,sha256=2dv6_PlsynRmia-fGZlmBEVt8GopT0f32N13qY7tYnM,402
|
17
17
|
hydraflow/executor/io.py,sha256=yZMcBVmAbPZZ82cAXhgiJfj9p8WvHmzOCMBg_vtEVek,1509
|
18
|
-
hydraflow/executor/job.py,sha256=
|
18
|
+
hydraflow/executor/job.py,sha256=bmjlqE-cE7lyNFFKj1nUhxiQHKf7DsFkCSTD9iTEQ5I,5606
|
19
19
|
hydraflow/executor/parser.py,sha256=_Rfund3FDgrXitTt_znsTpgEtMDqZ_ICynaB_Zje14Q,14561
|
20
|
-
hydraflow-0.
|
21
|
-
hydraflow-0.
|
22
|
-
hydraflow-0.
|
23
|
-
hydraflow-0.
|
24
|
-
hydraflow-0.
|
20
|
+
hydraflow-0.13.0.dist-info/METADATA,sha256=DcBL4IuQHdGIyEXTqFoDGlUdI2sgH0THpTuyFoB3Wg0,4549
|
21
|
+
hydraflow-0.13.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
22
|
+
hydraflow-0.13.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
|
23
|
+
hydraflow-0.13.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
24
|
+
hydraflow-0.13.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|