hydraflow 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +7 -4
- hydraflow/asyncio.py +9 -3
- hydraflow/context.py +24 -8
- hydraflow/info.py +57 -4
- hydraflow/mlflow.py +98 -42
- hydraflow/progress.py +117 -46
- hydraflow/run_collection.py +31 -122
- {hydraflow-0.2.7.dist-info → hydraflow-0.2.9.dist-info}/METADATA +1 -1
- hydraflow-0.2.9.dist-info/RECORD +12 -0
- hydraflow-0.2.7.dist-info/RECORD +0 -12
- {hydraflow-0.2.7.dist-info → hydraflow-0.2.9.dist-info}/WHEEL +0 -0
- {hydraflow-0.2.7.dist-info → hydraflow-0.2.9.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
from .context import chdir_artifact, log_run, start_run, watch
|
2
|
-
from .info import load_config
|
3
|
-
from .mlflow import
|
4
|
-
from .run_collection import (
|
5
|
-
RunCollection,
|
2
|
+
from .info import get_artifact_dir, get_hydra_output_dir, load_config
|
3
|
+
from .mlflow import (
|
6
4
|
list_runs,
|
7
5
|
search_runs,
|
6
|
+
set_experiment,
|
8
7
|
)
|
8
|
+
from .progress import multi_tasks_progress, parallel_progress
|
9
|
+
from .run_collection import RunCollection
|
9
10
|
|
10
11
|
__all__ = [
|
11
12
|
"RunCollection",
|
@@ -15,6 +16,8 @@ __all__ = [
|
|
15
16
|
"list_runs",
|
16
17
|
"load_config",
|
17
18
|
"log_run",
|
19
|
+
"multi_tasks_progress",
|
20
|
+
"parallel_progress",
|
18
21
|
"search_runs",
|
19
22
|
"set_experiment",
|
20
23
|
"start_run",
|
hydraflow/asyncio.py
CHANGED
@@ -41,7 +41,9 @@ async def execute_command(
|
|
41
41
|
int: The return code of the process.
|
42
42
|
"""
|
43
43
|
try:
|
44
|
-
process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
|
44
|
+
process = await asyncio.create_subprocess_exec(
|
45
|
+
program, *args, stdout=PIPE, stderr=PIPE
|
46
|
+
)
|
45
47
|
await asyncio.gather(
|
46
48
|
process_stream(process.stdout, stdout),
|
47
49
|
process_stream(process.stderr, stderr),
|
@@ -100,7 +102,9 @@ async def monitor_file_changes(
|
|
100
102
|
"""
|
101
103
|
str_paths = [str(path) for path in paths]
|
102
104
|
try:
|
103
|
-
async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
|
105
|
+
async for changes in watchfiles.awatch(
|
106
|
+
*str_paths, stop_event=stop_event, **awatch_kwargs
|
107
|
+
):
|
104
108
|
callback(changes)
|
105
109
|
except Exception as e:
|
106
110
|
logger.error(f"Error watching files: {e}")
|
@@ -129,7 +133,9 @@ async def run_and_monitor(
|
|
129
133
|
"""
|
130
134
|
stop_event = asyncio.Event()
|
131
135
|
run_task = asyncio.create_task(
|
132
|
-
execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
|
136
|
+
execute_command(
|
137
|
+
program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr
|
138
|
+
)
|
133
139
|
)
|
134
140
|
if watch and paths:
|
135
141
|
monitor_task = asyncio.create_task(
|
hydraflow/context.py
CHANGED
@@ -14,10 +14,11 @@ from typing import TYPE_CHECKING
|
|
14
14
|
|
15
15
|
import mlflow
|
16
16
|
from hydra.core.hydra_config import HydraConfig
|
17
|
-
from watchdog.events import FileModifiedEvent,
|
17
|
+
from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
|
18
18
|
from watchdog.observers import Observer
|
19
19
|
|
20
|
-
from hydraflow.
|
20
|
+
from hydraflow.info import get_artifact_dir
|
21
|
+
from hydraflow.mlflow import log_params
|
21
22
|
|
22
23
|
if TYPE_CHECKING:
|
23
24
|
from collections.abc import Callable, Iterator
|
@@ -68,7 +69,7 @@ def log_run(
|
|
68
69
|
mlflow.log_artifact(local_path)
|
69
70
|
|
70
71
|
try:
|
71
|
-
with watch(log_artifact, output_dir):
|
72
|
+
with watch(log_artifact, output_dir, ignore_log=False):
|
72
73
|
yield
|
73
74
|
|
74
75
|
except Exception as e:
|
@@ -140,9 +141,11 @@ def start_run(
|
|
140
141
|
|
141
142
|
@contextmanager
|
142
143
|
def watch(
|
143
|
-
|
144
|
+
callback: Callable[[Path], None],
|
144
145
|
dir: Path | str = "",
|
145
146
|
timeout: int = 60,
|
147
|
+
ignore_patterns: list[str] | None = None,
|
148
|
+
ignore_log: bool = True,
|
146
149
|
) -> Iterator[None]:
|
147
150
|
"""
|
148
151
|
Watch the given directory for changes and call the provided function
|
@@ -154,7 +157,7 @@ def watch(
|
|
154
157
|
period or until the context is exited.
|
155
158
|
|
156
159
|
Args:
|
157
|
-
|
160
|
+
callback (Callable[[Path], None]): The function to call when a change is
|
158
161
|
detected. It should accept a single argument of type `Path`,
|
159
162
|
which is the path of the modified file.
|
160
163
|
dir (Path | str): The directory to watch. If not specified,
|
@@ -174,7 +177,7 @@ def watch(
|
|
174
177
|
if isinstance(dir, Path):
|
175
178
|
dir = dir.as_posix()
|
176
179
|
|
177
|
-
handler = Handler(
|
180
|
+
handler = Handler(callback, ignore_patterns=ignore_patterns, ignore_log=ignore_log)
|
178
181
|
observer = Observer()
|
179
182
|
observer.schedule(handler, dir, recursive=True)
|
180
183
|
observer.start()
|
@@ -198,10 +201,23 @@ def watch(
|
|
198
201
|
observer.join()
|
199
202
|
|
200
203
|
|
201
|
-
class Handler(
|
202
|
-
def __init__(
|
204
|
+
class Handler(PatternMatchingEventHandler):
|
205
|
+
def __init__(
|
206
|
+
self,
|
207
|
+
func: Callable[[Path], None],
|
208
|
+
ignore_patterns: list[str] | None = None,
|
209
|
+
ignore_log: bool = True,
|
210
|
+
) -> None:
|
203
211
|
self.func = func
|
204
212
|
|
213
|
+
if ignore_log:
|
214
|
+
if ignore_patterns:
|
215
|
+
ignore_patterns.append("*.log")
|
216
|
+
else:
|
217
|
+
ignore_patterns = ["*.log"]
|
218
|
+
|
219
|
+
super().__init__(ignore_patterns=ignore_patterns)
|
220
|
+
|
205
221
|
def on_modified(self, event: FileModifiedEvent) -> None:
|
206
222
|
file = Path(str(event.src_path))
|
207
223
|
if file.is_file():
|
hydraflow/info.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
from pathlib import Path
|
3
4
|
from typing import TYPE_CHECKING
|
4
5
|
|
6
|
+
import mlflow
|
7
|
+
from hydra.core.hydra_config import HydraConfig
|
8
|
+
from mlflow.tracking import artifact_utils
|
5
9
|
from omegaconf import DictConfig, OmegaConf
|
6
10
|
|
7
|
-
from hydraflow.mlflow import get_artifact_dir
|
8
|
-
|
9
11
|
if TYPE_CHECKING:
|
10
|
-
from pathlib import Path
|
11
|
-
|
12
12
|
from mlflow.entities import Run
|
13
13
|
|
14
14
|
from hydraflow.run_collection import RunCollection
|
@@ -43,6 +43,59 @@ class RunCollectionInfo:
|
|
43
43
|
return [load_config(run) for run in self._runs]
|
44
44
|
|
45
45
|
|
46
|
+
def get_artifact_dir(run: Run | None = None) -> Path:
|
47
|
+
"""
|
48
|
+
Retrieve the artifact directory for the given run.
|
49
|
+
|
50
|
+
This function uses MLflow to get the artifact directory for the given run.
|
51
|
+
|
52
|
+
Args:
|
53
|
+
run (Run | None): The run object. Defaults to None.
|
54
|
+
|
55
|
+
Returns:
|
56
|
+
The local path to the directory where the artifacts are downloaded.
|
57
|
+
"""
|
58
|
+
if run is None:
|
59
|
+
uri = mlflow.get_artifact_uri()
|
60
|
+
else:
|
61
|
+
uri = artifact_utils.get_artifact_uri(run.info.run_id)
|
62
|
+
|
63
|
+
return Path(mlflow.artifacts.download_artifacts(uri))
|
64
|
+
|
65
|
+
|
66
|
+
def get_hydra_output_dir(run: Run | None = None) -> Path:
|
67
|
+
"""
|
68
|
+
Retrieve the Hydra output directory for the given run.
|
69
|
+
|
70
|
+
This function returns the Hydra output directory. If no run is provided,
|
71
|
+
it retrieves the output directory from the current Hydra configuration.
|
72
|
+
If a run is provided, it retrieves the artifact path for the run, loads
|
73
|
+
the Hydra configuration from the downloaded artifacts, and returns the
|
74
|
+
output directory specified in that configuration.
|
75
|
+
|
76
|
+
Args:
|
77
|
+
run (Run | None): The run object. Defaults to None.
|
78
|
+
|
79
|
+
Returns:
|
80
|
+
Path: The path to the Hydra output directory.
|
81
|
+
|
82
|
+
Raises:
|
83
|
+
FileNotFoundError: If the Hydra configuration file is not found
|
84
|
+
in the artifacts.
|
85
|
+
"""
|
86
|
+
if run is None:
|
87
|
+
hc = HydraConfig.get()
|
88
|
+
return Path(hc.runtime.output_dir)
|
89
|
+
|
90
|
+
path = get_artifact_dir(run) / ".hydra/hydra.yaml"
|
91
|
+
|
92
|
+
if path.exists():
|
93
|
+
hc = OmegaConf.load(path)
|
94
|
+
return Path(hc.hydra.runtime.output_dir)
|
95
|
+
|
96
|
+
raise FileNotFoundError
|
97
|
+
|
98
|
+
|
46
99
|
def load_config(run: Run) -> DictConfig:
|
47
100
|
"""
|
48
101
|
Load the configuration for a given run.
|
hydraflow/mlflow.py
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
"""
|
2
|
-
This module provides functionality to log parameters from Hydra
|
3
|
-
|
2
|
+
This module provides functionality to log parameters from Hydra configuration objects
|
3
|
+
and set up experiments using MLflow. It includes methods for managing experiments,
|
4
|
+
searching for runs, and logging parameters and artifacts.
|
5
|
+
|
6
|
+
Key Features:
|
7
|
+
- **Experiment Management**: Set and manage MLflow experiments with customizable names
|
8
|
+
based on Hydra configuration.
|
9
|
+
- **Run Logging**: Log parameters and metrics from Hydra configuration objects to
|
10
|
+
MLflow, ensuring that all relevant information is captured during experiments.
|
11
|
+
- **Run Search**: Search for runs based on various criteria, allowing for flexible
|
12
|
+
retrieval of experiment results.
|
13
|
+
- **Artifact Management**: Retrieve and log artifacts associated with runs, facilitating
|
14
|
+
easy access to outputs generated during experiments.
|
15
|
+
|
16
|
+
This module is designed to integrate seamlessly with Hydra, providing a robust
|
17
|
+
solution for tracking machine learning experiments and their associated metadata.
|
4
18
|
"""
|
5
19
|
|
6
20
|
from __future__ import annotations
|
@@ -10,14 +24,14 @@ from typing import TYPE_CHECKING
|
|
10
24
|
|
11
25
|
import mlflow
|
12
26
|
from hydra.core.hydra_config import HydraConfig
|
13
|
-
from mlflow.
|
14
|
-
from
|
27
|
+
from mlflow.entities import ViewType
|
28
|
+
from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
|
15
29
|
|
16
30
|
from hydraflow.config import iter_params
|
31
|
+
from hydraflow.run_collection import RunCollection
|
17
32
|
|
18
33
|
if TYPE_CHECKING:
|
19
34
|
from mlflow.entities.experiment import Experiment
|
20
|
-
from mlflow.entities.run import Run
|
21
35
|
|
22
36
|
|
23
37
|
def set_experiment(
|
@@ -26,7 +40,7 @@ def set_experiment(
|
|
26
40
|
uri: str | Path | None = None,
|
27
41
|
) -> Experiment:
|
28
42
|
"""
|
29
|
-
|
43
|
+
Sets the experiment name and tracking URI optionally.
|
30
44
|
|
31
45
|
This function sets the experiment name by combining the given prefix,
|
32
46
|
the job name from HydraConfig, and the given suffix. Optionally, it can
|
@@ -66,54 +80,96 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
|
66
80
|
mlflow.log_param(key, value, synchronous=synchronous)
|
67
81
|
|
68
82
|
|
69
|
-
def
|
83
|
+
def search_runs(
|
84
|
+
experiment_ids: list[str] | None = None,
|
85
|
+
filter_string: str = "",
|
86
|
+
run_view_type: int = ViewType.ACTIVE_ONLY,
|
87
|
+
max_results: int = SEARCH_MAX_RESULTS_PANDAS,
|
88
|
+
order_by: list[str] | None = None,
|
89
|
+
search_all_experiments: bool = False,
|
90
|
+
experiment_names: list[str] | None = None,
|
91
|
+
) -> RunCollection:
|
70
92
|
"""
|
71
|
-
|
93
|
+
Search for Runs that fit the specified criteria.
|
72
94
|
|
73
|
-
This function
|
95
|
+
This function wraps the `mlflow.search_runs` function and returns the
|
96
|
+
results as a `RunCollection` object. It allows for flexible searching of
|
97
|
+
MLflow runs based on various criteria.
|
98
|
+
|
99
|
+
Note:
|
100
|
+
The returned runs are sorted by their start time in ascending order.
|
74
101
|
|
75
102
|
Args:
|
76
|
-
|
103
|
+
experiment_ids (list[str] | None): List of experiment IDs. Search can
|
104
|
+
work with experiment IDs or experiment names, but not both in the
|
105
|
+
same call. Values other than ``None`` or ``[]`` will result in
|
106
|
+
error if ``experiment_names`` is also not ``None`` or ``[]``.
|
107
|
+
``None`` will default to the active experiment if ``experiment_names``
|
108
|
+
is ``None`` or ``[]``.
|
109
|
+
filter_string (str): Filter query string, defaults to searching all
|
110
|
+
runs.
|
111
|
+
run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
|
112
|
+
or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
|
113
|
+
max_results (int): The maximum number of runs to put in the dataframe.
|
114
|
+
Default is 100,000 to avoid causing out-of-memory issues on the user's
|
115
|
+
machine.
|
116
|
+
order_by (list[str] | None): List of columns to order by (e.g.,
|
117
|
+
"metrics.rmse"). The ``order_by`` column can contain an optional
|
118
|
+
``DESC`` or ``ASC`` value. The default is ``ASC``. The default
|
119
|
+
ordering is to sort by ``start_time DESC``, then ``run_id``.
|
120
|
+
``start_time DESC``, then ``run_id``.
|
121
|
+
search_all_experiments (bool): Boolean specifying whether all
|
122
|
+
experiments should be searched. Only honored if ``experiment_ids``
|
123
|
+
is ``[]`` or ``None``.
|
124
|
+
experiment_names (list[str] | None): List of experiment names. Search
|
125
|
+
can work with experiment IDs or experiment names, but not both in
|
126
|
+
the same call. Values other than ``None`` or ``[]`` will result in
|
127
|
+
error if ``experiment_ids`` is also not ``None`` or ``[]``.
|
128
|
+
``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
|
129
|
+
default to the active experiment if ``experiment_ids`` is ``None``
|
130
|
+
or ``[]``.
|
77
131
|
|
78
132
|
Returns:
|
79
|
-
|
133
|
+
A `RunCollection` object containing the search results.
|
80
134
|
"""
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
135
|
+
runs = mlflow.search_runs(
|
136
|
+
experiment_ids=experiment_ids,
|
137
|
+
filter_string=filter_string,
|
138
|
+
run_view_type=run_view_type,
|
139
|
+
max_results=max_results,
|
140
|
+
order_by=order_by,
|
141
|
+
output_format="list",
|
142
|
+
search_all_experiments=search_all_experiments,
|
143
|
+
experiment_names=experiment_names,
|
144
|
+
)
|
145
|
+
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
146
|
+
return RunCollection(runs) # type: ignore
|
147
|
+
|
148
|
+
|
149
|
+
def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
|
90
150
|
"""
|
91
|
-
|
151
|
+
List all runs for the specified experiments.
|
92
152
|
|
93
|
-
This function
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
153
|
+
This function retrieves all runs for the given list of experiment names.
|
154
|
+
If no experiment names are provided (None), it defaults to searching all runs
|
155
|
+
for the currently active experiment. If an empty list is provided, the function
|
156
|
+
will search all runs for all experiments except the "Default" experiment.
|
157
|
+
The function returns the results as a `RunCollection` object.
|
158
|
+
|
159
|
+
Note:
|
160
|
+
The returned runs are sorted by their start time in ascending order.
|
98
161
|
|
99
162
|
Args:
|
100
|
-
|
163
|
+
experiment_names (list[str] | None): List of experiment names to search
|
164
|
+
for runs. If None or an empty list is provided, the function will
|
165
|
+
search the currently active experiment or all experiments except
|
166
|
+
the "Default" experiment.
|
101
167
|
|
102
168
|
Returns:
|
103
|
-
|
104
|
-
|
105
|
-
Raises:
|
106
|
-
FileNotFoundError: If the Hydra configuration file is not found
|
107
|
-
in the artifacts.
|
169
|
+
A `RunCollection` object containing the runs for the specified experiments.
|
108
170
|
"""
|
109
|
-
if
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
path = get_artifact_dir(run) / ".hydra/hydra.yaml"
|
114
|
-
|
115
|
-
if path.exists():
|
116
|
-
hc = OmegaConf.load(path)
|
117
|
-
return Path(hc.hydra.runtime.output_dir)
|
171
|
+
if experiment_names == []:
|
172
|
+
experiments = mlflow.search_experiments()
|
173
|
+
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
118
174
|
|
119
|
-
|
175
|
+
return search_runs(experiment_names=experiment_names)
|
hydraflow/progress.py
CHANGED
@@ -1,17 +1,129 @@
|
|
1
|
+
"""
|
2
|
+
Module for managing progress tracking in parallel processing using Joblib
|
3
|
+
and Rich's Progress bar.
|
4
|
+
|
5
|
+
Provide context managers and functions to facilitate the execution
|
6
|
+
of tasks in parallel while displaying progress updates.
|
7
|
+
|
8
|
+
The following key components are provided:
|
9
|
+
|
10
|
+
- JoblibProgress: A context manager for tracking progress with Rich's Progress
|
11
|
+
bar.
|
12
|
+
- parallel_progress: A function to execute a given function in parallel over
|
13
|
+
an iterable with progress tracking.
|
14
|
+
- multi_tasks_progress: A function to render auto-updating progress bars for
|
15
|
+
multiple tasks concurrently.
|
16
|
+
|
17
|
+
Usage:
|
18
|
+
Import the necessary functions and use them to manage progress in your
|
19
|
+
parallel processing tasks.
|
20
|
+
"""
|
21
|
+
|
1
22
|
from __future__ import annotations
|
2
23
|
|
3
|
-
from
|
24
|
+
from contextlib import contextmanager
|
25
|
+
from typing import TYPE_CHECKING, TypeVar
|
4
26
|
|
5
27
|
import joblib
|
6
28
|
from rich.progress import Progress
|
7
29
|
|
8
30
|
if TYPE_CHECKING:
|
9
|
-
from collections.abc import Iterable
|
31
|
+
from collections.abc import Callable, Iterable, Iterator
|
10
32
|
|
11
33
|
from rich.progress import ProgressColumn
|
12
34
|
|
13
35
|
|
14
|
-
|
36
|
+
# https://github.com/jonghwanhyeon/joblib-progress/blob/main/joblib_progress/__init__.py
|
37
|
+
@contextmanager
|
38
|
+
def JoblibProgress(
|
39
|
+
*columns: ProgressColumn | str,
|
40
|
+
description: str | None = None,
|
41
|
+
total: int | None = None,
|
42
|
+
**kwargs,
|
43
|
+
) -> Iterator[Progress]:
|
44
|
+
"""
|
45
|
+
Context manager for tracking progress using Joblib with Rich's Progress bar.
|
46
|
+
|
47
|
+
Args:
|
48
|
+
*columns (ProgressColumn | str): Columns to display in the progress bar.
|
49
|
+
description (str | None, optional): A description for the progress task.
|
50
|
+
Defaults to None.
|
51
|
+
total (int | None, optional): The total number of tasks. If None, it will
|
52
|
+
be determined automatically.
|
53
|
+
**kwargs: Additional keyword arguments passed to the Progress instance.
|
54
|
+
|
55
|
+
Yields:
|
56
|
+
Progress: A Progress instance for managing the progress bar.
|
57
|
+
|
58
|
+
Example:
|
59
|
+
with JoblibProgress("task", total=100) as progress:
|
60
|
+
# Your parallel processing code here
|
61
|
+
"""
|
62
|
+
if not columns:
|
63
|
+
columns = Progress.get_default_columns()
|
64
|
+
|
65
|
+
progress = Progress(*columns, **kwargs)
|
66
|
+
|
67
|
+
if description is None:
|
68
|
+
description = "Processing..."
|
69
|
+
|
70
|
+
task_id = progress.add_task(description, total=total)
|
71
|
+
print_progress = joblib.parallel.Parallel.print_progress
|
72
|
+
|
73
|
+
def update_progress(self: joblib.parallel.Parallel):
|
74
|
+
progress.update(task_id, completed=self.n_completed_tasks, refresh=True)
|
75
|
+
return print_progress(self)
|
76
|
+
|
77
|
+
try:
|
78
|
+
joblib.parallel.Parallel.print_progress = update_progress
|
79
|
+
progress.start()
|
80
|
+
yield progress
|
81
|
+
|
82
|
+
finally:
|
83
|
+
progress.stop()
|
84
|
+
joblib.parallel.Parallel.print_progress = print_progress
|
85
|
+
|
86
|
+
|
87
|
+
T = TypeVar("T")
|
88
|
+
U = TypeVar("U")
|
89
|
+
|
90
|
+
|
91
|
+
def parallel_progress(
|
92
|
+
func: Callable[[T], U],
|
93
|
+
iterable: Iterable[T],
|
94
|
+
*columns: ProgressColumn | str,
|
95
|
+
n_jobs: int = -1,
|
96
|
+
description: str | None = None,
|
97
|
+
**kwargs,
|
98
|
+
) -> list[U]:
|
99
|
+
"""
|
100
|
+
Execute a function in parallel over an iterable with progress tracking.
|
101
|
+
|
102
|
+
Args:
|
103
|
+
func (Callable[[T], U]): The function to execute on each item in the
|
104
|
+
iterable.
|
105
|
+
iterable (Iterable[T]): An iterable of items to process.
|
106
|
+
*columns (ProgressColumn | str): Additional columns to display in the
|
107
|
+
progress bar.
|
108
|
+
n_jobs (int, optional): The number of jobs to run in parallel.
|
109
|
+
Defaults to -1 (all processors).
|
110
|
+
description (str | None, optional): A description for the progress bar.
|
111
|
+
Defaults to None.
|
112
|
+
**kwargs: Additional keyword arguments passed to the Progress instance.
|
113
|
+
|
114
|
+
Returns:
|
115
|
+
list[U]: A list of results from applying the function to each item in
|
116
|
+
the iterable.
|
117
|
+
"""
|
118
|
+
iterable = list(iterable)
|
119
|
+
total = len(iterable)
|
120
|
+
|
121
|
+
with JoblibProgress(*columns, description=description, total=total, **kwargs):
|
122
|
+
it = (joblib.delayed(func)(x) for x in iterable)
|
123
|
+
return joblib.Parallel(n_jobs=n_jobs)(it) # type: ignore
|
124
|
+
|
125
|
+
|
126
|
+
def multi_tasks_progress(
|
15
127
|
iterables: Iterable[Iterable[int | tuple[int, int]]],
|
16
128
|
*columns: ProgressColumn | str,
|
17
129
|
n_jobs: int = -1,
|
@@ -52,7 +164,8 @@ def multi_task_progress(
|
|
52
164
|
|
53
165
|
task_main = progress.add_task(main_description, total=None) if n > 1 else None
|
54
166
|
tasks = [
|
55
|
-
progress.add_task(description.format(i), start=False, total=None)
|
167
|
+
progress.add_task(description.format(i), start=False, total=None)
|
168
|
+
for i in range(n)
|
56
169
|
]
|
57
170
|
|
58
171
|
total = {}
|
@@ -87,45 +200,3 @@ def multi_task_progress(
|
|
87
200
|
|
88
201
|
else:
|
89
202
|
func(0)
|
90
|
-
|
91
|
-
|
92
|
-
if __name__ == "__main__":
|
93
|
-
import random
|
94
|
-
import time
|
95
|
-
|
96
|
-
from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
|
97
|
-
|
98
|
-
from hydraflow.progress import multi_task_progress
|
99
|
-
|
100
|
-
def task(total):
|
101
|
-
for i in range(total or 90):
|
102
|
-
if total is None:
|
103
|
-
yield i
|
104
|
-
else:
|
105
|
-
yield i, total
|
106
|
-
time.sleep(random.random() / 30)
|
107
|
-
|
108
|
-
def multi_task_progress_test(unknown_total: bool):
|
109
|
-
tasks = [task(random.randint(80, 100)) for _ in range(4)]
|
110
|
-
if unknown_total:
|
111
|
-
tasks = [task(None), *tasks, task(None)]
|
112
|
-
|
113
|
-
columns = [
|
114
|
-
SpinnerColumn(),
|
115
|
-
*Progress.get_default_columns(),
|
116
|
-
MofNCompleteColumn(),
|
117
|
-
TimeElapsedColumn(),
|
118
|
-
]
|
119
|
-
|
120
|
-
kwargs = {}
|
121
|
-
if unknown_total:
|
122
|
-
kwargs["main_description"] = "unknown"
|
123
|
-
|
124
|
-
multi_task_progress(tasks, *columns, n_jobs=4, **kwargs)
|
125
|
-
|
126
|
-
multi_task_progress_test(False)
|
127
|
-
multi_task_progress_test(True)
|
128
|
-
multi_task_progress([task(100)])
|
129
|
-
multi_task_progress([task(None)], description="unknown")
|
130
|
-
multi_task_progress([task(100), task(None)], main_description="transient", transient=True)
|
131
|
-
multi_task_progress([task(100)], description="transient", transient=True)
|
hydraflow/run_collection.py
CHANGED
@@ -1,7 +1,24 @@
|
|
1
1
|
"""
|
2
|
-
This module provides functionality for managing and interacting with MLflow
|
3
|
-
|
4
|
-
|
2
|
+
This module provides functionality for managing and interacting with MLflow runs.
|
3
|
+
It includes the `RunCollection` class, which serves as a container for multiple MLflow
|
4
|
+
run objects, and various methods to filter, retrieve, and manipulate these runs.
|
5
|
+
|
6
|
+
Key Features:
|
7
|
+
- **Run Management**: The `RunCollection` class allows for easy management of multiple
|
8
|
+
MLflow runs, providing methods to access, filter, and sort runs based on various
|
9
|
+
criteria.
|
10
|
+
- **Filtering**: The module supports filtering runs based on specific configurations
|
11
|
+
and parameters, enabling users to easily find runs that match certain conditions.
|
12
|
+
- **Retrieval**: Users can retrieve specific runs, including the first, last, or any
|
13
|
+
run that matches a given configuration.
|
14
|
+
- **Artifact Handling**: The module provides methods to access and manipulate the
|
15
|
+
artifacts associated with each run, including retrieving artifact URIs and directories.
|
16
|
+
|
17
|
+
The `RunCollection` class is designed to work seamlessly with the MLflow tracking
|
18
|
+
API, providing a robust solution for managing machine learning experiment runs and
|
19
|
+
their associated metadata. This module is particularly useful for data scientists and
|
20
|
+
machine learning engineers who need to track and analyze the results of their experiments
|
21
|
+
efficiently.
|
5
22
|
"""
|
6
23
|
|
7
24
|
from __future__ import annotations
|
@@ -10,10 +27,7 @@ from dataclasses import dataclass, field
|
|
10
27
|
from itertools import chain
|
11
28
|
from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar
|
12
29
|
|
13
|
-
import mlflow
|
14
|
-
from mlflow.entities import ViewType
|
15
30
|
from mlflow.entities.run import Run
|
16
|
-
from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
|
17
31
|
|
18
32
|
from hydraflow.config import iter_params
|
19
33
|
from hydraflow.info import RunCollectionInfo
|
@@ -26,101 +40,6 @@ if TYPE_CHECKING:
|
|
26
40
|
from omegaconf import DictConfig
|
27
41
|
|
28
42
|
|
29
|
-
def search_runs(
|
30
|
-
experiment_ids: list[str] | None = None,
|
31
|
-
filter_string: str = "",
|
32
|
-
run_view_type: int = ViewType.ACTIVE_ONLY,
|
33
|
-
max_results: int = SEARCH_MAX_RESULTS_PANDAS,
|
34
|
-
order_by: list[str] | None = None,
|
35
|
-
search_all_experiments: bool = False,
|
36
|
-
experiment_names: list[str] | None = None,
|
37
|
-
) -> RunCollection:
|
38
|
-
"""
|
39
|
-
Search for Runs that fit the specified criteria.
|
40
|
-
|
41
|
-
This function wraps the `mlflow.search_runs` function and returns the
|
42
|
-
results as a `RunCollection` object. It allows for flexible searching of
|
43
|
-
MLflow runs based on various criteria.
|
44
|
-
|
45
|
-
Note:
|
46
|
-
The returned runs are sorted by their start time in ascending order.
|
47
|
-
|
48
|
-
Args:
|
49
|
-
experiment_ids (list[str] | None): List of experiment IDs. Search can
|
50
|
-
work with experiment IDs or experiment names, but not both in the
|
51
|
-
same call. Values other than ``None`` or ``[]`` will result in
|
52
|
-
error if ``experiment_names`` is also not ``None`` or ``[]``.
|
53
|
-
``None`` will default to the active experiment if ``experiment_names``
|
54
|
-
is ``None`` or ``[]``.
|
55
|
-
filter_string (str): Filter query string, defaults to searching all
|
56
|
-
runs.
|
57
|
-
run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
|
58
|
-
or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
|
59
|
-
max_results (int): The maximum number of runs to put in the dataframe.
|
60
|
-
Default is 100,000 to avoid causing out-of-memory issues on the user's
|
61
|
-
machine.
|
62
|
-
order_by (list[str] | None): List of columns to order by (e.g.,
|
63
|
-
"metrics.rmse"). The ``order_by`` column can contain an optional
|
64
|
-
``DESC`` or ``ASC`` value. The default is ``ASC``. The default
|
65
|
-
ordering is to sort by ``start_time DESC``, then ``run_id``.
|
66
|
-
``start_time DESC``, then ``run_id``.
|
67
|
-
search_all_experiments (bool): Boolean specifying whether all
|
68
|
-
experiments should be searched. Only honored if ``experiment_ids``
|
69
|
-
is ``[]`` or ``None``.
|
70
|
-
experiment_names (list[str] | None): List of experiment names. Search
|
71
|
-
can work with experiment IDs or experiment names, but not both in
|
72
|
-
the same call. Values other than ``None`` or ``[]`` will result in
|
73
|
-
error if ``experiment_ids`` is also not ``None`` or ``[]``.
|
74
|
-
``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
|
75
|
-
default to the active experiment if ``experiment_ids`` is ``None``
|
76
|
-
or ``[]``.
|
77
|
-
|
78
|
-
Returns:
|
79
|
-
A `RunCollection` object containing the search results.
|
80
|
-
"""
|
81
|
-
runs = mlflow.search_runs(
|
82
|
-
experiment_ids=experiment_ids,
|
83
|
-
filter_string=filter_string,
|
84
|
-
run_view_type=run_view_type,
|
85
|
-
max_results=max_results,
|
86
|
-
order_by=order_by,
|
87
|
-
output_format="list",
|
88
|
-
search_all_experiments=search_all_experiments,
|
89
|
-
experiment_names=experiment_names,
|
90
|
-
)
|
91
|
-
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
92
|
-
return RunCollection(runs) # type: ignore
|
93
|
-
|
94
|
-
|
95
|
-
def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
|
96
|
-
"""
|
97
|
-
List all runs for the specified experiments.
|
98
|
-
|
99
|
-
This function retrieves all runs for the given list of experiment names.
|
100
|
-
If no experiment names are provided (None), it defaults to searching all runs
|
101
|
-
for the currently active experiment. If an empty list is provided, the function
|
102
|
-
will search all runs for all experiments except the "Default" experiment.
|
103
|
-
The function returns the results as a `RunCollection` object.
|
104
|
-
|
105
|
-
Note:
|
106
|
-
The returned runs are sorted by their start time in ascending order.
|
107
|
-
|
108
|
-
Args:
|
109
|
-
experiment_names (list[str] | None): List of experiment names to search
|
110
|
-
for runs. If None or an empty list is provided, the function will
|
111
|
-
search the currently active experiment or all experiments except
|
112
|
-
the "Default" experiment.
|
113
|
-
|
114
|
-
Returns:
|
115
|
-
A `RunCollection` object containing the runs for the specified experiments.
|
116
|
-
"""
|
117
|
-
if experiment_names == []:
|
118
|
-
experiments = mlflow.search_experiments()
|
119
|
-
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
120
|
-
|
121
|
-
return search_runs(experiment_names=experiment_names)
|
122
|
-
|
123
|
-
|
124
43
|
T = TypeVar("T")
|
125
44
|
P = ParamSpec("P")
|
126
45
|
|
@@ -132,6 +51,11 @@ class RunCollection:
|
|
132
51
|
|
133
52
|
This class provides methods to interact with the runs, such as filtering,
|
134
53
|
retrieving specific runs, and accessing run information.
|
54
|
+
|
55
|
+
Key Features:
|
56
|
+
- Filtering: Easily filter runs based on various criteria.
|
57
|
+
- Retrieval: Access specific runs by index or through methods.
|
58
|
+
- Metadata: Access run metadata and associated information.
|
135
59
|
"""
|
136
60
|
|
137
61
|
_runs: list[Run]
|
@@ -544,7 +468,9 @@ class RunCollection:
|
|
544
468
|
"""
|
545
469
|
return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)
|
546
470
|
|
547
|
-
def group_by(
|
471
|
+
def group_by(
|
472
|
+
self, *names: str | list[str]
|
473
|
+
) -> dict[tuple[str | None, ...], RunCollection]:
|
548
474
|
"""
|
549
475
|
Group runs by specified parameter names.
|
550
476
|
|
@@ -569,25 +495,6 @@ class RunCollection:
|
|
569
495
|
|
570
496
|
return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
|
571
497
|
|
572
|
-
def group_by_values(self, *names: str | list[str]) -> list[RunCollection]:
|
573
|
-
"""
|
574
|
-
Group runs by specified parameter names.
|
575
|
-
|
576
|
-
This method groups the runs in the collection based on the values of the
|
577
|
-
specified parameters. Each unique combination of parameter values will
|
578
|
-
form a separate RunCollection in the returned list.
|
579
|
-
|
580
|
-
Args:
|
581
|
-
*names (str | list[str]): The names of the parameters to group by.
|
582
|
-
This can be a single parameter name or multiple names provided
|
583
|
-
as separate arguments or as a list.
|
584
|
-
|
585
|
-
Returns:
|
586
|
-
list[RunCollection]: A list of RunCollection objects, where each
|
587
|
-
object contains runs that match the specified parameter values.
|
588
|
-
"""
|
589
|
-
return list(self.group_by(*names).values())
|
590
|
-
|
591
498
|
|
592
499
|
def _param_matches(run: Run, key: str, value: Any) -> bool:
|
593
500
|
"""
|
@@ -747,7 +654,9 @@ def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Ru
|
|
747
654
|
return filtered_runs[-1]
|
748
655
|
|
749
656
|
|
750
|
-
def try_find_last_run(
|
657
|
+
def try_find_last_run(
|
658
|
+
runs: list[Run], config: object | None = None, **kwargs
|
659
|
+
) -> Run | None:
|
751
660
|
"""
|
752
661
|
Find the last run based on the provided configuration.
|
753
662
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.2.7
|
3
|
+
Version: 0.2.9
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -0,0 +1,12 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
|
2
|
+
hydraflow/asyncio.py,sha256=jdXuEFC6f7L_Dq6beASFZPQSvCnGimVxU-PRFsNc5U0,6241
|
3
|
+
hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
|
4
|
+
hydraflow/context.py,sha256=G7JMrG70sgBH2qILXl5nkGWNUoRggj518JWUq0ZiJ9E,7776
|
5
|
+
hydraflow/info.py,sha256=Vj2sT66Ric63mmaq7Yu8nDFhsGQYO3MCHrxFpapDufc,3458
|
6
|
+
hydraflow/mlflow.py,sha256=Q8RGijSURTjRkEDxzi_2Tk9KOx3QK__al5aArGQriHA,7249
|
7
|
+
hydraflow/progress.py,sha256=UIIKlweji3L0uRi4hZ_DrtRcnayHPlsMoug7hVEKq8k,6753
|
8
|
+
hydraflow/run_collection.py,sha256=V5lGdGHYgsSpBOYGaVEL1mpKJvdiEshBL0KmmZ8qeZo,29161
|
9
|
+
hydraflow-0.2.9.dist-info/METADATA,sha256=ZjJQz_4MogGkcs16dOwnsp_J0icg9ypgQdXOYxVdxJg,4181
|
10
|
+
hydraflow-0.2.9.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
11
|
+
hydraflow-0.2.9.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
12
|
+
hydraflow-0.2.9.dist-info/RECORD,,
|
hydraflow-0.2.7.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=ObIv7fGbNsqUhZf3sst-9pbgyFsJr6jVsNV10NmMQas,483
|
2
|
-
hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
|
3
|
-
hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
|
4
|
-
hydraflow/context.py,sha256=8Qn99yCSkCarDDthQ6hjgW80CBBIg0H7fnLvtw4ZXo8,7248
|
5
|
-
hydraflow/info.py,sha256=LziP71wQ-tDQPMUPFV_6JExBU8r-Ja-O05F07b_RUcc,1812
|
6
|
-
hydraflow/mlflow.py,sha256=USd51C5YFlk4Bjhs4F1PMakxDxjD6Nn2t4GhL6aZ6QQ,3647
|
7
|
-
hydraflow/progress.py,sha256=0GJfKnnY_SAHVWpGvLdgOBsogGs8vVofjLuphuUEy2g,4296
|
8
|
-
hydraflow/run_collection.py,sha256=NO_QEwIwxU0EouKCJ4HAhXd35uJrxqolI7vM5QfsNxw,33152
|
9
|
-
hydraflow-0.2.7.dist-info/METADATA,sha256=_kqK5pFLntvmiFIc1UBWOzDSRMeerXDZ0ZozhlTMkSw,4181
|
10
|
-
hydraflow-0.2.7.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
11
|
-
hydraflow-0.2.7.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
12
|
-
hydraflow-0.2.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|