hydraflow 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl
- hydraflow/__init__.py +3 -0
- hydraflow/asyncio.py +9 -3
- hydraflow/progress.py +117 -46
- hydraflow/run_collection.py +6 -21
- {hydraflow-0.2.8.dist-info → hydraflow-0.2.9.dist-info}/METADATA +1 -1
- hydraflow-0.2.9.dist-info/RECORD +12 -0
- hydraflow-0.2.8.dist-info/RECORD +0 -12
- {hydraflow-0.2.8.dist-info → hydraflow-0.2.9.dist-info}/WHEEL +0 -0
- {hydraflow-0.2.8.dist-info → hydraflow-0.2.9.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -5,6 +5,7 @@ from .mlflow import (
     search_runs,
     set_experiment,
 )
+from .progress import multi_tasks_progress, parallel_progress
 from .run_collection import RunCollection

 __all__ = [
@@ -15,6 +16,8 @@ __all__ = [
     "list_runs",
     "load_config",
     "log_run",
+    "multi_tasks_progress",
+    "parallel_progress",
     "search_runs",
     "set_experiment",
     "start_run",
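Both helpers are now re-exported from the package root, so they can be imported directly from hydraflow. A minimal sketch (the workload function slow_square and the argument values are illustrative, not part of the package):

    from hydraflow import parallel_progress

    def slow_square(x: int) -> int:
        # Placeholder workload; any picklable function works here.
        return x * x

    # Map the function over 0..9 in parallel, with a Rich progress bar.
    results = parallel_progress(slow_square, range(10), n_jobs=2)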
hydraflow/asyncio.py
CHANGED
@@ -41,7 +41,9 @@ async def execute_command(
         int: The return code of the process.
     """
     try:
-        process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
+        process = await asyncio.create_subprocess_exec(
+            program, *args, stdout=PIPE, stderr=PIPE
+        )
         await asyncio.gather(
             process_stream(process.stdout, stdout),
             process_stream(process.stderr, stderr),
@@ -100,7 +102,9 @@ async def monitor_file_changes(
     """
     str_paths = [str(path) for path in paths]
     try:
-        async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
+        async for changes in watchfiles.awatch(
+            *str_paths, stop_event=stop_event, **awatch_kwargs
+        ):
             callback(changes)
     except Exception as e:
         logger.error(f"Error watching files: {e}")
@@ -129,7 +133,9 @@ async def run_and_monitor(
     """
     stop_event = asyncio.Event()
     run_task = asyncio.create_task(
-        execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
+        execute_command(
+            program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr
+        )
     )
     if watch and paths:
         monitor_task = asyncio.create_task(
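The three edits above only reflow long calls; the underlying pattern is the standard asyncio one of spawning a subprocess with piped output and draining both streams concurrently. A standalone sketch of that pattern, independent of hydraflow's own helpers (the command and the print callbacks are placeholders):

    import asyncio
    from asyncio.subprocess import PIPE

    async def stream_lines(stream: asyncio.StreamReader, callback) -> None:
        # Forward each decoded line to the callback as it arrives.
        while line := await stream.readline():
            callback(line.decode().rstrip())

    async def main() -> None:
        process = await asyncio.create_subprocess_exec(
            "echo", "hello", stdout=PIPE, stderr=PIPE
        )
        await asyncio.gather(
            stream_lines(process.stdout, print),
            stream_lines(process.stderr, print),
        )
        await process.wait()

    asyncio.run(main())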
hydraflow/progress.py
CHANGED
@@ -1,17 +1,129 @@
+"""
+Module for managing progress tracking in parallel processing using Joblib
+and Rich's Progress bar.
+
+Provide context managers and functions to facilitate the execution
+of tasks in parallel while displaying progress updates.
+
+The following key components are provided:
+
+- JoblibProgress: A context manager for tracking progress with Rich's Progress
+  bar.
+- parallel_progress: A function to execute a given function in parallel over
+  an iterable with progress tracking.
+- multi_tasks_progress: A function to render auto-updating progress bars for
+  multiple tasks concurrently.
+
+Usage:
+    Import the necessary functions and use them to manage progress in your
+    parallel processing tasks.
+"""
+
 from __future__ import annotations

-from typing import TYPE_CHECKING
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, TypeVar

 import joblib
 from rich.progress import Progress

 if TYPE_CHECKING:
-    from collections.abc import Iterable
+    from collections.abc import Callable, Iterable, Iterator

     from rich.progress import ProgressColumn


-def multi_task_progress(
+# https://github.com/jonghwanhyeon/joblib-progress/blob/main/joblib_progress/__init__.py
+@contextmanager
+def JoblibProgress(
+    *columns: ProgressColumn | str,
+    description: str | None = None,
+    total: int | None = None,
+    **kwargs,
+) -> Iterator[Progress]:
+    """
+    Context manager for tracking progress using Joblib with Rich's Progress bar.
+
+    Args:
+        *columns (ProgressColumn | str): Columns to display in the progress bar.
+        description (str | None, optional): A description for the progress task.
+            Defaults to None.
+        total (int | None, optional): The total number of tasks. If None, it will
+            be determined automatically.
+        **kwargs: Additional keyword arguments passed to the Progress instance.
+
+    Yields:
+        Progress: A Progress instance for managing the progress bar.
+
+    Example:
+        with JoblibProgress("task", total=100) as progress:
+            # Your parallel processing code here
+    """
+    if not columns:
+        columns = Progress.get_default_columns()
+
+    progress = Progress(*columns, **kwargs)
+
+    if description is None:
+        description = "Processing..."
+
+    task_id = progress.add_task(description, total=total)
+    print_progress = joblib.parallel.Parallel.print_progress
+
+    def update_progress(self: joblib.parallel.Parallel):
+        progress.update(task_id, completed=self.n_completed_tasks, refresh=True)
+        return print_progress(self)
+
+    try:
+        joblib.parallel.Parallel.print_progress = update_progress
+        progress.start()
+        yield progress
+
+    finally:
+        progress.stop()
+        joblib.parallel.Parallel.print_progress = print_progress
+
+
+T = TypeVar("T")
+U = TypeVar("U")
+
+
+def parallel_progress(
+    func: Callable[[T], U],
+    iterable: Iterable[T],
+    *columns: ProgressColumn | str,
+    n_jobs: int = -1,
+    description: str | None = None,
+    **kwargs,
+) -> list[U]:
+    """
+    Execute a function in parallel over an iterable with progress tracking.
+
+    Args:
+        func (Callable[[T], U]): The function to execute on each item in the
+            iterable.
+        iterable (Iterable[T]): An iterable of items to process.
+        *columns (ProgressColumn | str): Additional columns to display in the
+            progress bar.
+        n_jobs (int, optional): The number of jobs to run in parallel.
+            Defaults to -1 (all processors).
+        description (str | None, optional): A description for the progress bar.
+            Defaults to None.
+        **kwargs: Additional keyword arguments passed to the Progress instance.
+
+    Returns:
+        list[U]: A list of results from applying the function to each item in
+            the iterable.
+    """
+    iterable = list(iterable)
+    total = len(iterable)
+
+    with JoblibProgress(*columns, description=description, total=total, **kwargs):
+        it = (joblib.delayed(func)(x) for x in iterable)
+        return joblib.Parallel(n_jobs=n_jobs)(it)  # type: ignore
+
+
+def multi_tasks_progress(
     iterables: Iterable[Iterable[int | tuple[int, int]]],
     *columns: ProgressColumn | str,
     n_jobs: int = -1,
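A minimal usage sketch for the two helpers added in this hunk, based only on the signatures and docstrings above (the work function and the timings are illustrative):

    import time

    import joblib

    from hydraflow.progress import JoblibProgress, parallel_progress

    def work(x: int) -> int:
        time.sleep(0.1)  # stand-in for a real task
        return x * x

    # Map `work` over an iterable in parallel with a progress bar.
    results = parallel_progress(work, range(20), n_jobs=4, description="Squaring")

    # Or drive joblib directly under the JoblibProgress context manager.
    with JoblibProgress(description="Manual joblib run", total=20):
        joblib.Parallel(n_jobs=4)(joblib.delayed(work)(x) for x in range(20))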
@@ -52,7 +164,8 @@ def multi_task_progress(

     task_main = progress.add_task(main_description, total=None) if n > 1 else None
     tasks = [
-        progress.add_task(description.format(i), start=False, total=None) for i in range(n)
+        progress.add_task(description.format(i), start=False, total=None)
+        for i in range(n)
     ]

     total = {}
@@ -87,45 +200,3 @@ def multi_task_progress(

     else:
         func(0)
-
-
-if __name__ == "__main__":
-    import random
-    import time
-
-    from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
-
-    from hydraflow.progress import multi_task_progress
-
-    def task(total):
-        for i in range(total or 90):
-            if total is None:
-                yield i
-            else:
-                yield i, total
-            time.sleep(random.random() / 30)
-
-    def multi_task_progress_test(unknown_total: bool):
-        tasks = [task(random.randint(80, 100)) for _ in range(4)]
-        if unknown_total:
-            tasks = [task(None), *tasks, task(None)]
-
-        columns = [
-            SpinnerColumn(),
-            *Progress.get_default_columns(),
-            MofNCompleteColumn(),
-            TimeElapsedColumn(),
-        ]
-
-        kwargs = {}
-        if unknown_total:
-            kwargs["main_description"] = "unknown"
-
-        multi_task_progress(tasks, *columns, n_jobs=4, **kwargs)
-
-    multi_task_progress_test(False)
-    multi_task_progress_test(True)
-    multi_task_progress([task(100)])
-    multi_task_progress([task(None)], description="unknown")
-    multi_task_progress([task(100), task(None)], main_description="transient", transient=True)
-    multi_task_progress([task(100)], description="transient", transient=True)
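The deleted __main__ block above doubled as the only usage demo for this function. An equivalent sketch against the renamed public API, adapted from that demo (the task generator and the counts mirror the removed code):

    import random
    import time

    from hydraflow.progress import multi_tasks_progress

    def task(total: int):
        # Yield (i, total) so the bar knows its length; yield bare i when unknown.
        for i in range(total):
            yield i, total
            time.sleep(random.random() / 30)

    # Four concurrent tasks, each with its own progress bar.
    multi_tasks_progress([task(random.randint(80, 100)) for _ in range(4)], n_jobs=4)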
hydraflow/run_collection.py
CHANGED
@@ -468,7 +468,9 @@ class RunCollection:
         """
         return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)

-    def group_by(self, *names: str | list[str]) -> dict[tuple[str | None, ...], RunCollection]:
+    def group_by(
+        self, *names: str | list[str]
+    ) -> dict[tuple[str | None, ...], RunCollection]:
         """
         Group runs by specified parameter names.

@@ -493,25 +495,6 @@ class RunCollection:

         return {key: RunCollection(runs) for key, runs in grouped_runs.items()}

-    def group_by_values(self, *names: str | list[str]) -> list[RunCollection]:
-        """
-        Group runs by specified parameter names.
-
-        This method groups the runs in the collection based on the values of the
-        specified parameters. Each unique combination of parameter values will
-        form a separate RunCollection in the returned list.
-
-        Args:
-            *names (str | list[str]): The names of the parameters to group by.
-                This can be a single parameter name or multiple names provided
-                as separate arguments or as a list.
-
-        Returns:
-            list[RunCollection]: A list of RunCollection objects, where each
-                object contains runs that match the specified parameter values.
-        """
-        return list(self.group_by(*names).values())
-

 def _param_matches(run: Run, key: str, value: Any) -> bool:
     """
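Since the removed group_by_values was only a thin wrapper returning list(self.group_by(*names).values()), its behavior is still available through group_by. A sketch, assuming rc is an existing RunCollection and "learning_rate" is a logged parameter name:

    # Keys are tuples of parameter values, values are sub-collections of runs.
    grouped = rc.group_by("learning_rate")

    # Equivalent of the removed group_by_values: keep only the sub-collections.
    collections = list(grouped.values())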
@@ -671,7 +654,9 @@ def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Ru
         return filtered_runs[-1]


-def try_find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
+def try_find_last_run(
+    runs: list[Run], config: object | None = None, **kwargs
+) -> Run | None:
     """
     Find the last run based on the provided configuration.

{hydraflow-0.2.8.dist-info → hydraflow-0.2.9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.8
+Version: 0.2.9
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -0,0 +1,12 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
|
2
|
+
hydraflow/asyncio.py,sha256=jdXuEFC6f7L_Dq6beASFZPQSvCnGimVxU-PRFsNc5U0,6241
|
3
|
+
hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
|
4
|
+
hydraflow/context.py,sha256=G7JMrG70sgBH2qILXl5nkGWNUoRggj518JWUq0ZiJ9E,7776
|
5
|
+
hydraflow/info.py,sha256=Vj2sT66Ric63mmaq7Yu8nDFhsGQYO3MCHrxFpapDufc,3458
|
6
|
+
hydraflow/mlflow.py,sha256=Q8RGijSURTjRkEDxzi_2Tk9KOx3QK__al5aArGQriHA,7249
|
7
|
+
hydraflow/progress.py,sha256=UIIKlweji3L0uRi4hZ_DrtRcnayHPlsMoug7hVEKq8k,6753
|
8
|
+
hydraflow/run_collection.py,sha256=V5lGdGHYgsSpBOYGaVEL1mpKJvdiEshBL0KmmZ8qeZo,29161
|
9
|
+
hydraflow-0.2.9.dist-info/METADATA,sha256=ZjJQz_4MogGkcs16dOwnsp_J0icg9ypgQdXOYxVdxJg,4181
|
10
|
+
hydraflow-0.2.9.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
11
|
+
hydraflow-0.2.9.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
12
|
+
hydraflow-0.2.9.dist-info/RECORD,,
|
hydraflow-0.2.8.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
-hydraflow/__init__.py,sha256=K2xXk5Za_9OkiRmbsgkuWn7EMaTcQOVCPFs5oTP_QFw,483
-hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
-hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
-hydraflow/context.py,sha256=G7JMrG70sgBH2qILXl5nkGWNUoRggj518JWUq0ZiJ9E,7776
-hydraflow/info.py,sha256=Vj2sT66Ric63mmaq7Yu8nDFhsGQYO3MCHrxFpapDufc,3458
-hydraflow/mlflow.py,sha256=Q8RGijSURTjRkEDxzi_2Tk9KOx3QK__al5aArGQriHA,7249
-hydraflow/progress.py,sha256=0GJfKnnY_SAHVWpGvLdgOBsogGs8vVofjLuphuUEy2g,4296
-hydraflow/run_collection.py,sha256=Ge-PAsoQBbn7cuow0DYMf5SoBmIXUfZ9ftufN_75Pw8,29963
-hydraflow-0.2.8.dist-info/METADATA,sha256=9CF5S8LdmDUx4sihDqVRvwLLk34FNBmy_Vv6BVoahoc,4181
-hydraflow-0.2.8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-hydraflow-0.2.8.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.2.8.dist-info/RECORD,,
{hydraflow-0.2.8.dist-info → hydraflow-0.2.9.dist-info}/WHEEL
File without changes

{hydraflow-0.2.8.dist-info → hydraflow-0.2.9.dist-info}/licenses/LICENSE
File without changes