hydraflow 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +3 -0
- hydraflow/asyncio.py +9 -3
- hydraflow/progress.py +117 -46
- hydraflow/run_collection.py +6 -21
- {hydraflow-0.2.8.dist-info → hydraflow-0.2.9.dist-info}/METADATA +1 -1
- hydraflow-0.2.9.dist-info/RECORD +12 -0
- hydraflow-0.2.8.dist-info/RECORD +0 -12
- {hydraflow-0.2.8.dist-info → hydraflow-0.2.9.dist-info}/WHEEL +0 -0
- {hydraflow-0.2.8.dist-info → hydraflow-0.2.9.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -5,6 +5,7 @@ from .mlflow import (
|
|
5
5
|
search_runs,
|
6
6
|
set_experiment,
|
7
7
|
)
|
8
|
+
from .progress import multi_tasks_progress, parallel_progress
|
8
9
|
from .run_collection import RunCollection
|
9
10
|
|
10
11
|
__all__ = [
|
@@ -15,6 +16,8 @@ __all__ = [
|
|
15
16
|
"list_runs",
|
16
17
|
"load_config",
|
17
18
|
"log_run",
|
19
|
+
"multi_tasks_progress",
|
20
|
+
"parallel_progress",
|
18
21
|
"search_runs",
|
19
22
|
"set_experiment",
|
20
23
|
"start_run",
|
hydraflow/asyncio.py
CHANGED
@@ -41,7 +41,9 @@ async def execute_command(
|
|
41
41
|
int: The return code of the process.
|
42
42
|
"""
|
43
43
|
try:
|
44
|
-
process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
|
44
|
+
process = await asyncio.create_subprocess_exec(
|
45
|
+
program, *args, stdout=PIPE, stderr=PIPE
|
46
|
+
)
|
45
47
|
await asyncio.gather(
|
46
48
|
process_stream(process.stdout, stdout),
|
47
49
|
process_stream(process.stderr, stderr),
|
@@ -100,7 +102,9 @@ async def monitor_file_changes(
|
|
100
102
|
"""
|
101
103
|
str_paths = [str(path) for path in paths]
|
102
104
|
try:
|
103
|
-
async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
|
105
|
+
async for changes in watchfiles.awatch(
|
106
|
+
*str_paths, stop_event=stop_event, **awatch_kwargs
|
107
|
+
):
|
104
108
|
callback(changes)
|
105
109
|
except Exception as e:
|
106
110
|
logger.error(f"Error watching files: {e}")
|
@@ -129,7 +133,9 @@ async def run_and_monitor(
|
|
129
133
|
"""
|
130
134
|
stop_event = asyncio.Event()
|
131
135
|
run_task = asyncio.create_task(
|
132
|
-
execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
|
136
|
+
execute_command(
|
137
|
+
program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr
|
138
|
+
)
|
133
139
|
)
|
134
140
|
if watch and paths:
|
135
141
|
monitor_task = asyncio.create_task(
|
hydraflow/progress.py
CHANGED
@@ -1,17 +1,129 @@
|
|
1
|
+
"""
|
2
|
+
Module for managing progress tracking in parallel processing using Joblib
|
3
|
+
and Rich's Progress bar.
|
4
|
+
|
5
|
+
Provide context managers and functions to facilitate the execution
|
6
|
+
of tasks in parallel while displaying progress updates.
|
7
|
+
|
8
|
+
The following key components are provided:
|
9
|
+
|
10
|
+
- JoblibProgress: A context manager for tracking progress with Rich's Progress
|
11
|
+
bar.
|
12
|
+
- parallel_progress: A function to execute a given function in parallel over
|
13
|
+
an iterable with progress tracking.
|
14
|
+
- multi_tasks_progress: A function to render auto-updating progress bars for
|
15
|
+
multiple tasks concurrently.
|
16
|
+
|
17
|
+
Usage:
|
18
|
+
Import the necessary functions and use them to manage progress in your
|
19
|
+
parallel processing tasks.
|
20
|
+
"""
|
21
|
+
|
1
22
|
from __future__ import annotations
|
2
23
|
|
3
|
-
from typing import TYPE_CHECKING
|
24
|
+
from contextlib import contextmanager
|
25
|
+
from typing import TYPE_CHECKING, TypeVar
|
4
26
|
|
5
27
|
import joblib
|
6
28
|
from rich.progress import Progress
|
7
29
|
|
8
30
|
if TYPE_CHECKING:
|
9
|
-
from collections.abc import Iterable
|
31
|
+
from collections.abc import Callable, Iterable, Iterator
|
10
32
|
|
11
33
|
from rich.progress import ProgressColumn
|
12
34
|
|
13
35
|
|
14
|
-
|
36
|
+
# https://github.com/jonghwanhyeon/joblib-progress/blob/main/joblib_progress/__init__.py
|
37
|
+
@contextmanager
|
38
|
+
def JoblibProgress(
|
39
|
+
*columns: ProgressColumn | str,
|
40
|
+
description: str | None = None,
|
41
|
+
total: int | None = None,
|
42
|
+
**kwargs,
|
43
|
+
) -> Iterator[Progress]:
|
44
|
+
"""
|
45
|
+
Context manager for tracking progress using Joblib with Rich's Progress bar.
|
46
|
+
|
47
|
+
Args:
|
48
|
+
*columns (ProgressColumn | str): Columns to display in the progress bar.
|
49
|
+
description (str | None, optional): A description for the progress task.
|
50
|
+
Defaults to None.
|
51
|
+
total (int | None, optional): The total number of tasks. If None, it will
|
52
|
+
be determined automatically.
|
53
|
+
**kwargs: Additional keyword arguments passed to the Progress instance.
|
54
|
+
|
55
|
+
Yields:
|
56
|
+
Progress: A Progress instance for managing the progress bar.
|
57
|
+
|
58
|
+
Example:
|
59
|
+
with JoblibProgress("task", total=100) as progress:
|
60
|
+
# Your parallel processing code here
|
61
|
+
"""
|
62
|
+
if not columns:
|
63
|
+
columns = Progress.get_default_columns()
|
64
|
+
|
65
|
+
progress = Progress(*columns, **kwargs)
|
66
|
+
|
67
|
+
if description is None:
|
68
|
+
description = "Processing..."
|
69
|
+
|
70
|
+
task_id = progress.add_task(description, total=total)
|
71
|
+
print_progress = joblib.parallel.Parallel.print_progress
|
72
|
+
|
73
|
+
def update_progress(self: joblib.parallel.Parallel):
|
74
|
+
progress.update(task_id, completed=self.n_completed_tasks, refresh=True)
|
75
|
+
return print_progress(self)
|
76
|
+
|
77
|
+
try:
|
78
|
+
joblib.parallel.Parallel.print_progress = update_progress
|
79
|
+
progress.start()
|
80
|
+
yield progress
|
81
|
+
|
82
|
+
finally:
|
83
|
+
progress.stop()
|
84
|
+
joblib.parallel.Parallel.print_progress = print_progress
|
85
|
+
|
86
|
+
|
87
|
+
T = TypeVar("T")
|
88
|
+
U = TypeVar("U")
|
89
|
+
|
90
|
+
|
91
|
+
def parallel_progress(
|
92
|
+
func: Callable[[T], U],
|
93
|
+
iterable: Iterable[T],
|
94
|
+
*columns: ProgressColumn | str,
|
95
|
+
n_jobs: int = -1,
|
96
|
+
description: str | None = None,
|
97
|
+
**kwargs,
|
98
|
+
) -> list[U]:
|
99
|
+
"""
|
100
|
+
Execute a function in parallel over an iterable with progress tracking.
|
101
|
+
|
102
|
+
Args:
|
103
|
+
func (Callable[[T], U]): The function to execute on each item in the
|
104
|
+
iterable.
|
105
|
+
iterable (Iterable[T]): An iterable of items to process.
|
106
|
+
*columns (ProgressColumn | str): Additional columns to display in the
|
107
|
+
progress bar.
|
108
|
+
n_jobs (int, optional): The number of jobs to run in parallel.
|
109
|
+
Defaults to -1 (all processors).
|
110
|
+
description (str | None, optional): A description for the progress bar.
|
111
|
+
Defaults to None.
|
112
|
+
**kwargs: Additional keyword arguments passed to the Progress instance.
|
113
|
+
|
114
|
+
Returns:
|
115
|
+
list[U]: A list of results from applying the function to each item in
|
116
|
+
the iterable.
|
117
|
+
"""
|
118
|
+
iterable = list(iterable)
|
119
|
+
total = len(iterable)
|
120
|
+
|
121
|
+
with JoblibProgress(*columns, description=description, total=total, **kwargs):
|
122
|
+
it = (joblib.delayed(func)(x) for x in iterable)
|
123
|
+
return joblib.Parallel(n_jobs=n_jobs)(it) # type: ignore
|
124
|
+
|
125
|
+
|
126
|
+
def multi_tasks_progress(
|
15
127
|
iterables: Iterable[Iterable[int | tuple[int, int]]],
|
16
128
|
*columns: ProgressColumn | str,
|
17
129
|
n_jobs: int = -1,
|
@@ -52,7 +164,8 @@ def multi_task_progress(
|
|
52
164
|
|
53
165
|
task_main = progress.add_task(main_description, total=None) if n > 1 else None
|
54
166
|
tasks = [
|
55
|
-
progress.add_task(description.format(i), start=False, total=None) for i in range(n)
|
167
|
+
progress.add_task(description.format(i), start=False, total=None)
|
168
|
+
for i in range(n)
|
56
169
|
]
|
57
170
|
|
58
171
|
total = {}
|
@@ -87,45 +200,3 @@ def multi_task_progress(
|
|
87
200
|
|
88
201
|
else:
|
89
202
|
func(0)
|
90
|
-
|
91
|
-
|
92
|
-
if __name__ == "__main__":
|
93
|
-
import random
|
94
|
-
import time
|
95
|
-
|
96
|
-
from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
|
97
|
-
|
98
|
-
from hydraflow.progress import multi_task_progress
|
99
|
-
|
100
|
-
def task(total):
|
101
|
-
for i in range(total or 90):
|
102
|
-
if total is None:
|
103
|
-
yield i
|
104
|
-
else:
|
105
|
-
yield i, total
|
106
|
-
time.sleep(random.random() / 30)
|
107
|
-
|
108
|
-
def multi_task_progress_test(unknown_total: bool):
|
109
|
-
tasks = [task(random.randint(80, 100)) for _ in range(4)]
|
110
|
-
if unknown_total:
|
111
|
-
tasks = [task(None), *tasks, task(None)]
|
112
|
-
|
113
|
-
columns = [
|
114
|
-
SpinnerColumn(),
|
115
|
-
*Progress.get_default_columns(),
|
116
|
-
MofNCompleteColumn(),
|
117
|
-
TimeElapsedColumn(),
|
118
|
-
]
|
119
|
-
|
120
|
-
kwargs = {}
|
121
|
-
if unknown_total:
|
122
|
-
kwargs["main_description"] = "unknown"
|
123
|
-
|
124
|
-
multi_task_progress(tasks, *columns, n_jobs=4, **kwargs)
|
125
|
-
|
126
|
-
multi_task_progress_test(False)
|
127
|
-
multi_task_progress_test(True)
|
128
|
-
multi_task_progress([task(100)])
|
129
|
-
multi_task_progress([task(None)], description="unknown")
|
130
|
-
multi_task_progress([task(100), task(None)], main_description="transient", transient=True)
|
131
|
-
multi_task_progress([task(100)], description="transient", transient=True)
|
hydraflow/run_collection.py
CHANGED
@@ -468,7 +468,9 @@ class RunCollection:
|
|
468
468
|
"""
|
469
469
|
return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)
|
470
470
|
|
471
|
-
def group_by(self, *names: str | list[str]) -> dict[tuple[str | None, ...], RunCollection]:
|
471
|
+
def group_by(
|
472
|
+
self, *names: str | list[str]
|
473
|
+
) -> dict[tuple[str | None, ...], RunCollection]:
|
472
474
|
"""
|
473
475
|
Group runs by specified parameter names.
|
474
476
|
|
@@ -493,25 +495,6 @@ class RunCollection:
|
|
493
495
|
|
494
496
|
return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
|
495
497
|
|
496
|
-
def group_by_values(self, *names: str | list[str]) -> list[RunCollection]:
|
497
|
-
"""
|
498
|
-
Group runs by specified parameter names.
|
499
|
-
|
500
|
-
This method groups the runs in the collection based on the values of the
|
501
|
-
specified parameters. Each unique combination of parameter values will
|
502
|
-
form a separate RunCollection in the returned list.
|
503
|
-
|
504
|
-
Args:
|
505
|
-
*names (str | list[str]): The names of the parameters to group by.
|
506
|
-
This can be a single parameter name or multiple names provided
|
507
|
-
as separate arguments or as a list.
|
508
|
-
|
509
|
-
Returns:
|
510
|
-
list[RunCollection]: A list of RunCollection objects, where each
|
511
|
-
object contains runs that match the specified parameter values.
|
512
|
-
"""
|
513
|
-
return list(self.group_by(*names).values())
|
514
|
-
|
515
498
|
|
516
499
|
def _param_matches(run: Run, key: str, value: Any) -> bool:
|
517
500
|
"""
|
@@ -671,7 +654,9 @@ def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Ru
|
|
671
654
|
return filtered_runs[-1]
|
672
655
|
|
673
656
|
|
674
|
-
def try_find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
|
657
|
+
def try_find_last_run(
|
658
|
+
runs: list[Run], config: object | None = None, **kwargs
|
659
|
+
) -> Run | None:
|
675
660
|
"""
|
676
661
|
Find the last run based on the provided configuration.
|
677
662
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.2.8
|
3
|
+
Version: 0.2.9
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -0,0 +1,12 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
|
2
|
+
hydraflow/asyncio.py,sha256=jdXuEFC6f7L_Dq6beASFZPQSvCnGimVxU-PRFsNc5U0,6241
|
3
|
+
hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
|
4
|
+
hydraflow/context.py,sha256=G7JMrG70sgBH2qILXl5nkGWNUoRggj518JWUq0ZiJ9E,7776
|
5
|
+
hydraflow/info.py,sha256=Vj2sT66Ric63mmaq7Yu8nDFhsGQYO3MCHrxFpapDufc,3458
|
6
|
+
hydraflow/mlflow.py,sha256=Q8RGijSURTjRkEDxzi_2Tk9KOx3QK__al5aArGQriHA,7249
|
7
|
+
hydraflow/progress.py,sha256=UIIKlweji3L0uRi4hZ_DrtRcnayHPlsMoug7hVEKq8k,6753
|
8
|
+
hydraflow/run_collection.py,sha256=V5lGdGHYgsSpBOYGaVEL1mpKJvdiEshBL0KmmZ8qeZo,29161
|
9
|
+
hydraflow-0.2.9.dist-info/METADATA,sha256=ZjJQz_4MogGkcs16dOwnsp_J0icg9ypgQdXOYxVdxJg,4181
|
10
|
+
hydraflow-0.2.9.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
11
|
+
hydraflow-0.2.9.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
12
|
+
hydraflow-0.2.9.dist-info/RECORD,,
|
hydraflow-0.2.8.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=K2xXk5Za_9OkiRmbsgkuWn7EMaTcQOVCPFs5oTP_QFw,483
|
2
|
-
hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
|
3
|
-
hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
|
4
|
-
hydraflow/context.py,sha256=G7JMrG70sgBH2qILXl5nkGWNUoRggj518JWUq0ZiJ9E,7776
|
5
|
-
hydraflow/info.py,sha256=Vj2sT66Ric63mmaq7Yu8nDFhsGQYO3MCHrxFpapDufc,3458
|
6
|
-
hydraflow/mlflow.py,sha256=Q8RGijSURTjRkEDxzi_2Tk9KOx3QK__al5aArGQriHA,7249
|
7
|
-
hydraflow/progress.py,sha256=0GJfKnnY_SAHVWpGvLdgOBsogGs8vVofjLuphuUEy2g,4296
|
8
|
-
hydraflow/run_collection.py,sha256=Ge-PAsoQBbn7cuow0DYMf5SoBmIXUfZ9ftufN_75Pw8,29963
|
9
|
-
hydraflow-0.2.8.dist-info/METADATA,sha256=9CF5S8LdmDUx4sihDqVRvwLLk34FNBmy_Vv6BVoahoc,4181
|
10
|
-
hydraflow-0.2.8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
11
|
-
hydraflow-0.2.8.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
12
|
-
hydraflow-0.2.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|