hydraflow 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hydraflow/__init__.py CHANGED
@@ -5,6 +5,7 @@ from .mlflow import (
5
5
  search_runs,
6
6
  set_experiment,
7
7
  )
8
+ from .progress import multi_tasks_progress, parallel_progress
8
9
  from .run_collection import RunCollection
9
10
 
10
11
  __all__ = [
@@ -15,6 +16,8 @@ __all__ = [
15
16
  "list_runs",
16
17
  "load_config",
17
18
  "log_run",
19
+ "multi_tasks_progress",
20
+ "parallel_progress",
18
21
  "search_runs",
19
22
  "set_experiment",
20
23
  "start_run",
hydraflow/asyncio.py CHANGED
@@ -41,7 +41,9 @@ async def execute_command(
41
41
  int: The return code of the process.
42
42
  """
43
43
  try:
44
- process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
44
+ process = await asyncio.create_subprocess_exec(
45
+ program, *args, stdout=PIPE, stderr=PIPE
46
+ )
45
47
  await asyncio.gather(
46
48
  process_stream(process.stdout, stdout),
47
49
  process_stream(process.stderr, stderr),
@@ -100,7 +102,9 @@ async def monitor_file_changes(
100
102
  """
101
103
  str_paths = [str(path) for path in paths]
102
104
  try:
103
- async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
105
+ async for changes in watchfiles.awatch(
106
+ *str_paths, stop_event=stop_event, **awatch_kwargs
107
+ ):
104
108
  callback(changes)
105
109
  except Exception as e:
106
110
  logger.error(f"Error watching files: {e}")
@@ -129,7 +133,9 @@ async def run_and_monitor(
129
133
  """
130
134
  stop_event = asyncio.Event()
131
135
  run_task = asyncio.create_task(
132
- execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
136
+ execute_command(
137
+ program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr
138
+ )
133
139
  )
134
140
  if watch and paths:
135
141
  monitor_task = asyncio.create_task(
hydraflow/progress.py CHANGED
@@ -1,17 +1,129 @@
1
+ """
2
+ Module for managing progress tracking in parallel processing using Joblib
3
+ and Rich's Progress bar.
4
+
5
+ Provide context managers and functions to facilitate the execution
6
+ of tasks in parallel while displaying progress updates.
7
+
8
+ The following key components are provided:
9
+
10
+ - JoblibProgress: A context manager for tracking progress with Rich's Progress
11
+ bar.
12
+ - parallel_progress: A function to execute a given function in parallel over
13
+ an iterable with progress tracking.
14
+ - multi_tasks_progress: A function to render auto-updating progress bars for
15
+ multiple tasks concurrently.
16
+
17
+ Usage:
18
+ Import the necessary functions and use them to manage progress in your
19
+ parallel processing tasks.
20
+ """
21
+
1
22
  from __future__ import annotations
2
23
 
3
- from typing import TYPE_CHECKING
24
+ from contextlib import contextmanager
25
+ from typing import TYPE_CHECKING, TypeVar
4
26
 
5
27
  import joblib
6
28
  from rich.progress import Progress
7
29
 
8
30
  if TYPE_CHECKING:
9
- from collections.abc import Iterable
31
+ from collections.abc import Callable, Iterable, Iterator
10
32
 
11
33
  from rich.progress import ProgressColumn
12
34
 
13
35
 
14
- def multi_task_progress(
36
+ # https://github.com/jonghwanhyeon/joblib-progress/blob/main/joblib_progress/__init__.py
37
+ @contextmanager
38
+ def JoblibProgress(
39
+ *columns: ProgressColumn | str,
40
+ description: str | None = None,
41
+ total: int | None = None,
42
+ **kwargs,
43
+ ) -> Iterator[Progress]:
44
+ """
45
+ Context manager for tracking progress using Joblib with Rich's Progress bar.
46
+
47
+ Args:
48
+ *columns (ProgressColumn | str): Columns to display in the progress bar.
49
+ description (str | None, optional): A description for the progress task.
50
+ Defaults to None.
51
+ total (int | None, optional): The total number of tasks. If None, it will
52
+ be determined automatically.
53
+ **kwargs: Additional keyword arguments passed to the Progress instance.
54
+
55
+ Yields:
56
+ Progress: A Progress instance for managing the progress bar.
57
+
58
+ Example:
59
+ with JoblibProgress("task", total=100) as progress:
60
+ # Your parallel processing code here
61
+ """
62
+ if not columns:
63
+ columns = Progress.get_default_columns()
64
+
65
+ progress = Progress(*columns, **kwargs)
66
+
67
+ if description is None:
68
+ description = "Processing..."
69
+
70
+ task_id = progress.add_task(description, total=total)
71
+ print_progress = joblib.parallel.Parallel.print_progress
72
+
73
+ def update_progress(self: joblib.parallel.Parallel):
74
+ progress.update(task_id, completed=self.n_completed_tasks, refresh=True)
75
+ return print_progress(self)
76
+
77
+ try:
78
+ joblib.parallel.Parallel.print_progress = update_progress
79
+ progress.start()
80
+ yield progress
81
+
82
+ finally:
83
+ progress.stop()
84
+ joblib.parallel.Parallel.print_progress = print_progress
85
+
86
+
87
+ T = TypeVar("T")
88
+ U = TypeVar("U")
89
+
90
+
91
+ def parallel_progress(
92
+ func: Callable[[T], U],
93
+ iterable: Iterable[T],
94
+ *columns: ProgressColumn | str,
95
+ n_jobs: int = -1,
96
+ description: str | None = None,
97
+ **kwargs,
98
+ ) -> list[U]:
99
+ """
100
+ Execute a function in parallel over an iterable with progress tracking.
101
+
102
+ Args:
103
+ func (Callable[[T], U]): The function to execute on each item in the
104
+ iterable.
105
+ iterable (Iterable[T]): An iterable of items to process.
106
+ *columns (ProgressColumn | str): Additional columns to display in the
107
+ progress bar.
108
+ n_jobs (int, optional): The number of jobs to run in parallel.
109
+ Defaults to -1 (all processors).
110
+ description (str | None, optional): A description for the progress bar.
111
+ Defaults to None.
112
+ **kwargs: Additional keyword arguments passed to the Progress instance.
113
+
114
+ Returns:
115
+ list[U]: A list of results from applying the function to each item in
116
+ the iterable.
117
+ """
118
+ iterable = list(iterable)
119
+ total = len(iterable)
120
+
121
+ with JoblibProgress(*columns, description=description, total=total, **kwargs):
122
+ it = (joblib.delayed(func)(x) for x in iterable)
123
+ return joblib.Parallel(n_jobs=n_jobs)(it) # type: ignore
124
+
125
+
126
+ def multi_tasks_progress(
15
127
  iterables: Iterable[Iterable[int | tuple[int, int]]],
16
128
  *columns: ProgressColumn | str,
17
129
  n_jobs: int = -1,
@@ -52,7 +164,8 @@ def multi_task_progress(
52
164
 
53
165
  task_main = progress.add_task(main_description, total=None) if n > 1 else None
54
166
  tasks = [
55
- progress.add_task(description.format(i), start=False, total=None) for i in range(n)
167
+ progress.add_task(description.format(i), start=False, total=None)
168
+ for i in range(n)
56
169
  ]
57
170
 
58
171
  total = {}
@@ -87,45 +200,3 @@ def multi_task_progress(
87
200
 
88
201
  else:
89
202
  func(0)
90
-
91
-
92
- if __name__ == "__main__":
93
- import random
94
- import time
95
-
96
- from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
97
-
98
- from hydraflow.progress import multi_task_progress
99
-
100
- def task(total):
101
- for i in range(total or 90):
102
- if total is None:
103
- yield i
104
- else:
105
- yield i, total
106
- time.sleep(random.random() / 30)
107
-
108
- def multi_task_progress_test(unknown_total: bool):
109
- tasks = [task(random.randint(80, 100)) for _ in range(4)]
110
- if unknown_total:
111
- tasks = [task(None), *tasks, task(None)]
112
-
113
- columns = [
114
- SpinnerColumn(),
115
- *Progress.get_default_columns(),
116
- MofNCompleteColumn(),
117
- TimeElapsedColumn(),
118
- ]
119
-
120
- kwargs = {}
121
- if unknown_total:
122
- kwargs["main_description"] = "unknown"
123
-
124
- multi_task_progress(tasks, *columns, n_jobs=4, **kwargs)
125
-
126
- multi_task_progress_test(False)
127
- multi_task_progress_test(True)
128
- multi_task_progress([task(100)])
129
- multi_task_progress([task(None)], description="unknown")
130
- multi_task_progress([task(100), task(None)], main_description="transient", transient=True)
131
- multi_task_progress([task(100)], description="transient", transient=True)
@@ -468,7 +468,9 @@ class RunCollection:
468
468
  """
469
469
  return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)
470
470
 
471
- def group_by(self, *names: str | list[str]) -> dict[tuple[str | None, ...], RunCollection]:
471
+ def group_by(
472
+ self, *names: str | list[str]
473
+ ) -> dict[tuple[str | None, ...], RunCollection]:
472
474
  """
473
475
  Group runs by specified parameter names.
474
476
 
@@ -493,25 +495,6 @@ class RunCollection:
493
495
 
494
496
  return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
495
497
 
496
- def group_by_values(self, *names: str | list[str]) -> list[RunCollection]:
497
- """
498
- Group runs by specified parameter names.
499
-
500
- This method groups the runs in the collection based on the values of the
501
- specified parameters. Each unique combination of parameter values will
502
- form a separate RunCollection in the returned list.
503
-
504
- Args:
505
- *names (str | list[str]): The names of the parameters to group by.
506
- This can be a single parameter name or multiple names provided
507
- as separate arguments or as a list.
508
-
509
- Returns:
510
- list[RunCollection]: A list of RunCollection objects, where each
511
- object contains runs that match the specified parameter values.
512
- """
513
- return list(self.group_by(*names).values())
514
-
515
498
 
516
499
  def _param_matches(run: Run, key: str, value: Any) -> bool:
517
500
  """
@@ -671,7 +654,9 @@ def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Ru
671
654
  return filtered_runs[-1]
672
655
 
673
656
 
674
- def try_find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
657
+ def try_find_last_run(
658
+ runs: list[Run], config: object | None = None, **kwargs
659
+ ) -> Run | None:
675
660
  """
676
661
  Find the last run based on the provided configuration.
677
662
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hydraflow
3
- Version: 0.2.8
3
+ Version: 0.2.9
4
4
  Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
5
  Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -0,0 +1,12 @@
1
+ hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
2
+ hydraflow/asyncio.py,sha256=jdXuEFC6f7L_Dq6beASFZPQSvCnGimVxU-PRFsNc5U0,6241
3
+ hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
4
+ hydraflow/context.py,sha256=G7JMrG70sgBH2qILXl5nkGWNUoRggj518JWUq0ZiJ9E,7776
5
+ hydraflow/info.py,sha256=Vj2sT66Ric63mmaq7Yu8nDFhsGQYO3MCHrxFpapDufc,3458
6
+ hydraflow/mlflow.py,sha256=Q8RGijSURTjRkEDxzi_2Tk9KOx3QK__al5aArGQriHA,7249
7
+ hydraflow/progress.py,sha256=UIIKlweji3L0uRi4hZ_DrtRcnayHPlsMoug7hVEKq8k,6753
8
+ hydraflow/run_collection.py,sha256=V5lGdGHYgsSpBOYGaVEL1mpKJvdiEshBL0KmmZ8qeZo,29161
9
+ hydraflow-0.2.9.dist-info/METADATA,sha256=ZjJQz_4MogGkcs16dOwnsp_J0icg9ypgQdXOYxVdxJg,4181
10
+ hydraflow-0.2.9.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
11
+ hydraflow-0.2.9.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
12
+ hydraflow-0.2.9.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- hydraflow/__init__.py,sha256=K2xXk5Za_9OkiRmbsgkuWn7EMaTcQOVCPFs5oTP_QFw,483
2
- hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
3
- hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
4
- hydraflow/context.py,sha256=G7JMrG70sgBH2qILXl5nkGWNUoRggj518JWUq0ZiJ9E,7776
5
- hydraflow/info.py,sha256=Vj2sT66Ric63mmaq7Yu8nDFhsGQYO3MCHrxFpapDufc,3458
6
- hydraflow/mlflow.py,sha256=Q8RGijSURTjRkEDxzi_2Tk9KOx3QK__al5aArGQriHA,7249
7
- hydraflow/progress.py,sha256=0GJfKnnY_SAHVWpGvLdgOBsogGs8vVofjLuphuUEy2g,4296
8
- hydraflow/run_collection.py,sha256=Ge-PAsoQBbn7cuow0DYMf5SoBmIXUfZ9ftufN_75Pw8,29963
9
- hydraflow-0.2.8.dist-info/METADATA,sha256=9CF5S8LdmDUx4sihDqVRvwLLk34FNBmy_Vv6BVoahoc,4181
10
- hydraflow-0.2.8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
11
- hydraflow-0.2.8.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
12
- hydraflow-0.2.8.dist-info/RECORD,,