hydraflow 0.2.7__tar.gz → 0.2.9__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. {hydraflow-0.2.7 → hydraflow-0.2.9}/PKG-INFO +1 -1
  2. {hydraflow-0.2.7 → hydraflow-0.2.9}/pyproject.toml +2 -2
  3. {hydraflow-0.2.7 → hydraflow-0.2.9}/src/hydraflow/__init__.py +7 -4
  4. {hydraflow-0.2.7 → hydraflow-0.2.9}/src/hydraflow/asyncio.py +9 -3
  5. {hydraflow-0.2.7 → hydraflow-0.2.9}/src/hydraflow/context.py +24 -8
  6. {hydraflow-0.2.7 → hydraflow-0.2.9}/src/hydraflow/info.py +57 -4
  7. hydraflow-0.2.9/src/hydraflow/mlflow.py +175 -0
  8. hydraflow-0.2.9/src/hydraflow/progress.py +202 -0
  9. {hydraflow-0.2.7 → hydraflow-0.2.9}/src/hydraflow/run_collection.py +31 -122
  10. hydraflow-0.2.7/tests/scripts/log_run.py → hydraflow-0.2.9/tests/scripts/app.py +21 -3
  11. hydraflow-0.2.9/tests/scripts/progress.py +72 -0
  12. hydraflow-0.2.9/tests/test_app.py +100 -0
  13. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_context.py +1 -1
  14. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_info.py +14 -1
  15. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_log_run.py +5 -3
  16. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_progress.py +1 -1
  17. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_run_collection.py +5 -5
  18. hydraflow-0.2.7/mlruns/0/meta.yaml +0 -6
  19. hydraflow-0.2.7/src/hydraflow/mlflow.py +0 -119
  20. hydraflow-0.2.7/src/hydraflow/progress.py +0 -131
  21. {hydraflow-0.2.7 → hydraflow-0.2.9}/.devcontainer/devcontainer.json +0 -0
  22. {hydraflow-0.2.7 → hydraflow-0.2.9}/.devcontainer/postCreate.sh +0 -0
  23. {hydraflow-0.2.7 → hydraflow-0.2.9}/.devcontainer/starship.toml +0 -0
  24. {hydraflow-0.2.7 → hydraflow-0.2.9}/.gitattributes +0 -0
  25. {hydraflow-0.2.7 → hydraflow-0.2.9}/.gitignore +0 -0
  26. {hydraflow-0.2.7 → hydraflow-0.2.9}/LICENSE +0 -0
  27. {hydraflow-0.2.7 → hydraflow-0.2.9}/README.md +0 -0
  28. {hydraflow-0.2.7 → hydraflow-0.2.9}/src/hydraflow/config.py +0 -0
  29. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/scripts/__init__.py +0 -0
  30. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/scripts/watch.py +0 -0
  31. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_asyncio.py +0 -0
  32. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_config.py +0 -0
  33. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_mlflow.py +0 -0
  34. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_version.py +0 -0
  35. {hydraflow-0.2.7 → hydraflow-0.2.9}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hydraflow
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
5
  Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hydraflow"
7
- version = "0.2.7"
7
+ version = "0.2.9"
8
8
  description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -63,7 +63,7 @@ asyncio_default_fixture_loop_scope = "function"
63
63
  exclude_lines = ["no cov", "raise NotImplementedError", "if TYPE_CHECKING:"]
64
64
 
65
65
  [tool.ruff]
66
- line-length = 100
66
+ line-length = 88
67
67
  target-version = "py312"
68
68
 
69
69
  [tool.ruff.lint]
@@ -1,11 +1,12 @@
1
1
  from .context import chdir_artifact, log_run, start_run, watch
2
- from .info import load_config
3
- from .mlflow import get_artifact_dir, get_hydra_output_dir, set_experiment
4
- from .run_collection import (
5
- RunCollection,
2
+ from .info import get_artifact_dir, get_hydra_output_dir, load_config
3
+ from .mlflow import (
6
4
  list_runs,
7
5
  search_runs,
6
+ set_experiment,
8
7
  )
8
+ from .progress import multi_tasks_progress, parallel_progress
9
+ from .run_collection import RunCollection
9
10
 
10
11
  __all__ = [
11
12
  "RunCollection",
@@ -15,6 +16,8 @@ __all__ = [
15
16
  "list_runs",
16
17
  "load_config",
17
18
  "log_run",
19
+ "multi_tasks_progress",
20
+ "parallel_progress",
18
21
  "search_runs",
19
22
  "set_experiment",
20
23
  "start_run",
@@ -41,7 +41,9 @@ async def execute_command(
41
41
  int: The return code of the process.
42
42
  """
43
43
  try:
44
- process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
44
+ process = await asyncio.create_subprocess_exec(
45
+ program, *args, stdout=PIPE, stderr=PIPE
46
+ )
45
47
  await asyncio.gather(
46
48
  process_stream(process.stdout, stdout),
47
49
  process_stream(process.stderr, stderr),
@@ -100,7 +102,9 @@ async def monitor_file_changes(
100
102
  """
101
103
  str_paths = [str(path) for path in paths]
102
104
  try:
103
- async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
105
+ async for changes in watchfiles.awatch(
106
+ *str_paths, stop_event=stop_event, **awatch_kwargs
107
+ ):
104
108
  callback(changes)
105
109
  except Exception as e:
106
110
  logger.error(f"Error watching files: {e}")
@@ -129,7 +133,9 @@ async def run_and_monitor(
129
133
  """
130
134
  stop_event = asyncio.Event()
131
135
  run_task = asyncio.create_task(
132
- execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
136
+ execute_command(
137
+ program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr
138
+ )
133
139
  )
134
140
  if watch and paths:
135
141
  monitor_task = asyncio.create_task(
@@ -14,10 +14,11 @@ from typing import TYPE_CHECKING
14
14
 
15
15
  import mlflow
16
16
  from hydra.core.hydra_config import HydraConfig
17
- from watchdog.events import FileModifiedEvent, FileSystemEventHandler
17
+ from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
18
18
  from watchdog.observers import Observer
19
19
 
20
- from hydraflow.mlflow import get_artifact_dir, log_params
20
+ from hydraflow.info import get_artifact_dir
21
+ from hydraflow.mlflow import log_params
21
22
 
22
23
  if TYPE_CHECKING:
23
24
  from collections.abc import Callable, Iterator
@@ -68,7 +69,7 @@ def log_run(
68
69
  mlflow.log_artifact(local_path)
69
70
 
70
71
  try:
71
- with watch(log_artifact, output_dir):
72
+ with watch(log_artifact, output_dir, ignore_log=False):
72
73
  yield
73
74
 
74
75
  except Exception as e:
@@ -140,9 +141,11 @@ def start_run(
140
141
 
141
142
  @contextmanager
142
143
  def watch(
143
- func: Callable[[Path], None],
144
+ callback: Callable[[Path], None],
144
145
  dir: Path | str = "",
145
146
  timeout: int = 60,
147
+ ignore_patterns: list[str] | None = None,
148
+ ignore_log: bool = True,
146
149
  ) -> Iterator[None]:
147
150
  """
148
151
  Watch the given directory for changes and call the provided function
@@ -154,7 +157,7 @@ def watch(
154
157
  period or until the context is exited.
155
158
 
156
159
  Args:
157
- func (Callable[[Path], None]): The function to call when a change is
160
+ callback (Callable[[Path], None]): The function to call when a change is
158
161
  detected. It should accept a single argument of type `Path`,
159
162
  which is the path of the modified file.
160
163
  dir (Path | str): The directory to watch. If not specified,
@@ -174,7 +177,7 @@ def watch(
174
177
  if isinstance(dir, Path):
175
178
  dir = dir.as_posix()
176
179
 
177
- handler = Handler(func)
180
+ handler = Handler(callback, ignore_patterns=ignore_patterns, ignore_log=ignore_log)
178
181
  observer = Observer()
179
182
  observer.schedule(handler, dir, recursive=True)
180
183
  observer.start()
@@ -198,10 +201,23 @@ def watch(
198
201
  observer.join()
199
202
 
200
203
 
201
- class Handler(FileSystemEventHandler):
202
- def __init__(self, func: Callable[[Path], None]) -> None:
204
+ class Handler(PatternMatchingEventHandler):
205
+ def __init__(
206
+ self,
207
+ func: Callable[[Path], None],
208
+ ignore_patterns: list[str] | None = None,
209
+ ignore_log: bool = True,
210
+ ) -> None:
203
211
  self.func = func
204
212
 
213
+ if ignore_log:
214
+ if ignore_patterns:
215
+ ignore_patterns.append("*.log")
216
+ else:
217
+ ignore_patterns = ["*.log"]
218
+
219
+ super().__init__(ignore_patterns=ignore_patterns)
220
+
205
221
  def on_modified(self, event: FileModifiedEvent) -> None:
206
222
  file = Path(str(event.src_path))
207
223
  if file.is_file():
@@ -1,14 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from pathlib import Path
3
4
  from typing import TYPE_CHECKING
4
5
 
6
+ import mlflow
7
+ from hydra.core.hydra_config import HydraConfig
8
+ from mlflow.tracking import artifact_utils
5
9
  from omegaconf import DictConfig, OmegaConf
6
10
 
7
- from hydraflow.mlflow import get_artifact_dir
8
-
9
11
  if TYPE_CHECKING:
10
- from pathlib import Path
11
-
12
12
  from mlflow.entities import Run
13
13
 
14
14
  from hydraflow.run_collection import RunCollection
@@ -43,6 +43,59 @@ class RunCollectionInfo:
43
43
  return [load_config(run) for run in self._runs]
44
44
 
45
45
 
46
+ def get_artifact_dir(run: Run | None = None) -> Path:
47
+ """
48
+ Retrieve the artifact directory for the given run.
49
+
50
+ This function uses MLflow to get the artifact directory for the given run.
51
+
52
+ Args:
53
+ run (Run | None): The run object. Defaults to None.
54
+
55
+ Returns:
56
+ The local path to the directory where the artifacts are downloaded.
57
+ """
58
+ if run is None:
59
+ uri = mlflow.get_artifact_uri()
60
+ else:
61
+ uri = artifact_utils.get_artifact_uri(run.info.run_id)
62
+
63
+ return Path(mlflow.artifacts.download_artifacts(uri))
64
+
65
+
66
+ def get_hydra_output_dir(run: Run | None = None) -> Path:
67
+ """
68
+ Retrieve the Hydra output directory for the given run.
69
+
70
+ This function returns the Hydra output directory. If no run is provided,
71
+ it retrieves the output directory from the current Hydra configuration.
72
+ If a run is provided, it retrieves the artifact path for the run, loads
73
+ the Hydra configuration from the downloaded artifacts, and returns the
74
+ output directory specified in that configuration.
75
+
76
+ Args:
77
+ run (Run | None): The run object. Defaults to None.
78
+
79
+ Returns:
80
+ Path: The path to the Hydra output directory.
81
+
82
+ Raises:
83
+ FileNotFoundError: If the Hydra configuration file is not found
84
+ in the artifacts.
85
+ """
86
+ if run is None:
87
+ hc = HydraConfig.get()
88
+ return Path(hc.runtime.output_dir)
89
+
90
+ path = get_artifact_dir(run) / ".hydra/hydra.yaml"
91
+
92
+ if path.exists():
93
+ hc = OmegaConf.load(path)
94
+ return Path(hc.hydra.runtime.output_dir)
95
+
96
+ raise FileNotFoundError
97
+
98
+
46
99
  def load_config(run: Run) -> DictConfig:
47
100
  """
48
101
  Load the configuration for a given run.
@@ -0,0 +1,175 @@
1
+ """
2
+ This module provides functionality to log parameters from Hydra configuration objects
3
+ and set up experiments using MLflow. It includes methods for managing experiments,
4
+ searching for runs, and logging parameters and artifacts.
5
+
6
+ Key Features:
7
+ - **Experiment Management**: Set and manage MLflow experiments with customizable names
8
+ based on Hydra configuration.
9
+ - **Run Logging**: Log parameters and metrics from Hydra configuration objects to
10
+ MLflow, ensuring that all relevant information is captured during experiments.
11
+ - **Run Search**: Search for runs based on various criteria, allowing for flexible
12
+ retrieval of experiment results.
13
+ - **Artifact Management**: Retrieve and log artifacts associated with runs, facilitating
14
+ easy access to outputs generated during experiments.
15
+
16
+ This module is designed to integrate seamlessly with Hydra, providing a robust
17
+ solution for tracking machine learning experiments and their associated metadata.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from pathlib import Path
23
+ from typing import TYPE_CHECKING
24
+
25
+ import mlflow
26
+ from hydra.core.hydra_config import HydraConfig
27
+ from mlflow.entities import ViewType
28
+ from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
29
+
30
+ from hydraflow.config import iter_params
31
+ from hydraflow.run_collection import RunCollection
32
+
33
+ if TYPE_CHECKING:
34
+ from mlflow.entities.experiment import Experiment
35
+
36
+
37
+ def set_experiment(
38
+ prefix: str = "",
39
+ suffix: str = "",
40
+ uri: str | Path | None = None,
41
+ ) -> Experiment:
42
+ """
43
+ Set the experiment name and, optionally, the tracking URI.
44
+
45
+ This function sets the experiment name by combining the given prefix,
46
+ the job name from HydraConfig, and the given suffix. Optionally, it can
47
+ also set the tracking URI.
48
+
49
+ Args:
50
+ prefix (str): The prefix to prepend to the experiment name.
51
+ suffix (str): The suffix to append to the experiment name.
52
+ uri (str | Path | None): The tracking URI to use. Defaults to None.
53
+
54
+ Returns:
55
+ Experiment: An instance of `mlflow.entities.Experiment` representing
56
+ the new active experiment.
57
+ """
58
+ if uri is not None:
59
+ mlflow.set_tracking_uri(uri)
60
+
61
+ hc = HydraConfig.get()
62
+ name = f"{prefix}{hc.job.name}{suffix}"
63
+ return mlflow.set_experiment(name)
64
+
65
+
66
+ def log_params(config: object, *, synchronous: bool | None = None) -> None:
67
+ """
68
+ Log the parameters from the given configuration object.
69
+
70
+ This method logs the parameters from the provided configuration object
71
+ using MLflow. It iterates over the parameters and logs them using the
72
+ `mlflow.log_param` method.
73
+
74
+ Args:
75
+ config (object): The configuration object to log the parameters from.
76
+ synchronous (bool | None): Whether to log the parameters synchronously.
77
+ Defaults to None.
78
+ """
79
+ for key, value in iter_params(config):
80
+ mlflow.log_param(key, value, synchronous=synchronous)
81
+
82
+
83
+ def search_runs(
84
+ experiment_ids: list[str] | None = None,
85
+ filter_string: str = "",
86
+ run_view_type: int = ViewType.ACTIVE_ONLY,
87
+ max_results: int = SEARCH_MAX_RESULTS_PANDAS,
88
+ order_by: list[str] | None = None,
89
+ search_all_experiments: bool = False,
90
+ experiment_names: list[str] | None = None,
91
+ ) -> RunCollection:
92
+ """
93
+ Search for Runs that fit the specified criteria.
94
+
95
+ This function wraps the `mlflow.search_runs` function and returns the
96
+ results as a `RunCollection` object. It allows for flexible searching of
97
+ MLflow runs based on various criteria.
98
+
99
+ Note:
100
+ The returned runs are sorted by their start time in ascending order.
101
+
102
+ Args:
103
+ experiment_ids (list[str] | None): List of experiment IDs. Search can
104
+ work with experiment IDs or experiment names, but not both in the
105
+ same call. Values other than ``None`` or ``[]`` will result in
106
+ error if ``experiment_names`` is also not ``None`` or ``[]``.
107
+ ``None`` will default to the active experiment if ``experiment_names``
108
+ is ``None`` or ``[]``.
109
+ filter_string (str): Filter query string, defaults to searching all
110
+ runs.
111
+ run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
112
+ or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
113
+ max_results (int): The maximum number of runs to put in the dataframe.
114
+ Default is 100,000 to avoid causing out-of-memory issues on the user's
115
+ machine.
116
+ order_by (list[str] | None): List of columns to order by (e.g.,
117
+ "metrics.rmse"). The ``order_by`` column can contain an optional
118
+ ``DESC`` or ``ASC`` value. The default is ``ASC``.
119
+ The default ordering is to sort by ``start_time DESC``,
120
+ then ``run_id``.
121
+ search_all_experiments (bool): Boolean specifying whether all
122
+ experiments should be searched. Only honored if ``experiment_ids``
123
+ is ``[]`` or ``None``.
124
+ experiment_names (list[str] | None): List of experiment names. Search
125
+ can work with experiment IDs or experiment names, but not both in
126
+ the same call. Values other than ``None`` or ``[]`` will result in
127
+ error if ``experiment_ids`` is also not ``None`` or ``[]``.
128
+ ``None`` will default to the active experiment if
129
+ ``experiment_ids`` is ``None``
130
+ or ``[]``.
131
+
132
+ Returns:
133
+ A `RunCollection` object containing the search results.
134
+ """
135
+ runs = mlflow.search_runs(
136
+ experiment_ids=experiment_ids,
137
+ filter_string=filter_string,
138
+ run_view_type=run_view_type,
139
+ max_results=max_results,
140
+ order_by=order_by,
141
+ output_format="list",
142
+ search_all_experiments=search_all_experiments,
143
+ experiment_names=experiment_names,
144
+ )
145
+ runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
146
+ return RunCollection(runs) # type: ignore
147
+
148
+
149
+ def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
150
+ """
151
+ List all runs for the specified experiments.
152
+
153
+ This function retrieves all runs for the given list of experiment names.
154
+ If no experiment names are provided (None), it defaults to searching all runs
155
+ for the currently active experiment. If an empty list is provided, the function
156
+ will search all runs for all experiments except the "Default" experiment.
157
+ The function returns the results as a `RunCollection` object.
158
+
159
+ Note:
160
+ The returned runs are sorted by their start time in ascending order.
161
+
162
+ Args:
163
+ experiment_names (list[str] | None): List of experiment names to search
164
+ for runs. If None or an empty list is provided, the function will
165
+ search the currently active experiment or all experiments except
166
+ the "Default" experiment, respectively.
167
+
168
+ Returns:
169
+ A `RunCollection` object containing the runs for the specified experiments.
170
+ """
171
+ if experiment_names == []:
172
+ experiments = mlflow.search_experiments()
173
+ experiment_names = [e.name for e in experiments if e.name != "Default"]
174
+
175
+ return search_runs(experiment_names=experiment_names)
@@ -0,0 +1,202 @@
1
+ """
2
+ Module for managing progress tracking in parallel processing using Joblib
3
+ and Rich's Progress bar.
4
+
5
+ Provide context managers and functions to facilitate the execution
6
+ of tasks in parallel while displaying progress updates.
7
+
8
+ The following key components are provided:
9
+
10
+ - JoblibProgress: A context manager for tracking progress with Rich's Progress
11
+ bar.
12
+ - parallel_progress: A function to execute a given function in parallel over
13
+ an iterable with progress tracking.
14
+ - multi_tasks_progress: A function to render auto-updating progress bars for
15
+ multiple tasks concurrently.
16
+
17
+ Usage:
18
+ Import the necessary functions and use them to manage progress in your
19
+ parallel processing tasks.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from contextlib import contextmanager
25
+ from typing import TYPE_CHECKING, TypeVar
26
+
27
+ import joblib
28
+ from rich.progress import Progress
29
+
30
+ if TYPE_CHECKING:
31
+ from collections.abc import Callable, Iterable, Iterator
32
+
33
+ from rich.progress import ProgressColumn
34
+
35
+
36
+ # https://github.com/jonghwanhyeon/joblib-progress/blob/main/joblib_progress/__init__.py
37
+ @contextmanager
38
+ def JoblibProgress(
39
+ *columns: ProgressColumn | str,
40
+ description: str | None = None,
41
+ total: int | None = None,
42
+ **kwargs,
43
+ ) -> Iterator[Progress]:
44
+ """
45
+ Context manager for tracking progress using Joblib with Rich's Progress bar.
46
+
47
+ Args:
48
+ *columns (ProgressColumn | str): Columns to display in the progress bar.
49
+ description (str | None, optional): A description for the progress task.
50
+ Defaults to None.
51
+ total (int | None, optional): The total number of tasks. If None, it will
52
+ be determined automatically.
53
+ **kwargs: Additional keyword arguments passed to the Progress instance.
54
+
55
+ Yields:
56
+ Progress: A Progress instance for managing the progress bar.
57
+
58
+ Example:
59
+ with JoblibProgress("task", total=100) as progress:
60
+ # Your parallel processing code here
61
+ """
62
+ if not columns:
63
+ columns = Progress.get_default_columns()
64
+
65
+ progress = Progress(*columns, **kwargs)
66
+
67
+ if description is None:
68
+ description = "Processing..."
69
+
70
+ task_id = progress.add_task(description, total=total)
71
+ print_progress = joblib.parallel.Parallel.print_progress
72
+
73
+ def update_progress(self: joblib.parallel.Parallel):
74
+ progress.update(task_id, completed=self.n_completed_tasks, refresh=True)
75
+ return print_progress(self)
76
+
77
+ try:
78
+ joblib.parallel.Parallel.print_progress = update_progress
79
+ progress.start()
80
+ yield progress
81
+
82
+ finally:
83
+ progress.stop()
84
+ joblib.parallel.Parallel.print_progress = print_progress
85
+
86
+
87
+ T = TypeVar("T")
88
+ U = TypeVar("U")
89
+
90
+
91
+ def parallel_progress(
92
+ func: Callable[[T], U],
93
+ iterable: Iterable[T],
94
+ *columns: ProgressColumn | str,
95
+ n_jobs: int = -1,
96
+ description: str | None = None,
97
+ **kwargs,
98
+ ) -> list[U]:
99
+ """
100
+ Execute a function in parallel over an iterable with progress tracking.
101
+
102
+ Args:
103
+ func (Callable[[T], U]): The function to execute on each item in the
104
+ iterable.
105
+ iterable (Iterable[T]): An iterable of items to process.
106
+ *columns (ProgressColumn | str): Additional columns to display in the
107
+ progress bar.
108
+ n_jobs (int, optional): The number of jobs to run in parallel.
109
+ Defaults to -1 (all processors).
110
+ description (str | None, optional): A description for the progress bar.
111
+ Defaults to None.
112
+ **kwargs: Additional keyword arguments passed to the Progress instance.
113
+
114
+ Returns:
115
+ list[U]: A list of results from applying the function to each item in
116
+ the iterable.
117
+ """
118
+ iterable = list(iterable)
119
+ total = len(iterable)
120
+
121
+ with JoblibProgress(*columns, description=description, total=total, **kwargs):
122
+ it = (joblib.delayed(func)(x) for x in iterable)
123
+ return joblib.Parallel(n_jobs=n_jobs)(it) # type: ignore
124
+
125
+
126
+ def multi_tasks_progress(
127
+ iterables: Iterable[Iterable[int | tuple[int, int]]],
128
+ *columns: ProgressColumn | str,
129
+ n_jobs: int = -1,
130
+ description: str = "#{:0>3}",
131
+ main_description: str = "main",
132
+ transient: bool | None = None,
133
+ **kwargs,
134
+ ) -> None:
135
+ """
136
+ Render auto-updating progress bars for multiple tasks concurrently.
137
+
138
+ Args:
139
+ iterables (Iterable[Iterable[int | tuple[int, int]]]): A collection of
140
+ iterables, each representing a task. Each iterable can yield
141
+ integers (completed) or tuples of integers (completed, total).
142
+ *columns (ProgressColumn | str): Additional columns to display in the
143
+ progress bars.
144
+ n_jobs (int, optional): Number of jobs to run in parallel. Defaults to
145
+ -1, which means using all processors.
146
+ description (str, optional): Format string for describing tasks. Defaults to
147
+ "#{:0>3}".
148
+ main_description (str, optional): Description for the main task.
149
+ Defaults to "main".
150
+ transient (bool | None, optional): Whether to remove the progress bar
151
+ after completion. Defaults to None.
152
+ **kwargs: Additional keyword arguments passed to the Progress instance.
153
+
154
+ Returns:
155
+ None
156
+ """
157
+ if not columns:
158
+ columns = Progress.get_default_columns()
159
+
160
+ iterables = list(iterables)
161
+
162
+ with Progress(*columns, transient=transient or False, **kwargs) as progress:
163
+ n = len(iterables)
164
+
165
+ task_main = progress.add_task(main_description, total=None) if n > 1 else None
166
+ tasks = [
167
+ progress.add_task(description.format(i), start=False, total=None)
168
+ for i in range(n)
169
+ ]
170
+
171
+ total = {}
172
+ completed = {}
173
+
174
+ def func(i: int) -> None:
175
+ completed[i] = 0
176
+ total[i] = None
177
+ progress.start_task(tasks[i])
178
+
179
+ for index in iterables[i]:
180
+ if isinstance(index, tuple):
181
+ completed[i], total[i] = index[0] + 1, index[1]
182
+ else:
183
+ completed[i] = index + 1
184
+
185
+ progress.update(tasks[i], total=total[i], completed=completed[i])
186
+ if task_main is not None:
187
+ if all(t is not None for t in total.values()):
188
+ t = sum(total.values())
189
+ else:
190
+ t = None
191
+ c = sum(completed.values())
192
+ progress.update(task_main, total=t, completed=c)
193
+
194
+ if transient or n > 1:
195
+ progress.remove_task(tasks[i])
196
+
197
+ if n > 1:
198
+ it = (joblib.delayed(func)(i) for i in range(n))
199
+ joblib.Parallel(n_jobs, prefer="threads")(it)
200
+
201
+ else:
202
+ func(0)