PyPI - hydraflow - Versions diffs - 0.2.8__tar.gz → 0.2.10__tar.gz - Mend

hydraflow 0.2.8tar.gz → 0.2.10tar.gz

Files changed (33) hide show

{hydraflow-0.2.8 → hydraflow-0.2.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.8
+Version: 0.2.10
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow

{hydraflow-0.2.8 → hydraflow-0.2.10}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.2.8"
+version = "0.2.10"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"
@@ -63,7 +63,7 @@ asyncio_default_fixture_loop_scope = "function"
 exclude_lines = ["no cov", "raise NotImplementedError", "if TYPE_CHECKING:"]
 [tool.ruff]
-line-length = 100
+line-length = 88
 target-version = "py312"
 [tool.ruff.lint]

{hydraflow-0.2.8 → hydraflow-0.2.10}/src/hydraflow/__init__.py RENAMED Viewed

@@ -5,6 +5,7 @@ from .mlflow import (
     search_runs,
     set_experiment,
 )
+from .progress import multi_tasks_progress, parallel_progress
 from .run_collection import RunCollection
 __all__ = [
@@ -15,6 +16,8 @@ __all__ = [
     "list_runs",
     "load_config",
     "log_run",
+    "multi_tasks_progress",
+    "parallel_progress",
     "search_runs",
     "set_experiment",
     "start_run",

{hydraflow-0.2.8 → hydraflow-0.2.10}/src/hydraflow/asyncio.py RENAMED Viewed

@@ -41,7 +41,9 @@ async def execute_command(
         int: The return code of the process.
     """
     try:
-        process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
+        process = await asyncio.create_subprocess_exec(
+            program, *args, stdout=PIPE, stderr=PIPE
+        )
         await asyncio.gather(
             process_stream(process.stdout, stdout),
             process_stream(process.stderr, stderr),
@@ -100,7 +102,9 @@ async def monitor_file_changes(
     """
     str_paths = [str(path) for path in paths]
     try:
-        async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
+        async for changes in watchfiles.awatch(
+            *str_paths, stop_event=stop_event, **awatch_kwargs
+        ):
             callback(changes)
     except Exception as e:
         logger.error(f"Error watching files: {e}")
@@ -129,7 +133,9 @@ async def run_and_monitor(
     """
     stop_event = asyncio.Event()
     run_task = asyncio.create_task(
-        execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
+        execute_command(
+            program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr
+        )
     )
     if watch and paths:
         monitor_task = asyncio.create_task(

hydraflow-0.2.10/src/hydraflow/progress.py ADDED Viewed

@@ -0,0 +1,191 @@
+"""
+Module for managing progress tracking in parallel processing using Joblib
+and Rich's Progress bar.
+Provide context managers and functions to facilitate the execution
+of tasks in parallel while displaying progress updates.
+The following key components are provided:
+- JoblibProgress: A context manager for tracking progress with Rich's Progress
+    bar.
+- parallel_progress: A function to execute a given function in parallel over
+    an iterable with progress tracking.
+- multi_tasks_progress: A function to render auto-updating progress bars for
+    multiple tasks concurrently.
+Usage:
+    Import the necessary functions and use them to manage progress in your
+    parallel processing tasks.
+"""
+from __future__ import annotations
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, TypeVar
+import joblib
+from rich.progress import Progress
+if TYPE_CHECKING:
+    from collections.abc import Callable, Iterable, Iterator
+    from rich.progress import ProgressColumn
+# https://github.com/jonghwanhyeon/joblib-progress/blob/main/joblib_progress/__init__.py
+@contextmanager
+def JoblibProgress(
+    *columns: ProgressColumn | str,
+    description: str | None = None,
+    total: int | None = None,
+    **kwargs,
+) -> Iterator[Progress]:
+    """
+    Show a Rich progress bar that follows the tasks completed by Joblib.
+    While the context is active, ``joblib.parallel.Parallel.print_progress``
+    is replaced by a wrapper that first copies the caller's
+    ``n_completed_tasks`` into the bar and then calls the original method.
+    The original method is restored on exit, even if the body raises.
+    Args:
+        *columns (ProgressColumn | str): Columns for the progress bar. When
+            omitted, ``Progress.get_default_columns()`` is used.
+        description (str | None, optional): Label of the progress task.
+            ``None`` (the default) is replaced by "Processing...".
+        total (int | None, optional): Total handed unchanged to
+            ``Progress.add_task``. Defaults to None.
+        **kwargs: Additional keyword arguments passed to the Progress instance.
+    Yields:
+        Progress: The started Progress instance that renders the bar.
+    Example:
+        with JoblibProgress(description="task", total=100):
+            joblib.Parallel(n_jobs=2)(joblib.delayed(f)(x) for x in range(100))
+    """
+    label = "Processing..." if description is None else description
+    bar = Progress(*(columns or Progress.get_default_columns()), **kwargs)
+    task_id = bar.add_task(label, total=total)
+    parallel_cls = joblib.parallel.Parallel
+    original_print_progress = parallel_cls.print_progress
+    def report_and_delegate(self: joblib.parallel.Parallel):
+        # Mirror Joblib's completed-task counter in the bar, then keep
+        # Joblib's own reporting by delegating to the saved method.
+        bar.update(task_id, completed=self.n_completed_tasks, refresh=True)
+        return original_print_progress(self)
+    try:
+        parallel_cls.print_progress = report_and_delegate
+        bar.start()
+        yield bar
+    finally:
+        # Undo the class-level patch no matter how the body exited.
+        bar.stop()
+        parallel_cls.print_progress = original_print_progress
+T = TypeVar("T")
+U = TypeVar("U")
+def parallel_progress(
+    func: Callable[[T], U],
+    iterable: Iterable[T],
+    *columns: ProgressColumn | str,
+    n_jobs: int = -1,
+    description: str | None = None,
+    **kwargs,
+) -> list[U]:
+    """
+    Apply a function to every item in parallel while showing a progress bar.
+    The iterable is materialized into a list first so that its length can
+    be used as the total of the progress bar.
+    Args:
+        func (Callable[[T], U]): The function called once per item.
+        iterable (Iterable[T]): The items to process.
+        *columns (ProgressColumn | str): Columns forwarded to JoblibProgress.
+        n_jobs (int, optional): The ``n_jobs`` value given to
+            ``joblib.Parallel``. Defaults to -1 (all processors).
+        description (str | None, optional): A description for the progress bar.
+            Defaults to None.
+        **kwargs: Additional keyword arguments forwarded to JoblibProgress.
+    Returns:
+        list[U]: The value returned by the ``joblib.Parallel`` call, i.e. the
+        results of applying the function to each item.
+    """
+    items = list(iterable)
+    with JoblibProgress(
+        *columns, description=description, total=len(items), **kwargs
+    ):
+        jobs = (joblib.delayed(func)(item) for item in items)
+        runner = joblib.Parallel(n_jobs=n_jobs)
+        return runner(jobs)  # type: ignore
+def multi_tasks_progress(
+    iterables: Iterable[Iterable[int | tuple[int, int]]],
+    *columns: ProgressColumn | str,
+    n_jobs: int = -1,
+    description: str = "#{:0>3}",
+    main_description: str = "main",
+    transient: bool | None = None,
+    **kwargs,
+) -> None:
+    """
+    Render auto-updating progress bars for multiple tasks concurrently.
+    Every iterable gets its own bar and a main bar shows the aggregate. The
+    iterables are consumed by Joblib workers started with ``prefer="threads"``.
+    Args:
+        iterables (Iterable[Iterable[int | tuple[int, int]]]): A collection of
+            iterables, each representing a task. Each iterable can yield
+            integers (completed) or tuples of integers (completed, total).
+            The yielded ``completed`` index is shown as ``index + 1``.
+        *columns (ProgressColumn | str): Columns to display in the progress
+            bars. When omitted, ``Progress.get_default_columns()`` is used.
+        n_jobs (int, optional): Number of jobs to run in parallel. Defaults to
+            -1, which means using all processors.
+        description (str, optional): Format string for describing tasks; it is
+            formatted with the task index. Defaults to "#{:0>3}".
+        main_description (str, optional): Description for the main task.
+            Defaults to "main".
+        transient (bool | None, optional): Passed to Progress as
+            ``transient or False``. A finished per-task bar is removed unless
+            this is exactly False. Defaults to None.
+        **kwargs: Additional keyword arguments passed to the Progress instance.
+    Returns:
+        None
+    """
+    import threading
+    if not columns:
+        columns = Progress.get_default_columns()
+    iterables = list(iterables)
+    with Progress(*columns, transient=transient or False, **kwargs) as progress:
+        task_main = progress.add_task(main_description, total=None)
+        total: dict[int, int | None] = {}
+        completed: dict[int, int] = {}
+        # The worker threads share the two dicts above. Without this lock one
+        # worker could register its keys while another iterates the values,
+        # raising "dictionary changed size during iteration".
+        lock = threading.Lock()
+        def func(i: int, iterable: Iterable[int | tuple[int, int]]) -> None:
+            task_id = progress.add_task(description.format(i), total=None)
+            with lock:
+                completed[i] = 0
+                total[i] = None
+            # Advancing the iterable (the actual work) stays outside the lock;
+            # only the bookkeeping for each yielded value is serialized.
+            for index in iterable:
+                with lock:
+                    if isinstance(index, tuple):
+                        completed[i], total[i] = index[0] + 1, index[1]
+                    else:
+                        completed[i] = index + 1
+                    progress.update(task_id, total=total[i], completed=completed[i])
+                    # The main total is known only once every registered task
+                    # has reported its own total.
+                    if all(t is not None for t in total.values()):
+                        t = sum(total.values())
+                    else:
+                        t = None
+                    c = sum(completed.values())
+                    progress.update(task_main, total=t, completed=c)
+            if transient is not False:
+                progress.remove_task(task_id)
+        it = (joblib.delayed(func)(i, it) for i, it in enumerate(iterables))
+        joblib.Parallel(n_jobs, prefer="threads")(it)

{hydraflow-0.2.8 → hydraflow-0.2.10}/src/hydraflow/run_collection.py RENAMED Viewed

@@ -468,7 +468,9 @@ class RunCollection:
         """
         return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)
-    def group_by(self, *names: str | list[str]) -> dict[tuple[str | None, ...], RunCollection]:
+    def group_by(
+        self, *names: str | list[str]
+    ) -> dict[tuple[str | None, ...], RunCollection]:
         """
         Group runs by specified parameter names.
@@ -493,25 +495,6 @@ class RunCollection:
         return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
-    def group_by_values(self, *names: str | list[str]) -> list[RunCollection]:
-        """
-        Group runs by specified parameter names.
-        This method groups the runs in the collection based on the values of the
-        specified parameters. Each unique combination of parameter values will
-        form a separate RunCollection in the returned list.
-        Args:
-            *names (str | list[str]): The names of the parameters to group by.
-                This can be a single parameter name or multiple names provided
-                as separate arguments or as a list.
-        Returns:
-            list[RunCollection]: A list of RunCollection objects, where each
-            object contains runs that match the specified parameter values.
-        """
-        return list(self.group_by(*names).values())
 def _param_matches(run: Run, key: str, value: Any) -> bool:
     """
@@ -671,7 +654,9 @@ def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Ru
     return filtered_runs[-1]
-def try_find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
+def try_find_last_run(
+    runs: list[Run], config: object | None = None, **kwargs
+) -> Run | None:
     """
     Find the last run based on the provided configuration.

hydraflow-0.2.10/tests/scripts/progress.py ADDED Viewed

@@ -0,0 +1,65 @@
+from __future__ import annotations
+import random
+import time
+from rich.progress import (
+    MofNCompleteColumn,
+    Progress,
+    SpinnerColumn,
+    TimeElapsedColumn,
+)
+from hydraflow import multi_tasks_progress, parallel_progress
+def test_parallel_progress(**kwargs):
+    """
+    Run ``parallel_progress`` over twelve one-second jobs.
+    The bar uses a spinner, Rich's default columns, an M-of-N counter and the
+    elapsed time. Extra keyword arguments are forwarded to ``parallel_progress``.
+    """
+    def slow_format(x: int) -> str:
+        # One second per item keeps the bar visible while the script runs.
+        time.sleep(1)
+        return f"result: {x}"
+    columns = (
+        SpinnerColumn(),
+        *Progress.get_default_columns(),
+        MofNCompleteColumn(),
+        TimeElapsedColumn(),
+    )
+    parallel_progress(slow_format, range(12), *columns, n_jobs=-1, **kwargs)
+def task(total):
+    """
+    Yield fake progress reports, pausing briefly after each one.
+    Yields ``i`` alone when ``total`` is None and ``(i, total)`` otherwise,
+    with ``i`` running over ``range(total or 90)``.
+    """
+    steps = total or 90
+    for i in range(steps):
+        yield i if total is None else (i, total)
+        time.sleep(random.random() / 30)
+def test_multi_tasks_progress(total: bool, **kwargs):
+    """
+    Run ``multi_tasks_progress`` with four demo tasks on four jobs.
+    When ``total`` is false, all four tasks report a random total between 80
+    and 100. When it is true, two of those tasks are placed between two
+    ``task(None)`` generators that report no total, and the main bar is
+    labelled "unknown". Extra keyword arguments are forwarded.
+    """
+    tasks = (task(random.randint(80, 100)) for _ in range(4))
+    if total:
+        # Draw all four sized tasks, keep the first two.
+        first, second, *_ = tasks
+        tasks = (task(None), first, second, task(None))
+        kwargs["main_description"] = "unknown"
+    columns = (
+        SpinnerColumn(),
+        *Progress.get_default_columns(),
+        MofNCompleteColumn(),
+        TimeElapsedColumn(),
+    )
+    multi_tasks_progress(tasks, *columns, n_jobs=4, **kwargs)
+if __name__ == "__main__":
+    # Single-bar helper: once with a custom description, once transient.
+    for options in ({"description": "parallel"}, {"transient": True}):
+        test_parallel_progress(**options)
+    # Multi-bar helper: default, explicit non-transient, and transient runs.
+    multi_runs = (
+        (False, {}),
+        (True, {"transient": False}),
+        (False, {"transient": True}),
+    )
+    for unknown_total, options in multi_runs:
+        test_multi_tasks_progress(unknown_total, **options)

{hydraflow-0.2.8 → hydraflow-0.2.10}/tests/test_app.py RENAMED Viewed

@@ -98,12 +98,3 @@ def test_app_group_by(rc: RunCollection):
     assert grouped[("x",)].info.params[1] == {"port": "2", "host": "x"}
     assert grouped[("y",)].info.params[0] == {"port": "1", "host": "y"}
     assert grouped[("y",)].info.params[1] == {"port": "2", "host": "y"}
-def test_app_group_by_values(rc: RunCollection):
-    grouped = rc.group_by_values("port")
-    assert len(grouped) == 2
-    assert grouped[0].info.params[0] == {"port": "1", "host": "x"}
-    assert grouped[0].info.params[1] == {"port": "1", "host": "y"}
-    assert grouped[1].info.params[0] == {"port": "2", "host": "x"}
-    assert grouped[1].info.params[1] == {"port": "2", "host": "y"}

{hydraflow-0.2.8 → hydraflow-0.2.10}/tests/test_progress.py RENAMED Viewed

@@ -8,5 +8,5 @@ import pytest
     sys.platform == "win32", reason="'cp932' codec can't encode character '\\u2807'"
 )
 def test_progress_bar():
-    cp = run([sys.executable, "-m", "hydraflow.progress"])
+    cp = run([sys.executable, "tests/scripts/progress.py"])
     assert cp.returncode == 0

hydraflow-0.2.8/src/hydraflow/progress.py DELETED Viewed

@@ -1,131 +0,0 @@
-from __future__ import annotations
-from typing import TYPE_CHECKING
-import joblib
-from rich.progress import Progress
-if TYPE_CHECKING:
-    from collections.abc import Iterable
-    from rich.progress import ProgressColumn
-def multi_task_progress(
-    iterables: Iterable[Iterable[int | tuple[int, int]]],
-    *columns: ProgressColumn | str,
-    n_jobs: int = -1,
-    description: str = "#{:0>3}",
-    main_description: str = "main",
-    transient: bool | None = None,
-    **kwargs,
-) -> None:
-    """
-    Render auto-updating progress bars for multiple tasks concurrently.
-    Args:
-        iterables (Iterable[Iterable[int | tuple[int, int]]]): A collection of
-            iterables, each representing a task. Each iterable can yield
-            integers (completed) or tuples of integers (completed, total).
-        *columns (ProgressColumn | str): Additional columns to display in the
-            progress bars.
-        n_jobs (int, optional): Number of jobs to run in parallel. Defaults to
-            -1, which means using all processors.
-        description (str, optional): Format string for describing tasks. Defaults to
-            "#{:0>3}".
-        main_description (str, optional): Description for the main task.
-            Defaults to "main".
-        transient (bool | None, optional): Whether to remove the progress bar
-            after completion. Defaults to None.
-        **kwargs: Additional keyword arguments passed to the Progress instance.
-    Returns:
-        None
-    """
-    if not columns:
-        columns = Progress.get_default_columns()
-    iterables = list(iterables)
-    with Progress(*columns, transient=transient or False, **kwargs) as progress:
-        n = len(iterables)
-        task_main = progress.add_task(main_description, total=None) if n > 1 else None
-        tasks = [
-            progress.add_task(description.format(i), start=False, total=None) for i in range(n)
-        ]
-        total = {}
-        completed = {}
-        def func(i: int) -> None:
-            completed[i] = 0
-            total[i] = None
-            progress.start_task(tasks[i])
-            for index in iterables[i]:
-                if isinstance(index, tuple):
-                    completed[i], total[i] = index[0] + 1, index[1]
-                else:
-                    completed[i] = index + 1
-                progress.update(tasks[i], total=total[i], completed=completed[i])
-                if task_main is not None:
-                    if all(t is not None for t in total.values()):
-                        t = sum(total.values())
-                    else:
-                        t = None
-                    c = sum(completed.values())
-                    progress.update(task_main, total=t, completed=c)
-            if transient or n > 1:
-                progress.remove_task(tasks[i])
-        if n > 1:
-            it = (joblib.delayed(func)(i) for i in range(n))
-            joblib.Parallel(n_jobs, prefer="threads")(it)
-        else:
-            func(0)
-if __name__ == "__main__":
-    import random
-    import time
-    from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
-    from hydraflow.progress import multi_task_progress
-    def task(total):
-        for i in range(total or 90):
-            if total is None:
-                yield i
-            else:
-                yield i, total
-            time.sleep(random.random() / 30)
-    def multi_task_progress_test(unknown_total: bool):
-        tasks = [task(random.randint(80, 100)) for _ in range(4)]
-        if unknown_total:
-            tasks = [task(None), *tasks, task(None)]
-        columns = [
-            SpinnerColumn(),
-            *Progress.get_default_columns(),
-            MofNCompleteColumn(),
-            TimeElapsedColumn(),
-        ]
-        kwargs = {}
-        if unknown_total:
-            kwargs["main_description"] = "unknown"
-        multi_task_progress(tasks, *columns, n_jobs=4, **kwargs)
-    multi_task_progress_test(False)
-    multi_task_progress_test(True)
-    multi_task_progress([task(100)])
-    multi_task_progress([task(None)], description="unknown")
-    multi_task_progress([task(100), task(None)], main_description="transient", transient=True)
-    multi_task_progress([task(100)], description="transient", transient=True)