hydraflow 0.14.4__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hydraflow/executor/job.py CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  This module provides functionality for executing jobs in HydraFlow, including:
4
4
 
5
- - Argument parsing and expansion for job steps
5
+ - Argument parsing and expansion for job parameter sets
6
6
  - Batch processing of Hydra configurations
7
7
  - Execution of jobs via shell commands or Python functions
8
8
 
@@ -11,8 +11,9 @@ The module supports two execution modes:
11
11
  1. Shell command execution
12
12
  2. Python function calls
13
13
 
14
- Each job can consist of multiple steps, and each step can have its own
15
- arguments and configurations that will be expanded into multiple runs.
14
+ Each job can consist of multiple parameter sets, and each parameter
15
+ set can have its own arguments and configurations that will be expanded
16
+ into multiple runs.
16
17
  """
17
18
 
18
19
  from __future__ import annotations
@@ -39,24 +40,24 @@ if TYPE_CHECKING:
39
40
  from .conf import Job
40
41
 
41
42
 
42
- def iter_args(batch: str, args: str) -> Iterator[list[str]]:
43
+ def iter_args(each: str, all_: str) -> Iterator[list[str]]:
43
44
  """Iterate over combinations generated from parsed arguments.
44
45
 
45
46
  Generate all possible combinations of arguments by parsing and
46
47
  expanding each one, yielding them as an iterator.
47
48
 
48
49
  Args:
49
- batch (str): The batch to parse.
50
- args (str): The arguments to parse.
50
+ each (str): The 'each' parameter to parse.
51
+ all_ (str): The 'all' parameter to parse.
51
52
 
52
53
  Yields:
53
54
  list[str]: a list of the parsed argument combinations.
54
55
 
55
56
  """
56
- args_ = collect(args)
57
+ all_params = collect(all_)
57
58
 
58
- for batch_ in expand(batch):
59
- yield [*batch_, *args_]
59
+ for each_params in expand(each):
60
+ yield [*each_params, *all_params]
60
61
 
61
62
 
62
63
  def iter_batches(job: Job) -> Iterator[list[str]]:
@@ -74,14 +75,40 @@ def iter_batches(job: Job) -> Iterator[list[str]]:
74
75
 
75
76
  """
76
77
  job_name = f"hydra.job.name={job.name}"
77
- job_configs = shlex.split(job.with_)
78
+ job_add = shlex.split(job.add)
78
79
 
79
- for step in job.steps:
80
- configs = shlex.split(step.with_) or job_configs
80
+ for set_ in job.sets:
81
+ add = merge_args(job_add, shlex.split(set_.add)) if set_.add else job_add
81
82
 
82
- for args in iter_args(step.batch, step.args):
83
+ for args in iter_args(set_.each, set_.all):
83
84
  sweep_dir = f"hydra.sweep.dir=multirun/{ulid.ULID()}"
84
- yield ["--multirun", *args, job_name, sweep_dir, *configs]
85
+ yield ["--multirun", *args, job_name, sweep_dir, *add]
86
+
87
+
88
+ def merge_args(first: list[str], second: list[str]) -> list[str]:
89
+ """Merge two lists of arguments.
90
+
91
+ This function merges two lists of arguments by checking for conflicts
92
+ and resolving them by keeping the values from the second list.
93
+
94
+ Args:
95
+ first (list[str]): The first list of arguments.
96
+ second (list[str]): The second list of arguments.
97
+
98
+ Returns:
99
+ list[str]: A merged list of arguments.
100
+
101
+ """
102
+ merged = {}
103
+
104
+ for item in [*first, *second]:
105
+ if "=" in item:
106
+ key, value = item.split("=", 1)
107
+ merged[key] = value
108
+ else:
109
+ merged[item] = None
110
+
111
+ return [k if v is None else f"{k}={v}" for k, v in merged.items()]
85
112
 
86
113
 
87
114
  @dataclass
@@ -165,25 +165,26 @@ SUFFIX_EXPONENT = {
165
165
 
166
166
 
167
167
  def _get_range(arg: str) -> tuple[float, float, float]:
168
+ """Return a tuple of (start, stop, step)."""
168
169
  args = [to_number(x) for x in arg.split(":")]
169
170
 
170
171
  if len(args) == 2:
171
172
  if args[0] > args[1]:
172
173
  raise ValueError("start cannot be greater than stop")
173
174
 
174
- return (args[0], 1, args[1])
175
+ return (args[0], args[1], 1)
175
176
 
176
- if args[1] == 0:
177
+ if args[2] == 0:
177
178
  raise ValueError("step cannot be zero")
178
- if args[1] > 0 and args[0] > args[2]:
179
+ if args[2] > 0 and args[0] > args[1]:
179
180
  raise ValueError("start cannot be greater than stop")
180
- if args[1] < 0 and args[0] < args[2]:
181
+ if args[2] < 0 and args[0] < args[1]:
181
182
  raise ValueError("start cannot be less than stop")
182
183
 
183
184
  return args[0], args[1], args[2]
184
185
 
185
186
 
186
- def _arange(start: float, step: float, stop: float) -> list[float]:
187
+ def _arange(start: float, stop: float, step: float) -> list[float]:
187
188
  """Generate a range of floating point numbers.
188
189
 
189
190
  This function generates a range of floating point numbers
@@ -191,8 +192,8 @@ def _arange(start: float, step: float, stop: float) -> list[float]:
191
192
 
192
193
  Args:
193
194
  start (float): The starting value.
194
- step (float): The step size.
195
195
  stop (float): The end value (inclusive).
196
+ step (float): The step size.
196
197
 
197
198
  Returns:
198
199
  list[float]: A list of floating point numbers from start to stop
@@ -323,7 +324,7 @@ def collect_parentheses(arg: str) -> list[str]:
323
324
  list[str]: A list of the collected values.
324
325
 
325
326
  Examples:
326
- >>> collect_parentheses("(1:3,5:2:9,20)k")
327
+ >>> collect_parentheses("(1:3,5:9:2,20)k")
327
328
  ['1e3', '2e3', '3e3', '5e3', '7e3', '9e3', '20e3']
328
329
  >>> collect_parentheses("2e(-1,-2,-3)")
329
330
  ['2e-1', '2e-2', '2e-3']
@@ -352,7 +353,7 @@ def collect_values(arg: str) -> list[str]:
352
353
  Examples:
353
354
  >>> collect_values("1:4")
354
355
  ['1', '2', '3', '4']
355
- >>> collect_values("1.2:0.1:1.4:k")
356
+ >>> collect_values("1.2:1.4:0.1:k")
356
357
  ['1.2e3', '1.3e3', '1.4e3']
357
358
  >>> collect_values("0.1")
358
359
  ['0.1']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hydraflow
3
- Version: 0.14.4
3
+ Version: 0.15.1
4
4
  Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
5
5
  Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -36,40 +36,40 @@ Classifier: Intended Audience :: Science/Research
36
36
  Classifier: License :: OSI Approved :: MIT License
37
37
  Classifier: Operating System :: OS Independent
38
38
  Classifier: Programming Language :: Python
39
- Classifier: Programming Language :: Python :: 3.10
40
- Classifier: Programming Language :: Python :: 3.11
41
- Classifier: Programming Language :: Python :: 3.12
42
39
  Classifier: Programming Language :: Python :: 3.13
43
40
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
44
41
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
45
- Requires-Python: >=3.10
42
+ Requires-Python: >=3.13
46
43
  Requires-Dist: hydra-core>=1.3
44
+ Requires-Dist: joblib>=1.4.0
47
45
  Requires-Dist: mlflow>=2.15
48
46
  Requires-Dist: omegaconf>=2.3
47
+ Requires-Dist: polars>=1.26
49
48
  Requires-Dist: python-ulid>=3.0.0
50
49
  Requires-Dist: rich>=13.9
50
+ Requires-Dist: ruff>=0.11
51
51
  Requires-Dist: typer>=0.15
52
52
  Description-Content-Type: text/markdown
53
53
 
54
54
  # Hydraflow
55
55
 
56
56
  [![PyPI Version][pypi-v-image]][pypi-v-link]
57
- [![Python Version][python-v-image]][python-v-link]
58
57
  [![Build Status][GHAction-image]][GHAction-link]
59
58
  [![Coverage Status][codecov-image]][codecov-link]
60
59
  [![Documentation Status][docs-image]][docs-link]
60
+ [![Python Version][python-v-image]][python-v-link]
61
61
 
62
62
  <!-- Badges -->
63
63
  [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
64
64
  [pypi-v-link]: https://pypi.org/project/hydraflow/
65
- [python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
66
- [python-v-link]: https://pypi.org/project/hydraflow
67
65
  [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
68
66
  [GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
69
67
  [codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
70
68
  [codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
71
- [docs-image]: https://readthedocs.org/projects/hydraflow/badge/?version=latest
69
+ [docs-image]: https://img.shields.io/badge/docs-latest-blue.svg
72
70
  [docs-link]: https://daizutabi.github.io/hydraflow/
71
+ [python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
72
+ [python-v-link]: https://pypi.org/project/hydraflow
73
73
 
74
74
  ## Overview
75
75
 
@@ -101,6 +101,8 @@ You can install Hydraflow via pip:
101
101
  pip install hydraflow
102
102
  ```
103
103
 
104
+ **Requirements:** Python 3.13+
105
+
104
106
  ## Quick Start
105
107
 
106
108
  Here is a simple example to get you started with Hydraflow:
@@ -0,0 +1,21 @@
1
+ hydraflow/__init__.py,sha256=5ByA9ogtS5ZfIYIUSMUjMwAIpr6xGXEXmcABOu4O8RA,673
2
+ hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
3
+ hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ hydraflow/core/context.py,sha256=LFPNJxmuJQ2VUt-WBU07MC3ySbjlY8rRZ8VxuAih4o4,4148
6
+ hydraflow/core/io.py,sha256=ZBXIL_jlBUiCI0L_J6S5S4OwtBMvdVVMXnekzMuC_JA,4404
7
+ hydraflow/core/main.py,sha256=b9o6Rpn3uoXfDB8o0XZdl-g1yX2SKkOT12-H7lB8Les,5158
8
+ hydraflow/core/run.py,sha256=KqaMdRUBOzOU4vkrRUczCrPCsVx30-XUQ_e78B78BSU,12330
9
+ hydraflow/core/run_collection.py,sha256=pV3N83uBhmda9OeaNz1jqpF9z6A9j3jfUHtqy-uxCs4,15671
10
+ hydraflow/core/run_info.py,sha256=3dW9GgWnZZNwbXwMrw-85AqQ956zlQddUi9irSNLR5g,2550
11
+ hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
13
+ hydraflow/executor/conf.py,sha256=8Xq4UAenRKJIl1NBgNbSfv6VUTJhdwPLayZIEAsiBR0,414
14
+ hydraflow/executor/io.py,sha256=18wnHpCMQRGYL-oN2841h9W2aSW_X2SmO68Lx-3FIbU,1043
15
+ hydraflow/executor/job.py,sha256=6QeJ18OMeocXeM04rCYL46GgArfX1SvZs9_4HTomTgE,5436
16
+ hydraflow/executor/parser.py,sha256=RxP8qpDaJ8VLqZ51VlPFyVitWctObhkE_3iPIsY66Cs,14610
17
+ hydraflow-0.15.1.dist-info/METADATA,sha256=oC-UgH0sZKw2Ry1kBiMPpNobxzlLhmhQgS8W3TIvGJI,7238
18
+ hydraflow-0.15.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
19
+ hydraflow-0.15.1.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
20
+ hydraflow-0.15.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
21
+ hydraflow-0.15.1.dist-info/RECORD,,
hydraflow/core/config.py DELETED
@@ -1,122 +0,0 @@
1
- """Provide functionality for working with configuration objects using the OmegaConf."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import TYPE_CHECKING
6
-
7
- from omegaconf import DictConfig, ListConfig, OmegaConf
8
-
9
- if TYPE_CHECKING:
10
- from collections.abc import Iterator
11
- from typing import Any
12
-
13
-
14
- def iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
15
- """Recursively iterate over the parameters in the given configuration object.
16
-
17
- This function traverses the configuration object and yields key-value pairs
18
- representing the parameters. The keys are prefixed with the provided prefix.
19
-
20
- Args:
21
- config (Any): The configuration object to iterate over. This can be a
22
- dictionary, list, DictConfig, or ListConfig.
23
- prefix (str): The prefix to prepend to the parameter keys.
24
- Defaults to an empty string.
25
-
26
- Yields:
27
- Key-value pairs representing the parameters in the configuration object.
28
-
29
- """
30
- if config is None:
31
- return
32
-
33
- if isinstance(config, list) and all(isinstance(x, str) for x in config):
34
- config = _from_dotlist(config)
35
-
36
- if not isinstance(config, DictConfig | ListConfig):
37
- config = OmegaConf.create(config)
38
-
39
- yield from _iter_params(config, prefix)
40
-
41
-
42
- def _from_dotlist(config: list[str]) -> dict[str, str]:
43
- result = {}
44
- for item in config:
45
- if "=" in item:
46
- key, value = item.split("=", 1)
47
- result[key.strip()] = value.strip()
48
-
49
- return result
50
-
51
-
52
- def _iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
53
- if isinstance(config, DictConfig):
54
- for key, value in config.items():
55
- if _is_param(value):
56
- yield f"{prefix}{key}", _convert(value)
57
-
58
- else:
59
- yield from _iter_params(value, f"{prefix}{key}.")
60
-
61
- elif isinstance(config, ListConfig):
62
- for index, value in enumerate(config):
63
- if _is_param(value):
64
- yield f"{prefix}{index}", _convert(value)
65
-
66
- else:
67
- yield from _iter_params(value, f"{prefix}{index}.")
68
-
69
-
70
- def _is_param(value: Any) -> bool:
71
- """Check if the given value is a parameter."""
72
- if isinstance(value, DictConfig):
73
- return False
74
-
75
- if isinstance(value, ListConfig):
76
- if any(isinstance(v, DictConfig | ListConfig) for v in value):
77
- return False
78
-
79
- return True
80
-
81
-
82
- def _convert(value: Any) -> Any:
83
- """Convert the given value to a Python object."""
84
- if isinstance(value, ListConfig):
85
- return list(value)
86
-
87
- return value
88
-
89
-
90
- def select_config(config: Any, names: list[str]) -> dict[str, Any]:
91
- """Select the given parameters from the configuration object.
92
-
93
- This function selects the given parameters from the configuration object
94
- and returns a new configuration object containing only the selected parameters.
95
-
96
- Args:
97
- config (Any): The configuration object to select parameters from.
98
- names (list[str]): The names of the parameters to select.
99
-
100
- Returns:
101
- DictConfig: A new configuration object containing only the selected parameters.
102
-
103
- """
104
- if not isinstance(config, DictConfig):
105
- config = OmegaConf.structured(config)
106
-
107
- return {name: _get(config, name) for name in names}
108
-
109
-
110
- def _get(config: DictConfig, name: str) -> Any:
111
- """Get the value of the given parameter from the configuration object."""
112
- if "." not in name:
113
- return config.get(name)
114
-
115
- prefix, name = name.split(".", 1)
116
- return _get(config.get(prefix), name)
117
-
118
-
119
- def select_overrides(config: object, overrides: list[str]) -> dict[str, Any]:
120
- """Select the given overrides from the configuration object."""
121
- names = [override.split("=")[0].strip() for override in overrides]
122
- return select_config(config, names)
hydraflow/core/mlflow.py DELETED
@@ -1,174 +0,0 @@
1
- """Integration of MLflow experiment tracking with Hydra configuration management.
2
-
3
- This module provides functions to log parameters from Hydra configuration objects
4
- to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
5
- configuration management with MLflow's experiment tracking capabilities.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- from typing import TYPE_CHECKING
11
-
12
- import joblib
13
-
14
- from hydraflow.core.io import file_uri_to_path, get_artifact_dir
15
- from hydraflow.entities.run_collection import RunCollection
16
-
17
- from .config import iter_params
18
-
19
- if TYPE_CHECKING:
20
- from pathlib import Path
21
- from typing import Any
22
-
23
-
24
- def log_params(config: Any, *, synchronous: bool | None = None) -> None:
25
- """Log the parameters from the given configuration object.
26
-
27
- This method logs the parameters from the provided configuration object
28
- using MLflow. It iterates over the parameters and logs them using the
29
- `mlflow.log_param` method.
30
-
31
- Args:
32
- config (Any): The configuration object to log the parameters from.
33
- synchronous (bool | None): Whether to log the parameters synchronously.
34
- Defaults to None.
35
-
36
- """
37
- import mlflow
38
-
39
- for key, value in iter_params(config):
40
- mlflow.log_param(key, value, synchronous=synchronous)
41
-
42
-
43
- def log_text(from_dir: Path, pattern: str = "*.log") -> None:
44
- """Log text files in the given directory as artifacts.
45
-
46
- Append the text files to the existing text file in the artifact directory.
47
-
48
- Args:
49
- from_dir (Path): The directory to find the logs in.
50
- pattern (str): The pattern to match the logs.
51
-
52
- """
53
- import mlflow
54
-
55
- artifact_dir = get_artifact_dir()
56
-
57
- for file in from_dir.glob(pattern):
58
- if not file.is_file():
59
- continue
60
-
61
- file_artifact = artifact_dir / file.name
62
- if file_artifact.exists():
63
- text = file_artifact.read_text()
64
- if not text.endswith("\n"):
65
- text += "\n"
66
- else:
67
- text = ""
68
-
69
- text += file.read_text()
70
- mlflow.log_text(text, file.name)
71
-
72
-
73
- def list_run_paths(
74
- experiment_names: str | list[str] | None = None,
75
- *other: str,
76
- ) -> list[Path]:
77
- """List all run paths for the specified experiments.
78
-
79
- This function retrieves all run paths for the given list of experiment names.
80
- If no experiment names are provided (None), the function will search all runs
81
- for all experiments except the "Default" experiment.
82
-
83
- Args:
84
- experiment_names (list[str] | None): List of experiment names to search
85
- for runs. If None is provided, the function will search all runs
86
- for all experiments except the "Default" experiment.
87
- *other (str): The parts of the run directory to join.
88
-
89
- Returns:
90
- list[Path]: A list of run paths for the specified experiments.
91
-
92
- """
93
- import mlflow
94
-
95
- if isinstance(experiment_names, str):
96
- experiment_names = [experiment_names]
97
-
98
- elif experiment_names is None:
99
- experiments = mlflow.search_experiments()
100
- experiment_names = [e.name for e in experiments if e.name != "Default"]
101
-
102
- run_paths: list[Path] = []
103
-
104
- for name in experiment_names:
105
- if experiment := mlflow.get_experiment_by_name(name):
106
- uri = experiment.artifact_location
107
-
108
- if isinstance(uri, str):
109
- path = file_uri_to_path(uri)
110
- run_paths.extend(p for p in path.iterdir() if p.is_dir())
111
-
112
- if other:
113
- return [p.joinpath(*other) for p in run_paths]
114
-
115
- return run_paths
116
-
117
-
118
- def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
119
- """List all run IDs for the specified experiments.
120
-
121
- This function retrieves all runs for the given list of experiment names.
122
- If no experiment names are provided (None), the function will search all
123
- runs for all experiments except the "Default" experiment.
124
-
125
- Args:
126
- experiment_names (list[str] | None): List of experiment names to search
127
- for runs. If None is provided, the function will search all runs
128
- for all experiments except the "Default" experiment.
129
-
130
- Returns:
131
- list[str]: A list of run IDs for the specified experiments.
132
-
133
- """
134
- return [run_path.stem for run_path in list_run_paths(experiment_names)]
135
-
136
-
137
- def list_runs(
138
- experiment_names: str | list[str] | None = None,
139
- n_jobs: int = 0,
140
- ) -> RunCollection:
141
- """List all runs for the specified experiments.
142
-
143
- This function retrieves all runs for the given list of experiment names.
144
- If no experiment names are provided (None), the function will search all runs
145
- for all experiments except the "Default" experiment.
146
- The function returns the results as a `RunCollection` object.
147
-
148
- Note:
149
- The returned runs are sorted by their start time in ascending order.
150
-
151
- Args:
152
- experiment_names (list[str] | None): List of experiment names to search
153
- for runs. If None is provided, the function will search all runs
154
- for all experiments except the "Default" experiment.
155
- n_jobs (int): The number of jobs to retrieve runs in parallel.
156
-
157
- Returns:
158
- RunCollection: A `RunCollection` instance containing the runs for the
159
- specified experiments.
160
-
161
- """
162
- import mlflow
163
-
164
- run_ids = list_run_ids(experiment_names)
165
-
166
- if n_jobs == 0:
167
- runs = [mlflow.get_run(run_id) for run_id in run_ids]
168
-
169
- else:
170
- it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
171
- runs = joblib.Parallel(n_jobs, backend="threading")(it)
172
-
173
- runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
174
- return RunCollection(runs) # type: ignore