hydraflow 0.15.0__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hydraflow/__init__.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from hydraflow.core.context import chdir_artifact, log_run, start_run
4
4
  from hydraflow.core.io import (
5
5
  get_artifact_dir,
6
+ get_experiment_names,
6
7
  iter_artifact_paths,
7
8
  iter_artifacts_dirs,
8
9
  iter_experiment_dirs,
@@ -17,6 +18,7 @@ __all__ = [
17
18
  "RunCollection",
18
19
  "chdir_artifact",
19
20
  "get_artifact_dir",
21
+ "get_experiment_names",
20
22
  "iter_artifact_paths",
21
23
  "iter_artifacts_dirs",
22
24
  "iter_experiment_dirs",
hydraflow/core/context.py CHANGED
@@ -38,11 +38,11 @@ def log_run(run: Run) -> Iterator[None]:
38
38
  import mlflow
39
39
 
40
40
  hc = HydraConfig.get()
41
- hydra_dir = Path(hc.runtime.output_dir)
41
+ hydra_output_dir = Path(hc.runtime.output_dir)
42
42
 
43
43
  # Save '.hydra' config directory.
44
- hydra_subdir = hydra_dir / (hc.output_subdir or "")
45
- mlflow.log_artifacts(hydra_subdir.as_posix(), hc.output_subdir)
44
+ hydra_dir = hydra_output_dir / (hc.output_subdir or "")
45
+ mlflow.log_artifacts(hydra_dir.as_posix(), ".hydra")
46
46
 
47
47
  try:
48
48
  yield
@@ -53,7 +53,7 @@ def log_run(run: Run) -> Iterator[None]:
53
53
  raise
54
54
 
55
55
  finally:
56
- log_text(run, hydra_dir)
56
+ log_text(run, hydra_output_dir)
57
57
 
58
58
 
59
59
  @contextmanager
hydraflow/core/io.py CHANGED
@@ -107,6 +107,12 @@ def predicate_experiment_dir(
107
107
  return experiment_names(name)
108
108
 
109
109
 
110
+ def get_experiment_names(tracking_dir: str | Path) -> list[str]:
111
+ """Get the experiment names from the tracking directory."""
112
+ names = [get_experiment_name(path) for path in Path(tracking_dir).iterdir()]
113
+ return [name for name in names if name is not None and name != "Default"]
114
+
115
+
110
116
  def iter_experiment_dirs(
111
117
  tracking_dir: str | Path,
112
118
  experiment_names: str | list[str] | Callable[[str], bool] | None = None,
hydraflow/core/main.py CHANGED
@@ -36,7 +36,8 @@ Example:
36
36
  from __future__ import annotations
37
37
 
38
38
  from functools import wraps
39
- from typing import TYPE_CHECKING, TypeVar
39
+ from pathlib import Path
40
+ from typing import TYPE_CHECKING
40
41
 
41
42
  import hydra
42
43
  from hydra.core.config_store import ConfigStore
@@ -48,23 +49,20 @@ from hydraflow.core.io import file_uri_to_path
48
49
 
49
50
  if TYPE_CHECKING:
50
51
  from collections.abc import Callable
51
- from pathlib import Path
52
52
  from typing import Any
53
53
 
54
54
  from mlflow.entities import Run
55
55
 
56
56
 
57
- T = TypeVar("T")
58
-
59
-
60
- def main(
61
- node: T | type[T],
57
+ def main[C](
58
+ node: C | type[C],
62
59
  config_name: str = "config",
63
60
  *,
64
61
  chdir: bool = False,
65
62
  force_new_run: bool = False,
66
63
  match_overrides: bool = False,
67
64
  rerun_finished: bool = False,
65
+ update: Callable[[C], C | None] | None = None,
68
66
  ):
69
67
  """Decorator for configuring and running MLflow experiments with Hydra.
70
68
 
@@ -83,6 +81,8 @@ def main(
83
81
  instead of full config. Defaults to False.
84
82
  rerun_finished: If True, allows rerunning completed runs. Defaults to
85
83
  False.
84
+ update: A function that takes a configuration and returns a new
85
+ configuration. Defaults to None.
86
86
 
87
87
  """
88
88
  import mlflow
@@ -90,21 +90,29 @@ def main(
90
90
 
91
91
  finished = RunStatus.to_string(RunStatus.FINISHED)
92
92
 
93
- def decorator(app: Callable[[Run, T], None]) -> Callable[[], None]:
93
+ def decorator(app: Callable[[Run, C], None]) -> Callable[[], None]:
94
94
  ConfigStore.instance().store(config_name, node)
95
95
 
96
96
  @hydra.main(config_name=config_name, version_base=None)
97
97
  @wraps(app)
98
- def inner_decorator(config: T) -> None:
98
+ def inner_decorator(cfg: C) -> None:
99
99
  hc = HydraConfig.get()
100
100
  experiment = mlflow.set_experiment(hc.job.name)
101
101
 
102
+ if update:
103
+ if cfg_ := update(cfg):
104
+ cfg = cfg_
105
+
106
+ hydra_dir = Path(hc.runtime.output_dir) / (hc.output_subdir or "")
107
+ cfg_path = hydra_dir.joinpath("config.yaml")
108
+ OmegaConf.save(cfg, cfg_path)
109
+
102
110
  if force_new_run:
103
111
  run_id = None
104
112
  else:
105
113
  uri = experiment.artifact_location
106
114
  overrides = hc.overrides.task if match_overrides else None
107
- run_id = get_run_id(uri, config, overrides)
115
+ run_id = get_run_id(uri, cfg, overrides)
108
116
 
109
117
  if run_id and not rerun_finished:
110
118
  run = mlflow.get_run(run_id)
@@ -112,7 +120,7 @@ def main(
112
120
  return
113
121
 
114
122
  with start_run(run_id=run_id, chdir=chdir) as run:
115
- app(run, config)
123
+ app(run, cfg)
116
124
 
117
125
  return inner_decorator
118
126
 
hydraflow/core/run.py CHANGED
@@ -229,6 +229,8 @@ class Run[C, I = None]:
229
229
  cfg: DictConfig = self.cfg # type: ignore
230
230
 
231
231
  if isinstance(key, str):
232
+ key = key.replace("__", ".")
233
+
232
234
  if force or OmegaConf.select(cfg, key, default=MISSING) is MISSING:
233
235
  v = value(self) if callable(value) else value # type: ignore
234
236
  OmegaConf.update(cfg, key, v, force_add=True)
@@ -246,32 +248,51 @@ class Run[C, I = None]:
246
248
  raise TypeError(msg)
247
249
 
248
250
  for k, v in zip(key, value, strict=True):
249
- if force or OmegaConf.select(cfg, k, default=MISSING) is MISSING:
250
- OmegaConf.update(cfg, k, v, force_add=True)
251
+ k_ = k.replace("__", ".")
252
+ if force or OmegaConf.select(cfg, k_, default=MISSING) is MISSING:
253
+ OmegaConf.update(cfg, k_, v, force_add=True)
251
254
 
252
- def get(self, key: str) -> Any:
255
+ def get(self, key: str, default: Any | Callable[[Self], Any] = MISSING) -> Any:
253
256
  """Get a value from the information or configuration.
254
257
 
255
258
  Args:
256
- key: The key to look for. Can use dot notation for nested keys
257
- in configuration.
259
+ key: The key to look for. Can use dot notation for
260
+ nested keys in configuration.
261
+ default: Value to return if the key is not found.
262
+ If a callable, it will be called with the Run instance
263
+ and the value returned will be used as the default.
264
+ If not provided, AttributeError will be raised.
258
265
 
259
266
  Returns:
260
- Any: The value associated with the key.
267
+ Any: The value associated with the key, or the
268
+ default value if the key is not found and a default
269
+ is provided.
261
270
 
262
271
  Raises:
263
- AttributeError: If the key is not found in any of the components.
272
+ AttributeError: If the key is not found and
273
+ no default is provided.
264
274
 
265
275
  """
276
+ key = key.replace("__", ".")
277
+
266
278
  value = OmegaConf.select(self.cfg, key, default=MISSING) # type: ignore
267
279
  if value is not MISSING:
268
280
  return value
269
281
 
282
+ if self.impl and hasattr(self.impl, key):
283
+ return getattr(self.impl, key)
284
+
270
285
  info = self.info.to_dict()
271
286
  if key in info:
272
287
  return info[key]
273
288
 
274
- msg = f"Key not found: {key}"
289
+ if default is not MISSING:
290
+ if callable(default):
291
+ return default(self)
292
+
293
+ return default
294
+
295
+ msg = f"No such key: {key}"
275
296
  raise AttributeError(msg)
276
297
 
277
298
  def predicate(self, key: str, value: Any) -> bool:
@@ -298,32 +319,35 @@ class Run[C, I = None]:
298
319
 
299
320
  """
300
321
  attr = self.get(key)
322
+ return _predicate(attr, value)
301
323
 
302
- if callable(value):
303
- return bool(value(attr))
324
+ def to_dict(self) -> dict[str, Any]:
325
+ """Convert the Run to a dictionary."""
326
+ info = self.info.to_dict()
327
+ cfg = OmegaConf.to_container(self.cfg)
328
+ return info | _flatten_dict(cfg) # type: ignore
304
329
 
305
- if isinstance(value, ListConfig):
306
- value = list(value)
307
330
 
308
- if isinstance(value, list | set) and not _is_iterable(attr):
309
- return attr in value
331
+ def _predicate(attr: Any, value: Any) -> bool:
332
+ if callable(value):
333
+ return bool(value(attr))
310
334
 
311
- if isinstance(value, tuple) and len(value) == 2 and not _is_iterable(attr):
312
- return value[0] <= attr <= value[1]
335
+ if isinstance(value, ListConfig):
336
+ value = list(value)
313
337
 
314
- if _is_iterable(value):
315
- value = list(value)
338
+ if isinstance(value, list | set) and not _is_iterable(attr):
339
+ return attr in value
316
340
 
317
- if _is_iterable(attr):
318
- attr = list(attr)
341
+ if isinstance(value, tuple) and len(value) == 2 and not _is_iterable(attr):
342
+ return value[0] <= attr <= value[1]
319
343
 
320
- return attr == value
344
+ if _is_iterable(value):
345
+ value = list(value)
321
346
 
322
- def to_dict(self) -> dict[str, Any]:
323
- """Convert the Run to a dictionary."""
324
- info = self.info.to_dict()
325
- cfg = OmegaConf.to_container(self.cfg)
326
- return info | _flatten_dict(cfg) # type: ignore
347
+ if _is_iterable(attr):
348
+ attr = list(attr)
349
+
350
+ return attr == value
327
351
 
328
352
 
329
353
  def _is_iterable(value: Any) -> bool:
@@ -38,12 +38,13 @@ Note:
38
38
  from __future__ import annotations
39
39
 
40
40
  from collections.abc import Hashable, Iterable, Sequence
41
+ from dataclasses import MISSING
41
42
  from typing import TYPE_CHECKING, overload
42
43
 
43
44
  import numpy as np
44
45
  import polars as pl
45
46
  from omegaconf import OmegaConf
46
- from polars import DataFrame
47
+ from polars import DataFrame, Series
47
48
 
48
49
  from .run import Run
49
50
 
@@ -139,6 +140,47 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
139
140
  """
140
141
  return iter(self.runs)
141
142
 
143
+ def preload(
144
+ self,
145
+ *,
146
+ n_jobs: int = 0,
147
+ cfg: bool = True,
148
+ impl: bool = True,
149
+ ) -> Self:
150
+ """Pre-load configuration and implementation objects for all runs in parallel.
151
+
152
+ This method eagerly evaluates the cfg and impl properties of all runs
153
+ in the collection, potentially in parallel using joblib. This can
154
+ significantly improve performance for subsequent operations that
155
+ access these properties, as they will be already loaded in memory.
156
+
157
+ Args:
158
+ cfg (bool): Whether to preload the configuration objects
159
+ impl (bool): Whether to preload the implementation objects
160
+ n_jobs (int): Number of parallel jobs to run
161
+ (-1 means using all processors)
162
+
163
+ Returns:
164
+ Self: The same RunCollection instance with preloaded
165
+ configuration and implementation objects.
166
+
167
+ """
168
+
169
+ def load(run: R) -> None:
170
+ _ = cfg and run.cfg
171
+ _ = impl and run.impl
172
+
173
+ if n_jobs == 0:
174
+ for run in self:
175
+ load(run)
176
+ return self
177
+
178
+ from joblib import Parallel, delayed
179
+
180
+ parallel = Parallel(backend="threading", n_jobs=n_jobs)
181
+ parallel(delayed(load)(run) for run in self)
182
+ return self
183
+
142
184
  @overload
143
185
  def update(
144
186
  self,
@@ -334,56 +376,107 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
334
376
 
335
377
  raise _value_error()
336
378
 
337
- def to_list(self, key: str) -> list[Any]:
379
+ def to_list(
380
+ self,
381
+ key: str,
382
+ default: Any | Callable[[R], Any] = MISSING,
383
+ ) -> list[Any]:
338
384
  """Extract a list of values for a specific key from all runs.
339
385
 
340
386
  Args:
341
387
  key: The key to extract from each run.
388
+ default: The default value to return if the key is not found.
389
+ If a callable, it will be called with the Run instance
390
+ and the value returned will be used as the default.
342
391
 
343
392
  Returns:
344
393
  list[Any]: A list containing the values for the
345
394
  specified key from each run.
346
395
 
347
396
  """
348
- return [run.get(key) for run in self]
397
+ return [run.get(key, default) for run in self]
349
398
 
350
- def to_numpy(self, key: str) -> NDArray:
399
+ def to_numpy(
400
+ self,
401
+ key: str,
402
+ default: Any | Callable[[R], Any] = MISSING,
403
+ ) -> NDArray:
351
404
  """Extract values for a specific key from all runs as a NumPy array.
352
405
 
353
406
  Args:
354
407
  key: The key to extract from each run.
408
+ default: The default value to return if the key is not found.
409
+ If a callable, it will be called with the Run instance
410
+ and the value returned will be used as the default.
355
411
 
356
412
  Returns:
357
413
  NDArray: A NumPy array containing the values for the
358
414
  specified key from each run.
359
415
 
360
416
  """
361
- return np.array(self.to_list(key))
417
+ return np.array(self.to_list(key, default))
362
418
 
363
- def unique(self, key: str) -> NDArray:
419
+ def to_series(
420
+ self,
421
+ key: str,
422
+ default: Any | Callable[[R], Any] = MISSING,
423
+ *,
424
+ name: str | None = None,
425
+ ) -> Series:
426
+ """Extract values for a specific key from all runs as a Polars series.
427
+
428
+ Args:
429
+ key: The key to extract from each run.
430
+ default: The default value to return if the key is not found.
431
+ If a callable, it will be called with the Run instance
432
+ and the value returned will be used as the default.
433
+ name: The name of the series. If not provided, the key will be used.
434
+
435
+ Returns:
436
+ Series: A Polars series containing the values for the
437
+ specified key from each run.
438
+
439
+ """
440
+ return Series(name or key, self.to_list(key, default))
441
+
442
+ def unique(
443
+ self,
444
+ key: str,
445
+ default: Any | Callable[[R], Any] = MISSING,
446
+ ) -> NDArray:
364
447
  """Get the unique values for a specific key across all runs.
365
448
 
366
449
  Args:
367
450
  key: The key to extract unique values for.
451
+ default: The default value to return if the key is not found.
452
+ If a callable, it will be called with the Run instance
453
+ and the value returned will be used as the default.
368
454
 
369
455
  Returns:
370
456
  NDArray: A NumPy array containing the unique values for the
371
457
  specified key.
372
458
 
373
459
  """
374
- return np.unique(self.to_numpy(key), axis=0)
460
+ return np.unique(self.to_numpy(key, default), axis=0)
375
461
 
376
- def n_unique(self, key: str) -> int:
462
+ def n_unique(
463
+ self,
464
+ key: str,
465
+ default: Any | Callable[[R], Any] = MISSING,
466
+ ) -> int:
377
467
  """Count the number of unique values for a specific key across all runs.
378
468
 
379
469
  Args:
380
470
  key: The key to count unique values for.
471
+ default: The default value to return if the key is not found.
472
+ If a callable, it will be called with the Run instance
473
+ and the value returned will be used as the default.
381
474
 
382
475
  Returns:
383
476
  int: The number of unique values for the specified key.
384
477
 
385
478
  """
386
- return len(self.unique(key))
479
+ return len(self.unique(key, default))
387
480
 
388
481
  def sort(self, *keys: str, reverse: bool = False) -> Self:
389
482
  """Sort runs based on one or more keys.
@@ -409,13 +502,22 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
409
502
 
410
503
  return self[index]
411
504
 
412
- def to_frame(self, *keys: str, **kwargs: Callable[[R], Any]) -> DataFrame:
505
+ def to_frame(
506
+ self,
507
+ *keys: str,
508
+ defaults: dict[str, Any | Callable[[R], Any]] | None = None,
509
+ **kwargs: Callable[[R], Any],
510
+ ) -> DataFrame:
413
511
  """Convert the collection to a Polars DataFrame.
414
512
 
415
513
  Args:
416
514
  *keys (str): The keys to include as columns in the DataFrame.
417
515
  If not provided, all keys from each run's to_dict() method
418
516
  will be used.
517
+ defaults (dict[str, Any | Callable[[R], Any]] | None): Default
518
+ values for the keys. If a callable, it will be called with
519
+ the Run instance and the value returned will be used as the
520
+ default.
419
521
  **kwargs (Callable[[R], Any]): Additional columns to compute
420
522
  using callables that take a Run and return a value.
421
523
 
@@ -424,15 +526,20 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
424
526
  from the runs.
425
527
 
426
528
  """
529
+ if defaults is None:
530
+ defaults = {}
531
+
427
532
  if keys:
428
- df = DataFrame({key: self.to_list(key) for key in keys})
533
+ df = DataFrame(
534
+ {key: self.to_list(key, defaults.get(key, MISSING)) for key in keys},
535
+ )
429
536
  else:
430
537
  df = DataFrame(r.to_dict() for r in self)
431
538
 
432
539
  if not kwargs:
433
540
  return df
434
541
 
435
- columns = [pl.Series(k, [v(r) for r in self]) for k, v in kwargs.items()]
542
+ columns = [Series(k, [v(r) for r in self]) for k, v in kwargs.items()]
436
543
  return df.with_columns(*columns)
437
544
 
438
545
  def _group_by(self, *keys: str) -> dict[Any, Self]:
@@ -11,9 +11,12 @@ was created.
11
11
  from __future__ import annotations
12
12
 
13
13
  from dataclasses import dataclass
14
- from functools import cached_property
14
+ from functools import cache, cached_property
15
+ from pathlib import Path
15
16
  from typing import TYPE_CHECKING
16
17
 
18
+ from omegaconf import OmegaConf
19
+
17
20
  if TYPE_CHECKING:
18
21
  from pathlib import Path
19
22
  from typing import Any
@@ -47,7 +50,7 @@ class RunInfo:
47
50
  Hydra configuration file (e.g., if the file does not exist or does not
48
51
  contain the expected format).
49
52
  """
50
- return get_job_name(self.run_dir)
53
+ return get_job_name(self.run_dir.parent)
51
54
 
52
55
  def to_dict(self) -> dict[str, Any]:
53
56
  """Convert the RunInfo to a dictionary."""
@@ -58,27 +61,23 @@ class RunInfo:
58
61
  }
59
62
 
60
63
 
61
- def get_job_name(run_dir: Path) -> str:
62
- """Extract the Hydra job name from the Hydra configuration file.
64
+ @cache
65
+ def get_job_name(experiment_dir: Path) -> str:
66
+ """Get the job name from an experiment directory.
63
67
 
64
- Return an empty string if the job name cannot be extracted from the
65
- Hydra configuration file (e.g., if the file does not exist or does not
66
- contain the expected format).
68
+ Extracts the job name from the meta.yaml file. Returns an empty string
69
+ if the file does not exist or if the job name cannot be found.
67
70
 
68
71
  Args:
69
- run_dir (Path): The directory where the run artifacts are stored.
72
+ experiment_dir: Path to the experiment directory containing the meta.yaml file
70
73
 
71
74
  Returns:
72
- str: The Hydra job name, which was used as the MLflow Experiment name.
75
+ The job name as a string, or an empty string if the file does not exist
73
76
 
74
77
  """
75
- hydra_file = run_dir / "artifacts/.hydra/hydra.yaml"
76
-
77
- if not hydra_file.exists():
78
+ path = experiment_dir / "meta.yaml"
79
+ if not path.exists():
78
80
  return ""
79
81
 
80
- text = hydra_file.read_text()
81
- if " job:\n name: " in text:
82
- return text.split(" job:\n name: ")[1].split("\n")[0]
83
-
84
- return ""
82
+ meta = OmegaConf.load(experiment_dir / "meta.yaml")
83
+ return OmegaConf.select(meta, "name")
@@ -4,10 +4,10 @@ from dataclasses import dataclass, field
4
4
 
5
5
 
6
6
  @dataclass
7
- class Step:
8
- batch: str = ""
9
- args: str = ""
10
- with_: str = ""
7
+ class Set:
8
+ each: str = ""
9
+ all: str = ""
10
+ add: str = ""
11
11
 
12
12
 
13
13
  @dataclass
@@ -16,8 +16,8 @@ class Job:
16
16
  run: str = ""
17
17
  call: str = ""
18
18
  submit: str = ""
19
- with_: str = ""
20
- steps: list[Step] = field(default_factory=list)
19
+ add: str = ""
20
+ sets: list[Set] = field(default_factory=list)
21
21
 
22
22
 
23
23
  @dataclass
hydraflow/executor/io.py CHANGED
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from pathlib import Path
6
6
  from typing import TYPE_CHECKING
7
7
 
8
- from omegaconf import DictConfig, ListConfig, OmegaConf
8
+ from omegaconf import DictConfig, OmegaConf
9
9
 
10
10
  from .conf import HydraflowConf
11
11
 
@@ -38,25 +38,9 @@ def load_config() -> HydraflowConf:
38
38
  if not isinstance(cfg, DictConfig):
39
39
  return schema
40
40
 
41
- rename_with(cfg)
42
-
43
41
  return OmegaConf.merge(schema, cfg) # type: ignore[return-value]
44
42
 
45
43
 
46
- def rename_with(cfg: DictConfig) -> None:
47
- """Rename the `with` field to `with_`."""
48
- if "with" in cfg:
49
- cfg["with_"] = cfg.pop("with")
50
-
51
- for key in list(cfg.keys()):
52
- if isinstance(cfg[key], DictConfig):
53
- rename_with(cfg[key])
54
- elif isinstance(cfg[key], ListConfig):
55
- for item in cfg[key]:
56
- if isinstance(item, DictConfig):
57
- rename_with(item)
58
-
59
-
60
44
  def get_job(name: str) -> Job:
61
45
  """Get a job from the config."""
62
46
  cfg = load_config()
hydraflow/executor/job.py CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  This module provides functionality for executing jobs in HydraFlow, including:
4
4
 
5
- - Argument parsing and expansion for job steps
5
+ - Argument parsing and expansion for job parameter sets
6
6
  - Batch processing of Hydra configurations
7
7
  - Execution of jobs via shell commands or Python functions
8
8
 
@@ -11,8 +11,9 @@ The module supports two execution modes:
11
11
  1. Shell command execution
12
12
  2. Python function calls
13
13
 
14
- Each job can consist of multiple steps, and each step can have its own
15
- arguments and configurations that will be expanded into multiple runs.
14
+ Each job can consist of multiple parameter sets, and each parameter
15
+ set can have its own arguments and configurations that will be expanded
16
+ into multiple runs.
16
17
  """
17
18
 
18
19
  from __future__ import annotations
@@ -39,24 +40,24 @@ if TYPE_CHECKING:
39
40
  from .conf import Job
40
41
 
41
42
 
42
- def iter_args(batch: str, args: str) -> Iterator[list[str]]:
43
+ def iter_args(each: str, all_: str) -> Iterator[list[str]]:
43
44
  """Iterate over combinations generated from parsed arguments.
44
45
 
45
46
  Generate all possible combinations of arguments by parsing and
46
47
  expanding each one, yielding them as an iterator.
47
48
 
48
49
  Args:
49
- batch (str): The batch to parse.
50
- args (str): The arguments to parse.
50
+ each (str): The 'each' parameter to parse.
51
+ all_ (str): The 'all' parameter to parse.
51
52
 
52
53
  Yields:
53
54
  list[str]: a list of the parsed argument combinations.
54
55
 
55
56
  """
56
- args_ = collect(args)
57
+ all_params = collect(all_)
57
58
 
58
- for batch_ in expand(batch):
59
- yield [*batch_, *args_]
59
+ for each_params in expand(each):
60
+ yield [*each_params, *all_params]
60
61
 
61
62
 
62
63
  def iter_batches(job: Job) -> Iterator[list[str]]:
@@ -74,14 +75,40 @@ def iter_batches(job: Job) -> Iterator[list[str]]:
74
75
 
75
76
  """
76
77
  job_name = f"hydra.job.name={job.name}"
77
- job_configs = shlex.split(job.with_)
78
+ job_add = shlex.split(job.add)
78
79
 
79
- for step in job.steps:
80
- configs = shlex.split(step.with_) or job_configs
80
+ for set_ in job.sets:
81
+ add = merge_args(job_add, shlex.split(set_.add)) if set_.add else job_add
81
82
 
82
- for args in iter_args(step.batch, step.args):
83
+ for args in iter_args(set_.each, set_.all):
83
84
  sweep_dir = f"hydra.sweep.dir=multirun/{ulid.ULID()}"
84
- yield ["--multirun", *args, job_name, sweep_dir, *configs]
85
+ yield ["--multirun", *args, job_name, sweep_dir, *add]
86
+
87
+
88
+ def merge_args(first: list[str], second: list[str]) -> list[str]:
89
+ """Merge two lists of arguments.
90
+
91
+ This function merges two lists of arguments by checking for conflicts
92
+ and resolving them by keeping the values from the second list.
93
+
94
+ Args:
95
+ first (list[str]): The first list of arguments.
96
+ second (list[str]): The second list of arguments.
97
+
98
+ Returns:
99
+ list[str]: A merged list of arguments.
100
+
101
+ """
102
+ merged = {}
103
+
104
+ for item in [*first, *second]:
105
+ if "=" in item:
106
+ key, value = item.split("=", 1)
107
+ merged[key] = value
108
+ else:
109
+ merged[item] = None
110
+
111
+ return [k if v is None else f"{k}={v}" for k, v in merged.items()]
85
112
 
86
113
 
87
114
  @dataclass
@@ -165,25 +165,26 @@ SUFFIX_EXPONENT = {
165
165
 
166
166
 
167
167
  def _get_range(arg: str) -> tuple[float, float, float]:
168
+ """Return a tuple of (start, stop, step)."""
168
169
  args = [to_number(x) for x in arg.split(":")]
169
170
 
170
171
  if len(args) == 2:
171
172
  if args[0] > args[1]:
172
173
  raise ValueError("start cannot be greater than stop")
173
174
 
174
- return (args[0], 1, args[1])
175
+ return (args[0], args[1], 1)
175
176
 
176
- if args[1] == 0:
177
+ if args[2] == 0:
177
178
  raise ValueError("step cannot be zero")
178
- if args[1] > 0 and args[0] > args[2]:
179
+ if args[2] > 0 and args[0] > args[1]:
179
180
  raise ValueError("start cannot be greater than stop")
180
- if args[1] < 0 and args[0] < args[2]:
181
+ if args[2] < 0 and args[0] < args[1]:
181
182
  raise ValueError("start cannot be less than stop")
182
183
 
183
184
  return args[0], args[1], args[2]
184
185
 
185
186
 
186
- def _arange(start: float, step: float, stop: float) -> list[float]:
187
+ def _arange(start: float, stop: float, step: float) -> list[float]:
187
188
  """Generate a range of floating point numbers.
188
189
 
189
190
  This function generates a range of floating point numbers
@@ -191,8 +192,8 @@ def _arange(start: float, step: float, stop: float) -> list[float]:
191
192
 
192
193
  Args:
193
194
  start (float): The starting value.
194
- step (float): The step size.
195
195
  stop (float): The end value (inclusive).
196
+ step (float): The step size.
196
197
 
197
198
  Returns:
198
199
  list[float]: A list of floating point numbers from start to stop
@@ -323,7 +324,7 @@ def collect_parentheses(arg: str) -> list[str]:
323
324
  list[str]: A list of the collected values.
324
325
 
325
326
  Examples:
326
- >>> collect_parentheses("(1:3,5:2:9,20)k")
327
+ >>> collect_parentheses("(1:3,5:9:2,20)k")
327
328
  ['1e3', '2e3', '3e3', '5e3', '7e3', '9e3', '20e3']
328
329
  >>> collect_parentheses("2e(-1,-2,-3)")
329
330
  ['2e-1', '2e-2', '2e-3']
@@ -352,7 +353,7 @@ def collect_values(arg: str) -> list[str]:
352
353
  Examples:
353
354
  >>> collect_values("1:4")
354
355
  ['1', '2', '3', '4']
355
- >>> collect_values("1.2:0.1:1.4:k")
356
+ >>> collect_values("1.2:1.4:0.1:k")
356
357
  ['1.2e3', '1.3e3', '1.4e3']
357
358
  >>> collect_values("0.1")
358
359
  ['0.1']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hydraflow
3
- Version: 0.15.0
3
+ Version: 0.16.0
4
4
  Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
5
5
  Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -51,7 +51,7 @@ Requires-Dist: ruff>=0.11
51
51
  Requires-Dist: typer>=0.15
52
52
  Description-Content-Type: text/markdown
53
53
 
54
- # Hydraflow
54
+ # HydraFlow
55
55
 
56
56
  [![PyPI Version][pypi-v-image]][pypi-v-link]
57
57
  [![Build Status][GHAction-image]][GHAction-link]
@@ -60,6 +60,7 @@ Description-Content-Type: text/markdown
60
60
  [![Python Version][python-v-image]][python-v-link]
61
61
 
62
62
  <!-- Badges -->
63
+
63
64
  [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
64
65
  [pypi-v-link]: https://pypi.org/project/hydraflow/
65
66
  [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
@@ -73,117 +74,125 @@ Description-Content-Type: text/markdown
73
74
 
74
75
  ## Overview
75
76
 
76
- Hydraflow is a library designed to seamlessly integrate
77
- [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to
78
- manage and track machine learning experiments. By combining the flexibility of
79
- Hydra's configuration management with the robust experiment tracking capabilities
80
- of MLflow, Hydraflow provides a comprehensive solution for managing complex
81
- machine learning workflows.
77
+ HydraFlow seamlessly integrates [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/) to streamline machine learning experiment workflows. By combining Hydra's powerful configuration management with MLflow's robust experiment tracking, HydraFlow provides a comprehensive solution for defining, executing, and analyzing machine learning experiments.
78
+
79
+ ## Design Principles
80
+
81
+ HydraFlow is built on the following design principles:
82
+
83
+ 1. **Type Safety** - Utilizing Python dataclasses for configuration type checking and IDE support
84
+ 2. **Reproducibility** - Automatically tracking all experiment configurations for fully reproducible experiments
85
+ 3. **Analysis Capabilities** - Providing powerful APIs for easily analyzing experiment results
86
+ 4. **Workflow Integration** - Creating a cohesive workflow by integrating Hydra's configuration management with MLflow's experiment tracking
82
87
 
83
88
  ## Key Features
84
89
 
85
- - **Configuration Management**: Utilize Hydra's advanced configuration management
86
- to handle complex parameter sweeps and experiment setups.
87
- - **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters,
88
- metrics, and artifacts for each run.
89
- - **Artifact Management**: Automatically log and manage artifacts, such as model
90
- checkpoints and configuration files, with MLflow.
91
- - **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning
92
- projects with minimal setup.
93
- - **Rich CLI Interface**: Command-line tools for managing experiments and viewing results.
94
- - **Cross-Platform Support**: Works consistently across different operating systems.
90
+ - **Type-safe Configuration Management** - Define experiment parameters using Python dataclasses with full IDE support and validation
91
+ - **Seamless Hydra-MLflow Integration** - Automatically register configurations with Hydra and track experiments with MLflow
92
+ - **Advanced Parameter Sweeps** - Define complex parameter spaces using extended sweep syntax for numerical ranges, combinations, and SI prefixes
93
+ - **Workflow Automation** - Create reusable experiment workflows with YAML-based job definitions
94
+ - **Powerful Analysis Tools** - Filter, group, and analyze experiment results with type-aware APIs
95
+ - **Custom Implementation Support** - Extend experiment analysis with domain-specific functionality
95
96
 
96
97
  ## Installation
97
98
 
98
- You can install Hydraflow via pip:
99
-
100
99
  ```bash
101
100
  pip install hydraflow
102
101
  ```
103
102
 
104
103
  **Requirements:** Python 3.13+
105
104
 
106
- ## Quick Start
107
-
108
- Here is a simple example to get you started with Hydraflow:
105
+ ## Quick Example
109
106
 
110
107
  ```python
111
- from __future__ import annotations
112
-
113
108
  from dataclasses import dataclass
114
- from typing import TYPE_CHECKING
115
-
109
+ from mlflow.entities import Run
116
110
  import hydraflow
117
- import mlflow
118
111
 
119
- if TYPE_CHECKING:
120
- from mlflow.entities import Run
112
+ @dataclass
113
+ class Config:
114
+ width: int = 1024
115
+ height: int = 768
121
116
 
117
+ @hydraflow.main(Config)
118
+ def app(run: Run, cfg: Config) -> None:
119
+ # Your experiment code here
120
+ print(f"Running with width={cfg.width}, height={cfg.height}")
121
+
122
+ # Log metrics
123
+ hydraflow.log_metric("area", cfg.width * cfg.height)
122
124
 
125
+ if __name__ == "__main__":
126
+ app()
127
+ ```
128
+
129
+ Execute a parameter sweep with:
130
+
131
+ ```bash
132
+ python app.py -m width=800,1200 height=600,900
133
+ ```
134
+
135
+ ## Core Components
136
+
137
+ HydraFlow consists of the following key components:
138
+
139
+ ### Configuration Management
140
+
141
+ Define type-safe configurations using Python dataclasses:
142
+
143
+ ```python
123
144
  @dataclass
124
145
  class Config:
125
- """Configuration for the ML training experiment."""
126
- # Training hyperparameters
127
146
  learning_rate: float = 0.001
128
147
  batch_size: int = 32
129
148
  epochs: int = 10
149
+ ```
130
150
 
131
- # Model architecture parameters
132
- hidden_size: int = 128
133
- dropout: float = 0.1
134
-
135
- # Dataset parameters
136
- train_size: float = 0.8
137
- random_seed: int = 42
151
+ ### Main Decorator
138
152
 
153
+ The `@hydraflow.main` decorator integrates Hydra and MLflow:
139
154
 
155
+ ```python
140
156
  @hydraflow.main(Config)
141
- def app(run: Run, cfg: Config):
142
- """Train a model with the given configuration.
143
-
144
- This example demonstrates how to:
157
+ def train(run: Run, cfg: Config) -> None:
158
+ ...  # Your experiment code
159
+ ```
145
160
 
146
- 1. Define a configuration using dataclasses
147
- 2. Use Hydraflow to integrate with MLflow
148
- 3. Track metrics and parameters automatically
161
+ ### Workflow Automation
149
162
 
150
- Args:
151
- run: MLflow run for the experiment corresponding to the Hydra app.
152
- This `Run` instance is automatically created by Hydraflow.
153
- cfg: Configuration for the experiment's run.
154
- This `Config` instance is originally defined by Hydra, and then
155
- automatically passed to the app by Hydraflow.
156
- """
157
- # Training loop
158
- for epoch in range(cfg.epochs):
159
- # Simulate training and validation
160
- train_loss = 1.0 / (epoch + 1)
161
- val_loss = 1.1 / (epoch + 1)
163
+ Define reusable experiment workflows in YAML:
162
164
 
163
- # Log metrics to MLflow
164
- mlflow.log_metrics({
165
- "train_loss": train_loss,
166
- "val_loss": val_loss
167
- }, step=epoch)
165
+ ```yaml
166
+ jobs:
167
+ train_models:
168
+ run: python train.py
169
+ sets:
170
+ - each: model=small,medium,large
171
+ all: learning_rate=0.001,0.01,0.1
172
+ ```
168
173
 
169
- print(f"Epoch {epoch}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}")
174
+ ### Analysis Tools
170
175
 
176
+ Analyze experiment results with powerful APIs:
171
177
 
172
- if __name__ == "__main__":
173
- app()
174
- ```
178
+ ```python
179
+ from hydraflow import Run, iter_run_dirs
175
180
 
176
- This example demonstrates:
181
+ # Load runs
182
+ runs = Run.load(iter_run_dirs("mlruns"))
177
183
 
178
- - Configuration management with Hydra
179
- - Automatic experiment tracking with MLflow
180
- - Parameter logging and metric tracking
181
- - Type-safe configuration with dataclasses
184
+ # Filter and analyze
185
+ df = runs.filter(model_type="transformer").to_frame("learning_rate", "accuracy")
186
+ ```
182
187
 
183
188
  ## Documentation
184
189
 
185
- For detailed documentation, including advanced usage examples and API reference,
186
- visit our [documentation site](https://daizutabi.github.io/hydraflow/).
190
+ For detailed documentation, visit our [documentation site](https://daizutabi.github.io/hydraflow/):
191
+
192
+ - [Getting Started](https://daizutabi.github.io/hydraflow/getting-started/) - Installation and core concepts
193
+ - [Practical Tutorials](https://daizutabi.github.io/hydraflow/practical-tutorials/) - Learn through hands-on examples
194
+ - [User Guide](https://daizutabi.github.io/hydraflow/part1-applications/) - Detailed documentation of HydraFlow's capabilities
195
+ - [API Reference](https://daizutabi.github.io/hydraflow/api/hydraflow/) - Complete API documentation
187
196
 
188
197
  ## Contributing
189
198
 
@@ -191,4 +200,4 @@ We welcome contributions! Please see our [contributing guide](CONTRIBUTING.md) f
191
200
 
192
201
  ## License
193
202
 
194
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
203
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,21 @@
1
+ hydraflow/__init__.py,sha256=8UraqH00Qp0In301ZUmQBRTIGbV1L5zSZACOUlIRPn8,727
2
+ hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
3
+ hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ hydraflow/core/context.py,sha256=igE17oQESGjH-sBnICI8HkZbngY_crkHTgx2E-YkmEo,4155
6
+ hydraflow/core/io.py,sha256=gIH3-Lzs4d5TL3b9y-Nb064Aya7cXQHAuc7EjgKzxII,4694
7
+ hydraflow/core/main.py,sha256=mnYcm1SaCaJwpMCKLEm337LcjW6P5G5LMUjOf78ejkk,5574
8
+ hydraflow/core/run.py,sha256=SugX6JLdBqsfz3JTrB66I3muo03rrmwDvITVZQaF48w,12685
9
+ hydraflow/core/run_collection.py,sha256=cbaJO68WzE-QNlTc8NhOyQ1pHDNberJs-31qTY7P9Fo,19495
10
+ hydraflow/core/run_info.py,sha256=DTuT2eYhOj1WEeIsesOLjY0yltCw6f3Y-5hhvIbDROQ,2518
11
+ hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
13
+ hydraflow/executor/conf.py,sha256=8Xq4UAenRKJIl1NBgNbSfv6VUTJhdwPLayZIEAsiBR0,414
14
+ hydraflow/executor/io.py,sha256=18wnHpCMQRGYL-oN2841h9W2aSW_X2SmO68Lx-3FIbU,1043
15
+ hydraflow/executor/job.py,sha256=6QeJ18OMeocXeM04rCYL46GgArfX1SvZs9_4HTomTgE,5436
16
+ hydraflow/executor/parser.py,sha256=RxP8qpDaJ8VLqZ51VlPFyVitWctObhkE_3iPIsY66Cs,14610
17
+ hydraflow-0.16.0.dist-info/METADATA,sha256=g8PnKA-cAU6P0YCPg-hU9E-hpvljNk4v9tOgV3bT_dw,7691
18
+ hydraflow-0.16.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
19
+ hydraflow-0.16.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
20
+ hydraflow-0.16.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
21
+ hydraflow-0.16.0.dist-info/RECORD,,
@@ -1,21 +0,0 @@
1
- hydraflow/__init__.py,sha256=5ByA9ogtS5ZfIYIUSMUjMwAIpr6xGXEXmcABOu4O8RA,673
2
- hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
3
- hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- hydraflow/core/context.py,sha256=LFPNJxmuJQ2VUt-WBU07MC3ySbjlY8rRZ8VxuAih4o4,4148
6
- hydraflow/core/io.py,sha256=ZBXIL_jlBUiCI0L_J6S5S4OwtBMvdVVMXnekzMuC_JA,4404
7
- hydraflow/core/main.py,sha256=b9o6Rpn3uoXfDB8o0XZdl-g1yX2SKkOT12-H7lB8Les,5158
8
- hydraflow/core/run.py,sha256=9JNk3axDdKLpttGx-BC9aqw3d7rosygn2cIzL-fxVlM,11876
9
- hydraflow/core/run_collection.py,sha256=pV3N83uBhmda9OeaNz1jqpF9z6A9j3jfUHtqy-uxCs4,15671
10
- hydraflow/core/run_info.py,sha256=3dW9GgWnZZNwbXwMrw-85AqQ956zlQddUi9irSNLR5g,2550
11
- hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
13
- hydraflow/executor/conf.py,sha256=icGbLDh86KgkyiGXwDoEkmZpgAP3X8Jmu_PYqJoTooY,423
14
- hydraflow/executor/io.py,sha256=yZMcBVmAbPZZ82cAXhgiJfj9p8WvHmzOCMBg_vtEVek,1509
15
- hydraflow/executor/job.py,sha256=JX6xX9ffvHB7IiAVIfzVRjjnWKaPDxBgqdZf4ZO14CY,4651
16
- hydraflow/executor/parser.py,sha256=_Rfund3FDgrXitTt_znsTpgEtMDqZ_ICynaB_Zje14Q,14561
17
- hydraflow-0.15.0.dist-info/METADATA,sha256=2OpqrXDfnVxQ_ZJkS5tEjQH0VTa3yx8jkfFOjbkCK50,7238
18
- hydraflow-0.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
19
- hydraflow-0.15.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
20
- hydraflow-0.15.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
21
- hydraflow-0.15.0.dist-info/RECORD,,