hydraflow 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +2 -0
- hydraflow/core/collection.py +74 -2
- hydraflow/core/context.py +3 -4
- hydraflow/core/run.py +82 -14
- hydraflow/core/run_collection.py +47 -7
- {hydraflow-0.17.0.dist-info → hydraflow-0.17.2.dist-info}/METADATA +1 -1
- {hydraflow-0.17.0.dist-info → hydraflow-0.17.2.dist-info}/RECORD +10 -10
- {hydraflow-0.17.0.dist-info → hydraflow-0.17.2.dist-info}/WHEEL +0 -0
- {hydraflow-0.17.0.dist-info → hydraflow-0.17.2.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.17.0.dist-info → hydraflow-0.17.2.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Integrate Hydra and MLflow to manage and track machine learning experiments."""
|
2
2
|
|
3
|
+
from hydraflow.core.collection import Collection
|
3
4
|
from hydraflow.core.context import chdir_artifact, log_run, start_run
|
4
5
|
from hydraflow.core.io import (
|
5
6
|
get_artifact_dir,
|
@@ -14,6 +15,7 @@ from hydraflow.core.run import Run
|
|
14
15
|
from hydraflow.core.run_collection import RunCollection
|
15
16
|
|
16
17
|
__all__ = [
|
18
|
+
"Collection",
|
17
19
|
"Run",
|
18
20
|
"RunCollection",
|
19
21
|
"chdir_artifact",
|
hydraflow/core/collection.py
CHANGED
@@ -4,9 +4,10 @@ from __future__ import annotations
|
|
4
4
|
|
5
5
|
from collections.abc import Hashable, Iterable, Sequence
|
6
6
|
from dataclasses import MISSING
|
7
|
-
from typing import TYPE_CHECKING, overload
|
7
|
+
from typing import TYPE_CHECKING, Concatenate, overload
|
8
8
|
|
9
9
|
import numpy as np
|
10
|
+
from joblib.parallel import Parallel, delayed
|
10
11
|
from omegaconf import ListConfig, OmegaConf
|
11
12
|
from polars import DataFrame, Series
|
12
13
|
|
@@ -378,6 +379,77 @@ class Collection[I](Sequence[I]):
|
|
378
379
|
|
379
380
|
return self[index]
|
380
381
|
|
382
|
+
def map[**P, R](
|
383
|
+
self,
|
384
|
+
function: Callable[Concatenate[I, P], R],
|
385
|
+
*args: P.args,
|
386
|
+
**kwargs: P.kwargs,
|
387
|
+
) -> Iterator[R]:
|
388
|
+
"""Apply a function to each item and return an iterator of results.
|
389
|
+
|
390
|
+
This is a memory-efficient mapping operation that lazily evaluates results.
|
391
|
+
Ideal for large collections where memory usage is a concern.
|
392
|
+
|
393
|
+
Args:
|
394
|
+
function: Function to apply to each item. The item is passed
|
395
|
+
as the first argument.
|
396
|
+
*args: Additional positional arguments to pass to the function.
|
397
|
+
**kwargs: Additional keyword arguments to pass to the function.
|
398
|
+
|
399
|
+
Returns:
|
400
|
+
Iterator[R]: An iterator of the function's results.
|
401
|
+
|
402
|
+
Examples:
|
403
|
+
```python
|
404
|
+
# Process results one at a time
|
405
|
+
for result in collection.map(process_item, additional_arg):
|
406
|
+
handle_result(result)
|
407
|
+
|
408
|
+
# Convert to list if needed
|
409
|
+
results = list(collection.map(transform_item))
|
410
|
+
```
|
411
|
+
|
412
|
+
"""
|
413
|
+
yield from (function(i, *args, **kwargs) for i in self)
|
414
|
+
|
415
|
+
def pmap[**P, R](
|
416
|
+
self,
|
417
|
+
function: Callable[Concatenate[I, P], R],
|
418
|
+
n_jobs: int = -1,
|
419
|
+
backend: str = "multiprocessing",
|
420
|
+
*args: P.args,
|
421
|
+
**kwargs: P.kwargs,
|
422
|
+
) -> list[R]:
|
423
|
+
"""Apply a function to each item in parallel and return a list of results.
|
424
|
+
|
425
|
+
This method processes items concurrently for improved performance on
|
426
|
+
CPU-bound or I/O-bound operations, depending on the backend.
|
427
|
+
|
428
|
+
Args:
|
429
|
+
function: Function to apply to each item. The item is passed
|
430
|
+
as the first argument.
|
431
|
+
n_jobs (int): Number of jobs to run in parallel. -1 means using all
|
432
|
+
processors.
|
433
|
+
backend (str): Parallelization backend.
|
434
|
+
*args: Additional positional arguments to pass to the function.
|
435
|
+
**kwargs: Additional keyword arguments to pass to the function.
|
436
|
+
|
437
|
+
Returns:
|
438
|
+
list[R]: A list containing all results of the function applications.
|
439
|
+
|
440
|
+
Examples:
|
441
|
+
```python
|
442
|
+
# Process all items in parallel using all cores
|
443
|
+
results = collection.pmap(heavy_computation)
|
444
|
+
|
445
|
+
# Specify number of parallel jobs and backend
|
446
|
+
results = collection.pmap(process_files, n_jobs=4, backend="threading")
|
447
|
+
```
|
448
|
+
|
449
|
+
"""
|
450
|
+
parallel = Parallel(n_jobs=n_jobs, backend=backend, return_as="list")
|
451
|
+
return parallel(delayed(function)(i, *args, **kwargs) for i in self) # type: ignore
|
452
|
+
|
381
453
|
def to_frame(
|
382
454
|
self,
|
383
455
|
*keys: str,
|
@@ -409,7 +481,7 @@ class Collection[I](Sequence[I]):
|
|
409
481
|
if not kwargs:
|
410
482
|
return df
|
411
483
|
|
412
|
-
columns = [Series(k,
|
484
|
+
columns = [Series(k, self.map(v)) for k, v in kwargs.items()]
|
413
485
|
return df.with_columns(*columns)
|
414
486
|
|
415
487
|
def group_by(self, *by: str) -> GroupBy[Self, I]:
|
hydraflow/core/context.py
CHANGED
@@ -128,13 +128,12 @@ def chdir_artifact(run: Run) -> Iterator[Path]:
|
|
128
128
|
run (Run | None): The run to get the artifact directory from.
|
129
129
|
|
130
130
|
"""
|
131
|
-
|
131
|
+
current_dir = Path.cwd()
|
132
132
|
artifact_dir = get_artifact_dir(run)
|
133
133
|
|
134
|
-
os.chdir(artifact_dir)
|
135
|
-
|
136
134
|
try:
|
135
|
+
os.chdir(artifact_dir)
|
137
136
|
yield artifact_dir
|
138
137
|
|
139
138
|
finally:
|
140
|
-
os.chdir(
|
139
|
+
os.chdir(current_dir)
|
hydraflow/core/run.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
This module provides the Run class, which represents an MLflow
|
4
4
|
Run in HydraFlow. A Run contains three main components:
|
5
5
|
|
6
|
-
1. info: Information about the run,
|
6
|
+
1. info: Information about the run, which includes the run directory,
|
7
7
|
run ID, and job name.
|
8
8
|
2. cfg: Configuration loaded from the Hydra configuration file.
|
9
9
|
3. impl: Implementation instance created by the provided
|
@@ -23,7 +23,9 @@ behavior based on the run's configuration.
|
|
23
23
|
from __future__ import annotations
|
24
24
|
|
25
25
|
import inspect
|
26
|
+
import os
|
26
27
|
from collections.abc import Callable, Iterable
|
28
|
+
from contextlib import contextmanager
|
27
29
|
from dataclasses import MISSING
|
28
30
|
from functools import cached_property
|
29
31
|
from pathlib import Path
|
@@ -34,6 +36,7 @@ from omegaconf import DictConfig, OmegaConf
|
|
34
36
|
from .run_info import RunInfo
|
35
37
|
|
36
38
|
if TYPE_CHECKING:
|
39
|
+
from collections.abc import Iterator
|
37
40
|
from typing import Any, Self
|
38
41
|
|
39
42
|
from .run_collection import RunCollection
|
@@ -122,7 +125,7 @@ class Run[C, I = None]:
|
|
122
125
|
def load( # type: ignore
|
123
126
|
cls,
|
124
127
|
run_dir: str | Path,
|
125
|
-
impl_factory: Callable[[Path], I] | Callable[[Path, C], I]
|
128
|
+
impl_factory: Callable[[Path], I] | Callable[[Path, C], I] | None = None,
|
126
129
|
) -> Self: ...
|
127
130
|
|
128
131
|
@overload
|
@@ -130,7 +133,7 @@ class Run[C, I = None]:
|
|
130
133
|
def load(
|
131
134
|
cls,
|
132
135
|
run_dir: Iterable[str | Path],
|
133
|
-
impl_factory: Callable[[Path], I] | Callable[[Path, C], I]
|
136
|
+
impl_factory: Callable[[Path], I] | Callable[[Path, C], I] | None = None,
|
134
137
|
*,
|
135
138
|
n_jobs: int = 0,
|
136
139
|
) -> RunCollection[Self, I]: ...
|
@@ -139,7 +142,7 @@ class Run[C, I = None]:
|
|
139
142
|
def load(
|
140
143
|
cls,
|
141
144
|
run_dir: str | Path | Iterable[str | Path],
|
142
|
-
impl_factory: Callable[[Path], I] | Callable[[Path, C], I]
|
145
|
+
impl_factory: Callable[[Path], I] | Callable[[Path, C], I] | None = None,
|
143
146
|
*,
|
144
147
|
n_jobs: int = 0,
|
145
148
|
) -> Self | RunCollection[Self, I]:
|
@@ -149,11 +152,11 @@ class Run[C, I = None]:
|
|
149
152
|
run_dir (str | Path | Iterable[str | Path]): The directory where the
|
150
153
|
MLflow runs are stored, either as a string, a Path instance,
|
151
154
|
or an iterable of them.
|
152
|
-
impl_factory (Callable[[Path], I] | Callable[[Path, C], I]):
|
153
|
-
function that creates the implementation instance. It
|
154
|
-
either just the artifacts directory path, or both the
|
155
|
-
the configuration instance. Defaults to
|
156
|
-
None.
|
155
|
+
impl_factory (Callable[[Path], I] | Callable[[Path, C], I] | None):
|
156
|
+
A factory function that creates the implementation instance. It
|
157
|
+
can accept either just the artifacts directory path, or both the
|
158
|
+
path and the configuration instance. Defaults to None, in which
|
159
|
+
case a function that returns None is used.
|
157
160
|
n_jobs (int): The number of parallel jobs. If 0 (default), runs
|
158
161
|
sequentially. If -1, uses all available CPU cores.
|
159
162
|
|
@@ -284,10 +287,11 @@ class Run[C, I = None]:
|
|
284
287
|
|
285
288
|
Note:
|
286
289
|
The search order for keys is:
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
290
|
+
|
291
|
+
1. Configuration (`cfg`)
|
292
|
+
2. Implementation (`impl`)
|
293
|
+
3. Run information (`info`)
|
294
|
+
4. Run object itself (`self`)
|
291
295
|
|
292
296
|
"""
|
293
297
|
key = key.replace("__", ".")
|
@@ -298,7 +302,7 @@ class Run[C, I = None]:
|
|
298
302
|
|
299
303
|
for attr in [self.impl, self.info, self]:
|
300
304
|
value = getattr(attr, key, MISSING)
|
301
|
-
if value is not MISSING:
|
305
|
+
if value is not MISSING and not callable(value):
|
302
306
|
return value
|
303
307
|
|
304
308
|
if default is not MISSING:
|
@@ -332,6 +336,70 @@ class Run[C, I = None]:
|
|
332
336
|
|
333
337
|
return standard_dict
|
334
338
|
|
339
|
+
@contextmanager
|
340
|
+
def chdir(self, relative_dir: str = "") -> Iterator[Path]:
|
341
|
+
"""Change the current working directory to the artifact directory.
|
342
|
+
|
343
|
+
This context manager changes the current working directory
|
344
|
+
to the artifact directory of the run.
|
345
|
+
It ensures that the directory is changed back
|
346
|
+
to the original directory after the context is exited.
|
347
|
+
|
348
|
+
Args:
|
349
|
+
relative_dir (str): The relative directory to the artifact
|
350
|
+
directory. Defaults to an empty string.
|
351
|
+
|
352
|
+
Yields:
|
353
|
+
Path: The artifact directory of the run.
|
354
|
+
|
355
|
+
"""
|
356
|
+
artifacts_dir = self.info.run_dir / "artifacts" / relative_dir
|
357
|
+
current_dir = Path.cwd()
|
358
|
+
|
359
|
+
try:
|
360
|
+
os.chdir(artifacts_dir)
|
361
|
+
yield artifacts_dir
|
362
|
+
|
363
|
+
finally:
|
364
|
+
os.chdir(current_dir)
|
365
|
+
|
366
|
+
def path(self, relative_path: str = "") -> Path:
|
367
|
+
"""Return the path relative to the artifact directory.
|
368
|
+
|
369
|
+
Args:
|
370
|
+
relative_path (str): The relative path to the artifact directory.
|
371
|
+
|
372
|
+
Returns:
|
373
|
+
Path: The path relative to the artifact directory.
|
374
|
+
|
375
|
+
"""
|
376
|
+
return self.info.run_dir / "artifacts" / relative_path
|
377
|
+
|
378
|
+
def iterdir(self, relative_dir: str = "") -> Iterator[Path]:
|
379
|
+
"""Iterate over the artifact directories for the run.
|
380
|
+
|
381
|
+
Args:
|
382
|
+
relative_dir (str): The relative directory to iterate over.
|
383
|
+
|
384
|
+
Yields:
|
385
|
+
Path: The artifact directory for the run.
|
386
|
+
|
387
|
+
"""
|
388
|
+
yield from self.path(relative_dir).iterdir()
|
389
|
+
|
390
|
+
def glob(self, pattern: str, relative_dir: str = "") -> Iterator[Path]:
|
391
|
+
"""Glob the artifact directories for the run.
|
392
|
+
|
393
|
+
Args:
|
394
|
+
pattern (str): The pattern to glob.
|
395
|
+
relative_dir (str): The relative directory to glob.
|
396
|
+
|
397
|
+
Yields:
|
398
|
+
Path: The existing artifact paths that match the pattern.
|
399
|
+
|
400
|
+
"""
|
401
|
+
yield from self.path(relative_dir).glob(pattern)
|
402
|
+
|
335
403
|
|
336
404
|
def _flatten_dict(d: dict[str, Any], parent_key: str = "") -> dict[str, Any]:
|
337
405
|
items = []
|
hydraflow/core/run_collection.py
CHANGED
@@ -20,10 +20,13 @@ Example:
|
|
20
20
|
# Sort runs by specific keys
|
21
21
|
sorted_runs = runs.sort("metrics.accuracy", reverse=True)
|
22
22
|
|
23
|
-
# Group runs by model type
|
24
|
-
grouped = runs.group_by("model.type"
|
25
|
-
|
26
|
-
|
23
|
+
# Group runs by model type
|
24
|
+
grouped = runs.group_by("model.type")
|
25
|
+
|
26
|
+
# Compute aggregates on grouped data
|
27
|
+
metrics_df = grouped.agg(
|
28
|
+
avg_acc=lambda rc: sum(r.get("metrics.accuracy") for r in rc) / len(rc)
|
29
|
+
)
|
27
30
|
|
28
31
|
# Convert runs to a DataFrame for analysis
|
29
32
|
df = runs.to_frame("run_id", "model.type", "metrics.accuracy")
|
@@ -44,7 +47,8 @@ from .collection import Collection
|
|
44
47
|
from .run import Run
|
45
48
|
|
46
49
|
if TYPE_CHECKING:
|
47
|
-
from collections.abc import Callable, Iterable
|
50
|
+
from collections.abc import Callable, Iterable, Iterator
|
51
|
+
from pathlib import Path
|
48
52
|
from typing import Any, Self
|
49
53
|
|
50
54
|
|
@@ -166,10 +170,46 @@ class RunCollection[R: Run[Any, Any], I = None](Collection[R]):
|
|
166
170
|
|
167
171
|
@cached_property
|
168
172
|
def impls(self) -> Collection[I]:
|
169
|
-
"""Get the implementation
|
173
|
+
"""Get the implementation objects for all runs in the collection.
|
170
174
|
|
171
175
|
Returns:
|
172
|
-
Collection[
|
176
|
+
Collection[I]: A collection of implementation objects for all runs.
|
173
177
|
|
174
178
|
"""
|
175
179
|
return Collection(run.impl for run in self)
|
180
|
+
|
181
|
+
def iterdir(self, relative_dir: str = "") -> Iterator[Path]:
|
182
|
+
"""Iterate over the artifact directories for all runs in the collection.
|
183
|
+
|
184
|
+
This method yields all files and directories in the specified
|
185
|
+
relative directory for each run in the collection.
|
186
|
+
|
187
|
+
Args:
|
188
|
+
relative_dir (str): The relative directory within the artifacts
|
189
|
+
directory to iterate over.
|
190
|
+
|
191
|
+
Yields:
|
192
|
+
Path: Each path in the specified directory for each run
|
193
|
+
in the collection.
|
194
|
+
|
195
|
+
"""
|
196
|
+
for run in self:
|
197
|
+
yield from run.path(relative_dir).iterdir()
|
198
|
+
|
199
|
+
def glob(self, pattern: str, relative_dir: str = "") -> Iterator[Path]:
|
200
|
+
"""Glob the artifact directories for all runs in the collection.
|
201
|
+
|
202
|
+
This method yields all paths matching the specified pattern
|
203
|
+
in the relative directory for each run in the collection.
|
204
|
+
|
205
|
+
Args:
|
206
|
+
pattern (str): The glob pattern to match files or directories.
|
207
|
+
relative_dir (str): The relative directory within the artifacts
|
208
|
+
directory to search in.
|
209
|
+
|
210
|
+
Yields:
|
211
|
+
Path: Each path matching the pattern for each run in the collection.
|
212
|
+
|
213
|
+
"""
|
214
|
+
for run in self:
|
215
|
+
yield from run.path(relative_dir).glob(pattern)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.17.
|
3
|
+
Version: 0.17.2
|
4
4
|
Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
|
5
5
|
Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -1,14 +1,14 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=
|
1
|
+
hydraflow/__init__.py,sha256=_cLLokEv0pUlwvG8RMnjOwCTtDQBs0-RgGbtDk5m_Xg,794
|
2
2
|
hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
|
3
3
|
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
hydraflow/core/collection.py,sha256=
|
6
|
-
hydraflow/core/context.py,sha256=
|
5
|
+
hydraflow/core/collection.py,sha256=RSYgS4VsGjSm0Inrz4GAng_jmm-Ct_VSDmZ9rvKFQQw,19472
|
6
|
+
hydraflow/core/context.py,sha256=6vpwe0Xfl6mzh2hHLE-4uB9Hjew-CK4pA0KFihQ80U8,4168
|
7
7
|
hydraflow/core/group_by.py,sha256=Pnw-oA5aXHeRG9lMLz-bKc8drqQ8LIRsWzvVn153iyQ,5488
|
8
8
|
hydraflow/core/io.py,sha256=B3-jPuJWttRgpbIpy_XA-Z2qpXzNF1ATwyYEwA7Pv3w,5172
|
9
9
|
hydraflow/core/main.py,sha256=pgr2b9A4VoZuwbApE71NElmV64MFJv8UKda05q4uCqk,6010
|
10
|
-
hydraflow/core/run.py,sha256=
|
11
|
-
hydraflow/core/run_collection.py,sha256=
|
10
|
+
hydraflow/core/run.py,sha256=Kbq4s47f6KDNeyNUwrUpW55FrWlf5CCpmdgVCMakU2g,14046
|
11
|
+
hydraflow/core/run_collection.py,sha256=sdbkjs01ougaqXlp88gGC10TmO_7s-UEQozLl0jMI4Y,6771
|
12
12
|
hydraflow/core/run_info.py,sha256=SMOTZXEa7OBV_XjTyctk5gJGrggmYwhePvRF8CLF1kU,1616
|
13
13
|
hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
|
@@ -16,8 +16,8 @@ hydraflow/executor/conf.py,sha256=8Xq4UAenRKJIl1NBgNbSfv6VUTJhdwPLayZIEAsiBR0,41
|
|
16
16
|
hydraflow/executor/io.py,sha256=18wnHpCMQRGYL-oN2841h9W2aSW_X2SmO68Lx-3FIbU,1043
|
17
17
|
hydraflow/executor/job.py,sha256=6QeJ18OMeocXeM04rCYL46GgArfX1SvZs9_4HTomTgE,5436
|
18
18
|
hydraflow/executor/parser.py,sha256=RxP8qpDaJ8VLqZ51VlPFyVitWctObhkE_3iPIsY66Cs,14610
|
19
|
-
hydraflow-0.17.
|
20
|
-
hydraflow-0.17.
|
21
|
-
hydraflow-0.17.
|
22
|
-
hydraflow-0.17.
|
23
|
-
hydraflow-0.17.
|
19
|
+
hydraflow-0.17.2.dist-info/METADATA,sha256=zEjD1acRRed6Le0G8-KjkWtUYaxXKB6JO9T6StRNkVM,7535
|
20
|
+
hydraflow-0.17.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
21
|
+
hydraflow-0.17.2.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
|
22
|
+
hydraflow-0.17.2.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
23
|
+
hydraflow-0.17.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|