hydraflow 0.14.3__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +3 -13
- hydraflow/core/context.py +12 -32
- hydraflow/core/io.py +36 -115
- hydraflow/core/main.py +3 -3
- hydraflow/core/run.py +341 -0
- hydraflow/core/run_collection.py +525 -0
- hydraflow/core/run_info.py +84 -0
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/METADATA +12 -10
- hydraflow-0.15.0.dist-info/RECORD +21 -0
- hydraflow/core/config.py +0 -122
- hydraflow/core/mlflow.py +0 -174
- hydraflow/core/param.py +0 -165
- hydraflow/entities/__init__.py +0 -0
- hydraflow/entities/run_collection.py +0 -583
- hydraflow/entities/run_data.py +0 -61
- hydraflow/entities/run_info.py +0 -36
- hydraflow-0.14.3.dist-info/RECORD +0 -25
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/licenses/LICENSE +0 -0
hydraflow/core/config.py
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
"""Provide functionality for working with configuration objects using the OmegaConf."""
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import TYPE_CHECKING
|
6
|
-
|
7
|
-
from omegaconf import DictConfig, ListConfig, OmegaConf
|
8
|
-
|
9
|
-
if TYPE_CHECKING:
|
10
|
-
from collections.abc import Iterator
|
11
|
-
from typing import Any
|
12
|
-
|
13
|
-
|
14
|
-
def iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
15
|
-
"""Recursively iterate over the parameters in the given configuration object.
|
16
|
-
|
17
|
-
This function traverses the configuration object and yields key-value pairs
|
18
|
-
representing the parameters. The keys are prefixed with the provided prefix.
|
19
|
-
|
20
|
-
Args:
|
21
|
-
config (Any): The configuration object to iterate over. This can be a
|
22
|
-
dictionary, list, DictConfig, or ListConfig.
|
23
|
-
prefix (str): The prefix to prepend to the parameter keys.
|
24
|
-
Defaults to an empty string.
|
25
|
-
|
26
|
-
Yields:
|
27
|
-
Key-value pairs representing the parameters in the configuration object.
|
28
|
-
|
29
|
-
"""
|
30
|
-
if config is None:
|
31
|
-
return
|
32
|
-
|
33
|
-
if isinstance(config, list) and all(isinstance(x, str) for x in config):
|
34
|
-
config = _from_dotlist(config)
|
35
|
-
|
36
|
-
if not isinstance(config, DictConfig | ListConfig):
|
37
|
-
config = OmegaConf.create(config)
|
38
|
-
|
39
|
-
yield from _iter_params(config, prefix)
|
40
|
-
|
41
|
-
|
42
|
-
def _from_dotlist(config: list[str]) -> dict[str, str]:
|
43
|
-
result = {}
|
44
|
-
for item in config:
|
45
|
-
if "=" in item:
|
46
|
-
key, value = item.split("=", 1)
|
47
|
-
result[key.strip()] = value.strip()
|
48
|
-
|
49
|
-
return result
|
50
|
-
|
51
|
-
|
52
|
-
def _iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
53
|
-
if isinstance(config, DictConfig):
|
54
|
-
for key, value in config.items():
|
55
|
-
if _is_param(value):
|
56
|
-
yield f"{prefix}{key}", _convert(value)
|
57
|
-
|
58
|
-
else:
|
59
|
-
yield from _iter_params(value, f"{prefix}{key}.")
|
60
|
-
|
61
|
-
elif isinstance(config, ListConfig):
|
62
|
-
for index, value in enumerate(config):
|
63
|
-
if _is_param(value):
|
64
|
-
yield f"{prefix}{index}", _convert(value)
|
65
|
-
|
66
|
-
else:
|
67
|
-
yield from _iter_params(value, f"{prefix}{index}.")
|
68
|
-
|
69
|
-
|
70
|
-
def _is_param(value: Any) -> bool:
|
71
|
-
"""Check if the given value is a parameter."""
|
72
|
-
if isinstance(value, DictConfig):
|
73
|
-
return False
|
74
|
-
|
75
|
-
if isinstance(value, ListConfig):
|
76
|
-
if any(isinstance(v, DictConfig | ListConfig) for v in value):
|
77
|
-
return False
|
78
|
-
|
79
|
-
return True
|
80
|
-
|
81
|
-
|
82
|
-
def _convert(value: Any) -> Any:
|
83
|
-
"""Convert the given value to a Python object."""
|
84
|
-
if isinstance(value, ListConfig):
|
85
|
-
return list(value)
|
86
|
-
|
87
|
-
return value
|
88
|
-
|
89
|
-
|
90
|
-
def select_config(config: Any, names: list[str]) -> dict[str, Any]:
|
91
|
-
"""Select the given parameters from the configuration object.
|
92
|
-
|
93
|
-
This function selects the given parameters from the configuration object
|
94
|
-
and returns a new configuration object containing only the selected parameters.
|
95
|
-
|
96
|
-
Args:
|
97
|
-
config (Any): The configuration object to select parameters from.
|
98
|
-
names (list[str]): The names of the parameters to select.
|
99
|
-
|
100
|
-
Returns:
|
101
|
-
DictConfig: A new configuration object containing only the selected parameters.
|
102
|
-
|
103
|
-
"""
|
104
|
-
if not isinstance(config, DictConfig):
|
105
|
-
config = OmegaConf.structured(config)
|
106
|
-
|
107
|
-
return {name: _get(config, name) for name in names}
|
108
|
-
|
109
|
-
|
110
|
-
def _get(config: DictConfig, name: str) -> Any:
|
111
|
-
"""Get the value of the given parameter from the configuration object."""
|
112
|
-
if "." not in name:
|
113
|
-
return config.get(name)
|
114
|
-
|
115
|
-
prefix, name = name.split(".", 1)
|
116
|
-
return _get(config.get(prefix), name)
|
117
|
-
|
118
|
-
|
119
|
-
def select_overrides(config: object, overrides: list[str]) -> dict[str, Any]:
|
120
|
-
"""Select the given overrides from the configuration object."""
|
121
|
-
names = [override.split("=")[0].strip() for override in overrides]
|
122
|
-
return select_config(config, names)
|
hydraflow/core/mlflow.py
DELETED
@@ -1,174 +0,0 @@
|
|
1
|
-
"""Integration of MLflow experiment tracking with Hydra configuration management.
|
2
|
-
|
3
|
-
This module provides functions to log parameters from Hydra configuration objects
|
4
|
-
to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
|
5
|
-
configuration management with MLflow's experiment tracking capabilities.
|
6
|
-
"""
|
7
|
-
|
8
|
-
from __future__ import annotations
|
9
|
-
|
10
|
-
from typing import TYPE_CHECKING
|
11
|
-
|
12
|
-
import joblib
|
13
|
-
|
14
|
-
from hydraflow.core.io import file_uri_to_path, get_artifact_dir
|
15
|
-
from hydraflow.entities.run_collection import RunCollection
|
16
|
-
|
17
|
-
from .config import iter_params
|
18
|
-
|
19
|
-
if TYPE_CHECKING:
|
20
|
-
from pathlib import Path
|
21
|
-
from typing import Any
|
22
|
-
|
23
|
-
|
24
|
-
def log_params(config: Any, *, synchronous: bool | None = None) -> None:
|
25
|
-
"""Log the parameters from the given configuration object.
|
26
|
-
|
27
|
-
This method logs the parameters from the provided configuration object
|
28
|
-
using MLflow. It iterates over the parameters and logs them using the
|
29
|
-
`mlflow.log_param` method.
|
30
|
-
|
31
|
-
Args:
|
32
|
-
config (Any): The configuration object to log the parameters from.
|
33
|
-
synchronous (bool | None): Whether to log the parameters synchronously.
|
34
|
-
Defaults to None.
|
35
|
-
|
36
|
-
"""
|
37
|
-
import mlflow
|
38
|
-
|
39
|
-
for key, value in iter_params(config):
|
40
|
-
mlflow.log_param(key, value, synchronous=synchronous)
|
41
|
-
|
42
|
-
|
43
|
-
def log_text(from_dir: Path, pattern: str = "*.log") -> None:
|
44
|
-
"""Log text files in the given directory as artifacts.
|
45
|
-
|
46
|
-
Append the text files to the existing text file in the artifact directory.
|
47
|
-
|
48
|
-
Args:
|
49
|
-
from_dir (Path): The directory to find the logs in.
|
50
|
-
pattern (str): The pattern to match the logs.
|
51
|
-
|
52
|
-
"""
|
53
|
-
import mlflow
|
54
|
-
|
55
|
-
artifact_dir = get_artifact_dir()
|
56
|
-
|
57
|
-
for file in from_dir.glob(pattern):
|
58
|
-
if not file.is_file():
|
59
|
-
continue
|
60
|
-
|
61
|
-
file_artifact = artifact_dir / file.name
|
62
|
-
if file_artifact.exists():
|
63
|
-
text = file_artifact.read_text()
|
64
|
-
if not text.endswith("\n"):
|
65
|
-
text += "\n"
|
66
|
-
else:
|
67
|
-
text = ""
|
68
|
-
|
69
|
-
text += file.read_text()
|
70
|
-
mlflow.log_text(text, file.name)
|
71
|
-
|
72
|
-
|
73
|
-
def list_run_paths(
|
74
|
-
experiment_names: str | list[str] | None = None,
|
75
|
-
*other: str,
|
76
|
-
) -> list[Path]:
|
77
|
-
"""List all run paths for the specified experiments.
|
78
|
-
|
79
|
-
This function retrieves all run paths for the given list of experiment names.
|
80
|
-
If no experiment names are provided (None), the function will search all runs
|
81
|
-
for all experiments except the "Default" experiment.
|
82
|
-
|
83
|
-
Args:
|
84
|
-
experiment_names (list[str] | None): List of experiment names to search
|
85
|
-
for runs. If None is provided, the function will search all runs
|
86
|
-
for all experiments except the "Default" experiment.
|
87
|
-
*other (str): The parts of the run directory to join.
|
88
|
-
|
89
|
-
Returns:
|
90
|
-
list[Path]: A list of run paths for the specified experiments.
|
91
|
-
|
92
|
-
"""
|
93
|
-
import mlflow
|
94
|
-
|
95
|
-
if isinstance(experiment_names, str):
|
96
|
-
experiment_names = [experiment_names]
|
97
|
-
|
98
|
-
elif experiment_names is None:
|
99
|
-
experiments = mlflow.search_experiments()
|
100
|
-
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
101
|
-
|
102
|
-
run_paths: list[Path] = []
|
103
|
-
|
104
|
-
for name in experiment_names:
|
105
|
-
if experiment := mlflow.get_experiment_by_name(name):
|
106
|
-
uri = experiment.artifact_location
|
107
|
-
|
108
|
-
if isinstance(uri, str):
|
109
|
-
path = file_uri_to_path(uri)
|
110
|
-
run_paths.extend(p for p in path.iterdir() if p.is_dir())
|
111
|
-
|
112
|
-
if other:
|
113
|
-
return [p.joinpath(*other) for p in run_paths]
|
114
|
-
|
115
|
-
return run_paths
|
116
|
-
|
117
|
-
|
118
|
-
def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
|
119
|
-
"""List all run IDs for the specified experiments.
|
120
|
-
|
121
|
-
This function retrieves all runs for the given list of experiment names.
|
122
|
-
If no experiment names are provided (None), the function will search all
|
123
|
-
runs for all experiments except the "Default" experiment.
|
124
|
-
|
125
|
-
Args:
|
126
|
-
experiment_names (list[str] | None): List of experiment names to search
|
127
|
-
for runs. If None is provided, the function will search all runs
|
128
|
-
for all experiments except the "Default" experiment.
|
129
|
-
|
130
|
-
Returns:
|
131
|
-
list[str]: A list of run IDs for the specified experiments.
|
132
|
-
|
133
|
-
"""
|
134
|
-
return [run_path.stem for run_path in list_run_paths(experiment_names)]
|
135
|
-
|
136
|
-
|
137
|
-
def list_runs(
|
138
|
-
experiment_names: str | list[str] | None = None,
|
139
|
-
n_jobs: int = 0,
|
140
|
-
) -> RunCollection:
|
141
|
-
"""List all runs for the specified experiments.
|
142
|
-
|
143
|
-
This function retrieves all runs for the given list of experiment names.
|
144
|
-
If no experiment names are provided (None), the function will search all runs
|
145
|
-
for all experiments except the "Default" experiment.
|
146
|
-
The function returns the results as a `RunCollection` object.
|
147
|
-
|
148
|
-
Note:
|
149
|
-
The returned runs are sorted by their start time in ascending order.
|
150
|
-
|
151
|
-
Args:
|
152
|
-
experiment_names (list[str] | None): List of experiment names to search
|
153
|
-
for runs. If None is provided, the function will search all runs
|
154
|
-
for all experiments except the "Default" experiment.
|
155
|
-
n_jobs (int): The number of jobs to retrieve runs in parallel.
|
156
|
-
|
157
|
-
Returns:
|
158
|
-
RunCollection: A `RunCollection` instance containing the runs for the
|
159
|
-
specified experiments.
|
160
|
-
|
161
|
-
"""
|
162
|
-
import mlflow
|
163
|
-
|
164
|
-
run_ids = list_run_ids(experiment_names)
|
165
|
-
|
166
|
-
if n_jobs == 0:
|
167
|
-
runs = [mlflow.get_run(run_id) for run_id in run_ids]
|
168
|
-
|
169
|
-
else:
|
170
|
-
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
171
|
-
runs = joblib.Parallel(n_jobs, backend="threading")(it)
|
172
|
-
|
173
|
-
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
174
|
-
return RunCollection(runs) # type: ignore
|
hydraflow/core/param.py
DELETED
@@ -1,165 +0,0 @@
|
|
1
|
-
"""Provide utility functions for parameter matching.
|
2
|
-
|
3
|
-
The main function `match` checks if a given parameter matches a specified value.
|
4
|
-
It supports various types of values including None, boolean, list, tuple, int,
|
5
|
-
float, and str.
|
6
|
-
|
7
|
-
Helper functions `_match_list` and `_match_tuple` are used internally to handle
|
8
|
-
matching for list and tuple types respectively.
|
9
|
-
"""
|
10
|
-
|
11
|
-
from __future__ import annotations
|
12
|
-
|
13
|
-
from typing import TYPE_CHECKING, Any
|
14
|
-
|
15
|
-
from omegaconf import ListConfig, OmegaConf
|
16
|
-
|
17
|
-
if TYPE_CHECKING:
|
18
|
-
from mlflow.entities import Run
|
19
|
-
|
20
|
-
|
21
|
-
def match(param: str, value: Any) -> bool:
|
22
|
-
"""Check if the string matches the specified value.
|
23
|
-
|
24
|
-
Args:
|
25
|
-
param (str): The parameter to check.
|
26
|
-
value (Any): The value to check.
|
27
|
-
|
28
|
-
Returns:
|
29
|
-
True if the parameter matches the specified value,
|
30
|
-
False otherwise.
|
31
|
-
|
32
|
-
"""
|
33
|
-
if callable(value):
|
34
|
-
return value(param)
|
35
|
-
|
36
|
-
if any(value is x for x in [None, True, False]):
|
37
|
-
return param == str(value)
|
38
|
-
|
39
|
-
if isinstance(value, list) and (m := _match_list(param, value)) is not None:
|
40
|
-
return m
|
41
|
-
|
42
|
-
if isinstance(value, tuple) and (m := _match_tuple(param, value)) is not None:
|
43
|
-
return m
|
44
|
-
|
45
|
-
if isinstance(value, int | float):
|
46
|
-
return float(param) == value
|
47
|
-
|
48
|
-
if isinstance(value, str):
|
49
|
-
return param == value
|
50
|
-
|
51
|
-
return param == str(value)
|
52
|
-
|
53
|
-
|
54
|
-
def _match_list(param: str, value: list) -> bool | None:
|
55
|
-
if not value:
|
56
|
-
return None
|
57
|
-
|
58
|
-
if any(param.startswith(x) for x in ["[", "(", "{"]):
|
59
|
-
return None
|
60
|
-
|
61
|
-
if isinstance(value[0], bool):
|
62
|
-
return None
|
63
|
-
|
64
|
-
if not isinstance(value[0], int | float | str):
|
65
|
-
return None
|
66
|
-
|
67
|
-
return type(value[0])(param) in value
|
68
|
-
|
69
|
-
|
70
|
-
def _match_tuple(param: str, value: tuple) -> bool | None:
|
71
|
-
if len(value) != 2:
|
72
|
-
return None
|
73
|
-
|
74
|
-
if any(param.startswith(x) for x in ["[", "(", "{"]):
|
75
|
-
return None
|
76
|
-
|
77
|
-
if isinstance(value[0], bool):
|
78
|
-
return None
|
79
|
-
|
80
|
-
if not isinstance(value[0], int | float | str):
|
81
|
-
return None
|
82
|
-
|
83
|
-
if type(value[0]) is not type(value[1]):
|
84
|
-
return None
|
85
|
-
|
86
|
-
return value[0] <= type(value[0])(param) <= value[1] # type: ignore
|
87
|
-
|
88
|
-
|
89
|
-
def to_value(param: str | None, type_: type) -> Any:
|
90
|
-
"""Convert the parameter to the specified type.
|
91
|
-
|
92
|
-
Args:
|
93
|
-
param (str | None): The parameter to convert.
|
94
|
-
type_ (type): The type to convert to.
|
95
|
-
|
96
|
-
Returns:
|
97
|
-
The converted value.
|
98
|
-
|
99
|
-
"""
|
100
|
-
if param is None or param == "None":
|
101
|
-
return None
|
102
|
-
|
103
|
-
if type_ is int:
|
104
|
-
return int(param)
|
105
|
-
|
106
|
-
if type_ is float:
|
107
|
-
return float(param)
|
108
|
-
|
109
|
-
if type_ is bool:
|
110
|
-
return param == "True"
|
111
|
-
|
112
|
-
if type_ is list or type_ is ListConfig:
|
113
|
-
return list(OmegaConf.create(param))
|
114
|
-
|
115
|
-
return param
|
116
|
-
|
117
|
-
|
118
|
-
def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
|
119
|
-
"""Retrieve the values of specified parameters from the given run.
|
120
|
-
|
121
|
-
This function extracts the values of the parameters identified by the
|
122
|
-
provided names from the specified run. It can accept both individual
|
123
|
-
parameter names and lists of parameter names.
|
124
|
-
|
125
|
-
Args:
|
126
|
-
run (Run): The run object from which to extract parameter values.
|
127
|
-
*names (str | list[str]): The names of the parameters to retrieve.
|
128
|
-
This can be a single parameter name or multiple names provided
|
129
|
-
as separate arguments or as a list.
|
130
|
-
|
131
|
-
Returns:
|
132
|
-
tuple[str | None, ...]: A tuple containing the values of the specified
|
133
|
-
parameters in the order they were provided.
|
134
|
-
|
135
|
-
"""
|
136
|
-
names_ = []
|
137
|
-
for name in names:
|
138
|
-
if isinstance(name, list):
|
139
|
-
names_.extend(name)
|
140
|
-
else:
|
141
|
-
names_.append(name)
|
142
|
-
|
143
|
-
params = run.data.params
|
144
|
-
return tuple(params.get(name) for name in names_)
|
145
|
-
|
146
|
-
|
147
|
-
def get_values(run: Run, names: list[str], types: list[type]) -> tuple[Any, ...]:
|
148
|
-
"""Retrieve the values of specified parameters from the given run.
|
149
|
-
|
150
|
-
This function extracts the values of the parameters identified by the
|
151
|
-
provided names from the specified run.
|
152
|
-
|
153
|
-
Args:
|
154
|
-
run (Run): The run object from which to extract parameter values.
|
155
|
-
names (list[str]): The names of the parameters to retrieve.
|
156
|
-
types (list[type]): The types to convert to.
|
157
|
-
|
158
|
-
Returns:
|
159
|
-
tuple[Any, ...]: A tuple containing the values of the specified
|
160
|
-
parameters in the order they were provided.
|
161
|
-
|
162
|
-
"""
|
163
|
-
params = get_params(run, names)
|
164
|
-
it = zip(params, types, strict=True)
|
165
|
-
return tuple(to_value(param, type_) for param, type_ in it)
|
hydraflow/entities/__init__.py
DELETED
File without changes
|