hydraflow 0.14.4__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +3 -13
- hydraflow/core/context.py +12 -32
- hydraflow/core/io.py +36 -115
- hydraflow/core/main.py +3 -3
- hydraflow/core/run.py +355 -0
- hydraflow/core/run_collection.py +525 -0
- hydraflow/core/run_info.py +84 -0
- hydraflow/executor/conf.py +6 -6
- hydraflow/executor/io.py +1 -17
- hydraflow/executor/job.py +41 -14
- hydraflow/executor/parser.py +9 -8
- {hydraflow-0.14.4.dist-info → hydraflow-0.15.1.dist-info}/METADATA +11 -9
- hydraflow-0.15.1.dist-info/RECORD +21 -0
- hydraflow/core/config.py +0 -122
- hydraflow/core/mlflow.py +0 -174
- hydraflow/core/param.py +0 -165
- hydraflow/entities/__init__.py +0 -0
- hydraflow/entities/run_collection.py +0 -583
- hydraflow/entities/run_data.py +0 -61
- hydraflow/entities/run_info.py +0 -36
- hydraflow-0.14.4.dist-info/RECORD +0 -25
- {hydraflow-0.14.4.dist-info → hydraflow-0.15.1.dist-info}/WHEEL +0 -0
- {hydraflow-0.14.4.dist-info → hydraflow-0.15.1.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.14.4.dist-info → hydraflow-0.15.1.dist-info}/licenses/LICENSE +0 -0
hydraflow/executor/job.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
This module provides functionality for executing jobs in HydraFlow, including:
|
4
4
|
|
5
|
-
- Argument parsing and expansion for job
|
5
|
+
- Argument parsing and expansion for job parameter sets
|
6
6
|
- Batch processing of Hydra configurations
|
7
7
|
- Execution of jobs via shell commands or Python functions
|
8
8
|
|
@@ -11,8 +11,9 @@ The module supports two execution modes:
|
|
11
11
|
1. Shell command execution
|
12
12
|
2. Python function calls
|
13
13
|
|
14
|
-
Each job can consist of multiple
|
15
|
-
arguments and configurations that will be expanded
|
14
|
+
Each job can consist of multiple parameter sets, and each parameter
|
15
|
+
set can have its own arguments and configurations that will be expanded
|
16
|
+
into multiple runs.
|
16
17
|
"""
|
17
18
|
|
18
19
|
from __future__ import annotations
|
@@ -39,24 +40,24 @@ if TYPE_CHECKING:
|
|
39
40
|
from .conf import Job
|
40
41
|
|
41
42
|
|
42
|
-
def iter_args(
|
43
|
+
def iter_args(each: str, all_: str) -> Iterator[list[str]]:
|
43
44
|
"""Iterate over combinations generated from parsed arguments.
|
44
45
|
|
45
46
|
Generate all possible combinations of arguments by parsing and
|
46
47
|
expanding each one, yielding them as an iterator.
|
47
48
|
|
48
49
|
Args:
|
49
|
-
|
50
|
-
|
50
|
+
each (str): The 'each' parameter to parse.
|
51
|
+
all_ (str): The 'all' parameter to parse.
|
51
52
|
|
52
53
|
Yields:
|
53
54
|
list[str]: a list of the parsed argument combinations.
|
54
55
|
|
55
56
|
"""
|
56
|
-
|
57
|
+
all_params = collect(all_)
|
57
58
|
|
58
|
-
for
|
59
|
-
yield [*
|
59
|
+
for each_params in expand(each):
|
60
|
+
yield [*each_params, *all_params]
|
60
61
|
|
61
62
|
|
62
63
|
def iter_batches(job: Job) -> Iterator[list[str]]:
|
@@ -74,14 +75,40 @@ def iter_batches(job: Job) -> Iterator[list[str]]:
|
|
74
75
|
|
75
76
|
"""
|
76
77
|
job_name = f"hydra.job.name={job.name}"
|
77
|
-
|
78
|
+
job_add = shlex.split(job.add)
|
78
79
|
|
79
|
-
for
|
80
|
-
|
80
|
+
for set_ in job.sets:
|
81
|
+
add = merge_args(job_add, shlex.split(set_.add)) if set_.add else job_add
|
81
82
|
|
82
|
-
for args in iter_args(
|
83
|
+
for args in iter_args(set_.each, set_.all):
|
83
84
|
sweep_dir = f"hydra.sweep.dir=multirun/{ulid.ULID()}"
|
84
|
-
yield ["--multirun", *args, job_name, sweep_dir, *
|
85
|
+
yield ["--multirun", *args, job_name, sweep_dir, *add]
|
86
|
+
|
87
|
+
|
88
|
+
def merge_args(first: list[str], second: list[str]) -> list[str]:
|
89
|
+
"""Merge two lists of arguments.
|
90
|
+
|
91
|
+
This function merges two lists of arguments by checking for conflicts
|
92
|
+
and resolving them by keeping the values from the second list.
|
93
|
+
|
94
|
+
Args:
|
95
|
+
first (list[str]): The first list of arguments.
|
96
|
+
second (list[str]): The second list of arguments.
|
97
|
+
|
98
|
+
Returns:
|
99
|
+
list[str]: A merged list of arguments.
|
100
|
+
|
101
|
+
"""
|
102
|
+
merged = {}
|
103
|
+
|
104
|
+
for item in [*first, *second]:
|
105
|
+
if "=" in item:
|
106
|
+
key, value = item.split("=", 1)
|
107
|
+
merged[key] = value
|
108
|
+
else:
|
109
|
+
merged[item] = None
|
110
|
+
|
111
|
+
return [k if v is None else f"{k}={v}" for k, v in merged.items()]
|
85
112
|
|
86
113
|
|
87
114
|
@dataclass
|
hydraflow/executor/parser.py
CHANGED
@@ -165,25 +165,26 @@ SUFFIX_EXPONENT = {
|
|
165
165
|
|
166
166
|
|
167
167
|
def _get_range(arg: str) -> tuple[float, float, float]:
|
168
|
+
"""Return a tuple of (start, stop, step)."""
|
168
169
|
args = [to_number(x) for x in arg.split(":")]
|
169
170
|
|
170
171
|
if len(args) == 2:
|
171
172
|
if args[0] > args[1]:
|
172
173
|
raise ValueError("start cannot be greater than stop")
|
173
174
|
|
174
|
-
return (args[0], 1,
|
175
|
+
return (args[0], args[1], 1)
|
175
176
|
|
176
|
-
if args[
|
177
|
+
if args[2] == 0:
|
177
178
|
raise ValueError("step cannot be zero")
|
178
|
-
if args[
|
179
|
+
if args[2] > 0 and args[0] > args[1]:
|
179
180
|
raise ValueError("start cannot be greater than stop")
|
180
|
-
if args[
|
181
|
+
if args[2] < 0 and args[0] < args[1]:
|
181
182
|
raise ValueError("start cannot be less than stop")
|
182
183
|
|
183
184
|
return args[0], args[1], args[2]
|
184
185
|
|
185
186
|
|
186
|
-
def _arange(start: float,
|
187
|
+
def _arange(start: float, stop: float, step: float) -> list[float]:
|
187
188
|
"""Generate a range of floating point numbers.
|
188
189
|
|
189
190
|
This function generates a range of floating point numbers
|
@@ -191,8 +192,8 @@ def _arange(start: float, step: float, stop: float) -> list[float]:
|
|
191
192
|
|
192
193
|
Args:
|
193
194
|
start (float): The starting value.
|
194
|
-
step (float): The step size.
|
195
195
|
stop (float): The end value (inclusive).
|
196
|
+
step (float): The step size.
|
196
197
|
|
197
198
|
Returns:
|
198
199
|
list[float]: A list of floating point numbers from start to stop
|
@@ -323,7 +324,7 @@ def collect_parentheses(arg: str) -> list[str]:
|
|
323
324
|
list[str]: A list of the collected values.
|
324
325
|
|
325
326
|
Examples:
|
326
|
-
>>> collect_parentheses("(1:3,5:2
|
327
|
+
>>> collect_parentheses("(1:3,5:9:2,20)k")
|
327
328
|
['1e3', '2e3', '3e3', '5e3', '7e3', '9e3', '20e3']
|
328
329
|
>>> collect_parentheses("2e(-1,-2,-3)")
|
329
330
|
['2e-1', '2e-2', '2e-3']
|
@@ -352,7 +353,7 @@ def collect_values(arg: str) -> list[str]:
|
|
352
353
|
Examples:
|
353
354
|
>>> collect_values("1:4")
|
354
355
|
['1', '2', '3', '4']
|
355
|
-
>>> collect_values("1.2:0.1:
|
356
|
+
>>> collect_values("1.2:1.4:0.1:k")
|
356
357
|
['1.2e3', '1.3e3', '1.4e3']
|
357
358
|
>>> collect_values("0.1")
|
358
359
|
['0.1']
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.15.1
|
4
4
|
Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
|
5
5
|
Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -36,40 +36,40 @@ Classifier: Intended Audience :: Science/Research
|
|
36
36
|
Classifier: License :: OSI Approved :: MIT License
|
37
37
|
Classifier: Operating System :: OS Independent
|
38
38
|
Classifier: Programming Language :: Python
|
39
|
-
Classifier: Programming Language :: Python :: 3.10
|
40
|
-
Classifier: Programming Language :: Python :: 3.11
|
41
|
-
Classifier: Programming Language :: Python :: 3.12
|
42
39
|
Classifier: Programming Language :: Python :: 3.13
|
43
40
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
44
41
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
45
|
-
Requires-Python: >=3.
|
42
|
+
Requires-Python: >=3.13
|
46
43
|
Requires-Dist: hydra-core>=1.3
|
44
|
+
Requires-Dist: joblib>=1.4.0
|
47
45
|
Requires-Dist: mlflow>=2.15
|
48
46
|
Requires-Dist: omegaconf>=2.3
|
47
|
+
Requires-Dist: polars>=1.26
|
49
48
|
Requires-Dist: python-ulid>=3.0.0
|
50
49
|
Requires-Dist: rich>=13.9
|
50
|
+
Requires-Dist: ruff>=0.11
|
51
51
|
Requires-Dist: typer>=0.15
|
52
52
|
Description-Content-Type: text/markdown
|
53
53
|
|
54
54
|
# Hydraflow
|
55
55
|
|
56
56
|
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
57
|
-
[![Python Version][python-v-image]][python-v-link]
|
58
57
|
[![Build Status][GHAction-image]][GHAction-link]
|
59
58
|
[![Coverage Status][codecov-image]][codecov-link]
|
60
59
|
[![Documentation Status][docs-image]][docs-link]
|
60
|
+
[![Python Version][python-v-image]][python-v-link]
|
61
61
|
|
62
62
|
<!-- Badges -->
|
63
63
|
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
64
64
|
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
65
|
-
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
66
|
-
[python-v-link]: https://pypi.org/project/hydraflow
|
67
65
|
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
|
68
66
|
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
69
67
|
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
70
68
|
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
71
|
-
[docs-image]: https://
|
69
|
+
[docs-image]: https://img.shields.io/badge/docs-latest-blue.svg
|
72
70
|
[docs-link]: https://daizutabi.github.io/hydraflow/
|
71
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
72
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
73
73
|
|
74
74
|
## Overview
|
75
75
|
|
@@ -101,6 +101,8 @@ You can install Hydraflow via pip:
|
|
101
101
|
pip install hydraflow
|
102
102
|
```
|
103
103
|
|
104
|
+
**Requirements:** Python 3.13+
|
105
|
+
|
104
106
|
## Quick Start
|
105
107
|
|
106
108
|
Here is a simple example to get you started with Hydraflow:
|
@@ -0,0 +1,21 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=5ByA9ogtS5ZfIYIUSMUjMwAIpr6xGXEXmcABOu4O8RA,673
|
2
|
+
hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
|
3
|
+
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
hydraflow/core/context.py,sha256=LFPNJxmuJQ2VUt-WBU07MC3ySbjlY8rRZ8VxuAih4o4,4148
|
6
|
+
hydraflow/core/io.py,sha256=ZBXIL_jlBUiCI0L_J6S5S4OwtBMvdVVMXnekzMuC_JA,4404
|
7
|
+
hydraflow/core/main.py,sha256=b9o6Rpn3uoXfDB8o0XZdl-g1yX2SKkOT12-H7lB8Les,5158
|
8
|
+
hydraflow/core/run.py,sha256=KqaMdRUBOzOU4vkrRUczCrPCsVx30-XUQ_e78B78BSU,12330
|
9
|
+
hydraflow/core/run_collection.py,sha256=pV3N83uBhmda9OeaNz1jqpF9z6A9j3jfUHtqy-uxCs4,15671
|
10
|
+
hydraflow/core/run_info.py,sha256=3dW9GgWnZZNwbXwMrw-85AqQ956zlQddUi9irSNLR5g,2550
|
11
|
+
hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
+
hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
|
13
|
+
hydraflow/executor/conf.py,sha256=8Xq4UAenRKJIl1NBgNbSfv6VUTJhdwPLayZIEAsiBR0,414
|
14
|
+
hydraflow/executor/io.py,sha256=18wnHpCMQRGYL-oN2841h9W2aSW_X2SmO68Lx-3FIbU,1043
|
15
|
+
hydraflow/executor/job.py,sha256=6QeJ18OMeocXeM04rCYL46GgArfX1SvZs9_4HTomTgE,5436
|
16
|
+
hydraflow/executor/parser.py,sha256=RxP8qpDaJ8VLqZ51VlPFyVitWctObhkE_3iPIsY66Cs,14610
|
17
|
+
hydraflow-0.15.1.dist-info/METADATA,sha256=oC-UgH0sZKw2Ry1kBiMPpNobxzlLhmhQgS8W3TIvGJI,7238
|
18
|
+
hydraflow-0.15.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
19
|
+
hydraflow-0.15.1.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
|
20
|
+
hydraflow-0.15.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
21
|
+
hydraflow-0.15.1.dist-info/RECORD,,
|
hydraflow/core/config.py
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
"""Provide functionality for working with configuration objects using the OmegaConf."""
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import TYPE_CHECKING
|
6
|
-
|
7
|
-
from omegaconf import DictConfig, ListConfig, OmegaConf
|
8
|
-
|
9
|
-
if TYPE_CHECKING:
|
10
|
-
from collections.abc import Iterator
|
11
|
-
from typing import Any
|
12
|
-
|
13
|
-
|
14
|
-
def iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
15
|
-
"""Recursively iterate over the parameters in the given configuration object.
|
16
|
-
|
17
|
-
This function traverses the configuration object and yields key-value pairs
|
18
|
-
representing the parameters. The keys are prefixed with the provided prefix.
|
19
|
-
|
20
|
-
Args:
|
21
|
-
config (Any): The configuration object to iterate over. This can be a
|
22
|
-
dictionary, list, DictConfig, or ListConfig.
|
23
|
-
prefix (str): The prefix to prepend to the parameter keys.
|
24
|
-
Defaults to an empty string.
|
25
|
-
|
26
|
-
Yields:
|
27
|
-
Key-value pairs representing the parameters in the configuration object.
|
28
|
-
|
29
|
-
"""
|
30
|
-
if config is None:
|
31
|
-
return
|
32
|
-
|
33
|
-
if isinstance(config, list) and all(isinstance(x, str) for x in config):
|
34
|
-
config = _from_dotlist(config)
|
35
|
-
|
36
|
-
if not isinstance(config, DictConfig | ListConfig):
|
37
|
-
config = OmegaConf.create(config)
|
38
|
-
|
39
|
-
yield from _iter_params(config, prefix)
|
40
|
-
|
41
|
-
|
42
|
-
def _from_dotlist(config: list[str]) -> dict[str, str]:
|
43
|
-
result = {}
|
44
|
-
for item in config:
|
45
|
-
if "=" in item:
|
46
|
-
key, value = item.split("=", 1)
|
47
|
-
result[key.strip()] = value.strip()
|
48
|
-
|
49
|
-
return result
|
50
|
-
|
51
|
-
|
52
|
-
def _iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
53
|
-
if isinstance(config, DictConfig):
|
54
|
-
for key, value in config.items():
|
55
|
-
if _is_param(value):
|
56
|
-
yield f"{prefix}{key}", _convert(value)
|
57
|
-
|
58
|
-
else:
|
59
|
-
yield from _iter_params(value, f"{prefix}{key}.")
|
60
|
-
|
61
|
-
elif isinstance(config, ListConfig):
|
62
|
-
for index, value in enumerate(config):
|
63
|
-
if _is_param(value):
|
64
|
-
yield f"{prefix}{index}", _convert(value)
|
65
|
-
|
66
|
-
else:
|
67
|
-
yield from _iter_params(value, f"{prefix}{index}.")
|
68
|
-
|
69
|
-
|
70
|
-
def _is_param(value: Any) -> bool:
|
71
|
-
"""Check if the given value is a parameter."""
|
72
|
-
if isinstance(value, DictConfig):
|
73
|
-
return False
|
74
|
-
|
75
|
-
if isinstance(value, ListConfig):
|
76
|
-
if any(isinstance(v, DictConfig | ListConfig) for v in value):
|
77
|
-
return False
|
78
|
-
|
79
|
-
return True
|
80
|
-
|
81
|
-
|
82
|
-
def _convert(value: Any) -> Any:
|
83
|
-
"""Convert the given value to a Python object."""
|
84
|
-
if isinstance(value, ListConfig):
|
85
|
-
return list(value)
|
86
|
-
|
87
|
-
return value
|
88
|
-
|
89
|
-
|
90
|
-
def select_config(config: Any, names: list[str]) -> dict[str, Any]:
|
91
|
-
"""Select the given parameters from the configuration object.
|
92
|
-
|
93
|
-
This function selects the given parameters from the configuration object
|
94
|
-
and returns a new configuration object containing only the selected parameters.
|
95
|
-
|
96
|
-
Args:
|
97
|
-
config (Any): The configuration object to select parameters from.
|
98
|
-
names (list[str]): The names of the parameters to select.
|
99
|
-
|
100
|
-
Returns:
|
101
|
-
DictConfig: A new configuration object containing only the selected parameters.
|
102
|
-
|
103
|
-
"""
|
104
|
-
if not isinstance(config, DictConfig):
|
105
|
-
config = OmegaConf.structured(config)
|
106
|
-
|
107
|
-
return {name: _get(config, name) for name in names}
|
108
|
-
|
109
|
-
|
110
|
-
def _get(config: DictConfig, name: str) -> Any:
|
111
|
-
"""Get the value of the given parameter from the configuration object."""
|
112
|
-
if "." not in name:
|
113
|
-
return config.get(name)
|
114
|
-
|
115
|
-
prefix, name = name.split(".", 1)
|
116
|
-
return _get(config.get(prefix), name)
|
117
|
-
|
118
|
-
|
119
|
-
def select_overrides(config: object, overrides: list[str]) -> dict[str, Any]:
|
120
|
-
"""Select the given overrides from the configuration object."""
|
121
|
-
names = [override.split("=")[0].strip() for override in overrides]
|
122
|
-
return select_config(config, names)
|
hydraflow/core/mlflow.py
DELETED
@@ -1,174 +0,0 @@
|
|
1
|
-
"""Integration of MLflow experiment tracking with Hydra configuration management.
|
2
|
-
|
3
|
-
This module provides functions to log parameters from Hydra configuration objects
|
4
|
-
to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
|
5
|
-
configuration management with MLflow's experiment tracking capabilities.
|
6
|
-
"""
|
7
|
-
|
8
|
-
from __future__ import annotations
|
9
|
-
|
10
|
-
from typing import TYPE_CHECKING
|
11
|
-
|
12
|
-
import joblib
|
13
|
-
|
14
|
-
from hydraflow.core.io import file_uri_to_path, get_artifact_dir
|
15
|
-
from hydraflow.entities.run_collection import RunCollection
|
16
|
-
|
17
|
-
from .config import iter_params
|
18
|
-
|
19
|
-
if TYPE_CHECKING:
|
20
|
-
from pathlib import Path
|
21
|
-
from typing import Any
|
22
|
-
|
23
|
-
|
24
|
-
def log_params(config: Any, *, synchronous: bool | None = None) -> None:
|
25
|
-
"""Log the parameters from the given configuration object.
|
26
|
-
|
27
|
-
This method logs the parameters from the provided configuration object
|
28
|
-
using MLflow. It iterates over the parameters and logs them using the
|
29
|
-
`mlflow.log_param` method.
|
30
|
-
|
31
|
-
Args:
|
32
|
-
config (Any): The configuration object to log the parameters from.
|
33
|
-
synchronous (bool | None): Whether to log the parameters synchronously.
|
34
|
-
Defaults to None.
|
35
|
-
|
36
|
-
"""
|
37
|
-
import mlflow
|
38
|
-
|
39
|
-
for key, value in iter_params(config):
|
40
|
-
mlflow.log_param(key, value, synchronous=synchronous)
|
41
|
-
|
42
|
-
|
43
|
-
def log_text(from_dir: Path, pattern: str = "*.log") -> None:
|
44
|
-
"""Log text files in the given directory as artifacts.
|
45
|
-
|
46
|
-
Append the text files to the existing text file in the artifact directory.
|
47
|
-
|
48
|
-
Args:
|
49
|
-
from_dir (Path): The directory to find the logs in.
|
50
|
-
pattern (str): The pattern to match the logs.
|
51
|
-
|
52
|
-
"""
|
53
|
-
import mlflow
|
54
|
-
|
55
|
-
artifact_dir = get_artifact_dir()
|
56
|
-
|
57
|
-
for file in from_dir.glob(pattern):
|
58
|
-
if not file.is_file():
|
59
|
-
continue
|
60
|
-
|
61
|
-
file_artifact = artifact_dir / file.name
|
62
|
-
if file_artifact.exists():
|
63
|
-
text = file_artifact.read_text()
|
64
|
-
if not text.endswith("\n"):
|
65
|
-
text += "\n"
|
66
|
-
else:
|
67
|
-
text = ""
|
68
|
-
|
69
|
-
text += file.read_text()
|
70
|
-
mlflow.log_text(text, file.name)
|
71
|
-
|
72
|
-
|
73
|
-
def list_run_paths(
|
74
|
-
experiment_names: str | list[str] | None = None,
|
75
|
-
*other: str,
|
76
|
-
) -> list[Path]:
|
77
|
-
"""List all run paths for the specified experiments.
|
78
|
-
|
79
|
-
This function retrieves all run paths for the given list of experiment names.
|
80
|
-
If no experiment names are provided (None), the function will search all runs
|
81
|
-
for all experiments except the "Default" experiment.
|
82
|
-
|
83
|
-
Args:
|
84
|
-
experiment_names (list[str] | None): List of experiment names to search
|
85
|
-
for runs. If None is provided, the function will search all runs
|
86
|
-
for all experiments except the "Default" experiment.
|
87
|
-
*other (str): The parts of the run directory to join.
|
88
|
-
|
89
|
-
Returns:
|
90
|
-
list[Path]: A list of run paths for the specified experiments.
|
91
|
-
|
92
|
-
"""
|
93
|
-
import mlflow
|
94
|
-
|
95
|
-
if isinstance(experiment_names, str):
|
96
|
-
experiment_names = [experiment_names]
|
97
|
-
|
98
|
-
elif experiment_names is None:
|
99
|
-
experiments = mlflow.search_experiments()
|
100
|
-
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
101
|
-
|
102
|
-
run_paths: list[Path] = []
|
103
|
-
|
104
|
-
for name in experiment_names:
|
105
|
-
if experiment := mlflow.get_experiment_by_name(name):
|
106
|
-
uri = experiment.artifact_location
|
107
|
-
|
108
|
-
if isinstance(uri, str):
|
109
|
-
path = file_uri_to_path(uri)
|
110
|
-
run_paths.extend(p for p in path.iterdir() if p.is_dir())
|
111
|
-
|
112
|
-
if other:
|
113
|
-
return [p.joinpath(*other) for p in run_paths]
|
114
|
-
|
115
|
-
return run_paths
|
116
|
-
|
117
|
-
|
118
|
-
def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
|
119
|
-
"""List all run IDs for the specified experiments.
|
120
|
-
|
121
|
-
This function retrieves all runs for the given list of experiment names.
|
122
|
-
If no experiment names are provided (None), the function will search all
|
123
|
-
runs for all experiments except the "Default" experiment.
|
124
|
-
|
125
|
-
Args:
|
126
|
-
experiment_names (list[str] | None): List of experiment names to search
|
127
|
-
for runs. If None is provided, the function will search all runs
|
128
|
-
for all experiments except the "Default" experiment.
|
129
|
-
|
130
|
-
Returns:
|
131
|
-
list[str]: A list of run IDs for the specified experiments.
|
132
|
-
|
133
|
-
"""
|
134
|
-
return [run_path.stem for run_path in list_run_paths(experiment_names)]
|
135
|
-
|
136
|
-
|
137
|
-
def list_runs(
|
138
|
-
experiment_names: str | list[str] | None = None,
|
139
|
-
n_jobs: int = 0,
|
140
|
-
) -> RunCollection:
|
141
|
-
"""List all runs for the specified experiments.
|
142
|
-
|
143
|
-
This function retrieves all runs for the given list of experiment names.
|
144
|
-
If no experiment names are provided (None), the function will search all runs
|
145
|
-
for all experiments except the "Default" experiment.
|
146
|
-
The function returns the results as a `RunCollection` object.
|
147
|
-
|
148
|
-
Note:
|
149
|
-
The returned runs are sorted by their start time in ascending order.
|
150
|
-
|
151
|
-
Args:
|
152
|
-
experiment_names (list[str] | None): List of experiment names to search
|
153
|
-
for runs. If None is provided, the function will search all runs
|
154
|
-
for all experiments except the "Default" experiment.
|
155
|
-
n_jobs (int): The number of jobs to retrieve runs in parallel.
|
156
|
-
|
157
|
-
Returns:
|
158
|
-
RunCollection: A `RunCollection` instance containing the runs for the
|
159
|
-
specified experiments.
|
160
|
-
|
161
|
-
"""
|
162
|
-
import mlflow
|
163
|
-
|
164
|
-
run_ids = list_run_ids(experiment_names)
|
165
|
-
|
166
|
-
if n_jobs == 0:
|
167
|
-
runs = [mlflow.get_run(run_id) for run_id in run_ids]
|
168
|
-
|
169
|
-
else:
|
170
|
-
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
171
|
-
runs = joblib.Parallel(n_jobs, backend="threading")(it)
|
172
|
-
|
173
|
-
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
174
|
-
return RunCollection(runs) # type: ignore
|