hydraflow 0.2.10__tar.gz → 0.2.12__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32)
  1. {hydraflow-0.2.10 → hydraflow-0.2.12}/PKG-INFO +1 -1
  2. {hydraflow-0.2.10 → hydraflow-0.2.12}/pyproject.toml +1 -1
  3. {hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/mlflow.py +28 -3
  4. {hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/progress.py +12 -6
  5. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_app.py +14 -0
  6. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_run_collection.py +2 -2
  7. {hydraflow-0.2.10 → hydraflow-0.2.12}/.devcontainer/devcontainer.json +0 -0
  8. {hydraflow-0.2.10 → hydraflow-0.2.12}/.devcontainer/postCreate.sh +0 -0
  9. {hydraflow-0.2.10 → hydraflow-0.2.12}/.devcontainer/starship.toml +0 -0
  10. {hydraflow-0.2.10 → hydraflow-0.2.12}/.gitattributes +0 -0
  11. {hydraflow-0.2.10 → hydraflow-0.2.12}/.gitignore +0 -0
  12. {hydraflow-0.2.10 → hydraflow-0.2.12}/LICENSE +0 -0
  13. {hydraflow-0.2.10 → hydraflow-0.2.12}/README.md +0 -0
  14. {hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/__init__.py +0 -0
  15. {hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/asyncio.py +0 -0
  16. {hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/config.py +0 -0
  17. {hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/context.py +0 -0
  18. {hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/info.py +0 -0
  19. {hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/run_collection.py +0 -0
  20. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/scripts/__init__.py +0 -0
  21. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/scripts/app.py +0 -0
  22. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/scripts/progress.py +0 -0
  23. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/scripts/watch.py +0 -0
  24. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_asyncio.py +0 -0
  25. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_config.py +0 -0
  26. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_context.py +0 -0
  27. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_info.py +0 -0
  28. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_log_run.py +0 -0
  29. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_mlflow.py +0 -0
  30. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_progress.py +0 -0
  31. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_version.py +0 -0
  32. {hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hydraflow
3
- Version: 0.2.10
3
+ Version: 0.2.12
4
4
  Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
5
  Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hydraflow"
7
- version = "0.2.10"
7
+ version = "0.2.12"
8
8
  description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -22,6 +22,7 @@ from __future__ import annotations
22
22
  from pathlib import Path
23
23
  from typing import TYPE_CHECKING
24
24
 
25
+ import joblib
25
26
  import mlflow
26
27
  from hydra.core.hydra_config import HydraConfig
27
28
  from mlflow.entities import ViewType
@@ -146,7 +147,9 @@ def search_runs(
146
147
  return RunCollection(runs) # type: ignore
147
148
 
148
149
 
149
- def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
150
+ def list_runs(
151
+ experiment_names: str | list[str] | None = None, *, n_jobs: int = 0
152
+ ) -> RunCollection:
150
153
  """
151
154
  List all runs for the specified experiments.
152
155
 
@@ -168,8 +171,30 @@ def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
168
171
  Returns:
169
172
  A `RunCollection` object containing the runs for the specified experiments.
170
173
  """
171
- if experiment_names == []:
174
+ if isinstance(experiment_names, str):
175
+ experiment_names = [experiment_names]
176
+
177
+ elif experiment_names == []:
172
178
  experiments = mlflow.search_experiments()
173
179
  experiment_names = [e.name for e in experiments if e.name != "Default"]
174
180
 
175
- return search_runs(experiment_names=experiment_names)
181
+ if n_jobs == 0:
182
+ return search_runs(experiment_names=experiment_names)
183
+
184
+ if experiment_names is None:
185
+ raise NotImplementedError
186
+
187
+ run_ids = []
188
+
189
+ for name in experiment_names:
190
+ if experiment := mlflow.get_experiment_by_name(name):
191
+ loc = experiment.artifact_location
192
+
193
+ if isinstance(loc, str) and loc.startswith("file://"):
194
+ path = Path(mlflow.artifacts.download_artifacts(loc))
195
+ run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
196
+
197
+ it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
198
+ runs = joblib.Parallel(n_jobs, prefer="threads")(it)
199
+ runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
200
+ return RunCollection(runs) # type: ignore
@@ -161,21 +161,27 @@ def multi_tasks_progress(
161
161
 
162
162
  with Progress(*columns, transient=transient or False, **kwargs) as progress:
163
163
  task_main = progress.add_task(main_description, total=None)
164
+
165
+ task_ids = [
166
+ progress.add_task(description.format(i), start=False, total=None)
167
+ for i in range(len(iterables))
168
+ ]
169
+
164
170
  total = {}
165
171
  completed = {}
166
172
 
167
- def func(i: int, iterable: Iterable[int | tuple[int, int]]) -> None:
168
- task_id = progress.add_task(description.format(i), total=None)
173
+ def func(i: int) -> None:
169
174
  completed[i] = 0
170
175
  total[i] = None
176
+ progress.start_task(task_ids[i])
171
177
 
172
- for index in iterable:
178
+ for index in iterables[i]:
173
179
  if isinstance(index, tuple):
174
180
  completed[i], total[i] = index[0] + 1, index[1]
175
181
  else:
176
182
  completed[i] = index + 1
177
183
 
178
- progress.update(task_id, total=total[i], completed=completed[i])
184
+ progress.update(task_ids[i], total=total[i], completed=completed[i])
179
185
 
180
186
  if all(t is not None for t in total.values()):
181
187
  t = sum(total.values())
@@ -185,7 +191,7 @@ def multi_tasks_progress(
185
191
  progress.update(task_main, total=t, completed=c)
186
192
 
187
193
  if transient is not False:
188
- progress.remove_task(task_id)
194
+ progress.remove_task(task_ids[i])
189
195
 
190
- it = (joblib.delayed(func)(i, it) for i, it in enumerate(iterables))
196
+ it = (joblib.delayed(func)(i) for i in range(len(iterables)))
191
197
  joblib.Parallel(n_jobs, prefer="threads")(it)
@@ -26,6 +26,20 @@ def rc(monkeypatch, tmp_path):
26
26
  yield hydraflow.list_runs()
27
27
 
28
28
 
29
+ @pytest.mark.parametrize("n_jobs", [0, 1, 2, 4, -1])
30
+ def test_list_runs_parallel(rc: RunCollection, n_jobs: int):
31
+ from hydraflow.mlflow import list_runs
32
+
33
+ rc_ = list_runs("_info_", n_jobs=n_jobs)
34
+ assert len(rc) == len(rc_)
35
+
36
+ for a, b in zip(rc, rc_):
37
+ assert a.info.run_id == b.info.run_id
38
+ assert a.info.start_time == b.info.start_time
39
+ assert a.info.status == b.info.status
40
+ assert a.info.artifact_uri == b.info.artifact_uri
41
+
42
+
29
43
  def test_app_info_run_id(rc: RunCollection):
30
44
  assert len(rc.info.run_id) == 4
31
45
 
@@ -364,14 +364,14 @@ def test_list_runs_empty_list(runs, runs2):
364
364
  def test_list_runs_list(runs, runs2, name, n):
365
365
  from hydraflow.mlflow import list_runs
366
366
 
367
- filtered_runs = list_runs(experiment_names=[name])
367
+ filtered_runs = list_runs(name)
368
368
  assert len(filtered_runs) == n
369
369
 
370
370
 
371
371
  def test_list_runs_none(runs, runs2):
372
372
  from hydraflow.mlflow import list_runs
373
373
 
374
- no_runs = list_runs(experiment_names=["non_existent_experiment"])
374
+ no_runs = list_runs(["non_existent_experiment"])
375
375
  assert len(no_runs) == 0
376
376
 
377
377
 
File without changes
File without changes
File without changes
File without changes