furu 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,7 @@
11
11
  href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,600;0,700;1,400&family=JetBrains+Mono:wght@400;500&display=swap"
12
12
  rel="stylesheet"
13
13
  />
14
- <script type="module" crossorigin src="/assets/index-BjyrY-Zz.js"></script>
14
+ <script type="module" crossorigin src="/assets/index-NiDdQnqO.js"></script>
15
15
  <link rel="stylesheet" crossorigin href="/assets/index-BXAIKNNr.css">
16
16
  </head>
17
17
  <body>
furu/dashboard/scanner.py CHANGED
@@ -1,15 +1,24 @@
1
1
  """Filesystem scanner for discovering and parsing Furu experiment state."""
2
2
 
3
3
  import datetime as _dt
4
+ import importlib
5
+ import sys
4
6
  from collections import defaultdict
5
- from collections.abc import Iterator
6
7
  from pathlib import Path
7
8
  from typing import cast
8
9
 
9
- from ..config import FURU_CONFIG
10
+ from ..aliases import (
11
+ AliasKey,
12
+ alias_key,
13
+ collect_aliases,
14
+ find_experiment_dirs,
15
+ iter_roots,
16
+ )
17
+ from ..schema import schema_key_from_cls, schema_key_from_metadata_raw
10
18
  from ..storage import MetadataManager, MigrationManager, MigrationRecord, StateAttempt
11
19
  from ..storage.state import StateManager, _FuruState
12
20
  from .api.models import (
21
+ AliasInfo,
13
22
  ChildExperiment,
14
23
  DAGEdge,
15
24
  DAGExperiment,
@@ -25,14 +34,6 @@ from .api.models import (
25
34
  )
26
35
 
27
36
 
28
- def _iter_roots() -> Iterator[Path]:
29
- """Iterate over all existing Furu storage roots."""
30
- for version_controlled in (False, True):
31
- root = FURU_CONFIG.get_root(version_controlled)
32
- if root.exists():
33
- yield root
34
-
35
-
36
37
  def _parse_namespace_from_path(experiment_dir: Path, root: Path) -> tuple[str, str]:
37
38
  """
38
39
  Parse namespace and furu_hash from experiment directory path.
@@ -48,34 +49,6 @@ def _parse_namespace_from_path(experiment_dir: Path, root: Path) -> tuple[str, s
48
49
  return namespace, furu_hash
49
50
 
50
51
 
51
- def _alias_key(migration: MigrationRecord) -> tuple[str, str, str]:
52
- return (migration.from_namespace, migration.from_hash, migration.from_root)
53
-
54
-
55
- def _collect_aliases() -> dict[tuple[str, str, str], list[MigrationRecord]]:
56
- aliases: dict[tuple[str, str, str], list[MigrationRecord]] = defaultdict(list)
57
- for root in _iter_roots():
58
- for experiment_dir in _find_experiment_dirs(root):
59
- migration = MigrationManager.read_migration(experiment_dir)
60
- if migration is None or migration.kind != "alias":
61
- continue
62
- if migration.overwritten_at is not None:
63
- continue
64
- aliases[_alias_key(migration)].append(migration)
65
- return aliases
66
-
67
-
68
- def _alias_reference(
69
- aliases: dict[tuple[str, str, str], list[MigrationRecord]],
70
- ) -> dict[str, dict[str, list[str]]]:
71
- ref: dict[str, dict[str, list[str]]] = {}
72
- for key, records in aliases.items():
73
- from_namespace, from_hash, _from_root = key
74
- namespace_map = ref.setdefault(from_namespace, {})
75
- namespace_map[from_hash] = [record.to_hash for record in records]
76
- return ref
77
-
78
-
79
52
  def _get_class_name(namespace: str) -> str:
80
53
  """Extract class name from namespace (last component)."""
81
54
  parts = namespace.split(".")
@@ -114,6 +87,12 @@ def _state_to_summary(
114
87
  original_status: str | None = None,
115
88
  original_namespace: str | None = None,
116
89
  original_hash: str | None = None,
90
+ schema_key: tuple[str, ...] | None = None,
91
+ current_schema_key: tuple[str, ...] | None = None,
92
+ *,
93
+ is_stale: bool | None = None,
94
+ is_alias: bool | None = None,
95
+ aliases: list[AliasInfo] | None = None,
117
96
  ) -> ExperimentSummary:
118
97
  """Convert a Furu state to an experiment summary."""
119
98
  attempt = state.attempt
@@ -141,23 +120,18 @@ def _state_to_summary(
141
120
  to_namespace=migration.to_namespace if migration else None,
142
121
  to_hash=migration.to_hash if migration else None,
143
122
  original_result_status=original_status,
123
+ original_namespace=original_namespace,
124
+ original_hash=original_hash,
125
+ schema_key=list(schema_key) if schema_key is not None else None,
126
+ current_schema_key=list(current_schema_key)
127
+ if current_schema_key is not None
128
+ else None,
129
+ is_stale=is_stale,
130
+ is_alias=is_alias,
131
+ aliases=aliases,
144
132
  )
145
133
 
146
134
 
147
- def _find_experiment_dirs(root: Path) -> list[Path]:
148
- """Find all directories containing .furu/state.json files."""
149
- experiments = []
150
-
151
- # Walk the directory tree looking for .furu directories
152
- for furu_dir in root.rglob(StateManager.INTERNAL_DIR):
153
- if furu_dir.is_dir():
154
- state_file = furu_dir / StateManager.STATE_FILE
155
- if state_file.is_file():
156
- experiments.append(furu_dir.parent)
157
-
158
- return experiments
159
-
160
-
161
135
  def _parse_datetime(value: str | None) -> _dt.datetime | None:
162
136
  """Parse ISO datetime string to datetime object."""
163
137
  if not value:
@@ -168,35 +142,61 @@ def _parse_datetime(value: str | None) -> _dt.datetime | None:
168
142
  return dt
169
143
 
170
144
 
171
- def _read_metadata_with_defaults(
172
- directory: Path, migration: MigrationRecord | None
173
- ) -> JsonDict | None:
174
- metadata = MetadataManager.read_metadata_raw(directory)
175
- if not metadata or migration is None:
176
- return metadata
177
- if migration.kind != "alias" or migration.overwritten_at is not None:
178
- return metadata
179
- if not migration.default_values:
180
- return metadata
181
-
182
- furu_obj = metadata.get("furu_obj")
183
- if not isinstance(furu_obj, dict):
184
- return metadata
185
-
186
- defaults = migration.default_values
187
- updates: dict[str, str | int | float | bool] = {}
188
- for field, value in defaults.items():
189
- if field not in furu_obj:
190
- updates[field] = value
145
+ def _current_schema_key(
146
+ namespace: str,
147
+ cache: dict[str, tuple[str, ...] | None],
148
+ ) -> tuple[str, ...] | None:
149
+ if namespace in cache:
150
+ return cache[namespace]
151
+ module_path, _, class_name = namespace.rpartition(".")
152
+ if not module_path:
153
+ cache[namespace] = None
154
+ return None
155
+ module = sys.modules.get(module_path)
156
+ if module is None:
157
+ if not _module_on_path(module_path):
158
+ cache[namespace] = None
159
+ return None
160
+ module = importlib.import_module(module_path)
161
+ obj = getattr(module, class_name, None)
162
+ if obj is None:
163
+ cache[namespace] = None
164
+ return None
165
+ key = schema_key_from_cls(obj)
166
+ cache[namespace] = key
167
+ return key
191
168
 
192
- if not updates:
193
- return metadata
194
169
 
195
- updated_obj = dict(furu_obj)
196
- updated_obj.update(updates)
197
- updated_metadata = dict(metadata)
198
- updated_metadata["furu_obj"] = updated_obj
199
- return updated_metadata
170
+ def _module_on_path(module_path: str) -> bool:
171
+ root_name = module_path.split(".", maxsplit=1)[0]
172
+ for entry in sys.path:
173
+ if not entry:
174
+ continue
175
+ base = Path(entry)
176
+ if (base / root_name).is_dir() or (base / f"{root_name}.py").is_file():
177
+ return True
178
+ return False
179
+
180
+
181
+ def _alias_infos(
182
+ aliases: dict[AliasKey, list[MigrationRecord]],
183
+ original_key: AliasKey,
184
+ ) -> list[AliasInfo] | None:
185
+ records = aliases.get(original_key)
186
+ if not records:
187
+ return None
188
+ ordered = sorted(records, key=lambda record: record.migrated_at)
189
+ return [
190
+ AliasInfo(
191
+ namespace=record.to_namespace,
192
+ furu_hash=record.to_hash,
193
+ migrated_at=record.migrated_at,
194
+ overwritten_at=record.overwritten_at,
195
+ origin=record.origin,
196
+ note=record.note,
197
+ )
198
+ for record in ordered
199
+ ]
200
200
 
201
201
 
202
202
  def _get_nested_value(data: dict, path: str) -> str | int | float | bool | None:
@@ -234,6 +234,7 @@ def scan_experiments(
234
234
  config_filter: str | None = None,
235
235
  migration_kind: str | None = None,
236
236
  migration_policy: str | None = None,
237
+ schema: str = "current",
237
238
  view: str = "resolved",
238
239
  ) -> list[ExperimentSummary]:
239
240
  """
@@ -251,13 +252,19 @@ def scan_experiments(
251
252
  updated_after: Filter experiments updated after this ISO datetime
252
253
  updated_before: Filter experiments updated before this ISO datetime
253
254
  config_filter: Filter by config field in format "field.path=value"
255
+ schema: Filter by schema status (current, stale, any)
254
256
  view: "resolved" uses alias metadata; "original" uses original metadata/state.
255
257
 
256
258
  Returns:
257
259
  List of experiment summaries, sorted by updated_at (newest first)
258
260
  """
259
261
  experiments: list[ExperimentSummary] = []
260
- seen_original: set[tuple[str, str, str]] = set()
262
+ seen_original: set[AliasKey] = set()
263
+ alias_index = collect_aliases(include_inactive=True)
264
+ schema_cache: dict[str, tuple[str, ...] | None] = {}
265
+
266
+ if schema not in {"current", "stale", "any"}:
267
+ raise ValueError("schema must be one of: current, stale, any")
261
268
 
262
269
  # Parse datetime filters
263
270
  started_after_dt = _parse_datetime(started_after)
@@ -271,8 +278,8 @@ def scan_experiments(
271
278
  if config_filter and "=" in config_filter:
272
279
  config_field, config_value = config_filter.split("=", 1)
273
280
 
274
- for root in _iter_roots():
275
- for experiment_dir in _find_experiment_dirs(root):
281
+ for root in iter_roots():
282
+ for experiment_dir in find_experiment_dirs(root):
276
283
  state = StateManager.read_state(experiment_dir)
277
284
  namespace, furu_hash = _parse_namespace_from_path(experiment_dir, root)
278
285
  migration = MigrationManager.read_migration(experiment_dir)
@@ -280,6 +287,9 @@ def scan_experiments(
280
287
  original_state: _FuruState | None = None
281
288
  metadata_dir = experiment_dir
282
289
  alias_active = False
290
+ is_alias_view = False
291
+ original_namespace: str | None = None
292
+ original_hash: str | None = None
283
293
 
284
294
  if migration is not None and migration.kind == "alias":
285
295
  original_dir = MigrationManager.resolve_dir(migration, target="from")
@@ -290,11 +300,7 @@ def scan_experiments(
290
300
  and state.result.status == "migrated"
291
301
  and original_status == "success"
292
302
  )
293
- original_key = (
294
- migration.from_namespace,
295
- migration.from_hash,
296
- migration.from_root,
297
- )
303
+ original_key = alias_key(migration)
298
304
  if view == "original":
299
305
  if original_key in seen_original:
300
306
  continue
@@ -303,8 +309,10 @@ def scan_experiments(
303
309
  namespace = migration.from_namespace
304
310
  furu_hash = migration.from_hash
305
311
  metadata_dir = original_dir
306
- elif alias_active:
307
- metadata_dir = original_dir
312
+ else:
313
+ is_alias_view = True
314
+ original_namespace = migration.from_namespace
315
+ original_hash = migration.from_hash
308
316
  elif view == "original":
309
317
  original_key = (
310
318
  namespace,
@@ -315,14 +323,34 @@ def scan_experiments(
315
323
  continue
316
324
  seen_original.add(original_key)
317
325
 
326
+ metadata = MetadataManager.read_metadata_raw(metadata_dir)
327
+ if metadata is None:
328
+ continue
329
+ schema_key = schema_key_from_metadata_raw(metadata)
330
+ current_schema_key = _current_schema_key(namespace, schema_cache)
331
+ if current_schema_key is None:
332
+ is_stale = None
333
+ else:
334
+ is_stale = schema_key != current_schema_key
335
+
336
+ aliases = None
337
+ if not is_alias_view:
338
+ root_kind = MigrationManager.root_kind_for_dir(metadata_dir)
339
+ aliases = _alias_infos(alias_index, (namespace, furu_hash, root_kind))
340
+
318
341
  summary = _state_to_summary(
319
342
  state,
320
343
  namespace,
321
344
  furu_hash,
322
345
  migration=migration,
323
346
  original_status=original_status,
324
- original_namespace=migration.from_namespace if migration else None,
325
- original_hash=migration.from_hash if migration else None,
347
+ original_namespace=original_namespace,
348
+ original_hash=original_hash,
349
+ schema_key=schema_key,
350
+ current_schema_key=current_schema_key,
351
+ is_stale=is_stale,
352
+ is_alias=is_alias_view,
353
+ aliases=aliases,
326
354
  )
327
355
 
328
356
  if (
@@ -361,6 +389,10 @@ def scan_experiments(
361
389
  continue
362
390
  if migration_policy and summary.migration_policy != migration_policy:
363
391
  continue
392
+ if schema == "current" and summary.is_stale is True:
393
+ continue
394
+ if schema == "stale" and summary.is_stale is not True:
395
+ continue
364
396
 
365
397
  # Date filters
366
398
  if started_after_dt or started_before_dt:
@@ -387,18 +419,10 @@ def scan_experiments(
387
419
 
388
420
  # Config field filter - requires reading metadata
389
421
  if config_field and config_value is not None:
390
- defaults_migration = migration if view == "resolved" else None
391
- metadata = _read_metadata_with_defaults(
392
- metadata_dir,
393
- defaults_migration,
394
- )
395
- if metadata:
396
- furu_obj = metadata.get("furu_obj")
397
- if isinstance(furu_obj, dict):
398
- actual_value = _get_nested_value(furu_obj, config_field)
399
- if str(actual_value) != config_value:
400
- continue
401
- else:
422
+ furu_obj = metadata.get("furu_obj")
423
+ if isinstance(furu_obj, dict):
424
+ actual_value = _get_nested_value(furu_obj, config_field)
425
+ if str(actual_value) != config_value:
402
426
  continue
403
427
  else:
404
428
  continue
@@ -431,11 +455,11 @@ def get_experiment_detail(
431
455
  Returns:
432
456
  Experiment detail or None if not found
433
457
  """
434
- # Convert namespace to path
435
458
  namespace_path = Path(*namespace.split("."))
436
- alias_reference = _alias_reference(_collect_aliases())
459
+ alias_index = collect_aliases(include_inactive=True)
460
+ schema_cache: dict[str, tuple[str, ...] | None] = {}
437
461
 
438
- for root in _iter_roots():
462
+ for root in iter_roots():
439
463
  experiment_dir = root / namespace_path / furu_hash
440
464
  state_path = StateManager.get_state_path(experiment_dir)
441
465
 
@@ -444,28 +468,26 @@ def get_experiment_detail(
444
468
 
445
469
  state = StateManager.read_state(experiment_dir)
446
470
  migration = MigrationManager.read_migration(experiment_dir)
447
- metadata = _read_metadata_with_defaults(
448
- experiment_dir,
449
- migration if view == "resolved" else None,
450
- )
471
+ metadata_dir = experiment_dir
451
472
  original_status: str | None = None
452
473
  original_namespace: str | None = None
453
474
  original_hash: str | None = None
475
+ is_alias_view = False
454
476
 
455
477
  if migration is not None and migration.kind == "alias":
456
478
  original_dir = MigrationManager.resolve_dir(migration, target="from")
457
479
  original_state = StateManager.read_state(original_dir)
458
480
  original_status = original_state.result.status
459
- original_namespace = migration.from_namespace
460
- original_hash = migration.from_hash
461
481
  if view == "original":
462
482
  state = original_state
463
- metadata = MetadataManager.read_metadata_raw(original_dir)
483
+ metadata_dir = original_dir
464
484
  experiment_dir = original_dir
465
- namespace = original_namespace
466
- furu_hash = original_hash
485
+ namespace = migration.from_namespace
486
+ furu_hash = migration.from_hash
467
487
  else:
468
- metadata = _read_metadata_with_defaults(original_dir, migration)
488
+ is_alias_view = True
489
+ original_namespace = migration.from_namespace
490
+ original_hash = migration.from_hash
469
491
  elif migration is not None and migration.kind in {
470
492
  "moved",
471
493
  "copied",
@@ -474,29 +496,31 @@ def get_experiment_detail(
474
496
  if view == "original":
475
497
  original_dir = MigrationManager.resolve_dir(migration, target="from")
476
498
  state = StateManager.read_state(original_dir)
477
- metadata = MetadataManager.read_metadata_raw(original_dir)
499
+ metadata_dir = original_dir
478
500
  experiment_dir = original_dir
479
501
  namespace = migration.from_namespace
480
502
  furu_hash = migration.from_hash
481
503
  original_namespace = migration.from_namespace
482
504
  original_hash = migration.from_hash
483
505
 
484
- attempt = state.attempt
485
- if view == "original" and migration is not None and migration.kind == "alias":
486
- alias_source_namespace = migration.from_namespace
487
- alias_source_hash = migration.from_hash
488
- else:
489
- alias_source_namespace = namespace
490
- alias_source_hash = furu_hash
506
+ metadata = MetadataManager.read_metadata_raw(metadata_dir)
507
+ schema_key: tuple[str, ...] | None = None
508
+ current_schema_key: tuple[str, ...] | None = None
509
+ is_stale: bool | None = None
510
+ if metadata is not None:
511
+ schema_key = schema_key_from_metadata_raw(metadata)
512
+ current_schema_key = _current_schema_key(namespace, schema_cache)
513
+ if current_schema_key is None:
514
+ is_stale = None
515
+ else:
516
+ is_stale = schema_key != current_schema_key
491
517
 
492
- alias_keys = alias_reference.get(alias_source_namespace, {}).get(
493
- alias_source_hash,
494
- [],
495
- )
496
- alias_namespaces = (
497
- [alias_source_namespace] * len(alias_keys) if alias_keys else None
498
- )
499
- alias_hashes = alias_keys if alias_keys else None
518
+ aliases = None
519
+ if not is_alias_view:
520
+ root_kind = MigrationManager.root_kind_for_dir(metadata_dir)
521
+ aliases = _alias_infos(alias_index, (namespace, furu_hash, root_kind))
522
+
523
+ attempt = state.attempt
500
524
  return ExperimentDetail(
501
525
  namespace=namespace,
502
526
  furu_hash=furu_hash,
@@ -526,8 +550,13 @@ def get_experiment_detail(
526
550
  original_result_status=original_status,
527
551
  original_namespace=original_namespace,
528
552
  original_hash=original_hash,
529
- alias_namespaces=alias_namespaces,
530
- alias_hashes=alias_hashes,
553
+ schema_key=list(schema_key) if schema_key is not None else None,
554
+ current_schema_key=list(current_schema_key)
555
+ if current_schema_key is not None
556
+ else None,
557
+ is_stale=is_stale,
558
+ is_alias=is_alias_view,
559
+ aliases=aliases,
531
560
  )
532
561
 
533
562
  return None
@@ -548,8 +577,8 @@ def get_stats() -> DashboardStats:
548
577
  failed = 0
549
578
  success = 0
550
579
 
551
- for root in _iter_roots():
552
- for experiment_dir in _find_experiment_dirs(root):
580
+ for root in iter_roots():
581
+ for experiment_dir in find_experiment_dirs(root):
553
582
  state = StateManager.read_state(experiment_dir)
554
583
  total += 1
555
584
 
@@ -647,8 +676,8 @@ def get_experiment_dag() -> ExperimentDAG:
647
676
  # Collect all edges (deduped by class pair)
648
677
  edge_set: set[tuple[str, str, str]] = set() # (source_class, target_class, field)
649
678
 
650
- for root in _iter_roots():
651
- for experiment_dir in _find_experiment_dirs(root):
679
+ for root in iter_roots():
680
+ for experiment_dir in find_experiment_dirs(root):
652
681
  state = StateManager.read_state(experiment_dir)
653
682
  namespace, furu_hash = _parse_namespace_from_path(experiment_dir, root)
654
683
  metadata = MetadataManager.read_metadata_raw(experiment_dir)
@@ -761,13 +790,13 @@ def _find_experiment_by_furu_obj(
761
790
  # e.g., "my_project.pipelines.TrainModel" -> "my_project/pipelines/TrainModel"
762
791
  namespace_path = Path(*full_class_name.split("."))
763
792
 
764
- for root in _iter_roots():
793
+ for root in iter_roots():
765
794
  class_dir = root / namespace_path
766
795
  if not class_dir.exists():
767
796
  continue
768
797
 
769
798
  # Search through experiments of this class
770
- for experiment_dir in _find_experiment_dirs(class_dir):
799
+ for experiment_dir in find_experiment_dirs(class_dir):
771
800
  metadata = MetadataManager.read_metadata_raw(experiment_dir)
772
801
  if not metadata:
773
802
  continue
@@ -806,7 +835,7 @@ def get_experiment_relationships(
806
835
 
807
836
  target_metadata: JsonDict | None = None
808
837
 
809
- for root in _iter_roots():
838
+ for root in iter_roots():
810
839
  experiment_dir = root / namespace_path / furu_hash
811
840
  state_path = StateManager.get_state_path(experiment_dir)
812
841
 
@@ -820,10 +849,7 @@ def get_experiment_relationships(
820
849
  experiment_dir = MigrationManager.resolve_dir(migration, target="from")
821
850
  target_metadata = MetadataManager.read_metadata_raw(experiment_dir)
822
851
  else:
823
- target_metadata = _read_metadata_with_defaults(
824
- experiment_dir,
825
- migration if view == "resolved" else None,
826
- )
852
+ target_metadata = MetadataManager.read_metadata_raw(experiment_dir)
827
853
  break
828
854
 
829
855
  if not target_metadata:
@@ -881,8 +907,8 @@ def get_experiment_relationships(
881
907
  # Find children by scanning all experiments
882
908
  children: list[ChildExperiment] = []
883
909
 
884
- for root in _iter_roots():
885
- for experiment_dir in _find_experiment_dirs(root):
910
+ for root in iter_roots():
911
+ for experiment_dir in find_experiment_dirs(root):
886
912
  migration = MigrationManager.read_migration(experiment_dir)
887
913
  if migration is not None and migration.kind == "alias":
888
914
  continue