furu 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {furu-0.0.4 → furu-0.0.6}/PKG-INFO +101 -6
  2. {furu-0.0.4 → furu-0.0.6}/README.md +100 -5
  3. {furu-0.0.4 → furu-0.0.6}/pyproject.toml +1 -1
  4. {furu-0.0.4 → furu-0.0.6}/src/furu/config.py +27 -40
  5. {furu-0.0.4 → furu-0.0.6}/src/furu/core/furu.py +203 -126
  6. {furu-0.0.4 → furu-0.0.6}/src/furu/core/list.py +3 -2
  7. furu-0.0.4/src/furu/dashboard/frontend/dist/assets/index-DS3FsqcY.js → furu-0.0.6/src/furu/dashboard/frontend/dist/assets/index-BjyrY-Zz.js +1 -1
  8. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/frontend/dist/index.html +1 -1
  9. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/local.py +9 -7
  10. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/plan.py +117 -25
  11. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/slurm_dag.py +16 -14
  12. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/slurm_pool.py +5 -5
  13. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/slurm_spec.py +2 -2
  14. {furu-0.0.4 → furu-0.0.6}/src/furu/migration.py +1 -2
  15. {furu-0.0.4 → furu-0.0.6}/src/furu/runtime/env.py +1 -1
  16. {furu-0.0.4 → furu-0.0.6}/src/furu/runtime/logging.py +30 -4
  17. furu-0.0.6/src/furu/runtime/overrides.py +37 -0
  18. {furu-0.0.4 → furu-0.0.6}/src/furu/storage/metadata.py +26 -29
  19. {furu-0.0.4 → furu-0.0.6}/src/furu/storage/migration.py +0 -1
  20. {furu-0.0.4 → furu-0.0.6}/src/furu/storage/state.py +86 -92
  21. furu-0.0.6/src/furu/testing.py +232 -0
  22. {furu-0.0.4 → furu-0.0.6}/src/furu/__init__.py +0 -0
  23. {furu-0.0.4 → furu-0.0.6}/src/furu/adapters/__init__.py +0 -0
  24. {furu-0.0.4 → furu-0.0.6}/src/furu/adapters/submitit.py +0 -0
  25. {furu-0.0.4 → furu-0.0.6}/src/furu/core/__init__.py +0 -0
  26. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/__init__.py +0 -0
  27. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/__main__.py +0 -0
  28. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/api/__init__.py +0 -0
  29. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/api/models.py +0 -0
  30. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/api/routes.py +0 -0
  31. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/frontend/dist/assets/index-BXAIKNNr.css +0 -0
  32. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/frontend/dist/favicon.svg +0 -0
  33. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/main.py +0 -0
  34. {furu-0.0.4 → furu-0.0.6}/src/furu/dashboard/scanner.py +0 -0
  35. {furu-0.0.4 → furu-0.0.6}/src/furu/errors.py +0 -0
  36. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/__init__.py +0 -0
  37. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/context.py +0 -0
  38. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/paths.py +0 -0
  39. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/plan_utils.py +0 -0
  40. {furu-0.0.4 → furu-0.0.6}/src/furu/execution/submitit_factory.py +0 -0
  41. {furu-0.0.4 → furu-0.0.6}/src/furu/migrate.py +0 -0
  42. {furu-0.0.4 → furu-0.0.6}/src/furu/runtime/__init__.py +0 -0
  43. {furu-0.0.4 → furu-0.0.6}/src/furu/runtime/tracebacks.py +0 -0
  44. {furu-0.0.4 → furu-0.0.6}/src/furu/serialization/__init__.py +0 -0
  45. {furu-0.0.4 → furu-0.0.6}/src/furu/serialization/migrations.py +0 -0
  46. {furu-0.0.4 → furu-0.0.6}/src/furu/serialization/serializer.py +0 -0
  47. {furu-0.0.4 → furu-0.0.6}/src/furu/storage/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: furu
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: Cacheable, nested pipelines for Python. Define computations as configs; furu handles caching, state tracking, and result reuse across runs.
5
5
  Author: Herman Brunborg
6
6
  Author-email: Herman Brunborg <herman@brunborg.com>
@@ -459,8 +459,11 @@ The `/api/experiments` endpoint supports:
459
459
  |----------|---------|-------------|
460
460
  | `FURU_PATH` | `<project>/furu-data` | Base storage directory for non-versioned artifacts |
461
461
  | `FURU_VERSION_CONTROLLED_PATH` | `<project>/furu-data/artifacts` | Override version-controlled storage root |
462
+ | `FURU_SUBMITIT_PATH` | `<FURU_PATH>/submitit` | Override submitit logs root |
462
463
  | `FURU_LOG_LEVEL` | `INFO` | Console verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
463
- | `FURU_IGNORE_DIFF` | `false` | Skip embedding git diff in metadata |
464
+ | `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting (set `0` to disable) |
465
+ | `FURU_RECORD_GIT` | `cached` | Git provenance capture: `ignore` skips git metadata, `cached` records once per process, `uncached` records every time |
466
+ | `FURU_ALLOW_NO_GIT_ORIGIN` | `false` | Allow missing git `origin` when recording git metadata (invalid with `FURU_RECORD_GIT=ignore`) |
464
467
  | `FURU_ALWAYS_RERUN` | `""` | Comma-separated class qualnames to always rerun (use `ALL` to bypass cache globally; cannot combine with other entries; entries must be importable) |
465
468
  | `FURU_RETRY_FAILED` | `true` | Retry failed artifacts by default (set to `0` to keep failures sticky) |
466
469
  | `FURU_MAX_COMPUTE_RETRIES` | `3` | Maximum compute retries per node after the first failure |
@@ -469,12 +472,21 @@ The `/api/experiments` endpoint supports:
469
472
  | `FURU_WAIT_LOG_EVERY_SECS` | `10` | Interval between "waiting" log messages |
470
473
  | `FURU_STALE_AFTER_SECS` | `1800` | Consider running jobs stale after this duration |
471
474
  | `FURU_LEASE_SECS` | `120` | Compute lock lease duration |
472
- | `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs |
475
+ | `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs (min 1s) |
473
476
  | `FURU_PREEMPT_MAX` | `5` | Maximum submitit requeues on preemption |
474
477
  | `FURU_CANCELLED_IS_PREEMPTED` | `false` | Treat SLURM CANCELLED as preempted |
475
- | `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting |
478
+ | `SLURM_JOB_ID` | unset | Read-only; set by Slurm to record job id and enable submitit context |
476
479
 
477
- Local `.env` files are loaded automatically if `python-dotenv` is installed.
480
+ Local `.env` files are not loaded automatically. Call `furu.load_env()` when you
481
+ want to load `.env` values (requires `python-dotenv`).
482
+
483
+ ### Test and CI Environment Variables
484
+
485
+ | Variable | Default | Description |
486
+ |----------|---------|-------------|
487
+ | `FURU_DASHBOARD_DEV_DATA_DIR` | unset | Override data dir for `make dashboard-dev` (defaults to a temp dir) |
488
+ | `FURU_E2E_DATA_DIR` | unset | Required for Playwright e2e runs; used as the data root and to set `FURU_PATH` |
489
+ | `CI` | unset | Enables CI-friendly Playwright settings (retries, single worker, traces, screenshots, video) |
478
490
 
479
491
  ### Programmatic Configuration
480
492
 
@@ -487,10 +499,93 @@ furu.set_furu_root(Path("/my/storage"))
487
499
  root = furu.get_furu_root()
488
500
 
489
501
  # Access config directly
490
- furu.FURU_CONFIG.ignore_git_diff = True
502
+ furu.FURU_CONFIG.record_git = "uncached"
491
503
  furu.FURU_CONFIG.poll_interval = 5.0
492
504
  ```
493
505
 
506
+ ### Testing with pytest
507
+
508
+ Use the built-in pytest fixture to isolate Furu storage in tests (each test gets
509
+ its own temp root, so identical configs in separate tests will not collide):
510
+
511
+ ```python
512
+ # conftest.py
513
+ pytest_plugins = ["furu.testing"]
514
+ ```
515
+
516
+ ```python
517
+ # test_pipeline.py
518
+ import json
519
+ from pathlib import Path
520
+
521
+ import furu
522
+
523
+
524
+ class TrainModel(furu.Furu[Path]):
525
+ lr: float = furu.chz.field(default=1e-3)
526
+
527
+ def _create(self) -> Path:
528
+ path = self.furu_dir / "metrics.json"
529
+ path.write_text(json.dumps({"lr": self.lr}))
530
+ return path
531
+
532
+ def _load(self) -> Path:
533
+ return self.furu_dir / "metrics.json"
534
+
535
+
536
+ def test_create_and_reload(furu_tmp_root):
537
+ obj = TrainModel(lr=1e-3)
538
+ first = obj.get()
539
+ second = obj.get()
540
+ assert first.read_text() == second.read_text()
541
+ assert (furu_tmp_root / "data").exists()
542
+ ```
543
+
544
+ Override specific dependencies when you want to skip deeper chains:
545
+
546
+ ```python
547
+ from furu.testing import override_results
548
+
549
+
550
+ class Normalize(furu.Furu[str]):
551
+ def _create(self) -> str:
552
+ return "normalized"
553
+
554
+ def _load(self) -> str:
555
+ return "normalized"
556
+
557
+
558
+ class TrainModel(furu.Furu[str]):
559
+ normalizer: Normalize = furu.chz.field(default_factory=Normalize)
560
+
561
+ def _create(self) -> str:
562
+ return f"trained:{self.normalizer.get()}"
563
+
564
+ def _load(self) -> str:
565
+ return "trained"
566
+
567
+
568
+ def test_override_dependency(furu_tmp_root):
569
+ normalizer = Normalize()
570
+ model = TrainModel(normalizer=normalizer)
571
+ with override_results({normalizer: "stub"}):
572
+ assert model.get() == "trained:stub"
573
+ ```
574
+
575
+ If you want to override without instantiating the dependency directly, target it
576
+ by dotted path from the root object (chz-style paths, e.g. `deps.0` for lists and
577
+ `deps.key` for mappings):
578
+
579
+ ```python
580
+ from furu.testing import override_results_for
581
+
582
+
583
+ def test_override_by_path(furu_tmp_root):
584
+ model = TrainModel()
585
+ with override_results_for(model, {"normalizer": "stub"}):
586
+ assert model.get() == "trained:stub"
587
+ ```
588
+
494
589
  ### Class-Level Options
495
590
 
496
591
  ```python
@@ -440,8 +440,11 @@ The `/api/experiments` endpoint supports:
440
440
  |----------|---------|-------------|
441
441
  | `FURU_PATH` | `<project>/furu-data` | Base storage directory for non-versioned artifacts |
442
442
  | `FURU_VERSION_CONTROLLED_PATH` | `<project>/furu-data/artifacts` | Override version-controlled storage root |
443
+ | `FURU_SUBMITIT_PATH` | `<FURU_PATH>/submitit` | Override submitit logs root |
443
444
  | `FURU_LOG_LEVEL` | `INFO` | Console verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
444
- | `FURU_IGNORE_DIFF` | `false` | Skip embedding git diff in metadata |
445
+ | `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting (set `0` to disable) |
446
+ | `FURU_RECORD_GIT` | `cached` | Git provenance capture: `ignore` skips git metadata, `cached` records once per process, `uncached` records every time |
447
+ | `FURU_ALLOW_NO_GIT_ORIGIN` | `false` | Allow missing git `origin` when recording git metadata (invalid with `FURU_RECORD_GIT=ignore`) |
445
448
  | `FURU_ALWAYS_RERUN` | `""` | Comma-separated class qualnames to always rerun (use `ALL` to bypass cache globally; cannot combine with other entries; entries must be importable) |
446
449
  | `FURU_RETRY_FAILED` | `true` | Retry failed artifacts by default (set to `0` to keep failures sticky) |
447
450
  | `FURU_MAX_COMPUTE_RETRIES` | `3` | Maximum compute retries per node after the first failure |
@@ -450,12 +453,21 @@ The `/api/experiments` endpoint supports:
450
453
  | `FURU_WAIT_LOG_EVERY_SECS` | `10` | Interval between "waiting" log messages |
451
454
  | `FURU_STALE_AFTER_SECS` | `1800` | Consider running jobs stale after this duration |
452
455
  | `FURU_LEASE_SECS` | `120` | Compute lock lease duration |
453
- | `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs |
456
+ | `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs (min 1s) |
454
457
  | `FURU_PREEMPT_MAX` | `5` | Maximum submitit requeues on preemption |
455
458
  | `FURU_CANCELLED_IS_PREEMPTED` | `false` | Treat SLURM CANCELLED as preempted |
456
- | `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting |
459
+ | `SLURM_JOB_ID` | unset | Read-only; set by Slurm to record job id and enable submitit context |
457
460
 
458
- Local `.env` files are loaded automatically if `python-dotenv` is installed.
461
+ Local `.env` files are not loaded automatically. Call `furu.load_env()` when you
462
+ want to load `.env` values (requires `python-dotenv`).
463
+
464
+ ### Test and CI Environment Variables
465
+
466
+ | Variable | Default | Description |
467
+ |----------|---------|-------------|
468
+ | `FURU_DASHBOARD_DEV_DATA_DIR` | unset | Override data dir for `make dashboard-dev` (defaults to a temp dir) |
469
+ | `FURU_E2E_DATA_DIR` | unset | Required for Playwright e2e runs; used as the data root and to set `FURU_PATH` |
470
+ | `CI` | unset | Enables CI-friendly Playwright settings (retries, single worker, traces, screenshots, video) |
459
471
 
460
472
  ### Programmatic Configuration
461
473
 
@@ -468,10 +480,93 @@ furu.set_furu_root(Path("/my/storage"))
468
480
  root = furu.get_furu_root()
469
481
 
470
482
  # Access config directly
471
- furu.FURU_CONFIG.ignore_git_diff = True
483
+ furu.FURU_CONFIG.record_git = "uncached"
472
484
  furu.FURU_CONFIG.poll_interval = 5.0
473
485
  ```
474
486
 
487
+ ### Testing with pytest
488
+
489
+ Use the built-in pytest fixture to isolate Furu storage in tests (each test gets
490
+ its own temp root, so identical configs in separate tests will not collide):
491
+
492
+ ```python
493
+ # conftest.py
494
+ pytest_plugins = ["furu.testing"]
495
+ ```
496
+
497
+ ```python
498
+ # test_pipeline.py
499
+ import json
500
+ from pathlib import Path
501
+
502
+ import furu
503
+
504
+
505
+ class TrainModel(furu.Furu[Path]):
506
+ lr: float = furu.chz.field(default=1e-3)
507
+
508
+ def _create(self) -> Path:
509
+ path = self.furu_dir / "metrics.json"
510
+ path.write_text(json.dumps({"lr": self.lr}))
511
+ return path
512
+
513
+ def _load(self) -> Path:
514
+ return self.furu_dir / "metrics.json"
515
+
516
+
517
+ def test_create_and_reload(furu_tmp_root):
518
+ obj = TrainModel(lr=1e-3)
519
+ first = obj.get()
520
+ second = obj.get()
521
+ assert first.read_text() == second.read_text()
522
+ assert (furu_tmp_root / "data").exists()
523
+ ```
524
+
525
+ Override specific dependencies when you want to skip deeper chains:
526
+
527
+ ```python
528
+ from furu.testing import override_results
529
+
530
+
531
+ class Normalize(furu.Furu[str]):
532
+ def _create(self) -> str:
533
+ return "normalized"
534
+
535
+ def _load(self) -> str:
536
+ return "normalized"
537
+
538
+
539
+ class TrainModel(furu.Furu[str]):
540
+ normalizer: Normalize = furu.chz.field(default_factory=Normalize)
541
+
542
+ def _create(self) -> str:
543
+ return f"trained:{self.normalizer.get()}"
544
+
545
+ def _load(self) -> str:
546
+ return "trained"
547
+
548
+
549
+ def test_override_dependency(furu_tmp_root):
550
+ normalizer = Normalize()
551
+ model = TrainModel(normalizer=normalizer)
552
+ with override_results({normalizer: "stub"}):
553
+ assert model.get() == "trained:stub"
554
+ ```
555
+
556
+ If you want to override without instantiating the dependency directly, target it
557
+ by dotted path from the root object (chz-style paths, e.g. `deps.0` for lists and
558
+ `deps.key` for mappings):
559
+
560
+ ```python
561
+ from furu.testing import override_results_for
562
+
563
+
564
+ def test_override_by_path(furu_tmp_root):
565
+ model = TrainModel()
566
+ with override_results_for(model, {"normalizer": "stub"}):
567
+ assert model.get() == "trained:stub"
568
+ ```
569
+
475
570
  ### Class-Level Options
476
571
 
477
572
  ```python
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "furu"
3
- version = "0.0.4"
3
+ version = "0.0.6"
4
4
  description = "Cacheable, nested pipelines for Python. Define computations as configs; furu handles caching, state tracking, and result reuse across runs."
5
5
  readme = "README.md"
6
6
  authors = [
@@ -1,6 +1,10 @@
1
1
  import os
2
2
  from importlib import import_module
3
3
  from pathlib import Path
4
+ from typing import Literal, cast
5
+
6
+
7
+ RecordGitMode = Literal["ignore", "cached", "uncached"]
4
8
 
5
9
 
6
10
  class FuruConfig:
@@ -41,21 +45,14 @@ class FuruConfig:
41
45
  "true",
42
46
  "yes",
43
47
  }
44
- self.ignore_git_diff = os.getenv("FURU_IGNORE_DIFF", "0").lower() in {
45
- "1",
46
- "true",
47
- "yes",
48
- }
49
- self.require_git = os.getenv("FURU_REQUIRE_GIT", "1").lower() in {
50
- "1",
51
- "true",
52
- "yes",
53
- }
54
- self.require_git_remote = os.getenv("FURU_REQUIRE_GIT_REMOTE", "1").lower() in {
55
- "1",
56
- "true",
57
- "yes",
58
- }
48
+ self.record_git = self._parse_record_git(os.getenv("FURU_RECORD_GIT", "cached"))
49
+ self.allow_no_git_origin = self._parse_bool(
50
+ os.getenv("FURU_ALLOW_NO_GIT_ORIGIN", "0")
51
+ )
52
+ if self.allow_no_git_origin and self.record_git == "ignore":
53
+ raise ValueError(
54
+ "FURU_ALLOW_NO_GIT_ORIGIN cannot be enabled when FURU_RECORD_GIT=ignore"
55
+ )
59
56
  always_rerun_items = {
60
57
  item.strip()
61
58
  for item in os.getenv("FURU_ALWAYS_RERUN", "").split(",")
@@ -77,35 +74,25 @@ class FuruConfig:
77
74
  "FURU_CANCELLED_IS_PREEMPTED", "false"
78
75
  ).lower() in {"1", "true", "yes"}
79
76
 
80
- # Parse FURU_CACHE_METADATA: "never", "forever", or duration like "5m", "1h"
81
- # Default: "5m" (5 minutes) - balances performance with freshness
82
- self.cache_metadata_ttl_sec: float | None = self._parse_cache_duration(
83
- os.getenv("FURU_CACHE_METADATA", "5m")
84
- )
85
-
86
77
  @staticmethod
87
- def _parse_cache_duration(value: str) -> float | None:
88
- """Parse cache duration string into seconds. Returns None for 'never', float('inf') for 'forever'."""
89
- value = value.strip().lower()
90
- if value in {"never", "0", "false", "no"}:
91
- return None # No caching
92
- if value in {"forever", "inf", "true", "yes", "1"}:
93
- return float("inf") # Cache forever
94
-
95
- # Parse duration like "5m", "1h", "30s"
96
- import re
97
-
98
- match = re.match(r"^(\d+(?:\.\d+)?)\s*([smh]?)$", value)
99
- if not match:
78
+ def _parse_bool(value: str) -> bool:
79
+ return value.strip().lower() in {"1", "true", "yes"}
80
+
81
+ @classmethod
82
+ def _parse_record_git(cls, value: str) -> RecordGitMode:
83
+ normalized = value.strip().lower()
84
+ allowed = {"ignore", "cached", "uncached"}
85
+ if normalized not in allowed:
100
86
  raise ValueError(
101
- f"Invalid FURU_CACHE_METADATA value: {value!r}. "
102
- "Use 'never', 'forever', or duration like '5m', '1h', '30s'"
87
+ "FURU_RECORD_GIT must be one of 'ignore', 'cached', or 'uncached'"
103
88
  )
89
+ return cast(RecordGitMode, normalized)
104
90
 
105
- num = float(match.group(1))
106
- unit = match.group(2) or "s"
107
- multipliers = {"s": 1, "m": 60, "h": 3600}
108
- return num * multipliers[unit]
91
+ @property
92
+ def cache_metadata_ttl_sec(self) -> float | None:
93
+ if self.record_git == "cached":
94
+ return float("inf")
95
+ return None
109
96
 
110
97
  def get_root(self, version_controlled: bool = False) -> Path:
111
98
  """Get root directory for storage (version_controlled uses its own root)."""