PyPI - furu - Versions diffs - 0.0.4__tar.gz → 0.0.6__tar.gz - Mend

furu 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{furu-0.0.4 → furu-0.0.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: furu
-Version: 0.0.4
+Version: 0.0.6
 Summary: Cacheable, nested pipelines for Python. Define computations as configs; furu handles caching, state tracking, and result reuse across runs.
 Author: Herman Brunborg
 Author-email: Herman Brunborg <herman@brunborg.com>
@@ -459,8 +459,11 @@ The `/api/experiments` endpoint supports:
 |----------|---------|-------------|
 | `FURU_PATH` | `<project>/furu-data` | Base storage directory for non-versioned artifacts |
 | `FURU_VERSION_CONTROLLED_PATH` | `<project>/furu-data/artifacts` | Override version-controlled storage root |
+| `FURU_SUBMITIT_PATH` | `<FURU_PATH>/submitit` | Override submitit logs root |
 | `FURU_LOG_LEVEL` | `INFO` | Console verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
-| `FURU_IGNORE_DIFF` | `false` | Skip embedding git diff in metadata |
+| `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting (set `0` to disable) |
+| `FURU_RECORD_GIT` | `cached` | Git provenance capture: `ignore` skips git metadata, `cached` records once per process, `uncached` records every time |
+| `FURU_ALLOW_NO_GIT_ORIGIN` | `false` | Allow missing git `origin` when recording git metadata (invalid with `FURU_RECORD_GIT=ignore`) |
 | `FURU_ALWAYS_RERUN` | `""` | Comma-separated class qualnames to always rerun (use `ALL` to bypass cache globally; cannot combine with other entries; entries must be importable) |
 | `FURU_RETRY_FAILED` | `true` | Retry failed artifacts by default (set to `0` to keep failures sticky) |
 | `FURU_MAX_COMPUTE_RETRIES` | `3` | Maximum compute retries per node after the first failure |
@@ -469,12 +472,21 @@ The `/api/experiments` endpoint supports:
 | `FURU_WAIT_LOG_EVERY_SECS` | `10` | Interval between "waiting" log messages |
 | `FURU_STALE_AFTER_SECS` | `1800` | Consider running jobs stale after this duration |
 | `FURU_LEASE_SECS` | `120` | Compute lock lease duration |
-| `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs |
+| `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs (min 1s) |
 | `FURU_PREEMPT_MAX` | `5` | Maximum submitit requeues on preemption |
 | `FURU_CANCELLED_IS_PREEMPTED` | `false` | Treat SLURM CANCELLED as preempted |
-| `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting |
+| `SLURM_JOB_ID` | unset | Read-only; set by Slurm to record job id and enable submitit context |
-Local `.env` files are loaded automatically if `python-dotenv` is installed.
+Local `.env` files are not loaded automatically. Call `furu.load_env()` when you
+want to load `.env` values (requires `python-dotenv`).
+### Test and CI Environment Variables
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `FURU_DASHBOARD_DEV_DATA_DIR` | unset | Override data dir for `make dashboard-dev` (defaults to a temp dir) |
+| `FURU_E2E_DATA_DIR` | unset | Required for Playwright e2e runs; used as the data root and to set `FURU_PATH` |
+| `CI` | unset | Enables CI-friendly Playwright settings (retries, single worker, traces, screenshots, video) |
 ### Programmatic Configuration
@@ -487,10 +499,93 @@ furu.set_furu_root(Path("/my/storage"))
 root = furu.get_furu_root()
 # Access config directly
-furu.FURU_CONFIG.ignore_git_diff = True
+furu.FURU_CONFIG.record_git = "uncached"
 furu.FURU_CONFIG.poll_interval = 5.0
 ```
+### Testing with pytest
+Use the built-in pytest fixture to isolate Furu storage in tests (each test gets
+its own temp root, so identical configs in separate tests will not collide):
+```python
+# conftest.py
+pytest_plugins = ["furu.testing"]
+```
+```python
+# test_pipeline.py
+import json
+from pathlib import Path
+import furu
+class TrainModel(furu.Furu[Path]):
+    lr: float = furu.chz.field(default=1e-3)
+    def _create(self) -> Path:
+        path = self.furu_dir / "metrics.json"
+        path.write_text(json.dumps({"lr": self.lr}))
+        return path
+    def _load(self) -> Path:
+        return self.furu_dir / "metrics.json"
+def test_create_and_reload(furu_tmp_root):
+    obj = TrainModel(lr=1e-3)
+    first = obj.get()
+    second = obj.get()
+    assert first.read_text() == second.read_text()
+    assert (furu_tmp_root / "data").exists()
+```
+Override specific dependencies when you want to skip deeper chains:
+```python
+from furu.testing import override_results
+class Normalize(furu.Furu[str]):
+    def _create(self) -> str:
+        return "normalized"
+    def _load(self) -> str:
+        return "normalized"
+class TrainModel(furu.Furu[str]):
+    normalizer: Normalize = furu.chz.field(default_factory=Normalize)
+    def _create(self) -> str:
+        return f"trained:{self.normalizer.get()}"
+    def _load(self) -> str:
+        return "trained"
+def test_override_dependency(furu_tmp_root):
+    normalizer = Normalize()
+    model = TrainModel(normalizer=normalizer)
+    with override_results({normalizer: "stub"}):
+        assert model.get() == "trained:stub"
+```
+If you want to override without instantiating the dependency directly, target it
+by dotted path from the root object (chz-style paths, e.g. `deps.0` for lists and
+`deps.key` for mappings):
+```python
+from furu.testing import override_results_for
+def test_override_by_path(furu_tmp_root):
+    model = TrainModel()
+    with override_results_for(model, {"normalizer": "stub"}):
+        assert model.get() == "trained:stub"
+```
 ### Class-Level Options
 ```python

{furu-0.0.4 → furu-0.0.6}/README.md RENAMED Viewed

@@ -440,8 +440,11 @@ The `/api/experiments` endpoint supports:
 |----------|---------|-------------|
 | `FURU_PATH` | `<project>/furu-data` | Base storage directory for non-versioned artifacts |
 | `FURU_VERSION_CONTROLLED_PATH` | `<project>/furu-data/artifacts` | Override version-controlled storage root |
+| `FURU_SUBMITIT_PATH` | `<FURU_PATH>/submitit` | Override submitit logs root |
 | `FURU_LOG_LEVEL` | `INFO` | Console verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
-| `FURU_IGNORE_DIFF` | `false` | Skip embedding git diff in metadata |
+| `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting (set `0` to disable) |
+| `FURU_RECORD_GIT` | `cached` | Git provenance capture: `ignore` skips git metadata, `cached` records once per process, `uncached` records every time |
+| `FURU_ALLOW_NO_GIT_ORIGIN` | `false` | Allow missing git `origin` when recording git metadata (invalid with `FURU_RECORD_GIT=ignore`) |
 | `FURU_ALWAYS_RERUN` | `""` | Comma-separated class qualnames to always rerun (use `ALL` to bypass cache globally; cannot combine with other entries; entries must be importable) |
 | `FURU_RETRY_FAILED` | `true` | Retry failed artifacts by default (set to `0` to keep failures sticky) |
 | `FURU_MAX_COMPUTE_RETRIES` | `3` | Maximum compute retries per node after the first failure |
@@ -450,12 +453,21 @@ The `/api/experiments` endpoint supports:
 | `FURU_WAIT_LOG_EVERY_SECS` | `10` | Interval between "waiting" log messages |
 | `FURU_STALE_AFTER_SECS` | `1800` | Consider running jobs stale after this duration |
 | `FURU_LEASE_SECS` | `120` | Compute lock lease duration |
-| `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs |
+| `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs (min 1s) |
 | `FURU_PREEMPT_MAX` | `5` | Maximum submitit requeues on preemption |
 | `FURU_CANCELLED_IS_PREEMPTED` | `false` | Treat SLURM CANCELLED as preempted |
-| `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting |
+| `SLURM_JOB_ID` | unset | Read-only; set by Slurm to record job id and enable submitit context |
-Local `.env` files are loaded automatically if `python-dotenv` is installed.
+Local `.env` files are not loaded automatically. Call `furu.load_env()` when you
+want to load `.env` values (requires `python-dotenv`).
+### Test and CI Environment Variables
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `FURU_DASHBOARD_DEV_DATA_DIR` | unset | Override data dir for `make dashboard-dev` (defaults to a temp dir) |
+| `FURU_E2E_DATA_DIR` | unset | Required for Playwright e2e runs; used as the data root and to set `FURU_PATH` |
+| `CI` | unset | Enables CI-friendly Playwright settings (retries, single worker, traces, screenshots, video) |
 ### Programmatic Configuration
@@ -468,10 +480,93 @@ furu.set_furu_root(Path("/my/storage"))
 root = furu.get_furu_root()
 # Access config directly
-furu.FURU_CONFIG.ignore_git_diff = True
+furu.FURU_CONFIG.record_git = "uncached"
 furu.FURU_CONFIG.poll_interval = 5.0
 ```
+### Testing with pytest
+Use the built-in pytest fixture to isolate Furu storage in tests (each test gets
+its own temp root, so identical configs in separate tests will not collide):
+```python
+# conftest.py
+pytest_plugins = ["furu.testing"]
+```
+```python
+# test_pipeline.py
+import json
+from pathlib import Path
+import furu
+class TrainModel(furu.Furu[Path]):
+    lr: float = furu.chz.field(default=1e-3)
+    def _create(self) -> Path:
+        path = self.furu_dir / "metrics.json"
+        path.write_text(json.dumps({"lr": self.lr}))
+        return path
+    def _load(self) -> Path:
+        return self.furu_dir / "metrics.json"
+def test_create_and_reload(furu_tmp_root):
+    obj = TrainModel(lr=1e-3)
+    first = obj.get()
+    second = obj.get()
+    assert first.read_text() == second.read_text()
+    assert (furu_tmp_root / "data").exists()
+```
+Override specific dependencies when you want to skip deeper chains:
+```python
+from furu.testing import override_results
+class Normalize(furu.Furu[str]):
+    def _create(self) -> str:
+        return "normalized"
+    def _load(self) -> str:
+        return "normalized"
+class TrainModel(furu.Furu[str]):
+    normalizer: Normalize = furu.chz.field(default_factory=Normalize)
+    def _create(self) -> str:
+        return f"trained:{self.normalizer.get()}"
+    def _load(self) -> str:
+        return "trained"
+def test_override_dependency(furu_tmp_root):
+    normalizer = Normalize()
+    model = TrainModel(normalizer=normalizer)
+    with override_results({normalizer: "stub"}):
+        assert model.get() == "trained:stub"
+```
+If you want to override without instantiating the dependency directly, target it
+by dotted path from the root object (chz-style paths, e.g. `deps.0` for lists and
+`deps.key` for mappings):
+```python
+from furu.testing import override_results_for
+def test_override_by_path(furu_tmp_root):
+    model = TrainModel()
+    with override_results_for(model, {"normalizer": "stub"}):
+        assert model.get() == "trained:stub"
+```
 ### Class-Level Options
 ```python

{furu-0.0.4 → furu-0.0.6}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "furu"
-version = "0.0.4"
+version = "0.0.6"
 description = "Cacheable, nested pipelines for Python. Define computations as configs; furu handles caching, state tracking, and result reuse across runs."
 readme = "README.md"
 authors = [

{furu-0.0.4 → furu-0.0.6}/src/furu/config.py RENAMED Viewed

@@ -1,6 +1,10 @@
 import os
 from importlib import import_module
 from pathlib import Path
+from typing import Literal, cast
+RecordGitMode = Literal["ignore", "cached", "uncached"]
 class FuruConfig:
@@ -41,21 +45,14 @@ class FuruConfig:
             "true",
             "yes",
         }
-        self.ignore_git_diff = os.getenv("FURU_IGNORE_DIFF", "0").lower() in {
-            "1",
-            "true",
-            "yes",
-        }
-        self.require_git = os.getenv("FURU_REQUIRE_GIT", "1").lower() in {
-            "1",
-            "true",
-            "yes",
-        }
-        self.require_git_remote = os.getenv("FURU_REQUIRE_GIT_REMOTE", "1").lower() in {
-            "1",
-            "true",
-            "yes",
-        }
+        self.record_git = self._parse_record_git(os.getenv("FURU_RECORD_GIT", "cached"))
+        self.allow_no_git_origin = self._parse_bool(
+            os.getenv("FURU_ALLOW_NO_GIT_ORIGIN", "0")
+        )
+        if self.allow_no_git_origin and self.record_git == "ignore":
+            raise ValueError(
+                "FURU_ALLOW_NO_GIT_ORIGIN cannot be enabled when FURU_RECORD_GIT=ignore"
+            )
         always_rerun_items = {
             item.strip()
             for item in os.getenv("FURU_ALWAYS_RERUN", "").split(",")
@@ -77,35 +74,25 @@ class FuruConfig:
             "FURU_CANCELLED_IS_PREEMPTED", "false"
         ).lower() in {"1", "true", "yes"}
-        # Parse FURU_CACHE_METADATA: "never", "forever", or duration like "5m", "1h"
-        # Default: "5m" (5 minutes) - balances performance with freshness
-        self.cache_metadata_ttl_sec: float | None = self._parse_cache_duration(
-            os.getenv("FURU_CACHE_METADATA", "5m")
-        )
     @staticmethod
-    def _parse_cache_duration(value: str) -> float | None:
-        """Parse cache duration string into seconds. Returns None for 'never', float('inf') for 'forever'."""
-        value = value.strip().lower()
-        if value in {"never", "0", "false", "no"}:
-            return None  # No caching
-        if value in {"forever", "inf", "true", "yes", "1"}:
-            return float("inf")  # Cache forever
-        # Parse duration like "5m", "1h", "30s"
-        import re
-        match = re.match(r"^(\d+(?:\.\d+)?)\s*([smh]?)$", value)
-        if not match:
+    def _parse_bool(value: str) -> bool:
+        return value.strip().lower() in {"1", "true", "yes"}
+    @classmethod
+    def _parse_record_git(cls, value: str) -> RecordGitMode:
+        normalized = value.strip().lower()
+        allowed = {"ignore", "cached", "uncached"}
+        if normalized not in allowed:
             raise ValueError(
-                f"Invalid FURU_CACHE_METADATA value: {value!r}. "
-                "Use 'never', 'forever', or duration like '5m', '1h', '30s'"
+                "FURU_RECORD_GIT must be one of 'ignore', 'cached', or 'uncached'"
             )
+        return cast(RecordGitMode, normalized)
-        num = float(match.group(1))
-        unit = match.group(2) or "s"
-        multipliers = {"s": 1, "m": 60, "h": 3600}
-        return num * multipliers[unit]
+    @property
+    def cache_metadata_ttl_sec(self) -> float | None:
+        if self.record_git == "cached":
+            return float("inf")
+        return None
     def get_root(self, version_controlled: bool = False) -> Path:
         """Get root directory for storage (version_controlled uses its own root)."""

furu 0.0.4__tar.gz → 0.0.6__tar.gz

furu 0.0.4__tar.gz → 0.0.6__tar.gz