PyPI - furu - Versions diffs - 0.0.1__py3-none-any.whl - Mend

furu 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

furu/__init__.py +82 -0
furu/adapters/__init__.py +3 -0
furu/adapters/submitit.py +195 -0
furu/config.py +98 -0
furu/core/__init__.py +4 -0
furu/core/furu.py +999 -0
furu/core/list.py +123 -0
furu/dashboard/__init__.py +9 -0
furu/dashboard/__main__.py +7 -0
furu/dashboard/api/__init__.py +7 -0
furu/dashboard/api/models.py +170 -0
furu/dashboard/api/routes.py +135 -0
furu/dashboard/frontend/dist/assets/index-CbdDfSOZ.css +1 -0
furu/dashboard/frontend/dist/assets/index-DDv_TYB_.js +67 -0
furu/dashboard/frontend/dist/favicon.svg +10 -0
furu/dashboard/frontend/dist/index.html +22 -0
furu/dashboard/main.py +134 -0
furu/dashboard/scanner.py +931 -0
furu/errors.py +76 -0
furu/migrate.py +48 -0
furu/migration.py +926 -0
furu/runtime/__init__.py +27 -0
furu/runtime/env.py +8 -0
furu/runtime/logging.py +301 -0
furu/runtime/tracebacks.py +64 -0
furu/serialization/__init__.py +20 -0
furu/serialization/migrations.py +246 -0
furu/serialization/serializer.py +233 -0
furu/storage/__init__.py +32 -0
furu/storage/metadata.py +282 -0
furu/storage/migration.py +81 -0
furu/storage/state.py +1107 -0
furu-0.0.1.dist-info/METADATA +502 -0
furu-0.0.1.dist-info/RECORD +36 -0
furu-0.0.1.dist-info/WHEEL +4 -0
furu-0.0.1.dist-info/entry_points.txt +2 -0

furu-0.0.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,502 @@
+Metadata-Version: 2.4
+Name: furu
+Version: 0.0.1
+Summary: Cacheable, nested pipelines for Python. Define computations as configs; furu handles caching, state tracking, and result reuse across runs.
+Author-email: Herman Brunborg <herman@brunborg.com>
+Requires-Python: >=3.12
+Requires-Dist: chz>=0.4.0
+Requires-Dist: cloudpickle>=3.1.1
+Requires-Dist: pydantic>=2.12.5
+Requires-Dist: python-dotenv>=1.0.0
+Requires-Dist: rich>=14.2.0
+Requires-Dist: submitit>=1.5.3
+Provides-Extra: dashboard
+Requires-Dist: fastapi>=0.109.0; extra == 'dashboard'
+Requires-Dist: typer>=0.9.0; extra == 'dashboard'
+Requires-Dist: uvicorn[standard]>=0.27.0; extra == 'dashboard'
+Description-Content-Type: text/markdown
+# furu
+> **Note:** `v0.0.x` is alpha and may include breaking changes.
+A Python library for building cacheable, nested pipelines. Define computations as config objects; furu turns configs into stable on-disk artifact directories, records metadata/state, and reuses results across runs.
+Built on [chz](https://github.com/openai/chz) for declarative configs.
+## Installation
+```bash
+uv add "furu[dashboard]"
+```
+Or with pip:
+```bash
+pip install "furu[dashboard]"
+```
+The `[dashboard]` extra includes the web dashboard. Omit it for the core library only.
+## Quickstart
+1. Subclass `furu.Furu[T]`
+2. Implement `_create(self) -> T` (compute and write to `self.furu_dir`)
+3. Implement `_load(self) -> T` (load from `self.furu_dir`)
+4. Call `load_or_create()`
+```python
+# my_project/pipelines.py
+import json
+from pathlib import Path
+import furu
+class TrainModel(furu.Furu[Path]):
+    lr: float = furu.chz.field(default=1e-3)
+    steps: int = furu.chz.field(default=1000)
+    def _create(self) -> Path:
+        # Write outputs into the artifact directory
+        (self.furu_dir / "metrics.json").write_text(
+            json.dumps({"lr": self.lr, "steps": self.steps})
+        )
+        ckpt = self.furu_dir / "checkpoint.bin"
+        ckpt.write_bytes(b"...")
+        return ckpt
+    def _load(self) -> Path:
+        # Load outputs back from disk
+        return self.furu_dir / "checkpoint.bin"
+```
+```python
+# run_train.py
+from my_project.pipelines import TrainModel
+# First call: runs _create(), caches result
+artifact = TrainModel(lr=3e-4, steps=5000).load_or_create()
+# Second call with same config: loads from cache via _load()
+artifact = TrainModel(lr=3e-4, steps=5000).load_or_create()
+```
+> **Tip:** Define Furu classes in importable modules (not `__main__`); the artifact namespace is derived from the class's module + qualified name.
+## Core Concepts
+### How Caching Works
+Each `Furu` instance maps deterministically to a directory based on its config:
+```
+<root>/<namespace>/<hash>/
+```
+- **namespace**: Derived from the class's module + qualified name (e.g., `my_project.pipelines/TrainModel`)
+- **hash**: Computed from the object's config values using BLAKE2s
+When you call `load_or_create()`:
+1. If no cached result exists → run `_create()`, save state as "success"
+2. If cached result exists → run `_load()` to retrieve it
+3. If another process is already computing the same artifact → wait for it to finish, then load
+### Nested Pipelines (Dependencies)
+Furu objects compose via nested configs. Each dependency gets its own artifact folder:
+```python
+import furu
+class Dataset(furu.Furu[str]):
+    name: str = furu.chz.field(default="toy")
+    def _create(self) -> str:
+        (self.furu_dir / "data.txt").write_text("hello\nworld\n")
+        return "ready"
+    def _load(self) -> str:
+        return (self.furu_dir / "data.txt").read_text()
+class TrainTextModel(furu.Furu[str]):
+    dataset: Dataset = furu.chz.field(default_factory=Dataset)
+    def _create(self) -> str:
+        data = self.dataset.load_or_create()  # Triggers Dataset cache
+        (self.furu_dir / "model.txt").write_text(f"trained on:\n{data}")
+        return "trained"
+    def _load(self) -> str:
+        return (self.furu_dir / "model.txt").read_text()
+```
+### Storage Structure
+```
+$FURU_PATH/
+├── data/                         # Default storage (version_controlled=False)
+│   └── <module>/<Class>/
+│       └── <hash>/
+│           ├── .furu/
+│           │   ├── metadata.json # Config, git info, environment
+│           │   ├── state.json    # Status and timestamps
+│           │   ├── furu.log      # Captured logs
+│           │   └── SUCCESS.json  # Marker file
+│           └── <your outputs>    # Files from _create()
+├── git/                          # For version_controlled=True
+│   └── <same structure>
+└── raw/                          # Shared directory for large files
+```
+## Features
+### FuruList: Managing Experiment Collections
+`FuruList` provides a collection interface for organizing related experiments:
+```python
+import furu
+class MyExperiments(furu.FuruList[TrainModel]):
+    baseline = TrainModel(lr=1e-3, steps=1000)
+    fast_lr = TrainModel(lr=1e-2, steps=1000)
+    long_run = TrainModel(lr=1e-3, steps=10000)
+    # Can also use a dict for dynamic configs
+    configs = {
+        "tiny": TrainModel(lr=1e-3, steps=100),
+        "huge": TrainModel(lr=1e-4, steps=100000),
+    }
+# Iterate over all experiments
+for exp in MyExperiments:
+    exp.load_or_create()
+# Access by name
+exp = MyExperiments.by_name("baseline")
+# Get all as list
+all_exps = MyExperiments.all()
+# Get (name, instance) pairs
+for name, exp in MyExperiments.items():
+    print(f"{name}: {exp.exists()}")
+```
+### Custom Validation
+Override `_validate()` to add custom cache invalidation logic:
+```python
+class ModelWithValidation(furu.Furu[Path]):
+    checkpoint_name: str = "model.pt"
+    def _validate(self) -> bool:
+        # Return False to force re-computation
+        ckpt = self.furu_dir / self.checkpoint_name
+        return ckpt.exists() and ckpt.stat().st_size > 0
+    def _create(self) -> Path:
+        ...
+    def _load(self) -> Path:
+        ...
+```
+### Checking State Without Loading
+```python
+obj = TrainModel(lr=3e-4, steps=5000)
+# Check if cached result exists (runs _validate())
+if obj.exists():
+    print("Already computed!")
+# Get metadata without triggering computation
+metadata = obj.get_metadata()
+print(f"Hash: {obj._furu_hash}")
+print(f"Dir: {obj.furu_dir}")
+```
+### Serialization
+Furu objects can be serialized to/from dictionaries:
+```python
+obj = TrainModel(lr=3e-4, steps=5000)
+# Serialize to dict (for storage, transmission)
+data = obj.to_dict()
+# Reconstruct from dict
+obj2 = TrainModel.from_dict(data)
+# Get Python code representation (useful for logging)
+print(obj.to_python())
+# Output: TrainModel(lr=0.0003, steps=5000)
+```
+### Raw Directory
+For large files that shouldn't be versioned per-config, use the shared raw directory:
+```python
+class LargeDataProcessor(furu.Furu[Path]):
+    def _create(self) -> Path:
+        # self.raw_dir is shared across all configs
+        # Create a subfolder for isolation if needed
+        my_raw = self.raw_dir / self._furu_hash
+        my_raw.mkdir(exist_ok=True)
+        large_file = my_raw / "huge_dataset.bin"
+        # ... write large file ...
+        return large_file
+```
+### Version-Controlled Storage
+For artifacts that should be stored separately (e.g., checked into git):
+```python
+class VersionedConfig(furu.Furu[dict], version_controlled=True):
+    # Stored under $FURU_PATH/git/ instead of $FURU_PATH/data/
+    ...
+```
+## Logging
+Furu installs stdlib `logging` handlers that capture logs to per-artifact files.
+```python
+import logging
+import furu
+log = logging.getLogger(__name__)
+class MyPipeline(furu.Furu[str]):
+    def _create(self) -> str:
+        log.info("Starting computation...")  # Goes to furu.log
+        log.debug("Debug details...")
+        return "done"
+```
+### Console Output
+By default, furu logs to console using Rich in a compact format:
+```
+HHMMSS file.py:line message
+```
+Furu emits status messages like:
+```
+load_or_create TrainModel abc123def (missing->create)
+load_or_create TrainModel abc123def (success->load)
+```
+### Explicit Setup
+```python
+import furu
+# Eagerly install logging handlers (optional, happens automatically)
+furu.configure_logging()
+# Get the furu logger
+logger = furu.get_logger()
+```
+## Error Handling
+```python
+from furu import FuruComputeError, FuruWaitTimeout, FuruLockNotAcquired
+try:
+    result = obj.load_or_create()
+except FuruComputeError as e:
+    print(f"Computation failed: {e}")
+    print(f"State file: {e.state_path}")
+    print(f"Original error: {e.original_error}")
+except FuruWaitTimeout:
+    print("Timed out waiting for another process")
+except FuruLockNotAcquired:
+    print("Could not acquire lock")
+```
+## Submitit Integration
+Run computations on SLURM clusters via [submitit](https://github.com/facebookincubator/submitit):
+```python
+import submitit
+import furu
+executor = submitit.AutoExecutor(folder="submitit_logs")
+executor.update_parameters(
+    timeout_min=60,
+    slurm_partition="gpu",
+    gpus_per_node=1,
+)
+# Submit job and return immediately
+job = my_furu_obj.load_or_create(executor=executor)
+# Job ID is tracked in .furu/state.json
+print(job.job_id)
+```
+Furu handles preemption, requeuing, and state tracking automatically.
+## Dashboard
+The web dashboard provides experiment browsing, filtering, and dependency visualization.
+### Running the Dashboard
+```bash
+# Full dashboard with React frontend
+furu-dashboard serve
+# Or with options
+furu-dashboard serve --host 0.0.0.0 --port 8000 --reload
+# API server only (no frontend)
+furu-dashboard api
+```
+Or via Python:
+```bash
+python -m furu.dashboard serve
+```
+### API Endpoints
+| Endpoint | Description |
+|----------|-------------|
+| `GET /api/experiments` | List experiments with filtering/pagination |
+| `GET /api/experiments/{namespace}/{hash}` | Get experiment details |
+| `GET /api/experiments/{namespace}/{hash}/relationships` | Get dependencies |
+| `GET /api/stats` | Aggregate statistics |
+| `GET /api/dag` | Dependency graph for visualization |
+### Filtering
+The `/api/experiments` endpoint supports:
+- `result_status`: `absent`, `incomplete`, `success`, `failed`
+- `attempt_status`: `queued`, `running`, `success`, `failed`, `cancelled`, `preempted`, `crashed`
+- `namespace`: Filter by namespace prefix
+- `backend`: `local`, `submitit`
+- `hostname`, `user`: Filter by execution environment
+- `started_after`, `started_before`: ISO datetime filters
+- `config_filter`: Filter by config field (e.g., `lr=0.001`)
+## Configuration Reference
+### Environment Variables
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `FURU_PATH` | `./data-furu/` | Base storage directory |
+| `FURU_LOG_LEVEL` | `INFO` | Console verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
+| `FURU_IGNORE_DIFF` | `false` | Skip embedding git diff in metadata |
+| `FURU_POLL_INTERVAL_SECS` | `10` | Polling interval for queued/running jobs |
+| `FURU_WAIT_LOG_EVERY_SECS` | `10` | Interval between "waiting" log messages |
+| `FURU_STALE_AFTER_SECS` | `1800` | Consider running jobs stale after this duration |
+| `FURU_LEASE_SECS` | `120` | Compute lock lease duration |
+| `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs |
+| `FURU_PREEMPT_MAX` | `5` | Maximum submitit requeues on preemption |
+| `FURU_CANCELLED_IS_PREEMPTED` | `false` | Treat SLURM CANCELLED as preempted |
+| `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting |
+Local `.env` files are loaded automatically via `python-dotenv`, which is installed as a core dependency.
+### Programmatic Configuration
+```python
+import furu
+from pathlib import Path
+# Set/get root directory
+furu.set_furu_root(Path("/my/storage"))
+root = furu.get_furu_root()
+# Access config directly
+furu.FURU_CONFIG.ignore_git_diff = True
+furu.FURU_CONFIG.poll_interval = 5.0
+```
+### Class-Level Options
+```python
+class MyPipeline(furu.Furu[Path], version_controlled=True):
+    _max_wait_time_sec = 3600.0  # Wait up to 1 hour (default: 600)
+    ...
+```
+## Metadata
+Each artifact records:
+| Category | Fields |
+|----------|--------|
+| **Config** | `furu_python_def`, `furu_obj`, `furu_hash`, `furu_path` |
+| **Git** | `git_commit`, `git_branch`, `git_remote`, `git_patch`, `git_submodules` |
+| **Environment** | `timestamp`, `command`, `python_version`, `executable`, `platform`, `hostname`, `user`, `pid` |
+Access via:
+```python
+metadata = obj.get_metadata()
+print(metadata.git_commit)
+print(metadata.hostname)
+```
+## Public API
+```python
+from furu import (
+    # Core
+    Furu,
+    FuruList,
+    FURU_CONFIG,
+    # Configuration
+    get_furu_root,
+    set_furu_root,
+    # Errors
+    FuruError,
+    FuruComputeError,
+    FuruLockNotAcquired,
+    FuruWaitTimeout,
+    MISSING,
+    # Serialization
+    FuruSerializer,
+    # Storage
+    StateManager,
+    MetadataManager,
+    # Runtime
+    configure_logging,
+    get_logger,
+    load_env,
+    # Adapters
+    SubmititAdapter,
+    # Re-exports
+    chz,
+    submitit,
+    # Version
+    __version__,
+)
+```
+## Non-goals / Caveats
+- **Prototype status**: APIs and on-disk formats may change
+- **Not a workflow scheduler** (for now): It's a lightweight caching layer for Python code
+- **No distributed coordination**: Lock files work on shared filesystems but aren't distributed

furu-0.0.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,36 @@
+furu/__init__.py,sha256=fhSViHOJ9W-64swuaBFdZOfq0ZMuSj6LSiX2ZfcjhD8,1736
+furu/config.py,sha256=F_Bh9vs0Dq5-3fXMylEBbm7F9-Q2n9aLt1iTb-RAl-4,3538
+furu/errors.py,sha256=d1Kp5O9cVoQwXmQeZC-35u7xldw_c3ryYXrbVfv-Lws,2001
+furu/migrate.py,sha256=x_Uh7oXAv40L5ZAHJhdnw-o7ct56rWUSZLbHHfRObeY,1313
+furu/migration.py,sha256=A91dng1XRn1N_xJrmBhh-OvU22GlseqOh6PmVhNZh3w,31307
+furu/adapters/__init__.py,sha256=onLzEj9hccPK15g8a8va2T19nqQXoxb9rQlJIjKSKnE,69
+furu/adapters/submitit.py,sha256=OuCP0pEkO1kI4WLcSUvMqXwVCCy-8uwUE7v1qvkLZnU,6214
+furu/core/__init__.py,sha256=gzFMgaAYnffofQksR6E1NegiwBF99h0ysn_QeD5wIhw,82
+furu/core/furu.py,sha256=MjwpJtS0T8aRtLsFiiVTB8oh5UtIQrF3ohzYbD9XFIc,39047
+furu/core/list.py,sha256=hwwlvqaKB1grPBGKXc15scF1RCqDvWc0AoDbhKlN4W0,3625
+furu/dashboard/__init__.py,sha256=zNVddterfpjQtcpihIl3TRJdgdjOHYR0uO0cOSaGABg,172
+furu/dashboard/__main__.py,sha256=cNs65IMl4kwZFpxa9xLXmFSy4-M5D1X1ZBfTDxW11vo,144
+furu/dashboard/main.py,sha256=8JYc79gbJ9MjvIRdGDuAcR2Mme9kyY4ryZb11ZZ4uVA,4069
+furu/dashboard/scanner.py,sha256=qXCvkvFByBc09TUdth5Js67rS8zpRBlRkVQ9dJ7YbdE,34696
+furu/dashboard/api/__init__.py,sha256=9-WyWOt-VQJJBIsdW29D-7JvR-BivJd9G_SRaRptCz0,80
+furu/dashboard/api/models.py,sha256=SCu-kLJyW7dwSKswdgQNS3wQuj25ORs0pHkvX9xBbo4,4767
+furu/dashboard/api/routes.py,sha256=iZez0khIUvbgfeSoy1BJvmoEEbgUrdSQA8SN8iAIkM8,4813
+furu/dashboard/frontend/dist/favicon.svg,sha256=3TSLHNZITFe3JTPoYHZnDgiGsJxIzf39v97l2A1Hodo,369
+furu/dashboard/frontend/dist/index.html,sha256=o3XhvegC9rBpUiWNfXdCHqf_tg2795nob1NI0nBpFS4,810
+furu/dashboard/frontend/dist/assets/index-CbdDfSOZ.css,sha256=k3kxCuCqyxKgIv4M9itoAImMU8NMzkzAdTNQ4v_4fMU,34612
+furu/dashboard/frontend/dist/assets/index-DDv_TYB_.js,sha256=FH0uqY7P7vm3rikvDaJ504FZh0Z97nCkVcIglK-ElAY,543928
+furu/runtime/__init__.py,sha256=fQqE7wUuWunLD73Vm3lss7BFSij3UVxXOKQXBAOS8zw,504
+furu/runtime/env.py,sha256=o1phhoTDhOnhALr3Ozf1ldrdvk2ClyEvBWbebHM6BXg,160
+furu/runtime/logging.py,sha256=JkuTFtbv6dYk088P6_Bga46bnKSDt-ElAqmiY86hMys,9773
+furu/runtime/tracebacks.py,sha256=PGCuOq8QkWSoun791gjUXM8frOP2wWV8IBlqaA4nuGE,1631
+furu/serialization/__init__.py,sha256=L7oHuIbxdSh7GCY3thMQnDwlt_ERH-TMy0YKEAZLrPs,341
+furu/serialization/migrations.py,sha256=HD5g8JCBdH3Y0rHJYc4Ug1IXBVcUDxLE7nfiXZnXcUE,7772
+furu/serialization/serializer.py,sha256=THWqHzpSwXj3Nj3PZ3JhwlWJ8sgvVyGrwBEDB_EWuAE,8355
+furu/storage/__init__.py,sha256=cLLL-GPpSu9C72Mdk5S6TGu3g-SnBfEuxzfpx5ZJPtw,616
+furu/storage/metadata.py,sha256=u4F4V1dDZtsiniO5xDCy8YxJZxGnreriYnJ1fOvQ2Bg,9232
+furu/storage/migration.py,sha256=Ars9aYwvhXpIBDf6L9ojGjp_l656-RfdtEAFKN0sZZY,2640
+furu/storage/state.py,sha256=tbVX74P6nVHhL1EBztgKp9BCe0UHpW0nyGkSeJXPejs,37581
+furu-0.0.1.dist-info/METADATA,sha256=mGC5hO68kGPxMUepH1Cnws-TDowOyCi1cgJ36pgTTOA,13294
+furu-0.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+furu-0.0.1.dist-info/entry_points.txt,sha256=pIkNLYq-gaxYbh_lATWl31BHTrKBg1jN6jK1AgN6-QY,59
+furu-0.0.1.dist-info/RECORD,,

furu-0.0.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.28.0
+Root-Is-Purelib: true
+Tag: py3-none-any

furu-0.0.1.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+furu-dashboard = furu.dashboard.main:cli