fractfs 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractfs/__init__.py +172 -0
- fractfs/backend.py +112 -0
- fractfs/config.py +160 -0
- fractfs/fsspec_backend.py +61 -0
- fractfs/patterns.py +63 -0
- fractfs/provisioner.py +217 -0
- fractfs/resolver.py +54 -0
- fractfs/sync.py +244 -0
- fractfs-0.1.0.dist-info/METADATA +284 -0
- fractfs-0.1.0.dist-info/RECORD +12 -0
- fractfs-0.1.0.dist-info/WHEEL +4 -0
- fractfs-0.1.0.dist-info/licenses/LICENSE +201 -0
fractfs/__init__.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""fractfs — drop-in tiered file storage for apps on ephemeral nodes.
|
|
2
|
+
|
|
3
|
+
The whole drop-in surface is three calls::
|
|
4
|
+
|
|
5
|
+
import fractfs
|
|
6
|
+
fractfs.init() # load config, provision symlinks, restore, start syncing
|
|
7
|
+
fractfs.sync_now() # optional: force a checkpoint (e.g. before shutdown)
|
|
8
|
+
fractfs.status() # optional: inspect tiers and last sync time
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import atexit
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
|
+
|
|
19
|
+
from .backend import make_backend
|
|
20
|
+
from .config import Config, load_config
|
|
21
|
+
from .provisioner import ClobberError, provision, warnings_for
|
|
22
|
+
from .resolver import Tier, resolve
|
|
23
|
+
from .sync import SyncDaemon, SyncEngine
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"init",
|
|
27
|
+
"sync_now",
|
|
28
|
+
"status",
|
|
29
|
+
"shutdown",
|
|
30
|
+
"resolve",
|
|
31
|
+
"Tier",
|
|
32
|
+
"Config",
|
|
33
|
+
"load_config",
|
|
34
|
+
"ClobberError",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
__version__ = "0.1.0"
|
|
38
|
+
|
|
39
|
+
log = logging.getLogger("fractfs")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class _Runtime:
    """Process-wide state recorded by ``init()`` and read by the other API calls."""

    # Configuration object returned by load_config() during init().
    cfg: Config
    # Annotated as SyncEngine, but init() stores a _NullEngine here when the
    # config is not provisionable (no volume root).
    engine: SyncEngine
    # Background sync daemon; stays None when init() did not start one, and is
    # reset to None by shutdown().
    daemon: Optional[SyncDaemon] = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
_RUNTIME: Optional[_Runtime] = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def init(
    root: Optional[os.PathLike] = None,
    *,
    force: bool = False,
    start_daemon: bool = True,
    restore: bool = True,
) -> Config:
    """Initialise fractfs: load config, provision symlinks, restore, start syncing.

    Blocks on restore before returning so the app never reads cold state.
    Intended to be called once at startup. If it is called again, the daemon
    started by the previous call is stopped (with a final checkpoint) before the
    new runtime is installed, so two daemons never run side by side.

    Args:
        root: Application root, forwarded to ``load_config``.
        force: Forwarded to ``provision``; lets provisioning migrate a non-empty
            real local dir into the Volume (see :class:`ClobberError`).
        start_daemon: Start the background sync daemon when an engine exists and
            ``cfg.sync_interval`` is positive.
        restore: Run ``engine.restore()`` before returning.

    Returns:
        The loaded :class:`Config`.
    """
    global _RUNTIME

    cfg = load_config(root)
    engine = SyncEngine(cfg, backend=make_backend(cfg)) if cfg.is_provisionable() else None

    # A repeated init() must not orphan the previous runtime's daemon: once
    # _RUNTIME is overwritten below, shutdown() could no longer reach it.
    if _RUNTIME is not None and _RUNTIME.daemon is not None:
        shutdown(final_sync=True)

    for warning in warnings_for(cfg):
        log.warning("fractfs config: %s", warning)

    if engine is not None:
        for action in provision(cfg, force=force):
            log.debug("provision: %s", action)
        # Identify the deployed bundle before restore so it's excluded from the
        # checkpoint (re-supplied from the image on every cold start anyway).
        if cfg.auto_ignore_bundle:
            bundle = engine.detect_bundle()
            if bundle:
                log.info("fractfs auto-ignoring %d deploy-bundle file(s)", len(bundle))
        # Cold-start ordering: restore must finish before the app reads anything.
        if restore:
            restored = engine.restore()
            if restored:
                log.info("fractfs restored %d checkpointed file(s)", len(restored))
    else:
        log.warning(
            "fractfs: no fractfs_VOLUME_ROOT set; running in passthrough mode "
            "(no redirect, no checkpoint)."
        )

    daemon = None
    if engine is not None and start_daemon and cfg.sync_interval > 0:
        daemon = SyncDaemon(engine, cfg.sync_interval)
        daemon.start()
        # Unregister first so exactly one exit hook exists however many times
        # init() runs (unregister is a no-op when nothing is registered).
        atexit.unregister(_atexit_stop)
        atexit.register(_atexit_stop)

    if engine is None:
        # Passthrough mode: a no-op engine keeps sync_now()/status() callable.
        _RUNTIME = _Runtime(cfg=cfg, engine=_NullEngine(cfg))  # type: ignore[arg-type]
    else:
        _RUNTIME = _Runtime(cfg=cfg, engine=engine, daemon=daemon)
    return cfg
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def sync_now() -> List[str]:
    """Run a checkpoint right away and return the rel paths that were copied.

    Raises ``RuntimeError`` (via ``_require_runtime``) when ``init()`` has not
    been called yet.
    """
    return _require_runtime().engine.checkpoint()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def status() -> Dict[str, Any]:
    """Summarise the active configuration, per-directory tiers and sync state.

    Returns a plain dict; ``dirs`` maps each configured directory to the value
    of the tier that ``resolve`` assigns to it.
    """
    runtime = _require_runtime()
    conf = runtime.cfg
    tiers: Dict[str, str] = {path: resolve(path, conf).value for path in conf.dir_paths}

    report: Dict[str, Any] = {}
    report["backend"] = conf.backend
    report["volume_root"] = str(conf.volume_root) if conf.volume_root else None
    report["scratch"] = str(conf.scratch)
    report["sync_interval"] = conf.sync_interval
    report["provisionable"] = conf.is_provisionable()
    report["daemon_running"] = runtime.daemon is not None
    # getattr with a default: the engine attributes are treated as optional.
    report["last_sync_time"] = getattr(runtime.engine, "last_sync_time", None)
    report["auto_ignore_bundle"] = conf.auto_ignore_bundle
    report["bundle_file_count"] = len(getattr(runtime.engine, "bundle_paths", set()))
    report["dirs"] = tiers
    report["ignore_patterns"] = list(conf.ignore_patterns)
    report["local_patterns"] = list(conf.local_patterns)
    report["warnings"] = warnings_for(conf)
    return report
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def shutdown(*, final_sync: bool = True) -> None:
    """Stop the sync daemon if one is running; ``final_sync`` is passed to its ``stop``.

    Does nothing when ``init()`` has not run or no daemon is active.
    """
    runtime = _RUNTIME
    if runtime is None:
        return
    daemon = runtime.daemon
    if daemon is None:
        return
    daemon.stop(final_sync=final_sync)
    # Forget the daemon so a later call is a no-op.
    runtime.daemon = None
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _require_runtime() -> _Runtime:
    """Return the runtime installed by ``init()``; raise ``RuntimeError`` if there is none."""
    runtime = _RUNTIME
    if runtime is not None:
        return runtime
    raise RuntimeError("fractfs.init() has not been called")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _atexit_stop() -> None:
    """Exit hook: stop the daemon with a final checkpoint, never raising.

    Registered by ``init()`` through ``atexit``. Failures are logged instead of
    propagated so interpreter shutdown is not disturbed.
    """
    try:
        shutdown(final_sync=True)
    except Exception:  # best-effort on interpreter shutdown
        # A failed final checkpoint can mean the latest changes were not
        # persisted, so leave a trace instead of swallowing it silently.
        log.warning("fractfs: final sync at interpreter exit failed", exc_info=True)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class _NullEngine:
|
|
158
|
+
"""Stand-in when there is no Volume: checkpoint/restore are no-ops."""
|
|
159
|
+
|
|
160
|
+
def __init__(self, cfg: Config):
|
|
161
|
+
self.cfg = cfg
|
|
162
|
+
self.last_sync_time = None
|
|
163
|
+
self.bundle_paths: set = set()
|
|
164
|
+
|
|
165
|
+
def detect_bundle(self) -> set:
|
|
166
|
+
return set()
|
|
167
|
+
|
|
168
|
+
def checkpoint(self) -> List[str]:
|
|
169
|
+
return []
|
|
170
|
+
|
|
171
|
+
def restore(self, **_: Any) -> List[str]:
|
|
172
|
+
return []
|
fractfs/backend.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Backend abstraction for the remote/durable store.
|
|
2
|
+
|
|
3
|
+
The primary target (Databricks Volumes) is a POSIX FUSE mount, and ``local``
|
|
4
|
+
backing is just a directory, so both are served by :class:`PosixBackend`. The
|
|
5
|
+
``fsspec``-backed path (``s3`` and friends) is wired through :class:`FsspecBackend`
|
|
6
|
+
which is imported lazily so the base install stays dependency-light.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import shutil
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Iterable, Protocol, runtime_checkable
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# runtime_checkable allows isinstance() checks against this protocol; such
# checks only verify that the methods exist, not their signatures.
@runtime_checkable
class Backend(Protocol):
    """Minimal surface the provisioner and sync daemon need from a store."""

    # Report whether ``path`` exists in the store.
    def exists(self, path: str) -> bool: ...

    # Create directory ``path``. PosixBackend also creates missing parents and
    # tolerates an already-existing directory.
    def makedirs(self, path: str) -> None: ...

    def put_file(self, local_path: os.PathLike, remote_path: str) -> None:
        """Copy a local file to ``remote_path``, atomically where possible."""

    def get_file(self, remote_path: str, local_path: os.PathLike) -> None:
        """Copy ``remote_path`` down to a local file, atomically where possible."""

    def list_files(self, path: str) -> Iterable[str]:
        """Yield remote-relative paths of every file under ``path`` (recursive)."""

    # Delete ``path`` from the store. PosixBackend removes directories
    # recursively and ignores a missing path.
    def remove(self, path: str) -> None: ...
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class PosixBackend:
    """Backend over a POSIX-visible root (a Volume mount or a local directory).

    ``root`` is the absolute remote root; ``path`` arguments to every method are
    interpreted relative to it (or accepted as already-absolute paths under it).
    """

    def __init__(self, root: os.PathLike, *, atomic_rename: bool = True):
        self.root = Path(root)
        # FUSE mounts don't always honour atomic rename; callers can disable it
        # to fall back to a plain copy (see plan: FUSE atomicity open question).
        self.atomic_rename = atomic_rename

    def _abs(self, path: str) -> Path:
        """Join a relative ``path`` onto ``root``; absolute paths pass through unchanged."""
        p = Path(path)
        return p if p.is_absolute() else self.root / p

    def exists(self, path: str) -> bool:
        """Return whether ``path`` exists under the root."""
        return self._abs(path).exists()

    def makedirs(self, path: str) -> None:
        """Create ``path`` and any missing parents; an existing directory is fine."""
        self._abs(path).mkdir(parents=True, exist_ok=True)

    def put_file(self, local_path: os.PathLike, remote_path: str) -> None:
        """Copy ``local_path`` to ``remote_path``, creating parent directories."""
        dst = self._abs(remote_path)
        dst.parent.mkdir(parents=True, exist_ok=True)
        self._write_atomic(Path(local_path), dst)

    def get_file(self, remote_path: str, local_path: os.PathLike) -> None:
        """Copy ``remote_path`` to ``local_path``, creating parent directories."""
        dst = Path(local_path)
        dst.parent.mkdir(parents=True, exist_ok=True)
        self._write_atomic(self._abs(remote_path), dst)

    def list_files(self, path: str) -> Iterable[str]:
        """Yield every file under ``path`` recursively, as paths relative to ``root``.

        Yields nothing when ``path`` does not exist.
        """
        base = self._abs(path)
        if not base.exists():
            return
        for dirpath, _dirnames, filenames in os.walk(base):
            for name in filenames:
                full = Path(dirpath) / name
                # Relative to the backend root, not to ``path``.
                yield str(full.relative_to(self.root))

    def remove(self, path: str) -> None:
        """Delete ``path``: real directories recursively, files and symlinks by unlink.

        A missing path is ignored.
        """
        p = self._abs(path)
        if p.is_dir() and not p.is_symlink():
            shutil.rmtree(p)
        elif p.exists() or p.is_symlink():
            # The is_symlink() arm catches dangling symlinks, for which
            # exists() is False.
            p.unlink()

    def _write_atomic(self, src: Path, dst: Path) -> None:
        """Copy ``src`` to ``dst`` via a temp file + rename when atomic_rename.

        The temp file sits next to ``dst`` so the final ``os.replace`` stays on
        one filesystem. Its name carries both the process id and the thread id,
        so two threads of one process writing the same destination never share
        a temp path.
        """
        if not self.atomic_rename:
            shutil.copy2(src, dst)
            return
        import threading  # local import: only needed for the temp-file name

        tmp = dst.with_name(
            f".{dst.name}.fractfs.tmp.{os.getpid()}.{threading.get_ident()}"
        )
        try:
            shutil.copy2(src, tmp)
            os.replace(tmp, dst)
        finally:
            # After a successful replace the temp file is already gone.
            try:
                tmp.unlink()
            except FileNotFoundError:
                pass
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def make_backend(cfg) -> Backend:
    """Build the backend implementation selected by ``cfg.backend``.

    ``volumes`` and ``local`` map to :class:`PosixBackend` rooted at
    ``cfg.volume_root``; ``s3`` maps to the fsspec-based backend.

    Raises:
        ValueError: for a POSIX backend without a volume root, or for a backend
            name with no implementation.
    """
    kind = cfg.backend
    if kind == "s3":
        # Imported only on demand so the optional dependency is not needed
        # for the POSIX backends.
        from .fsspec_backend import FsspecBackend

        return FsspecBackend(cfg)
    if kind not in ("volumes", "local"):
        raise ValueError(f"no backend implementation for {cfg.backend!r}")
    if cfg.volume_root is None:
        raise ValueError(
            f"backend {cfg.backend!r} requires fractfs_VOLUME_ROOT to be set"
        )
    return PosixBackend(cfg.volume_root)
|
fractfs/config.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Config loader: parse ``.fractfs.toml`` + environment into a ``Config``.
|
|
2
|
+
|
|
3
|
+
Env vars override the TOML file for the scalar fields so deployments can tune
|
|
4
|
+
behaviour (backend, volume root, cadence) without editing the repo.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import List, Optional
|
|
14
|
+
|
|
15
|
+
import pathspec
|
|
16
|
+
|
|
17
|
+
if sys.version_info >= (3, 11):
|
|
18
|
+
import tomllib
|
|
19
|
+
else: # pragma: no cover - exercised only on <3.11
|
|
20
|
+
import tomli as tomllib
|
|
21
|
+
|
|
22
|
+
CONFIG_FILENAME = ".fractfs.toml"
|
|
23
|
+
|
|
24
|
+
# Backends we know how to provision against.
|
|
25
|
+
BACKENDS = ("volumes", "s3", "local")
|
|
26
|
+
|
|
27
|
+
# pathspec renamed the gitignore factory; prefer the current name, fall back for
# older pathspec releases that only ship "gitwildmatch".
try:
    # Probe with an empty pattern list: only whether the factory name is
    # accepted matters here, the resulting spec is discarded.
    pathspec.PathSpec.from_lines("gitignore", [])
    _PATHSPEC_FACTORY = "gitignore"
# KeyError is a subclass of LookupError, so listing both is redundant but harmless.
except (ValueError, KeyError, LookupError):  # pragma: no cover - old pathspec
    _PATHSPEC_FACTORY = "gitwildmatch"
|
|
34
|
+
|
|
35
|
+
_DEFAULT_SYNC_INTERVAL = 300
|
|
36
|
+
_DEFAULT_CHECKPOINT_SUBDIR = "_checkpoint"
|
|
37
|
+
_DEFAULT_SCRATCH = "/tmp/fractfs"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class Config:
    """Resolved configuration.

    ``root`` is the application root (the dir holding ``.fractfs.toml``); all
    ``dir_paths`` and resolver inputs are relative to it.

    Raises:
        ValueError: from ``__post_init__`` when ``backend`` is not one of
            ``BACKENDS``.
    """

    root: Path
    backend: str = "local"
    volume_root: Optional[Path] = None
    scratch: Path = Path(_DEFAULT_SCRATCH)
    sync_interval: int = _DEFAULT_SYNC_INTERVAL
    checkpoint_subdir: str = _DEFAULT_CHECKPOINT_SUBDIR
    dir_paths: List[str] = field(default_factory=list)
    ignore_patterns: List[str] = field(default_factory=list)
    local_patterns: List[str] = field(default_factory=list)
    use_content_hash: bool = False
    auto_ignore_bundle: bool = True

    # Compiled in __post_init__ from ignore_patterns / local_patterns.
    ignore_spec: pathspec.PathSpec = field(init=False)
    local_spec: pathspec.PathSpec = field(init=False)

    def __post_init__(self) -> None:
        """Coerce path fields, normalise ``dir_paths``, compile specs, validate backend."""
        self.root = Path(self.root).resolve()
        if self.volume_root is not None:
            # NOTE(review): Path() collapses the "//" of URL-style roots such as
            # "s3://bucket/prefix" into "s3:/bucket/prefix" - confirm the s3
            # backend still receives a usable URL.
            self.volume_root = Path(self.volume_root)
        self.scratch = Path(self.scratch)
        # Normalize dir paths to forward-slash relative strings (drop "./", trailing "/").
        normalized = (self._normalize_dir_path(d) for d in self.dir_paths)
        self.dir_paths = [d for d in normalized if d]
        self.ignore_spec = pathspec.PathSpec.from_lines(_PATHSPEC_FACTORY, self.ignore_patterns)
        self.local_spec = pathspec.PathSpec.from_lines(_PATHSPEC_FACTORY, self.local_patterns)
        if self.backend not in BACKENDS:
            raise ValueError(
                f"unknown fractfs backend {self.backend!r}; expected one of {BACKENDS}"
            )

    @staticmethod
    def _normalize_dir_path(raw: str) -> str:
        """Return ``raw`` with forward slashes, no leading "./" and no edge slashes.

        Backslashes are converted before slashes are stripped, so a Windows-style
        trailing separator ("data\\") is removed too. The result is empty when
        nothing but separators remains.
        """
        path = raw.replace("\\", "/").strip("/")
        while path.startswith("./"):
            path = path[2:].lstrip("/")
        return path

    # -- derived paths -----------------------------------------------------

    @property
    def checkpoint_root(self) -> Optional[Path]:
        """Absolute path under the Volume where LOCAL_SYNCED checkpoints land."""
        if self.volume_root is None:
            return None
        return self.volume_root / self.checkpoint_subdir

    def is_provisionable(self) -> bool:
        """Whether dir-redirect / back-symlink provisioning can run.

        Requires a Volume root; without one (pure ``local`` backend, no mount)
        only checkpoint/restore against a local volume_root is meaningful.
        """
        return self.volume_root is not None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _env(name: str) -> Optional[str]:
|
|
96
|
+
return os.environ.get(f"fractfs_{name}") or os.environ.get(f"FRACTFS_{name}")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def load_config(root: Optional[os.PathLike] = None) -> Config:
    """Load config from ``<root>/.fractfs.toml`` with env-var overrides.

    ``root`` defaults to ``$fractfs_ROOT`` then the current working directory.
    For each scalar setting a non-empty environment variable wins over the TOML
    value, which wins over the built-in default. The list settings (``dirs``,
    ``ignore``, ``local``) come from the TOML file only.

    Raises:
        ValueError: when the sync interval is not an integer (and, from
            ``Config``, when the backend name is unknown).
    """
    if root is None:
        root = _env("ROOT") or os.getcwd()
    root = Path(root).resolve()

    data = _read_toml(root / CONFIG_FILENAME)

    dirs = data.get("dirs", {}).get("paths", []) or []
    ignore = data.get("ignore", {}).get("patterns", []) or []
    local = data.get("local", {}).get("patterns", []) or []

    backend = _env("BACKEND") or data.get("backend") or "local"

    volume_root = _env("VOLUME_ROOT") or data.get("volume_root")
    scratch = _env("SCRATCH") or data.get("scratch") or _DEFAULT_SCRATCH
    checkpoint_subdir = (
        _env("CHECKPOINT_SUBDIR") or data.get("checkpoint_subdir") or _DEFAULT_CHECKPOINT_SUBDIR
    )

    # "is None" (not truthiness) so an explicit 0 in the TOML file survives.
    sync_interval_raw = _env("SYNC_INTERVAL") or data.get("sync_interval")
    if sync_interval_raw is None:
        sync_interval = _DEFAULT_SYNC_INTERVAL
    else:
        try:
            sync_interval = int(sync_interval_raw)
        except ValueError as exc:
            # Same exception type as before, but naming the offending setting.
            raise ValueError(
                f"fractfs sync_interval must be an integer, got {sync_interval_raw!r}"
            ) from exc

    hash_raw = _env("CONTENT_HASH") or data.get("content_hash")
    use_content_hash = _as_bool(hash_raw, default=False)

    # An empty env value counts as unset, like every other override above;
    # otherwise an empty FRACTFS_AUTO_IGNORE_BUNDLE would silently mean False.
    bundle_raw = _env("AUTO_IGNORE_BUNDLE") or None
    # Key-presence check so an explicit ``false`` in the TOML file is honoured.
    if bundle_raw is None and "auto_ignore_bundle" in data:
        bundle_raw = data.get("auto_ignore_bundle")
    auto_ignore_bundle = _as_bool(bundle_raw, default=True)

    return Config(
        root=root,
        backend=backend,
        volume_root=volume_root,
        scratch=scratch,
        sync_interval=sync_interval,
        checkpoint_subdir=checkpoint_subdir,
        dir_paths=list(dirs),
        ignore_patterns=list(ignore),
        local_patterns=list(local),
        use_content_hash=use_content_hash,
        auto_ignore_bundle=auto_ignore_bundle,
    )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _read_toml(path: Path) -> dict:
|
|
149
|
+
if not path.exists():
|
|
150
|
+
return {}
|
|
151
|
+
with open(path, "rb") as fh:
|
|
152
|
+
return tomllib.load(fh)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _as_bool(value, default: bool) -> bool:
|
|
156
|
+
if value is None:
|
|
157
|
+
return default
|
|
158
|
+
if isinstance(value, bool):
|
|
159
|
+
return value
|
|
160
|
+
return str(value).strip().lower() in ("1", "true", "yes", "on")
|
fractfs/fsspec_backend.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""fsspec-backed implementation (S3 / ADLS / GCS) — optional extra.
|
|
2
|
+
|
|
3
|
+
Kept in its own module so importing fractfs never pulls in ``fsspec``/``s3fs``
|
|
4
|
+
unless an fsspec backend is actually requested. Install with ``fractfs[s3]``.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from typing import Iterable
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FsspecBackend:
    """Backend over any fsspec filesystem rooted at ``cfg.volume_root`` URL.

    ``volume_root`` here is an fsspec URL such as ``s3://bucket/prefix``.
    """

    def __init__(self, cfg):
        """Resolve the filesystem and root path for ``cfg.volume_root``.

        Raises:
            ImportError: when ``fsspec`` is not installed.
            ValueError: when ``cfg.volume_root`` is None.
        """
        try:
            import fsspec
        except ImportError as exc:  # pragma: no cover - depends on optional extra
            raise ImportError(
                "the s3/fsspec backend requires the 'fsspec' extra: pip install 'fractfs[s3]'"
            ) from exc
        if cfg.volume_root is None:
            raise ValueError("fsspec backend requires fractfs_VOLUME_ROOT (an fsspec URL)")
        self.url_root = str(cfg.volume_root).rstrip("/")
        self.fs, self.path_root = fsspec.core.url_to_fs(self.url_root)

    def _abs(self, path: str) -> str:
        """Return ``path`` as a filesystem path under ``path_root``.

        A path that is ``path_root`` itself, or lies below it, is returned
        unchanged; anything else is joined onto ``path_root``. The comparison
        respects path-segment boundaries, so ``bucket/prefix2/x`` is not taken
        for a path under ``bucket/prefix``.
        """
        path = str(path)
        root = self.path_root
        # An empty root matches everything, as a bare prefix test would.
        if not root or path == root or path.startswith(root.rstrip("/") + "/"):
            return path
        return f"{root}/{path.lstrip('/')}"

    def exists(self, path: str) -> bool:
        """Return whether ``path`` exists on the filesystem."""
        return self.fs.exists(self._abs(path))

    def makedirs(self, path: str) -> None:
        """Create ``path``; an existing directory is not an error."""
        self.fs.makedirs(self._abs(path), exist_ok=True)

    def put_file(self, local_path: os.PathLike, remote_path: str) -> None:
        """Upload ``local_path`` to ``remote_path``."""
        self.fs.put_file(str(local_path), self._abs(remote_path))

    def get_file(self, remote_path: str, local_path: os.PathLike) -> None:
        """Download ``remote_path`` to ``local_path``, creating the local parent dir."""
        # dirname is "" for a bare filename; "." keeps makedirs valid.
        os.makedirs(os.path.dirname(local_path) or ".", exist_ok=True)
        self.fs.get_file(self._abs(remote_path), str(local_path))

    def list_files(self, path: str) -> Iterable[str]:
        """Yield every file under ``path`` as a path relative to ``path_root``.

        Yields nothing when ``path`` does not exist.
        """
        base = self._abs(path)
        if not self.fs.exists(base):
            return
        for full in self.fs.find(base):
            # Drop the root prefix so callers get root-relative paths.
            rel = full[len(self.path_root):].lstrip("/")
            yield rel

    def remove(self, path: str) -> None:
        """Delete ``path`` recursively; a missing path is ignored."""
        target = self._abs(path)
        if self.fs.exists(target):
            self.fs.rm(target, recursive=True)
|
fractfs/patterns.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Static analysis of gitignore patterns, used to decide what can be pinned.
|
|
2
|
+
|
|
3
|
+
A ``local``/``ignore`` file inside a Volume-redirected ``[dirs]`` directory only
|
|
4
|
+
stays node-local if a back-symlink exists *before* the write. We can pre-create
|
|
5
|
+
that symlink for any pattern naming a concrete location; we cannot for a glob,
|
|
6
|
+
because the filename isn't known until the app creates it.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import List, Tuple
|
|
12
|
+
|
|
13
|
+
from .resolver import _under_any_dir
|
|
14
|
+
|
|
15
|
+
# gitignore wildcard metacharacters. A pattern containing any of these names a
|
|
16
|
+
# set of paths, not one path, so its back-symlink location can't be predicted.
|
|
17
|
+
_GLOB_META = "*?["
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def is_glob(pattern: str) -> bool:
    """True when ``pattern`` contains at least one character from ``_GLOB_META``."""
    for meta in _GLOB_META:
        if meta in pattern:
            return True
    return False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def is_negation(pattern: str) -> bool:
    """True when the first non-whitespace character of ``pattern`` is ``!``."""
    return pattern.lstrip()[:1] == "!"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def is_dir_pattern(pattern: str) -> bool:
    """True when ``pattern``, ignoring trailing whitespace, ends with a slash."""
    return pattern.rstrip()[-1:] == "/"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def is_anchored(pattern: str) -> bool:
    """Whether ``pattern`` is pinned to the root in the gitignore sense.

    That is the case when it starts with a slash or has a slash anywhere other
    than at its very end; surrounding whitespace is ignored.
    """
    body = pattern.strip()
    return body.startswith("/") or "/" in body.rstrip("/")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def is_concrete(pattern: str) -> bool:
    """Whether ``pattern`` is an exact path/name a back-symlink can be pre-created for.

    Blank patterns, negations and globs are not concrete.
    """
    if not pattern.strip():
        return False
    return not (is_negation(pattern) or is_glob(pattern))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def pin_targets(pattern: str, dir_paths: List[str]) -> List[Tuple[str, bool]]:
    """Compute the back-symlink targets ``pattern`` implies inside ``dir_paths``.

    Each result is a ``(rel_path, is_dir)`` tuple. Globs and negations give an
    empty list because their location cannot be predicted. So do anchored
    paths that fall outside every Volume dir: they already live in the plain
    local tree and need no symlink.
    """
    if not is_concrete(pattern):
        return []
    target = pattern.strip().strip("/")
    if not target:
        return []
    wants_dir = is_dir_pattern(pattern)
    if not is_anchored(pattern):
        # Unanchored bare name (e.g. "manifest.json", ".locks/"): the common
        # case is one at the top of each Volume dir, so reserve a slot there.
        return [(f"{d}/{target}", wants_dir) for d in dir_paths]
    if _under_any_dir(target, dir_paths):
        return [(target, wants_dir)]
    return []
|