ntask 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ntask/__init__.py +21 -0
- ntask/__main__.py +6 -0
- ntask/_cache/__init__.py +178 -0
- ntask/_cache/body.py +39 -0
- ntask/_cache/diff.py +127 -0
- ntask/_cache/hash.py +43 -0
- ntask/_cache/key.py +68 -0
- ntask/_cache/manifest.py +58 -0
- ntask/_cache/outputs.py +86 -0
- ntask/_cache/store.py +116 -0
- ntask/_cli.py +475 -0
- ntask/_cli_args.py +98 -0
- ntask/_cli_docstring.py +55 -0
- ntask/_cli_format.py +76 -0
- ntask/_cli_why.py +122 -0
- ntask/_config.py +62 -0
- ntask/_coordinator.py +56 -0
- ntask/_dag.py +120 -0
- ntask/_depends.py +119 -0
- ntask/_discovery.py +52 -0
- ntask/_errors.py +27 -0
- ntask/_executor.py +221 -0
- ntask/_registry.py +62 -0
- ntask/_remote/__init__.py +48 -0
- ntask/_remote/_tar.py +61 -0
- ntask/_remote/base.py +20 -0
- ntask/_remote/gcs.py +64 -0
- ntask/_remote/http.py +89 -0
- ntask/_remote/local_fs.py +48 -0
- ntask/_remote/s3.py +101 -0
- ntask/_render/__init__.py +5 -0
- ntask/_render/base.py +14 -0
- ntask/_render/log.py +38 -0
- ntask/_render/rich.py +49 -0
- ntask/_render/tui.py +191 -0
- ntask/_shell.py +238 -0
- ntask/_task.py +159 -0
- ntask/_watch.py +147 -0
- ntask-1.0.0.dist-info/METADATA +172 -0
- ntask-1.0.0.dist-info/RECORD +44 -0
- ntask-1.0.0.dist-info/WHEEL +5 -0
- ntask-1.0.0.dist-info/entry_points.txt +2 -0
- ntask-1.0.0.dist-info/licenses/LICENSE +28 -0
- ntask-1.0.0.dist-info/top_level.txt +1 -0
ntask/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""ntask: a Python-native task runner with content-hash caching and DAG execution."""
|
|
2
|
+
|
|
3
|
+
from ._depends import depends
|
|
4
|
+
from ._errors import CycleError, DiscoveryError, NtaskError, ShellError
|
|
5
|
+
from ._shell import ShellResult, shell
|
|
6
|
+
from ._task import cached, group, task
|
|
7
|
+
|
|
8
|
+
__version__ = "1.0.0"
|
|
9
|
+
__all__ = [
|
|
10
|
+
"CycleError",
|
|
11
|
+
"DiscoveryError",
|
|
12
|
+
"NtaskError",
|
|
13
|
+
"ShellError",
|
|
14
|
+
"ShellResult",
|
|
15
|
+
"__version__",
|
|
16
|
+
"cached",
|
|
17
|
+
"depends",
|
|
18
|
+
"group",
|
|
19
|
+
"shell",
|
|
20
|
+
"task",
|
|
21
|
+
]
|
ntask/__main__.py
ADDED
ntask/_cache/__init__.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import platform
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .._remote.base import RemoteBackend
|
|
10
|
+
from .._task import Task
|
|
11
|
+
from .body import hash_task_body
|
|
12
|
+
from .key import CacheBreakdown, CacheKeyInputs, InputRecord, compute_cache_key
|
|
13
|
+
from .manifest import compute_input_manifest
|
|
14
|
+
from .outputs import OutputStore
|
|
15
|
+
from .store import CacheEntry, CacheStore
|
|
16
|
+
|
|
17
|
+
# Flipped to True after the first remote-failure warning so later failures stay quiet.
_remote_warn_fired = False


def _warn_once_remote_failed(e: BaseException) -> None:
    """Report a failed remote cache call on stderr, at most once per process.

    The first call prints the exception's type name and message; every
    subsequent call returns without printing.
    """
    global _remote_warn_fired
    if _remote_warn_fired:
        return
    message = (
        f"warning: remote cache unreachable: {type(e).__name__}: {e}. "
        "Falling back to local."
    )
    print(message, file=sys.stderr)
    _remote_warn_fired = True
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _python_version_str() -> str:
    """Return the running interpreter's version as ``major.minor.micro``."""
    major, minor, micro = sys.version_info[:3]
    return f"{major}.{minor}.{micro}"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _platform_tag_str() -> str:
    """Return the lower-cased ``<system>-<machine>`` tag of the current host."""
    system_name = platform.system()
    machine_name = platform.machine()
    return "-".join((system_name, machine_name)).lower()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class CacheEngine:
    """Ties together the local entry store, the output blob store and an optional remote.

    The engine computes cache keys for tasks, looks entries up locally or
    remotely, records new entries and restores captured outputs.
    """

    def __init__(self, root: Path, remote: RemoteBackend | None = None):
        """Create an engine whose local state lives under ``root``.

        Args:
            root: Directory passed to both ``CacheStore`` and ``OutputStore``.
            remote: Optional remote backend. When ``None``, ``check_remote``
                returns ``None`` and ``push_remote`` does nothing.
        """
        self.root = root
        self.store = CacheStore(root=root)
        self.outputs = OutputStore(root=root)
        self._remote = remote

    def compute_key_and_breakdown(
        self,
        t: Task,
        *,
        workspace: Path,
        upstream_keys_by_dep: dict[str, str],
    ) -> tuple[str, CacheBreakdown]:
        """Compute the cache key for ``t`` plus a breakdown of what went into it.

        Args:
            t: Task to key; its ``cached_config`` must not be ``None``.
            workspace: Root directory against which input patterns are resolved.
            upstream_keys_by_dep: Cache key of each upstream dependency, by name.

        Returns:
            ``(key, breakdown)`` where ``breakdown`` records the inputs, env
            values, body hash, Python version, platform tag and upstream keys
            observed for this computation.
        """
        cfg = t.cached_config
        assert cfg is not None
        manifest = compute_input_manifest(cfg.inputs, root=workspace)
        body = hash_task_body(t.func)
        # Human-readable view of the tracked variables; missing ones show "<unset>".
        env_values = {
            name: os.environ[name] if name in os.environ else "<unset>"
            for name in cfg.env
        }
        py_version = _python_version_str()
        plat_tag = _platform_tag_str()

        # Tuple ordering for cache key matches insertion order of upstream_keys_by_dep.
        upstream_tuple = tuple(upstream_keys_by_dep.values())
        key_inputs = CacheKeyInputs(
            task_fqn=t.fqn,
            task_body_hash=body,
            # NOTE(review): an unset variable is passed as "" here, so an unset
            # variable, an empty one and one literally set to "<unset>" all hash
            # the same. Left unchanged because altering it would invalidate
            # existing cache keys - confirm whether this is intended.
            env={name: ("" if v == "<unset>" else v) for name, v in env_values.items()},
            env_names=cfg.env,
            input_patterns=cfg.inputs,
            input_manifest_digest=manifest.digest,
            root=workspace,
            # Upstream keys only influence the key when propagation is enabled.
            upstream_keys=upstream_tuple if cfg.propagate else (),
            strict=cfg.strict,
        )
        key = compute_cache_key(key_inputs)

        input_records = tuple(
            InputRecord(
                path=fh.path.relative_to(workspace).as_posix(),
                digest=fh.digest,
                mode=fh.mode,
            )
            for fh in manifest.files
        )
        breakdown = CacheBreakdown(
            input_patterns=cfg.inputs,
            inputs=input_records,
            env_values=env_values,
            task_body_hash=body,
            python_version=py_version,
            platform_tag=plat_tag,
            upstream_keys_by_dep=dict(upstream_keys_by_dep),
        )
        return key, breakdown

    def check(self, t: Task, key: str) -> CacheEntry | None:
        """Look up ``key`` for task ``t`` in the local store."""
        return self.store.get(t.fqn, key)

    def store_entry(
        self,
        t: Task,
        key: str,
        *,
        workspace: Path,
        duration: float,
        breakdown: CacheBreakdown,
        upstream_keys: tuple[str, ...],
    ) -> CacheEntry:
        """Capture the task's outputs (if any are configured) and record a local entry.

        Args:
            t: Task that just ran; its ``cached_config`` must not be ``None``.
            key: Cache key the entry is stored under.
            workspace: Root directory against which output patterns are resolved.
            duration: Run duration recorded on the entry.
            breakdown: Key breakdown recorded on the entry.
            upstream_keys: Upstream cache keys recorded on the entry.

        Returns:
            The ``CacheEntry`` that was written to the local store.
        """
        cfg = t.cached_config
        assert cfg is not None
        outputs_hash = (
            self.outputs.capture(cfg.outputs, root=workspace) if cfg.outputs else None
        )
        entry = CacheEntry(
            key=key,
            outputs_hash=outputs_hash,
            duration=duration,
            completed_at=time.time(),
            upstream_keys=upstream_keys,
            breakdown=breakdown,
        )
        self.store.put(t.fqn, entry)
        return entry

    def restore_outputs(self, entry: CacheEntry, *, workspace: Path) -> None:
        """Restore the entry's captured outputs into ``workspace``, if it has any."""
        if entry.outputs_hash:
            self.outputs.restore(entry.outputs_hash, root=workspace)

    def check_remote(self, t: Task, key: str) -> CacheEntry | None:
        """Try remote: if hit, download entry + outputs and populate local store.

        Remote failures are reported once via ``_warn_once_remote_failed`` and
        turn into a miss (``None``). Only ``Exception`` subclasses are treated
        that way; ``KeyboardInterrupt`` and ``SystemExit`` propagate so the
        user can still abort a run that is blocked on the remote.
        """
        if self._remote is None:
            return None
        try:
            entry_dict = self._remote.get_entry(t.fqn, key)
        except Exception as e:
            _warn_once_remote_failed(e)
            return None
        if entry_dict is None:
            return None
        try:
            entry = CacheEntry.from_dict(entry_dict)
        except Exception as e:
            _warn_once_remote_failed(e)
            return None
        # Download outputs into the content-addressed store if needed.
        if entry.outputs_hash is not None:
            target_dir = self.outputs._hash_dir(entry.outputs_hash)
            if not target_dir.exists():
                try:
                    self._remote.get_output(entry.outputs_hash, target_dir)
                except Exception as e:
                    # NOTE(review): if the backend can leave a partially filled
                    # ``target_dir`` behind, the ``exists()`` check above would
                    # accept it next time - confirm the backends clean up.
                    _warn_once_remote_failed(e)
                    return None
        # Mirror the entry into local store so future runs short-circuit.
        self.store.put(t.fqn, entry)
        return entry

    def push_remote(self, t: Task, entry: CacheEntry) -> None:
        """Upload entry + outputs to remote. Non-fatal on error.

        Failures raised as ``Exception`` are reported once and otherwise
        ignored; ``KeyboardInterrupt`` and ``SystemExit`` propagate.
        """
        if self._remote is None:
            return
        try:
            self._remote.put_entry(t.fqn, entry.key, entry.to_dict())
        except Exception as e:
            _warn_once_remote_failed(e)
            return
        if entry.outputs_hash is not None:
            source = self.outputs._hash_dir(entry.outputs_hash)
            if source.is_dir():
                try:
                    self._remote.put_output(entry.outputs_hash, source)
                except Exception as e:
                    _warn_once_remote_failed(e)
|
|
178
|
+
|
ntask/_cache/body.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import inspect
|
|
5
|
+
import textwrap
|
|
6
|
+
from collections.abc import Callable
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from .hash import hash_bytes
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _strip_docstring(tree: ast.AST) -> None:
    """Drop the leading docstring from every function, class and module under ``tree``.

    The tree is modified in place. A node is only touched when its first
    statement is a bare string constant.
    """
    documented_kinds = (
        ast.FunctionDef,
        ast.AsyncFunctionDef,
        ast.ClassDef,
        ast.Module,
    )
    for candidate in ast.walk(tree):
        if not isinstance(candidate, documented_kinds):
            continue
        statements = candidate.body
        if not statements:
            continue
        head = statements[0]
        if not isinstance(head, ast.Expr):
            continue
        literal = head.value
        if isinstance(literal, ast.Constant) and isinstance(literal.value, str):
            candidate.body = statements[1:]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def hash_task_body_source(source: str) -> str:
    """Return a structural hash of ``source``.

    The text is dedented, parsed into an AST, stripped of docstrings and
    dumped without field names or position attributes, so comments,
    whitespace and docstrings do not affect the digest.
    """
    dedented = textwrap.dedent(source)
    module = ast.parse(dedented)
    _strip_docstring(module)
    canonical = ast.dump(module, annotate_fields=False, include_attributes=False)
    return hash_bytes(canonical.encode())
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def hash_task_body(func: Callable[..., Any]) -> str:
    """Structural hash of a function's source. Stable under doc/whitespace/comment noise.

    Args:
        func: Callable whose source should be hashed.

    Returns:
        The structural hash of the source when ``inspect.getsource`` can
        retrieve it; otherwise a hash of a ``<no-source:NAME>`` placeholder.
    """
    try:
        source = inspect.getsource(func)
    except (OSError, TypeError):
        # Callables such as functools.partial objects or callable instances
        # have no ``__qualname__``; fall back to the type's name so the
        # placeholder hash can still be produced instead of raising
        # AttributeError from inside this handler.
        label = getattr(func, "__qualname__", None) or type(func).__qualname__
        return hash_bytes(f"<no-source:{label}>".encode())
    return hash_task_body_source(source)
|
ntask/_cache/diff.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from .key import CacheBreakdown, InputRecord
|
|
7
|
+
|
|
8
|
+
MissKind = Literal[
|
|
9
|
+
"first-run",
|
|
10
|
+
"input-modified", "input-added", "input-removed",
|
|
11
|
+
"env-changed", "env-added", "env-removed",
|
|
12
|
+
"body-changed",
|
|
13
|
+
"upstream-invalidated",
|
|
14
|
+
"python-changed", "platform-changed",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True, slots=True)
class MissItem:
    """A single reason why a cached result could not be reused."""

    # Category of the change; one of the ``MissKind`` literals.
    kind: MissKind
    # Extra detail such as a path, variable name or before/after pair; may be empty.
    detail: str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True, slots=True)
|
|
25
|
+
class MissReport:
|
|
26
|
+
items: tuple[MissItem, ...]
|
|
27
|
+
|
|
28
|
+
@property
|
|
29
|
+
def is_hit(self) -> bool:
|
|
30
|
+
return not self.items
|
|
31
|
+
|
|
32
|
+
def summary(self) -> str:
|
|
33
|
+
if not self.items:
|
|
34
|
+
return "no changes"
|
|
35
|
+
first = self.items[0]
|
|
36
|
+
rest = len(self.items) - 1
|
|
37
|
+
label = f"{first.kind}: {first.detail}" if first.detail else first.kind
|
|
38
|
+
return f"{label}" + (f" (+{rest} more)" if rest else "")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _diff_inputs(
    current: tuple[InputRecord, ...],
    prior: tuple[InputRecord, ...],
) -> list[MissItem]:
    """Compare two sets of input records and report added, removed and modified paths.

    Paths are visited in sorted order. A path present on both sides counts as
    modified when its digest or its mode differs.
    """
    before = {record.path: record for record in prior}
    after = {record.path: record for record in current}
    findings: list[MissItem] = []

    for path in sorted(before.keys() | after.keys()):
        old = before.get(path)
        new = after.get(path)
        if old is None:
            kind = "input-added"
        elif new is None:
            kind = "input-removed"
        elif (old.digest, old.mode) != (new.digest, new.mode):
            kind = "input-modified"
        else:
            continue
        findings.append(MissItem(kind=kind, detail=path))
    return findings
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _diff_env(
    current: dict[str, str],
    prior: dict[str, str],
) -> list[MissItem]:
    """Compare two env snapshots and report added, removed and changed variables.

    Names are visited in sorted order. Changed variables carry a
    ``name: old → new`` detail using the values' ``repr``.
    """
    findings: list[MissItem] = []
    for name in sorted(prior.keys() | current.keys()):
        known_before = name in prior
        known_now = name in current
        if known_now and not known_before:
            findings.append(MissItem(kind="env-added", detail=name))
            continue
        if known_before and not known_now:
            findings.append(MissItem(kind="env-removed", detail=name))
            continue
        if current[name] != prior[name]:
            findings.append(
                MissItem(
                    kind="env-changed",
                    detail=f"{name}: {prior[name]!r} → {current[name]!r}",
                )
            )
    return findings
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _diff_upstream(
    current: dict[str, str],
    prior: dict[str, str],
) -> list[MissItem]:
    """Report every dependency whose recorded key differs between the two snapshots.

    Dependencies present on only one side count as different. Names are
    reported in sorted order.
    """
    dep_names = sorted({*prior, *current})
    return [
        MissItem(kind="upstream-invalidated", detail=dep_name)
        for dep_name in dep_names
        if prior.get(dep_name) != current.get(dep_name)
    ]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def diff_cache_state(
    current: CacheBreakdown,
    prior: CacheBreakdown | None,
) -> MissReport:
    """Describe what changed between a prior breakdown and the current one.

    With no prior breakdown the report holds a single ``first-run`` item.
    Otherwise items appear in this priority order:
      1. input-* (alphabetical by path)
      2. env-* (alphabetical by name)
      3. body-changed
      4. upstream-invalidated (alphabetical by dep name)
      5. python-changed, platform-changed
    """
    if prior is None:
        first_run = MissItem(kind="first-run", detail="")
        return MissReport(items=(first_run,))

    findings: list[MissItem] = [
        *_diff_inputs(current.inputs, prior.inputs),
        *_diff_env(current.env_values, prior.env_values),
    ]
    if prior.task_body_hash != current.task_body_hash:
        findings.append(MissItem(kind="body-changed", detail=""))
    findings += _diff_upstream(current.upstream_keys_by_dep, prior.upstream_keys_by_dep)

    # Interpreter and platform changes share one detail format: old → new.
    toolchain_checks = (
        ("python-changed", prior.python_version, current.python_version),
        ("platform-changed", prior.platform_tag, current.platform_tag),
    )
    for kind, before, after in toolchain_checks:
        if before != after:
            findings.append(MissItem(kind=kind, detail=f"{before!r} → {after!r}"))

    return MissReport(items=tuple(findings))
|
ntask/_cache/hash.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import xxhash
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def hash_bytes(data: bytes) -> str:
    """Return the hexadecimal XXH3-128 digest of ``data``."""
    hasher = xxhash.xxh3_128()
    hasher.update(data)
    return hasher.hexdigest()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def hash_many(parts: list[bytes]) -> str:
    """Return one XXH3-128 hex digest over a sequence of byte strings.

    Each part is preceded by its length as an 8-byte little-endian integer,
    so the boundaries between parts influence the digest.
    """
    hasher = xxhash.xxh3_128()
    for chunk in parts:
        size_prefix = len(chunk).to_bytes(8, byteorder="little")
        hasher.update(size_prefix)
        hasher.update(chunk)
    return hasher.hexdigest()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True, slots=True)
class FileHash:
    """Content digest and permission bits recorded for one file."""

    # Path of the file, as it was given to ``hash_file``.
    path: Path
    # Hexadecimal XXH3-128 digest of the file's bytes.
    digest: str
    # Permission bits of the file (``st_mode & 0o777``).
    mode: int
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def hash_file(path: Path) -> FileHash:
    """Hash a file's contents in 1 MiB chunks and record its permission bits.

    Returns:
        A ``FileHash`` holding the given path, the XXH3-128 hex digest of the
        file's bytes and ``st_mode & 0o777`` from a ``stat`` taken after reading.
    """
    chunk_size = 1 << 20  # 1 MiB
    hasher = xxhash.xxh3_128()
    with path.open("rb") as stream:
        # iter() with a sentinel stops at the first empty read, i.e. end of file.
        for piece in iter(lambda: stream.read(chunk_size), b""):
            hasher.update(piece)
    permissions = path.stat().st_mode & 0o777
    return FileHash(path=path, digest=hasher.hexdigest(), mode=permissions)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def hash_many_files(paths: list[Path]) -> list[FileHash]:
    """Hash several files on a thread pool and return the results in sorted-path order.

    An empty input yields an empty list without starting a pool.
    """
    if not paths:
        return []
    ordered = list(paths)
    ordered.sort()
    with ThreadPoolExecutor() as pool:
        results = pool.map(hash_file, ordered)
        return list(results)
|
ntask/_cache/key.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import platform
|
|
4
|
+
import sys
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .hash import hash_many
|
|
9
|
+
|
|
10
|
+
FORMAT_VERSION = b"ntask/v1"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True, slots=True)
class CacheKeyInputs:
    """Immutable bundle of the values ``compute_cache_key`` reads to derive a key."""

    # Fully-qualified task name; always part of the key.
    task_fqn: str
    # Hash of the task body; part of the key only when ``strict`` is true.
    task_body_hash: str
    # Values of the tracked environment variables, looked up by the names in ``env_names``.
    env: dict[str, str]
    # Names of the environment variables that take part in the key (hashed in sorted order).
    env_names: tuple[str, ...]
    # Input patterns; hashed in sorted order.
    input_patterns: tuple[str, ...]
    # Digest summarising the matched input files.
    input_manifest_digest: str
    # NOTE(review): not read by ``compute_cache_key`` - confirm whether it is used elsewhere.
    root: Path
    # Cache keys of upstream tasks; hashed in the order given.
    upstream_keys: tuple[str, ...]
    # When false, body hash, Python version and platform tag are replaced by fixed placeholders.
    strict: bool = True
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _python_version_tuple() -> bytes:
    """Return the interpreter version ``major.minor.micro`` encoded as bytes."""
    major, minor, micro = sys.version_info[:3]
    return f"{major}.{minor}.{micro}".encode()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _platform_tag() -> bytes:
    """Return the lower-cased ``<system>-<machine>`` host tag encoded as bytes."""
    system_name = platform.system()
    machine_name = platform.machine()
    return "-".join((system_name, machine_name)).lower().encode()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def compute_cache_key(inp: CacheKeyInputs) -> str:
    """Derive the cache key for a task from its key inputs.

    The hashed parts are, in order: the format version, the task name, the
    body hash, Python version and platform tag (or fixed placeholders when
    ``inp.strict`` is false), one newline-joined ``NAME=value`` block for the
    sorted env names, the sorted input patterns, the input manifest digest
    and the upstream keys in the order given.
    """
    if inp.strict:
        identity = [
            inp.task_body_hash.encode(),
            _python_version_tuple(),
            _platform_tag(),
        ]
    else:
        identity = [b"<body-unstrict>", b"<py-unstrict>", b"<plat-unstrict>"]

    env_lines: list[str] = []
    for name in sorted(inp.env_names):
        value = inp.env.get(name)
        shown = "<unset>" if value is None else value
        env_lines.append(f"{name}={shown}")

    pieces: list[bytes] = [
        FORMAT_VERSION,
        inp.task_fqn.encode(),
        *identity,
        "\n".join(env_lines).encode(),
        *(pattern.encode() for pattern in sorted(inp.input_patterns)),
        inp.input_manifest_digest.encode(),
        *(upstream.encode() for upstream in inp.upstream_keys),
    ]
    return hash_many(pieces)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True, slots=True)
class InputRecord:
    """Per-file record kept in a cache breakdown."""

    # File path as a string. Presumably workspace-relative POSIX form - verify against the producer.
    path: str
    # Content digest of the file.
    digest: str
    # File mode value recorded alongside the digest.
    mode: int
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass(frozen=True, slots=True)
class CacheBreakdown:
    """Snapshot of the individual factors observed when a cache key was computed."""

    # Input patterns configured for the task.
    input_patterns: tuple[str, ...]
    # One record per input file.
    inputs: tuple[InputRecord, ...]
    # Environment variable values keyed by variable name.
    env_values: dict[str, str]
    # Hash of the task body.
    task_body_hash: str
    # Python version string.
    python_version: str
    # Platform tag string.
    platform_tag: str
    # Cache key of each upstream dependency, keyed by dependency name.
    upstream_keys_by_dep: dict[str, str]
|
ntask/_cache/manifest.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import pathspec
|
|
7
|
+
from pathspec.pattern import Pattern
|
|
8
|
+
|
|
9
|
+
from .hash import FileHash, hash_many, hash_many_files
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True, slots=True)
class InputManifest:
    """Hashes of the files selected as task inputs plus one combined digest."""

    # Per-file hashes of the selected files.
    files: list[FileHash]
    # Single digest summarising ``files``.
    digest: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _load_gitignore(root: Path) -> pathspec.PathSpec[Pattern] | None:
    """Load ``<root>/.gitignore`` as a gitwildmatch ``PathSpec``.

    Args:
        root: Directory expected to contain the ``.gitignore`` file.

    Returns:
        The compiled spec, or ``None`` when ``<root>/.gitignore`` is not a file.
    """
    gi = root / ".gitignore"
    if not gi.is_file():
        return None
    # Decode as UTF-8 explicitly: the locale default differs between machines
    # and would mis-read (or fail on) non-ASCII patterns on non-UTF-8 locales.
    lines = gi.read_text(encoding="utf-8").splitlines()
    return pathspec.PathSpec.from_lines("gitwildmatch", lines)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def compute_input_manifest(
    patterns: list[str] | tuple[str, ...],
    *,
    root: Path,
    respect_gitignore: bool = True,
) -> InputManifest:
    """Select the files under ``root`` matching ``patterns`` and hash them.

    Args:
        patterns: gitwildmatch patterns selecting input files; empty means no inputs.
        root: Directory that is walked recursively; patterns match root-relative POSIX paths.
        respect_gitignore: When true, files matched by ``<root>/.gitignore`` are dropped.

    Returns:
        An ``InputManifest`` with the per-file hashes and a combined digest over
        each file's relative path, content digest and mode. With no patterns
        or no matching files the digest is that of a fixed empty-manifest marker.
    """
    if not patterns:
        return InputManifest(files=[], digest=hash_many([b"<empty-manifest>"]))

    include_spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
    selected: set[Path] = {
        entry
        for entry in root.rglob("*")
        if entry.is_file()
        and include_spec.match_file(entry.relative_to(root).as_posix())
    }

    ignore_spec = _load_gitignore(root) if respect_gitignore else None
    if ignore_spec is not None:
        selected = {
            entry
            for entry in selected
            if not ignore_spec.match_file(entry.relative_to(root).as_posix())
        }

    file_hashes = hash_many_files(sorted(selected))
    digest_parts: list[bytes] = []
    for record in file_hashes:
        relative: str = record.path.relative_to(root).as_posix()
        digest_parts += [
            relative.encode(),
            record.digest.encode(),
            str(record.mode).encode(),
        ]
    digest = hash_many(digest_parts or [b"<empty-manifest>"])
    return InputManifest(files=file_hashes, digest=digest)
|
ntask/_cache/outputs.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import pathspec
|
|
8
|
+
|
|
9
|
+
from .hash import hash_many, hash_many_files
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class OutputStore:
    """Content-addressed output blob store.

    Layout: ``<root>/outputs/<outputs-hash>/...`` - preserving relative paths.
    Capture and restore both try a hardlink first and fall back to a copy
    when linking fails.
    """

    def __init__(self, root: Path):
        """Create a store whose blobs live under ``<root>/outputs``."""
        self.root = root
        self._outputs_dir = root / "outputs"

    def _hash_dir(self, digest: str) -> Path:
        """Return the directory that holds the blob for ``digest``."""
        return self._outputs_dir / digest

    def capture(
        self,
        patterns: list[str] | tuple[str, ...],
        *,
        root: Path,
    ) -> str | None:
        """Capture matching files into the content-addressed store, return the hash.

        Args:
            patterns: gitwildmatch patterns selecting output files.
            root: Directory that is walked recursively; patterns match
                root-relative POSIX paths.

        Returns:
            The digest over the matched files' relative paths and content
            digests, or ``None`` when there are no patterns or no matches.
        """
        if not patterns:
            return None
        spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
        matches: list[Path] = []
        for p in root.rglob("*"):
            if not p.is_file():
                continue
            rel = p.relative_to(root).as_posix()
            if spec.match_file(rel):
                matches.append(p)
        if not matches:
            return None

        file_hashes = hash_many_files(sorted(matches))
        parts: list[bytes] = []
        for fh in file_hashes:
            rel_encoded = fh.path.relative_to(root).as_posix().encode()
            parts.append(rel_encoded)
            parts.append(fh.digest.encode())
        digest = hash_many(parts)

        target = self._hash_dir(digest)
        if target.exists():
            # Same content was captured before; nothing to store.
            return digest

        # NOTE(review): the blob directory is filled in place, so an
        # interruption here leaves a partial directory that the ``exists()``
        # check above accepts on the next capture - consider staging + rename.
        target.mkdir(parents=True, exist_ok=True)
        for fh in file_hashes:
            rel_path = fh.path.relative_to(root)
            dest = target / rel_path
            dest.parent.mkdir(parents=True, exist_ok=True)
            self._link_or_copy(fh.path, dest)
        return digest

    def restore(self, digest: str, *, root: Path) -> None:
        """Restore a previously captured output tree into ``root``.

        Whatever non-directory entry currently sits at a destination path is
        removed first, including a dangling symlink, so the restored file
        never gets written through a stale link.

        Raises:
            FileNotFoundError: If no blob directory exists for ``digest``.
        """
        src = self._hash_dir(digest)
        if not src.is_dir():
            raise FileNotFoundError(f"no output blob for hash {digest}")
        for p in src.rglob("*"):
            if not p.is_file():
                continue
            rel = p.relative_to(src)
            dest = root / rel
            dest.parent.mkdir(parents=True, exist_ok=True)
            # unlink() acts on the link itself; ``exists()`` would follow a
            # symlink and report a dangling one as absent.
            dest.unlink(missing_ok=True)
            self._link_or_copy(p, dest)

    @staticmethod
    def _link_or_copy(src: Path, dest: Path) -> None:
        """Hardlink ``src`` to ``dest``; fall back to ``shutil.copy2`` if linking fails."""
        # NOTE(review): a hardlink shares storage with the source file, so a
        # later in-place write to the workspace copy would also alter the
        # cached blob - confirm whether outputs are ever rewritten in place.
        try:
            os.link(src, dest)
        except (OSError, NotImplementedError):
            shutil.copy2(src, dest)
|