pycasher 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- casher/__init__.py +5 -0
- casher/_runner.py +70 -0
- casher/acache.py +138 -0
- casher/audit_hook.py +65 -0
- casher/auto_cli.py +52 -0
- casher/capture.py +29 -0
- casher/config.py +42 -0
- casher/decorator.py +438 -0
- casher/deps.py +62 -0
- casher/eviction.py +81 -0
- casher/hasher.py +42 -0
- casher/serializer.py +121 -0
- casher/store.py +218 -0
- casher/strace.py +118 -0
- casher/tests/__init__.py +0 -0
- casher/tests/config.py +10 -0
- casher/tests/test_acache.py +222 -0
- casher/tests/test_audit_hook.py +103 -0
- casher/tests/test_auto_cli.py +108 -0
- casher/tests/test_capture.py +88 -0
- casher/tests/test_decorator.py +541 -0
- casher/tests/test_deps.py +132 -0
- casher/tests/test_eviction.py +149 -0
- casher/tests/test_hasher.py +87 -0
- casher/tests/test_serializer.py +92 -0
- casher/tests/test_store.py +186 -0
- casher/tests/test_strace.py +144 -0
- pycasher-0.1.0.dist-info/METADATA +95 -0
- pycasher-0.1.0.dist-info/RECORD +32 -0
- pycasher-0.1.0.dist-info/WHEEL +4 -0
- pycasher-0.1.0.dist-info/entry_points.txt +2 -0
- pycasher-0.1.0.dist-info/licenses/LICENSE +21 -0
casher/__init__.py
ADDED
casher/_runner.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Subprocess entry point for casher.
|
|
2
|
+
|
|
3
|
+
Invoked as: python -m casher._runner <work_dir>
|
|
4
|
+
|
|
5
|
+
Protocol:
|
|
6
|
+
- Reads args.pkl from work_dir (dict with module, function, args, kwargs)
|
|
7
|
+
- Imports the module, gets the function (unwraps @cached if needed)
|
|
8
|
+
- Executes the function
|
|
9
|
+
- Writes result.pkl, status.json, and exception.pkl (on failure) to work_dir
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import importlib
|
|
13
|
+
import json
|
|
14
|
+
import pickle
|
|
15
|
+
import sys
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _unwrap_cached(fn):
|
|
20
|
+
"""If fn is wrapped by @cached, return the raw function."""
|
|
21
|
+
return getattr(fn, "__wrapped__", fn)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _resolve_function(module_name: str, qualname: str):
|
|
25
|
+
"""Import module and resolve the function by qualname."""
|
|
26
|
+
mod = importlib.import_module(module_name)
|
|
27
|
+
obj = mod
|
|
28
|
+
for attr in qualname.split("."):
|
|
29
|
+
obj = getattr(obj, attr)
|
|
30
|
+
return _unwrap_cached(obj)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def run(work_dir: Path) -> None:
    """Execute the call described by ``work_dir/args.pkl`` and record the outcome.

    Args:
        work_dir: Directory containing ``args.pkl`` (a pickled dict with the
            keys ``module``, ``function``, ``args`` and ``kwargs``). The
            files ``result.pkl``, ``status.json`` and, on failure,
            ``exception.pkl`` are written into the same directory.

    Any ``Exception`` raised while resolving the function, calling it, or
    pickling its result is not propagated. It is reported through
    ``status.json`` (``{"success": false}``) and ``exception.pkl``.
    """
    work_dir = Path(work_dir)

    # SECURITY NOTE: unpickling executes arbitrary code. args.pkl must only
    # ever be produced by a trusted writer.
    with open(work_dir / "args.pkl", "rb") as f:
        payload = pickle.load(f)  # noqa: S301

    module_name = payload["module"]
    func_name = payload["function"]
    args = payload["args"]
    kwargs = payload["kwargs"]

    try:
        fn = _resolve_function(module_name, func_name)
        result = fn(*args, **kwargs)

        # Serialize result
        with open(work_dir / "result.pkl", "wb") as f:
            pickle.dump(result, f)

        (work_dir / "status.json").write_text(json.dumps({"success": True}))

    except Exception as exc:
        (work_dir / "status.json").write_text(json.dumps({"success": False}))
        try:
            blob = pickle.dumps(exc)
            # Pickling can succeed for an exception that cannot be rebuilt
            # (e.g. a custom __init__ whose signature does not match
            # ``exc.args``). Round-trip here so the reader of exception.pkl
            # never hits that failure.
            pickle.loads(blob)  # noqa: S301
        except Exception:
            # Fallback for exceptions that cannot be pickled or unpickled
            blob = pickle.dumps(RuntimeError(repr(exc)))
        (work_dir / "exception.pkl").write_bytes(blob)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
if __name__ == "__main__":
    # Validate argv with an explicit check: an ``assert`` is stripped under
    # ``python -O``, which would let a bad invocation through unchecked.
    if len(sys.argv) != 2:
        raise SystemExit(
            f"Usage: python -m casher._runner <work_dir>, got {sys.argv}"
        )
    run(Path(sys.argv[1]))
|
casher/acache.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import hashlib
|
|
3
|
+
import shutil
|
|
4
|
+
import sys
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from loguru import logger
|
|
9
|
+
|
|
10
|
+
from casher.config import (
|
|
11
|
+
DEFAULT_CACHE_DIR,
|
|
12
|
+
DEFAULT_MAX_CACHE_BYTES,
|
|
13
|
+
HASH_ALGORITHM,
|
|
14
|
+
OUTPUT_FILES_DIR,
|
|
15
|
+
)
|
|
16
|
+
from casher.hasher import compute_file_hash
|
|
17
|
+
from casher.store import CacheEntry, find_cached, store_result
|
|
18
|
+
from casher.strace import is_strace_available, run_with_strace
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _compute_command_partial_key(cmd: list[str]) -> str:
    """Return the hex digest that identifies a command line.

    Each argument is hashed followed by a NUL terminator so that argument
    boundaries are part of the key. Joining the arguments with spaces would
    give ``["prog", "a b"]`` and ``["prog", "a", "b"]`` the same key even
    though they are different invocations.
    """
    h = hashlib.new(HASH_ALGORITHM)
    for arg in cmd:
        # surrogateescape round-trips argv entries that are not valid UTF-8
        # instead of raising UnicodeEncodeError.
        h.update(arg.encode("utf-8", "surrogateescape"))
        h.update(b"\0")
    return h.hexdigest()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def main() -> None:
    """Entry point of the ``acache`` command.

    Everything after ``--`` on the command line is the command to cache; the
    options before it belong to acache. Without a ``--`` the whole argument
    list is taken as the command. When a cached entry is found its stdout,
    stderr and output files are replayed and the process exits with the
    stored return value. Otherwise the command is run under strace, the
    result is stored, and the command's output and return code are relayed.
    """
    parser = argparse.ArgumentParser(
        description="Cache results of arbitrary CLI programs using strace.",
        usage="acache [--cache-dir DIR] [--max-cache-bytes N] -- command...",
    )
    parser.add_argument("--cache-dir", type=Path, default=None)
    parser.add_argument("--max-cache-bytes", type=int, default=None)

    # Separate acache's own options from the wrapped command.
    raw_args = sys.argv[1:]
    try:
        separator = raw_args.index("--")
    except ValueError:
        option_args, cmd = [], raw_args
    else:
        option_args, cmd = raw_args[:separator], raw_args[separator + 1 :]

    if not cmd:
        raise SystemExit("No command provided. Usage: acache [options] -- command...")

    options = parser.parse_args(option_args)
    cache_dir = options.cache_dir or DEFAULT_CACHE_DIR
    max_bytes = options.max_cache_bytes
    if max_bytes is None:
        max_bytes = DEFAULT_MAX_CACHE_BYTES

    logger.info("acache: cmd={} | cache_dir={}", " ".join(cmd), cache_dir)

    if not is_strace_available():
        raise SystemExit(
            "strace is required for acache. "
            "Install it (e.g., 'sudo pacman -S strace') or use the Python decorator API."
        )

    partial_key = _compute_command_partial_key(cmd)

    # Phase 1: replay a stored entry if there is one.
    hit = find_cached(cache_dir, partial_key)
    if hit is not None:
        entry, entry_dir = hit
        logger.info("acache cache HIT: key={}…", partial_key[:12])

        if entry.stdout:
            sys.stdout.write(entry.stdout)
        if entry.stderr:
            sys.stderr.write(entry.stderr)

        # Copy every cached output file that still exists back to its
        # recorded location.
        output_dir = entry_dir / OUTPUT_FILES_DIR
        for rel_path in entry.output_files:
            cached_path = output_dir / rel_path.lstrip("/")
            if cached_path.exists():
                target = Path(rel_path)
                target.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(str(cached_path), str(target))

        sys.exit(int(entry.return_value))

    # Phase 2: nothing cached, so run the command under strace and store it.
    logger.info("acache cache MISS: key={}… | tracing with strace", partial_key[:12])
    traced = run_with_strace(cmd)

    # Only paths that are regular files now are kept.
    read_files = [p for p in traced.read_files if p.is_file()]
    write_files = [p for p in traced.write_files if p.is_file()]

    file_hash = compute_file_hash(read_files) if read_files else "none"

    input_files_dict: dict[str, str] = {
        str(p): compute_file_hash([p]) for p in read_files
    }
    output_files_dict: dict[str, Path] = {str(p): p for p in write_files}

    entry = CacheEntry(
        return_value=traced.returncode,
        stdout=traced.stdout,
        stderr=traced.stderr,
        output_files=output_files_dict,
        input_files=input_files_dict,
        created_at=time.time(),
        func_module="acache",
        func_name=" ".join(cmd),
    )

    store_result(cache_dir, partial_key, file_hash, entry, max_bytes=max_bytes)

    # Relay the captured output of the command.
    if traced.stdout:
        sys.stdout.write(traced.stdout)
    if traced.stderr:
        sys.stderr.write(traced.stderr)

    sys.exit(traced.returncode)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
|
casher/audit_hook.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import contextvars
|
|
2
|
+
import sys
|
|
3
|
+
from collections.abc import Generator
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Tracker that receives "open" audit events in the current context; None
# (the default) when no tracker has been set.
_active_tracker: contextvars.ContextVar["FileIOTracker | None"] = (
    contextvars.ContextVar("casher_file_tracker", default=None)
)
# Becomes True once the audit hook has been registered, so it is added once.
_hook_installed = False

# Plain read mode strings.
_READ_MODES = {"r", "rb", "rt"}
# Plain write, append and exclusive-create mode strings.
_WRITE_MODES = {"w", "wb", "wt", "a", "ab", "at", "x", "xb", "xt"}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class FileIOTracker:
    """Collect the distinct paths opened for reading and for writing.

    ``read_files`` and ``write_files`` each list a path once, in the order in
    which it was first recorded.
    """

    def __init__(self) -> None:
        self.read_files: list[Path] = []
        self.write_files: list[Path] = []
        # String forms of the paths already recorded, for O(1) de-duplication.
        self._seen_read: set[str] = set()
        self._seen_write: set[str] = set()

    def record(self, path: str, mode: str) -> None:
        """Register one ``open`` of ``path`` with the given mode string.

        The mode is classified by the characters it contains rather than by
        exact match, so update modes (``"r+"``, ``"w+"``, ``"a+"``,
        ``"rb+"`` ...) are recognised as well as the plain ones:

        * any of ``w``, ``a``, ``x`` or ``+`` marks the path as written;
        * ``r`` marks the path as read, so ``"r+"`` is recorded as both.

        A mode containing none of these characters is ignored.
        """
        if any(flag in mode for flag in "wax+"):
            if path not in self._seen_write:
                self._seen_write.add(path)
                self.write_files.append(Path(path))
        if "r" in mode:
            if path not in self._seen_read:
                self._seen_read.add(path)
                self.read_files.append(Path(path))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _audit_hook(event: str, args: tuple) -> None:
    """Audit hook that forwards ``open`` events to the active tracker.

    Does nothing unless the event is ``"open"`` and a tracker is set in the
    current context. ``bytes`` paths are decoded with the filesystem encoding
    so they are tracked like ``str`` paths. Events whose path is neither
    ``str`` nor ``bytes``, or whose mode is not a ``str``, are skipped.
    """
    if event != "open":
        return
    tracker = _active_tracker.get()
    if tracker is None:
        return
    # args for "open" event: (path, mode, flags)
    path, mode = args[0], args[1]
    if not isinstance(mode, str):
        return
    if isinstance(path, bytes):
        # Same decoding os.fsdecode() performs, using the already imported sys.
        path = path.decode(
            sys.getfilesystemencoding(), sys.getfilesystemencodeerrors()
        )
    if not isinstance(path, str):
        return
    tracker.record(path, mode)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _ensure_hook_installed() -> None:
    """Register ``_audit_hook`` via ``sys.addaudithook`` on the first call only."""
    global _hook_installed
    if not _hook_installed:
        sys.addaudithook(_audit_hook)
        _hook_installed = True
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@contextmanager
def track_file_io() -> Generator[FileIOTracker, None, None]:
    """Yield a fresh tracker that is active for the duration of the ``with`` body.

    The audit hook is installed if it has not been yet. On exit, including
    when the body raises, the context variable is reset to the value it held
    before the block was entered.
    """
    _ensure_hook_installed()
    current = FileIOTracker()
    previous = _active_tracker.set(current)
    try:
        yield current
    finally:
        _active_tracker.reset(previous)
|
casher/auto_cli.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import inspect
|
|
3
|
+
import sys
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Annotations handed to argparse as the ``type=`` converter; any other
# annotation is parsed as ``str``.
_TYPE_MAP = {
    str: str,
    int: int,
    float: float,
    Path: Path,
}


def run(func: Callable) -> None:
    """Generate CLI from function signature and run it.

    Parameters without a default become positional arguments. Parameters
    with a default become ``--option`` arguments (underscores in the name are
    replaced by dashes). ``bool``-annotated options become switches:
    ``store_true`` when the default is ``False``, ``store_false`` otherwise.

    The function is called with the parsed values. A non-``None`` result is
    written to stdout: as CSV for pandas/polars objects that offer a CSV
    method, via ``print`` for everything else.
    """
    sig = inspect.signature(func)
    parser = argparse.ArgumentParser(description=func.__doc__ or "")

    for name, param in sig.parameters.items():
        annotation = (
            param.annotation
            if param.annotation is not inspect.Parameter.empty
            else str
        )

        if param.default is inspect.Parameter.empty:
            # Positional argument
            parser.add_argument(name, type=_TYPE_MAP.get(annotation, str))
            continue

        cli_name = f"--{name.replace('_', '-')}"
        if annotation is bool:
            action = "store_true" if param.default is False else "store_false"
            # Pass the function's own default explicitly; otherwise argparse
            # substitutes the opposite of the action (e.g. True for a
            # parameter whose default is None).
            parser.add_argument(
                cli_name, action=action, default=param.default, dest=name
            )
        else:
            parser.add_argument(
                cli_name,
                type=_TYPE_MAP.get(annotation, str),
                default=param.default,
                dest=name,
            )

    args = parser.parse_args()
    result = func(**vars(args))
    if result is None:
        return

    module = type(result).__module__
    if module.startswith(("polars", "pandas")):
        # pandas names the method ``to_csv``; polars names it ``write_csv``.
        # Both return the CSV text when called without a target.
        to_csv = getattr(result, "to_csv", None)
        if to_csv is None:
            to_csv = getattr(result, "write_csv", None)
        if to_csv is not None:
            sys.stdout.write(to_csv())
            return
    print(result)
|
casher/capture.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from casher.config import OUTPUT_FILES_DIR
|
|
6
|
+
from casher.store import CacheEntry
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def replay_side_effects(entry: CacheEntry, entry_dir: Path) -> None:
    """Re-emit what a cached call produced.

    The stored stdout and stderr text are written to ``sys.stdout`` and
    ``sys.stderr`` (empty values are skipped). Each output file recorded on
    the entry that still exists under ``entry_dir`` is then copied back to its
    recorded path, creating parent directories as needed.
    """
    for stream_name in ("stdout", "stderr"):
        text = getattr(entry, stream_name)
        if text:
            getattr(sys, stream_name).write(text)

    cache_root = entry_dir / OUTPUT_FILES_DIR
    for rel_path in entry.output_files:
        # Leading slashes are dropped so the path nests under the cache
        # directory instead of replacing it.
        cached_path = cache_root / rel_path.lstrip("/")
        if cached_path.exists():
            target = Path(rel_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(cached_path), str(target))
|
casher/config.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
# Environment variable names
CACHE_DIR_ENV_VAR = "CASHER_CACHE_DIR"
MAX_CACHE_BYTES_ENV_VAR = "CASHER_MAX_CACHE_BYTES"

# Defaults
_DEFAULT_CACHE_DIR_PATH = Path("~/.cache/casher").expanduser()
_DEFAULT_MAX_CACHE_BYTES = 32 * 1024 * 1024 * 1024  # 32 GiB

# Effective settings, read once at import time. An empty environment variable
# is treated as unset. expanduser() is applied to the final value so a literal
# "~" arriving through the environment resolves to the home directory instead
# of naming a directory called "~".
DEFAULT_CACHE_DIR = Path(
    os.environ.get(CACHE_DIR_ENV_VAR) or _DEFAULT_CACHE_DIR_PATH
).expanduser()
DEFAULT_MAX_CACHE_BYTES = int(
    os.environ.get(MAX_CACHE_BYTES_ENV_VAR) or _DEFAULT_MAX_CACHE_BYTES
)

# Hash algorithm name and the file/directory names used inside a cache entry.
HASH_ALGORITHM = "sha256"
META_FILENAME = "meta.json"
RESULT_FILENAME = "result.pkl"
STDOUT_FILENAME = "stdout.txt"
STDERR_FILENAME = "stderr.txt"
OUTPUT_FILES_DIR = "output_files"

# Platform support — caching only works on Linux (strace, audit hooks)
PLATFORM_SUPPORTED = sys.platform == "linux"

# strace filtering: paths matching these prefixes are NOT considered user file I/O
STRACE_IGNORE_PREFIXES = (
    "/dev/",
    "/proc/",
    "/sys/",
    "/tmp/casher-",
    "/usr/lib/",
    "/lib/",
    "/usr/local/lib/",
    "/usr/share/",
    "/etc/",
)
STRACE_IGNORE_SUFFIXES = (".pyc", ".pyo", ".so", ".dll", ".pth")
STRACE_IGNORE_CONTAINS = ("__pycache__", "site-packages", "/python3.")

# Dependency tracking: paths to consider as "own code"
DEFAULT_DEP_ROOTS: list[Path] = []
|