pycasher 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
casher/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ # casher — Python function result caching with side-effect capture
2
+
3
+ from casher.decorator import cached
4
+
5
+ __all__ = ["cached"]
casher/_runner.py ADDED
@@ -0,0 +1,70 @@
1
+ """Subprocess entry point for casher.
2
+
3
+ Invoked as: python -m casher._runner <work_dir>
4
+
5
+ Protocol:
6
+ - Reads args.pkl from work_dir (dict with module, function, args, kwargs)
7
+ - Imports the module, gets the function (unwraps @cached if needed)
8
+ - Executes the function
9
+ - Writes result.pkl, status.json, and exception.pkl (on failure) to work_dir
10
+ """
11
+
12
+ import importlib
13
+ import json
14
+ import pickle
15
+ import sys
16
+ from pathlib import Path
17
+
18
+
19
+ def _unwrap_cached(fn):
20
+ """If fn is wrapped by @cached, return the raw function."""
21
+ return getattr(fn, "__wrapped__", fn)
22
+
23
+
24
def _resolve_function(module_name: str, qualname: str):
    """Import ``module_name`` and look up the object named by ``qualname``.

    ``qualname`` is split on dots and each component is fetched with
    ``getattr``, starting from the imported module. The final object is passed
    through ``_unwrap_cached`` before it is returned.
    """
    target = importlib.import_module(module_name)
    components = qualname.split(".")
    for component in components:
        target = getattr(target, component)
    return _unwrap_cached(target)
31
+
32
+
33
def run(work_dir: Path) -> None:
    """Execute the call described by ``args.pkl`` and record the outcome.

    Reads ``work_dir/args.pkl`` (a dict with ``module``, ``function``,
    ``args`` and ``kwargs``), resolves the function and calls it.

    On success, writes the pickled return value to ``result.pkl`` and
    ``{"success": true}`` to ``status.json``. If an ``Exception`` is raised
    while resolving, calling or serializing, writes ``{"success": false}`` to
    ``status.json`` and pickles the exception to ``exception.pkl``; an
    exception that cannot be pickled is replaced by a ``RuntimeError``
    carrying its ``repr``.
    """
    work_dir = Path(work_dir)
    status_file = work_dir / "status.json"

    # NOTE: unpickling can execute arbitrary code; args.pkl is assumed to be
    # produced by the parent casher process — confirm against the caller.
    with (work_dir / "args.pkl").open("rb") as source:
        payload = pickle.load(source)  # noqa: S301

    module_name, func_name = payload["module"], payload["function"]
    args, kwargs = payload["args"], payload["kwargs"]

    try:
        outcome = _resolve_function(module_name, func_name)(*args, **kwargs)

        with (work_dir / "result.pkl").open("wb") as sink:
            pickle.dump(outcome, sink)

        status_file.write_text(json.dumps({"success": True}))

    except Exception as exc:
        # Status is flipped first so a failure is visible even if writing the
        # exception payload itself goes wrong.
        status_file.write_text(json.dumps({"success": False}))
        exception_file = work_dir / "exception.pkl"
        try:
            with exception_file.open("wb") as sink:
                pickle.dump(exc, sink)
        except Exception:
            # Unpicklable exception: reopening with "wb" truncates any partial
            # write, then a plain RuntimeError stands in for the original.
            with exception_file.open("wb") as sink:
                pickle.dump(RuntimeError(repr(exc)), sink)
64
+
65
+
66
if __name__ == "__main__":
    # Validate argv explicitly instead of with ``assert``: assertions are
    # removed under ``python -O``, which would let a bad invocation through
    # (IndexError on a missing argument, silently ignored extra arguments).
    if len(sys.argv) != 2:
        raise SystemExit(
            f"Usage: python -m casher._runner <work_dir>, got {sys.argv}"
        )
    run(Path(sys.argv[1]))
casher/acache.py ADDED
@@ -0,0 +1,138 @@
1
+ import argparse
2
+ import hashlib
3
+ import shutil
4
+ import sys
5
+ import time
6
+ from pathlib import Path
7
+
8
+ from loguru import logger
9
+
10
+ from casher.config import (
11
+ DEFAULT_CACHE_DIR,
12
+ DEFAULT_MAX_CACHE_BYTES,
13
+ HASH_ALGORITHM,
14
+ OUTPUT_FILES_DIR,
15
+ )
16
+ from casher.hasher import compute_file_hash
17
+ from casher.store import CacheEntry, find_cached, store_result
18
+ from casher.strace import is_strace_available, run_with_strace
19
+
20
+
21
def _compute_command_partial_key(cmd: list[str]) -> str:
    """Return the hex digest identifying the command line ``cmd``.

    The arguments are joined with a NUL separator before hashing with
    ``HASH_ALGORITHM``. NUL cannot occur inside a process argument, so distinct
    argument vectors such as ``["touch", "a b"]`` and ``["touch", "a", "b"]``
    no longer collapse into the same key (joining on a space made them
    indistinguishable).
    """
    h = hashlib.new(HASH_ALGORITHM)
    # surrogateescape round-trips argv entries that were not valid UTF-8
    # instead of raising UnicodeEncodeError on them.
    h.update("\0".join(cmd).encode("utf-8", "surrogateescape"))
    return h.hexdigest()
25
+
26
+
27
def main() -> None:
    """Command-line entry point for ``acache``.

    Arguments before the first ``--`` are parsed as acache options
    (``--cache-dir``, ``--max-cache-bytes``); everything after it is the
    command to run. Without a ``--`` the whole argument list is treated as the
    command.

    On a cache hit the stored stdout/stderr are replayed, cached output files
    are copied back to their recorded paths, and the process exits with the
    stored return value. On a miss the command is run under strace, the files
    it read and wrote are recorded in a new cache entry, its output is
    forwarded, and the process exits with the command's return code.

    Raises:
        SystemExit: if no command was given or strace is not available; also
            raised by ``sys.exit`` at the end of both the hit and miss paths.
    """
    cli = argparse.ArgumentParser(
        description="Cache results of arbitrary CLI programs using strace.",
        usage="acache [--cache-dir DIR] [--max-cache-bytes N] -- command...",
    )
    cli.add_argument("--cache-dir", type=Path, default=None)
    cli.add_argument("--max-cache-bytes", type=int, default=None)

    # Separate acache's own options from the wrapped command at the first "--".
    raw_argv = sys.argv[1:]
    try:
        separator = raw_argv.index("--")
    except ValueError:
        own_args, cmd = [], raw_argv
    else:
        own_args, cmd = raw_argv[:separator], raw_argv[separator + 1 :]

    if not cmd:
        raise SystemExit("No command provided. Usage: acache [options] -- command...")

    options = cli.parse_args(own_args)
    cache_dir = options.cache_dir or DEFAULT_CACHE_DIR
    max_bytes = options.max_cache_bytes
    if max_bytes is None:
        max_bytes = DEFAULT_MAX_CACHE_BYTES

    logger.info("acache: cmd={} | cache_dir={}", " ".join(cmd), cache_dir)

    if not is_strace_available():
        raise SystemExit(
            "strace is required for acache. "
            "Install it (e.g., 'sudo pacman -S strace') or use the Python decorator API."
        )

    partial_key = _compute_command_partial_key(cmd)

    # --- Phase 1: try to serve the command from the cache -------------------
    hit = find_cached(cache_dir, partial_key)
    if hit is not None:
        entry, entry_dir = hit
        logger.info("acache cache HIT: key={}…", partial_key[:12])

        if entry.stdout:
            sys.stdout.write(entry.stdout)
        if entry.stderr:
            sys.stderr.write(entry.stderr)

        # Copy each cached output file back to the path it was recorded under.
        restore_root = entry_dir / OUTPUT_FILES_DIR
        for recorded_path in entry.output_files:
            stored_copy = restore_root / recorded_path.lstrip("/")
            if stored_copy.exists():
                destination = Path(recorded_path)
                destination.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(str(stored_copy), str(destination))

        sys.exit(int(entry.return_value))

    # --- Phase 2: run under strace and store the result ---------------------
    logger.info("acache cache MISS: key={}… | tracing with strace", partial_key[:12])
    traced = run_with_strace(cmd)

    # Keep only paths that are regular files once the command has finished.
    inputs = [path for path in traced.read_files if path.is_file()]
    outputs = [path for path in traced.write_files if path.is_file()]

    file_hash = compute_file_hash(inputs) if inputs else "none"

    input_digests: dict[str, str] = {
        str(path): compute_file_hash([path]) for path in inputs
    }
    output_map: dict[str, Path] = {str(path): path for path in outputs}

    entry = CacheEntry(
        return_value=traced.returncode,
        stdout=traced.stdout,
        stderr=traced.stderr,
        output_files=output_map,
        input_files=input_digests,
        created_at=time.time(),
        func_module="acache",
        func_name=" ".join(cmd),
    )

    store_result(cache_dir, partial_key, file_hash, entry, max_bytes=max_bytes)

    # Forward the captured output of the traced command.
    if traced.stdout:
        sys.stdout.write(traced.stdout)
    if traced.stderr:
        sys.stderr.write(traced.stderr)

    sys.exit(traced.returncode)


if __name__ == "__main__":
    main()
casher/audit_hook.py ADDED
@@ -0,0 +1,65 @@
1
+ import contextvars
2
+ import sys
3
+ from collections.abc import Generator
4
+ from contextlib import contextmanager
5
+ from pathlib import Path
6
+
7
+
8
# Tracker that receives "open" events in the current context; None when no
# tracking is active.
_active_tracker: contextvars.ContextVar["FileIOTracker | None"] = (
    contextvars.ContextVar("casher_file_tracker", default=None)
)
# Whether the audit hook has been registered for this process.
_hook_installed = False

# Plain read-only / write-only mode strings. FileIOTracker.record classifies by
# flag character (below), which gives the same answer for every mode listed
# here; the sets are kept so existing references to them keep working.
_READ_MODES = {"r", "rb", "rt"}
_WRITE_MODES = {"w", "wb", "wt", "a", "ab", "at", "x", "xb", "xt"}

# Mode characters that imply the file may be modified: write, append,
# exclusive-create and the "+" update flag.
_WRITE_FLAGS = frozenset("wax+")


class FileIOTracker:
    """Collects the distinct paths opened for reading and for writing.

    ``read_files`` and ``write_files`` keep first-seen order; each path is
    listed at most once per list.
    """

    def __init__(self) -> None:
        self.read_files: list[Path] = []
        self.write_files: list[Path] = []
        self._seen_read: set[str] = set()
        self._seen_write: set[str] = set()

    def record(self, path: str, mode: str) -> None:
        """Register one ``open`` of ``path`` with the given ``mode`` string.

        The mode is classified by its flag characters rather than by exact
        match, so update modes such as ``"r+"``, ``"rb+"`` or ``"w+"`` are no
        longer ignored:

        - any of ``w``, ``a``, ``x`` or ``+`` marks the path as written;
        - ``r`` marks the path as read (``"r+"`` is therefore both).

        A mode containing none of these characters is ignored.
        """
        if any(flag in mode for flag in _WRITE_FLAGS):
            if path not in self._seen_write:
                self._seen_write.add(path)
                self.write_files.append(Path(path))
        if "r" in mode:
            if path not in self._seen_read:
                self._seen_read.add(path)
                self.read_files.append(Path(path))
33
+
34
+
35
+ def _audit_hook(event: str, args: tuple) -> None:
36
+ if event != "open":
37
+ return
38
+ tracker = _active_tracker.get()
39
+ if tracker is None:
40
+ return
41
+ # args for "open" event: (path, mode, flags)
42
+ path = args[0]
43
+ mode = args[1]
44
+ if not isinstance(path, str) or not isinstance(mode, str):
45
+ return
46
+ tracker.record(path, mode)
47
+
48
+
49
def _ensure_hook_installed() -> None:
    """Register ``_audit_hook`` with ``sys.addaudithook`` on the first call only.

    The module-level ``_hook_installed`` flag records that registration has
    happened, so later calls do nothing.
    """
    global _hook_installed
    if not _hook_installed:
        sys.addaudithook(_audit_hook)
        _hook_installed = True
55
+
56
+
57
@contextmanager
def track_file_io() -> Generator[FileIOTracker, None, None]:
    """Context manager yielding a fresh ``FileIOTracker`` for the enclosed block.

    Ensures the audit hook is installed, makes the new tracker the active one
    in ``_active_tracker`` for the duration of the ``with`` body, and restores
    the previous value on exit, including when the body raises.
    """
    _ensure_hook_installed()
    current = FileIOTracker()
    restore_token = _active_tracker.set(current)
    try:
        yield current
    finally:
        # Reset via the token so an outer tracker (if any) becomes active again.
        _active_tracker.reset(restore_token)
casher/auto_cli.py ADDED
@@ -0,0 +1,52 @@
1
+ import argparse
2
+ import inspect
3
+ import sys
4
+ from collections.abc import Callable
5
+ from pathlib import Path
6
+
7
+
8
# Annotations that are passed to argparse directly as ``type=`` converters;
# any other annotation falls back to ``str``.
_TYPE_MAP = {
    str: str,
    int: int,
    float: float,
    Path: Path,
}


def run(func: Callable) -> None:
    """Generate a CLI from ``func``'s signature, parse ``sys.argv`` and call it.

    Parameters without a default become positional arguments. Parameters with
    a default become ``--option`` flags (underscores shown as dashes);
    ``bool``-annotated ones become ``store_true`` when the default is
    ``False`` and ``store_false`` otherwise.

    A ``None`` result prints nothing. A result whose type lives in a
    ``polars`` or ``pandas`` module is written to stdout as CSV when it offers
    an exporter; anything else is printed with ``print``.
    """
    sig = inspect.signature(func)
    parser = argparse.ArgumentParser(description=func.__doc__ or "")

    for name, param in sig.parameters.items():
        annotation = (
            param.annotation
            if param.annotation is not inspect.Parameter.empty
            else str
        )
        converter = _TYPE_MAP.get(annotation, str)

        if param.default is inspect.Parameter.empty:
            # No default: required positional argument.
            parser.add_argument(name, type=converter)
            continue

        cli_name = f"--{name.replace('_', '-')}"
        if annotation is bool:
            # The flag flips the default: present means "not the default".
            action = "store_true" if param.default is False else "store_false"
            parser.add_argument(cli_name, action=action, dest=name)
        else:
            parser.add_argument(
                cli_name,
                type=converter,
                default=param.default,
                dest=name,
            )

    args = parser.parse_args()
    result = func(**vars(args))
    if result is None:
        return

    # polars frames export CSV text via write_csv(); pandas objects use
    # to_csv(). Calling to_csv() on a polars frame raises AttributeError, so
    # the exporter is chosen per library (to_csv is still tried for polars as
    # a fallback).
    module = type(result).__module__
    if module.startswith("polars"):
        exporter_names = ("write_csv", "to_csv")
    elif module.startswith("pandas"):
        exporter_names = ("to_csv",)
    else:
        exporter_names = ()

    for exporter_name in exporter_names:
        exporter = getattr(result, exporter_name, None)
        if callable(exporter):
            sys.stdout.write(exporter())
            return

    # Not a dataframe-like result, or one without a CSV exporter.
    print(result)
casher/capture.py ADDED
@@ -0,0 +1,29 @@
1
+ import shutil
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from casher.config import OUTPUT_FILES_DIR
6
+ from casher.store import CacheEntry
7
+
8
+
9
def replay_side_effects(entry: CacheEntry, entry_dir: Path) -> None:
    """Re-emit the side effects stored in a cache entry.

    Writes the entry's stdout to ``sys.stdout`` and its stderr to
    ``sys.stderr`` (each only when non-empty), then copies every cached output
    file found under ``entry_dir / OUTPUT_FILES_DIR`` back to the path it was
    recorded under, creating parent directories as needed. Output files with
    no cached copy are skipped.
    """
    if entry.stdout:
        sys.stdout.write(entry.stdout)

    if entry.stderr:
        sys.stderr.write(entry.stderr)

    restore_root = entry_dir / OUTPUT_FILES_DIR
    for recorded_path in entry.output_files:
        # Leading slashes are stripped so the recorded path nests under the
        # cache directory instead of replacing it when joined.
        stored_copy = restore_root / recorded_path.lstrip("/")
        if stored_copy.exists():
            destination = Path(recorded_path)
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(stored_copy), str(destination))
casher/config.py ADDED
@@ -0,0 +1,42 @@
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+
5
# Names of the environment variables that override the defaults below.
CACHE_DIR_ENV_VAR = "CASHER_CACHE_DIR"
MAX_CACHE_BYTES_ENV_VAR = "CASHER_MAX_CACHE_BYTES"

# Built-in fallbacks, used when the corresponding variable is not set.
_DEFAULT_CACHE_DIR_PATH = Path("~/.cache/casher").expanduser()
_DEFAULT_MAX_CACHE_BYTES = 32 * 1024**3  # 32 GB

# Effective defaults, resolved once at import time.
DEFAULT_CACHE_DIR = Path(os.getenv(CACHE_DIR_ENV_VAR, str(_DEFAULT_CACHE_DIR_PATH)))
DEFAULT_MAX_CACHE_BYTES = int(
    os.getenv(MAX_CACHE_BYTES_ENV_VAR, str(_DEFAULT_MAX_CACHE_BYTES))
)

# Hash algorithm name and the file/directory names used inside a cache entry.
HASH_ALGORITHM = "sha256"
META_FILENAME = "meta.json"
RESULT_FILENAME = "result.pkl"
STDOUT_FILENAME = "stdout.txt"
STDERR_FILENAME = "stderr.txt"
OUTPUT_FILES_DIR = "output_files"

# Platform support — caching only works on Linux (strace, audit hooks).
PLATFORM_SUPPORTED = sys.platform == "linux"

# strace filtering: paths matching any of these rules are NOT considered user
# file I/O.
STRACE_IGNORE_PREFIXES = (
    "/dev/",
    "/proc/",
    "/sys/",
    "/tmp/casher-",
    "/usr/lib/",
    "/lib/",
    "/usr/local/lib/",
    "/usr/share/",
    "/etc/",
)
STRACE_IGNORE_SUFFIXES = (
    ".pyc",
    ".pyo",
    ".so",
    ".dll",
    ".pth",
)
STRACE_IGNORE_CONTAINS = (
    "__pycache__",
    "site-packages",
    "/python3.",
)

# Dependency tracking: paths to consider as "own code" (empty by default).
DEFAULT_DEP_ROOTS: list[Path] = []