chunk-memo 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ output/
2
+ __pycache__/
3
+ *.pyc
4
+ dist/
5
+ build/
6
+ .pytest_cache/
7
+ .ruff_cache/
8
+ .venv/
9
+ .session.vim
@@ -0,0 +1,16 @@
1
+ # Changelog
2
+
3
+ ## 0.2.0 (unreleased)
4
+ - **New**: Support callable `axis_values` for memory-efficient lazy loading
5
+ - Axis values can now be provided as callables instead of lists
6
+ - Enables lazy loading from databases, files, or APIs
7
+ - Supports index-based pattern: `lambda idx: data[idx]`
8
+ - Supports list-returning pattern: `lambda: full_list`
9
+ - Maintains full backward compatibility with existing list/tuple axis_values
10
+ - Added `_get_all_axis_values()` helper for internal value resolution
11
+ - Added `_make_axis_values_serializable()` for JSON serialization
12
+ - Stores both the runtime accessors (`self._axis_values`) and a serializable representation (`self._axis_values_serializable`)
13
+ - See `examples/callable_axis_values.py` for usage examples
14
+
15
+ ## 0.1.0
16
+ - Initial release.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Matthew Farrell
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,57 @@
1
+ Metadata-Version: 2.4
2
+ Name: chunk-memo
3
+ Version: 0.1.0
4
+ Summary: Memoization with chunked caching and parallelization.
5
+ Project-URL: Repository, https://github.com/msf235/chunk-memo
6
+ Author-email: Matthew Farrell <matthew.farrell.235@gmail.com>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2026 Matthew Farrell
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+ License-File: LICENSE
29
+ Classifier: License :: OSI Approved :: MIT License
30
+ Classifier: Programming Language :: Python :: 3
31
+ Classifier: Programming Language :: Python :: 3 :: Only
32
+ Classifier: Programming Language :: Python :: 3.10
33
+ Classifier: Programming Language :: Python :: 3.11
34
+ Classifier: Programming Language :: Python :: 3.12
35
+ Classifier: Programming Language :: Python :: 3.13
36
+ Requires-Python: >=3.10
37
+ Provides-Extra: bench
38
+ Requires-Dist: joblib>=1.5.3; extra == 'bench'
39
+ Provides-Extra: test
40
+ Requires-Dist: flaky>=3.8.1; extra == 'test'
41
+ Requires-Dist: pytest; extra == 'test'
42
+ Description-Content-Type: text/markdown
43
+
44
+ # chunk-memo
45
+
46
+ Chunked memoization for grid-style parameter sweeps. Define a parameter grid, cache
47
+ chunked outputs to disk, and reuse cached work across runs.
48
+
49
+ ## Install
50
+
51
+ ```bash
52
+ pip install chunk-memo
53
+ ```
54
+
55
+ ## Links
56
+
57
+ - Repository: https://github.com/msf235/chunk-memo
@@ -0,0 +1,14 @@
1
+ # chunk-memo
2
+
3
+ Chunked memoization for grid-style parameter sweeps. Define a parameter grid, cache
4
+ chunked outputs to disk, and reuse cached work across runs.
5
+
6
+ ## Install
7
+
8
+ ```bash
9
+ pip install chunk-memo
10
+ ```
11
+
12
+ ## Links
13
+
14
+ - Repository: https://github.com/msf235/chunk-memo
@@ -0,0 +1,34 @@
1
+ from importlib.metadata import PackageNotFoundError, version
2
+
3
+ from .cache import ChunkCache
4
+ from .memo import ChunkMemo
5
+ from .identity import params_to_cache_id
6
+ from .runner_protocol import CacheStatus, RunnerContext
7
+ from .runners import (
8
+ Diagnostics,
9
+ run,
10
+ run_parallel,
11
+ run_streaming,
12
+ )
13
+
14
# Expose ChunkMemo.auto_load under a package-level name.
auto_load = ChunkMemo.auto_load


# Look up the version of the "chunk-memo" distribution; fall back to
# "unknown" when no metadata for that distribution can be found.
try:
    __version__ = version("chunk-memo")
except PackageNotFoundError:
    __version__ = "unknown"

# Names exported by a star-import of this package.
__all__ = [
    "ChunkCache",
    "ChunkMemo",
    "Diagnostics",
    "__version__",
    "auto_load",
    "run",
    "run_parallel",
    "run_streaming",
    "RunnerContext",
    "CacheStatus",
    "params_to_cache_id",
]
@@ -0,0 +1,188 @@
1
+ import math
2
+ import threading
3
+ import time
4
+ from dataclasses import dataclass
5
+ from typing import Any, Callable, Mapping, Sequence, Tuple
6
+
7
# A chunk key is a tuple of (name, tuple-of-values) pairs.
# NOTE(review): the name is presumably an axis name -- confirm against callers.
ChunkKey = Tuple[Tuple[str, Tuple[Any, ...]], ...]

# Per-thread storage; the module-level print helpers keep their
# _ProgressTracker instance on this object.
_thread_local = threading.local()
# prepare_progress() derives its reporting step as
# max(1, total_chunks // PROGRESS_REPORT_DIVISOR).
PROGRESS_REPORT_DIVISOR = 50
11
+
12
+
13
@dataclass
class _ProgressTracker:
    """Remember the in-place progress line so it can be overwritten cleanly.

    Non-final progress messages end with a carriage return, so the next
    message is written over them.  The tracker keeps the last message and
    its length in order to blank out leftover characters and to redraw the
    progress line after a detail message has interrupted it.
    """

    # Length of the progress message currently shown (0 when there is none).
    last_len: int = 0
    # Text of that progress message, kept so print_detail() can redraw it.
    last_msg: str = ""

    def print_progress(self, message: str, *, final: bool) -> None:
        """Print *message* over the previous progress line.

        Args:
            message: Text to display.
            final: When true, end the line with a newline and forget the
                tracked message; otherwise end with a carriage return so the
                next call overwrites this one.
        """
        # Pad with spaces so a shorter message fully covers a longer one that
        # is still on the terminal line.  This also applies to the final
        # message; otherwise stale trailing characters would remain visible.
        padding = " " * max(self.last_len - len(message), 0)
        if final:
            print(message + padding, end="\n", flush=True)
            self.last_len = 0
            self.last_msg = ""
            return
        print(message + padding, end="\r", flush=True)
        self.last_len = len(message)
        self.last_msg = message

    def print_detail(self, message: str) -> None:
        """Print *message* on its own line, then redraw the progress line."""
        if self.last_len:
            # Move off the progress line so it is not overwritten.
            print()
        print(message)
        if self.last_msg:
            print(self.last_msg, end="\r", flush=True)
            self.last_len = len(self.last_msg)
36
+
37
+
38
def format_axis_values(values: Any) -> str:
    """Render axis values compactly for display.

    Lists and tuples with at most four entries are shown in full; longer
    ones are abbreviated to their first two and last two entries around an
    ellipsis.  Any other object is returned as its ``repr``.
    """
    if not isinstance(values, (list, tuple)):
        return repr(values)
    if len(values) > 4:
        shown = [*map(repr, values[:2]), "...", *map(repr, values[-2:])]
    else:
        shown = [repr(item) for item in values]
    return "[" + ", ".join(shown) + "]"
47
+
48
+
49
+ def format_cache_id(cache_id: str | None) -> list[str]:
50
+ lines = ["[ChunkCache] cache_id:"]
51
+ if not cache_id:
52
+ lines.append(" (none)")
53
+ return lines
54
+ lines.append(f" {cache_id}")
55
+ return lines
56
+
57
+
58
def format_params(params: Mapping[str, Any]) -> list[str]:
    """Return the display lines for a parameter mapping.

    After a fixed header, each parameter is listed as ``key=repr(value)`` in
    sorted key order; an empty mapping yields a ``(none)`` placeholder line.
    """
    header = "[ChunkCache] params:"
    if not params:
        return [header, " (none)"]
    return [header, *(f" {name}={params[name]!r}" for name in sorted(params))]
67
+
68
+
69
def format_spec(axis_values: Mapping[str, Any], axis_order: Sequence[str]) -> list[str]:
    """Return the display lines describing each axis in *axis_order*.

    After a fixed header, every axis is listed as ``axis=<formatted values>``.
    An axis missing from *axis_values* is formatted as ``None``.
    """
    entries = [
        f" {name}={format_axis_values(axis_values.get(name))}"
        for name in axis_order
    ]
    return ["[ChunkCache] spec:", *entries]
75
+
76
+
77
def format_eta(seconds: float) -> str:
    """Format a duration in seconds as ``HH:MM:SS``.

    Negative or non-finite durations produce the placeholder ``--:--:--``.
    Fractional seconds are truncated.
    """
    if seconds >= 0 and math.isfinite(seconds):
        minutes, secs = divmod(int(seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}"
    return "--:--:--"
85
+
86
+
87
def format_rate_eta(
    label: str,
    processed: int,
    total: int,
    start_time: float,
    *,
    rate_processed: int | None = None,
    rate_total: int | None = None,
) -> str:
    """Build a one-line progress message with percentage, rate and ETA.

    Args:
        label: Text shown after the ``[ChunkCache]`` prefix.
        processed: Count shown in the ``processed/total`` fraction.
        total: Total shown in the fraction; a non-positive total reports 100%.
        start_time: ``time.monotonic()`` reading taken when work began.
        rate_processed: Count used for the rate and ETA; defaults to
            *processed*.
        rate_total: Total used for the ETA; defaults to *total*.

    Returns:
        The formatted progress message.
    """
    # Clamp elapsed time so the rate division can never hit zero.
    elapsed = max(time.monotonic() - start_time, 1e-6)
    if rate_processed is None:
        rate_processed = processed
    if rate_total is None:
        rate_total = total

    rate = rate_processed / elapsed
    if total <= 0:
        percent = 100.0
    else:
        percent = processed / total * 100.0

    if rate_total <= 0:
        remaining = 0
    else:
        remaining = max(rate_total - rate_processed, 0)

    # With no measurable rate the ETA is unknown; format_eta renders infinity
    # as a placeholder.
    if rate > 0:
        eta = remaining / rate
    else:
        eta = float("inf")

    fraction = f"[ChunkCache] {label} {processed}/{total} "
    stats = f"({percent:0.1f}%) rate={rate:0.1f}/s ETA={format_eta(eta)}"
    return fraction + stats
109
+
110
+
111
def print_progress(message: str, *, final: bool) -> None:
    """Print a progress message through the calling thread's tracker.

    The tracker is created on first use and stored in thread-local storage.
    """
    try:
        tracker = _thread_local.tracker
    except AttributeError:
        tracker = _thread_local.tracker = _ProgressTracker()
    tracker.print_progress(message, final=final)
115
+
116
+
117
def print_detail(message: str) -> None:
    """Print a detail message through the calling thread's tracker.

    The tracker is created on first use and stored in thread-local storage.
    """
    try:
        tracker = _thread_local.tracker
    except AttributeError:
        tracker = _thread_local.tracker = _ProgressTracker()
    tracker.print_detail(message)
121
+
122
+
123
def chunk_key_size(chunk_key: ChunkKey) -> int:
    """Return the product of the value counts of every entry in *chunk_key*.

    An empty chunk key has size 0.
    """
    if not chunk_key:
        return 0
    size = 1
    for _name, entry_values in chunk_key:
        size *= len(entry_values)
    return size
127
+
128
+
129
def build_plan_lines(
    cache_id: str | None,
    params: Mapping[str, Any],
    axis_values: Mapping[str, Any],
    axis_order: Sequence[str],
    cached_count: int,
    execute_count: int,
) -> list[str]:
    """Assemble the full plan description as a list of display lines.

    The result contains, in order, the cache-id lines, the parameter lines,
    the axis spec lines, and a final line with the cached and execute counts.
    """
    return [
        *format_cache_id(cache_id),
        *format_params(params),
        *format_spec(axis_values, axis_order),
        f"[ChunkCache] plan: cached={cached_count} execute={execute_count}",
    ]
143
+
144
+
145
def prepare_progress(
    *,
    total_chunks: int,
    total_items: int,
    verbose: int,
    label: str = "planning",
) -> tuple[Callable[[int, bool], None], Callable[[int], None]]:
    """Create the pair of callbacks used to report progress.

    Args:
        total_chunks: Number of chunks; sets how often progress is reported.
        total_items: Total item count shown in the progress message.
        verbose: Progress lines are printed only when this equals 1.
        label: Text shown in each progress message.

    Returns:
        ``(report_progress, update_processed)``.  ``update_processed(count)``
        adds to the running item total; ``report_progress(processed, final)``
        prints a progress line when *processed* chunks is a reporting
        checkpoint, equals *total_chunks*, or *final* is true.
    """
    step = max(1, total_chunks // PROGRESS_REPORT_DIVISOR)
    started_at = time.monotonic()
    items_done = 0

    def update_processed(count: int) -> None:
        nonlocal items_done
        items_done += count

    def report_progress(processed: int, final: bool = False) -> None:
        if verbose != 1:
            return
        at_checkpoint = processed % step == 0 or processed == total_chunks
        if final or at_checkpoint:
            print_progress(
                format_rate_eta(label, items_done, total_items, started_at),
                final=final,
            )

    return report_progress, update_processed
174
+
175
+
176
def print_chunk_summary(
    diagnostics: Any,
    verbose: int,
) -> None:
    """Print a one-line chunk summary when *verbose* is at least 2.

    The line reports the ``cached_chunks``, ``executed_chunks`` and
    ``total_chunks`` attributes of *diagnostics*, plus ``partial_chunks``
    when that attribute exists and is non-zero.
    """
    if verbose < 2:
        return
    partial = getattr(diagnostics, "partial_chunks", 0)
    fields = [
        "[ChunkCache] summary",
        f"cached={diagnostics.cached_chunks}",
        f"executed={diagnostics.executed_chunks}",
        f"total={diagnostics.total_chunks}",
    ]
    summary = " ".join(fields)
    if partial:
        summary += f" partial={partial}"
    print_detail(summary)