chunk-memo 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunk_memo-0.1.0/.gitignore +9 -0
- chunk_memo-0.1.0/CHANGELOG.md +16 -0
- chunk_memo-0.1.0/LICENSE +21 -0
- chunk_memo-0.1.0/PKG-INFO +57 -0
- chunk_memo-0.1.0/README_PYPI.md +14 -0
- chunk_memo-0.1.0/chunk_memo/__init__.py +34 -0
- chunk_memo-0.1.0/chunk_memo/_format.py +188 -0
- chunk_memo-0.1.0/chunk_memo/cache.py +1204 -0
- chunk_memo-0.1.0/chunk_memo/cache_index.py +40 -0
- chunk_memo-0.1.0/chunk_memo/data_write_utils.py +60 -0
- chunk_memo-0.1.0/chunk_memo/identity.py +17 -0
- chunk_memo-0.1.0/chunk_memo/memo.py +448 -0
- chunk_memo-0.1.0/chunk_memo/runner_protocol.py +110 -0
- chunk_memo-0.1.0/chunk_memo/runners.py +473 -0
- chunk_memo-0.1.0/chunk_memo/runners_common.py +393 -0
- chunk_memo-0.1.0/chunk_memo/runners_parallel.py +1011 -0
- chunk_memo-0.1.0/pyproject.toml +46 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.2.0 (unreleased)
|
|
4
|
+
- **New**: Support callable `axis_values` for memory-efficient lazy loading
|
|
5
|
+
- Axis values can now be provided as callables instead of lists
|
|
6
|
+
- Enables lazy loading from databases, files, or APIs
|
|
7
|
+
- Supports index-based pattern: `lambda idx: data[idx]`
|
|
8
|
+
- Supports list-returning pattern: `lambda: full_list`
|
|
9
|
+
- Maintains full backward compatibility with existing list/tuple axis_values
|
|
10
|
+
- Added `_get_all_axis_values()` helper for internal value resolution
|
|
11
|
+
- Added `_make_axis_values_serializable()` for JSON serialization
|
|
12
|
+
- Stores both runtime accessors (`self._axis_values`) and a serializable representation (`self._axis_values_serializable`)
|
|
13
|
+
- See `examples/callable_axis_values.py` for usage examples
|
|
14
|
+
|
|
15
|
+
## 0.1.0
|
|
16
|
+
- Initial release.
|
chunk_memo-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Matthew Farrell
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chunk-memo
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Memoization with chunked caching and parallelization.
|
|
5
|
+
Project-URL: Repository, https://github.com/msf235/chunk-memo
|
|
6
|
+
Author-email: Matthew Farrell <matthew.farrell.235@gmail.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2026 Matthew Farrell
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
30
|
+
Classifier: Programming Language :: Python :: 3
|
|
31
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
32
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
33
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
36
|
+
Requires-Python: >=3.10
|
|
37
|
+
Provides-Extra: bench
|
|
38
|
+
Requires-Dist: joblib>=1.5.3; extra == 'bench'
|
|
39
|
+
Provides-Extra: test
|
|
40
|
+
Requires-Dist: flaky>=3.8.1; extra == 'test'
|
|
41
|
+
Requires-Dist: pytest; extra == 'test'
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
|
|
44
|
+
# chunk-memo
|
|
45
|
+
|
|
46
|
+
Chunked memoization for grid-style parameter sweeps. Define a parameter grid, cache
|
|
47
|
+
chunked outputs to disk, and reuse cached work across runs.
|
|
48
|
+
|
|
49
|
+
## Install
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install chunk-memo
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Links
|
|
56
|
+
|
|
57
|
+
- Repository: https://github.com/msf235/chunk-memo
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# chunk-memo
|
|
2
|
+
|
|
3
|
+
Chunked memoization for grid-style parameter sweeps. Define a parameter grid, cache
|
|
4
|
+
chunked outputs to disk, and reuse cached work across runs.
|
|
5
|
+
|
|
6
|
+
## Install
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install chunk-memo
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Links
|
|
13
|
+
|
|
14
|
+
- Repository: https://github.com/msf235/chunk-memo
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
2
|
+
|
|
3
|
+
from .cache import ChunkCache
|
|
4
|
+
from .memo import ChunkMemo
|
|
5
|
+
from .identity import params_to_cache_id
|
|
6
|
+
from .runner_protocol import CacheStatus, RunnerContext
|
|
7
|
+
from .runners import (
|
|
8
|
+
Diagnostics,
|
|
9
|
+
run,
|
|
10
|
+
run_parallel,
|
|
11
|
+
run_streaming,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# Package-level alias so callers can use ``chunk_memo.auto_load`` directly.
auto_load = ChunkMemo.auto_load


# Report the installed distribution's version; when no distribution metadata
# for "chunk-memo" can be found, fall back to a placeholder string.
try:
    __version__ = version("chunk-memo")
except PackageNotFoundError:
    __version__ = "unknown"

# Names exported by ``from chunk_memo import *``.
__all__ = [
    "ChunkCache",
    "ChunkMemo",
    "Diagnostics",
    "__version__",
    "auto_load",
    "run",
    "run_parallel",
    "run_streaming",
    "RunnerContext",
    "CacheStatus",
    "params_to_cache_id",
]
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import math
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Callable, Mapping, Sequence, Tuple
|
|
6
|
+
|
|
7
|
+
# Progress is reported roughly this many times over a run: the reporting step
# is derived as ``total_chunks // PROGRESS_REPORT_DIVISOR`` (minimum 1).
PROGRESS_REPORT_DIVISOR = 50

# Per-thread storage; the module-level print helpers keep one progress
# tracker per thread on this object.
_thread_local = threading.local()

# Type alias: a tuple of ``(name, values)`` pairs, where ``values`` is itself
# a tuple.
ChunkKey = Tuple[Tuple[str, Tuple[Any, ...]], ...]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
|
|
14
|
+
class _ProgressTracker:
|
|
15
|
+
last_len: int = 0
|
|
16
|
+
last_msg: str = ""
|
|
17
|
+
|
|
18
|
+
def print_progress(self, message: str, *, final: bool) -> None:
|
|
19
|
+
if final:
|
|
20
|
+
print(message, end="\n", flush=True)
|
|
21
|
+
self.last_len = 0
|
|
22
|
+
self.last_msg = ""
|
|
23
|
+
return
|
|
24
|
+
pad = max(self.last_len - len(message), 0)
|
|
25
|
+
print(message + (" " * pad), end="\r", flush=True)
|
|
26
|
+
self.last_len = len(message)
|
|
27
|
+
self.last_msg = message
|
|
28
|
+
|
|
29
|
+
def print_detail(self, message: str) -> None:
|
|
30
|
+
if self.last_len:
|
|
31
|
+
print()
|
|
32
|
+
print(message)
|
|
33
|
+
if self.last_msg:
|
|
34
|
+
print(self.last_msg, end="\r", flush=True)
|
|
35
|
+
self.last_len = len(self.last_msg)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def format_axis_values(values: Any) -> str:
    """Render axis values compactly for log output.

    Lists and tuples with at most four entries are shown in full; longer
    ones are abbreviated to their first two and last two entries around
    ``...``. Both are rendered with square brackets. Any other object is
    rendered with ``repr``.
    """
    if not isinstance(values, (list, tuple)):
        return repr(values)
    if len(values) > 4:
        shown = [*map(repr, values[:2]), "...", *map(repr, values[-2:])]
    else:
        shown = [repr(item) for item in values]
    return "[" + ", ".join(shown) + "]"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def format_cache_id(cache_id: str | None) -> list[str]:
|
|
50
|
+
lines = ["[ChunkCache] cache_id:"]
|
|
51
|
+
if not cache_id:
|
|
52
|
+
lines.append(" (none)")
|
|
53
|
+
return lines
|
|
54
|
+
lines.append(f" {cache_id}")
|
|
55
|
+
return lines
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def format_params(params: Mapping[str, Any]) -> list[str]:
    """Return log lines listing ``params`` as ``key=repr(value)``.

    A fixed header line comes first. Keys are listed in sorted order, one
    per line; an empty mapping yields a single ``(none)`` line instead.
    """
    header = "[ChunkCache] params:"
    if not params:
        return [header, " (none)"]
    return [header, *(f" {name}={params[name]!r}" for name in sorted(params))]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def format_spec(axis_values: Mapping[str, Any], axis_order: Sequence[str]) -> list[str]:
    """Return log lines describing each axis in ``axis_order``.

    A fixed header line comes first, followed by one ``axis=values`` line
    per axis. An axis missing from ``axis_values`` is looked up with
    ``.get`` and therefore formatted as ``None``.
    """
    axis_lines = [
        f" {axis}={format_axis_values(axis_values.get(axis))}"
        for axis in axis_order
    ]
    return ["[ChunkCache] spec:", *axis_lines]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def format_eta(seconds: float) -> str:
    """Format a duration in seconds as ``HH:MM:SS``.

    Negative or non-finite inputs (infinity, NaN) yield the placeholder
    ``--:--:--``. Fractional seconds are truncated; hours are not capped,
    so long durations may use more than two hour digits.
    """
    if not math.isfinite(seconds) or seconds < 0:
        return "--:--:--"
    hours, leftover = divmod(int(seconds), 3600)
    minutes, secs = divmod(leftover, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def format_rate_eta(
    label: str,
    processed: int,
    total: int,
    start_time: float,
    *,
    rate_processed: int | None = None,
    rate_total: int | None = None,
) -> str:
    """Build a one-line progress message with percentage, rate and ETA.

    Args:
        label: Text shown after the ``[ChunkCache]`` prefix.
        processed: Count shown as the numerator and used for the percentage.
        total: Count shown as the denominator; a non-positive total is
            reported as 100%.
        start_time: ``time.monotonic()`` reading taken when work started.
        rate_processed: Count used for the rate and ETA; defaults to
            ``processed``.
        rate_total: Target used for the ETA; defaults to ``total``.

    Returns:
        The formatted progress message.
    """
    # Clamp elapsed time so the rate division can never divide by zero.
    elapsed = max(time.monotonic() - start_time, 1e-6)
    done_for_rate = processed if rate_processed is None else rate_processed
    goal_for_rate = total if rate_total is None else rate_total

    per_second = done_for_rate / elapsed

    if total <= 0:
        percent = 100.0
    else:
        percent = processed / total * 100.0

    if goal_for_rate <= 0:
        remaining = 0
    else:
        remaining = max(goal_for_rate - done_for_rate, 0)

    # With no measurable rate the ETA is unknown; infinity is rendered as a
    # placeholder by format_eta.
    if per_second > 0:
        eta = remaining / per_second
    else:
        eta = float("inf")

    counts = f"[ChunkCache] {label} {processed}/{total} "
    stats = f"({percent:0.1f}%) rate={per_second:0.1f}/s ETA={format_eta(eta)}"
    return counts + stats
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def print_progress(message: str, *, final: bool) -> None:
    """Print a progress message through the calling thread's tracker.

    The tracker is created on first use in each thread and stored on the
    module's thread-local object.
    """
    try:
        tracker = _thread_local.tracker
    except AttributeError:
        tracker = _thread_local.tracker = _ProgressTracker()
    tracker.print_progress(message, final=final)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def print_detail(message: str) -> None:
    """Print a detail line through the calling thread's tracker.

    The tracker is created on first use in each thread and stored on the
    module's thread-local object.
    """
    try:
        tracker = _thread_local.tracker
    except AttributeError:
        tracker = _thread_local.tracker = _ProgressTracker()
    tracker.print_detail(message)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def chunk_key_size(chunk_key: ChunkKey) -> int:
    """Return the product of the value counts of every entry in ``chunk_key``.

    An empty key has size 0. Any entry with no values also makes the
    product 0.
    """
    if not chunk_key:
        return 0
    size = 1
    for _name, axis_vals in chunk_key:
        size *= len(axis_vals)
    return size
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def build_plan_lines(
    cache_id: str | None,
    params: Mapping[str, Any],
    axis_values: Mapping[str, Any],
    axis_order: Sequence[str],
    cached_count: int,
    execute_count: int,
) -> list[str]:
    """Assemble the log lines describing an execution plan.

    The result is the cache-id lines, the parameter lines, the axis spec
    lines, and a closing line with the cached and to-execute counts, in
    that order.
    """
    return [
        *format_cache_id(cache_id),
        *format_params(params),
        *format_spec(axis_values, axis_order),
        f"[ChunkCache] plan: cached={cached_count} execute={execute_count}",
    ]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def prepare_progress(
    *,
    total_chunks: int,
    total_items: int,
    verbose: int,
    label: str = "planning",
) -> tuple[Callable[[int, bool], None], Callable[[int], None]]:
    """Create the pair of callbacks used to report progress.

    Args:
        total_chunks: Number of chunks; sets how often progress is printed.
        total_items: Denominator shown in the progress message.
        verbose: Progress lines are printed only when this equals 1.
        label: Label embedded in the progress message.

    Returns:
        ``(report_progress, update_processed)``. ``update_processed(count)``
        adds to the running item count. ``report_progress(processed,
        final=False)`` prints a progress line for that item count when
        ``processed`` is a multiple of the reporting step, equals
        ``total_chunks``, or ``final`` is true.
    """
    report_every = max(1, total_chunks // PROGRESS_REPORT_DIVISOR)
    started_at = time.monotonic()
    items_done = 0

    def update_processed(count: int) -> None:
        nonlocal items_done
        items_done += count

    def report_progress(processed: int, final: bool = False) -> None:
        if verbose != 1:
            return
        # Print only on a step boundary, on the last chunk, or when forced.
        if final or processed % report_every == 0 or processed == total_chunks:
            line = format_rate_eta(label, items_done, total_items, started_at)
            print_progress(line, final=final)

    return report_progress, update_processed
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def print_chunk_summary(
    diagnostics: Any,
    verbose: int,
) -> None:
    """Print a one-line chunk summary when ``verbose`` is at least 2.

    Reads ``cached_chunks``, ``executed_chunks`` and ``total_chunks`` from
    ``diagnostics``. ``partial_chunks`` is optional and is appended only
    when present and non-zero.
    """
    if verbose < 2:
        return
    partial = getattr(diagnostics, "partial_chunks", 0)
    fields = [
        "[ChunkCache] summary",
        f"cached={diagnostics.cached_chunks}",
        f"executed={diagnostics.executed_chunks}",
        f"total={diagnostics.total_chunks}",
    ]
    if partial:
        fields.append(f"partial={partial}")
    print_detail(" ".join(fields))
|