arrayfile-0.0.1.tar.gz

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
@@ -0,0 +1,7 @@
+ # WTFPL + Warranty
+
+ Licensed under the WTFPL with one additional clause:
+
+ 1. Don't blame me.
+
+ Do whatever the fuck you want, just don't blame me.
@@ -0,0 +1,104 @@
+ Metadata-Version: 2.4
+ Name: arrayfile
+ Version: 0.0.1
+ Summary: Arrays backed by disk
+ Keywords: logging,terminal,scrollback,indexing
+ Author-email: Gareth Davidson <gaz@bitplane.net>
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Operating System :: OS Independent
+ Classifier: Topic :: System :: Logging
+ Classifier: Topic :: Terminals
+ License-File: LICENSE.md
+ Requires-Dist: pre-commit ; extra == "dev"
+ Requires-Dist: pytest ; extra == "dev"
+ Requires-Dist: coverage ; extra == "dev"
+ Requires-Dist: pytest-cov ; extra == "dev"
+ Requires-Dist: build ; extra == "dev"
+ Requires-Dist: twine ; extra == "dev"
+ Requires-Dist: ruff ; extra == "dev"
+ Requires-Dist: pydoc-markdown ; extra == "dev"
+ Provides-Extra: dev
+
+ # arrayfile
+
+ A file-backed numeric array using struct.pack. Does not support inserts or
+ slicing.
+
+ Smaller than relying on numpy though.
+
+ ## Installation
+
+ ```bash
+ pip install arrayfile
+ ```
+
+ ## Usage
+
+ ### Temporary Array
+
+ This creates an array in your temp dir:
+
+ ```python
+ from arrayfile import Array
+
+ # Create a temporary array with float data
+ arr = Array('f')
+ arr.append(3.14)
+ arr.append(2.71)
+ arr.extend([1.41, 1.73])
+
+ print(f"Length: {len(arr)}")
+ print(f"Values: {[arr[i] for i in range(len(arr))]}")
+ arr.close()  # Clean up resources
+ ```
+
+ ### Persistent Array
+
+ You can use the same file, if you want to persist your data across sessions:
+
+ ```python
+ from arrayfile import Array
+
+ # Create and populate an array file
+ arr = Array('i', 'numbers.array', 'w+b')
+ for i in range(1000):
+     arr.append(i * 2)
+ arr.close()
+
+ # Reopen the same file later
+ arr = Array('i', 'numbers.array', 'r+b')
+ print(f"Array has {len(arr)} elements")
+ print(f"First element: {arr[0]}")
+ print(f"Last element: {arr[-1]}")
+
+ # Add more data
+ arr.append(2000)
+ arr.close()
+ ```
+
+ ## Context manager
+
+ It has a finalizer in case you forget to call `close()`, but if you like to keep
+ your code tidy, you can use a context manager, like so:
+
+ ```python
+ from arrayfile import Array
+
+ # Using double precision floats with context manager
+ with Array('d', 'measurements.array', 'w+b') as arr:
+     arr.extend([3.141592653589793, 2.718281828459045, 1.4142135623730951])
+
+     print(f"Stored {len(arr)} precise measurements")
+     for i, value in enumerate(arr):
+         print(f" {i}: {value:.15f}")
+ ```
+
+
@@ -0,0 +1,74 @@
+ # arrayfile
+
+ A file-backed numeric array using struct.pack. Does not support inserts or
+ slicing.
+
+ Smaller than relying on numpy though.
+
+ ## Installation
+
+ ```bash
+ pip install arrayfile
+ ```
+
+ ## Usage
+
+ ### Temporary Array
+
+ This creates an array in your temp dir:
+
+ ```python
+ from arrayfile import Array
+
+ # Create a temporary array with float data
+ arr = Array('f')
+ arr.append(3.14)
+ arr.append(2.71)
+ arr.extend([1.41, 1.73])
+
+ print(f"Length: {len(arr)}")
+ print(f"Values: {[arr[i] for i in range(len(arr))]}")
+ arr.close()  # Clean up resources
+ ```
+
+ ### Persistent Array
+
+ You can use the same file, if you want to persist your data across sessions:
+
+ ```python
+ from arrayfile import Array
+
+ # Create and populate an array file
+ arr = Array('i', 'numbers.array', 'w+b')
+ for i in range(1000):
+     arr.append(i * 2)
+ arr.close()
+
+ # Reopen the same file later
+ arr = Array('i', 'numbers.array', 'r+b')
+ print(f"Array has {len(arr)} elements")
+ print(f"First element: {arr[0]}")
+ print(f"Last element: {arr[-1]}")
+
+ # Add more data
+ arr.append(2000)
+ arr.close()
+ ```
+
+ ## Context manager
+
+ It has a finalizer in case you forget to call `close()`, but if you like to keep
+ your code tidy, you can use a context manager, like so:
+
+ ```python
+ from arrayfile import Array
+
+ # Using double precision floats with context manager
+ with Array('d', 'measurements.array', 'w+b') as arr:
+     arr.extend([3.141592653589793, 2.718281828459045, 1.4142135623730951])
+
+     print(f"Stored {len(arr)} precise measurements")
+     for i, value in enumerate(arr):
+         print(f" {i}: {value:.15f}")
+ ```
+
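The README above sticks to `append`, `extend`, indexing and the context manager, but the `Array` class in `array.py` further down also defines `__iter__`, `__contains__`, `__iadd__` and `__imul__`. A minimal sketch of that extra surface, inferred from reading those methods rather than from any documented example (`counts.array` is just an illustrative filename):

```python
from arrayfile import Array

# Sketch only: exercises the dunder methods visible in array.py below.
with Array('i', 'counts.array', 'w+b') as arr:
    arr += [1, 2, 3]   # __iadd__ delegates to extend()
    arr *= 2           # __imul__ repeats the contents in place
    print(3 in arr)    # __contains__ does a linear scan -> True
    print(list(arr))   # __iter__ yields each element -> [1, 2, 3, 1, 2, 3]
```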
@@ -0,0 +1,49 @@
+ [project]
+ name = "arrayfile"
+ description = "Arrays backed by disk"
+ version = "0.0.1"
+ authors = [
+     { name = "Gareth Davidson", email = "gaz@bitplane.net" }
+ ]
+ readme = "README.md"
+ requires-python = ">=3.10"
+ license = {text = "CC0"} # wtfpl
+ keywords = ["logging", "terminal", "scrollback", "indexing"]
+ classifiers = [
+     "Development Status :: 3 - Alpha",
+     "Intended Audience :: Developers",
+     "License :: OSI Approved :: MIT License",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.10",
+     "Programming Language :: Python :: 3.11",
+     "Programming Language :: Python :: 3.12",
+     "Operating System :: OS Independent",
+     "Topic :: System :: Logging",
+     "Topic :: Terminals",
+ ]
+
+ dependencies = [
+ ]
+
+ [project.optional-dependencies]
+ dev = [
+     "pre-commit",
+     "pytest",
+     "coverage",
+     "pytest-cov",
+     "build",
+     "twine",
+     "ruff",
+     "pydoc-markdown"
+ ]
+
+ [build-system]
+ build-backend = "flit_core.buildapi"
+ requires = ["flit_core >=3.2,<4"]
+
+ [tool.ruff]
+ line-length = 120
+ target-version = "py310"
+
+ [tool.ruff.format]
+ docstring-code-format = true
@@ -0,0 +1,5 @@
+ """arrayfile - Memory-mapped array implementation for efficient file-backed arrays."""
+
+ from .array import Array
+
+ __all__ = ["Array"]
@@ -0,0 +1,286 @@
+ import mmap
+ import os
+ import struct
+ import tempfile
+ import threading
+ import weakref
+
+
+ class Array:
+     CHUNK_SIZE_BYTES = 4096
+
+     # Header constants
+     MAGIC = b"ARYF"
+     HEADER_VERSION = 1
+     HEADER_SIZE = 32
+     HEADER_FORMAT = (
+         "<4sHB8sIQ5x"  # magic(4), version(2), dtype_len(1), dtype(8), element_size(4), length(8), reserved(5)
+     )
+
+     def __init__(self, dtype, filename=None, mode="r+b", initial_elements=0):
+         self._lock = threading.Lock()
+
+         if filename is None:
+             fd, filename = tempfile.mkstemp()
+             os.close(fd)
+             mode = "w+b"  # Always create new temp files
+
+         self._filename = filename
+         self._dtype = dtype
+         self._dtype_format = dtype
+         self._element_size = struct.calcsize(dtype)
+         self._file = None
+         self._mmap = None
+         self._len = 0
+         self._capacity = 0
+         self._capacity_bytes = 0  # Initialize _capacity_bytes here
+         self._data_offset = self.HEADER_SIZE  # All data starts after header
+
+         if "w" in mode or not os.path.exists(filename):
+             # Create or truncate file
+             self._file = open(filename, "w+b")
+             self._len = 0
+             self._allocate_capacity(initial_elements)
+             self._write_header()
+         else:
+             # Open existing file
+             self._file = open(filename, mode)
+             if not self._read_header():
+                 raise ValueError("File does not have a valid array header")
+
+             current_file_size = os.fstat(self._file.fileno()).st_size
+             data_size = current_file_size - self.HEADER_SIZE
+
+             # Calculate capacity based on current data size and ensure chunk alignment
+             min_elements = (data_size + self._element_size - 1) // self._element_size
+             self._allocate_capacity(min_elements)
+
+         # Only mmap if the file has a non-zero size
+         if self._capacity_bytes > 0:
+             self._mmap = mmap.mmap(self._file.fileno(), 0)
+
+         # Set up finalizer to ensure cleanup even if close() isn't called
+         self._finalizer = weakref.finalize(self, self.close)
+
+     def __len__(self):
+         return self._len
+
+     def __iter__(self):
+         current_len = self._len
+         for i in range(current_len):
+             yield self[i]
+
+     def _validate_index(self, index):
+         """Validate and normalize an index, returning the normalized value."""
+         if not isinstance(index, int):
+             raise TypeError("Index must be an integer")
+
+         # Handle negative indices
+         if index < 0:
+             index = self._len + index
+
+         if not (0 <= index < self._len):
+             raise IndexError("Index out of bounds")
+
+         return index
+
+     def _pack_value(self, value):
+         """Pack a value into bytes according to the dtype format."""
+         try:
+             return struct.pack(self._dtype_format, value)
+         except struct.error as e:
+             raise TypeError(f"Value {value} cannot be packed as {self._dtype_format}: {e}")
+
+     def _write_header(self):
+         """Write header to the beginning of the file."""
+         dtype_bytes = self._dtype.encode("ascii")[:8]  # Limit to 8 bytes
+         dtype_bytes = dtype_bytes.ljust(8, b"\x00")  # Pad with nulls
+
+         header = struct.pack(
+             self.HEADER_FORMAT,
+             self.MAGIC,
+             self.HEADER_VERSION,
+             len(self._dtype),
+             dtype_bytes,
+             self._element_size,
+             self._len,
+         )
+
+         self._file.seek(0)
+         self._file.write(header)
+         self._file.flush()
+
+     def _read_header(self):
+         """Read and validate header from file. Returns True if valid header, False if no header."""
+         self._file.seek(0)
+         header_data = self._file.read(self.HEADER_SIZE)
+
+         if len(header_data) < self.HEADER_SIZE:
+             return False
+
+         magic, version, dtype_len, dtype_bytes, element_size, length = struct.unpack(self.HEADER_FORMAT, header_data)
+
+         if magic != self.MAGIC:
+             return False
+
+         if version != self.HEADER_VERSION:
+             raise ValueError(f"Unsupported header version: {version}")
+
+         # Extract dtype string
+         dtype = dtype_bytes[:dtype_len].decode("ascii")
+
+         # Validate dtype matches
+         if dtype != self._dtype:
+             raise ValueError(f"File dtype '{dtype}' does not match requested dtype '{self._dtype}'")
+
+         if element_size != self._element_size:
+             raise ValueError(f"File element size {element_size} does not match expected {self._element_size}")
+
+         self._len = length
+         return True
+
+     def __getitem__(self, index):
+         index = self._validate_index(index)
+
+         if not self._mmap:
+             raise RuntimeError("Array is not memory-mapped. This should not happen if len > 0.")
+
+         offset = self._data_offset + index * self._element_size
+         data = self._mmap[offset : offset + self._element_size]
+         return struct.unpack(self._dtype_format, data)[0]
+
+     def __setitem__(self, index, value):
+         index = self._validate_index(index)
+
+         with self._lock:
+             if not self._mmap:
+                 raise RuntimeError("Array is not memory-mapped. This should not happen if len > 0.")
+
+             offset = self._data_offset + index * self._element_size
+             packed_value = self._pack_value(value)
+             self._mmap[offset : offset + self._element_size] = packed_value
+
+     def append(self, value):
+         with self._lock:
+             if self._len == self._capacity:
+                 self._resize(self._len + 1)
+
+             offset = self._data_offset + self._len * self._element_size
+             packed_value = self._pack_value(value)
+
+             self._mmap[offset : offset + self._element_size] = packed_value
+             self._len += 1
+
+     def _allocate_capacity(self, min_elements):
+         """Allocate capacity for at least min_elements, rounded up to chunk boundary."""
+         bytes_needed = min_elements * self._element_size + self.HEADER_SIZE
+         chunks_needed = (bytes_needed + self.CHUNK_SIZE_BYTES - 1) // self.CHUNK_SIZE_BYTES
+         total_file_size = chunks_needed * self.CHUNK_SIZE_BYTES
+         self._capacity_bytes = total_file_size - self.HEADER_SIZE
+         self._capacity = self._capacity_bytes // self._element_size
+         self._file.truncate(total_file_size)
+
+     def _resize(self, min_new_len):
+         if self._mmap:
+             self._mmap.close()
+
+         self._allocate_capacity(min_new_len)
+         self._mmap = mmap.mmap(self._file.fileno(), 0)
+
+     def extend(self, iterable):
+         values = list(iterable)
+         num_new_elements = len(values)
+
+         if num_new_elements == 0:
+             return
+
+         with self._lock:
+             new_len = self._len + num_new_elements
+             if new_len > self._capacity:
+                 self._resize(new_len)
+
+             # Batch write all values directly to mmap
+             offset = self._data_offset + self._len * self._element_size
+             for value in values:
+                 packed_value = self._pack_value(value)
+                 self._mmap[offset : offset + self._element_size] = packed_value
+                 offset += self._element_size
+
+             self._len = new_len
+
+     def __contains__(self, value):
+         for i in range(self._len):
+             if self[i] == value:
+                 return True
+         return False
+
+     def __iadd__(self, other):
+         if hasattr(other, "__iter__"):
+             self.extend(other)
+             return self
+         return NotImplemented
+
+     def __imul__(self, value):
+         if not isinstance(value, int) or value < 0:
+             return NotImplemented
+
+         with self._lock:
+             if value == 0:
+                 self._len = 0
+                 if self._mmap:
+                     self._mmap.close()
+                     self._mmap = None
+                 if self._file:
+                     self._file.truncate(0)
+                 self._capacity = 0
+                 self._capacity_bytes = 0
+             elif value > 1:
+                 original_len = self._len
+                 new_total_len = original_len * value
+
+                 # Resize if needed
+                 if new_total_len > self._capacity:
+                     self._resize(new_total_len)
+
+                 # Copy data in-place
+                 src_offset = self._data_offset
+                 dst_offset = self._data_offset + original_len * self._element_size
+
+                 for _ in range(value - 1):
+                     # Copy the original data chunk
+                     chunk_size = original_len * self._element_size
+                     self._mmap[dst_offset : dst_offset + chunk_size] = self._mmap[src_offset : src_offset + chunk_size]
+                     dst_offset += chunk_size
+
+                 self._len = new_total_len
+         return self
+
+     def flush(self):
+         if self._mmap:
+             self._mmap.flush()
+
+     def close(self):
+         if self._mmap:
+             # Ensure all writes are on disk before truncating
+             self._mmap.flush()
+             self._mmap.close()
+             self._mmap = None
+
+         if self._file:
+             # Update header with final length
+             self._write_header()
+
+             # Only truncate if the file was opened in a writable mode
+             # and if the current size is greater than the actual data length
+             current_file_size = os.fstat(self._file.fileno()).st_size
+             actual_total_size = self.HEADER_SIZE + self._len * self._element_size
+             if current_file_size > actual_total_size:
+                 self._file.truncate(actual_total_size)
+             self._file.close()
+             self._file = None
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         self.close()
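For anyone poking at the on-disk layout: `array.py` above writes a 32-byte header (magic, version, dtype, element size, element count) ahead of the packed elements, so a file produced by this package can be decoded with `struct` alone. A minimal sketch, reusing the format string defined on the class and assuming `numbers.array` was written by the README's persistent-array example:

```python
import struct

HEADER_FORMAT = "<4sHB8sIQ5x"                 # copied from Array.HEADER_FORMAT above
HEADER_SIZE = struct.calcsize(HEADER_FORMAT)  # 32 bytes

with open("numbers.array", "rb") as f:        # file from the README example
    magic, version, dtype_len, dtype_bytes, element_size, length = struct.unpack(
        HEADER_FORMAT, f.read(HEADER_SIZE)
    )

assert magic == b"ARYF"                       # Array.MAGIC
dtype = dtype_bytes[:dtype_len].decode("ascii")
print(dtype, element_size, length)            # e.g. 'i' 4 1001 after that example
```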