arrayfile 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
arrayfile/__init__.py
ADDED
arrayfile/array.py
ADDED
@@ -0,0 +1,286 @@
|
|
1
|
+
import mmap
|
2
|
+
import os
|
3
|
+
import struct
|
4
|
+
import tempfile
|
5
|
+
import threading
|
6
|
+
import weakref
|
7
|
+
|
8
|
+
|
9
|
+
class Array:
    """A disk-backed numeric array of fixed-size elements.

    Values are packed with :mod:`struct` into a memory-mapped file that
    starts with a 32-byte header (magic, version, dtype, element size,
    logical length).  The file grows in ``CHUNK_SIZE_BYTES`` increments;
    the logical length is persisted to the header on :meth:`flush` and
    :meth:`close`.

    Mutations are serialized with an internal lock, and element reads take
    the same lock so a concurrent resize cannot close the mmap mid-read.
    """

    CHUNK_SIZE_BYTES = 4096

    # Header constants
    MAGIC = b"ARYF"
    HEADER_VERSION = 1
    HEADER_SIZE = 32
    HEADER_FORMAT = (
        "<4sHB8sIQ5x"  # magic(4), version(2), dtype_len(1), dtype(8), element_size(4), length(8), reserved(5)
    )

    def __init__(self, dtype, filename=None, mode="r+b", initial_elements=0):
        """Create or open a file-backed array.

        Args:
            dtype: ``struct`` format string for a single element (e.g. "i", "d").
            filename: backing file path; a fresh temp file is created when None.
            mode: open mode for an existing file; any mode containing "w"
                (or a missing file) creates/truncates the file.
            initial_elements: pre-allocated capacity hint for new files.

        Raises:
            ValueError: if dtype is longer than 8 characters (it cannot
                round-trip through the header), or the file has no valid
                or compatible header.
        """
        self._lock = threading.Lock()

        # The header stores at most 8 dtype bytes; reject longer formats up
        # front instead of writing a header that can never be re-validated.
        if len(dtype) > 8:
            raise ValueError("dtype format must be at most 8 characters")

        if filename is None:
            fd, filename = tempfile.mkstemp()
            os.close(fd)
            mode = "w+b"  # Always create new temp files

        self._filename = filename
        self._dtype = dtype
        self._dtype_format = dtype
        self._element_size = struct.calcsize(dtype)
        self._file = None
        self._mmap = None
        self._len = 0
        self._capacity = 0
        self._capacity_bytes = 0  # Initialize _capacity_bytes here
        self._data_offset = self.HEADER_SIZE  # All data starts after header

        if "w" in mode or not os.path.exists(filename):
            # Create or truncate file
            self._file = open(filename, "w+b")
            self._len = 0
            self._allocate_capacity(initial_elements)
            self._write_header()
        else:
            # Open existing file and validate its header
            self._file = open(filename, mode)
            if not self._read_header():
                raise ValueError("File does not have a valid array header")

            current_file_size = os.fstat(self._file.fileno()).st_size
            data_size = current_file_size - self.HEADER_SIZE

            # Round the stored data size up to whole elements; capacity is
            # then rounded up to a chunk boundary by _allocate_capacity.
            min_elements = (data_size + self._element_size - 1) // self._element_size
            self._allocate_capacity(min_elements)

        # Only mmap if the file has a non-zero data region
        if self._capacity_bytes > 0:
            self._mmap = mmap.mmap(self._file.fileno(), 0)

        # Ensure cleanup even if close() isn't called.
        # NOTE(review): the bound method holds a strong reference to self,
        # so this finalizer only runs at interpreter exit, never on GC —
        # confirm whether per-instance cleanup on collection is desired.
        self._finalizer = weakref.finalize(self, self.close)

    def __len__(self):
        """Return the number of stored elements."""
        return self._len

    def __iter__(self):
        """Yield elements in order; the length is snapshotted at start."""
        current_len = self._len
        for i in range(current_len):
            yield self[i]

    def _validate_index(self, index):
        """Validate and normalize an index, returning the normalized value."""
        if not isinstance(index, int):
            raise TypeError("Index must be an integer")

        # Handle negative indices
        if index < 0:
            index = self._len + index

        if not (0 <= index < self._len):
            raise IndexError("Index out of bounds")

        return index

    def _pack_value(self, value):
        """Pack a value into bytes according to the dtype format."""
        try:
            return struct.pack(self._dtype_format, value)
        except struct.error as e:
            raise TypeError(f"Value {value} cannot be packed as {self._dtype_format}: {e}")

    def _pack_header(self):
        """Return the packed 32-byte header reflecting the current state."""
        dtype_bytes = self._dtype.encode("ascii")[:8]  # Limit to 8 bytes
        dtype_bytes = dtype_bytes.ljust(8, b"\x00")  # Pad with nulls

        return struct.pack(
            self.HEADER_FORMAT,
            self.MAGIC,
            self.HEADER_VERSION,
            len(self._dtype),
            dtype_bytes,
            self._element_size,
            self._len,
        )

    def _write_header(self):
        """Write the header to the beginning of the file via the file handle."""
        self._file.seek(0)
        self._file.write(self._pack_header())
        self._file.flush()

    def _read_header(self):
        """Read and validate header from file. Returns True if valid header, False if no header."""
        self._file.seek(0)
        header_data = self._file.read(self.HEADER_SIZE)

        # A short read means the file is too small to contain a header.
        if len(header_data) < self.HEADER_SIZE:
            return False

        magic, version, dtype_len, dtype_bytes, element_size, length = struct.unpack(self.HEADER_FORMAT, header_data)

        if magic != self.MAGIC:
            return False

        if version != self.HEADER_VERSION:
            raise ValueError(f"Unsupported header version: {version}")

        # Extract dtype string
        dtype = dtype_bytes[:dtype_len].decode("ascii")

        # Validate dtype matches
        if dtype != self._dtype:
            raise ValueError(f"File dtype '{dtype}' does not match requested dtype '{self._dtype}'")

        if element_size != self._element_size:
            raise ValueError(f"File element size {element_size} does not match expected {self._element_size}")

        self._len = length
        return True

    def __getitem__(self, index):
        """Return the element at *index* (negative indices supported)."""
        index = self._validate_index(index)

        # Hold the lock so a concurrent append/extend cannot close and
        # remap the mmap (via _resize) while we read from it.
        with self._lock:
            if not self._mmap:
                raise RuntimeError("Array is not memory-mapped. This should not happen if len > 0.")

            offset = self._data_offset + index * self._element_size
            data = self._mmap[offset : offset + self._element_size]
        return struct.unpack(self._dtype_format, data)[0]

    def __setitem__(self, index, value):
        """Overwrite the element at *index* with *value*."""
        index = self._validate_index(index)

        with self._lock:
            if not self._mmap:
                raise RuntimeError("Array is not memory-mapped. This should not happen if len > 0.")

            offset = self._data_offset + index * self._element_size
            packed_value = self._pack_value(value)
            self._mmap[offset : offset + self._element_size] = packed_value

    def append(self, value):
        """Append a single value, growing the file if capacity is exhausted."""
        with self._lock:
            if self._len == self._capacity:
                self._resize(self._len + 1)

            offset = self._data_offset + self._len * self._element_size
            packed_value = self._pack_value(value)

            self._mmap[offset : offset + self._element_size] = packed_value
            self._len += 1

    def _allocate_capacity(self, min_elements):
        """Allocate capacity for at least min_elements, rounded up to chunk boundary."""
        bytes_needed = min_elements * self._element_size + self.HEADER_SIZE
        chunks_needed = (bytes_needed + self.CHUNK_SIZE_BYTES - 1) // self.CHUNK_SIZE_BYTES
        total_file_size = chunks_needed * self.CHUNK_SIZE_BYTES
        self._capacity_bytes = total_file_size - self.HEADER_SIZE
        self._capacity = self._capacity_bytes // self._element_size
        self._file.truncate(total_file_size)

    def _resize(self, min_new_len):
        """Grow the file to hold min_new_len elements and re-establish the mmap."""
        if self._mmap:
            self._mmap.close()

        self._allocate_capacity(min_new_len)
        self._mmap = mmap.mmap(self._file.fileno(), 0)

    def extend(self, iterable):
        """Append all values from *iterable* in one batch write."""
        values = list(iterable)
        num_new_elements = len(values)

        if num_new_elements == 0:
            return

        with self._lock:
            new_len = self._len + num_new_elements
            if new_len > self._capacity:
                self._resize(new_len)

            # Pack once and write a single contiguous slice instead of one
            # mmap write per element.
            packed = b"".join(self._pack_value(v) for v in values)
            offset = self._data_offset + self._len * self._element_size
            self._mmap[offset : offset + len(packed)] = packed

            self._len = new_len

    def __contains__(self, value):
        """Linear scan for *value* (O(n))."""
        for i in range(self._len):
            if self[i] == value:
                return True
        return False

    def __iadd__(self, other):
        """Support ``arr += iterable`` via extend()."""
        if hasattr(other, "__iter__"):
            self.extend(other)
            return self
        return NotImplemented

    def __imul__(self, value):
        """Repeat the array in place ``value`` times; 0 clears it."""
        if not isinstance(value, int) or value < 0:
            return NotImplemented

        with self._lock:
            if value == 0:
                self._len = 0
                if self._mmap:
                    self._mmap.close()
                    self._mmap = None
                if self._file:
                    # Keep a valid header on disk so the file can still be
                    # reopened even before close() rewrites it.
                    self._file.truncate(self.HEADER_SIZE)
                    self._write_header()
                self._capacity = 0
                self._capacity_bytes = 0
            elif value > 1:
                original_len = self._len
                new_total_len = original_len * value

                # Resize if needed
                if new_total_len > self._capacity:
                    self._resize(new_total_len)

                # Copy the original run repeatedly after itself, in-place
                chunk_size = original_len * self._element_size
                src_offset = self._data_offset
                dst_offset = self._data_offset + chunk_size

                for _ in range(value - 1):
                    self._mmap[dst_offset : dst_offset + chunk_size] = self._mmap[src_offset : src_offset + chunk_size]
                    dst_offset += chunk_size

                self._len = new_total_len
        return self

    def flush(self):
        """Flush pending writes and persist the current length to the header.

        Persisting the header here means the file remains a valid,
        reopenable array even if the process dies before close().
        """
        with self._lock:
            if self._mmap:
                # Update the header through the mapping so file-handle
                # writes are never mixed with an active mmap.
                self._mmap[: self.HEADER_SIZE] = self._pack_header()
                self._mmap.flush()
            elif self._file:
                self._write_header()

    def close(self):
        """Flush, persist the final length, trim spare capacity and close.

        Idempotent: safe to call multiple times (also invoked by the
        finalizer at interpreter exit).
        """
        if self._mmap:
            # Ensure all writes are on disk before truncating
            self._mmap.flush()
            self._mmap.close()
            self._mmap = None

        if self._file:
            # Update header with final length
            self._write_header()

            # Trim the chunk-rounded capacity down to the actual data so
            # the on-disk file is exactly header + elements.
            current_file_size = os.fstat(self._file.fileno()).st_size
            actual_total_size = self.HEADER_SIZE + self._len * self._element_size
            if current_file_size > actual_total_size:
                self._file.truncate(actual_total_size)
            self._file.close()
            self._file = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
|
@@ -0,0 +1,104 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: arrayfile
|
3
|
+
Version: 0.0.1
|
4
|
+
Summary: Arrays backed by disk
|
5
|
+
Keywords: logging,terminal,scrollback,indexing
|
6
|
+
Author-email: Gareth Davidson <gaz@bitplane.net>
|
7
|
+
Requires-Python: >=3.10
|
8
|
+
Description-Content-Type: text/markdown
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
10
|
+
Classifier: Intended Audience :: Developers
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Operating System :: OS Independent
|
17
|
+
Classifier: Topic :: System :: Logging
|
18
|
+
Classifier: Topic :: Terminals
|
19
|
+
License-File: LICENSE.md
|
20
|
+
Requires-Dist: pre-commit ; extra == "dev"
|
21
|
+
Requires-Dist: pytest ; extra == "dev"
|
22
|
+
Requires-Dist: coverage ; extra == "dev"
|
23
|
+
Requires-Dist: pytest-cov ; extra == "dev"
|
24
|
+
Requires-Dist: build ; extra == "dev"
|
25
|
+
Requires-Dist: twine ; extra == "dev"
|
26
|
+
Requires-Dist: ruff ; extra == "dev"
|
27
|
+
Requires-Dist: pydoc-markdown ; extra == "dev"
|
28
|
+
Provides-Extra: dev
|
29
|
+
|
30
|
+
# arrayfile
|
31
|
+
|
32
|
+
A file-backed numeric array using struct.pack. Does not support inserts or
|
33
|
+
slicing.
|
34
|
+
|
35
|
+
It has a much smaller footprint than depending on numpy, though.
|
36
|
+
|
37
|
+
## Installation
|
38
|
+
|
39
|
+
```bash
|
40
|
+
pip install arrayfile
|
41
|
+
```
|
42
|
+
|
43
|
+
## Usage
|
44
|
+
|
45
|
+
### Temporary Array
|
46
|
+
|
47
|
+
This creates an array in your temp dir:
|
48
|
+
|
49
|
+
```python
|
50
|
+
from arrayfile import Array
|
51
|
+
|
52
|
+
# Create a temporary array with float data
|
53
|
+
arr = Array('f')
|
54
|
+
arr.append(3.14)
|
55
|
+
arr.append(2.71)
|
56
|
+
arr.extend([1.41, 1.73])
|
57
|
+
|
58
|
+
print(f"Length: {len(arr)}")
|
59
|
+
print(f"Values: {[arr[i] for i in range(len(arr))]}")
|
60
|
+
arr.close() # Clean up resources
|
61
|
+
```
|
62
|
+
|
63
|
+
### Persistent Array
|
64
|
+
|
65
|
+
You can use the same file, if you want to persist your data across sessions:
|
66
|
+
|
67
|
+
```python
|
68
|
+
from arrayfile import Array
|
69
|
+
|
70
|
+
# Create and populate an array file
|
71
|
+
arr = Array('i', 'numbers.array', 'w+b')
|
72
|
+
for i in range(1000):
|
73
|
+
arr.append(i * 2)
|
74
|
+
arr.close()
|
75
|
+
|
76
|
+
# Reopen the same file later
|
77
|
+
arr = Array('i', 'numbers.array', 'r+b')
|
78
|
+
print(f"Array has {len(arr)} elements")
|
79
|
+
print(f"First element: {arr[0]}")
|
80
|
+
print(f"Last element: {arr[-1]}")
|
81
|
+
|
82
|
+
# Add more data
|
83
|
+
arr.append(2000)
|
84
|
+
arr.close()
|
85
|
+
```
|
86
|
+
|
87
|
+
## Context manager
|
88
|
+
|
89
|
+
It has a finalizer in case you forget to call `close()`, but if you like to keep
|
90
|
+
your code tidy, you can use a context manager, like so:
|
91
|
+
|
92
|
+
```python
|
93
|
+
from arrayfile import Array
|
94
|
+
|
95
|
+
# Using double precision floats with context manager
|
96
|
+
with Array('d', 'measurements.array', 'w+b') as arr:
|
97
|
+
arr.extend([3.141592653589793, 2.718281828459045, 1.4142135623730951])
|
98
|
+
|
99
|
+
print(f"Stored {len(arr)} precise measurements")
|
100
|
+
for i, value in enumerate(arr):
|
101
|
+
print(f" {i}: {value:.15f}")
|
102
|
+
```
|
103
|
+
|
104
|
+
|
@@ -0,0 +1,6 @@
|
|
1
|
+
arrayfile/__init__.py,sha256=Hg22gYr-wT8GVurijwO9iIArr5CLgb0mgyHt8XS7otA,134
|
2
|
+
arrayfile/array.py,sha256=ZBIY7PAYxpY4ZPYHQBL7Ou9CJftssVAYvMp8cTiCgaw,9804
|
3
|
+
arrayfile-0.0.1.dist-info/licenses/LICENSE.md,sha256=7_BIuQ-veOrsF-WarH8kTkm0-xrCLvJ1PFE1C4Ebs64,146
|
4
|
+
arrayfile-0.0.1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
5
|
+
arrayfile-0.0.1.dist-info/METADATA,sha256=78ozkmNIQ6EPAqSRvLyvjRf0XZyXqZ8UlgcHX0yrEd0,2629
|
6
|
+
arrayfile-0.0.1.dist-info/RECORD,,
|