flatdata-py 0.4.10__tar.gz → 0.4.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/PKG-INFO +33 -2
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/README.md +31 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/archive.py +2 -2
- flatdata_py-0.4.11/flatdata/lib/data_access.py +168 -0
- flatdata_py-0.4.11/flatdata/lib/data_access.py.orig +204 -0
- flatdata_py-0.4.11/flatdata/lib/data_access_BACKUP_91129.py +219 -0
- flatdata_py-0.4.11/flatdata/lib/data_access_LOCAL_91129.py +112 -0
- flatdata_py-0.4.11/flatdata/lib/data_access_REMOTE_91129.py +168 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/resources.py +60 -14
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/structure.py +21 -10
- flatdata_py-0.4.11/flatdata/lib/structure.py.orig +92 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/pyproject.toml +2 -2
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/.gitignore +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/__init__.py +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/archive_builder.py +0 -0
- /flatdata_py-0.4.10/flatdata/lib/data_access.py → /flatdata_py-0.4.11/flatdata/lib/data_access_BASE_91129.py +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/errors.py +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/file_resource_storage.py +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/file_resource_writer.py +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/flatdata_writer.py +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/inspector.py +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/resource_storage.py +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/tar_archive_resource_storage.py +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.11}/flatdata/lib/writer.py +0 -0
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flatdata-py
|
|
3
|
-
Version: 0.4.10
|
|
3
|
+
Version: 0.4.11
|
|
4
4
|
Summary: Python 3 implementation of Flatdata
|
|
5
5
|
Project-URL: Homepage, https://github.com/heremaps/flatdata
|
|
6
6
|
Author: Flatdata Developers
|
|
7
7
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
8
8
|
Classifier: Operating System :: OS Independent
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Requires-Dist: flatdata-generator==0.4.
|
|
10
|
+
Requires-Dist: flatdata-generator==0.4.11
|
|
11
11
|
Requires-Dist: numpy
|
|
12
12
|
Requires-Dist: pandas
|
|
13
13
|
Provides-Extra: inspector
|
|
@@ -34,6 +34,37 @@ Once you have [created a flatdata schema file](../README.md#creating-a-schema),
|
|
|
34
34
|
flatdata-generator --gen py --schema locations.flatdata --output-file locations.py
|
|
35
35
|
```
|
|
36
36
|
|
|
37
|
+
## Performance tips
|
|
38
|
+
|
|
39
|
+
`flatdata-py` supports two data access patterns with very different performance characteristics on large archives.
|
|
40
|
+
|
|
41
|
+
Iterating over a vector yields one Python object per element. Each field access unpacks bits from the underlying memory-mapped data. This is fine for accessing individual elements or small ranges, but has significant per-element overhead for bulk operations:
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
count = sum(1 for x in archive.links if x.speed_limit > 100)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
For bulk operations, use the vectorized access methods that read fields directly into NumPy arrays:
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
# single column access, returns a pandas DataFrame
|
|
51
|
+
df = archive.links.speed_limit
|
|
52
|
+
count = len(df[df['speed_limit'] > 100])
|
|
53
|
+
|
|
54
|
+
# full NumPy structured array with all fields
|
|
55
|
+
arr = archive.links.to_numpy()
|
|
56
|
+
count = int(np.sum(arr['speed_limit'] > 100))
|
|
57
|
+
|
|
58
|
+
# slices work too
|
|
59
|
+
arr = archive.links[1000:2000].to_numpy()
|
|
60
|
+
df = archive.links[::10].to_data_frame()
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
* Use `vector.field_name` (column access) when you only need one or a few fields.
|
|
64
|
+
* Use `vector.to_numpy()` or `vector.to_data_frame()` when you need all fields at once.
|
|
65
|
+
* Use `vector[i].field` for random access to individual elements.
|
|
66
|
+
* The underlying data is memory-mapped; the OS pages it from disk on demand. Vectorized results are materialized as NumPy arrays in RAM.
|
|
67
|
+
|
|
37
68
|
## Using the inspector
|
|
38
69
|
|
|
39
70
|
`flatdata-py` comes with a handy tool called the `flatdata-inspector` to inspect the contents of an archive:
|
|
@@ -18,6 +18,37 @@ Once you have [created a flatdata schema file](../README.md#creating-a-schema),
|
|
|
18
18
|
flatdata-generator --gen py --schema locations.flatdata --output-file locations.py
|
|
19
19
|
```
|
|
20
20
|
|
|
21
|
+
## Performance tips
|
|
22
|
+
|
|
23
|
+
`flatdata-py` supports two data access patterns with very different performance characteristics on large archives.
|
|
24
|
+
|
|
25
|
+
Iterating over a vector yields one Python object per element. Each field access unpacks bits from the underlying memory-mapped data. This is fine for accessing individual elements or small ranges, but has significant per-element overhead for bulk operations:
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
count = sum(1 for x in archive.links if x.speed_limit > 100)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
For bulk operations, use the vectorized access methods that read fields directly into NumPy arrays:
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
# single column access, returns a pandas DataFrame
|
|
35
|
+
df = archive.links.speed_limit
|
|
36
|
+
count = len(df[df['speed_limit'] > 100])
|
|
37
|
+
|
|
38
|
+
# full NumPy structured array with all fields
|
|
39
|
+
arr = archive.links.to_numpy()
|
|
40
|
+
count = int(np.sum(arr['speed_limit'] > 100))
|
|
41
|
+
|
|
42
|
+
# slices work too
|
|
43
|
+
arr = archive.links[1000:2000].to_numpy()
|
|
44
|
+
df = archive.links[::10].to_data_frame()
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
* Use `vector.field_name` (column access) when you only need one or a few fields.
|
|
48
|
+
* Use `vector.to_numpy()` or `vector.to_data_frame()` when you need all fields at once.
|
|
49
|
+
* Use `vector[i].field` for random access to individual elements.
|
|
50
|
+
* The underlying data is memory-mapped; the OS pages it from disk on demand. Vectorized results are materialized as NumPy arrays in RAM.
|
|
51
|
+
|
|
21
52
|
## Using the inspector
|
|
22
53
|
|
|
23
54
|
`flatdata-py` comes with a handy tool called the `flatdata-inspector` to inspect the contents of an archive:
|
|
@@ -39,9 +39,9 @@ class Archive:
|
|
|
39
39
|
self.__getattr__(name)
|
|
40
40
|
|
|
41
41
|
def __getattr__(self, name):
|
|
42
|
-
if name not in
|
|
42
|
+
if name not in self._RESOURCES:
|
|
43
43
|
raise AttributeError("Resource %s not defined in archive." % name)
|
|
44
|
-
if name not in
|
|
44
|
+
if name not in self._loaded_resources:
|
|
45
45
|
self._loaded_resources[name] = self._open_resource(name)
|
|
46
46
|
return self._loaded_resources[name]
|
|
47
47
|
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Copyright (c) 2017 HERE Europe B.V.
|
|
3
|
+
See the LICENSE file in the root of this project for license details.
|
|
4
|
+
'''
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
# Sign bits cache for the value reading.
|
|
9
|
+
_SIGN_BITS = [0] + [(1 << (bits - 1)) for bits in range(1, 65)]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def make_field_reader(offset_bits, num_bits, is_signed):
    """Build a specialized closure for reading a single field from a structure.

    Every constant the read needs (byte offset, bit shift, mask, sign
    handling) is computed here once and captured by the returned closure, so
    the per-read hot path does minimal work.

    :param offset_bits: bit offset of the field from the start of the structure
    :param num_bits: width of the field in bits
    :param is_signed: whether multi-bit values are sign-extended (two's
        complement); 1-bit fields always yield 0 or 1 regardless
    :return: function ``reader(data, pos)`` returning the field value as a
        Python ``int``; ``data`` is an indexable and sliceable byte buffer and
        ``pos`` is the byte position of the structure inside ``data``
    """
    offset_bytes, offset_extra = divmod(offset_bits, 8)
    total_bytes = (num_bits + 7) // 8
    end_byte = offset_bytes + total_bytes
    mask = (1 << num_bits) - 1
    # Bits of the field still available after reading ``total_bytes`` bytes
    # and dropping the ``offset_extra`` leading bits. When that is less than
    # the field width, the remaining high bits sit in one additional byte.
    extra_shift = total_bytes * 8 - offset_extra
    needs_extra = extra_shift < num_bits

    if num_bits == 1:
        bit_mask = 1 << offset_extra

        def reader(data, pos):
            return int((data[pos + offset_bytes] & bit_mask) != 0)
        return reader

    if is_signed:
        # Computed once per reader, so no lookup table is needed.
        sign_bit = (1 << (num_bits - 1)) if num_bits else 0
        sign_mask = sign_bit - 1
        if needs_extra:
            def reader(data, pos):
                result = int.from_bytes(
                    data[pos + offset_bytes: pos + end_byte], byteorder="little")
                result >>= offset_extra
                # int() keeps the shift in arbitrary precision even when the
                # buffer yields fixed-width scalars (e.g. numpy.uint8), which
                # would otherwise overflow silently.
                result |= int(data[pos + end_byte]) << extra_shift
                result &= mask
                return (result & sign_mask) - (result & sign_bit)
        elif offset_extra:
            def reader(data, pos):
                result = (int.from_bytes(
                    data[pos + offset_bytes: pos + end_byte],
                    byteorder="little") >> offset_extra) & mask
                return (result & sign_mask) - (result & sign_bit)
        else:
            def reader(data, pos):
                result = int.from_bytes(
                    data[pos + offset_bytes: pos + end_byte],
                    byteorder="little") & mask
                return (result & sign_mask) - (result & sign_bit)
        return reader

    # Unsigned paths
    if needs_extra:
        def reader(data, pos):
            result = int.from_bytes(
                data[pos + offset_bytes: pos + end_byte], byteorder="little")
            result >>= offset_extra
            # See the signed path: coerce before shifting past 8 bits.
            result |= int(data[pos + end_byte]) << extra_shift
            return result & mask
    elif offset_extra:
        def reader(data, pos):
            return (int.from_bytes(
                data[pos + offset_bytes: pos + end_byte],
                byteorder="little") >> offset_extra) & mask
    else:
        def reader(data, pos):
            return int.from_bytes(
                data[pos + offset_bytes: pos + end_byte],
                byteorder="little") & mask
    return reader
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_signed):
    """Extract one bit-packed field from every element in a single pass.

    :param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
    :param field_offset_bits: bit offset of the field within each element
    :param field_width_bits: width of the field in bits (max 64)
    :param is_signed: whether to sign-extend the result (not applied to
        1-bit fields)
    :return: numpy array with one value per element; ``uint64`` for unsigned
        and 1-bit fields, ``int64`` for signed multi-bit fields
    """
    first_byte, shift = divmod(field_offset_bits, 8)

    if field_width_bits == 1:
        column = raw_bytes_2d[:, first_byte].astype(np.uint64)
        return (column >> np.uint64(shift)) & np.uint64(1)

    span = (shift + field_width_bits + 7) // 8
    low_bytes = min(span, 8)

    # Assemble up to eight little-endian bytes per element in uint64 lanes,
    # then drop the bits that precede the field.
    values = np.zeros(raw_bytes_2d.shape[0], dtype=np.uint64)
    for k in range(low_bytes):
        column = raw_bytes_2d[:, first_byte + k].astype(np.uint64)
        values |= column << np.uint64(8 * k)
    values >>= np.uint64(shift)

    # An unaligned field can spill into a ninth byte; merge its bits on top.
    gathered = 8 * low_bytes - shift
    if span > 8 and gathered < field_width_bits:
        spill = raw_bytes_2d[:, first_byte + 8].astype(np.uint64)
        values |= spill << np.uint64(gathered)

    if field_width_bits < 64:
        values &= np.uint64((1 << field_width_bits) - 1)

    if not is_signed:
        return values

    if field_width_bits == 64:
        # Same bits reinterpreted as two's complement.
        return values.view(np.int64)

    top_bit = np.uint64(1 << (field_width_bits - 1))
    plain = values.astype(np.int64)
    wrapped = plain + np.int64(-(1 << field_width_bits))
    return np.where(values & top_bit, wrapped, plain)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def read_value(data, offset_bits, num_bits, is_signed):
    """Read one bit-packed value from ``data`` at the given bit offset.

    Builds a throwaway reader with :func:`make_field_reader` and applies it
    at byte position 0. When the same field is read many times, build the
    reader once with ``make_field_reader`` and reuse it instead.
    """
    return make_field_reader(offset_bits, num_bits, is_signed)(data, 0)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def write_value(data, offset_bits, num_bits, is_signed, value):
    """Write ``value`` into a bit-packed field of ``data``, in place.

    Only the ``num_bits`` bits of the field are modified: previous field
    contents are cleared and bits of neighbouring fields that share a byte
    with this one are preserved, so fields may be written in any order and
    overwritten.

    :param data: mutable, integer-indexable byte buffer (e.g. ``bytearray``)
    :param offset_bits: bit offset of the field from the start of ``data``
    :param num_bits: width of the field in bits (at most 64)
    :param is_signed: kept for interface compatibility; negative values are
        stored as the low ``num_bits`` bits of their two's complement either way
    :param value: integer to store; bits beyond ``num_bits`` are discarded.
        For 1-bit fields the bit is set only when ``value == 1``
    """
    assert num_bits <= 64, 'Number of bits to write is greater than 64'

    offset_bytes, offset_extra_bits = divmod(offset_bits, 8)

    if num_bits == 1:
        bit = 1 << offset_extra_bits
        if value == 1:
            data[offset_bytes] |= bit
        else:
            # Non-negative mask: also valid for buffers holding uint8 scalars.
            data[offset_bytes] &= 0xFF ^ bit
        return

    # Bytes touched by the field, counted from its first byte.
    span = (offset_extra_bits + num_bits + 7) // 8
    field_mask = ((1 << num_bits) - 1) << offset_extra_bits

    # Read-modify-write over the touched bytes as one little-endian integer.
    current = 0
    for i in range(span):
        current |= int(data[offset_bytes + i]) << (8 * i)

    updated = (current & ~field_mask) | ((value << offset_extra_bits) & field_mask)

    for i in range(span):
        data[offset_bytes + i] = (updated >> (8 * i)) & 0xFF
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Copyright (c) 2017 HERE Europe B.V.
|
|
3
|
+
See the LICENSE file in the root of this project for license details.
|
|
4
|
+
'''
|
|
5
|
+
|
|
6
|
+
# Sign bits cache for the value reading.
|
|
7
|
+
_SIGN_BITS = [0] + [(1 << (bits - 1)) for bits in range(1, 65)]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def make_field_reader(offset_bits, num_bits, is_signed):
|
|
11
|
+
"""Build a specialized closure for reading a single field from a structure.
|
|
12
|
+
|
|
13
|
+
Returns a function reader(data, pos_bytes) that reads the field value
|
|
14
|
+
from ``data`` at byte position ``pos_bytes``. All constants (byte offset,
|
|
15
|
+
bit shift, mask, sign handling) are pre-computed and captured by the
|
|
16
|
+
closure so the hot path does minimal work.
|
|
17
|
+
"""
|
|
18
|
+
offset_bytes, offset_extra = divmod(offset_bits, 8)
|
|
19
|
+
total_bytes = (num_bits + 7) // 8
|
|
20
|
+
<<<<<<< HEAD
|
|
21
|
+
|
|
22
|
+
if num_bits == 1:
|
|
23
|
+
return int((data[offset_bytes] & (1 << offset_extra_bits)) != 0)
|
|
24
|
+
|
|
25
|
+
result = int.from_bytes(data[offset_bytes: offset_bytes + total_bytes], byteorder="little")
|
|
26
|
+
result >>= offset_extra_bits
|
|
27
|
+
if (total_bytes * 8 - offset_extra_bits) < num_bits:
|
|
28
|
+
remainder = data[offset_bytes + total_bytes]
|
|
29
|
+
result |= remainder << (total_bytes * 8 - offset_extra_bits)
|
|
30
|
+
|
|
31
|
+
if num_bits < 64:
|
|
32
|
+
result = result & ((1 << num_bits) - 1)
|
|
33
|
+
|
|
34
|
+
if not is_signed:
|
|
35
|
+
return result
|
|
36
|
+
|
|
37
|
+
return (result & (_SIGN_BITS[num_bits] - 1)) - (result & _SIGN_BITS[num_bits])
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def write_value(data, offset_bits, num_bits, is_signed, value):
|
|
41
|
+
assert num_bits <= 64, f'Number of bits to write is greater than 64'
|
|
42
|
+
|
|
43
|
+
offset_bytes, offset_extra_bits = divmod(offset_bits, 8)
|
|
44
|
+
total_bytes = (num_bits + 7) // 8
|
|
45
|
+
|
|
46
|
+
if num_bits == 1:
|
|
47
|
+
if value == 1:
|
|
48
|
+
data[offset_bytes] |= 1 << offset_extra_bits
|
|
49
|
+
else:
|
|
50
|
+
data[offset_bytes] &= ~(1 << offset_extra_bits)
|
|
51
|
+
return
|
|
52
|
+
|
|
53
|
+
=======
|
|
54
|
+
end_byte = offset_bytes + total_bytes
|
|
55
|
+
>>>>>>> e486615 (Also improve scalar readers by caching)
|
|
56
|
+
mask = (1 << num_bits) - 1
|
|
57
|
+
needs_extra = (total_bytes * 8 - offset_extra) < num_bits
|
|
58
|
+
extra_shift = total_bytes * 8 - offset_extra
|
|
59
|
+
|
|
60
|
+
if num_bits == 1:
|
|
61
|
+
bit_mask = 1 << offset_extra
|
|
62
|
+
def reader(data, pos):
|
|
63
|
+
return int((data[pos + offset_bytes] & bit_mask) != 0)
|
|
64
|
+
return reader
|
|
65
|
+
|
|
66
|
+
if is_signed:
|
|
67
|
+
sign_bit = _SIGN_BITS[num_bits]
|
|
68
|
+
sign_mask = sign_bit - 1
|
|
69
|
+
if needs_extra:
|
|
70
|
+
def reader(data, pos):
|
|
71
|
+
result = int.from_bytes(
|
|
72
|
+
data[pos + offset_bytes: pos + end_byte], byteorder="little")
|
|
73
|
+
result >>= offset_extra
|
|
74
|
+
result |= data[pos + end_byte] << extra_shift
|
|
75
|
+
result &= mask
|
|
76
|
+
return (result & sign_mask) - (result & sign_bit)
|
|
77
|
+
elif offset_extra:
|
|
78
|
+
def reader(data, pos):
|
|
79
|
+
result = (int.from_bytes(
|
|
80
|
+
data[pos + offset_bytes: pos + end_byte],
|
|
81
|
+
byteorder="little") >> offset_extra) & mask
|
|
82
|
+
return (result & sign_mask) - (result & sign_bit)
|
|
83
|
+
else:
|
|
84
|
+
def reader(data, pos):
|
|
85
|
+
result = int.from_bytes(
|
|
86
|
+
data[pos + offset_bytes: pos + end_byte],
|
|
87
|
+
byteorder="little") & mask
|
|
88
|
+
return (result & sign_mask) - (result & sign_bit)
|
|
89
|
+
return reader
|
|
90
|
+
|
|
91
|
+
<<<<<<< HEAD
|
|
92
|
+
=======
|
|
93
|
+
# Unsigned paths
|
|
94
|
+
if needs_extra:
|
|
95
|
+
def reader(data, pos):
|
|
96
|
+
result = int.from_bytes(
|
|
97
|
+
data[pos + offset_bytes: pos + end_byte], byteorder="little")
|
|
98
|
+
result >>= offset_extra
|
|
99
|
+
result |= data[pos + end_byte] << extra_shift
|
|
100
|
+
return result & mask
|
|
101
|
+
elif offset_extra:
|
|
102
|
+
def reader(data, pos):
|
|
103
|
+
return (int.from_bytes(
|
|
104
|
+
data[pos + offset_bytes: pos + end_byte],
|
|
105
|
+
byteorder="little") >> offset_extra) & mask
|
|
106
|
+
else:
|
|
107
|
+
def reader(data, pos):
|
|
108
|
+
return int.from_bytes(
|
|
109
|
+
data[pos + offset_bytes: pos + end_byte],
|
|
110
|
+
byteorder="little") & mask
|
|
111
|
+
return reader
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_signed):
|
|
115
|
+
"""Read a bit-packed field from all elements at once, returning a numpy array.
|
|
116
|
+
|
|
117
|
+
:param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
|
|
118
|
+
:param field_offset_bits: bit offset of the field within each element
|
|
119
|
+
:param field_width_bits: width of the field in bits (max 64)
|
|
120
|
+
:param is_signed: whether to sign-extend the result
|
|
121
|
+
:return: numpy array of field values
|
|
122
|
+
"""
|
|
123
|
+
if field_width_bits == 1:
|
|
124
|
+
byte_idx = field_offset_bits // 8
|
|
125
|
+
bit_idx = field_offset_bits % 8
|
|
126
|
+
return ((raw_bytes_2d[:, byte_idx].astype(np.uint64) >> np.uint64(bit_idx)) &
|
|
127
|
+
np.uint64(1))
|
|
128
|
+
|
|
129
|
+
byte_start = field_offset_bits // 8
|
|
130
|
+
bit_shift = field_offset_bits % 8
|
|
131
|
+
bytes_needed = (bit_shift + field_width_bits + 7) // 8
|
|
132
|
+
|
|
133
|
+
# Use Python int arithmetic for the shift to avoid numpy overflow,
|
|
134
|
+
# then broadcast back to the array.
|
|
135
|
+
result = np.zeros(raw_bytes_2d.shape[0], dtype=np.uint64)
|
|
136
|
+
for b in range(min(bytes_needed, 8)):
|
|
137
|
+
result |= raw_bytes_2d[:, byte_start + b].astype(np.uint64) << np.uint64(b * 8)
|
|
138
|
+
result >>= np.uint64(bit_shift)
|
|
139
|
+
|
|
140
|
+
# If the field spans more than 8 bytes (unaligned 64-bit field), merge the extra byte.
|
|
141
|
+
bits_so_far = 8 * min(bytes_needed, 8) - bit_shift
|
|
142
|
+
if bits_so_far < field_width_bits and bytes_needed > 8:
|
|
143
|
+
extra = raw_bytes_2d[:, byte_start + 8].astype(np.uint64)
|
|
144
|
+
result |= extra << np.uint64(bits_so_far)
|
|
145
|
+
|
|
146
|
+
if field_width_bits < 64:
|
|
147
|
+
result &= np.uint64((1 << field_width_bits) - 1)
|
|
148
|
+
|
|
149
|
+
if is_signed:
|
|
150
|
+
if field_width_bits == 64:
|
|
151
|
+
return result.view(np.int64)
|
|
152
|
+
sign_bit = np.uint64(1 << (field_width_bits - 1))
|
|
153
|
+
offset = -(1 << field_width_bits)
|
|
154
|
+
signed = result.astype(np.int64) + np.int64(offset)
|
|
155
|
+
result = np.where(result & sign_bit, signed, result.astype(np.int64))
|
|
156
|
+
|
|
157
|
+
return result
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def read_value(data, offset_bits, num_bits, is_signed):
|
|
161
|
+
"""Read a bit-packed value from data at the given bit offset.
|
|
162
|
+
|
|
163
|
+
This is a convenience wrapper around :func:`make_field_reader` for one-off
|
|
164
|
+
reads. For repeated reads of the same field, prefer building a reader once
|
|
165
|
+
with ``make_field_reader`` and reusing it.
|
|
166
|
+
"""
|
|
167
|
+
reader = make_field_reader(offset_bits, num_bits, is_signed)
|
|
168
|
+
return reader(data, 0)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def write_value(data, offset_bits, num_bits, is_signed, value):
|
|
172
|
+
assert num_bits <= 64, f'Number of bits to write is greater than 64'
|
|
173
|
+
|
|
174
|
+
offset_bytes, offset_extra_bits = divmod(offset_bits, 8)
|
|
175
|
+
total_bytes = (num_bits + 7) // 8
|
|
176
|
+
|
|
177
|
+
if num_bits == 1:
|
|
178
|
+
if value == 1:
|
|
179
|
+
data[offset_bytes] |= 1 << offset_extra_bits
|
|
180
|
+
else:
|
|
181
|
+
data[offset_bytes] &= ~(1 << offset_extra_bits)
|
|
182
|
+
return
|
|
183
|
+
|
|
184
|
+
mask = (1 << num_bits) - 1
|
|
185
|
+
value <<= offset_extra_bits
|
|
186
|
+
value &= mask << offset_extra_bits
|
|
187
|
+
value_in_little_endian = value.to_bytes(total_bytes + 1, byteorder="little", signed=is_signed)
|
|
188
|
+
surrounding_bits = data[offset_bytes] & ((1 << offset_bits) - 1)
|
|
189
|
+
|
|
190
|
+
byte_idx = 0
|
|
191
|
+
data[offset_bytes] = value_in_little_endian[byte_idx]
|
|
192
|
+
data[offset_bytes] |= surrounding_bits
|
|
193
|
+
|
|
194
|
+
byte_idx += 1
|
|
195
|
+
while byte_idx < total_bytes:
|
|
196
|
+
data[offset_bytes + byte_idx] = value_in_little_endian[byte_idx]
|
|
197
|
+
byte_idx += 1
|
|
198
|
+
|
|
199
|
+
>>>>>>> e486615 (Also improve scalar readers by caching)
|
|
200
|
+
bits_written = total_bytes * 8 - offset_extra_bits
|
|
201
|
+
if bits_written < num_bits:
|
|
202
|
+
surrounding_bits = data[offset_bytes + byte_idx] & ~((1 << offset_bits) - 1)
|
|
203
|
+
data[offset_bytes + byte_idx] = value_in_little_endian[byte_idx] & ((1 << (8 - (bits_written % 8))) - 1)
|
|
204
|
+
data[offset_bytes + byte_idx] |= surrounding_bits
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Copyright (c) 2017 HERE Europe B.V.
|
|
3
|
+
See the LICENSE file in the root of this project for license details.
|
|
4
|
+
'''
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
# Sign bits cache for the value reading.
|
|
9
|
+
_SIGN_BITS = [0] + [(1 << (bits - 1)) for bits in range(1, 65)]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def make_field_reader(offset_bits, num_bits, is_signed):
|
|
13
|
+
"""Build a specialized closure for reading a single field from a structure.
|
|
14
|
+
|
|
15
|
+
Returns a function reader(data, pos_bytes) that reads the field value
|
|
16
|
+
from ``data`` at byte position ``pos_bytes``. All constants (byte offset,
|
|
17
|
+
bit shift, mask, sign handling) are pre-computed and captured by the
|
|
18
|
+
closure so the hot path does minimal work.
|
|
19
|
+
"""
|
|
20
|
+
offset_bytes, offset_extra = divmod(offset_bits, 8)
|
|
21
|
+
total_bytes = (num_bits + 7) // 8
|
|
22
|
+
end_byte = offset_bytes + total_bytes
|
|
23
|
+
mask = (1 << num_bits) - 1
|
|
24
|
+
needs_extra = (total_bytes * 8 - offset_extra) < num_bits
|
|
25
|
+
extra_shift = total_bytes * 8 - offset_extra
|
|
26
|
+
|
|
27
|
+
if num_bits == 1:
|
|
28
|
+
bit_mask = 1 << offset_extra
|
|
29
|
+
def reader(data, pos):
|
|
30
|
+
return int((data[pos + offset_bytes] & bit_mask) != 0)
|
|
31
|
+
return reader
|
|
32
|
+
|
|
33
|
+
if is_signed:
|
|
34
|
+
sign_bit = _SIGN_BITS[num_bits]
|
|
35
|
+
sign_mask = sign_bit - 1
|
|
36
|
+
if needs_extra:
|
|
37
|
+
def reader(data, pos):
|
|
38
|
+
result = int.from_bytes(
|
|
39
|
+
data[pos + offset_bytes: pos + end_byte], byteorder="little")
|
|
40
|
+
result >>= offset_extra
|
|
41
|
+
result |= data[pos + end_byte] << extra_shift
|
|
42
|
+
result &= mask
|
|
43
|
+
return (result & sign_mask) - (result & sign_bit)
|
|
44
|
+
elif offset_extra:
|
|
45
|
+
def reader(data, pos):
|
|
46
|
+
result = (int.from_bytes(
|
|
47
|
+
data[pos + offset_bytes: pos + end_byte],
|
|
48
|
+
byteorder="little") >> offset_extra) & mask
|
|
49
|
+
return (result & sign_mask) - (result & sign_bit)
|
|
50
|
+
else:
|
|
51
|
+
def reader(data, pos):
|
|
52
|
+
result = int.from_bytes(
|
|
53
|
+
data[pos + offset_bytes: pos + end_byte],
|
|
54
|
+
byteorder="little") & mask
|
|
55
|
+
return (result & sign_mask) - (result & sign_bit)
|
|
56
|
+
return reader
|
|
57
|
+
|
|
58
|
+
<<<<<<< HEAD
|
|
59
|
+
if num_bits < 64 or offset_extra_bits > 0:
|
|
60
|
+
result = result & ((1 << num_bits) - 1)
|
|
61
|
+
=======
|
|
62
|
+
# Unsigned paths
|
|
63
|
+
if needs_extra:
|
|
64
|
+
def reader(data, pos):
|
|
65
|
+
result = int.from_bytes(
|
|
66
|
+
data[pos + offset_bytes: pos + end_byte], byteorder="little")
|
|
67
|
+
result >>= offset_extra
|
|
68
|
+
result |= data[pos + end_byte] << extra_shift
|
|
69
|
+
return result & mask
|
|
70
|
+
elif offset_extra:
|
|
71
|
+
def reader(data, pos):
|
|
72
|
+
return (int.from_bytes(
|
|
73
|
+
data[pos + offset_bytes: pos + end_byte],
|
|
74
|
+
byteorder="little") >> offset_extra) & mask
|
|
75
|
+
else:
|
|
76
|
+
def reader(data, pos):
|
|
77
|
+
return int.from_bytes(
|
|
78
|
+
data[pos + offset_bytes: pos + end_byte],
|
|
79
|
+
byteorder="little") & mask
|
|
80
|
+
return reader
|
|
81
|
+
>>>>>>> c635308 (Also improve scalar readers by caching)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_signed):
    """Extract one bit-packed field from every element in a single pass.

    :param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
    :param field_offset_bits: bit offset of the field within each element
    :param field_width_bits: width of the field in bits (max 64)
    :param is_signed: whether to sign-extend the result (not applied to
        1-bit fields)
    :return: numpy array with one value per element; ``uint64`` for unsigned
        and 1-bit fields, ``int64`` for signed multi-bit fields
    """
    first_byte, shift = divmod(field_offset_bits, 8)

    if field_width_bits == 1:
        column = raw_bytes_2d[:, first_byte].astype(np.uint64)
        return (column >> np.uint64(shift)) & np.uint64(1)

    span = (shift + field_width_bits + 7) // 8
    low_bytes = min(span, 8)

    # Assemble up to eight little-endian bytes per element in uint64 lanes,
    # then drop the bits that precede the field.
    values = np.zeros(raw_bytes_2d.shape[0], dtype=np.uint64)
    for k in range(low_bytes):
        column = raw_bytes_2d[:, first_byte + k].astype(np.uint64)
        values |= column << np.uint64(8 * k)
    values >>= np.uint64(shift)

    # An unaligned field can spill into a ninth byte; merge its bits on top.
    gathered = 8 * low_bytes - shift
    if span > 8 and gathered < field_width_bits:
        spill = raw_bytes_2d[:, first_byte + 8].astype(np.uint64)
        values |= spill << np.uint64(gathered)

    if field_width_bits < 64:
        values &= np.uint64((1 << field_width_bits) - 1)

    if not is_signed:
        return values

    if field_width_bits == 64:
        # Same bits reinterpreted as two's complement.
        return values.view(np.int64)

    top_bit = np.uint64(1 << (field_width_bits - 1))
    plain = values.astype(np.int64)
    wrapped = plain + np.int64(-(1 << field_width_bits))
    return np.where(values & top_bit, wrapped, plain)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def read_value(data, offset_bits, num_bits, is_signed):
    """Read one bit-packed value from ``data`` at the given bit offset.

    Builds a throwaway reader with :func:`make_field_reader` and applies it
    at byte position 0. When the same field is read many times, build the
    reader once with ``make_field_reader`` and reuse it instead.
    """
    return make_field_reader(offset_bits, num_bits, is_signed)(data, 0)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def write_value(data, offset_bits, num_bits, is_signed, value):
    """Write ``value`` into a bit-packed field of ``data``, in place.

    Only the ``num_bits`` bits of the field are modified: previous field
    contents are cleared and bits of neighbouring fields that share a byte
    with this one are preserved, so fields may be written in any order and
    overwritten.

    :param data: mutable, integer-indexable byte buffer (e.g. ``bytearray``)
    :param offset_bits: bit offset of the field from the start of ``data``
    :param num_bits: width of the field in bits (at most 64)
    :param is_signed: kept for interface compatibility; negative values are
        stored as the low ``num_bits`` bits of their two's complement either way
    :param value: integer to store; bits beyond ``num_bits`` are discarded.
        For 1-bit fields the bit is set only when ``value == 1``
    """
    assert num_bits <= 64, 'Number of bits to write is greater than 64'

    offset_bytes, offset_extra_bits = divmod(offset_bits, 8)

    if num_bits == 1:
        bit = 1 << offset_extra_bits
        if value == 1:
            data[offset_bytes] |= bit
        else:
            # Non-negative mask: also valid for buffers holding uint8 scalars.
            data[offset_bytes] &= 0xFF ^ bit
        return

    # Bytes touched by the field, counted from its first byte.
    span = (offset_extra_bits + num_bits + 7) // 8
    field_mask = ((1 << num_bits) - 1) << offset_extra_bits

    # Read-modify-write over the touched bytes as one little-endian integer.
    current = 0
    for i in range(span):
        current |= int(data[offset_bytes + i]) << (8 * i)

    updated = (current & ~field_mask) | ((value << offset_extra_bits) & field_mask)

    for i in range(span):
        data[offset_bytes + i] = (updated >> (8 * i)) & 0xFF
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_signed):
    """Decode one bit-packed field for every row of ``raw_bytes_2d`` at once.

    The bytes covering the field are combined little-endian into a 64-bit
    accumulator, shifted down by the in-byte bit offset and masked to the
    field width. A field that straddles nine bytes has its ninth byte merged
    separately because the accumulator only holds eight.

    :param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
    :param field_offset_bits: bit offset of the field within each element
    :param field_width_bits: width of the field in bits (max 64)
    :param is_signed: whether to sign-extend the result; not consulted for
        1-bit fields
    :return: ``uint64`` array for unsigned and 1-bit fields, ``int64`` array
        for signed fields wider than one bit
    """
    first_byte, shift = divmod(field_offset_bits, 8)

    if field_width_bits == 1:
        column = raw_bytes_2d[:, first_byte].astype(np.uint64)
        return (column >> np.uint64(shift)) & np.uint64(1)

    span = (shift + field_width_bits + 7) // 8
    low_bytes = min(span, 8)

    values = np.zeros(raw_bytes_2d.shape[0], dtype=np.uint64)
    for position in range(low_bytes):
        column = raw_bytes_2d[:, first_byte + position].astype(np.uint64)
        values |= column << np.uint64(position * 8)
    values >>= np.uint64(shift)

    # Bits of the field already present after the first (up to) eight bytes.
    collected = 8 * low_bytes - shift
    if span > 8 and collected < field_width_bits:
        ninth = raw_bytes_2d[:, first_byte + 8].astype(np.uint64)
        values |= ninth << np.uint64(collected)

    if field_width_bits < 64:
        values &= np.uint64((1 << field_width_bits) - 1)

    if not is_signed:
        return values
    if field_width_bits == 64:
        # A full-width value is already in two's complement form.
        return values.view(np.int64)

    top_bit = np.uint64(1 << (field_width_bits - 1))
    as_signed = values.astype(np.int64)
    wrapped = as_signed + np.int64(-(1 << field_width_bits))
    return np.where(values & top_bit, wrapped, as_signed)
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Copyright (c) 2017 HERE Europe B.V.
|
|
3
|
+
See the LICENSE file in the root of this project for license details.
|
|
4
|
+
'''
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
# Lookup table used when reading signed values: entry ``n`` (1..64) is the
# mask of the sign bit of an ``n``-bit value; entry 0 is 0.
_SIGN_BITS = [0] + [1 << shift for shift in range(64)]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def read_value(data, offset_bits, num_bits, is_signed):
    """Read a little-endian bit-packed integer from ``data``.

    :param data: buffer supporting integer indexing and slicing of bytes
    :param offset_bits: bit offset of the field from the start of ``data``
    :param num_bits: width of the field in bits
    :param is_signed: whether to sign-extend multi-bit values; 1-bit fields
        always yield 0 or 1
    :return: the decoded value as ``int``
    """
    byte_pos, bit_pos = divmod(offset_bits, 8)

    if num_bits == 1:
        return int((data[byte_pos] & (1 << bit_pos)) != 0)

    width_bytes = (num_bits + 7) // 8
    # Bits of the field obtained from the first ``width_bytes`` bytes.
    covered_bits = width_bytes * 8 - bit_pos

    raw = int.from_bytes(data[byte_pos: byte_pos + width_bytes], byteorder="little")
    raw >>= bit_pos
    if covered_bits < num_bits:
        # The shift pushed the top of the field into one additional byte.
        raw |= data[byte_pos + width_bytes] << covered_bits

    if not (num_bits >= 64 and bit_pos <= 0):
        raw &= (1 << num_bits) - 1

    if is_signed:
        sign = _SIGN_BITS[num_bits]
        return (raw & (sign - 1)) - (raw & sign)
    return raw
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def write_value(data, offset_bits, num_bits, is_signed, value):
    """Store ``value`` as a little-endian bit-packed field inside ``data``.

    Only the ``num_bits`` bits starting at bit ``offset_bits`` are modified;
    every other bit of the touched bytes keeps its previous content, so the
    function is safe to use on buffers that already hold neighbouring fields.

    :param data: mutable buffer indexable by integer byte position
        (for example a ``bytearray``)
    :param offset_bits: bit offset of the field from the start of ``data``
    :param num_bits: width of the field in bits (at most 64)
    :param is_signed: kept for interface compatibility; negative values are
        stored in two's complement simply by truncating to ``num_bits``
    :param value: integer to store; bits above ``num_bits`` are discarded
    :raises AssertionError: if ``num_bits`` is greater than 64
    :raises IndexError: if the field does not fit into ``data``; nothing is
        written in that case
    """
    assert num_bits <= 64, 'Number of bits to write is greater than 64'

    offset_bytes, offset_extra_bits = divmod(offset_bits, 8)

    if num_bits == 1:
        if value == 1:
            data[offset_bytes] |= 1 << offset_extra_bits
        else:
            # Keep the mask inside 0..255 so it also suits fixed-width byte types.
            data[offset_bytes] &= 0xFF & ~(1 << offset_extra_bits)
        return

    # Number of bytes the field touches, including a partial first/last byte.
    span = (offset_extra_bits + num_bits + 7) // 8
    field_mask = ((1 << num_bits) - 1) << offset_extra_bits

    # Read everything first: an out-of-range field fails before any write.
    current = 0
    for index in range(span):
        current |= int(data[offset_bytes + index]) << (8 * index)

    # Clear the field's bits, then merge the (truncated) new value in.
    updated = (current & ~field_mask) | ((value << offset_extra_bits) & field_mask)
    for index, byte in enumerate(updated.to_bytes(span, byteorder="little")):
        data[offset_bytes + index] = byte
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_signed):
    """Decode one bit-packed field for every row of ``raw_bytes_2d`` at once.

    The bytes covering the field are combined little-endian into a 64-bit
    accumulator, shifted down by the in-byte bit offset and masked to the
    field width. A field that straddles nine bytes has its ninth byte merged
    separately because the accumulator only holds eight.

    :param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
    :param field_offset_bits: bit offset of the field within each element
    :param field_width_bits: width of the field in bits (max 64)
    :param is_signed: whether to sign-extend the result; not consulted for
        1-bit fields
    :return: ``uint64`` array for unsigned and 1-bit fields, ``int64`` array
        for signed fields wider than one bit
    """
    first_byte, shift = divmod(field_offset_bits, 8)

    if field_width_bits == 1:
        column = raw_bytes_2d[:, first_byte].astype(np.uint64)
        return (column >> np.uint64(shift)) & np.uint64(1)

    span = (shift + field_width_bits + 7) // 8
    low_bytes = min(span, 8)

    values = np.zeros(raw_bytes_2d.shape[0], dtype=np.uint64)
    for position in range(low_bytes):
        column = raw_bytes_2d[:, first_byte + position].astype(np.uint64)
        values |= column << np.uint64(position * 8)
    values >>= np.uint64(shift)

    # Bits of the field already present after the first (up to) eight bytes.
    collected = 8 * low_bytes - shift
    if span > 8 and collected < field_width_bits:
        ninth = raw_bytes_2d[:, first_byte + 8].astype(np.uint64)
        values |= ninth << np.uint64(collected)

    if field_width_bits < 64:
        values &= np.uint64((1 << field_width_bits) - 1)

    if not is_signed:
        return values
    if field_width_bits == 64:
        # A full-width value is already in two's complement form.
        return values.view(np.int64)

    top_bit = np.uint64(1 << (field_width_bits - 1))
    as_signed = values.astype(np.int64)
    wrapped = as_signed + np.int64(-(1 << field_width_bits))
    return np.where(values & top_bit, wrapped, as_signed)
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Copyright (c) 2017 HERE Europe B.V.
|
|
3
|
+
See the LICENSE file in the root of this project for license details.
|
|
4
|
+
'''
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
# Lookup table used when reading signed values: entry ``n`` (1..64) is the
# mask of the sign bit of an ``n``-bit value; entry 0 is 0.
_SIGN_BITS = [0] + [1 << shift for shift in range(64)]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def make_field_reader(offset_bits, num_bits, is_signed):
    """Create a ``reader(data, pos)`` closure that decodes one bit-packed field.

    ``pos`` is the byte position of the enclosing structure inside ``data``;
    the field starts ``offset_bits`` bits after it, is ``num_bits`` wide and
    is stored little-endian. Everything that depends only on the field layout
    is computed here, and a closure specialised for that layout (single bit,
    byte-aligned, shifted, or spilling into one extra byte; signed or
    unsigned) is returned so that each call does as little work as possible.

    :param offset_bits: bit offset of the field within the structure
    :param num_bits: width of the field in bits
    :param is_signed: whether multi-bit values are sign-extended; 1-bit
        fields always yield 0 or 1
    :return: function ``reader(data, pos)`` returning the field as ``int``
    """
    first, shift = divmod(offset_bits, 8)
    width_bytes = (num_bits + 7) // 8
    last = first + width_bytes
    value_mask = (1 << num_bits) - 1
    # Bits of the field obtained from the first ``width_bytes`` bytes once
    # they have been shifted down; anything missing lives in one more byte.
    tail_shift = width_bytes * 8 - shift
    spills_over = tail_shift < num_bits

    if num_bits == 1:
        single_bit = 1 << shift

        def reader(data, pos):
            return int((data[pos + first] & single_bit) != 0)

        return reader

    if not is_signed:
        if spills_over:
            def reader(data, pos):
                stop = pos + last
                low = int.from_bytes(data[pos + first: stop], byteorder="little") >> shift
                return (low | (data[stop] << tail_shift)) & value_mask
        elif shift:
            def reader(data, pos):
                packed = int.from_bytes(data[pos + first: pos + last], byteorder="little")
                return (packed >> shift) & value_mask
        else:
            def reader(data, pos):
                packed = int.from_bytes(data[pos + first: pos + last], byteorder="little")
                return packed & value_mask
        return reader

    # Signed variants: subtracting the sign bit's weight sign-extends the value.
    sign_bit = _SIGN_BITS[num_bits]
    magnitude_mask = sign_bit - 1
    if spills_over:
        def reader(data, pos):
            stop = pos + last
            low = int.from_bytes(data[pos + first: stop], byteorder="little") >> shift
            value = (low | (data[stop] << tail_shift)) & value_mask
            return (value & magnitude_mask) - (value & sign_bit)
    elif shift:
        def reader(data, pos):
            packed = int.from_bytes(data[pos + first: pos + last], byteorder="little")
            value = (packed >> shift) & value_mask
            return (value & magnitude_mask) - (value & sign_bit)
    else:
        def reader(data, pos):
            packed = int.from_bytes(data[pos + first: pos + last], byteorder="little")
            value = packed & value_mask
            return (value & magnitude_mask) - (value & sign_bit)
    return reader
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_signed):
    """Decode one bit-packed field for every row of ``raw_bytes_2d`` at once.

    The bytes covering the field are combined little-endian into a 64-bit
    accumulator, shifted down by the in-byte bit offset and masked to the
    field width. A field that straddles nine bytes has its ninth byte merged
    separately because the accumulator only holds eight.

    :param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
    :param field_offset_bits: bit offset of the field within each element
    :param field_width_bits: width of the field in bits (max 64)
    :param is_signed: whether to sign-extend the result; not consulted for
        1-bit fields
    :return: ``uint64`` array for unsigned and 1-bit fields, ``int64`` array
        for signed fields wider than one bit
    """
    first_byte, shift = divmod(field_offset_bits, 8)

    if field_width_bits == 1:
        column = raw_bytes_2d[:, first_byte].astype(np.uint64)
        return (column >> np.uint64(shift)) & np.uint64(1)

    span = (shift + field_width_bits + 7) // 8
    low_bytes = min(span, 8)

    values = np.zeros(raw_bytes_2d.shape[0], dtype=np.uint64)
    for position in range(low_bytes):
        column = raw_bytes_2d[:, first_byte + position].astype(np.uint64)
        values |= column << np.uint64(position * 8)
    values >>= np.uint64(shift)

    # Bits of the field already present after the first (up to) eight bytes.
    collected = 8 * low_bytes - shift
    if span > 8 and collected < field_width_bits:
        ninth = raw_bytes_2d[:, first_byte + 8].astype(np.uint64)
        values |= ninth << np.uint64(collected)

    if field_width_bits < 64:
        values &= np.uint64((1 << field_width_bits) - 1)

    if not is_signed:
        return values
    if field_width_bits == 64:
        # A full-width value is already in two's complement form.
        return values.view(np.int64)

    top_bit = np.uint64(1 << (field_width_bits - 1))
    as_signed = values.astype(np.int64)
    wrapped = as_signed + np.int64(-(1 << field_width_bits))
    return np.where(values & top_bit, wrapped, as_signed)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def read_value(data, offset_bits, num_bits, is_signed):
    """Read a single bit-packed value located ``offset_bits`` bits into ``data``.

    Thin one-shot helper: it builds a reader with :func:`make_field_reader`
    and applies it at byte position 0. Code that reads the same field many
    times should create the reader once and call it directly instead.
    """
    return make_field_reader(offset_bits, num_bits, is_signed)(data, 0)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def write_value(data, offset_bits, num_bits, is_signed, value):
    """Store ``value`` as a little-endian bit-packed field inside ``data``.

    Only the ``num_bits`` bits starting at bit ``offset_bits`` are modified;
    every other bit of the touched bytes keeps its previous content, so the
    function is safe to use on buffers that already hold neighbouring fields.

    :param data: mutable buffer indexable by integer byte position
        (for example a ``bytearray``)
    :param offset_bits: bit offset of the field from the start of ``data``
    :param num_bits: width of the field in bits (at most 64)
    :param is_signed: kept for interface compatibility; negative values are
        stored in two's complement simply by truncating to ``num_bits``
    :param value: integer to store; bits above ``num_bits`` are discarded
    :raises AssertionError: if ``num_bits`` is greater than 64
    :raises IndexError: if the field does not fit into ``data``; nothing is
        written in that case
    """
    assert num_bits <= 64, 'Number of bits to write is greater than 64'

    offset_bytes, offset_extra_bits = divmod(offset_bits, 8)

    if num_bits == 1:
        if value == 1:
            data[offset_bytes] |= 1 << offset_extra_bits
        else:
            # Keep the mask inside 0..255 so it also suits fixed-width byte types.
            data[offset_bytes] &= 0xFF & ~(1 << offset_extra_bits)
        return

    # Number of bytes the field touches, including a partial first/last byte.
    span = (offset_extra_bits + num_bits + 7) // 8
    field_mask = ((1 << num_bits) - 1) << offset_extra_bits

    # Read everything first: an out-of-range field fails before any write.
    current = 0
    for index in range(span):
        current |= int(data[offset_bytes + index]) << (8 * index)

    # Clear the field's bits, then merge the (truncated) new value in.
    updated = (current & ~field_mask) | ((value << offset_extra_bits) & field_mask)
    for index, byte in enumerate(updated.to_bytes(span, byteorder="little")):
        data[offset_bytes + index] = byte
|
|
@@ -8,7 +8,7 @@ import json
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
import numpy as np
|
|
10
10
|
|
|
11
|
-
from .data_access import read_value
|
|
11
|
+
from .data_access import read_value, read_field_vectorized
|
|
12
12
|
from .errors import CorruptResourceError
|
|
13
13
|
|
|
14
14
|
SIZE_OFFSET_IN_BITS = 64
|
|
@@ -24,6 +24,7 @@ class ResourceBase:
|
|
|
24
24
|
self._element_type = element_type
|
|
25
25
|
self._element_types = [element_type]
|
|
26
26
|
self._type_size_in_bytes = self._element_type._SIZE_IN_BYTES if self._element_type else 1
|
|
27
|
+
self._raw_numpy_2d = None
|
|
27
28
|
|
|
28
29
|
def size_in_bytes(self):
    """Return the length of the underlying memory buffer as reported by ``len``."""
    mem = self._mem
    return len(mem)
|
|
@@ -35,6 +36,20 @@ class ResourceBase:
|
|
|
35
36
|
offset = self._item_offset(index)
|
|
36
37
|
return self._element_type(self._mem, offset)
|
|
37
38
|
|
|
39
|
+
def _as_numpy_2d(self):
    """Return the element payload as a 2D uint8 array, one row per element.

    The array has shape ``(len(self), struct_size)`` and is created with
    ``np.frombuffer`` from the bytes that follow the first
    ``SIZE_OFFSET_IN_BYTES`` bytes of the memory. The result is cached on the
    instance, so later calls return the same array object.
    NOTE(review): whether this is truly zero-copy depends on what slicing
    ``self._mem`` returns - confirm for the buffer types in use.
    """
    cached = self._raw_numpy_2d
    if cached is not None:
        return cached

    rows = len(self)
    row_bytes = self._type_size_in_bytes
    payload_end = SIZE_OFFSET_IN_BYTES + rows * row_bytes
    flat = np.frombuffer(self._mem[SIZE_OFFSET_IN_BYTES:payload_end], dtype=np.uint8)
    self._raw_numpy_2d = flat.reshape(rows, row_bytes)
    return self._raw_numpy_2d
|
|
52
|
+
|
|
38
53
|
def _repr_attributes(self):
|
|
39
54
|
return {
|
|
40
55
|
"container_type": self.__class__.__name__,
|
|
@@ -60,14 +75,18 @@ class _VectorSlice:
|
|
|
60
75
|
self._sequence = sequence
|
|
61
76
|
|
|
62
77
|
def to_numpy(self, limit=None):
    """Decode the sliced elements into a numpy structured array.

    :param limit: optional cap on the number of rows taken from the slice
    :return: structured array with one column per field of the element type
    """
    sequence = self._sequence
    rows = sequence._as_numpy_2d()[self._slice]
    if limit is not None:
        rows = rows[:limit]

    element_type = sequence._element_type
    signatures = element_type._FIELDS
    decoded = np.empty(rows.shape[0], dtype=element_type.dtype())
    # Each field is decoded for all rows in one vectorized call.
    for field_name, signature in signatures.items():
        decoded[field_name] = read_field_vectorized(
            rows, signature.offset, signature.width, signature.is_signed
        )
    return decoded
|
|
72
91
|
|
|
73
92
|
def to_data_frame(self, limit=None):
|
|
@@ -78,7 +97,13 @@ class _VectorSlice:
|
|
|
78
97
|
yield self._sequence[i]
|
|
79
98
|
|
|
80
99
|
def __getattr__(self, name):
    """Return a one-column DataFrame holding field ``name`` for the sliced rows.

    :raises AttributeError: if the element type has no field called ``name``
    """
    try:
        signature = self._sequence._element_type._FIELDS[name]
    except KeyError:
        raise AttributeError("Field %s not found in structure" % name)
    rows = self._sequence._as_numpy_2d()[self._slice]
    column = read_field_vectorized(
        rows, signature.offset, signature.width, signature.is_signed
    )
    return pd.DataFrame(data=column, columns=[name])
|
|
82
107
|
|
|
83
108
|
def __repr__(self):
    """Return a textual preview made of a header and the first 100 records."""
    preview = self.to_data_frame(limit=100)
    return "Displaying first 100 records:\n" + preview.__repr__()
|
|
@@ -92,8 +117,20 @@ class Vector(ResourceBase):
|
|
|
92
117
|
assert rem == 0, "Malformed vector"
|
|
93
118
|
self._size = size
|
|
94
119
|
|
|
120
|
+
def to_numpy(self):
    """Decode the whole vector into a numpy structured array.

    :return: structured array of ``self._size`` entries with one column per
        field of the element type
    """
    rows = self._as_numpy_2d()
    element_type = self._element_type
    signatures = element_type._FIELDS
    decoded = np.empty(self._size, dtype=element_type.dtype())
    # Each field is decoded for all rows in one vectorized call.
    for field_name, signature in signatures.items():
        decoded[field_name] = read_field_vectorized(
            rows, signature.offset, signature.width, signature.is_signed
        )
    return decoded
|
|
131
|
+
|
|
95
132
|
def to_data_frame(self):
    """Return the vector as a pandas DataFrame built from ``to_numpy()``."""
    records = self.to_numpy()
    return pd.DataFrame(data=records)
|
|
97
134
|
|
|
98
135
|
def __getitem__(self, index):
|
|
99
136
|
if isinstance(index, slice):
|
|
@@ -106,11 +143,20 @@ class Vector(ResourceBase):
|
|
|
106
143
|
return self._get_item(index)
|
|
107
144
|
|
|
108
145
|
def __iter__(self):
    """Yield one element view per item, in storage order.

    Attributes are copied into locals once so the loop body only performs the
    offset computation and the element construction.
    """
    buffer = self._mem
    make_element = self._element_type
    stride = self._type_size_in_bytes
    for index in range(self._size):
        element_offset = SIZE_OFFSET_IN_BYTES + stride * index
        yield make_element(buffer, element_offset)
|
|
111
151
|
|
|
112
152
|
def __getattr__(self, name):
    """Return a one-column DataFrame holding field ``name`` for every element.

    :raises AttributeError: if the element type has no field called ``name``
    """
    try:
        signature = self._element_type._FIELDS[name]
    except KeyError:
        raise AttributeError("Field %s not found in structure" % name)
    rows = self._as_numpy_2d()
    column = read_field_vectorized(
        rows, signature.offset, signature.width, signature.is_signed
    )
    return pd.DataFrame(data=column, columns=[name])
|
|
114
160
|
|
|
115
161
|
def __len__(self):
    """Return the number of elements recorded in ``self._size``."""
    count = self._size
    return count
|
|
@@ -2,26 +2,33 @@ from collections import namedtuple
|
|
|
2
2
|
import json
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
5
|
-
from .data_access import
|
|
5
|
+
from .data_access import make_field_reader
|
|
6
6
|
|
|
7
7
|
# Layout description of one structure field: bit offset, bit width,
# signedness and the dtype used when exporting to numpy.
FieldSignature = namedtuple("FieldSignature", "offset width is_signed dtype")
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class Structure:
|
|
12
|
+
__slots__ = ('_mem', '_pos')
|
|
13
|
+
_READERS = {}
|
|
14
|
+
|
|
15
|
+
def __init_subclass__(cls, **kwargs):
    """Build the per-field reader closures when a subclass declares ``_FIELDS``.

    Only the subclass' own namespace is inspected; a subclass without its own
    ``_FIELDS`` keeps whatever ``_READERS`` it inherits.
    """
    super().__init_subclass__(**kwargs)
    own_fields = cls.__dict__.get('_FIELDS')
    if own_fields is None:
        return
    readers = {}
    for field_name, signature in own_fields.items():
        readers[field_name] = make_field_reader(
            signature.offset, signature.width, signature.is_signed)
    cls._READERS = readers
|
|
21
|
+
|
|
12
22
|
def __init__(self, mem, pos):
    """Bind the view to buffer ``mem`` at position ``pos`` (passed to the readers)."""
    self._mem, self._pos = mem, pos
|
|
15
25
|
|
|
16
26
|
def __getattr__(self, name):
    """Decode and return the field called ``name``.

    :raises AttributeError: if no reader is registered under ``name``
    """
    try:
        read_field = self._READERS[name]
    except KeyError:
        raise AttributeError("Field %s not found in structure" % name)
    else:
        return read_field(self._mem, self._pos)
|
|
25
32
|
|
|
26
33
|
def __dir__(self):
    """Return ``_FIELD_KEYS`` as the attribute listing of the structure."""
    field_keys = self._FIELD_KEYS
    return field_keys
|
|
@@ -31,20 +38,24 @@ class Structure:
|
|
|
31
38
|
yield getattr(self, name)
|
|
32
39
|
|
|
33
40
|
def as_dict(self):
    """Return all field values as a dict keyed by field name."""
    buffer, position = self._mem, self._pos
    values = {}
    for field_name, read_field in self._READERS.items():
        values[field_name] = read_field(buffer, position)
    return values
|
|
35
43
|
|
|
36
44
|
def as_list(self):
    """Return all field values as a list, in reader order."""
    buffer, position = self._mem, self._pos
    values = []
    for read_field in self._READERS.values():
        values.append(read_field(buffer, position))
    return values
|
|
38
47
|
|
|
39
48
|
def as_tuple(self):
    """Return all field values as a tuple, in reader order."""
    buffer, position = self._mem, self._pos
    return tuple([read_field(buffer, position)
                  for read_field in self._READERS.values()])
|
|
41
51
|
|
|
42
52
|
@classmethod
def dtype(cls):
    """Return the numpy structured dtype description as ``(name, dtype)`` pairs."""
    layout = []
    for field_name, signature in cls._FIELDS.items():
        layout.append((field_name, np.dtype(signature.dtype)))
    return layout
|
|
45
55
|
|
|
46
56
|
def as_nparray(self):
    """Return the structure as a one-element numpy structured array."""
    buffer, position = self._mem, self._pos
    row = tuple([read_field(buffer, position)
                 for read_field in self._READERS.values()])
    return np.array([row], dtype=self.dtype())
|
|
49
60
|
|
|
50
61
|
def schema(self):
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from collections import namedtuple
|
|
2
|
+
import json
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from .data_access import make_field_reader
|
|
6
|
+
|
|
7
|
+
# Layout description of one structure field: bit offset, bit width,
# signedness and the dtype used when exporting to numpy.
FieldSignature = namedtuple("FieldSignature", "offset width is_signed dtype")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Structure:
    """View over one bit-packed structure stored inside a byte buffer.

    Field values are decoded on attribute access through per-class reader
    closures built from ``_FIELDS``. ``_FIELDS``, ``_FIELD_KEYS`` and
    ``_SCHEMA`` are read but not defined here; presumably the concrete
    (generated) subclasses provide them - confirm against the generator.
    """
    __slots__ = ('_mem', '_pos')
    # Field name -> reader(mem, pos); replaced for every subclass that
    # declares its own ``_FIELDS``.
    _READERS = {}

    def __init_subclass__(cls, **kwargs):
        """Build the reader closures for a subclass that declares ``_FIELDS``."""
        super().__init_subclass__(**kwargs)
        # Only the subclass' own namespace counts, so a subclass without its
        # own ``_FIELDS`` keeps the readers it inherits.
        fields = cls.__dict__.get('_FIELDS')
        if fields is not None:
            cls._READERS = {name: make_field_reader(f.offset, f.width, f.is_signed)
                            for name, f in fields.items()}

    def __init__(self, mem, pos):
        """Bind the view to buffer ``mem`` at position ``pos`` (passed to the readers)."""
        self._mem = mem
        self._pos = pos

    def __getattr__(self, name):
        """Decode and return the field called ``name``.

        :raises AttributeError: if no reader is registered under ``name``
        """
        try:
            reader = self._READERS[name]
        except KeyError:
            # The KeyError is an implementation detail; do not chain it.
            raise AttributeError("Field %s not found in structure" % name) from None
        return reader(self._mem, self._pos)

    def __dir__(self):
        """Return ``_FIELD_KEYS`` as the attribute listing of the structure."""
        return self._FIELD_KEYS

    def __iter__(self):
        """Yield the field values in ``_FIELD_KEYS`` order."""
        for name in self._FIELD_KEYS:
            yield getattr(self, name)

    def as_dict(self):
        """Return all field values as a dict keyed by field name."""
        mem, pos = self._mem, self._pos
        return {name: reader(mem, pos) for name, reader in self._READERS.items()}

    def as_list(self):
        """Return all field values as a list, in reader order."""
        mem, pos = self._mem, self._pos
        return [reader(mem, pos) for reader in self._READERS.values()]

    def as_tuple(self):
        """Return all field values as a tuple, in reader order."""
        mem, pos = self._mem, self._pos
        return tuple(reader(mem, pos) for reader in self._READERS.values())

    @classmethod
    def dtype(cls):
        """Return the numpy structured dtype description as ``(name, dtype)`` pairs."""
        return [(name, np.dtype(field.dtype)) for name, field in cls._FIELDS.items()]

    def as_nparray(self):
        """Return the structure as a one-element numpy structured array."""
        mem, pos = self._mem, self._pos
        return np.array([tuple(reader(mem, pos) for reader in self._READERS.values())],
                        dtype=self.dtype())

    def schema(self):
        """Return the ``_SCHEMA`` attribute of the structure."""
        return self._SCHEMA

    @classmethod
    def _repr_attributes(cls):
        """Describe the structure type (name, doc, field layout) as a plain dict."""
        return {
            "name": cls.__name__,
            "doc": cls.__doc__,
            "attributes": [
                {
                    "name": name,
                    "offset": signature.offset,
                    "width": signature.width,
                    "is_signed": signature.is_signed
                }
                for name, signature in cls._FIELDS.items()]
        }

    # NOTE: a ``@classmethod __repr__`` used to be defined here as well; it was
    # immediately replaced by the instance ``__repr__`` below and never took
    # effect, so only the effective definition is kept.
    def __repr__(self):
        """Return the structure name and its decoded field values as JSON."""
        return json.dumps({
            "name": self.__class__.__name__,
            "attributes":
                {name: getattr(self, name)
                 for name in self._FIELDS}
        }, indent=4)
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "flatdata-py"
|
|
7
|
-
version = "0.4.
|
|
7
|
+
version = "0.4.11"
|
|
8
8
|
description = "Python 3 implementation of Flatdata"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [
|
|
@@ -16,7 +16,7 @@ classifiers = [
|
|
|
16
16
|
"Programming Language :: Python :: 3",
|
|
17
17
|
]
|
|
18
18
|
dependencies = [
|
|
19
|
-
"flatdata-generator==0.4.
|
|
19
|
+
"flatdata-generator==0.4.11",
|
|
20
20
|
"numpy",
|
|
21
21
|
"pandas",
|
|
22
22
|
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|