flatdata-py 0.4.10__tar.gz → 0.4.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/.gitignore +0 -4
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/PKG-INFO +34 -2
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/README.md +31 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/archive.py +38 -25
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/archive_builder.py +45 -26
- flatdata_py-0.4.12/flatdata/lib/data_access.py +177 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/errors.py +15 -13
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/file_resource_storage.py +7 -5
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/file_resource_writer.py +12 -6
- flatdata_py-0.4.12/flatdata/lib/flatdata_writer.py +106 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/inspector.py +14 -7
- flatdata_py-0.4.12/flatdata/lib/py.typed +0 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/resource_storage.py +42 -20
- flatdata_py-0.4.12/flatdata/lib/resources.py +305 -0
- flatdata_py-0.4.12/flatdata/lib/structure.py +99 -0
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/tar_archive_resource_storage.py +7 -5
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/writer.py +2 -5
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/pyproject.toml +21 -2
- flatdata_py-0.4.10/flatdata/lib/data_access.py +0 -64
- flatdata_py-0.4.10/flatdata/lib/flatdata_writer.py +0 -72
- flatdata_py-0.4.10/flatdata/lib/resources.py +0 -239
- flatdata_py-0.4.10/flatdata/lib/structure.py +0 -78
- {flatdata_py-0.4.10 → flatdata_py-0.4.12}/flatdata/lib/__init__.py +0 -0
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flatdata-py
|
|
3
|
-
Version: 0.4.10
|
|
3
|
+
Version: 0.4.12
|
|
4
4
|
Summary: Python 3 implementation of Flatdata
|
|
5
5
|
Project-URL: Homepage, https://github.com/heremaps/flatdata
|
|
6
6
|
Author: Flatdata Developers
|
|
7
7
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
8
8
|
Classifier: Operating System :: OS Independent
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Requires-
|
|
10
|
+
Requires-Python: >=3.8
|
|
11
|
+
Requires-Dist: flatdata-generator==0.4.12
|
|
11
12
|
Requires-Dist: numpy
|
|
12
13
|
Requires-Dist: pandas
|
|
13
14
|
Provides-Extra: inspector
|
|
@@ -34,6 +35,37 @@ Once you have [created a flatdata schema file](../README.md#creating-a-schema),
|
|
|
34
35
|
flatdata-generator --gen py --schema locations.flatdata --output-file locations.py
|
|
35
36
|
```
|
|
36
37
|
|
|
38
|
+
## Performance tips
|
|
39
|
+
|
|
40
|
+
`flatdata-py` supports two data access patterns with very different performance characteristics on large archives.
|
|
41
|
+
|
|
42
|
+
Iterating over a vector yields one Python object per element. Each field access unpacks bits from the underlying memory-mapped data. This is fine for accessing individual elements or small ranges, but has significant per-element overhead for bulk operations:
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
count = sum(1 for x in archive.links if x.speed_limit > 100)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
For bulk operations, use the vectorized access methods that read fields directly into NumPy arrays:
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
# single column access, returns a pandas DataFrame
|
|
52
|
+
df = archive.links.speed_limit
|
|
53
|
+
count = len(df[df['speed_limit'] > 100])
|
|
54
|
+
|
|
55
|
+
# full NumPy structured array with all fields
|
|
56
|
+
arr = archive.links.to_numpy()
|
|
57
|
+
count = int(np.sum(arr['speed_limit'] > 100))
|
|
58
|
+
|
|
59
|
+
# slices work too
|
|
60
|
+
arr = archive.links[1000:2000].to_numpy()
|
|
61
|
+
df = archive.links[::10].to_data_frame()
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
* Use `vector.field_name` (column access) when you only need one or a few fields.
|
|
65
|
+
* Use `vector.to_numpy()` or `vector.to_data_frame()` when you need all fields at once.
|
|
66
|
+
* Use `vector[i].field` for random access to individual elements.
|
|
67
|
+
* The underlying data is memory-mapped; the OS pages it from disk on demand. Vectorized results are materialized as NumPy arrays in RAM.
|
|
68
|
+
|
|
37
69
|
## Using the inspector
|
|
38
70
|
|
|
39
71
|
`flatdata-py` comes with a handy tool called the `flatdata-inspector` to inspect the contents of an archive:
|
|
@@ -18,6 +18,37 @@ Once you have [created a flatdata schema file](../README.md#creating-a-schema),
|
|
|
18
18
|
flatdata-generator --gen py --schema locations.flatdata --output-file locations.py
|
|
19
19
|
```
|
|
20
20
|
|
|
21
|
+
## Performance tips
|
|
22
|
+
|
|
23
|
+
`flatdata-py` supports two data access patterns with very different performance characteristics on large archives.
|
|
24
|
+
|
|
25
|
+
Iterating over a vector yields one Python object per element. Each field access unpacks bits from the underlying memory-mapped data. This is fine for accessing individual elements or small ranges, but has significant per-element overhead for bulk operations:
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
count = sum(1 for x in archive.links if x.speed_limit > 100)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
For bulk operations, use the vectorized access methods that read fields directly into NumPy arrays:
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
# single column access, returns a pandas DataFrame
|
|
35
|
+
df = archive.links.speed_limit
|
|
36
|
+
count = len(df[df['speed_limit'] > 100])
|
|
37
|
+
|
|
38
|
+
# full NumPy structured array with all fields
|
|
39
|
+
arr = archive.links.to_numpy()
|
|
40
|
+
count = int(np.sum(arr['speed_limit'] > 100))
|
|
41
|
+
|
|
42
|
+
# slices work too
|
|
43
|
+
arr = archive.links[1000:2000].to_numpy()
|
|
44
|
+
df = archive.links[::10].to_data_frame()
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
* Use `vector.field_name` (column access) when you only need one or a few fields.
|
|
48
|
+
* Use `vector.to_numpy()` or `vector.to_data_frame()` when you need all fields at once.
|
|
49
|
+
* Use `vector[i].field` for random access to individual elements.
|
|
50
|
+
* The underlying data is memory-mapped; the OS pages it from disk on demand. Vectorized results are materialized as NumPy arrays in RAM.
|
|
51
|
+
|
|
21
52
|
## Using the inspector
|
|
22
53
|
|
|
23
54
|
`flatdata-py` comes with a handy tool called the `flatdata-inspector` to inspect the contents of an archive:
|
|
@@ -3,17 +3,27 @@
|
|
|
3
3
|
See the LICENSE file in the root of this project for license details.
|
|
4
4
|
'''
|
|
5
5
|
|
|
6
|
-
from
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import Any, NamedTuple, TYPE_CHECKING
|
|
7
9
|
|
|
8
10
|
import pandas as pd
|
|
9
11
|
|
|
10
12
|
from .errors import MissingResourceError, SchemaMismatchError
|
|
11
13
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from .resources import ReadStorage, ResourceBase
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ResourceSignature(NamedTuple):
|
|
19
|
+
container: type[ResourceBase] | type[Archive]
|
|
20
|
+
initializer: Any
|
|
21
|
+
schema: str
|
|
22
|
+
is_optional: bool
|
|
23
|
+
doc: str
|
|
14
24
|
|
|
15
|
-
def _is_archive_signature(resource_signature):
|
|
16
|
-
return resource_signature.container == Archive
|
|
25
|
+
def _is_archive_signature(resource_signature: ResourceSignature) -> bool:
|
|
26
|
+
return bool(resource_signature.container == Archive)
|
|
17
27
|
|
|
18
28
|
_SCHEMA_EXT = ".schema"
|
|
19
29
|
|
|
@@ -23,35 +33,38 @@ class Archive:
|
|
|
23
33
|
Archive class. Entry point to Flatdata.
|
|
24
34
|
Provides access to flatdata resources and verifies archive/resource schemas on opening.
|
|
25
35
|
"""
|
|
36
|
+
_NAME: str
|
|
37
|
+
_SCHEMA: str
|
|
38
|
+
_RESOURCES: dict[str, ResourceSignature]
|
|
26
39
|
|
|
27
|
-
def __init__(self, resource_storage):
|
|
40
|
+
def __init__(self, resource_storage: ReadStorage) -> None:
|
|
28
41
|
"""
|
|
29
42
|
Opens archive from a given resource storage.
|
|
30
43
|
:raises flatdata.errors.CorruptArchiveError
|
|
31
44
|
:raises flatdata.errors.SchemaMismatchError
|
|
32
45
|
:param resource_storage: Resource storage to use.
|
|
33
46
|
"""
|
|
34
|
-
self._resource_storage = resource_storage
|
|
35
|
-
self._loaded_resources = {}
|
|
47
|
+
self._resource_storage: ReadStorage = resource_storage
|
|
48
|
+
self._loaded_resources: dict[str, Any] = {}
|
|
36
49
|
|
|
37
50
|
# Preload resources and check their schemas
|
|
38
51
|
for name, _ in sorted(list(self._RESOURCES.items())):
|
|
39
52
|
self.__getattr__(name)
|
|
40
53
|
|
|
41
|
-
def __getattr__(self, name):
|
|
42
|
-
if name not in
|
|
54
|
+
def __getattr__(self, name: str) -> Any:
|
|
55
|
+
if name not in self._RESOURCES:
|
|
43
56
|
raise AttributeError("Resource %s not defined in archive." % name)
|
|
44
|
-
if name not in
|
|
57
|
+
if name not in self._loaded_resources:
|
|
45
58
|
self._loaded_resources[name] = self._open_resource(name)
|
|
46
59
|
return self._loaded_resources[name]
|
|
47
60
|
|
|
48
|
-
def __dir__(self):
|
|
61
|
+
def __dir__(self) -> list[str]:
|
|
49
62
|
return list(self._RESOURCES.keys()) + ['schema']
|
|
50
63
|
|
|
51
|
-
def __repr__(self):
|
|
52
|
-
return self.to_data_frame()
|
|
64
|
+
def __repr__(self) -> str:
|
|
65
|
+
return repr(self.to_data_frame())
|
|
53
66
|
|
|
54
|
-
def to_data_frame(self):
|
|
67
|
+
def to_data_frame(self) -> pd.DataFrame:
|
|
55
68
|
result = []
|
|
56
69
|
for name, signature in self._RESOURCES.items():
|
|
57
70
|
resource = self.__getattr__(name)
|
|
@@ -62,34 +75,34 @@ class Archive:
|
|
|
62
75
|
columns=["Name", "Type", "Optional", "SizeInBytes", "Size"])
|
|
63
76
|
|
|
64
77
|
@classmethod
|
|
65
|
-
def name(cls):
|
|
78
|
+
def name(cls) -> str:
|
|
66
79
|
return cls._NAME
|
|
67
80
|
|
|
68
81
|
@classmethod
|
|
69
|
-
def schema(cls):
|
|
82
|
+
def schema(cls) -> str:
|
|
70
83
|
return cls._SCHEMA
|
|
71
84
|
|
|
72
85
|
@classmethod
|
|
73
|
-
def resource_schema(cls, resource):
|
|
74
|
-
return cls._RESOURCES[resource].schema
|
|
86
|
+
def resource_schema(cls, resource: str) -> str:
|
|
87
|
+
return str(cls._RESOURCES[resource].schema)
|
|
75
88
|
|
|
76
89
|
@classmethod
|
|
77
|
-
def open(cls, storage, name, initializer, is_optional=False):
|
|
90
|
+
def open(cls, storage: ReadStorage, name: str, initializer: type[Archive], is_optional: bool = False) -> Archive | None:
|
|
78
91
|
nested_storage = storage.get(name, is_optional)
|
|
79
92
|
assert nested_storage is not None or is_optional
|
|
80
93
|
if nested_storage is None:
|
|
81
94
|
return None
|
|
82
95
|
return initializer(nested_storage)
|
|
83
96
|
|
|
84
|
-
def size_in_bytes(self):
|
|
97
|
+
def size_in_bytes(self) -> int:
|
|
85
98
|
return sum(resource_value.size_in_bytes() for resource_value in
|
|
86
99
|
(self.__getattr__(resource) for resource in self._RESOURCES.keys())
|
|
87
100
|
if resource_value)
|
|
88
101
|
|
|
89
|
-
def __len__(self):
|
|
102
|
+
def __len__(self) -> int:
|
|
90
103
|
return len(self._RESOURCES)
|
|
91
104
|
|
|
92
|
-
def _schema_validated_resource_signature(self, name):
|
|
105
|
+
def _schema_validated_resource_signature(self, name: str) -> ResourceSignature | None:
|
|
93
106
|
resource_signature = self._RESOURCES[name]
|
|
94
107
|
# We check only schema for non-subarchives, since the subarchives schema is checked,
|
|
95
108
|
# when it is initialized.
|
|
@@ -103,7 +116,7 @@ class Archive:
|
|
|
103
116
|
return None
|
|
104
117
|
return resource_signature
|
|
105
118
|
|
|
106
|
-
def _open_resource(self, name):
|
|
119
|
+
def _open_resource(self, name: str) -> Any:
|
|
107
120
|
resource_signature = self._schema_validated_resource_signature(name)
|
|
108
121
|
if resource_signature:
|
|
109
122
|
resource = resource_signature.container.open(storage=self._resource_storage,
|
|
@@ -116,7 +129,7 @@ class Archive:
|
|
|
116
129
|
return None
|
|
117
130
|
|
|
118
131
|
@staticmethod
|
|
119
|
-
def _check_non_subarchive_schema(name, resource_signature, storage):
|
|
132
|
+
def _check_non_subarchive_schema(name: str, resource_signature: ResourceSignature, storage: Any) -> None:
|
|
120
133
|
actual_schema = bytes(storage).decode()
|
|
121
134
|
if actual_schema != resource_signature.schema:
|
|
122
135
|
raise SchemaMismatchError(
|
|
@@ -3,8 +3,10 @@
|
|
|
3
3
|
See the LICENSE file in the root of this project for license details.
|
|
4
4
|
'''
|
|
5
5
|
|
|
6
|
-
from
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
7
8
|
import os
|
|
9
|
+
from typing import Any, NamedTuple, Protocol, TYPE_CHECKING
|
|
8
10
|
|
|
9
11
|
from .errors import IndexWriterError, MissingFieldError, UnknownFieldError, \
|
|
10
12
|
UnknownStructureError, UnknownResourceError, ResourceAlreadySetError
|
|
@@ -12,10 +14,24 @@ from .errors import IndexWriterError, MissingFieldError, UnknownFieldError, \
|
|
|
12
14
|
from .resources import Instance, Vector, Multivector, RawData
|
|
13
15
|
from .data_access import write_value
|
|
14
16
|
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from .resource_storage import _Resource
|
|
19
|
+
from .structure import Structure
|
|
20
|
+
|
|
15
21
|
_SCHEMA_EXT = ".schema"
|
|
16
22
|
|
|
17
|
-
|
|
18
|
-
|
|
23
|
+
|
|
24
|
+
class ResourceSignature(NamedTuple):
|
|
25
|
+
container: type
|
|
26
|
+
initializer: Any
|
|
27
|
+
schema: str
|
|
28
|
+
is_optional: bool
|
|
29
|
+
doc: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class WriteStorage(Protocol):
|
|
33
|
+
def get(self, resource_name: str, is_subarchive: bool = False) -> Any: ...
|
|
34
|
+
def close(self) -> None: ...
|
|
19
35
|
|
|
20
36
|
|
|
21
37
|
class IndexWriter:
|
|
@@ -23,7 +39,7 @@ class IndexWriter:
|
|
|
23
39
|
IndexWriter class. Only applicable when multivector is present in archive schema.
|
|
24
40
|
"""
|
|
25
41
|
|
|
26
|
-
def __init__(self, name, size, resource_storage):
|
|
42
|
+
def __init__(self, name: str, size: int, resource_storage: WriteStorage) -> None:
|
|
27
43
|
"""
|
|
28
44
|
Create IndexWriter class.
|
|
29
45
|
|
|
@@ -36,9 +52,9 @@ class IndexWriter:
|
|
|
36
52
|
|
|
37
53
|
self._name = name
|
|
38
54
|
self._index_size = size
|
|
39
|
-
self._fout = resource_storage.get(f'{self._name}_index', False)
|
|
55
|
+
self._fout: _Resource = resource_storage.get(f'{self._name}_index', False)
|
|
40
56
|
|
|
41
|
-
def add(self, index):
|
|
57
|
+
def add(self, index: int) -> None:
|
|
42
58
|
"""
|
|
43
59
|
Convert index(number) to bytearray and add to in memory store
|
|
44
60
|
"""
|
|
@@ -46,7 +62,7 @@ class IndexWriter:
|
|
|
46
62
|
byteorder="little", signed=False)
|
|
47
63
|
self._fout.write(index_bytes)
|
|
48
64
|
|
|
49
|
-
def finish(self):
|
|
65
|
+
def finish(self) -> None:
|
|
50
66
|
"""
|
|
51
67
|
Complete index resource by adding size and padding followed by writing to file
|
|
52
68
|
"""
|
|
@@ -60,30 +76,33 @@ class ArchiveBuilder:
|
|
|
60
76
|
ArchiveBuilder class. Entry point to writing Flatdata.
|
|
61
77
|
Provides methods to create flatdata archives.
|
|
62
78
|
"""
|
|
79
|
+
_NAME: str
|
|
80
|
+
_SCHEMA: str
|
|
81
|
+
_RESOURCES: dict[str, ResourceSignature]
|
|
63
82
|
|
|
64
|
-
def __init__(self, resource_storage, path=""):
|
|
83
|
+
def __init__(self, resource_storage: WriteStorage, path: str = "") -> None:
|
|
65
84
|
"""
|
|
66
85
|
Opens archive from a given resource writer.
|
|
67
86
|
:param resource_storage: storage manager to store and write to disc
|
|
68
87
|
:param path: file path where archive is created
|
|
69
88
|
"""
|
|
70
89
|
self._path = os.path.join(path, self._NAME)
|
|
71
|
-
self._resource_storage = resource_storage
|
|
90
|
+
self._resource_storage: WriteStorage = resource_storage
|
|
72
91
|
self._write_archive_signature()
|
|
73
92
|
self._write_archive_schema()
|
|
74
93
|
self._resources_written = [f"{self._NAME}.archive"]
|
|
75
94
|
|
|
76
95
|
@classmethod
|
|
77
|
-
def name(cls):
|
|
96
|
+
def name(cls) -> str:
|
|
78
97
|
'''Returns archive name'''
|
|
79
98
|
return cls._NAME
|
|
80
99
|
|
|
81
100
|
@classmethod
|
|
82
|
-
def schema(cls):
|
|
101
|
+
def schema(cls) -> str:
|
|
83
102
|
'''Returns archive schema'''
|
|
84
103
|
return cls._SCHEMA
|
|
85
104
|
|
|
86
|
-
def _write_raw_data(self, name, data):
|
|
105
|
+
def _write_raw_data(self, name: str, data: bytes | bytearray) -> None:
|
|
87
106
|
'''
|
|
88
107
|
Helper function to write data
|
|
89
108
|
|
|
@@ -94,7 +113,7 @@ class ArchiveBuilder:
|
|
|
94
113
|
storage.write(data)
|
|
95
114
|
storage.close()
|
|
96
115
|
|
|
97
|
-
def _write_schema(self, name):
|
|
116
|
+
def _write_schema(self, name: str) -> None:
|
|
98
117
|
'''
|
|
99
118
|
Writes resource schema
|
|
100
119
|
|
|
@@ -103,29 +122,29 @@ class ArchiveBuilder:
|
|
|
103
122
|
self._write_raw_data(f"{name}.schema", bytes(
|
|
104
123
|
self._RESOURCES[name].schema, 'utf-8'))
|
|
105
124
|
|
|
106
|
-
def _write_archive_signature(self):
|
|
125
|
+
def _write_archive_signature(self) -> None:
|
|
107
126
|
'''Writes archive's signature'''
|
|
108
127
|
self._write_raw_data(f"{self._NAME}.archive", b'\x00' * 16)
|
|
109
128
|
|
|
110
|
-
def _write_archive_schema(self):
|
|
129
|
+
def _write_archive_schema(self) -> None:
|
|
111
130
|
'''Writes archive schema'''
|
|
112
131
|
self._write_raw_data(
|
|
113
132
|
f"{self._NAME}.archive.schema", bytes(self._SCHEMA, 'utf-8'))
|
|
114
133
|
|
|
115
|
-
def _write_index_schema(self, resource_name, schema):
|
|
134
|
+
def _write_index_schema(self, resource_name: str, schema: str) -> None:
|
|
116
135
|
self._write_raw_data(
|
|
117
136
|
f"{resource_name}_index.schema", bytes(schema, 'utf-8'))
|
|
118
137
|
|
|
119
|
-
def subarchive(self, name):
|
|
138
|
+
def subarchive(self, name: str) -> 'ArchiveBuilder':
|
|
120
139
|
"""
|
|
121
140
|
Returns an archive builder for the sub-archive `name`.
|
|
122
141
|
:raises $name_not_subarchive_error
|
|
123
142
|
:param name: name of the sub-archive
|
|
124
143
|
"""
|
|
125
|
-
|
|
144
|
+
raise NotImplementedError(f"subarchive '{name}' is not implemented")
|
|
126
145
|
|
|
127
146
|
@classmethod
|
|
128
|
-
def __validate_structure_fields(cls, name, struct, initializer):
|
|
147
|
+
def __validate_structure_fields(cls, name: str, struct: dict[str, Any], initializer: type[Structure]) -> None:
|
|
129
148
|
'''
|
|
130
149
|
Validates whether passed object has all required fields
|
|
131
150
|
|
|
@@ -142,7 +161,7 @@ class ArchiveBuilder:
|
|
|
142
161
|
if key not in initializer._FIELD_KEYS:
|
|
143
162
|
raise UnknownFieldError(key, name)
|
|
144
163
|
|
|
145
|
-
def __set_instance(self, storage, name, value):
|
|
164
|
+
def __set_instance(self, storage: _Resource, name: str, value: dict[str, Any]) -> None:
|
|
146
165
|
'''
|
|
147
166
|
Creates and writes instance type resource
|
|
148
167
|
|
|
@@ -160,7 +179,7 @@ class ArchiveBuilder:
|
|
|
160
179
|
|
|
161
180
|
storage.write(bout)
|
|
162
181
|
|
|
163
|
-
def __set_vector(self, storage, name, vector):
|
|
182
|
+
def __set_vector(self, storage: _Resource, name: str, vector: list[dict[str, Any]]) -> None:
|
|
164
183
|
'''
|
|
165
184
|
Creates and writes vector resource
|
|
166
185
|
|
|
@@ -179,7 +198,7 @@ class ArchiveBuilder:
|
|
|
179
198
|
field.is_signed, value[key])
|
|
180
199
|
storage.write(bout)
|
|
181
200
|
|
|
182
|
-
def __set_multivector(self, storage, name, value):
|
|
201
|
+
def __set_multivector(self, storage: _Resource, name: str, value: list[list[dict[str, Any]]]) -> None:
|
|
183
202
|
'''
|
|
184
203
|
Creates and writes multivector resource
|
|
185
204
|
|
|
@@ -193,10 +212,10 @@ class ArchiveBuilder:
|
|
|
193
212
|
for index, obj_type in enumerate(initializer_list[1:]):
|
|
194
213
|
initializers[obj_type._NAME] = (index, obj_type)
|
|
195
214
|
|
|
196
|
-
def valid_structure_name(_obj):
|
|
215
|
+
def valid_structure_name(_obj: dict[str, Any]) -> bool:
|
|
197
216
|
return _obj['name'] in [_initializer._NAME for _initializer in initializer_list[1:]]
|
|
198
217
|
|
|
199
|
-
def validate_fields(_obj):
|
|
218
|
+
def validate_fields(_obj: dict[str, Any]) -> None:
|
|
200
219
|
matched_obj_list = [
|
|
201
220
|
_initializer for _initializer in initializer_list[1:] \
|
|
202
221
|
if _initializer._NAME == _obj['name']]
|
|
@@ -248,7 +267,7 @@ class ArchiveBuilder:
|
|
|
248
267
|
self._resources_written.append(name)
|
|
249
268
|
self._resources_written.append(f'{name}_index')
|
|
250
269
|
|
|
251
|
-
def set(self, name, value):
|
|
270
|
+
def set(self, name: str, value: Any) -> None:
|
|
252
271
|
"""
|
|
253
272
|
Write a resource for this archive at once.
|
|
254
273
|
Can only be done once. `set` and `start` can't be used for the same resource.
|
|
@@ -284,7 +303,7 @@ class ArchiveBuilder:
|
|
|
284
303
|
|
|
285
304
|
self._resources_written.append(name)
|
|
286
305
|
|
|
287
|
-
def finish(self):
|
|
306
|
+
def finish(self) -> None:
|
|
288
307
|
"""
|
|
289
308
|
Closes the storage manager
|
|
290
309
|
"""
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Copyright (c) 2017 HERE Europe B.V.
|
|
3
|
+
See the LICENSE file in the root of this project for license details.
|
|
4
|
+
'''
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import mmap
|
|
9
|
+
from collections.abc import Callable
|
|
10
|
+
from typing import Union
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
from numpy.typing import NDArray
|
|
14
|
+
|
|
15
|
+
ReadableBuffer = Union[bytes, bytearray, memoryview, mmap.mmap]
|
|
16
|
+
|
|
17
|
+
# Sign bits cache for the value reading.
|
|
18
|
+
_SIGN_BITS = [0] + [(1 << (bits - 1)) for bits in range(1, 65)]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def make_field_reader(offset_bits: int, num_bits: int, is_signed: bool) -> Callable[[ReadableBuffer, int], int]:
    """Create a reader function specialised for one bit-packed field.

    The returned callable has the signature ``reader(data, pos)`` and returns
    the value of the field for the structure that starts at byte ``pos`` of
    ``data``. Everything that depends only on the field layout (byte offset,
    bit shift, masks, sign handling) is computed here once, so each call of
    the returned function only slices, shifts and masks.

    :param offset_bits: bit offset of the field inside the structure
    :param num_bits: width of the field in bits
    :param is_signed: whether the value is decoded as two's complement;
        one-bit fields ignore this flag and always yield 0 or 1
    :return: function mapping ``(data, pos)`` to the decoded integer
    """
    first_byte, shift = divmod(offset_bits, 8)

    # A one-bit field is a plain bit test on a single byte.
    if num_bits == 1:
        flag = 1 << shift

        def read_bit(data: ReadableBuffer, pos: int) -> int:
            return int((data[pos + first_byte] & flag) != 0)
        return read_bit

    width_bytes = (num_bits + 7) // 8
    last_byte = first_byte + width_bytes
    value_mask = (1 << num_bits) - 1
    # Number of field bits delivered by the ``width_bytes`` slice after the
    # in-byte shift; when that is short of ``num_bits`` one more byte is read.
    spill_shift = width_bytes * 8 - shift
    spills_over = spill_shift < num_bits

    if is_signed:
        sign_bit = _SIGN_BITS[num_bits]
        magnitude_mask = sign_bit - 1

        if spills_over:
            def read_signed_spilled(data: ReadableBuffer, pos: int) -> int:
                raw = int.from_bytes(
                    data[pos + first_byte: pos + last_byte], byteorder="little") >> shift
                raw = (raw | (data[pos + last_byte] << spill_shift)) & value_mask
                # Two's complement: magnitude bits minus the weight of the sign bit.
                return int((raw & magnitude_mask) - (raw & sign_bit))
            return read_signed_spilled

        if shift:
            def read_signed_shifted(data: ReadableBuffer, pos: int) -> int:
                raw = (int.from_bytes(
                    data[pos + first_byte: pos + last_byte],
                    byteorder="little") >> shift) & value_mask
                return (raw & magnitude_mask) - (raw & sign_bit)
            return read_signed_shifted

        def read_signed_aligned(data: ReadableBuffer, pos: int) -> int:
            raw = int.from_bytes(
                data[pos + first_byte: pos + last_byte],
                byteorder="little") & value_mask
            return (raw & magnitude_mask) - (raw & sign_bit)
        return read_signed_aligned

    # Unsigned variants mirror the signed ones without the sign step.
    if spills_over:
        def read_unsigned_spilled(data: ReadableBuffer, pos: int) -> int:
            raw = int.from_bytes(
                data[pos + first_byte: pos + last_byte], byteorder="little") >> shift
            raw |= data[pos + last_byte] << spill_shift
            return int(raw & value_mask)
        return read_unsigned_spilled

    if shift:
        def read_unsigned_shifted(data: ReadableBuffer, pos: int) -> int:
            return (int.from_bytes(
                data[pos + first_byte: pos + last_byte],
                byteorder="little") >> shift) & value_mask
        return read_unsigned_shifted

    def read_unsigned_aligned(data: ReadableBuffer, pos: int) -> int:
        return int.from_bytes(
            data[pos + first_byte: pos + last_byte],
            byteorder="little") & value_mask
    return read_unsigned_aligned
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def read_field_vectorized(raw_bytes_2d: NDArray[np.uint8], field_offset_bits: int, field_width_bits: int, is_signed: bool) -> NDArray[np.uint64] | NDArray[np.int64]:
    """Decode one bit-packed field for every element in a single pass.

    :param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
    :param field_offset_bits: bit offset of the field within each element
    :param field_width_bits: width of the field in bits (max 64)
    :param is_signed: whether to sign-extend the result; one-bit fields are
        returned as 0/1 regardless of this flag
    :return: numpy array with one field value per element (uint64, or int64
        for signed fields wider than one bit)
    """
    first_byte, shift = divmod(field_offset_bits, 8)

    # One-bit fields: shift the containing byte column and keep the lowest bit.
    if field_width_bits == 1:
        column = raw_bytes_2d[:, first_byte].astype(np.uint64)
        return (column >> np.uint64(shift)) & np.uint64(1)

    span_bytes = (shift + field_width_bits + 7) // 8
    direct_bytes = min(span_bytes, 8)

    # Assemble up to eight byte columns into a little-endian uint64 per row.
    values = np.zeros(raw_bytes_2d.shape[0], dtype=np.uint64)
    for k in range(direct_bytes):
        values |= raw_bytes_2d[:, first_byte + k].astype(np.uint64) << np.uint64(k * 8)
    values >>= np.uint64(shift)

    # An unaligned field can span nine bytes; merge the bits of the ninth byte
    # above the ones already gathered.
    gathered_bits = 8 * direct_bytes - shift
    if span_bytes > 8 and gathered_bits < field_width_bits:
        ninth = raw_bytes_2d[:, first_byte + 8].astype(np.uint64)
        values |= ninth << np.uint64(gathered_bits)

    if field_width_bits < 64:
        values &= np.uint64((1 << field_width_bits) - 1)

    if not is_signed:
        return values

    # A full-width field is already in two's complement: reinterpret the bits.
    if field_width_bits == 64:
        return values.view(np.int64)

    sign_bit = np.uint64(1 << (field_width_bits - 1))
    as_int64 = values.astype(np.int64)
    wrapped = as_int64 + np.int64(-(1 << field_width_bits))
    # Rows with the sign bit set take the wrapped (negative) value.
    return np.where(values & sign_bit, wrapped, as_int64)  # type: ignore[return-value, unused-ignore]
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def read_value(data: ReadableBuffer, offset_bits: int, num_bits: int, is_signed: bool) -> int:
    """Decode a single bit-packed value located ``offset_bits`` into ``data``.

    Builds a throw-away reader with :func:`make_field_reader` and applies it
    at byte position 0. When the same field is read many times, build the
    reader once with ``make_field_reader`` and call it directly instead.
    """
    return make_field_reader(offset_bits, num_bits, is_signed)(data, 0)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def write_value(data: bytearray, offset_bits: int, num_bits: int, is_signed: bool, value: int) -> None:
    """Write ``value`` into the ``num_bits``-wide field at ``offset_bits``.

    The bytes covering the field are updated with one read-modify-write, so
    every bit outside the field keeps its previous value. This holds whatever
    the buffer contained before and in whatever order fields are written.

    :param data: mutable buffer that receives the value
    :param offset_bits: bit offset of the field from the start of ``data``
    :param num_bits: width of the field in bits (at most 64)
    :param is_signed: kept for interface compatibility; negative values are
        stored as two's complement by masking to ``num_bits`` either way
    :param value: integer to store; bits beyond ``num_bits`` are discarded.
        For one-bit fields the bit is set only when ``value == 1``.
    :raises IndexError: if the field does not lie inside ``data``
    """
    assert num_bits <= 64, 'Number of bits to write is greater than 64'

    offset_bytes, offset_extra_bits = divmod(offset_bits, 8)

    if num_bits == 1:
        if value == 1:
            data[offset_bytes] |= 1 << offset_extra_bits
        else:
            data[offset_bytes] &= ~(1 << offset_extra_bits)
        return

    # Bytes touched by the field, including a spill byte for unaligned fields.
    span_bytes = (offset_extra_bits + num_bits + 7) // 8
    end = offset_bytes + span_bytes
    # Slice assignment would silently resize the buffer, so check explicitly.
    if offset_bytes < 0 or end > len(data):
        raise IndexError('bit field lies outside of the buffer')

    field_mask = ((1 << num_bits) - 1) << offset_extra_bits
    current = int.from_bytes(data[offset_bytes:end], byteorder="little")
    # Clear the field, then merge the new value; masking also turns negative
    # values into their two's complement representation.
    updated = (current & ~field_mask) | ((value << offset_extra_bits) & field_mask)
    data[offset_bytes:end] = updated.to_bytes(span_bytes, byteorder="little")
|