flatdata-py 0.4.11__tar.gz → 0.4.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/.gitignore +0 -4
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/PKG-INFO +3 -2
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/archive.py +36 -23
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/archive_builder.py +45 -26
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/data_access.py +23 -14
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/errors.py +15 -13
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/file_resource_storage.py +7 -5
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/file_resource_writer.py +12 -6
- flatdata_py-0.4.12/flatdata/lib/flatdata_writer.py +106 -0
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/inspector.py +14 -7
- flatdata_py-0.4.12/flatdata/lib/py.typed +0 -0
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/resource_storage.py +42 -20
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/resources.py +68 -48
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/structure.py +27 -17
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/tar_archive_resource_storage.py +7 -5
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/writer.py +2 -5
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/pyproject.toml +21 -2
- flatdata_py-0.4.11/flatdata/lib/data_access.py.orig +0 -204
- flatdata_py-0.4.11/flatdata/lib/data_access_BACKUP_91129.py +0 -219
- flatdata_py-0.4.11/flatdata/lib/data_access_BASE_91129.py +0 -64
- flatdata_py-0.4.11/flatdata/lib/data_access_LOCAL_91129.py +0 -112
- flatdata_py-0.4.11/flatdata/lib/data_access_REMOTE_91129.py +0 -168
- flatdata_py-0.4.11/flatdata/lib/flatdata_writer.py +0 -72
- flatdata_py-0.4.11/flatdata/lib/structure.py.orig +0 -92
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/README.md +0 -0
- {flatdata_py-0.4.11 → flatdata_py-0.4.12}/flatdata/lib/__init__.py +0 -0
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flatdata-py
|
|
3
|
-
Version: 0.4.11
|
|
3
|
+
Version: 0.4.12
|
|
4
4
|
Summary: Python 3 implementation of Flatdata
|
|
5
5
|
Project-URL: Homepage, https://github.com/heremaps/flatdata
|
|
6
6
|
Author: Flatdata Developers
|
|
7
7
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
8
8
|
Classifier: Operating System :: OS Independent
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Requires-
|
|
10
|
+
Requires-Python: >=3.8
|
|
11
|
+
Requires-Dist: flatdata-generator==0.4.12
|
|
11
12
|
Requires-Dist: numpy
|
|
12
13
|
Requires-Dist: pandas
|
|
13
14
|
Provides-Extra: inspector
|
|
@@ -3,17 +3,27 @@
|
|
|
3
3
|
See the LICENSE file in the root of this project for license details.
|
|
4
4
|
'''
|
|
5
5
|
|
|
6
|
-
from
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import Any, NamedTuple, TYPE_CHECKING
|
|
7
9
|
|
|
8
10
|
import pandas as pd
|
|
9
11
|
|
|
10
12
|
from .errors import MissingResourceError, SchemaMismatchError
|
|
11
13
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from .resources import ReadStorage, ResourceBase
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ResourceSignature(NamedTuple):
|
|
19
|
+
container: type[ResourceBase] | type[Archive]
|
|
20
|
+
initializer: Any
|
|
21
|
+
schema: str
|
|
22
|
+
is_optional: bool
|
|
23
|
+
doc: str
|
|
14
24
|
|
|
15
|
-
def _is_archive_signature(resource_signature):
|
|
16
|
-
return resource_signature.container == Archive
|
|
25
|
+
def _is_archive_signature(resource_signature: ResourceSignature) -> bool:
|
|
26
|
+
return bool(resource_signature.container == Archive)
|
|
17
27
|
|
|
18
28
|
_SCHEMA_EXT = ".schema"
|
|
19
29
|
|
|
@@ -23,35 +33,38 @@ class Archive:
|
|
|
23
33
|
Archive class. Entry point to Flatdata.
|
|
24
34
|
Provides access to flatdata resources and verifies archive/resource schemas on opening.
|
|
25
35
|
"""
|
|
36
|
+
_NAME: str
|
|
37
|
+
_SCHEMA: str
|
|
38
|
+
_RESOURCES: dict[str, ResourceSignature]
|
|
26
39
|
|
|
27
|
-
def __init__(self, resource_storage):
|
|
40
|
+
def __init__(self, resource_storage: ReadStorage) -> None:
|
|
28
41
|
"""
|
|
29
42
|
Opens archive from a given resource storage.
|
|
30
43
|
:raises flatdata.errors.CorruptArchiveError
|
|
31
44
|
:raises flatdata.errors.SchemaMismatchError
|
|
32
45
|
:param resource_storage: Resource storage to use.
|
|
33
46
|
"""
|
|
34
|
-
self._resource_storage = resource_storage
|
|
35
|
-
self._loaded_resources = {}
|
|
47
|
+
self._resource_storage: ReadStorage = resource_storage
|
|
48
|
+
self._loaded_resources: dict[str, Any] = {}
|
|
36
49
|
|
|
37
50
|
# Preload resources and check their schemas
|
|
38
51
|
for name, _ in sorted(list(self._RESOURCES.items())):
|
|
39
52
|
self.__getattr__(name)
|
|
40
53
|
|
|
41
|
-
def __getattr__(self, name):
|
|
54
|
+
def __getattr__(self, name: str) -> Any:
|
|
42
55
|
if name not in self._RESOURCES:
|
|
43
56
|
raise AttributeError("Resource %s not defined in archive." % name)
|
|
44
57
|
if name not in self._loaded_resources:
|
|
45
58
|
self._loaded_resources[name] = self._open_resource(name)
|
|
46
59
|
return self._loaded_resources[name]
|
|
47
60
|
|
|
48
|
-
def __dir__(self):
|
|
61
|
+
def __dir__(self) -> list[str]:
|
|
49
62
|
return list(self._RESOURCES.keys()) + ['schema']
|
|
50
63
|
|
|
51
|
-
def __repr__(self):
|
|
52
|
-
return self.to_data_frame()
|
|
64
|
+
def __repr__(self) -> str:
|
|
65
|
+
return repr(self.to_data_frame())
|
|
53
66
|
|
|
54
|
-
def to_data_frame(self):
|
|
67
|
+
def to_data_frame(self) -> pd.DataFrame:
|
|
55
68
|
result = []
|
|
56
69
|
for name, signature in self._RESOURCES.items():
|
|
57
70
|
resource = self.__getattr__(name)
|
|
@@ -62,34 +75,34 @@ class Archive:
|
|
|
62
75
|
columns=["Name", "Type", "Optional", "SizeInBytes", "Size"])
|
|
63
76
|
|
|
64
77
|
@classmethod
|
|
65
|
-
def name(cls):
|
|
78
|
+
def name(cls) -> str:
|
|
66
79
|
return cls._NAME
|
|
67
80
|
|
|
68
81
|
@classmethod
|
|
69
|
-
def schema(cls):
|
|
82
|
+
def schema(cls) -> str:
|
|
70
83
|
return cls._SCHEMA
|
|
71
84
|
|
|
72
85
|
@classmethod
|
|
73
|
-
def resource_schema(cls, resource):
|
|
74
|
-
return cls._RESOURCES[resource].schema
|
|
86
|
+
def resource_schema(cls, resource: str) -> str:
|
|
87
|
+
return str(cls._RESOURCES[resource].schema)
|
|
75
88
|
|
|
76
89
|
@classmethod
|
|
77
|
-
def open(cls, storage, name, initializer, is_optional=False):
|
|
90
|
+
def open(cls, storage: ReadStorage, name: str, initializer: type[Archive], is_optional: bool = False) -> Archive | None:
|
|
78
91
|
nested_storage = storage.get(name, is_optional)
|
|
79
92
|
assert nested_storage is not None or is_optional
|
|
80
93
|
if nested_storage is None:
|
|
81
94
|
return None
|
|
82
95
|
return initializer(nested_storage)
|
|
83
96
|
|
|
84
|
-
def size_in_bytes(self):
|
|
97
|
+
def size_in_bytes(self) -> int:
|
|
85
98
|
return sum(resource_value.size_in_bytes() for resource_value in
|
|
86
99
|
(self.__getattr__(resource) for resource in self._RESOURCES.keys())
|
|
87
100
|
if resource_value)
|
|
88
101
|
|
|
89
|
-
def __len__(self):
|
|
102
|
+
def __len__(self) -> int:
|
|
90
103
|
return len(self._RESOURCES)
|
|
91
104
|
|
|
92
|
-
def _schema_validated_resource_signature(self, name):
|
|
105
|
+
def _schema_validated_resource_signature(self, name: str) -> ResourceSignature | None:
|
|
93
106
|
resource_signature = self._RESOURCES[name]
|
|
94
107
|
# We check only schema for non-subarchives, since the subarchives schema is checked,
|
|
95
108
|
# when it is initialized.
|
|
@@ -103,7 +116,7 @@ class Archive:
|
|
|
103
116
|
return None
|
|
104
117
|
return resource_signature
|
|
105
118
|
|
|
106
|
-
def _open_resource(self, name):
|
|
119
|
+
def _open_resource(self, name: str) -> Any:
|
|
107
120
|
resource_signature = self._schema_validated_resource_signature(name)
|
|
108
121
|
if resource_signature:
|
|
109
122
|
resource = resource_signature.container.open(storage=self._resource_storage,
|
|
@@ -116,7 +129,7 @@ class Archive:
|
|
|
116
129
|
return None
|
|
117
130
|
|
|
118
131
|
@staticmethod
|
|
119
|
-
def _check_non_subarchive_schema(name, resource_signature, storage):
|
|
132
|
+
def _check_non_subarchive_schema(name: str, resource_signature: ResourceSignature, storage: Any) -> None:
|
|
120
133
|
actual_schema = bytes(storage).decode()
|
|
121
134
|
if actual_schema != resource_signature.schema:
|
|
122
135
|
raise SchemaMismatchError(
|
|
@@ -3,8 +3,10 @@
|
|
|
3
3
|
See the LICENSE file in the root of this project for license details.
|
|
4
4
|
'''
|
|
5
5
|
|
|
6
|
-
from
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
7
8
|
import os
|
|
9
|
+
from typing import Any, NamedTuple, Protocol, TYPE_CHECKING
|
|
8
10
|
|
|
9
11
|
from .errors import IndexWriterError, MissingFieldError, UnknownFieldError, \
|
|
10
12
|
UnknownStructureError, UnknownResourceError, ResourceAlreadySetError
|
|
@@ -12,10 +14,24 @@ from .errors import IndexWriterError, MissingFieldError, UnknownFieldError, \
|
|
|
12
14
|
from .resources import Instance, Vector, Multivector, RawData
|
|
13
15
|
from .data_access import write_value
|
|
14
16
|
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from .resource_storage import _Resource
|
|
19
|
+
from .structure import Structure
|
|
20
|
+
|
|
15
21
|
_SCHEMA_EXT = ".schema"
|
|
16
22
|
|
|
17
|
-
|
|
18
|
-
|
|
23
|
+
|
|
24
|
+
class ResourceSignature(NamedTuple):
|
|
25
|
+
container: type
|
|
26
|
+
initializer: Any
|
|
27
|
+
schema: str
|
|
28
|
+
is_optional: bool
|
|
29
|
+
doc: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class WriteStorage(Protocol):
|
|
33
|
+
def get(self, resource_name: str, is_subarchive: bool = False) -> Any: ...
|
|
34
|
+
def close(self) -> None: ...
|
|
19
35
|
|
|
20
36
|
|
|
21
37
|
class IndexWriter:
|
|
@@ -23,7 +39,7 @@ class IndexWriter:
|
|
|
23
39
|
IndexWriter class. Only applicable when multivector is present in archive schema.
|
|
24
40
|
"""
|
|
25
41
|
|
|
26
|
-
def __init__(self, name, size, resource_storage):
|
|
42
|
+
def __init__(self, name: str, size: int, resource_storage: WriteStorage) -> None:
|
|
27
43
|
"""
|
|
28
44
|
Create IndexWriter class.
|
|
29
45
|
|
|
@@ -36,9 +52,9 @@ class IndexWriter:
|
|
|
36
52
|
|
|
37
53
|
self._name = name
|
|
38
54
|
self._index_size = size
|
|
39
|
-
self._fout = resource_storage.get(f'{self._name}_index', False)
|
|
55
|
+
self._fout: _Resource = resource_storage.get(f'{self._name}_index', False)
|
|
40
56
|
|
|
41
|
-
def add(self, index):
|
|
57
|
+
def add(self, index: int) -> None:
|
|
42
58
|
"""
|
|
43
59
|
Convert index(number) to bytearray and add to in memory store
|
|
44
60
|
"""
|
|
@@ -46,7 +62,7 @@ class IndexWriter:
|
|
|
46
62
|
byteorder="little", signed=False)
|
|
47
63
|
self._fout.write(index_bytes)
|
|
48
64
|
|
|
49
|
-
def finish(self):
|
|
65
|
+
def finish(self) -> None:
|
|
50
66
|
"""
|
|
51
67
|
Complete index resource by adding size and padding followed by writing to file
|
|
52
68
|
"""
|
|
@@ -60,30 +76,33 @@ class ArchiveBuilder:
|
|
|
60
76
|
ArchiveBuilder class. Entry point to writing Flatdata.
|
|
61
77
|
Provides methods to create flatdata archives.
|
|
62
78
|
"""
|
|
79
|
+
_NAME: str
|
|
80
|
+
_SCHEMA: str
|
|
81
|
+
_RESOURCES: dict[str, ResourceSignature]
|
|
63
82
|
|
|
64
|
-
def __init__(self, resource_storage, path=""):
|
|
83
|
+
def __init__(self, resource_storage: WriteStorage, path: str = "") -> None:
|
|
65
84
|
"""
|
|
66
85
|
Opens archive from a given resource writer.
|
|
67
86
|
:param resource_storage: storage manager to store and write to disc
|
|
68
87
|
:param path: file path where archive is created
|
|
69
88
|
"""
|
|
70
89
|
self._path = os.path.join(path, self._NAME)
|
|
71
|
-
self._resource_storage = resource_storage
|
|
90
|
+
self._resource_storage: WriteStorage = resource_storage
|
|
72
91
|
self._write_archive_signature()
|
|
73
92
|
self._write_archive_schema()
|
|
74
93
|
self._resources_written = [f"{self._NAME}.archive"]
|
|
75
94
|
|
|
76
95
|
@classmethod
|
|
77
|
-
def name(cls):
|
|
96
|
+
def name(cls) -> str:
|
|
78
97
|
'''Returns archive name'''
|
|
79
98
|
return cls._NAME
|
|
80
99
|
|
|
81
100
|
@classmethod
|
|
82
|
-
def schema(cls):
|
|
101
|
+
def schema(cls) -> str:
|
|
83
102
|
'''Returns archive schema'''
|
|
84
103
|
return cls._SCHEMA
|
|
85
104
|
|
|
86
|
-
def _write_raw_data(self, name, data):
|
|
105
|
+
def _write_raw_data(self, name: str, data: bytes | bytearray) -> None:
|
|
87
106
|
'''
|
|
88
107
|
Helper function to write data
|
|
89
108
|
|
|
@@ -94,7 +113,7 @@ class ArchiveBuilder:
|
|
|
94
113
|
storage.write(data)
|
|
95
114
|
storage.close()
|
|
96
115
|
|
|
97
|
-
def _write_schema(self, name):
|
|
116
|
+
def _write_schema(self, name: str) -> None:
|
|
98
117
|
'''
|
|
99
118
|
Writes resource schema
|
|
100
119
|
|
|
@@ -103,29 +122,29 @@ class ArchiveBuilder:
|
|
|
103
122
|
self._write_raw_data(f"{name}.schema", bytes(
|
|
104
123
|
self._RESOURCES[name].schema, 'utf-8'))
|
|
105
124
|
|
|
106
|
-
def _write_archive_signature(self):
|
|
125
|
+
def _write_archive_signature(self) -> None:
|
|
107
126
|
'''Writes archive's signature'''
|
|
108
127
|
self._write_raw_data(f"{self._NAME}.archive", b'\x00' * 16)
|
|
109
128
|
|
|
110
|
-
def _write_archive_schema(self):
|
|
129
|
+
def _write_archive_schema(self) -> None:
|
|
111
130
|
'''Writes archive schema'''
|
|
112
131
|
self._write_raw_data(
|
|
113
132
|
f"{self._NAME}.archive.schema", bytes(self._SCHEMA, 'utf-8'))
|
|
114
133
|
|
|
115
|
-
def _write_index_schema(self, resource_name, schema):
|
|
134
|
+
def _write_index_schema(self, resource_name: str, schema: str) -> None:
|
|
116
135
|
self._write_raw_data(
|
|
117
136
|
f"{resource_name}_index.schema", bytes(schema, 'utf-8'))
|
|
118
137
|
|
|
119
|
-
def subarchive(self, name):
|
|
138
|
+
def subarchive(self, name: str) -> 'ArchiveBuilder':
|
|
120
139
|
"""
|
|
121
140
|
Returns an archive builder for the sub-archive `name`.
|
|
122
141
|
:raises $name_not_subarchive_error
|
|
123
142
|
:param name: name of the sub-archive
|
|
124
143
|
"""
|
|
125
|
-
|
|
144
|
+
raise NotImplementedError(f"subarchive '{name}' is not implemented")
|
|
126
145
|
|
|
127
146
|
@classmethod
|
|
128
|
-
def __validate_structure_fields(cls, name, struct, initializer):
|
|
147
|
+
def __validate_structure_fields(cls, name: str, struct: dict[str, Any], initializer: type[Structure]) -> None:
|
|
129
148
|
'''
|
|
130
149
|
Validates whether passed object has all required fields
|
|
131
150
|
|
|
@@ -142,7 +161,7 @@ class ArchiveBuilder:
|
|
|
142
161
|
if key not in initializer._FIELD_KEYS:
|
|
143
162
|
raise UnknownFieldError(key, name)
|
|
144
163
|
|
|
145
|
-
def __set_instance(self, storage, name, value):
|
|
164
|
+
def __set_instance(self, storage: _Resource, name: str, value: dict[str, Any]) -> None:
|
|
146
165
|
'''
|
|
147
166
|
Creates and writes instance type resource
|
|
148
167
|
|
|
@@ -160,7 +179,7 @@ class ArchiveBuilder:
|
|
|
160
179
|
|
|
161
180
|
storage.write(bout)
|
|
162
181
|
|
|
163
|
-
def __set_vector(self, storage, name, vector):
|
|
182
|
+
def __set_vector(self, storage: _Resource, name: str, vector: list[dict[str, Any]]) -> None:
|
|
164
183
|
'''
|
|
165
184
|
Creates and writes vector resource
|
|
166
185
|
|
|
@@ -179,7 +198,7 @@ class ArchiveBuilder:
|
|
|
179
198
|
field.is_signed, value[key])
|
|
180
199
|
storage.write(bout)
|
|
181
200
|
|
|
182
|
-
def __set_multivector(self, storage, name, value):
|
|
201
|
+
def __set_multivector(self, storage: _Resource, name: str, value: list[list[dict[str, Any]]]) -> None:
|
|
183
202
|
'''
|
|
184
203
|
Creates and writes multivector resource
|
|
185
204
|
|
|
@@ -193,10 +212,10 @@ class ArchiveBuilder:
|
|
|
193
212
|
for index, obj_type in enumerate(initializer_list[1:]):
|
|
194
213
|
initializers[obj_type._NAME] = (index, obj_type)
|
|
195
214
|
|
|
196
|
-
def valid_structure_name(_obj):
|
|
215
|
+
def valid_structure_name(_obj: dict[str, Any]) -> bool:
|
|
197
216
|
return _obj['name'] in [_initializer._NAME for _initializer in initializer_list[1:]]
|
|
198
217
|
|
|
199
|
-
def validate_fields(_obj):
|
|
218
|
+
def validate_fields(_obj: dict[str, Any]) -> None:
|
|
200
219
|
matched_obj_list = [
|
|
201
220
|
_initializer for _initializer in initializer_list[1:] \
|
|
202
221
|
if _initializer._NAME == _obj['name']]
|
|
@@ -248,7 +267,7 @@ class ArchiveBuilder:
|
|
|
248
267
|
self._resources_written.append(name)
|
|
249
268
|
self._resources_written.append(f'{name}_index')
|
|
250
269
|
|
|
251
|
-
def set(self, name, value):
|
|
270
|
+
def set(self, name: str, value: Any) -> None:
|
|
252
271
|
"""
|
|
253
272
|
Write a resource for this archive at once.
|
|
254
273
|
Can only be done once. `set` and `start` can't be used for the same resource.
|
|
@@ -284,7 +303,7 @@ class ArchiveBuilder:
|
|
|
284
303
|
|
|
285
304
|
self._resources_written.append(name)
|
|
286
305
|
|
|
287
|
-
def finish(self):
|
|
306
|
+
def finish(self) -> None:
|
|
288
307
|
"""
|
|
289
308
|
Closes the storage manager
|
|
290
309
|
"""
|
|
@@ -3,13 +3,22 @@
|
|
|
3
3
|
See the LICENSE file in the root of this project for license details.
|
|
4
4
|
'''
|
|
5
5
|
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import mmap
|
|
9
|
+
from collections.abc import Callable
|
|
10
|
+
from typing import Union
|
|
11
|
+
|
|
6
12
|
import numpy as np
|
|
13
|
+
from numpy.typing import NDArray
|
|
14
|
+
|
|
15
|
+
ReadableBuffer = Union[bytes, bytearray, memoryview, mmap.mmap]
|
|
7
16
|
|
|
8
17
|
# Sign bits cache for the value reading.
|
|
9
18
|
_SIGN_BITS = [0] + [(1 << (bits - 1)) for bits in range(1, 65)]
|
|
10
19
|
|
|
11
20
|
|
|
12
|
-
def make_field_reader(offset_bits, num_bits, is_signed):
|
|
21
|
+
def make_field_reader(offset_bits: int, num_bits: int, is_signed: bool) -> Callable[[ReadableBuffer, int], int]:
|
|
13
22
|
"""Build a specialized closure for reading a single field from a structure.
|
|
14
23
|
|
|
15
24
|
Returns a function reader(data, pos_bytes) that reads the field value
|
|
@@ -26,7 +35,7 @@ def make_field_reader(offset_bits, num_bits, is_signed):
|
|
|
26
35
|
|
|
27
36
|
if num_bits == 1:
|
|
28
37
|
bit_mask = 1 << offset_extra
|
|
29
|
-
def reader(data, pos):
|
|
38
|
+
def reader(data: ReadableBuffer, pos: int) -> int:
|
|
30
39
|
return int((data[pos + offset_bytes] & bit_mask) != 0)
|
|
31
40
|
return reader
|
|
32
41
|
|
|
@@ -34,21 +43,21 @@ def make_field_reader(offset_bits, num_bits, is_signed):
|
|
|
34
43
|
sign_bit = _SIGN_BITS[num_bits]
|
|
35
44
|
sign_mask = sign_bit - 1
|
|
36
45
|
if needs_extra:
|
|
37
|
-
def reader(data, pos):
|
|
46
|
+
def reader(data: ReadableBuffer, pos: int) -> int:
|
|
38
47
|
result = int.from_bytes(
|
|
39
48
|
data[pos + offset_bytes: pos + end_byte], byteorder="little")
|
|
40
49
|
result >>= offset_extra
|
|
41
50
|
result |= data[pos + end_byte] << extra_shift
|
|
42
51
|
result &= mask
|
|
43
|
-
return (result & sign_mask) - (result & sign_bit)
|
|
52
|
+
return int((result & sign_mask) - (result & sign_bit))
|
|
44
53
|
elif offset_extra:
|
|
45
|
-
def reader(data, pos):
|
|
54
|
+
def reader(data: ReadableBuffer, pos: int) -> int:
|
|
46
55
|
result = (int.from_bytes(
|
|
47
56
|
data[pos + offset_bytes: pos + end_byte],
|
|
48
57
|
byteorder="little") >> offset_extra) & mask
|
|
49
58
|
return (result & sign_mask) - (result & sign_bit)
|
|
50
59
|
else:
|
|
51
|
-
def reader(data, pos):
|
|
60
|
+
def reader(data: ReadableBuffer, pos: int) -> int:
|
|
52
61
|
result = int.from_bytes(
|
|
53
62
|
data[pos + offset_bytes: pos + end_byte],
|
|
54
63
|
byteorder="little") & mask
|
|
@@ -57,26 +66,26 @@ def make_field_reader(offset_bits, num_bits, is_signed):
|
|
|
57
66
|
|
|
58
67
|
# Unsigned paths
|
|
59
68
|
if needs_extra:
|
|
60
|
-
def reader(data, pos):
|
|
69
|
+
def reader(data: ReadableBuffer, pos: int) -> int:
|
|
61
70
|
result = int.from_bytes(
|
|
62
71
|
data[pos + offset_bytes: pos + end_byte], byteorder="little")
|
|
63
72
|
result >>= offset_extra
|
|
64
73
|
result |= data[pos + end_byte] << extra_shift
|
|
65
|
-
return result & mask
|
|
74
|
+
return int(result & mask)
|
|
66
75
|
elif offset_extra:
|
|
67
|
-
def reader(data, pos):
|
|
76
|
+
def reader(data: ReadableBuffer, pos: int) -> int:
|
|
68
77
|
return (int.from_bytes(
|
|
69
78
|
data[pos + offset_bytes: pos + end_byte],
|
|
70
79
|
byteorder="little") >> offset_extra) & mask
|
|
71
80
|
else:
|
|
72
|
-
def reader(data, pos):
|
|
81
|
+
def reader(data: ReadableBuffer, pos: int) -> int:
|
|
73
82
|
return int.from_bytes(
|
|
74
83
|
data[pos + offset_bytes: pos + end_byte],
|
|
75
84
|
byteorder="little") & mask
|
|
76
85
|
return reader
|
|
77
86
|
|
|
78
87
|
|
|
79
|
-
def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_signed):
|
|
88
|
+
def read_field_vectorized(raw_bytes_2d: NDArray[np.uint8], field_offset_bits: int, field_width_bits: int, is_signed: bool) -> NDArray[np.uint64] | NDArray[np.int64]:
|
|
80
89
|
"""Read a bit-packed field from all elements at once, returning a numpy array.
|
|
81
90
|
|
|
82
91
|
:param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
|
|
@@ -117,12 +126,12 @@ def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_
|
|
|
117
126
|
sign_bit = np.uint64(1 << (field_width_bits - 1))
|
|
118
127
|
offset = -(1 << field_width_bits)
|
|
119
128
|
signed = result.astype(np.int64) + np.int64(offset)
|
|
120
|
-
result = np.where(result & sign_bit, signed, result.astype(np.int64))
|
|
129
|
+
result = np.where(result & sign_bit, signed, result.astype(np.int64)) # type: ignore[assignment, unused-ignore]
|
|
121
130
|
|
|
122
131
|
return result
|
|
123
132
|
|
|
124
133
|
|
|
125
|
-
def read_value(data, offset_bits, num_bits, is_signed):
|
|
134
|
+
def read_value(data: ReadableBuffer, offset_bits: int, num_bits: int, is_signed: bool) -> int:
|
|
126
135
|
"""Read a bit-packed value from data at the given bit offset.
|
|
127
136
|
|
|
128
137
|
This is a convenience wrapper around :func:`make_field_reader` for one-off
|
|
@@ -133,7 +142,7 @@ def read_value(data, offset_bits, num_bits, is_signed):
|
|
|
133
142
|
return reader(data, 0)
|
|
134
143
|
|
|
135
144
|
|
|
136
|
-
def write_value(data, offset_bits, num_bits, is_signed, value):
|
|
145
|
+
def write_value(data: bytearray, offset_bits: int, num_bits: int, is_signed: bool, value: int) -> None:
|
|
137
146
|
assert num_bits <= 64, f'Number of bits to write is greater than 64'
|
|
138
147
|
|
|
139
148
|
offset_bytes, offset_extra_bits = divmod(offset_bits, 8)
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
See the LICENSE file in the root of this project for license details.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
6
8
|
import difflib
|
|
7
9
|
|
|
8
10
|
|
|
@@ -11,7 +13,7 @@ class SchemaMismatchError(RuntimeError):
|
|
|
11
13
|
Schema mismatch: archive does not match software expectations.
|
|
12
14
|
"""
|
|
13
15
|
|
|
14
|
-
def __init__(self, name, expected_schema, actual_schema):
|
|
16
|
+
def __init__(self, name: str, expected_schema: list[str], actual_schema: list[str]) -> None:
|
|
15
17
|
diff = '\n'.join([l for l in difflib.unified_diff(expected_schema, actual_schema)])
|
|
16
18
|
message = "Schema mismatch for resource {name}. Expected: \n{expected}\n\nActual:{actual}\n\nDiff:{diff}"
|
|
17
19
|
RuntimeError.__init__(self,
|
|
@@ -36,7 +38,7 @@ class MissingResourceError(KeyError, CorruptArchiveError):
|
|
|
36
38
|
"""
|
|
37
39
|
Resource or schema is missing.
|
|
38
40
|
"""
|
|
39
|
-
def __init__(self, key):
|
|
41
|
+
def __init__(self, key: str) -> None:
|
|
40
42
|
super().__init__("Resource {key} not found".format(key=key))
|
|
41
43
|
|
|
42
44
|
|
|
@@ -50,7 +52,7 @@ class MissingFieldError(RuntimeError):
|
|
|
50
52
|
"""
|
|
51
53
|
Fields missing in provided dictionary object
|
|
52
54
|
"""
|
|
53
|
-
def __init__(self, key, name):
|
|
55
|
+
def __init__(self, key: str, name: str) -> None:
|
|
54
56
|
super().__init__(f'Missing "{key}" is required for "{name}"')
|
|
55
57
|
|
|
56
58
|
|
|
@@ -58,21 +60,21 @@ class UnknownFieldError(RuntimeError):
|
|
|
58
60
|
"""
|
|
59
61
|
Field provided is not present in resource schema
|
|
60
62
|
"""
|
|
61
|
-
def __init__(self, key, name):
|
|
63
|
+
def __init__(self, key: str, name: str) -> None:
|
|
62
64
|
super().__init__(f'Field "{key}" is not specified for "{name}"')
|
|
63
65
|
|
|
64
66
|
class FileExistsError(RuntimeError):
|
|
65
67
|
"""
|
|
66
68
|
Provided file name is already present.
|
|
67
69
|
"""
|
|
68
|
-
def __init__(self, key):
|
|
70
|
+
def __init__(self, key: str) -> None:
|
|
69
71
|
super().__init__(f'File "{key}" exists already')
|
|
70
72
|
|
|
71
73
|
class DirExistsError(RuntimeError):
|
|
72
74
|
"""
|
|
73
75
|
Directory with given path is already present
|
|
74
76
|
"""
|
|
75
|
-
def __init__(self, path):
|
|
77
|
+
def __init__(self, path: str) -> None:
|
|
76
78
|
super().__init__(f'Directory "{path}" exists already')
|
|
77
79
|
|
|
78
80
|
class UnknownStructureError(RuntimeError):
|
|
@@ -80,47 +82,47 @@ class UnknownStructureError(RuntimeError):
|
|
|
80
82
|
Provided structure/dictionary is not part of any initializer defined
|
|
81
83
|
in multivector schema
|
|
82
84
|
"""
|
|
83
|
-
def __init__(self, name):
|
|
85
|
+
def __init__(self, name: str) -> None:
|
|
84
86
|
super().__init__(f'"{name}" structure is not part of the multivector')
|
|
85
87
|
|
|
86
88
|
class IndexWriterError(RuntimeError):
|
|
87
89
|
"""
|
|
88
90
|
Error while creating instance of IndexWriter needed for multivector
|
|
89
91
|
"""
|
|
90
|
-
def __init__(self, error_str="Error initializing IndexWritter Class"):
|
|
92
|
+
def __init__(self, error_str: str = "Error initializing IndexWritter Class") -> None:
|
|
91
93
|
super().__init__(f'{error_str}')
|
|
92
94
|
|
|
93
95
|
class ArchivePathNotProvidedError(RuntimeError):
|
|
94
96
|
"""
|
|
95
97
|
Path where archive will be created is missing
|
|
96
98
|
"""
|
|
97
|
-
def __init__(self):
|
|
99
|
+
def __init__(self) -> None:
|
|
98
100
|
super().__init__("File path is not provided")
|
|
99
101
|
|
|
100
102
|
class MissingResourceName(RuntimeError):
|
|
101
103
|
"""
|
|
102
104
|
Resource name is not provided
|
|
103
105
|
"""
|
|
104
|
-
def __init__(self):
|
|
106
|
+
def __init__(self) -> None:
|
|
105
107
|
super().__init__("Resource name is not provided")
|
|
106
108
|
|
|
107
109
|
class FileNameNotProvided(RuntimeError):
|
|
108
110
|
"""
|
|
109
111
|
File name is not provided
|
|
110
112
|
"""
|
|
111
|
-
def __init__(self):
|
|
113
|
+
def __init__(self) -> None:
|
|
112
114
|
super().__init__("File name is not provided")
|
|
113
115
|
|
|
114
116
|
class ResourceAlreadySetError(RuntimeError):
|
|
115
117
|
"""
|
|
116
118
|
Provided resource name is already set for the archive
|
|
117
119
|
"""
|
|
118
|
-
def __init__(self):
|
|
120
|
+
def __init__(self) -> None:
|
|
119
121
|
super().__init__("Resource is already set")
|
|
120
122
|
|
|
121
123
|
class UnknownResourceError(RuntimeError):
|
|
122
124
|
"""
|
|
123
125
|
Provided resource name is not in archive schema
|
|
124
126
|
"""
|
|
125
|
-
def __init__(self, name):
|
|
127
|
+
def __init__(self, name: str) -> None:
|
|
126
128
|
super().__init__(f"Resource {name} is not part of provided schema")
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
See the LICENSE file in the root of this project for license details.
|
|
4
4
|
'''
|
|
5
5
|
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
6
8
|
import mmap
|
|
7
9
|
import os
|
|
8
10
|
|
|
@@ -15,7 +17,7 @@ class FileResourceStorage:
|
|
|
15
17
|
"""
|
|
16
18
|
|
|
17
19
|
@staticmethod
|
|
18
|
-
def memory_map(filename):
|
|
20
|
+
def memory_map(filename: str) -> mmap.mmap:
|
|
19
21
|
"""
|
|
20
22
|
Memory maps given file. Introduced to be able to swap mmap implementations.
|
|
21
23
|
:param filename:
|
|
@@ -24,10 +26,10 @@ class FileResourceStorage:
|
|
|
24
26
|
opened_file = open(filename, 'r')
|
|
25
27
|
return mmap.mmap(opened_file.fileno(), 0, access=mmap.ACCESS_READ)
|
|
26
28
|
|
|
27
|
-
def __init__(self, path):
|
|
28
|
-
self.path = path
|
|
29
|
+
def __init__(self, path: str) -> None:
|
|
30
|
+
self.path: str = path
|
|
29
31
|
|
|
30
|
-
def get(self, key, is_optional=False):
|
|
32
|
+
def get(self, key: str, is_optional: bool = False) -> mmap.mmap | 'FileResourceStorage' | None:
|
|
31
33
|
filename = os.path.join(self.path, key)
|
|
32
34
|
if not os.path.exists(filename):
|
|
33
35
|
if not is_optional:
|
|
@@ -40,5 +42,5 @@ class FileResourceStorage:
|
|
|
40
42
|
|
|
41
43
|
return FileResourceStorage(filename)
|
|
42
44
|
|
|
43
|
-
def ls(self):
|
|
45
|
+
def ls(self) -> list[str]:
|
|
44
46
|
return os.listdir(self.path)
|