gemf-map 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gemf/gemf.py ADDED
@@ -0,0 +1,916 @@
1
+ """
2
+ This submodule defines the core functionality of the package through the `GEMF` class.
3
+ """
4
+
5
+ from collections import defaultdict
6
+ import os
7
+ import shutil
8
+ import json
9
+ from typing import Dict, List, Callable, Optional
10
+
11
+ from gemf.utils import kwargify, listdirs, to_json
12
+ from gemf.tiles import AbstractRange, BufferTile, NamedTileBase, SlippyTileMap, Tile, TileCollection, TileRange
13
+
14
+ # TODO: rename RangeDetails RangeDetailList ? check specification for name
15
+
16
class SIZES:
    """Byte widths of the primitive fields and fixed-size records used in this module."""

    # widths of the two primitive integer encodings
    DATA = 4
    OFFSET = 8

    # widths of composite records, derived from the primitives above
    HEADER_INFO = OFFSET + DATA

    RANGE = OFFSET + DATA * 6
    DETAIL = DATA + OFFSET
24
+
25
+
26
# Text encoding used when strings are written to / read from the binary file.
ENCODING = "ascii"
# Error policy handed to `str.encode()` when writing: characters that cannot be encoded are dropped.
ENCODING_ERROR = "ignore"
28
+
29
+
30
class DetailsNotLoadedError(Exception):
    """Raised when an operation requires the range details, but they have not been loaded yet."""

    def __init__(self, **kwargs) -> None:
        message = "`RangeDetails` have not been loaded. Call `gemf.load_details()` before or instantiate `GEMF` with `lazy = False`."
        super().__init__(message, **kwargs)
33
+
34
+
35
+ # auxiliary methods, used to read encoded binary data
36
+ def _read_value(f, N):
37
+ """Read N bytes from a binary file and return as int."""
38
+ value_bytes = f.read(N)
39
+ return int.from_bytes(value_bytes, "big")
40
+
41
def _read_int(f):
    """Read one data integer (`SIZES.DATA` bytes) from the current position of `f`."""
    width = SIZES.DATA
    return _read_value(f, width)
44
+
45
def _read_offset(f):
    """Read one offset integer (`SIZES.OFFSET` bytes) from the current position of `f`."""
    width = SIZES.OFFSET
    return _read_value(f, width)
48
+
49
def _read_string(f, N):
    """Read N bytes from `f` and decode them using the module-wide `ENCODING`."""
    raw = f.read(N)
    return str(raw, ENCODING)
53
+
54
+
55
+ # encoding utilities, used to write binary gemf file
56
def _encode_data(value):
    """Serialize a data integer as `SIZES.DATA` big-endian bytes."""
    return value.to_bytes(length=SIZES.DATA, byteorder="big")
59
+
60
def _encode_offset(value):
    """Serialize an offset integer as `SIZES.OFFSET` big-endian bytes."""
    return value.to_bytes(length=SIZES.OFFSET, byteorder="big")
63
+
64
def _encode_string(value):
    """Serialize a string using `ENCODING`; the resulting byte length is variable."""
    return value.encode(encoding=ENCODING, errors=ENCODING_ERROR)
67
+
68
+
69
+
70
+ # GEMF classes
71
class GEMFValueBase:
    """
    Mixin providing the shared behavior of GEMF value types.

    Python primitives are wrapped in GEMF classes so they can be measured and
    serialized uniformly. Concrete instances are expected to carry an `_encode`
    callable which turns the value into its binary representation.
    """

    def __str__(self):
        return super().__str__()

    def __repr__(self):
        class_name = type(self).__name__
        return f"{class_name}: {super().__str__()}"

    def __len__(self):
        """Return the size of the encoded value in bytes."""
        encoded = self._encode(self)
        return len(encoded)

    def write(self, f, *args, **kwargs):
        """Write the encoded value to the binary file `f` and return the result of `f.write`."""
        encoded = self._encode(self)
        return f.write(encoded, *args, **kwargs)
86
+
87
+
88
class GEMFIntBase(int, GEMFValueBase):
    """Common base of the int-like GEMF types; pairs an int value with its encoder."""

    def __new__(cls, value: int, encoding_method: Callable[[int], bytes]):
        if isinstance(value, int):
            instance = int.__new__(cls, value)
            # the encoder is stored per instance and later called as `self._encode(self)`
            instance._encode = encoding_method
            return instance
        raise TypeError("`value` must be of type `int`")
95
+
96
+
97
class GEMFInt(GEMFIntBase):
    """An integer that is serialized with `_encode_data` (4 bytes)."""

    def __new__(cls, value: int) -> "GEMFInt":
        return super().__new__(cls, value, _encode_data)
102
+
103
+
104
class GEMFOffset(GEMFIntBase):
    """An integer that is serialized with `_encode_offset` (8 bytes), aka 'offset'."""

    def __new__(cls, value: int) -> "GEMFOffset":
        return super().__new__(cls, value, _encode_offset)
109
+
110
+
111
class GEMFString(GEMFValueBase, str):
    """
    Class representing a variable size string.

    # Parameters
    - `value`: the text to wrap
    - `encoding`: encoding used when the string is measured/serialized (defaults to `ENCODING`)
    - `error`: error policy passed to `str.encode()` (defaults to `ENCODING_ERROR`)
    """
    def __new__(cls, value: str, encoding: str = ENCODING, error: str = ENCODING_ERROR) -> "GEMFString":
        obj = str.__new__(cls, value)
        obj.encoding = encoding
        obj.error = error
        # Encode with the settings of this instance. Previously the module-level
        # defaults were always used and the `encoding`/`error` arguments were ignored.
        obj._encode = lambda text: text.encode(encoding, error)
        return obj
119
+
120
+
121
class GEMFList:
    """An ordered collection of GEMF objects which can be measured and serialized as a unit."""

    def __init__(self, items: list) -> None:
        self.items = items

    def __len__(self):
        """Total byte-length of all member items (not the number of items)."""
        total = 0
        for member in self.items:
            total += len(member)
        return total

    def __iter__(self):
        return iter(self.items)

    def __getitem__(self, idx):
        return self.items[idx]

    def to_dict(self, *args, **kwargs):
        """Return a list holding the dict-representation (or JSON value) of each member item."""
        return [
            member.to_dict() if hasattr(member, "to_dict") else to_json(member)
            for member in self.items
        ]

    def __str__(self) -> str:
        return str(self.to_dict())

    def append(self, item):
        """Append `item`; if the list is not empty, it must be an instance of the first item's type."""
        if len(self.items) > 0:
            assert isinstance(item, type(self[0])), "Items must be of equal type."
        return self.items.append(item)

    def extend(self, items):
        """Append all `items`; if the list is not empty, they must be instances of the first item's type."""
        if len(self.items) > 0:
            expected_type = type(self[0])
            checks = [isinstance(candidate, expected_type) for candidate in items]
            assert all(checks), "Items must be of equal type."
        return self.items.extend(items)

    def write(self, f, *args, **kwargs):
        """Write the binary representation of each member item to `f`, in list order."""
        for member in self.items:
            member.write(f, *args, **kwargs)
157
+
158
+
159
class GEMFSectionBase:
    """
    Base class representing a GEMF section.

    **Properties**
    - the length of a section equals the length of its binary representation.
    - the `write()` method serializes the section by writing its public attributes in order.

    Attributes whose name starts with an underscore are treated as private: they
    are excluded from the length and from serialization.
    """

    def __len__(self):
        """
        Length of the section's binary representation, i.e. the summed length
        of all public attributes.
        """
        public_values = (value for name, value in vars(self).items() if not name.startswith("_"))
        return sum(len(value) for value in public_values)

    def __str__(self) -> str:
        as_dict = self.to_dict()
        return json.dumps(as_dict, indent=2)

    def to_dict(self, ignore_private: bool = True) -> dict:
        """
        Recursively build a dictionary representation of the GEMF structure.
        Attributes starting with an underscore are suppressed by default.
        """
        result = {}

        for name, element in vars(self).items():
            if ignore_private and name.startswith("_"):
                continue

            # nested sections/lists describe themselves, plain values are converted directly
            result[name] = element.to_dict(ignore_private) if hasattr(element, "to_dict") else to_json(element)

        return result

    def write(self, f, *args, **kwargs):
        """
        Recursively write the binary representation of the object and its child objects.
        Attributes starting with an underscore are not included in the serialization.
        """
        for name, child in vars(self).items():
            if not name.startswith("_"):
                child.write(f, *args, **kwargs)
209
+
210
+
211
class ObjectDescriptor:
    """
    Descriptor that forwards attribute access to another attribute, given by name.

    Used to map the general field names of list sections to subclass-specific names.
    """

    def __init__(self, name) -> None:
        self.name = name

    def __get__(self, obj, objtype=None):
        # read through to the attribute this descriptor is an alias for
        return getattr(obj, self.name)

    def __set__(self, obj, value):
        # store the value under the aliased attribute name
        setattr(obj, self.name, value)
222
+
223
+
224
class GEMFListSectionBase(GEMFSectionBase):
    """
    Base class for GEMF sections which consist of exactly two attributes:
    - a count of child elements
    - a list of child elements

    Subclasses may override the two descriptors below to store the values
    under different attribute names.
    """
    _items = ObjectDescriptor("items")
    _num_items = ObjectDescriptor("num_items")

    def __init__(self, itemlist: GEMFList) -> None:
        # the count is stored before the list, which is also the order of serialization
        count = len(itemlist.items)
        self._num_items = GEMFInt(count)
        self._items = itemlist

    def __getitem__(self, idx):
        return self._items[idx]

    def __iter__(self):
        return iter(self._items)
239
+
240
+
241
class HeaderInfo(GEMFSectionBase):
    """
    GEMF meta-information as specified in section '3.1 Overall Header' of the format specification.

    # Parameters
    - `version`: file format version
    - `tile_size`: size of contained tiles
    """
    def __init__(self, version: int, tile_size: int) -> None:
        wrapped_version = GEMFInt(version)
        self.version = wrapped_version

        wrapped_tile_size = GEMFInt(tile_size)
        self.tile_size = wrapped_tile_size
252
+
253
+
254
class GEMFSource(GEMFSectionBase):
    """
    A single GEMF source, aka tile provider (see section '3.2 Source Data').

    # Parameters
    - `index`: source index
    - `name`: source name

    The attribute `name_length` is derived from `name`: it is the length of the encoded name.
    """
    def __init__(self, index: int, name: str) -> None:
        # attributes are assigned in the order in which they are serialized
        self.index = GEMFInt(index)

        encoded_name = name.encode(ENCODING, ENCODING_ERROR)
        self.name_length = GEMFInt(len(encoded_name))

        self.name = GEMFString(name)
267
+
268
+
269
class SourceList(GEMFList):
    """A `GEMFList` whose members are `GEMFSource` objects."""

    def __init__(self, sources: List[GEMFSource]) -> None:
        GEMFList.__init__(self, sources)
273
+
274
+
275
class SourceData(GEMFListSectionBase):
    """
    Information on the contained sources as specified in section '3.2 Source Data' of the format specification.

    # Attributes
    - `num_sources`: number of contained sources
    - `sources`: list of sources
    """
    # store count and list under the names used by the specification
    _num_items = ObjectDescriptor("num_sources")
    _items = ObjectDescriptor("sources")

    def __init__(self, sourcelist: SourceList) -> None:
        super().__init__(sourcelist)

    @classmethod
    def from_root(cls, root_dir: str):
        """Create a `SourceData` object with one source per directory returned by `listdirs(root_dir)`."""
        sources = [
            GEMFSource(position, dirname)
            for position, dirname in enumerate(listdirs(root_dir))
        ]
        return SourceData(SourceList(sources))
298
+
299
+
300
class GEMFRange(GEMFSectionBase):
    """
    A single GEMF range, aka rectangular collection of tiles (see section '3.3 Range Data').

    # Parameters
    - `z`: the range's zoom level
    - `xmin`: the range's minimum x coordinate
    - `xmax`: the range's maximum x coordinate
    - `ymin`: the range's minimum y coordinate
    - `ymax`: the range's maximum y coordinate
    - `src_index`: the index of the corresponding source
    - `details_offset`: the start byte of the corresponding 'Range Detail' in the binary file, stored as `offset`
    - `first_tile_idx`: index of the range's first tile among all range details; kept as a
      private attribute and therefore not serialized
    """
    # static attribute: size of a `GEMFRange` if serialized
    size = SIZES.OFFSET + 6 * SIZES.DATA

    def __init__(self, z: int, xmin: int, xmax: int, ymin: int, ymax: int, src_index: int, details_offset: int, first_tile_idx: int) -> None:
        # public attributes, in serialization order
        self.z = GEMFInt(z)
        self.xmin = GEMFInt(xmin)
        self.xmax = GEMFInt(xmax)
        self.ymin = GEMFInt(ymin)
        self.ymax = GEMFInt(ymax)
        self.src_index = GEMFInt(src_index)
        self.offset = GEMFOffset(details_offset)

        # bookkeeping only
        self._first_tile_idx = first_tile_idx

    def get_zxy(self, index: int):
        """Return `(z, x, y)` of the n-th tile of the range; the range is traversed in column-major order."""
        xy = AbstractRange.index2xy(index, self.xmin, self.xmax, self.ymin, self.ymax, major="col")
        return (self.z, *xy)

    @property
    def num_tiles(self):
        """Number of tiles spanned by the range bounds."""
        bounds = (self.xmin, self.xmax, self.ymin, self.ymax)
        return AbstractRange.len(*bounds)
333
+
334
+
335
class RangeList(GEMFList):
    """A `GEMFList` whose members are `GEMFRange` objects."""

    def __init__(self, ranges: List[GEMFRange]) -> None:
        GEMFList.__init__(self, ranges)
339
+
340
+
341
+ # TODO: move outside of class?
342
+ def ranges_from_root(root_dir: str, sourcedata: SourceData, mode: str = "split", **kwargs):
343
+ range_dict = {}
344
+
345
+ # collect tile ranges (aka rectangular collections of tiles)
346
+ for i_source, source in enumerate(sourcedata.sources):
347
+ tc = TileCollection.from_tiledir(os.path.join(root_dir, source.name)) # first, collect all tiles in source dir
348
+ tileranges_ = tc.to_tileranges(mode=mode, **kwargs) # split collection into valid tileranges
349
+ range_dict[i_source] = tileranges_
350
+
351
+ return range_dict
352
+
353
+
354
class RangeData(GEMFListSectionBase):
    """
    Information on the contained ranges as specified in section '3.3 Range Data' of the format specification.

    # Attributes
    - `num_ranges`: number of contained ranges
    - `ranges`: list of ranges
    """
    # renaming the attributes of `GEMFListSectionBase` to adhere with the specification naming convention
    _num_items = ObjectDescriptor("num_ranges")
    _items = ObjectDescriptor("ranges")

    @classmethod
    def from_root(cls, root_dir: str, headerinfo: HeaderInfo, sourcedata: SourceData, mode: str = "split", **kwargs):
        """
        Create a `RangeData` object from the tiles found below `root_dir`.

        Returns a tuple `(rangedata, tileranges)`; the `TileRange` objects are
        required for further GEMF creation steps.
        """
        range_dict = ranges_from_root(root_dir, sourcedata, mode=mode, **kwargs)
        return cls.from_ranges(range_dict, headerinfo, sourcedata)

    @classmethod
    def from_ranges(cls, range_dict: Dict[int, List[TileRange]], headerinfo: HeaderInfo, sourcedata: SourceData):
        """
        Create a `RangeData` object from a mapping of source index to tile ranges.

        Returns a tuple `(rangedata, tileranges)`, where `tileranges` lists the
        `TileRange` objects in the same order as the created `GEMFRange`s.
        """
        tile_count = 0
        range_list = RangeList([])
        tileranges = []

        # The range details start right after header, source data and the *complete*
        # range data section. The start offset therefore depends on the total number
        # of ranges of all sources and must be determined once, before iterating.
        # (Computing it per source yields wrong offsets as soon as there is more
        # than one source.)
        total_ranges = sum(len(ranges) for ranges in range_dict.values())
        offset = len(headerinfo) + len(sourcedata) + RangeData.size(total_ranges)

        # traverse all sources and their tile ranges
        for i_source, ranges in range_dict.items():
            for range_ in ranges:
                # create corresponding GEMF object and collect objects
                gemf_range = GEMFRange(range_.z, range_.xmin, range_.xmax, range_.ymin, range_.ymax, i_source, offset, tile_count)
                tileranges.append(range_)
                range_list.append(gemf_range)

                # advance offset to the block of range details corresponding to the next range
                num_tiles = len(range_)
                offset += num_tiles * RangeDetail.size
                tile_count += num_tiles

        rangedata = cls(range_list)
        return rangedata, tileranges

    @property
    def num_tiles(self):
        """Number of tiles contained in `RangeData`."""
        return sum(range_.num_tiles for range_ in self.ranges)

    @staticmethod
    def size(num_ranges: int):
        """Pre-compute the size of a `RangeData` object if serialized."""
        return SIZES.DATA + num_ranges * GEMFRange.size
417
+
418
+
419
class RangeDetail(GEMFSectionBase):
    """
    A single GEMF range detail, aka tile info (see section '3.4 Range Details').

    **Note**: This implementation does not fully follow the paradigm described in the format specification.
    Because a range detail and its data are closely related, both are unified in this class: it is the direct
    representation of a GEMF range detail (`address` and `length`), and derived classes additionally provide
    the functionality to load the binary tile data (`load_bytes()`), which `write_data()` relies on.

    # Parameters
    - `address`: the start byte of the corresponding data in the binary file
    - `length`: the length of the encoded data corresponding to this `RangeDetail`
    - `**kwargs`: forwarded to the next class in the method resolution order
    """
    # static attribute: size of a `RangeDetail` if serialized
    size = SIZES.DATA + SIZES.OFFSET

    def __init__(self, address: int, length: int, **kwargs) -> None:
        self.address = GEMFOffset(address)
        self.length = GEMFInt(length)

        # cooperative initialization of the remaining base classes
        super().__init__(**kwargs)

    def __len__(self):
        """Serialized size: only `address` and `length` are part of the range detail."""
        return sum(len(field) for field in (self.address, self.length))

    def to_dict(self):
        return dict(address=self.address, length=self.length)

    def write(self, f, *args, **kwargs):
        """Write `address` followed by `length`; other attributes are not serialized."""
        self.address.write(f, *args, **kwargs)
        self.length.write(f, *args, **kwargs)

    def to_dict_data(self):
        """Representation of the detail inside a `GEMFDataSection`: the name of its class."""
        return type(self).__name__

    def write_data(self, f):
        """Write the bytes returned by `load_bytes()` (provided by subclasses) to `f`."""
        f.write(self.load_bytes())
456
+
457
+
458
class RangeDetails(GEMFList):
    """
    Information on the contained tiles as specified in section '3.4 Range Details' of the format specification.
    """
    def __init__(self, range_details: List[RangeDetail]) -> None:
        GEMFList.__init__(self, range_details)

    @classmethod
    def from_root(cls, headerinfo: HeaderInfo, sourcedata: SourceData, rangedata: RangeData, tileranges: List[TileRange]):
        """Create a `RangeDetails` object from the previously loaded GEMF objects and a set of `TileRange` objects."""
        collected = RangeDetails([])

        # the tile data starts after header, source data, range data and all range details
        data_address = (
            len(headerinfo)
            + len(sourcedata)
            + len(rangedata)
            + RangeDetails.size(rangedata.num_tiles)
        )

        # one range detail per tile, in the order of the tile ranges
        for range_idx, tilerange in enumerate(tileranges):
            for tile_idx, tile in enumerate(tilerange):
                num_bytes = os.path.getsize(tile.get_filepath())

                detail = GEMFTile.from_tile(data_address, num_bytes, tile)
                detail._range_idx = range_idx
                detail._tile_idx = tile_idx
                collected.append(detail)

                # the data of the next tile follows immediately
                data_address += num_bytes

        return collected

    @staticmethod
    def size(num_tiles: int):
        """Pre-compute the size of a `RangeDetails` object if serialized."""
        return RangeDetail.size * num_tiles
492
+
493
+
494
class GEMFHeaderSection(GEMFSectionBase):
    """
    All meta-information of the GEMF map file as specified in section '3. Header Area' of the format specification.

    The four sub-sections are stored in the order in which they are serialized.
    """
    def __init__(
        self,
        header_info: HeaderInfo,
        source_data: SourceData,
        range_data: RangeData,
        range_details: RangeDetails,
    ) -> None:
        self.header_info = header_info
        self.source_data = source_data
        self.range_data = range_data
        self.range_details = range_details
507
+
508
+
509
class GEMFReference(RangeDetail, NamedTileBase):
    """
    Direct successor of the `RangeDetail` class, holding a reference to tile data stored in an existing `.gemf` file.
    """
    # TODO: enable format passing
    # __slots__ = ("address", "length", "z", "x", "y", "file", "name", "format")

    def __init__(self, address: int, length: int, file: str, z: int, x: int, y: int, format: Optional[str] = None) -> None:
        super().__init__(address=address, length=length, z=z, x=x, y=y, name="tile", format=format)
        self._file = file

    def load_bytes(self, f_src=None):
        """
        Load the tile data from file.

        If `f_src` is given, it is used as the already opened source file (useful for repeated
        access to the same file); it is repositioned by this call. Otherwise, the associated
        file is opened and closed again.
        """
        def _read_span(handle):
            handle.seek(self.address)
            return handle.read(self.length)

        # reuse the open file if one was passed
        if f_src is not None:
            return _read_span(f_src)

        with open(self._file, "rb") as handle:
            return _read_span(handle)
536
+
537
+
538
class GEMFBufferTile(RangeDetail, BufferTile):
    """
    GEMF version of a buffered tile, i.e. a tile without a corresponding file on disk.
    Used to hold binary data when greedily loading a `.gemf` file.

    **Note**: not suitable for large `.gemf` files, as the whole image data may overwhelm memory capacity.
    Instead, use `GEMFReference` for existing `.gemf` files or `GEMFTile` for tiles on disk.

    # Parameters
    - `address`, `length`: parameters of `RangeDetail`
    - `z`, `x`, `y`, `data`, `**kwargs`: parameters and additional keyword arguments for `BufferTile`
    """
    def __init__(self,
        address: int, length: int,                          # params for init of RangeDetail
        z: int, x: int, y: int, data: bytes, **kwargs       # params and additional kwargs for init of BufferTile
    ) -> None:
        # NOTE(review): `kwargify` is defined in `gemf.utils`; presumably it turns the local
        # variables into keyword arguments for the cooperative `__init__` chain - confirm.
        # Do not introduce further local variables before this call, as `locals()` is passed on.
        super().__init__(**kwargify(locals()))
551
+
552
+
553
class GEMFTile(RangeDetail, Tile):
    """
    GEMF version of a regular tile, i.e. a tile with a corresponding file on disk.
    Used to load binary data when creating a `.gemf` file from tiles.
    """
    def __init__(self,
        address: int, length: int,                                      # params for init of RangeDetail
        z: int, x: int, y: int, tiledir: str, name: str, format: str    # params for init of Tile
    ) -> None:
        # `locals()` is passed on as is, so no further local variables may be defined before this call
        super().__init__(**kwargify(locals()))

    @classmethod
    def from_file(cls, tile_file: str, address: int) -> "GEMFTile":
        """Instantiate a `GEMFTile` from a tile file; the length is the size of the file on disk."""
        num_bytes = os.path.getsize(tile_file)
        tile_params = Tile.parse_filepath(tile_file)
        return cls(address=address, length=num_bytes, **tile_params)

    @classmethod
    def from_tile(cls, address: int, length: int, tile: Tile):
        """Utility constructor to instantiate a `GEMFTile` from a regular `Tile`."""
        return cls(
            address=address,
            length=length,
            z=tile.z,
            x=tile.x,
            y=tile.y,
            tiledir=tile.tiledir,
            name=tile.name,
            format=tile.format,
        )
573
+
574
+
575
class GEMFDataSection(RangeDetails):
    """
    Sequential aggregate of encoded tile data as specified in section '4. Data Area' of the format specification.

    **Note**: This class is non-greedy, i.e. it does not hold the tile data itself. It stores the same range
    details as `RangeDetails` and only differs in its length, dict-representation and serialization,
    which refer to the tile data instead of the details.
    """
    # TODO: why not accept rangedetaillist?
    def __init__(self, range_details: List[RangeDetail]) -> None:
        RangeDetails.__init__(self, range_details)

    def __len__(self):
        """Sum of byte-length of member data."""
        total = 0
        for detail in self:
            total += detail.length
        return total

    def to_dict(self, *args, **kwargs):
        """Return the data representation (`to_dict_data()`) of every member."""
        return [detail.to_dict_data() for detail in self]

    # overrides the default behavior of writing the range details themselves
    def write(self, f):
        """Serialize the GEMF data section by loading the data of each tile and writing it to file."""
        for detail in self:
            f.write(detail.load_bytes())
600
+
601
+
602
class GEMF(GEMFSectionBase):
    """
    Core class to read and write map files of the GEMF format. For a detailed description of the file format,
    see https://www.cgtk.co.uk/gemf.

    The `GEMF` class supports...
    - reading `.gemf` map files via the `from_file()` classmethod
    - creating a GEMF object from PNG or JPG tiles via the `from_tiles()` classmethod
    - writing the newly created GEMF object to file via the `write()` method

    Further features are...
    - extracting tiles (PNG or JPG) from binary `.gemf` files via the `save_tiles()` method
    - adding tiles to an existing `.gemf` file (TODO)

    The public attributes `header` and `data` are serialized in this order;
    attributes starting with an underscore are bookkeeping only.
    """
    def __init__(self, gemf_header: GEMFHeaderSection, gemf_data: GEMFDataSection) -> None:
        self.header = gemf_header
        self.data = gemf_data

        self._src = None    # expected to be set in classmethods for `GEMF` initialization

    def __len__(self):
        """Size of the serialized map file. Raises `DetailsNotLoadedError` if the details are not loaded."""
        if self.data is None or self.header.range_details is None:
            raise DetailsNotLoadedError()
        return super().__len__()

    @property
    def num_sources(self):
        """Number of sources contained in `GEMF`."""
        return self.header.source_data._num_items

    @property
    def num_tiles(self):
        """Number of tiles contained in `GEMF`."""
        return sum(range_.num_tiles for range_ in self.header.range_data)

    # initiation methods
    @classmethod
    def from_file(cls, gemf_file: str, lazy: bool = True):
        """
        Read a `.gemf` file from file.

        If `lazy` is set to `False`, range details will also be loaded into memory, and the full range of features is supported.
        However, for large files, this may take a while.

        The following properties are available if `lazy` is `True`:
        - `gemf.num_sources`
        - `gemf.num_tiles`

        The following operations require additional loading (see `load_details()`) if the file was loaded lazily:
        - `len(gemf)`
        """
        gemf = cls(None, None)              # instantiate empty `GEMF` object
        gemf._src = gemf_file

        with open(gemf_file, "rb") as f:    # populate object
            gemf._read_gemf(f, lazy)

        return gemf

    @classmethod
    def from_tiles(cls, root_dir: str, mode: str = "split", version: int = 4, tile_size: int = 256, lazy: bool = True):
        """
        Create a `GEMF` object from tiles.

        If `lazy` is `True`, the range details are not created yet; the collected
        tile ranges are kept for a later call of `load_details()`.
        """
        # TODO: auto-tilesize?
        gemf = cls(None, None)

        header_info = HeaderInfo(version, tile_size)
        source_data = SourceData.from_root(root_dir)
        range_data, tileranges = RangeData.from_root(root_dir, header_info, source_data, mode=mode)

        if lazy:
            range_details = None
            gemf._tileranges = tileranges   # save for later loading of details
        else:
            range_details = RangeDetails.from_root(header_info, source_data, range_data, tileranges)
            gemf.data = GEMFDataSection(range_details.items)

        gemf.header = GEMFHeaderSection(header_info, source_data, range_data, range_details)

        gemf._src = root_dir
        return gemf

    # manipulation methods
    def set_tile(self, *args):
        """Not implemented yet; does nothing."""
        # TODO: implement
        # remove if set_val is None?
        pass

    # TODO: editing methods
    def add_source(self):
        """Not implemented yet; does nothing."""
        pass

    def add_range(self):
        """Not implemented yet; does nothing."""
        pass

    def filter_zooms(self, *zooms: int):
        """Not implemented yet; does nothing."""
        pass

    def crop(self, z: int, xmin: int, xmax: int, ymin: int, ymax: int, drop_empty_sources: bool = True, lazy: bool = True):
        """
        Return a new `GEMF` object containing only the tiles which intersect the given bounds.

        The bounds are given for zoom level `z` and are translated to the zoom level of each range.
        Requires loaded range details; raises `DetailsNotLoadedError` otherwise.

        # Parameters
        - `z`, `xmin`, `xmax`, `ymin`, `ymax`: zoom level and tile bounds of the crop region
        - `drop_empty_sources`: if `True`, sources without remaining ranges are dropped
        - `lazy`: if `True`, the range details and data section of the result are not created
        """
        ranges_cropped = defaultdict(list)

        # crop/drop each range
        for range_ in self.header.range_data:
            # translate bounds to current range zoom level
            if range_.z > z:
                _, x_min_, y_min_ = SlippyTileMap.get_subtile_at_zoom(z, range_.z, xmin, ymin, subtile="tl")
                _, x_max_, y_max_ = SlippyTileMap.get_subtile_at_zoom(z, range_.z, xmax, ymax, subtile="br")
            elif range_.z < z:
                _, x_min_, y_min_ = SlippyTileMap.get_parent_tile_at_zoom(z, range_.z, xmin, ymin)
                _, x_max_, y_max_ = SlippyTileMap.get_parent_tile_at_zoom(z, range_.z, xmax, ymax)
            else:
                x_min_, x_max_, y_min_, y_max_ = xmin, xmax, ymin, ymax

            # find intersecting tiles
            tiles_crop_ = [
                tile_
                for tile_ in self.get_range_details(range_)
                if AbstractRange.intersection(
                    (x_min_, x_max_, y_min_, y_max_),
                    (tile_.x, tile_.x, tile_.y, tile_.y)
                )
            ]

            if tiles_crop_:
                tilerange_crop_ = TileRange(range_.z, tiles_crop_)
                ranges_cropped[range_.src_index].append(tilerange_crop_)

        # drop sources if no range remains
        if drop_empty_sources:
            source_data = SourceData(SourceList([source_ for source_ in self.header.source_data if source_.index in ranges_cropped]))
        else:
            source_data = self.header.source_data   # TODO: copy needed?

        header_info = HeaderInfo(self.header.header_info.version, self.header.header_info.tile_size)
        range_data, tileranges = RangeData.from_ranges(ranges_cropped, header_info, source_data)

        if lazy:
            # TODO: why is GEMFDataSection slow? replace by something that is not None?
            range_details = None
            data_section = None
        else:
            range_details = RangeDetails.from_root(header_info, source_data, range_data, tileranges)
            data_section = GEMFDataSection(range_details.items)

        header = GEMFHeaderSection(header_info, source_data, range_data, range_details)

        gemf_cropped = GEMF(header, data_section)
        gemf_cropped._tileranges = tileranges   # save for later loading of details

        return gemf_cropped

    def crop_geom(self, geom):
        """Not implemented yet; does nothing."""
        # TODO questions: adds dependencies? maybe as optional dependencies?
        pass

    def reindex_regions(self, method: str):
        """Not implemented yet; does nothing."""
        # see tiles.py/RangeCollection
        pass

    # serialization methods
    def write(self, gemf_file: str):
        """
        Serialize the `GEMF` object to file.

        Raises `DetailsNotLoadedError` if the range details have not been loaded.
        """
        # Check before opening the file: opening in "wb" mode truncates an existing
        # file, which must not happen if the object cannot be serialized anyway.
        if self.data is None or self.header.range_details is None:
            raise DetailsNotLoadedError()

        if filedir := os.path.dirname(gemf_file):
            os.makedirs(filedir, exist_ok=True)

        with open(gemf_file, "wb") as f:
            super().write(f)

    def save_tiles(self, tiledir_root: str, save_empty: bool = False):
        """
        Save the `GEMF` object's tiles to file, with one subdirectory per source.

        **Note**: an existing directory `tiledir_root` is removed beforehand, including its content.
        Tiles with a data length of 0 are only saved if `save_empty` is `True`.
        """
        # TODO: leave .gemf file open if data is just References?
        if self.data is None:
            raise DetailsNotLoadedError()

        if os.path.exists(tiledir_root):
            shutil.rmtree(tiledir_root)
        os.makedirs(tiledir_root)

        # create all source subdirectories
        for source in self.header.source_data:
            os.makedirs(os.path.join(tiledir_root, source.name))

        # write tiles
        for data in self.data:
            src = self.header.source_data[self.header.range_data[data._range_idx].src_index]

            if data.length > 0 or save_empty:
                data.save(os.path.join(tiledir_root, src.name))

    # other methods
    def load_details(self):
        """
        Load the `RangeDetails` section of the `.gemf` file if `GEMF` was instantiated with `lazy = True`.
        The range details may be required for some operations.
        """
        if self.data is None:
            self._load_details()

    # utility methods
    def _read_gemf(self, f, lazy: bool = False):
        """Populate `header` and `data` from the open binary file `f`."""
        header_info = self._read_header(f)
        source_data = self._read_source_data(f, header_info)
        range_data = self._read_range_data(f, header_info, source_data)

        if lazy:
            range_details = None
            self.data = None
        else:
            range_details = self._read_range_details(f, header_info, source_data, range_data)
            self.data = GEMFDataSection(range_details.items)

        self.header = GEMFHeaderSection(header_info, source_data, range_data, range_details)

    def _read_header(self, f):
        """Read version and tile size from the start of the file."""
        f.seek(0)
        gemf_version = _read_int(f)
        tile_size = _read_int(f)
        return HeaderInfo(gemf_version, tile_size)

    def _read_source_data(self, f, headerinfo: HeaderInfo):
        """Read the source data section, which follows the header info."""
        f.seek(len(headerinfo))
        num_sources = _read_int(f)
        sourcelist = SourceList([])

        for i_src in range(num_sources):
            src_idx = _read_int(f)
            assert i_src == src_idx
            name_length = _read_int(f)
            name = _read_string(f, name_length)

            sourcelist.append(GEMFSource(i_src, name))

        return SourceData(sourcelist)

    def _read_range_data(self, f, headerinfo: HeaderInfo, sourcedata: SourceData):
        """Read the range data section, which follows the source data."""
        f.seek(len(headerinfo) + len(sourcedata))
        num_ranges = _read_int(f)

        tile_count = 0
        rangelist = RangeList([])
        for _ in range(num_ranges):
            z = _read_int(f)
            xmin = _read_int(f)
            xmax = _read_int(f)
            ymin = _read_int(f)
            ymax = _read_int(f)
            src_idx = _read_int(f)
            offset_details = _read_offset(f)

            range_ = GEMFRange(z, xmin, xmax, ymin, ymax, src_idx, offset_details, tile_count)
            rangelist.append(range_)
            tile_count += range_.num_tiles

        return RangeData(rangelist)

    def _read_range_details(self, f, headerinfo: HeaderInfo, sourcedata: SourceData, rangedata: RangeData):
        """
        Read the range details section, which follows the range data.

        The details are read sequentially, range by range; each one is represented
        by a `GEMFReference` pointing to the tile data in the file `f.name`.
        """
        f.seek(len(headerinfo) + len(sourcedata) + len(rangedata))

        rangedetails = RangeDetails([])
        for i_range, range_ in enumerate(rangedata):
            for i_tile in range(range_.num_tiles):
                address = _read_offset(f)
                len_data = _read_int(f)

                z, x, y = range_.get_zxy(i_tile)

                rangedetail = GEMFReference(address, len_data, f.name, z, x, y)
                rangedetail._range_idx = i_range
                rangedetail._tile_idx = i_tile

                rangedetails.append(rangedetail)
        return rangedetails

    def _load_details(self):
        """
        Continue loading of `GEMF` by creating the `RangeDetails` and the data section.

        Raises `ValueError` if the source is neither an existing file nor an existing directory.
        """
        src = self._src

        # read from gemf file
        if src is not None and os.path.isfile(src):
            with open(src, "rb") as f:
                range_details = self._read_range_details(f, self.header.header_info, self.header.source_data, self.header.range_data)

        # read from tiles
        elif src is not None and os.path.isdir(src):
            range_details = RangeDetails.from_root(self.header.header_info, self.header.source_data, self.header.range_data, self._tileranges)
            del self._tileranges

        else:
            raise ValueError("Unsupported source.")

        # both header and data section refer to the details, independent of the kind of source
        self.header.range_details = range_details
        self.data = GEMFDataSection(range_details.items)

    # utility functions for information/manipulation
    def get_zoom_levels(self):
        """Return a list of the distinct zoom levels of all ranges (in no particular order)."""
        return list({range_.z for range_ in self.header.range_data})

    def get_range_details(self, range: GEMFRange) -> RangeDetails:
        """
        Get the range details corresponding to a range.

        Raises `DetailsNotLoadedError` if the range details have not been loaded.
        """
        if self.header.range_details is None:
            raise DetailsNotLoadedError()

        idx_details = range._first_tile_idx
        return RangeDetails(self.header.range_details[idx_details: idx_details + range.num_tiles])