dmg-builder 26.5.0 → 26.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1262 +0,0 @@
1
- import plistlib
2
- import struct
3
-
4
- import mac_alias
5
-
6
- from . import buddy
7
-
8
-
9
class ILocCodec:
    """Codec for ``Iloc`` blobs, which carry an ``(x, y)`` point."""

    @staticmethod
    def encode(point):
        """Pack ``point[0]`` and ``point[1]`` as big-endian unsigned 32-bit
        integers, followed by the constant words ``0xFFFFFFFF`` and
        ``0xFFFF0000``."""
        x, y = point[0], point[1]
        return struct.pack(b">IIII", x, y, 0xFFFFFFFF, 0xFFFF0000)

    @staticmethod
    def decode(bytesData):
        """Return the ``(x, y)`` tuple held in the first eight bytes of
        ``bytesData``; anything after those eight bytes is ignored."""
        head = bytesData[:8]
        if isinstance(bytesData, bytearray):
            # Work on an immutable copy of the slice.
            coords = struct.unpack_from(b">II", bytes(head))
        else:
            coords = struct.unpack(b">II", head)
        x, y = coords
        return (x, y)
21
-
22
-
23
class PlistCodec:
    """Codec for blobs whose payload is a property list."""

    @staticmethod
    def encode(plist):
        """Serialise ``plist`` with :func:`plistlib.dumps` in binary format."""
        serialised = plistlib.dumps(plist, fmt=plistlib.FMT_BINARY)
        return serialised

    @staticmethod
    def decode(bytes):
        """Parse the plist held in ``bytes`` with :func:`plistlib.loads`."""
        parsed = plistlib.loads(bytes)
        return parsed
31
-
32
-
33
class BookmarkCodec:
    """Codec for blobs that hold a ``mac_alias`` bookmark."""

    @staticmethod
    def encode(bmk):
        """Return whatever ``bmk.to_bytes()`` produces."""
        raw = bmk.to_bytes()
        return raw

    @staticmethod
    def decode(bytes):
        """Build a :class:`mac_alias.Bookmark` from ``bytes``."""
        bookmark = mac_alias.Bookmark.from_bytes(bytes)
        return bookmark
41
-
42
-
43
# Maps a record code to the codec used to decode "blob" values stored under
# that code.  This is purely a convenience; only a small subset of the
# possible entry types is covered, and any other blob is left as raw bytes.
codecs = {
    b"Iloc": ILocCodec,
    # All of the view-settings records share the plist codec.
    **dict.fromkeys((b"bwsp", b"lsvp", b"lsvP", b"icvp"), PlistCodec),
    b"pBBk": BookmarkCodec,
}
54
-
55
-
56
class DSStoreEntry:
    """Holds the data from an entry in a ``.DS_Store`` file.  Note that this is
    not meant to represent the entry itself---i.e. if you change the type or
    value, your changes will *not* be reflected in the underlying file.

    If you want to make a change, you should either use the
    :class:`DSStore` object's :meth:`DSStore.insert` method (which will
    replace a key if it already exists), or the mapping access mode for
    :class:`DSStore` (often simpler anyway).

    Entries order by lower-cased filename first and record code second; all
    six rich comparisons use that same key, so they always agree with each
    other.  Comparing against anything that is not a :class:`DSStoreEntry`
    raises :class:`TypeError`.
    """

    # Payload size, in bytes, of every fixed-width value type.
    _FIXED_SIZES = {
        b"bool": 1,
        b"long": 4,
        b"shor": 4,
        b"type": 4,
        b"comp": 8,
        b"dutc": 8,
    }

    def __init__(self, filename, code, typecode, value=None):
        """Create an entry.

        ``filename`` may be text or UTF-8 bytes; ``code`` may be bytes or a
        Latin-1 encodable string.  ``typecode`` is a four-character type
        code (``str`` or ``bytes``) or a codec object providing ``encode``.
        """
        if isinstance(filename, bytes):
            filename = filename.decode("utf-8")

        if not isinstance(code, bytes):
            code = code.encode("latin_1")

        self.filename = filename
        self.code = code
        self.type = typecode
        self.value = value

    @classmethod
    def read(cls, block):
        """Read a ``.DS_Store`` entry from the containing Block.

        Raises :class:`ValueError` if the stored type code is not recognised.
        """
        # First read the filename (length is in UTF-16 code units)
        nlen = block.read(b">I")[0]
        filename = block.read(2 * nlen).decode("utf-16be")

        # Next, read the code and type
        code, typecode = block.read(b">4s4s")

        # Finally, read the data
        if typecode == b"bool":
            value = block.read(b">?")[0]
        elif typecode in (b"long", b"shor"):
            value = block.read(b">I")[0]
        elif typecode == b"blob":
            vlen = block.read(b">I")[0]
            value = block.read(vlen)

            # Known blobs are decoded, and the codec itself is remembered as
            # the entry's type so that writing re-encodes the value.
            codec = codecs.get(code)
            if codec:
                value = codec.decode(value)
                typecode = codec
        elif typecode == b"ustr":
            vlen = block.read(b">I")[0]
            value = block.read(2 * vlen).decode("utf-16be")
        elif typecode == b"type":
            value = block.read(b">4s")[0]
        elif typecode in (b"comp", b"dutc"):
            value = block.read(b">Q")[0]
        else:
            raise ValueError('Unknown type code "%s"' % typecode)

        return cls(filename, code, typecode, value)

    def _sort_key(self):
        """Return the ``(lower-cased filename, code)`` ordering key."""
        return (self.filename.lower(), self.code)

    @staticmethod
    def _require_entry(other):
        """Raise :class:`TypeError` unless ``other`` is a DSStoreEntry."""
        if not isinstance(other, DSStoreEntry):
            raise TypeError("Can only compare against other DSStoreEntry objects")

    def __lt__(self, other):
        self._require_entry(other)
        return self._sort_key() < other._sort_key()

    def __le__(self, other):
        self._require_entry(other)
        return self._sort_key() <= other._sort_key()

    def __eq__(self, other):
        self._require_entry(other)
        return self._sort_key() == other._sort_key()

    def __ne__(self, other):
        self._require_entry(other)
        return self._sort_key() != other._sort_key()

    def __gt__(self, other):
        self._require_entry(other)
        return self._sort_key() > other._sort_key()

    def __ge__(self, other):
        self._require_entry(other)
        return self._sort_key() >= other._sort_key()

    def _wire_form(self):
        """Return ``(type code as bytes, value as it will be written)``.

        A codec object stored in ``self.type`` is written as a ``blob`` whose
        payload is ``codec.encode(self.value)``.
        """
        if isinstance(self.type, str):
            return self.type.encode("latin_1"), self.value
        if isinstance(self.type, bytes):
            return self.type, self.value
        return b"blob", self.type.encode(self.value)

    def byte_length(self):
        """Compute the length of this entry, in bytes.

        Raises :class:`ValueError` for an unknown type code.
        """
        # 4-byte name length + UTF-16 name + 4-byte code + 4-byte type
        length = 4 + len(self.filename.encode("utf-16be")) + 8

        entry_type, value = self._wire_form()

        if entry_type in self._FIXED_SIZES:
            length += self._FIXED_SIZES[entry_type]
        elif entry_type == b"blob":
            length += 4 + len(value)
        elif entry_type == b"ustr":
            length += 4 + len(value.encode("utf-16be"))
        else:
            raise ValueError('Unknown type code "%s"' % entry_type)

        return length

    def write(self, block, insert=False):
        """Write this entry to the specified Block.

        With ``insert`` true the data goes through ``block.insert`` rather
        than ``block.write``.  Raises :class:`ValueError` for an unknown
        type code.
        """
        w = block.insert if insert else block.write

        entry_type, value = self._wire_form()

        utf16 = self.filename.encode("utf-16be")
        w(b">I", len(utf16) // 2)
        w(utf16)
        w(b">4s4s", self.code, entry_type)

        if entry_type == b"bool":
            w(b">?", value)
        elif entry_type in (b"long", b"shor"):
            w(b">I", value)
        elif entry_type == b"blob":
            w(b">I", len(value))
            w(value)
        elif entry_type == b"ustr":
            utf16 = value.encode("utf-16be")
            w(b">I", len(utf16) // 2)
            w(utf16)
        elif entry_type == b"type":
            if isinstance(value, str):
                value = value.encode("latin_1")
            w(b">4s", value)
        elif entry_type in (b"comp", b"dutc"):
            w(b">Q", value)
        else:
            raise ValueError('Unknown type code "%s"' % entry_type)

    def __repr__(self):
        return f"<{self.filename} {self.code}>"
241
-
242
-
243
- class DSStore:
244
- """Python interface to a ``.DS_Store`` file. Works by manipulating the
245
    file on the disk---so this code will work with ``.DS_Store`` files for
    *very* large directories.
248
-
249
- A :class:`DSStore` object can be used as if it was a mapping, e.g.::
250
-
251
- d['foobar.dat']['Iloc']
252
-
253
- will fetch the "Iloc" record for "foobar.dat", or raise :class:`KeyError` if
254
- there is no such record. If used in this manner, the :class:`DSStore` object
255
- will return (type, value) tuples, unless the type is "blob" and the module
256
- knows how to decode it.
257
-
258
- Currently, we know how to decode "Iloc", "bwsp", "lsvp", "lsvP" and "icvp"
259
- blobs. "Iloc" decodes to an (x, y) tuple, while the others are all decoded
260
- using ``biplist`` or ``plistlib`` depending on Python version.
261
-
262
- Assignment also works, e.g.::
263
-
264
- d['foobar.dat']['note'] = ('ustr', u'Hello World!')
265
-
266
- as does deletion with ``del``::
267
-
268
- del d['foobar.dat']['note']
269
-
270
- This is usually going to be the most convenient interface, though
271
- occasionally (for instance when creating a new ``.DS_Store`` file) you
272
- may wish to drop down to using :class:`DSStoreEntry` objects directly.
273
- """
274
-
275
- def __init__(self, store):
276
- self._store = store
277
- self._superblk = self._store["DSDB"]
278
- with self._get_block(self._superblk) as s:
279
- (
280
- self._rootnode,
281
- self._levels,
282
- self._records,
283
- self._nodes,
284
- self._page_size,
285
- ) = s.read(b">IIIII")
286
- self._min_usage = 2 * self._page_size // 3
287
- self._dirty = False
288
-
289
- @classmethod
290
- def open(cls, file_or_name, mode="r+", initial_entries=None):
291
- """Open a ``.DS_Store`` file; pass either a Python file object, or a
292
- filename in the ``file_or_name`` argument and a file access mode in the
293
- ``mode`` argument.
294
-
295
- If you are creating a new file using the "w" or "w+" modes, you
296
- may also specify a list of entries with which to initialise the
297
- file.
298
- """
299
- store = buddy.Allocator.open(file_or_name, mode)
300
-
301
- if mode == "w" or mode == "w+":
302
- superblk = store.allocate(20)
303
- store["DSDB"] = superblk
304
- page_size = 4096
305
-
306
- if not initial_entries:
307
- root = store.allocate(page_size)
308
-
309
- with store.get_block(root) as rootblk:
310
- rootblk.zero_fill()
311
-
312
- with store.get_block(superblk) as s:
313
- s.write(b">IIIII", root, 0, 0, 1, page_size)
314
- else:
315
- # Make sure they're in sorted order
316
- initial_entries = list(initial_entries)
317
- initial_entries.sort()
318
-
319
- # Construct the tree
320
- current_level = initial_entries
321
- next_level = []
322
- levels = []
323
- ptr_size = 0
324
- node_count = 0
325
- while True:
326
- total = 8
327
- nodes = []
328
- node = []
329
- for e in current_level:
330
- new_total = total + ptr_size + e.byte_length()
331
- if new_total > page_size:
332
- nodes.append(node)
333
- next_level.append(e)
334
- total = 8
335
- node = []
336
- else:
337
- total = new_total
338
- node.append(e)
339
- if node:
340
- nodes.append(node)
341
-
342
- node_count += len(nodes)
343
- levels.append(nodes)
344
-
345
- if len(nodes) == 1:
346
- break
347
-
348
- current_level = next_level
349
- next_level = []
350
- ptr_size = 4
351
-
352
- # Allocate nodes
353
- ptrs = [store.allocate(page_size) for n in range(node_count)]
354
-
355
- # Generate nodes
356
- pointers = []
357
- prev_pointers = None
358
- for level in levels:
359
- ppndx = 0
360
- lptrs = ptrs[-len(level) :]
361
- del ptrs[-len(level) :]
362
- for node in level:
363
- ndx = lptrs.pop(0)
364
- if prev_pointers is None:
365
- with store.get_block(ndx) as block:
366
- block.write(b">II", 0, len(node))
367
- for e in node:
368
- e.write(block)
369
- else:
370
- next_node = prev_pointers[ppndx + len(node)]
371
- node_ptrs = prev_pointers[ppndx : ppndx + len(node)]
372
-
373
- with store.get_block(ndx) as block:
374
- block.write(b">II", next_node, len(node))
375
- for ptr, e in zip(node_ptrs, node):
376
- block.write(b">I", ptr)
377
- e.write(block)
378
-
379
- pointers.append(ndx)
380
- prev_pointers = pointers
381
- pointers = []
382
-
383
- root = prev_pointers[0]
384
-
385
- with store.get_block(superblk) as s:
386
- s.write(
387
- b">IIIII",
388
- root,
389
- len(levels),
390
- len(initial_entries),
391
- node_count,
392
- page_size,
393
- )
394
-
395
- return DSStore(store)
396
-
397
- def _get_block(self, number):
398
- return self._store.get_block(number)
399
-
400
- def flush(self):
401
- """Flush any dirty data back to the file."""
402
- if self._dirty:
403
- self._dirty = False
404
-
405
- with self._get_block(self._superblk) as s:
406
- s.write(
407
- b">IIIII",
408
- self._rootnode,
409
- self._levels,
410
- self._records,
411
- self._nodes,
412
- self._page_size,
413
- )
414
- self._store.flush()
415
-
416
- def close(self):
417
- """Flush dirty data and close the underlying file."""
418
- self.flush()
419
- self._store.close()
420
-
421
- def __enter__(self):
422
- return self
423
-
424
- def __exit__(self, exc_type, exc_value, traceback):
425
- self.close()
426
-
427
- # Internal B-Tree nodes look like this:
428
- #
429
- # [ next | count | (ptr0 | rec0) | (ptr1 | rec1) ... (ptrN | recN) ]
430
-
431
- # Leaf nodes look like this:
432
- #
433
- # [ 0 | count | rec0 | rec1 ... recN ]
434
-
435
- # Iterate over the tree, starting at `node'
436
- def _traverse(self, node):
437
- if node is None:
438
- node = self._rootnode
439
- with self._get_block(node) as block:
440
- next_node, count = block.read(b">II")
441
- if next_node:
442
- for n in range(count):
443
- ptr = block.read(b">I")[0]
444
- for e in self._traverse(ptr):
445
- yield e
446
- e = DSStoreEntry.read(block)
447
- yield e
448
- for e in self._traverse(next_node):
449
- yield e
450
- else:
451
- for n in range(count):
452
- e = DSStoreEntry.read(block)
453
- yield e
454
-
455
- # Display the data in `node'
456
- def _dump_node(self, node):
457
- with self._get_block(node) as block:
458
- next_node, count = block.read(b">II")
459
- print("next: %u\ncount: %u\n" % (next_node, count))
460
- for n in range(count):
461
- if next_node:
462
- ptr = block.read(b">I")[0]
463
- print("%8u " % ptr, end=" ")
464
- else:
465
- print(" ", end=" ")
466
- e = DSStoreEntry.read(block)
467
- print(e, " (%u)" % e.byte_length())
468
- print("used: %u" % block.tell())
469
-
470
- # Display the data in the super block
471
- def _dump_super(self):
472
- print(
473
- "root: %u\nlevels: %u\nrecords: %u\nnodes: %u\npage-size: %u"
474
- % (
475
- self._rootnode,
476
- self._levels,
477
- self._records,
478
- self._nodes,
479
- self._page_size,
480
- )
481
- )
482
-
483
- # Splits entries across two blocks, returning one pivot
484
- #
485
- # Tries to balance the block usage across the two as best it can
486
- def _split2(self, blocks, entries, pointers, before, internal):
487
- left_block = blocks[0]
488
- right_block = blocks[1]
489
-
490
- count = len(entries)
491
-
492
- # Find the feasible splits
493
- best_split = None
494
- best_diff = None
495
- total = before[count]
496
-
497
- if 8 + total <= self._page_size:
498
- # We can use a *single* node for this
499
- best_split = count
500
- else:
501
- # Split into two nodes
502
- for n in range(1, count - 1):
503
- left_size = 8 + before[n]
504
- right_size = 8 + total - before[n + 1]
505
-
506
- if left_size > self._page_size:
507
- break
508
- if right_size > self._page_size:
509
- continue
510
-
511
- diff = abs(left_size - right_size)
512
-
513
- if best_split is None or diff < best_diff:
514
- best_split = n
515
- best_diff = diff
516
-
517
- if best_split is None:
518
- return None
519
-
520
- # Write the nodes
521
- left_block.seek(0)
522
- if internal:
523
- next_node = pointers[best_split]
524
- else:
525
- next_node = 0
526
- left_block.write(b">II", next_node, best_split)
527
-
528
- for n in range(best_split):
529
- if internal:
530
- left_block.write(b">I", pointers[n])
531
- entries[n].write(left_block)
532
-
533
- left_block.zero_fill()
534
-
535
- if best_split == count:
536
- return []
537
-
538
- right_block.seek(0)
539
- if internal:
540
- next_node = pointers[count]
541
- else:
542
- next_node = 0
543
- right_block.write(b">II", next_node, count - best_split - 1)
544
-
545
- for n in range(best_split + 1, count):
546
- if internal:
547
- right_block.write(b">I", pointers[n])
548
- entries[n].write(right_block)
549
-
550
- right_block.zero_fill()
551
-
552
- pivot = entries[best_split]
553
-
554
- return [pivot]
555
-
556
- def _split(self, node, entry, right_ptr=0):
557
- self._nodes += 1
558
- self._dirty = True
559
- new_right = self._store.allocate(self._page_size)
560
- with self._get_block(node) as block, self._get_block(new_right) as right_block:
561
-
562
- # First, measure and extract all the elements
563
- entry_size = entry.byte_length()
564
- # ?? entry_pos = None
565
- next_node, count = block.read(b">II")
566
- if next_node:
567
- entry_size += 4
568
- pointers = []
569
- entries = []
570
- before = []
571
- total = 0
572
- for n in range(count):
573
- pos = block.tell()
574
- if next_node:
575
- ptr = block.read(b">I")[0]
576
- pointers.append(ptr)
577
- e = DSStoreEntry.read(block)
578
- if e > entry:
579
- # ?? entry_pos = n
580
- entries.append(entry)
581
- pointers.append(right_ptr)
582
- before.append(total)
583
- total += entry_size
584
- entries.append(e)
585
- before.append(total)
586
- total += block.tell() - pos
587
- before.append(total)
588
- if next_node:
589
- pointers.append(next_node)
590
-
591
- pivot = self._split2(
592
- [block, right_block], entries, pointers, before, bool(next_node)
593
- )[0]
594
-
595
- self._records += 1
596
- self._nodes += 1
597
- self._dirty = True
598
-
599
- return (pivot, new_right)
600
-
601
- # Allocate a new root node containing the element `pivot' and the pointers
602
- # `left' and `right'
603
- def _new_root(self, left, pivot, right):
604
- new_root = self._store.allocate(self._page_size)
605
- with self._get_block(new_root) as block:
606
- block.write(b">III", right, 1, left)
607
- pivot.write(block)
608
- self._rootnode = new_root
609
- self._levels += 1
610
- self._nodes += 1
611
- self._dirty = True
612
-
613
- # Insert an entry into an inner node; `path' is the path from the root
614
- # to `node', not including `node' itself. `right_ptr' is the new node
615
- # pointer (inserted to the RIGHT of `entry')
616
- def _insert_inner(self, path, node, entry, right_ptr):
617
- with self._get_block(node) as block:
618
- next_node, count = block.read(b">II")
619
- insert_pos = None
620
- insert_ndx = None
621
- n = 0
622
- while n < count:
623
- pos = block.tell()
624
- ptr = block.read(b">I")[0]
625
- e = DSStoreEntry.read(block)
626
- if e == entry:
627
- if n == count - 1:
628
- right_ptr = next_node
629
- next_node = ptr
630
- block.seek(pos)
631
- else:
632
- right_ptr = block.read(b">I")[0]
633
- block.seek(pos + 4)
634
- insert_pos = pos
635
- insert_ndx = n
636
- block.delete(e.byte_length() + 4)
637
- count -= 1
638
- self._records += 1
639
- self._dirty = True
640
- continue
641
- elif insert_pos is None and e > entry:
642
- insert_pos = pos
643
- insert_ndx = n
644
- n += 1
645
- if insert_pos is None:
646
- insert_pos = block.tell()
647
- insert_ndx = count
648
- remaining = self._page_size - block.tell()
649
-
650
- if remaining < entry.byte_length() + 4:
651
- pivot, new_right = self._split(node, entry, right_ptr)
652
- if path:
653
- self._insert_inner(path[:-1], path[-1], pivot, new_right)
654
- else:
655
- self._new_root(node, pivot, new_right)
656
- else:
657
- if insert_ndx == count:
658
- block.seek(insert_pos)
659
- block.write(b">I", next_node)
660
- entry.write(block)
661
- next_node = right_ptr
662
- else:
663
- block.seek(insert_pos + 4)
664
- entry.write(block, True)
665
- block.insert(">I", right_ptr)
666
- block.seek(0)
667
- count += 1
668
- block.write(b">II", next_node, count)
669
- self._records += 1
670
- self._dirty = True
671
-
672
- # Insert `entry' into the leaf node `node'
673
- def _insert_leaf(self, path, node, entry):
674
- with self._get_block(node) as block:
675
- next_node, count = block.read(b">II")
676
- insert_pos = None
677
- # ?? insert_ndx = None
678
- n = 0
679
- while n < count:
680
- pos = block.tell()
681
- e = DSStoreEntry.read(block)
682
- if e == entry:
683
- insert_pos = pos
684
- # ?? insert_ndx = n
685
- block.seek(pos)
686
- block.delete(e.byte_length())
687
- count -= 1
688
- self._records += 1
689
- self._dirty = True
690
- continue
691
- elif insert_pos is None and e > entry:
692
- insert_pos = pos
693
- # ?? insert_ndx = n
694
- n += 1
695
- if insert_pos is None:
696
- insert_pos = block.tell()
697
- # ?? insert_ndx = count
698
- remaining = self._page_size - block.tell()
699
-
700
- if remaining < entry.byte_length():
701
- pivot, new_right = self._split(node, entry)
702
- if path:
703
- self._insert_inner(path[:-1], path[-1], pivot, new_right)
704
- else:
705
- self._new_root(node, pivot, new_right)
706
- else:
707
- block.seek(insert_pos)
708
- entry.write(block, True)
709
- block.seek(0)
710
- count += 1
711
- block.write(b">II", next_node, count)
712
- self._records += 1
713
- self._dirty = True
714
-
715
- def insert(self, entry):
716
- """Insert ``entry`` (which should be a :class:`DSStoreEntry`) into the
717
- B-Tree."""
718
- path = []
719
- node = self._rootnode
720
- while True:
721
- with self._get_block(node) as block:
722
- next_node, count = block.read(b">II")
723
- if next_node:
724
- for n in range(count):
725
- ptr = block.read(b">I")[0]
726
- e = DSStoreEntry.read(block)
727
- if entry < e:
728
- next_node = ptr
729
- break
730
- elif entry == e:
731
- # If we find an existing entry the same, replace it
732
- self._insert_inner(path, node, entry, None)
733
- return
734
- path.append(node)
735
- node = next_node
736
- else:
737
- self._insert_leaf(path, node, entry)
738
- return
739
-
740
- # Return usage information for the specified `node'
741
- def _block_usage(self, node):
742
- with self._get_block(node) as block:
743
- next_node, count = block.read(b">II")
744
-
745
- for n in range(count):
746
- if next_node:
747
- block.read(b">I")[0]
748
- DSStoreEntry.read(block)
749
-
750
- used = block.tell()
751
-
752
- return (count, used)
753
-
754
- # Splits entries across three blocks, returning two pivots
755
- def _split3(self, blocks, entries, pointers, before, internal):
756
- count = len(entries)
757
-
758
- # Find the feasible splits
759
- best_split = None
760
- best_diff = None
761
- total = before[count]
762
- for n in range(1, count - 3):
763
- left_size = 8 + before[n]
764
- remaining = 16 + total - before[n + 1]
765
-
766
- if left_size > self._page_size:
767
- break
768
- if remaining > 2 * self._page_size:
769
- continue
770
-
771
- for m in range(n + 2, count - 1):
772
- mid_size = 8 + before[m] - before[n + 1]
773
- right_size = 8 + total - before[m + 1]
774
-
775
- if mid_size > self._page_size:
776
- break
777
- if right_size > self._page_size:
778
- continue
779
-
780
- diff = abs(left_size - mid_size) * abs(right_size - mid_size)
781
-
782
- if best_split is None or diff < best_diff:
783
- best_split = (n, m, count)
784
- best_diff = diff
785
-
786
- if best_split is None:
787
- return None
788
-
789
- # Write the nodes
790
- prev_split = -1
791
- for block, split in zip(blocks, best_split):
792
- block.seek(0)
793
- if internal:
794
- next_node = pointers[split]
795
- else:
796
- next_node = 0
797
- block.write(b">II", next_node, split)
798
-
799
- for n in range(prev_split + 1, split):
800
- if internal:
801
- block.write(b">I", pointers[n])
802
- entries[n].write(block)
803
-
804
- block.zero_fill()
805
-
806
- prev_split = split
807
-
808
- return (entries[best_split[0]], entries[best_split[1]])
809
-
810
- # Extract all of the entries from the specified list of `blocks',
811
- # separating them by the specified `pivots'. Also computes the
812
- # amount of space used before each entry.
813
- def _extract(self, blocks, pivots):
814
- pointers = []
815
- entries = []
816
- before = []
817
- total = 0
818
- ppivots = pivots + [None]
819
- for b, p in zip(blocks, ppivots):
820
- b.seek(0)
821
- next_node, count = b.read(b">II")
822
- for n in range(count):
823
- pos = b.tell()
824
- if next_node:
825
- ptr = b.read(b">I")[0]
826
- pointers.append(ptr)
827
- e = DSStoreEntry.read(b)
828
- entries.append(e)
829
- before.append(total)
830
- total += b.tell() - pos
831
- if next_node:
832
- pointers.append(next_node)
833
- if p:
834
- entries.append(p)
835
- before.append(total)
836
- total += p.byte_length()
837
- if next_node:
838
- total += 4
839
- before.append(total)
840
-
841
- return (entries, pointers, before)
842
-
843
- # Rebalance the specified `node', whose path from the root is `path'.
844
    def _rebalance(self, path, node):
        """Redistribute the records of ``node`` and its sibling(s).

        ``path`` is the path from the root to ``node`` (not including
        ``node``); the parent is ``path[-1]``.  The siblings and the pivot
        records separating them are located in the parent, everything is
        pooled with :meth:`_extract` and re-split with :meth:`_split2` /
        :meth:`_split3`, the old pivots are deleted from the parent and the
        new ones re-inserted with :meth:`_insert_inner`.  Afterwards the
        parent itself is rebalanced when it is less than half a page full.
        """
        # Can't rebalance the root
        if not path:
            return

        with self._get_block(node) as block:
            next_node, count = block.read(b">II")

            with self._get_block(path[-1]) as parent:
                # Find the left and right siblings and respective pivots
                parent_next, parent_count = parent.read(b">II")
                left_pos = None
                left_node = None
                left_pivot = None
                node_pos = None
                right_pos = None
                right_node = None
                right_pivot = None
                prev_e = prev_ptr = prev_pos = None
                for n in range(parent_count):
                    pos = parent.tell()
                    ptr = parent.read(b">I")[0]
                    e = DSStoreEntry.read(parent)

                    if ptr == node:
                        # `e` is the record stored right after our pointer
                        node_pos = pos
                        right_pivot = e
                        left_pos = prev_pos
                        left_pivot = prev_e
                        left_node = prev_ptr
                    elif prev_ptr == node:
                        # The pointer following ours is the right sibling
                        right_node = ptr
                        right_pos = pos
                        break

                    prev_e = e
                    prev_ptr = ptr
                    prev_pos = pos

                if parent_next == node:
                    # `node` is the parent's trailing child
                    node_pos = parent.tell()
                    left_pos = prev_pos
                    left_pivot = prev_e
                    left_node = prev_ptr
                elif right_node is None:
                    right_node = parent_next
                    right_pos = parent.tell()

                # The result of this call is discarded.
                _ = parent.tell()

            if left_node and right_node:
                with self._get_block(left_node) as left, self._get_block(
                    right_node
                ) as right:
                    blocks = [left, block, right]
                    pivots = [left_pivot, right_pivot]

                    entries, pointers, before = self._extract(blocks, pivots)

                    # If there's a chance that we could use two pages instead
                    # of three, go for it
                    pivots = self._split2(
                        blocks, entries, pointers, before, bool(next_node)
                    )
                    if pivots is None:
                        ptrs = [left_node, node, right_node]
                        pivots = self._split3(
                            blocks, entries, pointers, before, bool(next_node)
                        )
                    else:
                        if pivots:
                            ptrs = [left_node, node]
                        else:
                            # Everything fitted into the left node alone
                            ptrs = [left_node]
                            self._store.release(node)
                            self._nodes -= 1
                            node = left_node
                        self._store.release(right_node)
                        self._nodes -= 1
                        self._dirty = True

                    # Remove the pivots from the parent
                    with self._get_block(path[-1]) as parent:
                        if right_node == parent_next:
                            parent.seek(left_pos)
                            parent.delete(right_pos - left_pos)
                            parent_next = left_node
                        else:
                            parent.seek(left_pos + 4)
                            parent.delete(right_pos - left_pos)
                        parent.seek(0)
                        parent_count -= 2
                        parent.write(b">II", parent_next, parent_count)
                        self._records -= 2

                    # Replace with those in pivots
                    # NOTE(review): `_split3` may return None, which would
                    # make this zip() fail -- confirm that cannot happen.
                    for e, rp in zip(pivots, ptrs[1:]):
                        self._insert_inner(path[:-1], path[-1], e, rp)
            elif left_node:
                with self._get_block(left_node) as left:
                    blocks = [left, block]
                    pivots = [left_pivot]

                    entries, pointers, before = self._extract(blocks, pivots)

                    pivots = self._split2(
                        blocks, entries, pointers, before, bool(next_node)
                    )

                    # Remove the pivot from the parent
                    with self._get_block(path[-1]) as parent:
                        if node == parent_next:
                            parent.seek(left_pos)
                            parent.delete(node_pos - left_pos)
                            parent_next = left_node
                        else:
                            parent.seek(left_pos + 4)
                            parent.delete(node_pos - left_pos)
                        parent.seek(0)
                        parent_count -= 1
                        parent.write(b">II", parent_next, parent_count)
                        self._records -= 1

                    # Replace the pivot
                    if pivots:
                        self._insert_inner(path[:-1], path[-1], pivots[0], node)
            elif right_node:
                with self._get_block(right_node) as right:
                    blocks = [block, right]
                    pivots = [right_pivot]

                    entries, pointers, before = self._extract(blocks, pivots)

                    pivots = self._split2(
                        blocks, entries, pointers, before, bool(next_node)
                    )

                    # Remove the pivot from the parent
                    # NOTE(review): `pos` here is whatever the sibling search
                    # loop above left behind, whereas the other branches seek
                    # relative to `left_pos`; presumably `node_pos` was
                    # intended -- verify.
                    with self._get_block(path[-1]) as parent:
                        if right_node == parent_next:
                            parent.seek(pos)
                            parent.delete(right_pos - node_pos)
                            parent_next = node
                        else:
                            parent.seek(pos + 4)
                            parent.delete(right_pos - node_pos)
                        parent.seek(0)
                        parent_count -= 1
                        parent.write(b">II", parent_next, parent_count)
                        self._records -= 1

                    # Replace the pivot
                    if pivots:
                        self._insert_inner(path[:-1], path[-1], pivots[0], right_node)

        # NOTE(review): `path` is never empty at this point (see the early
        # return at the top), so `not path` is always false and this first
        # branch cannot run; if it did, `path[-1]` would raise IndexError.
        # Presumably it was meant to collapse an emptied root -- confirm.
        if not path and not parent_count:
            self._store.release(path[-1])
            self._nodes -= 1
            self._dirty = True
            self._rootnode = node
        else:
            count, used = self._block_usage(path[-1])

            # Propagate upwards while the parent is under half a page
            if used < self._page_size // 2:
                self._rebalance(path[:-1], path[-1])
1009
-
1010
- # Delete from the leaf node `node'. `filename_lc' has already been
1011
- # lower-cased.
1012
- def _delete_leaf(self, node, filename_lc, code):
1013
- found = False
1014
-
1015
- with self._get_block(node) as block:
1016
- next_node, count = block.read(b">II")
1017
-
1018
- for n in range(count):
1019
- pos = block.tell()
1020
- e = DSStoreEntry.read(block)
1021
- if e.filename.lower() == filename_lc and (
1022
- code is None or e.code == code
1023
- ):
1024
- block.seek(pos)
1025
- block.delete(e.byte_length())
1026
- found = True
1027
-
1028
- # This does not affect the loop; THIS IS NOT A BUG
1029
- count -= 1
1030
-
1031
- self._records -= 1
1032
- self._dirty = True
1033
-
1034
- if found:
1035
- used = block.tell()
1036
-
1037
- block.seek(0)
1038
- block.write(b">II", next_node, count)
1039
-
1040
- return used < self._page_size // 2
1041
- else:
1042
- return False
1043
-
1044
- # Remove the largest entry from the subtree starting at `node' (with
1045
- # path from root `path'). Returns a tuple (rebalance, entry) where
1046
- # rebalance is either None if no rebalancing is required, or a
1047
- # (path, node) tuple giving the details of the node to rebalance.
1048
- def _take_largest(self, path, node):
1049
- path = list(path)
1050
- rebalance = None
1051
- while True:
1052
- with self._get_block(node) as block:
1053
- next_node, count = block.read(b">II")
1054
-
1055
- if next_node:
1056
- path.append(node)
1057
- node = next_node
1058
- continue
1059
-
1060
- for n in range(count):
1061
- pos = block.tell()
1062
- e = DSStoreEntry.read(block)
1063
-
1064
- count -= 1
1065
- block.seek(0)
1066
- block.write(b">II", next_node, count)
1067
-
1068
- if pos < self._page_size // 2:
1069
- rebalance = (path, node)
1070
- break
1071
-
1072
- return rebalance, e
1073
-
1074
- # Delete an entry from an inner node, `node'
1075
    def _delete_inner(self, path, node, filename_lc, code):
        """Delete a record from the inner node ``node``.

        The first record whose lower-cased filename equals ``filename_lc``
        (and whose code equals ``code``, unless ``code`` is ``None``) is
        removed together with one neighbouring pointer, and the largest
        record of the subtree to its left is re-inserted in its place.

        Returns ``True`` when the node the replacement was taken from was
        rebalanced, ``False`` otherwise.
        """
        rebalance = False

        with self._get_block(node) as block:
            next_node, count = block.read(b">II")

            for n in range(count):
                pos = block.tell()
                ptr = block.read(b">I")[0]
                e = DSStoreEntry.read(block)
                if e.filename.lower() == filename_lc and (
                    code is None or e.code == code
                ):
                    # Take the largest from the left subtree
                    # NOTE(review): `ptr` is a child of `node`, yet `path`
                    # is passed without `node` appended -- verify that the
                    # path handed back for rebalancing is the intended one.
                    rebalance, largest = self._take_largest(path, ptr)

                    # Delete this entry
                    if n == count - 1:
                        # Last record: the node's trailing pointer becomes
                        # the right pointer, and our own pointer the tail
                        right_ptr = next_node
                        next_node = ptr
                        block.seek(pos)
                    else:
                        # Keep our left pointer; drop the record and the
                        # pointer that follows it
                        right_ptr = block.read(b">I")[0]
                        block.seek(pos + 4)

                    block.delete(e.byte_length() + 4)

                    count -= 1
                    block.seek(0)
                    block.write(b">II", next_node, count)

                    self._records -= 1
                    self._dirty = True

                    break

        # Replace the pivot value
        # NOTE(review): `largest` and `right_ptr` are only bound when the
        # loop above found a match; otherwise this line raises
        # UnboundLocalError.
        self._insert_inner(path, node, largest, right_ptr)

        # Rebalance from the node we stole from
        if rebalance:
            self._rebalance(rebalance[0], rebalance[1])
            return True
        return False
1119
-
1120
def delete(self, filename, code):
    """Delete an item, identified by ``filename`` and ``code`` from the
    B-Tree.

    :param filename: the filename of the entry, or a :class:`DSStoreEntry`
        whose ``filename`` and ``code`` are used instead.
    :param code: the entry code, as ``bytes`` or ``str``.  A ``str`` is
        encoded as Latin-1 before comparison, the same normalisation that
        :meth:`_find` applies.
    :raises ValueError: if ``code`` is ``None``; bulk deletion is not
        implemented.

    If no matching entry is found in the leaf that is reached, the call
    returns without changing anything.
    """
    if isinstance(filename, DSStoreEntry):
        code = filename.code
        filename = filename.filename

    # If we're deleting *every* node for "filename", we must recurse
    if code is None:
        # TODO: Fix this so we can do bulk deletes
        raise ValueError("You must delete items individually. Sorry")

    # Compare like with like: without this a ``str`` code never equals a
    # ``bytes`` entry code, and ``code < e.code`` below raises TypeError.
    if not isinstance(code, bytes):
        code = code.encode("latin_1")

    # Otherwise, we're deleting *one* specific node
    filename_lc = filename.lower()
    path = []
    node = self._rootnode
    while True:
        with self._get_block(node) as block:
            next_node, count = block.read(b">II")
            if next_node:
                # Inner node: each entry is preceded by a child pointer.
                for _ in range(count):
                    ptr = block.read(b">I")[0]
                    e = DSStoreEntry.read(block)
                    e_lc = e.filename.lower()
                    if filename_lc < e_lc or (
                        filename_lc == e_lc and code < e.code
                    ):
                        # Target sorts before this entry: go down its
                        # left child.
                        next_node = ptr
                        break
                    elif filename_lc == e_lc and code == e.code:
                        self._delete_inner(path, node, filename_lc, code)
                        return
                # Either the chosen child, or the header pointer when no
                # entry sorted after the target.
                path.append(node)
                node = next_node
            else:
                # Leaf: delete in place and rebalance when asked to.
                if self._delete_leaf(node, filename_lc, code):
                    self._rebalance(path, node)
                return
1158
-
1159
- # Find implementation
1160
def _find(self, node, filename_lc, code=None):
    """Yield the entries below ``node`` that match ``filename_lc``.

    :param node: block number of the subtree root to search.
    :param filename_lc: lowercased filename to look for.
    :param code: optional entry code; a ``str`` is encoded as Latin-1.
        When ``None`` every entry for the filename is yielded.

    The recursive calls are delegated with ``yield from`` rather than
    ``for e in ...: yield e``.  The loop form rebound ``e`` (the entry just
    read from this node), so the checks that follow could act on the last
    sub-tree result instead of the current entry.
    """
    if code is not None and not isinstance(code, bytes):
        code = code.encode("latin_1")
    with self._get_block(node) as block:
        next_node, count = block.read(b">II")
        if next_node:
            # Inner node: each entry is preceded by a child pointer.
            for _ in range(count):
                ptr = block.read(b">I")[0]
                e = DSStoreEntry.read(block)
                e_lc = e.filename.lower()
                if filename_lc < e_lc:
                    # Everything of interest is in the left child.
                    yield from self._find(ptr, filename_lc, code)
                    return
                elif filename_lc == e_lc:
                    if code is None or (code and code < e.code):
                        yield from self._find(ptr, filename_lc, code)
                    if code is None or code == e.code:
                        yield e
                    elif code < e.code:
                        return
            # Not resolved by any entry: continue through the header
            # pointer.
            yield from self._find(next_node, filename_lc, code)
        else:
            # Leaf node: entries are read back to back.
            for _ in range(count):
                e = DSStoreEntry.read(block)
                if filename_lc == e.filename.lower():
                    if code is None or code == e.code:
                        yield e
                    elif code < e.code:
                        return
1191
-
1192
def find(self, filename, code=None):
    """Return a generator over the entries in the B-Tree that match.

    ``filename`` may be a :class:`DSStoreEntry`, in which case its
    ``filename`` and ``code`` attributes are used.  The filename is
    lowercased before the search starts at the root node.
    """
    if isinstance(filename, DSStoreEntry):
        filename, code = filename.filename, filename.code

    return self._find(self._rootnode, filename.lower(), code)
1202
-
1203
- def __len__(self):
1204
- return self._records
1205
-
1206
- def __iter__(self):
1207
- return self._traverse(self._rootnode)
1208
-
1209
class Partial:
    """This is used to implement indexing.

    A ``Partial`` remembers a store and a filename, so that a second
    subscript can supply the entry code.  Codes may be given as ``bytes``
    or ``str``; a ``str`` is encoded as Latin-1 by every accessor.
    """

    def __init__(self, store, filename):
        self._store = store
        self._filename = filename

    @staticmethod
    def _as_bytes(code):
        """Return ``code`` as ``bytes``, encoding a ``str`` as Latin-1."""
        if isinstance(code, bytes):
            return code
        return code.encode("latin_1")

    def __getitem__(self, code):
        """Return the first entry found for ``code``.

        The result is the bare value when the entry's ``type`` is neither
        ``bytes`` nor ``str``, otherwise a ``(type, value)`` tuple.

        :raises KeyError: if ``code`` is ``None`` or nothing is found.
        """
        if code is None:
            raise KeyError("no such key - [%s][None]" % self._filename)

        code = self._as_bytes(code)

        try:
            item = next(self._store.find(self._filename, code))
        except StopIteration:
            # The StopIteration is an implementation detail; do not chain.
            raise KeyError(
                f"no such key - [{self._filename}][{code}]"
            ) from None

        if not isinstance(item.type, (bytes, str)):
            return item.value

        return (item.type, item.value)

    def __setitem__(self, code, value):
        """Insert an entry for ``code`` into the store.

        When ``codecs`` has an entry for ``code``, that codec is used as
        the entry type and ``value`` is stored as given.  Otherwise
        ``value`` must be a ``(type, value)`` pair.

        :raises KeyError: if ``code`` is ``None``.
        """
        if code is None:
            raise KeyError("bad key - [%s][None]" % self._filename)

        code = self._as_bytes(code)

        codec = codecs.get(code, None)
        if codec:
            entry_type = codec
            entry_value = value
        else:
            entry_type = value[0]
            entry_value = value[1]

        self._store.insert(
            DSStoreEntry(self._filename, code, entry_type, entry_value)
        )

    def __delitem__(self, code):
        """Delete the entry for ``code`` from the store.

        :raises KeyError: if ``code`` is ``None``.
        """
        if code is None:
            raise KeyError("no such key - [%s][None]" % self._filename)

        # Encode like __getitem__/__setitem__ do, so that a ``str`` code
        # identifies the same entry it would when reading or writing.
        self._store.delete(self._filename, self._as_bytes(code))

    def __iter__(self):
        """Iterate over every entry the store finds for this filename."""
        yield from self._store.find(self._filename)
1260
-
1261
- def __getitem__(self, filename):
1262
- return self.Partial(self, filename)