geotree 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.txt +4 -2
- data/README.txt +6 -8
- data/lib/geotree/blockfile.rb +0 -28
- data/lib/geotree/geotree.rb +55 -119
- data/lib/geotree/multitree.rb +2 -21
- data/lib/geotree/pswriter.rb +217 -150
- data/lib/geotree/tools.rb +1 -0
- data/test/test_blockfile.rb +1 -1
- data/test/test_externalsort.rb +15 -1
- data/test/test_geotree.rb +24 -7
- data/test/test_ps.rb +1 -1
- metadata +2 -5
- data/lib/fig/geo_tree.pdf +0 -0
- data/lib/fig/multi_tree.pdf +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 676eca7a9c5e6fa6057d9d0ac7f6635428678934
|
4
|
+
data.tar.gz: ac4607c36e9d32e42f0391686c3cc1c6286ddec1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8060ef2768eb956aca745fb29cd808a573a0317ee3daea5630ec4b1723ad8c9fe253137fabd9cef5d639afff70021a4ca0c1b254d7a1aa64936c73fa6e1b4ceb
|
7
|
+
data.tar.gz: c3a131f6fa5151027b58c3f6107e2d07bfc3bac2a5b6c2ea33ea95d4dd59ea1b60d78f933de8050fd9c97937d32fdfab0be94dc5302427b917c407bdb2ee81d6
|
data/CHANGELOG.txt
CHANGED
data/README.txt
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
# @markup markdown
|
2
2
|
|
3
|
-
|
4
|
-
and supports multiple levels of detail.
|
3
|
+
geotree
|
5
4
|
=======
|
6
|
-
|
5
|
+
A ruby gem that maintains a set of geographical points, reports points lying within a query rectangle,
|
6
|
+
and supports multiple levels of detail.
|
7
7
|
|
8
|
+
Written and (c) by Jeff Sember, April 2013.
|
8
9
|
|
9
10
|
|
10
11
|
GeoTree
|
@@ -16,16 +17,13 @@ size of a city). GeoTrees are disk-based data structures and can store a very l
|
|
16
17
|
number of points efficiently. If desired, for smaller data sets, memory-only trees
|
17
18
|
can be constructed instead.
|
18
19
|
|
19
|
-
[An animation of a GeoTree in action.](
|
20
|
+
[An animation of a GeoTree in action.](http://www.cs.ubc.ca/~jpsember/geo_tree.ps)
|
20
21
|
|
21
22
|
MultiTree
|
22
23
|
-------
|
23
24
|
|
24
|
-
|
25
25
|
The gem includes MultiTree, a GeoTree variant that supports queries at multiple
|
26
26
|
levels of detail. For example, when focusing on a small region it can return points
|
27
27
|
that would be omitted when querying a much larger region.
|
28
28
|
|
29
|
-
[An animation of a MultiTree in action.](
|
30
|
-
|
31
|
-
|
29
|
+
[An animation of a MultiTree in action.](http://www.cs.ubc.ca/~jpsember/multi_tree.ps)
|
data/lib/geotree/blockfile.rb
CHANGED
@@ -67,12 +67,8 @@ class BlockFile
|
|
67
67
|
# @return true if underlying storage already existed
|
68
68
|
#
|
69
69
|
def open
|
70
|
-
db = false
|
71
|
-
# db = true
|
72
|
-
!db || pr("BlockFile.open\n")
|
73
70
|
!open? || raise(IllegalStateException)
|
74
71
|
existed = open_storage
|
75
|
-
!db || pr(" existed=#{existed}\n")
|
76
72
|
if !existed
|
77
73
|
@header_data = alloc_buffer
|
78
74
|
BlockFile.write_int(@header_data, HDR_VERSION_, VERSION_)
|
@@ -92,7 +88,6 @@ class BlockFile
|
|
92
88
|
if BlockFile.read_int(@header_data,HDR_BLOCKSIZE_) != block_size
|
93
89
|
raise ArgumentError,"unexpected block size"
|
94
90
|
end
|
95
|
-
!db || puts(hex_dump_to_string(@header_data,'header data'))
|
96
91
|
@recycle_data = read(rdir_head_name)
|
97
92
|
end
|
98
93
|
existed
|
@@ -104,21 +99,15 @@ class BlockFile
|
|
104
99
|
#
|
105
100
|
def alloc(src = nil)
|
106
101
|
|
107
|
-
db = false
|
108
|
-
!db || pr("blockfile alloc\n")
|
109
|
-
#!db || puts(self.to_s)
|
110
|
-
|
111
102
|
ensure_open
|
112
103
|
|
113
104
|
src ||= alloc_buffer
|
114
105
|
|
115
106
|
# get index of last recycle block directory
|
116
107
|
r_index = rdir_head_name
|
117
|
-
!db||pr(" last recycle block dir=%d\n",r_index)
|
118
108
|
|
119
109
|
# any entries remain in this directory?
|
120
110
|
n_ent = get_rdir_slots_used
|
121
|
-
!db||pr(" n_ent=%d\n",n_ent);
|
122
111
|
|
123
112
|
if n_ent == 0
|
124
113
|
prev_rb_block = get_rdir_next_name
|
@@ -129,17 +118,14 @@ class BlockFile
|
|
129
118
|
r_index = prev_rb_block
|
130
119
|
write_hdr(HDR_RECYCLEINDEX_, r_index)
|
131
120
|
read(prev_rb_block, @recycle_data)
|
132
|
-
!db||pr(" using directory as new block: %d\n",ret)
|
133
121
|
append_or_replace(ret, src)
|
134
122
|
else
|
135
123
|
ret = name_max
|
136
|
-
!db||pr(" using name_max %d\n",ret)
|
137
124
|
append_or_replace(ret, src)
|
138
125
|
end
|
139
126
|
else
|
140
127
|
slot = n_ent - 1;
|
141
128
|
ret = get_rdir_slot(slot)
|
142
|
-
!db || pr(" read slot %d to get %d\n",slot,ret)
|
143
129
|
set_rdir_slot(slot,0)
|
144
130
|
set_rdir_slots_used(slot)
|
145
131
|
append_or_replace(r_index, @recycle_data)
|
@@ -163,16 +149,13 @@ class BlockFile
|
|
163
149
|
set_rdir_slots_used(slot+1)
|
164
150
|
append_or_replace(rdir_head_name, @recycle_data)
|
165
151
|
else
|
166
|
-
|
167
152
|
# use freed block as next recycle page
|
168
|
-
|
169
153
|
old_dir = rdir_head_name
|
170
154
|
|
171
155
|
write_hdr(HDR_RECYCLEINDEX_, block_name)
|
172
156
|
|
173
157
|
read(block_name, @recycle_data)
|
174
158
|
BlockFile.clear_block(@recycle_data)
|
175
|
-
# mark_rc_block
|
176
159
|
|
177
160
|
set_rdir_next_name(old_dir)
|
178
161
|
append_or_replace(block_name, @recycle_data)
|
@@ -295,7 +278,6 @@ class BlockFile
|
|
295
278
|
end
|
296
279
|
|
297
280
|
def BlockFile.copy_block(dest, src)
|
298
|
-
# assert!(dest && src)
|
299
281
|
dest[0..-1] = src
|
300
282
|
end
|
301
283
|
|
@@ -311,10 +293,6 @@ class BlockFile
|
|
311
293
|
# @return buffer
|
312
294
|
#
|
313
295
|
def read(block_name, dest_buffer = nil)
|
314
|
-
db = false
|
315
|
-
# db = true
|
316
|
-
!db || pr("BlockFile read #{block_name}, memory version!\n")
|
317
|
-
|
318
296
|
dest_buffer ||= alloc_buffer
|
319
297
|
if block_name >= @mem_file.size
|
320
298
|
raise ArgumentError,"No such block name #{block_name} exists (size=#{@mem_file.size})"
|
@@ -322,7 +300,6 @@ class BlockFile
|
|
322
300
|
|
323
301
|
src = @mem_file[block_name]
|
324
302
|
BlockFile.copy_block(dest_buffer, src)
|
325
|
-
!db || hex_dump(dest_buffer,"Contents of block #{block_name}")
|
326
303
|
dest_buffer
|
327
304
|
end
|
328
305
|
|
@@ -332,7 +309,6 @@ class BlockFile
|
|
332
309
|
# @param block_name name of block
|
333
310
|
# @param src_buffer data to write
|
334
311
|
def write(block_name, src_buffer)
|
335
|
-
|
336
312
|
if block_name == @mem_file.size
|
337
313
|
@mem_file << alloc_buffer
|
338
314
|
end
|
@@ -415,10 +391,6 @@ class BlockFile
|
|
415
391
|
BlockFile.write_int(@recycle_data,RC_PREV_DIR_NAME_,n)
|
416
392
|
end
|
417
393
|
|
418
|
-
# def mark_rc_block
|
419
|
-
# @recycle_data[RC_BLOCKTYPE_] = BLOCKTYPE_RECYCLE_.chr
|
420
|
-
# end
|
421
|
-
|
422
394
|
# Get name of first recycle directory block (they are connected as
|
423
395
|
# a singly-linked list)
|
424
396
|
#
|
data/lib/geotree/geotree.rb
CHANGED
@@ -5,7 +5,7 @@ req 'diskblockfile ptbuffer'
|
|
5
5
|
module GeoTreeModule
|
6
6
|
#
|
7
7
|
# A variant of a kd-tree, it is capable of maintaining sets of 2D points and efficiently
|
8
|
-
# reporting all points lying within (axis-aligned) query rectangles.
|
8
|
+
# reporting all points lying within (axis-aligned) query rectangles.
|
9
9
|
#
|
10
10
|
# Like a B+ tree, it has a large branching factor
|
11
11
|
# and the nodes are large to improve performance when the tree is stored
|
@@ -14,7 +14,7 @@ module GeoTreeModule
|
|
14
14
|
# A GeoTree is usually stored within a disk file, though it is also possible to
|
15
15
|
# construct a tree that exists only in memory; see the initialize(...) method.
|
16
16
|
#
|
17
|
-
# {An animation of a GeoTree in action.}[link
|
17
|
+
# {An animation of a GeoTree in action.}[link:http://www.cs.ubc.ca/~jpsember/geo_tree.ps]
|
18
18
|
#
|
19
19
|
# Usage:
|
20
20
|
#
|
@@ -41,17 +41,17 @@ module GeoTreeModule
|
|
41
41
|
#
|
42
42
|
# t.close()
|
43
43
|
#
|
44
|
-
#
|
44
|
+
#
|
45
45
|
# One of the problems with kd-trees (including this one) is that they can become
|
46
|
-
# unbalanced after a number of insertions and deletions. To deal with this,
|
46
|
+
# unbalanced after a number of insertions and deletions. To deal with this,
|
47
47
|
# consider these two suggestions:
|
48
48
|
#
|
49
49
|
# 1) When constructing the initial tree, if the datapoints are given in a random
|
50
50
|
# order, the tree will (with high probability) be constructed in a balanced form.
|
51
51
|
# By contrast, consider what happens if the points (1,1), (2,2), (3,3), ... are
|
52
52
|
# added in sequence to an initially empty tree. The tree will be very unbalanced,
|
53
|
-
# with poor performance.
|
54
|
-
# To address this problem, if you are not confident that the points you initially
|
53
|
+
# with poor performance.
|
54
|
+
# To address this problem, if you are not confident that the points you initially
|
55
55
|
# provide are in a sufficiently random sequence, you can enable 'point buffering':
|
56
56
|
#
|
57
57
|
# t = GeoTree.open("treepath.bin")
|
@@ -62,32 +62,26 @@ module GeoTreeModule
|
|
62
62
|
# t.add(dp2) # these points are stored in a temporary disk file
|
63
63
|
# t.add(dp3)
|
64
64
|
# :
|
65
|
-
#
|
65
|
+
#
|
66
66
|
# t.buffering = false # the points will be shuffled into a random sequence and
|
67
67
|
# # added to the tree
|
68
68
|
#
|
69
69
|
#
|
70
|
-
# 2) Periodically, you can start with a new tree, and add all of the datapoints using the
|
70
|
+
# 2) Periodically, you can start with a new tree, and add all of the datapoints using the
|
71
71
|
# above buffering technique. This is easy to do if the datapoints are also stored
|
72
72
|
# externally to the GeoTree (for instance, as parts of larger records in some database).
|
73
|
-
# Otherwise, (i) the datapoints can be retrieved from the tree to an array
|
74
|
-
# (by using a sufficiently large query rectangle), (ii) a new tree can be constructed,
|
73
|
+
# Otherwise, (i) the datapoints can be retrieved from the tree to an array
|
74
|
+
# (by using a sufficiently large query rectangle), (ii) a new tree can be constructed,
|
75
75
|
# and (iii) each of the points in the array can be added to the new tree.
|
76
|
-
#
|
76
|
+
#
|
77
77
|
class GeoTree
|
78
78
|
|
79
79
|
ROOT_NODE_NAME_ = BlockFile::FIRST_BLOCK_ID
|
80
80
|
|
81
81
|
privatize(self)
|
82
82
|
def buffering=(val)
|
83
|
-
db = false
|
84
|
-
# db = true
|
85
|
-
!db || pr("\nSetting buffering to #{val} (was #{@buffer.active})\n\n")
|
86
|
-
|
87
83
|
raise IllegalStateException if !open?
|
88
|
-
|
89
84
|
@buffer.active = val
|
90
|
-
|
91
85
|
end
|
92
86
|
|
93
87
|
# Construct GeoTree
|
@@ -114,7 +108,6 @@ module GeoTreeModule
|
|
114
108
|
root_name = @block_file.alloc(encode_block(root))
|
115
109
|
write_node(root)
|
116
110
|
end
|
117
|
-
|
118
111
|
end
|
119
112
|
|
120
113
|
def open?
|
@@ -164,17 +157,8 @@ module GeoTreeModule
|
|
164
157
|
# @param path path of file; if nil, constructs tree in memory only
|
165
158
|
#
|
166
159
|
def self.open(path = nil)
|
167
|
-
db = false
|
168
|
-
# db = true
|
169
|
-
!db || pr("GeoTree.open path=#{path}\n")
|
170
160
|
bf = nil
|
171
161
|
if path
|
172
|
-
!db || pr(" exists=#{File.file?(path)}\n")
|
173
|
-
|
174
|
-
if (db && File.file?(path))
|
175
|
-
hex_dump(read_text_file(path),"path #{path}")
|
176
|
-
end
|
177
|
-
|
178
162
|
bf = DiskBlockFile.new(KDTREE_BLOCKSIZE, path)
|
179
163
|
end
|
180
164
|
GeoTree.new(bf);
|
@@ -186,7 +170,7 @@ module GeoTreeModule
|
|
186
170
|
#
|
187
171
|
def add(data_point)
|
188
172
|
raise IllegalStateException if !open?
|
189
|
-
|
173
|
+
@buffer.add(data_point)
|
190
174
|
end
|
191
175
|
|
192
176
|
# Remove a datapoint. Returns the datapoint if it was found and removed,
|
@@ -197,9 +181,6 @@ module GeoTreeModule
|
|
197
181
|
|
198
182
|
raise IllegalStateException if @buffer.active
|
199
183
|
|
200
|
-
db = false
|
201
|
-
!db || pr("remove #{data_point}\n")
|
202
|
-
|
203
184
|
removed = nil
|
204
185
|
block do
|
205
186
|
|
@@ -211,13 +192,11 @@ module GeoTreeModule
|
|
211
192
|
|
212
193
|
while !n.leaf
|
213
194
|
|
214
|
-
!db || pr(" add #{n} to internal path\n")
|
215
195
|
internal_path << n
|
216
196
|
|
217
197
|
# find the child that will contain the point
|
218
198
|
child_slot = n.slot_intersecting_line(n.vertical ? data_point.loc.y : data_point.loc.x)
|
219
199
|
next_name = n.slot_child(child_slot)
|
220
|
-
!db || pr(" child_slot=#{child_slot}, next_name=#{next_name}\n")
|
221
200
|
if next_name == 0
|
222
201
|
n = nil
|
223
202
|
break
|
@@ -228,7 +207,6 @@ module GeoTreeModule
|
|
228
207
|
|
229
208
|
# build list of overflow nodes
|
230
209
|
leaf_set = build_leaf_set(n)
|
231
|
-
!db || pr(" built leaf set: #{d(leaf_set)}\n")
|
232
210
|
|
233
211
|
# We now have path containing the path of internal nodes, and leaf_set the leaf nodes
|
234
212
|
|
@@ -289,7 +267,6 @@ module GeoTreeModule
|
|
289
267
|
if inode.population < SPLIT_SIZE/2
|
290
268
|
collapse_internal_node(inode)
|
291
269
|
end
|
292
|
-
|
293
270
|
end
|
294
271
|
end
|
295
272
|
done_operation
|
@@ -411,10 +388,7 @@ module GeoTreeModule
|
|
411
388
|
# Replace an internal node with a leaf node, one containing all the
|
412
389
|
# datapoints in the internal node's subtree.
|
413
390
|
def collapse_internal_node(n)
|
414
|
-
|
415
|
-
!db || pr("internal node population has dropped below half leaf set capacity;\n%s\n",d(n))
|
416
|
-
!db || puts(dump)
|
417
|
-
|
391
|
+
|
418
392
|
dp_set = []
|
419
393
|
node_set = []
|
420
394
|
gather_datapoints(n,dp_set,node_set)
|
@@ -424,11 +398,7 @@ module GeoTreeModule
|
|
424
398
|
"Interior node actual population #{dp_set.size} disagrees with stored value #{n.population};\n#{dump(n)}"
|
425
399
|
end
|
426
400
|
|
427
|
-
!db || pr("\ndp_set=#{d2(dp_set)}\n\n")
|
428
|
-
!db || pr("node_set=#{d2(node_set)}\n\n")
|
429
|
-
|
430
401
|
node_set.each do |n2|
|
431
|
-
!db || pr(" removing #{n2} from mod/cache\n")
|
432
402
|
delete_node(n2)
|
433
403
|
end
|
434
404
|
|
@@ -448,9 +418,6 @@ module GeoTreeModule
|
|
448
418
|
write_node(n)
|
449
419
|
n = n2
|
450
420
|
end
|
451
|
-
|
452
|
-
!db || printf("After collapsing\n#{dump}\n\n")
|
453
|
-
|
454
421
|
end
|
455
422
|
|
456
423
|
def aux_stats(node_name, b,v,overflow,depth, st)
|
@@ -513,10 +480,6 @@ module GeoTreeModule
|
|
513
480
|
# @return locations of partitions (1 + NODEI_CHILDREN of them)
|
514
481
|
#
|
515
482
|
def self.calc_partitions(bounds, unsorted_pts, vertical)
|
516
|
-
db = false
|
517
|
-
# db = true
|
518
|
-
!db || pr("calc_partitions for bounds #{bounds}\n")
|
519
|
-
|
520
483
|
a = []
|
521
484
|
|
522
485
|
# Convert inputs so we need deal only with x coordinates
|
@@ -530,7 +493,6 @@ module GeoTreeModule
|
|
530
493
|
end
|
531
494
|
|
532
495
|
pts = unsorted_pts.sort{|a,b| a.loc.x <=> b.loc.x}
|
533
|
-
!db || pr(" starting with left boundary #{bounds.x}\n")
|
534
496
|
|
535
497
|
# Add location of left boundary
|
536
498
|
a << bounds.x
|
@@ -541,7 +503,6 @@ module GeoTreeModule
|
|
541
503
|
# how many zones are the items cutting it into at present?
|
542
504
|
n_items = pts.size + 1
|
543
505
|
f_step = n_items / (n_zones.to_f)
|
544
|
-
!db || puts(" n_items=#{n_items}, zones=#{n_zones}, step=#{f_step}")
|
545
506
|
while a.size < n_zones
|
546
507
|
f_pos = f_step * a.size
|
547
508
|
left_item = f_pos.floor.to_i
|
@@ -570,10 +531,8 @@ module GeoTreeModule
|
|
570
531
|
x_new = [prev+1, bounds.x + bounds.w].min
|
571
532
|
end
|
572
533
|
|
573
|
-
!db || pr(" adding #{x_new}, for f_step #{f_step}\n")
|
574
534
|
a << x_new
|
575
535
|
end
|
576
|
-
!db || pr("partitions=#{a} (bounds=#{bounds})\n")
|
577
536
|
a
|
578
537
|
end
|
579
538
|
|
@@ -585,10 +544,8 @@ module GeoTreeModule
|
|
585
544
|
end
|
586
545
|
|
587
546
|
def read_node(node_name, bounds, vertical)
|
588
|
-
db = false
|
589
547
|
# Determine if node is in cache
|
590
548
|
n = @cache_dict[node_name]
|
591
|
-
!db || pr("read_node #{node_name}, from cache=#{n}\n")
|
592
549
|
if !n
|
593
550
|
bp = @block_file.read(node_name)
|
594
551
|
n = decode_block(bp, node_name, vertical, bounds)
|
@@ -636,9 +593,6 @@ module GeoTreeModule
|
|
636
593
|
# Encode a node to a block of bytes
|
637
594
|
def encode_block(n)
|
638
595
|
|
639
|
-
db = false
|
640
|
-
!db || pr("encode_block for #{n}\n")
|
641
|
-
|
642
596
|
b = @block_file.alloc_buffer
|
643
597
|
|
644
598
|
flags = 0
|
@@ -664,17 +618,11 @@ module GeoTreeModule
|
|
664
618
|
off += DATAPOINT_INTS
|
665
619
|
end
|
666
620
|
end
|
667
|
-
!db || hex_dump(b)
|
668
|
-
|
669
621
|
b
|
670
622
|
end
|
671
623
|
|
672
624
|
# Decode a node from a block of bytes
|
673
625
|
def decode_block(b, node_name, vertical, bounds)
|
674
|
-
db = false
|
675
|
-
# db = (node_name == 2)
|
676
|
-
!db || pr("decode_block\n")
|
677
|
-
!db || hex_dump(b)
|
678
626
|
|
679
627
|
flags = BlockFile.read_int(b, HDR_FLAGS)
|
680
628
|
type = (flags & 1)
|
@@ -702,7 +650,6 @@ module GeoTreeModule
|
|
702
650
|
off += DATAPOINT_INTS
|
703
651
|
end
|
704
652
|
end
|
705
|
-
!db || pr("decoded to #{n}\n")
|
706
653
|
n
|
707
654
|
end
|
708
655
|
|
@@ -724,9 +671,6 @@ module GeoTreeModule
|
|
724
671
|
# new child nodes.
|
725
672
|
# Returns the new internal node
|
726
673
|
def split_leaf_set(node,path)
|
727
|
-
db = false
|
728
|
-
# db = true
|
729
|
-
!db || pr("\nsplit_leaf_set #{node} bounds=#{node.bounds} vert=#{node.vertical}...\n")
|
730
674
|
|
731
675
|
# list of data points from the leaf node (and its overflow siblings)
|
732
676
|
dp = []
|
@@ -752,12 +696,9 @@ module GeoTreeModule
|
|
752
696
|
n2 = read_node(next_id,b,n2.vertical)
|
753
697
|
end
|
754
698
|
|
755
|
-
!db || pr(" datapoints=#{d(dp)}\n")
|
756
|
-
|
757
699
|
ni = NodeI.new(node.name,node.vertical,node.bounds)
|
758
700
|
|
759
701
|
a = GeoTree.calc_partitions(ni.bounds,dp,ni.vertical)
|
760
|
-
!db || pr(" partitions=#{d(a)}\n")
|
761
702
|
|
762
703
|
a.each_with_index do |posn,i|
|
763
704
|
p = Partition.new(posn,0)
|
@@ -783,15 +724,11 @@ module GeoTreeModule
|
|
783
724
|
end
|
784
725
|
|
785
726
|
def add_data_point(dp, node_name, path, b, v)
|
786
|
-
db = false
|
787
|
-
# db = true
|
788
727
|
|
789
|
-
!db || pr("\n\nadd_data_point #{dp}, node name #{node_name}\n")
|
790
728
|
n = read_node(node_name,b,v)
|
791
729
|
|
792
730
|
# iterate until we have found a leaf node with remaining capacity
|
793
731
|
while true
|
794
|
-
!db || pr(" ...top of iteration\n")
|
795
732
|
|
796
733
|
if (n.leaf)
|
797
734
|
# If the leaf node and overflow nodes have reached a certain size, create a new internal node,
|
@@ -901,7 +838,6 @@ module GeoTreeModule
|
|
901
838
|
end
|
902
839
|
|
903
840
|
def dump_aux(s, n, indent, dc)
|
904
|
-
# assert!(!(dc.member? n.name))
|
905
841
|
dc[n.name] = n.name
|
906
842
|
tab(s,indent)
|
907
843
|
s << n.to_s
|
@@ -931,52 +867,52 @@ module GeoTreeModule
|
|
931
867
|
|
932
868
|
end
|
933
869
|
|
934
|
-
private
|
935
|
-
|
936
|
-
class TreeStats
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
870
|
+
private
|
871
|
+
|
872
|
+
class TreeStats
|
873
|
+
attr_accessor :leaf_count, :interior_count, :overflow_count, :leaf_depth_max
|
874
|
+
def initialize
|
875
|
+
@leaf_count = 0
|
876
|
+
@interior_count = 0
|
877
|
+
@overflow_count = 0
|
878
|
+
@leaf_used_sum = 0
|
879
|
+
@leaf_depth_sum = 0
|
880
|
+
@leaf_depth_max = 0
|
881
|
+
end
|
882
|
+
|
883
|
+
def process_node(n, overflow, depth)
|
884
|
+
if n.leaf
|
885
|
+
@leaf_count += 1
|
886
|
+
@leaf_used_sum += n.used
|
887
|
+
@leaf_depth_sum += depth
|
888
|
+
if overflow
|
889
|
+
@overflow_count += 1
|
890
|
+
end
|
891
|
+
@leaf_depth_max = [@leaf_depth_max,depth].max
|
892
|
+
else
|
893
|
+
@interior_count += 1
|
894
|
+
end
|
895
|
+
end
|
946
896
|
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
897
|
+
def summary
|
898
|
+
s = {}
|
899
|
+
s['leaf_nodes'] = leaf_count
|
900
|
+
s['interior_nodes'] = interior_count
|
901
|
+
s['overflow_nodes'] = overflow_count
|
902
|
+
leaf_usage = 0
|
903
|
+
if (leaf_count > 0)
|
904
|
+
leaf_usage = (@leaf_used_sum / @leaf_count.to_f) / NODEL_CAPACITY
|
954
905
|
end
|
955
|
-
|
956
|
-
|
957
|
-
@
|
906
|
+
s['leaf_usage'] = leaf_usage
|
907
|
+
avg_depth = 0
|
908
|
+
if @leaf_count > 0
|
909
|
+
avg_depth = @leaf_depth_sum / @leaf_count.to_f
|
910
|
+
end
|
911
|
+
s['leaf_depth (avg)'] = avg_depth
|
912
|
+
s['leaf_depth (max)'] = leaf_depth_max
|
913
|
+
s
|
958
914
|
end
|
959
|
-
end
|
960
915
|
|
961
|
-
def summary
|
962
|
-
s = {}
|
963
|
-
s['leaf_nodes'] = leaf_count
|
964
|
-
s['interior_nodes'] = interior_count
|
965
|
-
s['overflow_nodes'] = overflow_count
|
966
|
-
leaf_usage = 0
|
967
|
-
if (leaf_count > 0)
|
968
|
-
leaf_usage = (@leaf_used_sum / @leaf_count.to_f) / NODEL_CAPACITY
|
969
|
-
end
|
970
|
-
s['leaf_usage'] = leaf_usage
|
971
|
-
avg_depth = 0
|
972
|
-
if @leaf_count > 0
|
973
|
-
avg_depth = @leaf_depth_sum / @leaf_count.to_f
|
974
|
-
end
|
975
|
-
s['leaf_depth (avg)'] = avg_depth
|
976
|
-
s['leaf_depth (max)'] = leaf_depth_max
|
977
|
-
s
|
978
916
|
end
|
979
917
|
|
980
918
|
end
|
981
|
-
|
982
|
-
end
|