geotree 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.txt +4 -2
- data/README.txt +6 -8
- data/lib/geotree/blockfile.rb +0 -28
- data/lib/geotree/geotree.rb +55 -119
- data/lib/geotree/multitree.rb +2 -21
- data/lib/geotree/pswriter.rb +217 -150
- data/lib/geotree/tools.rb +1 -0
- data/test/test_blockfile.rb +1 -1
- data/test/test_externalsort.rb +15 -1
- data/test/test_geotree.rb +24 -7
- data/test/test_ps.rb +1 -1
- metadata +2 -5
- data/lib/fig/geo_tree.pdf +0 -0
- data/lib/fig/multi_tree.pdf +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 676eca7a9c5e6fa6057d9d0ac7f6635428678934
|
4
|
+
data.tar.gz: ac4607c36e9d32e42f0391686c3cc1c6286ddec1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8060ef2768eb956aca745fb29cd808a573a0317ee3daea5630ec4b1723ad8c9fe253137fabd9cef5d639afff70021a4ca0c1b254d7a1aa64936c73fa6e1b4ceb
|
7
|
+
data.tar.gz: c3a131f6fa5151027b58c3f6107e2d07bfc3bac2a5b6c2ea33ea95d4dd59ea1b60d78f933de8050fd9c97937d32fdfab0be94dc5302427b917c407bdb2ee81d6
|
data/CHANGELOG.txt
CHANGED
data/README.txt
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
# @markup markdown
|
2
2
|
|
3
|
-
|
4
|
-
and supports multiple levels of detail.
|
3
|
+
geotree
|
5
4
|
=======
|
6
|
-
|
5
|
+
A ruby gem that maintains a set of geographical points, reports points lying within a query rectangle,
|
6
|
+
and supports multiple levels of detail.
|
7
7
|
|
8
|
+
Written and (c) by Jeff Sember, April 2013.
|
8
9
|
|
9
10
|
|
10
11
|
GeoTree
|
@@ -16,16 +17,13 @@ size of a city). GeoTrees are disk-based data structures and can store a very l
|
|
16
17
|
number of points efficiently. If desired, for smaller data sets, memory-only trees
|
17
18
|
can be constructed instead.
|
18
19
|
|
19
|
-
[An animation of a GeoTree in action.](
|
20
|
+
[An animation of a GeoTree in action.](http://www.cs.ubc.ca/~jpsember/geo_tree.ps)
|
20
21
|
|
21
22
|
MultiTree
|
22
23
|
-------
|
23
24
|
|
24
|
-
|
25
25
|
The gem includes MultiTree, a GeoTree variant that supports queries at multiple
|
26
26
|
levels of detail. For example, when focusing on a small region it can return points
|
27
27
|
that would be omitted when querying a much larger region.
|
28
28
|
|
29
|
-
[An animation of a MultiTree in action.](
|
30
|
-
|
31
|
-
|
29
|
+
[An animation of a MultiTree in action.](http://www.cs.ubc.ca/~jpsember/multi_tree.ps)
|
data/lib/geotree/blockfile.rb
CHANGED
@@ -67,12 +67,8 @@ class BlockFile
|
|
67
67
|
# @return true if underlying storage already existed
|
68
68
|
#
|
69
69
|
def open
|
70
|
-
db = false
|
71
|
-
# db = true
|
72
|
-
!db || pr("BlockFile.open\n")
|
73
70
|
!open? || raise(IllegalStateException)
|
74
71
|
existed = open_storage
|
75
|
-
!db || pr(" existed=#{existed}\n")
|
76
72
|
if !existed
|
77
73
|
@header_data = alloc_buffer
|
78
74
|
BlockFile.write_int(@header_data, HDR_VERSION_, VERSION_)
|
@@ -92,7 +88,6 @@ class BlockFile
|
|
92
88
|
if BlockFile.read_int(@header_data,HDR_BLOCKSIZE_) != block_size
|
93
89
|
raise ArgumentError,"unexpected block size"
|
94
90
|
end
|
95
|
-
!db || puts(hex_dump_to_string(@header_data,'header data'))
|
96
91
|
@recycle_data = read(rdir_head_name)
|
97
92
|
end
|
98
93
|
existed
|
@@ -104,21 +99,15 @@ class BlockFile
|
|
104
99
|
#
|
105
100
|
def alloc(src = nil)
|
106
101
|
|
107
|
-
db = false
|
108
|
-
!db || pr("blockfile alloc\n")
|
109
|
-
#!db || puts(self.to_s)
|
110
|
-
|
111
102
|
ensure_open
|
112
103
|
|
113
104
|
src ||= alloc_buffer
|
114
105
|
|
115
106
|
# get index of last recycle block directory
|
116
107
|
r_index = rdir_head_name
|
117
|
-
!db||pr(" last recycle block dir=%d\n",r_index)
|
118
108
|
|
119
109
|
# any entries remain in this directory?
|
120
110
|
n_ent = get_rdir_slots_used
|
121
|
-
!db||pr(" n_ent=%d\n",n_ent);
|
122
111
|
|
123
112
|
if n_ent == 0
|
124
113
|
prev_rb_block = get_rdir_next_name
|
@@ -129,17 +118,14 @@ class BlockFile
|
|
129
118
|
r_index = prev_rb_block
|
130
119
|
write_hdr(HDR_RECYCLEINDEX_, r_index)
|
131
120
|
read(prev_rb_block, @recycle_data)
|
132
|
-
!db||pr(" using directory as new block: %d\n",ret)
|
133
121
|
append_or_replace(ret, src)
|
134
122
|
else
|
135
123
|
ret = name_max
|
136
|
-
!db||pr(" using name_max %d\n",ret)
|
137
124
|
append_or_replace(ret, src)
|
138
125
|
end
|
139
126
|
else
|
140
127
|
slot = n_ent - 1;
|
141
128
|
ret = get_rdir_slot(slot)
|
142
|
-
!db || pr(" read slot %d to get %d\n",slot,ret)
|
143
129
|
set_rdir_slot(slot,0)
|
144
130
|
set_rdir_slots_used(slot)
|
145
131
|
append_or_replace(r_index, @recycle_data)
|
@@ -163,16 +149,13 @@ class BlockFile
|
|
163
149
|
set_rdir_slots_used(slot+1)
|
164
150
|
append_or_replace(rdir_head_name, @recycle_data)
|
165
151
|
else
|
166
|
-
|
167
152
|
# use freed block as next recycle page
|
168
|
-
|
169
153
|
old_dir = rdir_head_name
|
170
154
|
|
171
155
|
write_hdr(HDR_RECYCLEINDEX_, block_name)
|
172
156
|
|
173
157
|
read(block_name, @recycle_data)
|
174
158
|
BlockFile.clear_block(@recycle_data)
|
175
|
-
# mark_rc_block
|
176
159
|
|
177
160
|
set_rdir_next_name(old_dir)
|
178
161
|
append_or_replace(block_name, @recycle_data)
|
@@ -295,7 +278,6 @@ class BlockFile
|
|
295
278
|
end
|
296
279
|
|
297
280
|
def BlockFile.copy_block(dest, src)
|
298
|
-
# assert!(dest && src)
|
299
281
|
dest[0..-1] = src
|
300
282
|
end
|
301
283
|
|
@@ -311,10 +293,6 @@ class BlockFile
|
|
311
293
|
# @return buffer
|
312
294
|
#
|
313
295
|
def read(block_name, dest_buffer = nil)
|
314
|
-
db = false
|
315
|
-
# db = true
|
316
|
-
!db || pr("BlockFile read #{block_name}, memory version!\n")
|
317
|
-
|
318
296
|
dest_buffer ||= alloc_buffer
|
319
297
|
if block_name >= @mem_file.size
|
320
298
|
raise ArgumentError,"No such block name #{block_name} exists (size=#{@mem_file.size})"
|
@@ -322,7 +300,6 @@ class BlockFile
|
|
322
300
|
|
323
301
|
src = @mem_file[block_name]
|
324
302
|
BlockFile.copy_block(dest_buffer, src)
|
325
|
-
!db || hex_dump(dest_buffer,"Contents of block #{block_name}")
|
326
303
|
dest_buffer
|
327
304
|
end
|
328
305
|
|
@@ -332,7 +309,6 @@ class BlockFile
|
|
332
309
|
# @param block_name name of block
|
333
310
|
# @param src_buffer data to write
|
334
311
|
def write(block_name, src_buffer)
|
335
|
-
|
336
312
|
if block_name == @mem_file.size
|
337
313
|
@mem_file << alloc_buffer
|
338
314
|
end
|
@@ -415,10 +391,6 @@ class BlockFile
|
|
415
391
|
BlockFile.write_int(@recycle_data,RC_PREV_DIR_NAME_,n)
|
416
392
|
end
|
417
393
|
|
418
|
-
# def mark_rc_block
|
419
|
-
# @recycle_data[RC_BLOCKTYPE_] = BLOCKTYPE_RECYCLE_.chr
|
420
|
-
# end
|
421
|
-
|
422
394
|
# Get name of first recycle directory block (they are connected as
|
423
395
|
# a singly-linked list)
|
424
396
|
#
|
data/lib/geotree/geotree.rb
CHANGED
@@ -5,7 +5,7 @@ req 'diskblockfile ptbuffer'
|
|
5
5
|
module GeoTreeModule
|
6
6
|
#
|
7
7
|
# A variant of a kd-tree, it is capable of maintaining sets of 2D points and efficiently
|
8
|
-
# reporting all points lying within (axis-aligned) query rectangles.
|
8
|
+
# reporting all points lying within (axis-aligned) query rectangles.
|
9
9
|
#
|
10
10
|
# Like a B+ tree, it has a large branching factor
|
11
11
|
# and the nodes are large to improve performance when the tree is stored
|
@@ -14,7 +14,7 @@ module GeoTreeModule
|
|
14
14
|
# A GeoTree is usually stored within a disk file, though it is also possible to
|
15
15
|
# construct a tree that exists only in memory; see the initialize(...) method.
|
16
16
|
#
|
17
|
-
# {An animation of a GeoTree in action.}[link
|
17
|
+
# {An animation of a GeoTree in action.}[link:http://www.cs.ubc.ca/~jpsember/geo_tree.ps]
|
18
18
|
#
|
19
19
|
# Usage:
|
20
20
|
#
|
@@ -41,17 +41,17 @@ module GeoTreeModule
|
|
41
41
|
#
|
42
42
|
# t.close()
|
43
43
|
#
|
44
|
-
#
|
44
|
+
#
|
45
45
|
# One of the problems with kd-trees (including this one) is that they can become
|
46
|
-
# unbalanced after a number of insertions and deletions. To deal with this,
|
46
|
+
# unbalanced after a number of insertions and deletions. To deal with this,
|
47
47
|
# consider these two suggestions:
|
48
48
|
#
|
49
49
|
# 1) When constructing the initial tree, if the datapoints are given in a random
|
50
50
|
# order, the tree will (with high probability) be constructed in a balanced form.
|
51
51
|
# By contrast, consider what happens if the points (1,1), (2,2), (3,3), ... are
|
52
52
|
# added in sequence to an initially empty tree. The tree will be very unbalanced,
|
53
|
-
# with poor performance.
|
54
|
-
# To address this problem, if you are not confident that the points you initially
|
53
|
+
# with poor performance.
|
54
|
+
# To address this problem, if you are not confident that the points you initially
|
55
55
|
# provide are in a sufficiently random sequence, you can enable 'point buffering':
|
56
56
|
#
|
57
57
|
# t = GeoTree.open("treepath.bin")
|
@@ -62,32 +62,26 @@ module GeoTreeModule
|
|
62
62
|
# t.add(dp2) # these points are stored in a temporary disk file
|
63
63
|
# t.add(dp3)
|
64
64
|
# :
|
65
|
-
#
|
65
|
+
#
|
66
66
|
# t.buffering = false # the points will be shuffled into a random sequence and
|
67
67
|
# # added to the tree
|
68
68
|
#
|
69
69
|
#
|
70
|
-
# 2) Periodically, you can start with a new tree, and add all of the datapoints using the
|
70
|
+
# 2) Periodically, you can start with a new tree, and add all of the datapoints using the
|
71
71
|
# above buffering technique. This is easy to do if the datapoints are also stored
|
72
72
|
# externally to the GeoTree (for instance, as parts of larger records in some database).
|
73
|
-
# Otherwise, (i) the datapoints can be retrieved from the tree to an array
|
74
|
-
# (by using a sufficiently large query rectangle), (ii) a new tree can be constructed,
|
73
|
+
# Otherwise, (i) the datapoints can be retrieved from the tree to an array
|
74
|
+
# (by using a sufficiently large query rectangle), (ii) a new tree can be constructed,
|
75
75
|
# and (iii) each of the points in the array can be added to the new tree.
|
76
|
-
#
|
76
|
+
#
|
77
77
|
class GeoTree
|
78
78
|
|
79
79
|
ROOT_NODE_NAME_ = BlockFile::FIRST_BLOCK_ID
|
80
80
|
|
81
81
|
privatize(self)
|
82
82
|
def buffering=(val)
|
83
|
-
db = false
|
84
|
-
# db = true
|
85
|
-
!db || pr("\nSetting buffering to #{val} (was #{@buffer.active})\n\n")
|
86
|
-
|
87
83
|
raise IllegalStateException if !open?
|
88
|
-
|
89
84
|
@buffer.active = val
|
90
|
-
|
91
85
|
end
|
92
86
|
|
93
87
|
# Construct GeoTree
|
@@ -114,7 +108,6 @@ module GeoTreeModule
|
|
114
108
|
root_name = @block_file.alloc(encode_block(root))
|
115
109
|
write_node(root)
|
116
110
|
end
|
117
|
-
|
118
111
|
end
|
119
112
|
|
120
113
|
def open?
|
@@ -164,17 +157,8 @@ module GeoTreeModule
|
|
164
157
|
# @param path path of file; if nil, constructs tree in memory only
|
165
158
|
#
|
166
159
|
def self.open(path = nil)
|
167
|
-
db = false
|
168
|
-
# db = true
|
169
|
-
!db || pr("GeoTree.open path=#{path}\n")
|
170
160
|
bf = nil
|
171
161
|
if path
|
172
|
-
!db || pr(" exists=#{File.file?(path)}\n")
|
173
|
-
|
174
|
-
if (db && File.file?(path))
|
175
|
-
hex_dump(read_text_file(path),"path #{path}")
|
176
|
-
end
|
177
|
-
|
178
162
|
bf = DiskBlockFile.new(KDTREE_BLOCKSIZE, path)
|
179
163
|
end
|
180
164
|
GeoTree.new(bf);
|
@@ -186,7 +170,7 @@ module GeoTreeModule
|
|
186
170
|
#
|
187
171
|
def add(data_point)
|
188
172
|
raise IllegalStateException if !open?
|
189
|
-
|
173
|
+
@buffer.add(data_point)
|
190
174
|
end
|
191
175
|
|
192
176
|
# Remove a datapoint. Returns the datapoint if it was found and removed,
|
@@ -197,9 +181,6 @@ module GeoTreeModule
|
|
197
181
|
|
198
182
|
raise IllegalStateException if @buffer.active
|
199
183
|
|
200
|
-
db = false
|
201
|
-
!db || pr("remove #{data_point}\n")
|
202
|
-
|
203
184
|
removed = nil
|
204
185
|
block do
|
205
186
|
|
@@ -211,13 +192,11 @@ module GeoTreeModule
|
|
211
192
|
|
212
193
|
while !n.leaf
|
213
194
|
|
214
|
-
!db || pr(" add #{n} to internal path\n")
|
215
195
|
internal_path << n
|
216
196
|
|
217
197
|
# find the child that will contain the point
|
218
198
|
child_slot = n.slot_intersecting_line(n.vertical ? data_point.loc.y : data_point.loc.x)
|
219
199
|
next_name = n.slot_child(child_slot)
|
220
|
-
!db || pr(" child_slot=#{child_slot}, next_name=#{next_name}\n")
|
221
200
|
if next_name == 0
|
222
201
|
n = nil
|
223
202
|
break
|
@@ -228,7 +207,6 @@ module GeoTreeModule
|
|
228
207
|
|
229
208
|
# build list of overflow nodes
|
230
209
|
leaf_set = build_leaf_set(n)
|
231
|
-
!db || pr(" built leaf set: #{d(leaf_set)}\n")
|
232
210
|
|
233
211
|
# We now have path containing the path of internal nodes, and leaf_set the leaf nodes
|
234
212
|
|
@@ -289,7 +267,6 @@ module GeoTreeModule
|
|
289
267
|
if inode.population < SPLIT_SIZE/2
|
290
268
|
collapse_internal_node(inode)
|
291
269
|
end
|
292
|
-
|
293
270
|
end
|
294
271
|
end
|
295
272
|
done_operation
|
@@ -411,10 +388,7 @@ module GeoTreeModule
|
|
411
388
|
# Replace an internal node with a leaf node, one containing all the
|
412
389
|
# datapoints in the internal node's subtree.
|
413
390
|
def collapse_internal_node(n)
|
414
|
-
|
415
|
-
!db || pr("internal node population has dropped below half leaf set capacity;\n%s\n",d(n))
|
416
|
-
!db || puts(dump)
|
417
|
-
|
391
|
+
|
418
392
|
dp_set = []
|
419
393
|
node_set = []
|
420
394
|
gather_datapoints(n,dp_set,node_set)
|
@@ -424,11 +398,7 @@ module GeoTreeModule
|
|
424
398
|
"Interior node actual population #{dp_set.size} disagrees with stored value #{n.population};\n#{dump(n)}"
|
425
399
|
end
|
426
400
|
|
427
|
-
!db || pr("\ndp_set=#{d2(dp_set)}\n\n")
|
428
|
-
!db || pr("node_set=#{d2(node_set)}\n\n")
|
429
|
-
|
430
401
|
node_set.each do |n2|
|
431
|
-
!db || pr(" removing #{n2} from mod/cache\n")
|
432
402
|
delete_node(n2)
|
433
403
|
end
|
434
404
|
|
@@ -448,9 +418,6 @@ module GeoTreeModule
|
|
448
418
|
write_node(n)
|
449
419
|
n = n2
|
450
420
|
end
|
451
|
-
|
452
|
-
!db || printf("After collapsing\n#{dump}\n\n")
|
453
|
-
|
454
421
|
end
|
455
422
|
|
456
423
|
def aux_stats(node_name, b,v,overflow,depth, st)
|
@@ -513,10 +480,6 @@ module GeoTreeModule
|
|
513
480
|
# @return locations of partitions (1 + NODEI_CHILDREN of them)
|
514
481
|
#
|
515
482
|
def self.calc_partitions(bounds, unsorted_pts, vertical)
|
516
|
-
db = false
|
517
|
-
# db = true
|
518
|
-
!db || pr("calc_partitions for bounds #{bounds}\n")
|
519
|
-
|
520
483
|
a = []
|
521
484
|
|
522
485
|
# Convert inputs so we need deal only with x coordinates
|
@@ -530,7 +493,6 @@ module GeoTreeModule
|
|
530
493
|
end
|
531
494
|
|
532
495
|
pts = unsorted_pts.sort{|a,b| a.loc.x <=> b.loc.x}
|
533
|
-
!db || pr(" starting with left boundary #{bounds.x}\n")
|
534
496
|
|
535
497
|
# Add location of left boundary
|
536
498
|
a << bounds.x
|
@@ -541,7 +503,6 @@ module GeoTreeModule
|
|
541
503
|
# how many zones are the items cutting it into at present?
|
542
504
|
n_items = pts.size + 1
|
543
505
|
f_step = n_items / (n_zones.to_f)
|
544
|
-
!db || puts(" n_items=#{n_items}, zones=#{n_zones}, step=#{f_step}")
|
545
506
|
while a.size < n_zones
|
546
507
|
f_pos = f_step * a.size
|
547
508
|
left_item = f_pos.floor.to_i
|
@@ -570,10 +531,8 @@ module GeoTreeModule
|
|
570
531
|
x_new = [prev+1, bounds.x + bounds.w].min
|
571
532
|
end
|
572
533
|
|
573
|
-
!db || pr(" adding #{x_new}, for f_step #{f_step}\n")
|
574
534
|
a << x_new
|
575
535
|
end
|
576
|
-
!db || pr("partitions=#{a} (bounds=#{bounds})\n")
|
577
536
|
a
|
578
537
|
end
|
579
538
|
|
@@ -585,10 +544,8 @@ module GeoTreeModule
|
|
585
544
|
end
|
586
545
|
|
587
546
|
def read_node(node_name, bounds, vertical)
|
588
|
-
db = false
|
589
547
|
# Determine if node is in cache
|
590
548
|
n = @cache_dict[node_name]
|
591
|
-
!db || pr("read_node #{node_name}, from cache=#{n}\n")
|
592
549
|
if !n
|
593
550
|
bp = @block_file.read(node_name)
|
594
551
|
n = decode_block(bp, node_name, vertical, bounds)
|
@@ -636,9 +593,6 @@ module GeoTreeModule
|
|
636
593
|
# Encode a node to a block of bytes
|
637
594
|
def encode_block(n)
|
638
595
|
|
639
|
-
db = false
|
640
|
-
!db || pr("encode_block for #{n}\n")
|
641
|
-
|
642
596
|
b = @block_file.alloc_buffer
|
643
597
|
|
644
598
|
flags = 0
|
@@ -664,17 +618,11 @@ module GeoTreeModule
|
|
664
618
|
off += DATAPOINT_INTS
|
665
619
|
end
|
666
620
|
end
|
667
|
-
!db || hex_dump(b)
|
668
|
-
|
669
621
|
b
|
670
622
|
end
|
671
623
|
|
672
624
|
# Decode a node from a block of bytes
|
673
625
|
def decode_block(b, node_name, vertical, bounds)
|
674
|
-
db = false
|
675
|
-
# db = (node_name == 2)
|
676
|
-
!db || pr("decode_block\n")
|
677
|
-
!db || hex_dump(b)
|
678
626
|
|
679
627
|
flags = BlockFile.read_int(b, HDR_FLAGS)
|
680
628
|
type = (flags & 1)
|
@@ -702,7 +650,6 @@ module GeoTreeModule
|
|
702
650
|
off += DATAPOINT_INTS
|
703
651
|
end
|
704
652
|
end
|
705
|
-
!db || pr("decoded to #{n}\n")
|
706
653
|
n
|
707
654
|
end
|
708
655
|
|
@@ -724,9 +671,6 @@ module GeoTreeModule
|
|
724
671
|
# new child nodes.
|
725
672
|
# Returns the new internal node
|
726
673
|
def split_leaf_set(node,path)
|
727
|
-
db = false
|
728
|
-
# db = true
|
729
|
-
!db || pr("\nsplit_leaf_set #{node} bounds=#{node.bounds} vert=#{node.vertical}...\n")
|
730
674
|
|
731
675
|
# list of data points from the leaf node (and its overflow siblings)
|
732
676
|
dp = []
|
@@ -752,12 +696,9 @@ module GeoTreeModule
|
|
752
696
|
n2 = read_node(next_id,b,n2.vertical)
|
753
697
|
end
|
754
698
|
|
755
|
-
!db || pr(" datapoints=#{d(dp)}\n")
|
756
|
-
|
757
699
|
ni = NodeI.new(node.name,node.vertical,node.bounds)
|
758
700
|
|
759
701
|
a = GeoTree.calc_partitions(ni.bounds,dp,ni.vertical)
|
760
|
-
!db || pr(" partitions=#{d(a)}\n")
|
761
702
|
|
762
703
|
a.each_with_index do |posn,i|
|
763
704
|
p = Partition.new(posn,0)
|
@@ -783,15 +724,11 @@ module GeoTreeModule
|
|
783
724
|
end
|
784
725
|
|
785
726
|
def add_data_point(dp, node_name, path, b, v)
|
786
|
-
db = false
|
787
|
-
# db = true
|
788
727
|
|
789
|
-
!db || pr("\n\nadd_data_point #{dp}, node name #{node_name}\n")
|
790
728
|
n = read_node(node_name,b,v)
|
791
729
|
|
792
730
|
# iterate until we have found a leaf node with remaining capacity
|
793
731
|
while true
|
794
|
-
!db || pr(" ...top of iteration\n")
|
795
732
|
|
796
733
|
if (n.leaf)
|
797
734
|
# If the leaf node and overflow nodes have reached a certain size, create a new internal node,
|
@@ -901,7 +838,6 @@ module GeoTreeModule
|
|
901
838
|
end
|
902
839
|
|
903
840
|
def dump_aux(s, n, indent, dc)
|
904
|
-
# assert!(!(dc.member? n.name))
|
905
841
|
dc[n.name] = n.name
|
906
842
|
tab(s,indent)
|
907
843
|
s << n.to_s
|
@@ -931,52 +867,52 @@ module GeoTreeModule
|
|
931
867
|
|
932
868
|
end
|
933
869
|
|
934
|
-
private
|
935
|
-
|
936
|
-
class TreeStats
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
870
|
+
private
|
871
|
+
|
872
|
+
class TreeStats
|
873
|
+
attr_accessor :leaf_count, :interior_count, :overflow_count, :leaf_depth_max
|
874
|
+
def initialize
|
875
|
+
@leaf_count = 0
|
876
|
+
@interior_count = 0
|
877
|
+
@overflow_count = 0
|
878
|
+
@leaf_used_sum = 0
|
879
|
+
@leaf_depth_sum = 0
|
880
|
+
@leaf_depth_max = 0
|
881
|
+
end
|
882
|
+
|
883
|
+
def process_node(n, overflow, depth)
|
884
|
+
if n.leaf
|
885
|
+
@leaf_count += 1
|
886
|
+
@leaf_used_sum += n.used
|
887
|
+
@leaf_depth_sum += depth
|
888
|
+
if overflow
|
889
|
+
@overflow_count += 1
|
890
|
+
end
|
891
|
+
@leaf_depth_max = [@leaf_depth_max,depth].max
|
892
|
+
else
|
893
|
+
@interior_count += 1
|
894
|
+
end
|
895
|
+
end
|
946
896
|
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
897
|
+
def summary
|
898
|
+
s = {}
|
899
|
+
s['leaf_nodes'] = leaf_count
|
900
|
+
s['interior_nodes'] = interior_count
|
901
|
+
s['overflow_nodes'] = overflow_count
|
902
|
+
leaf_usage = 0
|
903
|
+
if (leaf_count > 0)
|
904
|
+
leaf_usage = (@leaf_used_sum / @leaf_count.to_f) / NODEL_CAPACITY
|
954
905
|
end
|
955
|
-
|
956
|
-
|
957
|
-
@
|
906
|
+
s['leaf_usage'] = leaf_usage
|
907
|
+
avg_depth = 0
|
908
|
+
if @leaf_count > 0
|
909
|
+
avg_depth = @leaf_depth_sum / @leaf_count.to_f
|
910
|
+
end
|
911
|
+
s['leaf_depth (avg)'] = avg_depth
|
912
|
+
s['leaf_depth (max)'] = leaf_depth_max
|
913
|
+
s
|
958
914
|
end
|
959
|
-
end
|
960
915
|
|
961
|
-
def summary
|
962
|
-
s = {}
|
963
|
-
s['leaf_nodes'] = leaf_count
|
964
|
-
s['interior_nodes'] = interior_count
|
965
|
-
s['overflow_nodes'] = overflow_count
|
966
|
-
leaf_usage = 0
|
967
|
-
if (leaf_count > 0)
|
968
|
-
leaf_usage = (@leaf_used_sum / @leaf_count.to_f) / NODEL_CAPACITY
|
969
|
-
end
|
970
|
-
s['leaf_usage'] = leaf_usage
|
971
|
-
avg_depth = 0
|
972
|
-
if @leaf_count > 0
|
973
|
-
avg_depth = @leaf_depth_sum / @leaf_count.to_f
|
974
|
-
end
|
975
|
-
s['leaf_depth (avg)'] = avg_depth
|
976
|
-
s['leaf_depth (max)'] = leaf_depth_max
|
977
|
-
s
|
978
916
|
end
|
979
917
|
|
980
918
|
end
|
981
|
-
|
982
|
-
end
|