geotree 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d139e141c3f1de2d164a60d382a57dab14c145cb
4
- data.tar.gz: a4e2951d2f513a39aac941e073dedb6f77a4b3bc
3
+ metadata.gz: 676eca7a9c5e6fa6057d9d0ac7f6635428678934
4
+ data.tar.gz: ac4607c36e9d32e42f0391686c3cc1c6286ddec1
5
5
  SHA512:
6
- metadata.gz: ec83653bcabfe8121c7954dc80550acb11f8578b4eb58b55a989450369b99d2b4d353008c4584d4d7dea6aaf0aab91ed77b3f7abd12086d64f27dd8aa826849f
7
- data.tar.gz: 51fd669841ada4aae664cf500c63e8f358c4c596d4e9860d48075278de0365a82c78f5c7f78b99a5bb8786dd282b21f152aca973dee0dbb7197079ca5d76af8b
6
+ metadata.gz: 8060ef2768eb956aca745fb29cd808a573a0317ee3daea5630ec4b1723ad8c9fe253137fabd9cef5d639afff70021a4ca0c1b254d7a1aa64936c73fa6e1b4ceb
7
+ data.tar.gz: c3a131f6fa5151027b58c3f6107e2d07bfc3bac2a5b6c2ea33ea95d4dd59ea1b60d78f933de8050fd9c97937d32fdfab0be94dc5302427b917c407bdb2ee81d6
@@ -2,6 +2,8 @@
2
2
  * Version 1.0.0
3
3
 
4
4
  2013-04-03
5
- * Version 1.0.1
5
+ * Version 1.1.1
6
6
  * Enhanced README file
7
-
7
+
8
+ * Version 1.1.2
9
+ * Moved figures to personal homepage
data/README.txt CHANGED
@@ -1,10 +1,11 @@
1
1
  # @markup markdown
2
2
 
3
- 'geotree' : A ruby gem that maintains a set of geographical points, reports points lying within a query rectangle,
4
- and supports multiple levels of detail.
3
+ geotree
5
4
  =======
6
- Written and (c) by Jeff Sember, April 2013.
5
+ A ruby gem that maintains a set of geographical points, reports points lying within a query rectangle,
6
+ and supports multiple levels of detail.
7
7
 
8
+ Written and (c) by Jeff Sember, April 2013.
8
9
 
9
10
 
10
11
  GeoTree
@@ -16,16 +17,13 @@ size of a city). GeoTrees are disk-based data structures and can store a very l
16
17
  number of points efficiently. If desired, for smaller data sets, memory-only trees
17
18
  can be constructed instead.
18
19
 
19
- [An animation of a GeoTree in action.](../geotree/lib/fig/geo_tree.pdf "geo_tree.pdf")
20
+ [An animation of a GeoTree in action.](http://www.cs.ubc.ca/~jpsember/geo_tree.ps)
20
21
 
21
22
  MultiTree
22
23
  -------
23
24
 
24
-
25
25
  The gem includes MultiTree, a GeoTree variant that supports queries at multiple
26
26
  levels of detail. For example, when focusing on a small region it can return points
27
27
  that would be omitted when querying a much larger region.
28
28
 
29
- [An animation of a MultiTree in action.](../geotree/lib/fig/multi_tree.pdf "multi_tree.pdf")
30
-
31
-
29
+ [An animation of a MultiTree in action.](http://www.cs.ubc.ca/~jpsember/multi_tree.ps)
@@ -67,12 +67,8 @@ class BlockFile
67
67
  # @return true if underlying storage already existed
68
68
  #
69
69
  def open
70
- db = false
71
- # db = true
72
- !db || pr("BlockFile.open\n")
73
70
  !open? || raise(IllegalStateException)
74
71
  existed = open_storage
75
- !db || pr(" existed=#{existed}\n")
76
72
  if !existed
77
73
  @header_data = alloc_buffer
78
74
  BlockFile.write_int(@header_data, HDR_VERSION_, VERSION_)
@@ -92,7 +88,6 @@ class BlockFile
92
88
  if BlockFile.read_int(@header_data,HDR_BLOCKSIZE_) != block_size
93
89
  raise ArgumentError,"unexpected block size"
94
90
  end
95
- !db || puts(hex_dump_to_string(@header_data,'header data'))
96
91
  @recycle_data = read(rdir_head_name)
97
92
  end
98
93
  existed
@@ -104,21 +99,15 @@ class BlockFile
104
99
  #
105
100
  def alloc(src = nil)
106
101
 
107
- db = false
108
- !db || pr("blockfile alloc\n")
109
- #!db || puts(self.to_s)
110
-
111
102
  ensure_open
112
103
 
113
104
  src ||= alloc_buffer
114
105
 
115
106
  # get index of last recycle block directory
116
107
  r_index = rdir_head_name
117
- !db||pr(" last recycle block dir=%d\n",r_index)
118
108
 
119
109
  # any entries remain in this directory?
120
110
  n_ent = get_rdir_slots_used
121
- !db||pr(" n_ent=%d\n",n_ent);
122
111
 
123
112
  if n_ent == 0
124
113
  prev_rb_block = get_rdir_next_name
@@ -129,17 +118,14 @@ class BlockFile
129
118
  r_index = prev_rb_block
130
119
  write_hdr(HDR_RECYCLEINDEX_, r_index)
131
120
  read(prev_rb_block, @recycle_data)
132
- !db||pr(" using directory as new block: %d\n",ret)
133
121
  append_or_replace(ret, src)
134
122
  else
135
123
  ret = name_max
136
- !db||pr(" using name_max %d\n",ret)
137
124
  append_or_replace(ret, src)
138
125
  end
139
126
  else
140
127
  slot = n_ent - 1;
141
128
  ret = get_rdir_slot(slot)
142
- !db || pr(" read slot %d to get %d\n",slot,ret)
143
129
  set_rdir_slot(slot,0)
144
130
  set_rdir_slots_used(slot)
145
131
  append_or_replace(r_index, @recycle_data)
@@ -163,16 +149,13 @@ class BlockFile
163
149
  set_rdir_slots_used(slot+1)
164
150
  append_or_replace(rdir_head_name, @recycle_data)
165
151
  else
166
-
167
152
  # use freed block as next recycle page
168
-
169
153
  old_dir = rdir_head_name
170
154
 
171
155
  write_hdr(HDR_RECYCLEINDEX_, block_name)
172
156
 
173
157
  read(block_name, @recycle_data)
174
158
  BlockFile.clear_block(@recycle_data)
175
- # mark_rc_block
176
159
 
177
160
  set_rdir_next_name(old_dir)
178
161
  append_or_replace(block_name, @recycle_data)
@@ -295,7 +278,6 @@ class BlockFile
295
278
  end
296
279
 
297
280
  def BlockFile.copy_block(dest, src)
298
- # assert!(dest && src)
299
281
  dest[0..-1] = src
300
282
  end
301
283
 
@@ -311,10 +293,6 @@ class BlockFile
311
293
  # @return buffer
312
294
  #
313
295
  def read(block_name, dest_buffer = nil)
314
- db = false
315
- # db = true
316
- !db || pr("BlockFile read #{block_name}, memory version!\n")
317
-
318
296
  dest_buffer ||= alloc_buffer
319
297
  if block_name >= @mem_file.size
320
298
  raise ArgumentError,"No such block name #{block_name} exists (size=#{@mem_file.size})"
@@ -322,7 +300,6 @@ class BlockFile
322
300
 
323
301
  src = @mem_file[block_name]
324
302
  BlockFile.copy_block(dest_buffer, src)
325
- !db || hex_dump(dest_buffer,"Contents of block #{block_name}")
326
303
  dest_buffer
327
304
  end
328
305
 
@@ -332,7 +309,6 @@ class BlockFile
332
309
  # @param block_name name of block
333
310
  # @param src_buffer data to write
334
311
  def write(block_name, src_buffer)
335
-
336
312
  if block_name == @mem_file.size
337
313
  @mem_file << alloc_buffer
338
314
  end
@@ -415,10 +391,6 @@ class BlockFile
415
391
  BlockFile.write_int(@recycle_data,RC_PREV_DIR_NAME_,n)
416
392
  end
417
393
 
418
- # def mark_rc_block
419
- # @recycle_data[RC_BLOCKTYPE_] = BLOCKTYPE_RECYCLE_.chr
420
- # end
421
-
422
394
  # Get name of first recycle directory block (they are connected as
423
395
  # a singly-linked list)
424
396
  #
@@ -5,7 +5,7 @@ req 'diskblockfile ptbuffer'
5
5
  module GeoTreeModule
6
6
  #
7
7
  # A variant of a kd-tree, it is capable of maintaining sets of 2D points and efficiently
8
- # reporting all points lying within (axis-aligned) query rectangles.
8
+ # reporting all points lying within (axis-aligned) query rectangles.
9
9
  #
10
10
  # Like a B+ tree, it has a large branching factor
11
11
  # and the nodes are large to improve performance when the tree is stored
@@ -14,7 +14,7 @@ module GeoTreeModule
14
14
  # A GeoTree is usually stored within a disk file, though it is also possible to
15
15
  # construct a tree that exists only in memory; see the initialize(...) method.
16
16
  #
17
- # {An animation of a GeoTree in action.}[link:../../doc/geo_tree.pdf]
17
+ # {An animation of a GeoTree in action.}[link:http://www.cs.ubc.ca/~jpsember/geo_tree.ps]
18
18
  #
19
19
  # Usage:
20
20
  #
@@ -41,17 +41,17 @@ module GeoTreeModule
41
41
  #
42
42
  # t.close()
43
43
  #
44
- #
44
+ #
45
45
  # One of the problems with kd-trees (including this one) is that they can become
46
- # unbalanced after a number of insertions and deletions. To deal with this,
46
+ # unbalanced after a number of insertions and deletions. To deal with this,
47
47
  # consider these two suggestions:
48
48
  #
49
49
  # 1) When constructing the initial tree, if the datapoints are given in a random
50
50
  # order, the tree will (with high probability) be constructed in a balanced form.
51
51
  # By contrast, consider what happens if the points (1,1), (2,2), (3,3), ... are
52
52
  # added in sequence to an initially empty tree. The tree will be very unbalanced,
53
- # with poor performance.
54
- # To address this problem, if you are not confident that the points you initially
53
+ # with poor performance.
54
+ # To address this problem, if you are not confident that the points you initially
55
55
  # provide are in a sufficiently random sequence, you can enable 'point buffering':
56
56
  #
57
57
  # t = GeoTree.open("treepath.bin")
@@ -62,32 +62,26 @@ module GeoTreeModule
62
62
  # t.add(dp2) # these points are stored in a temporary disk file
63
63
  # t.add(dp3)
64
64
  # :
65
- #
65
+ #
66
66
  # t.buffering = false # the points will be shuffled into a random sequence and
67
67
  # # added to the tree
68
68
  #
69
69
  #
70
- # 2) Periodically, you can start with a new tree, and add all of the datapoints using the
70
+ # 2) Periodically, you can start with a new tree, and add all of the datapoints using the
71
71
  # above buffering technique. This is easy to do if the datapoints are also stored
72
72
  # externally to the GeoTree (for instance, as parts of larger records in some database).
73
- # Otherwise, (i) the datapoints can be retrieved from the tree to an array
74
- # (by using a sufficiently large query rectangle), (ii) a new tree can be constructed,
73
+ # Otherwise, (i) the datapoints can be retrieved from the tree to an array
74
+ # (by using a sufficiently large query rectangle), (ii) a new tree can be constructed,
75
75
  # and (iii) each of the points in the array can be added to the new tree.
76
- #
76
+ #
77
77
  class GeoTree
78
78
 
79
79
  ROOT_NODE_NAME_ = BlockFile::FIRST_BLOCK_ID
80
80
 
81
81
  privatize(self)
82
82
  def buffering=(val)
83
- db = false
84
- # db = true
85
- !db || pr("\nSetting buffering to #{val} (was #{@buffer.active})\n\n")
86
-
87
83
  raise IllegalStateException if !open?
88
-
89
84
  @buffer.active = val
90
-
91
85
  end
92
86
 
93
87
  # Construct GeoTree
@@ -114,7 +108,6 @@ module GeoTreeModule
114
108
  root_name = @block_file.alloc(encode_block(root))
115
109
  write_node(root)
116
110
  end
117
-
118
111
  end
119
112
 
120
113
  def open?
@@ -164,17 +157,8 @@ module GeoTreeModule
164
157
  # @param path path of file; if nil, constructs tree in memory only
165
158
  #
166
159
  def self.open(path = nil)
167
- db = false
168
- # db = true
169
- !db || pr("GeoTree.open path=#{path}\n")
170
160
  bf = nil
171
161
  if path
172
- !db || pr(" exists=#{File.file?(path)}\n")
173
-
174
- if (db && File.file?(path))
175
- hex_dump(read_text_file(path),"path #{path}")
176
- end
177
-
178
162
  bf = DiskBlockFile.new(KDTREE_BLOCKSIZE, path)
179
163
  end
180
164
  GeoTree.new(bf);
@@ -186,7 +170,7 @@ module GeoTreeModule
186
170
  #
187
171
  def add(data_point)
188
172
  raise IllegalStateException if !open?
189
- @buffer.add(data_point)
173
+ @buffer.add(data_point)
190
174
  end
191
175
 
192
176
  # Remove a datapoint. Returns the datapoint if it was found and removed,
@@ -197,9 +181,6 @@ module GeoTreeModule
197
181
 
198
182
  raise IllegalStateException if @buffer.active
199
183
 
200
- db = false
201
- !db || pr("remove #{data_point}\n")
202
-
203
184
  removed = nil
204
185
  block do
205
186
 
@@ -211,13 +192,11 @@ module GeoTreeModule
211
192
 
212
193
  while !n.leaf
213
194
 
214
- !db || pr(" add #{n} to internal path\n")
215
195
  internal_path << n
216
196
 
217
197
  # find the child that will contain the point
218
198
  child_slot = n.slot_intersecting_line(n.vertical ? data_point.loc.y : data_point.loc.x)
219
199
  next_name = n.slot_child(child_slot)
220
- !db || pr(" child_slot=#{child_slot}, next_name=#{next_name}\n")
221
200
  if next_name == 0
222
201
  n = nil
223
202
  break
@@ -228,7 +207,6 @@ module GeoTreeModule
228
207
 
229
208
  # build list of overflow nodes
230
209
  leaf_set = build_leaf_set(n)
231
- !db || pr(" built leaf set: #{d(leaf_set)}\n")
232
210
 
233
211
  # We now have path containing the path of internal nodes, and leaf_set the leaf nodes
234
212
 
@@ -289,7 +267,6 @@ module GeoTreeModule
289
267
  if inode.population < SPLIT_SIZE/2
290
268
  collapse_internal_node(inode)
291
269
  end
292
-
293
270
  end
294
271
  end
295
272
  done_operation
@@ -411,10 +388,7 @@ module GeoTreeModule
411
388
  # Replace an internal node with a leaf node, one containing all the
412
389
  # datapoints in the internal node's subtree.
413
390
  def collapse_internal_node(n)
414
- db = false
415
- !db || pr("internal node population has dropped below half leaf set capacity;\n%s\n",d(n))
416
- !db || puts(dump)
417
-
391
+
418
392
  dp_set = []
419
393
  node_set = []
420
394
  gather_datapoints(n,dp_set,node_set)
@@ -424,11 +398,7 @@ module GeoTreeModule
424
398
  "Interior node actual population #{dp_set.size} disagrees with stored value #{n.population};\n#{dump(n)}"
425
399
  end
426
400
 
427
- !db || pr("\ndp_set=#{d2(dp_set)}\n\n")
428
- !db || pr("node_set=#{d2(node_set)}\n\n")
429
-
430
401
  node_set.each do |n2|
431
- !db || pr(" removing #{n2} from mod/cache\n")
432
402
  delete_node(n2)
433
403
  end
434
404
 
@@ -448,9 +418,6 @@ module GeoTreeModule
448
418
  write_node(n)
449
419
  n = n2
450
420
  end
451
-
452
- !db || printf("After collapsing\n#{dump}\n\n")
453
-
454
421
  end
455
422
 
456
423
  def aux_stats(node_name, b,v,overflow,depth, st)
@@ -513,10 +480,6 @@ module GeoTreeModule
513
480
  # @return locations of partitions (1 + NODEI_CHILDREN of them)
514
481
  #
515
482
  def self.calc_partitions(bounds, unsorted_pts, vertical)
516
- db = false
517
- # db = true
518
- !db || pr("calc_partitions for bounds #{bounds}\n")
519
-
520
483
  a = []
521
484
 
522
485
  # Convert inputs so we need deal only with x coordinates
@@ -530,7 +493,6 @@ module GeoTreeModule
530
493
  end
531
494
 
532
495
  pts = unsorted_pts.sort{|a,b| a.loc.x <=> b.loc.x}
533
- !db || pr(" starting with left boundary #{bounds.x}\n")
534
496
 
535
497
  # Add location of left boundary
536
498
  a << bounds.x
@@ -541,7 +503,6 @@ module GeoTreeModule
541
503
  # how many zones are the items cutting it into at present?
542
504
  n_items = pts.size + 1
543
505
  f_step = n_items / (n_zones.to_f)
544
- !db || puts(" n_items=#{n_items}, zones=#{n_zones}, step=#{f_step}")
545
506
  while a.size < n_zones
546
507
  f_pos = f_step * a.size
547
508
  left_item = f_pos.floor.to_i
@@ -570,10 +531,8 @@ module GeoTreeModule
570
531
  x_new = [prev+1, bounds.x + bounds.w].min
571
532
  end
572
533
 
573
- !db || pr(" adding #{x_new}, for f_step #{f_step}\n")
574
534
  a << x_new
575
535
  end
576
- !db || pr("partitions=#{a} (bounds=#{bounds})\n")
577
536
  a
578
537
  end
579
538
 
@@ -585,10 +544,8 @@ module GeoTreeModule
585
544
  end
586
545
 
587
546
  def read_node(node_name, bounds, vertical)
588
- db = false
589
547
  # Determine if node is in cache
590
548
  n = @cache_dict[node_name]
591
- !db || pr("read_node #{node_name}, from cache=#{n}\n")
592
549
  if !n
593
550
  bp = @block_file.read(node_name)
594
551
  n = decode_block(bp, node_name, vertical, bounds)
@@ -636,9 +593,6 @@ module GeoTreeModule
636
593
  # Encode a node to a block of bytes
637
594
  def encode_block(n)
638
595
 
639
- db = false
640
- !db || pr("encode_block for #{n}\n")
641
-
642
596
  b = @block_file.alloc_buffer
643
597
 
644
598
  flags = 0
@@ -664,17 +618,11 @@ module GeoTreeModule
664
618
  off += DATAPOINT_INTS
665
619
  end
666
620
  end
667
- !db || hex_dump(b)
668
-
669
621
  b
670
622
  end
671
623
 
672
624
  # Decode a node from a block of bytes
673
625
  def decode_block(b, node_name, vertical, bounds)
674
- db = false
675
- # db = (node_name == 2)
676
- !db || pr("decode_block\n")
677
- !db || hex_dump(b)
678
626
 
679
627
  flags = BlockFile.read_int(b, HDR_FLAGS)
680
628
  type = (flags & 1)
@@ -702,7 +650,6 @@ module GeoTreeModule
702
650
  off += DATAPOINT_INTS
703
651
  end
704
652
  end
705
- !db || pr("decoded to #{n}\n")
706
653
  n
707
654
  end
708
655
 
@@ -724,9 +671,6 @@ module GeoTreeModule
724
671
  # new child nodes.
725
672
  # Returns the new internal node
726
673
  def split_leaf_set(node,path)
727
- db = false
728
- # db = true
729
- !db || pr("\nsplit_leaf_set #{node} bounds=#{node.bounds} vert=#{node.vertical}...\n")
730
674
 
731
675
  # list of data points from the leaf node (and its overflow siblings)
732
676
  dp = []
@@ -752,12 +696,9 @@ module GeoTreeModule
752
696
  n2 = read_node(next_id,b,n2.vertical)
753
697
  end
754
698
 
755
- !db || pr(" datapoints=#{d(dp)}\n")
756
-
757
699
  ni = NodeI.new(node.name,node.vertical,node.bounds)
758
700
 
759
701
  a = GeoTree.calc_partitions(ni.bounds,dp,ni.vertical)
760
- !db || pr(" partitions=#{d(a)}\n")
761
702
 
762
703
  a.each_with_index do |posn,i|
763
704
  p = Partition.new(posn,0)
@@ -783,15 +724,11 @@ module GeoTreeModule
783
724
  end
784
725
 
785
726
  def add_data_point(dp, node_name, path, b, v)
786
- db = false
787
- # db = true
788
727
 
789
- !db || pr("\n\nadd_data_point #{dp}, node name #{node_name}\n")
790
728
  n = read_node(node_name,b,v)
791
729
 
792
730
  # iterate until we have found a leaf node with remaining capacity
793
731
  while true
794
- !db || pr(" ...top of iteration\n")
795
732
 
796
733
  if (n.leaf)
797
734
  # If the leaf node and overflow nodes have reached a certain size, create a new internal node,
@@ -901,7 +838,6 @@ module GeoTreeModule
901
838
  end
902
839
 
903
840
  def dump_aux(s, n, indent, dc)
904
- # assert!(!(dc.member? n.name))
905
841
  dc[n.name] = n.name
906
842
  tab(s,indent)
907
843
  s << n.to_s
@@ -931,52 +867,52 @@ module GeoTreeModule
931
867
 
932
868
  end
933
869
 
934
- private
935
-
936
- class TreeStats
937
- attr_accessor :leaf_count, :interior_count, :overflow_count, :leaf_depth_max
938
- def initialize
939
- @leaf_count = 0
940
- @interior_count = 0
941
- @overflow_count = 0
942
- @leaf_used_sum = 0
943
- @leaf_depth_sum = 0
944
- @leaf_depth_max = 0
945
- end
870
+ private
871
+
872
+ class TreeStats
873
+ attr_accessor :leaf_count, :interior_count, :overflow_count, :leaf_depth_max
874
+ def initialize
875
+ @leaf_count = 0
876
+ @interior_count = 0
877
+ @overflow_count = 0
878
+ @leaf_used_sum = 0
879
+ @leaf_depth_sum = 0
880
+ @leaf_depth_max = 0
881
+ end
882
+
883
+ def process_node(n, overflow, depth)
884
+ if n.leaf
885
+ @leaf_count += 1
886
+ @leaf_used_sum += n.used
887
+ @leaf_depth_sum += depth
888
+ if overflow
889
+ @overflow_count += 1
890
+ end
891
+ @leaf_depth_max = [@leaf_depth_max,depth].max
892
+ else
893
+ @interior_count += 1
894
+ end
895
+ end
946
896
 
947
- def process_node(n, overflow, depth)
948
- if n.leaf
949
- @leaf_count += 1
950
- @leaf_used_sum += n.used
951
- @leaf_depth_sum += depth
952
- if overflow
953
- @overflow_count += 1
897
+ def summary
898
+ s = {}
899
+ s['leaf_nodes'] = leaf_count
900
+ s['interior_nodes'] = interior_count
901
+ s['overflow_nodes'] = overflow_count
902
+ leaf_usage = 0
903
+ if (leaf_count > 0)
904
+ leaf_usage = (@leaf_used_sum / @leaf_count.to_f) / NODEL_CAPACITY
954
905
  end
955
- @leaf_depth_max = [@leaf_depth_max,depth].max
956
- else
957
- @interior_count += 1
906
+ s['leaf_usage'] = leaf_usage
907
+ avg_depth = 0
908
+ if @leaf_count > 0
909
+ avg_depth = @leaf_depth_sum / @leaf_count.to_f
910
+ end
911
+ s['leaf_depth (avg)'] = avg_depth
912
+ s['leaf_depth (max)'] = leaf_depth_max
913
+ s
958
914
  end
959
- end
960
915
 
961
- def summary
962
- s = {}
963
- s['leaf_nodes'] = leaf_count
964
- s['interior_nodes'] = interior_count
965
- s['overflow_nodes'] = overflow_count
966
- leaf_usage = 0
967
- if (leaf_count > 0)
968
- leaf_usage = (@leaf_used_sum / @leaf_count.to_f) / NODEL_CAPACITY
969
- end
970
- s['leaf_usage'] = leaf_usage
971
- avg_depth = 0
972
- if @leaf_count > 0
973
- avg_depth = @leaf_depth_sum / @leaf_count.to_f
974
- end
975
- s['leaf_depth (avg)'] = avg_depth
976
- s['leaf_depth (max)'] = leaf_depth_max
977
- s
978
916
  end
979
917
 
980
918
  end
981
-
982
- end