RubyGems - geotree - Versions diffs - 1.1.1 → 1.1.2 - Mend

geotree 1.1.1 → 1.1.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Files changed (15)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: d139e141c3f1de2d164a60d382a57dab14c145cb
-  data.tar.gz: a4e2951d2f513a39aac941e073dedb6f77a4b3bc
+  metadata.gz: 676eca7a9c5e6fa6057d9d0ac7f6635428678934
+  data.tar.gz: ac4607c36e9d32e42f0391686c3cc1c6286ddec1
 SHA512:
-  metadata.gz: ec83653bcabfe8121c7954dc80550acb11f8578b4eb58b55a989450369b99d2b4d353008c4584d4d7dea6aaf0aab91ed77b3f7abd12086d64f27dd8aa826849f
-  data.tar.gz: 51fd669841ada4aae664cf500c63e8f358c4c596d4e9860d48075278de0365a82c78f5c7f78b99a5bb8786dd282b21f152aca973dee0dbb7197079ca5d76af8b
+  metadata.gz: 8060ef2768eb956aca745fb29cd808a573a0317ee3daea5630ec4b1723ad8c9fe253137fabd9cef5d639afff70021a4ca0c1b254d7a1aa64936c73fa6e1b4ceb
+  data.tar.gz: c3a131f6fa5151027b58c3f6107e2d07bfc3bac2a5b6c2ea33ea95d4dd59ea1b60d78f933de8050fd9c97937d32fdfab0be94dc5302427b917c407bdb2ee81d6

data/CHANGELOG.txt CHANGED

@@ -2,6 +2,8 @@
   * Version 1.0.0
 2013-04-03
-  * Version 1.0.1
+  * Version 1.1.1
   * Enhanced README file
+  * Version 1.1.2
+  * Moved figures to personal homepage

data/README.txt CHANGED

@@ -1,10 +1,11 @@
 # @markup markdown
-'geotree' : A ruby gem that maintains a set of geographical points, reports points lying within a query rectangle,
-and supports multiple levels of detail.
+geotree
 =======
-Written and (c) by Jeff Sember, April 2013.
+A ruby gem that maintains a set of geographical points, reports points lying within a query rectangle,
+and supports multiple levels of detail.
+Written and (c) by Jeff Sember, April 2013.
 GeoTree
@@ -16,16 +17,13 @@ size of a city).  GeoTrees are disk-based data structures and can store a very l
 number of points efficiently.  If desired, for smaller data sets, memory-only trees
 can be constructed instead.
-[An animation of a GeoTree in action.](../geotree/lib/fig/geo_tree.pdf "geo_tree.pdf")
+[An animation of a GeoTree in action.](http://www.cs.ubc.ca/~jpsember/geo_tree.ps)
 MultiTree
 -------
 The gem includes MultiTree, a GeoTree variant that supports queries at multiple
 levels of detail. For example, when focusing on a small region it can return points
 that would be omitted when querying a much larger region.
-[An animation of a MultiTree in action.](../geotree/lib/fig/multi_tree.pdf "multi_tree.pdf")
+[An animation of a MultiTree in action.](http://www.cs.ubc.ca/~jpsember/multi_tree.ps)

data/lib/geotree/blockfile.rb CHANGED

@@ -67,12 +67,8 @@ class BlockFile
   # @return true if underlying storage already existed
   #
   def open
-    db = false
-#    db = true
-    !db || pr("BlockFile.open\n")
     !open? || raise(IllegalStateException)
     existed = open_storage
-    !db || pr(" existed=#{existed}\n")
     if !existed
       @header_data = alloc_buffer
       BlockFile.write_int(@header_data, HDR_VERSION_, VERSION_)
@@ -92,7 +88,6 @@ class BlockFile
       if BlockFile.read_int(@header_data,HDR_BLOCKSIZE_) != block_size
         raise ArgumentError,"unexpected block size"
       end
-      !db || puts(hex_dump_to_string(@header_data,'header data'))
       @recycle_data = read(rdir_head_name)
     end
     existed
@@ -104,21 +99,15 @@ class BlockFile
   #
   def alloc(src = nil)
-    db = false
-    !db || pr("blockfile alloc\n")
-    #!db || puts(self.to_s)
     ensure_open
     src ||= alloc_buffer
     # get index of last recycle block directory
     r_index = rdir_head_name
-    !db||pr(" last recycle block dir=%d\n",r_index)
     # any entries remain in this directory?
     n_ent = get_rdir_slots_used
-    !db||pr(" n_ent=%d\n",n_ent);
     if n_ent == 0
       prev_rb_block = get_rdir_next_name
@@ -129,17 +118,14 @@ class BlockFile
         r_index = prev_rb_block
         write_hdr(HDR_RECYCLEINDEX_, r_index)
         read(prev_rb_block, @recycle_data)
-        !db||pr(" using directory as new block: %d\n",ret)
         append_or_replace(ret, src)
       else
         ret = name_max
-        !db||pr(" using name_max %d\n",ret)
         append_or_replace(ret, src)
       end
     else
       slot = n_ent - 1;
       ret = get_rdir_slot(slot)
-      !db || pr(" read slot %d to get %d\n",slot,ret)
       set_rdir_slot(slot,0)
       set_rdir_slots_used(slot)
       append_or_replace(r_index, @recycle_data)
@@ -163,16 +149,13 @@ class BlockFile
       set_rdir_slots_used(slot+1)
       append_or_replace(rdir_head_name, @recycle_data)
     else
       # use freed block as next recycle page
       old_dir = rdir_head_name
       write_hdr(HDR_RECYCLEINDEX_, block_name)
       read(block_name, @recycle_data)
       BlockFile.clear_block(@recycle_data)
-      #      mark_rc_block
       set_rdir_next_name(old_dir)
       append_or_replace(block_name, @recycle_data)
@@ -295,7 +278,6 @@ class BlockFile
   end
   def BlockFile.copy_block(dest, src)
-    #  assert!(dest && src)
     dest[0..-1] = src
   end
@@ -311,10 +293,6 @@ class BlockFile
   # @return buffer
   #
   def read(block_name, dest_buffer = nil)
-    db = false
-#    db = true
-    !db || pr("BlockFile read #{block_name}, memory version!\n")
     dest_buffer ||= alloc_buffer
     if block_name >= @mem_file.size
       raise ArgumentError,"No such block name #{block_name} exists (size=#{@mem_file.size})"
@@ -322,7 +300,6 @@ class BlockFile
     src = @mem_file[block_name]
     BlockFile.copy_block(dest_buffer, src)
-    !db || hex_dump(dest_buffer,"Contents of block #{block_name}")
     dest_buffer
   end
@@ -332,7 +309,6 @@ class BlockFile
   # @param block_name name of block
   # @param src_buffer data to write
   def write(block_name, src_buffer)
     if  block_name == @mem_file.size
       @mem_file << alloc_buffer
     end
@@ -415,10 +391,6 @@ class BlockFile
     BlockFile.write_int(@recycle_data,RC_PREV_DIR_NAME_,n)
   end
-  #  def mark_rc_block
-  #    @recycle_data[RC_BLOCKTYPE_] = BLOCKTYPE_RECYCLE_.chr
-  #  end
   # Get name of first recycle directory block (they are connected as
   # a singly-linked list)
   #

data/lib/geotree/geotree.rb CHANGED

@@ -5,7 +5,7 @@ req 'diskblockfile ptbuffer'
 module GeoTreeModule
   #
   # A variant of a kd-tree, it is capable of maintaining sets of 2D points and efficiently
-  # reporting all points lying within (axis-aligned) query rectangles.
+  # reporting all points lying within (axis-aligned) query rectangles.
   #
   # Like a B+ tree, it has a large branching factor
   # and the nodes are large to improve performance when the tree is stored
@@ -14,7 +14,7 @@ module GeoTreeModule
   # A GeoTree is usually stored within a disk file, though it is also possible to
   # construct a tree that exists only in memory; see the initialize(...) method.
   #
-  # {An animation of a GeoTree in action.}[link:../../doc/geo_tree.pdf]
+  # {An animation of a GeoTree in action.}[link:http://www.cs.ubc.ca/~jpsember/geo_tree.ps]
   #
   # Usage:
   #
@@ -41,17 +41,17 @@ module GeoTreeModule
   #
   #      t.close()
   #
-  #
+  #
   # One of the problems with kd-trees (including this one) is that they can become
-  # unbalanced after a number of insertions and deletions.  To deal with this,
+  # unbalanced after a number of insertions and deletions.  To deal with this,
   # consider these two suggestions:
   #
   #  1) When constructing the initial tree, if the datapoints are given in a random
   #     order, the tree will (with high probability) be constructed in a balanced form.
   #     By contrast, consider what happens if the points (1,1), (2,2), (3,3), ... are
   #     added in sequence to an initially empty tree.  The tree will be very unbalanced,
-  #     with poor performance.
-  #     To address this problem, if you are not confident that the points you initially
+  #     with poor performance.
+  #     To address this problem, if you are not confident that the points you initially
   #     provide are in a sufficiently random sequence, you can enable 'point buffering':
   #
   #      t = GeoTree.open("treepath.bin")
@@ -62,32 +62,26 @@ module GeoTreeModule
   #      t.add(dp2)           # these points are stored in a temporary disk file
   #      t.add(dp3)
   #         :
-  #
+  #
   #      t.buffering = false  # the points will be shuffled into a random sequence and
   #                           # added to the tree
   #
   #
-  #   2) Periodically, you can start with a new tree, and add all of the datapoints using the
+  #   2) Periodically, you can start with a new tree, and add all of the datapoints using the
   #      above buffering technique.  This is easy to do if the datapoints are also stored
   #      externally to the GeoTree (for instance, as parts of larger records in some database).
-  #      Otherwise, (i) the datapoints can be retrieved from the tree to an array
-  #      (by using a sufficiently large query rectangle), (ii) a new tree can be constructed,
+  #      Otherwise, (i) the datapoints can be retrieved from the tree to an array
+  #      (by using a sufficiently large query rectangle), (ii) a new tree can be constructed,
   #      and (iii) each of the points in the array can be added to the new tree.
-  #
+  #
   class GeoTree
     ROOT_NODE_NAME_ = BlockFile::FIRST_BLOCK_ID
     privatize(self)
     def buffering=(val)
-      db = false
-      #      db = true
-      !db || pr("\nSetting buffering to #{val} (was #{@buffer.active})\n\n")
       raise IllegalStateException if !open?
       @buffer.active = val
     end
     # Construct GeoTree
@@ -114,7 +108,6 @@ module GeoTreeModule
         root_name = @block_file.alloc(encode_block(root))
         write_node(root)
       end
     end
     def open?
@@ -164,17 +157,8 @@ module GeoTreeModule
     # @param path path of file; if nil, constructs tree in memory only
     #
     def self.open(path = nil)
-      db = false
-      #      db = true
-      !db || pr("GeoTree.open path=#{path}\n")
       bf = nil
       if path
-        !db || pr("   exists=#{File.file?(path)}\n")
-        if (db && File.file?(path))
-          hex_dump(read_text_file(path),"path #{path}")
-        end
         bf = DiskBlockFile.new(KDTREE_BLOCKSIZE, path)
       end
       GeoTree.new(bf);
@@ -186,7 +170,7 @@ module GeoTreeModule
     #
     def add(data_point)
       raise IllegalStateException if !open?
-        @buffer.add(data_point)
+      @buffer.add(data_point)
     end
     # Remove a datapoint.  Returns the datapoint if it was found and removed,
@@ -197,9 +181,6 @@ module GeoTreeModule
       raise IllegalStateException if @buffer.active
-      db = false
-      !db || pr("remove #{data_point}\n")
       removed = nil
       block do
@@ -211,13 +192,11 @@ module GeoTreeModule
         while !n.leaf
-          !db || pr(" add #{n} to internal path\n")
           internal_path << n
           # find the child that will contain the point
           child_slot = n.slot_intersecting_line(n.vertical ? data_point.loc.y : data_point.loc.x)
           next_name = n.slot_child(child_slot)
-          !db || pr(" child_slot=#{child_slot}, next_name=#{next_name}\n")
           if next_name == 0
             n = nil
             break
@@ -228,7 +207,6 @@ module GeoTreeModule
         # build list of overflow nodes
         leaf_set = build_leaf_set(n)
-        !db || pr(" built leaf set: #{d(leaf_set)}\n")
         # We now have path containing the path of internal nodes, and leaf_set the leaf nodes
@@ -289,7 +267,6 @@ module GeoTreeModule
           if inode.population < SPLIT_SIZE/2
             collapse_internal_node(inode)
           end
         end
       end
       done_operation
@@ -411,10 +388,7 @@ module GeoTreeModule
     # Replace an internal node with a leaf node, one containing all the
     # datapoints in the internal node's subtree.
     def collapse_internal_node(n)
-      db = false
-      !db || pr("internal node population has dropped below half leaf set capacity;\n%s\n",d(n))
-      !db || puts(dump)
       dp_set = []
       node_set = []
       gather_datapoints(n,dp_set,node_set)
@@ -424,11 +398,7 @@ module GeoTreeModule
         "Interior node actual population #{dp_set.size} disagrees with stored value #{n.population};\n#{dump(n)}"
       end
-      !db || pr("\ndp_set=#{d2(dp_set)}\n\n")
-      !db || pr("node_set=#{d2(node_set)}\n\n")
       node_set.each do |n2|
-        !db || pr(" removing #{n2} from mod/cache\n")
         delete_node(n2)
       end
@@ -448,9 +418,6 @@ module GeoTreeModule
         write_node(n)
         n = n2
       end
-      !db || printf("After collapsing\n#{dump}\n\n")
     end
     def aux_stats(node_name, b,v,overflow,depth, st)
@@ -513,10 +480,6 @@ module GeoTreeModule
     # @return locations of partitions (1 + NODEI_CHILDREN of them)
     #
     def self.calc_partitions(bounds, unsorted_pts, vertical)
-      db = false
-      #      db = true
-      !db || pr("calc_partitions for bounds #{bounds}\n")
       a = []
       # Convert inputs so we need deal only with x coordinates
@@ -530,7 +493,6 @@ module GeoTreeModule
       end
       pts = unsorted_pts.sort{|a,b| a.loc.x <=> b.loc.x}
-      !db || pr(" starting with left boundary #{bounds.x}\n")
       # Add location of left boundary
       a << bounds.x
@@ -541,7 +503,6 @@ module GeoTreeModule
       # how many zones are the items cutting it into at present?
       n_items = pts.size + 1
       f_step = n_items / (n_zones.to_f)
-      !db || puts(" n_items=#{n_items}, zones=#{n_zones}, step=#{f_step}")
       while a.size < n_zones
         f_pos = f_step * a.size
         left_item = f_pos.floor.to_i
@@ -570,10 +531,8 @@ module GeoTreeModule
           x_new = [prev+1, bounds.x + bounds.w].min
         end
-        !db || pr("  adding #{x_new}, for f_step #{f_step}\n")
         a << x_new
       end
-      !db ||     pr("partitions=#{a}  (bounds=#{bounds})\n")
       a
     end
@@ -585,10 +544,8 @@ module GeoTreeModule
     end
     def read_node(node_name, bounds, vertical)
-      db = false
       # Determine if node is in cache
       n = @cache_dict[node_name]
-      !db || pr("read_node #{node_name}, from cache=#{n}\n")
       if !n
         bp = @block_file.read(node_name)
         n = decode_block(bp, node_name, vertical, bounds)
@@ -636,9 +593,6 @@ module GeoTreeModule
     # Encode a node to a block of bytes
     def encode_block(n)
-      db = false
-      !db || pr("encode_block for #{n}\n")
       b = @block_file.alloc_buffer
       flags = 0
@@ -664,17 +618,11 @@ module GeoTreeModule
           off += DATAPOINT_INTS
         end
       end
-      !db || hex_dump(b)
       b
     end
     # Decode a node from a block of bytes
     def decode_block(b, node_name, vertical, bounds)
-      db = false
-      #  db = (node_name == 2)
-      !db || pr("decode_block\n")
-      !db || hex_dump(b)
       flags = BlockFile.read_int(b, HDR_FLAGS)
       type = (flags & 1)
@@ -702,7 +650,6 @@ module GeoTreeModule
           off += DATAPOINT_INTS
         end
       end
-      !db || pr("decoded to #{n}\n")
       n
     end
@@ -724,9 +671,6 @@ module GeoTreeModule
     # new child nodes.
     # Returns the new internal node
     def split_leaf_set(node,path)
-      db = false
-      #      db = true
-      !db || pr("\nsplit_leaf_set #{node}  bounds=#{node.bounds}  vert=#{node.vertical}...\n")
       # list of data points from the leaf node (and its overflow siblings)
       dp = []
@@ -752,12 +696,9 @@ module GeoTreeModule
         n2 = read_node(next_id,b,n2.vertical)
       end
-      !db || pr(" datapoints=#{d(dp)}\n")
       ni = NodeI.new(node.name,node.vertical,node.bounds)
       a = GeoTree.calc_partitions(ni.bounds,dp,ni.vertical)
-      !db || pr(" partitions=#{d(a)}\n")
       a.each_with_index do |posn,i|
         p = Partition.new(posn,0)
@@ -783,15 +724,11 @@ module GeoTreeModule
     end
     def add_data_point(dp, node_name, path, b, v)
-      db = false
-      # db = true
-      !db || pr("\n\nadd_data_point #{dp},  node name #{node_name}\n")
       n = read_node(node_name,b,v)
       # iterate until we have found a leaf node with remaining capacity
       while true
-        !db || pr(" ...top of iteration\n")
         if (n.leaf)
           # If the leaf node and overflow nodes have reached a certain size, create a new internal node,
@@ -901,7 +838,6 @@ module GeoTreeModule
     end
     def dump_aux(s, n, indent, dc)
-      #      assert!(!(dc.member? n.name))
       dc[n.name] = n.name
       tab(s,indent)
       s << n.to_s
@@ -931,52 +867,52 @@ module GeoTreeModule
   end
-private
-class TreeStats
-  attr_accessor :leaf_count, :interior_count, :overflow_count, :leaf_depth_max
-  def initialize
-    @leaf_count = 0
-    @interior_count = 0
-    @overflow_count = 0
-    @leaf_used_sum = 0
-    @leaf_depth_sum = 0
-    @leaf_depth_max = 0
-  end
+  private
+  class TreeStats
+    attr_accessor :leaf_count, :interior_count, :overflow_count, :leaf_depth_max
+    def initialize
+      @leaf_count = 0
+      @interior_count = 0
+      @overflow_count = 0
+      @leaf_used_sum = 0
+      @leaf_depth_sum = 0
+      @leaf_depth_max = 0
+    end
+    def process_node(n, overflow, depth)
+      if n.leaf
+        @leaf_count += 1
+        @leaf_used_sum += n.used
+        @leaf_depth_sum += depth
+        if overflow
+          @overflow_count += 1
+        end
+        @leaf_depth_max = [@leaf_depth_max,depth].max
+      else
+        @interior_count += 1
+      end
+    end
-  def process_node(n, overflow, depth)
-    if n.leaf
-      @leaf_count += 1
-      @leaf_used_sum += n.used
-      @leaf_depth_sum += depth
-      if overflow
-        @overflow_count += 1
+    def summary
+      s = {}
+      s['leaf_nodes'] = leaf_count
+      s['interior_nodes'] = interior_count
+      s['overflow_nodes'] = overflow_count
+      leaf_usage = 0
+      if (leaf_count > 0)
+        leaf_usage = (@leaf_used_sum / @leaf_count.to_f) / NODEL_CAPACITY
       end
-      @leaf_depth_max = [@leaf_depth_max,depth].max
-    else
-      @interior_count += 1
+      s['leaf_usage'] = leaf_usage
+      avg_depth = 0
+      if @leaf_count > 0
+        avg_depth = @leaf_depth_sum / @leaf_count.to_f
+      end
+      s['leaf_depth (avg)'] = avg_depth
+      s['leaf_depth (max)'] = leaf_depth_max
+      s
     end
-  end
-  def summary
-    s = {}
-    s['leaf_nodes'] = leaf_count
-    s['interior_nodes'] = interior_count
-    s['overflow_nodes'] = overflow_count
-    leaf_usage = 0
-    if (leaf_count > 0)
-      leaf_usage = (@leaf_used_sum / @leaf_count.to_f) / NODEL_CAPACITY
-    end
-    s['leaf_usage'] = leaf_usage
-    avg_depth = 0
-    if @leaf_count > 0
-      avg_depth = @leaf_depth_sum / @leaf_count.to_f
-    end
-    s['leaf_depth (avg)'] = avg_depth
-    s['leaf_depth (max)'] = leaf_depth_max
-    s
   end
 end
-end