RubyGems - data_structures_rmolinari - Versions diffs - 0.5.3 → 0.5.5 - Mend

data_structures_rmolinari 0.5.3 → 0.5.5

Files changed (11) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +21 -1
data/README.md +3 -0
data/lib/data_structures_rmolinari/algorithms.rb +5 -11
data/lib/data_structures_rmolinari/heap.rb +4 -3
data/lib/data_structures_rmolinari/max_priority_search_tree.rb +223 -30
data/lib/data_structures_rmolinari/min_priority_search_tree.rb +64 -27
data/lib/data_structures_rmolinari/segment_tree.rb +34 -2
data/lib/data_structures_rmolinari/segment_tree_template.rb +1 -0
data/lib/data_structures_rmolinari/shared.rb +2 -5
metadata +4 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 78d2f04135a4e241d64b8d6c71fbe05d84c1995ee2ce58362df313a9c75f527b
-  data.tar.gz: 6def12eef0b4aeb5ef12eff37f89bb49b695c60369e02c407969255cf03c5f57
+  metadata.gz: 3ec5653a5bec033196042ad1af1e850877696eb02a1c37a6efdf1c89ce8e726c
+  data.tar.gz: 88893649d4329549fd2d2994f9b643614b46eb1d2d5b541418d6ad183f8f9f02
 SHA512:
-  metadata.gz: '08a7b5abf025e232d5863310860f5b0e2bfea014688750b34e48028cc2a57f4bc1ba4d0638974e64342fe377d729dd7feab0b985db684035f6e8905f8f7b708c'
-  data.tar.gz: 923ac00b27e35c41d392d465aacdd73dd8313d46ac66a741eb008355df2abbb6c10ee62bfe8bce9e5a8b0623be06dea98a298f5ef07efb10cb5cfc0ec447bf66
+  metadata.gz: 5c2a7ba2ca4630ae925358130d4b85297ea081d37ce032fe0db5e2b13d9c84cbef5be58f9d82027feb63cc476b59200c993d608f48a9db8871594415544aab32
+  data.tar.gz: fc5ad5901038397b7d42eb44ee2b199654612f043c2eeceeb05b380da54c9e4db858612daf3dff7eb8ae580c0af0905ddc237867580f44057a0d9df6e063df6c

data/CHANGELOG.md CHANGED Viewed

@@ -2,7 +2,27 @@
 ## [Unreleased]
-## [0.5.0] 2023-02.03
+## [0.5.5] 2023-12-19
+Support Ruby v3.2.
+## [0.5.4] 2023-12-12
+(Unfortunately this note was added long after the changes were made and my memory of the changes is poor.)
+- SegmentTree
+  - Sum version is provided
+- PrioritySearchTree
+  - Open regions
+- Some bug fixes
+- Some refactoring of test cases
+## [0.5.1] - [0.5.3]
+- Releases to fix some bad gemspec data.
+## [0.5.0] 2023-02-03
 - SegmentTree
   - Reorganize the code into a SegmentTree submodule.

data/README.md CHANGED Viewed

@@ -106,6 +106,9 @@ There is no `smallest_x_in_3_sided(x0, x1, y0)`. Just use `smallest_x_in_ne(x0,
 (These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
 [[McC85]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.)
+Each method also has a named parameter `open:` that makes the search region an open set. For example, if we call `smallest_x_in_ne`
+with `open: true` then we consider points satisfying x > x0 and y > y0. The default value for this parameter is always `false`.
 The single-point queries run in O(log n) time, where n is the size of P, while `enumerate_3_sided` runs in O(m + log n), where m is
 the number of points actually enumerated.

data/lib/data_structures_rmolinari/algorithms.rb CHANGED Viewed

@@ -32,16 +32,10 @@ module DataStructuresRMolinari::Algorithms
     x_max = sorted_points.last.x
     y_min, y_max = sorted_points.map(&:y).minmax
-    # Half of the smallest non-zero gap between x values. This is needed below
-    epsilon = INFINITY
     # Enumerate type 1
     sorted_points.each_cons(2) do |pt1, pt2|
       next if pt1.x == pt2.x
-      d = (pt2.x.to_f - pt1.x) / 2
-      epsilon = d if d < epsilon
       yield [pt1.x, pt2.x, y_min, y_max]
     end
@@ -54,9 +48,9 @@ module DataStructuresRMolinari::Algorithms
       next if pt.y == y_max # 0 area
       next if pt.y == y_min # type 1
-      # Epsilon means we don't just get pt back again. The De et al. paper is rather vague.
-      left_bound  = max_pst.largest_x_in_nw( pt.x - epsilon, pt.y)
-      right_bound = max_pst.smallest_x_in_ne(pt.x + epsilon, pt.y)
+      # Open region means we don't just get pt back again. The De et al. paper is rather vague.
+      left_bound  = max_pst.largest_x_in_nw(pt.x, pt.y, open: true)
+      right_bound = max_pst.smallest_x_in_ne(pt.x, pt.y, open: true)
       left = left_bound.x.infinite? ? x_min : left_bound.x
       right = right_bound.x.infinite? ? x_max : right_bound.x
@@ -74,7 +68,7 @@ module DataStructuresRMolinari::Algorithms
     #
     #      largest_y_in_3_sided(l, r, y_min)
     #
-    # That call considers the points in the closed region l <= x <= r and y >= y_min, so we use l + epsilon and r - epsilon.
+    # That call considers the points in the closed region l <= x <= r and y >= y_min, so we use an open search region instead.
     until max_pst.empty?
       top_pt = max_pst.delete_top!
       top = top_pt.y
@@ -85,7 +79,7 @@ module DataStructuresRMolinari::Algorithms
       r = x_max
       loop do
-        next_pt = max_pst.largest_y_in_3_sided(l + epsilon, r - epsilon, y_min)
+        next_pt = max_pst.largest_y_in_3_sided(l, r, y_min, open: true)
         bottom = next_pt.y.infinite? ? y_min : next_pt.y
         yield [l, r, bottom, top]

data/lib/data_structures_rmolinari/heap.rb CHANGED Viewed

@@ -78,8 +78,9 @@ class DataStructuresRMolinari::Heap
   # Insert a new element into the heap with the given priority.
   # @param value the item to be inserted.
   #   - If the heap is addressable (the default) it is an error to insert an item that is already present in the heap.
-  # @param priority the priority to use for new item. The values used as priorities must be totally ordered via +<=>+.
-  def insert(value, priority)
+  # @param (optional) priority the priority to use for new item. The values used as priorities must be totally ordered via +<=>+.
+  #        If omitted we use the inserted value as its own priority.
+  def insert(value, priority = value)
     raise DataError, "Heap already contains #{value}" if @addressable && contains?(value)
     @size += 1
@@ -103,7 +104,7 @@ class DataStructuresRMolinari::Heap
   # Return the top of the heap and remove it, updating the structure to maintain the necessary properties.
   # @return (see #top)
   def pop
-    result = top
+    result = top # raises if empty
     assign(@data[@size], root)
     @data[@size] = nil

data/lib/data_structures_rmolinari/max_priority_search_tree.rb CHANGED Viewed

@@ -30,6 +30,10 @@ require_relative 'shared'
 #
 # The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
 #
+# Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
+# +smallest_x_in_ne+ with +open: true+ then we consider points satisfying x > x0 and y > y0. The default value for this parameter
+# is always +false+. See below for limitations in this functionality.
+#
 # If the MaxPST is constructed to be "dynamic" we also have an operation that deletes the top element.
 #
 # - +delete_top!+: remove the top (max-y) element of the tree and return it.
@@ -39,14 +43,123 @@ require_relative 'shared'
 # In the current implementation no two points can share an x-value. This restriction can be relaxed with some more complicated code,
 # but it hasn't been written yet. See issue #9.
 #
-# There is a related data structure called the Min-max priority search tree so we have called this a "Max priority search tree", or
+# There is a related data structure called a Min-max priority search tree so we have called this a "Max priority search tree", or
 # MaxPST.
 #
+# ## Open regions: limitations
+#
+# Calls involving open regions - using the +open:+ argument - are implemented internally using closed regions in which the
+# boundaries have been "nudged" by a tiny amount so as to exclude points on the boundary. Since there are only finitely many points
+# in the PST there are no limit points, and the open region given by x > x0 and y > y0 contains the same PST members as a closed
+# region x >= x0 + e and y >= y0 + e for small enough values of e.
+#
+# But it is hard to determine e robustly. Indeed, assume for the moment that all x- and y-values are floating-point, i.e., IEEE754
+# double-precision. We can easily determine a value for e: the smallest difference between any two distinct x-values or any two
+# distinct y-values. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive
+# x-values
+#
+#    0, 5e-324, 1, and 2.
+#
+# (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
+# values are represented, 1 + e = 1.0. Any query on an open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
+# and we may get the wrong result.
+#
+# The solution here is to replace (x0, y0) with (x0.next_float, y0.next_float) rather than (x0 + e, y0 + e). +Float::next_float+ is
+# an implementation of the IEEE754 operation +nextafter+ which gives, for a floating point value z, the smallest floating point
+# value larger than z. If our PST contains only points with finite, floating point coordinates, then this approach implements open
+# search regions correctly. This is what the implementation currently does.
+#
+# However, when coordinates are not all Floats there are cases when this approach will fail. Consider the case in which we have the
+# following consecutive x-values:
+#
+#   0, 1e-324, 2e-324, 1.
+#
+# (Here 1e-324 and 2e-324 are the Ruby values +Rational(1, 10**324)+ and +Rational(2, 10**324)+.) Then, given an argument x0 =
+# 1e-324, +x0.to_f == 0.0+ and so +x0.to_f.next_float == 5e-324+ and the region we use internally for our query incorrectly excludes
+# the point with x value 2e-324. This is a bug in the code.
+#
+# There are also issues with numeric values (Integer or Rational) that are larger than the maximum floating point value,
+# approximately 1.8e308. For such values z, +z.to_f == Float::INFINITY+, and we will incorrectly exclude any larger-but-finite
+# coordinates from the search region.
+#
+# Yet more issues arise when the coordinates of points in the PST aren't numeric at all, but are some other sort of comparable
+# objects, such as arrays.
+#
+# So, we may say that queries on open regions will work as expected if either
+# - all coordinates of the points in the PST are finite Ruby Floats, or
+# - all coordinates of the points are finite Numeric values and there is no pair of x-values s < t (or pair of y-values s < t) for
+#   which +s.to_f.next_float > t+.
+#
+# Otherwise, use this functionality at your own risk, and not at all with coordinates that do not respond reasonably to +to_f+.
+#
 # References:
 # * E.M. McCreight, _Priority search trees_, SIAM J. Comput., 14(2):257-276, 1985.
-# * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational
-#   Geometry, 2011
+# * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational Geometry, 2011
 class DataStructuresRMolinari::MaxPrioritySearchTree
+  # IMPLEMENTATION NOTES
+  #
+  # Open regions
+  #
+  # The search methods each have an argument +open:+ that changes the search region from closed (x >= x0) to open (x > x0). I had
+  # initially intended to implement this by varying the internals of the search code. But this turned out to be error-prone because
+  # the code is written for closed regions. When deciding which children to take in the next level of the tree, say, we assume the
+  # search space is closed, sometimes in a way that means we won't find the optimal point when the search region is open. Changing
+  # the logic turned out to be finicky and buggy.
+  #
+  # It is much easier and safer to replace a search request on, say (x0, y0, open) with (x0 + e, y0 + e, closed) where e[psilon] is
+  # small enough that we don't exclude any points other than those on the boundary of the closed region. Then we can just call the
+  # existing search code as-is. This is what the code did at first. We calculated e as the smallest difference between any two
+  # distinct x-values or distinct y-values.
+  #
+  # But this approach is not robust. Assume for the moment that all x- and y-values are floating-point. We can easily determine the
+  # value e. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive x-values
+  #
+  #    0, 5e-324, 1, and 2.
+  #
+  # (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
+  # values are represented, 1 + e = 1.0. Any query on an open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
+  # and we may get the wrong result.
+  #
+  # I see the following possible approaches.
+  #
+  # 1. Rewrite the code to do open regions "properly"
+  #    Pro:
+  #      - we don't need to worry about numerical issues.
+  #    Con:
+  #      - too complicated and error-prone.
+  #
+  # 2. Instead of calculating e like this, replace each bounding value x with x.next_float or x.prev_float as required.
+  #    Note that #next_float gives the next-largest value representable as a floating point value.
+  #    Pro:
+  #      - we don't need to worry about the scaling issues in type Float
+  #      - simple and supported by the Ruby libraries (and by the C standard library if we decide to implement as a C extension)
+  #    Con:
+  #      - [minor] will fail with Float::INFINITY.
+  #        - this is an unlikely edge case that could be handled directly or simply documented away.
+  #      - won't work if the x0 value is a Numeric outside of Float range, roughly [-1.798e308, 1.798e308]
+  #        - For example, (10**400).to_f.next_float == Infinity
+  #        - We could warn about this case in documentation
+  #        - For numeric values x in the Float range we would need to check that x.to_f.next_float > x, but I suspect that this is
+  #          guaranteed.
+  #      - won't work with comparable but non-numeric values, like arrays, or some sort of user-defined type
+  #        - We would simply have to document that this case is not supported (or just throw an exception on +#to_f+)
+  #
+  # 3. Handle numeric values on a case-by-case basis. So for numeric values x in the float range we use x.to_f.next_float while for
+  #    other values - like BigDecimal - do something different that depends on the next value in the data set with an x-value
+  #    greater than x.
+  #    Pro:
+  #      - more cases are handled
+  #    Con:
+  #      - complicated and perhaps non-performant in the general case
+  #      - doesn't handle non-numeric cases (just like idea 2)
+  #      - possibly error-prone in corner cases.
+  #
+  #    Idea: maintain @x_chain and @y_chain hashes mapping each distinct x/y values to the next largest and smallest such
+  #    value. Then just use that. Lookup is fast. Downside: O(n) extra memory.
+  #
+  # For now approach 2 looks best. It doesn't cover all cases, but covers cases most likely in practice - the Float range is large -
+  # and other cases can be documented away in a clean way.
   include Shared
   include BinaryTreeArithmetic
@@ -83,35 +196,54 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
   # Return the highest point in P to the "northeast" of (x0, y0).
   #
-  # Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the northeast quadrant defined by the point (x0, y0):
+  # - \[x0, infty) X [y0, infty) if +open+ is false and
+  # - (x0, infty) X (y0, infty) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (infty, -infty) if Q \intersect P is empty and
   # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_y_in_ne(x0, y0)
-    largest_y_in_quadrant(x0, y0, :ne)
+  def largest_y_in_ne(x0, y0, open: false)
+    if open
+      largest_y_in_quadrant(slightly_bigger(x0), slightly_bigger(y0), :ne)
+    else
+      largest_y_in_quadrant(x0, y0, :ne)
+    end
   end
   # Return the highest point in P to the "northwest" of (x0, y0).
   #
-  # Let Q = (-infty, x0] X [y0, infty) be the northwest quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the northwest quadrant defined by the point (x0, y0):
+  # - (-infty, x0] X [y0, infty) if +open+ is false and
+  # - (-infty, x0) X (y0, infty) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (-infty, -infty) if Q \intersect P is empty and
   # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_y_in_nw(x0, y0)
-    largest_y_in_quadrant(x0, y0, :nw)
+  def largest_y_in_nw(x0, y0, open: false)
+    if open
+      largest_y_in_quadrant(slightly_smaller(x0), slightly_bigger(y0), :nw)
+    else
+      largest_y_in_quadrant(x0, y0, :nw)
+    end
   end
-  # The basic algorithm is from De et al. section 3.1. We have generalaized it slightly to allow it to calculate both largest_y_in_ne and
-  # largest_y_in_nw
+  # The basic algorithm is from De et al. section 3.1. We have generalized it slightly to allow it to calculate both
+  # largest_y_in_ne and largest_y_in_nw
   #
-  # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinty, y0) so we don't really need this. But it's a bit faster than the
-  # general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data structure.
+  # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infty, y0) so we don't really need this. But it's a bit faster
+  # than the general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data
+  # structure.
   #
   # From the paper:
   #
@@ -137,6 +269,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
       sufficient_x = ->(x) { x <= x0 }
     end
+    return best if empty?
     # x == x0 or is not sufficient. This test sometimes excludes the other child of a node from consideration.
     exclusionary_x = ->(x) { x == x0 || !sufficient_x.call(x) }
@@ -200,28 +334,46 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
   # Return the leftmost (min-x) point in P to the northeast of (x0, y0).
   #
-  # Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the northeast quadrant defined by the point (x0, y0):
+  # - \[x0, infty) X [y0, infty) if +open+ is false and
+  # - (x0, infty) X (y0, infty) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (infty, infty) if Q \intersect P is empty and
   # - the leftmost (min-x) point in Q \intersect P otherwise.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_x_in_ne(x0, y0)
-    extremal_in_x_dimension(x0, y0, :ne)
+  def smallest_x_in_ne(x0, y0, open: false)
+    if open
+      extremal_in_x_dimension(slightly_bigger(x0), slightly_bigger(y0), :ne)
+    else
+      extremal_in_x_dimension(x0, y0, :ne)
+    end
   end
   # Return the rightmost (max-x) point in P to the northwest of (x0, y0).
   #
-  # Let Q = (-infty, x0] X [y0, infty) be the northwest quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the northwest quadrant defined by the point (x0, y0):
+  # - (-infty, x0] X [y0, infty) if +open+ is false and
+  # - (-infty, x0) X (y0, infty) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (-infty, infty) if Q \intersect P is empty and
   # - the rightmost (max-x) point in Q \intersect P otherwise.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_x_in_nw(x0, y0)
-    extremal_in_x_dimension(x0, y0, :nw)
+  def largest_x_in_nw(x0, y0, open: false)
+    if open
+      extremal_in_x_dimension(slightly_smaller(x0), slightly_bigger(y0), :nw)
+    else
+      extremal_in_x_dimension(x0, y0, :nw)
+    end
   end
   # A genericized version of the paper's smallest_x_in_ne that can calculate either smallest_x_in_ne or largest_x_in_nw as specified via a
@@ -369,14 +521,26 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
   # Return the highest point of P in the box bounded by x0, x1, and y0.
   #
-  # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
-  # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
+  # Let Q be the "three-sided" box bounded by x0, x1, and y0:
+  # - \[x0, x1] X [y0, infty) if +open+ is false and
+  # - (x0, x1) X (y0, infty) if +open+ is true.
+  #
+  # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
+  #
+  # Let P be the set of points in the MaxPST.
+  #
+  # Define p* as
   #
   # - (infty, -infty) if Q \intersect P is empty and
   # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_y_in_3_sided(x0, x1, y0)
+  def largest_y_in_3_sided(x0, x1, y0, open: false)
+    if open
+      x0 = slightly_bigger(x0)
+      x1 = slightly_smaller(x1)
+      y0 = slightly_bigger(y0)
+    end
     # From the paper:
     #
     #    The three real numbers x0, x1, and y0 define the three-sided range Q = [x0,x1] X [y0,∞). If Q \intersect P is not \empty,
@@ -572,14 +736,27 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
   # Enumerate the points of P in the box bounded by x0, x1, and y0.
   #
-  # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
-  # MaxPST. (Note that Q is empty if x1 < x0.) We find an enumerate all the points in Q \intersect P.
+  # Let Q be the "three-sided" box bounded by x0, x1, and y0:
+  # - \[x0, x1] X [y0, infty) if +open+ is false and
+  # - (x0, x1) X (y0, infty) if +open+ is true.
+  #
+  # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
+  #
+  # Let P be the set of points in the MaxPST.
+  #
+  # We find and enumerate all the points in Q \intersect P.
   #
   # If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
   # the intersection.
   #
   # This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
-  def enumerate_3_sided(x0, x1, y0)
+  def enumerate_3_sided(x0, x1, y0, open: false)
+    if open
+      x0 = slightly_bigger(x0)
+      x1 = slightly_smaller(x1)
+      y0 = slightly_bigger(y0)
+    end
     # From the paper
     #
     #     Given three real numbers x0, x1, and y0 define the three sided range Q = [x0, x1] X [y0, infty). Algorithm
@@ -1143,7 +1320,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
     # We follow the algorithm in the paper by De, Maheshwari et al, which takes O(n log^2 n) time. Their follow-up paper that
     # defines the Min-max PST, describes how to do the construction in O(n log n) time, but it is more complex and probably not
-    # worth the trouble of both a bespoke heapsort the special sorting algorithm of Katajainen and Pasanen.
+    # worth the trouble of both a bespoke heapsort and the special sorting algorithm of Katajainen and Pasanen.
     # Since we are building an implicit binary tree, things are simpler if the array is 1-based. This requires a malloc (perhaps)
     # and memcpy (for sure), which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
@@ -1216,6 +1393,22 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
     @data[l..r] = @data[l..r].sort_by(&:x)
   end
+  # The smallest floating point number larger than x
+  private def slightly_bigger(x)
+    x_f = x.to_f
+    raise "#{x} out of Float range" if x_f.infinite?
+    x_f.next_float
+  end
+  # The largest floating point number smaller than x
+  private def slightly_smaller(x)
+    x_f = x.to_f
+    raise "#{x} out of Float range" if x_f.infinite?
+    x_f.prev_float
+  end
   ########################################
   # Debugging support
   #

data/lib/data_structures_rmolinari/min_priority_search_tree.rb CHANGED Viewed

@@ -22,8 +22,7 @@ require_relative 'shared'
 #   et al. But we don't do that, as we create a separate array of Points.
 # - Whereas the implementation of MaxPST means that client code gets the same (x, y) objects back in results as it passed into the
 #   constructor, that's not the case here.
-#   - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return
-#    (different) instances of +Point+ in response to queries.
+#   - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return (different) instances of +Point+ in response to queries.
 #   - client code is unlikely to care, but be aware of this, just in case.
 #
 # Given a set of n points, we can answer the following questions quickly:
@@ -37,6 +36,10 @@ require_relative 'shared'
 #
 # (Here, "leftmost/rightmost" means "minimal/maximal x", and "lowest" means "minimal y".)
 #
+# Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
+# +smallest_x_in_se+ with +open: true+ then we consider points satisfying x > x0 and y < y0. The default value for this parameter
+# is always +false+. See the documentation of MaxPrioritySearchTree for limitations of this support.
+#
 # The first 5 operations take O(log n) time and O(1) extra space.
 #
 # The final operation (enumerate) takes O(m + log n) time and O(1) extra space, where m is the number of points that are enumerated.
@@ -77,28 +80,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
   # Return the "lowest" point in P to the "southeast" of (x0, y0).
   #
-  # Let Q = [x0, infty) X (infty, y0] be the southeast quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the southeast quadrant defined by the point (x0, y0):
+  # - \[x0, infty) X (-infty, y0] if +open+ is false and
+  # - (x0, infty) X (-infty, y0) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (infty, infty) if Q \intersect P is empty and
   # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_y_in_se(x0, y0)
-    flip @max_pst.largest_y_in_ne(x0, -y0)
+  def smallest_y_in_se(x0, y0, open: false)
+    flip @max_pst.largest_y_in_ne(x0, -y0, open:)
   end
   # Return the "lowest" point in P to the "southwest" of (x0, y0).
   #
-  # Let Q = (-infty, x0] X (-infty, y0] be the southwest quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the southwest quadrant defined by the point (x0, y0):
+  # - (-infty, x0] X (-infty, y0] if +open+ is false and
+  # - (-infty, x0) X (-infty, y0) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (-infty, infty) if Q \intersect P is empty and
   # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_y_in_sw(x0, y0)
-    flip @max_pst.largest_y_in_nw(x0, -y0)
+  def smallest_y_in_sw(x0, y0, open: false)
+    flip @max_pst.largest_y_in_nw(x0, -y0, open:)
   end
   ########################################
@@ -106,28 +119,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
   # Return the leftmost (min-x) point in P to the southeast of (x0, y0).
   #
-  # Let Q = [x0, infty) X (infty, y0] be the southeast quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the southeast quadrant defined by the point (x0, y0):
+  # - \[x0, infty) X (-infty, y0] if +open+ is false and
+  # - (x0, infty) X (-infty, y0) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (infty, -infty) if Q \intersect P is empty and
   # - the leftmost (min-x) point in Q \intersect P otherwise.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_x_in_se(x0, y0)
-    flip @max_pst.smallest_x_in_ne(x0, -y0)
+  def smallest_x_in_se(x0, y0, open: false)
+    flip @max_pst.smallest_x_in_ne(x0, -y0, open:)
   end
   # Return the rightmost (max-x) point in P to the southwest of (x0, y0).
   #
-  # Let Q = (-infty, x0] X (infty, y0] be the southwest quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the southwest quadrant defined by the point (x0, y0):
+  # - (-infty, x0] X (-infty, y0] if +open+ is false and
+  # - (-infty, x0) X (-infty, y0) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (-infty, -infty) if Q \intersect P is empty and
   # - the rightmost (max-x) point in Q \intersect P otherwise.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_x_in_sw(x0, y0)
-    flip @max_pst.largest_x_in_nw(x0, -y0)
+  def largest_x_in_sw(x0, y0, open: false)
+    flip @max_pst.largest_x_in_nw(x0, -y0, open:)
   end
   ########################################
@@ -135,15 +158,22 @@ class DataStructuresRMolinari::MinPrioritySearchTree
   # Return the lowest point of P in the box bounded by x0, x1, and y0.
   #
-  # Let Q = [x0, x1] X (infty, y0] be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
-  # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
+  # Let Q be the "three-sided" box bounded by x0, x1, and y0:
+  # - \[x0, x1] X (-infty, y0] if +open+ is false and
+  # - (x0, x1) X (-infty, y0) if +open+ is true.
+  #
+  # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
+  #
+  # Let P be the set of points in the MinPST.
+  #
+  # Define p* as
   #
   # - (infty, infty) if Q \intersect P is empty and
   # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_y_in_3_sided(x0, x1, y0)
-    flip @max_pst.largest_y_in_3_sided(x0, x1, -y0)
+  def smallest_y_in_3_sided(x0, x1, y0, open: false)
+    flip @max_pst.largest_y_in_3_sided(x0, x1, -y0, open:)
   end
   ########################################
@@ -151,18 +181,25 @@ class DataStructuresRMolinari::MinPrioritySearchTree
   # Enumerate the points of P in the box bounded by x0, x1, and y0.
   #
-  # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
-  # MaxPST. (Note that Q is empty if x1 < x0.) We find an enumerate all the points in Q \intersect P.
+  # Let Q be the "three-sided" box bounded by x0, x1, and y0:
+  # - \[x0, x1] X (-infty, y0] if +open+ is false and
+  # - (x0, x1) X (-infty, y0) if +open+ is true.
+  #
+  # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
+  #
+  # Let P be the set of points in the MinPST.
+  #
+  # We find and enumerate all the points in Q \intersect P.
   #
   # If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
   # the intersection.
   #
   # This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
-  def enumerate_3_sided(x0, x1, y0)
+  def enumerate_3_sided(x0, x1, y0, open: false)
     if block_given?
-      @max_pst.enumerate_3_sided(x0, x1, -y0) { |point| yield(flip point) }
+      @max_pst.enumerate_3_sided(x0, x1, -y0, open:) { |point| yield(flip point) }
     else
-      Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0).map { |pt| flip pt })
+      Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0, open:).map { |pt| flip pt })
     end
   end

data/lib/data_structures_rmolinari/segment_tree.rb CHANGED Viewed

@@ -36,10 +36,15 @@ module DataStructuresRMolinari
     #   - +:c+ or +:ruby+
     #   - the C version will run faster but for now may be buggier and harder to debug
     module_function def construct(data, operation, lang)
-      operation.must_be_in [:max, :index_of_max]
+      operation.must_be_in [:max, :index_of_max, :sum]
       lang.must_be_in [:ruby, :c]
-      klass = operation == :max ? MaxValSegmentTree : IndexOfMaxValSegmentTree
+      klass = case operation
+              when :max then MaxValSegmentTree
+              when :index_of_max then IndexOfMaxValSegmentTree
+              when :sum then SumSegmentTree
+              else raise ArgumentError, "Unknown operation #{operation}"
+              end
       template = lang == :ruby ? SegmentTreeTemplate : CSegmentTreeTemplate
       klass.new(template, data)
@@ -107,6 +112,33 @@ module DataStructuresRMolinari
       end
     end
+    class SumSegmentTree
+      extend Forwardable
+      # Tell the tree that the value at idx has changed
+      def_delegator :@structure, :update_at
+      # @param (see MaxValSegmentTree#initialize)
+      def initialize(template_klass, data)
+        data.must_be_a Enumerable
+        @structure = template_klass.new(
+          combine:               ->(a, b) { a + b },
+          single_cell_array_val: ->(i) { data[i] },
+          size:                  data.size,
+          identity:              0
+        )
+      end
+      # The sum of the values in A(i..j)
+      #
+      # The arguments must be integers in 0...(A.size)
+      # @return the sum of the values in A(i..j) or 0 if i > j.
+      def sum_on(i, j)
+        @structure.query_on(i, j)
+      end
+    end
     # The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
     #
     # See SegmentTreeTemplate for more information.

data/lib/data_structures_rmolinari/segment_tree_template.rb CHANGED Viewed

@@ -71,6 +71,7 @@ class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
   # Note that we don't need the updated value itself. We get that by calling the lambda +single_cell_array_val+ supplied at
   # construction.
   def update_at(idx)
+    raise DataError, "Bad update index #{idx} (size = #{@size})" unless (0...@size).cover?(idx)
     update_val_at(idx, root, 0, @size - 1)
   end

data/lib/data_structures_rmolinari/shared.rb CHANGED Viewed

@@ -69,12 +69,9 @@ module Shared
   #        duplication. When nil we don't call anything and just use the elements themselves.
   def contains_duplicates?(enum, by: nil)
     seen = Set.new
-    enum.each do |v|
+    enum.any? do |v|
       v = v.send(by) if by
-      return true if seen.include? v
-      seen << v
+      !seen.add?(v)
     end
-    false
   end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: data_structures_rmolinari
 version: !ruby/object:Gem::Version
-  version: 0.5.3
+  version: 0.5.5
 platform: ruby
 authors:
 - Rory Molinari
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-02-15 00:00:00.000000000 Z
+date: 2023-12-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: must_be
@@ -113,7 +113,7 @@ require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - "~>"
+  - - ">="
     - !ruby/object:Gem::Version
       version: 3.1.3
 required_rubygems_version: !ruby/object:Gem::Requirement
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.4.5
+rubygems_version: 3.4.10
 signing_key:
 specification_version: 4
 summary: Several miscellaneous data structures I have implemented to learn about them.