RubyGems - data_structures_rmolinari - Versions diffs - 0.5.2 → 0.5.4 - Mend

data_structures_rmolinari 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +17 -1
data/README.md +3 -0
data/ext/extconf_shared.rb +19 -0
data/lib/data_structures_rmolinari/algorithms.rb +5 -11
data/lib/data_structures_rmolinari/heap.rb +4 -3
data/lib/data_structures_rmolinari/max_priority_search_tree.rb +223 -30
data/lib/data_structures_rmolinari/min_priority_search_tree.rb +64 -27
data/lib/data_structures_rmolinari/segment_tree.rb +34 -2
data/lib/data_structures_rmolinari/segment_tree_template.rb +1 -0
data/lib/data_structures_rmolinari/shared.rb +2 -5
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5d10fb46bf10f119b95239cf8e3ed06585804d37ccb9b7b1da0c7139dfcb31b9
-  data.tar.gz: dc393fb3e3f597df278832b3c4faae08866ef93ad747ee0a30fca19345fa6c8a
+  metadata.gz: b194edc412e6bce073e73f874c4a392dd821f333e4ba9b07908af397819cdffa
+  data.tar.gz: 5dabb2d689cb0b0ebacf2b804e6c27ece1e3e7f7db604f5722d1657915bf0bda
 SHA512:
-  metadata.gz: 6ec16b3eb2a1f4deccf8a8c45cb20c9558a0267049236389de53444124feeefe282751f964f2cf1875e1fccd5fccc6ad5fea4edd0098fed7b358bc6051666b4e
-  data.tar.gz: 0435cf6031c7e40bf9706a7c8d7b493e7e31f7667131be682de59ad5dcd26a5151add0d03cee83b23da6fa6f0c132c964ef551341dc94ff5d17cffddc2b48f2a
+  metadata.gz: cf36912f4242d7a91e8227464993ac634441bdcff30d7b6ec5a10149cfdc0a14a132fa5c319f2edafdc9d9ab6b11ee0af2354fdd45f2638d0099ccdb84eff436
+  data.tar.gz: a1ce15decbc869b9f26902d391f27967778032f54d2543b914d93bbe15ab1ceb7aae5c576cc93f99c80aa8f62d8f23c747591a6beb520d7c756facb125a5a72d

data/CHANGELOG.md CHANGED Viewed

@@ -2,7 +2,23 @@
 ## [Unreleased]
-## [0.5.0] 2023-02.03
+## [0.5.4] 2023-12-12
+(Unfortunately this note was added long after the changes were made and my memory of the changes is poor.)
+- SegmentTree
+  - Sum version is provided
+- PrioritySearchTree
+  - Open regions
+- Some bug fixes
+- Some refactoring of test cases
+## [0.5.1] - [0.5.3]
+- Releases to fix some bad gemspec data.
+## [0.5.0] 2023-02-03
 - SegmentTree
   - Reorganize the code into a SegmentTree submodule.

data/README.md CHANGED Viewed

@@ -106,6 +106,9 @@ There is no `smallest_x_in_3_sided(x0, x1, y0)`. Just use `smallest_x_in_ne(x0,
 (These queries appear rather abstract at first but there are interesting applications. See, for example, section 4 of
 [[McC85]](#references), keeping in mind that the data structure in that paper is actually a _MinPST_.)
+Each method also has a named parameter `open:` that makes the search region an open set. For example, if we call `smallest_x_in_ne`
+with `open: true` then we consider points satisfying x > x0 and y > y0. The default value for this parameter is always `false`.
 The single-point queries run in O(log n) time, where n is the size of P, while `enumerate_3_sided` runs in O(m + log n), where m is
 the number of points actually enumerated.

data/ext/extconf_shared.rb ADDED Viewed

@@ -0,0 +1,19 @@
+def generate_makefile(name)
+  extension_name = "c_#{name}"
+  source_name = "#{name}.c"
+  abort 'missing malloc()' unless have_func "malloc"
+  abort 'missing realloc()' unless have_func "realloc"
+  if try_cflags('-O3')
+    append_cflags('-O3')
+  end
+  dir_config(extension_name)
+  $srcs = [source_name, "../shared.c"]
+  $INCFLAGS << " -I$(srcdir)/.."
+  $VPATH << "$(srcdir)/.."
+  create_makefile("data_structures_rmolinari/#{extension_name}")
+end

data/lib/data_structures_rmolinari/algorithms.rb CHANGED Viewed

@@ -32,16 +32,10 @@ module DataStructuresRMolinari::Algorithms
     x_max = sorted_points.last.x
     y_min, y_max = sorted_points.map(&:y).minmax
-    # Half of the smallest non-zero gap between x values. This is needed below
-    epsilon = INFINITY
     # Enumerate type 1
     sorted_points.each_cons(2) do |pt1, pt2|
       next if pt1.x == pt2.x
-      d = (pt2.x.to_f - pt1.x) / 2
-      epsilon = d if d < epsilon
       yield [pt1.x, pt2.x, y_min, y_max]
     end
@@ -54,9 +48,9 @@ module DataStructuresRMolinari::Algorithms
       next if pt.y == y_max # 0 area
       next if pt.y == y_min # type 1
-      # Epsilon means we don't just get pt back again. The De et al. paper is rather vague.
-      left_bound  = max_pst.largest_x_in_nw( pt.x - epsilon, pt.y)
-      right_bound = max_pst.smallest_x_in_ne(pt.x + epsilon, pt.y)
+      # Open region means we don't just get pt back again. The De et al. paper is rather vague.
+      left_bound  = max_pst.largest_x_in_nw(pt.x, pt.y, open: true)
+      right_bound = max_pst.smallest_x_in_ne(pt.x, pt.y, open: true)
       left = left_bound.x.infinite? ? x_min : left_bound.x
       right = right_bound.x.infinite? ? x_max : right_bound.x
@@ -74,7 +68,7 @@ module DataStructuresRMolinari::Algorithms
     #
     #      largest_y_in_3_sided(l, r, y_min)
     #
-    # That call considers the points in the closed region l <= x <= r and y >= y_min, so we use l + epsilon and r - epsilon.
+    # That call considers the points in the closed region l <= x <= r and y >= y_min, so we use an open search region instead.
     until max_pst.empty?
       top_pt = max_pst.delete_top!
       top = top_pt.y
@@ -85,7 +79,7 @@ module DataStructuresRMolinari::Algorithms
       r = x_max
       loop do
-        next_pt = max_pst.largest_y_in_3_sided(l + epsilon, r - epsilon, y_min)
+        next_pt = max_pst.largest_y_in_3_sided(l, r, y_min, open: true)
         bottom = next_pt.y.infinite? ? y_min : next_pt.y
         yield [l, r, bottom, top]

data/lib/data_structures_rmolinari/heap.rb CHANGED Viewed

@@ -78,8 +78,9 @@ class DataStructuresRMolinari::Heap
   # Insert a new element into the heap with the given priority.
   # @param value the item to be inserted.
   #   - If the heap is addressable (the default) it is an error to insert an item that is already present in the heap.
-  # @param priority the priority to use for new item. The values used as priorities must be totally ordered via +<=>+.
-  def insert(value, priority)
+  # @param priority (optional) the priority to use for the new item. The values used as priorities must be totally ordered via +<=>+.
+  #        If omitted we use the inserted value as its own priority.
+  def insert(value, priority = value)
     raise DataError, "Heap already contains #{value}" if @addressable && contains?(value)
     @size += 1
@@ -103,7 +104,7 @@ class DataStructuresRMolinari::Heap
   # Return the top of the heap and remove it, updating the structure to maintain the necessary properties.
   # @return (see #top)
   def pop
-    result = top
+    result = top # raises if empty
     assign(@data[@size], root)
     @data[@size] = nil

data/lib/data_structures_rmolinari/max_priority_search_tree.rb CHANGED Viewed

@@ -30,6 +30,10 @@ require_relative 'shared'
 #
 # The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
 #
+# Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
+# +smallest_x_in_ne+ with +open: true+ then we consider points satisfying x > x0 and y > y0. The default value for this parameter
+# is always +false+. See below for limitations in this functionality.
+#
 # If the MaxPST is constructed to be "dynamic" we also have an operation that deletes the top element.
 #
 # - +delete_top!+: remove the top (max-y) element of the tree and return it.
@@ -39,14 +43,123 @@ require_relative 'shared'
 # In the current implementation no two points can share an x-value. This restriction can be relaxed with some more complicated code,
 # but it hasn't been written yet. See issue #9.
 #
-# There is a related data structure called the Min-max priority search tree so we have called this a "Max priority search tree", or
+# There is a related data structure called a Min-max priority search tree so we have called this a "Max priority search tree", or
 # MaxPST.
 #
+# ## Open regions: limitations
+#
+# Calls involving open regions - using the +open:+ argument - are implemented internally using closed regions in which the
+# boundaries have been "nudged" by a tiny amount so as to exclude points on the boundary. Since there are only finitely many points
+# in the PST there are no limit points, and the open region given by x > x0 and y > y0 contains the same PST members as a closed
+# region x >= x0 + e and y >= y0 + e for small enough values of e.
+#
+# But it is hard to determine e robustly. Indeed, assume for the moment that all x- and y-values are floating-point, i.e., IEEE754
+# double-precision. We can easily determine a value for e: the smallest difference between any two distinct x-values or any two
+# distinct y-values. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive
+# x-values
+#
+#    0, 5e-324, 1, and 2.
+#
+# (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
+# values are represented, 1 + e = 1.0. Any query on open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
+# and we may get the wrong result.
+#
+# The solution here is to replace (x0, y0) with (x0.next_float, y0.next_float) rather than (x0 + e, y0 + e). +Float::next_float+ is
+# an implementation of the IEEE754 operation +nextafter+ which gives, for a floating point value z, the smallest floating point
+# value larger than z. If our PST contains only points with finite, floating point coordinates, then this approach implements open
+# search regions correctly. This is what the implementation currently does.
+#
+# However, when coordinates are not all Floats there are cases when this approach will fail. Consider the case in which we have the
+# following consecutive x-values:
+#
+#   0, 1e-324, 2e-324, 1.
+#
+# (Here 1e-324 and 2e-324 are the Ruby values +Rational(1, 10**324)+ and +Rational(2, 10**324)+.) Then, given an argument x0 =
+# 1e-324, +x0.to_f == 0.0+ and so +x0.to_f.next_float == 5e-324+ and the region we use internally for our query incorrectly excludes
+# the point with x value 2e-324. This is a bug in the code.
+#
+# There are also issues with numeric values (Integer or Rational) that are larger than the maximum floating point value,
+# approximately 1.8e308. For such values z, +z.to_f == Float::INFINITY+, and we will incorrectly exclude any larger-but-finite
+# coordinates from the search region.
+#
+# Yet more issues arise when the coordinates of points in the PST aren't numeric at all, but are some other sort of comparable
+# objects, such as arrays.
+#
+# So, we may say that queries on open regions will work as expected if either
+# - all coordinates of the points in the PST are finite Ruby Floats, or
+# - all coordinates of the points are finite Numeric values and for no such pair of x-values s, t (or pair of y-values) is it such
+#   that +s.to_f.next_float > t+.
+#
+# Otherwise, use this functionality at your own risk, and not at all with coordinates that do not respond reasonably to +to_f+.
+#
 # References:
 # * E.M. McCreight, _Priority search trees_, SIAM J. Comput., 14(2):257-276, 1985.
-# * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational
-#   Geometry, 2011
+# * M. De, A. Maheshwari, S. C. Nandy, M. Smid, _An In-Place Priority Search Tree_, 23rd Canadian Conference on Computational Geometry, 2011
 class DataStructuresRMolinari::MaxPrioritySearchTree
+  # IMPLEMENTATION NOTES
+  #
+  # Open regions
+  #
+  # The search methods each have an argument +open:+ that changes the search region from closed (x >= x0) to open (x > x0). I had
+  # initially intended to implement this by varying the internals of the search code. But this turned out to be error-prone because
+  # the code is written for closed regions. When deciding which children to take in the next level of the tree, say, we assume the
+  # search space is closed, sometimes in a way that means we won't find the optimal point when the search region is open. Changing
+  # the logic turned out to be finicky and buggy.
+  #
+  # It is much easier and safer to replace a search request on, say (x0, y0, open) with (x0 + e, y0 + e, closed) where e[psilon] is
+  # small enough that we don't exclude any points other than those on the boundary of the closed region. Then we can just call the
+  # existing search code as-is. This is what the code did at first. We calculated e as the smallest difference between any two
+  # distinct x-values or distinct y-values.
+  #
+  # But this approach is not robust. Assume for the moment that all x- and y-values are floating-point. We can easily determine the
+  # value e. But the scaling of floating-point numbers makes this buggy. Consider the case in which we have consecutive x-values
+  #
+  #    0, 5e-324, 1, and 2.
+  #
+  # (5e-324 is the smallest positive Float value in Ruby). Our value for e is thus 5e-324 and, because of the way floating point
+  # values are represented, 1 + e = 1.0. Any query on open region with x0 = 1 will be run on a closed region with x0 = 1 + e = 1.0,
+  # and we may get the wrong result.
+  #
+  # I see the following possible approaches.
+  #
+  # 1. Rewrite the code to do open regions "properly"
+  #    Pro:
+  #      - we don't need to worry about numerical issues.
+  #    Con:
+  #      - too complicated and error-prone.
+  #
+  # 2. Instead of calculating e like this, replace each bounding value x with x.next_float or x.prev_float as required.
+  #    Note that #next_float gives the next-largest value representable as a floating point value.
+  #    Pro:
+  #      - we don't need to worry about the scaling issues in type Float
+  #      - simple and supported by the Ruby libraries (and by the C standard library if we decide to implement as a C extension)
+  #    Con:
+  #      - [minor] will fail with Float::INFINITY.
+  #        - this is an unlikely edge case that could be handled directly or simply documented away.
+  #      - won't work if the x0 value is a Numeric outside of Float range, roughly [-1.798e308, 1.798e308]
+  #        - For example, (10**400).to_f.next_float == Infinity
+  #        - We could warn about this case in documentation
+  #        - For numeric values x in the Float range we would need to check that x.to_f.next_float > x, but I suspect that this is
+  #          guaranteed.
+  #      - won't work with comparable but non-numeric values, like arrays, or some sort of user-defined type
+  #        - We would simply have to document that this case is not supported (or just throw an exception on +#to_f+)
+  #
+  # 3. Handle numeric values on a case-by-case basis. So for numeric values x in the float range we use x.to_f.next_float while for
+  #    other values - like BigDecimal - do something different that depends on the next value in the data set with an x-value
+  #    greater than x.
+  #    Pro:
+  #      - more cases are handled
+  #    Con:
+  #      - complicated and perhaps non-performant in the general case
+  #      - doesn't handle non-numeric cases (just like idea 2)
+  #      - possibly error-prone in corner cases.
+  #
+  #    Idea: maintain @x_chain and @y_chain hashes mapping each distinct x/y values to the next largest and smallest such
+  #    value. Then just use that. Lookup is fast. Downside: O(n) extra memory.
+  #
+  # For now approach 2 looks best. It doesn't cover all cases, but covers cases most likely in practice - the Float range is large -
+  # and other cases can be documented away in a clean way.
   include Shared
   include BinaryTreeArithmetic
@@ -83,35 +196,54 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
   # Return the highest point in P to the "northeast" of (x0, y0).
   #
-  # Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the northeast quadrant defined by the point (x0, y0):
+  # - \[x0, infty) X [y0, infty) if +open+ is false and
+  # - (x0, infty) X (y0, infty) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (infty, -infty) if Q \intersect P is empty and
   # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_y_in_ne(x0, y0)
-    largest_y_in_quadrant(x0, y0, :ne)
+  def largest_y_in_ne(x0, y0, open: false)
+    if open
+      largest_y_in_quadrant(slightly_bigger(x0), slightly_bigger(y0), :ne)
+    else
+      largest_y_in_quadrant(x0, y0, :ne)
+    end
   end
   # Return the highest point in P to the "northwest" of (x0, y0).
   #
-  # Let Q = (-infty, x0] X [y0, infty) be the northwest quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the northwest quadrant defined by the point (x0, y0):
+  # - (-infty, x0] X [y0, infty) if +open+ is false and
+  # - (-infty, x0) X (y0, infty) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (-infty, -infty) if Q \intersect P is empty and
   # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_y_in_nw(x0, y0)
-    largest_y_in_quadrant(x0, y0, :nw)
+  def largest_y_in_nw(x0, y0, open: false)
+    if open
+      largest_y_in_quadrant(slightly_smaller(x0), slightly_bigger(y0), :nw)
+    else
+      largest_y_in_quadrant(x0, y0, :nw)
+    end
   end
-  # The basic algorithm is from De et al. section 3.1. We have generalaized it slightly to allow it to calculate both largest_y_in_ne and
-  # largest_y_in_nw
+  # The basic algorithm is from De et al. section 3.1. We have generalized it slightly to allow it to calculate both
+  # largest_y_in_ne and largest_y_in_nw
   #
-  # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infinty, y0) so we don't really need this. But it's a bit faster than the
-  # general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data structure.
+  # Note that largest_y_in_ne(x0, y0) = largest_y_in_3_sided(x0, infty, y0) so we don't really need this. But it's a bit faster
+  # than the general case and is a simple algorithm that introduces a typical way that an algorithm interacts with the data
+  # structure.
   #
   # From the paper:
   #
@@ -137,6 +269,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
       sufficient_x = ->(x) { x <= x0 }
     end
+    return best if empty?
     # x == x0 or is not sufficient. This test sometimes excludes the other child of a node from consideration.
     exclusionary_x = ->(x) { x == x0 || !sufficient_x.call(x) }
@@ -200,28 +334,46 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
   # Return the leftmost (min-x) point in P to the northeast of (x0, y0).
   #
-  # Let Q = [x0, infty) X [y0, infty) be the northeast quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the northeast quadrant defined by the point (x0, y0):
+  # - [x0, infty) X [y0, infty) if +open+ is false and
+  # - (x0, infty) X (y0, infty) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (infty, infty) if Q \intersect P is empty and
   # - the leftmost (min-x) point in Q \intersect P otherwise.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_x_in_ne(x0, y0)
-    extremal_in_x_dimension(x0, y0, :ne)
+  def smallest_x_in_ne(x0, y0, open: false)
+    if open
+      extremal_in_x_dimension(slightly_bigger(x0), slightly_bigger(y0), :ne)
+    else
+      extremal_in_x_dimension(x0, y0, :ne)
+    end
   end
   # Return the rightmost (max-x) point in P to the northwest of (x0, y0).
   #
-  # Let Q = (-infty, x0] X [y0, infty) be the northwest quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the northwest quadrant defined by the point (x0, y0):
+  # - (-infty, x0] X [y0, infty) if +open+ is false and
+  # - (-infty, x0) X (y0, infty) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (-infty, infty) if Q \intersect P is empty and
   # - the rightmost (max-x) point in Q \intersect P otherwise.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_x_in_nw(x0, y0)
-    extremal_in_x_dimension(x0, y0, :nw)
+  def largest_x_in_nw(x0, y0, open: false)
+    if open
+      extremal_in_x_dimension(slightly_smaller(x0), slightly_bigger(y0), :nw)
+    else
+      extremal_in_x_dimension(x0, y0, :nw)
+    end
   end
   # A genericized version of the paper's smallest_x_in_ne that can calculate either smallest_x_in_ne or largest_x_in_nw as specified via a
@@ -369,14 +521,26 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
   # Return the highest point of P in the box bounded by x0, x1, and y0.
   #
-  # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
-  # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
+  # Let Q be the "three-sided" box bounded by x0, x1, and y0:
+  # - \[x0, x1] X [y0, infty) if +open+ is false and
+  # - (x0, x1) X (y0, infty) if +open+ is true.
+  #
+  # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
+  #
+  # Let P be the set of points in the MaxPST.
+  #
+  # Define p* as
   #
   # - (infty, -infty) if Q \intersect P is empty and
   # - the highest (max-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_y_in_3_sided(x0, x1, y0)
+  def largest_y_in_3_sided(x0, x1, y0, open: false)
+    if open
+      x0 = slightly_bigger(x0)
+      x1 = slightly_smaller(x1)
+      y0 = slightly_bigger(y0)
+    end
     # From the paper:
     #
     #    The three real numbers x0, x1, and y0 define the three-sided range Q = [x0,x1] X [y0,∞). If Q \intersect P̸ is not \empty,
@@ -572,14 +736,27 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
   # Enumerate the points of P in the box bounded by x0, x1, and y0.
   #
-  # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
-  # MaxPST. (Note that Q is empty if x1 < x0.) We find an enumerate all the points in Q \intersect P.
+  # Let Q be the "three-sided" box bounded by x0, x1, and y0:
+  # - \[x0, x1] X [y0, infty) if +open+ is false and
+  # - (x0, x1) X (y0, infty) if +open+ is true.
+  #
+  # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
+  #
+  # Let P be the set of points in the MaxPST.
+  #
+  # We find and enumerate all the points in Q \intersect P.
   #
   # If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
   # the intersection.
   #
   # This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
-  def enumerate_3_sided(x0, x1, y0)
+  def enumerate_3_sided(x0, x1, y0, open: false)
+    if open
+      x0 = slightly_bigger(x0)
+      x1 = slightly_smaller(x1)
+      y0 = slightly_bigger(y0)
+    end
     # From the paper
     #
     #     Given three real numbers x0, x1, and y0 define the three sided range Q = [x0, x1] X [y0, infty). Algorithm
@@ -1143,7 +1320,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
     # We follow the algorithm in the paper by De, Maheshwari et al, which takes O(n log^2 n) time. Their follow-up paper that
     # defines the Min-max PST, describes how to do the construction in O(n log n) time, but it is more complex and probably not
-    # worth the trouble of both a bespoke heapsort the special sorting algorithm of Katajainen and Pasanen.
+    # worth the trouble of both a bespoke heapsort and the special sorting algorithm of Katajainen and Pasanen.
     # Since we are building an implicit binary tree, things are simpler if the array is 1-based. This requires a malloc (perhaps)
     # and memcpy (for sure), which isn't great, but it's in the C layer so cheap compared to the O(n log^2 n) work we need to do for
@@ -1216,6 +1393,22 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
     @data[l..r] = @data[l..r].sort_by(&:x)
   end
+  # The smallest floating point number larger than x
+  private def slightly_bigger(x)
+    x_f = x.to_f
+    raise "#{x} out of Float range" if x_f.infinite?
+    x_f.next_float
+  end
+  # The largest floating point number smaller than x
+  private def slightly_smaller(x)
+    x_f = x.to_f
+    raise "#{x} out of Float range" if x_f.infinite?
+    x_f.prev_float
+  end
   ########################################
   # Debugging support
   #

data/lib/data_structures_rmolinari/min_priority_search_tree.rb CHANGED Viewed

@@ -22,8 +22,7 @@ require_relative 'shared'
 #   et al. But we don't do that, as we create a separate array of Points.
 # - Whereas the implementation of MaxPST means that client code gets the same (x, y) objects back in results as it passed into the
 #   constructor, that's not the case here.
-#   - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return
-#    (different) instances of +Point+ in response to queries.
+#   - we map each point in the input - which is an object responding to +#x+ and +#y+ - to an instance of +Point+, and will return (different) instances of +Point+ in response to queries.
 #   - client code is unlikely to care, but be aware of this, just in case.
 #
 # Given a set of n points, we can answer the following questions quickly:
@@ -37,6 +36,10 @@ require_relative 'shared'
 #
 # (Here, "leftmost/rightmost" means "minimal/maximal x", and "lowest" means "minimal y".)
 #
+# Each of these methods has a named parameter +open:+ that makes the search region an open set. For example, if we call
+# +smallest_x_in_se+ with +open: true+ then we consider points satisfying x > x0 and y < y0. The default value for this parameter
+# is always +false+. See the documentation of MaxPrioritySearchTree for limitations of this support.
+#
 # The first 5 operations take O(log n) time and O(1) extra space.
 #
 # The final operation (enumerate) takes O(m + log n) time and O(1) extra space, where m is the number of points that are enumerated.
@@ -77,28 +80,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
   # Return the "lowest" point in P to the "southeast" of (x0, y0).
   #
-  # Let Q = [x0, infty) X (infty, y0] be the southeast quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the southeast quadrant defined by the point (x0, y0):
+  # - \[x0, infty) X (-infty, y0] if +open+ is false and
+  # - (x0, infty) X (-infty, y0) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (infty, infty) if Q \intersect P is empty and
   # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_y_in_se(x0, y0)
-    flip @max_pst.largest_y_in_ne(x0, -y0)
+  def smallest_y_in_se(x0, y0, open: false)
+    flip @max_pst.largest_y_in_ne(x0, -y0, open:)
   end
   # Return the "lowest" point in P to the "southwest" of (x0, y0).
   #
-  # Let Q = (-infty, x0] X (-infty, y0] be the southwest quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the southwest quadrant defined by the point (x0, y0):
+  # - (-infty, x0] X (-infty, y0] if +open+ is false and
+  # - (-infty, x0) X (-infty, y0) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (-infty, infty) if Q \intersect P is empty and
   # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller values of x
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_y_in_sw(x0, y0)
-    flip @max_pst.largest_y_in_nw(x0, -y0)
+  def smallest_y_in_sw(x0, y0, open: false)
+    flip @max_pst.largest_y_in_nw(x0, -y0, open:)
   end
   ########################################
@@ -106,28 +119,38 @@ class DataStructuresRMolinari::MinPrioritySearchTree
   # Return the leftmost (min-x) point in P to the southeast of (x0, y0).
   #
-  # Let Q = [x0, infty) X (infty, y0] be the southeast quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the southeast quadrant defined by the point (x0, y0):
+  # - \[x0, infty) X (-infty, y0] if +open+ is false and
+  # - (x0, infty) X (-infty, y0) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (infty, -infty) if Q \intersect P is empty and
   # - the leftmost (min-x) point in Q \intersect P otherwise.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_x_in_se(x0, y0)
-    flip @max_pst.smallest_x_in_ne(x0, -y0)
+  def smallest_x_in_se(x0, y0, open: false)
+    flip @max_pst.smallest_x_in_ne(x0, -y0, open:)
   end
   # Return the rightmost (max-x) point in P to the southwest of (x0, y0).
   #
-  # Let Q = (-infty, x0] X (infty, y0] be the southwest quadrant defined by the point (x0, y0) and let P be the points in this data
-  # structure. Define p* as
+  # Let Q be the southwest quadrant defined by the point (x0, y0):
+  # - (-infty, x0] X (-infty, y0] if +open+ is false and
+  # - (-infty, x0) X (-infty, y0) if +open+ is true.
+  #
+  # Let P be the points in this data structure.
+  #
+  # Define p* as
   #
   # - (-infty, -infty) if Q \intersect P is empty and
   # - the rightmost (max-x) point in Q \intersect P otherwise.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def largest_x_in_sw(x0, y0)
-    flip @max_pst.largest_x_in_nw(x0, -y0)
+  def largest_x_in_sw(x0, y0, open: false)
+    flip @max_pst.largest_x_in_nw(x0, -y0, open:)
   end
   ########################################
@@ -135,15 +158,22 @@ class DataStructuresRMolinari::MinPrioritySearchTree
   # Return the lowest point of P in the box bounded by x0, x1, and y0.
   #
-  # Let Q = [x0, x1] X (infty, y0] be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
-  # MaxPST. (Note that Q is empty if x1 < x0.) Define p* as
+  # Let Q be the "three-sided" box bounded by x0, x1, and y0:
+  # - \[x0, x1] X (-infty, y0] if +open+ is false and
+  # - (x0, x1) X (-infty, y0) if +open+ is true.
+  #
+  # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
+  #
+  # Let P be the set of points in the MinPST.
+  #
+  # Define p* as
   #
   # - (infty, infty) if Q \intersect P is empty and
   # - the lowest (min-y) point in Q \intersect P otherwise, breaking ties by preferring smaller x values.
   #
   # This method returns p* in O(log n) time and O(1) extra space.
-  def smallest_y_in_3_sided(x0, x1, y0)
-    flip @max_pst.largest_y_in_3_sided(x0, x1, -y0)
+  def smallest_y_in_3_sided(x0, x1, y0, open: false)
+    flip @max_pst.largest_y_in_3_sided(x0, x1, -y0, open:)
   end
   ########################################
@@ -151,18 +181,25 @@ class DataStructuresRMolinari::MinPrioritySearchTree
   # Enumerate the points of P in the box bounded by x0, x1, and y0.
   #
-  # Let Q = [x0, x1] X [y0, infty) be the "three-sided" box bounded by x0, x1, and y0, and let P be the set of points in the
-  # MaxPST. (Note that Q is empty if x1 < x0.) We find an enumerate all the points in Q \intersect P.
+  # Let Q be the "three-sided" box bounded by x0, x1, and y0:
+  # - \[x0, x1] X (-infty, y0] if +open+ is false and
+  # - (x0, x1) X (-infty, y0) if +open+ is true.
+  #
+  # Note that Q is empty if x1 < x0 or if +open+ is true and x1 <= x0.
+  #
+  # Let P be the set of points in the MinPST.
+  #
+  # We find and enumerate all the points in Q \intersect P.
   #
   # If the calling code provides a block then we +yield+ each point to it. Otherwise we return a set containing all the points in
   # the intersection.
   #
   # This method runs in O(m + log n) time and O(1) extra space, where m is the number of points found.
-  def enumerate_3_sided(x0, x1, y0)
+  def enumerate_3_sided(x0, x1, y0, open: false)
     if block_given?
-      @max_pst.enumerate_3_sided(x0, x1, -y0) { |point| yield(flip point) }
+      @max_pst.enumerate_3_sided(x0, x1, -y0, open:) { |point| yield(flip point) }
     else
-      Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0).map { |pt| flip pt })
+      Set.new( @max_pst.enumerate_3_sided(x0, x1, -y0, open:).map { |pt| flip pt })
     end
   end

data/lib/data_structures_rmolinari/segment_tree.rb CHANGED Viewed

@@ -36,10 +36,15 @@ module DataStructuresRMolinari
     #   - +:c+ or +:ruby+
     #   - the C version will run faster but for now may be buggier and harder to debug
     module_function def construct(data, operation, lang)
-      operation.must_be_in [:max, :index_of_max]
+      operation.must_be_in [:max, :index_of_max, :sum]
       lang.must_be_in [:ruby, :c]
-      klass = operation == :max ? MaxValSegmentTree : IndexOfMaxValSegmentTree
+      klass = case operation
+              when :max then MaxValSegmentTree
+              when :index_of_max then IndexOfMaxValSegmentTree
+              when :sum then SumSegmentTree
+              else raise ArgumentError, "Unknown operation #{operation}"
+              end
       template = lang == :ruby ? SegmentTreeTemplate : CSegmentTreeTemplate
       klass.new(template, data)
@@ -107,6 +112,33 @@ module DataStructuresRMolinari
       end
     end
+    class SumSegmentTree
+      extend Forwardable
+      # Tell the tree that the value at idx has changed
+      def_delegator :@structure, :update_at
+      # @param (see MaxValSegmentTree#initialize)
+      def initialize(template_klass, data)
+        data.must_be_a Enumerable
+        @structure = template_klass.new(
+          combine:               ->(a, b) { a + b },
+          single_cell_array_val: ->(i) { data[i] },
+          size:                  data.size,
+          identity:              0
+        )
+      end
+      # The sum of the values in A(i..j)
+      #
+      # The arguments must be integers in 0...(A.size)
+      # @return the sum of the values in A(i..j) or 0 if i > j.
+      def sum_on(i, j)
+        @structure.query_on(i, j)
+      end
+    end
     # The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
     #
     # See SegmentTreeTemplate for more information.

data/lib/data_structures_rmolinari/segment_tree_template.rb CHANGED Viewed

@@ -71,6 +71,7 @@ class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
   # Note that we don't need the updated value itself. We get that by calling the lambda +single_cell_array_val+ supplied at
   # construction.
   def update_at(idx)
+    raise DataError, "Bad update index #{idx} (size = #{@size})" unless (0...@size).cover?(idx)
     update_val_at(idx, root, 0, @size - 1)
   end

data/lib/data_structures_rmolinari/shared.rb CHANGED Viewed

@@ -69,12 +69,9 @@ module Shared
   #        duplication. When nil we don't call anything and just use the elements themselves.
   def contains_duplicates?(enum, by: nil)
     seen = Set.new
-    enum.each do |v|
+    enum.any? do |v|
       v = v.send(by) if by
-      return true if seen.include? v
-      seen << v
+      !seen.add?(v)
     end
-    false
   end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: data_structures_rmolinari
 version: !ruby/object:Gem::Version
-  version: 0.5.2
+  version: 0.5.4
 platform: ruby
 authors:
 - Rory Molinari
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-02-15 00:00:00.000000000 Z
+date: 2023-12-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: must_be
@@ -90,6 +90,7 @@ files:
 - ext/c_segment_tree_template/extconf.rb
 - ext/c_segment_tree_template/segment_tree_template.c
 - ext/cc.h
+- ext/extconf_shared.rb
 - ext/shared.c
 - ext/shared.h
 - lib/data_structures_rmolinari.rb