data_structures_rmolinari 0.4.1 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,103 @@
1
+ # A collection of algorithms that use the module's data structures but don't belong as a method on one of the data structures
2
+ module DataStructuresRMolinari::Algorithms
3
+ include Shared
4
+
5
+ # We are given a set P of points in the x-y plane. An _empty rectangle for P_ is a rectangle (left, right, bottom, top)
6
+ # satisfying the following
7
+ # - it has positive area;
8
+ # - its sides are parallel to the axes;
9
+ # - it lies within the smallest bounding box (x_min, x_max, y_min, y_max) containing P; and
10
+ # - no point of P lies in its interior.
11
+ #
12
+ # A _maximal empty rectangle_ (MER) for P is an empty rectangle for P not properly contained in any other.
13
+ #
14
+ # We enumerate all maximal empty rectangles for P, yielding each as (left, right, bottom, top) to a block. The algorithm is due to
15
+ # De, M., Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Min-max Priority Search Tree_, Computational Geometry, v46 (2013),
16
+ # pp 310-327.
17
+ #
18
+ # It runs in O(m log n) time, where m is the number of MERs enumerated and n is the number of points in P. (Constructing the
19
+ # MaxPST below takes O(n log^2 n) time, but m = O(n^2) so we are still O(m log n) overall.)
20
+ #
21
+ # @param points [Array] an array of points in the x-y plane. Each must respond to +x+ and +y+.
22
+ def self.maximal_empty_rectangles(points)
23
+ # We break the empty rectangles into three types
24
+ # 1. bounded at bottom and top by y_min and y_max
25
+ # 2. bounded at the top by y_max and at the bottom by one of the points of P
26
+ # 3. bounded at the top by a point in P
27
+
28
+ return if points.size <= 1
29
+
30
+ sorted_points = points.sort_by(&:x)
31
+ x_min = sorted_points.first.x
32
+ x_max = sorted_points.last.x
33
+ y_min, y_max = sorted_points.map(&:y).minmax
34
+
35
+ # Half of the smallest non-zero gap between x values. This is needed below
36
+ epsilon = INFINITY
37
+
38
+ # Enumerate type 1
39
+ sorted_points.each_cons(2) do |pt1, pt2|
40
+ next if pt1.x == pt2.x
41
+
42
+ d = (pt2.x.to_f - pt1.x) / 2
43
+ epsilon = d if d < epsilon
44
+
45
+ yield [pt1.x, pt2.x, y_min, y_max]
46
+ end
47
+
48
+ # This builds its internal structure inside sorted_points itself.
49
+ max_pst = DataStructuresRMolinari::MaxPrioritySearchTree.new(sorted_points, dynamic: true)
50
+
51
+ # Enumerate type 2. We consider each point pt of P and work out the largest rectangle bounded below by pt and above by y_max. The
52
+ # points constraining us on the left and right are given by queries on the MaxPST.
53
+ points.each do |pt|
54
+ next if pt.y == y_max # 0 area
55
+ next if pt.y == y_min # type 1
56
+
57
+ # Epsilon means we don't just get pt back again. The De et al. paper is rather vague.
58
+ left_bound = max_pst.largest_x_in_nw( pt.x - epsilon, pt.y)
59
+ right_bound = max_pst.smallest_x_in_ne(pt.x + epsilon, pt.y)
60
+
61
+ left = left_bound.x.infinite? ? x_min : left_bound.x
62
+ right = right_bound.x.infinite? ? x_max : right_bound.x
63
+ next if left == right
64
+
65
+ yield [left, right, pt.y, y_max]
66
+ end
67
+
68
+ # Enumerate type 3. This is the cleverest part of the algorithm. Start with a point (x0, y0) in P. We imagine a horizontal line
69
+ # sweeping down over the bounding rectangle, starting at y = y0 with l = x_min and r = x_max. Every time we meet another point
70
+ # (x1, y1) of P we emit a maximal rectangle and shorten the horizontal line. At any time, the next point that we encounter is the
71
+ # highest (max y) point in the region l < x < r and y >= y_min.
72
+ #
73
+ # If we have a MaxPST containing P with the points (x0, y0) and above deleted, (x1, y1) is almost given by
74
+ #
75
+ # largest_y_in_3_sided(l, r, y_min)
76
+ #
77
+ # That call considers the points in the closed region l <= x <= r and y >= y_min, so we use l + epsilon and r - epsilon.
78
+ until max_pst.empty?
79
+ top_pt = max_pst.delete_top!
80
+ top = top_pt.y
81
+ next if top == y_max # this one is type 1 or 2
82
+ next if top == y_min # zero area: no good
83
+
84
+ l = x_min
85
+ r = x_max
86
+
87
+ loop do
88
+ next_pt = max_pst.largest_y_in_3_sided(l + epsilon, r - epsilon, y_min)
89
+
90
+ bottom = next_pt.y.infinite? ? y_min : next_pt.y
91
+ yield [l, r, bottom, top]
92
+
93
+ break if next_pt.y.infinite? # we have reached the bottom
94
+
95
+ if next_pt.x < top_pt.x
96
+ l = next_pt.x
97
+ else
98
+ r = next_pt.x
99
+ end
100
+ end
101
+ end
102
+ end
103
+ end
@@ -30,9 +30,14 @@ require_relative 'shared'
30
30
  #
31
31
  # The final operation (enumerate) takes O(m + log n) time, where m is the number of points that are enumerated.
32
32
  #
33
- # In the current implementation no two points can share an x-value. This (rather severe) restriction can be relaxed with some more
34
- # complicated code, but it hasn't been written yet. See issue #9.
33
+ # If the MaxPST is constructed to be "dynamic" we also have an operation that deletes the top element.
35
34
  #
35
+ # - +delete_top!+: remove the top (max-y) element of the tree and return it.
36
+ #
37
+ # It runs in O(log n) time, where n is the size of the PST when it was initially created.
38
+ #
39
+ # In the current implementation no two points can share an x-value. This restriction can be relaxed with some more complicated code,
40
+ # but it hasn't been written yet. See issue #9.
36
41
  #
37
42
  # There is a related data structure called the Min-max priority search tree so we have called this a "Max priority search tree", or
38
43
  # MaxPST.
@@ -53,18 +58,26 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
53
58
  # - The +x+ values must be distinct. We raise a +Shared::DataError+ if this isn't the case.
54
59
  # - This is a restriction that simplifies some of the algorithm code. It can be removed at the cost of some extra work. Issue
55
60
  # #9.
56
- #
61
+ # @param dynamic [Boolean] when truthy the PST is _dynamic_. This means the root can be deleted, which is useful in certain
62
+ # algorithms that use a PST.
63
+ # - a dynamic PST needs more bookkeeping for some internal work and so slows things down a little.
57
64
  # @param verify [Boolean] when truthy, check that the properties of a PST are satisfied after construction, raising an exception
58
65
  # if not.
59
- def initialize(data, verify: false)
66
+ def initialize(data, dynamic: false, verify: false)
60
67
  @data = data
61
68
  @size = @data.size
69
+ @member_count = @size # these can diverge for dynamic PSTs
70
+ @dynamic = dynamic
62
71
 
63
72
  construct_pst
64
73
 
65
74
  verify_properties if verify
66
75
  end
67
76
 
77
+ def empty?
78
+ @member_count.zero?
79
+ end
80
+
68
81
  ########################################
69
82
  # Highest NE and Highest NW
70
83
 
@@ -154,9 +167,9 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
154
167
  elsif p_val.y < y0
155
168
  # p is too low for Q, so the entire subtree is too low as well
156
169
  return best
157
- elsif one_child?(p)
170
+ elsif (child = one_child?(p))
158
171
  # With just one child we need to check it
159
- p = left(p)
172
+ p = child
160
173
  elsif exclusionary_x.call(@data[preferred_child.call(p)].x)
161
174
  # right(p) might be in Q, but nothing in the left subtree can be, by the PST property on x.
162
175
  p = preferred_child.call(p)
@@ -234,6 +247,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
234
247
  best = Point.new(-INFINITY, INFINITY)
235
248
  end
236
249
 
250
+ return best if empty?
251
+
237
252
  p = q = root
238
253
 
239
254
  in_q = lambda do |pair|
@@ -307,13 +322,15 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
307
322
  [new_p, new_q]
308
323
  end
309
324
 
310
- until leaf?(p)
325
+ # Now that we have the possibility of dynamic PSTs we need to worry about more cases. For example, p might be a leaf even though
326
+ # q is not
327
+ until leaf?(p) && leaf?(q)
311
328
  update_best.call(p)
312
329
  update_best.call(q)
313
330
 
314
331
  if p == q
315
- if one_child?(p)
316
- p = q = left(p)
332
+ if (child = one_child?(p))
333
+ p = q = child
317
334
  else
318
335
  q = right(p)
319
336
  p = left(p)
@@ -321,13 +338,23 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
321
338
  else
322
339
  # p != q
323
340
  if leaf?(q)
324
- q = p # p itself is just one layer above the leaves, or is itself a leaf
325
- elsif one_child?(q)
341
+ q = p
342
+ elsif leaf?(p)
343
+ p = q
344
+ else
345
+ p_only_child = one_child?(p)
346
+ q_only_child = one_child?(q)
326
347
  # This generic approach is not as fast as the bespoke checks described in the paper. But it is easier to maintain the code
327
348
  # this way and allows easy implementation of largest_x_in_nw
328
- p, q = determine_next_nodes.call(left(p), right(p), left(q))
329
- else
330
- p, q = determine_next_nodes.call(left(p), right(p), left(q), right(q))
349
+ if p_only_child && q_only_child
350
+ p, q = determine_next_nodes.call(p_only_child, q_only_child)
351
+ elsif p_only_child
352
+ p, q = determine_next_nodes.call(p_only_child, left(q), right(q))
353
+ elsif q_only_child
354
+ p, q = determine_next_nodes.call(left(p), right(p), q_only_child)
355
+ else
356
+ p, q = determine_next_nodes.call(left(p), right(p), left(q), right(q))
357
+ end
331
358
  end
332
359
  break unless p # we've run out of useful nodes
333
360
  end
@@ -374,6 +401,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
374
401
  # Sometimes we don't have a relevant node to the left or right of Q. The booleans L and R (which we call left and right) track
375
402
  # whether p and q are defined at the moment.
376
403
  best = Point.new(INFINITY, -INFINITY)
404
+ return best if empty?
405
+
377
406
  p = q = left = right = nil
378
407
 
379
408
  x_range = (x0..x1)
@@ -402,15 +431,15 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
402
431
  # become a child of (the original) p.
403
432
  check_left = lambda do
404
433
  if leaf?(p)
405
- left = false # Question: did p ever get checked as a potential winner?
406
- elsif one_child?(p)
407
- if x_range.cover? @data[left(p)].x
408
- update_highest.call(left(p))
434
+ left = false
435
+ elsif (only_child = one_child?(p))
436
+ if x_range.cover? @data[only_child].x
437
+ update_highest.call(only_child)
409
438
  left = false # can't do y-better in the subtree
410
- elsif @data[left(p)].x < x0
411
- p = left(p)
439
+ elsif @data[only_child].x < x0
440
+ p = only_child
412
441
  else
413
- q = left(p)
442
+ q = only_child
414
443
  right = true
415
444
  left = false
416
445
  end
@@ -457,16 +486,16 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
457
486
  check_right = lambda do
458
487
  if leaf?(q)
459
488
  right = false
460
- elsif one_child?(q)
461
- if x_range.cover? @data[left(q)].x
462
- update_highest.call(left(q))
489
+ elsif (only_child = one_child?(q))
490
+ if x_range.cover? @data[only_child].x
491
+ update_highest.call(only_child)
463
492
  right = false # can't do y-better in the subtree
464
- elsif @data[left(q)].x < x0
465
- p = left(q)
493
+ elsif @data[only_child].x < x0
494
+ p = only_child
466
495
  left = true
467
496
  right = false
468
497
  else
469
- q = left(q)
498
+ q = only_child
470
499
  end
471
500
  else
472
501
  # q has two children
@@ -499,6 +528,8 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
499
528
  end
500
529
  end
501
530
 
531
+ return best if empty?
532
+
502
533
  root_val = @data[root]
503
534
 
504
535
  # If the root value is in the region Q, the max-heap property on y means we can't do better
@@ -595,6 +626,11 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
595
626
  p = p_in = q_in = q = nil
596
627
 
597
628
  result = Set.new
629
+ if empty?
630
+ return if block_given?
631
+
632
+ return result
633
+ end
598
634
 
599
635
  report = lambda do |node|
600
636
  if block_given?
@@ -620,14 +656,14 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
620
656
  if @data[current].y >= y0
621
657
  report.call(current)
622
658
  end
623
- if !leaf?(current) && @data[left(current)].y >= y0
659
+ if !leaf?(current) && in_tree?(left(current)) && @data[left(current)].y >= y0
624
660
  current = left(current)
625
661
  else
626
662
  state = 1
627
663
  end
628
664
  when 1
629
665
  # State 1: we've already handled this node and its left subtree. Should we descend to the right subtree?
630
- if two_children?(current) && @data[right(current)].y >= y0
666
+ if in_tree?(right(current)) && @data[right(current)].y >= y0
631
667
  current = right(current)
632
668
  state = 0
633
669
  else
@@ -698,14 +734,15 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
698
734
  return
699
735
  end
700
736
 
701
- if one_child?(p)
702
- if x_range.cover? @data[left(p)].x
703
- add_leftmost_inner_node.call(left(p))
737
+ if (only_child = one_child?(p))
738
+ child_val = @data[only_child]
739
+ if x_range.cover? child_val.x
740
+ add_leftmost_inner_node.call(only_child)
704
741
  left = false
705
- elsif @data[left(p)].x < x0
706
- p = left(p)
742
+ elsif child_val.x < x0
743
+ p = only_child
707
744
  else
708
- q = left(p)
745
+ q = only_child
709
746
  right = true
710
747
  left = false
711
748
  end
@@ -754,25 +791,26 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
754
791
  return
755
792
  end
756
793
 
757
- left_val = @data[left(p_in)]
758
- if one_child?(p_in)
759
- if x_range.cover? left_val.x
760
- p_in = left(p_in)
761
- elsif left_val.x < x0
794
+ if (only_child = one_child?(p_in))
795
+ child_val = @data[only_child]
796
+ if x_range.cover? child_val.x
797
+ p_in = only_child
798
+ elsif child_val.x < x0
762
799
  # We aren't in the [x0, x1] zone any more and have moved out to the left
763
- p = left(p_in)
800
+ p = only_child
764
801
  deactivate_p_in.call
765
802
  left = true
766
803
  else
767
804
  # similar, but we've moved out to the right. Note that left(p_in) is the leftmost node to the right of Q.
768
805
  raise 'q_in should not be active (by the val of left(p_in))' if right_in
769
806
 
770
- q = left(p_in)
807
+ q = only_child
771
808
  deactivate_p_in.call
772
809
  right = true
773
810
  end
774
811
  else
775
812
  # p' has two children
813
+ left_val = @data[left(p_in)]
776
814
  right_val = @data[right(p_in)]
777
815
  if left_val.x < x0
778
816
  if right_val.x < x0
@@ -828,16 +866,17 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
828
866
  return
829
867
  end
830
868
 
831
- if one_child?(q)
832
- if x_range.cover? @data[left(q)].x
833
- add_rightmost_inner_node.call(left(q))
869
+ if (only_child = one_child?(q))
870
+ child_val = @data[only_child]
871
+ if x_range.cover? child_val.x
872
+ add_rightmost_inner_node.call(only_child)
834
873
  right = false
835
- elsif @data[left(q)].x < x0
836
- p = left(q)
874
+ elsif child_val.x < x0
875
+ p = only_child
837
876
  left = true
838
877
  right = false
839
878
  else
840
- q = left(q)
879
+ q = only_child
841
880
  end
842
881
  return
843
882
  end
@@ -887,18 +926,18 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
887
926
  return
888
927
  end
889
928
 
890
- left_val = @data[left(q_in)]
891
- if one_child?(q_in)
892
- if x_range.cover? left_val.x
893
- q_in = left(q_in)
894
- elsif left_val.x < x0
929
+ if (only_child = one_child?(q_in))
930
+ child_val = @data[only_child]
931
+ if x_range.cover? child_val.x
932
+ q_in = only_child
933
+ elsif child_val.x < x0
895
934
  # We have moved out to the left
896
- p = left(q_in)
935
+ p = only_child
897
936
  right_in = false
898
937
  left = true
899
938
  else
900
939
  # We have moved out to the right
901
- q = left(q_in)
940
+ q = only_child
902
941
  right_in = false
903
942
  right = true
904
943
  end
@@ -906,6 +945,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
906
945
  end
907
946
 
908
947
  # q' has two children
948
+ left_val = @data[left(q_in)]
909
949
  right_val = @data[right(q_in)]
910
950
  if left_val.x < x0
911
951
  raise InternalLogicError, 'p_in cannot be active, by the value in the left child of q_in' if left_in
@@ -967,7 +1007,6 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
967
1007
  end
968
1008
 
969
1009
  while left || left_in || right_in || right
970
- # byebug if $do_it
971
1010
  raise InternalLogicError, 'It should not be that q_in is active but p_in is not' if right_in && !left_in
972
1011
 
973
1012
  set_i = []
@@ -992,6 +1031,110 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
992
1031
  return result unless block_given?
993
1032
  end
994
1033
 
1034
+ ########################################
1035
+ # Delete Top
1036
+ #
1037
+
1038
+ # Delete the top (max-y) element of the PST. This is possible only for dynamic PSTs
1039
+ #
1040
+ # It runs in guaranteed O(log n) time, where n is the size of the PST when it was initially constructed. As elements are deleted
1041
+ # the internal tree structure is no longer guaranteed to be balanced and so we cannot guarantee operation in O(log n') time, where
1042
+ # n' is the current size. In practice, "random" deletion is likely to leave the tree almost balanced.
1043
+ #
1044
+ # @return [Point] the top element that was deleted
1045
+ def delete_top!
1046
+ raise LogicError, 'delete_top! not supported for PSTs that are not dynamic' unless dynamic?
1047
+ raise DataError, 'delete_top! not possible for empty PSTs' unless @member_count.positive?
1048
+
1049
+ i = root
1050
+ while !leaf?(i)
1051
+ if (child = one_child?(i))
1052
+ next_node = child
1053
+ else
1054
+ next_node = left(i)
1055
+
1056
+ if better_y?(right(i), next_node)
1057
+ next_node = right(i)
1058
+ end
1059
+ end
1060
+ swap(i, next_node)
1061
+ i = next_node
1062
+ end
1063
+ @member_count -= 1
1064
+ @data[i]
1065
+ end
1066
+
1067
+ ########################################
1068
+ # Helpers for the internal guts of things
1069
+
1070
+ private def dynamic?
1071
+ @dynamic
1072
+ end
1073
+
1074
+ # i has no children
1075
+ private def leaf?(i)
1076
+ return i > @last_non_leaf unless dynamic?
1077
+
1078
+ !(in_tree?(left(i)) || in_tree?(right(i)))
1079
+ end
1080
+
1081
+ # i has exactly one child. We return the unique child if there is one, and nil otherwise
1082
+
1083
+ # Unless the PST is dynamic this will be the left child. Otherwise it could be either
1084
+ private def one_child?(i)
1085
+ if dynamic?
1086
+ l_child = left(i)
1087
+ r_child = right(i)
1088
+ left_is_in_tree = in_tree?(l_child)
1089
+ return nil unless left_is_in_tree ^ in_tree?(r_child)
1090
+ return l_child if left_is_in_tree
1091
+
1092
+ r_child
1093
+ else
1094
+ return left(i) if i == @parent_of_one_child
1095
+
1096
+ nil
1097
+ end
1098
+ end
1099
+
1100
+ # i has two children
1101
+ private def two_children?(i)
1102
+ i <= @last_parent_of_two_children unless dynamic?
1103
+
1104
+ in_tree?(left(i)) && in_tree?(right(i))
1105
+ end
1106
+
1107
+ # Does the value at index i have a "better" y value than the value at index j.
1108
+ #
1109
+ # A value is better if it is larger, or if it is equal and the x value is smaller (which is how we break the tie)
1110
+ private def better_y?(i, j)
1111
+ val_i = @data[i]
1112
+ val_j = @data[j]
1113
+ return true if val_i.y > val_j.y
1114
+ return false if val_i.y < val_j.y
1115
+
1116
+ val_i.x < val_j.x
1117
+ end
1118
+
1119
+ # Is node i in the tree?
1120
+ private def in_tree?(i)
1121
+ return i <= @size unless dynamic?
1122
+
1123
+ return false if empty?
1124
+ return false if i > @size
1125
+ return true if i == root
1126
+
1127
+ better_y?(parent(i), i)
1128
+
1129
+ # p = parent(i)
1130
+ # return true if @data[i].y < @data[p].y
1131
+ # return false if @data[i].y > @data[p].y
1132
+
1133
+ # # the y values are equal so the tie is broken by x. We are "normal", and in the tree, if our value of x is worse than our
1134
+ # # parent's value
1135
+ # @data[i].x > @data[p].x
1136
+ end
1137
+
995
1138
  ########################################
996
1139
  # Build the initial structure
997
1140
 
@@ -1058,7 +1201,7 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1058
1201
  end
1059
1202
 
1060
1203
  # The index in @data[l..r] having the largest value for y, breaking ties with the smaller x value. Since we are already sorted by
1061
- # x we don't actually need to check this.
1204
+ # x we don't actually need to check the x value.
1062
1205
  private def index_with_largest_y_in(l, r)
1063
1206
  return nil if r < l
1064
1207
 
@@ -1084,7 +1227,6 @@ class DataStructuresRMolinari::MaxPrioritySearchTree
1084
1227
  private def verify_properties
1085
1228
  # It's a max-heap in y
1086
1229
  (2..@size).each do |node|
1087
- byebug unless @data[node].y <= @data[parent(node)].y
1088
1230
  raise InternalLogicError, "Heap property violated at child #{node}" unless @data[node].y <= @data[parent(node)].y
1089
1231
  end
1090
1232