data_structures_rmolinari 0.4.4 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/shared.h ADDED
@@ -0,0 +1,33 @@
1
+ #ifndef SHARED_H
2
+ #define SHARED_H
3
+
4
+ #include <stddef.h>
5
+ #include "ruby.h"
6
+
7
+ #define mShared rb_define_module("Shared")
8
+ #define eSharedDataError rb_const_get(mShared, rb_intern_const("DataError"))
9
+ #define eSharedInternalLogicError rb_const_get(mShared, rb_intern_const("InternalLogicError"))
10
+ #define mDataStructuresRMolinari rb_define_module("DataStructuresRMolinari")
11
+
12
+ //#define debug(...) printf(__VA_ARGS__)
13
+ #define debug(...)
14
+
15
+ /* What we might think of as vector[index]. It is assignable */
16
+ #define lval(vector, index) (*get(vector, index))
17
+
18
+ /*
19
+ * Binary tree arithmetic for an implicit tree in an array, 1-based.
20
+ *
21
+ * TODO: into shared header
22
+ */
23
+ #define TREE_ROOT 1
24
+ size_t midpoint(size_t left, size_t right);
25
+ size_t left_child(size_t i);
26
+ size_t right_child(size_t i);
27
+
28
+ /*
29
+ * Check that a Ruby value is a non-negative Fixnum and convert it to a C unsigned long
30
+ */
31
+ unsigned long checked_nonneg_fixnum(VALUE val);
32
+
33
+ #endif
@@ -1,4 +1,4 @@
1
- # A collection of algorithms that use the module's data structures but don't belong as a method on one of the data structures
1
+ # Algorithms that use the module's data structures but don't belong as a method on one of the data structures
2
2
  module DataStructuresRMolinari::Algorithms
3
3
  include Shared
4
4
 
@@ -11,12 +11,12 @@ module DataStructuresRMolinari::Algorithms
11
11
  #
12
12
  # A _maximal empty rectangle_ (MER) for P is an empty rectangle for P not properly contained in any other.
13
13
  #
14
- # We enumerate all maximal empty rectangles for P, yielding each as (left, right, bottom, top) to a block. The algorithm is due to
15
- # De, M., Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Min-max Priority Search Tree_, Computational Geometry, v46 (2013),
16
- # pp 310-327.
14
+ # We enumerate all maximal empty rectangles for P, yielding each as (left, right, bottom, top). The algorithm is due to De, M.,
15
+ # Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Min-max Priority Search Tree_, Computational Geometry, v46 (2013), pp
16
+ # 310-327.
17
17
  #
18
18
  # It runs in O(m log n) time, where m is the number of MERs enumerated and n is the number of points in P. (Constructing the
19
- # MaxPST below takes O(n log^2 n) time, but m = O(n^2) so we are still O(m log n) overall.)
19
+ # MaxPST takes O(n log^2 n) time, but m = O(n^2) so we are still O(m log n) overall.)
20
20
  #
21
21
  # @param points [Array] an array of points in the x-y plane. Each must respond to +x+ and +y+.
22
22
  def self.maximal_empty_rectangles(points)
@@ -3,110 +3,13 @@ require 'must_be'
3
3
  require_relative 'shared'
4
4
  require_relative 'c_segment_tree_template'
5
5
 
6
- # The template of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or
7
- # max) on a arbitrary subarray of a given array.
6
+ # The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
8
7
  #
9
- # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
10
- # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
11
- # called an "interval tree."
12
- #
13
- # For more details (and some close-to-metal analysis of run time, especially for large datasets) see
14
- # https://en.algorithmica.org/hpc/data-structures/segment-trees/. In particular, this shows how to do a bottom-up implementation,
15
- # which is faster, at least for large datasets and cache-relevant compiled code. These issues don't really apply to code written in
16
- # Ruby.
17
- #
18
- # This is a generic implementation, intended to allow easy configuration for concrete instances. See the parameters to the
19
- # initializer and the definitions of concrete realisations like MaxValSegmentTree.
20
- #
21
- # We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
8
+ # See SegmentTreeTemplate for more information.
22
9
  class DataStructuresRMolinari::CSegmentTreeTemplate
23
-
24
- # Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
25
- # @param combine a lambda that takes two values and munges them into a combined value.
26
- # - For example, if we are calculating sums over subintervals, combine.call(a, b) = a + b, while if we are doing maxima we will
27
- # return max(a, b).
28
- # - Things get more complicated when we are calculating, say, the _index_ of the maximal value in a subinterval. Now it is not
29
- # enough simply to store that index at each tree node, because to combine the indices from two child nodes we need to know
30
- # both the index of the maximal element in each child node's interval, but also the maximal values themselves, so we know
31
- # which one "wins" for the parent node. This affects the sort of work we need to do when combining and the value provided by
32
- # the +single_cell_array_val+ lambda.
33
- # @param single_cell_array_val a lambda that takes an index i and returns the value we need to store in the #build
34
- # operation for the subinterval i..i.
35
- # - This will often simply be the value data[i], but in some cases it will be something else. For example, when we are
36
- # calculating the index of the maximal value on each subinterval we need [i, data[i]] here.
37
- # - If +update_at+ is called later, this lambda must close over the underlying data in a way that captures the updated value.
38
- # @param size the size of the underlying data array, used in certain internal arithmetic.
39
- # @param identity the value to return when we are querying on an empty interval
40
- # - for sums, this will be zero; for maxima, this will be -Infinity, etc
10
+ # (see SegmentTreeTemplate::initialize)
41
11
  def initialize(combine:, single_cell_array_val:, size:, identity:)
42
12
  # having sorted out the keyword arguments, pass them more easily to the C layer.
43
13
  c_initialize(combine, single_cell_array_val, size, identity)
44
14
  end
45
15
  end
46
-
47
- # A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
48
- # in O(log n) time.
49
- #
50
- # C version
51
- #
52
- # TODO: share the definition with (non-C) MasValSegmentTree. The only difference is the class of the underlying segment tree
53
- # template.
54
- module DataStructuresRMolinari
55
- class CMaxValSegmentTree
56
- extend Forwardable
57
-
58
- # Tell the tree that the value at idx has changed
59
- def_delegator :@structure, :update_at
60
-
61
- # @param data an object that contains values at integer indices based at 0, via +data[i]+.
62
- # - This will usually be an Array, but it could also be a hash or a proc.
63
- def initialize(data)
64
- @structure = CSegmentTreeTemplate.new(
65
- combine: ->(a, b) { [a, b].max },
66
- single_cell_array_val: ->(i) { data[i] },
67
- size: data.size,
68
- identity: -Shared::INFINITY
69
- )
70
- end
71
-
72
- # The maximum value in A(i..j).
73
- #
74
- # The arguments must be integers in 0...(A.size)
75
- # @return the largest value in A(i..j) or -Infinity if i > j.
76
- def max_on(i, j)
77
- @structure.query_on(i, j)
78
- end
79
- end
80
-
81
- # A segment tree that for an array A(0...n) answers questions of the form "what is the index of the maximal value in the
82
- # subinterval A(i..j)?" in O(log n) time.
83
- #
84
- # C version
85
- class CIndexOfMaxValSegmentTree
86
- extend Forwardable
87
-
88
- # Tell the tree that the value at idx has changed
89
- def_delegator :@structure, :update_at
90
-
91
- # @param (see MaxValSegmentTree#initialize)
92
- def initialize(data)
93
- @structure = CSegmentTreeTemplate.new(
94
- combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
95
- single_cell_array_val: ->(i) { [i, data[i]] },
96
- size: data.size,
97
- identity: nil
98
- )
99
- end
100
-
101
- # The index of the maximum value in A(i..j)
102
- #
103
- # The arguments must be integers in 0...(A.size)
104
- # @return (Integer, nil) the index of the largest value in A(i..j) or +nil+ if i > j.
105
- # - If there is more than one entry with that value, return one the indices. There is no guarantee as to which one.
106
- # - Return +nil+ if i > j
107
- def index_of_max_val_on(i, j)
108
- @structure.query_on(i, j)&.first # discard the value part of the pair, which is a bookkeeping
109
- end
110
- end
111
-
112
- end
@@ -89,5 +89,7 @@ class DataStructuresRMolinari::DisjointUnion
89
89
  else
90
90
  @d[e] = f
91
91
  end
92
+
93
+ nil
92
94
  end
93
95
  end
@@ -0,0 +1,126 @@
1
+ require_relative 'shared'
2
+
3
+ # A namespace to hold the various bits and bobs related to the SegmentTree implementation
4
+ module DataStructuresRMolinari::SegmentTree
5
+ end
6
+
7
+ require_relative 'segment_tree_template' # Ruby implementation of the generic API
8
+ require_relative 'c_segment_tree_template' # C implementation of the generic API
9
+
10
+ # Segment Tree: various concrete implementations
11
+ #
12
+ # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
13
+ # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
14
+ # called an "interval tree."
15
+ #
16
+ # For more details (and some close-to-metal analysis of run time, especially for large datasets) see
17
+ # https://en.algorithmica.org/hpc/data-structures/segment-trees/. In particular, this shows how to do a bottom-up implementation,
18
+ # which is faster, at least for large datasets and cache-relevant compiled code. These issues don't really apply to code written in
19
+ # Ruby.
20
+ #
21
+ # Here we provide several concrete segment tree implementations built on top of the template (generic) versions. Each instance is
22
+ # backed either by the pure Ruby SegmentTreeTemplate or its C-based sibling CSegmentTreeTemplate
23
+ module DataStructuresRMolinari
24
+ module SegmentTree
25
+ # A convenience method to construct a Segment Tree that, for a given array A(0...size), answers questions of the kind given by
26
+ # operation, using the template written in lang
27
+ #
28
+ # - @param data: the array A.
29
+ # - It must respond to +#size+ and to +#[]+ with non-negative integer arguments.
30
+ # - @param operation: a supported "style" of Segment Tree
31
+ # - for now, must be one of these (but you can write your own concrete version)
32
+ # - +:max+: implementing +max_on(i, j)+, returning the maximum value in A(i..j)
33
+ # - +:index_of_max+: implementing +index_of_max_val_on(i, j)+, returning an index corresponding to the maximum value in
34
+ # A(i..j).
35
+ # - @param lang: the language in which the underlying "template" is written
36
+ # - +:c+ or +:ruby+
37
+ # - the C version will run faster but for now may be buggier and harder to debug
38
+ module_function def construct(data, operation, lang)
39
+ operation.must_be_in [:max, :index_of_max]
40
+ lang.must_be_in [:ruby, :c]
41
+
42
+ klass = operation == :max ? MaxValSegmentTree : IndexOfMaxValSegmentTree
43
+ template = lang == :ruby ? SegmentTreeTemplate : CSegmentTreeTemplate
44
+
45
+ klass.new(template, data)
46
+ end
47
+
48
+ # A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
49
+ # in O(log n) time.
50
+ class MaxValSegmentTree
51
+ extend Forwardable
52
+
53
+ # Tell the tree that the value at idx has changed
54
+ def_delegator :@structure, :update_at
55
+
56
+ # @param template_klass the "template" class that provides the generic implementation of the Segment Tree functionality.
57
+ # @param data an object that contains values at integer indices based at 0, via +data[i]+.
58
+ # - This will usually be an Array, but any Enumerable that responds to +size+ and +[]+ (for example a Hash keyed by index) works.
59
+ def initialize(template_klass, data)
60
+ data.must_be_a Enumerable
61
+
62
+ @structure = template_klass.new(
63
+ combine: ->(a, b) { [a, b].max },
64
+ single_cell_array_val: ->(i) { data[i] },
65
+ size: data.size,
66
+ identity: -Shared::INFINITY
67
+ )
68
+ end
69
+
70
+ # The maximum value in A(i..j).
71
+ #
72
+ # The arguments must be integers in 0...(A.size)
73
+ # @return the largest value in A(i..j) or -Infinity if i > j.
74
+ def max_on(i, j)
75
+ @structure.query_on(i, j)
76
+ end
77
+ end
78
+
79
+ # A segment tree that for an array A(0...n) answers questions of the form "what is the index of the maximal value in the
80
+ # subinterval A(i..j)?" in O(log n) time.
81
+ class IndexOfMaxValSegmentTree
82
+ extend Forwardable
83
+
84
+ # Tell the tree that the value at idx has changed
85
+ def_delegator :@structure, :update_at
86
+
87
+ # @param (see MaxValSegmentTree#initialize)
88
+ def initialize(template_klass, data)
89
+ data.must_be_a Enumerable
90
+
91
+ @structure = template_klass.new(
92
+ combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
93
+ single_cell_array_val: ->(i) { [i, data[i]] },
94
+ size: data.size,
95
+ identity: nil
96
+ )
97
+ end
98
+
99
+ # The index of the maximum value in A(i..j)
100
+ #
101
+ # The arguments must be integers in 0...(A.size)
102
+ # @return (Integer, nil) the index of the largest value in A(i..j) or +nil+ if i > j.
103
+ # - If there is more than one entry with that value, return one of the indices. There is no guarantee as to which one.
104
+ # - Return +nil+ if i > j
105
+ def index_of_max_val_on(i, j)
106
+ @structure.query_on(i, j)&.first # discard the value part of the pair, which is a bookkeeping
107
+ end
108
+ end
109
+
110
+ # The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
111
+ #
112
+ # See SegmentTreeTemplate for more information.
113
+ #
114
+ # Implementation note
115
+ #
116
+ # The functionality is entirely written in C. But we write the constructor in Ruby because keyword arguments are difficult to
117
+ # parse on the C side.
118
+ class CSegmentTreeTemplate
119
+ # (see SegmentTreeTemplate::initialize)
120
+ def initialize(combine:, single_cell_array_val:, size:, identity:)
121
+ # having sorted out the keyword arguments, pass them more easily to the C layer.
122
+ c_initialize(combine, single_cell_array_val, size, identity)
123
+ end
124
+ end
125
+ end
126
+ end
@@ -1,7 +1,7 @@
1
1
  require_relative 'shared'
2
2
 
3
- # The template of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or
4
- # max) on a arbitrary subarray of a given array.
3
+ # A generic implementation of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the
4
+ # sum (or min or max) on an arbitrary subarray of a given array.
5
5
  #
6
6
  # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
7
7
  # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
@@ -16,7 +16,7 @@ require_relative 'shared'
16
16
  # initializer and the definitions of concrete realisations like MaxValSegmentTree.
17
17
  #
18
18
  # We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
19
- class DataStructuresRMolinari::SegmentTreeTemplate
19
+ class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
20
20
  include Shared
21
21
  include Shared::BinaryTreeArithmetic
22
22
 
@@ -14,77 +14,12 @@ require_relative 'data_structures_rmolinari/algorithms'
14
14
  require_relative 'data_structures_rmolinari/disjoint_union'
15
15
  require_relative 'data_structures_rmolinari/c_disjoint_union' # version as a C extension
16
16
 
17
- require_relative 'data_structures_rmolinari/segment_tree_template'
18
- require_relative 'data_structures_rmolinari/c_segment_tree_template_impl'
17
+ require_relative 'data_structures_rmolinari/segment_tree'
19
18
 
20
19
  require_relative 'data_structures_rmolinari/heap'
21
20
  require_relative 'data_structures_rmolinari/max_priority_search_tree'
22
21
  require_relative 'data_structures_rmolinari/min_priority_search_tree'
23
22
 
24
23
  module DataStructuresRMolinari
25
- ########################################
26
- # Concrete instances of Segment Tree
27
- #
28
- # @todo consider moving these into generic_segment_tree.rb and renaming that file
29
-
30
- # A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
31
- # in O(log n) time.
32
- class MaxValSegmentTree
33
- extend Forwardable
34
-
35
- # Tell the tree that the value at idx has changed
36
- def_delegator :@structure, :update_at
37
-
38
- # @param data an object that contains values at integer indices based at 0, via +data[i]+.
39
- # - This will usually be an Array, but it could also be a hash or a proc.
40
- def initialize(data)
41
- data.must_be_a Enumerable
42
-
43
- @structure = SegmentTreeTemplate.new(
44
- combine: ->(a, b) { [a, b].max },
45
- single_cell_array_val: ->(i) { data[i] },
46
- size: data.size,
47
- identity: -Shared::INFINITY
48
- )
49
- end
50
-
51
- # The maximum value in A(i..j).
52
- #
53
- # The arguments must be integers in 0...(A.size)
54
- # @return the largest value in A(i..j) or -Infinity if i > j.
55
- def max_on(i, j)
56
- @structure.query_on(i, j)
57
- end
58
- end
59
-
60
- # A segment tree that for an array A(0...n) answers questions of the form "what is the index of the maximal value in the
61
- # subinterval A(i..j)?" in O(log n) time.
62
- class IndexOfMaxValSegmentTree
63
- extend Forwardable
64
-
65
- # Tell the tree that the value at idx has changed
66
- def_delegator :@structure, :update_at
67
-
68
- # @param (see MaxValSegmentTree#initialize)
69
- def initialize(data)
70
- data.must_be_a Enumerable
71
-
72
- @structure = SegmentTreeTemplate.new(
73
- combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
74
- single_cell_array_val: ->(i) { [i, data[i]] },
75
- size: data.size,
76
- identity: nil
77
- )
78
- end
79
-
80
- # The index of the maximum value in A(i..j)
81
- #
82
- # The arguments must be integers in 0...(A.size)
83
- # @return (Integer, nil) the index of the largest value in A(i..j) or +nil+ if i > j.
84
- # - If there is more than one entry with that value, return one the indices. There is no guarantee as to which one.
85
- # - Return +nil+ if i > j
86
- def index_of_max_val_on(i, j)
87
- @structure.query_on(i, j)&.first # discard the value part of the pair, which is a bookkeeping
88
- end
89
- end
24
+ # Add things here if needed
90
25
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_structures_rmolinari
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rory Molinari
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-02 00:00:00.000000000 Z
11
+ date: 2023-02-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: must_be
@@ -89,7 +89,9 @@ files:
89
89
  - ext/c_disjoint_union/extconf.rb
90
90
  - ext/c_segment_tree_template/extconf.rb
91
91
  - ext/c_segment_tree_template/segment_tree_template.c
92
+ - ext/cc.h
92
93
  - ext/shared.c
94
+ - ext/shared.h
93
95
  - lib/data_structures_rmolinari.rb
94
96
  - lib/data_structures_rmolinari/algorithms.rb
95
97
  - lib/data_structures_rmolinari/c_segment_tree_template_impl.rb
@@ -97,6 +99,7 @@ files:
97
99
  - lib/data_structures_rmolinari/heap.rb
98
100
  - lib/data_structures_rmolinari/max_priority_search_tree.rb
99
101
  - lib/data_structures_rmolinari/min_priority_search_tree.rb
102
+ - lib/data_structures_rmolinari/segment_tree.rb
100
103
  - lib/data_structures_rmolinari/segment_tree_template.rb
101
104
  - lib/data_structures_rmolinari/shared.rb
102
105
  homepage: https://github.com/rmolinari/data_structures