data_structures_rmolinari 0.4.4 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/shared.h ADDED
@@ -0,0 +1,33 @@
1
+ #ifndef SHARED_H
2
+ #define SHARED_H
3
+
4
+ #include <stddef.h>
5
+ #include "ruby.h"
6
+
7
+ #define mShared rb_define_module("Shared")
8
+ #define eSharedDataError rb_const_get(mShared, rb_intern_const("DataError"))
9
+ #define eSharedInternalLogicError rb_const_get(mShared, rb_intern_const("InternalLogicError"))
10
+ #define mDataStructuresRMolinari rb_define_module("DataStructuresRMolinari")
11
+
12
+ //#define debug(...) printf(__VA_ARGS__)
13
+ #define debug(...)
14
+
15
+ /* What we might think of as vector[index]. It is assignable */
16
+ #define lval(vector, index) (*get(vector, index))
17
+
18
+ /*
19
+ * Binary tree arithmetic for an implicit tree in an array, 1-based.
20
+ *
21
+ * TODO: into shared header
22
+ */
23
+ #define TREE_ROOT 1
24
+ size_t midpoint(size_t left, size_t right);
25
+ size_t left_child(size_t i);
26
+ size_t right_child(size_t i);
27
+
28
+ /*
29
+ * Check that a Ruby value is a non-negative Fixnum and convert it to a C unsigned long
30
+ */
31
+ unsigned long checked_nonneg_fixnum(VALUE val);
32
+
33
+ #endif
@@ -1,4 +1,4 @@
1
- # A collection of algorithms that use the module's data structures but don't belong as a method on one of the data structures
1
+ # Algorithms that use the module's data structures but don't belong as a method on one of the data structures
2
2
  module DataStructuresRMolinari::Algorithms
3
3
  include Shared
4
4
 
@@ -11,12 +11,12 @@ module DataStructuresRMolinari::Algorithms
11
11
  #
12
12
  # A _maximal empty rectangle_ (MER) for P is an empty rectangle for P not properly contained in any other.
13
13
  #
14
- # We enumerate all maximal empty rectangles for P, yielding each as (left, right, bottom, top) to a block. The algorithm is due to
15
- # De, M., Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Min-max Priority Search Tree_, Computational Geometry, v46 (2013),
16
- # pp 310-327.
14
+ # We enumerate all maximal empty rectangles for P, yielding each as (left, right, bottom, top). The algorithm is due to De, M.,
15
+ # Maheshwari, A., Nandy, S. C., Smid, M., _An In-Place Min-max Priority Search Tree_, Computational Geometry, v46 (2013), pp
16
+ # 310-327.
17
17
  #
18
18
  # It runs in O(m log n) time, where m is the number of MERs enumerated and n is the number of points in P. (Constructing the
19
- # MaxPST below takes O(n log^2 n) time, but m = O(n^2) so we are still O(m log n) overall.)
19
+ # MaxPST takes O(n log^2 n) time, but m = O(n^2) so we are still O(m log n) overall.)
20
20
  #
21
21
  # @param points [Array] an array of points in the x-y plane. Each must respond to +x+ and +y+.
22
22
  def self.maximal_empty_rectangles(points)
@@ -3,110 +3,13 @@ require 'must_be'
3
3
  require_relative 'shared'
4
4
  require_relative 'c_segment_tree_template'
5
5
 
6
- # The template of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or
7
- # max) on a arbitrary subarray of a given array.
6
+ # The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
8
7
  #
9
- # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
10
- # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
11
- # called an "interval tree."
12
- #
13
- # For more details (and some close-to-metal analysis of run time, especially for large datasets) see
14
- # https://en.algorithmica.org/hpc/data-structures/segment-trees/. In particular, this shows how to do a bottom-up implementation,
15
- # which is faster, at least for large datasets and cache-relevant compiled code. These issues don't really apply to code written in
16
- # Ruby.
17
- #
18
- # This is a generic implementation, intended to allow easy configuration for concrete instances. See the parameters to the
19
- # initializer and the definitions of concrete realisations like MaxValSegmentTree.
20
- #
21
- # We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
8
+ # See SegmentTreeTemplate for more information.
22
9
  class DataStructuresRMolinari::CSegmentTreeTemplate
23
-
24
- # Construct a concrete instance of a Segment Tree. See details at the links above for the underlying concepts here.
25
- # @param combine a lambda that takes two values and munges them into a combined value.
26
- # - For example, if we are calculating sums over subintervals, combine.call(a, b) = a + b, while if we are doing maxima we will
27
- # return max(a, b).
28
- # - Things get more complicated when we are calculating, say, the _index_ of the maximal value in a subinterval. Now it is not
29
- # enough simply to store that index at each tree node, because to combine the indices from two child nodes we need to know
30
- # both the index of the maximal element in each child node's interval, but also the maximal values themselves, so we know
31
- # which one "wins" for the parent node. This affects the sort of work we need to do when combining and the value provided by
32
- # the +single_cell_array_val+ lambda.
33
- # @param single_cell_array_val a lambda that takes an index i and returns the value we need to store in the #build
34
- # operation for the subinterval i..i.
35
- # - This will often simply be the value data[i], but in some cases it will be something else. For example, when we are
36
- # calculating the index of the maximal value on each subinterval we need [i, data[i]] here.
37
- # - If +update_at+ is called later, this lambda must close over the underlying data in a way that captures the updated value.
38
- # @param size the size of the underlying data array, used in certain internal arithmetic.
39
- # @param identity the value to return when we are querying on an empty interval
40
- # - for sums, this will be zero; for maxima, this will be -Infinity, etc
10
+ # (see SegmentTreeTemplate::initialize)
41
11
  def initialize(combine:, single_cell_array_val:, size:, identity:)
42
12
  # having sorted out the keyword arguments, pass them more easily to the C layer.
43
13
  c_initialize(combine, single_cell_array_val, size, identity)
44
14
  end
45
15
  end
46
-
47
- # A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
48
- # in O(log n) time.
49
- #
50
- # C version
51
- #
52
- # TODO: share the definition with (non-C) MasValSegmentTree. The only difference is the class of the underlying segment tree
53
- # template.
54
- module DataStructuresRMolinari
55
- class CMaxValSegmentTree
56
- extend Forwardable
57
-
58
- # Tell the tree that the value at idx has changed
59
- def_delegator :@structure, :update_at
60
-
61
- # @param data an object that contains values at integer indices based at 0, via +data[i]+.
62
- # - This will usually be an Array, but it could also be a hash or a proc.
63
- def initialize(data)
64
- @structure = CSegmentTreeTemplate.new(
65
- combine: ->(a, b) { [a, b].max },
66
- single_cell_array_val: ->(i) { data[i] },
67
- size: data.size,
68
- identity: -Shared::INFINITY
69
- )
70
- end
71
-
72
- # The maximum value in A(i..j).
73
- #
74
- # The arguments must be integers in 0...(A.size)
75
- # @return the largest value in A(i..j) or -Infinity if i > j.
76
- def max_on(i, j)
77
- @structure.query_on(i, j)
78
- end
79
- end
80
-
81
- # A segment tree that for an array A(0...n) answers questions of the form "what is the index of the maximal value in the
82
- # subinterval A(i..j)?" in O(log n) time.
83
- #
84
- # C version
85
- class CIndexOfMaxValSegmentTree
86
- extend Forwardable
87
-
88
- # Tell the tree that the value at idx has changed
89
- def_delegator :@structure, :update_at
90
-
91
- # @param (see MaxValSegmentTree#initialize)
92
- def initialize(data)
93
- @structure = CSegmentTreeTemplate.new(
94
- combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
95
- single_cell_array_val: ->(i) { [i, data[i]] },
96
- size: data.size,
97
- identity: nil
98
- )
99
- end
100
-
101
- # The index of the maximum value in A(i..j)
102
- #
103
- # The arguments must be integers in 0...(A.size)
104
- # @return (Integer, nil) the index of the largest value in A(i..j) or +nil+ if i > j.
105
- # - If there is more than one entry with that value, return one the indices. There is no guarantee as to which one.
106
- # - Return +nil+ if i > j
107
- def index_of_max_val_on(i, j)
108
- @structure.query_on(i, j)&.first # discard the value part of the pair, which is a bookkeeping
109
- end
110
- end
111
-
112
- end
@@ -89,5 +89,7 @@ class DataStructuresRMolinari::DisjointUnion
89
89
  else
90
90
  @d[e] = f
91
91
  end
92
+
93
+ nil
92
94
  end
93
95
  end
@@ -0,0 +1,126 @@
1
+ require_relative 'shared'
2
+
3
+ # A namespace to hold the various bits and bobs related to the SegmentTree implementation
4
+ module DataStructuresRMolinari::SegmentTree
5
+ end
6
+
7
+ require_relative 'segment_tree_template' # Ruby implementation of the generic API
8
+ require_relative 'c_segment_tree_template' # C implementation of the generic API
9
+
10
+ # Segment Tree: various concrete implementations
11
+ #
12
+ # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
13
+ # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
14
+ # called an "interval tree."
15
+ #
16
+ # For more details (and some close-to-metal analysis of run time, especially for large datasets) see
17
+ # https://en.algorithmica.org/hpc/data-structures/segment-trees/. In particular, this shows how to do a bottom-up implementation,
18
+ # which is faster, at least for large datasets and cache-relevant compiled code. These issues don't really apply to code written in
19
+ # Ruby.
20
+ #
21
+ # Here we provide several concrete segment tree implementations built on top of the template (generic) versions. Each instance is
22
+ # backed either by the pure Ruby SegmentTreeTemplate or its C-based sibling CSegmentTreeTemplate
23
+ module DataStructuresRMolinari
24
+ module SegmentTree
25
+ # A convenience method to construct a Segment Tree that, for a given array A(0...size), answers questions of the kind given by
26
+ # operation, using the template written in lang
27
+ #
28
+ # - @param data: the array A.
29
+ # - It must respond to +#size+ and to +#[]+ with non-negative integer arguments.
30
+ # - @param operation: a supported "style" of Segment Tree
31
+ # - for now, must be one of these (but you can write your own concrete version)
32
+ # - +:max+: implementing +max_on(i, j)+, returning the maximum value in A(i..j)
33
+ # - +:index_of_max+: implementing +index_of_max_val_on(i, j)+, returning an index corresponding to the maximum value in
34
+ # A(i..j).
35
+ # - @param lang: the language in which the underlying "template" is written
36
+ # - +:c+ or +:ruby+
37
+ # - the C version will run faster but for now may be buggier and harder to debug
38
+ module_function def construct(data, operation, lang)
39
+ operation.must_be_in [:max, :index_of_max]
40
+ lang.must_be_in [:ruby, :c]
41
+
42
+ klass = operation == :max ? MaxValSegmentTree : IndexOfMaxValSegmentTree
43
+ template = lang == :ruby ? SegmentTreeTemplate : CSegmentTreeTemplate
44
+
45
+ klass.new(template, data)
46
+ end
47
+
48
+ # A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
49
+ # in O(log n) time.
50
+ class MaxValSegmentTree
51
+ extend Forwardable
52
+
53
+ # Tell the tree that the value at idx has changed
54
+ def_delegator :@structure, :update_at
55
+
56
+ # @param template_klass the "template" class that provides the generic implementation of the Segment Tree functionality.
57
+ # @param data an object that contains values at integer indices based at 0, via +data[i]+.
58
+ # - This will usually be an Array, but it could also be a hash or a proc.
59
+ def initialize(template_klass, data)
60
+ data.must_be_a Enumerable
61
+
62
+ @structure = template_klass.new(
63
+ combine: ->(a, b) { [a, b].max },
64
+ single_cell_array_val: ->(i) { data[i] },
65
+ size: data.size,
66
+ identity: -Shared::INFINITY
67
+ )
68
+ end
69
+
70
+ # The maximum value in A(i..j).
71
+ #
72
+ # The arguments must be integers in 0...(A.size)
73
+ # @return the largest value in A(i..j) or -Infinity if i > j.
74
+ def max_on(i, j)
75
+ @structure.query_on(i, j)
76
+ end
77
+ end
78
+
79
+ # A segment tree that for an array A(0...n) answers questions of the form "what is the index of the maximal value in the
80
+ # subinterval A(i..j)?" in O(log n) time.
81
+ class IndexOfMaxValSegmentTree
82
+ extend Forwardable
83
+
84
+ # Tell the tree that the value at idx has changed
85
+ def_delegator :@structure, :update_at
86
+
87
+ # @param (see MaxValSegmentTree#initialize)
88
+ def initialize(template_klass, data)
89
+ data.must_be_a Enumerable
90
+
91
+ @structure = template_klass.new(
92
+ combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
93
+ single_cell_array_val: ->(i) { [i, data[i]] },
94
+ size: data.size,
95
+ identity: nil
96
+ )
97
+ end
98
+
99
+ # The index of the maximum value in A(i..j)
100
+ #
101
+ # The arguments must be integers in 0...(A.size)
102
+ # @return (Integer, nil) the index of the largest value in A(i..j) or +nil+ if i > j.
103
+ # - If there is more than one entry with that value, return one of the indices. There is no guarantee as to which one.
104
+ # - Return +nil+ if i > j
105
+ def index_of_max_val_on(i, j)
106
+ @structure.query_on(i, j)&.first # discard the value part of the pair, which is a bookkeeping
107
+ end
108
+ end
109
+
110
+ # The underlying functionality of the Segment Tree data type, implemented in C as a Ruby extension.
111
+ #
112
+ # See SegmentTreeTemplate for more information.
113
+ #
114
+ # Implementation note
115
+ #
116
+ # The functionality is entirely written in C. But we write the constructor in Ruby because keyword arguments are difficult to
117
+ # parse on the C side.
118
+ class CSegmentTreeTemplate
119
+ # (see SegmentTreeTemplate::initialize)
120
+ def initialize(combine:, single_cell_array_val:, size:, identity:)
121
+ # having sorted out the keyword arguments, pass them more easily to the C layer.
122
+ c_initialize(combine, single_cell_array_val, size, identity)
123
+ end
124
+ end
125
+ end
126
+ end
@@ -1,7 +1,7 @@
1
1
  require_relative 'shared'
2
2
 
3
- # The template of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the sum (or min or
4
- # max) on a arbitrary subarray of a given array.
3
+ # A generic implementation of Segment Tree, which can be used for various interval-related purposes, like efficiently finding the
4
+ # sum (or min or max) on an arbitrary subarray of a given array.
5
5
  #
6
6
  # There is an excellent description of the data structure at https://cp-algorithms.com/data_structures/segment_tree.html. The
7
7
  # Wikipedia article (https://en.wikipedia.org/wiki/Segment_tree) appears to describe a different data structure which is sometimes
@@ -16,7 +16,7 @@ require_relative 'shared'
16
16
  # initializer and the definitions of concrete realisations like MaxValSegmentTree.
17
17
  #
18
18
  # We do O(n) work to build the internal data structure at initialization. Then we answer queries in O(log n) time.
19
- class DataStructuresRMolinari::SegmentTreeTemplate
19
+ class DataStructuresRMolinari::SegmentTree::SegmentTreeTemplate
20
20
  include Shared
21
21
  include Shared::BinaryTreeArithmetic
22
22
 
@@ -14,77 +14,12 @@ require_relative 'data_structures_rmolinari/algorithms'
14
14
  require_relative 'data_structures_rmolinari/disjoint_union'
15
15
  require_relative 'data_structures_rmolinari/c_disjoint_union' # version as a C extension
16
16
 
17
- require_relative 'data_structures_rmolinari/segment_tree_template'
18
- require_relative 'data_structures_rmolinari/c_segment_tree_template_impl'
17
+ require_relative 'data_structures_rmolinari/segment_tree'
19
18
 
20
19
  require_relative 'data_structures_rmolinari/heap'
21
20
  require_relative 'data_structures_rmolinari/max_priority_search_tree'
22
21
  require_relative 'data_structures_rmolinari/min_priority_search_tree'
23
22
 
24
23
  module DataStructuresRMolinari
25
- ########################################
26
- # Concrete instances of Segment Tree
27
- #
28
- # @todo consider moving these into generic_segment_tree.rb and renaming that file
29
-
30
- # A segment tree that for an array A(0...n) answers questions of the form "what is the maximum value in the subinterval A(i..j)?"
31
- # in O(log n) time.
32
- class MaxValSegmentTree
33
- extend Forwardable
34
-
35
- # Tell the tree that the value at idx has changed
36
- def_delegator :@structure, :update_at
37
-
38
- # @param data an object that contains values at integer indices based at 0, via +data[i]+.
39
- # - This will usually be an Array, but it could also be a hash or a proc.
40
- def initialize(data)
41
- data.must_be_a Enumerable
42
-
43
- @structure = SegmentTreeTemplate.new(
44
- combine: ->(a, b) { [a, b].max },
45
- single_cell_array_val: ->(i) { data[i] },
46
- size: data.size,
47
- identity: -Shared::INFINITY
48
- )
49
- end
50
-
51
- # The maximum value in A(i..j).
52
- #
53
- # The arguments must be integers in 0...(A.size)
54
- # @return the largest value in A(i..j) or -Infinity if i > j.
55
- def max_on(i, j)
56
- @structure.query_on(i, j)
57
- end
58
- end
59
-
60
- # A segment tree that for an array A(0...n) answers questions of the form "what is the index of the maximal value in the
61
- # subinterval A(i..j)?" in O(log n) time.
62
- class IndexOfMaxValSegmentTree
63
- extend Forwardable
64
-
65
- # Tell the tree that the value at idx has changed
66
- def_delegator :@structure, :update_at
67
-
68
- # @param (see MaxValSegmentTree#initialize)
69
- def initialize(data)
70
- data.must_be_a Enumerable
71
-
72
- @structure = SegmentTreeTemplate.new(
73
- combine: ->(p1, p2) { p1[1] >= p2[1] ? p1 : p2 },
74
- single_cell_array_val: ->(i) { [i, data[i]] },
75
- size: data.size,
76
- identity: nil
77
- )
78
- end
79
-
80
- # The index of the maximum value in A(i..j)
81
- #
82
- # The arguments must be integers in 0...(A.size)
83
- # @return (Integer, nil) the index of the largest value in A(i..j) or +nil+ if i > j.
84
- # - If there is more than one entry with that value, return one the indices. There is no guarantee as to which one.
85
- # - Return +nil+ if i > j
86
- def index_of_max_val_on(i, j)
87
- @structure.query_on(i, j)&.first # discard the value part of the pair, which is a bookkeeping
88
- end
89
- end
24
+ # Add things here if needed
90
25
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_structures_rmolinari
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rory Molinari
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-02 00:00:00.000000000 Z
11
+ date: 2023-02-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: must_be
@@ -89,7 +89,9 @@ files:
89
89
  - ext/c_disjoint_union/extconf.rb
90
90
  - ext/c_segment_tree_template/extconf.rb
91
91
  - ext/c_segment_tree_template/segment_tree_template.c
92
+ - ext/cc.h
92
93
  - ext/shared.c
94
+ - ext/shared.h
93
95
  - lib/data_structures_rmolinari.rb
94
96
  - lib/data_structures_rmolinari/algorithms.rb
95
97
  - lib/data_structures_rmolinari/c_segment_tree_template_impl.rb
@@ -97,6 +99,7 @@ files:
97
99
  - lib/data_structures_rmolinari/heap.rb
98
100
  - lib/data_structures_rmolinari/max_priority_search_tree.rb
99
101
  - lib/data_structures_rmolinari/min_priority_search_tree.rb
102
+ - lib/data_structures_rmolinari/segment_tree.rb
100
103
  - lib/data_structures_rmolinari/segment_tree_template.rb
101
104
  - lib/data_structures_rmolinari/shared.rb
102
105
  homepage: https://github.com/rmolinari/data_structures