sequitur 0.1.18 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'digram'
2
4
  require_relative 'symbol_sequence'
3
5
  require_relative 'production_ref'
@@ -23,7 +25,7 @@ class Production
23
25
 
24
26
  # Constructor.
25
27
  # Build a production with an empty RHS.
26
- def initialize()
28
+ def initialize
27
29
  @rhs = SymbolSequence.new
28
30
  @refcount = 0
29
31
  @digrams = []
@@ -44,7 +46,6 @@ class Production
44
46
  return result
45
47
  end
46
48
 
47
-
48
49
  # Is the rhs empty?
49
50
  # @return [true/false] true if the rhs has no members.
50
51
  def empty?
@@ -52,20 +53,20 @@ class Production
52
53
  end
53
54
 
54
55
  # Increment the reference count by one.
55
- def incr_refcount()
56
+ def incr_refcount
56
57
  @refcount += 1
57
58
  end
58
59
 
59
60
  # Decrement the reference count by one.
60
- def decr_refcount()
61
+ def decr_refcount
61
62
  raise StandardError, 'Internal error' if @refcount.zero?
63
+
62
64
  @refcount -= 1
63
65
  end
64
66
 
65
-
66
67
  # Select the references to production appearing in the rhs.
67
68
  # @return [Array of ProductionRef]
68
- def references()
69
+ def references
69
70
  return rhs.references
70
71
  end
71
72
 
@@ -77,10 +78,9 @@ class Production
77
78
  return rhs.references_of(real_prod)
78
79
  end
79
80
 
80
-
81
81
  # Enumerate the digrams appearing in the right-hand side (rhs)
82
82
  # @return [Array] the list of digrams found in rhs of this production.
83
- def recalc_digrams()
83
+ def recalc_digrams
84
84
  return [] if rhs.size < 2
85
85
 
86
86
  result = []
@@ -88,20 +88,17 @@ class Production
88
88
  @digrams = result
89
89
  end
90
90
 
91
-
92
-
93
91
  # Does the rhs have exactly one digram only (= 2 symbols)?
94
92
  # @return [true/false] true when the rhs contains exactly two symbols.
95
93
  def single_digram?
96
94
  return rhs.size == 2
97
95
  end
98
96
 
99
-
100
97
  # Detect whether the last digram occurs twice
101
98
  # Assumption: when a digram occurs twice in a production then it must occur
102
99
  # at the end of the rhs
103
100
  # @return [true/false] true when the digram occurs twice in rhs.
104
- def repeated_digram?()
101
+ def repeated_digram?
105
102
  return false if rhs.size < 3
106
103
 
107
104
  my_digrams = digrams
@@ -113,17 +110,16 @@ class Production
113
110
 
114
111
  # Retrieve the last digram appearing in the RHS (if any).
115
112
  # @return [Digram] last digram in the rhs otherwise nil.
116
- def last_digram()
113
+ def last_digram
117
114
  result = digrams.empty? ? nil : digrams.last
118
115
  return result
119
116
  end
120
117
 
121
-
122
118
  # Emit a text representation of the production rule.
123
119
  # Text is of the form:
124
120
  # object id of production : rhs as space-separated sequence of symbols.
125
121
  # @return [String]
126
- def to_string()
122
+ def to_string
127
123
  return "#{object_id} : #{rhs.to_string}."
128
124
  end
129
125
 
@@ -150,7 +146,7 @@ class Production
150
146
 
151
147
  # Clear the right-hand side.
152
148
  # Any referenced production has its reference counter decremented.
153
- def clear_rhs()
149
+ def clear_rhs
154
150
  rhs.clear
155
151
  end
156
152
 
@@ -168,9 +164,10 @@ class Production
168
164
  # p.positions_of(a, a) # => [0, 3]
169
165
  def positions_of(symb1, symb2)
170
166
  # Find the positions where the digram occurs in rhs
171
- indices = [ -2 ] # Dummy index!
167
+ indices = [-2] # Dummy index!
172
168
  (0...rhs.size).each do |i|
173
169
  next if i == indices.last + 1
170
+
174
171
  indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
175
172
  end
176
173
 
@@ -179,7 +176,6 @@ class Production
179
176
  return indices
180
177
  end
181
178
 
182
-
183
179
  # Given that the production P passed as argument has exactly 2 symbols
184
180
  # in its rhs s1 s2, substitute in the rhs of self all occurrences of
185
181
  # s1 s2 by a reference to P.
@@ -217,7 +213,6 @@ class Production
217
213
  recalc_digrams
218
214
  end
219
215
 
220
-
221
216
  # Part of the 'visitee' role in Visitor design pattern.
222
217
  # @param aVisitor [GrammarVisitor] the visitor
223
218
  def accept(aVisitor)
@@ -1,115 +1,116 @@
1
-
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Sequitur # Module for classes implementing the Sequitur algorithm
4
- # A production reference is a grammar symbol that may appear in the right-hand
5
- # side of a production P1 and that refers to a production P2.
6
- # Every time a production P2 appears in the left-hand side of
7
- # production P1, this is implemented by inserting a production reference to P2
8
- # in the appropriate position in the RHS of P1.
9
- # In the literature, production references are also called non terminal
10
- # symbols
11
- # @example
12
- # # Given a production rule...
13
- # prod = Sequitur::Production.new
14
- # puts prod.refcount # outputs 0
15
- # # ... Build a reference to it
16
- # ref = Sequitur::ProductionRef.new(prod)
17
- # # ... Production reference count is updated...
18
- # puts prod.refcount # outputs 1
19
- class ProductionRef
20
- # Link to the production to reference.
21
- attr_reader(:production)
22
-
23
- # Constructor
24
- # @param target [Production or ProductionRef]
25
- # The production that is being referenced.
26
- def initialize(target)
27
- bind_to(target)
28
- end
29
-
30
- # Copy constructor invoked by dup or clone methods.
31
- # @param orig [ProductionRef]
4
+ # A production reference is a grammar symbol that may appear in the right-hand
5
+ # side of a production P1 and that refers to a production P2.
6
+ # Every time a production P2 appears in the right-hand side of
7
+ # production P1, this is implemented by inserting a production reference to P2
8
+ # in the appropriate position in the RHS of P1.
9
+ # In the literature, production references are also called non terminal
10
+ # symbols
32
11
  # @example
12
+ # # Given a production rule...
33
13
  # prod = Sequitur::Production.new
14
+ # puts prod.refcount # outputs 0
15
+ # # ... Build a reference to it
34
16
  # ref = Sequitur::ProductionRef.new(prod)
35
- # copy_ref = ref.dup
36
- # puts prod.refcount # outputs 2
37
- def initialize_copy(orig)
38
- @production = nil
39
- bind_to(orig.production)
40
- end
41
-
42
- # Emit the text representation of a production reference.
43
- # @return [String]
44
- def to_s()
45
- return production.object_id.to_s
46
- end
47
-
48
- alias to_string to_s
49
-
50
-
51
- # Equality testing.
52
- # A production ref is equal to another one when its
53
- # refers to the same production or when it is compared to
54
- # the production it refers to.
55
- # @param other [ProductionRef]
56
- # @return [true / false]
57
- def ==(other)
58
- return true if object_id == other.object_id
59
-
60
- result = if other.is_a?(ProductionRef)
61
- (production == other.production)
62
- else
63
- (production == other)
64
- end
65
-
66
- return result
67
- end
68
-
69
- # Produce a hash value.
70
- # A reference has no identity on its own,
71
- # the method returns the hash value of the
72
- # referenced production
73
- # @return [Fixnum] the hash value
74
- def hash()
75
- raise StandardError, 'Nil production' if production.nil?
76
- return production.hash
77
- end
78
-
79
- # Make this reference point to the given production.
80
- # @param aProduction [Production or ProductionRef] the production
81
- # to refer to
82
- def bind_to(aProduction)
83
- return if aProduction == @production
84
-
85
- production.decr_refcount if production
86
- unless aProduction.kind_of?(Production)
87
- raise StandardError, "Illegal production type #{aProduction.class}"
17
+ # # ... Production reference count is updated...
18
+ # puts prod.refcount # outputs 1
19
+ class ProductionRef
20
+ # Link to the production to reference.
21
+ attr_reader(:production)
22
+
23
+ # Constructor
24
+ # @param target [Production or ProductionRef]
25
+ # The production that is being referenced.
26
+ def initialize(target)
27
+ bind_to(target)
28
+ end
29
+
30
+ # Copy constructor invoked by dup or clone methods.
31
+ # @param orig [ProductionRef]
32
+ # @example
33
+ # prod = Sequitur::Production.new
34
+ # ref = Sequitur::ProductionRef.new(prod)
35
+ # copy_ref = ref.dup
36
+ # puts prod.refcount # outputs 2
37
+ def initialize_copy(orig)
38
+ @production = nil
39
+ bind_to(orig.production)
40
+ end
41
+
42
+ # Emit the text representation of a production reference.
43
+ # @return [String]
44
+ def to_s
45
+ return production.object_id.to_s
46
+ end
47
+
48
+ alias to_string to_s
49
+
50
+
51
+ # Equality testing.
52
+ # A production ref is equal to another one when it
53
+ # refers to the same production or when it is compared to
54
+ # the production it refers to.
55
+ # @param other [ProductionRef]
56
+ # @return [true / false]
57
+ def ==(other)
58
+ return true if object_id == other.object_id
59
+
60
+ result = if other.is_a?(ProductionRef)
61
+ (production == other.production)
62
+ else
63
+ (production == other)
64
+ end
65
+
66
+ return result
67
+ end
68
+
69
+ # Produce a hash value.
70
+ # A reference has no identity on its own,
71
+ # the method returns the hash value of the
72
+ # referenced production
73
+ # @return [Integer] the hash value
74
+ def hash
75
+ raise StandardError, 'Nil production' if production.nil?
76
+
77
+ return production.hash
78
+ end
79
+
80
+ # Make this reference point to the given production.
81
+ # @param aProduction [Production or ProductionRef] the production
82
+ # to refer to
83
+ def bind_to(aProduction)
84
+ return if aProduction == @production
85
+
86
+ production&.decr_refcount
87
+ unless aProduction.kind_of?(Production)
88
+ raise StandardError, "Illegal production type #{aProduction.class}"
89
+ end
90
+
91
+ @production = aProduction
92
+ production.incr_refcount
93
+ end
94
+
95
+ # Clear the reference to the target production.
96
+ def unbind
97
+ production.decr_refcount
98
+ @production = nil
99
+ end
100
+
101
+ # Check that this object doesn't refer to any production.
102
+ # @return [true / false] true when this object doesn't
103
+ # point to a production.
104
+ def unbound?
105
+ return production.nil?
106
+ end
107
+
108
+ # Part of the 'visitee' role in the Visitor design pattern.
109
+ # @param aVisitor [GrammarVisitor] the visitor
110
+ def accept(aVisitor)
111
+ aVisitor.visit_prod_ref(self)
88
112
  end
89
- @production = aProduction
90
- production.incr_refcount
91
- end
92
-
93
-
94
- # Clear the reference to the target production.
95
- def unbind()
96
- production.decr_refcount
97
- @production = nil
98
- end
99
-
100
- # Check that the this object doesn't refer to any production.
101
- # @return [true / false] true when this object doesn't
102
- # point to a production.
103
- def unbound?()
104
- return production.nil?
105
- end
106
-
107
- # Part of the 'visitee' role in the Visitor design pattern.
108
- # @param aVisitor [GrammarVisitor] the visitor
109
- def accept(aVisitor)
110
- aVisitor.visit_prod_ref(self)
111
- end
112
- end # class
113
+ end # class
113
114
  end # module
114
115
 
115
116
  # End of file
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'dynamic_grammar'
2
4
 
3
5
 
@@ -27,8 +29,8 @@ class SequiturGrammar < DynamicGrammar
27
29
  CollisionDiagnosis = Struct.new(
28
30
  :collision_found, # true if collision detected
29
31
  :digram, # The digram involved in a collision
30
- :productions # The productions where the digram occurs
31
- )
32
+ :productions) # The productions where the digram occurs
33
+
32
34
 
33
35
 
34
36
  # Assuming that a new input token was added to the start production,
@@ -43,7 +45,7 @@ class SequiturGrammar < DynamicGrammar
43
45
  # remove P from grammar
44
46
  # end
45
47
  # end until digram unicity and rule utility are met
46
- def enforce_rules()
48
+ def enforce_rules
47
49
  loop do
48
50
  unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
49
51
  restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
@@ -61,7 +63,7 @@ class SequiturGrammar < DynamicGrammar
61
63
  # Return an empty Hash if each digram appears once.
62
64
  # Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
63
65
  # Where Pi, Pk are two productions where the digram occurs.
64
- def detect_collision()
66
+ def detect_collision
65
67
  diagnosis = CollisionDiagnosis.new(false)
66
68
  found_so_far = {}
67
69
  productions.each do |a_prod|
@@ -109,9 +111,9 @@ class SequiturGrammar < DynamicGrammar
109
111
  end
110
112
 
111
113
  # Return the index of the first production with refcount < 2; nil when there is none or when that index is 0.
112
- def detect_useless_production()
114
+ def detect_useless_production
113
115
  useless = productions.index { |prod| prod.refcount < 2 }
114
- useless = nil if useless && useless.zero?
116
+ useless = nil if useless&.zero?
115
117
 
116
118
  return useless
117
119
  end
@@ -131,6 +133,7 @@ class SequiturGrammar < DynamicGrammar
131
133
 
132
134
  refs = a_prod.references_of(useless_prod)
133
135
  next if refs.empty?
136
+
134
137
  referencing = a_prod
135
138
  break
136
139
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Sequitur # Module for classes implementing the Sequitur algorithm
2
4
  # Represents a sequence (concatenation) of grammar symbols
3
5
  # as they appear in rhs of productions
@@ -6,7 +8,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
6
8
  attr_reader(:symbols)
7
9
 
8
10
  # Create an empty sequence
9
- def initialize()
11
+ def initialize
10
12
  @symbols = []
11
13
  end
12
14
 
@@ -21,7 +23,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
21
23
  end
22
24
 
23
25
  # Clear the symbol sequence.
24
- def clear()
26
+ def clear
25
27
  refs = references
26
28
  refs.each(&:unbind)
27
29
  @symbols = []
@@ -30,13 +32,13 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
30
32
 
31
33
  # Tell whether the sequence is empty.
32
34
  # @return [true / false] true only if the sequence has no symbol in it.
33
- def empty?()
35
+ def empty?
34
36
  return symbols.empty?
35
37
  end
36
38
 
37
39
  # Count the number of elements in the sequence.
38
40
  # @return [Integer] the number of elements
39
- def size()
41
+ def size
40
42
  return symbols.size
41
43
  end
42
44
 
@@ -76,29 +78,27 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
76
78
  return same
77
79
  end
78
80
 
79
-
80
81
  # Select the references to production appearing in the rhs.
81
82
  # @return [Array of ProductionRef]
82
- def references()
83
+ def references
83
84
  @memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
84
85
  return @memo_references
85
86
  end
86
87
 
87
-
88
88
  # Select the references of the given production appearing in the rhs.
89
89
  # @param aProduction [Production]
90
90
  # @return [Array of ProductionRef]
91
91
  def references_of(aProduction)
92
92
  return [] if references.empty?
93
+
93
94
  result = references.select { |a_ref| a_ref == aProduction }
94
95
  return result
95
96
  end
96
97
 
97
-
98
98
  # Emit a text representation of the symbol sequence.
99
99
  # Text is of the form: space-separated sequence of symbols.
100
100
  # @return [String]
101
- def to_string()
101
+ def to_string
102
102
  rhs_text = symbols.map do |elem|
103
103
  case elem
104
104
  when String then "'#{elem}'"
@@ -150,7 +150,6 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
150
150
  symbols.delete_at(position)
151
151
  end
152
152
 
153
-
154
153
  # Part of the 'visitee' role in Visitor design pattern.
155
154
  # @param aVisitor [GrammarVisitor] the visitor
156
155
  def accept(aVisitor)
@@ -170,7 +169,7 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
170
169
 
171
170
  private
172
171
 
173
- def invalidate_refs()
172
+ def invalidate_refs
174
173
  @memo_references = nil
175
174
  @lookup_references = nil
176
175
  end