sequitur 0.1.23 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,155 +2,153 @@
2
2
 
3
3
  require_relative 'dynamic_grammar'
4
4
 
5
-
6
5
  module Sequitur # Module for classes implementing the Sequitur algorithm
7
- # Specialization of the DynamicGrammar class.
8
- # A Sequitur grammar is a context-free grammar that is entirely built
9
- # from a sequence of input tokens through the Sequitur algorithm.
10
- class SequiturGrammar < DynamicGrammar
11
- # Build the grammar from an enumerator of tokens.
12
- # @param anEnum [Enumerator] an enumerator that will iterate
13
- # over the input tokens.
14
- def initialize(anEnum)
15
- super()
16
- # Make start production compliant with utility rule
17
- 2.times { start.incr_refcount }
18
-
19
- # Read the input sequence and apply the Sequitur algorithm
20
- anEnum.each do |a_token|
21
- add_token(a_token)
22
- enforce_rules
6
+ # Specialization of the DynamicGrammar class.
7
+ # A Sequitur grammar is a context-free grammar that is entirely built
8
+ # from a sequence of input tokens through the Sequitur algorithm.
9
+ class SequiturGrammar < DynamicGrammar
10
+ # Build the grammar from an enumerator of tokens.
11
+ # @param anEnum [Enumerator] an enumerator that will iterate
12
+ # over the input tokens.
13
+ def initialize(anEnum)
14
+ super()
15
+ # Make start production compliant with utility rule
16
+ 2.times { start.incr_refcount }
17
+
18
+ # Read the input sequence and apply the Sequitur algorithm
19
+ anEnum.each do |a_token|
20
+ add_token(a_token)
21
+ enforce_rules
22
+ end
23
23
  end
24
- end
25
-
26
- private
27
-
28
- # Struct used for internal purposes
29
- CollisionDiagnosis = Struct.new(
30
- :collision_found, # true if collision detected
31
- :digram, # The digram involved in a collision
32
- :productions) # The productions where the digram occurs
33
-
34
-
35
-
36
- # Assuming that a new input token was added to the start production,
37
- # enforce the digram unicity and rule utility rules
38
- # begin
39
- # if a digram D occurs twice in the grammar then
40
- # add a production P : D (if not already there)
41
- # replace both Ds with R (reduction step).
42
- # end
43
- # if a production P : RHS in referenced only once then
44
- # replace P by its RHS (derivation step)
45
- # remove P from grammar
46
- # end
47
- # end until digram unicity and rule utility are met
48
- def enforce_rules
49
- loop do
50
- unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
51
- restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
52
-
53
- prod_index = detect_useless_production
54
- restore_utility(prod_index) unless prod_index.nil?
55
-
56
- unicity_diagnosis = detect_collision
57
- prod_index = detect_useless_production
58
- break unless unicity_diagnosis.collision_found || !prod_index.nil?
24
+
25
+ private
26
+
27
+ # Struct used for internal purposes
28
+ CollisionDiagnosis = Struct.new(
29
+ :collision_found, # true if collision detected
30
+ :digram, # The digram involved in a collision
31
+ :productions # The productions where the digram occurs
32
+ )
33
+
34
+ # Assuming that a new input token was added to the start production,
35
+ # enforce the digram unicity and rule utility rules
36
+ # begin
37
+ # if a digram D occurs twice in the grammar then
38
+ # add a production P : D (if not already there)
39
+ # replace both Ds with P (reduction step).
40
+ # end
41
+ # if a production P : RHS is referenced only once then
42
+ # replace P by its RHS (derivation step)
43
+ # remove P from grammar
44
+ # end
45
+ # end until digram unicity and rule utility are met
46
+ def enforce_rules
47
+ loop do
48
+ unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
49
+ restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
50
+
51
+ prod_index = detect_useless_production
52
+ restore_utility(prod_index) unless prod_index.nil?
53
+
54
+ unicity_diagnosis = detect_collision
55
+ prod_index = detect_useless_production
56
+ break unless unicity_diagnosis.collision_found || !prod_index.nil?
57
+ end
59
58
  end
60
- end
61
-
62
- # Check whether a digram is used twice in the grammar.
63
- # Return an empty Hash if each digram appears once.
64
- # Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
65
- # Where Pi, Pk are two productions where the digram occurs.
66
- def detect_collision
67
- diagnosis = CollisionDiagnosis.new(false)
68
- found_so_far = {}
69
- productions.each do |a_prod|
70
- prod_digrams = a_prod.digrams
71
- prod_digrams.each do |a_digr|
72
- its_key = a_digr.key
73
- if found_so_far.include? its_key
74
- orig_digr = found_so_far[its_key]
75
- # Disregard sequence like a a a
76
- if (orig_digr.production == a_prod) && a_digr.repeating? &&
77
- (orig_digr == a_digr)
78
- next
79
- end
80
59
 
81
- diagnosis.digram = orig_digr
82
- diagnosis.productions = [orig_digr.production, a_prod]
83
- diagnosis.collision_found = true
84
- break
85
- else
86
- found_so_far[its_key] = a_digr
60
+ # Check whether a digram is used twice in the grammar.
61
+ # Return a CollisionDiagnosis whose collision_found flag is false if each digram appears once.
62
+ # Otherwise the diagnosis holds the colliding digram and the pair [Pi, Pk],
63
+ # where Pi, Pk are the productions (possibly the same one) in which the digram occurs.
64
+ def detect_collision
65
+ diagnosis = CollisionDiagnosis.new(false)
66
+ found_so_far = {}
67
+ productions.each do |a_prod|
68
+ prod_digrams = a_prod.digrams
69
+ prod_digrams.each do |a_digr|
70
+ its_key = a_digr.key
71
+ if found_so_far.include? its_key
72
+ orig_digr = found_so_far[its_key]
73
+ # Disregard sequence like a a a
74
+ if (orig_digr.production == a_prod) && a_digr.repeating? &&
75
+ (orig_digr == a_digr)
76
+ next
77
+ end
78
+
79
+ diagnosis.digram = orig_digr
80
+ diagnosis.productions = [orig_digr.production, a_prod]
81
+ diagnosis.collision_found = true
82
+ break
83
+ else
84
+ found_so_far[its_key] = a_digr
85
+ end
87
86
  end
87
+ break if diagnosis.collision_found
88
88
  end
89
- break if diagnosis.collision_found
89
+
90
+ diagnosis
90
91
  end
91
92
 
92
- return diagnosis
93
- end
94
-
95
- # When a collision diagnosis indicates that a given
96
- # digram d occurs twice in the grammar
97
- # Then create a new production that will have
98
- # the symbols of d as its rhs members.
99
- def restore_unicity(aDiagnosis)
100
- prods = aDiagnosis.productions
101
- if prods.any?(&:single_digram?)
102
- (simple, compound) = prods.partition(&:single_digram?)
103
- compound[0].reduce_step(simple[0])
104
- else
105
- # Create a new production with the digram's symbols as its
106
- # sole rhs members.
107
- new_prod = build_production_for(aDiagnosis.digram)
108
- prods[0].reduce_step(new_prod)
109
- prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
93
+ # When a collision diagnosis indicates that a given
94
+ # digram d occurs twice in the grammar
95
+ # Then create a new production that will have
96
+ # the symbols of d as its rhs members.
97
+ def restore_unicity(aDiagnosis)
98
+ prods = aDiagnosis.productions
99
+ if prods.any?(&:single_digram?)
100
+ (simple, compound) = prods.partition(&:single_digram?)
101
+ compound[0].reduce_step(simple[0])
102
+ else
103
+ # Create a new production with the digram's symbols as its
104
+ # sole rhs members.
105
+ new_prod = build_production_for(aDiagnosis.digram)
106
+ prods[0].reduce_step(new_prod)
107
+ prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
108
+ end
110
109
  end
111
- end
112
-
113
- # Return a production that is used less than twice in the grammar.
114
- def detect_useless_production
115
- useless = productions.index { |prod| prod.refcount < 2 }
116
- useless = nil if useless&.zero?
117
-
118
- return useless
119
- end
120
-
121
- # Given the passed production P is referenced only once.
122
- # Then replace P by its RHS where it is referenced.
123
- # And delete P
124
- def restore_utility(prod_index)
125
- # Retrieve useless prod from its index
126
- useless_prod = productions[prod_index]
127
-
128
- # Retrieve production referencing useless one
129
- referencing = nil
130
- productions.reverse_each do |a_prod|
131
- # Next line assumes non-recursive productions
132
- next if a_prod == useless_prod
133
-
134
- refs = a_prod.references_of(useless_prod)
135
- next if refs.empty?
136
-
137
- referencing = a_prod
138
- break
110
+
111
+ # Return the index of a production that is used less than twice (nil if none, or if that index is 0).
112
+ def detect_useless_production
113
+ useless = productions.index { |prod| prod.refcount < 2 }
114
+ useless = nil if useless&.zero?
115
+
116
+ useless
139
117
  end
140
118
 
141
- referencing.derive_step(useless_prod)
142
- remove_production(prod_index)
143
- end
119
+ # Given the passed production P is referenced only once.
120
+ # Then replace P by its RHS where it is referenced.
121
+ # And delete P
122
+ def restore_utility(prod_index)
123
+ # Retrieve useless prod from its index
124
+ useless_prod = productions[prod_index]
125
+
126
+ # Retrieve production referencing useless one
127
+ referencing = nil
128
+ productions.reverse_each do |a_prod|
129
+ # Next line assumes non-recursive productions
130
+ next if a_prod == useless_prod
131
+
132
+ refs = a_prod.references_of(useless_prod)
133
+ next if refs.empty?
134
+
135
+ referencing = a_prod
136
+ break
137
+ end
138
+
139
+ referencing.derive_step(useless_prod)
140
+ remove_production(prod_index)
141
+ end
144
142
 
145
- # Create a new production that will have the symbols from digram
146
- # as its rhs members.
147
- def build_production_for(aDigram)
148
- new_prod = Production.new
149
- aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
150
- add_production(new_prod)
143
+ # Create a new production that will have the symbols from digram
144
+ # as its rhs members.
145
+ def build_production_for(aDigram)
146
+ new_prod = Production.new
147
+ aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
148
+ add_production(new_prod)
151
149
 
152
- return new_prod
153
- end
154
- end # class
150
+ new_prod
151
+ end
152
+ end # class
155
153
  end # module
156
154
  # End of file
@@ -31,15 +31,15 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
31
31
  end
32
32
 
33
33
  # Tell whether the sequence is empty.
34
- # @return [true / false] true only if the sequence has no symbol in it.
34
+ # @return [Boolean] true only if the sequence has no symbol in it.
35
35
  def empty?
36
- return symbols.empty?
36
+ symbols.empty?
37
37
  end
38
38
 
39
39
  # Count the number of elements in the sequence.
40
- # @return [Fixnum] the number of elements
40
+ # @return [Integer] the number of elements
41
41
  def size
42
- return symbols.size
42
+ symbols.size
43
43
  end
44
44
 
45
45
  # Append a grammar symbol at the end of the sequence.
@@ -55,58 +55,55 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
55
55
  # Retrieve the element from the sequence at given position.
56
56
  # @param anIndex [Fixnum] A zero-based index of the element to access.
57
57
  def [](anIndex)
58
- return symbols[anIndex]
58
+ symbols[anIndex]
59
59
  end
60
60
 
61
61
  # Equality testing.
62
62
  # @param other [SymbolSequence or Array] the other sequence
63
63
  # to compare to.
64
- # @return true when an item from self equals the corresponding
64
+ # @return [Boolean] true when an item from self equals the corresponding
65
65
  # item from 'other'
66
66
  def ==(other)
67
- return true if object_id == other.object_id
67
+ true if object_id == other.object_id
68
68
 
69
- same = case other
70
- when SymbolSequence
71
- symbols == other.symbols
72
- when Array
73
- symbols == other
74
- else
75
- false
76
- end
77
-
78
- return same
69
+ case other
70
+ when SymbolSequence
71
+ symbols == other.symbols
72
+ when Array
73
+ symbols == other
74
+ else
75
+ false
76
+ end
79
77
  end
80
78
 
81
79
  # Select the references to production appearing in the rhs.
82
- # @return [Array of ProductionRef]
80
+ # @return [Array<ProductionRef>]
83
81
  def references
84
82
  @memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
85
- return @memo_references
83
+ @memo_references
86
84
  end
87
85
 
88
86
  # Select the references of the given production appearing in the rhs.
89
87
  # @param aProduction [Production]
90
- # @return [Array of ProductionRef]
88
+ # @return [Array<ProductionRef>]
91
89
  def references_of(aProduction)
92
- return [] if references.empty?
90
+ [] if references.empty?
93
91
 
94
- result = references.select { |a_ref| a_ref == aProduction }
95
- return result
92
+ references.select { |a_ref| a_ref == aProduction }
96
93
  end
97
94
 
98
95
  # Emit a text representation of the symbol sequence.
99
96
  # Text is of the form: space-separated sequence of symbols.
100
- # @return [String]
97
+ # @return [String] the symbols joined by single spaces
101
98
  def to_string
102
99
  rhs_text = symbols.map do |elem|
103
100
  case elem
104
- when String then "'#{elem}'"
105
- else elem.to_s
101
+ when String then "'#{elem}'"
102
+ else elem.to_s
106
103
  end
107
104
  end
108
105
 
109
- return rhs_text.join(' ')
106
+ rhs_text.join(' ')
110
107
  end
111
108
 
112
109
  # Insert at position the elements from another sequence.
data/lib/sequitur.rb CHANGED
@@ -9,7 +9,6 @@ require_relative './sequitur/sequitur_grammar'
9
9
  require_relative './sequitur/formatter/debug'
10
10
  require_relative './sequitur/formatter/base_text'
11
11
 
12
-
13
12
  module Sequitur
14
13
  # Build a Sequitur-generated grammar based on the sequence of input tokens.
15
14
  #
@@ -19,12 +18,12 @@ module Sequitur
19
18
  # @return [SequiturGrammar] a grammar that encodes the input.
20
19
  def self.build_from(tokens)
21
20
  input_sequence = case tokens
22
- when String then tokens.chars
23
- when Enumerator then tokens
24
- else tokens.to_enum
21
+ when String then tokens.chars
22
+ when Enumerator then tokens
23
+ else tokens.to_enum
25
24
  end
26
25
 
27
- return SequiturGrammar.new(input_sequence)
26
+ SequiturGrammar.new(input_sequence)
28
27
  end
29
28
  end # module
30
29
 
@@ -5,38 +5,40 @@ require_relative '../spec_helper'
5
5
  # Load the class under test
6
6
  require_relative '../../lib/sequitur/digram'
7
7
 
8
- module Sequitur # Re-open the module to get rid of qualified names
9
- describe Digram do
8
+ describe Sequitur::Digram do
10
9
  let(:two_symbols) { %i[b c] }
11
10
  let(:production) { double('sample-production') }
11
+ def make_digram(symb1, symb2, production)
12
+ Sequitur::Digram.new(symb1, symb2, production)
13
+ end
12
14
 
13
15
  context 'Standard creation & initialization:' do
14
16
  it 'should be created with 3 arguments' do
15
- instance = Digram.new(:b, :c, production)
17
+ instance = make_digram(:b, :c, production)
16
18
 
17
19
  expect(instance.symbols).to eq(two_symbols)
18
20
  expect(instance.production).to eq(production)
19
21
  end
20
22
 
21
23
  it 'should return the production that it refers to' do
22
- instance = Digram.new(:b, :c, production)
24
+ instance = make_digram(:b, :c, production)
23
25
  expect(instance.production).to eq(production)
24
26
  end
25
27
 
26
28
  it 'should whether its symbols are the same' do
27
- instance1 = Digram.new(:a, :a, production)
28
- expect(instance1).to be_repeating
29
+ instance1 = make_digram(:a, :a, production)
30
+ expect(instance1).to be_repeating
29
31
 
30
- instance1 = Digram.new(:a, :b, production)
31
- expect(instance1).not_to be_repeating
32
+ instance1 = make_digram(:a, :b, production)
33
+ expect(instance1).not_to be_repeating
32
34
  end
33
35
  end # context
34
36
 
35
37
  context 'Provided services:' do
36
38
  it 'should compare itself to another digram' do
37
- instance1 = Digram.new(:a, :b, production)
38
- same = Digram.new(:a, :b, production)
39
- different = Digram.new(:b, :c, production)
39
+ instance1 = make_digram(:a, :b, production)
40
+ same = make_digram(:a, :b, production)
41
+ different = make_digram(:b, :c, production)
40
42
 
41
43
  expect(instance1).to eq(instance1)
42
44
  expect(instance1).to eq(same)
@@ -45,6 +47,5 @@ describe Digram do
45
47
  end
46
48
  end # context
47
49
  end # describe
48
- end # module
49
50
 
50
51
  # End of file
@@ -5,14 +5,13 @@ require_relative '../spec_helper'
5
5
  # Load the class under test
6
6
  require_relative '../../lib/sequitur/dynamic_grammar'
7
7
 
8
- module Sequitur # Re-open the module to get rid of qualified names
9
- describe DynamicGrammar do
8
+ describe Sequitur::DynamicGrammar do
10
9
  # Factory method. Build a production with the given sequence
11
10
  # of symbols as its rhs.
12
11
  def build_production(*symbols)
13
- prod = Production.new
12
+ prod = Sequitur::Production.new
14
13
  symbols.each { |symb| prod.append_symbol(symb) }
15
- return prod
14
+ prod
16
15
  end
17
16
 
18
17
  let(:p_a) { build_production(:a) }
@@ -20,10 +19,9 @@ describe DynamicGrammar do
20
19
  let(:p_c) { build_production(:c) }
21
20
  let(:p_bc) { build_production(p_b, p_c) }
22
21
 
23
-
24
22
  context 'Creation & initialization:' do
25
23
  it 'should be created without parameter' do
26
- expect { DynamicGrammar.new }.not_to raise_error
24
+ expect { Sequitur::DynamicGrammar.new }.not_to raise_error
27
25
  end
28
26
 
29
27
  it 'should have an empty start/start production' do
@@ -33,7 +31,6 @@ describe DynamicGrammar do
33
31
  end
34
32
  end # context
35
33
 
36
-
37
34
  context 'Adding productions to the grammar:' do
38
35
  it 'should add a simple production' do
39
36
  subject.add_production(p_a)
@@ -60,7 +57,6 @@ describe DynamicGrammar do
60
57
  end
61
58
  end # context
62
59
 
63
-
64
60
  context 'Removing a production from the grammar:' do
65
61
  it 'should remove an existing production' do
66
62
  subject.add_production(p_a) # index = 1
@@ -91,7 +87,7 @@ describe DynamicGrammar do
91
87
  context 'Visiting:' do
92
88
  it 'should return a visitor' do
93
89
  expect { subject.visitor }.not_to raise_error
94
- expect(subject.visitor).to be_kind_of(GrammarVisitor)
90
+ expect(subject.visitor).to be_kind_of(Sequitur::GrammarVisitor)
95
91
  end
96
92
 
97
93
  it 'should accept a visitor' do
@@ -133,7 +129,6 @@ describe DynamicGrammar do
133
129
  end
134
130
  end # context
135
131
 
136
-
137
132
  context 'Generating a text representation of itself:' do
138
133
  it 'should generate a text representation when empty' do
139
134
  expectation = "#{subject.start.object_id} : ."
@@ -141,6 +136,5 @@ describe DynamicGrammar do
141
136
  end
142
137
  end # context
143
138
  end # describe
144
- end # module
145
139
 
146
140
  # End of file