sequitur 0.1.23 → 0.1.24

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,155 +2,153 @@
2
2
 
3
3
  require_relative 'dynamic_grammar'
4
4
 
5
-
6
5
  module Sequitur # Module for classes implementing the Sequitur algorithm
7
- # Specialization of the DynamicGrammar class.
8
- # A Sequitur grammar is a context-free grammar that is entirely built
9
- # from a sequence of input tokens through the Sequitur algorithm.
10
- class SequiturGrammar < DynamicGrammar
11
- # Build the grammar from an enumerator of tokens.
12
- # @param anEnum [Enumerator] an enumerator that will iterate
13
- # over the input tokens.
14
- def initialize(anEnum)
15
- super()
16
- # Make start production compliant with utility rule
17
- 2.times { start.incr_refcount }
18
-
19
- # Read the input sequence and apply the Sequitur algorithm
20
- anEnum.each do |a_token|
21
- add_token(a_token)
22
- enforce_rules
6
+ # Specialization of the DynamicGrammar class.
7
+ # A Sequitur grammar is a context-free grammar that is entirely built
8
+ # from a sequence of input tokens through the Sequitur algorithm.
9
+ class SequiturGrammar < DynamicGrammar
10
+ # Build the grammar from an enumerator of tokens.
11
+ # @param anEnum [Enumerator] an enumerator that will iterate
12
+ # over the input tokens.
13
+ def initialize(anEnum)
14
+ super()
15
+ # Make start production compliant with utility rule
16
+ 2.times { start.incr_refcount }
17
+
18
+ # Read the input sequence and apply the Sequitur algorithm
19
+ anEnum.each do |a_token|
20
+ add_token(a_token)
21
+ enforce_rules
22
+ end
23
23
  end
24
- end
25
-
26
- private
27
-
28
- # Struct used for internal purposes
29
- CollisionDiagnosis = Struct.new(
30
- :collision_found, # true if collision detected
31
- :digram, # The digram involved in a collision
32
- :productions) # The productions where the digram occurs
33
-
34
-
35
-
36
- # Assuming that a new input token was added to the start production,
37
- # enforce the digram unicity and rule utility rules
38
- # begin
39
- # if a digram D occurs twice in the grammar then
40
- # add a production P : D (if not already there)
41
- # replace both Ds with R (reduction step).
42
- # end
43
- # if a production P : RHS in referenced only once then
44
- # replace P by its RHS (derivation step)
45
- # remove P from grammar
46
- # end
47
- # end until digram unicity and rule utility are met
48
- def enforce_rules
49
- loop do
50
- unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
51
- restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
52
-
53
- prod_index = detect_useless_production
54
- restore_utility(prod_index) unless prod_index.nil?
55
-
56
- unicity_diagnosis = detect_collision
57
- prod_index = detect_useless_production
58
- break unless unicity_diagnosis.collision_found || !prod_index.nil?
24
+
25
+ private
26
+
27
+ # Struct used for internal purposes
28
+ CollisionDiagnosis = Struct.new(
29
+ :collision_found, # true if collision detected
30
+ :digram, # The digram involved in a collision
31
+ :productions # The productions where the digram occurs
32
+ )
33
+
34
+ # Assuming that a new input token was added to the start production,
35
+ # enforce the digram unicity and rule utility rules
36
+ # begin
37
+ # if a digram D occurs twice in the grammar then
38
+ # add a production P : D (if not already there)
39
+ # replace both Ds with P (reduction step).
40
+ # end
41
+ # if a production P : RHS is referenced only once then
42
+ # replace P by its RHS (derivation step)
43
+ # remove P from grammar
44
+ # end
45
+ # end until digram unicity and rule utility are met
46
+ def enforce_rules
47
+ loop do
48
+ unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
49
+ restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
50
+
51
+ prod_index = detect_useless_production
52
+ restore_utility(prod_index) unless prod_index.nil?
53
+
54
+ unicity_diagnosis = detect_collision
55
+ prod_index = detect_useless_production
56
+ break unless unicity_diagnosis.collision_found || !prod_index.nil?
57
+ end
59
58
  end
60
- end
61
-
62
- # Check whether a digram is used twice in the grammar.
63
- # Return an empty Hash if each digram appears once.
64
- # Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
65
- # Where Pi, Pk are two productions where the digram occurs.
66
- def detect_collision
67
- diagnosis = CollisionDiagnosis.new(false)
68
- found_so_far = {}
69
- productions.each do |a_prod|
70
- prod_digrams = a_prod.digrams
71
- prod_digrams.each do |a_digr|
72
- its_key = a_digr.key
73
- if found_so_far.include? its_key
74
- orig_digr = found_so_far[its_key]
75
- # Disregard sequence like a a a
76
- if (orig_digr.production == a_prod) && a_digr.repeating? &&
77
- (orig_digr == a_digr)
78
- next
79
- end
80
59
 
81
- diagnosis.digram = orig_digr
82
- diagnosis.productions = [orig_digr.production, a_prod]
83
- diagnosis.collision_found = true
84
- break
85
- else
86
- found_so_far[its_key] = a_digr
60
+ # Check whether a digram is used twice in the grammar.
61
+ # Return a CollisionDiagnosis with collision_found false if each digram appears once.
62
+ # Otherwise collision_found is true, with digram set and productions set to [Pi, Pk]
63
+ # Where Pi, Pk are two productions where the digram occurs.
64
+ def detect_collision
65
+ diagnosis = CollisionDiagnosis.new(false)
66
+ found_so_far = {}
67
+ productions.each do |a_prod|
68
+ prod_digrams = a_prod.digrams
69
+ prod_digrams.each do |a_digr|
70
+ its_key = a_digr.key
71
+ if found_so_far.include? its_key
72
+ orig_digr = found_so_far[its_key]
73
+ # Disregard sequence like a a a
74
+ if (orig_digr.production == a_prod) && a_digr.repeating? &&
75
+ (orig_digr == a_digr)
76
+ next
77
+ end
78
+
79
+ diagnosis.digram = orig_digr
80
+ diagnosis.productions = [orig_digr.production, a_prod]
81
+ diagnosis.collision_found = true
82
+ break
83
+ else
84
+ found_so_far[its_key] = a_digr
85
+ end
87
86
  end
87
+ break if diagnosis.collision_found
88
88
  end
89
- break if diagnosis.collision_found
89
+
90
+ diagnosis
90
91
  end
91
92
 
92
- return diagnosis
93
- end
94
-
95
- # When a collision diagnosis indicates that a given
96
- # digram d occurs twice in the grammar
97
- # Then create a new production that will have
98
- # the symbols of d as its rhs members.
99
- def restore_unicity(aDiagnosis)
100
- prods = aDiagnosis.productions
101
- if prods.any?(&:single_digram?)
102
- (simple, compound) = prods.partition(&:single_digram?)
103
- compound[0].reduce_step(simple[0])
104
- else
105
- # Create a new production with the digram's symbols as its
106
- # sole rhs members.
107
- new_prod = build_production_for(aDiagnosis.digram)
108
- prods[0].reduce_step(new_prod)
109
- prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
93
+ # When a collision diagnosis indicates that a given
94
+ # digram d occurs twice in the grammar
95
+ # Then create a new production that will have
96
+ # the symbols of d as its rhs members.
97
+ def restore_unicity(aDiagnosis)
98
+ prods = aDiagnosis.productions
99
+ if prods.any?(&:single_digram?)
100
+ (simple, compound) = prods.partition(&:single_digram?)
101
+ compound[0].reduce_step(simple[0])
102
+ else
103
+ # Create a new production with the digram's symbols as its
104
+ # sole rhs members.
105
+ new_prod = build_production_for(aDiagnosis.digram)
106
+ prods[0].reduce_step(new_prod)
107
+ prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
108
+ end
110
109
  end
111
- end
112
-
113
- # Return a production that is used less than twice in the grammar.
114
- def detect_useless_production
115
- useless = productions.index { |prod| prod.refcount < 2 }
116
- useless = nil if useless&.zero?
117
-
118
- return useless
119
- end
120
-
121
- # Given the passed production P is referenced only once.
122
- # Then replace P by its RHS where it is referenced.
123
- # And delete P
124
- def restore_utility(prod_index)
125
- # Retrieve useless prod from its index
126
- useless_prod = productions[prod_index]
127
-
128
- # Retrieve production referencing useless one
129
- referencing = nil
130
- productions.reverse_each do |a_prod|
131
- # Next line assumes non-recursive productions
132
- next if a_prod == useless_prod
133
-
134
- refs = a_prod.references_of(useless_prod)
135
- next if refs.empty?
136
-
137
- referencing = a_prod
138
- break
110
+
111
+ # Return the index of the first production whose refcount is below 2; nil if there is none or if that index is 0.
112
+ def detect_useless_production
113
+ useless = productions.index { |prod| prod.refcount < 2 }
114
+ useless = nil if useless&.zero?
115
+
116
+ useless
139
117
  end
140
118
 
141
- referencing.derive_step(useless_prod)
142
- remove_production(prod_index)
143
- end
119
+ # Given the passed production P is referenced only once.
120
+ # Then replace P by its RHS where it is referenced.
121
+ # And delete P
122
+ def restore_utility(prod_index)
123
+ # Retrieve useless prod from its index
124
+ useless_prod = productions[prod_index]
125
+
126
+ # Retrieve production referencing useless one
127
+ referencing = nil
128
+ productions.reverse_each do |a_prod|
129
+ # Next line assumes non-recursive productions
130
+ next if a_prod == useless_prod
131
+
132
+ refs = a_prod.references_of(useless_prod)
133
+ next if refs.empty?
134
+
135
+ referencing = a_prod
136
+ break
137
+ end
138
+
139
+ referencing.derive_step(useless_prod)
140
+ remove_production(prod_index)
141
+ end
144
142
 
145
- # Create a new production that will have the symbols from digram
146
- # as its rhs members.
147
- def build_production_for(aDigram)
148
- new_prod = Production.new
149
- aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
150
- add_production(new_prod)
143
+ # Create a new production that will have the symbols from digram
144
+ # as its rhs members.
145
+ def build_production_for(aDigram)
146
+ new_prod = Production.new
147
+ aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
148
+ add_production(new_prod)
151
149
 
152
- return new_prod
153
- end
154
- end # class
150
+ new_prod
151
+ end
152
+ end # class
155
153
  end # module
156
154
  # End of file
@@ -31,15 +31,15 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
31
31
  end
32
32
 
33
33
  # Tell whether the sequence is empty.
34
- # @return [true / false] true only if the sequence has no symbol in it.
34
+ # @return [true / false] true only if the sequence has no symbol in it.
35
35
  def empty?
36
- return symbols.empty?
36
+ symbols.empty?
37
37
  end
38
38
 
39
39
  # Count the number of elements in the sequence.
40
- # @return [Fixnum] the number of elements
40
+ # @return [Fixnum] the number of elements
41
41
  def size
42
- return symbols.size
42
+ symbols.size
43
43
  end
44
44
 
45
45
  # Append a grammar symbol at the end of the sequence.
@@ -55,58 +55,55 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
55
55
  # Retrieve the element from the sequence at given position.
56
56
  # @param anIndex [Fixnum] A zero-based index of the element to access.
57
57
  def [](anIndex)
58
- return symbols[anIndex]
58
+ symbols[anIndex]
59
59
  end
60
60
 
61
61
  # Equality testing.
62
62
  # @param other [SymbolSequence or Array] the other sequence
63
63
  # to compare to.
64
- # @return true when an item from self equals the corresponding
64
+ # @return true when an item from self equals the corresponding
65
65
  # item from 'other'
66
66
  def ==(other)
67
- return true if object_id == other.object_id
67
+ true if object_id == other.object_id
68
68
 
69
- same = case other
70
- when SymbolSequence
71
- symbols == other.symbols
72
- when Array
73
- symbols == other
74
- else
75
- false
76
- end
77
-
78
- return same
69
+ case other
70
+ when SymbolSequence
71
+ symbols == other.symbols
72
+ when Array
73
+ symbols == other
74
+ else
75
+ false
76
+ end
79
77
  end
80
78
 
81
79
  # Select the references to production appearing in the rhs.
82
- # @return [Array of ProductionRef]
80
+ # @return [Array of ProductionRef]
83
81
  def references
84
82
  @memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
85
- return @memo_references
83
+ @memo_references
86
84
  end
87
85
 
88
86
  # Select the references of the given production appearing in the rhs.
89
87
  # @param aProduction [Production]
90
- # @return [Array of ProductionRef]
88
+ # @return [Array of ProductionRef]
91
89
  def references_of(aProduction)
92
- return [] if references.empty?
90
+ [] if references.empty?
93
91
 
94
- result = references.select { |a_ref| a_ref == aProduction }
95
- return result
92
+ references.select { |a_ref| a_ref == aProduction }
96
93
  end
97
94
 
98
95
  # Emit a text representation of the symbol sequence.
99
96
  # Text is of the form: space-separated sequence of symbols.
100
- # @return [String]
97
+ # @return [String]
101
98
  def to_string
102
99
  rhs_text = symbols.map do |elem|
103
100
  case elem
104
- when String then "'#{elem}'"
105
- else elem.to_s
101
+ when String then "'#{elem}'"
102
+ else elem.to_s
106
103
  end
107
104
  end
108
105
 
109
- return rhs_text.join(' ')
106
+ rhs_text.join(' ')
110
107
  end
111
108
 
112
109
  # Insert at position the elements from another sequence.
data/lib/sequitur.rb CHANGED
@@ -9,7 +9,6 @@ require_relative './sequitur/sequitur_grammar'
9
9
  require_relative './sequitur/formatter/debug'
10
10
  require_relative './sequitur/formatter/base_text'
11
11
 
12
-
13
12
  module Sequitur
14
13
  # Build a Sequitur-generated grammar based on the sequence of input tokens.
15
14
  #
@@ -19,12 +18,12 @@ module Sequitur
19
18
  # @return [SequiturGrammar] a grammar that encodes the input.
20
19
  def self.build_from(tokens)
21
20
  input_sequence = case tokens
22
- when String then tokens.chars
23
- when Enumerator then tokens
24
- else tokens.to_enum
21
+ when String then tokens.chars
22
+ when Enumerator then tokens
23
+ else tokens.to_enum
25
24
  end
26
25
 
27
- return SequiturGrammar.new(input_sequence)
26
+ SequiturGrammar.new(input_sequence)
28
27
  end
29
28
  end # module
30
29
 
@@ -5,38 +5,40 @@ require_relative '../spec_helper'
5
5
  # Load the class under test
6
6
  require_relative '../../lib/sequitur/digram'
7
7
 
8
- module Sequitur # Re-open the module to get rid of qualified names
9
- describe Digram do
8
+ describe Sequitur::Digram do
10
9
  let(:two_symbols) { %i[b c] }
11
10
  let(:production) { double('sample-production') }
11
+ def make_digram(symb1, symb2, production)
12
+ Sequitur::Digram.new(symb1, symb2, production)
13
+ end
12
14
 
13
15
  context 'Standard creation & initialization:' do
14
16
  it 'should be created with 3 arguments' do
15
- instance = Digram.new(:b, :c, production)
17
+ instance = make_digram(:b, :c, production)
16
18
 
17
19
  expect(instance.symbols).to eq(two_symbols)
18
20
  expect(instance.production).to eq(production)
19
21
  end
20
22
 
21
23
  it 'should return the production that it refers to' do
22
- instance = Digram.new(:b, :c, production)
24
+ instance = make_digram(:b, :c, production)
23
25
  expect(instance.production).to eq(production)
24
26
  end
25
27
 
26
28
  it 'should whether its symbols are the same' do
27
- instance1 = Digram.new(:a, :a, production)
28
- expect(instance1).to be_repeating
29
+ instance1 = make_digram(:a, :a, production)
30
+ expect(instance1).to be_repeating
29
31
 
30
- instance1 = Digram.new(:a, :b, production)
31
- expect(instance1).not_to be_repeating
32
+ instance1 = make_digram(:a, :b, production)
33
+ expect(instance1).not_to be_repeating
32
34
  end
33
35
  end # context
34
36
 
35
37
  context 'Provided services:' do
36
38
  it 'should compare itself to another digram' do
37
- instance1 = Digram.new(:a, :b, production)
38
- same = Digram.new(:a, :b, production)
39
- different = Digram.new(:b, :c, production)
39
+ instance1 = make_digram(:a, :b, production)
40
+ same = make_digram(:a, :b, production)
41
+ different = make_digram(:b, :c, production)
40
42
 
41
43
  expect(instance1).to eq(instance1)
42
44
  expect(instance1).to eq(same)
@@ -45,6 +47,5 @@ describe Digram do
45
47
  end
46
48
  end # context
47
49
  end # describe
48
- end # module
49
50
 
50
51
  # End of file
@@ -5,14 +5,13 @@ require_relative '../spec_helper'
5
5
  # Load the class under test
6
6
  require_relative '../../lib/sequitur/dynamic_grammar'
7
7
 
8
- module Sequitur # Re-open the module to get rid of qualified names
9
- describe DynamicGrammar do
8
+ describe Sequitur::DynamicGrammar do
10
9
  # Factory method. Build a production with the given sequence
11
10
  # of symbols as its rhs.
12
11
  def build_production(*symbols)
13
- prod = Production.new
12
+ prod = Sequitur::Production.new
14
13
  symbols.each { |symb| prod.append_symbol(symb) }
15
- return prod
14
+ prod
16
15
  end
17
16
 
18
17
  let(:p_a) { build_production(:a) }
@@ -20,10 +19,9 @@ describe DynamicGrammar do
20
19
  let(:p_c) { build_production(:c) }
21
20
  let(:p_bc) { build_production(p_b, p_c) }
22
21
 
23
-
24
22
  context 'Creation & initialization:' do
25
23
  it 'should be created without parameter' do
26
- expect { DynamicGrammar.new }.not_to raise_error
24
+ expect { Sequitur::DynamicGrammar.new }.not_to raise_error
27
25
  end
28
26
 
29
27
  it 'should have an empty start/start production' do
@@ -33,7 +31,6 @@ describe DynamicGrammar do
33
31
  end
34
32
  end # context
35
33
 
36
-
37
34
  context 'Adding productions to the grammar:' do
38
35
  it 'should add a simple production' do
39
36
  subject.add_production(p_a)
@@ -60,7 +57,6 @@ describe DynamicGrammar do
60
57
  end
61
58
  end # context
62
59
 
63
-
64
60
  context 'Removing a production from the grammar:' do
65
61
  it 'should remove an existing production' do
66
62
  subject.add_production(p_a) # index = 1
@@ -91,7 +87,7 @@ describe DynamicGrammar do
91
87
  context 'Visiting:' do
92
88
  it 'should return a visitor' do
93
89
  expect { subject.visitor }.not_to raise_error
94
- expect(subject.visitor).to be_kind_of(GrammarVisitor)
90
+ expect(subject.visitor).to be_kind_of(Sequitur::GrammarVisitor)
95
91
  end
96
92
 
97
93
  it 'should accept a visitor' do
@@ -133,7 +129,6 @@ describe DynamicGrammar do
133
129
  end
134
130
  end # context
135
131
 
136
-
137
132
  context 'Generating a text representation of itself:' do
138
133
  it 'should generate a text representation when empty' do
139
134
  expectation = "#{subject.start.object_id} : ."
@@ -141,6 +136,5 @@ describe DynamicGrammar do
141
136
  end
142
137
  end # context
143
138
  end # describe
144
- end # module
145
139
 
146
140
  # End of file