sequitur 0.1.22 → 0.1.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -180
- data/CHANGELOG.md +11 -1
- data/LICENSE.txt +1 -1
- data/Rakefile +0 -2
- data/examples/integer_sample.rb +1 -2
- data/examples/porridge.rb +10 -10
- data/examples/simple_case.rb +1 -1
- data/examples/symbol_sample.rb +1 -1
- data/examples/word_sample.rb +5 -6
- data/lib/sequitur/constants.rb +5 -2
- data/lib/sequitur/digram.rb +3 -3
- data/lib/sequitur/dynamic_grammar.rb +4 -5
- data/lib/sequitur/formatter/base_formatter.rb +1 -1
- data/lib/sequitur/formatter/base_text.rb +3 -7
- data/lib/sequitur/formatter/debug.rb +0 -1
- data/lib/sequitur/grammar_visitor.rb +1 -1
- data/lib/sequitur/production.rb +200 -205
- data/lib/sequitur/production_ref.rb +9 -12
- data/lib/sequitur/sequitur_grammar.rb +135 -137
- data/lib/sequitur/symbol_sequence.rb +24 -27
- data/lib/sequitur.rb +4 -5
- data/spec/sequitur/digram_spec.rb +13 -12
- data/spec/sequitur/dynamic_grammar_spec.rb +5 -11
- data/spec/sequitur/formatter/base_text_spec.rb +70 -72
- data/spec/sequitur/formatter/debug_spec.rb +90 -92
- data/spec/sequitur/grammar_visitor_spec.rb +70 -71
- data/spec/sequitur/production_ref_spec.rb +92 -92
- data/spec/sequitur/production_spec.rb +30 -34
- data/spec/sequitur/sequitur_grammar_spec.rb +48 -49
- data/spec/sequitur/symbol_sequence_spec.rb +102 -105
- data/spec/spec_helper.rb +0 -15
- metadata +18 -60
- data/.simplecov +0 -7
- data/.travis.yml +0 -29
@@ -2,155 +2,153 @@
|
|
2
2
|
|
3
3
|
require_relative 'dynamic_grammar'
|
4
4
|
|
5
|
-
|
6
5
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
7
|
-
# Specialization of the DynamicGrammar class.
|
8
|
-
# A Sequitur grammar is a context-free grammar that is entirely built
|
9
|
-
# from a sequence of input tokens through the Sequitur algorithm.
|
10
|
-
class SequiturGrammar < DynamicGrammar
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
6
|
+
# Specialization of the DynamicGrammar class.
|
7
|
+
# A Sequitur grammar is a context-free grammar that is entirely built
|
8
|
+
# from a sequence of input tokens through the Sequitur algorithm.
|
9
|
+
class SequiturGrammar < DynamicGrammar
|
10
|
+
# Build the grammar from an enumerator of tokens.
|
11
|
+
# @param anEnum [Enumerator] an enumerator that will iterate
|
12
|
+
# over the input tokens.
|
13
|
+
def initialize(anEnum)
|
14
|
+
super()
|
15
|
+
# Make start production compliant with utility rule
|
16
|
+
2.times { start.incr_refcount }
|
17
|
+
|
18
|
+
# Read the input sequence and apply the Sequitur algorithm
|
19
|
+
anEnum.each do |a_token|
|
20
|
+
add_token(a_token)
|
21
|
+
enforce_rules
|
22
|
+
end
|
23
23
|
end
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
break unless unicity_diagnosis.collision_found || !prod_index.nil?
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
# Struct used for internal purposes
|
28
|
+
CollisionDiagnosis = Struct.new(
|
29
|
+
:collision_found, # true if collision detected
|
30
|
+
:digram, # The digram involved in a collision
|
31
|
+
:productions # The productions where the digram occurs
|
32
|
+
)
|
33
|
+
|
34
|
+
# Assuming that a new input token was added to the start production,
|
35
|
+
# enforce the digram unicity and rule utility rules
|
36
|
+
# begin
|
37
|
+
# if a digram D occurs twice in the grammar then
|
38
|
+
# add a production P : D (if not already there)
|
39
|
+
# replace both Ds with P (reduction step).
|
40
|
+
# end
|
41
|
+
# if a production P : RHS is referenced only once then
|
42
|
+
# replace P by its RHS (derivation step)
|
43
|
+
# remove P from grammar
|
44
|
+
# end
|
45
|
+
# end until digram unicity and rule utility are met
|
46
|
+
def enforce_rules
|
47
|
+
loop do
|
48
|
+
unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
|
49
|
+
restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
|
50
|
+
|
51
|
+
prod_index = detect_useless_production
|
52
|
+
restore_utility(prod_index) unless prod_index.nil?
|
53
|
+
|
54
|
+
unicity_diagnosis = detect_collision
|
55
|
+
prod_index = detect_useless_production
|
56
|
+
break unless unicity_diagnosis.collision_found || !prod_index.nil?
|
57
|
+
end
|
59
58
|
end
|
60
|
-
end
|
61
|
-
|
62
|
-
# Check whether a digram is used twice in the grammar.
|
63
|
-
# Return an empty Hash if each digram appears once.
|
64
|
-
# Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
|
65
|
-
# Where Pi, Pk are two productions where the digram occurs.
|
66
|
-
def detect_collision
|
67
|
-
diagnosis = CollisionDiagnosis.new(false)
|
68
|
-
found_so_far = {}
|
69
|
-
productions.each do |a_prod|
|
70
|
-
prod_digrams = a_prod.digrams
|
71
|
-
prod_digrams.each do |a_digr|
|
72
|
-
its_key = a_digr.key
|
73
|
-
if found_so_far.include? its_key
|
74
|
-
orig_digr = found_so_far[its_key]
|
75
|
-
# Disregard sequence like a a a
|
76
|
-
if (orig_digr.production == a_prod) && a_digr.repeating? &&
|
77
|
-
(orig_digr == a_digr)
|
78
|
-
next
|
79
|
-
end
|
80
59
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
60
|
+
# Check whether a digram is used twice in the grammar.
|
61
|
+
# Return a CollisionDiagnosis with collision_found false if each digram appears once.
|
62
|
+
# Otherwise collision_found is true, digram is the repeated digram and productions is [Pi, Pk]
|
63
|
+
# Where Pi, Pk are two productions where the digram occurs.
|
64
|
+
def detect_collision
|
65
|
+
diagnosis = CollisionDiagnosis.new(false)
|
66
|
+
found_so_far = {}
|
67
|
+
productions.each do |a_prod|
|
68
|
+
prod_digrams = a_prod.digrams
|
69
|
+
prod_digrams.each do |a_digr|
|
70
|
+
its_key = a_digr.key
|
71
|
+
if found_so_far.include? its_key
|
72
|
+
orig_digr = found_so_far[its_key]
|
73
|
+
# Disregard sequence like a a a
|
74
|
+
if (orig_digr.production == a_prod) && a_digr.repeating? &&
|
75
|
+
(orig_digr == a_digr)
|
76
|
+
next
|
77
|
+
end
|
78
|
+
|
79
|
+
diagnosis.digram = orig_digr
|
80
|
+
diagnosis.productions = [orig_digr.production, a_prod]
|
81
|
+
diagnosis.collision_found = true
|
82
|
+
break
|
83
|
+
else
|
84
|
+
found_so_far[its_key] = a_digr
|
85
|
+
end
|
87
86
|
end
|
87
|
+
break if diagnosis.collision_found
|
88
88
|
end
|
89
|
-
|
89
|
+
|
90
|
+
diagnosis
|
90
91
|
end
|
91
92
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
prods[0].reduce_step(new_prod)
|
109
|
-
prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
|
93
|
+
# When a collision diagnosis indicates that a given
|
94
|
+
# digram d occurs twice in the grammar
|
95
|
+
# Then create a new production that will have
|
96
|
+
# the symbols of d as its rhs members.
|
97
|
+
def restore_unicity(aDiagnosis)
|
98
|
+
prods = aDiagnosis.productions
|
99
|
+
if prods.any?(&:single_digram?)
|
100
|
+
(simple, compound) = prods.partition(&:single_digram?)
|
101
|
+
compound[0].reduce_step(simple[0])
|
102
|
+
else
|
103
|
+
# Create a new production with the digram's symbols as its
|
104
|
+
# sole rhs members.
|
105
|
+
new_prod = build_production_for(aDiagnosis.digram)
|
106
|
+
prods[0].reduce_step(new_prod)
|
107
|
+
prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
|
108
|
+
end
|
110
109
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
return useless
|
119
|
-
end
|
120
|
-
|
121
|
-
# Given the passed production P is referenced only once.
|
122
|
-
# Then replace P by its RHS where it is referenced.
|
123
|
-
# And delete P
|
124
|
-
def restore_utility(prod_index)
|
125
|
-
# Retrieve useless prod from its index
|
126
|
-
useless_prod = productions[prod_index]
|
127
|
-
|
128
|
-
# Retrieve production referencing useless one
|
129
|
-
referencing = nil
|
130
|
-
productions.reverse_each do |a_prod|
|
131
|
-
# Next line assumes non-recursive productions
|
132
|
-
next if a_prod == useless_prod
|
133
|
-
|
134
|
-
refs = a_prod.references_of(useless_prod)
|
135
|
-
next if refs.empty?
|
136
|
-
|
137
|
-
referencing = a_prod
|
138
|
-
break
|
110
|
+
|
111
|
+
# Return the index of a production (other than the start production at index 0) that is referenced less than twice in the grammar, or nil if there is none.
|
112
|
+
def detect_useless_production
|
113
|
+
useless = productions.index { |prod| prod.refcount < 2 }
|
114
|
+
useless = nil if useless&.zero?
|
115
|
+
|
116
|
+
useless
|
139
117
|
end
|
140
118
|
|
141
|
-
|
142
|
-
|
143
|
-
|
119
|
+
# Given the passed production P is referenced only once.
|
120
|
+
# Then replace P by its RHS where it is referenced.
|
121
|
+
# And delete P
|
122
|
+
def restore_utility(prod_index)
|
123
|
+
# Retrieve useless prod from its index
|
124
|
+
useless_prod = productions[prod_index]
|
125
|
+
|
126
|
+
# Retrieve production referencing useless one
|
127
|
+
referencing = nil
|
128
|
+
productions.reverse_each do |a_prod|
|
129
|
+
# Next line assumes non-recursive productions
|
130
|
+
next if a_prod == useless_prod
|
131
|
+
|
132
|
+
refs = a_prod.references_of(useless_prod)
|
133
|
+
next if refs.empty?
|
134
|
+
|
135
|
+
referencing = a_prod
|
136
|
+
break
|
137
|
+
end
|
138
|
+
|
139
|
+
referencing.derive_step(useless_prod)
|
140
|
+
remove_production(prod_index)
|
141
|
+
end
|
144
142
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
143
|
+
# Create a new production that will have the symbols from digram
|
144
|
+
# as its rhs members.
|
145
|
+
def build_production_for(aDigram)
|
146
|
+
new_prod = Production.new
|
147
|
+
aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
|
148
|
+
add_production(new_prod)
|
151
149
|
|
152
|
-
|
153
|
-
|
154
|
-
end # class
|
150
|
+
new_prod
|
151
|
+
end
|
152
|
+
end # class
|
155
153
|
end # module
|
156
154
|
# End of file
|
@@ -31,15 +31,15 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
31
31
|
end
|
32
32
|
|
33
33
|
# Tell whether the sequence is empty.
|
34
|
-
# @
|
34
|
+
# @return [true / false] true only if the sequence has no symbol in it.
|
35
35
|
def empty?
|
36
|
-
|
36
|
+
symbols.empty?
|
37
37
|
end
|
38
38
|
|
39
39
|
# Count the number of elements in the sequence.
|
40
|
-
# @
|
40
|
+
# @return [Fixnum] the number of elements
|
41
41
|
def size
|
42
|
-
|
42
|
+
symbols.size
|
43
43
|
end
|
44
44
|
|
45
45
|
# Append a grammar symbol at the end of the sequence.
|
@@ -55,58 +55,55 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
55
55
|
# Retrieve the element from the sequence at given position.
|
56
56
|
# @param anIndex [Fixnum] A zero-based index of the element to access.
|
57
57
|
def [](anIndex)
|
58
|
-
|
58
|
+
symbols[anIndex]
|
59
59
|
end
|
60
60
|
|
61
61
|
# Equality testing.
|
62
62
|
# @param other [SymbolSequence or Array] the other other sequence
|
63
63
|
# to compare to.
|
64
|
-
# @
|
64
|
+
# @return [true / false] true when every item from self equals the corresponding
|
65
65
|
# item from 'other'
|
66
66
|
def ==(other)
|
67
|
-
|
67
|
+
true if object_id == other.object_id
|
68
68
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
return same
|
69
|
+
case other
|
70
|
+
when SymbolSequence
|
71
|
+
symbols == other.symbols
|
72
|
+
when Array
|
73
|
+
symbols == other
|
74
|
+
else
|
75
|
+
false
|
76
|
+
end
|
79
77
|
end
|
80
78
|
|
81
79
|
# Select the references to production appearing in the rhs.
|
82
|
-
# @
|
80
|
+
# @return [Array of ProductionRef]
|
83
81
|
def references
|
84
82
|
@memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
|
85
|
-
|
83
|
+
@memo_references
|
86
84
|
end
|
87
85
|
|
88
86
|
# Select the references of the given production appearing in the rhs.
|
89
87
|
# @param aProduction [Production]
|
90
|
-
# @
|
88
|
+
# @return [Array of ProductionRef]
|
91
89
|
def references_of(aProduction)
|
92
|
-
|
90
|
+
[] if references.empty?
|
93
91
|
|
94
|
-
|
95
|
-
return result
|
92
|
+
references.select { |a_ref| a_ref == aProduction }
|
96
93
|
end
|
97
94
|
|
98
95
|
# Emit a text representation of the symbol sequence.
|
99
96
|
# Text is of the form: space-separated sequence of symbols.
|
100
|
-
# @
|
97
|
+
# @return [String]
|
101
98
|
def to_string
|
102
99
|
rhs_text = symbols.map do |elem|
|
103
100
|
case elem
|
104
|
-
|
105
|
-
|
101
|
+
when String then "'#{elem}'"
|
102
|
+
else elem.to_s
|
106
103
|
end
|
107
104
|
end
|
108
105
|
|
109
|
-
|
106
|
+
rhs_text.join(' ')
|
110
107
|
end
|
111
108
|
|
112
109
|
# Insert at position the elements from another sequence.
|
data/lib/sequitur.rb
CHANGED
@@ -9,7 +9,6 @@ require_relative './sequitur/sequitur_grammar'
|
|
9
9
|
require_relative './sequitur/formatter/debug'
|
10
10
|
require_relative './sequitur/formatter/base_text'
|
11
11
|
|
12
|
-
|
13
12
|
module Sequitur
|
14
13
|
# Build a Sequitur-generated grammar based on the sequence of input tokens.
|
15
14
|
#
|
@@ -19,12 +18,12 @@ module Sequitur
|
|
19
18
|
# @return [SequiturGrammar] a grammar that encodes the input.
|
20
19
|
def self.build_from(tokens)
|
21
20
|
input_sequence = case tokens
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
when String then tokens.chars
|
22
|
+
when Enumerator then tokens
|
23
|
+
else tokens.to_enum
|
25
24
|
end
|
26
25
|
|
27
|
-
|
26
|
+
SequiturGrammar.new(input_sequence)
|
28
27
|
end
|
29
28
|
end # module
|
30
29
|
|
@@ -5,38 +5,40 @@ require_relative '../spec_helper'
|
|
5
5
|
# Load the class under test
|
6
6
|
require_relative '../../lib/sequitur/digram'
|
7
7
|
|
8
|
-
|
9
|
-
describe Digram do
|
8
|
+
describe Sequitur::Digram do
|
10
9
|
let(:two_symbols) { %i[b c] }
|
11
10
|
let(:production) { double('sample-production') }
|
11
|
+
def make_digram(symb1, symb2, production)
|
12
|
+
Sequitur::Digram.new(symb1, symb2, production)
|
13
|
+
end
|
12
14
|
|
13
15
|
context 'Standard creation & initialization:' do
|
14
16
|
it 'should be created with 3 arguments' do
|
15
|
-
instance =
|
17
|
+
instance = make_digram(:b, :c, production)
|
16
18
|
|
17
19
|
expect(instance.symbols).to eq(two_symbols)
|
18
20
|
expect(instance.production).to eq(production)
|
19
21
|
end
|
20
22
|
|
21
23
|
it 'should return the production that it refers to' do
|
22
|
-
instance =
|
24
|
+
instance = make_digram(:b, :c, production)
|
23
25
|
expect(instance.production).to eq(production)
|
24
26
|
end
|
25
27
|
|
26
28
|
it 'should whether its symbols are the same' do
|
27
|
-
|
28
|
-
|
29
|
+
instance1 = make_digram(:a, :a, production)
|
30
|
+
expect(instance1).to be_repeating
|
29
31
|
|
30
|
-
|
31
|
-
|
32
|
+
instance1 = make_digram(:a, :b, production)
|
33
|
+
expect(instance1).not_to be_repeating
|
32
34
|
end
|
33
35
|
end # context
|
34
36
|
|
35
37
|
context 'Provided services:' do
|
36
38
|
it 'should compare itself to another digram' do
|
37
|
-
instance1 =
|
38
|
-
same =
|
39
|
-
different =
|
39
|
+
instance1 = make_digram(:a, :b, production)
|
40
|
+
same = make_digram(:a, :b, production)
|
41
|
+
different = make_digram(:b, :c, production)
|
40
42
|
|
41
43
|
expect(instance1).to eq(instance1)
|
42
44
|
expect(instance1).to eq(same)
|
@@ -45,6 +47,5 @@ describe Digram do
|
|
45
47
|
end
|
46
48
|
end # context
|
47
49
|
end # describe
|
48
|
-
end # module
|
49
50
|
|
50
51
|
# End of file
|
@@ -5,14 +5,13 @@ require_relative '../spec_helper'
|
|
5
5
|
# Load the class under test
|
6
6
|
require_relative '../../lib/sequitur/dynamic_grammar'
|
7
7
|
|
8
|
-
|
9
|
-
describe DynamicGrammar do
|
8
|
+
describe Sequitur::DynamicGrammar do
|
10
9
|
# Factory method. Build a production with the given sequence
|
11
10
|
# of symbols as its rhs.
|
12
11
|
def build_production(*symbols)
|
13
|
-
prod = Production.new
|
12
|
+
prod = Sequitur::Production.new
|
14
13
|
symbols.each { |symb| prod.append_symbol(symb) }
|
15
|
-
|
14
|
+
prod
|
16
15
|
end
|
17
16
|
|
18
17
|
let(:p_a) { build_production(:a) }
|
@@ -20,10 +19,9 @@ describe DynamicGrammar do
|
|
20
19
|
let(:p_c) { build_production(:c) }
|
21
20
|
let(:p_bc) { build_production(p_b, p_c) }
|
22
21
|
|
23
|
-
|
24
22
|
context 'Creation & initialization:' do
|
25
23
|
it 'should be created without parameter' do
|
26
|
-
expect { DynamicGrammar.new }.not_to raise_error
|
24
|
+
expect { Sequitur::DynamicGrammar.new }.not_to raise_error
|
27
25
|
end
|
28
26
|
|
29
27
|
it 'should have an empty start/start production' do
|
@@ -33,7 +31,6 @@ describe DynamicGrammar do
|
|
33
31
|
end
|
34
32
|
end # context
|
35
33
|
|
36
|
-
|
37
34
|
context 'Adding productions to the grammar:' do
|
38
35
|
it 'should add a simple production' do
|
39
36
|
subject.add_production(p_a)
|
@@ -60,7 +57,6 @@ describe DynamicGrammar do
|
|
60
57
|
end
|
61
58
|
end # context
|
62
59
|
|
63
|
-
|
64
60
|
context 'Removing a production from the grammar:' do
|
65
61
|
it 'should remove an existing production' do
|
66
62
|
subject.add_production(p_a) # index = 1
|
@@ -91,7 +87,7 @@ describe DynamicGrammar do
|
|
91
87
|
context 'Visiting:' do
|
92
88
|
it 'should return a visitor' do
|
93
89
|
expect { subject.visitor }.not_to raise_error
|
94
|
-
expect(subject.visitor).to be_kind_of(GrammarVisitor)
|
90
|
+
expect(subject.visitor).to be_kind_of(Sequitur::GrammarVisitor)
|
95
91
|
end
|
96
92
|
|
97
93
|
it 'should accept a visitor' do
|
@@ -133,7 +129,6 @@ describe DynamicGrammar do
|
|
133
129
|
end
|
134
130
|
end # context
|
135
131
|
|
136
|
-
|
137
132
|
context 'Generating a text representation of itself:' do
|
138
133
|
it 'should generate a text representation when empty' do
|
139
134
|
expectation = "#{subject.start.object_id} : ."
|
@@ -141,6 +136,5 @@ describe DynamicGrammar do
|
|
141
136
|
end
|
142
137
|
end # context
|
143
138
|
end # describe
|
144
|
-
end # module
|
145
139
|
|
146
140
|
# End of file
|