sequitur 0.1.23 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -437
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +1 -1
- data/Rakefile +0 -2
- data/examples/integer_sample.rb +0 -1
- data/examples/porridge.rb +9 -9
- data/examples/word_sample.rb +4 -5
- data/lib/sequitur/constants.rb +4 -1
- data/lib/sequitur/digram.rb +2 -2
- data/lib/sequitur/dynamic_grammar.rb +3 -4
- data/lib/sequitur/formatter/base_formatter.rb +1 -1
- data/lib/sequitur/formatter/base_text.rb +3 -7
- data/lib/sequitur/formatter/debug.rb +0 -1
- data/lib/sequitur/grammar_visitor.rb +1 -1
- data/lib/sequitur/production.rb +200 -205
- data/lib/sequitur/production_ref.rb +9 -12
- data/lib/sequitur/sequitur_grammar.rb +135 -137
- data/lib/sequitur/symbol_sequence.rb +24 -27
- data/lib/sequitur.rb +4 -5
- data/spec/sequitur/digram_spec.rb +13 -12
- data/spec/sequitur/dynamic_grammar_spec.rb +5 -11
- data/spec/sequitur/formatter/base_text_spec.rb +70 -72
- data/spec/sequitur/formatter/debug_spec.rb +90 -92
- data/spec/sequitur/grammar_visitor_spec.rb +70 -71
- data/spec/sequitur/production_ref_spec.rb +92 -92
- data/spec/sequitur/production_spec.rb +30 -34
- data/spec/sequitur/sequitur_grammar_spec.rb +47 -46
- data/spec/sequitur/symbol_sequence_spec.rb +102 -105
- data/spec/spec_helper.rb +0 -1
- metadata +4 -5
- data/.travis.yml +0 -29
@@ -2,155 +2,153 @@
|
|
2
2
|
|
3
3
|
require_relative 'dynamic_grammar'
|
4
4
|
|
5
|
-
|
6
5
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
7
|
-
# Specialization of the DynamicGrammar class.
|
8
|
-
# A Sequitur grammar is a context-free grammar that is entirely built
|
9
|
-
# from a sequence of input tokens through the Sequitur algorithm.
|
10
|
-
class SequiturGrammar < DynamicGrammar
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
6
|
+
# Specialization of the DynamicGrammar class.
|
7
|
+
# A Sequitur grammar is a context-free grammar that is entirely built
|
8
|
+
# from a sequence of input tokens through the Sequitur algorithm.
|
9
|
+
class SequiturGrammar < DynamicGrammar
|
10
|
+
# Build the grammar from an enumerator of tokens.
|
11
|
+
# @param anEnum [Enumerator] an enumerator that will iterate
|
12
|
+
# over the input tokens.
|
13
|
+
def initialize(anEnum)
|
14
|
+
super()
|
15
|
+
# Make start production compliant with utility rule
|
16
|
+
2.times { start.incr_refcount }
|
17
|
+
|
18
|
+
# Read the input sequence and apply the Sequitur algorithm
|
19
|
+
anEnum.each do |a_token|
|
20
|
+
add_token(a_token)
|
21
|
+
enforce_rules
|
22
|
+
end
|
23
23
|
end
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
break unless unicity_diagnosis.collision_found || !prod_index.nil?
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
# Struct used for internal purposes
|
28
|
+
CollisionDiagnosis = Struct.new(
|
29
|
+
:collision_found, # true if collision detected
|
30
|
+
:digram, # The digram involved in a collision
|
31
|
+
:productions # The productions where the digram occurs
|
32
|
+
)
|
33
|
+
|
34
|
+
# Assuming that a new input token was added to the start production,
|
35
|
+
# enforce the digram unicity and rule utility rules
|
36
|
+
# begin
|
37
|
+
# if a digram D occurs twice in the grammar then
|
38
|
+
# add a production P : D (if not already there)
|
39
|
+
# replace both Ds with a reference to P (reduction step).
|
40
|
+
# end
|
41
|
+
# if a production P : RHS is referenced only once then
|
42
|
+
# replace P by its RHS (derivation step)
|
43
|
+
# remove P from grammar
|
44
|
+
# end
|
45
|
+
# end until digram unicity and rule utility are met
|
46
|
+
def enforce_rules
|
47
|
+
loop do
|
48
|
+
unicity_diagnosis = detect_collision if unicity_diagnosis.nil?
|
49
|
+
restore_unicity(unicity_diagnosis) if unicity_diagnosis.collision_found
|
50
|
+
|
51
|
+
prod_index = detect_useless_production
|
52
|
+
restore_utility(prod_index) unless prod_index.nil?
|
53
|
+
|
54
|
+
unicity_diagnosis = detect_collision
|
55
|
+
prod_index = detect_useless_production
|
56
|
+
break unless unicity_diagnosis.collision_found || !prod_index.nil?
|
57
|
+
end
|
59
58
|
end
|
60
|
-
end
|
61
|
-
|
62
|
-
# Check whether a digram is used twice in the grammar.
|
63
|
-
# Return an empty Hash if each digram appears once.
|
64
|
-
# Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
|
65
|
-
# Where Pi, Pk are two productions where the digram occurs.
|
66
|
-
def detect_collision
|
67
|
-
diagnosis = CollisionDiagnosis.new(false)
|
68
|
-
found_so_far = {}
|
69
|
-
productions.each do |a_prod|
|
70
|
-
prod_digrams = a_prod.digrams
|
71
|
-
prod_digrams.each do |a_digr|
|
72
|
-
its_key = a_digr.key
|
73
|
-
if found_so_far.include? its_key
|
74
|
-
orig_digr = found_so_far[its_key]
|
75
|
-
# Disregard sequence like a a a
|
76
|
-
if (orig_digr.production == a_prod) && a_digr.repeating? &&
|
77
|
-
(orig_digr == a_digr)
|
78
|
-
next
|
79
|
-
end
|
80
59
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
60
|
+
# Check whether a digram is used twice in the grammar.
|
61
|
+
# Return a CollisionDiagnosis with collision_found false if each digram appears once.
|
62
|
+
# Otherwise return one with collision_found true, the digram and productions = [Pi, Pk]
|
63
|
+
# Where Pi, Pk are two productions where the digram occurs.
|
64
|
+
def detect_collision
|
65
|
+
diagnosis = CollisionDiagnosis.new(false)
|
66
|
+
found_so_far = {}
|
67
|
+
productions.each do |a_prod|
|
68
|
+
prod_digrams = a_prod.digrams
|
69
|
+
prod_digrams.each do |a_digr|
|
70
|
+
its_key = a_digr.key
|
71
|
+
if found_so_far.include? its_key
|
72
|
+
orig_digr = found_so_far[its_key]
|
73
|
+
# Disregard sequence like a a a
|
74
|
+
if (orig_digr.production == a_prod) && a_digr.repeating? &&
|
75
|
+
(orig_digr == a_digr)
|
76
|
+
next
|
77
|
+
end
|
78
|
+
|
79
|
+
diagnosis.digram = orig_digr
|
80
|
+
diagnosis.productions = [orig_digr.production, a_prod]
|
81
|
+
diagnosis.collision_found = true
|
82
|
+
break
|
83
|
+
else
|
84
|
+
found_so_far[its_key] = a_digr
|
85
|
+
end
|
87
86
|
end
|
87
|
+
break if diagnosis.collision_found
|
88
88
|
end
|
89
|
-
|
89
|
+
|
90
|
+
diagnosis
|
90
91
|
end
|
91
92
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
prods[0].reduce_step(new_prod)
|
109
|
-
prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
|
93
|
+
# When a collision diagnosis indicates that a given
|
94
|
+
# digram d occurs twice in the grammar
|
95
|
+
# Then create a new production that will have
|
96
|
+
# the symbols of d as its rhs members.
|
97
|
+
def restore_unicity(aDiagnosis)
|
98
|
+
prods = aDiagnosis.productions
|
99
|
+
if prods.any?(&:single_digram?)
|
100
|
+
(simple, compound) = prods.partition(&:single_digram?)
|
101
|
+
compound[0].reduce_step(simple[0])
|
102
|
+
else
|
103
|
+
# Create a new production with the digram's symbols as its
|
104
|
+
# sole rhs members.
|
105
|
+
new_prod = build_production_for(aDiagnosis.digram)
|
106
|
+
prods[0].reduce_step(new_prod)
|
107
|
+
prods[1].reduce_step(new_prod) unless prods[1] == prods[0]
|
108
|
+
end
|
110
109
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
return useless
|
119
|
-
end
|
120
|
-
|
121
|
-
# Given the passed production P is referenced only once.
|
122
|
-
# Then replace P by its RHS where it is referenced.
|
123
|
-
# And delete P
|
124
|
-
def restore_utility(prod_index)
|
125
|
-
# Retrieve useless prod from its index
|
126
|
-
useless_prod = productions[prod_index]
|
127
|
-
|
128
|
-
# Retrieve production referencing useless one
|
129
|
-
referencing = nil
|
130
|
-
productions.reverse_each do |a_prod|
|
131
|
-
# Next line assumes non-recursive productions
|
132
|
-
next if a_prod == useless_prod
|
133
|
-
|
134
|
-
refs = a_prod.references_of(useless_prod)
|
135
|
-
next if refs.empty?
|
136
|
-
|
137
|
-
referencing = a_prod
|
138
|
-
break
|
110
|
+
|
111
|
+
# Return the index of a production that is referenced less than twice in the grammar (index 0 is never reported), or nil when there is none.
|
112
|
+
def detect_useless_production
|
113
|
+
useless = productions.index { |prod| prod.refcount < 2 }
|
114
|
+
useless = nil if useless&.zero?
|
115
|
+
|
116
|
+
useless
|
139
117
|
end
|
140
118
|
|
141
|
-
|
142
|
-
|
143
|
-
|
119
|
+
# Given the passed production P is referenced only once.
|
120
|
+
# Then replace P by its RHS where it is referenced.
|
121
|
+
# And delete P
|
122
|
+
def restore_utility(prod_index)
|
123
|
+
# Retrieve useless prod from its index
|
124
|
+
useless_prod = productions[prod_index]
|
125
|
+
|
126
|
+
# Retrieve production referencing useless one
|
127
|
+
referencing = nil
|
128
|
+
productions.reverse_each do |a_prod|
|
129
|
+
# Next line assumes non-recursive productions
|
130
|
+
next if a_prod == useless_prod
|
131
|
+
|
132
|
+
refs = a_prod.references_of(useless_prod)
|
133
|
+
next if refs.empty?
|
134
|
+
|
135
|
+
referencing = a_prod
|
136
|
+
break
|
137
|
+
end
|
138
|
+
|
139
|
+
referencing.derive_step(useless_prod)
|
140
|
+
remove_production(prod_index)
|
141
|
+
end
|
144
142
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
143
|
+
# Create a new production that will have the symbols from digram
|
144
|
+
# as its rhs members.
|
145
|
+
def build_production_for(aDigram)
|
146
|
+
new_prod = Production.new
|
147
|
+
aDigram.symbols.each { |sym| new_prod.append_symbol(sym) }
|
148
|
+
add_production(new_prod)
|
151
149
|
|
152
|
-
|
153
|
-
|
154
|
-
end # class
|
150
|
+
new_prod
|
151
|
+
end
|
152
|
+
end # class
|
155
153
|
end # module
|
156
154
|
# End of file
|
@@ -31,15 +31,15 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
31
31
|
end
|
32
32
|
|
33
33
|
# Tell whether the sequence is empty.
|
34
|
-
# @
|
34
|
+
# @return [true / false] true only if the sequence has no symbol in it.
|
35
35
|
def empty?
|
36
|
-
|
36
|
+
symbols.empty?
|
37
37
|
end
|
38
38
|
|
39
39
|
# Count the number of elements in the sequence.
|
40
|
-
# @
|
40
|
+
# @return [Fixnum] the number of elements
|
41
41
|
def size
|
42
|
-
|
42
|
+
symbols.size
|
43
43
|
end
|
44
44
|
|
45
45
|
# Append a grammar symbol at the end of the sequence.
|
@@ -55,58 +55,55 @@ module Sequitur # Module for classes implementing the Sequitur algorithm
|
|
55
55
|
# Retrieve the element from the sequence at given position.
|
56
56
|
# @param anIndex [Fixnum] A zero-based index of the element to access.
|
57
57
|
def [](anIndex)
|
58
|
-
|
58
|
+
symbols[anIndex]
|
59
59
|
end
|
60
60
|
|
61
61
|
# Equality testing.
|
62
62
|
# @param other [SymbolSequence or Array] the other sequence
|
63
63
|
# to compare to.
|
64
|
-
# @
|
64
|
+
# @return [true / false] true when an item from self equals the corresponding
|
65
65
|
# item from 'other'
|
66
66
|
def ==(other)
|
67
|
-
|
67
|
+
true if object_id == other.object_id
|
68
68
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
return same
|
69
|
+
case other
|
70
|
+
when SymbolSequence
|
71
|
+
symbols == other.symbols
|
72
|
+
when Array
|
73
|
+
symbols == other
|
74
|
+
else
|
75
|
+
false
|
76
|
+
end
|
79
77
|
end
|
80
78
|
|
81
79
|
# Select the references to production appearing in the rhs.
|
82
|
-
# @
|
80
|
+
# @return [Array of ProductionRef]
|
83
81
|
def references
|
84
82
|
@memo_references ||= symbols.select { |symb| symb.is_a?(ProductionRef) }
|
85
|
-
|
83
|
+
@memo_references
|
86
84
|
end
|
87
85
|
|
88
86
|
# Select the references of the given production appearing in the rhs.
|
89
87
|
# @param aProduction [Production]
|
90
|
-
# @
|
88
|
+
# @return [Array of ProductionRef]
|
91
89
|
def references_of(aProduction)
|
92
|
-
|
90
|
+
[] if references.empty?
|
93
91
|
|
94
|
-
|
95
|
-
return result
|
92
|
+
references.select { |a_ref| a_ref == aProduction }
|
96
93
|
end
|
97
94
|
|
98
95
|
# Emit a text representation of the symbol sequence.
|
99
96
|
# Text is of the form: space-separated sequence of symbols.
|
100
|
-
# @
|
97
|
+
# @return [String]
|
101
98
|
def to_string
|
102
99
|
rhs_text = symbols.map do |elem|
|
103
100
|
case elem
|
104
|
-
|
105
|
-
|
101
|
+
when String then "'#{elem}'"
|
102
|
+
else elem.to_s
|
106
103
|
end
|
107
104
|
end
|
108
105
|
|
109
|
-
|
106
|
+
rhs_text.join(' ')
|
110
107
|
end
|
111
108
|
|
112
109
|
# Insert at position the elements from another sequence.
|
data/lib/sequitur.rb
CHANGED
@@ -9,7 +9,6 @@ require_relative './sequitur/sequitur_grammar'
|
|
9
9
|
require_relative './sequitur/formatter/debug'
|
10
10
|
require_relative './sequitur/formatter/base_text'
|
11
11
|
|
12
|
-
|
13
12
|
module Sequitur
|
14
13
|
# Build a Sequitur-generated grammar based on the sequence of input tokens.
|
15
14
|
#
|
@@ -19,12 +18,12 @@ module Sequitur
|
|
19
18
|
# @return [SequiturGrammar] a grammar that encodes the input.
|
20
19
|
def self.build_from(tokens)
|
21
20
|
input_sequence = case tokens
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
when String then tokens.chars
|
22
|
+
when Enumerator then tokens
|
23
|
+
else tokens.to_enum
|
25
24
|
end
|
26
25
|
|
27
|
-
|
26
|
+
SequiturGrammar.new(input_sequence)
|
28
27
|
end
|
29
28
|
end # module
|
30
29
|
|
@@ -5,38 +5,40 @@ require_relative '../spec_helper'
|
|
5
5
|
# Load the class under test
|
6
6
|
require_relative '../../lib/sequitur/digram'
|
7
7
|
|
8
|
-
|
9
|
-
describe Digram do
|
8
|
+
describe Sequitur::Digram do
|
10
9
|
let(:two_symbols) { %i[b c] }
|
11
10
|
let(:production) { double('sample-production') }
|
11
|
+
def make_digram(symb1, symb2, production)
|
12
|
+
Sequitur::Digram.new(symb1, symb2, production)
|
13
|
+
end
|
12
14
|
|
13
15
|
context 'Standard creation & initialization:' do
|
14
16
|
it 'should be created with 3 arguments' do
|
15
|
-
instance =
|
17
|
+
instance = make_digram(:b, :c, production)
|
16
18
|
|
17
19
|
expect(instance.symbols).to eq(two_symbols)
|
18
20
|
expect(instance.production).to eq(production)
|
19
21
|
end
|
20
22
|
|
21
23
|
it 'should return the production that it refers to' do
|
22
|
-
instance =
|
24
|
+
instance = make_digram(:b, :c, production)
|
23
25
|
expect(instance.production).to eq(production)
|
24
26
|
end
|
25
27
|
|
26
28
|
it 'should whether its symbols are the same' do
|
27
|
-
|
28
|
-
|
29
|
+
instance1 = make_digram(:a, :a, production)
|
30
|
+
expect(instance1).to be_repeating
|
29
31
|
|
30
|
-
|
31
|
-
|
32
|
+
instance1 = make_digram(:a, :b, production)
|
33
|
+
expect(instance1).not_to be_repeating
|
32
34
|
end
|
33
35
|
end # context
|
34
36
|
|
35
37
|
context 'Provided services:' do
|
36
38
|
it 'should compare itself to another digram' do
|
37
|
-
instance1 =
|
38
|
-
same =
|
39
|
-
different =
|
39
|
+
instance1 = make_digram(:a, :b, production)
|
40
|
+
same = make_digram(:a, :b, production)
|
41
|
+
different = make_digram(:b, :c, production)
|
40
42
|
|
41
43
|
expect(instance1).to eq(instance1)
|
42
44
|
expect(instance1).to eq(same)
|
@@ -45,6 +47,5 @@ describe Digram do
|
|
45
47
|
end
|
46
48
|
end # context
|
47
49
|
end # describe
|
48
|
-
end # module
|
49
50
|
|
50
51
|
# End of file
|
@@ -5,14 +5,13 @@ require_relative '../spec_helper'
|
|
5
5
|
# Load the class under test
|
6
6
|
require_relative '../../lib/sequitur/dynamic_grammar'
|
7
7
|
|
8
|
-
|
9
|
-
describe DynamicGrammar do
|
8
|
+
describe Sequitur::DynamicGrammar do
|
10
9
|
# Factory method. Build a production with the given sequence
|
11
10
|
# of symbols as its rhs.
|
12
11
|
def build_production(*symbols)
|
13
|
-
prod = Production.new
|
12
|
+
prod = Sequitur::Production.new
|
14
13
|
symbols.each { |symb| prod.append_symbol(symb) }
|
15
|
-
|
14
|
+
prod
|
16
15
|
end
|
17
16
|
|
18
17
|
let(:p_a) { build_production(:a) }
|
@@ -20,10 +19,9 @@ describe DynamicGrammar do
|
|
20
19
|
let(:p_c) { build_production(:c) }
|
21
20
|
let(:p_bc) { build_production(p_b, p_c) }
|
22
21
|
|
23
|
-
|
24
22
|
context 'Creation & initialization:' do
|
25
23
|
it 'should be created without parameter' do
|
26
|
-
expect { DynamicGrammar.new }.not_to raise_error
|
24
|
+
expect { Sequitur::DynamicGrammar.new }.not_to raise_error
|
27
25
|
end
|
28
26
|
|
29
27
|
it 'should have an empty start/start production' do
|
@@ -33,7 +31,6 @@ describe DynamicGrammar do
|
|
33
31
|
end
|
34
32
|
end # context
|
35
33
|
|
36
|
-
|
37
34
|
context 'Adding productions to the grammar:' do
|
38
35
|
it 'should add a simple production' do
|
39
36
|
subject.add_production(p_a)
|
@@ -60,7 +57,6 @@ describe DynamicGrammar do
|
|
60
57
|
end
|
61
58
|
end # context
|
62
59
|
|
63
|
-
|
64
60
|
context 'Removing a production from the grammar:' do
|
65
61
|
it 'should remove an existing production' do
|
66
62
|
subject.add_production(p_a) # index = 1
|
@@ -91,7 +87,7 @@ describe DynamicGrammar do
|
|
91
87
|
context 'Visiting:' do
|
92
88
|
it 'should return a visitor' do
|
93
89
|
expect { subject.visitor }.not_to raise_error
|
94
|
-
expect(subject.visitor).to be_kind_of(GrammarVisitor)
|
90
|
+
expect(subject.visitor).to be_kind_of(Sequitur::GrammarVisitor)
|
95
91
|
end
|
96
92
|
|
97
93
|
it 'should accept a visitor' do
|
@@ -133,7 +129,6 @@ describe DynamicGrammar do
|
|
133
129
|
end
|
134
130
|
end # context
|
135
131
|
|
136
|
-
|
137
132
|
context 'Generating a text representation of itself:' do
|
138
133
|
it 'should generate a text representation when empty' do
|
139
134
|
expectation = "#{subject.start.object_id} : ."
|
@@ -141,6 +136,5 @@ describe DynamicGrammar do
|
|
141
136
|
end
|
142
137
|
end # context
|
143
138
|
end # describe
|
144
|
-
end # module
|
145
139
|
|
146
140
|
# End of file
|