sequitur 0.1.18 → 0.1.19
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +163 -49
- data/.travis.yml +13 -10
- data/CHANGELOG.md +9 -0
- data/Gemfile +2 -3
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/appveyor.yml +13 -10
- data/examples/integer_sample.rb +5 -6
- data/examples/porridge.rb +4 -6
- data/examples/simple_case.rb +5 -6
- data/examples/symbol_sample.rb +5 -8
- data/examples/word_sample.rb +1 -2
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/digram.rb +38 -38
- data/lib/sequitur/dynamic_grammar.rb +91 -95
- data/lib/sequitur/formatter/base_text.rb +1 -1
- data/lib/sequitur/formatter/debug.rb +2 -2
- data/lib/sequitur/grammar_visitor.rb +96 -98
- data/lib/sequitur/production.rb +10 -19
- data/lib/sequitur/production_ref.rb +104 -105
- data/lib/sequitur/sequitur_grammar.rb +3 -3
- data/lib/sequitur/symbol_sequence.rb +7 -11
- data/spec/sequitur/digram_spec.rb +8 -8
- data/spec/sequitur/production_spec.rb +7 -7
- data/spec/sequitur/sequitur_grammar_spec.rb +10 -10
- data/spec/sequitur/symbol_sequence_spec.rb +4 -4
- data/spec/spec_helper.rb +6 -4
- metadata +44 -29
data/examples/symbol_sample.rb
CHANGED
@@ -1,19 +1,17 @@
|
|
1
|
-
require 'sequitur'
|
1
|
+
require 'sequitur' # Load the Sequitur library
|
2
2
|
|
3
3
|
#
|
4
4
|
# Purpose: show how to apply Sequitur on a stream of Symbol values
|
5
5
|
#
|
6
|
-
input_sequence =
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
:dd, :ee
|
6
|
+
input_sequence = %i[
|
7
|
+
aa bb aa bb cc
|
8
|
+
aa bb cc dd aa
|
9
|
+
bb cc dd ee
|
11
10
|
]
|
12
11
|
|
13
12
|
# Generate the grammar from the sequence
|
14
13
|
grammar = Sequitur.build_from(input_sequence)
|
15
14
|
|
16
|
-
|
17
15
|
# Use a formatter to display the grammar rules on the console output
|
18
16
|
formatter = Sequitur::Formatter::BaseText.new(STDOUT)
|
19
17
|
|
@@ -25,4 +23,3 @@ formatter.render(grammar.visitor)
|
|
25
23
|
# P1 : aa bb.
|
26
24
|
# P2 : P1 cc.
|
27
25
|
# P3 : P2 dd.
|
28
|
-
|
data/examples/word_sample.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'sequitur'
|
1
|
+
require 'sequitur' # Load the Sequitur library
|
2
2
|
|
3
3
|
#
|
4
4
|
# Purpose: show how to apply Sequitur on a stream of text words
|
@@ -27,4 +27,3 @@ formatter.render(grammar.visitor)
|
|
27
27
|
# start : P2 6 Error illegal P1 20 P2 9.
|
28
28
|
# P1 : character at position.
|
29
29
|
# P2 : Error unknown P1.
|
30
|
-
|
data/lib/sequitur/constants.rb
CHANGED
data/lib/sequitur/digram.rb
CHANGED
@@ -1,49 +1,49 @@
|
|
1
1
|
# File: digram.rb
|
2
2
|
|
3
3
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
|
-
# In linguistics, a digram is a sequence of two letters.
|
5
|
-
# In Sequitur, a digram is a sequence of two consecutive symbols that
|
6
|
-
# appear in a production rule. Each symbol in a digram
|
7
|
-
# can be a terminal or not.
|
8
|
-
class Digram
|
9
|
-
|
10
|
-
|
11
|
-
|
4
|
+
# In linguistics, a digram is a sequence of two letters.
|
5
|
+
# In Sequitur, a digram is a sequence of two consecutive symbols that
|
6
|
+
# appear in a production rule. Each symbol in a digram
|
7
|
+
# can be a terminal or not.
|
8
|
+
class Digram
|
9
|
+
# The sequence of two consecutive grammar symbols.
|
10
|
+
# The two symbols should respond to the :hash message.
|
11
|
+
attr_reader(:symbols)
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
# A unique hash key of the digram
|
14
|
+
attr_reader(:key)
|
15
15
|
|
16
|
-
|
17
|
-
|
16
|
+
# The production in which the digram occurs
|
17
|
+
attr_reader(:production)
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
19
|
+
# Constructor.
|
20
|
+
# A digram represents a sequence of two symbols
|
21
|
+
# (that appears in a rhs of a production).
|
22
|
+
# Terminal symbols must respond to the :hash message.
|
23
|
+
# @param symbol1 [StringOrSymbol] First element of the digram
|
24
|
+
# @param symbol2 [StringOrSymbol] Second element of the digram
|
25
|
+
# @param aProduction [Production] Production in whose RHS
|
26
|
+
# the sequence symbol1 symbol2 appears.
|
27
|
+
def initialize(symbol1, symbol2, aProduction)
|
28
|
+
@symbols = [symbol1, symbol2]
|
29
|
+
@key = symbol1.hash.to_s(16) + ':' + symbol2.hash.to_s(16)
|
30
|
+
@production = aProduction
|
31
|
+
end
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
33
|
+
# Equality testing.
|
34
|
+
# true iff keys of both digrams are equal, false otherwise
|
35
|
+
# @param other [Digram] another digram to compare with
|
36
|
+
# @return [true/false]
|
37
|
+
def ==(other)
|
38
|
+
return key == other.key
|
39
|
+
end
|
40
40
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end # class
|
41
|
+
# Does the digram consist of the same symbol twice?
|
42
|
+
# @return [true/false] true when symbols.first == symbols.last
|
43
|
+
def repeating?
|
44
|
+
return symbols[0] == symbols[1]
|
45
|
+
end
|
46
|
+
end # class
|
47
47
|
end # module
|
48
48
|
|
49
49
|
# End of file
|
@@ -2,100 +2,96 @@ require_relative 'production'
|
|
2
2
|
require_relative 'grammar_visitor'
|
3
3
|
|
4
4
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
5
|
-
# A dynamic grammar is a context-free grammar that can be built incrementally.
|
6
|
-
# Formally, a grammar has:
|
7
|
-
# One start production
|
8
|
-
# Zero or more other productions
|
9
|
-
# Each production has a rhs that is a sequence of grammar symbols.
|
10
|
-
# Grammar symbols are categorized into
|
11
|
-
# -terminal symbols (i.e. String, Ruby Symbol,...)
|
12
|
-
# -non-terminal symbols (i.e. ProductionRef)
|
13
|
-
class DynamicGrammar
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
#
|
96
|
-
def append_symbol_to(aProduction, aSymbol)
|
97
|
-
aProduction.append_symbol(aSymbol)
|
98
|
-
end
|
99
|
-
end # class
|
5
|
+
# A dynamic grammar is a context-free grammar that can be built incrementally.
|
6
|
+
# Formally, a grammar has:
|
7
|
+
# One start production
|
8
|
+
# Zero or more other productions
|
9
|
+
# Each production has a rhs that is a sequence of grammar symbols.
|
10
|
+
# Grammar symbols are categorized into
|
11
|
+
# -terminal symbols (i.e. String, Ruby Symbol,...)
|
12
|
+
# -non-terminal symbols (i.e. ProductionRef)
|
13
|
+
class DynamicGrammar
|
14
|
+
# Link to the start production.
|
15
|
+
attr_reader(:start)
|
16
|
+
|
17
|
+
# The set of production rules of the grammar
|
18
|
+
attr_reader(:productions)
|
19
|
+
|
20
|
+
# nodoc Trace the execution of the algorithm.
|
21
|
+
attr(:trace)
|
22
|
+
|
23
|
+
# Constructor.
|
24
|
+
# Build a grammar with one empty rule as the start rule.
|
25
|
+
def initialize
|
26
|
+
@start = Production.new
|
27
|
+
@productions = [start]
|
28
|
+
@trace = false
|
29
|
+
end
|
30
|
+
|
31
|
+
# Emit a text representation of the grammar.
|
32
|
+
# Each production rule is emitted per line.
|
33
|
+
# @return [String]
|
34
|
+
def to_string
|
35
|
+
rule_text = productions.map(&:to_string).join("\n")
|
36
|
+
return rule_text
|
37
|
+
end
|
38
|
+
|
39
|
+
# Add a given production to the grammar.
|
40
|
+
# @param aProduction [Production]
|
41
|
+
def add_production(aProduction)
|
42
|
+
# TODO: remove output
|
43
|
+
puts "Adding #{aProduction.object_id}" if trace
|
44
|
+
puts aProduction.to_string if trace
|
45
|
+
productions << aProduction
|
46
|
+
end
|
47
|
+
|
48
|
+
# Remove a production with given index from the grammar
|
49
|
+
# @param anIndex [Fixnum]
|
50
|
+
# @return [Production] the production removed from the grammar.
|
51
|
+
def remove_production(anIndex)
|
52
|
+
puts "Before production removal #{productions[anIndex].object_id}" if trace
|
53
|
+
puts to_string if trace
|
54
|
+
prod = productions.delete_at(anIndex)
|
55
|
+
# TODO: remove output
|
56
|
+
puts('Removed: ' + prod.to_string) if trace
|
57
|
+
prod.clear_rhs
|
58
|
+
|
59
|
+
return prod
|
60
|
+
end
|
61
|
+
|
62
|
+
# Add the given token to the grammar.
|
63
|
+
# Append the token to the rhs of the start rule.
|
64
|
+
# @param aToken [Object] input token to add
|
65
|
+
def add_token(aToken)
|
66
|
+
append_symbol_to(start, aToken)
|
67
|
+
end
|
68
|
+
|
69
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
70
|
+
# A visitee is expected to accept the visit from a visitor object
|
71
|
+
# @param aVisitor [GrammarVisitor] the visitor object
|
72
|
+
def accept(aVisitor)
|
73
|
+
aVisitor.start_visit_grammar(self)
|
74
|
+
|
75
|
+
# Let's proceed with the visit of productions
|
76
|
+
productions.each { |prod| prod.accept(aVisitor) }
|
77
|
+
|
78
|
+
aVisitor.end_visit_grammar(self)
|
79
|
+
end
|
80
|
+
|
81
|
+
# Factory method. Returns a visitor for this grammar.
|
82
|
+
# @return [GrammarVisitor]
|
83
|
+
def visitor
|
84
|
+
return GrammarVisitor.new(self)
|
85
|
+
end
|
86
|
+
|
87
|
+
protected
|
88
|
+
|
89
|
+
# Append a given symbol to the rhs of passed production.
|
90
|
+
# @param aProduction [Production]
|
91
|
+
# @param aSymbol [Object]
|
92
|
+
def append_symbol_to(aProduction, aSymbol)
|
93
|
+
aProduction.append_symbol(aSymbol)
|
94
|
+
end
|
95
|
+
end # class
|
100
96
|
end # module
|
101
97
|
# End of file
|
@@ -1,104 +1,102 @@
|
|
1
1
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
2
2
|
# A visitor class dedicated to visiting a grammar.
|
3
|
-
class GrammarVisitor
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
# Add a subscriber for the visit event notification.
|
18
|
-
# @param aSubscriber [Object]
|
19
|
-
def subscribe(aSubscriber)
|
20
|
-
subscribers << aSubscriber
|
21
|
-
end
|
22
|
-
|
23
|
-
# Remove the given object from the subscription list.
|
24
|
-
# The object won't be notified of visit events.
|
25
|
-
# @param aSubscriber [Object]
|
26
|
-
def unsubscribe(aSubscriber)
|
27
|
-
subscribers.delete_if { |entry| entry == aSubscriber }
|
28
|
-
end
|
29
|
-
|
30
|
-
# The signal to start the visit.
|
31
|
-
def start()
|
32
|
-
grammar.accept(self)
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
# Visit event. The visitor is about to visit the grammar.
|
37
|
-
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
38
|
-
def start_visit_grammar(aGrammar)
|
39
|
-
broadcast(:before_grammar, aGrammar)
|
40
|
-
end
|
41
|
-
|
42
|
-
|
43
|
-
# Visit event. The visitor is about to visit the given production.
|
44
|
-
# @param aProduction [Production] the production to visit.
|
45
|
-
def start_visit_production(aProduction)
|
46
|
-
broadcast(:before_production, aProduction)
|
47
|
-
end
|
48
|
-
|
49
|
-
# Visit event. The visitor is about to visit the given rhs of production.
|
50
|
-
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
51
|
-
def start_visit_rhs(rhs)
|
52
|
-
broadcast(:before_rhs, rhs)
|
53
|
-
end
|
54
|
-
|
55
|
-
# Visit event. The visitor is visiting the
|
56
|
-
# given reference production (= non-terminal symbol).
|
57
|
-
# @param aProdRef [ProductionRef] the production reference to visit.
|
58
|
-
def visit_prod_ref(aProdRef)
|
59
|
-
production = aProdRef.production
|
60
|
-
broadcast(:before_non_terminal, production)
|
61
|
-
broadcast(:after_non_terminal, production)
|
62
|
-
end
|
63
|
-
|
64
|
-
# Visit event. The visitor is visiting the
|
65
|
-
# given terminal symbol.
|
66
|
-
# @param aTerminal [Object] the terminal to visit.
|
67
|
-
def visit_terminal(aTerminal)
|
68
|
-
broadcast(:before_terminal, aTerminal)
|
69
|
-
broadcast(:after_terminal, aTerminal)
|
70
|
-
end
|
71
|
-
|
72
|
-
# Visit event. The visitor has completed its visit of the given rhs.
|
73
|
-
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
74
|
-
def end_visit_rhs(rhs)
|
75
|
-
broadcast(:after_rhs, rhs)
|
76
|
-
end
|
77
|
-
|
78
|
-
# Visit event. The visitor has completed its visit of the given production.
|
79
|
-
# @param aProduction [Production] the production to visit.
|
80
|
-
def end_visit_production(aProduction)
|
81
|
-
broadcast(:after_production, aProduction)
|
82
|
-
end
|
83
|
-
|
84
|
-
# Visit event. The visitor has completed the visit of the grammar.
|
85
|
-
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
86
|
-
def end_visit_grammar(aGrammar)
|
87
|
-
broadcast(:after_grammar, aGrammar)
|
88
|
-
end
|
89
|
-
|
90
|
-
private
|
91
|
-
|
92
|
-
# Send a notification to all subscribers.
|
93
|
-
# @param msg [Symbol] event to notify
|
94
|
-
# @param args [Array] arguments of the notification.
|
95
|
-
def broadcast(msg, *args)
|
96
|
-
subscribers.each do |a_subscriber|
|
97
|
-
next unless a_subscriber.respond_to?(msg)
|
98
|
-
a_subscriber.send(msg, *args)
|
3
|
+
class GrammarVisitor
|
4
|
+
# Link to the grammar to visit
|
5
|
+
attr_reader(:grammar)
|
6
|
+
|
7
|
+
# List of objects that subscribed to the visit event notification.
|
8
|
+
attr_reader(:subscribers)
|
9
|
+
|
10
|
+
# Build a visitor for the given grammar.
|
11
|
+
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
12
|
+
def initialize(aGrammar)
|
13
|
+
@grammar = aGrammar
|
14
|
+
@subscribers = []
|
99
15
|
end
|
100
|
-
|
101
|
-
|
16
|
+
|
17
|
+
# Add a subscriber for the visit event notification.
|
18
|
+
# @param aSubscriber [Object]
|
19
|
+
def subscribe(aSubscriber)
|
20
|
+
subscribers << aSubscriber
|
21
|
+
end
|
22
|
+
|
23
|
+
# Remove the given object from the subscription list.
|
24
|
+
# The object won't be notified of visit events.
|
25
|
+
# @param aSubscriber [Object]
|
26
|
+
def unsubscribe(aSubscriber)
|
27
|
+
subscribers.delete_if { |entry| entry == aSubscriber }
|
28
|
+
end
|
29
|
+
|
30
|
+
# The signal to start the visit.
|
31
|
+
def start
|
32
|
+
grammar.accept(self)
|
33
|
+
end
|
34
|
+
|
35
|
+
# Visit event. The visitor is about to visit the grammar.
|
36
|
+
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
37
|
+
def start_visit_grammar(aGrammar)
|
38
|
+
broadcast(:before_grammar, aGrammar)
|
39
|
+
end
|
40
|
+
|
41
|
+
# Visit event. The visitor is about to visit the given production.
|
42
|
+
# @param aProduction [Production] the production to visit.
|
43
|
+
def start_visit_production(aProduction)
|
44
|
+
broadcast(:before_production, aProduction)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Visit event. The visitor is about to visit the given rhs of production.
|
48
|
+
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
49
|
+
def start_visit_rhs(rhs)
|
50
|
+
broadcast(:before_rhs, rhs)
|
51
|
+
end
|
52
|
+
|
53
|
+
# Visit event. The visitor is visiting the
|
54
|
+
# given reference production (= non-terminal symbol).
|
55
|
+
# @param aProdRef [ProductionRef] the production reference to visit.
|
56
|
+
def visit_prod_ref(aProdRef)
|
57
|
+
production = aProdRef.production
|
58
|
+
broadcast(:before_non_terminal, production)
|
59
|
+
broadcast(:after_non_terminal, production)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Visit event. The visitor is visiting the
|
63
|
+
# given terminal symbol.
|
64
|
+
# @param aTerminal [Object] the terminal to visit.
|
65
|
+
def visit_terminal(aTerminal)
|
66
|
+
broadcast(:before_terminal, aTerminal)
|
67
|
+
broadcast(:after_terminal, aTerminal)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Visit event. The visitor has completed its visit of the given rhs.
|
71
|
+
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
72
|
+
def end_visit_rhs(rhs)
|
73
|
+
broadcast(:after_rhs, rhs)
|
74
|
+
end
|
75
|
+
|
76
|
+
# Visit event. The visitor has completed its visit of the given production.
|
77
|
+
# @param aProduction [Production] the production to visit.
|
78
|
+
def end_visit_production(aProduction)
|
79
|
+
broadcast(:after_production, aProduction)
|
80
|
+
end
|
81
|
+
|
82
|
+
# Visit event. The visitor has completed the visit of the grammar.
|
83
|
+
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
84
|
+
def end_visit_grammar(aGrammar)
|
85
|
+
broadcast(:after_grammar, aGrammar)
|
86
|
+
end
|
87
|
+
|
88
|
+
private
|
89
|
+
|
90
|
+
# Send a notification to all subscribers.
|
91
|
+
# @param msg [Symbol] event to notify
|
92
|
+
# @param args [Array] arguments of the notification.
|
93
|
+
def broadcast(msg, *args)
|
94
|
+
subscribers.each do |a_subscriber|
|
95
|
+
next unless a_subscriber.respond_to?(msg)
|
96
|
+
a_subscriber.send(msg, *args)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end # class
|
102
100
|
end # module
|
103
101
|
|
104
102
|
# End of file
|