sequitur 0.1.18 → 0.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +163 -49
- data/.travis.yml +13 -10
- data/CHANGELOG.md +9 -0
- data/Gemfile +2 -3
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/appveyor.yml +13 -10
- data/examples/integer_sample.rb +5 -6
- data/examples/porridge.rb +4 -6
- data/examples/simple_case.rb +5 -6
- data/examples/symbol_sample.rb +5 -8
- data/examples/word_sample.rb +1 -2
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/digram.rb +38 -38
- data/lib/sequitur/dynamic_grammar.rb +91 -95
- data/lib/sequitur/formatter/base_text.rb +1 -1
- data/lib/sequitur/formatter/debug.rb +2 -2
- data/lib/sequitur/grammar_visitor.rb +96 -98
- data/lib/sequitur/production.rb +10 -19
- data/lib/sequitur/production_ref.rb +104 -105
- data/lib/sequitur/sequitur_grammar.rb +3 -3
- data/lib/sequitur/symbol_sequence.rb +7 -11
- data/spec/sequitur/digram_spec.rb +8 -8
- data/spec/sequitur/production_spec.rb +7 -7
- data/spec/sequitur/sequitur_grammar_spec.rb +10 -10
- data/spec/sequitur/symbol_sequence_spec.rb +4 -4
- data/spec/spec_helper.rb +6 -4
- metadata +44 -29
data/examples/symbol_sample.rb
CHANGED
@@ -1,19 +1,17 @@
|
|
1
|
-
require 'sequitur'
|
1
|
+
require 'sequitur' # Load the Sequitur library
|
2
2
|
|
3
3
|
#
|
4
4
|
# Purpose: show how to apply Sequitur on a stream of Symbol values
|
5
5
|
#
|
6
|
-
input_sequence =
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
:dd, :ee
|
6
|
+
input_sequence = %i[
|
7
|
+
aa bb aa bb cc
|
8
|
+
aa bb cc dd aa
|
9
|
+
bb cc dd ee
|
11
10
|
]
|
12
11
|
|
13
12
|
# Generate the grammar from the sequence
|
14
13
|
grammar = Sequitur.build_from(input_sequence)
|
15
14
|
|
16
|
-
|
17
15
|
# Use a formatter to display the grammar rules on the console output
|
18
16
|
formatter = Sequitur::Formatter::BaseText.new(STDOUT)
|
19
17
|
|
@@ -25,4 +23,3 @@ formatter.render(grammar.visitor)
|
|
25
23
|
# P1 : aa bb.
|
26
24
|
# P2 : P1 cc.
|
27
25
|
# P3 : P2 dd.
|
28
|
-
|
data/examples/word_sample.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'sequitur'
|
1
|
+
require 'sequitur' # Load the Sequitur library
|
2
2
|
|
3
3
|
#
|
4
4
|
# Purpose: show how to apply Sequitur on a stream of text words
|
@@ -27,4 +27,3 @@ formatter.render(grammar.visitor)
|
|
27
27
|
# start : P2 6 Error illegal P1 20 P2 9.
|
28
28
|
# P1 : character at position.
|
29
29
|
# P2 : Error unknown P1.
|
30
|
-
|
data/lib/sequitur/constants.rb
CHANGED
data/lib/sequitur/digram.rb
CHANGED
@@ -1,49 +1,49 @@
|
|
1
1
|
# File: digram.rb
|
2
2
|
|
3
3
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
4
|
-
# In linguistics, a digram is a sequence of two letters.
|
5
|
-
# In Sequitur, a digram is a sequence of two consecutive symbols that
|
6
|
-
# appear in a production rule. Each symbol in a digram
|
7
|
-
# can be a terminal or not.
|
8
|
-
class Digram
|
9
|
-
|
10
|
-
|
11
|
-
|
4
|
+
# In linguistics, a digram is a sequence of two letters.
|
5
|
+
# In Sequitur, a digram is a sequence of two consecutive symbols that
|
6
|
+
# appear in a production rule. Each symbol in a digram
|
7
|
+
# can be a terminal or not.
|
8
|
+
class Digram
|
9
|
+
# The sequence of two consecutive grammar symbols.
|
10
|
+
# The two symbols should respond to the :hash message.
|
11
|
+
attr_reader(:symbols)
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
# A unique hash key of the digram
|
14
|
+
attr_reader(:key)
|
15
15
|
|
16
|
-
|
17
|
-
|
16
|
+
# The production in which the digram occurs
|
17
|
+
attr_reader(:production)
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
19
|
+
# Constructor.
|
20
|
+
# A digram represents a sequence of two symbols
|
21
|
+
# (that appears in a rhs of a production).
|
22
|
+
# Terminal symbols must respond to the :hash message.
|
23
|
+
# @param symbol1 [StringOrSymbol] First element of the digram
|
24
|
+
# @param symbol2 [StringOrSymbol] Second element of the digram
|
25
|
+
# @param aProduction [Production] Production in whose RHS
|
26
|
+
# the sequence symbol1 symbol2 appears.
|
27
|
+
def initialize(symbol1, symbol2, aProduction)
|
28
|
+
@symbols = [symbol1, symbol2]
|
29
|
+
@key = symbol1.hash.to_s(16) + ':' + symbol2.hash.to_s(16)
|
30
|
+
@production = aProduction
|
31
|
+
end
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
33
|
+
# Equality testing.
|
34
|
+
# true iff keys of both digrams are equal, false otherwise
|
35
|
+
# @param other [Digram] another digram to compare with
|
36
|
+
# @return [true/false]
|
37
|
+
def ==(other)
|
38
|
+
return key == other.key
|
39
|
+
end
|
40
40
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end # class
|
41
|
+
# Does the digram consist of the same symbol twice?
|
42
|
+
# @return [true/false] true when symbols.first == symbols.last
|
43
|
+
def repeating?
|
44
|
+
return symbols[0] == symbols[1]
|
45
|
+
end
|
46
|
+
end # class
|
47
47
|
end # module
|
48
48
|
|
49
49
|
# End of file
|
@@ -2,100 +2,96 @@ require_relative 'production'
|
|
2
2
|
require_relative 'grammar_visitor'
|
3
3
|
|
4
4
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
5
|
-
# A dynamic grammar is a context-free grammar that can be built incrementally.
|
6
|
-
# Formally, a grammar has:
|
7
|
-
# One start production
|
8
|
-
# Zero or more other productions
|
9
|
-
# Each production has a rhs that is a sequence of grammar symbols.
|
10
|
-
# Grammar symbols are categorized into
|
11
|
-
# -terminal symbols (i.e. String, Ruby Symbol,...)
|
12
|
-
# -non-terminal symbols (i.e. ProductionRef)
|
13
|
-
class DynamicGrammar
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
#
|
96
|
-
def append_symbol_to(aProduction, aSymbol)
|
97
|
-
aProduction.append_symbol(aSymbol)
|
98
|
-
end
|
99
|
-
end # class
|
5
|
+
# A dynamic grammar is a context-free grammar that can be built incrementally.
|
6
|
+
# Formally, a grammar has:
|
7
|
+
# One start production
|
8
|
+
# Zero or more other productions
|
9
|
+
# Each production has a rhs that is a sequence of grammar symbols.
|
10
|
+
# Grammar symbols are categorized into
|
11
|
+
# -terminal symbols (e.g. String, Ruby Symbol,...)
|
12
|
+
# -non-terminal symbols (i.e. ProductionRef)
|
13
|
+
class DynamicGrammar
|
14
|
+
# Link to the start production.
|
15
|
+
attr_reader(:start)
|
16
|
+
|
17
|
+
# The set of production rules of the grammar
|
18
|
+
attr_reader(:productions)
|
19
|
+
|
20
|
+
# nodoc Trace the execution of the algorithm.
|
21
|
+
attr(:trace)
|
22
|
+
|
23
|
+
# Constructor.
|
24
|
+
# Build a grammar with one empty rule as the start rule.
|
25
|
+
def initialize
|
26
|
+
@start = Production.new
|
27
|
+
@productions = [start]
|
28
|
+
@trace = false
|
29
|
+
end
|
30
|
+
|
31
|
+
# Emit a text representation of the grammar.
|
32
|
+
# Each production rule is emitted per line.
|
33
|
+
# @return [String]
|
34
|
+
def to_string
|
35
|
+
rule_text = productions.map(&:to_string).join("\n")
|
36
|
+
return rule_text
|
37
|
+
end
|
38
|
+
|
39
|
+
# Add a given production to the grammar.
|
40
|
+
# @param aProduction [Production]
|
41
|
+
def add_production(aProduction)
|
42
|
+
# TODO: remove output
|
43
|
+
puts "Adding #{aProduction.object_id}" if trace
|
44
|
+
puts aProduction.to_string if trace
|
45
|
+
productions << aProduction
|
46
|
+
end
|
47
|
+
|
48
|
+
# Remove a production with given index from the grammar
|
49
|
+
# @param anIndex [Integer]
|
50
|
+
# @return [Production] the production removed from the grammar.
|
51
|
+
def remove_production(anIndex)
|
52
|
+
puts "Before production removal #{productions[anIndex].object_id}" if trace
|
53
|
+
puts to_string if trace
|
54
|
+
prod = productions.delete_at(anIndex)
|
55
|
+
# TODO: remove output
|
56
|
+
puts('Removed: ' + prod.to_string) if trace
|
57
|
+
prod.clear_rhs
|
58
|
+
|
59
|
+
return prod
|
60
|
+
end
|
61
|
+
|
62
|
+
# Add the given token to the grammar.
|
63
|
+
# Append the token to the rhs of the start rule.
|
64
|
+
# @param aToken [Object] input token to add
|
65
|
+
def add_token(aToken)
|
66
|
+
append_symbol_to(start, aToken)
|
67
|
+
end
|
68
|
+
|
69
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
70
|
+
# A visitee is expected to accept the visit from a visitor object
|
71
|
+
# @param aVisitor [GrammarVisitor] the visitor object
|
72
|
+
def accept(aVisitor)
|
73
|
+
aVisitor.start_visit_grammar(self)
|
74
|
+
|
75
|
+
# Let's proceed with the visit of productions
|
76
|
+
productions.each { |prod| prod.accept(aVisitor) }
|
77
|
+
|
78
|
+
aVisitor.end_visit_grammar(self)
|
79
|
+
end
|
80
|
+
|
81
|
+
# Factory method. Returns a visitor for this grammar.
|
82
|
+
# @return [GrammarVisitor]
|
83
|
+
def visitor
|
84
|
+
return GrammarVisitor.new(self)
|
85
|
+
end
|
86
|
+
|
87
|
+
protected
|
88
|
+
|
89
|
+
# Append a given symbol to the rhs of passed production.
|
90
|
+
# @param aProduction [Production]
|
91
|
+
# @param aSymbol [Object]
|
92
|
+
def append_symbol_to(aProduction, aSymbol)
|
93
|
+
aProduction.append_symbol(aSymbol)
|
94
|
+
end
|
95
|
+
end # class
|
100
96
|
end # module
|
101
97
|
# End of file
|
@@ -1,104 +1,102 @@
|
|
1
1
|
module Sequitur # Module for classes implementing the Sequitur algorithm
|
2
2
|
# A visitor class dedicated to visiting a grammar.
|
3
|
-
class GrammarVisitor
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
# Add a subscriber for the visit event notification.
|
18
|
-
# @param aSubscriber [Object]
|
19
|
-
def subscribe(aSubscriber)
|
20
|
-
subscribers << aSubscriber
|
21
|
-
end
|
22
|
-
|
23
|
-
# Remove the given object from the subscription list.
|
24
|
-
# The object won't be notified of visit events.
|
25
|
-
# @param aSubscriber [Object]
|
26
|
-
def unsubscribe(aSubscriber)
|
27
|
-
subscribers.delete_if { |entry| entry == aSubscriber }
|
28
|
-
end
|
29
|
-
|
30
|
-
# The signal to start the visit.
|
31
|
-
def start()
|
32
|
-
grammar.accept(self)
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
# Visit event. The visitor is about to visit the grammar.
|
37
|
-
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
38
|
-
def start_visit_grammar(aGrammar)
|
39
|
-
broadcast(:before_grammar, aGrammar)
|
40
|
-
end
|
41
|
-
|
42
|
-
|
43
|
-
# Visit event. The visitor is about to visit the given production.
|
44
|
-
# @param aProduction [Production] the production to visit.
|
45
|
-
def start_visit_production(aProduction)
|
46
|
-
broadcast(:before_production, aProduction)
|
47
|
-
end
|
48
|
-
|
49
|
-
# Visit event. The visitor is about to visit the given rhs of production.
|
50
|
-
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
51
|
-
def start_visit_rhs(rhs)
|
52
|
-
broadcast(:before_rhs, rhs)
|
53
|
-
end
|
54
|
-
|
55
|
-
# Visit event. The visitor is visiting the
|
56
|
-
# given reference production (= non-terminal symbol).
|
57
|
-
# @param aProdRef [ProductionRef] the production reference to visit.
|
58
|
-
def visit_prod_ref(aProdRef)
|
59
|
-
production = aProdRef.production
|
60
|
-
broadcast(:before_non_terminal, production)
|
61
|
-
broadcast(:after_non_terminal, production)
|
62
|
-
end
|
63
|
-
|
64
|
-
# Visit event. The visitor is visiting the
|
65
|
-
# given terminal symbol.
|
66
|
-
# @param aTerminal [Object] the terminal to visit.
|
67
|
-
def visit_terminal(aTerminal)
|
68
|
-
broadcast(:before_terminal, aTerminal)
|
69
|
-
broadcast(:after_terminal, aTerminal)
|
70
|
-
end
|
71
|
-
|
72
|
-
# Visit event. The visitor has completed its visit of the given rhs.
|
73
|
-
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
74
|
-
def end_visit_rhs(rhs)
|
75
|
-
broadcast(:after_rhs, rhs)
|
76
|
-
end
|
77
|
-
|
78
|
-
# Visit event. The visitor has completed its visit of the given production.
|
79
|
-
# @param aProduction [Production] the production to visit.
|
80
|
-
def end_visit_production(aProduction)
|
81
|
-
broadcast(:after_production, aProduction)
|
82
|
-
end
|
83
|
-
|
84
|
-
# Visit event. The visitor has completed the visit of the grammar.
|
85
|
-
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
86
|
-
def end_visit_grammar(aGrammar)
|
87
|
-
broadcast(:after_grammar, aGrammar)
|
88
|
-
end
|
89
|
-
|
90
|
-
private
|
91
|
-
|
92
|
-
# Send a notification to all subscribers.
|
93
|
-
# @param msg [Symbol] event to notify
|
94
|
-
# @param args [Array] arguments of the notification.
|
95
|
-
def broadcast(msg, *args)
|
96
|
-
subscribers.each do |a_subscriber|
|
97
|
-
next unless a_subscriber.respond_to?(msg)
|
98
|
-
a_subscriber.send(msg, *args)
|
3
|
+
class GrammarVisitor
|
4
|
+
# Link to the grammar to visit
|
5
|
+
attr_reader(:grammar)
|
6
|
+
|
7
|
+
# List of objects that subscribed to the visit event notification.
|
8
|
+
attr_reader(:subscribers)
|
9
|
+
|
10
|
+
# Build a visitor for the given grammar.
|
11
|
+
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
12
|
+
def initialize(aGrammar)
|
13
|
+
@grammar = aGrammar
|
14
|
+
@subscribers = []
|
99
15
|
end
|
100
|
-
|
101
|
-
|
16
|
+
|
17
|
+
# Add a subscriber for the visit event notification.
|
18
|
+
# @param aSubscriber [Object]
|
19
|
+
def subscribe(aSubscriber)
|
20
|
+
subscribers << aSubscriber
|
21
|
+
end
|
22
|
+
|
23
|
+
# Remove the given object from the subscription list.
|
24
|
+
# The object won't be notified of visit events.
|
25
|
+
# @param aSubscriber [Object]
|
26
|
+
def unsubscribe(aSubscriber)
|
27
|
+
subscribers.delete_if { |entry| entry == aSubscriber }
|
28
|
+
end
|
29
|
+
|
30
|
+
# The signal to start the visit.
|
31
|
+
def start
|
32
|
+
grammar.accept(self)
|
33
|
+
end
|
34
|
+
|
35
|
+
# Visit event. The visitor is about to visit the grammar.
|
36
|
+
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
37
|
+
def start_visit_grammar(aGrammar)
|
38
|
+
broadcast(:before_grammar, aGrammar)
|
39
|
+
end
|
40
|
+
|
41
|
+
# Visit event. The visitor is about to visit the given production.
|
42
|
+
# @param aProduction [Production] the production to visit.
|
43
|
+
def start_visit_production(aProduction)
|
44
|
+
broadcast(:before_production, aProduction)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Visit event. The visitor is about to visit the given rhs of production.
|
48
|
+
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
49
|
+
def start_visit_rhs(rhs)
|
50
|
+
broadcast(:before_rhs, rhs)
|
51
|
+
end
|
52
|
+
|
53
|
+
# Visit event. The visitor is visiting the
|
54
|
+
# given reference production (= non-terminal symbol).
|
55
|
+
# @param aProdRef [ProductionRef] the production reference to visit.
|
56
|
+
def visit_prod_ref(aProdRef)
|
57
|
+
production = aProdRef.production
|
58
|
+
broadcast(:before_non_terminal, production)
|
59
|
+
broadcast(:after_non_terminal, production)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Visit event. The visitor is visiting the
|
63
|
+
# given terminal symbol.
|
64
|
+
# @param aTerminal [Object] the terminal to visit.
|
65
|
+
def visit_terminal(aTerminal)
|
66
|
+
broadcast(:before_terminal, aTerminal)
|
67
|
+
broadcast(:after_terminal, aTerminal)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Visit event. The visitor has completed its visit of the given rhs.
|
71
|
+
# @param rhs [SymbolSequence] the rhs of a production to visit.
|
72
|
+
def end_visit_rhs(rhs)
|
73
|
+
broadcast(:after_rhs, rhs)
|
74
|
+
end
|
75
|
+
|
76
|
+
# Visit event. The visitor has completed its visit of the given production.
|
77
|
+
# @param aProduction [Production] the production to visit.
|
78
|
+
def end_visit_production(aProduction)
|
79
|
+
broadcast(:after_production, aProduction)
|
80
|
+
end
|
81
|
+
|
82
|
+
# Visit event. The visitor has completed the visit of the grammar.
|
83
|
+
# @param aGrammar [DynamicGrammar-like] the grammar to visit.
|
84
|
+
def end_visit_grammar(aGrammar)
|
85
|
+
broadcast(:after_grammar, aGrammar)
|
86
|
+
end
|
87
|
+
|
88
|
+
private
|
89
|
+
|
90
|
+
# Send a notification to all subscribers.
|
91
|
+
# @param msg [Symbol] event to notify
|
92
|
+
# @param args [Array] arguments of the notification.
|
93
|
+
def broadcast(msg, *args)
|
94
|
+
subscribers.each do |a_subscriber|
|
95
|
+
next unless a_subscriber.respond_to?(msg)
|
96
|
+
a_subscriber.send(msg, *args)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end # class
|
102
100
|
end # module
|
103
101
|
|
104
102
|
# End of file
|