rley 0.0.02
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.rspec +1 -0
- data/.rubocop.yml +74 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.simplecov +7 -0
- data/.travis.yml +21 -0
- data/.yardopts +6 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +19 -0
- data/README.md +19 -0
- data/Rakefile +32 -0
- data/lib/rley/constants.rb +26 -0
- data/lib/rley/parser/chart.rb +39 -0
- data/lib/rley/parser/dotted_item.rb +80 -0
- data/lib/rley/parser/earley_parser.rb +177 -0
- data/lib/rley/parser/parse_state.rb +54 -0
- data/lib/rley/parser/parsing.rb +101 -0
- data/lib/rley/parser/state_set.rb +47 -0
- data/lib/rley/parser/token.rb +21 -0
- data/lib/rley/syntax/grammar.rb +59 -0
- data/lib/rley/syntax/grm_symbol.rb +18 -0
- data/lib/rley/syntax/literal.rb +20 -0
- data/lib/rley/syntax/non_terminal.rb +18 -0
- data/lib/rley/syntax/production.rb +42 -0
- data/lib/rley/syntax/symbol_seq.rb +36 -0
- data/lib/rley/syntax/terminal.rb +18 -0
- data/lib/rley/syntax/verbatim_symbol.rb +21 -0
- data/spec/rley/parser/chart_spec.rb +47 -0
- data/spec/rley/parser/dotted_item_spec.rb +108 -0
- data/spec/rley/parser/earley_parser_spec.rb +271 -0
- data/spec/rley/parser/parse_state_spec.rb +99 -0
- data/spec/rley/parser/parsing_spec.rb +118 -0
- data/spec/rley/parser/state_set_spec.rb +68 -0
- data/spec/rley/parser/token_spec.rb +40 -0
- data/spec/rley/syntax/grammar_spec.rb +149 -0
- data/spec/rley/syntax/grm_symbol_spec.rb +29 -0
- data/spec/rley/syntax/literal_spec.rb +32 -0
- data/spec/rley/syntax/non_terminal_spec.rb +29 -0
- data/spec/rley/syntax/production_spec.rb +50 -0
- data/spec/rley/syntax/symbol_seq_spec.rb +65 -0
- data/spec/rley/syntax/terminal_spec.rb +29 -0
- data/spec/rley/syntax/verbatim_symbol_spec.rb +32 -0
- data/spec/spec_helper.rb +21 -0
- metadata +166 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
module Rley # This module is used as a namespace
  module Parser # This module is used as a namespace
    # A parse state pairs a dotted rule with the input position (origin)
    # at which the matching of the rule's production started.
    # Parse states behave as value objects: two states are equal when
    # their dotted rules and their origins are equal.
    class ParseState
      # The dotted rule tracked by this state. Never nil.
      attr_reader(:dotted_rule)

      # the position in the input that matches the beginning of the rhs
      # of the production.
      attr_reader(:origin)

      # @param aDottedRule [Object] the dotted rule (dotted item) of the
      #   state; presumably a DottedItem -- confirm against callers.
      # @param theOrigin [Object] input position where the rhs begins.
      # @raise [StandardError] when aDottedRule is nil.
      def initialize(aDottedRule, theOrigin)
        @dotted_rule = valid_dotted_rule(aDottedRule)
        @origin = theOrigin
      end

      # Equality comparison. A parse state behaves as a value object.
      # @param other [Object] the object to compare with.
      # @return [Boolean] true when other is this very object, or when it
      #   exposes an equal dotted rule and an equal origin. Objects that
      #   lack these two accessors (nil, strings, ...) are never equal;
      #   they used to trigger a NoMethodError.
      def ==(other)
        return true if equal?(other)

        # Duck-typed guard: anything without both accessors cannot match.
        comparable = other.respond_to?(:dotted_rule) &&
                     other.respond_to?(:origin)
        return false unless comparable

        same_rule = (dotted_rule == other.dotted_rule)
        same_origin = (origin == other.origin)
        (same_rule && same_origin) ? true : false
      end

      # Returns true if the dot is at the end of the rhs of the production.
      # In other words, the complete rhs matches the input.
      # Delegates to the dotted rule's reduce_item? predicate.
      def complete?
        dotted_rule.reduce_item?
      end

      # Next expected symbol in the production
      # (as reported by the dotted rule).
      def next_symbol
        dotted_rule.next_symbol
      end

      private

      # Return the validated dotted item(rule)
      # @raise [StandardError] when the argument is nil.
      def valid_dotted_rule(aDottedRule)
        fail StandardError, 'Dotted item cannot be nil' if aDottedRule.nil?

        aDottedRule
      end
    end # class
  end # module
end # module
|
53
|
+
|
54
|
+
# End of file
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require_relative 'chart'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
  module Parser # This module is used as a namespace
    # Holds the working data of one parse: a private copy of the input
    # tokens and the chart that receives the parse states.
    class Parsing
      # The chart created for this parse (one Chart instance).
      attr_reader(:chart)

      # The sequence of input token to parse
      attr_reader(:tokens)

      # @param startDottedRule [Object] dotted rule handed over to the chart.
      # @param theTokens [Array] the input tokens. A shallow copy is kept.
      def initialize(startDottedRule, theTokens)
        @tokens = theTokens.dup
        @chart = Chart.new(startDottedRule, tokens.size)
      end

      # Return true if the parse was successful (= input tokens
      # followed the syntax specified by the grammar)
      # Success can be detected as follows:
      # The last chart entry has a parse state
      # that involves the start production and
      # has a dot positioned at the end of its rhs.
      # @return [Boolean]
      def success?
        start_production = chart.start_dotted_rule.production
        candidate_states = chart.state_sets.last.states_for(start_production)
        candidate_states.any?(&:complete?)
      end

      # Push a parse state (dotted item + origin) to the
      # chart entry with given index if it isn't yet in the chart entry.
      # @raise [StandardError] when aDottedItem is nil.
      def push_state(aDottedItem, anOrigin, aChartIndex)
        fail StandardError, 'Dotted item may not be nil' if aDottedItem.nil?

        chart.push_state(aDottedItem, anOrigin, aChartIndex)
      end

      # This method is called when a parse state for chart entry at position
      # 'pos' expects a terminal as next symbol.
      # If the input token matches the terminal symbol then:
      # Retrieve all parse states for chart entry at 'aPosition'
      # that have the given terminal as next symbol.
      # For each s of the above states, push to chart entry aPosition + 1
      # a new state like: <next dotted rule, s.origin, aPosition + 1>
      # In other words, we place the dotted rules in the next state set
      # such that the dot appears after terminal.
      # Nothing happens when there is no token at aPosition or when the
      # token's terminal differs from aTerminal.
      # @param aTerminal [Terminal] a terminal symbol that
      #   immediately follows a dot
      # @param aPosition [Integer] position in the input token sequence.
      # @param nextMapping [Proc or Lambda] code to evaluate in order to
      #   determine the "next" dotted rule for a given one.
      def scanning(aTerminal, aPosition, &nextMapping)
        curr_token = tokens[aPosition]

        # Past the last token there is nothing left to scan; without this
        # guard the terminal lookup below would fail on nil.
        return if curr_token.nil?
        return unless curr_token.terminal == aTerminal

        states_expecting(aTerminal, aPosition).each do |s|
          next_item = nextMapping.call(s.dotted_rule)
          push_state(next_item, s.origin, aPosition + 1)
        end
      end

      # This method is called when a parse state at chart entry reaches the end
      # of a production.
      # For every state in chart[aPosition] that is complete (i.e. of the form:
      # { dotted_rule: X -> γ •, origin: j}),
      # Find states s in chart[j] of the form {dotted_rule: Y -> α • X β, origin: i}
      # In other words, rules that predicted the non-terminal X.
      # For each s, add to chart[aPosition] a state of the form
      # { dotted_rule: Y → α X • β, origin: i})
      # @param aState [ParseState] the completed state.
      # @param aPosition [Integer] index of the chart entry to add to.
      # @param nextMapping [Proc or Lambda] maps a dotted rule to its
      #   "next" dotted rule.
      def completion(aState, aPosition, &nextMapping)
        curr_origin = aState.origin
        curr_lhs = aState.dotted_rule.lhs
        states_expecting(curr_lhs, curr_origin).each do |s|
          next_item = nextMapping.call(s.dotted_rule)
          push_state(next_item, s.origin, aPosition)
        end
      end

      # The list of ParseState from the chart entry at given position
      # that expect the given symbol. scanning passes a terminal,
      # completion passes the lhs of a completed rule.
      def states_expecting(aTerminal, aPosition)
        chart[aPosition].states_expecting(aTerminal)
      end
    end # class
  end # module
end # module
|
100
|
+
|
101
|
+
# End of file
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'forwardable' # Delegation
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
  module Parser # This module is used as a namespace
    # A collection of parse states kept in insertion order; a state that
    # is equal (==) to one already stored is not added again.
    class StateSet
      extend Forwardable
      def_delegators :states, :empty?, :size, :first, :each

      # The set of parse states
      attr_reader :states

      # Start with no state at all.
      def initialize
        @states = []
      end

      # Append the given state (if it isn't yet in the set)
      # to the list of states
      # @param aState [ParseState] the state to push.
      # @return [Array, nil] the list of states after the addition, or nil
      #   when the state was already present.
      def push_state(aState)
        return if include?(aState)

        @states << aState
      end

      # The list of ParseState whose dotted rule has the given symbol
      # as next symbol.
      # @param aTerminal [Object] the expected grammar symbol.
      # @return [Array]
      def states_expecting(aTerminal)
        states.select do |state|
          expected = state.dotted_rule.next_symbol
          expected == aTerminal
        end
      end

      # The list of ParseState that involve the given production
      # @param aProduction [Object] the production to look for.
      # @return [Array]
      def states_for(aProduction)
        states.select do |state|
          involved = state.dotted_rule.production
          involved == aProduction
        end
      end

      private

      # Membership test based on ==.
      # TODO: make it better than linear search
      def include?(aState)
        states.include?(aState)
      end
    end # class
  end # module
end # module
|
46
|
+
|
47
|
+
# End of file
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require_relative '../syntax/grammar'
|
2
|
+
require_relative 'dotted_item'
|
3
|
+
|
4
|
+
module Rley # This module is used as a namespace
  module Parser # This module is used as a namespace
    # A token is a simple pair: a lexeme and the terminal attached to it.
    # Both values are stored as given and are read-only afterwards.
    class Token
      # The first constructor argument (the lexeme).
      attr_reader :lexeme

      # The second constructor argument (the terminal).
      attr_reader :terminal

      # @param theLexeme [Object] value returned by #lexeme; presumably the
      #   matched input text -- confirm against callers.
      # @param aTerminal [Object] value returned by #terminal; presumably a
      #   Syntax::Terminal -- confirm against callers.
      def initialize(theLexeme, aTerminal)
        @terminal = aTerminal
        @lexeme = theLexeme
      end
    end # class
  end # module
end # module
|
20
|
+
|
21
|
+
# End of file
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Rley # This module is used as a namespace
  module Syntax # This module is used as a namespace
    # A grammar specifies the syntax of a language.
    # Formally, a grammar has:
    # One start symbol,
    # One or more production rules,
    # Each production has a rhs that is a sequence of grammar symbols.
    # Grammar symbols are categorized into
    # -terminal symbols
    # -non-terminal symbols
    class Grammar
      # A non-terminal symbol that represents all the possible strings
      # in the language. It is the lhs of the first production given.
      attr_reader :start_symbol

      # The list of production rules for the language.
      attr_reader :rules

      # The list of grammar symbols in the language, without duplicates
      # and in order of first appearance.
      attr_reader :symbols

      # @param theProductions [Array of Production] the rules of the
      #   grammar; the lhs of the first one becomes the start symbol.
      # @raise [StandardError] when theProductions is nil or empty.
      def initialize(theProductions)
        @rules = []
        @symbols = []
        checked = validate_productions(theProductions)
        # TODO: use topological sorting
        @start_symbol = checked[0].lhs
        checked.each do |production|
          add_production(production)
        end
      end

      private

      # Validation method. Return the validated list of productions
      # @raise [StandardError] when the list is nil or empty.
      def validate_productions(theProductions)
        missing = theProductions.nil? || theProductions.empty?
        fail StandardError, 'A grammar must have at least one production' if missing

        theProductions
      end

      # Record the production as a rule, then register its lhs and every
      # member of its rhs among the grammar symbols.
      def add_production(aProduction)
        @rules << aProduction
        register_symbol(aProduction.lhs)

        # TODO: remove quadratic execution time
        aProduction.rhs.members.each { |symb| register_symbol(symb) }
      end

      # Add the symbol to the symbol list unless it is already there.
      def register_symbol(aSymbol)
        @symbols << aSymbol unless @symbols.include?(aSymbol)
      end
    end # class
  end # module
end # module
|
58
|
+
|
59
|
+
# End of file
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Rley # This module is used as a namespace
  module Syntax # This module is used as a namespace
    # Abstract class for grammar symbols.
    # A grammar symbol is an element that appears in grammar rules.
    class GrmSymbol
      # The name of the grammar symbol
      attr_reader :name

      # @param aName [#dup] the name of the symbol. The symbol keeps its
      #   own copy (made with dup), not the object passed in.
      def initialize(aName)
        own_copy = aName.dup
        @name = own_copy
      end
    end # class
  end # module
end # module
|
17
|
+
|
18
|
+
# End of file
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require_relative 'terminal' # Load superclass
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
  module Syntax # This module is used as a namespace
    # A literal is a terminal symbol that matches a lexical pattern
    class Literal < Terminal
      # The pattern given at construction time, stored as is.
      # NOTE(review): originally described as "the exact text
      # representation of the word" -- confirm the expected type.
      attr_reader :pattern

      # @param aName [Object] the symbol name, forwarded to Terminal.
      # @param aPattern [Object] the lexical pattern of the literal.
      def initialize(aName, aPattern)
        # Only the name goes up to the superclass constructor.
        super(aName)
        @pattern = aPattern
      end
    end # class
  end # module
end # module
|
19
|
+
|
20
|
+
# End of file
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative 'grm_symbol' # Load superclass
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
  module Syntax # This module is used as a namespace
    # A non-terminal symbol (sometimes called a syntactic variable) represents
    # a composition of terminal or non-terminal symbols
    class NonTerminal < GrmSymbol
      # @param aName [Object] the symbol name, passed unchanged to the
      #   GrmSymbol constructor.
      def initialize(aName)
        # Bare super forwards the very same argument.
        super
      end
    end # class
  end # module
end # module
|
17
|
+
|
18
|
+
# End of file
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require_relative 'symbol_seq'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
  module Syntax # This module is used as a namespace
    # In a context-free grammar, a production is a rule in which
    # its left-hand side (LHS) consists solely of a non-terminal symbol
    # and the right-hand side (RHS) consists of a sequence of symbols.
    # The symbols in RHS can be either terminal or non-terminal symbols.
    # The rule stipulates that the LHS is equivalent to the RHS,
    # in other words every occurrence of the LHS can be substituted to
    # corresponding RHS.
    # NOTE(review): the original header also said "the object id of the
    # production is taken as its LHS"; nothing in this class shows that --
    # confirm or drop.
    class Production
      # The right-hand side (rhs) consists of a sequence of grammar symbols
      # (wrapped in a SymbolSeq).
      attr_reader :rhs

      # The left-hand side of the rule. It must be a non-terminal symbol
      # (this is not checked here).
      attr_reader :lhs

      # Provide common alternate names to lhs and rhs accessors
      alias_method :body, :rhs
      alias_method :head, :lhs

      # @param aNonTerminal [Object] the lhs of the rule, stored as given.
      # @param theSymbols [Object] the rhs symbols, handed to SymbolSeq.new.
      def initialize(aNonTerminal, theSymbols)
        @lhs = aNonTerminal
        @rhs = SymbolSeq.new(theSymbols)
      end

      # Is the rhs empty?
      # @return true if the rhs has no members.
      def empty?
        rhs.empty?
      end
    end # class
  end # module
end # module
|
41
|
+
|
42
|
+
# End of file
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
  module Syntax # This module is used as a namespace
    # A symbol sequence is a suite of grammar symbols
    class SymbolSeq
      extend Forwardable
      def_delegators :@members, :empty?, :size, :[]

      # The sequence of symbols
      attr_reader :members

      # @param theSymbols [#dup] the symbols of the sequence. A copy made
      #   with dup is kept, not the object passed in.
      def initialize(theSymbols)
        @members = theSymbols.dup
      end

      # Equality operator.
      # @param other [SymbolSeq, Array] the object to compare with.
      # @return [Boolean] true for the very same object, or when the
      #   members are equal to other's members (SymbolSeq) or to other
      #   itself (Array).
      # @raise [StandardError] when other is neither a SymbolSeq nor an Array.
      def ==(other)
        return true if equal?(other)

        if other.is_a?(SymbolSeq)
          other.members == members
        elsif other.is_a?(Array)
          other == members
        else
          fail StandardError, "Cannot compare a SymbolSeq with a #{other.class}"
        end
      end
    end # class
  end # module
end # module
|
35
|
+
|
36
|
+
# End of file
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative 'grm_symbol' # Load superclass
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
  module Syntax # This module is used as a namespace
    # A terminal symbol represents a class of words in the language
    # defined by the grammar.
    class Terminal < GrmSymbol
      # @param aName [Object] the symbol name, passed unchanged to the
      #   GrmSymbol constructor.
      def initialize(aName)
        # Bare super forwards the very same argument.
        super
      end
    end # class
  end # module
end # module
|
17
|
+
|
18
|
+
# End of file
|