rley 0.0.02
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.rspec +1 -0
- data/.rubocop.yml +74 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.simplecov +7 -0
- data/.travis.yml +21 -0
- data/.yardopts +6 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +19 -0
- data/README.md +19 -0
- data/Rakefile +32 -0
- data/lib/rley/constants.rb +26 -0
- data/lib/rley/parser/chart.rb +39 -0
- data/lib/rley/parser/dotted_item.rb +80 -0
- data/lib/rley/parser/earley_parser.rb +177 -0
- data/lib/rley/parser/parse_state.rb +54 -0
- data/lib/rley/parser/parsing.rb +101 -0
- data/lib/rley/parser/state_set.rb +47 -0
- data/lib/rley/parser/token.rb +21 -0
- data/lib/rley/syntax/grammar.rb +59 -0
- data/lib/rley/syntax/grm_symbol.rb +18 -0
- data/lib/rley/syntax/literal.rb +20 -0
- data/lib/rley/syntax/non_terminal.rb +18 -0
- data/lib/rley/syntax/production.rb +42 -0
- data/lib/rley/syntax/symbol_seq.rb +36 -0
- data/lib/rley/syntax/terminal.rb +18 -0
- data/lib/rley/syntax/verbatim_symbol.rb +21 -0
- data/spec/rley/parser/chart_spec.rb +47 -0
- data/spec/rley/parser/dotted_item_spec.rb +108 -0
- data/spec/rley/parser/earley_parser_spec.rb +271 -0
- data/spec/rley/parser/parse_state_spec.rb +99 -0
- data/spec/rley/parser/parsing_spec.rb +118 -0
- data/spec/rley/parser/state_set_spec.rb +68 -0
- data/spec/rley/parser/token_spec.rb +40 -0
- data/spec/rley/syntax/grammar_spec.rb +149 -0
- data/spec/rley/syntax/grm_symbol_spec.rb +29 -0
- data/spec/rley/syntax/literal_spec.rb +32 -0
- data/spec/rley/syntax/non_terminal_spec.rb +29 -0
- data/spec/rley/syntax/production_spec.rb +50 -0
- data/spec/rley/syntax/symbol_seq_spec.rb +65 -0
- data/spec/rley/syntax/terminal_spec.rb +29 -0
- data/spec/rley/syntax/verbatim_symbol_spec.rb +32 -0
- data/spec/spec_helper.rb +21 -0
- metadata +166 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
module Rley # This module is used as a namespace
|
2
|
+
module Parser # This module is used as a namespace
|
3
|
+
|
4
|
+
# A parse state pairs a dotted rule with the input position (origin)
# at which the recognition of that rule's rhs started.
# Parse states behave as value objects: two states holding equal dotted
# rules and equal origins compare as equal.
class ParseState
  # The dotted rule (dotted item) tracked by this state.
  attr_reader(:dotted_rule)

  # the position in the input that matches the beginning of the rhs
  # of the production.
  attr_reader(:origin)

  # @param aDottedRule [Object] the dotted rule of the state. May not be nil.
  # @param theOrigin [Object] the origin of the state; stored as given.
  # @raise [StandardError] when aDottedRule is nil.
  def initialize(aDottedRule, theOrigin)
    @dotted_rule = valid_dotted_rule(aDottedRule)
    @origin = theOrigin
  end

  # Equality comparison. A parse state behaves as a value object.
  # @param other [Object] the object to compare with.
  # @return [Boolean] true when other is this very object, or when it exposes
  #   an equal dotted rule and an equal origin. Objects that don't expose
  #   both accessors (nil, strings, ...) are simply unequal.
  def ==(other)
    return true if equal?(other)

    # Without this guard a comparison with nil or any foreign object
    # raises NoMethodError instead of answering false.
    comparable = other.respond_to?(:dotted_rule) && other.respond_to?(:origin)
    return false unless comparable

    (dotted_rule == other.dotted_rule) && (origin == other.origin)
  end

  # Returns true if the dot is at the end of the rhs of the production.
  # In other words, the complete rhs matches the input.
  # The answer is delegated to the dotted rule (reduce_item?).
  def complete?
    dotted_rule.reduce_item?
  end

  # Next expected symbol in the production
  # (delegated to the dotted rule).
  def next_symbol
    dotted_rule.next_symbol
  end

  private

  # Return the validated dotted item(rule)
  # @raise [StandardError] when the argument is nil.
  def valid_dotted_rule(aDottedRule)
    fail StandardError, 'Dotted item cannot be nil' if aDottedRule.nil?

    aDottedRule
  end
end # class
|
50
|
+
|
51
|
+
end # module
|
52
|
+
end # module
|
53
|
+
|
54
|
+
# End of file
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require_relative 'chart'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Parser # This module is used as a namespace
|
5
|
+
|
6
|
+
# Holds the working data of one parse: the input tokens and the chart
# that the scanning and completion steps fill with parse states.
class Parsing
  # The chart created for this parse (see #initialize).
  attr_reader(:chart)

  # The sequence of input tokens to parse
  attr_reader(:tokens)

  # @param startDottedRule [Object] dotted rule passed to Chart.new.
  # @param theTokens [Array] the input tokens. A shallow copy is kept, so
  #   later changes to the caller's array don't affect this parse.
  def initialize(startDottedRule, theTokens)
    @tokens = theTokens.dup
    @chart = Chart.new(startDottedRule, tokens.size)
  end

  # Return true if the parse was successful (= input tokens
  # followed the syntax specified by the grammar)
  # Success can be detected as follows:
  # The last chart entry has a parse state
  # that involves the start production and
  # has a dot positioned at the end of its rhs.
  # @return [Boolean]
  def success?
    start_production = chart.start_dotted_rule.production
    last_chart_entry = chart.state_sets.last
    candidate_states = last_chart_entry.states_for(start_production)

    candidate_states.any?(&:complete?)
  end

  # Push a parse state (dotted item + origin) to the
  # chart entry with given index if it isn't yet in the chart entry.
  # @raise [StandardError] when aDottedItem is nil.
  def push_state(aDottedItem, anOrigin, aChartIndex)
    fail StandardError, 'Dotted item may not be nil' if aDottedItem.nil?

    chart.push_state(aDottedItem, anOrigin, aChartIndex)
  end

  # This method is called when a parse state for chart entry at position
  # 'aPosition' expects a terminal as next symbol.
  # If the input token matches the terminal symbol then:
  # Retrieve all parse states for chart entry at 'aPosition'
  # that have the given terminal as next symbol.
  # For each s of the above states, push to chart entry aPosition + 1
  # a new state like: <next dotted rule, s.origin, aPosition + 1>
  # In other words, we place the dotted rules in the next state set
  # such that the dot appears after terminal.
  # When there is no token at aPosition (end of input) nothing can match,
  # so the chart is left untouched and nil is returned.
  # @param aTerminal [Terminal] a terminal symbol that
  #   immediately follows a dot
  # @param aPosition [Integer] position in the input token sequence.
  # @param nextMapping [Proc or Lambda] code to evaluate in order to
  #   determine the "next" dotted rule for a given one.
  def scanning(aTerminal, aPosition, &nextMapping)
    curr_token = tokens[aPosition]

    # tokens[aPosition] is nil past the last token; calling #terminal on it
    # would raise NoMethodError.
    return if curr_token.nil?
    return unless curr_token.terminal == aTerminal

    states_expecting(aTerminal, aPosition).each do |s|
      next_item = nextMapping.call(s.dotted_rule)
      push_state(next_item, s.origin, aPosition + 1)
    end
  end

  # This method is called when a parse state at chart entry reaches the end
  # of a production.
  # For every state in chart[aPosition] that is complete (i.e. of the form:
  # { dotted_rule: X -> γ •, origin: j}),
  # Find states s in chart[j] of the form {dotted_rule: Y -> α • X β, origin: i}
  # In other words, rules that predicted the non-terminal X.
  # For each s, add to chart[aPosition] a state of the form
  # { dotted_rule: Y → α X • β, origin: i})
  # @param aState [ParseState] the complete state.
  # @param aPosition [Integer] index of the chart entry receiving new states.
  # @param nextMapping [Proc or Lambda] code to evaluate in order to
  #   determine the "next" dotted rule for a given one.
  def completion(aState, aPosition, &nextMapping)
    curr_origin = aState.origin
    curr_lhs = aState.dotted_rule.lhs

    states_expecting(curr_lhs, curr_origin).each do |s|
      next_item = nextMapping.call(s.dotted_rule)
      push_state(next_item, s.origin, aPosition)
    end
  end

  # The list of ParseState from the chart entry at given position
  # that expect the given symbol. #scanning passes a terminal here,
  # #completion passes the lhs of a completed rule.
  def states_expecting(aTerminal, aPosition)
    chart[aPosition].states_expecting(aTerminal)
  end
end # class
|
97
|
+
|
98
|
+
end # module
|
99
|
+
end # module
|
100
|
+
|
101
|
+
# End of file
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'forwardable' # Delegation
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Parser # This module is used as a namespace
|
5
|
+
|
6
|
+
# An ordered collection of parse states in which a given state
# occurs at most once.
class StateSet
  extend Forwardable
  def_delegators :states, :empty?, :size, :first, :each

  # The set of parse states
  attr_reader :states

  # Start with no state at all.
  def initialize
    @states = []
  end

  # Append the given state (if it isn't yet in the set)
  # to the list of states
  # @param aState [ParseState] the state to push.
  # @return [Array, nil] the list of states when the state was appended,
  #   nil when an equal state was already present.
  def push_state(aState)
    return if include?(aState)

    states.push(aState)
  end

  # The list of ParseState whose dotted rule has the given symbol
  # as next symbol.
  def states_expecting(aTerminal)
    states.select do |state|
      expected = state.dotted_rule.next_symbol
      expected == aTerminal
    end
  end

  # The list of ParseState that involve the given production
  def states_for(aProduction)
    states.select do |state|
      involved = state.dotted_rule.production
      involved == aProduction
    end
  end

  private

  # Is an equal state already part of the set?
  # TODO: make it better than linear search
  def include?(aState)
    states.include?(aState)
  end
end # class
|
43
|
+
|
44
|
+
end # module
|
45
|
+
end # module
|
46
|
+
|
47
|
+
# End of file
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require_relative '../syntax/grammar'
|
2
|
+
require_relative 'dotted_item'
|
3
|
+
|
4
|
+
module Rley # This module is used as a namespace
|
5
|
+
module Parser # This module is used as a namespace
|
6
|
+
|
7
|
+
# A token pairs a lexeme with a terminal symbol.
# Both values are stored exactly as given.
class Token
  # lexeme: the first constructor argument.
  # terminal: the second constructor argument.
  attr_reader :lexeme, :terminal

  # @param theLexeme [Object] value returned by #lexeme.
  # @param aTerminal [Object] value returned by #terminal.
  def initialize(theLexeme, aTerminal)
    @lexeme, @terminal = theLexeme, aTerminal
  end
end # class
|
17
|
+
|
18
|
+
end # module
|
19
|
+
end # module
|
20
|
+
|
21
|
+
# End of file
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Rley # This module is used as a namespace
|
2
|
+
module Syntax # This module is used as a namespace
|
3
|
+
|
4
|
+
# A grammar specifies the syntax of a language.
# Formally, a grammar has:
# One start symbol,
# One or more other production rules,
# Each production has a rhs that is a sequence of grammar symbols.
# Grammar symbols are categorized into
# -terminal symbols
# -non-terminal symbols
class Grammar
  # A non-terminal symbol that represents all the possible strings
  # in the language. It is the lhs of the first production given.
  attr_reader :start_symbol

  # The list of production rules for the language.
  attr_reader :rules

  # The list of grammar symbols in the language
  # (each symbol appears once, in order of first occurrence).
  attr_reader :symbols

  # @param theProductions [Array of Production] the production rules.
  #   The lhs of the first one becomes the start symbol.
  # @raise [StandardError] when theProductions is nil or empty.
  def initialize(theProductions)
    @rules = []
    @symbols = []
    productions = validate_productions(theProductions)
    # TODO: use topological sorting
    @start_symbol = productions[0].lhs
    productions.each { |production| add_production(production) }
  end

  private

  # Validation method. Return the validated list of productions
  def validate_productions(theProductions)
    if theProductions.nil? || theProductions.empty?
      fail StandardError, 'A grammar must have at least one production'
    end

    theProductions
  end

  # Register the production as a rule and record its lhs, then the
  # members of its rhs, as grammar symbols unless already known.
  def add_production(aProduction)
    @rules << aProduction

    # TODO: remove quadratic execution time
    [aProduction.lhs, *aProduction.rhs.members].each do |symb|
      @symbols << symb unless @symbols.include?(symb)
    end
  end
end # class
|
55
|
+
|
56
|
+
end # module
|
57
|
+
end # module
|
58
|
+
|
59
|
+
# End of file
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Rley # This module is used as a namespace
|
2
|
+
module Syntax # This module is used as a namespace
|
3
|
+
|
4
|
+
# Abstract class for grammar symbols.
# A grammar symbol is an element that appears in grammar rules.
class GrmSymbol
  # The name of the grammar symbol
  attr_reader :name

  # @param aName [Object] the name of the symbol. A duplicate is kept,
  #   so later changes to the argument don't affect the symbol.
  def initialize(aName)
    duplicate = aName.dup
    @name = duplicate
  end
end # class
|
14
|
+
|
15
|
+
end # module
|
16
|
+
end # module
|
17
|
+
|
18
|
+
# End of file
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require_relative 'terminal' # Load superclass
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Syntax # This module is used as a namespace
|
5
|
+
|
6
|
+
# A literal is a terminal symbol that matches a lexical pattern
class Literal < Terminal
  # The pattern given at construction time (stored as is).
  attr_reader :pattern

  # @param aName [Object] the name of the symbol (passed to the superclass).
  # @param aPattern [Object] the pattern returned by #pattern.
  def initialize(aName, aPattern)
    super(aName)
    @pattern = aPattern
  end
end # class
|
16
|
+
|
17
|
+
end # module
|
18
|
+
end # module
|
19
|
+
|
20
|
+
# End of file
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative 'grm_symbol' # Load superclass
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Syntax # This module is used as a namespace
|
5
|
+
|
6
|
+
# A non-terminal symbol (sometimes called a syntactic variable) represents
# a composition of terminal or non-terminal symbols
class NonTerminal < GrmSymbol
  # @param aName [Object] the name of the symbol; handed unchanged
  #   to the superclass constructor.
  def initialize(aName)
    super
  end
end # class
|
14
|
+
|
15
|
+
end # module
|
16
|
+
end # module
|
17
|
+
|
18
|
+
# End of file
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require_relative 'symbol_seq'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Syntax # This module is used as a namespace
|
5
|
+
|
6
|
+
# In a context-free grammar, a production is a rule in which
# its left-hand side (LHS) consists solely of a non-terminal symbol
# and the right-hand side (RHS) consists of a sequence of symbols.
# The symbols in RHS can be either terminal or non-terminal symbols.
# The rule stipulates that the LHS is equivalent to the RHS,
# in other words every occurrence of the LHS can be substituted with the
# corresponding RHS.
# NOTE(review): an earlier comment stated "the object id of the production
# is taken as its LHS"; nothing in this class implements that - confirm
# the intent.
class Production
  # lhs: the left-hand side of the rule. It must be a non-terminal symbol.
  # rhs: the right-hand side, a sequence of grammar symbols (a SymbolSeq).
  attr_reader :lhs, :rhs

  # Provide common alternate names to lhs and rhs accessors
  alias_method :head, :lhs
  alias_method :body, :rhs

  # @param aNonTerminal [NonTerminal] the lhs of the rule (stored as given).
  # @param theSymbols [Array] the symbols of the rhs; they are wrapped
  #   in a new SymbolSeq.
  def initialize(aNonTerminal, theSymbols)
    @lhs = aNonTerminal
    @rhs = SymbolSeq.new(theSymbols)
  end

  # Is the rhs empty?
  # @return true if the rhs has no members.
  def empty?
    rhs.empty?
  end
end # class
|
38
|
+
|
39
|
+
end # module
|
40
|
+
end # module
|
41
|
+
|
42
|
+
# End of file
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Syntax # This module is used as a namespace
|
5
|
+
|
6
|
+
# A symbol sequence is a series of grammar symbols
class SymbolSeq
  extend Forwardable
  def_delegators :@members, :empty?, :size, :[]

  # The sequence of symbols
  attr_reader :members

  # @param theSymbols [Array] the symbols of the sequence. A shallow copy
  #   is kept, so later changes to the argument don't affect the sequence.
  def initialize(theSymbols)
    @members = theSymbols.dup
  end

  # Equality operator.
  # @param other [SymbolSeq, Array] the object to compare with.
  # @return [Boolean] true when other is this very object or holds
  #   equal members.
  # @raise [StandardError] when other is neither a SymbolSeq nor an Array.
  def ==(other)
    return true if equal?(other)

    other_members =
      case other
      when SymbolSeq then other.members
      when Array then other
      else
        raise StandardError, "Cannot compare a SymbolSeq with a #{other.class}"
      end

    other_members == members
  end
end # class
|
32
|
+
|
33
|
+
end # module
|
34
|
+
end # module
|
35
|
+
|
36
|
+
# End of file
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative 'grm_symbol' # Load superclass
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Syntax # This module is used as a namespace
|
5
|
+
|
6
|
+
# A terminal symbol represents a class of words in the language
# defined by the grammar.
class Terminal < GrmSymbol
  # @param aName [Object] the name of the symbol; handed unchanged
  #   to the superclass constructor.
  def initialize(aName)
    super
  end
end # class
|
14
|
+
|
15
|
+
end # module
|
16
|
+
end # module
|
17
|
+
|
18
|
+
# End of file
|