rley 0.0.04 → 0.0.05
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/chart.rb +2 -4
- data/lib/rley/parser/dotted_item.rb +2 -4
- data/lib/rley/parser/earley_parser.rb +13 -16
- data/lib/rley/parser/parse_state.rb +2 -5
- data/lib/rley/parser/parsing.rb +17 -20
- data/lib/rley/parser/state_set.rb +1 -5
- data/lib/rley/parser/token.rb +0 -4
- data/lib/rley/syntax/grammar.rb +1 -4
- data/lib/rley/syntax/grm_symbol.rb +0 -2
- data/lib/rley/syntax/literal.rb +0 -2
- data/lib/rley/syntax/non_terminal.rb +0 -4
- data/lib/rley/syntax/production.rb +3 -7
- data/lib/rley/syntax/symbol_seq.rb +7 -8
- data/lib/rley/syntax/verbatim_symbol.rb +0 -2
- data/spec/rley/parser/chart_spec.rb +24 -26
- data/spec/rley/parser/dotted_item_spec.rb +83 -88
- data/spec/rley/parser/earley_parser_spec.rb +277 -241
- data/spec/rley/parser/parse_state_spec.rb +66 -66
- data/spec/rley/parser/parsing_spec.rb +89 -90
- data/spec/rley/parser/state_set_spec.rb +54 -56
- data/spec/rley/parser/token_spec.rb +18 -20
- data/spec/rley/syntax/grammar_spec.rb +118 -120
- data/spec/rley/syntax/grm_symbol_spec.rb +12 -15
- data/spec/rley/syntax/literal_spec.rb +16 -18
- data/spec/rley/syntax/non_terminal_spec.rb +12 -15
- data/spec/rley/syntax/production_spec.rb +33 -35
- data/spec/rley/syntax/symbol_seq_spec.rb +51 -52
- data/spec/rley/syntax/terminal_spec.rb +12 -15
- data/spec/rley/syntax/verbatim_symbol_spec.rb +16 -18
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZTEwY2E4YWQ0MDgwMmE0MTFjMjBmOGQ0OWUwYjI5ZjZjMWU2MDNjMw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MjkzZGJjMjM3MmIyZDY0ZjRmZjU2ZTEzNzM2NDRmNzE5MGNhZTU3ZA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MzRmMTAxMzcyZmJkZjNiYTYxNGYxOTQ1M2YzNDRmOTcyZGEzODVjYWZhOGE2
|
10
|
+
MTQxYjQ2YWMxZGMzN2E3NjRjZjhjNmI2MzljOGIwMzcxNGY1ZDg2MGM0YTA5
|
11
|
+
YzZmZjBiODMwMzk0YmY4OTQ4NjRhNGNjMWJjZTRlNjU5Y2NiNjY=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZDAzM2I1ZDNjMWRjNWEyMGNhMWYwNmUzNTdiNzE3NDYzMzVlMzdiYjFiYzUx
|
14
|
+
Y2I0MGY2MjU2MDg5ZDIxOGI5NjViOWExMDhkZGIyZTc0NmQ4ODVlOTJlYWQx
|
15
|
+
ODU0NGRhZDZmNDA1MTdmMzE1Y2Q3OTY3MmMxZDM4Y2NlYmI3YWQ=
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/parser/chart.rb
CHANGED
@@ -9,7 +9,7 @@ module Rley # This module is used as a namespace
|
|
9
9
|
attr_reader(:state_sets)
|
10
10
|
|
11
11
|
def initialize(startDottedItem, tokenCount)
|
12
|
-
@state_sets = Array.new(tokenCount + 1) {|_| StateSet.new }
|
12
|
+
@state_sets = Array.new(tokenCount + 1) { |_| StateSet.new }
|
13
13
|
push_state(startDottedItem, 0, 0)
|
14
14
|
end
|
15
15
|
|
@@ -30,10 +30,8 @@ module Rley # This module is used as a namespace
|
|
30
30
|
new_state = ParseState.new(aDottedItem, anOrigin)
|
31
31
|
self[anIndex].push_state(new_state)
|
32
32
|
end
|
33
|
-
|
34
33
|
end # class
|
35
|
-
|
36
34
|
end # module
|
37
35
|
end # module
|
38
36
|
|
39
|
-
# End of file
|
37
|
+
# End of file
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Rley # This module is used as a namespace
|
2
|
-
|
3
2
|
# A dotted item is a parse state for a given production/grammar rule
|
4
3
|
# It partitions the rhs of the rule in two parts.
|
5
4
|
# The left part consists of the symbols in the rules that are matched
|
@@ -37,7 +36,7 @@ module Rley # This module is used as a namespace
|
|
37
36
|
|
38
37
|
# An item with the dot at the beginning is called
|
39
38
|
# predicted item
|
40
|
-
|
39
|
+
alias_method :predicted_item?, :at_start?
|
41
40
|
|
42
41
|
# A dotted item is called a reduce item if the dot is at the end.
|
43
42
|
def reduce_item?()
|
@@ -52,7 +51,7 @@ module Rley # This module is used as a namespace
|
|
52
51
|
# Return the symbol after the dot.
|
53
52
|
# nil is returned if the dot is at the end
|
54
53
|
def next_symbol()
|
55
|
-
|
54
|
+
return (position < 0) ? nil : production.rhs[position]
|
56
55
|
end
|
57
56
|
|
58
57
|
# An item with the dot in front of a terminal is called a shift item
|
@@ -79,7 +78,6 @@ module Rley # This module is used as a namespace
|
|
79
78
|
return index
|
80
79
|
end
|
81
80
|
end # class
|
82
|
-
|
83
81
|
end # module
|
84
82
|
|
85
83
|
# End of file
|
@@ -4,7 +4,6 @@ require_relative 'parsing'
|
|
4
4
|
|
5
5
|
module Rley # This module is used as a namespace
|
6
6
|
module Parser # This module is used as a namespace
|
7
|
-
|
8
7
|
# Implementation of a parser that uses the Earley parsing algorithm.
|
9
8
|
class EarleyParser
|
10
9
|
# The grammar of the language.
|
@@ -113,8 +112,8 @@ module Rley # This module is used as a namespace
|
|
113
112
|
# 'pos':
|
114
113
|
# For each production with 'nt' as lhs, retrieve their corresponding
|
115
114
|
# initial dotted rules nt -> . xxxx
|
116
|
-
# For retrieved dotted rule, add a parse state to the chart entry
|
117
|
-
#
|
115
|
+
# For retrieved dotted rule, add a parse state to the chart entry
|
116
|
+
# at 'pos': <initial dotted rule, pos, pos>
|
118
117
|
# In short, one adds states to chart[pos], one per production that
|
119
118
|
# specifies how to reduce some input into the predicted nt (non-terminal)
|
120
119
|
# A prediction corresponds to a potential expansion of a nonterminal
|
@@ -149,29 +148,27 @@ module Rley # This module is used as a namespace
|
|
149
148
|
# immediately follows a dot
|
150
149
|
# @param aPosition [Fixnum] position in the input token sequence.
|
151
150
|
def scanning(aParsing, aTerminal, aPosition)
|
152
|
-
aParsing.scanning(aTerminal, aPosition)
|
151
|
+
aParsing.scanning(aTerminal, aPosition) do |item|
|
153
152
|
next_mapping[item]
|
154
|
-
|
153
|
+
end
|
155
154
|
end
|
156
|
-
|
157
155
|
|
158
|
-
# This method is called when a parse state at chart entry reaches
|
159
|
-
# of a production.
|
160
|
-
# For every state in chart[aPosition] that is
|
161
|
-
#
|
162
|
-
# Find states s in chart[j] of the
|
156
|
+
# This method is called when a parse state at chart entry reaches
|
157
|
+
# the end of a production.
|
158
|
+
# For every state in chart[aPosition] that is
|
159
|
+
# complete (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
|
160
|
+
# Find states s in chart[j] of the
|
161
|
+
# form {dotted_rule: Y -> α • X β, origin: i}
|
163
162
|
# In other words, rules that predicted the non-terminal X.
|
164
163
|
# For each s, add to chart[aPosition] a state of the form
|
165
164
|
# { dotted_rule: Y → α X • β, origin: i})
|
166
165
|
def completion(aParsing, aState, aPosition)
|
167
|
-
aParsing.completion(aState, aPosition)
|
166
|
+
aParsing.completion(aState, aPosition) do |item|
|
168
167
|
next_mapping[item]
|
169
|
-
|
168
|
+
end
|
170
169
|
end
|
171
|
-
|
172
170
|
end # class
|
173
|
-
|
174
171
|
end # module
|
175
172
|
end # module
|
176
173
|
|
177
|
-
# End of file
|
174
|
+
# End of file
|
@@ -1,6 +1,5 @@
|
|
1
1
|
module Rley # This module is used as a namespace
|
2
2
|
module Parser # This module is used as a namespace
|
3
|
-
|
4
3
|
class ParseState
|
5
4
|
attr_reader(:dotted_rule)
|
6
5
|
|
@@ -15,7 +14,7 @@ module Rley # This module is used as a namespace
|
|
15
14
|
|
16
15
|
# Equality comparison. A parse state behaves as a value object.
|
17
16
|
def ==(other)
|
18
|
-
return true if
|
17
|
+
return true if object_id == other.object_id
|
19
18
|
|
20
19
|
if (dotted_rule == other.dotted_rule) && (origin == other.origin)
|
21
20
|
result = true
|
@@ -45,10 +44,8 @@ module Rley # This module is used as a namespace
|
|
45
44
|
|
46
45
|
return aDottedRule
|
47
46
|
end
|
48
|
-
|
49
47
|
end # class
|
50
|
-
|
51
48
|
end # module
|
52
49
|
end # module
|
53
50
|
|
54
|
-
# End of file
|
51
|
+
# End of file
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -2,10 +2,9 @@ require_relative 'chart'
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Parser # This module is used as a namespace
|
5
|
-
|
6
5
|
class Parsing
|
7
6
|
attr_reader(:chart)
|
8
|
-
|
7
|
+
|
9
8
|
# The sequence of input tokens to parse
|
10
9
|
attr_reader(:tokens)
|
11
10
|
|
@@ -13,7 +12,7 @@ module Rley # This module is used as a namespace
|
|
13
12
|
@tokens = theTokens.dup
|
14
13
|
@chart = Chart.new(startDottedRule, tokens.size)
|
15
14
|
end
|
16
|
-
|
15
|
+
|
17
16
|
# Return true if the parse was successful (= input tokens
|
18
17
|
# followed the syntax specified by the grammar)
|
19
18
|
def success?()
|
@@ -21,14 +20,14 @@ module Rley # This module is used as a namespace
|
|
21
20
|
# The last chart entry has a parse state
|
22
21
|
# that involves the start production and
|
23
22
|
# has a dot positioned at the end of its rhs.
|
24
|
-
|
23
|
+
|
25
24
|
start_dotted_rule = chart.start_dotted_rule
|
26
25
|
start_production = start_dotted_rule.production
|
27
26
|
last_chart_entry = chart.state_sets.last
|
28
27
|
candidate_states = last_chart_entry.states_for(start_production)
|
29
28
|
found = candidate_states.find(&:complete?)
|
30
|
-
|
31
|
-
return !
|
29
|
+
|
30
|
+
return !found.nil?
|
32
31
|
end
|
33
32
|
|
34
33
|
|
@@ -40,23 +39,22 @@ module Rley # This module is used as a namespace
|
|
40
39
|
end
|
41
40
|
|
42
41
|
|
43
|
-
# This method is called when a parse state for chart entry at position
|
42
|
+
# This method is called when a parse state for chart entry at position
|
44
43
|
# 'pos' expects a terminal as next symbol.
|
45
44
|
# If the input token matches the terminal symbol then:
|
46
45
|
# Retrieve all parse states for chart entry at 'aPosition'
|
47
46
|
# that have the given terminal as next symbol.
|
48
|
-
# For each s of the above states, push to chart entry aPosition + 1
|
47
|
+
# For each s of the above states, push to chart entry aPosition + 1
|
49
48
|
# a new state like: <next dotted rule, s.origin, aPosition + 1>
|
50
|
-
# In other words, we place the dotted rules in the next state set
|
49
|
+
# In other words, we place the dotted rules in the next state set
|
51
50
|
# such that the dot appears after terminal.
|
52
|
-
# @param Terminal [Terminal] a terminal symbol that
|
53
|
-
# immediately follows a dot
|
51
|
+
# @param Terminal [Terminal] a terminal symbol that
|
52
|
+
# immediately follows a dot
|
54
53
|
# @param aPosition [Fixnum] position in the input token sequence.
|
55
54
|
# @param nextMapping [Proc or Lambda] code to evaluate in order to
|
56
55
|
# determine the "next" dotted rule for a given one.
|
57
56
|
def scanning(aTerminal, aPosition, &nextMapping)
|
58
57
|
curr_token = tokens[aPosition]
|
59
|
-
|
60
58
|
if curr_token.terminal == aTerminal
|
61
59
|
states = states_expecting(aTerminal, aPosition)
|
62
60
|
states.each do |s|
|
@@ -70,12 +68,13 @@ module Rley # This module is used as a namespace
|
|
70
68
|
|
71
69
|
# This method is called when a parse state at chart entry reaches the end
|
72
70
|
# of a production.
|
73
|
-
# For every state in chart[aPosition] that is complete
|
74
|
-
#
|
75
|
-
# Find states s in chart[j] of the form
|
76
|
-
#
|
71
|
+
# For every state in chart[aPosition] that is complete
|
72
|
+
# (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
|
73
|
+
# Find states s in chart[j] of the form
|
74
|
+
# {dotted_rule: Y -> α • X β, origin: i}
|
75
|
+
# In other words, rules that predicted the non-terminal X.
|
77
76
|
# For each s, add to chart[aPosition] a state of the form
|
78
|
-
#
|
77
|
+
# { dotted_rule: Y → α X • β, origin: i})
|
79
78
|
def completion(aState, aPosition, &nextMapping)
|
80
79
|
curr_origin = aState.origin
|
81
80
|
curr_lhs = aState.dotted_rule.lhs
|
@@ -92,10 +91,8 @@ module Rley # This module is used as a namespace
|
|
92
91
|
def states_expecting(aTerminal, aPosition)
|
93
92
|
return chart[aPosition].states_expecting(aTerminal)
|
94
93
|
end
|
95
|
-
|
96
94
|
end # class
|
97
|
-
|
98
95
|
end # module
|
99
96
|
end # module
|
100
97
|
|
101
|
-
# End of file
|
98
|
+
# End of file
|
@@ -2,7 +2,6 @@ require 'forwardable' # Delegation
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Parser # This module is used as a namespace
|
5
|
-
|
6
5
|
class StateSet
|
7
6
|
extend Forwardable
|
8
7
|
def_delegators :states, :empty?, :size, :first, :each
|
@@ -38,10 +37,7 @@ module Rley # This module is used as a namespace
|
|
38
37
|
# TODO: make it better than linear search
|
39
38
|
return states.include?(aState)
|
40
39
|
end
|
41
|
-
|
42
40
|
end # class
|
43
|
-
|
44
41
|
end # module
|
45
42
|
end # module
|
46
|
-
|
47
|
-
# End of file
|
43
|
+
# End of file
|
data/lib/rley/parser/token.rb
CHANGED
@@ -3,7 +3,6 @@ require_relative 'dotted_item'
|
|
3
3
|
|
4
4
|
module Rley # This module is used as a namespace
|
5
5
|
module Parser # This module is used as a namespace
|
6
|
-
|
7
6
|
class Token
|
8
7
|
attr_reader(:lexeme)
|
9
8
|
attr_reader(:terminal)
|
@@ -12,10 +11,7 @@ module Rley # This module is used as a namespace
|
|
12
11
|
@lexeme = theLexeme
|
13
12
|
@terminal = aTerminal
|
14
13
|
end
|
15
|
-
|
16
14
|
end # class
|
17
|
-
|
18
15
|
end # module
|
19
16
|
end # module
|
20
|
-
|
21
17
|
# End of file
|
data/lib/rley/syntax/grammar.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
module Rley # This module is used as a namespace
|
2
2
|
module Syntax # This module is used as a namespace
|
3
|
-
|
4
3
|
# A grammar specifies the syntax of a language.
|
5
4
|
# Formally, a grammar has:
|
6
5
|
# One start symbol,
|
@@ -50,10 +49,8 @@ module Rley # This module is used as a namespace
|
|
50
49
|
@symbols << symb
|
51
50
|
end
|
52
51
|
end
|
53
|
-
|
54
52
|
end # class
|
55
|
-
|
56
53
|
end # module
|
57
54
|
end # module
|
58
55
|
|
59
|
-
# End of file
|
56
|
+
# End of file
|
@@ -1,6 +1,5 @@
|
|
1
1
|
module Rley # This module is used as a namespace
|
2
2
|
module Syntax # This module is used as a namespace
|
3
|
-
|
4
3
|
# Abstract class for grammar symbols.
|
5
4
|
# A grammar symbol is an element that appears in grammar rules.
|
6
5
|
class GrmSymbol
|
@@ -11,7 +10,6 @@ module Rley # This module is used as a namespace
|
|
11
10
|
@name = aName.dup
|
12
11
|
end
|
13
12
|
end # class
|
14
|
-
|
15
13
|
end # module
|
16
14
|
end # module
|
17
15
|
|
data/lib/rley/syntax/literal.rb
CHANGED
@@ -2,7 +2,6 @@ require_relative 'terminal' # Load superclass
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Syntax # This module is used as a namespace
|
5
|
-
|
6
5
|
# A literal is a terminal symbol that matches a lexical pattern
|
7
6
|
class Literal < Terminal
|
8
7
|
# The exact text representation of the word.
|
@@ -13,7 +12,6 @@ module Rley # This module is used as a namespace
|
|
13
12
|
@pattern = aPattern
|
14
13
|
end
|
15
14
|
end # class
|
16
|
-
|
17
15
|
end # module
|
18
16
|
end # module
|
19
17
|
|
@@ -2,17 +2,13 @@ require_relative 'grm_symbol' # Load superclass
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Syntax # This module is used as a namespace
|
5
|
-
|
6
5
|
# A non-terminal symbol (sometimes called a syntactic variable) represents
|
7
6
|
# a composition of terminal or non-terminal symbols
|
8
7
|
class NonTerminal < GrmSymbol
|
9
|
-
|
10
8
|
def initialize(aName)
|
11
9
|
super(aName)
|
12
10
|
end
|
13
11
|
end # class
|
14
|
-
|
15
12
|
end # module
|
16
13
|
end # module
|
17
|
-
|
18
14
|
# End of file
|
@@ -2,7 +2,6 @@ require_relative 'symbol_seq'
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Syntax # This module is used as a namespace
|
5
|
-
|
6
5
|
# In a context-free grammar, a production is a rule in which
|
7
6
|
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
8
7
|
# and the right-hand side (RHS) consists of a sequence of symbols.
|
@@ -10,7 +9,6 @@ module Rley # This module is used as a namespace
|
|
10
9
|
# The rule stipulates that the LHS is equivalent to the RHS,
|
11
10
|
# in other words, every occurrence of the LHS can be replaced by the
|
12
11
|
# corresponding RHS.
|
13
|
-
# Implementation note: the object id of the production is taken as its LHS.
|
14
12
|
class Production
|
15
13
|
# The right-hand side (rhs) consists of a sequence of grammar symbols
|
16
14
|
attr_reader(:rhs)
|
@@ -20,8 +18,8 @@ module Rley # This module is used as a namespace
|
|
20
18
|
|
21
19
|
# Provide common alternate names to lhs and rhs accessors
|
22
20
|
|
23
|
-
|
24
|
-
|
21
|
+
alias_method :body, :rhs
|
22
|
+
alias_method :head, :lhs
|
25
23
|
|
26
24
|
def initialize(aNonTerminal, theSymbols)
|
27
25
|
@lhs = aNonTerminal
|
@@ -33,10 +31,8 @@ module Rley # This module is used as a namespace
|
|
33
31
|
def empty?()
|
34
32
|
return rhs.empty?
|
35
33
|
end
|
36
|
-
|
37
34
|
end # class
|
38
|
-
|
39
35
|
end # module
|
40
36
|
end # module
|
41
37
|
|
42
|
-
# End of file
|
38
|
+
# End of file
|
@@ -2,7 +2,6 @@ require 'forwardable'
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Syntax # This module is used as a namespace
|
5
|
-
|
6
5
|
# A symbol sequence is a suite of grammar symbols
|
7
6
|
class SymbolSeq
|
8
7
|
extend Forwardable
|
@@ -17,20 +16,20 @@ module Rley # This module is used as a namespace
|
|
17
16
|
|
18
17
|
# Equality operator.
|
19
18
|
def ==(other)
|
20
|
-
return true if other.object_id ==
|
19
|
+
return true if other.object_id == object_id
|
21
20
|
|
22
21
|
case other
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
22
|
+
when SymbolSeq then result = other.members == members
|
23
|
+
when Array then result = other == members
|
24
|
+
else
|
25
|
+
msg = "Cannot compare a SymbolSeq with a #{other.class}"
|
26
|
+
fail StandardError, msg
|
27
27
|
end
|
28
28
|
|
29
29
|
return result
|
30
30
|
end
|
31
31
|
end # class
|
32
|
-
|
33
32
|
end # module
|
34
33
|
end # module
|
35
34
|
|
36
|
-
# End of file
|
35
|
+
# End of file
|