rley 0.0.04 → 0.0.05
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/chart.rb +2 -4
- data/lib/rley/parser/dotted_item.rb +2 -4
- data/lib/rley/parser/earley_parser.rb +13 -16
- data/lib/rley/parser/parse_state.rb +2 -5
- data/lib/rley/parser/parsing.rb +17 -20
- data/lib/rley/parser/state_set.rb +1 -5
- data/lib/rley/parser/token.rb +0 -4
- data/lib/rley/syntax/grammar.rb +1 -4
- data/lib/rley/syntax/grm_symbol.rb +0 -2
- data/lib/rley/syntax/literal.rb +0 -2
- data/lib/rley/syntax/non_terminal.rb +0 -4
- data/lib/rley/syntax/production.rb +3 -7
- data/lib/rley/syntax/symbol_seq.rb +7 -8
- data/lib/rley/syntax/verbatim_symbol.rb +0 -2
- data/spec/rley/parser/chart_spec.rb +24 -26
- data/spec/rley/parser/dotted_item_spec.rb +83 -88
- data/spec/rley/parser/earley_parser_spec.rb +277 -241
- data/spec/rley/parser/parse_state_spec.rb +66 -66
- data/spec/rley/parser/parsing_spec.rb +89 -90
- data/spec/rley/parser/state_set_spec.rb +54 -56
- data/spec/rley/parser/token_spec.rb +18 -20
- data/spec/rley/syntax/grammar_spec.rb +118 -120
- data/spec/rley/syntax/grm_symbol_spec.rb +12 -15
- data/spec/rley/syntax/literal_spec.rb +16 -18
- data/spec/rley/syntax/non_terminal_spec.rb +12 -15
- data/spec/rley/syntax/production_spec.rb +33 -35
- data/spec/rley/syntax/symbol_seq_spec.rb +51 -52
- data/spec/rley/syntax/terminal_spec.rb +12 -15
- data/spec/rley/syntax/verbatim_symbol_spec.rb +16 -18
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZTEwY2E4YWQ0MDgwMmE0MTFjMjBmOGQ0OWUwYjI5ZjZjMWU2MDNjMw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MjkzZGJjMjM3MmIyZDY0ZjRmZjU2ZTEzNzM2NDRmNzE5MGNhZTU3ZA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MzRmMTAxMzcyZmJkZjNiYTYxNGYxOTQ1M2YzNDRmOTcyZGEzODVjYWZhOGE2
|
10
|
+
MTQxYjQ2YWMxZGMzN2E3NjRjZjhjNmI2MzljOGIwMzcxNGY1ZDg2MGM0YTA5
|
11
|
+
YzZmZjBiODMwMzk0YmY4OTQ4NjRhNGNjMWJjZTRlNjU5Y2NiNjY=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZDAzM2I1ZDNjMWRjNWEyMGNhMWYwNmUzNTdiNzE3NDYzMzVlMzdiYjFiYzUx
|
14
|
+
Y2I0MGY2MjU2MDg5ZDIxOGI5NjViOWExMDhkZGIyZTc0NmQ4ODVlOTJlYWQx
|
15
|
+
ODU0NGRhZDZmNDA1MTdmMzE1Y2Q3OTY3MmMxZDM4Y2NlYmI3YWQ=
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/parser/chart.rb
CHANGED
@@ -9,7 +9,7 @@ module Rley # This module is used as a namespace
|
|
9
9
|
attr_reader(:state_sets)
|
10
10
|
|
11
11
|
def initialize(startDottedItem, tokenCount)
|
12
|
-
@state_sets = Array.new(tokenCount + 1) {|_| StateSet.new }
|
12
|
+
@state_sets = Array.new(tokenCount + 1) { |_| StateSet.new }
|
13
13
|
push_state(startDottedItem, 0, 0)
|
14
14
|
end
|
15
15
|
|
@@ -30,10 +30,8 @@ module Rley # This module is used as a namespace
|
|
30
30
|
new_state = ParseState.new(aDottedItem, anOrigin)
|
31
31
|
self[anIndex].push_state(new_state)
|
32
32
|
end
|
33
|
-
|
34
33
|
end # class
|
35
|
-
|
36
34
|
end # module
|
37
35
|
end # module
|
38
36
|
|
39
|
-
# End of file
|
37
|
+
# End of file
|
data/lib/rley/parser/dotted_item.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module Rley # This module is used as a namespace
|
2
|
-
|
3
2
|
# A dotted item is a parse state for a given production/grammar rule
|
4
3
|
# It partitions the rhs of the rule in two parts.
|
5
4
|
# The left part consists of the symbols in the rules that are matched
|
@@ -37,7 +36,7 @@ module Rley # This module is used as a namespace
|
|
37
36
|
|
38
37
|
# An item with the dot at the beginning is called
|
39
38
|
# predicted item
|
40
|
-
|
39
|
+
alias_method :predicted_item?, :at_start?
|
41
40
|
|
42
41
|
# A dotted item is called a reduce item if the dot is at the end.
|
43
42
|
def reduce_item?()
|
@@ -52,7 +51,7 @@ module Rley # This module is used as a namespace
|
|
52
51
|
# Return the symbol after the dot.
|
53
52
|
# nil is returned if the dot is at the end
|
54
53
|
def next_symbol()
|
55
|
-
|
54
|
+
return (position < 0) ? nil : production.rhs[position]
|
56
55
|
end
|
57
56
|
|
58
57
|
# An item with the dot in front of a terminal is called a shift item
|
@@ -79,7 +78,6 @@ module Rley # This module is used as a namespace
|
|
79
78
|
return index
|
80
79
|
end
|
81
80
|
end # class
|
82
|
-
|
83
81
|
end # module
|
84
82
|
|
85
83
|
# End of file
|
data/lib/rley/parser/earley_parser.rb
CHANGED
@@ -4,7 +4,6 @@ require_relative 'parsing'
|
|
4
4
|
|
5
5
|
module Rley # This module is used as a namespace
|
6
6
|
module Parser # This module is used as a namespace
|
7
|
-
|
8
7
|
# Implementation of a parser that uses the Earley parsing algorithm.
|
9
8
|
class EarleyParser
|
10
9
|
# The grammar of the language.
|
@@ -113,8 +112,8 @@ module Rley # This module is used as a namespace
|
|
113
112
|
# 'pos':
|
114
113
|
# For each production with 'nt' as lhs, retrieve their corresponding
|
115
114
|
# initial dotted rules nt -> . xxxx
|
116
|
-
# For retrieved dotted rule, add a parse state to the chart entry
|
117
|
-
#
|
115
|
+
# For retrieved dotted rule, add a parse state to the chart entry
|
116
|
+
# at 'pos': <initial dotted rule, pos, pos>
|
118
117
|
# In short, one adds states to chart[pos], one per production that
|
119
118
|
# specifies how to reduce some input into the predicted nt (non-terminal)
|
120
119
|
# A prediction corresponds to a potential expansion of a nonterminal
|
@@ -149,29 +148,27 @@ module Rley # This module is used as a namespace
|
|
149
148
|
# immediately follows a dot
|
150
149
|
# @param aPosition [Fixnum] position in the input token sequence.
|
151
150
|
def scanning(aParsing, aTerminal, aPosition)
|
152
|
-
aParsing.scanning(aTerminal, aPosition)
|
151
|
+
aParsing.scanning(aTerminal, aPosition) do |item|
|
153
152
|
next_mapping[item]
|
154
|
-
|
153
|
+
end
|
155
154
|
end
|
156
|
-
|
157
155
|
|
158
|
-
# This method is called when a parse state at chart entry reaches
|
159
|
-
# of a production.
|
160
|
-
# For every state in chart[aPosition] that is
|
161
|
-
#
|
162
|
-
# Find states s in chart[j] of the
|
156
|
+
# This method is called when a parse state at chart entry reaches
|
157
|
+
# the end of a production.
|
158
|
+
# For every state in chart[aPosition] that is
|
159
|
+
# complete (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
|
160
|
+
# Find states s in chart[j] of the
|
161
|
+
# form {dotted_rule: Y -> α • X β, origin: i}
|
163
162
|
# In other words, rules that predicted the non-terminal X.
|
164
163
|
# For each s, add to chart[aPosition] a state of the form
|
165
164
|
# { dotted_rule: Y → α X • β, origin: i})
|
166
165
|
def completion(aParsing, aState, aPosition)
|
167
|
-
aParsing.completion(aState, aPosition)
|
166
|
+
aParsing.completion(aState, aPosition) do |item|
|
168
167
|
next_mapping[item]
|
169
|
-
|
168
|
+
end
|
170
169
|
end
|
171
|
-
|
172
170
|
end # class
|
173
|
-
|
174
171
|
end # module
|
175
172
|
end # module
|
176
173
|
|
177
|
-
# End of file
|
174
|
+
# End of file
|
data/lib/rley/parser/parse_state.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
module Rley # This module is used as a namespace
|
2
2
|
module Parser # This module is used as a namespace
|
3
|
-
|
4
3
|
class ParseState
|
5
4
|
attr_reader(:dotted_rule)
|
6
5
|
|
@@ -15,7 +14,7 @@ module Rley # This module is used as a namespace
|
|
15
14
|
|
16
15
|
# Equality comparison. A parse state behaves as a value object.
|
17
16
|
def ==(other)
|
18
|
-
return true if
|
17
|
+
return true if object_id == other.object_id
|
19
18
|
|
20
19
|
if (dotted_rule == other.dotted_rule) && (origin == other.origin)
|
21
20
|
result = true
|
@@ -45,10 +44,8 @@ module Rley # This module is used as a namespace
|
|
45
44
|
|
46
45
|
return aDottedRule
|
47
46
|
end
|
48
|
-
|
49
47
|
end # class
|
50
|
-
|
51
48
|
end # module
|
52
49
|
end # module
|
53
50
|
|
54
|
-
# End of file
|
51
|
+
# End of file
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -2,10 +2,9 @@ require_relative 'chart'
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Parser # This module is used as a namespace
|
5
|
-
|
6
5
|
class Parsing
|
7
6
|
attr_reader(:chart)
|
8
|
-
|
7
|
+
|
9
8
|
# The sequence of input token to parse
|
10
9
|
attr_reader(:tokens)
|
11
10
|
|
@@ -13,7 +12,7 @@ module Rley # This module is used as a namespace
|
|
13
12
|
@tokens = theTokens.dup
|
14
13
|
@chart = Chart.new(startDottedRule, tokens.size)
|
15
14
|
end
|
16
|
-
|
15
|
+
|
17
16
|
# Return true if the parse was successful (= input tokens
|
18
17
|
# followed the syntax specified by the grammar)
|
19
18
|
def success?()
|
@@ -21,14 +20,14 @@ module Rley # This module is used as a namespace
|
|
21
20
|
# The last chart entry has a parse state
|
22
21
|
# that involves the start production and
|
23
22
|
# has a dot positioned at the end of its rhs.
|
24
|
-
|
23
|
+
|
25
24
|
start_dotted_rule = chart.start_dotted_rule
|
26
25
|
start_production = start_dotted_rule.production
|
27
26
|
last_chart_entry = chart.state_sets.last
|
28
27
|
candidate_states = last_chart_entry.states_for(start_production)
|
29
28
|
found = candidate_states.find(&:complete?)
|
30
|
-
|
31
|
-
return !
|
29
|
+
|
30
|
+
return !found.nil?
|
32
31
|
end
|
33
32
|
|
34
33
|
|
@@ -40,23 +39,22 @@ module Rley # This module is used as a namespace
|
|
40
39
|
end
|
41
40
|
|
42
41
|
|
43
|
-
# This method is called when a parse state for chart entry at position
|
42
|
+
# This method is called when a parse state for chart entry at position
|
44
43
|
# 'pos' expects a terminal as next symbol.
|
45
44
|
# If the input token matches the terminal symbol then:
|
46
45
|
# Retrieve all parse states for chart entry at 'aPosition'
|
47
46
|
# that have the given terminal as next symbol.
|
48
|
-
# For each s of the above states, push to chart entry aPosition + 1
|
47
|
+
# For each s of the above states, push to chart entry aPosition + 1
|
49
48
|
# a new state like: <next dotted rule, s.origin, aPosition + 1>
|
50
|
-
# In other words, we place the dotted rules in the next state set
|
49
|
+
# In other words, we place the dotted rules in the next state set
|
51
50
|
# such that the dot appears after terminal.
|
52
|
-
# @param Terminal [Terminal] a terminal symbol that
|
53
|
-
# immediately follows a dot
|
51
|
+
# @param Terminal [Terminal] a terminal symbol that
|
52
|
+
# immediately follows a dot
|
54
53
|
# @param aPosition [Fixnum] position in the input token sequence.
|
55
54
|
# @param nextMapping [Proc or Lambda] code to evaluate in order to
|
56
55
|
# determine the "next" dotted rule for a given one.
|
57
56
|
def scanning(aTerminal, aPosition, &nextMapping)
|
58
57
|
curr_token = tokens[aPosition]
|
59
|
-
|
60
58
|
if curr_token.terminal == aTerminal
|
61
59
|
states = states_expecting(aTerminal, aPosition)
|
62
60
|
states.each do |s|
|
@@ -70,12 +68,13 @@ module Rley # This module is used as a namespace
|
|
70
68
|
|
71
69
|
# This method is called when a parse state at chart entry reaches the end
|
72
70
|
# of a production.
|
73
|
-
# For every state in chart[aPosition] that is complete
|
74
|
-
#
|
75
|
-
# Find states s in chart[j] of the form
|
76
|
-
#
|
71
|
+
# For every state in chart[aPosition] that is complete
|
72
|
+
# (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
|
73
|
+
# Find states s in chart[j] of the form
|
74
|
+
# {dotted_rule: Y -> α • X β, origin: i}
|
75
|
+
# In other words, rules that predicted the non-terminal X.
|
77
76
|
# For each s, add to chart[aPosition] a state of the form
|
78
|
-
#
|
77
|
+
# { dotted_rule: Y → α X • β, origin: i})
|
79
78
|
def completion(aState, aPosition, &nextMapping)
|
80
79
|
curr_origin = aState.origin
|
81
80
|
curr_lhs = aState.dotted_rule.lhs
|
@@ -92,10 +91,8 @@ module Rley # This module is used as a namespace
|
|
92
91
|
def states_expecting(aTerminal, aPosition)
|
93
92
|
return chart[aPosition].states_expecting(aTerminal)
|
94
93
|
end
|
95
|
-
|
96
94
|
end # class
|
97
|
-
|
98
95
|
end # module
|
99
96
|
end # module
|
100
97
|
|
101
|
-
# End of file
|
98
|
+
# End of file
|
data/lib/rley/parser/state_set.rb
CHANGED
@@ -2,7 +2,6 @@ require 'forwardable' # Delegation
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Parser # This module is used as a namespace
|
5
|
-
|
6
5
|
class StateSet
|
7
6
|
extend Forwardable
|
8
7
|
def_delegators :states, :empty?, :size, :first, :each
|
@@ -38,10 +37,7 @@ module Rley # This module is used as a namespace
|
|
38
37
|
# TODO: make it better than linear search
|
39
38
|
return states.include?(aState)
|
40
39
|
end
|
41
|
-
|
42
40
|
end # class
|
43
|
-
|
44
41
|
end # module
|
45
42
|
end # module
|
46
|
-
|
47
|
-
# End of file
|
43
|
+
# End of file
|
data/lib/rley/parser/token.rb
CHANGED
@@ -3,7 +3,6 @@ require_relative 'dotted_item'
|
|
3
3
|
|
4
4
|
module Rley # This module is used as a namespace
|
5
5
|
module Parser # This module is used as a namespace
|
6
|
-
|
7
6
|
class Token
|
8
7
|
attr_reader(:lexeme)
|
9
8
|
attr_reader(:terminal)
|
@@ -12,10 +11,7 @@ module Rley # This module is used as a namespace
|
|
12
11
|
@lexeme = theLexeme
|
13
12
|
@terminal = aTerminal
|
14
13
|
end
|
15
|
-
|
16
14
|
end # class
|
17
|
-
|
18
15
|
end # module
|
19
16
|
end # module
|
20
|
-
|
21
17
|
# End of file
|
data/lib/rley/syntax/grammar.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
module Rley # This module is used as a namespace
|
2
2
|
module Syntax # This module is used as a namespace
|
3
|
-
|
4
3
|
# A grammar specifies the syntax of a language.
|
5
4
|
# Formally, a grammar has:
|
6
5
|
# One start symbol,
|
@@ -50,10 +49,8 @@ module Rley # This module is used as a namespace
|
|
50
49
|
@symbols << symb
|
51
50
|
end
|
52
51
|
end
|
53
|
-
|
54
52
|
end # class
|
55
|
-
|
56
53
|
end # module
|
57
54
|
end # module
|
58
55
|
|
59
|
-
# End of file
|
56
|
+
# End of file
|
data/lib/rley/syntax/grm_symbol.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
module Rley # This module is used as a namespace
|
2
2
|
module Syntax # This module is used as a namespace
|
3
|
-
|
4
3
|
# Abstract class for grammar symbols.
|
5
4
|
# A grammar symbol is an element that appears in grammar rules.
|
6
5
|
class GrmSymbol
|
@@ -11,7 +10,6 @@ module Rley # This module is used as a namespace
|
|
11
10
|
@name = aName.dup
|
12
11
|
end
|
13
12
|
end # class
|
14
|
-
|
15
13
|
end # module
|
16
14
|
end # module
|
17
15
|
|
data/lib/rley/syntax/literal.rb
CHANGED
@@ -2,7 +2,6 @@ require_relative 'terminal' # Load superclass
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Syntax # This module is used as a namespace
|
5
|
-
|
6
5
|
# A literal is terminal symbol that matches a lexical pattern
|
7
6
|
class Literal < Terminal
|
8
7
|
# The exact text representation of the word.
|
@@ -13,7 +12,6 @@ module Rley # This module is used as a namespace
|
|
13
12
|
@pattern = aPattern
|
14
13
|
end
|
15
14
|
end # class
|
16
|
-
|
17
15
|
end # module
|
18
16
|
end # module
|
19
17
|
|
data/lib/rley/syntax/non_terminal.rb
CHANGED
@@ -2,17 +2,13 @@ require_relative 'grm_symbol' # Load superclass
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Syntax # This module is used as a namespace
|
5
|
-
|
6
5
|
# A non-terminal symbol (sometimes called a syntactic variable) represents
|
7
6
|
# a composition of terminal or non-terminal symbols
|
8
7
|
class NonTerminal < GrmSymbol
|
9
|
-
|
10
8
|
def initialize(aName)
|
11
9
|
super(aName)
|
12
10
|
end
|
13
11
|
end # class
|
14
|
-
|
15
12
|
end # module
|
16
13
|
end # module
|
17
|
-
|
18
14
|
# End of file
|
data/lib/rley/syntax/production.rb
CHANGED
@@ -2,7 +2,6 @@ require_relative 'symbol_seq'
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Syntax # This module is used as a namespace
|
5
|
-
|
6
5
|
# In a context-free grammar, a production is a rule in which
|
7
6
|
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
8
7
|
# and the right-hand side (RHS) consists of a sequence of symbols.
|
@@ -10,7 +9,6 @@ module Rley # This module is used as a namespace
|
|
10
9
|
# The rule stipulates that the LHS is equivalent to the RHS,
|
11
10
|
# in other words every occurrence of the LHS can be substituted to
|
12
11
|
# corresponding RHS.
|
13
|
-
# Implementation note: the object id of the production is taken as its LHS.
|
14
12
|
class Production
|
15
13
|
# The right-hand side (rhs) consists of a sequence of grammar symbols
|
16
14
|
attr_reader(:rhs)
|
@@ -20,8 +18,8 @@ module Rley # This module is used as a namespace
|
|
20
18
|
|
21
19
|
# Provide common alternate names to lhs and rhs accessors
|
22
20
|
|
23
|
-
|
24
|
-
|
21
|
+
alias_method :body, :rhs
|
22
|
+
alias_method :head, :lhs
|
25
23
|
|
26
24
|
def initialize(aNonTerminal, theSymbols)
|
27
25
|
@lhs = aNonTerminal
|
@@ -33,10 +31,8 @@ module Rley # This module is used as a namespace
|
|
33
31
|
def empty?()
|
34
32
|
return rhs.empty?
|
35
33
|
end
|
36
|
-
|
37
34
|
end # class
|
38
|
-
|
39
35
|
end # module
|
40
36
|
end # module
|
41
37
|
|
42
|
-
# End of file
|
38
|
+
# End of file
|
data/lib/rley/syntax/symbol_seq.rb
CHANGED
@@ -2,7 +2,6 @@ require 'forwardable'
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Syntax # This module is used as a namespace
|
5
|
-
|
6
5
|
# A symbol sequence is a suite of grammar symbols
|
7
6
|
class SymbolSeq
|
8
7
|
extend Forwardable
|
@@ -17,20 +16,20 @@ module Rley # This module is used as a namespace
|
|
17
16
|
|
18
17
|
# Equality operator.
|
19
18
|
def ==(other)
|
20
|
-
return true if other.object_id ==
|
19
|
+
return true if other.object_id == object_id
|
21
20
|
|
22
21
|
case other
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
22
|
+
when SymbolSeq then result = other.members == members
|
23
|
+
when Array then result = other == members
|
24
|
+
else
|
25
|
+
msg = "Cannot compare a SymbolSeq with a #{other.class}"
|
26
|
+
fail StandardError, msg
|
27
27
|
end
|
28
28
|
|
29
29
|
return result
|
30
30
|
end
|
31
31
|
end # class
|
32
|
-
|
33
32
|
end # module
|
34
33
|
end # module
|
35
34
|
|
36
|
-
# End of file
|
35
|
+
# End of file
|