rley 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/CHANGELOG.md +5 -0
- data/examples/parsers/parsing_b_expr.rb +85 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/parse_state.rb +5 -0
- data/lib/rley/parser/parsing.rb +23 -7
- data/lib/rley/parser/state_set.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +12 -1
- data/spec/rley/parser/parsing_spec.rb +25 -7
- data/spec/rley/ptree/parse_tree_spec.rb +3 -1
- data/spec/rley/support/grammar_b_expr_helper.rb +39 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NTc2NGE4MzYxOTc1ZDUyMDVkYjdmNGFhODllNmEwM2YxMjVkZDk1OQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZDBkMTdmZWM2NTMwYWMwNDFkNDQ4NGI2YzdkNjk3NDU0ZGExMGYzNA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YWZjYmQxNDNiNjVmMDYyYWI1YzM0YzMyN2VjMzk4ZjUxOTIyMmQxNTE4Y2Y4
|
10
|
+
YjM4NTAyNGNhNjhiOWRhNWMyZWVmYzRhYjFjNzhhOWEzMDY1ZTgzMzRiMTVh
|
11
|
+
Njg1MDRjMjQ5NTlhYmU3NTk2MDBiMWQyZmI3MDIyMWUwNGM1NjM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ODZiMmY1ZDMwNTVlNmM3ZGJmZDIzOTAzYjQ1MTFlMmY4OTBlODhhZWZhN2M3
|
14
|
+
MjEzYjI0YjZhNTA4NzkzYjJiMzMwY2Y0NzliMGMyZDdlMjI3NjkzMTliYWNh
|
15
|
+
YzQ2MGVmNWM1NTA5M2IxYjcxNzliNzhhMzViMDE4OTM3NWY1NjI=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.1.12 / 2014-12-22
|
2
|
+
* [FIX] Fixed `Parsing#parse_tree`: code couldn't cope with parse state set containing more
|
3
|
+
than one parse state that expected the same symbol.
|
4
|
+
* [NEW] Added one more parser example (for very basic arithmetic expression)
|
5
|
+
|
1
6
|
### 0.1.11 / 2014-12-16
|
2
7
|
* [FIX] Fixed all but one YARD (documentation) warnings. Most of them were due to mismatch
|
3
8
|
in method argument names between source code and documentation.
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Purpose: to demonstrate how to parse basic arithmetic expressions
|
2
|
+
# and render a parse tree
|
3
|
+
require 'pp' # TODO remove this dependency
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a grammar for a very simple arithmetic expression language
|
17
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
18
|
+
|
19
|
+
# Let's create the grammar piece by piece
|
20
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
21
|
+
builder.add_terminals('+', '*', 'integer')
|
22
|
+
builder.add_production('P' => 'S')
|
23
|
+
builder.add_production('S' => %w(S + M))
|
24
|
+
builder.add_production('S' => 'M')
|
25
|
+
builder.add_production('M' => %w(M * T))
|
26
|
+
builder.add_production('M' => 'T')
|
27
|
+
builder.add_production('T' => 'integer')
|
28
|
+
|
29
|
+
# And now build the grammar...
|
30
|
+
grammar_s_expr = builder.grammar
|
31
|
+
|
32
|
+
|
33
|
+
########################################
|
34
|
+
# Step 2. Create a tokenizer for the language
|
35
|
+
# The tokenizer transforms the input into an array of tokens
|
36
|
+
def tokenizer(aText, aGrammar)
|
37
|
+
tokens = aText.scan(/\S+/).map do |lexeme|
|
38
|
+
case lexeme
|
39
|
+
when '+', '*'
|
40
|
+
terminal = aGrammar.name2symbol[lexeme]
|
41
|
+
when /^[-+]?\d+$/
|
42
|
+
terminal = aGrammar.name2symbol['integer']
|
43
|
+
else
|
44
|
+
msg = "Unknown input text '#{lexeme}'"
|
45
|
+
fail StandardError, msg
|
46
|
+
end
|
47
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
48
|
+
end
|
49
|
+
|
50
|
+
return tokens
|
51
|
+
end
|
52
|
+
|
53
|
+
########################################
|
54
|
+
# Step 3. Create a parser for that grammar
|
55
|
+
parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
|
56
|
+
|
57
|
+
########################################
|
58
|
+
# Step 4. Tokenize the input
|
59
|
+
valid_input = '2 + 3 * 4'
|
60
|
+
tokens = tokenizer(valid_input, grammar_s_expr)
|
61
|
+
|
62
|
+
########################################
|
63
|
+
# Step 5. Let the parser process the input
|
64
|
+
result = parser.parse(tokens)
|
65
|
+
puts "Parse successful? #{result.success?}"
|
66
|
+
pp result
|
67
|
+
|
68
|
+
|
69
|
+
########################################
|
70
|
+
# Step 6. Generate a parse tree from the parse result
|
71
|
+
ptree = result.parse_tree
|
72
|
+
=begin
|
73
|
+
########################################
|
74
|
+
# Step 7. Render the parse tree (in JSON)
|
75
|
+
# Let's create a parse tree visitor
|
76
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
77
|
+
|
78
|
+
#Here we create a renderer object...
|
79
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
80
|
+
|
81
|
+
# Now emit the parse tree as JSON on the console output
|
82
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
83
|
+
renderer.render(visitor)
|
84
|
+
=end
|
85
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
@@ -31,6 +31,11 @@ module Rley # This module is used as a namespace
|
|
31
31
|
return dotted_rule.reduce_item?
|
32
32
|
end
|
33
33
|
|
34
|
+
# Returns true if the dot is at the start of the rhs of the production.
|
35
|
+
def predicted?()
|
36
|
+
return dotted_rule.predicted_item?
|
37
|
+
end
|
38
|
+
|
34
39
|
# Next expected symbol in the production
|
35
40
|
def next_symbol()
|
36
41
|
return dotted_rule.next_symbol
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require_relative 'chart'
|
2
2
|
require_relative '../ptree/parse_tree'
|
3
3
|
|
4
|
+
|
4
5
|
module Rley # This module is used as a namespace
|
5
6
|
module Parser # This module is used as a namespace
|
6
7
|
class Parsing
|
@@ -47,19 +48,23 @@ module Rley # This module is used as a namespace
|
|
47
48
|
parse_state)
|
48
49
|
|
49
50
|
when Syntax::NonTerminal
|
50
|
-
# Retrieve complete states
|
51
|
+
# Retrieve complete states with curr_symbol as lhs
|
51
52
|
new_states = chart[state_set_index].states_rewriting(curr_symbol)
|
52
53
|
# TODO: make this more robust
|
53
54
|
parse_state = new_states[0]
|
54
55
|
curr_dotted_item = parse_state.dotted_rule
|
56
|
+
# Additional check
|
57
|
+
if ptree.current_node.symbol != curr_dotted_item.production.lhs
|
58
|
+
ptree.step_back(state_set_index)
|
59
|
+
end
|
55
60
|
ptree.current_node.range = { low: parse_state.origin }
|
56
61
|
node_range = ptree.current_node.range
|
57
62
|
ptree.add_children(curr_dotted_item.production, node_range)
|
58
63
|
link_node_to_token(ptree, state_set_index - 1)
|
59
64
|
|
60
|
-
when NilClass
|
65
|
+
when NilClass # No symbol on the left of dot
|
61
66
|
lhs = curr_dotted_item.production.lhs
|
62
|
-
new_states =
|
67
|
+
new_states = states_expecting(lhs, state_set_index, true)
|
63
68
|
break if new_states.empty?
|
64
69
|
# TODO: make this more robust
|
65
70
|
parse_state = new_states[0]
|
@@ -98,7 +103,7 @@ module Rley # This module is used as a namespace
|
|
98
103
|
curr_token = tokens[aPosition]
|
99
104
|
return unless curr_token.terminal == aTerminal
|
100
105
|
|
101
|
-
states = states_expecting(aTerminal, aPosition)
|
106
|
+
states = states_expecting(aTerminal, aPosition, false)
|
102
107
|
states.each do |s|
|
103
108
|
next_item = nextMapping.call(s.dotted_rule)
|
104
109
|
push_state(next_item, s.origin, aPosition + 1)
|
@@ -119,7 +124,7 @@ module Rley # This module is used as a namespace
|
|
119
124
|
def completion(aState, aPosition, &nextMapping)
|
120
125
|
curr_origin = aState.origin
|
121
126
|
curr_lhs = aState.dotted_rule.lhs
|
122
|
-
states = states_expecting(curr_lhs, curr_origin)
|
127
|
+
states = states_expecting(curr_lhs, curr_origin, false)
|
123
128
|
states.each do |s|
|
124
129
|
next_item = nextMapping.call(s.dotted_rule)
|
125
130
|
push_state(next_item, s.origin, aPosition)
|
@@ -129,8 +134,19 @@ module Rley # This module is used as a namespace
|
|
129
134
|
|
130
135
|
# The list of ParseState from the chart entry at given position
|
131
136
|
# that expect the given terminal
|
132
|
-
def states_expecting(aTerminal, aPosition)
|
133
|
-
|
137
|
+
def states_expecting(aTerminal, aPosition, toSort)
|
138
|
+
expecting = chart[aPosition].states_expecting(aTerminal)
|
139
|
+
return expecting if !toSort || expecting.size < 2
|
140
|
+
|
141
|
+
# Put predicted states ahead
|
142
|
+
(predicted, others) = expecting.partition { |state| state.predicted? }
|
143
|
+
|
144
|
+
# Sort states in reverse order of their origin value
|
145
|
+
[predicted, others].each do |set|
|
146
|
+
set.sort! { |a,b| b.origin <=> a.origin }
|
147
|
+
end
|
148
|
+
|
149
|
+
return predicted + others
|
134
150
|
end
|
135
151
|
|
136
152
|
private
|
@@ -21,13 +21,15 @@ module Rley # This module is used as a namespace
|
|
21
21
|
@states << aState unless include?(aState)
|
22
22
|
end
|
23
23
|
|
24
|
-
# The list of ParseState that expect the given
|
25
|
-
|
26
|
-
|
24
|
+
# The list of ParseState that expect the given symbol.
|
25
|
+
# @param aSymbol [GrmSymbol] the expected symbol
|
26
|
+
# (=on the right of the dot)
|
27
|
+
def states_expecting(aSymbol)
|
28
|
+
return states.select { |s| s.dotted_rule.next_symbol == aSymbol }
|
27
29
|
end
|
28
30
|
|
29
|
-
# The list of complete ParseState that have the
|
30
|
-
# production
|
31
|
+
# The list of complete ParseState that have the given non-terminal
|
32
|
+
# symbol as the lhs of their production.
|
31
33
|
def states_rewriting(aNonTerm)
|
32
34
|
return states.select do |s|
|
33
35
|
(s.dotted_rule.production.lhs == aNonTerm) && s.complete?
|
@@ -33,8 +33,19 @@ module Rley # This module is used as a namespace
|
|
33
33
|
aVisitor.end_visit_ptree(self)
|
34
34
|
end
|
35
35
|
|
36
|
-
|
36
|
+
# Add children to the current node.
|
37
|
+
# The children nodes correspond to the rhs of the production.
|
38
|
+
# Update the range in the children given the passed range object.
|
39
|
+
# Pre-condition: the current node refers to the same (non-terminal)
|
40
|
+
# symbol of the lhs of the given production.
|
41
|
+
# @param aProduction [Production] A production rule
|
42
|
+
# @param aRange [TokenRange]
|
37
43
|
def add_children(aProduction, aRange)
|
44
|
+
if aProduction.lhs != current_node.symbol
|
45
|
+
msg = "Internal error. Expected symbol was #{aProduction.lhs} but current node is #{current_node.symbol}"
|
46
|
+
fail StandardError, msg
|
47
|
+
end
|
48
|
+
|
38
49
|
aProduction.rhs.each do |symb|
|
39
50
|
case symb
|
40
51
|
when Syntax::Terminal
|
@@ -7,12 +7,19 @@ require_relative '../../../lib/rley/syntax/grammar_builder'
|
|
7
7
|
require_relative '../../../lib/rley/parser/dotted_item'
|
8
8
|
require_relative '../../../lib/rley/parser/token'
|
9
9
|
require_relative '../../../lib/rley/parser/earley_parser'
|
10
|
+
require_relative '../support/grammar_abc_helper'
|
11
|
+
require_relative '../support/grammar_b_expr_helper'
|
12
|
+
|
13
|
+
|
10
14
|
# Load the class under test
|
11
15
|
require_relative '../../../lib/rley/parser/parsing'
|
12
16
|
|
13
17
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
14
18
|
module Parser # Open this namespace to avoid module qualifier prefixes
|
15
19
|
describe Parsing do
|
20
|
+
include GrammarABCHelper # Mix-in module with builder for grammar abc
|
21
|
+
include GrammarBExprHelper # Mix-in with builder for simple expressions
|
22
|
+
|
16
23
|
# Grammar 1: A very simple language
|
17
24
|
# S ::= A.
|
18
25
|
# A ::= "a" A "c".
|
@@ -87,7 +94,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
87
94
|
item2 = DottedItem.new(prod_A1, 1)
|
88
95
|
subject.push_state(item1, 2, 2)
|
89
96
|
subject.push_state(item2, 2, 2)
|
90
|
-
states = subject.states_expecting(c_, 2)
|
97
|
+
states = subject.states_expecting(c_, 2, false)
|
91
98
|
expect(states.size).to eq(1)
|
92
99
|
expect(states[0].dotted_rule).to eq(item1)
|
93
100
|
end
|
@@ -114,11 +121,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
114
121
|
|
115
122
|
context 'Parse tree building:' do
|
116
123
|
let(:sample_grammar1) do
|
117
|
-
builder =
|
118
|
-
builder.add_terminals('a', 'b', 'c')
|
119
|
-
builder.add_production('S' => ['A'])
|
120
|
-
builder.add_production('A' => %w(a A c))
|
121
|
-
builder.add_production('A' => ['b'])
|
124
|
+
builder = grammar_abc_builder
|
122
125
|
builder.grammar
|
123
126
|
end
|
124
127
|
|
@@ -128,13 +131,28 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
128
131
|
end
|
129
132
|
end
|
130
133
|
|
134
|
+
let(:b_expr_grammar) do
|
135
|
+
builder = grammar_expr_builder
|
136
|
+
builder.grammar
|
137
|
+
end
|
138
|
+
|
131
139
|
|
132
|
-
it 'should build the parse tree for a non-ambiguous grammar' do
|
140
|
+
it 'should build the parse tree for a simple non-ambiguous grammar' do
|
133
141
|
parser = EarleyParser.new(sample_grammar1)
|
134
142
|
instance = parser.parse(token_seq1)
|
135
143
|
ptree = instance.parse_tree
|
136
144
|
expect(ptree).to be_kind_of(PTree::ParseTree)
|
137
145
|
end
|
146
|
+
|
147
|
+
it 'should build the parse tree for a simple expression grammar' do
|
148
|
+
parser = EarleyParser.new(b_expr_grammar)
|
149
|
+
tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
|
150
|
+
instance = parser.parse(tokens)
|
151
|
+
ptree = instance.parse_tree
|
152
|
+
expect(ptree).to be_kind_of(PTree::ParseTree)
|
153
|
+
end
|
154
|
+
|
155
|
+
|
138
156
|
end # context
|
139
157
|
end # describe
|
140
158
|
end # module
|
@@ -2,6 +2,7 @@ require_relative '../../spec_helper'
|
|
2
2
|
|
3
3
|
require_relative '../support/grammar_abc_helper'
|
4
4
|
|
5
|
+
|
5
6
|
# Load the class under test
|
6
7
|
require_relative '../../../lib/rley/ptree/parse_tree'
|
7
8
|
|
@@ -9,7 +10,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
9
10
|
module PTree # Open this namespace to avoid module qualifier prefixes
|
10
11
|
describe ParseTree do
|
11
12
|
include GrammarABCHelper # Mix-in module with builder for grammar abc
|
12
|
-
|
13
|
+
|
14
|
+
|
13
15
|
let(:sample_grammar) do
|
14
16
|
builder = grammar_abc_builder
|
15
17
|
builder.grammar
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Load the builder class
|
2
|
+
require_relative '../../../lib/rley/syntax/grammar_builder'
|
3
|
+
require_relative '../../../lib/rley/parser/token'
|
4
|
+
|
5
|
+
|
6
|
+
module GrammarBExprHelper
|
7
|
+
# Factory method. Creates a grammar builder for a basic arithmetic
|
8
|
+
# expression grammar.
|
9
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
10
|
+
def grammar_expr_builder()
|
11
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
12
|
+
builder.add_terminals('+', '*', 'integer')
|
13
|
+
builder.add_production('P' => 'S')
|
14
|
+
builder.add_production('S' => %w(S + M))
|
15
|
+
builder.add_production('S' => 'M')
|
16
|
+
builder.add_production('M' => %w(M * T))
|
17
|
+
builder.add_production('M' => 'T')
|
18
|
+
builder.add_production('T' => 'integer')
|
19
|
+
builder
|
20
|
+
end
|
21
|
+
|
22
|
+
# Basic expression tokenizer
|
23
|
+
def expr_tokenizer(aText, aGrammar)
|
24
|
+
tokens = aText.scan(/\S+/).map do |lexeme|
|
25
|
+
case lexeme
|
26
|
+
when '+', '*'
|
27
|
+
terminal = aGrammar.name2symbol[lexeme]
|
28
|
+
when /^[-+]?\d+$/
|
29
|
+
terminal = aGrammar.name2symbol['integer']
|
30
|
+
else
|
31
|
+
msg = "Unknown input text '#{lexeme}'"
|
32
|
+
fail StandardError, msg
|
33
|
+
end
|
34
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
35
|
+
end
|
36
|
+
|
37
|
+
return tokens
|
38
|
+
end
|
39
|
+
end # module
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.11
|
4
|
+
version: 0.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -102,6 +102,7 @@ files:
|
|
102
102
|
- examples/grammars/grammar_abc.rb
|
103
103
|
- examples/grammars/grammar_L0.rb
|
104
104
|
- examples/parsers/parsing_abc.rb
|
105
|
+
- examples/parsers/parsing_b_expr.rb
|
105
106
|
- examples/recognizers/recognizer_abc.rb
|
106
107
|
- lib/rley.rb
|
107
108
|
- lib/rley/constants.rb
|
@@ -145,6 +146,7 @@ files:
|
|
145
146
|
- spec/rley/ptree/parse_tree_spec.rb
|
146
147
|
- spec/rley/ptree/token_range_spec.rb
|
147
148
|
- spec/rley/support/grammar_abc_helper.rb
|
149
|
+
- spec/rley/support/grammar_b_expr_helper.rb
|
148
150
|
- spec/rley/syntax/grammar_builder_spec.rb
|
149
151
|
- spec/rley/syntax/grammar_spec.rb
|
150
152
|
- spec/rley/syntax/grm_symbol_spec.rb
|