rley 0.1.11 → 0.1.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG.md +5 -0
- data/examples/parsers/parsing_b_expr.rb +85 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/parse_state.rb +5 -0
- data/lib/rley/parser/parsing.rb +23 -7
- data/lib/rley/parser/state_set.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +12 -1
- data/spec/rley/parser/parsing_spec.rb +25 -7
- data/spec/rley/ptree/parse_tree_spec.rb +3 -1
- data/spec/rley/support/grammar_b_expr_helper.rb +39 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NTc2NGE4MzYxOTc1ZDUyMDVkYjdmNGFhODllNmEwM2YxMjVkZDk1OQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZDBkMTdmZWM2NTMwYWMwNDFkNDQ4NGI2YzdkNjk3NDU0ZGExMGYzNA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YWZjYmQxNDNiNjVmMDYyYWI1YzM0YzMyN2VjMzk4ZjUxOTIyMmQxNTE4Y2Y4
|
10
|
+
YjM4NTAyNGNhNjhiOWRhNWMyZWVmYzRhYjFjNzhhOWEzMDY1ZTgzMzRiMTVh
|
11
|
+
Njg1MDRjMjQ5NTlhYmU3NTk2MDBiMWQyZmI3MDIyMWUwNGM1NjM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ODZiMmY1ZDMwNTVlNmM3ZGJmZDIzOTAzYjQ1MTFlMmY4OTBlODhhZWZhN2M3
|
14
|
+
MjEzYjI0YjZhNTA4NzkzYjJiMzMwY2Y0NzliMGMyZDdlMjI3NjkzMTliYWNh
|
15
|
+
YzQ2MGVmNWM1NTA5M2IxYjcxNzliNzhhMzViMDE4OTM3NWY1NjI=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.1.12 / 2014-12-22
|
2
|
+
* [FIX] Fixed `Parsing#parse_tree`: code couldn't cope with parse state set containing more
|
3
|
+
than one parse state that expected the same symbol.
|
4
|
+
* [NEW] Added one more parser example (for very basic arithmetic expression)
|
5
|
+
|
1
6
|
### 0.1.11 / 2014-12-16
|
2
7
|
* [FIX] Fixed all but one YARD (documentation) warnings. Most of them were due to mismatch
|
3
8
|
in method argument names between source code and documentation.
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Purpose: to demonstrate how to parse basic arithmetic expressions
|
2
|
+
# and render a parse tree
|
3
|
+
require 'pp' # TODO remove this dependency
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a grammar for a very simple arithmetic expression language
|
17
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
18
|
+
|
19
|
+
# Let's create the grammar piece by piece
|
20
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
21
|
+
builder.add_terminals('+', '*', 'integer')
|
22
|
+
builder.add_production('P' => 'S')
|
23
|
+
builder.add_production('S' => %w(S + M))
|
24
|
+
builder.add_production('S' => 'M')
|
25
|
+
builder.add_production('M' => %w(M * T))
|
26
|
+
builder.add_production('M' => 'T')
|
27
|
+
builder.add_production('T' => 'integer')
|
28
|
+
|
29
|
+
# And now build the grammar...
|
30
|
+
grammar_s_expr = builder.grammar
|
31
|
+
|
32
|
+
|
33
|
+
########################################
|
34
|
+
# Step 2. Create a tokenizer for the language
|
35
|
+
# The tokenizer transforms the input into an array of tokens
|
36
|
+
def tokenizer(aText, aGrammar)
|
37
|
+
tokens = aText.scan(/\S+/).map do |lexeme|
|
38
|
+
case lexeme
|
39
|
+
when '+', '*'
|
40
|
+
terminal = aGrammar.name2symbol[lexeme]
|
41
|
+
when /^[-+]?\d+$/
|
42
|
+
terminal = aGrammar.name2symbol['integer']
|
43
|
+
else
|
44
|
+
msg = "Unknown input text '#{lexeme}'"
|
45
|
+
fail StandardError, msg
|
46
|
+
end
|
47
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
48
|
+
end
|
49
|
+
|
50
|
+
return tokens
|
51
|
+
end
|
52
|
+
|
53
|
+
########################################
|
54
|
+
# Step 3. Create a parser for that grammar
|
55
|
+
parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
|
56
|
+
|
57
|
+
########################################
|
58
|
+
# Step 4. Tokenize the input
|
59
|
+
valid_input = '2 + 3 * 4'
|
60
|
+
tokens = tokenizer(valid_input, grammar_s_expr)
|
61
|
+
|
62
|
+
########################################
|
63
|
+
# Step 5. Let the parser process the input
|
64
|
+
result = parser.parse(tokens)
|
65
|
+
puts "Parse successful? #{result.success?}"
|
66
|
+
pp result
|
67
|
+
|
68
|
+
|
69
|
+
########################################
|
70
|
+
# Step 6. Generate a parse tree from the parse result
|
71
|
+
ptree = result.parse_tree
|
72
|
+
=begin
|
73
|
+
########################################
|
74
|
+
# Step 7. Render the parse tree (in JSON)
|
75
|
+
# Let's create a parse tree visitor
|
76
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
77
|
+
|
78
|
+
# Here we create a renderer object...
|
79
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
80
|
+
|
81
|
+
# Now emit the parse tree as JSON on the console output
|
82
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
83
|
+
renderer.render(visitor)
|
84
|
+
=end
|
85
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
@@ -31,6 +31,11 @@ module Rley # This module is used as a namespace
|
|
31
31
|
return dotted_rule.reduce_item?
|
32
32
|
end
|
33
33
|
|
34
|
+
# Returns true if the dot is at the start of the rhs of the production.
|
35
|
+
def predicted?()
|
36
|
+
return dotted_rule.predicted_item?
|
37
|
+
end
|
38
|
+
|
34
39
|
# Next expected symbol in the production
|
35
40
|
def next_symbol()
|
36
41
|
return dotted_rule.next_symbol
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require_relative 'chart'
|
2
2
|
require_relative '../ptree/parse_tree'
|
3
3
|
|
4
|
+
|
4
5
|
module Rley # This module is used as a namespace
|
5
6
|
module Parser # This module is used as a namespace
|
6
7
|
class Parsing
|
@@ -47,19 +48,23 @@ module Rley # This module is used as a namespace
|
|
47
48
|
parse_state)
|
48
49
|
|
49
50
|
when Syntax::NonTerminal
|
50
|
-
# Retrieve complete states
|
51
|
+
# Retrieve complete states with curr_symbol as lhs
|
51
52
|
new_states = chart[state_set_index].states_rewriting(curr_symbol)
|
52
53
|
# TODO: make this more robust
|
53
54
|
parse_state = new_states[0]
|
54
55
|
curr_dotted_item = parse_state.dotted_rule
|
56
|
+
# Additional check
|
57
|
+
if ptree.current_node.symbol != curr_dotted_item.production.lhs
|
58
|
+
ptree.step_back(state_set_index)
|
59
|
+
end
|
55
60
|
ptree.current_node.range = { low: parse_state.origin }
|
56
61
|
node_range = ptree.current_node.range
|
57
62
|
ptree.add_children(curr_dotted_item.production, node_range)
|
58
63
|
link_node_to_token(ptree, state_set_index - 1)
|
59
64
|
|
60
|
-
when NilClass
|
65
|
+
when NilClass # No symbol on the left of dot
|
61
66
|
lhs = curr_dotted_item.production.lhs
|
62
|
-
new_states = states_expecting(lhs, state_set_index)
|
67
|
+
new_states = states_expecting(lhs, state_set_index, true)
|
63
68
|
break if new_states.empty?
|
64
69
|
# TODO: make this more robust
|
65
70
|
parse_state = new_states[0]
|
@@ -98,7 +103,7 @@ module Rley # This module is used as a namespace
|
|
98
103
|
curr_token = tokens[aPosition]
|
99
104
|
return unless curr_token.terminal == aTerminal
|
100
105
|
|
101
|
-
states = states_expecting(aTerminal, aPosition)
|
106
|
+
states = states_expecting(aTerminal, aPosition, false)
|
102
107
|
states.each do |s|
|
103
108
|
next_item = nextMapping.call(s.dotted_rule)
|
104
109
|
push_state(next_item, s.origin, aPosition + 1)
|
@@ -119,7 +124,7 @@ module Rley # This module is used as a namespace
|
|
119
124
|
def completion(aState, aPosition, &nextMapping)
|
120
125
|
curr_origin = aState.origin
|
121
126
|
curr_lhs = aState.dotted_rule.lhs
|
122
|
-
states = states_expecting(curr_lhs, curr_origin)
|
127
|
+
states = states_expecting(curr_lhs, curr_origin, false)
|
123
128
|
states.each do |s|
|
124
129
|
next_item = nextMapping.call(s.dotted_rule)
|
125
130
|
push_state(next_item, s.origin, aPosition)
|
@@ -129,8 +134,19 @@ module Rley # This module is used as a namespace
|
|
129
134
|
|
130
135
|
# The list of ParseState from the chart entry at given position
|
131
136
|
# that expect the given terminal
|
132
|
-
def states_expecting(aTerminal, aPosition)
|
133
|
-
|
137
|
+
def states_expecting(aTerminal, aPosition, toSort)
|
138
|
+
expecting = chart[aPosition].states_expecting(aTerminal)
|
139
|
+
return expecting if !toSort || expecting.size < 2
|
140
|
+
|
141
|
+
# Put predicted states ahead
|
142
|
+
(predicted, others) = expecting.partition { |state| state.predicted? }
|
143
|
+
|
144
|
+
# Sort each group of states in descending order of their origin value
|
145
|
+
[predicted, others].each do |set|
|
146
|
+
set.sort! { |a,b| b.origin <=> a.origin }
|
147
|
+
end
|
148
|
+
|
149
|
+
return predicted + others
|
134
150
|
end
|
135
151
|
|
136
152
|
private
|
@@ -21,13 +21,15 @@ module Rley # This module is used as a namespace
|
|
21
21
|
@states << aState unless include?(aState)
|
22
22
|
end
|
23
23
|
|
24
|
-
# The list of ParseState that expect the given
|
25
|
-
|
26
|
-
|
24
|
+
# The list of ParseState that expect the given symbol.
|
25
|
+
# @param aSymbol [GrmSymbol] the expected symbol
|
26
|
+
# (=on the right of the dot)
|
27
|
+
def states_expecting(aSymbol)
|
28
|
+
return states.select { |s| s.dotted_rule.next_symbol == aSymbol }
|
27
29
|
end
|
28
30
|
|
29
|
-
# The list of complete ParseState that have the
|
30
|
-
# production
|
31
|
+
# The list of complete ParseState that have the given non-terminal
|
32
|
+
# symbol as the lhs of their production.
|
31
33
|
def states_rewriting(aNonTerm)
|
32
34
|
return states.select do |s|
|
33
35
|
(s.dotted_rule.production.lhs == aNonTerm) && s.complete?
|
@@ -33,8 +33,19 @@ module Rley # This module is used as a namespace
|
|
33
33
|
aVisitor.end_visit_ptree(self)
|
34
34
|
end
|
35
35
|
|
36
|
-
|
36
|
+
# Add children to the current node.
|
37
|
+
# The children nodes correspond to the rhs of the production.
|
38
|
+
# Update the range in the children given the passed range object.
|
39
|
+
# Pre-condition: the current node refers to the same (non-terminal)
|
40
|
+
# symbol of the lhs of the given production.
|
41
|
+
# @param aProduction [Production] A production rule
|
42
|
+
# @param aRange [TokenRange]
|
37
43
|
def add_children(aProduction, aRange)
|
44
|
+
if aProduction.lhs != current_node.symbol
|
45
|
+
msg = "Internal error. Expected symbol was #{aProduction.lhs} but current node is #{current_node.symbol}"
|
46
|
+
fail StandardError, msg
|
47
|
+
end
|
48
|
+
|
38
49
|
aProduction.rhs.each do |symb|
|
39
50
|
case symb
|
40
51
|
when Syntax::Terminal
|
@@ -7,12 +7,19 @@ require_relative '../../../lib/rley/syntax/grammar_builder'
|
|
7
7
|
require_relative '../../../lib/rley/parser/dotted_item'
|
8
8
|
require_relative '../../../lib/rley/parser/token'
|
9
9
|
require_relative '../../../lib/rley/parser/earley_parser'
|
10
|
+
require_relative '../support/grammar_abc_helper'
|
11
|
+
require_relative '../support/grammar_b_expr_helper'
|
12
|
+
|
13
|
+
|
10
14
|
# Load the class under test
|
11
15
|
require_relative '../../../lib/rley/parser/parsing'
|
12
16
|
|
13
17
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
14
18
|
module Parser # Open this namespace to avoid module qualifier prefixes
|
15
19
|
describe Parsing do
|
20
|
+
include GrammarABCHelper # Mix-in module with builder for grammar abc
|
21
|
+
include GrammarBExprHelper # Mix-in with builder for simple expressions
|
22
|
+
|
16
23
|
# Grammar 1: A very simple language
|
17
24
|
# S ::= A.
|
18
25
|
# A ::= "a" A "c".
|
@@ -87,7 +94,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
87
94
|
item2 = DottedItem.new(prod_A1, 1)
|
88
95
|
subject.push_state(item1, 2, 2)
|
89
96
|
subject.push_state(item2, 2, 2)
|
90
|
-
states = subject.states_expecting(c_, 2)
|
97
|
+
states = subject.states_expecting(c_, 2, false)
|
91
98
|
expect(states.size).to eq(1)
|
92
99
|
expect(states[0].dotted_rule).to eq(item1)
|
93
100
|
end
|
@@ -114,11 +121,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
114
121
|
|
115
122
|
context 'Parse tree building:' do
|
116
123
|
let(:sample_grammar1) do
|
117
|
-
builder =
|
118
|
-
builder.add_terminals('a', 'b', 'c')
|
119
|
-
builder.add_production('S' => ['A'])
|
120
|
-
builder.add_production('A' => %w(a A c))
|
121
|
-
builder.add_production('A' => ['b'])
|
124
|
+
builder = grammar_abc_builder
|
122
125
|
builder.grammar
|
123
126
|
end
|
124
127
|
|
@@ -128,13 +131,28 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
128
131
|
end
|
129
132
|
end
|
130
133
|
|
134
|
+
let(:b_expr_grammar) do
|
135
|
+
builder = grammar_expr_builder
|
136
|
+
builder.grammar
|
137
|
+
end
|
138
|
+
|
131
139
|
|
132
|
-
it 'should build the parse tree for a non-ambiguous grammar' do
|
140
|
+
it 'should build the parse tree for a simple non-ambiguous grammar' do
|
133
141
|
parser = EarleyParser.new(sample_grammar1)
|
134
142
|
instance = parser.parse(token_seq1)
|
135
143
|
ptree = instance.parse_tree
|
136
144
|
expect(ptree).to be_kind_of(PTree::ParseTree)
|
137
145
|
end
|
146
|
+
|
147
|
+
it 'should build the parse tree for a simple expression grammar' do
|
148
|
+
parser = EarleyParser.new(b_expr_grammar)
|
149
|
+
tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
|
150
|
+
instance = parser.parse(tokens)
|
151
|
+
ptree = instance.parse_tree
|
152
|
+
expect(ptree).to be_kind_of(PTree::ParseTree)
|
153
|
+
end
|
154
|
+
|
155
|
+
|
138
156
|
end # context
|
139
157
|
end # describe
|
140
158
|
end # module
|
@@ -2,6 +2,7 @@ require_relative '../../spec_helper'
|
|
2
2
|
|
3
3
|
require_relative '../support/grammar_abc_helper'
|
4
4
|
|
5
|
+
|
5
6
|
# Load the class under test
|
6
7
|
require_relative '../../../lib/rley/ptree/parse_tree'
|
7
8
|
|
@@ -9,7 +10,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
9
10
|
module PTree # Open this namespace to avoid module qualifier prefixes
|
10
11
|
describe ParseTree do
|
11
12
|
include GrammarABCHelper # Mix-in module with builder for grammar abc
|
12
|
-
|
13
|
+
|
14
|
+
|
13
15
|
let(:sample_grammar) do
|
14
16
|
builder = grammar_abc_builder
|
15
17
|
builder.grammar
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Load the builder class
|
2
|
+
require_relative '../../../lib/rley/syntax/grammar_builder'
|
3
|
+
require_relative '../../../lib/rley/parser/token'
|
4
|
+
|
5
|
+
|
6
|
+
module GrammarBExprHelper
|
7
|
+
# Factory method. Creates a grammar builder for a basic arithmetic
|
8
|
+
# expression grammar.
|
9
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
10
|
+
def grammar_expr_builder()
|
11
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
12
|
+
builder.add_terminals('+', '*', 'integer')
|
13
|
+
builder.add_production('P' => 'S')
|
14
|
+
builder.add_production('S' => %w(S + M))
|
15
|
+
builder.add_production('S' => 'M')
|
16
|
+
builder.add_production('M' => %w(M * T))
|
17
|
+
builder.add_production('M' => 'T')
|
18
|
+
builder.add_production('T' => 'integer')
|
19
|
+
builder
|
20
|
+
end
|
21
|
+
|
22
|
+
# Basic expression tokenizer
|
23
|
+
def expr_tokenizer(aText, aGrammar)
|
24
|
+
tokens = aText.scan(/\S+/).map do |lexeme|
|
25
|
+
case lexeme
|
26
|
+
when '+', '*'
|
27
|
+
terminal = aGrammar.name2symbol[lexeme]
|
28
|
+
when /^[-+]?\d+$/
|
29
|
+
terminal = aGrammar.name2symbol['integer']
|
30
|
+
else
|
31
|
+
msg = "Unknown input text '#{lexeme}'"
|
32
|
+
fail StandardError, msg
|
33
|
+
end
|
34
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
35
|
+
end
|
36
|
+
|
37
|
+
return tokens
|
38
|
+
end
|
39
|
+
end # module
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.11
|
4
|
+
version: 0.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-16 00:00:00.000000000 Z
|
11
|
+
date: 2014-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -102,6 +102,7 @@ files:
|
|
102
102
|
- examples/grammars/grammar_abc.rb
|
103
103
|
- examples/grammars/grammar_L0.rb
|
104
104
|
- examples/parsers/parsing_abc.rb
|
105
|
+
- examples/parsers/parsing_b_expr.rb
|
105
106
|
- examples/recognizers/recognizer_abc.rb
|
106
107
|
- lib/rley.rb
|
107
108
|
- lib/rley/constants.rb
|
@@ -145,6 +146,7 @@ files:
|
|
145
146
|
- spec/rley/ptree/parse_tree_spec.rb
|
146
147
|
- spec/rley/ptree/token_range_spec.rb
|
147
148
|
- spec/rley/support/grammar_abc_helper.rb
|
149
|
+
- spec/rley/support/grammar_b_expr_helper.rb
|
148
150
|
- spec/rley/syntax/grammar_builder_spec.rb
|
149
151
|
- spec/rley/syntax/grammar_spec.rb
|
150
152
|
- spec/rley/syntax/grm_symbol_spec.rb
|