rley 0.2.04 → 0.2.05
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG.md +9 -0
- data/README.md +4 -5
- data/examples/parsers/parsing_ambig.rb +2 -2
- data/examples/parsers/parsing_err_expr.rb +15 -26
- data/examples/parsers/{parsing_tricky.rb → tracing_parser.rb} +2 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/earley_parser.rb +19 -21
- data/lib/rley/parser/state_set.rb +11 -0
- data/spec/rley/parser/earley_parser_spec.rb +8 -1
- data/spec/rley/parser/parsing_spec.rb +1 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MzRiMzQ3MzkwZGMzMDJjZDVlYjVjNGI0YzdmMzE3NDFkOTRkOWM3ZQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MDJjMDNjNmIxMWVmMDhkZmFjM2U1ZGQ4ZmFkM2ZjYmZjY2IzNzk0Yw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MmUwZGIwYWNkNzJhOWY4M2Y1YzE1MjljN2JmODg3ZWVlODJhYjI0NzRmMzky
|
10
|
+
NWQ3NzUyM2JhODU5M2I3MzYyY2IyMWMxZTA3ZDQxMTU0ODdmZmY5OTg5YmNi
|
11
|
+
MWU3OTViYzY3Y2E4NDgyMjhiMmUzNDk2NjY0MTdiYWUwYmFkYTE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZjAwNmI4MjNmNDEyOWZlMDM4YjA2YzM3MTBhMjc0MTcyZjliYjc3NWU0ZWIy
|
14
|
+
YjM2OTQzYWVkMDlhMGRkNmQ2OWNhYzhkM2IyNjNlNGNlMzEzMDA3MDYzM2Zj
|
15
|
+
ZTNmOTFjZTcyODRiNjAyNjMwNjQ3MzQ4ZDUyOTMyMGI4NTkxYjc=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
### 0.2.05 / 2015-03-19
|
2
|
+
* [NEW] Class `EarleyParser` implements a crude error detection mechanism. A syntax error causes an exception to be raised.
|
3
|
+
* [CHANGE] Example file `parsing_err_expr.rb`: demo of the syntax error message.
|
4
|
+
|
5
|
+
### 0.2.04 / 2015-03-04
|
6
|
+
* [NEW] Class `ParseTracer` that helps to trace the parse steps (similar to the trace format in NLTK).
|
7
|
+
* [CHANGE] Method `EarleyParser#parse` takes a trace level argument.
|
8
|
+
|
9
|
+
|
1
10
|
### 0.2.03 / 2015-02-06
|
2
11
|
* [FIX] File `.rubocop.yml`: removal of setting for obsolete EmptyLinesAroundBody cop.
|
3
12
|
* [CHANGE] Source code re-formatted to please Rubocop 0.29.
|
data/README.md
CHANGED
@@ -9,20 +9,19 @@ Rley
|
|
9
9
|
[![Dependency Status](https://gemnasium.com/famished-tiger/Rley.svg)](https://gemnasium.com/famished-tiger/Rley)
|
10
10
|
[![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt)
|
11
11
|
|
12
|
-
__Rley__ is a Ruby implementation of
|
12
|
+
__Rley__ is a Ruby implementation of a parser using the [Earley](http://en.wikipedia.org/wiki/Earley_parser) algorithm.
|
13
13
|
The project aims to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
|
14
14
|
|
15
15
|
|
16
16
|
Yet another parser?
|
17
17
|
Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
|
18
|
-
The latter are faster because they use
|
18
|
+
The latter are faster because they use optimized algorithms at the price of a loss of generality
|
19
19
|
in the grammar/language they support.
|
20
|
-
The Earley's algorithm being more general is able to parse input
|
21
|
-
|
20
|
+
Earley's algorithm, being more general, is able to parse input that conforms to any context-free grammar.
|
21
|
+
For instance, it copes with ambiguous grammars.
|
22
22
|
|
23
23
|
This project is in "earley" stage.
|
24
24
|
####Roadmap:
|
25
|
-
- Add examples (including small NLP grammar)
|
26
25
|
- Document the parser API
|
27
26
|
- Add more validation tests and sample grammars
|
28
27
|
- Add AST generation (and semantic actions?)
|
@@ -69,7 +69,7 @@ pp result
|
|
69
69
|
# Step 6. Generate a parse tree from the parse result
|
70
70
|
ptree = result.parse_tree
|
71
71
|
pp ptree
|
72
|
-
|
72
|
+
#=begin
|
73
73
|
########################################
|
74
74
|
# Step 7. Render the parse tree (in JSON)
|
75
75
|
# Let's create a parse tree visitor
|
@@ -81,5 +81,5 @@ renderer = Rley::Formatter::Json.new(STDOUT)
|
|
81
81
|
# Now emit the parse tree as JSON on the console output
|
82
82
|
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
83
83
|
renderer.render(visitor)
|
84
|
-
|
84
|
+
#=end
|
85
85
|
# End of file
|
@@ -1,5 +1,5 @@
|
|
1
|
-
# Purpose: to demonstrate how to
|
2
|
-
|
1
|
+
# Purpose: to demonstrate how to catch parsing errors
|
2
|
+
|
3
3
|
require 'pp' # TODO remove this dependency
|
4
4
|
require 'rley' # Load the gem
|
5
5
|
|
@@ -8,9 +8,8 @@ require 'rley' # Load the gem
|
|
8
8
|
# 2. Create a tokenizer for the language
|
9
9
|
# 3. Create a parser for that grammar
|
10
10
|
# 4. Tokenize the input
|
11
|
-
# 5. Let the parser process the input
|
12
|
-
|
13
|
-
# 7. Render the parse tree (in JSON)
|
11
|
+
# 5. Let the parser process the invalid input
|
12
|
+
|
14
13
|
|
15
14
|
########################################
|
16
15
|
# Step 1. Define a grammar for a very simple arithmetic expression language
|
@@ -55,31 +54,21 @@ end
|
|
55
54
|
parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
|
56
55
|
|
57
56
|
########################################
|
58
|
-
# Step
|
59
|
-
invalid_input = '2 + 3 * * 4'
|
57
|
+
# Step 4. Tokenize the invalid input
|
58
|
+
invalid_input = '2 + 3 * * 4' # Notice the repeated stars (*)
|
59
|
+
puts "Invalid expression to parse: #{invalid_input}"
|
60
|
+
puts ''
|
60
61
|
tokens = tokenizer(invalid_input, grammar_s_expr)
|
61
62
|
|
62
63
|
########################################
|
63
|
-
# Step 5. Let the
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
ptree = result.parse_tree
|
71
|
-
pp ptree
|
72
|
-
|
73
|
-
########################################
|
74
|
-
# Step 7. Render the parse tree (in JSON)
|
75
|
-
# Let's create a parse tree visitor
|
76
|
-
visitor = Rley::ParseTreeVisitor.new(ptree)
|
64
|
+
# Step 5. Let's catch the exception caused by a syntax error...
|
65
|
+
# ... and display the error message
|
66
|
+
begin
|
67
|
+
parser.parse(tokens)
|
68
|
+
rescue StandardError => exc
|
69
|
+
puts exc.message
|
70
|
+
end
|
77
71
|
|
78
|
-
#Here we create a renderer object...
|
79
|
-
renderer = Rley::Formatter::Json.new(STDOUT)
|
80
72
|
|
81
|
-
# Now emit the parse tree as JSON on the console output
|
82
|
-
puts "JSON rendering of the parse tree for '#{invalid_input}' input:"
|
83
|
-
renderer.render(visitor)
|
84
73
|
|
85
74
|
# End of file
|
@@ -46,7 +46,8 @@ valid_input = 'abcdefg'
|
|
46
46
|
tokens = tokenizer(valid_input, grammar_tricky)
|
47
47
|
|
48
48
|
########################################
|
49
|
-
# Step 5. Let the parser process the input
|
49
|
+
# Step 5. Let the parser process the input
|
50
|
+
# Force the parser to trace its parsing progress.
|
50
51
|
result = parser.parse(tokens, 1)
|
51
52
|
puts "Parsing success? #{result.success?}"
|
52
53
|
|
data/lib/rley/constants.rb
CHANGED
@@ -27,27 +27,6 @@ module Rley # This module is used as a namespace
|
|
27
27
|
@start_mapping = build_start_mapping(dotted_items)
|
28
28
|
@next_mapping = build_next_mapping(dotted_items)
|
29
29
|
end
|
30
|
-
|
31
|
-
=begin
|
32
|
-
You can optionally specify a tracing level, for how much output you
|
33
|
-
want to see:
|
34
|
-
|
35
|
-
0: No output.
|
36
|
-
1: Show edges from scanner and completer rules (not predictor).
|
37
|
-
2 (default): Show all edges as they are added to the chart.
|
38
|
-
|
39
|
-
- For each index I{end} in [0, 1, ..., N]:
|
40
|
-
- For each I{edge} s.t. I{edge}.end = I{end}:
|
41
|
-
- If I{edge} is incomplete, and I{edge}.next is not a part
|
42
|
-
of speech:
|
43
|
-
- Apply PredictorRule to I{edge}
|
44
|
-
- If I{edge} is incomplete, and I{edge}.next is a part of
|
45
|
-
speech:
|
46
|
-
- Apply ScannerRule to I{edge}
|
47
|
-
- If I{edge} is complete:
|
48
|
-
- Apply CompleterRule to I{edge}
|
49
|
-
- Return any complete parses in the chart
|
50
|
-
=end
|
51
30
|
|
52
31
|
# Parse a sequence of input tokens.
|
53
32
|
# @param aTokenSequence [Array] Array of Tokens objects returned by a
|
@@ -65,6 +44,7 @@ module Rley # This module is used as a namespace
|
|
65
44
|
result = Parsing.new(start_dotted_item, aTokenSequence, tracer)
|
66
45
|
last_token_index = aTokenSequence.size
|
67
46
|
(0..last_token_index).each do |i|
|
47
|
+
handle_error(result) if result.chart[i].empty?
|
68
48
|
predicted = Set.new
|
69
49
|
result.chart[i].each do |state|
|
70
50
|
if state.complete? # End of production reached?
|
@@ -220,6 +200,24 @@ module Rley # This module is used as a namespace
|
|
220
200
|
next_mapping[item]
|
221
201
|
end
|
222
202
|
end
|
203
|
+
|
204
|
+
# Raise an exception to indicate a syntax error.
|
205
|
+
def handle_error(aParsing)
|
206
|
+
# Retrieve the first empty state set
|
207
|
+
pos = aParsing.chart.state_sets.find_index(&:empty?)
|
208
|
+
lexeme_at_pos = aParsing.tokens[pos - 1].lexeme
|
209
|
+
|
210
|
+
terminals = aParsing.chart.state_sets[pos - 1].expected_terminals
|
211
|
+
err_msg = "Syntax error at or near token #{pos}"
|
212
|
+
err_msg << ">>>#{lexeme_at_pos}<<<:\nExpected "
|
213
|
+
if terminals.size > 1
|
214
|
+
err_msg << "one of: #{terminals},"
|
215
|
+
else
|
216
|
+
err_msg << ": #{terminals[0]},"
|
217
|
+
end
|
218
|
+
err_msg << " found a #{aParsing.tokens[pos-1].terminal} instead."
|
219
|
+
fail StandardError, err_msg
|
220
|
+
end
|
223
221
|
end # class
|
224
222
|
end # module
|
225
223
|
end # module
|
@@ -59,6 +59,17 @@ module Rley # This module is used as a namespace
|
|
59
59
|
|
60
60
|
return candidate
|
61
61
|
end
|
62
|
+
|
63
|
+
# The list of distinct expected terminal symbols. An expected symbol is on the
|
64
|
+
# right of a dot in a parse state of the parse set.
|
65
|
+
def expected_terminals()
|
66
|
+
expecting_terminals = states.select do |s|
|
67
|
+
s.dotted_rule.next_symbol.kind_of?(Rley::Syntax::Terminal)
|
68
|
+
end
|
69
|
+
|
70
|
+
terminals = expecting_terminals.map { |s| s.dotted_rule.next_symbol }
|
71
|
+
return terminals.uniq
|
72
|
+
end
|
62
73
|
|
63
74
|
private
|
64
75
|
|
@@ -557,7 +557,13 @@ SNIPPET
|
|
557
557
|
Token.new('c', c_),
|
558
558
|
Token.new('c', c_)
|
559
559
|
]
|
560
|
-
|
560
|
+
err_msg = <<-MSG
|
561
|
+
Syntax error at or near token 3>>>c<<<:
|
562
|
+
Expected one of: ['a', 'b'], found a 'c' instead.
|
563
|
+
MSG
|
564
|
+
err = StandardError
|
565
|
+
expect { subject.parse(wrong)}.to raise_error(err, err_msg.chomp)
|
566
|
+
=begin
|
561
567
|
expect(parse_result.success?).to eq(false)
|
562
568
|
|
563
569
|
###################### S(0) == . a a c c
|
@@ -589,6 +595,7 @@ SNIPPET
|
|
589
595
|
###################### S(3) == a a c? c
|
590
596
|
state_set_3 = parse_result.chart[3]
|
591
597
|
expect(state_set_3.states).to be_empty # This is an error symptom
|
598
|
+
=end
|
592
599
|
end
|
593
600
|
|
594
601
|
it 'should parse a grammar with nullable nonterminals' do
|
@@ -133,7 +133,7 @@ SNIPPET
|
|
133
133
|
expect(new_state.origin).to eq(0)
|
134
134
|
end
|
135
135
|
end # context
|
136
|
-
|
136
|
+
|
137
137
|
context 'Parse tree building:' do
|
138
138
|
let(:sample_grammar1) do
|
139
139
|
builder = grammar_abc_builder
|
@@ -401,7 +401,6 @@ SNIPPET
|
|
401
401
|
expect(actual).to eq(expected_text.chomp)
|
402
402
|
end
|
403
403
|
end # context
|
404
|
-
=end
|
405
404
|
end # describe
|
406
405
|
end # module
|
407
406
|
end # module
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.05
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -108,7 +108,7 @@ files:
|
|
108
108
|
- examples/parsers/parsing_groucho.rb
|
109
109
|
- examples/parsers/parsing_L0.rb
|
110
110
|
- examples/parsers/parsing_L1.rb
|
111
|
-
- examples/parsers/
|
111
|
+
- examples/parsers/tracing_parser.rb
|
112
112
|
- examples/recognizers/recognizer_abc.rb
|
113
113
|
- lib/rley.rb
|
114
114
|
- lib/rley/constants.rb
|