rley 0.2.04 → 0.2.05
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/CHANGELOG.md +9 -0
- data/README.md +4 -5
- data/examples/parsers/parsing_ambig.rb +2 -2
- data/examples/parsers/parsing_err_expr.rb +15 -26
- data/examples/parsers/{parsing_tricky.rb → tracing_parser.rb} +2 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/earley_parser.rb +19 -21
- data/lib/rley/parser/state_set.rb +11 -0
- data/spec/rley/parser/earley_parser_spec.rb +8 -1
- data/spec/rley/parser/parsing_spec.rb +1 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MzRiMzQ3MzkwZGMzMDJjZDVlYjVjNGI0YzdmMzE3NDFkOTRkOWM3ZQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MDJjMDNjNmIxMWVmMDhkZmFjM2U1ZGQ4ZmFkM2ZjYmZjY2IzNzk0Yw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MmUwZGIwYWNkNzJhOWY4M2Y1YzE1MjljN2JmODg3ZWVlODJhYjI0NzRmMzky
|
10
|
+
NWQ3NzUyM2JhODU5M2I3MzYyY2IyMWMxZTA3ZDQxMTU0ODdmZmY5OTg5YmNi
|
11
|
+
MWU3OTViYzY3Y2E4NDgyMjhiMmUzNDk2NjY0MTdiYWUwYmFkYTE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZjAwNmI4MjNmNDEyOWZlMDM4YjA2YzM3MTBhMjc0MTcyZjliYjc3NWU0ZWIy
|
14
|
+
YjM2OTQzYWVkMDlhMGRkNmQ2OWNhYzhkM2IyNjNlNGNlMzEzMDA3MDYzM2Zj
|
15
|
+
ZTNmOTFjZTcyODRiNjAyNjMwNjQ3MzQ4ZDUyOTMyMGI4NTkxYjc=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
### 0.2.05 / 2015-03-19
|
2
|
+
* [NEW] Class `EarleyParser` implements a crude error detection mechanism. A syntax error causes an exception to be raised.
|
3
|
+
* [CHANGE] Example file `parsing_err_expr.rb`: demonstrates the syntax error message.
|
4
|
+
|
5
|
+
### 0.2.04 / 2015-03-04
|
6
|
+
* [NEW] Class `ParseTracer` that helps to trace the parse steps (similar to the trace format in NLTK).
|
7
|
+
* [CHANGE] Method `EarleyParser#parse` takes a trace level argument.
|
8
|
+
|
9
|
+
|
1
10
|
### 0.2.03 / 2015-02-06
|
2
11
|
* [FIX] File `.rubocop.yml`: removal of setting for obsolete EmptyLinesAroundBody cop.
|
3
12
|
* [CHANGE] Source code re-formatted to please Rubocop 0.29.
|
data/README.md
CHANGED
@@ -9,20 +9,19 @@ Rley
|
|
9
9
|
[](https://gemnasium.com/famished-tiger/Rley)
|
10
10
|
[](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt)
|
11
11
|
|
12
|
-
__Rley__ is a Ruby implementation of
|
12
|
+
__Rley__ is a Ruby implementation of a parser using the [Earley](http://en.wikipedia.org/wiki/Earley_parser) algorithm.
|
13
13
|
The project aims to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
|
14
14
|
|
15
15
|
|
16
16
|
Yet another parser?
|
17
17
|
Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
|
18
|
-
The latter are faster because they use
|
18
|
+
The latter are faster because they use optimized algorithms at the price of a loss of generality
|
19
19
|
in the grammar/language they support.
|
20
|
-
The Earley's algorithm being more general is able to parse input
|
21
|
-
|
20
|
+
Earley's algorithm, being more general, is able to parse input that conforms to any context-free grammar.
|
21
|
+
For instance, it copes with ambiguous grammars.
|
22
22
|
|
23
23
|
This project is in "earley" stage.
|
24
24
|
####Roadmap:
|
25
|
-
- Add examples (including small NLP grammar)
|
26
25
|
- Document the parser API
|
27
26
|
- Add more validation tests and sample grammars
|
28
27
|
- Add AST generation (and semantic actions?)
|
@@ -69,7 +69,7 @@ pp result
|
|
69
69
|
# Step 6. Generate a parse tree from the parse result
|
70
70
|
ptree = result.parse_tree
|
71
71
|
pp ptree
|
72
|
-
|
72
|
+
#=begin
|
73
73
|
########################################
|
74
74
|
# Step 7. Render the parse tree (in JSON)
|
75
75
|
# Let's create a parse tree visitor
|
@@ -81,5 +81,5 @@ renderer = Rley::Formatter::Json.new(STDOUT)
|
|
81
81
|
# Now emit the parse tree as JSON on the console output
|
82
82
|
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
83
83
|
renderer.render(visitor)
|
84
|
-
|
84
|
+
#=end
|
85
85
|
# End of file
|
@@ -1,5 +1,5 @@
|
|
1
|
-
# Purpose: to demonstrate how to
|
2
|
-
|
1
|
+
# Purpose: to demonstrate how to catch parsing errors
|
2
|
+
|
3
3
|
require 'pp' # TODO remove this dependency
|
4
4
|
require 'rley' # Load the gem
|
5
5
|
|
@@ -8,9 +8,8 @@ require 'rley' # Load the gem
|
|
8
8
|
# 2. Create a tokenizer for the language
|
9
9
|
# 3. Create a parser for that grammar
|
10
10
|
# 4. Tokenize the input
|
11
|
-
# 5. Let the parser process the input
|
12
|
-
|
13
|
-
# 7. Render the parse tree (in JSON)
|
11
|
+
# 5. Let the parser process the invalid input
|
12
|
+
|
14
13
|
|
15
14
|
########################################
|
16
15
|
# Step 1. Define a grammar for a very simple arithmetic expression language
|
@@ -55,31 +54,21 @@ end
|
|
55
54
|
parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
|
56
55
|
|
57
56
|
########################################
|
58
|
-
# Step
|
59
|
-
invalid_input = '2 + 3 * * 4'
|
57
|
+
# Step 4. Tokenize the invalid input
|
58
|
+
invalid_input = '2 + 3 * * 4' # Notice the repeated stars (*)
|
59
|
+
puts "Invalid expression to parse: #{invalid_input}"
|
60
|
+
puts ''
|
60
61
|
tokens = tokenizer(invalid_input, grammar_s_expr)
|
61
62
|
|
62
63
|
########################################
|
63
|
-
# Step 5. Let the
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
ptree = result.parse_tree
|
71
|
-
pp ptree
|
72
|
-
|
73
|
-
########################################
|
74
|
-
# Step 7. Render the parse tree (in JSON)
|
75
|
-
# Let's create a parse tree visitor
|
76
|
-
visitor = Rley::ParseTreeVisitor.new(ptree)
|
64
|
+
# Step 5. Let's catch the exception caused by a syntax error...
|
65
|
+
# ... and display the error message
|
66
|
+
begin
|
67
|
+
parser.parse(tokens)
|
68
|
+
rescue StandardError => exc
|
69
|
+
puts exc.message
|
70
|
+
end
|
77
71
|
|
78
|
-
#Here we create a renderer object...
|
79
|
-
renderer = Rley::Formatter::Json.new(STDOUT)
|
80
72
|
|
81
|
-
# Now emit the parse tree as JSON on the console output
|
82
|
-
puts "JSON rendering of the parse tree for '#{invalid_input}' input:"
|
83
|
-
renderer.render(visitor)
|
84
73
|
|
85
74
|
# End of file
|
@@ -46,7 +46,8 @@ valid_input = 'abcdefg'
|
|
46
46
|
tokens = tokenizer(valid_input, grammar_tricky)
|
47
47
|
|
48
48
|
########################################
|
49
|
-
# Step 5. Let the parser process the input
|
49
|
+
# Step 5. Let the parser process the input
|
50
|
+
# Force the parser to trace its parsing progress.
|
50
51
|
result = parser.parse(tokens, 1)
|
51
52
|
puts "Parsing success? #{result.success?}"
|
52
53
|
|
data/lib/rley/constants.rb
CHANGED
@@ -27,27 +27,6 @@ module Rley # This module is used as a namespace
|
|
27
27
|
@start_mapping = build_start_mapping(dotted_items)
|
28
28
|
@next_mapping = build_next_mapping(dotted_items)
|
29
29
|
end
|
30
|
-
|
31
|
-
=begin
|
32
|
-
You can optionally specify a tracing level, for how much output you
|
33
|
-
want to see:
|
34
|
-
|
35
|
-
0: No output.
|
36
|
-
1: Show edges from scanner and completer rules (not predictor).
|
37
|
-
2 (default): Show all edges as they are added to the chart.
|
38
|
-
|
39
|
-
- For each index I{end} in [0, 1, ..., N]:
|
40
|
-
- For each I{edge} s.t. I{edge}.end = I{end}:
|
41
|
-
- If I{edge} is incomplete, and I{edge}.next is not a part
|
42
|
-
of speech:
|
43
|
-
- Apply PredictorRule to I{edge}
|
44
|
-
- If I{edge} is incomplete, and I{edge}.next is a part of
|
45
|
-
speech:
|
46
|
-
- Apply ScannerRule to I{edge}
|
47
|
-
- If I{edge} is complete:
|
48
|
-
- Apply CompleterRule to I{edge}
|
49
|
-
- Return any complete parses in the chart
|
50
|
-
=end
|
51
30
|
|
52
31
|
# Parse a sequence of input tokens.
|
53
32
|
# @param aTokenSequence [Array] Array of Tokens objects returned by a
|
@@ -65,6 +44,7 @@ module Rley # This module is used as a namespace
|
|
65
44
|
result = Parsing.new(start_dotted_item, aTokenSequence, tracer)
|
66
45
|
last_token_index = aTokenSequence.size
|
67
46
|
(0..last_token_index).each do |i|
|
47
|
+
handle_error(result) if result.chart[i].empty?
|
68
48
|
predicted = Set.new
|
69
49
|
result.chart[i].each do |state|
|
70
50
|
if state.complete? # End of production reached?
|
@@ -220,6 +200,24 @@ module Rley # This module is used as a namespace
|
|
220
200
|
next_mapping[item]
|
221
201
|
end
|
222
202
|
end
|
203
|
+
|
204
|
+
# Raise an exception to indicate a syntax error.
|
205
|
+
def handle_error(aParsing)
|
206
|
+
# Retrieve the first empty state set
|
207
|
+
pos = aParsing.chart.state_sets.find_index(&:empty?)
|
208
|
+
lexeme_at_pos = aParsing.tokens[pos - 1].lexeme
|
209
|
+
|
210
|
+
terminals = aParsing.chart.state_sets[pos - 1].expected_terminals
|
211
|
+
err_msg = "Syntax error at or near token #{pos}"
|
212
|
+
err_msg << ">>>#{lexeme_at_pos}<<<:\nExpected "
|
213
|
+
if terminals.size > 1
|
214
|
+
err_msg << "one of: #{terminals},"
|
215
|
+
else
|
216
|
+
err_msg << ": #{terminals[0]},"
|
217
|
+
end
|
218
|
+
err_msg << " found a #{aParsing.tokens[pos-1].terminal} instead."
|
219
|
+
fail StandardError, err_msg
|
220
|
+
end
|
223
221
|
end # class
|
224
222
|
end # module
|
225
223
|
end # module
|
@@ -59,6 +59,17 @@ module Rley # This module is used as a namespace
|
|
59
59
|
|
60
60
|
return candidate
|
61
61
|
end
|
62
|
+
|
63
|
+
# The list of distinct expected terminal symbols. An expected symbol is on the
|
64
|
+
# right of a dot in a parse state of the parse set.
|
65
|
+
def expected_terminals()
|
66
|
+
expecting_terminals = states.select do |s|
|
67
|
+
s.dotted_rule.next_symbol.kind_of?(Rley::Syntax::Terminal)
|
68
|
+
end
|
69
|
+
|
70
|
+
terminals = expecting_terminals.map { |s| s.dotted_rule.next_symbol }
|
71
|
+
return terminals.uniq
|
72
|
+
end
|
62
73
|
|
63
74
|
private
|
64
75
|
|
@@ -557,7 +557,13 @@ SNIPPET
|
|
557
557
|
Token.new('c', c_),
|
558
558
|
Token.new('c', c_)
|
559
559
|
]
|
560
|
-
|
560
|
+
err_msg = <<-MSG
|
561
|
+
Syntax error at or near token 3>>>c<<<:
|
562
|
+
Expected one of: ['a', 'b'], found a 'c' instead.
|
563
|
+
MSG
|
564
|
+
err = StandardError
|
565
|
+
expect { subject.parse(wrong)}.to raise_error(err, err_msg.chomp)
|
566
|
+
=begin
|
561
567
|
expect(parse_result.success?).to eq(false)
|
562
568
|
|
563
569
|
###################### S(0) == . a a c c
|
@@ -589,6 +595,7 @@ SNIPPET
|
|
589
595
|
###################### S(3) == a a c? c
|
590
596
|
state_set_3 = parse_result.chart[3]
|
591
597
|
expect(state_set_3.states).to be_empty # This is an error symptom
|
598
|
+
=end
|
592
599
|
end
|
593
600
|
|
594
601
|
it 'should parse a grammar with nullable nonterminals' do
|
@@ -133,7 +133,7 @@ SNIPPET
|
|
133
133
|
expect(new_state.origin).to eq(0)
|
134
134
|
end
|
135
135
|
end # context
|
136
|
-
|
136
|
+
|
137
137
|
context 'Parse tree building:' do
|
138
138
|
let(:sample_grammar1) do
|
139
139
|
builder = grammar_abc_builder
|
@@ -401,7 +401,6 @@ SNIPPET
|
|
401
401
|
expect(actual).to eq(expected_text.chomp)
|
402
402
|
end
|
403
403
|
end # context
|
404
|
-
=end
|
405
404
|
end # describe
|
406
405
|
end # module
|
407
406
|
end # module
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.05
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -108,7 +108,7 @@ files:
|
|
108
108
|
- examples/parsers/parsing_groucho.rb
|
109
109
|
- examples/parsers/parsing_L0.rb
|
110
110
|
- examples/parsers/parsing_L1.rb
|
111
|
-
- examples/parsers/
|
111
|
+
- examples/parsers/tracing_parser.rb
|
112
112
|
- examples/recognizers/recognizer_abc.rb
|
113
113
|
- lib/rley.rb
|
114
114
|
- lib/rley/constants.rb
|