rley 0.0.05 → 0.0.06
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG.md +7 -0
- data/README.md +17 -2
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/earley_parser.rb +3 -3
- data/spec/rley/parser/earley_parser_spec.rb +33 -5
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NzBkOTQ2MmQ2MDcxZGFjZTM5YzNlZGY5NDg1MTFhOWVjYzFjNWFkYg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZjEwNzMyNWRjMmUxMGU1YjJmYzQxODg5NjA4M2YzNWU4NmVhNzcyYw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MWNhOTQyOGQ3NzcwMzcxY2FlYTgzMWJhMGNlYmQzYzM2ZTAwNjdlZjJkOGE1
|
10
|
+
ODZiODkxMGFlNzc5NjUxNzA3OTVjNzFkNzY1NGQ2MTc4MDRjMTg0YWRhODc5
|
11
|
+
ZTU3NDhjZmY3ZDE2YjM3NmRjZjA0MTVmNjNmZGQxYmE4YTg5YmM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MzQ2MmRkMjIyMWQxZGI1MTBmOTdhMDY1ZTYxYjc0MGExZGJjZjFkMjZlOTM3
|
14
|
+
MTk0NjdhMjUwNjAwYWQzMzBlOTdhNzg5ZmFlNWFjNjlhOWM3ZjMxMWJlOTFj
|
15
|
+
NjVlODk3NjBkYTQ3OTFjODBlMjQ2ZjZjNTA5ZGIxZjdiYWIxYzA=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
### 0.0.06 / 2014-11-13
|
2
|
+
* [CHANGE] File `README.md`: Added roadmap section.
|
3
|
+
* [FIX] `EarleyParser#parse`: prevent call to `scanning` method after last token encountered.
|
4
|
+
|
5
|
+
### 0.0.05 / 2014-11-13
|
6
|
+
* [CHANGE] Code re-styling to please Rubocop 0.27.0 (less than 10 offenses).
|
7
|
+
|
1
8
|
### 0.0.04 / 2014-11-12
|
2
9
|
* [CHANGE] Class `DottedItem` moved to `Rley` module.
|
3
10
|
|
data/README.md
CHANGED
@@ -7,12 +7,27 @@ Rley
|
|
7
7
|
[![Gem Version](https://badge.fury.io/rb/rley.svg)](http://badge.fury.io/rb/rley)
|
8
8
|
|
9
9
|
### What is Rley? ###
|
10
|
-
__Rley__ is a Ruby implementation of a Earley parser.
|
10
|
+
__Rley__ is a Ruby implementation of an Earley parser.
|
11
11
|
The objective is to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
|
12
12
|
|
13
|
-
|
13
|
+
Yet another parser?
|
14
|
+
Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
|
15
|
+
The latter are faster because they use faster algorithms at the price of a loss of generality
|
16
|
+
in the grammar/language they support.
|
17
|
+
Earley's algorithm, being more general, is able to parse input without imposing restrictions on the context-free grammar.
|
14
18
|
Consult Wikipedia to learn more about Earley's parsing algorithm.
|
15
19
|
|
20
|
+
This project is at an "early" stage.
|
21
|
+
#### Roadmap:
|
22
|
+
- Add more validation tests and sample grammars
|
23
|
+
- Add AST generation (and semantic actions?)
|
24
|
+
- Add DSL for grammar specification
|
25
|
+
- Add grammar validations
|
26
|
+
- Add error reporting
|
27
|
+
- Add examples (including small NLP grammar)
|
28
|
+
- Add a command-line interface
|
29
|
+
- Provide documentation and examples
|
30
|
+
|
16
31
|
|
17
32
|
Copyright
|
18
33
|
---------
|
data/lib/rley/constants.rb
CHANGED
@@ -30,8 +30,8 @@ module Rley # This module is used as a namespace
|
|
30
30
|
|
31
31
|
def parse(aTokenSequence)
|
32
32
|
result = Parsing.new(start_dotted_item, aTokenSequence)
|
33
|
-
|
34
|
-
(0..
|
33
|
+
last_token_index = aTokenSequence.size
|
34
|
+
(0..last_token_index).each do |i|
|
35
35
|
result.chart[i].each do |state|
|
36
36
|
if state.complete?
|
37
37
|
# parse reached end of production
|
@@ -40,7 +40,7 @@ module Rley # This module is used as a namespace
|
|
40
40
|
next_symbol = state.next_symbol
|
41
41
|
if next_symbol.kind_of?(Syntax::NonTerminal)
|
42
42
|
prediction(result, next_symbol, i)
|
43
|
-
|
43
|
+
elsif i < last_token_index
|
44
44
|
# Expecting a terminal symbol
|
45
45
|
scanning(result, next_symbol, i)
|
46
46
|
end
|
@@ -33,7 +33,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
33
33
|
end
|
34
34
|
=end
|
35
35
|
|
36
|
-
# Grammar 1: A very simple language
|
36
|
+
# Grammar 1: A very simple language
|
37
|
+
# (based on example in N. Wirth "Compiler Construction" book, p. 6)
|
37
38
|
# S ::= A.
|
38
39
|
# A ::= "a" A "c".
|
39
40
|
# A ::= "b".
|
@@ -64,14 +65,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
64
65
|
|
65
66
|
|
66
67
|
# Grammar 2: A simple arithmetic expression language
|
67
|
-
#
|
68
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
69
|
+
# P ::= S.
|
68
70
|
# S ::= S "+" M.
|
69
71
|
# S ::= M.
|
70
72
|
# M ::= M "*" M.
|
71
73
|
# M ::= T.
|
72
74
|
# T ::= an integer number token.
|
73
75
|
# Let's create the grammar piece by piece
|
74
|
-
let(:
|
76
|
+
let(:nt_P) { Syntax::NonTerminal.new('P') }
|
75
77
|
let(:nt_M) { Syntax::NonTerminal.new('M') }
|
76
78
|
let(:nt_T) { Syntax::NonTerminal.new('T') }
|
77
79
|
let(:plus) { Syntax::VerbatimSymbol.new('+') }
|
@@ -80,14 +82,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
80
82
|
integer_pattern = /[-+]?[0-9]+/ # Decimal notation
|
81
83
|
Syntax::Literal.new('integer', integer_pattern)
|
82
84
|
end
|
83
|
-
let(:
|
85
|
+
let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
|
84
86
|
let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
|
85
87
|
let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
|
86
88
|
let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_M]) }
|
87
89
|
let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
|
88
90
|
let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
|
89
91
|
let(:grammar_expr) do
|
90
|
-
all_prods = [
|
92
|
+
all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
|
91
93
|
Syntax::Grammar.new(all_prods)
|
92
94
|
end
|
93
95
|
|
@@ -242,6 +244,32 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
242
244
|
]
|
243
245
|
compare_state_set(state_set_5, expectations)
|
244
246
|
end
|
247
|
+
|
248
|
+
it 'should parse a valid simple expression' do
|
249
|
+
instance = EarleyParser.new(grammar_expr)
|
250
|
+
parse_result = instance.parse(grm2_tokens)
|
251
|
+
expect(parse_result.success?).to eq(true)
|
252
|
+
|
253
|
+
######################
|
254
|
+
# Expectation chart[0]:
|
255
|
+
# (1) P -> . S, 0 # start rule
|
256
|
+
# (2) S -> . S "+" M, 0 # predict from (1)
|
257
|
+
# (3) S -> . M, 0 # predict from (1)
|
258
|
+
# (4) M -> . M "*" M, 0 # predict from (3)
|
259
|
+
# (5) M -> . T, 0 # predict from (3)
|
260
|
+
# (6) T -> . integer, 0 # predict from (5)
|
261
|
+
expectations = [
|
262
|
+
{ origin: 0, production: prod_P, dot: 0 },
|
263
|
+
{ origin: 0, production: prod_S1, dot: 0 },
|
264
|
+
{ origin: 0, production: prod_S2, dot: 0 },
|
265
|
+
{ origin: 0, production: prod_M1, dot: 0 },
|
266
|
+
{ origin: 0, production: prod_M2, dot: 0 },
|
267
|
+
{ origin: 0, production: prod_T, dot: 0 }
|
268
|
+
]
|
269
|
+
compare_state_set(parse_result.chart[0], expectations)
|
270
|
+
|
271
|
+
end
|
272
|
+
|
245
273
|
|
246
274
|
it 'should parse an invalid simple input' do
|
247
275
|
# Parse an erroneous input (b is missing)
|