rley 0.0.05 → 0.0.06
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/CHANGELOG.md +7 -0
- data/README.md +17 -2
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/earley_parser.rb +3 -3
- data/spec/rley/parser/earley_parser_spec.rb +33 -5
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NzBkOTQ2MmQ2MDcxZGFjZTM5YzNlZGY5NDg1MTFhOWVjYzFjNWFkYg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZjEwNzMyNWRjMmUxMGU1YjJmYzQxODg5NjA4M2YzNWU4NmVhNzcyYw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MWNhOTQyOGQ3NzcwMzcxY2FlYTgzMWJhMGNlYmQzYzM2ZTAwNjdlZjJkOGE1
|
10
|
+
ODZiODkxMGFlNzc5NjUxNzA3OTVjNzFkNzY1NGQ2MTc4MDRjMTg0YWRhODc5
|
11
|
+
ZTU3NDhjZmY3ZDE2YjM3NmRjZjA0MTVmNjNmZGQxYmE4YTg5YmM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MzQ2MmRkMjIyMWQxZGI1MTBmOTdhMDY1ZTYxYjc0MGExZGJjZjFkMjZlOTM3
|
14
|
+
MTk0NjdhMjUwNjAwYWQzMzBlOTdhNzg5ZmFlNWFjNjlhOWM3ZjMxMWJlOTFj
|
15
|
+
NjVlODk3NjBkYTQ3OTFjODBlMjQ2ZjZjNTA5ZGIxZjdiYWIxYzA=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
### 0.0.06 / 2014-11-13
|
2
|
+
* [CHANGE] File `README.md`: Added roadmap section.
|
3
|
+
* [FIX] `EarleyParser#parse`: prevent call to `scanning` method after last token encountered.
|
4
|
+
|
5
|
+
### 0.0.05 / 2014-11-13
|
6
|
+
* [CHANGE] Code re-styling to please Rubocop 0.27.0 (less than 10 offenses).
|
7
|
+
|
1
8
|
### 0.0.04 / 2014-11-12
|
2
9
|
* [CHANGE] Class `DottedItem` moved to `Rley` module.
|
3
10
|
|
data/README.md
CHANGED
@@ -7,12 +7,27 @@ Rley
|
|
7
7
|
[![Gem Version](https://badge.fury.io/rb/rley.svg)](http://badge.fury.io/rb/rley)
|
8
8
|
|
9
9
|
### What is Rley? ###
|
10
|
-
__Rley__ is a Ruby implementation of an Earley parser.
|
10
|
+
__Rley__ is a Ruby implementation of an Earley parser.
|
11
11
|
The objective is to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
|
12
12
|
|
13
|
-
|
13
|
+
Yet another parser?
|
14
|
+
Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
|
15
|
+
The latter are faster because they use faster algorithms at the price of a loss of generality
|
16
|
+
in the grammar/language they support.
|
17
|
+
Earley's algorithm, being more general, is able to parse input without imposing restrictions on the context-free grammar.
|
14
18
|
Consult Wikipedia to learn more about Earley's parsing algorithm.
|
15
19
|
|
20
|
+
This project is in an "early" stage.
|
21
|
+
#### Roadmap:
|
22
|
+
- Add more validation tests and sample grammars
|
23
|
+
- Add AST generation (and semantic actions?)
|
24
|
+
- Add DSL for grammar specification
|
25
|
+
- Add grammar validations
|
26
|
+
- Add error reporting
|
27
|
+
- Add examples (including small NLP grammar)
|
28
|
+
- Add a command-line interface
|
29
|
+
- Provide documentation and examples
|
30
|
+
|
16
31
|
|
17
32
|
Copyright
|
18
33
|
---------
|
data/lib/rley/constants.rb
CHANGED
@@ -30,8 +30,8 @@ module Rley # This module is used as a namespace
|
|
30
30
|
|
31
31
|
def parse(aTokenSequence)
|
32
32
|
result = Parsing.new(start_dotted_item, aTokenSequence)
|
33
|
-
|
34
|
-
(0..
|
33
|
+
last_token_index = aTokenSequence.size
|
34
|
+
(0..last_token_index).each do |i|
|
35
35
|
result.chart[i].each do |state|
|
36
36
|
if state.complete?
|
37
37
|
# parse reached end of production
|
@@ -40,7 +40,7 @@ module Rley # This module is used as a namespace
|
|
40
40
|
next_symbol = state.next_symbol
|
41
41
|
if next_symbol.kind_of?(Syntax::NonTerminal)
|
42
42
|
prediction(result, next_symbol, i)
|
43
|
-
|
43
|
+
elsif i < last_token_index
|
44
44
|
# Expecting a terminal symbol
|
45
45
|
scanning(result, next_symbol, i)
|
46
46
|
end
|
@@ -33,7 +33,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
33
33
|
end
|
34
34
|
=end
|
35
35
|
|
36
|
-
# Grammar 1: A very simple language
|
36
|
+
# Grammar 1: A very simple language
|
37
|
+
# (based on example in N. Wirth "Compiler Construction" book, p. 6)
|
37
38
|
# S ::= A.
|
38
39
|
# A ::= "a" A "c".
|
39
40
|
# A ::= "b".
|
@@ -64,14 +65,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
64
65
|
|
65
66
|
|
66
67
|
# Grammar 2: A simple arithmetic expression language
|
67
|
-
#
|
68
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
69
|
+
# P ::= S.
|
68
70
|
# S ::= S "+" M.
|
69
71
|
# S ::= M.
|
70
72
|
# M ::= M "*" M.
|
71
73
|
# M ::= T.
|
72
74
|
# T ::= an integer number token.
|
73
75
|
# Let's create the grammar piece by piece
|
74
|
-
let(:
|
76
|
+
let(:nt_P) { Syntax::NonTerminal.new('P') }
|
75
77
|
let(:nt_M) { Syntax::NonTerminal.new('M') }
|
76
78
|
let(:nt_T) { Syntax::NonTerminal.new('T') }
|
77
79
|
let(:plus) { Syntax::VerbatimSymbol.new('+') }
|
@@ -80,14 +82,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
80
82
|
integer_pattern = /[-+]?[0-9]+/ # Decimal notation
|
81
83
|
Syntax::Literal.new('integer', integer_pattern)
|
82
84
|
end
|
83
|
-
let(:
|
85
|
+
let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
|
84
86
|
let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
|
85
87
|
let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
|
86
88
|
let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_M]) }
|
87
89
|
let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
|
88
90
|
let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
|
89
91
|
let(:grammar_expr) do
|
90
|
-
all_prods = [
|
92
|
+
all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
|
91
93
|
Syntax::Grammar.new(all_prods)
|
92
94
|
end
|
93
95
|
|
@@ -242,6 +244,32 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
242
244
|
]
|
243
245
|
compare_state_set(state_set_5, expectations)
|
244
246
|
end
|
247
|
+
|
248
|
+
it 'should parse a valid simple expression' do
|
249
|
+
instance = EarleyParser.new(grammar_expr)
|
250
|
+
parse_result = instance.parse(grm2_tokens)
|
251
|
+
expect(parse_result.success?).to eq(true)
|
252
|
+
|
253
|
+
######################
|
254
|
+
# Expectation chart[0]:
|
255
|
+
# (1) P -> . S, 0 # start rule
|
256
|
+
# (2) S -> . S "+" M, 0 # predict from (1)
|
257
|
+
# (3) S -> . M, 0 # predict from (1)
|
258
|
+
# (4) M -> . M "*" M, 0 # predict from (3)
|
259
|
+
# (5) M -> . T, 0 # predict from (3)
|
260
|
+
# (6) T -> . integer, 0 # predict from (5)
|
261
|
+
expectations = [
|
262
|
+
{ origin: 0, production: prod_P, dot: 0 },
|
263
|
+
{ origin: 0, production: prod_S1, dot: 0 },
|
264
|
+
{ origin: 0, production: prod_S2, dot: 0 },
|
265
|
+
{ origin: 0, production: prod_M1, dot: 0 },
|
266
|
+
{ origin: 0, production: prod_M2, dot: 0 },
|
267
|
+
{ origin: 0, production: prod_T, dot: 0 }
|
268
|
+
]
|
269
|
+
compare_state_set(parse_result.chart[0], expectations)
|
270
|
+
|
271
|
+
end
|
272
|
+
|
245
273
|
|
246
274
|
it 'should parse an invalid simple input' do
|
247
275
|
# Parse an erroneous input (b is missing)
|