rley 0.0.05 → 0.0.06

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZTEwY2E4YWQ0MDgwMmE0MTFjMjBmOGQ0OWUwYjI5ZjZjMWU2MDNjMw==
4
+ NzBkOTQ2MmQ2MDcxZGFjZTM5YzNlZGY5NDg1MTFhOWVjYzFjNWFkYg==
5
5
  data.tar.gz: !binary |-
6
- MjkzZGJjMjM3MmIyZDY0ZjRmZjU2ZTEzNzM2NDRmNzE5MGNhZTU3ZA==
6
+ ZjEwNzMyNWRjMmUxMGU1YjJmYzQxODg5NjA4M2YzNWU4NmVhNzcyYw==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MzRmMTAxMzcyZmJkZjNiYTYxNGYxOTQ1M2YzNDRmOTcyZGEzODVjYWZhOGE2
10
- MTQxYjQ2YWMxZGMzN2E3NjRjZjhjNmI2MzljOGIwMzcxNGY1ZDg2MGM0YTA5
11
- YzZmZjBiODMwMzk0YmY4OTQ4NjRhNGNjMWJjZTRlNjU5Y2NiNjY=
9
+ MWNhOTQyOGQ3NzcwMzcxY2FlYTgzMWJhMGNlYmQzYzM2ZTAwNjdlZjJkOGE1
10
+ ODZiODkxMGFlNzc5NjUxNzA3OTVjNzFkNzY1NGQ2MTc4MDRjMTg0YWRhODc5
11
+ ZTU3NDhjZmY3ZDE2YjM3NmRjZjA0MTVmNjNmZGQxYmE4YTg5YmM=
12
12
  data.tar.gz: !binary |-
13
- ZDAzM2I1ZDNjMWRjNWEyMGNhMWYwNmUzNTdiNzE3NDYzMzVlMzdiYjFiYzUx
14
- Y2I0MGY2MjU2MDg5ZDIxOGI5NjViOWExMDhkZGIyZTc0NmQ4ODVlOTJlYWQx
15
- ODU0NGRhZDZmNDA1MTdmMzE1Y2Q3OTY3MmMxZDM4Y2NlYmI3YWQ=
13
+ MzQ2MmRkMjIyMWQxZGI1MTBmOTdhMDY1ZTYxYjc0MGExZGJjZjFkMjZlOTM3
14
+ MTk0NjdhMjUwNjAwYWQzMzBlOTdhNzg5ZmFlNWFjNjlhOWM3ZjMxMWJlOTFj
15
+ NjVlODk3NjBkYTQ3OTFjODBlMjQ2ZjZjNTA5ZGIxZjdiYWIxYzA=
@@ -1,3 +1,10 @@
1
+ ### 0.0.06 / 2014-11-13
2
+ * [CHANGE] File `README.md`: Added roadmap section.
3
+ * [FIX] `EarleyParser#parse`: prevent call to `scanning` method after last token encountered.
4
+
5
+ ### 0.0.05 / 2014-11-13
6
+ * [CHANGE] Code re-styling to please Rubocop 0.27.0 (less than 10 offenses).
7
+
1
8
  ### 0.0.04 / 2014-11-12
2
9
  * [CHANGE] Class `DottedItem` moved to `Rley` module.
3
10
 
data/README.md CHANGED
@@ -7,12 +7,27 @@ Rley
7
7
  [![Gem Version](https://badge.fury.io/rb/rley.svg)](http://badge.fury.io/rb/rley)
8
8
 
9
9
  ### What is Rley? ###
10
- __Rley__ is a Ruby implementation of a Earley parser.
10
+ __Rley__ is a Ruby implementation of an Earley parser.
11
11
  The objective is to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
12
12
 
13
- This project is in "early" stage.
13
+ Yet another parser?
14
+ Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
15
+ The latter are faster because they use faster algorithms at the price of a loss of generality
16
+ in the grammar/language they support.
17
+ Earley's algorithm, being more general, is able to parse input without imposing restrictions on the context-free grammar.
14
18
  Consult Wikipedia to learn more about Earley's parsing algorithm.
15
19
 
20
+ This project is in an "early" stage.
21
+ #### Roadmap:
22
+ - Add more validation tests and sample grammars
23
+ - Add AST generation (and semantic actions?)
24
+ - Add DSL for grammar specification
25
+ - Add grammar validations
26
+ - Add error reporting
27
+ - Add examples (including small NLP grammar)
28
+ - Add a command-line interface
29
+ - Provide documentation and examples
30
+
16
31
 
17
32
  Copyright
18
33
  ---------
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.0.05'
6
+ Version = '0.0.06'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm"
@@ -30,8 +30,8 @@ module Rley # This module is used as a namespace
30
30
 
31
31
  def parse(aTokenSequence)
32
32
  result = Parsing.new(start_dotted_item, aTokenSequence)
33
-
34
- (0..aTokenSequence.size).each do |i|
33
+ last_token_index = aTokenSequence.size
34
+ (0..last_token_index).each do |i|
35
35
  result.chart[i].each do |state|
36
36
  if state.complete?
37
37
  # parse reached end of production
@@ -40,7 +40,7 @@ module Rley # This module is used as a namespace
40
40
  next_symbol = state.next_symbol
41
41
  if next_symbol.kind_of?(Syntax::NonTerminal)
42
42
  prediction(result, next_symbol, i)
43
- else
43
+ elsif i < last_token_index
44
44
  # Expecting a terminal symbol
45
45
  scanning(result, next_symbol, i)
46
46
  end
@@ -33,7 +33,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
33
33
  end
34
34
  =end
35
35
 
36
- # Grammar 1: A very simple language
36
+ # Grammar 1: A very simple language
37
+ # (based on example in N. Wirth "Compiler Construction" book, p. 6)
37
38
  # S ::= A.
38
39
  # A ::= "a" A "c".
39
40
  # A ::= "b".
@@ -64,14 +65,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
64
65
 
65
66
 
66
67
  # Grammar 2: A simple arithmetic expression language
67
- # E ::= S.
68
+ # (based on example in article on Earley's algorithm in Wikipedia)
69
+ # P ::= S.
68
70
  # S ::= S "+" M.
69
71
  # S ::= M.
70
72
  # M ::= M "*" M.
71
73
  # M ::= T.
72
74
  # T ::= an integer number token.
73
75
  # Let's create the grammar piece by piece
74
- let(:nt_E) { Syntax::NonTerminal.new('E') }
76
+ let(:nt_P) { Syntax::NonTerminal.new('P') }
75
77
  let(:nt_M) { Syntax::NonTerminal.new('M') }
76
78
  let(:nt_T) { Syntax::NonTerminal.new('T') }
77
79
  let(:plus) { Syntax::VerbatimSymbol.new('+') }
@@ -80,14 +82,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
80
82
  integer_pattern = /[-+]?[0-9]+/ # Decimal notation
81
83
  Syntax::Literal.new('integer', integer_pattern)
82
84
  end
83
- let(:prod_E) { Syntax::Production.new(nt_E, [nt_S]) }
85
+ let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
84
86
  let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
85
87
  let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
86
88
  let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_M]) }
87
89
  let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
88
90
  let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
89
91
  let(:grammar_expr) do
90
- all_prods = [prod_E, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
92
+ all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
91
93
  Syntax::Grammar.new(all_prods)
92
94
  end
93
95
 
@@ -242,6 +244,32 @@ module Rley # Open this namespace to avoid module qualifier prefixes
242
244
  ]
243
245
  compare_state_set(state_set_5, expectations)
244
246
  end
247
+
248
+ it 'should parse a valid simple expression' do
249
+ instance = EarleyParser.new(grammar_expr)
250
+ parse_result = instance.parse(grm2_tokens)
251
+ expect(parse_result.success?).to eq(true)
252
+
253
+ ######################
254
+ # Expectation chart[0]:
255
+ # (1) P -> . S, 0 # start rule
256
+ # (2) S -> . S "+" M, 0 # predict from (1)
257
+ # (3) S -> . M, 0 # predict from (1)
258
+ # (4) M -> . M "*" M, 0 # predict from (3)
259
+ # (5) M -> . T, 0 # predict from (3)
260
+ # (6) T -> . integer, 0 # predict from (5)
261
+ expectations = [
262
+ { origin: 0, production: prod_P, dot: 0 },
263
+ { origin: 0, production: prod_S1, dot: 0 },
264
+ { origin: 0, production: prod_S2, dot: 0 },
265
+ { origin: 0, production: prod_M1, dot: 0 },
266
+ { origin: 0, production: prod_M2, dot: 0 },
267
+ { origin: 0, production: prod_T, dot: 0 }
268
+ ]
269
+ compare_state_set(parse_result.chart[0], expectations)
270
+
271
+ end
272
+
245
273
 
246
274
  it 'should parse an invalid simple input' do
247
275
  # Parse an erroneous input (b is missing)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.05
4
+ version: 0.0.06
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef