rley 0.0.05 → 0.0.06

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZTEwY2E4YWQ0MDgwMmE0MTFjMjBmOGQ0OWUwYjI5ZjZjMWU2MDNjMw==
4
+ NzBkOTQ2MmQ2MDcxZGFjZTM5YzNlZGY5NDg1MTFhOWVjYzFjNWFkYg==
5
5
  data.tar.gz: !binary |-
6
- MjkzZGJjMjM3MmIyZDY0ZjRmZjU2ZTEzNzM2NDRmNzE5MGNhZTU3ZA==
6
+ ZjEwNzMyNWRjMmUxMGU1YjJmYzQxODg5NjA4M2YzNWU4NmVhNzcyYw==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MzRmMTAxMzcyZmJkZjNiYTYxNGYxOTQ1M2YzNDRmOTcyZGEzODVjYWZhOGE2
10
- MTQxYjQ2YWMxZGMzN2E3NjRjZjhjNmI2MzljOGIwMzcxNGY1ZDg2MGM0YTA5
11
- YzZmZjBiODMwMzk0YmY4OTQ4NjRhNGNjMWJjZTRlNjU5Y2NiNjY=
9
+ MWNhOTQyOGQ3NzcwMzcxY2FlYTgzMWJhMGNlYmQzYzM2ZTAwNjdlZjJkOGE1
10
+ ODZiODkxMGFlNzc5NjUxNzA3OTVjNzFkNzY1NGQ2MTc4MDRjMTg0YWRhODc5
11
+ ZTU3NDhjZmY3ZDE2YjM3NmRjZjA0MTVmNjNmZGQxYmE4YTg5YmM=
12
12
  data.tar.gz: !binary |-
13
- ZDAzM2I1ZDNjMWRjNWEyMGNhMWYwNmUzNTdiNzE3NDYzMzVlMzdiYjFiYzUx
14
- Y2I0MGY2MjU2MDg5ZDIxOGI5NjViOWExMDhkZGIyZTc0NmQ4ODVlOTJlYWQx
15
- ODU0NGRhZDZmNDA1MTdmMzE1Y2Q3OTY3MmMxZDM4Y2NlYmI3YWQ=
13
+ MzQ2MmRkMjIyMWQxZGI1MTBmOTdhMDY1ZTYxYjc0MGExZGJjZjFkMjZlOTM3
14
+ MTk0NjdhMjUwNjAwYWQzMzBlOTdhNzg5ZmFlNWFjNjlhOWM3ZjMxMWJlOTFj
15
+ NjVlODk3NjBkYTQ3OTFjODBlMjQ2ZjZjNTA5ZGIxZjdiYWIxYzA=
@@ -1,3 +1,10 @@
1
+ ### 0.0.06 / 2014-11-13
2
+ * [CHANGE] File `README.md`: Added roadmap section.
3
+ * [FIX] `EarleyParser#parse`: prevent call to `scanning` method after last token encountered.
4
+
5
+ ### 0.0.05 / 2014-11-13
6
+ * [CHANGE] Code re-styling to please Rubocop 0.27.0 (less than 10 offenses).
7
+
1
8
  ### 0.0.04 / 2014-11-12
2
9
  * [CHANGE] Class `DottedItem` moved to `Rley` module.
3
10
 
data/README.md CHANGED
@@ -7,12 +7,27 @@ Rley
7
7
  [![Gem Version](https://badge.fury.io/rb/rley.svg)](http://badge.fury.io/rb/rley)
8
8
 
9
9
  ### What is Rley? ###
10
- __Rley__ is a Ruby implementation of a Earley parser.
10
+ __Rley__ is a Ruby implementation of an Earley parser.
11
11
  The objective is to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
12
12
 
13
- This project is in "early" stage.
13
+ Yet another parser?
14
+ Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
15
+ The latter are faster because they use faster algorithms at the price of a loss of generality
16
+ in the grammar/language they support.
17
+ Earley's algorithm, being more general, is able to parse input without imposing restrictions on the context-free grammar.
14
18
  Consult Wikipedia to learn more about Earley's parsing algorithm.
15
19
 
20
+ This project is in an "early" stage.
21
+ #### Roadmap:
22
+ - Add more validation tests and sample grammars
23
+ - Add AST generation (and semantic actions?)
24
+ - Add DSL for grammar specification
25
+ - Add grammar validations
26
+ - Add error reporting
27
+ - Add examples (including small NLP grammar)
28
+ - Add a command-line interface
29
+ - Provide documentation and examples
30
+
16
31
 
17
32
  Copyright
18
33
  ---------
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.0.05'
6
+ Version = '0.0.06'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm"
@@ -30,8 +30,8 @@ module Rley # This module is used as a namespace
30
30
 
31
31
  def parse(aTokenSequence)
32
32
  result = Parsing.new(start_dotted_item, aTokenSequence)
33
-
34
- (0..aTokenSequence.size).each do |i|
33
+ last_token_index = aTokenSequence.size
34
+ (0..last_token_index).each do |i|
35
35
  result.chart[i].each do |state|
36
36
  if state.complete?
37
37
  # parse reached end of production
@@ -40,7 +40,7 @@ module Rley # This module is used as a namespace
40
40
  next_symbol = state.next_symbol
41
41
  if next_symbol.kind_of?(Syntax::NonTerminal)
42
42
  prediction(result, next_symbol, i)
43
- else
43
+ elsif i < last_token_index
44
44
  # Expecting a terminal symbol
45
45
  scanning(result, next_symbol, i)
46
46
  end
@@ -33,7 +33,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
33
33
  end
34
34
  =end
35
35
 
36
- # Grammar 1: A very simple language
36
+ # Grammar 1: A very simple language
37
+ # (based on example in N. Wirth "Compiler Construction" book, p. 6)
37
38
  # S ::= A.
38
39
  # A ::= "a" A "c".
39
40
  # A ::= "b".
@@ -64,14 +65,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
64
65
 
65
66
 
66
67
  # Grammar 2: A simple arithmetic expression language
67
- # E ::= S.
68
+ # (based on example in article on Earley's algorithm in Wikipedia)
69
+ # P ::= S.
68
70
  # S ::= S "+" M.
69
71
  # S ::= M.
70
72
  # M ::= M "*" M.
71
73
  # M ::= T.
72
74
  # T ::= an integer number token.
73
75
  # Let's create the grammar piece by piece
74
- let(:nt_E) { Syntax::NonTerminal.new('E') }
76
+ let(:nt_P) { Syntax::NonTerminal.new('P') }
75
77
  let(:nt_M) { Syntax::NonTerminal.new('M') }
76
78
  let(:nt_T) { Syntax::NonTerminal.new('T') }
77
79
  let(:plus) { Syntax::VerbatimSymbol.new('+') }
@@ -80,14 +82,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
80
82
  integer_pattern = /[-+]?[0-9]+/ # Decimal notation
81
83
  Syntax::Literal.new('integer', integer_pattern)
82
84
  end
83
- let(:prod_E) { Syntax::Production.new(nt_E, [nt_S]) }
85
+ let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
84
86
  let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
85
87
  let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
86
88
  let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_M]) }
87
89
  let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
88
90
  let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
89
91
  let(:grammar_expr) do
90
- all_prods = [prod_E, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
92
+ all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
91
93
  Syntax::Grammar.new(all_prods)
92
94
  end
93
95
 
@@ -242,6 +244,32 @@ module Rley # Open this namespace to avoid module qualifier prefixes
242
244
  ]
243
245
  compare_state_set(state_set_5, expectations)
244
246
  end
247
+
248
+ it 'should parse a valid simple expression' do
249
+ instance = EarleyParser.new(grammar_expr)
250
+ parse_result = instance.parse(grm2_tokens)
251
+ expect(parse_result.success?).to eq(true)
252
+
253
+ ######################
254
+ # Expectation chart[0]:
255
+ # (1) P -> . S, 0 # start rule
256
+ # (2) S -> . S "+" M, 0 # predict from (1)
257
+ # (3) S -> . M, 0 # predict from (1)
258
+ # (4) M -> . M "*" M, 0 # predict from (3)
259
+ # (5) M -> . T, 0 # predict from (3)
260
+ # (6) T -> . integer, 0 # predict from (5)
261
+ expectations = [
262
+ { origin: 0, production: prod_P, dot: 0 },
263
+ { origin: 0, production: prod_S1, dot: 0 },
264
+ { origin: 0, production: prod_S2, dot: 0 },
265
+ { origin: 0, production: prod_M1, dot: 0 },
266
+ { origin: 0, production: prod_M2, dot: 0 },
267
+ { origin: 0, production: prod_T, dot: 0 }
268
+ ]
269
+ compare_state_set(parse_result.chart[0], expectations)
270
+
271
+ end
272
+
245
273
 
246
274
  it 'should parse an invalid simple input' do
247
275
  # Parse an erroneous input (b is missing)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.05
4
+ version: 0.0.06
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef