rley 0.2.02 → 0.2.03
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.rubocop.yml +1 -3
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -0
- data/examples/parsers/parsing_L1.rb +2 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/parse_tree_builder.rb +1 -1
- data/lib/rley/parser/parsing.rb +1 -4
- data/spec/rley/parser/earley_parser_spec.rb +1 -1
- data/spec/rley/parser/parse_tree_builder_spec.rb +1 -2
- data/spec/rley/parser/parsing_spec.rb +4 -3
- data/spec/rley/ptree/non_terminal_node_spec.rb +16 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZWM0MGE3NzdlMzhiNTBkYTkzZjBlZjZjYzhhNTA1NzMwMzc4ZjI5NA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
M2JjYTE2OTQxNmI3MzNjOGZjMzJlZGNhY2NiYjU2ZjgzZmVmNmE3Mw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MzA0NjBlNDZmNzMzYjc2MTg5Y2Q4NTUwMWE1MDZjMTM2MmJlM2I3ZmQ3ZTc2
|
10
|
+
ZWEzMjVjMmY5NjRkMzEyZGRmMTU1NTU5YzUyNjkxMGMwZDM4OTUyMGI2ZjQ0
|
11
|
+
NmQzMTIzOWFkNzAzOTY2MTA4MzYyYzU0MWU1MzkwMDQ5YjlkMjM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NjE0M2Q2YzliYzBkMzhlNzNhM2NhOTRiMDVkYzI1NTFlMTdiOGY0ODNjOWMz
|
14
|
+
YzkxZmYzNDg5ZTVkMzlkNjgzMzc5ZTE5ZTQzYzM0MTgwNDk4ZDQ3NDI4NGJh
|
15
|
+
NTg1NjMyYmVhNGU0MTkyMzk3NGJlMTYwM2QxMmZkNGIzMjMxMjE=
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.2.03 / 2015-02-06
|
2
|
+
* [FIX] File `.rubocop.yml`: removal of setting for obsolete EmptyLinesAroundBody cop.
|
3
|
+
* [CHANGE] Source code re-formatted to please Rubocop 0.29.
|
4
|
+
* [CHANGE] File `README.md`: added licensing badge (MIT license).
|
5
|
+
|
1
6
|
### 0.2.02 / 2015-02-02
|
2
7
|
* [NEW] Example file `parsing_L1.rb`: demo using a (highly simplified) English grammar.
|
3
8
|
* [NEW] Example file `parsing_amb.rb`: demo using an ambiguous grammar.
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -7,6 +7,7 @@ Rley
|
|
7
7
|
[![Coverage Status](https://img.shields.io/coveralls/famished-tiger/Rley.svg)](https://coveralls.io/r/famished-tiger/Rley?branch=master)
|
8
8
|
[![Gem Version](https://badge.fury.io/rb/rley.svg)](http://badge.fury.io/rb/rley)
|
9
9
|
[![Dependency Status](https://gemnasium.com/famished-tiger/Rley.svg)](https://gemnasium.com/famished-tiger/Rley)
|
10
|
+
[![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt)
|
10
11
|
|
11
12
|
__Rley__ is a Ruby implementation of an [Earley parser](http://en.wikipedia.org/wiki/Earley_parser).
|
12
13
|
The project aims to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
|
@@ -108,7 +108,8 @@ parser = Rley::Parser::EarleyParser.new(grammar_l1)
|
|
108
108
|
|
109
109
|
########################################
|
110
110
|
# Step 3. Tokenize the input
|
111
|
-
valid_input = 'I want the
|
111
|
+
valid_input = 'I want the flight from Alaska through Chicago to Houston'
|
112
|
+
|
112
113
|
# Another sentence: it is a flight from Chicago
|
113
114
|
tokens = tokenizer(valid_input, grammar_l1)
|
114
115
|
|
data/lib/rley/constants.rb
CHANGED
@@ -40,7 +40,7 @@ module Rley # This module is used as a namespace
|
|
40
40
|
# (dot is at end of rhs)
|
41
41
|
def use_complete_state(aCompleteState)
|
42
42
|
prod = aCompleteState.dotted_rule.production
|
43
|
-
use_production(prod,
|
43
|
+
use_production(prod, low: aCompleteState.origin)
|
44
44
|
end
|
45
45
|
|
46
46
|
# Given that the current node is a non-terminal
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -36,10 +36,7 @@ module Rley # This module is used as a namespace
|
|
36
36
|
builder = tree_builder(state_tracker.state_set_index)
|
37
37
|
|
38
38
|
loop do
|
39
|
-
#
|
40
|
-
curr_symbol = state_tracker.symbol_on_left
|
41
|
-
|
42
|
-
# Place the symbol in the parse tree
|
39
|
+
# Place the symbol on left of the dot in the parse tree
|
43
40
|
done = insert_matched_symbol(state_tracker, builder)
|
44
41
|
break if done
|
45
42
|
end
|
@@ -444,7 +444,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
444
444
|
end
|
445
445
|
|
446
446
|
it 'should parse an ambiguous grammar (II)' do
|
447
|
-
|
447
|
+
extend(AmbiguousGrammarHelper)
|
448
448
|
grammar = grammar_builder.grammar
|
449
449
|
instance = EarleyParser.new(grammar)
|
450
450
|
tokens = tokenize('abc + def + ghi', grammar)
|
@@ -35,7 +35,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
35
35
|
parser.parse(tokens_abc)
|
36
36
|
end
|
37
37
|
|
38
|
-
subject { ParseTreeBuilder.new(start_prod,
|
38
|
+
subject { ParseTreeBuilder.new(start_prod, low: 0, high: 5) }
|
39
39
|
|
40
40
|
context 'Initialization:' do
|
41
41
|
it 'should be created with a proposition and a range' do
|
@@ -172,7 +172,6 @@ SNIPPET
|
|
172
172
|
expect(actual.root).to eq(subject.root)
|
173
173
|
end
|
174
174
|
end # context
|
175
|
-
|
176
175
|
end # describe
|
177
176
|
end # module
|
178
177
|
end # module
|
@@ -142,7 +142,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
142
142
|
subject do
|
143
143
|
parser = EarleyParser.new(b_expr_grammar)
|
144
144
|
tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
|
145
|
-
|
145
|
+
parser.parse(tokens)
|
146
146
|
end
|
147
147
|
|
148
148
|
# Helper. Build a state tracker and a parse tree builder.
|
@@ -158,7 +158,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
158
158
|
(state_tracker, builder) = prepare_parse_tree(subject)
|
159
159
|
# The root node should correspond to the start symbol and
|
160
160
|
# its direct children should correspond to rhs of start production
|
161
|
-
expected_text = <<-SNIPPET
|
161
|
+
expected_text = <<-SNIPPET
|
162
162
|
P[0, 5]
|
163
163
|
+- S[0, 5]
|
164
164
|
SNIPPET
|
@@ -249,7 +249,8 @@ SNIPPET
|
|
249
249
|
expected_state = 'T => integer . | 4'
|
250
250
|
expect(state_tracker.parse_state.to_s).to eq(expected_state)
|
251
251
|
expect(state_tracker.state_set_index).to eq(5)
|
252
|
-
|
252
|
+
integer_repr = "integer[4, 5]: '(nil)'"
|
253
|
+
expect(builder.current_node.to_string(0)).to eq(integer_repr)
|
253
254
|
|
254
255
|
# Given current tree symbol is integer[4, 5]: '(nil)'
|
255
256
|
# And its previous item is T => . integer | 4
|
@@ -12,12 +12,12 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
12
12
|
def range(low, high)
|
13
13
|
return TokenRange.new(low: low, high: high)
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
let(:sample_symbol) do
|
17
17
|
OpenStruct.new(name: 'VP')
|
18
18
|
end
|
19
19
|
let(:sample_range) { range(0, 3) }
|
20
|
-
|
20
|
+
|
21
21
|
subject { NonTerminalNode.new(sample_symbol, sample_range) }
|
22
22
|
|
23
23
|
context 'Initialization:' do
|
@@ -25,7 +25,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
25
25
|
expect(subject.children).to be_empty
|
26
26
|
end
|
27
27
|
end # context
|
28
|
-
|
28
|
+
|
29
29
|
context 'Provided services:' do
|
30
30
|
it 'should accept the addition of children' do
|
31
31
|
child1 = double('first_child')
|
@@ -36,18 +36,23 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
36
36
|
subject.add_child(child3)
|
37
37
|
expect(subject.children).to eq([child1, child2, child3])
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
it 'should provide a text representation of itself' do
|
41
41
|
# Case 1: no child
|
42
|
-
expected_text =
|
42
|
+
expected_text = 'VP[0, 3]'
|
43
43
|
expect(subject.to_string(0)).to eq(expected_text)
|
44
|
-
|
44
|
+
|
45
45
|
# Case 2: with children
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
verb = OpenStruct.new(name: 'Verb')
|
47
|
+
child_1_1 = TerminalNode.new(verb, range(0, 1))
|
48
|
+
np = OpenStruct.new(name: 'NP')
|
49
|
+
child_1_2 = NonTerminalNode.new(np, range(1, 3))
|
50
|
+
det = OpenStruct.new(name: 'Determiner')
|
51
|
+
child_2_1 = TerminalNode.new(det, range(1, 2))
|
52
|
+
nominal = OpenStruct.new(name: 'Nominal')
|
53
|
+
child_2_2 = NonTerminalNode.new(nominal, range(2, 3))
|
54
|
+
noun = OpenStruct.new(name: 'Noun')
|
55
|
+
child_3_1 = TerminalNode.new(noun, range(2, 3))
|
51
56
|
subject.add_child(child_1_1)
|
52
57
|
subject.add_child(child_1_2)
|
53
58
|
child_1_2.add_child(child_2_1)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.03
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|