rley 0.2.02 → 0.2.03
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.rubocop.yml +1 -3
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -0
- data/examples/parsers/parsing_L1.rb +2 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/parse_tree_builder.rb +1 -1
- data/lib/rley/parser/parsing.rb +1 -4
- data/spec/rley/parser/earley_parser_spec.rb +1 -1
- data/spec/rley/parser/parse_tree_builder_spec.rb +1 -2
- data/spec/rley/parser/parsing_spec.rb +4 -3
- data/spec/rley/ptree/non_terminal_node_spec.rb +16 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZWM0MGE3NzdlMzhiNTBkYTkzZjBlZjZjYzhhNTA1NzMwMzc4ZjI5NA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
M2JjYTE2OTQxNmI3MzNjOGZjMzJlZGNhY2NiYjU2ZjgzZmVmNmE3Mw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MzA0NjBlNDZmNzMzYjc2MTg5Y2Q4NTUwMWE1MDZjMTM2MmJlM2I3ZmQ3ZTc2
|
10
|
+
ZWEzMjVjMmY5NjRkMzEyZGRmMTU1NTU5YzUyNjkxMGMwZDM4OTUyMGI2ZjQ0
|
11
|
+
NmQzMTIzOWFkNzAzOTY2MTA4MzYyYzU0MWU1MzkwMDQ5YjlkMjM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NjE0M2Q2YzliYzBkMzhlNzNhM2NhOTRiMDVkYzI1NTFlMTdiOGY0ODNjOWMz
|
14
|
+
YzkxZmYzNDg5ZTVkMzlkNjgzMzc5ZTE5ZTQzYzM0MTgwNDk4ZDQ3NDI4NGJh
|
15
|
+
NTg1NjMyYmVhNGU0MTkyMzk3NGJlMTYwM2QxMmZkNGIzMjMxMjE=
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.2.03 / 2015-02-06
|
2
|
+
* [FIX] File `.rubocop.yml`: removal of setting for obsolete EmptyLinesAroundBody cop.
|
3
|
+
* [CHANGE] Source code re-formatted to please Rubocop 0.29.
|
4
|
+
* [CHANGE] File `README.md`: added licensing badge (MIT license).
|
5
|
+
|
1
6
|
### 0.2.02 / 2015-02-02
|
2
7
|
* [NEW] Exemplar file `parsing_L1.rb`: demo using a (highly simplified) English grammar.
|
3
8
|
* [NEW] Exemplar file `parsing_amb.rb`: demo using an ambiguous grammar.
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -7,6 +7,7 @@ Rley
|
|
7
7
|
[](https://coveralls.io/r/famished-tiger/Rley?branch=master)
|
8
8
|
[](http://badge.fury.io/rb/rley)
|
9
9
|
[](https://gemnasium.com/famished-tiger/Rley)
|
10
|
+
[](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt)
|
10
11
|
|
11
12
|
__Rley__ is a Ruby implementation of an [Earley parser](http://en.wikipedia.org/wiki/Earley_parser).
|
12
13
|
The project aims to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
|
@@ -108,7 +108,8 @@ parser = Rley::Parser::EarleyParser.new(grammar_l1)
|
|
108
108
|
|
109
109
|
########################################
|
110
110
|
# Step 3. Tokenize the input
|
111
|
-
valid_input = 'I want the
|
111
|
+
valid_input = 'I want the flight from Alaska through Chicago to Houston'
|
112
|
+
|
112
113
|
# Another sentence: it is a flight from Chicago
|
113
114
|
tokens = tokenizer(valid_input, grammar_l1)
|
114
115
|
|
data/lib/rley/constants.rb
CHANGED
@@ -40,7 +40,7 @@ module Rley # This module is used as a namespace
|
|
40
40
|
# (dot is at end of rhs)
|
41
41
|
def use_complete_state(aCompleteState)
|
42
42
|
prod = aCompleteState.dotted_rule.production
|
43
|
-
use_production(prod,
|
43
|
+
use_production(prod, low: aCompleteState.origin)
|
44
44
|
end
|
45
45
|
|
46
46
|
# Given that the current node is a non-terminal
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -36,10 +36,7 @@ module Rley # This module is used as a namespace
|
|
36
36
|
builder = tree_builder(state_tracker.state_set_index)
|
37
37
|
|
38
38
|
loop do
|
39
|
-
#
|
40
|
-
curr_symbol = state_tracker.symbol_on_left
|
41
|
-
|
42
|
-
# Place the symbol in the parse tree
|
39
|
+
# Place the symbol on left of the dot in the parse tree
|
43
40
|
done = insert_matched_symbol(state_tracker, builder)
|
44
41
|
break if done
|
45
42
|
end
|
@@ -444,7 +444,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
444
444
|
end
|
445
445
|
|
446
446
|
it 'should parse an ambiguous grammar (II)' do
|
447
|
-
|
447
|
+
extend(AmbiguousGrammarHelper)
|
448
448
|
grammar = grammar_builder.grammar
|
449
449
|
instance = EarleyParser.new(grammar)
|
450
450
|
tokens = tokenize('abc + def + ghi', grammar)
|
@@ -35,7 +35,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
35
35
|
parser.parse(tokens_abc)
|
36
36
|
end
|
37
37
|
|
38
|
-
subject { ParseTreeBuilder.new(start_prod,
|
38
|
+
subject { ParseTreeBuilder.new(start_prod, low: 0, high: 5) }
|
39
39
|
|
40
40
|
context 'Initialization:' do
|
41
41
|
it 'should be created with a proposition and a range' do
|
@@ -172,7 +172,6 @@ SNIPPET
|
|
172
172
|
expect(actual.root).to eq(subject.root)
|
173
173
|
end
|
174
174
|
end # context
|
175
|
-
|
176
175
|
end # describe
|
177
176
|
end # module
|
178
177
|
end # module
|
@@ -142,7 +142,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
142
142
|
subject do
|
143
143
|
parser = EarleyParser.new(b_expr_grammar)
|
144
144
|
tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
|
145
|
-
|
145
|
+
parser.parse(tokens)
|
146
146
|
end
|
147
147
|
|
148
148
|
# Helper. Build a state tracker and a parse tree builder.
|
@@ -158,7 +158,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
158
158
|
(state_tracker, builder) = prepare_parse_tree(subject)
|
159
159
|
# The root node should correspond to the start symbol and
|
160
160
|
# its direct children should correspond to rhs of start production
|
161
|
-
expected_text = <<-SNIPPET
|
161
|
+
expected_text = <<-SNIPPET
|
162
162
|
P[0, 5]
|
163
163
|
+- S[0, 5]
|
164
164
|
SNIPPET
|
@@ -249,7 +249,8 @@ SNIPPET
|
|
249
249
|
expected_state = 'T => integer . | 4'
|
250
250
|
expect(state_tracker.parse_state.to_s).to eq(expected_state)
|
251
251
|
expect(state_tracker.state_set_index).to eq(5)
|
252
|
-
|
252
|
+
integer_repr = "integer[4, 5]: '(nil)'"
|
253
|
+
expect(builder.current_node.to_string(0)).to eq(integer_repr)
|
253
254
|
|
254
255
|
# Given current tree symbol is integer[4, 5]: '(nil)'
|
255
256
|
# And its previous item is T => . integer | 4
|
@@ -12,12 +12,12 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
12
12
|
def range(low, high)
|
13
13
|
return TokenRange.new(low: low, high: high)
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
let(:sample_symbol) do
|
17
17
|
OpenStruct.new(name: 'VP')
|
18
18
|
end
|
19
19
|
let(:sample_range) { range(0, 3) }
|
20
|
-
|
20
|
+
|
21
21
|
subject { NonTerminalNode.new(sample_symbol, sample_range) }
|
22
22
|
|
23
23
|
context 'Initialization:' do
|
@@ -25,7 +25,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
25
25
|
expect(subject.children).to be_empty
|
26
26
|
end
|
27
27
|
end # context
|
28
|
-
|
28
|
+
|
29
29
|
context 'Provided services:' do
|
30
30
|
it 'should accept the addition of children' do
|
31
31
|
child1 = double('first_child')
|
@@ -36,18 +36,23 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
36
36
|
subject.add_child(child3)
|
37
37
|
expect(subject.children).to eq([child1, child2, child3])
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
it 'should provide a text representation of itself' do
|
41
41
|
# Case 1: no child
|
42
|
-
expected_text =
|
42
|
+
expected_text = 'VP[0, 3]'
|
43
43
|
expect(subject.to_string(0)).to eq(expected_text)
|
44
|
-
|
44
|
+
|
45
45
|
# Case 2: with children
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
verb = OpenStruct.new(name: 'Verb')
|
47
|
+
child_1_1 = TerminalNode.new(verb, range(0, 1))
|
48
|
+
np = OpenStruct.new(name: 'NP')
|
49
|
+
child_1_2 = NonTerminalNode.new(np, range(1, 3))
|
50
|
+
det = OpenStruct.new(name: 'Determiner')
|
51
|
+
child_2_1 = TerminalNode.new(det, range(1, 2))
|
52
|
+
nominal = OpenStruct.new(name: 'Nominal')
|
53
|
+
child_2_2 = NonTerminalNode.new(nominal, range(2, 3))
|
54
|
+
noun = OpenStruct.new(name: 'Noun')
|
55
|
+
child_3_1 = TerminalNode.new(noun, range(2, 3))
|
51
56
|
subject.add_child(child_1_1)
|
52
57
|
subject.add_child(child_1_2)
|
53
58
|
child_1_2.add_child(child_2_1)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.03
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|