rley 0.7.00 → 0.7.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +51 -34
- data/.travis.yml +10 -9
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -1
- data/appveyor.yml +10 -8
- data/examples/NLP/benchmark_pico_en.rb +3 -2
- data/examples/NLP/engtagger.rb +23 -12
- data/examples/NLP/nano_eng/nano_en_demo.rb +4 -3
- data/examples/NLP/pico_en_demo.rb +3 -2
- data/examples/data_formats/JSON/json_ast_nodes.rb +3 -0
- data/examples/data_formats/JSON/json_demo.rb +1 -0
- data/examples/data_formats/JSON/json_lexer.rb +2 -1
- data/lib/rley/base/dotted_item.rb +2 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +8 -7
- data/lib/rley/gfg/grm_flow_graph.rb +2 -0
- data/lib/rley/gfg/item_vertex.rb +2 -0
- data/lib/rley/gfg/vertex.rb +2 -1
- data/lib/rley/lexical/token.rb +5 -4
- data/lib/rley/parse_forest_visitor.rb +7 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +1 -1
- data/lib/rley/parse_rep/parse_rep_creator.rb +2 -2
- data/lib/rley/parse_rep/parse_tree_builder.rb +1 -0
- data/lib/rley/parse_tree_visitor.rb +2 -0
- data/lib/rley/parser/error_reason.rb +8 -6
- data/lib/rley/parser/gfg_chart.rb +5 -5
- data/lib/rley/parser/gfg_parsing.rb +10 -5
- data/lib/rley/parser/parse_entry_tracker.rb +1 -0
- data/lib/rley/parser/parse_state.rb +2 -1
- data/lib/rley/parser/parse_state_tracker.rb +1 -0
- data/lib/rley/parser/parse_walker_factory.rb +7 -1
- data/lib/rley/ptree/parse_tree_node.rb +1 -0
- data/lib/rley/sppf/parse_forest.rb +9 -7
- data/lib/rley/syntax/grammar.rb +10 -6
- data/lib/rley/syntax/grammar_builder.rb +2 -2
- data/lib/rley/syntax/grm_symbol.rb +1 -0
- data/lib/support/base_tokenizer.rb +10 -96
- data/spec/rley/engine_spec.rb +3 -3
- data/spec/rley/gfg/grm_flow_graph_spec.rb +1 -0
- data/spec/rley/parse_forest_visitor_spec.rb +63 -38
- data/spec/rley/parse_rep/groucho_spec.rb +9 -8
- data/spec/rley/parse_tree_visitor_spec.rb +1 -1
- data/spec/rley/parser/gfg_earley_parser_spec.rb +7 -7
- data/spec/rley/parser/gfg_parsing_spec.rb +1 -3
- data/spec/rley/parser/parse_entry_spec.rb +1 -1
- data/spec/rley/support/expectation_helper.rb +2 -1
- data/spec/rley/support/grammar_ambig01_helper.rb +4 -3
- data/spec/rley/support/grammar_arr_int_helper.rb +5 -4
- data/spec/rley/support/grammar_b_expr_helper.rb +5 -4
- data/spec/rley/support/grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_l0_helper.rb +3 -2
- data/spec/rley/support/grammar_pb_helper.rb +5 -28
- data/spec/support/base_tokenizer_spec.rb +7 -9
- metadata +2 -2
|
@@ -54,6 +54,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
|
54
54
|
if term_name.nil?
|
|
55
55
|
raise StandardError, "Word '#{word}' not found in lexicon"
|
|
56
56
|
end
|
|
57
|
+
|
|
57
58
|
terminal = aGrammar.name2symbol[term_name]
|
|
58
59
|
Rley::Lexical::Token.new(word, terminal, pos)
|
|
59
60
|
end
|
|
@@ -98,10 +99,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
|
98
99
|
child = subject.curr_parent.subnodes.first
|
|
99
100
|
expect(child.to_string(0)).to eq(anExpectation)
|
|
100
101
|
end
|
|
101
|
-
|
|
102
|
+
|
|
102
103
|
def root_children
|
|
103
|
-
subject.result.root.subnodes
|
|
104
|
-
end
|
|
104
|
+
subject.result.root.subnodes
|
|
105
|
+
end
|
|
105
106
|
|
|
106
107
|
|
|
107
108
|
before(:each) do
|
|
@@ -125,7 +126,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
|
125
126
|
|
|
126
127
|
next_event(:visit, 'VP. | 1') # Event 3
|
|
127
128
|
expected_curr_path('S[0, 7]/VP[1, 7]')
|
|
128
|
-
# Root node should have one child
|
|
129
|
+
# Root node should have one child
|
|
129
130
|
expect(root_children.size).to eq(1)
|
|
130
131
|
expect(root_children.first.to_string(0)).to eq('VP[1, 7]')
|
|
131
132
|
|
|
@@ -139,16 +140,16 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
|
139
140
|
# Root node should have two children
|
|
140
141
|
expect(root_children.size).to eq(2)
|
|
141
142
|
expect(root_children.first.to_string(0)).to eq('NP[0, 1]')
|
|
142
|
-
|
|
143
|
+
|
|
143
144
|
18.times do
|
|
144
145
|
event = @walker.next
|
|
145
146
|
subject.receive_event(*event)
|
|
146
147
|
end
|
|
147
|
-
|
|
148
|
+
|
|
148
149
|
next_event(:revisit, 'NP. | 0') # Event 48
|
|
149
150
|
expected_curr_path('S[0, 7]')
|
|
150
|
-
# Root node should still have two children
|
|
151
|
-
expect(root_children.size).to eq(2)
|
|
151
|
+
# Root node should still have two children
|
|
152
|
+
expect(root_children.size).to eq(2)
|
|
152
153
|
end
|
|
153
154
|
end # context
|
|
154
155
|
end # describe
|
|
@@ -235,7 +235,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
|
235
235
|
big_a_3 = big_a_2_children[1]
|
|
236
236
|
big_a_3_children = big_a_3.subnodes
|
|
237
237
|
expectations = [
|
|
238
|
-
[:before_ptree, [grm_abc_ptree1]]
|
|
238
|
+
[:before_ptree, [grm_abc_ptree1]]
|
|
239
239
|
# TODO: fix this test
|
|
240
240
|
# [:before_subnodes, [root, children]],
|
|
241
241
|
# [:before_non_terminal, [root]],
|
|
@@ -587,12 +587,12 @@ MSG
|
|
|
587
587
|
###################### S(0) == . 1 +
|
|
588
588
|
# Expectation chart[0]:
|
|
589
589
|
expected = [
|
|
590
|
-
'.S | 0',
|
|
591
|
-
'S => . E | 0',
|
|
592
|
-
'.E | 0',
|
|
593
|
-
'E => . int | 0',
|
|
594
|
-
|
|
595
|
-
|
|
590
|
+
'.S | 0', # initialization
|
|
591
|
+
'S => . E | 0', # start rule
|
|
592
|
+
'.E | 0', # call rule
|
|
593
|
+
'E => . int | 0', # start rule
|
|
594
|
+
'E => . ( E + E ) | 0', # start rule
|
|
595
|
+
'E => . E + E | 0' # start rule
|
|
596
596
|
]
|
|
597
597
|
compare_entry_texts(parse_result.chart[0], expected)
|
|
598
598
|
|
|
@@ -619,7 +619,7 @@ MSG
|
|
|
619
619
|
compare_entry_texts(parse_result.chart[2], expected)
|
|
620
620
|
|
|
621
621
|
err_msg = "Premature end of input after '+' at position line 1, "
|
|
622
|
-
err_msg <<
|
|
622
|
+
err_msg << 'column 3'
|
|
623
623
|
err_msg << "\nExpected one of: ['int', '(']."
|
|
624
624
|
expect(parse_result.failure_reason.message).to eq(err_msg)
|
|
625
625
|
end
|
|
@@ -314,9 +314,7 @@ SNIPPET
|
|
|
314
314
|
end
|
|
315
315
|
|
|
316
316
|
it 'should build a parse forest' do
|
|
317
|
-
if subject.success?
|
|
318
|
-
expect { subject.parse_forest }.not_to raise_error
|
|
319
|
-
end
|
|
317
|
+
expect { subject.parse_forest }.not_to raise_error if subject.success?
|
|
320
318
|
end
|
|
321
319
|
=begin
|
|
322
320
|
it 'should create the root of a parse forest' do
|
|
@@ -199,7 +199,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
|
199
199
|
|
|
200
200
|
it 'should be inspectable' do
|
|
201
201
|
subject.add_antecedent(subject) # Cheat for the good cause...
|
|
202
|
-
expected = '.sentence | 3'
|
|
202
|
+
# expected = '.sentence | 3'
|
|
203
203
|
prefix = /^#<Rley::Parser::ParseEntry:\d+ @vertex/
|
|
204
204
|
expect(subject.inspect).to match(prefix)
|
|
205
205
|
pattern = /@vertex=<Rley::GFG::StartVertex:\d+ label=\.sentence/
|
|
@@ -7,7 +7,8 @@ module ExpectationHelper
|
|
|
7
7
|
# Helper method. Compare the data from all the parse entries
|
|
8
8
|
# of a given ParseEntrySet with an array of expectation strings.
|
|
9
9
|
def compare_entry_texts(anEntrySet, expectations)
|
|
10
|
-
raise StandardError,
|
|
10
|
+
raise StandardError, 'Nil entry set' if anEntrySet.nil?
|
|
11
|
+
|
|
11
12
|
(0...expectations.size).each do |i|
|
|
12
13
|
expect(anEntrySet.entries[i].to_s).to eq(expectations[i])
|
|
13
14
|
end
|
|
@@ -24,12 +24,13 @@ module GrammarAmbig01Helper
|
|
|
24
24
|
def tokenizer_ambig01(aText)
|
|
25
25
|
scanner = StringScanner.new(aText)
|
|
26
26
|
tokens = []
|
|
27
|
-
|
|
27
|
+
|
|
28
28
|
loop do
|
|
29
29
|
scanner.skip(/\s+/)
|
|
30
30
|
curr_pos = scanner.pos
|
|
31
31
|
lexeme = scanner.scan(/\S+/)
|
|
32
32
|
break unless lexeme
|
|
33
|
+
|
|
33
34
|
case lexeme
|
|
34
35
|
when '+', '*'
|
|
35
36
|
terminal = lexeme
|
|
@@ -41,8 +42,8 @@ module GrammarAmbig01Helper
|
|
|
41
42
|
end
|
|
42
43
|
|
|
43
44
|
pos = Rley::Lexical::Position.new(1, curr_pos + 1)
|
|
44
|
-
tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
|
|
45
|
-
end
|
|
45
|
+
tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
|
|
46
|
+
end
|
|
46
47
|
|
|
47
48
|
return tokens
|
|
48
49
|
end
|
|
@@ -26,13 +26,14 @@ module GrammarArrIntHelper
|
|
|
26
26
|
def arr_int_tokenizer(aText)
|
|
27
27
|
scanner = StringScanner.new(aText)
|
|
28
28
|
tokens = []
|
|
29
|
-
|
|
29
|
+
|
|
30
30
|
loop do
|
|
31
31
|
scanner.skip(/\s+/)
|
|
32
32
|
curr_ch = scanner.peek(1)
|
|
33
33
|
break if curr_ch.nil? || curr_ch.empty?
|
|
34
|
+
|
|
34
35
|
curr_pos = scanner.pos
|
|
35
|
-
|
|
36
|
+
|
|
36
37
|
if (lexeme = scanner.scan(/[\[\],]/))
|
|
37
38
|
terminal = lexeme
|
|
38
39
|
elsif (lexeme = scanner.scan(/[-+]?\d+/))
|
|
@@ -43,10 +44,10 @@ module GrammarArrIntHelper
|
|
|
43
44
|
end
|
|
44
45
|
|
|
45
46
|
pos = Rley::Lexical::Position.new(1, curr_pos + 1)
|
|
46
|
-
tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
|
|
47
|
+
tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
|
|
47
48
|
end
|
|
48
49
|
|
|
49
|
-
return tokens
|
|
50
|
+
return tokens
|
|
50
51
|
end
|
|
51
52
|
end # module
|
|
52
53
|
# End of file
|
|
@@ -24,12 +24,13 @@ module GrammarBExprHelper
|
|
|
24
24
|
def expr_tokenizer(aText)
|
|
25
25
|
scanner = StringScanner.new(aText)
|
|
26
26
|
tokens = []
|
|
27
|
-
|
|
27
|
+
|
|
28
28
|
loop do
|
|
29
29
|
scanner.skip(/\s+/)
|
|
30
30
|
curr_pos = scanner.pos
|
|
31
31
|
lexeme = scanner.scan(/\S+/)
|
|
32
32
|
break unless lexeme
|
|
33
|
+
|
|
33
34
|
case lexeme
|
|
34
35
|
when '+', '*'
|
|
35
36
|
terminal = lexeme
|
|
@@ -41,10 +42,10 @@ module GrammarBExprHelper
|
|
|
41
42
|
end
|
|
42
43
|
|
|
43
44
|
pos = Rley::Lexical::Position.new(1, curr_pos + 1)
|
|
44
|
-
tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
|
|
45
|
-
end
|
|
45
|
+
tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
|
|
46
|
+
end
|
|
46
47
|
|
|
47
|
-
return tokens
|
|
48
|
+
return tokens
|
|
48
49
|
end
|
|
49
50
|
end # module
|
|
50
51
|
# End of file
|
|
@@ -69,7 +69,7 @@ module GrammarL0Helper
|
|
|
69
69
|
def tokenizer_l0(aText)
|
|
70
70
|
scanner = StringScanner.new(aText)
|
|
71
71
|
tokens = []
|
|
72
|
-
|
|
72
|
+
|
|
73
73
|
loop do
|
|
74
74
|
scanner.skip(/\s+/)
|
|
75
75
|
curr_pos = scanner.pos
|
|
@@ -80,8 +80,9 @@ module GrammarL0Helper
|
|
|
80
80
|
if term_name.nil?
|
|
81
81
|
raise StandardError, "Word '#{word}' not found in lexicon"
|
|
82
82
|
end
|
|
83
|
+
|
|
83
84
|
pos = Rley::Lexical::Position.new(1, curr_pos + 1)
|
|
84
|
-
tokens << Rley::Lexical::Token.new(word, term_name, pos)
|
|
85
|
+
tokens << Rley::Lexical::Token.new(word, term_name, pos)
|
|
85
86
|
end
|
|
86
87
|
|
|
87
88
|
return tokens
|
|
@@ -22,41 +22,18 @@ class GrammarPBHelper
|
|
|
22
22
|
builder.grammar
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
|
-
|
|
26
|
-
# # Basic expression tokenizer
|
|
27
|
-
# def tokenize(aText)
|
|
28
|
-
# tokens = aText.scan(/\S+/).map do |lexeme|
|
|
29
|
-
# case lexeme
|
|
30
|
-
# when '+', '(', ')'
|
|
31
|
-
# terminal = @grammar.name2symbol[lexeme]
|
|
32
|
-
# when /^[-+]?\d+$/
|
|
33
|
-
# terminal = @grammar.name2symbol['int']
|
|
34
|
-
# else
|
|
35
|
-
# msg = "Unknown input text '#{lexeme}'"
|
|
36
|
-
# raise StandardError, msg
|
|
37
|
-
# end
|
|
38
|
-
# pos = Rley::Lexical::Position.new(1, 4) # Dummy position
|
|
39
|
-
# Rley::Lexical::Token.new(lexeme, terminal, pos)
|
|
40
|
-
# end
|
|
41
|
-
|
|
42
|
-
# return tokens
|
|
43
|
-
# end
|
|
44
|
-
|
|
45
25
|
|
|
46
26
|
class PB_Tokenizer < BaseTokenizer
|
|
47
|
-
|
|
48
27
|
protected
|
|
49
28
|
|
|
50
29
|
def recognize_token()
|
|
51
|
-
token = nil
|
|
52
|
-
|
|
53
30
|
if (lexeme = scanner.scan(/[\(\)]/)) # Single characters
|
|
54
31
|
# Delimiters, separators => single character token
|
|
55
|
-
|
|
56
|
-
elsif (lexeme = scanner.scan(/(?:\+)(?=\s|$)/)) #
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
32
|
+
build_token(lexeme, lexeme)
|
|
33
|
+
elsif (lexeme = scanner.scan(/(?:\+)(?=\s|$)/)) # Isolated char
|
|
34
|
+
build_token(lexeme, lexeme)
|
|
35
|
+
elsif (lexeme = scanner.scan(/[+-]?[0-9]+/))
|
|
36
|
+
build_token('int', lexeme)
|
|
60
37
|
end
|
|
61
38
|
end
|
|
62
39
|
end # class
|
|
@@ -27,21 +27,19 @@ describe BaseTokenizer do
|
|
|
27
27
|
@@lexeme2name = {
|
|
28
28
|
'(' => 'LPAREN',
|
|
29
29
|
')' => 'RPAREN',
|
|
30
|
-
'+' => 'PLUS'
|
|
30
|
+
'+' => 'PLUS'
|
|
31
31
|
}.freeze
|
|
32
32
|
|
|
33
33
|
protected
|
|
34
34
|
|
|
35
35
|
def recognize_token()
|
|
36
|
-
token = nil
|
|
37
|
-
|
|
38
36
|
if (lexeme = scanner.scan(/[\(\)]/)) # Single characters
|
|
39
37
|
# Delimiters, separators => single character token
|
|
40
|
-
|
|
41
|
-
elsif (lexeme = scanner.scan(/(?:\+)(?=\s)/)) #
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
38
|
+
build_token(@@lexeme2name[lexeme], lexeme)
|
|
39
|
+
elsif (lexeme = scanner.scan(/(?:\+)(?=\s)/)) # Isolated char
|
|
40
|
+
build_token(@@lexeme2name[lexeme], lexeme)
|
|
41
|
+
elsif (lexeme = scanner.scan(/[+-]?[0-9]+/))
|
|
42
|
+
build_token('int', lexeme)
|
|
45
43
|
end
|
|
46
44
|
end
|
|
47
45
|
end # class
|
|
@@ -74,4 +72,4 @@ describe BaseTokenizer do
|
|
|
74
72
|
end
|
|
75
73
|
end
|
|
76
74
|
end
|
|
77
|
-
end # describe
|
|
75
|
+
end # describe
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rley
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.00
|
|
4
|
+
version: 0.7.01
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dimitri Geshef
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2019-01-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: coveralls
|