rley 0.7.00 → 0.7.01

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +51 -34
  3. data/.travis.yml +10 -9
  4. data/CHANGELOG.md +9 -0
  5. data/LICENSE.txt +1 -1
  6. data/README.md +0 -1
  7. data/appveyor.yml +10 -8
  8. data/examples/NLP/benchmark_pico_en.rb +3 -2
  9. data/examples/NLP/engtagger.rb +23 -12
  10. data/examples/NLP/nano_eng/nano_en_demo.rb +4 -3
  11. data/examples/NLP/pico_en_demo.rb +3 -2
  12. data/examples/data_formats/JSON/json_ast_nodes.rb +3 -0
  13. data/examples/data_formats/JSON/json_demo.rb +1 -0
  14. data/examples/data_formats/JSON/json_lexer.rb +2 -1
  15. data/lib/rley/base/dotted_item.rb +2 -0
  16. data/lib/rley/constants.rb +1 -1
  17. data/lib/rley/engine.rb +8 -7
  18. data/lib/rley/gfg/grm_flow_graph.rb +2 -0
  19. data/lib/rley/gfg/item_vertex.rb +2 -0
  20. data/lib/rley/gfg/vertex.rb +2 -1
  21. data/lib/rley/lexical/token.rb +5 -4
  22. data/lib/rley/parse_forest_visitor.rb +7 -5
  23. data/lib/rley/parse_rep/ast_base_builder.rb +1 -1
  24. data/lib/rley/parse_rep/parse_rep_creator.rb +2 -2
  25. data/lib/rley/parse_rep/parse_tree_builder.rb +1 -0
  26. data/lib/rley/parse_tree_visitor.rb +2 -0
  27. data/lib/rley/parser/error_reason.rb +8 -6
  28. data/lib/rley/parser/gfg_chart.rb +5 -5
  29. data/lib/rley/parser/gfg_parsing.rb +10 -5
  30. data/lib/rley/parser/parse_entry_tracker.rb +1 -0
  31. data/lib/rley/parser/parse_state.rb +2 -1
  32. data/lib/rley/parser/parse_state_tracker.rb +1 -0
  33. data/lib/rley/parser/parse_walker_factory.rb +7 -1
  34. data/lib/rley/ptree/parse_tree_node.rb +1 -0
  35. data/lib/rley/sppf/parse_forest.rb +9 -7
  36. data/lib/rley/syntax/grammar.rb +10 -6
  37. data/lib/rley/syntax/grammar_builder.rb +2 -2
  38. data/lib/rley/syntax/grm_symbol.rb +1 -0
  39. data/lib/support/base_tokenizer.rb +10 -96
  40. data/spec/rley/engine_spec.rb +3 -3
  41. data/spec/rley/gfg/grm_flow_graph_spec.rb +1 -0
  42. data/spec/rley/parse_forest_visitor_spec.rb +63 -38
  43. data/spec/rley/parse_rep/groucho_spec.rb +9 -8
  44. data/spec/rley/parse_tree_visitor_spec.rb +1 -1
  45. data/spec/rley/parser/gfg_earley_parser_spec.rb +7 -7
  46. data/spec/rley/parser/gfg_parsing_spec.rb +1 -3
  47. data/spec/rley/parser/parse_entry_spec.rb +1 -1
  48. data/spec/rley/support/expectation_helper.rb +2 -1
  49. data/spec/rley/support/grammar_ambig01_helper.rb +4 -3
  50. data/spec/rley/support/grammar_arr_int_helper.rb +5 -4
  51. data/spec/rley/support/grammar_b_expr_helper.rb +5 -4
  52. data/spec/rley/support/grammar_helper.rb +2 -2
  53. data/spec/rley/support/grammar_l0_helper.rb +3 -2
  54. data/spec/rley/support/grammar_pb_helper.rb +5 -28
  55. data/spec/support/base_tokenizer_spec.rb +7 -9
  56. metadata +2 -2
@@ -54,6 +54,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
54
54
  if term_name.nil?
55
55
  raise StandardError, "Word '#{word}' not found in lexicon"
56
56
  end
57
+
57
58
  terminal = aGrammar.name2symbol[term_name]
58
59
  Rley::Lexical::Token.new(word, terminal, pos)
59
60
  end
@@ -98,10 +99,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
98
99
  child = subject.curr_parent.subnodes.first
99
100
  expect(child.to_string(0)).to eq(anExpectation)
100
101
  end
101
-
102
+
102
103
  def root_children
103
- subject.result.root.subnodes
104
- end
104
+ subject.result.root.subnodes
105
+ end
105
106
 
106
107
 
107
108
  before(:each) do
@@ -125,7 +126,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
125
126
 
126
127
  next_event(:visit, 'VP. | 1') # Event 3
127
128
  expected_curr_path('S[0, 7]/VP[1, 7]')
128
- # Root node should have one child
129
+ # Root node should have one child
129
130
  expect(root_children.size).to eq(1)
130
131
  expect(root_children.first.to_string(0)).to eq('VP[1, 7]')
131
132
 
@@ -139,16 +140,16 @@ module Rley # Open this namespace to avoid module qualifier prefixes
139
140
  # Root node should have two children
140
141
  expect(root_children.size).to eq(2)
141
142
  expect(root_children.first.to_string(0)).to eq('NP[0, 1]')
142
-
143
+
143
144
  18.times do
144
145
  event = @walker.next
145
146
  subject.receive_event(*event)
146
147
  end
147
-
148
+
148
149
  next_event(:revisit, 'NP. | 0') # Event 48
149
150
  expected_curr_path('S[0, 7]')
150
- # Root node should still have two children
151
- expect(root_children.size).to eq(2)
151
+ # Root node should still have two children
152
+ expect(root_children.size).to eq(2)
152
153
  end
153
154
  end # context
154
155
  end # describe
@@ -235,7 +235,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
235
235
  big_a_3 = big_a_2_children[1]
236
236
  big_a_3_children = big_a_3.subnodes
237
237
  expectations = [
238
- [:before_ptree, [grm_abc_ptree1]],
238
+ [:before_ptree, [grm_abc_ptree1]]
239
239
  # TODO: fix this test
240
240
  # [:before_subnodes, [root, children]],
241
241
  # [:before_non_terminal, [root]],
@@ -587,12 +587,12 @@ MSG
587
587
  ###################### S(0) == . 1 +
588
588
  # Expectation chart[0]:
589
589
  expected = [
590
- '.S | 0', # initialization
591
- 'S => . E | 0', # start rule
592
- '.E | 0', # call rule
593
- 'E => . int | 0', # start rule
594
- "E => . ( E + E ) | 0", # start rule
595
- "E => . E + E | 0" # start rule
590
+ '.S | 0', # initialization
591
+ 'S => . E | 0', # start rule
592
+ '.E | 0', # call rule
593
+ 'E => . int | 0', # start rule
594
+ 'E => . ( E + E ) | 0', # start rule
595
+ 'E => . E + E | 0' # start rule
596
596
  ]
597
597
  compare_entry_texts(parse_result.chart[0], expected)
598
598
 
@@ -619,7 +619,7 @@ MSG
619
619
  compare_entry_texts(parse_result.chart[2], expected)
620
620
 
621
621
  err_msg = "Premature end of input after '+' at position line 1, "
622
- err_msg << "column 3"
622
+ err_msg << 'column 3'
623
623
  err_msg << "\nExpected one of: ['int', '(']."
624
624
  expect(parse_result.failure_reason.message).to eq(err_msg)
625
625
  end
@@ -314,9 +314,7 @@ SNIPPET
314
314
  end
315
315
 
316
316
  it 'should build a parse forest' do
317
- if subject.success?
318
- expect { subject.parse_forest }.not_to raise_error
319
- end
317
+ expect { subject.parse_forest }.not_to raise_error if subject.success?
320
318
  end
321
319
  =begin
322
320
  it 'should create the root of a parse forest' do
@@ -199,7 +199,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
199
199
 
200
200
  it 'should be inspectable' do
201
201
  subject.add_antecedent(subject) # Cheat for the good cause...
202
- expected = '.sentence | 3'
202
+ # expected = '.sentence | 3'
203
203
  prefix = /^#<Rley::Parser::ParseEntry:\d+ @vertex/
204
204
  expect(subject.inspect).to match(prefix)
205
205
  pattern = /@vertex=<Rley::GFG::StartVertex:\d+ label=\.sentence/
@@ -7,7 +7,8 @@ module ExpectationHelper
7
7
  # Helper method. Compare the data from all the parse entries
8
8
  # of a given ParseEntrySet with an array of expectation strings.
9
9
  def compare_entry_texts(anEntrySet, expectations)
10
- raise StandardError, "Nil entry set" if anEntrySet.nil?
10
+ raise StandardError, 'Nil entry set' if anEntrySet.nil?
11
+
11
12
  (0...expectations.size).each do |i|
12
13
  expect(anEntrySet.entries[i].to_s).to eq(expectations[i])
13
14
  end
@@ -24,12 +24,13 @@ module GrammarAmbig01Helper
24
24
  def tokenizer_ambig01(aText)
25
25
  scanner = StringScanner.new(aText)
26
26
  tokens = []
27
-
27
+
28
28
  loop do
29
29
  scanner.skip(/\s+/)
30
30
  curr_pos = scanner.pos
31
31
  lexeme = scanner.scan(/\S+/)
32
32
  break unless lexeme
33
+
33
34
  case lexeme
34
35
  when '+', '*'
35
36
  terminal = lexeme
@@ -41,8 +42,8 @@ module GrammarAmbig01Helper
41
42
  end
42
43
 
43
44
  pos = Rley::Lexical::Position.new(1, curr_pos + 1)
44
- tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
45
- end
45
+ tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
46
+ end
46
47
 
47
48
  return tokens
48
49
  end
@@ -26,13 +26,14 @@ module GrammarArrIntHelper
26
26
  def arr_int_tokenizer(aText)
27
27
  scanner = StringScanner.new(aText)
28
28
  tokens = []
29
-
29
+
30
30
  loop do
31
31
  scanner.skip(/\s+/)
32
32
  curr_ch = scanner.peek(1)
33
33
  break if curr_ch.nil? || curr_ch.empty?
34
+
34
35
  curr_pos = scanner.pos
35
-
36
+
36
37
  if (lexeme = scanner.scan(/[\[\],]/))
37
38
  terminal = lexeme
38
39
  elsif (lexeme = scanner.scan(/[-+]?\d+/))
@@ -43,10 +44,10 @@ module GrammarArrIntHelper
43
44
  end
44
45
 
45
46
  pos = Rley::Lexical::Position.new(1, curr_pos + 1)
46
- tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
47
+ tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
47
48
  end
48
49
 
49
- return tokens
50
+ return tokens
50
51
  end
51
52
  end # module
52
53
  # End of file
@@ -24,12 +24,13 @@ module GrammarBExprHelper
24
24
  def expr_tokenizer(aText)
25
25
  scanner = StringScanner.new(aText)
26
26
  tokens = []
27
-
27
+
28
28
  loop do
29
29
  scanner.skip(/\s+/)
30
30
  curr_pos = scanner.pos
31
31
  lexeme = scanner.scan(/\S+/)
32
32
  break unless lexeme
33
+
33
34
  case lexeme
34
35
  when '+', '*'
35
36
  terminal = lexeme
@@ -41,10 +42,10 @@ module GrammarBExprHelper
41
42
  end
42
43
 
43
44
  pos = Rley::Lexical::Position.new(1, curr_pos + 1)
44
- tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
45
- end
45
+ tokens << Rley::Lexical::Token.new(lexeme, terminal, pos)
46
+ end
46
47
 
47
- return tokens
48
+ return tokens
48
49
  end
49
50
  end # module
50
51
  # End of file
@@ -23,8 +23,8 @@ module GrammarHelper
23
23
  token = Rley::Lexical::Token.new(literal, terminal, pos)
24
24
  end
25
25
  end
26
- col += lexeme.length + 1
27
- token
26
+ col += lexeme.length + 1
27
+ token
28
28
  end
29
29
 
30
30
  return tokens.flatten
@@ -69,7 +69,7 @@ module GrammarL0Helper
69
69
  def tokenizer_l0(aText)
70
70
  scanner = StringScanner.new(aText)
71
71
  tokens = []
72
-
72
+
73
73
  loop do
74
74
  scanner.skip(/\s+/)
75
75
  curr_pos = scanner.pos
@@ -80,8 +80,9 @@ module GrammarL0Helper
80
80
  if term_name.nil?
81
81
  raise StandardError, "Word '#{word}' not found in lexicon"
82
82
  end
83
+
83
84
  pos = Rley::Lexical::Position.new(1, curr_pos + 1)
84
- tokens << Rley::Lexical::Token.new(word, term_name, pos)
85
+ tokens << Rley::Lexical::Token.new(word, term_name, pos)
85
86
  end
86
87
 
87
88
  return tokens
@@ -22,41 +22,18 @@ class GrammarPBHelper
22
22
  builder.grammar
23
23
  end
24
24
  end
25
-
26
- # # Basic expression tokenizer
27
- # def tokenize(aText)
28
- # tokens = aText.scan(/\S+/).map do |lexeme|
29
- # case lexeme
30
- # when '+', '(', ')'
31
- # terminal = @grammar.name2symbol[lexeme]
32
- # when /^[-+]?\d+$/
33
- # terminal = @grammar.name2symbol['int']
34
- # else
35
- # msg = "Unknown input text '#{lexeme}'"
36
- # raise StandardError, msg
37
- # end
38
- # pos = Rley::Lexical::Position.new(1, 4) # Dummy position
39
- # Rley::Lexical::Token.new(lexeme, terminal, pos)
40
- # end
41
-
42
- # return tokens
43
- # end
44
-
45
25
 
46
26
  class PB_Tokenizer < BaseTokenizer
47
-
48
27
  protected
49
28
 
50
29
  def recognize_token()
51
- token = nil
52
-
53
30
  if (lexeme = scanner.scan(/[\(\)]/)) # Single characters
54
31
  # Delimiters, separators => single character token
55
- token = build_token(lexeme, lexeme)
56
- elsif (lexeme = scanner.scan(/(?:\+)(?=\s|$)/)) # Single char occurring alone
57
- token = build_token(lexeme, lexeme)
58
- elsif (lexeme = scanner.scan(/[+-]?[0-9]+/))
59
- token = build_token('int', lexeme)
32
+ build_token(lexeme, lexeme)
33
+ elsif (lexeme = scanner.scan(/(?:\+)(?=\s|$)/)) # Isolated char
34
+ build_token(lexeme, lexeme)
35
+ elsif (lexeme = scanner.scan(/[+-]?[0-9]+/))
36
+ build_token('int', lexeme)
60
37
  end
61
38
  end
62
39
  end # class
@@ -27,21 +27,19 @@ describe BaseTokenizer do
27
27
  @@lexeme2name = {
28
28
  '(' => 'LPAREN',
29
29
  ')' => 'RPAREN',
30
- '+' => 'PLUS',
30
+ '+' => 'PLUS'
31
31
  }.freeze
32
32
 
33
33
  protected
34
34
 
35
35
  def recognize_token()
36
- token = nil
37
-
38
36
  if (lexeme = scanner.scan(/[\(\)]/)) # Single characters
39
37
  # Delimiters, separators => single character token
40
- token = build_token(@@lexeme2name[lexeme], lexeme)
41
- elsif (lexeme = scanner.scan(/(?:\+)(?=\s)/)) # Single char occurring alone
42
- token = build_token(@@lexeme2name[lexeme], lexeme)
43
- elsif (lexeme = scanner.scan(/[+-]?[0-9]+/))
44
- token = build_token('int', lexeme)
38
+ build_token(@@lexeme2name[lexeme], lexeme)
39
+ elsif (lexeme = scanner.scan(/(?:\+)(?=\s)/)) # Isolated char
40
+ build_token(@@lexeme2name[lexeme], lexeme)
41
+ elsif (lexeme = scanner.scan(/[+-]?[0-9]+/))
42
+ build_token('int', lexeme)
45
43
  end
46
44
  end
47
45
  end # class
@@ -74,4 +72,4 @@ describe BaseTokenizer do
74
72
  end
75
73
  end
76
74
  end
77
- end # describe
75
+ end # describe
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.00
4
+ version: 0.7.01
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-24 00:00:00.000000000 Z
11
+ date: 2019-01-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: coveralls