rley 0.7.00 → 0.7.01

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +51 -34
  3. data/.travis.yml +10 -9
  4. data/CHANGELOG.md +9 -0
  5. data/LICENSE.txt +1 -1
  6. data/README.md +0 -1
  7. data/appveyor.yml +10 -8
  8. data/examples/NLP/benchmark_pico_en.rb +3 -2
  9. data/examples/NLP/engtagger.rb +23 -12
  10. data/examples/NLP/nano_eng/nano_en_demo.rb +4 -3
  11. data/examples/NLP/pico_en_demo.rb +3 -2
  12. data/examples/data_formats/JSON/json_ast_nodes.rb +3 -0
  13. data/examples/data_formats/JSON/json_demo.rb +1 -0
  14. data/examples/data_formats/JSON/json_lexer.rb +2 -1
  15. data/lib/rley/base/dotted_item.rb +2 -0
  16. data/lib/rley/constants.rb +1 -1
  17. data/lib/rley/engine.rb +8 -7
  18. data/lib/rley/gfg/grm_flow_graph.rb +2 -0
  19. data/lib/rley/gfg/item_vertex.rb +2 -0
  20. data/lib/rley/gfg/vertex.rb +2 -1
  21. data/lib/rley/lexical/token.rb +5 -4
  22. data/lib/rley/parse_forest_visitor.rb +7 -5
  23. data/lib/rley/parse_rep/ast_base_builder.rb +1 -1
  24. data/lib/rley/parse_rep/parse_rep_creator.rb +2 -2
  25. data/lib/rley/parse_rep/parse_tree_builder.rb +1 -0
  26. data/lib/rley/parse_tree_visitor.rb +2 -0
  27. data/lib/rley/parser/error_reason.rb +8 -6
  28. data/lib/rley/parser/gfg_chart.rb +5 -5
  29. data/lib/rley/parser/gfg_parsing.rb +10 -5
  30. data/lib/rley/parser/parse_entry_tracker.rb +1 -0
  31. data/lib/rley/parser/parse_state.rb +2 -1
  32. data/lib/rley/parser/parse_state_tracker.rb +1 -0
  33. data/lib/rley/parser/parse_walker_factory.rb +7 -1
  34. data/lib/rley/ptree/parse_tree_node.rb +1 -0
  35. data/lib/rley/sppf/parse_forest.rb +9 -7
  36. data/lib/rley/syntax/grammar.rb +10 -6
  37. data/lib/rley/syntax/grammar_builder.rb +2 -2
  38. data/lib/rley/syntax/grm_symbol.rb +1 -0
  39. data/lib/support/base_tokenizer.rb +10 -96
  40. data/spec/rley/engine_spec.rb +3 -3
  41. data/spec/rley/gfg/grm_flow_graph_spec.rb +1 -0
  42. data/spec/rley/parse_forest_visitor_spec.rb +63 -38
  43. data/spec/rley/parse_rep/groucho_spec.rb +9 -8
  44. data/spec/rley/parse_tree_visitor_spec.rb +1 -1
  45. data/spec/rley/parser/gfg_earley_parser_spec.rb +7 -7
  46. data/spec/rley/parser/gfg_parsing_spec.rb +1 -3
  47. data/spec/rley/parser/parse_entry_spec.rb +1 -1
  48. data/spec/rley/support/expectation_helper.rb +2 -1
  49. data/spec/rley/support/grammar_ambig01_helper.rb +4 -3
  50. data/spec/rley/support/grammar_arr_int_helper.rb +5 -4
  51. data/spec/rley/support/grammar_b_expr_helper.rb +5 -4
  52. data/spec/rley/support/grammar_helper.rb +2 -2
  53. data/spec/rley/support/grammar_l0_helper.rb +3 -2
  54. data/spec/rley/support/grammar_pb_helper.rb +5 -28
  55. data/spec/support/base_tokenizer_spec.rb +7 -9
  56. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2b462d4c492ffb698715478492a962d65e41834c
4
- data.tar.gz: 282ab2ed83d7b1ead2646c8dd98176d1753a142e
3
+ metadata.gz: 1d453e82683bb3a51986dad86b0b34d100fd4c27
4
+ data.tar.gz: bce3fa7704cb65670102ecfcb78ee762bdad7ba5
5
5
  SHA512:
6
- metadata.gz: 514e4a9429b4fd1231001269cd18e96fa70d4b9145c60115f042f338e0a0063871f979ba9c04e971ee589c8e2d3919fece6b3af4499af50f03899f44318a0598
7
- data.tar.gz: 870e01cb9e693c126b9fa13915dadcec72559553147e0bf220f6e39a6431c6b59c7fb1b67aa1a19d8eadf0076fc6378948fbf1f56850c88d9f99584fefb7f259
6
+ metadata.gz: bfba908cc187a280ed9a236414cd79ff9880abc96997acdb8c33a4c36f39731ca9b2d4c99be6e2adfb45ef053bd275ba052d8a2d61e851f6607b9ebd2248d1c8
7
+ data.tar.gz: 1b20d6ebe85f9d174a7ce8e2a5729e7eaa09fffa0550eb6e586592dd24d4b3cc0c309139b1d2368178309a2ed1eb411c3e07e05509152491961248c8a61492ea
@@ -1,20 +1,20 @@
1
1
  AllCops:
2
2
  Exclude:
3
3
  - 'features/**/*'
4
- - 'exp/**/*'
4
+ - 'exp/**/*'
5
5
  - 'gems/**/*'
6
6
  - 'refs/**/*'
7
-
7
+
8
8
  # This is disabled because some demos use UTF-8
9
9
  AsciiComments:
10
10
  Enabled: false
11
-
11
+
12
12
  Attr:
13
13
  Enabled: false
14
-
14
+
15
15
  BlockComments:
16
16
  Enabled: false
17
-
17
+
18
18
  CaseIndentation:
19
19
  EnforcedStyle: end
20
20
  IndentOneStep: true
@@ -23,89 +23,106 @@ CaseIndentation:
23
23
  # Which is contrary to modelling practice.
24
24
  ClassCheck:
25
25
  Enabled: false
26
-
26
+
27
27
  ClassLength:
28
28
  Max: 250
29
- CountComments: false
29
+ CountComments: false
30
30
 
31
- ConstantName:
31
+ ConstantName:
32
32
  Enabled: false
33
-
33
+
34
34
  CyclomaticComplexity:
35
35
  Enabled: false
36
-
37
- DefWithParentheses:
36
+
37
+ DefWithParentheses:
38
38
  Enabled: false
39
-
39
+
40
40
  Documentation:
41
41
  Enabled: false
42
-
42
+
43
43
  EmptyLines:
44
- Enabled: false
44
+ Enabled: false
45
45
 
46
46
  Encoding:
47
47
  Enabled: false
48
-
48
+
49
49
  EndOfLine:
50
50
  Enabled: false
51
51
  # SupportedStyles: lf
52
-
53
-
54
- IndentationWidth :
52
+
53
+
54
+ IndentationWidth:
55
55
  Enabled: false
56
56
 
57
- # Disable this because it produces false negatives
58
- Naming/HeredocDelimiterNaming:
57
+ Layout/BlockAlignment:
58
+ Enabled: false
59
+
60
+ Layout/ClosingHeredocIndentation:
59
61
  Enabled: false
60
62
 
61
63
  # Enabled after end of support of Rubies < 2.3
62
64
  Layout/IndentHeredoc:
63
65
  Enabled: false
64
66
 
67
+ Layout/SpaceInsideArrayLiteralBrackets:
68
+ Enabled: false
69
+
65
70
  Metrics/AbcSize:
66
71
  Max: 50
67
-
72
+
68
73
  # Avoid methods longer than 50 lines of code
69
74
  Metrics/MethodLength:
70
75
  Max: 50
71
- CountComments: false
76
+ CountComments: false
72
77
 
73
- # Avoid modules longer than 200 lines of code
78
+ # Avoid modules longer than 200 lines of code
74
79
  Metrics/ModuleLength:
75
80
  CountComments: false
76
- Max: 200
81
+ Max: 200
77
82
 
78
83
  Metrics/PerceivedComplexity:
79
84
  Enabled: true
80
85
  Max: 50
81
86
 
87
+ # Disable this because it produces false negatives
88
+ Naming/HeredocDelimiterNaming:
89
+ Enabled: false
90
+
82
91
  Naming/MethodName:
83
92
  Enabled: false
84
-
93
+
94
+ Naming/UncommunicativeMethodParamName:
95
+ Enabled: false
96
+
85
97
  NonNilCheck:
86
98
  Enabled: false
87
99
 
88
100
  NumericLiterals:
89
101
  Enabled: false
90
-
102
+
91
103
  RaiseArgs:
92
104
  Enabled: false
93
-
105
+
94
106
  RedundantReturn:
95
107
  Enabled: false
96
108
 
97
- SpaceInsideBrackets:
109
+ Style/CommentedKeyword:
110
+ Enabled: false
111
+
112
+ Style/ConditionalAssignment:
113
+ Enabled: false
114
+
115
+ Style/Lambda:
116
+ Enabled: false
117
+
118
+ Style/MissingRespondToMissing:
98
119
  Enabled: false
99
120
 
100
121
  TrailingWhitespace:
101
122
  Enabled: false
102
-
123
+
103
124
  VariableName:
104
125
  Enabled: false
105
126
 
106
127
  VariableNumber:
107
- Enabled: false
108
-
109
- Style/CommentedKeyword:
110
- Enabled: false
111
-
128
+ Enabled: false
@@ -1,14 +1,15 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.0.0-p648
3
+ - 2.6.0
4
+ - 2.5.3
5
+ - 2.4.5
6
+ - 2.3.8
7
+ - 2.2.10
4
8
  - 2.1.10
5
- - 2.2.8
6
- - 2.3.6
7
- - 2.4.2
8
- - 2.5.0
9
+ - 2.0.0-p648
9
10
  - ruby-head
10
- - jruby-9.1.13.0
11
- - jruby-head
11
+ - jruby-9.1.9.0
12
+ - jruby-head
12
13
  matrix:
13
14
  allow_failures:
14
15
  - rvm: ruby-head
@@ -16,8 +17,8 @@ matrix:
16
17
 
17
18
  gemfile:
18
19
  - Gemfile
19
-
20
+
20
21
  # whitelist
21
- branches:
22
+ branches:
22
23
  only:
23
24
  - master
@@ -1,3 +1,12 @@
1
+ ### 0.7.01 / 2019-01-03
2
+ - Maintenance release.
3
+
4
+ * [CHANGE] Code re-styling to please Rubocop 0.62.0.
5
+ * [CHANGE] File `.travis.yml`: updated Ruby versions.
6
+ * [CHANGE] File `appveyor.yml` updated Ruby versions.
7
+ * [CHANGE] File `README.me` removal obsolete icon.
8
+ * [CHANGE] File `LICENSE.txt` Updated copyright years.
9
+
1
10
  ### 0.7.00 / 2018-11-24
2
11
  - Version bump. Core class `Token` is changed.
3
12
 
@@ -1,4 +1,4 @@
1
- Copyright (c) 2014-2018 Dimitri Geshef
1
+ Copyright (c) 2014-2019 Dimitri Geshef
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining a copy
4
4
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -4,7 +4,6 @@
4
4
  [![Build status](https://ci.appveyor.com/api/projects/status/l5adgcbfo128rvo9?svg=true)](https://ci.appveyor.com/project/famished-tiger/rley)
5
5
  [![Coverage Status](https://img.shields.io/coveralls/famished-tiger/Rley.svg)](https://coveralls.io/r/famished-tiger/Rley?branch=master)
6
6
  [![Gem Version](https://badge.fury.io/rb/rley.svg)](http://badge.fury.io/rb/rley)
7
- [![Dependency Status](https://gemnasium.com/famished-tiger/Rley.svg)](https://gemnasium.com/famished-tiger/Rley)
8
7
  [![Inline docs](http://inch-ci.org/github/famished-tiger/Rley.svg?branch=master)](http://inch-ci.org/github/famished-tiger/Rley)
9
8
  [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/famished-tiger/SRL-Ruby/blob/master/LICENSE.txt)
10
9
 
@@ -2,16 +2,18 @@ version: '{build}'
2
2
  max_jobs: 3
3
3
  environment:
4
4
  matrix:
5
- - Ruby_version: 200
6
- - Ruby_version: 200-x64
7
- - Ruby_version: 21
8
- - Ruby_version: 21-x64
9
- - Ruby_version: 22
10
- - Ruby_version: 22-x64
11
- - Ruby_version: 23
5
+ #- Ruby_version: 25-x64
6
+ - Ruby_version: 24-x64
12
7
  - Ruby_version: 23-x64
8
+ - Ruby_version: 22-x64
9
+ - Ruby_version: 21-x64
10
+ - Ruby_version: 200-x64
11
+ #- Ruby_version: 25
13
12
  - Ruby_version: 24
14
- - Ruby_version: 24-x64
13
+ - Ruby_version: 23
14
+ - Ruby_version: 22
15
+ - Ruby_version: 21
16
+ - Ruby_version: 200
15
17
 
16
18
  install:
17
19
  - set PATH=C:\Ruby%Ruby_version%\bin;%PATH%
@@ -5,7 +5,7 @@ require 'rley' # Load Rley library
5
5
 
6
6
  ########################################
7
7
  # Step 0. Instantiate facade object of Rley library.
8
- # It provides a unified, higher-level interface
8
+ # It provides a unified, higher-level interface
9
9
  engine = Rley::Engine.new
10
10
 
11
11
  ########################################
@@ -67,8 +67,9 @@ def tokenizer(aTextToParse)
67
67
  tokens = aTextToParse.scan(/\S+/).map do |word|
68
68
  term_name = Lexicon[word]
69
69
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
70
+
70
71
  pos = Rley::Lexical::Position.new(1, offset + 1)
71
- offset += word.length
72
+ offset += word.length
72
73
  Rley::Lexical::Token.new(word, term_name, pos)
73
74
  end
74
75
 
@@ -2,12 +2,13 @@ require 'rley'
2
2
  require 'engtagger' # Load POS (Part-Of-Speech) tagger EngTagger
3
3
 
4
4
  # REGEX to remove XML tags from Engtagger output
5
- GET_TAG = /<(.+?)>(.*?)<.+?>/
5
+ GET_TAG = /<(.+?)>(.*?)<.+?>/.freeze
6
6
 
7
7
  # Text tokenizer
8
8
  # Taken directly from Engtagger, will ensure uniform indexing while parsing
9
9
  def clean_text(text)
10
10
  return false unless valid_text(text)
11
+
11
12
  text = text.toutf8
12
13
  cleaned_text = text
13
14
  tokenized = []
@@ -48,13 +49,14 @@ def split_sentences(array)
48
49
  va wash wis wisc wy wyo usafa alta man ont que sask yuk]
49
50
  month = %w[jan feb mar apr may jun jul aug sep sept oct nov dec]
50
51
  misc = %w[vs etc no esp]
51
- abbr = Hash.new
52
+ abbr = {}
52
53
  [people, army, inst, place, comp, state, month, misc].flatten.each do |i|
53
54
  abbr[i] = true
54
55
  end
55
- words = Array.new
56
+ words = []
56
57
  tokenized.each_with_index do |_t, i|
57
- if tokenized[i + 1] && tokenized [i + 1] =~ /[A-Z\W]/ && tokenized[i] =~ /\A(.+)\.\z/
58
+ if tokenized[i + 1] &&
59
+ tokenized [i + 1] =~ /[A-Z\W]/ && tokenized[i] =~ /\A(.+)\.\z/
58
60
  w = $1
59
61
  # Don't separate the period off words that
60
62
  # meet any of the following conditions:
@@ -62,8 +64,9 @@ def split_sentences(array)
62
64
  # 1. It is defined in one of the lists above
63
65
  # 2. It is only one letter long: Alfred E. Sloan
64
66
  # 3. It has a repeating letter-dot: U.S.A. or J.C. Penney
65
- unless abbr[w.downcase] || w =~ /\A[a-z]\z/i || w =~ /[a-z](?:\.[a-z])+\z/i
66
- words << w
67
+ unless abbr[w.downcase] ||
68
+ w =~ /\A[a-z]\z/i || w =~ /[a-z](?:\.[a-z])+\z/i
69
+ words << w
67
70
  words << '.'
68
71
  next
69
72
  end
@@ -83,15 +86,20 @@ end
83
86
  def split_punct(text)
84
87
  # If there's no punctuation, return immediately
85
88
  return [text] if /\A\w+\z/ =~ text
89
+
86
90
  # Sanity checks
87
91
  text = text.gsub(/\W{10,}/o, ' ')
88
92
 
89
93
  # Put quotes into a standard format
90
94
  text = text.gsub(/`(?!`)(?=.*\w)/o, '` ') # Shift left quotes off text
91
95
  text = text.gsub(/"(?=.*\w)/o, ' `` ') # Convert left quotes to ``
92
- text = text.gsub(/(\W|^)'(?=.*\w)/o) { $1 ? $1 + ' ` ' : ' ` ' } # Convert left quote to `
96
+
97
+ # Convert left quote to `
98
+ text = text.gsub(/(\W|^)'(?=.*\w)/o) { $1 ? $1 + ' ` ' : ' ` ' }
93
99
  text = text.gsub(/"/, " '' ") # Convert (remaining) quotes to ''
94
- text = text.gsub(/(\w)'(?!')(?=\W|$)/o, "\\1 ' ") # Separate right single quotes
100
+
101
+ # Separate right single quotes
102
+ text = text.gsub(/(\w)'(?!')(?=\W|$)/o, "\\1 ' ")
95
103
 
96
104
  # Handle all other punctuation
97
105
  text = text.gsub(/--+/o, ' - ') # Convert and separate dashes
@@ -99,10 +107,13 @@ def split_punct(text)
99
107
  text = text.gsub(/:/o, ' :') # Shift semicolon off
100
108
  text = text.gsub(/(\.\.\.+)/o, ' \1 ') # Shift ellipses off
101
109
  text = text.gsub(/([\(\[\{\}\]\)])/o, ' \1 ') # Shift off brackets
102
- text = text.gsub(/([\!\?#\$%;~|])/o, ' \1 ') # Shift off other ``standard'' punctuation
110
+
111
+ # Shift off other ``standard'' punctuation
112
+ text = text.gsub(/([\!\?#\$%;~|])/o, ' \1 ')
103
113
 
104
114
  # English-specific contractions
105
- text = text.gsub(/([A-Za-z])'([dms])\b/o, "\\1 '\\2") # Separate off 'd 'm 's
115
+ # Separate off 'd 'm 's
116
+ text = text.gsub(/([A-Za-z])'([dms])\b/o, "\\1 '\\2")
106
117
  text = text.gsub(/n't\b/o, " n't") # Separate off n't
107
118
  text = text.gsub(/'(ve|ll|re)\b/o, " '\\1") # Separate off 've, 'll, 're
108
119
  result = text.split(' ')
@@ -139,7 +150,7 @@ tgr = EngTagger.new
139
150
  tagged = tgr.add_tags(text)
140
151
 
141
152
  # Generte tokenied lexicon of input text
142
- # Instead of creating a lexicon dictionary,
153
+ # Instead of creating a lexicon dictionary,
143
154
  # we would simply generate one each time on the fly for the current text only.
144
155
  lexicon = clean_text(text)
145
156
 
@@ -153,7 +164,7 @@ def tokenizer(lexicon, tokens)
153
164
  term_name = tokens[i].last
154
165
  rank = Rley::Lexical::Position.new(1, pos + 1)
155
166
  pos += word.length + 1 # Assuming one space between words.
156
- rley_tokens << Rley::Lexical::Token.new(word, term_name, pos)
167
+ rley_tokens << Rley::Lexical::Token.new(word, term_name, rank)
157
168
  end
158
169
  return rley_tokens
159
170
  end
@@ -68,10 +68,10 @@ Lexicon = {
68
68
  # Step 4. Creating a tokenizer
69
69
  # A tokenizer reads the input string and converts it into a sequence of tokens
70
70
  # Highly simplified tokenizer implementation.
71
- def tokenizer(aTextToParse)
71
+ def tokenizer(aTextToParse)
72
72
  scanner = StringScanner.new(aTextToParse)
73
73
  tokens = []
74
-
74
+
75
75
  loop do
76
76
  scanner.skip(/\s+/)
77
77
  curr_pos = scanner.pos
@@ -80,11 +80,12 @@ def tokenizer(aTextToParse)
80
80
 
81
81
  term_name = Lexicon[word]
82
82
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
83
+
83
84
  pos = Rley::Lexical::Position.new(1, curr_pos + 1)
84
85
  tokens << Rley::Lexical::Token.new(word, term_name, pos)
85
86
  end
86
87
 
87
- return tokens
88
+ return tokens
88
89
  end
89
90
 
90
91
 
@@ -64,7 +64,7 @@ Lexicon = {
64
64
  def tokenizer(aTextToParse)
65
65
  scanner = StringScanner.new(aTextToParse)
66
66
  tokens = []
67
-
67
+
68
68
  loop do
69
69
  scanner.skip(/\s+/)
70
70
  curr_pos = scanner.pos
@@ -73,6 +73,7 @@ def tokenizer(aTextToParse)
73
73
 
74
74
  term_name = Lexicon[word]
75
75
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
76
+
76
77
  pos = Rley::Lexical::Position.new(1, curr_pos + 1)
77
78
  tokens << Rley::Lexical::Token.new(word, term_name, pos)
78
79
  end
@@ -94,7 +95,7 @@ unless result.success?
94
95
  puts result.failure_reason.message
95
96
  exit(1)
96
97
  end
97
-
98
+
98
99
  ########################################
99
100
  # Step 6. Generating a parse tree from parse result
100
101
  ptree = engine.to_ptree(result)
@@ -29,6 +29,7 @@ JSONTerminalNode = Struct.new(:token, :value, :position) do
29
29
  end
30
30
 
31
31
  def done!
32
+ # Do nothing
32
33
  end
33
34
  end
34
35
 
@@ -76,6 +77,7 @@ class JSONCompositeNode
76
77
  end
77
78
 
78
79
  def done!
80
+ # Do nothing
79
81
  end
80
82
 
81
83
  alias subnodes children
@@ -123,6 +125,7 @@ class JSONPair
123
125
  end
124
126
 
125
127
  def done!
128
+ # Do nothing
126
129
  end
127
130
 
128
131
  def to_ruby