rley 0.7.00 → 0.7.01

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +51 -34
  3. data/.travis.yml +10 -9
  4. data/CHANGELOG.md +9 -0
  5. data/LICENSE.txt +1 -1
  6. data/README.md +0 -1
  7. data/appveyor.yml +10 -8
  8. data/examples/NLP/benchmark_pico_en.rb +3 -2
  9. data/examples/NLP/engtagger.rb +23 -12
  10. data/examples/NLP/nano_eng/nano_en_demo.rb +4 -3
  11. data/examples/NLP/pico_en_demo.rb +3 -2
  12. data/examples/data_formats/JSON/json_ast_nodes.rb +3 -0
  13. data/examples/data_formats/JSON/json_demo.rb +1 -0
  14. data/examples/data_formats/JSON/json_lexer.rb +2 -1
  15. data/lib/rley/base/dotted_item.rb +2 -0
  16. data/lib/rley/constants.rb +1 -1
  17. data/lib/rley/engine.rb +8 -7
  18. data/lib/rley/gfg/grm_flow_graph.rb +2 -0
  19. data/lib/rley/gfg/item_vertex.rb +2 -0
  20. data/lib/rley/gfg/vertex.rb +2 -1
  21. data/lib/rley/lexical/token.rb +5 -4
  22. data/lib/rley/parse_forest_visitor.rb +7 -5
  23. data/lib/rley/parse_rep/ast_base_builder.rb +1 -1
  24. data/lib/rley/parse_rep/parse_rep_creator.rb +2 -2
  25. data/lib/rley/parse_rep/parse_tree_builder.rb +1 -0
  26. data/lib/rley/parse_tree_visitor.rb +2 -0
  27. data/lib/rley/parser/error_reason.rb +8 -6
  28. data/lib/rley/parser/gfg_chart.rb +5 -5
  29. data/lib/rley/parser/gfg_parsing.rb +10 -5
  30. data/lib/rley/parser/parse_entry_tracker.rb +1 -0
  31. data/lib/rley/parser/parse_state.rb +2 -1
  32. data/lib/rley/parser/parse_state_tracker.rb +1 -0
  33. data/lib/rley/parser/parse_walker_factory.rb +7 -1
  34. data/lib/rley/ptree/parse_tree_node.rb +1 -0
  35. data/lib/rley/sppf/parse_forest.rb +9 -7
  36. data/lib/rley/syntax/grammar.rb +10 -6
  37. data/lib/rley/syntax/grammar_builder.rb +2 -2
  38. data/lib/rley/syntax/grm_symbol.rb +1 -0
  39. data/lib/support/base_tokenizer.rb +10 -96
  40. data/spec/rley/engine_spec.rb +3 -3
  41. data/spec/rley/gfg/grm_flow_graph_spec.rb +1 -0
  42. data/spec/rley/parse_forest_visitor_spec.rb +63 -38
  43. data/spec/rley/parse_rep/groucho_spec.rb +9 -8
  44. data/spec/rley/parse_tree_visitor_spec.rb +1 -1
  45. data/spec/rley/parser/gfg_earley_parser_spec.rb +7 -7
  46. data/spec/rley/parser/gfg_parsing_spec.rb +1 -3
  47. data/spec/rley/parser/parse_entry_spec.rb +1 -1
  48. data/spec/rley/support/expectation_helper.rb +2 -1
  49. data/spec/rley/support/grammar_ambig01_helper.rb +4 -3
  50. data/spec/rley/support/grammar_arr_int_helper.rb +5 -4
  51. data/spec/rley/support/grammar_b_expr_helper.rb +5 -4
  52. data/spec/rley/support/grammar_helper.rb +2 -2
  53. data/spec/rley/support/grammar_l0_helper.rb +3 -2
  54. data/spec/rley/support/grammar_pb_helper.rb +5 -28
  55. data/spec/support/base_tokenizer_spec.rb +7 -9
  56. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2b462d4c492ffb698715478492a962d65e41834c
4
- data.tar.gz: 282ab2ed83d7b1ead2646c8dd98176d1753a142e
3
+ metadata.gz: 1d453e82683bb3a51986dad86b0b34d100fd4c27
4
+ data.tar.gz: bce3fa7704cb65670102ecfcb78ee762bdad7ba5
5
5
  SHA512:
6
- metadata.gz: 514e4a9429b4fd1231001269cd18e96fa70d4b9145c60115f042f338e0a0063871f979ba9c04e971ee589c8e2d3919fece6b3af4499af50f03899f44318a0598
7
- data.tar.gz: 870e01cb9e693c126b9fa13915dadcec72559553147e0bf220f6e39a6431c6b59c7fb1b67aa1a19d8eadf0076fc6378948fbf1f56850c88d9f99584fefb7f259
6
+ metadata.gz: bfba908cc187a280ed9a236414cd79ff9880abc96997acdb8c33a4c36f39731ca9b2d4c99be6e2adfb45ef053bd275ba052d8a2d61e851f6607b9ebd2248d1c8
7
+ data.tar.gz: 1b20d6ebe85f9d174a7ce8e2a5729e7eaa09fffa0550eb6e586592dd24d4b3cc0c309139b1d2368178309a2ed1eb411c3e07e05509152491961248c8a61492ea
@@ -1,20 +1,20 @@
1
1
  AllCops:
2
2
  Exclude:
3
3
  - 'features/**/*'
4
- - 'exp/**/*'
4
+ - 'exp/**/*'
5
5
  - 'gems/**/*'
6
6
  - 'refs/**/*'
7
-
7
+
8
8
  # This is disabled because some demos use UTF-8
9
9
  AsciiComments:
10
10
  Enabled: false
11
-
11
+
12
12
  Attr:
13
13
  Enabled: false
14
-
14
+
15
15
  BlockComments:
16
16
  Enabled: false
17
-
17
+
18
18
  CaseIndentation:
19
19
  EnforcedStyle: end
20
20
  IndentOneStep: true
@@ -23,89 +23,106 @@ CaseIndentation:
23
23
  # Which is contrary to modelling practice.
24
24
  ClassCheck:
25
25
  Enabled: false
26
-
26
+
27
27
  ClassLength:
28
28
  Max: 250
29
- CountComments: false
29
+ CountComments: false
30
30
 
31
- ConstantName:
31
+ ConstantName:
32
32
  Enabled: false
33
-
33
+
34
34
  CyclomaticComplexity:
35
35
  Enabled: false
36
-
37
- DefWithParentheses:
36
+
37
+ DefWithParentheses:
38
38
  Enabled: false
39
-
39
+
40
40
  Documentation:
41
41
  Enabled: false
42
-
42
+
43
43
  EmptyLines:
44
- Enabled: false
44
+ Enabled: false
45
45
 
46
46
  Encoding:
47
47
  Enabled: false
48
-
48
+
49
49
  EndOfLine:
50
50
  Enabled: false
51
51
  # SupportedStyles: lf
52
-
53
-
54
- IndentationWidth :
52
+
53
+
54
+ IndentationWidth:
55
55
  Enabled: false
56
56
 
57
- # Disable this because it produces false negatives
58
- Naming/HeredocDelimiterNaming:
57
+ Layout/BlockAlignment:
58
+ Enabled: false
59
+
60
+ Layout/ClosingHeredocIndentation:
59
61
  Enabled: false
60
62
 
61
63
  # Enabled after end of support of Rubies < 2.3
62
64
  Layout/IndentHeredoc:
63
65
  Enabled: false
64
66
 
67
+ Layout/SpaceInsideArrayLiteralBrackets:
68
+ Enabled: false
69
+
65
70
  Metrics/AbcSize:
66
71
  Max: 50
67
-
72
+
68
73
  # Avoid methods longer than 50 lines of code
69
74
  Metrics/MethodLength:
70
75
  Max: 50
71
- CountComments: false
76
+ CountComments: false
72
77
 
73
- # Avoid modules longer than 200 lines of code
78
+ # Avoid modules longer than 200 lines of code
74
79
  Metrics/ModuleLength:
75
80
  CountComments: false
76
- Max: 200
81
+ Max: 200
77
82
 
78
83
  Metrics/PerceivedComplexity:
79
84
  Enabled: true
80
85
  Max: 50
81
86
 
87
+ # Disable this because it produces false negatives
88
+ Naming/HeredocDelimiterNaming:
89
+ Enabled: false
90
+
82
91
  Naming/MethodName:
83
92
  Enabled: false
84
-
93
+
94
+ Naming/UncommunicativeMethodParamName:
95
+ Enabled: false
96
+
85
97
  NonNilCheck:
86
98
  Enabled: false
87
99
 
88
100
  NumericLiterals:
89
101
  Enabled: false
90
-
102
+
91
103
  RaiseArgs:
92
104
  Enabled: false
93
-
105
+
94
106
  RedundantReturn:
95
107
  Enabled: false
96
108
 
97
- SpaceInsideBrackets:
109
+ Style/CommentedKeyword:
110
+ Enabled: false
111
+
112
+ Style/ConditionalAssignment:
113
+ Enabled: false
114
+
115
+ Style/Lambda:
116
+ Enabled: false
117
+
118
+ Style/MissingRespondToMissing:
98
119
  Enabled: false
99
120
 
100
121
  TrailingWhitespace:
101
122
  Enabled: false
102
-
123
+
103
124
  VariableName:
104
125
  Enabled: false
105
126
 
106
127
  VariableNumber:
107
- Enabled: false
108
-
109
- Style/CommentedKeyword:
110
- Enabled: false
111
-
128
+ Enabled: false
@@ -1,14 +1,15 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.0.0-p648
3
+ - 2.6.0
4
+ - 2.5.3
5
+ - 2.4.5
6
+ - 2.3.8
7
+ - 2.2.10
4
8
  - 2.1.10
5
- - 2.2.8
6
- - 2.3.6
7
- - 2.4.2
8
- - 2.5.0
9
+ - 2.0.0-p648
9
10
  - ruby-head
10
- - jruby-9.1.13.0
11
- - jruby-head
11
+ - jruby-9.1.9.0
12
+ - jruby-head
12
13
  matrix:
13
14
  allow_failures:
14
15
  - rvm: ruby-head
@@ -16,8 +17,8 @@ matrix:
16
17
 
17
18
  gemfile:
18
19
  - Gemfile
19
-
20
+
20
21
  # whitelist
21
- branches:
22
+ branches:
22
23
  only:
23
24
  - master
@@ -1,3 +1,12 @@
1
+ ### 0.7.01 / 2019-01-03
2
+ - Maintenance release.
3
+
4
+ * [CHANGE] Code re-styling to please Rubocop 0.62.0.
5
+ * [CHANGE] File `.travis.yml`: updated Ruby versions.
6
+ * [CHANGE] File `appveyor.yml`: updated Ruby versions.
7
+ * [CHANGE] File `README.md`: removal of obsolete icon.
8
+ * [CHANGE] File `LICENSE.txt`: updated copyright years.
9
+
1
10
  ### 0.7.00 / 2018-11-24
2
11
  - Version bump. Core class `Token` is changed.
3
12
 
@@ -1,4 +1,4 @@
1
- Copyright (c) 2014-2018 Dimitri Geshef
1
+ Copyright (c) 2014-2019 Dimitri Geshef
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining a copy
4
4
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -4,7 +4,6 @@
4
4
  [![Build status](https://ci.appveyor.com/api/projects/status/l5adgcbfo128rvo9?svg=true)](https://ci.appveyor.com/project/famished-tiger/rley)
5
5
  [![Coverage Status](https://img.shields.io/coveralls/famished-tiger/Rley.svg)](https://coveralls.io/r/famished-tiger/Rley?branch=master)
6
6
  [![Gem Version](https://badge.fury.io/rb/rley.svg)](http://badge.fury.io/rb/rley)
7
- [![Dependency Status](https://gemnasium.com/famished-tiger/Rley.svg)](https://gemnasium.com/famished-tiger/Rley)
8
7
  [![Inline docs](http://inch-ci.org/github/famished-tiger/Rley.svg?branch=master)](http://inch-ci.org/github/famished-tiger/Rley)
9
8
  [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/famished-tiger/SRL-Ruby/blob/master/LICENSE.txt)
10
9
 
@@ -2,16 +2,18 @@ version: '{build}'
2
2
  max_jobs: 3
3
3
  environment:
4
4
  matrix:
5
- - Ruby_version: 200
6
- - Ruby_version: 200-x64
7
- - Ruby_version: 21
8
- - Ruby_version: 21-x64
9
- - Ruby_version: 22
10
- - Ruby_version: 22-x64
11
- - Ruby_version: 23
5
+ #- Ruby_version: 25-x64
6
+ - Ruby_version: 24-x64
12
7
  - Ruby_version: 23-x64
8
+ - Ruby_version: 22-x64
9
+ - Ruby_version: 21-x64
10
+ - Ruby_version: 200-x64
11
+ #- Ruby_version: 25
13
12
  - Ruby_version: 24
14
- - Ruby_version: 24-x64
13
+ - Ruby_version: 23
14
+ - Ruby_version: 22
15
+ - Ruby_version: 21
16
+ - Ruby_version: 200
15
17
 
16
18
  install:
17
19
  - set PATH=C:\Ruby%Ruby_version%\bin;%PATH%
@@ -5,7 +5,7 @@ require 'rley' # Load Rley library
5
5
 
6
6
  ########################################
7
7
  # Step 0. Instantiate facade object of Rley library.
8
- # It provides a unified, higher-level interface
8
+ # It provides a unified, higher-level interface
9
9
  engine = Rley::Engine.new
10
10
 
11
11
  ########################################
@@ -67,8 +67,9 @@ def tokenizer(aTextToParse)
67
67
  tokens = aTextToParse.scan(/\S+/).map do |word|
68
68
  term_name = Lexicon[word]
69
69
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
70
+
70
71
  pos = Rley::Lexical::Position.new(1, offset + 1)
71
- offset += word.length
72
+ offset += word.length
72
73
  Rley::Lexical::Token.new(word, term_name, pos)
73
74
  end
74
75
 
@@ -2,12 +2,13 @@ require 'rley'
2
2
  require 'engtagger' # Load POS (Part-Of-Speech) tagger EngTagger
3
3
 
4
4
  # REGEX to remove XML tags from Engtagger output
5
- GET_TAG = /<(.+?)>(.*?)<.+?>/
5
+ GET_TAG = /<(.+?)>(.*?)<.+?>/.freeze
6
6
 
7
7
  # Text tokenizer
8
8
  # Taken directly from Engtagger, will ensure uniform indexing while parsing
9
9
  def clean_text(text)
10
10
  return false unless valid_text(text)
11
+
11
12
  text = text.toutf8
12
13
  cleaned_text = text
13
14
  tokenized = []
@@ -48,13 +49,14 @@ def split_sentences(array)
48
49
  va wash wis wisc wy wyo usafa alta man ont que sask yuk]
49
50
  month = %w[jan feb mar apr may jun jul aug sep sept oct nov dec]
50
51
  misc = %w[vs etc no esp]
51
- abbr = Hash.new
52
+ abbr = {}
52
53
  [people, army, inst, place, comp, state, month, misc].flatten.each do |i|
53
54
  abbr[i] = true
54
55
  end
55
- words = Array.new
56
+ words = []
56
57
  tokenized.each_with_index do |_t, i|
57
- if tokenized[i + 1] && tokenized [i + 1] =~ /[A-Z\W]/ && tokenized[i] =~ /\A(.+)\.\z/
58
+ if tokenized[i + 1] &&
59
+ tokenized [i + 1] =~ /[A-Z\W]/ && tokenized[i] =~ /\A(.+)\.\z/
58
60
  w = $1
59
61
  # Don't separate the period off words that
60
62
  # meet any of the following conditions:
@@ -62,8 +64,9 @@ def split_sentences(array)
62
64
  # 1. It is defined in one of the lists above
63
65
  # 2. It is only one letter long: Alfred E. Sloan
64
66
  # 3. It has a repeating letter-dot: U.S.A. or J.C. Penney
65
- unless abbr[w.downcase] || w =~ /\A[a-z]\z/i || w =~ /[a-z](?:\.[a-z])+\z/i
66
- words << w
67
+ unless abbr[w.downcase] ||
68
+ w =~ /\A[a-z]\z/i || w =~ /[a-z](?:\.[a-z])+\z/i
69
+ words << w
67
70
  words << '.'
68
71
  next
69
72
  end
@@ -83,15 +86,20 @@ end
83
86
  def split_punct(text)
84
87
  # If there's no punctuation, return immediately
85
88
  return [text] if /\A\w+\z/ =~ text
89
+
86
90
  # Sanity checks
87
91
  text = text.gsub(/\W{10,}/o, ' ')
88
92
 
89
93
  # Put quotes into a standard format
90
94
  text = text.gsub(/`(?!`)(?=.*\w)/o, '` ') # Shift left quotes off text
91
95
  text = text.gsub(/"(?=.*\w)/o, ' `` ') # Convert left quotes to ``
92
- text = text.gsub(/(\W|^)'(?=.*\w)/o) { $1 ? $1 + ' ` ' : ' ` ' } # Convert left quote to `
96
+
97
+ # Convert left quote to `
98
+ text = text.gsub(/(\W|^)'(?=.*\w)/o) { $1 ? $1 + ' ` ' : ' ` ' }
93
99
  text = text.gsub(/"/, " '' ") # Convert (remaining) quotes to ''
94
- text = text.gsub(/(\w)'(?!')(?=\W|$)/o, "\\1 ' ") # Separate right single quotes
100
+
101
+ # Separate right single quotes
102
+ text = text.gsub(/(\w)'(?!')(?=\W|$)/o, "\\1 ' ")
95
103
 
96
104
  # Handle all other punctuation
97
105
  text = text.gsub(/--+/o, ' - ') # Convert and separate dashes
@@ -99,10 +107,13 @@ def split_punct(text)
99
107
  text = text.gsub(/:/o, ' :') # Shift semicolon off
100
108
  text = text.gsub(/(\.\.\.+)/o, ' \1 ') # Shift ellipses off
101
109
  text = text.gsub(/([\(\[\{\}\]\)])/o, ' \1 ') # Shift off brackets
102
- text = text.gsub(/([\!\?#\$%;~|])/o, ' \1 ') # Shift off other ``standard'' punctuation
110
+
111
+ # Shift off other ``standard'' punctuation
112
+ text = text.gsub(/([\!\?#\$%;~|])/o, ' \1 ')
103
113
 
104
114
  # English-specific contractions
105
- text = text.gsub(/([A-Za-z])'([dms])\b/o, "\\1 '\\2") # Separate off 'd 'm 's
115
+ # Separate off 'd 'm 's
116
+ text = text.gsub(/([A-Za-z])'([dms])\b/o, "\\1 '\\2")
106
117
  text = text.gsub(/n't\b/o, " n't") # Separate off n't
107
118
  text = text.gsub(/'(ve|ll|re)\b/o, " '\\1") # Separate off 've, 'll, 're
108
119
  result = text.split(' ')
@@ -139,7 +150,7 @@ tgr = EngTagger.new
139
150
  tagged = tgr.add_tags(text)
140
151
 
141
152
  # Generte tokenied lexicon of input text
142
- # Instead of creating a lexicon dictionary,
153
+ # Instead of creating a lexicon dictionary,
143
154
  # we would simply generate one each time on the fly for the current text only.
144
155
  lexicon = clean_text(text)
145
156
 
@@ -153,7 +164,7 @@ def tokenizer(lexicon, tokens)
153
164
  term_name = tokens[i].last
154
165
  rank = Rley::Lexical::Position.new(1, pos + 1)
155
166
  pos += word.length + 1 # Assuming one space between words.
156
- rley_tokens << Rley::Lexical::Token.new(word, term_name, pos)
167
+ rley_tokens << Rley::Lexical::Token.new(word, term_name, rank)
157
168
  end
158
169
  return rley_tokens
159
170
  end
@@ -68,10 +68,10 @@ Lexicon = {
68
68
  # Step 4. Creating a tokenizer
69
69
  # A tokenizer reads the input string and converts it into a sequence of tokens
70
70
  # Highly simplified tokenizer implementation.
71
- def tokenizer(aTextToParse)
71
+ def tokenizer(aTextToParse)
72
72
  scanner = StringScanner.new(aTextToParse)
73
73
  tokens = []
74
-
74
+
75
75
  loop do
76
76
  scanner.skip(/\s+/)
77
77
  curr_pos = scanner.pos
@@ -80,11 +80,12 @@ def tokenizer(aTextToParse)
80
80
 
81
81
  term_name = Lexicon[word]
82
82
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
83
+
83
84
  pos = Rley::Lexical::Position.new(1, curr_pos + 1)
84
85
  tokens << Rley::Lexical::Token.new(word, term_name, pos)
85
86
  end
86
87
 
87
- return tokens
88
+ return tokens
88
89
  end
89
90
 
90
91
 
@@ -64,7 +64,7 @@ Lexicon = {
64
64
  def tokenizer(aTextToParse)
65
65
  scanner = StringScanner.new(aTextToParse)
66
66
  tokens = []
67
-
67
+
68
68
  loop do
69
69
  scanner.skip(/\s+/)
70
70
  curr_pos = scanner.pos
@@ -73,6 +73,7 @@ def tokenizer(aTextToParse)
73
73
 
74
74
  term_name = Lexicon[word]
75
75
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
76
+
76
77
  pos = Rley::Lexical::Position.new(1, curr_pos + 1)
77
78
  tokens << Rley::Lexical::Token.new(word, term_name, pos)
78
79
  end
@@ -94,7 +95,7 @@ unless result.success?
94
95
  puts result.failure_reason.message
95
96
  exit(1)
96
97
  end
97
-
98
+
98
99
  ########################################
99
100
  # Step 6. Generating a parse tree from parse result
100
101
  ptree = engine.to_ptree(result)
@@ -29,6 +29,7 @@ JSONTerminalNode = Struct.new(:token, :value, :position) do
29
29
  end
30
30
 
31
31
  def done!
32
+ # Do nothing
32
33
  end
33
34
  end
34
35
 
@@ -76,6 +77,7 @@ class JSONCompositeNode
76
77
  end
77
78
 
78
79
  def done!
80
+ # Do nothing
79
81
  end
80
82
 
81
83
  alias subnodes children
@@ -123,6 +125,7 @@ class JSONPair
123
125
  end
124
126
 
125
127
  def done!
128
+ # Do nothing
126
129
  end
127
130
 
128
131
  def to_ruby