open_nlp 0.2.0-java → 0.3.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: b3d6968e3686d75cbe3875f8381249b6ff289b94
4
- data.tar.gz: eadddd4c5e86d4b3654c6e7c83dd59823e5bb6e2
2
+ SHA256:
3
+ metadata.gz: 611fa39e5357a43ac259dc113b299aede41e5379a1912dcd0bb32120da05356d
4
+ data.tar.gz: 0ef94231098429a66a11f8d3ab5dc5708156fcc425caa5ab4e29278f5233ab1f
5
5
  SHA512:
6
- metadata.gz: 424960c23f13b6e9e6f3c85ab137b341075ba118b42b4009e3fb26cb37d62fb9bf4b117405bfd3382cb71a3785da891bc9f0d4fba2aede99b3e7c6ed0cffa5b4
7
- data.tar.gz: b047c3e19f850bc46692a3ad58ac8bf7282e9393388f19601571a67e05f2bd4d078aa51d3536ea3f9554dce9a4be498329eb4f586dae1ddc23d026ccdbb29438
6
+ metadata.gz: 6e5fc1bbbd6059d818bbe3abc9408f497b845dd6e4a314b19a9d47a8e85a3435cdbe8d6c9fd2610f0a643a26974d4b20a0fc995c6660549149f9acaa688d3f52
7
+ data.tar.gz: c416e14a29512f1935a00289625ea162fac34f8dad4d320a8c8cf940253a6f9ac5c845e07bda12deca699ec5da809bbc733d9e392795e2895faa78e0d826a76c
data/.gitignore CHANGED
@@ -11,6 +11,7 @@ lib/bundler/man
11
11
  pkg
12
12
  rdoc
13
13
  spec/reports
14
+ spec/examples.txt
14
15
  test/tmp
15
16
  test/version_tmp
16
17
  tmp
data/.rspec CHANGED
@@ -1 +1,2 @@
1
1
  --colour
2
+ --require spec_helper
@@ -0,0 +1,24 @@
1
+ Metrics/AbcSize:
2
+ Max: 41
3
+
4
+ Metrics/BlockLength:
5
+ Exclude:
6
+ - spec/**/*.rb
7
+
8
+ Metrics/LineLength:
9
+ Max: 153
10
+
11
+ Metrics/MethodLength:
12
+ Max: 15
13
+
14
+ Metrics/ParameterLists:
15
+ Max: 6
16
+
17
+ Style/ClassAndModuleChildren:
18
+ Enabled: false
19
+
20
+ Style/ColonMethodCall:
21
+ Enabled: false
22
+
23
+ Style/Documentation:
24
+ Enabled: false
@@ -1 +1 @@
1
- jruby-9.0.5.0
1
+ jruby-9.2.4.0
@@ -1,6 +1,4 @@
1
1
  language: ruby
2
2
  rvm:
3
- - jruby-19mode
4
- - jruby-1.7.20
5
- - jruby-9.0.5.0
3
+ - jruby-9.2.4.0
6
4
  script: JRUBY_OPTS=-J-Xmx768m bundle exec rspec spec
data/Gemfile CHANGED
@@ -5,4 +5,5 @@ gemspec
5
5
 
6
6
  group :test do
7
7
  gem 'rspec'
8
- end
8
+ gem 'rubocop'
9
+ end
@@ -1,24 +1,42 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
+ open_nlp (0.3.0-java)
4
5
 
5
6
  GEM
6
7
  remote: https://rubygems.org/
7
8
  specs:
8
- diff-lcs (1.2.5)
9
- rspec (3.4.0)
10
- rspec-core (~> 3.4.0)
11
- rspec-expectations (~> 3.4.0)
12
- rspec-mocks (~> 3.4.0)
13
- rspec-core (3.4.4)
14
- rspec-support (~> 3.4.0)
15
- rspec-expectations (3.4.0)
9
+ ast (2.4.0)
10
+ diff-lcs (1.3)
11
+ jaro_winkler (1.5.1-java)
12
+ parallel (1.12.1)
13
+ parser (2.5.3.0)
14
+ ast (~> 2.4.0)
15
+ powerpack (0.1.2)
16
+ rainbow (3.0.0)
17
+ rspec (3.8.0)
18
+ rspec-core (~> 3.8.0)
19
+ rspec-expectations (~> 3.8.0)
20
+ rspec-mocks (~> 3.8.0)
21
+ rspec-core (3.8.0)
22
+ rspec-support (~> 3.8.0)
23
+ rspec-expectations (3.8.2)
16
24
  diff-lcs (>= 1.2.0, < 2.0)
17
- rspec-support (~> 3.4.0)
18
- rspec-mocks (3.4.1)
25
+ rspec-support (~> 3.8.0)
26
+ rspec-mocks (3.8.0)
19
27
  diff-lcs (>= 1.2.0, < 2.0)
20
- rspec-support (~> 3.4.0)
21
- rspec-support (3.4.1)
28
+ rspec-support (~> 3.8.0)
29
+ rspec-support (3.8.0)
30
+ rubocop (0.60.0)
31
+ jaro_winkler (~> 1.5.1)
32
+ parallel (~> 1.10)
33
+ parser (>= 2.5, != 2.5.1.1)
34
+ powerpack (~> 0.1)
35
+ rainbow (>= 2.2.2, < 4.0)
36
+ ruby-progressbar (~> 1.7)
37
+ unicode-display_width (~> 1.4.0)
38
+ ruby-progressbar (1.10.0)
39
+ unicode-display_width (1.4.0)
22
40
 
23
41
  PLATFORMS
24
42
  java
@@ -26,6 +44,7 @@ PLATFORMS
26
44
  DEPENDENCIES
27
45
  open_nlp!
28
46
  rspec
47
+ rubocop
29
48
 
30
49
  BUNDLED WITH
31
- 1.11.2
50
+ 1.17.1
data/Rakefile CHANGED
@@ -1 +1 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
@@ -7,10 +7,10 @@ module OpenNlp
7
7
  # @param [String] str string to be categorized
8
8
  # @return [String] category
9
9
  def categorize(str)
10
- fail ArgumentError, 'str param must be a String' unless str.is_a?(String)
10
+ raise ArgumentError, 'str param must be a String' unless str.is_a?(String)
11
11
 
12
12
  outcomes = j_instance.categorize(str)
13
13
  j_instance.getBestCategory(outcomes)
14
14
  end
15
15
  end
16
- end
16
+ end
@@ -2,16 +2,19 @@ module OpenNlp
2
2
  class Chunker < Tool
3
3
  self.java_class = Java::opennlp.tools.chunker.ChunkerME
4
4
 
5
+ # Initializes new instance of Chunker
6
+ #
7
+ # @param [OpenNlp::Model] model chunker model
8
+ # @param [Model::Tokenizer] token_model tokenizer model
9
+ # @param [Model::POSTagger] pos_model part-of-speech tagging model
5
10
  def initialize(model, token_model, pos_model)
6
11
  super(model)
7
12
 
8
- unless token_model.is_a?(Model::Tokenizer)
9
- fail ArgumentError, 'token model must be an OpenNlp::Tokenizer::Model'
10
- end
13
+ token_model.is_a?(Model::Tokenizer) ||
14
+ raise(ArgumentError, 'token model must be an OpenNlp::Tokenizer::Model')
11
15
 
12
- unless pos_model.is_a?(Model::POSTagger)
13
- fail ArgumentError, 'pos model must be an OpenNlp::POSTagger::Model'
14
- end
16
+ pos_model.is_a?(Model::POSTagger) ||
17
+ raise(ArgumentError, 'pos model must be an OpenNlp::POSTagger::Model')
15
18
 
16
19
  @tokenizer = Tokenizer.new(token_model)
17
20
  @pos_tagger = POSTagger.new(pos_model)
@@ -22,7 +25,7 @@ module OpenNlp
22
25
  # @param [String] str string to chunk
23
26
  # @return [Array] array of chunks with part-of-sentence information
24
27
  def chunk(str)
25
- fail ArgumentError, 'str must be a String' unless str.is_a?(String)
28
+ raise ArgumentError, 'str must be a String' unless str.is_a?(String)
26
29
 
27
30
  tokens = tokenizer.tokenize(str)
28
31
  pos_tags = pos_tagger.tag(tokens).to_ary
@@ -39,24 +42,22 @@ module OpenNlp
39
42
  def build_chunks(chunks, tokens, pos_tags)
40
43
  data = tokens.zip(pos_tags, chunks)
41
44
 
42
- data.inject([]) do |acc, val|
45
+ data.each_with_object([]) do |val, acc|
43
46
  chunk = val[2]
44
- acc << [{val[0] => val[1]}] if chunk[0] == 'B' # add token to chunk if it is a start of chunk
45
-
46
- if chunk[0] == 'I'
47
- if acc.last
48
- acc.last << {val[0] => val[1]} # add token to chunk if it is a continuation of chunk
49
- else
50
- acc << [{val[0] => val[1]}] # add token to new chunk if no chunks exists
51
- end
52
- end
47
+ acc << [{ val[0] => val[1] }] if chunk[0] == 'B' # add token to chunk if it is a start of chunk
48
+
49
+ next if chunk[0] != 'I'
53
50
 
54
- acc
51
+ if acc.last
52
+ acc.last << { val[0] => val[1] } # add token to chunk if it is a continuation of chunk
53
+ else
54
+ acc << [{ val[0] => val[1] }] # add token to new chunk if no chunks exists
55
+ end
55
56
  end
56
57
  end
57
58
 
58
- def get_last_probabilities
59
+ def last_probabilities
59
60
  j_instance.probs.to_ary
60
61
  end
61
62
  end
62
- end
63
+ end
@@ -5,13 +5,7 @@ module OpenNlp
5
5
  end
6
6
 
7
7
  module ClassMethods
8
- def java_class=(value)
9
- @java_class = value
10
- end
11
-
12
- def java_class
13
- @java_class
14
- end
8
+ attr_accessor :java_class
15
9
  end
16
10
  end
17
- end
11
+ end
@@ -4,6 +4,9 @@ module OpenNlp
4
4
 
5
5
  attr_reader :j_model
6
6
 
7
+ # Initializes new instance of Model
8
+ #
9
+ # @param [String, java.io.FileInputStream] model
7
10
  def initialize(model)
8
11
  @j_model = self.class.java_class.new(model_stream(model))
9
12
  end
@@ -17,7 +20,7 @@ module OpenNlp
17
20
  when String
18
21
  java.io.FileInputStream.new(model)
19
22
  else
20
- fail ArgumentError, 'Model must be either a string or a java.io.FileInputStream'
23
+ raise ArgumentError, 'Model must be either a string or a java.io.FileInputStream'
21
24
  end
22
25
  end
23
26
  end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Categorizer < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.doccat.DoccatModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Chunker < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.chunker.ChunkerModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Detokenizer < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.tokenize.DetokenizationDictionary
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Parser < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.parser.ParserModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::POSTagger < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.postag.POSModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::SentenceDetector < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.sentdetect.SentenceModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Tokenizer < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.tokenize.TokenizerModel
3
- end
3
+ end
@@ -7,7 +7,8 @@ module OpenNlp
7
7
  # @param [Array<String>] tokens tokens to run name detection on
8
8
  # @return [Array<Java::opennlp.tools.util.Span>] names detected
9
9
  def detect(tokens)
10
- fail ArgumentError, 'tokens must be an instance of Array' unless tokens.is_a?(Array)
10
+ raise ArgumentError, 'tokens must be an instance of Array' unless tokens.is_a?(Array)
11
+
11
12
  j_instance.find(tokens.to_java(:String)).to_ary
12
13
  end
13
14
  end
@@ -1,13 +1,15 @@
1
1
  module OpenNlp
2
2
  class Parser < Tool
3
+ # Initializes new instance of Parser
4
+ #
5
+ # @param [OpenNlp::Model::Parser] parser_model
6
+ # @param [OpenNlp::Model::Tokenizer] token_model
3
7
  def initialize(parser_model, token_model)
4
- unless parser_model.is_a?(OpenNlp::Model)
5
- fail ArgumentError, 'parser_model must be an OpenNlp::Model'
6
- end
8
+ parser_model.is_a?(OpenNlp::Model::Parser) ||
9
+ raise(ArgumentError, 'parser_model must be an OpenNlp::Model')
7
10
 
8
- unless token_model.is_a?(Model::Tokenizer)
9
- fail ArgumentError, 'token_model must be an OpenNlp::Tokenizer::Model'
10
- end
11
+ token_model.is_a?(Model::Tokenizer) ||
12
+ raise(ArgumentError, 'token_model must be an OpenNlp::Tokenizer::Model')
11
13
 
12
14
  @j_instance = Java::opennlp.tools.parser.ParserFactory.create(parser_model.j_model)
13
15
  @tokenizer = Tokenizer.new(token_model)
@@ -19,6 +21,7 @@ module OpenNlp
19
21
  # @return [OpenNlp::Parser::Parse]
20
22
  def parse(text)
21
23
  raise ArgumentError, 'passed text must be a String' unless text.is_a?(String)
24
+
22
25
  text.empty? ? {} : parse_tokens(tokenizer.tokenize(text), text)
23
26
  end
24
27
 
@@ -27,16 +30,14 @@ module OpenNlp
27
30
  attr_reader :tokenizer
28
31
 
29
32
  def get_token_offset(text, tokens, index)
30
- offset = 0
31
- return offset unless index > 0
33
+ return 0 if index.zero?
32
34
 
33
- for i in (1..index) do
34
- offset = text.index tokens[i], offset + tokens[i - 1].size
35
+ (1..index).inject(0) do |offset, i|
36
+ text.index(tokens[i], offset + tokens[i - 1].size)
35
37
  end
36
- offset
37
38
  end
38
39
 
39
- def build_parse_obj(text, span_start, span_end, type=Java::opennlp.tools.parser.AbstractBottomUpParser::INC_NODE, probability=1, token_index=0)
40
+ def build_parse_obj(text, span_start, span_end, type = Java::opennlp.tools.parser.AbstractBottomUpParser::INC_NODE, probability = 1, token_index = 0)
40
41
  Java::opennlp.tools.parser.Parse.new(
41
42
  text.to_java(:String),
42
43
  Java::opennlp.tools.util.Span.new(span_start, span_end),
@@ -6,36 +6,57 @@ module OpenNlp
6
6
 
7
7
  self.java_class = Java::opennlp.tools.parser.Parse
8
8
 
9
+ # Initializes instance of Parser::Parse
10
+ #
11
+ # @param [Java::opennlp.tools.parser.Parse] java_instance
9
12
  def initialize(java_instance)
10
- raise ArgumentError, "java_instance must be an instance of #{self.class.java_class.name}" unless java_instance.is_a?(self.class.java_class)
13
+ java_instance.is_a?(self.class.java_class) ||
14
+ raise(ArgumentError, "java_instance must be an instance of #{self.class.java_class.name}")
11
15
 
12
16
  @j_instance = java_instance
13
17
  end
14
18
 
19
+ # Composes tree bank string, nested string representation of sentence parts, parts-of-speech and words,
20
+ # for example:
21
+ # '(TOP (S (NP (DT The) (JJ red) (NN fox)) (VP (VBZ sleeps) (ADVP (RB soundly))) (. .)))'
22
+ #
23
+ # @return [String]
15
24
  def tree_bank_string
16
- span, text, type, res = j_instance.getSpan, j_instance.getText, j_instance.getType, ''
17
- start = span.getStart
25
+ span = j_instance.getSpan
26
+ text = j_instance.getText
27
+ type = j_instance.getType
28
+ res = ''
29
+ start = span.getStart
18
30
 
19
31
  res << "(#{type} " if type != Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE
20
32
 
21
33
  j_instance.getChildren.each do |child|
22
34
  child_span = child.span
23
- res << text[start..child_span.getStart-1] if start < child_span.getStart
35
+ res << text[start..child_span.getStart - 1] if start < child_span.getStart
24
36
  res << self.class.new(child).tree_bank_string
25
37
  start = child_span.getEnd
26
38
  end
27
39
 
28
- res << text[start..span.getEnd-1] if start < span.getEnd
29
- res << ")" if type != Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE
40
+ res << text[start..span.getEnd - 1] if start < span.getEnd
41
+ res << ')' if type != Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE
30
42
 
31
43
  res
32
44
  end
33
45
 
46
+ # Composes array representation of sentence tree where
47
+ # each hash has following fields:
48
+ #
49
+ # :type => <[String] node type>,
50
+ # :parent_type => <[String] type of parent node>,
51
+ # :token => <[String] current token>,
52
+ # :children => <Array[Hash] array of child nodes hashes>
53
+ #
54
+ # @return [Array<Hash>]
34
55
  def code_tree
35
56
  kids = j_instance.getChildren
36
57
 
37
58
  kids.each_with_object([]) do |kid, acc|
38
- data = { :type => kid.getType, :parent_type => self.j_instance.getType, :token => kid.toString }
59
+ data = { type: kid.getType, parent_type: j_instance.getType, token: kid.toString }
39
60
  subtree = self.class.new(kid).code_tree
40
61
  data[:children] = subtree unless subtree.empty?
41
62
  acc << data
@@ -2,10 +2,13 @@ module OpenNlp
2
2
  class POSTagger < Tool
3
3
  self.java_class = Java::opennlp.tools.postag.POSTaggerME
4
4
 
5
+ # Adds tags to tokens passed as argument
6
+ #
7
+ # @param [Array<String>, String] tokens tokens to tag
8
+ # @return [Array<String>, String] array of part-of-speech tags or string with added part-of-speech tags
5
9
  def tag(tokens)
6
- unless (tokens.is_a?(Array) || tokens.is_a?(String))
7
- fail ArgumentError, 'tokens must be an instance of String or Array'
8
- end
10
+ !tokens.is_a?(Array) && !tokens.is_a?(String) &&
11
+ raise(ArgumentError, 'tokens must be an instance of String or Array')
9
12
 
10
13
  j_instance.tag(tokens.to_java(:String))
11
14
  end
@@ -7,7 +7,8 @@ module OpenNlp
7
7
  # @param [String] string string to detect sentences in
8
8
  # @return [Array<String>] array of detected sentences
9
9
  def detect(str)
10
- fail ArgumentError, 'str must be a String' unless str.is_a?(String)
10
+ raise ArgumentError, 'str must be a String' unless str.is_a?(String)
11
+
11
12
  j_instance.sentDetect(str).to_ary
12
13
  end
13
14
 
@@ -16,10 +17,11 @@ module OpenNlp
16
17
  # @param [String] str
17
18
  # @return [Array<OpenNlp::Util::Span>] array of spans for detected sentences
18
19
  def pos_detect(str)
19
- fail ArgumentError, 'str must be a String' unless str.is_a?(String)
20
+ raise ArgumentError, 'str must be a String' unless str.is_a?(String)
21
+
20
22
  j_instance.sentPosDetect(str).map do |span|
21
23
  OpenNlp::Util::Span.new(span.getStart, span.getEnd)
22
24
  end
23
25
  end
24
26
  end
25
- end
27
+ end
@@ -7,13 +7,14 @@ module OpenNlp
7
7
  # @param [String] str string to tokenize
8
8
  # @return [Array] array of string tokens
9
9
  def tokenize(str)
10
- fail ArgumentError, 'str must be a String' unless str.is_a?(String)
10
+ raise ArgumentError, 'str must be a String' unless str.is_a?(String)
11
+
11
12
  j_instance.tokenize(str).to_ary
12
13
  end
13
14
 
14
15
  private
15
16
 
16
- def get_last_probabilities
17
+ def last_probabilities
17
18
  j_instance.getTokenProbabilities.to_ary
18
19
  end
19
20
  end
@@ -4,9 +4,13 @@ module OpenNlp
4
4
 
5
5
  attr_reader :j_instance
6
6
 
7
+ # Initializes instance of Tool
8
+ #
9
+ # @param [OpenNlp::Model] model instance of model class to initialize a tool object
7
10
  def initialize(model)
8
- fail ArgumentError, 'model must be an OpenNlp::Model' unless model.is_a?(OpenNlp::Model)
11
+ raise ArgumentError, 'model must be an OpenNlp::Model' unless model.is_a?(OpenNlp::Model)
12
+
9
13
  @j_instance = self.class.java_class.new(model.j_model)
10
14
  end
11
15
  end
12
- end
16
+ end
@@ -5,34 +5,50 @@ class OpenNlp::Util::Span
5
5
 
6
6
  attr_reader :j_instance
7
7
 
8
- def initialize(s, e)
9
- fail ArgumentError, 's should be an integer' unless s.is_a?(Fixnum)
10
- fail ArgumentError, 'e should be an integer' unless e.is_a?(Fixnum)
11
-
12
- @j_instance = self.class.java_class.new(s, e)
8
+ # Initializes new instance of Util::Span
9
+ #
10
+ # @param [Integer] start_pos start index of the span
11
+ # @param [Integer] end_pos end index of the span
12
+ def initialize(start_pos, end_pos)
13
+ raise ArgumentError, 'start should be an integer' unless start_pos.is_a?(Integer)
14
+ raise ArgumentError, 'end should be an integer' unless end_pos.is_a?(Integer)
15
+
16
+ @j_instance = self.class.java_class.new(start_pos, end_pos)
13
17
  end
14
18
 
19
+ # Returns start index of the span
20
+ #
21
+ # @return [Integer]
15
22
  def start
16
23
  j_instance.getStart
17
24
  end
18
25
 
26
+ # Returns end index of the span
27
+ #
28
+ # @return [Integer]
19
29
  def end
20
30
  j_instance.getEnd
21
31
  end
22
32
 
33
+ # Returns type of the span
34
+ #
35
+ # @return [String]
23
36
  def type
24
37
  j_instance.getType
25
38
  end
26
39
 
40
+ # Returns length of the span
41
+ #
42
+ # @return [Integer]
27
43
  def length
28
44
  j_instance.length
29
45
  end
30
46
 
31
- def ==(obj)
32
- return false unless obj.is_a?(self.class)
47
+ def ==(other)
48
+ return false unless other.is_a?(self.class)
33
49
 
34
- [:start, :end, :type].each_with_object(true) do |method, acc|
35
- acc = acc && self.public_send(method) == obj.public_send(method)
50
+ %i[start end type].inject(true) do |acc, method|
51
+ acc && public_send(method) == other.public_send(method)
36
52
  end
37
53
  end
38
54
  end
@@ -1,3 +1,3 @@
1
1
  module OpenNlp
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.0'.freeze
3
3
  end
@@ -1,20 +1,19 @@
1
- # -*- encoding: utf-8 -*-
2
- lib = File.expand_path('../lib', __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require 'open_nlp/version'
5
4
 
6
5
  Gem::Specification.new do |gem|
7
- gem.name = "open_nlp"
6
+ gem.name = 'open_nlp'
8
7
  gem.version = OpenNlp::VERSION
9
- gem.authors = ["Hck"]
10
- gem.description = %q{JRuby tools wrapper for Apache OpenNLP}
11
- gem.summary = %q{A JRuby wrapper for the Apache OpenNLP tools library}
12
- gem.homepage = "http://github.com/hck/open_nlp"
8
+ gem.authors = ['Hck']
9
+ gem.description = 'JRuby tools wrapper for Apache OpenNLP'
10
+ gem.summary = 'A JRuby wrapper for the Apache OpenNLP tools library'
11
+ gem.homepage = 'http://github.com/hck/open_nlp'
13
12
 
14
- gem.files = `git ls-files`.split($/)
15
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
+ gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
14
+ gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
16
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
- gem.require_paths = ["lib"]
16
+ gem.require_paths = ['lib']
18
17
 
19
- gem.platform = "java"
18
+ gem.platform = 'java'
20
19
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::Tokenizer do
3
+ RSpec.describe OpenNlp::Model::Tokenizer do
4
4
  let(:model_file_name) { File.join(FIXTURES_DIR, 'en-token.bin') }
5
5
 
6
6
  it 'accept a string filename parameter' do
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  RSpec.describe OpenNlp::NamedEntityDetector do
4
4
  let(:model) { OpenNlp::Model::NamedEntityDetector.new(File.join(FIXTURES_DIR, 'en-ner-time.bin')) }
5
5
  let(:ne_detector) { described_class.new(model) }
6
-
6
+
7
7
  describe 'initialization' do
8
8
  it 'initializes with a valid model' do
9
9
  expect(ne_detector.j_instance).to be_a(described_class.java_class)
@@ -40,66 +40,66 @@ RSpec.describe OpenNlp::Parser::Parse do
40
40
  let(:expected_code_tree) do
41
41
  [
42
42
  {
43
- :type => 'S',
44
- :parent_type => 'TOP',
45
- :token => 'The red fox sleeps soundly .',
46
- :children => [
43
+ type: 'S',
44
+ parent_type: 'TOP',
45
+ token: 'The red fox sleeps soundly .',
46
+ children: [
47
47
  {
48
- :type => 'NP',
49
- :parent_type => 'S',
50
- :token => 'The red fox',
51
- :children => [
48
+ type: 'NP',
49
+ parent_type: 'S',
50
+ token: 'The red fox',
51
+ children: [
52
52
  {
53
- :type => 'DT',
54
- :parent_type => 'NP',
55
- :token => 'The',
56
- :children => [{:type => 'TK', :parent_type => 'DT', :token => 'The'}]
53
+ type: 'DT',
54
+ parent_type: 'NP',
55
+ token: 'The',
56
+ children: [{ type: 'TK', parent_type: 'DT', token: 'The' }]
57
57
  },
58
58
  {
59
- :type => 'JJ',
60
- :parent_type => 'NP',
61
- :token => 'red',
62
- :children => [{:type => 'TK', :parent_type => 'JJ', :token => 'red'}]
59
+ type: 'JJ',
60
+ parent_type: 'NP',
61
+ token: 'red',
62
+ children: [{ type: 'TK', parent_type: 'JJ', token: 'red' }]
63
63
  },
64
64
  {
65
- :type => 'NN',
66
- :parent_type => 'NP',
67
- :token => 'fox',
68
- :children => [{:type => 'TK', :parent_type => 'NN', :token => 'fox'}]
65
+ type: 'NN',
66
+ parent_type: 'NP',
67
+ token: 'fox',
68
+ children: [{ type: 'TK', parent_type: 'NN', token: 'fox' }]
69
69
  }
70
70
  ]
71
71
  },
72
72
  {
73
- :type => 'VP',
74
- :parent_type => 'S',
75
- :token => 'sleeps soundly',
76
- :children => [
73
+ type: 'VP',
74
+ parent_type: 'S',
75
+ token: 'sleeps soundly',
76
+ children: [
77
77
  {
78
- :type => 'VBZ',
79
- :parent_type => 'VP',
80
- :token => 'sleeps',
81
- :children => [{:type => 'TK', :parent_type => 'VBZ', :token => 'sleeps'}]
78
+ type: 'VBZ',
79
+ parent_type: 'VP',
80
+ token: 'sleeps',
81
+ children: [{ type: 'TK', parent_type: 'VBZ', token: 'sleeps' }]
82
82
  },
83
83
  {
84
- :type => 'ADVP',
85
- :parent_type => 'VP',
86
- :token => 'soundly',
87
- :children => [
84
+ type: 'ADVP',
85
+ parent_type: 'VP',
86
+ token: 'soundly',
87
+ children: [
88
88
  {
89
- :type => 'RB',
90
- :parent_type => 'ADVP',
91
- :token => 'soundly',
92
- :children => [{:type => 'TK', :parent_type => 'RB', :token => 'soundly'}]
89
+ type: 'RB',
90
+ parent_type: 'ADVP',
91
+ token: 'soundly',
92
+ children: [{ type: 'TK', parent_type: 'RB', token: 'soundly' }]
93
93
  }
94
94
  ]
95
95
  }
96
96
  ]
97
97
  },
98
98
  {
99
- :type => '.',
100
- :parent_type => 'S',
101
- :token => '.',
102
- :children => [{:type => 'TK', :parent_type => '.', :token => '.'}]
99
+ type: '.',
100
+ parent_type: 'S',
101
+ token: '.',
102
+ children: [{ type: 'TK', parent_type: '.', token: '.' }]
103
103
  }
104
104
  ]
105
105
  }
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe OpenNlp::Parser do
4
- let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, "en-parser-chunking.bin")) }
5
- let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
4
+ let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, 'en-parser-chunking.bin')) }
5
+ let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
6
6
  let(:parser) { described_class.new(model, token_model) }
7
7
 
8
8
  describe 'initialization' do
@@ -21,8 +21,8 @@ RSpec.describe OpenNlp::POSTagger do
21
21
  end
22
22
 
23
23
  it 'tags provided tokens' do
24
- tagged = pos_tagger.tag(%w(The quick brown fox jumps over the lazy dog .))
25
- expect(tagged.to_ary).to eq(%w(DT JJ JJ NN NNS IN DT JJ NN .))
24
+ tagged = pos_tagger.tag(%w[The quick brown fox jumps over the lazy dog .])
25
+ expect(tagged.to_ary).to eq(%w[DT JJ JJ NN NNS IN DT JJ NN .])
26
26
  end
27
27
 
28
28
  it 'raises an ArgumentError when nil is passed as an argument' do
@@ -3,3 +3,28 @@ require 'java'
3
3
  require 'open_nlp'
4
4
 
5
5
  FIXTURES_DIR = File.join(File.dirname(__FILE__), 'fixtures')
6
+
7
+ RSpec.configure do |config|
8
+ config.expect_with :rspec do |expectations|
9
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
10
+ end
11
+
12
+ config.mock_with :rspec do |mocks|
13
+ mocks.verify_partial_doubles = true
14
+ end
15
+
16
+ config.filter_run :focus
17
+ config.run_all_when_everything_filtered = true
18
+
19
+ config.example_status_persistence_file_path = 'spec/examples.txt'
20
+
21
+ config.disable_monkey_patching!
22
+
23
+ config.warnings = true
24
+
25
+ config.profile_examples = 10
26
+
27
+ config.order = :random
28
+
29
+ Kernel.srand config.seed
30
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: open_nlp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: java
6
6
  authors:
7
7
  - Hck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-07 00:00:00.000000000 Z
11
+ date: 2018-11-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: JRuby tools wrapper for Apache OpenNLP
14
14
  email:
@@ -18,6 +18,7 @@ extra_rdoc_files: []
18
18
  files:
19
19
  - ".gitignore"
20
20
  - ".rspec"
21
+ - ".rubocop.yml"
21
22
  - ".ruby-version"
22
23
  - ".travis.yml"
23
24
  - Gemfile
@@ -92,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
92
93
  version: '0'
93
94
  requirements: []
94
95
  rubyforge_project:
95
- rubygems_version: 2.4.8
96
+ rubygems_version: 2.7.6
96
97
  signing_key:
97
98
  specification_version: 4
98
99
  summary: A JRuby wrapper for the Apache OpenNLP tools library