open_nlp 0.2.0-java → 0.3.0-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: b3d6968e3686d75cbe3875f8381249b6ff289b94
4
- data.tar.gz: eadddd4c5e86d4b3654c6e7c83dd59823e5bb6e2
2
+ SHA256:
3
+ metadata.gz: 611fa39e5357a43ac259dc113b299aede41e5379a1912dcd0bb32120da05356d
4
+ data.tar.gz: 0ef94231098429a66a11f8d3ab5dc5708156fcc425caa5ab4e29278f5233ab1f
5
5
  SHA512:
6
- metadata.gz: 424960c23f13b6e9e6f3c85ab137b341075ba118b42b4009e3fb26cb37d62fb9bf4b117405bfd3382cb71a3785da891bc9f0d4fba2aede99b3e7c6ed0cffa5b4
7
- data.tar.gz: b047c3e19f850bc46692a3ad58ac8bf7282e9393388f19601571a67e05f2bd4d078aa51d3536ea3f9554dce9a4be498329eb4f586dae1ddc23d026ccdbb29438
6
+ metadata.gz: 6e5fc1bbbd6059d818bbe3abc9408f497b845dd6e4a314b19a9d47a8e85a3435cdbe8d6c9fd2610f0a643a26974d4b20a0fc995c6660549149f9acaa688d3f52
7
+ data.tar.gz: c416e14a29512f1935a00289625ea162fac34f8dad4d320a8c8cf940253a6f9ac5c845e07bda12deca699ec5da809bbc733d9e392795e2895faa78e0d826a76c
data/.gitignore CHANGED
@@ -11,6 +11,7 @@ lib/bundler/man
11
11
  pkg
12
12
  rdoc
13
13
  spec/reports
14
+ spec/examples.txt
14
15
  test/tmp
15
16
  test/version_tmp
16
17
  tmp
data/.rspec CHANGED
@@ -1 +1,2 @@
1
1
  --colour
2
+ --require spec_helper
@@ -0,0 +1,24 @@
1
+ Metrics/AbcSize:
2
+ Max: 41
3
+
4
+ Metrics/BlockLength:
5
+ Exclude:
6
+ - spec/**/*.rb
7
+
8
+ Metrics/LineLength:
9
+ Max: 153
10
+
11
+ Metrics/MethodLength:
12
+ Max: 15
13
+
14
+ Metrics/ParameterLists:
15
+ Max: 6
16
+
17
+ Style/ClassAndModuleChildren:
18
+ Enabled: false
19
+
20
+ Style/ColonMethodCall:
21
+ Enabled: false
22
+
23
+ Style/Documentation:
24
+ Enabled: false
@@ -1 +1 @@
1
- jruby-9.0.5.0
1
+ jruby-9.2.4.0
@@ -1,6 +1,4 @@
1
1
  language: ruby
2
2
  rvm:
3
- - jruby-19mode
4
- - jruby-1.7.20
5
- - jruby-9.0.5.0
3
+ - jruby-9.2.4.0
6
4
  script: JRUBY_OPTS=-J-Xmx768m bundle exec rspec spec
data/Gemfile CHANGED
@@ -5,4 +5,5 @@ gemspec
5
5
 
6
6
  group :test do
7
7
  gem 'rspec'
8
- end
8
+ gem 'rubocop'
9
+ end
@@ -1,24 +1,42 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
+ open_nlp (0.3.0-java)
4
5
 
5
6
  GEM
6
7
  remote: https://rubygems.org/
7
8
  specs:
8
- diff-lcs (1.2.5)
9
- rspec (3.4.0)
10
- rspec-core (~> 3.4.0)
11
- rspec-expectations (~> 3.4.0)
12
- rspec-mocks (~> 3.4.0)
13
- rspec-core (3.4.4)
14
- rspec-support (~> 3.4.0)
15
- rspec-expectations (3.4.0)
9
+ ast (2.4.0)
10
+ diff-lcs (1.3)
11
+ jaro_winkler (1.5.1-java)
12
+ parallel (1.12.1)
13
+ parser (2.5.3.0)
14
+ ast (~> 2.4.0)
15
+ powerpack (0.1.2)
16
+ rainbow (3.0.0)
17
+ rspec (3.8.0)
18
+ rspec-core (~> 3.8.0)
19
+ rspec-expectations (~> 3.8.0)
20
+ rspec-mocks (~> 3.8.0)
21
+ rspec-core (3.8.0)
22
+ rspec-support (~> 3.8.0)
23
+ rspec-expectations (3.8.2)
16
24
  diff-lcs (>= 1.2.0, < 2.0)
17
- rspec-support (~> 3.4.0)
18
- rspec-mocks (3.4.1)
25
+ rspec-support (~> 3.8.0)
26
+ rspec-mocks (3.8.0)
19
27
  diff-lcs (>= 1.2.0, < 2.0)
20
- rspec-support (~> 3.4.0)
21
- rspec-support (3.4.1)
28
+ rspec-support (~> 3.8.0)
29
+ rspec-support (3.8.0)
30
+ rubocop (0.60.0)
31
+ jaro_winkler (~> 1.5.1)
32
+ parallel (~> 1.10)
33
+ parser (>= 2.5, != 2.5.1.1)
34
+ powerpack (~> 0.1)
35
+ rainbow (>= 2.2.2, < 4.0)
36
+ ruby-progressbar (~> 1.7)
37
+ unicode-display_width (~> 1.4.0)
38
+ ruby-progressbar (1.10.0)
39
+ unicode-display_width (1.4.0)
22
40
 
23
41
  PLATFORMS
24
42
  java
@@ -26,6 +44,7 @@ PLATFORMS
26
44
  DEPENDENCIES
27
45
  open_nlp!
28
46
  rspec
47
+ rubocop
29
48
 
30
49
  BUNDLED WITH
31
- 1.11.2
50
+ 1.17.1
data/Rakefile CHANGED
@@ -1 +1 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
@@ -7,10 +7,10 @@ module OpenNlp
7
7
  # @param [String] str string to be categorized
8
8
  # @return [String] category
9
9
  def categorize(str)
10
- fail ArgumentError, 'str param must be a String' unless str.is_a?(String)
10
+ raise ArgumentError, 'str param must be a String' unless str.is_a?(String)
11
11
 
12
12
  outcomes = j_instance.categorize(str)
13
13
  j_instance.getBestCategory(outcomes)
14
14
  end
15
15
  end
16
- end
16
+ end
@@ -2,16 +2,19 @@ module OpenNlp
2
2
  class Chunker < Tool
3
3
  self.java_class = Java::opennlp.tools.chunker.ChunkerME
4
4
 
5
+ # Initializes new instance of Chunker
6
+ #
7
+ # @param [OpenNlp::Model] model chunker model
8
+ # @param [Model::Tokenizer] token_model tokenizer model
9
+ # @param [Model::POSTagger] pos_model part-of-speech tagging model
5
10
  def initialize(model, token_model, pos_model)
6
11
  super(model)
7
12
 
8
- unless token_model.is_a?(Model::Tokenizer)
9
- fail ArgumentError, 'token model must be an OpenNlp::Tokenizer::Model'
10
- end
13
+ token_model.is_a?(Model::Tokenizer) ||
14
+ raise(ArgumentError, 'token model must be an OpenNlp::Tokenizer::Model')
11
15
 
12
- unless pos_model.is_a?(Model::POSTagger)
13
- fail ArgumentError, 'pos model must be an OpenNlp::POSTagger::Model'
14
- end
16
+ pos_model.is_a?(Model::POSTagger) ||
17
+ raise(ArgumentError, 'pos model must be an OpenNlp::POSTagger::Model')
15
18
 
16
19
  @tokenizer = Tokenizer.new(token_model)
17
20
  @pos_tagger = POSTagger.new(pos_model)
@@ -22,7 +25,7 @@ module OpenNlp
22
25
  # @param [String] str string to chunk
23
26
  # @return [Array] array of chunks with part-of-speech information
24
27
  def chunk(str)
25
- fail ArgumentError, 'str must be a String' unless str.is_a?(String)
28
+ raise ArgumentError, 'str must be a String' unless str.is_a?(String)
26
29
 
27
30
  tokens = tokenizer.tokenize(str)
28
31
  pos_tags = pos_tagger.tag(tokens).to_ary
@@ -39,24 +42,22 @@ module OpenNlp
39
42
  def build_chunks(chunks, tokens, pos_tags)
40
43
  data = tokens.zip(pos_tags, chunks)
41
44
 
42
- data.inject([]) do |acc, val|
45
+ data.each_with_object([]) do |val, acc|
43
46
  chunk = val[2]
44
- acc << [{val[0] => val[1]}] if chunk[0] == 'B' # add token to chunk if it is a start of chunk
45
-
46
- if chunk[0] == 'I'
47
- if acc.last
48
- acc.last << {val[0] => val[1]} # add token to chunk if it is a continuation of chunk
49
- else
50
- acc << [{val[0] => val[1]}] # add token to new chunk if no chunks exists
51
- end
52
- end
47
+ acc << [{ val[0] => val[1] }] if chunk[0] == 'B' # add token to chunk if it is a start of chunk
48
+
49
+ next if chunk[0] != 'I'
53
50
 
54
- acc
51
+ if acc.last
52
+ acc.last << { val[0] => val[1] } # add token to chunk if it is a continuation of chunk
53
+ else
54
+ acc << [{ val[0] => val[1] }] # add token to new chunk if no chunks exists
55
+ end
55
56
  end
56
57
  end
57
58
 
58
- def get_last_probabilities
59
+ def last_probabilities
59
60
  j_instance.probs.to_ary
60
61
  end
61
62
  end
62
- end
63
+ end
@@ -5,13 +5,7 @@ module OpenNlp
5
5
  end
6
6
 
7
7
  module ClassMethods
8
- def java_class=(value)
9
- @java_class = value
10
- end
11
-
12
- def java_class
13
- @java_class
14
- end
8
+ attr_accessor :java_class
15
9
  end
16
10
  end
17
- end
11
+ end
@@ -4,6 +4,9 @@ module OpenNlp
4
4
 
5
5
  attr_reader :j_model
6
6
 
7
+ # Initializes new instance of Model
8
+ #
9
+ # @param [String, java.io.FileInputStream] model
7
10
  def initialize(model)
8
11
  @j_model = self.class.java_class.new(model_stream(model))
9
12
  end
@@ -17,7 +20,7 @@ module OpenNlp
17
20
  when String
18
21
  java.io.FileInputStream.new(model)
19
22
  else
20
- fail ArgumentError, 'Model must be either a string or a java.io.FileInputStream'
23
+ raise ArgumentError, 'Model must be either a string or a java.io.FileInputStream'
21
24
  end
22
25
  end
23
26
  end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Categorizer < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.doccat.DoccatModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Chunker < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.chunker.ChunkerModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Detokenizer < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.tokenize.DetokenizationDictionary
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Parser < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.parser.ParserModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::POSTagger < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.postag.POSModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::SentenceDetector < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.sentdetect.SentenceModel
3
- end
3
+ end
@@ -1,3 +1,3 @@
1
1
  class OpenNlp::Model::Tokenizer < OpenNlp::Model
2
2
  self.java_class = Java::opennlp.tools.tokenize.TokenizerModel
3
- end
3
+ end
@@ -7,7 +7,8 @@ module OpenNlp
7
7
  # @param [Array<String>] tokens tokens to run name detection on
8
8
  # @return [Array<Java::opennlp.tools.util.Span>] names detected
9
9
  def detect(tokens)
10
- fail ArgumentError, 'tokens must be an instance of Array' unless tokens.is_a?(Array)
10
+ raise ArgumentError, 'tokens must be an instance of Array' unless tokens.is_a?(Array)
11
+
11
12
  j_instance.find(tokens.to_java(:String)).to_ary
12
13
  end
13
14
  end
@@ -1,13 +1,15 @@
1
1
  module OpenNlp
2
2
  class Parser < Tool
3
+ # Initializes new instance of Parser
4
+ #
5
+ # @param [OpenNlp::Model::Parser] parser_model
6
+ # @param [OpenNlp::Model::Tokenizer] token_model
3
7
  def initialize(parser_model, token_model)
4
- unless parser_model.is_a?(OpenNlp::Model)
5
- fail ArgumentError, 'parser_model must be an OpenNlp::Model'
6
- end
8
+ parser_model.is_a?(OpenNlp::Model::Parser) ||
9
+ raise(ArgumentError, 'parser_model must be an OpenNlp::Model')
7
10
 
8
- unless token_model.is_a?(Model::Tokenizer)
9
- fail ArgumentError, 'token_model must be an OpenNlp::Tokenizer::Model'
10
- end
11
+ token_model.is_a?(Model::Tokenizer) ||
12
+ raise(ArgumentError, 'token_model must be an OpenNlp::Tokenizer::Model')
11
13
 
12
14
  @j_instance = Java::opennlp.tools.parser.ParserFactory.create(parser_model.j_model)
13
15
  @tokenizer = Tokenizer.new(token_model)
@@ -19,6 +21,7 @@ module OpenNlp
19
21
  # @return [OpenNlp::Parser::Parse]
20
22
  def parse(text)
21
23
  raise ArgumentError, 'passed text must be a String' unless text.is_a?(String)
24
+
22
25
  text.empty? ? {} : parse_tokens(tokenizer.tokenize(text), text)
23
26
  end
24
27
 
@@ -27,16 +30,14 @@ module OpenNlp
27
30
  attr_reader :tokenizer
28
31
 
29
32
  def get_token_offset(text, tokens, index)
30
- offset = 0
31
- return offset unless index > 0
33
+ return 0 if index.zero?
32
34
 
33
- for i in (1..index) do
34
- offset = text.index tokens[i], offset + tokens[i - 1].size
35
+ (1..index).inject(0) do |offset, i|
36
+ text.index(tokens[i], offset + tokens[i - 1].size)
35
37
  end
36
- offset
37
38
  end
38
39
 
39
- def build_parse_obj(text, span_start, span_end, type=Java::opennlp.tools.parser.AbstractBottomUpParser::INC_NODE, probability=1, token_index=0)
40
+ def build_parse_obj(text, span_start, span_end, type = Java::opennlp.tools.parser.AbstractBottomUpParser::INC_NODE, probability = 1, token_index = 0)
40
41
  Java::opennlp.tools.parser.Parse.new(
41
42
  text.to_java(:String),
42
43
  Java::opennlp.tools.util.Span.new(span_start, span_end),
@@ -6,36 +6,57 @@ module OpenNlp
6
6
 
7
7
  self.java_class = Java::opennlp.tools.parser.Parse
8
8
 
9
+ # Initializes instance of Parser::Parse
10
+ #
11
+ # @param [Java::opennlp.tools.parser.Parse] java_instance
9
12
  def initialize(java_instance)
10
- raise ArgumentError, "java_instance must be an instance of #{self.class.java_class.name}" unless java_instance.is_a?(self.class.java_class)
13
+ java_instance.is_a?(self.class.java_class) ||
14
+ raise(ArgumentError, "java_instance must be an instance of #{self.class.java_class.name}")
11
15
 
12
16
  @j_instance = java_instance
13
17
  end
14
18
 
19
+ # Composes tree bank string, nested string representation of sentence parts, parts-of-speech and words,
20
+ # for example:
21
+ # '(TOP (S (NP (DT The) (JJ red) (NN fox)) (VP (VBZ sleeps) (ADVP (RB soundly))) (. .)))'
22
+ #
23
+ # @return [String]
15
24
  def tree_bank_string
16
- span, text, type, res = j_instance.getSpan, j_instance.getText, j_instance.getType, ''
17
- start = span.getStart
25
+ span = j_instance.getSpan
26
+ text = j_instance.getText
27
+ type = j_instance.getType
28
+ res = ''
29
+ start = span.getStart
18
30
 
19
31
  res << "(#{type} " if type != Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE
20
32
 
21
33
  j_instance.getChildren.each do |child|
22
34
  child_span = child.span
23
- res << text[start..child_span.getStart-1] if start < child_span.getStart
35
+ res << text[start..child_span.getStart - 1] if start < child_span.getStart
24
36
  res << self.class.new(child).tree_bank_string
25
37
  start = child_span.getEnd
26
38
  end
27
39
 
28
- res << text[start..span.getEnd-1] if start < span.getEnd
29
- res << ")" if type != Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE
40
+ res << text[start..span.getEnd - 1] if start < span.getEnd
41
+ res << ')' if type != Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE
30
42
 
31
43
  res
32
44
  end
33
45
 
46
+ # Composes array representation of sentence tree where
47
+ # each hash has following fields:
48
+ #
49
+ # :type => <[String] node type>,
50
+ # :parent_type => <[String] type of parent node>,
51
+ # :token => <[String] current token>,
52
+ # :children => <Array[Hash] array of child nodes hashes>
53
+ #
54
+ # @return [Array<Hash>]
34
55
  def code_tree
35
56
  kids = j_instance.getChildren
36
57
 
37
58
  kids.each_with_object([]) do |kid, acc|
38
- data = { :type => kid.getType, :parent_type => self.j_instance.getType, :token => kid.toString }
59
+ data = { type: kid.getType, parent_type: j_instance.getType, token: kid.toString }
39
60
  subtree = self.class.new(kid).code_tree
40
61
  data[:children] = subtree unless subtree.empty?
41
62
  acc << data
@@ -2,10 +2,13 @@ module OpenNlp
2
2
  class POSTagger < Tool
3
3
  self.java_class = Java::opennlp.tools.postag.POSTaggerME
4
4
 
5
+ # Adds tags to tokens passed as argument
6
+ #
7
+ # @param [Array<String>, String] tokens tokens to tag
8
+ # @return [Array<String>, String] array of part-of-speech tags or string with added part-of-speech tags
5
9
  def tag(tokens)
6
- unless (tokens.is_a?(Array) || tokens.is_a?(String))
7
- fail ArgumentError, 'tokens must be an instance of String or Array'
8
- end
10
+ !tokens.is_a?(Array) && !tokens.is_a?(String) &&
11
+ raise(ArgumentError, 'tokens must be an instance of String or Array')
9
12
 
10
13
  j_instance.tag(tokens.to_java(:String))
11
14
  end
@@ -7,7 +7,8 @@ module OpenNlp
7
7
  # @param [String] str string to detect sentences in
8
8
  # @return [Array<String>] array of detected sentences
9
9
  def detect(str)
10
- fail ArgumentError, 'str must be a String' unless str.is_a?(String)
10
+ raise ArgumentError, 'str must be a String' unless str.is_a?(String)
11
+
11
12
  j_instance.sentDetect(str).to_ary
12
13
  end
13
14
 
@@ -16,10 +17,11 @@ module OpenNlp
16
17
  # @param [String] str
17
18
  # @return [Array<OpenNlp::Util::Span>] array of spans for detected sentences
18
19
  def pos_detect(str)
19
- fail ArgumentError, 'str must be a String' unless str.is_a?(String)
20
+ raise ArgumentError, 'str must be a String' unless str.is_a?(String)
21
+
20
22
  j_instance.sentPosDetect(str).map do |span|
21
23
  OpenNlp::Util::Span.new(span.getStart, span.getEnd)
22
24
  end
23
25
  end
24
26
  end
25
- end
27
+ end
@@ -7,13 +7,14 @@ module OpenNlp
7
7
  # @param [String] str string to tokenize
8
8
  # @return [Array] array of string tokens
9
9
  def tokenize(str)
10
- fail ArgumentError, 'str must be a String' unless str.is_a?(String)
10
+ raise ArgumentError, 'str must be a String' unless str.is_a?(String)
11
+
11
12
  j_instance.tokenize(str).to_ary
12
13
  end
13
14
 
14
15
  private
15
16
 
16
- def get_last_probabilities
17
+ def last_probabilities
17
18
  j_instance.getTokenProbabilities.to_ary
18
19
  end
19
20
  end
@@ -4,9 +4,13 @@ module OpenNlp
4
4
 
5
5
  attr_reader :j_instance
6
6
 
7
+ # Initializes instance of Tool
8
+ #
9
+ # @param [OpenNlp::Model] model instance of model class to initialize a tool object
7
10
  def initialize(model)
8
- fail ArgumentError, 'model must be an OpenNlp::Model' unless model.is_a?(OpenNlp::Model)
11
+ raise ArgumentError, 'model must be an OpenNlp::Model' unless model.is_a?(OpenNlp::Model)
12
+
9
13
  @j_instance = self.class.java_class.new(model.j_model)
10
14
  end
11
15
  end
12
- end
16
+ end
@@ -5,34 +5,50 @@ class OpenNlp::Util::Span
5
5
 
6
6
  attr_reader :j_instance
7
7
 
8
- def initialize(s, e)
9
- fail ArgumentError, 's should be an integer' unless s.is_a?(Fixnum)
10
- fail ArgumentError, 'e should be an integer' unless e.is_a?(Fixnum)
11
-
12
- @j_instance = self.class.java_class.new(s, e)
8
+ # Initializes new instance of Util::Span
9
+ #
10
+ # @param [Integer] start_pos start index of the span
11
+ # @param [Integer] end_pos end index of the span
12
+ def initialize(start_pos, end_pos)
13
+ raise ArgumentError, 'start should be an integer' unless start_pos.is_a?(Integer)
14
+ raise ArgumentError, 'end should be an integer' unless end_pos.is_a?(Integer)
15
+
16
+ @j_instance = self.class.java_class.new(start_pos, end_pos)
13
17
  end
14
18
 
19
+ # Returns start index of the span
20
+ #
21
+ # @return [Integer]
15
22
  def start
16
23
  j_instance.getStart
17
24
  end
18
25
 
26
+ # Returns end index of the span
27
+ #
28
+ # @return [Integer]
19
29
  def end
20
30
  j_instance.getEnd
21
31
  end
22
32
 
33
+ # Returns type of the span
34
+ #
35
+ # @return [String]
23
36
  def type
24
37
  j_instance.getType
25
38
  end
26
39
 
40
+ # Returns length of the span
41
+ #
42
+ # @return [Integer]
27
43
  def length
28
44
  j_instance.length
29
45
  end
30
46
 
31
- def ==(obj)
32
- return false unless obj.is_a?(self.class)
47
+ def ==(other)
48
+ return false unless other.is_a?(self.class)
33
49
 
34
- [:start, :end, :type].each_with_object(true) do |method, acc|
35
- acc = acc && self.public_send(method) == obj.public_send(method)
50
+ %i[start end type].inject(true) do |acc, method|
51
+ acc && public_send(method) == other.public_send(method)
36
52
  end
37
53
  end
38
54
  end
@@ -1,3 +1,3 @@
1
1
  module OpenNlp
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.0'.freeze
3
3
  end
@@ -1,20 +1,19 @@
1
- # -*- encoding: utf-8 -*-
2
- lib = File.expand_path('../lib', __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require 'open_nlp/version'
5
4
 
6
5
  Gem::Specification.new do |gem|
7
- gem.name = "open_nlp"
6
+ gem.name = 'open_nlp'
8
7
  gem.version = OpenNlp::VERSION
9
- gem.authors = ["Hck"]
10
- gem.description = %q{JRuby tools wrapper for Apache OpenNLP}
11
- gem.summary = %q{A JRuby wrapper for the Apache OpenNLP tools library}
12
- gem.homepage = "http://github.com/hck/open_nlp"
8
+ gem.authors = ['Hck']
9
+ gem.description = 'JRuby tools wrapper for Apache OpenNLP'
10
+ gem.summary = 'A JRuby wrapper for the Apache OpenNLP tools library'
11
+ gem.homepage = 'http://github.com/hck/open_nlp'
13
12
 
14
- gem.files = `git ls-files`.split($/)
15
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
+ gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
14
+ gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
16
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
- gem.require_paths = ["lib"]
16
+ gem.require_paths = ['lib']
18
17
 
19
- gem.platform = "java"
18
+ gem.platform = 'java'
20
19
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::Tokenizer do
3
+ RSpec.describe OpenNlp::Model::Tokenizer do
4
4
  let(:model_file_name) { File.join(FIXTURES_DIR, 'en-token.bin') }
5
5
 
6
6
  it 'accept a string filename parameter' do
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  RSpec.describe OpenNlp::NamedEntityDetector do
4
4
  let(:model) { OpenNlp::Model::NamedEntityDetector.new(File.join(FIXTURES_DIR, 'en-ner-time.bin')) }
5
5
  let(:ne_detector) { described_class.new(model) }
6
-
6
+
7
7
  describe 'initialization' do
8
8
  it 'initializes with a valid model' do
9
9
  expect(ne_detector.j_instance).to be_a(described_class.java_class)
@@ -40,66 +40,66 @@ RSpec.describe OpenNlp::Parser::Parse do
40
40
  let(:expected_code_tree) do
41
41
  [
42
42
  {
43
- :type => 'S',
44
- :parent_type => 'TOP',
45
- :token => 'The red fox sleeps soundly .',
46
- :children => [
43
+ type: 'S',
44
+ parent_type: 'TOP',
45
+ token: 'The red fox sleeps soundly .',
46
+ children: [
47
47
  {
48
- :type => 'NP',
49
- :parent_type => 'S',
50
- :token => 'The red fox',
51
- :children => [
48
+ type: 'NP',
49
+ parent_type: 'S',
50
+ token: 'The red fox',
51
+ children: [
52
52
  {
53
- :type => 'DT',
54
- :parent_type => 'NP',
55
- :token => 'The',
56
- :children => [{:type => 'TK', :parent_type => 'DT', :token => 'The'}]
53
+ type: 'DT',
54
+ parent_type: 'NP',
55
+ token: 'The',
56
+ children: [{ type: 'TK', parent_type: 'DT', token: 'The' }]
57
57
  },
58
58
  {
59
- :type => 'JJ',
60
- :parent_type => 'NP',
61
- :token => 'red',
62
- :children => [{:type => 'TK', :parent_type => 'JJ', :token => 'red'}]
59
+ type: 'JJ',
60
+ parent_type: 'NP',
61
+ token: 'red',
62
+ children: [{ type: 'TK', parent_type: 'JJ', token: 'red' }]
63
63
  },
64
64
  {
65
- :type => 'NN',
66
- :parent_type => 'NP',
67
- :token => 'fox',
68
- :children => [{:type => 'TK', :parent_type => 'NN', :token => 'fox'}]
65
+ type: 'NN',
66
+ parent_type: 'NP',
67
+ token: 'fox',
68
+ children: [{ type: 'TK', parent_type: 'NN', token: 'fox' }]
69
69
  }
70
70
  ]
71
71
  },
72
72
  {
73
- :type => 'VP',
74
- :parent_type => 'S',
75
- :token => 'sleeps soundly',
76
- :children => [
73
+ type: 'VP',
74
+ parent_type: 'S',
75
+ token: 'sleeps soundly',
76
+ children: [
77
77
  {
78
- :type => 'VBZ',
79
- :parent_type => 'VP',
80
- :token => 'sleeps',
81
- :children => [{:type => 'TK', :parent_type => 'VBZ', :token => 'sleeps'}]
78
+ type: 'VBZ',
79
+ parent_type: 'VP',
80
+ token: 'sleeps',
81
+ children: [{ type: 'TK', parent_type: 'VBZ', token: 'sleeps' }]
82
82
  },
83
83
  {
84
- :type => 'ADVP',
85
- :parent_type => 'VP',
86
- :token => 'soundly',
87
- :children => [
84
+ type: 'ADVP',
85
+ parent_type: 'VP',
86
+ token: 'soundly',
87
+ children: [
88
88
  {
89
- :type => 'RB',
90
- :parent_type => 'ADVP',
91
- :token => 'soundly',
92
- :children => [{:type => 'TK', :parent_type => 'RB', :token => 'soundly'}]
89
+ type: 'RB',
90
+ parent_type: 'ADVP',
91
+ token: 'soundly',
92
+ children: [{ type: 'TK', parent_type: 'RB', token: 'soundly' }]
93
93
  }
94
94
  ]
95
95
  }
96
96
  ]
97
97
  },
98
98
  {
99
- :type => '.',
100
- :parent_type => 'S',
101
- :token => '.',
102
- :children => [{:type => 'TK', :parent_type => '.', :token => '.'}]
99
+ type: '.',
100
+ parent_type: 'S',
101
+ token: '.',
102
+ children: [{ type: 'TK', parent_type: '.', token: '.' }]
103
103
  }
104
104
  ]
105
105
  }
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe OpenNlp::Parser do
4
- let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, "en-parser-chunking.bin")) }
5
- let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
4
+ let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, 'en-parser-chunking.bin')) }
5
+ let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
6
6
  let(:parser) { described_class.new(model, token_model) }
7
7
 
8
8
  describe 'initialization' do
@@ -21,8 +21,8 @@ RSpec.describe OpenNlp::POSTagger do
21
21
  end
22
22
 
23
23
  it 'tags provided tokens' do
24
- tagged = pos_tagger.tag(%w(The quick brown fox jumps over the lazy dog .))
25
- expect(tagged.to_ary).to eq(%w(DT JJ JJ NN NNS IN DT JJ NN .))
24
+ tagged = pos_tagger.tag(%w[The quick brown fox jumps over the lazy dog .])
25
+ expect(tagged.to_ary).to eq(%w[DT JJ JJ NN NNS IN DT JJ NN .])
26
26
  end
27
27
 
28
28
  it 'raises an ArgumentError when nil is passed as an argument' do
@@ -3,3 +3,28 @@ require 'java'
3
3
  require 'open_nlp'
4
4
 
5
5
  FIXTURES_DIR = File.join(File.dirname(__FILE__), 'fixtures')
6
+
7
+ RSpec.configure do |config|
8
+ config.expect_with :rspec do |expectations|
9
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
10
+ end
11
+
12
+ config.mock_with :rspec do |mocks|
13
+ mocks.verify_partial_doubles = true
14
+ end
15
+
16
+ config.filter_run :focus
17
+ config.run_all_when_everything_filtered = true
18
+
19
+ config.example_status_persistence_file_path = 'spec/examples.txt'
20
+
21
+ config.disable_monkey_patching!
22
+
23
+ config.warnings = true
24
+
25
+ config.profile_examples = 10
26
+
27
+ config.order = :random
28
+
29
+ Kernel.srand config.seed
30
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: open_nlp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: java
6
6
  authors:
7
7
  - Hck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-07 00:00:00.000000000 Z
11
+ date: 2018-11-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: JRuby tools wrapper for Apache OpenNLP
14
14
  email:
@@ -18,6 +18,7 @@ extra_rdoc_files: []
18
18
  files:
19
19
  - ".gitignore"
20
20
  - ".rspec"
21
+ - ".rubocop.yml"
21
22
  - ".ruby-version"
22
23
  - ".travis.yml"
23
24
  - Gemfile
@@ -92,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
92
93
  version: '0'
93
94
  requirements: []
94
95
  rubyforge_project:
95
- rubygems_version: 2.4.8
96
+ rubygems_version: 2.7.6
96
97
  signing_key:
97
98
  specification_version: 4
98
99
  summary: A JRuby wrapper for the Apache OpenNLP tools library