nlp_toolz 1.0.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: af17d3c95ae954aa92e06936825db1e051e6a3b5
4
- data.tar.gz: 1248afa56f823c06d39c8121dcacb1759ebfff75
3
+ metadata.gz: ce5f4cad49039b0d8cb6d626facc67a4efa32ae4
4
+ data.tar.gz: 0565742385f0a34aabe4e456cde014ba2673a589
5
5
  SHA512:
6
- metadata.gz: fb31bdd348f1a32e8f979b84787b1aec0ef1170f3a9c5e150968f1cb88f9ad91ac3b1ef7893bdfdd05a9b31e90e3c79764ed98d0a607a49f7a8766d0c7859596
7
- data.tar.gz: 7a95371db1dc274e08478f9cf27f13badea7b97fd04e5963ac2209a66f07d227e0b6b8d9d12646e266a7d3c53bb3eac48db4e1a0f8c4737e799d1db82c095e67
6
+ metadata.gz: 1ec11ec4b9b07437fb16f9ab0c181c9cee40a0cc900f90d02d2a6e4fc3bac7efaae890e8eda16bf7dcf8e3595bcb4010cf9d3893bee2a7a937b0fd527c40356f
7
+ data.tar.gz: 06d53b1bfe11004d0abeba1db130a13f664a054e8ed56f5edb260ec3f8bf189b0f9cb64687a471d2241ffd0d612ae632b50853737282a8d1901ea0645be4426a
data/Guardfile CHANGED
@@ -6,7 +6,7 @@ guard :bundler do
6
6
  watch(/^.+\.gemspec/)
7
7
  end
8
8
 
9
- guard :rspec do
9
+ guard :rspec, cmd: 'bundle exec rspec' do
10
10
  watch(%r{^spec/.+_spec\.rb$})
11
11
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
12
12
  watch('spec/spec_helper.rb') { "spec" }
@@ -0,0 +1,52 @@
1
+ # coding: utf-8
2
+ # author LeFnord
3
+ # email pscholz.le@gmail.com
4
+ # date 2014-10-12
5
+
6
+ module NlpToolz
7
+
8
+ class Language
9
+
10
+ # load java classes
11
+ # Enumeration = Rjb::import("java.util.Enumeration")
12
+ HashSet = Rjb::import("java.util.HashSet")
13
+ # Hashtable = Rjb::import("java.util.Hashtable")
14
+ # Set = Rjb::import("java.util.Set")
15
+
16
+ DataSourceException = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.DataSourceException")
17
+ LanIKernel = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.LanIKernel")
18
+ Request = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.Request")
19
+ RequestException = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.RequestException")
20
+ Response = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.Response")
21
+
22
+ def self.get_language(text = nil)
23
+ return -1 if text.nil? || text.empty?
24
+ lang_probability = identify text
25
+ lang_probability.first
26
+ end
27
+
28
+ # set language and probability of sentence
29
+ def self.identify(text)
30
+ languages = HashSet.new
31
+ modus = 0
32
+ reduce = true
33
+
34
+ req = Request.new(text, languages, modus, reduce)
35
+
36
+ LanIKernel.propertyFile = File.join(MODELS, 'language', 'lanikernel')
37
+ kernel = LanIKernel.getInstance()
38
+ res = kernel.evaluate(req)
39
+
40
+ @lang,@probability = get_most_probability_lang(res.getResult.toString)
41
+ end
42
+
43
+ private
44
+ def self.get_most_probability_lang(result)
45
+ res = []
46
+ foo = result.sub!("{","").sub!("}","").split(', ').collect{ |x| x.split('=') }
47
+ foo.each{ |x| res << [x.first,x.last.to_f] }
48
+ res.max{|a,b| a.last <=> b.last}
49
+ end
50
+ end
51
+
52
+ end
@@ -2,11 +2,13 @@ module NlpToolz
2
2
  MODELS = File.join(File.dirname(__FILE__), '..', '..', "models")
3
3
  JARS = File.join(File.dirname(__FILE__), '..', '..', "jars")
4
4
 
5
- CLASS_PATH = [
6
- File.join(JARS, "jwnl-1.3.3.jar"),
7
- File.join(JARS, "opennlp-tools-1.5.3.jar"),
8
- File.join(JARS, "opennlp-maxent-3.0.3.jar")
9
- ].join(":")
5
+ # CLASS_PATH = [
6
+ # File.join(JARS, "jwnl-1.3.3.jar"),
7
+ # File.join(JARS, "opennlp-tools-1.5.3.jar"),
8
+ # File.join(JARS, "opennlp-maxent-3.0.3.jar")
9
+ # ].join(":")
10
+
11
+ CLASS_PATH = Dir.glob(File.join(JARS,'*.jar')).join(':')
10
12
 
11
13
  Rjb::load(CLASS_PATH,['-X+C','-Xmx4096m','-Djava.awt.headless=true'])
12
14
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseParallelGC','-XX:+UseParallelOldGC','-Djava.awt.headless=true'])
@@ -4,12 +4,11 @@
4
4
  # date: 2012-12-10
5
5
 
6
6
  module NlpToolz
7
-
7
+
8
8
  class Parser
9
-
10
- include Lang
9
+
11
10
  include TmpFile
12
-
11
+
13
12
  # load java classes
14
13
  FileInputStream = Rjb::import('java.io.FileInputStream')
15
14
 
@@ -18,7 +17,7 @@ module NlpToolz
18
17
 
19
18
  def initialize(input, lang = nil)
20
19
  @input = input
21
- @lang = lang || get_language
20
+ @lang = lang || NlpToolz::Language.get_language(input)
22
21
  @model_name = "#{@lang}-sm5.gr"
23
22
  get_model
24
23
  end
@@ -8,8 +8,6 @@ module NlpToolz
8
8
 
9
9
  class PosTags
10
10
 
11
- include Lang
12
-
13
11
  # load java classes
14
12
  FileInputStream = Rjb::import('java.io.FileInputStream')
15
13
  POSModel = Rjb::import('opennlp.tools.postag.POSModel')
@@ -19,7 +17,7 @@ module NlpToolz
19
17
 
20
18
  def initialize(input, lang = nil)
21
19
  @input = input
22
- @lang = lang || get_language
20
+ @lang = lang || NlpToolz::Language.get_language(input)
23
21
  @model_name = "#{@lang}-pos-maxent.bin"
24
22
  get_model
25
23
  end
@@ -8,8 +8,6 @@ module NlpToolz
8
8
 
9
9
  class Sentences
10
10
 
11
- include Lang
12
-
13
11
  # load java classes
14
12
  FileInputStream = Rjb::import('java.io.FileInputStream')
15
13
  SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
@@ -19,7 +17,7 @@ module NlpToolz
19
17
 
20
18
  def initialize(input,lang = nil)
21
19
  @input = input
22
- @lang = lang || get_language
20
+ @lang = lang || NlpToolz::Language.get_language(input)
23
21
  @model_name = "#{@lang}-sent.bin"
24
22
  get_model
25
23
  end
@@ -7,8 +7,6 @@ module NlpToolz
7
7
 
8
8
  class Tokens
9
9
 
10
- include Lang
11
-
12
10
  # load java classes
13
11
  FileInputStream = Rjb::import('java.io.FileInputStream')
14
12
  TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
@@ -18,7 +16,7 @@ module NlpToolz
18
16
 
19
17
  def initialize(input, lang = nil)
20
18
  @input = input
21
- @lang = lang || get_language
19
+ @lang = lang || NlpToolz::Language.get_language(input)
22
20
  @model_name = "#{@lang}-token.bin"
23
21
  get_model
24
22
  end
@@ -4,5 +4,5 @@
4
4
  # date: 2012-10-23
5
5
 
6
6
  module NlpToolz
7
- VERSION = "1.0.5"
7
+ VERSION = "1.1.0"
8
8
  end
data/lib/nlp_toolz.rb CHANGED
@@ -13,24 +13,22 @@ require "multi_json"
13
13
  # internal requirements
14
14
  require "nlp_toolz/version"
15
15
  require "nlp_toolz/helpers/url_handler"
16
- require "nlp_toolz/helpers/lang"
17
16
  require "nlp_toolz/helpers/string_extended"
18
17
  require "nlp_toolz/helpers/tmp_file"
19
18
 
20
19
  # NLP Tools
21
20
  require "nlp_toolz/load_jars"
21
+ require "nlp_toolz/language"
22
22
  require "nlp_toolz/sentences"
23
23
  require "nlp_toolz/pos_tags"
24
24
  require "nlp_toolz/tokens"
25
25
  require "nlp_toolz/parser"
26
26
 
27
27
  module NlpToolz
28
- extend Lang
29
-
30
28
  module_function
31
29
 
32
30
  def get_lang(input)
33
- NlpToolz.get_language(input)
31
+ NlpToolz::Language.get_language(input)
34
32
  end
35
33
 
36
34
  def get_sentences(input,lang = nil)
@@ -7,10 +7,10 @@ describe String do
7
7
  end
8
8
 
9
9
  it "should delete quotations marks" do
10
- @a.join("").clean_up.should be_empty
10
+ expect(@a.join("").clean_up).to be_empty
11
11
  chars = (@a.length - 1) * 3
12
- @a.join(" ap").clean_up.length.should == chars
13
- @a.join("ap ").clean_up.length.should == chars
12
+ expect(@a.join(" ap").clean_up.length).to be == chars
13
+ expect(@a.join("ap ").clean_up.length).to be == chars
14
14
  end
15
15
 
16
16
 
@@ -0,0 +1,23 @@
1
+ require "spec_helper"
2
+
3
+ describe 'Language' do
4
+
5
+ before(:all) do
6
+ @en_text = "Military historian Basil Liddell Hart famously declared that Sherman was the first modern general."
7
+ @de_text = "Die erste Ausgabe der von Arwidsson herausgegebenen, kurzlebigen Zeitschrift Abo Morgonblad vom 5. Januar 1821."
8
+ end
9
+
10
+ it 'do nothings if text empty or nil' do
11
+ res = NlpToolz::Language.get_language
12
+ expect(res).to be == -1
13
+ res = NlpToolz::Language.get_language('')
14
+ expect(res).to be == -1
15
+ end
16
+
17
+ it 'gets language' do
18
+ res = NlpToolz::Language.get_language @en_text
19
+ expect(res).to be == 'en'
20
+ res = NlpToolz::Language.get_language @de_text
21
+ expect(res).to be == 'de'
22
+ end
23
+ end
@@ -7,44 +7,44 @@ describe NlpToolz do
7
7
  @text = "Military historian Basil Liddell Hart famously declared that Sherman was the first modern general."
8
8
  @g_text = "μακεδονικού εκκεντροφόρου πολιτισμός του. την ανάφλεξης πολιτισμική. πολιτισμού του να. τόπος επειδή σε. καθορίσουν χρόνια Στα από."
9
9
  end
10
-
10
+
11
11
  describe "attributes" do
12
12
  it "should respond to #attribute" do
13
13
  text = NlpToolz::Parser.new(@text)
14
- text.should respond_to(:input)
15
- text.should respond_to(:lang)
16
- text.should respond_to(:model_name)
17
- text.should respond_to(:model)
18
- text.should respond_to(:parse_hash)
14
+ expect(text).to respond_to(:input)
15
+ expect(text).to respond_to(:lang)
16
+ expect(text).to respond_to(:model_name)
17
+ expect(text).to respond_to(:model)
18
+ expect(text).to respond_to(:parse_hash)
19
19
  end
20
20
  end
21
-
21
+
22
22
  describe "model" do
23
23
  it "should have a model, if lang 'en'" do
24
24
  sent = NlpToolz::Parser.new(@text,'en')
25
- sent.model_name.should == 'en-sm5.gr'
26
- sent.has_model?.should be_true
25
+ expect(sent.model_name).to be == 'en-sm5.gr'
26
+ expect(sent.has_model?).to be_truthy
27
27
  end
28
-
28
+
29
29
  it "should not have a model, if lang not known" do
30
30
  sent = NlpToolz::Parser.new(@g_text)
31
- sent.has_model?.should be_false
31
+ expect(sent.has_model?).to be_falsey
32
32
  end
33
33
  end
34
-
34
+
35
35
  describe "object" do
36
36
  it "should create a valid object" do
37
37
  expect{ text = NlpToolz::Parser.new(@text,"en") }.to_not raise_error
38
38
  end
39
-
39
+
40
40
  it "should set the language of input" do
41
41
  text = NlpToolz::Parser.new(@text)
42
- text.lang.should == "en"
42
+ expect(text.lang).to be == "en"
43
43
  end
44
-
44
+
45
45
  it "should build the right model name" do
46
46
  text = NlpToolz::Parser.new(@text)
47
- text.model_name.should == "en-sm5.gr"
47
+ expect(text.model_name).to be == "en-sm5.gr"
48
48
  end
49
49
  end
50
50
 
@@ -52,15 +52,15 @@ describe NlpToolz do
52
52
  it "should store tree in a hash" do
53
53
  text = NlpToolz::Parser.new(@text)
54
54
  text.parse_text
55
- text.parse_hash.should be_a(Hash)
55
+ expect(text.parse_hash).to be_a(Hash)
56
56
  end
57
57
 
58
58
  it "should have a token hash after parsing" do
59
59
  text = NlpToolz::Parser.new(@text)
60
60
  text.parse_text
61
- text.layer.should be_a Hash
62
- text.layer.should include(:tags)
63
- text.layer.should include(:tokens)
61
+ expect(text.layer).to be_a Hash
62
+ expect(text.layer).to include(:tags)
63
+ expect(text.layer).to include(:tokens)
64
64
  end
65
65
  end
66
66
  end # Parser
@@ -7,60 +7,58 @@ describe NlpToolz do
7
7
  @text = "Military historian Basil Liddell Hart famously declared that Sherman was the first modern general."
8
8
  @g_text = "μακεδονικού εκκεντροφόρου πολιτισμός του. την ανάφλεξης πολιτισμική. πολιτισμού του να. τόπος επειδή σε. καθορίσουν χρόνια Στα από."
9
9
  end
10
-
10
+
11
11
  describe "attributes" do
12
12
  it "should respond to #attribute" do
13
13
  text = NlpToolz::PosTags.new(@text)
14
- text.should respond_to(:input)
15
- text.should respond_to(:lang)
16
- text.should respond_to(:model_name)
17
- text.should respond_to(:model)
18
- text.should respond_to(:tokenized)
14
+ expect(text).to respond_to(:input)
15
+ expect(text).to respond_to(:lang)
16
+ expect(text).to respond_to(:model_name)
17
+ expect(text).to respond_to(:model)
18
+ expect(text).to respond_to(:tokenized)
19
19
  end
20
20
  end
21
-
21
+
22
22
  describe "model" do
23
23
  it "should have a model, if lang 'en'" do
24
24
  sent = NlpToolz::PosTags.new(@text,'en')
25
- sent.model_name.should == 'en-pos-maxent.bin'
26
- sent.has_model?.should be_true
25
+ expect(sent.model_name).to be == 'en-pos-maxent.bin'
26
+ expect(sent.has_model?).to be_truthy
27
27
  end
28
-
28
+
29
29
  it "should not have a model, if lang not known" do
30
30
  sent = NlpToolz::PosTags.new(@g_text)
31
- sent.has_model?.should be_false
31
+ expect(sent.has_model?).to be_falsey
32
32
  end
33
33
  end
34
-
34
+
35
35
  describe "object" do
36
36
  it "should create a valid object" do
37
37
  expect{ text = NlpToolz::PosTags.new(@text,"en") }.to_not raise_error
38
38
  end
39
-
39
+
40
40
  it "should set the language of input" do
41
41
  text = NlpToolz::PosTags.new(@text)
42
- text.lang.should == "en"
42
+ expect(text.lang).to be == "en"
43
43
  end
44
-
44
+
45
45
  it "should build the right model name" do
46
46
  text = NlpToolz::PosTags.new(@text)
47
- text.model_name.should == "en-pos-maxent.bin"
47
+ expect(text.model_name).to be == "en-pos-maxent.bin"
48
48
  end
49
-
49
+
50
50
  it "should be a hash after pos tagging" do
51
51
  text = NlpToolz::PosTags.new(@text,"en")
52
52
  text.get_pos_tags
53
- text.tokenized.should include(:tokens)
54
- text.tokenized.should include(:tags)
55
- text.tokenized.should be_a Hash
53
+ expect(text.tokenized).to include(:tokens)
54
+ expect(text.tokenized).to include(:tags)
55
+ expect(text.tokenized).to be_a Hash
56
56
  end
57
-
57
+
58
58
  it "should get pos text of given text" do
59
59
  text = NlpToolz::PosTags.new(@text,"en")
60
60
  text.get_pos_tags
61
- text.tokenized[:tokens].should have(15).items
62
- text.tokenized[:tags].should have(15).items
63
- text.tokenized[:tokens].length.should == text.tokenized[:tags].length
61
+ expect(text.tokenized[:tokens].length).to be == text.tokenized[:tags].length
64
62
  end
65
63
  end
66
64
  end # POS Tags
@@ -7,53 +7,53 @@ describe NlpToolz do
7
7
  @text = "William Tecumseh Sherman (February 8, 1820 – February 14, 1891) was an American soldier, businessman, educator, and author.
8
8
  He served as a general in the United States Army during the American Civil War (1861–65), receiving both recognition for his outstanding command of military strategy, and criticism for the harshness of the scorched earth policies he implemented in conducting total war against the Confederate States of America.
9
9
  Military historian Basil Liddell Hart famously declared that Sherman was the first modern general."
10
-
10
+
11
11
  @g_text = "μακεδονικού εκκεντροφόρου πολιτισμός του. την ανάφλεξης πολιτισμική. πολιτισμού του να. τόπος επειδή σε. καθορίσουν χρόνια Στα από."
12
12
  end
13
-
13
+
14
14
  describe "attributes" do
15
15
  it "should respond to #attribute" do
16
16
  sent = NlpToolz::Sentences.new(@text)
17
- sent.should respond_to(:input)
18
- sent.should respond_to(:lang)
19
- sent.should respond_to(:model_name)
20
- sent.should respond_to(:model)
21
- sent.should respond_to(:sentences)
17
+ expect(sent).to respond_to(:input)
18
+ expect(sent).to respond_to(:lang)
19
+ expect(sent).to respond_to(:model_name)
20
+ expect(sent).to respond_to(:model)
21
+ expect(sent).to respond_to(:sentences)
22
22
  end
23
23
  end
24
-
24
+
25
25
  describe "model" do
26
26
  it "should have a model, if lang 'en'" do
27
27
  sent = NlpToolz::Sentences.new(@text,'en')
28
- sent.has_model?.should be_true
29
- sent.model_name.should == 'en-sent.bin'
28
+ expect(sent.has_model?).to be_truthy
29
+ expect(sent.model_name).to be == 'en-sent.bin'
30
30
  end
31
-
31
+
32
32
  it "should not have a model, if lang not known" do
33
33
  sent = NlpToolz::Sentences.new(@g_text)
34
- sent.has_model?.should be_false
34
+ expect(sent.has_model?).to be_falsey
35
35
  end
36
36
  end
37
-
37
+
38
38
  describe "object" do
39
39
  it "should create a valid object" do
40
40
  expect{ sent = NlpToolz::Sentences.new(@text) }.to_not raise_error
41
41
  end
42
-
42
+
43
43
  it "should set the language of input" do
44
44
  sent = NlpToolz::Sentences.new(@text)
45
- sent.lang.should == "en"
45
+ expect(sent.lang).to be == "en"
46
46
  end
47
-
47
+
48
48
  it "should build the right model name" do
49
49
  sent = NlpToolz::Sentences.new(@text)
50
- sent.model_name.should == "en-sent.bin"
50
+ expect(sent.model_name).to be == "en-sent.bin"
51
51
  end
52
-
52
+
53
53
  it "should split incoming text into sentences" do
54
54
  text = NlpToolz::Sentences.new(@text,"en")
55
55
  text.split_into_sentences
56
- text.sentences.should have(3).items
56
+ expect(text.sentences.length).to be == 3
57
57
  end
58
58
  end
59
59
  end # Sentences
@@ -7,55 +7,55 @@ describe NlpToolz do
7
7
  @text = "Military historian Basil Liddell Hart famously declared that Sherman was the first modern general."
8
8
  @g_text = "μακεδονικού εκκεντροφόρου πολιτισμός του. την ανάφλεξης πολιτισμική. πολιτισμού του να. τόπος επειδή σε. καθορίσουν χρόνια Στα από."
9
9
  end
10
-
10
+
11
11
  describe "attributes" do
12
12
  it "should respond to #attribute" do
13
13
  text = NlpToolz::Tokens.new(@text)
14
- text.should respond_to(:input)
15
- text.should respond_to(:lang)
16
- text.should respond_to(:model_name)
17
- text.should respond_to(:model)
18
- text.should respond_to(:tokens)
14
+ expect(text).to respond_to(:input)
15
+ expect(text).to respond_to(:lang)
16
+ expect(text).to respond_to(:model_name)
17
+ expect(text).to respond_to(:model)
18
+ expect(text).to respond_to(:tokens)
19
19
  end
20
20
  end
21
-
21
+
22
22
  describe "model" do
23
23
  it "should have a model, if lang 'en'" do
24
24
  sent = NlpToolz::Tokens.new(@text,'en')
25
- sent.has_model?.should be_true
25
+ expect(sent.has_model?).to be_truthy
26
26
  end
27
-
27
+
28
28
  it "should not have a model, if lang not known" do
29
29
  sent = NlpToolz::Tokens.new(@g_text)
30
- sent.has_model?.should be_false
30
+ expect(sent.has_model?).to be_falsey
31
31
  end
32
32
  end
33
-
33
+
34
34
  describe "object" do
35
35
  it "should create a valid object" do
36
36
  expect{ text = NlpToolz::Tokens.new(@text,"en") }.to_not raise_error
37
37
  end
38
-
38
+
39
39
  it "should set the language of input" do
40
40
  text = NlpToolz::Tokens.new(@text)
41
- text.lang.should == "en"
41
+ expect(text.lang).to be == "en"
42
42
  end
43
-
43
+
44
44
  it "should build the right model name" do
45
45
  text = NlpToolz::Tokens.new(@text)
46
- text.model_name.should == "en-token.bin"
46
+ expect(text.model_name).to be == "en-token.bin"
47
47
  end
48
-
48
+
49
49
  it "should be a arrar after tokenizing" do
50
50
  text = NlpToolz::Tokens.new(@text,"en")
51
51
  text.tokenize
52
- text.tokens.should be_a Array
52
+ expect(text.tokens).to be_a Array
53
53
  end
54
-
54
+
55
55
  it "should tokenize given text" do
56
56
  text = NlpToolz::Tokens.new(@text,"en")
57
57
  text.tokenize
58
- text.tokens.should have(15).items
58
+ expect(text.tokens.length).to be == 15
59
59
  end
60
60
  end
61
61
  end # Tokens
@@ -11,33 +11,33 @@ describe NlpToolz do
11
11
  describe "detect language" do
12
12
  it "should description" do
13
13
  lang = NlpToolz.get_lang(@text)
14
- lang.should == 'en'
14
+ expect(lang).to be == 'en'
15
15
  end
16
16
  end
17
17
 
18
18
  describe "sentence detection" do
19
19
  it "should input text split into its sentences" do
20
20
  sentences = NlpToolz.get_sentences(@text)
21
- sentences.should have(3).items
21
+ expect(sentences.length).to be == 3
22
22
  end
23
23
 
24
24
  it "should be 'nil', if text lang is unsupported" do
25
25
  sentences = NlpToolz.get_sentences(@g_text)
26
- sentences.should be_nil
26
+ expect(sentences).to be_nil
27
27
  end
28
28
  end
29
29
 
30
30
  describe "tokenizing" do
31
31
  it "should tag a sentence" do
32
32
  tokens = NlpToolz.tokenize_sentence(@sentence)
33
- tokens.should have(26).items
34
- tokens.should be_a Array
33
+ expect(tokens.length).to be == 26
34
+ expect(tokens).to be_a Array
35
35
  end
36
36
 
37
37
  it "should tokenize a whole text" do
38
38
  token_arr = NlpToolz.tokenize_text(@text)
39
- token_arr.should have(3).items
40
- token_arr.first.should have(26).items
39
+ expect(token_arr.length).to be == 3
40
+ expect(token_arr.first.length).to be == 26
41
41
  end
42
42
  end
43
43
 
@@ -45,12 +45,12 @@ describe NlpToolz do
45
45
  it "should tag a sentence" do
46
46
  sentence = NlpToolz.get_sentences(@sentence).last
47
47
  tags = NlpToolz.tag_sentence(sentence)
48
- tags[:tokens].length.should == tags[:tags].length
48
+ expect(tags[:tokens].length).to be == tags[:tags].length
49
49
  end
50
50
 
51
51
  it "should be 'nil', if sentence language not supported " do
52
52
  tags = NlpToolz.tag_sentence(@g_text)
53
- tags.should be_nil
53
+ expect(tags).to be_nil
54
54
  end
55
55
  end
56
56
 
@@ -58,12 +58,12 @@ describe NlpToolz do
58
58
  it "should parse a sentence" do
59
59
  sentence = NlpToolz.get_sentences(@sentence).last
60
60
  parsed = NlpToolz.parse_sentence(sentence)
61
- parsed.should be_a Hash
61
+ expect(parsed).to be_a Hash
62
62
  end
63
-
63
+
64
64
  it "should should be 'nil', if sentence language is not supported" do
65
65
  parsed = NlpToolz.parse_sentence(@g_text)
66
- parsed.should be_nil
66
+ expect(parsed).to be_nil
67
67
  end
68
68
  end
69
69
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nlp_toolz
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - LeFnord
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-06 00:00:00.000000000 Z
11
+ date: 2014-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -209,10 +209,10 @@ files:
209
209
  - Rakefile
210
210
  - bin/nlp_toolz
211
211
  - lib/nlp_toolz.rb
212
- - lib/nlp_toolz/helpers/lang.rb
213
212
  - lib/nlp_toolz/helpers/string_extended.rb
214
213
  - lib/nlp_toolz/helpers/tmp_file.rb
215
214
  - lib/nlp_toolz/helpers/url_handler.rb
215
+ - lib/nlp_toolz/language.rb
216
216
  - lib/nlp_toolz/load_jars.rb
217
217
  - lib/nlp_toolz/parser.rb
218
218
  - lib/nlp_toolz/pos_tags.rb
@@ -221,6 +221,7 @@ files:
221
221
  - lib/nlp_toolz/version.rb
222
222
  - nlp_toolz.gemspec
223
223
  - spec/helpers/string_extended_spec.rb
224
+ - spec/lib/nlp_toolz/language_spec.rb
224
225
  - spec/lib/nlp_toolz/parser_spec.rb
225
226
  - spec/lib/nlp_toolz/pos_tags_spec.rb
226
227
  - spec/lib/nlp_toolz/sentences_spec.rb
@@ -247,12 +248,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
247
248
  version: '0'
248
249
  requirements: []
249
250
  rubyforge_project:
250
- rubygems_version: 2.2.0
251
+ rubygems_version: 2.2.2
251
252
  signing_key:
252
253
  specification_version: 4
253
254
  summary: wrapper around the openNLP toolset
254
255
  test_files:
255
256
  - spec/helpers/string_extended_spec.rb
257
+ - spec/lib/nlp_toolz/language_spec.rb
256
258
  - spec/lib/nlp_toolz/parser_spec.rb
257
259
  - spec/lib/nlp_toolz/pos_tags_spec.rb
258
260
  - spec/lib/nlp_toolz/sentences_spec.rb
@@ -1,26 +0,0 @@
1
- module Lang
2
-
3
- include UrlHandler
4
-
5
- def get_language(text = nil)
6
- uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
7
-
8
- if @input
9
- asv_response = post_data(URI.escape(@input),uri,{'Content-type'=>'text/plain;charset=utf-8'})
10
- elsif text
11
- asv_response = post_data(URI.escape(text),uri,{'Content-type'=>'text/plain;charset=utf-8'})
12
- end
13
- response = MultiJson.load(asv_response.body)
14
-
15
- response["lang"]
16
- end
17
-
18
- # ToDo 2013-02-26: make different lang identifier available
19
- def alternative_langs lang
20
- langs = {
21
- en: [:eng, :english],
22
- de: [:ger, :german]
23
- }.each.collect{|x| x.flatten}
24
- end
25
-
26
- end