nlp_toolz 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ce5f4cad49039b0d8cb6d626facc67a4efa32ae4
4
- data.tar.gz: 0565742385f0a34aabe4e456cde014ba2673a589
3
+ metadata.gz: 2ce637d8ddb8a8ad0b62c3998d016d3552b0cf58
4
+ data.tar.gz: be30bf6df57f309a050ebd0dffa6b6e5bcf85d48
5
5
  SHA512:
6
- metadata.gz: 1ec11ec4b9b07437fb16f9ab0c181c9cee40a0cc900f90d02d2a6e4fc3bac7efaae890e8eda16bf7dcf8e3595bcb4010cf9d3893bee2a7a937b0fd527c40356f
7
- data.tar.gz: 06d53b1bfe11004d0abeba1db130a13f664a054e8ed56f5edb260ec3f8bf189b0f9cb64687a471d2241ffd0d612ae632b50853737282a8d1901ea0645be4426a
6
+ metadata.gz: 453750f9759a44a70939feca45f9bf85d1ec50cf44c9794da66614dcbf2e10f6f97f7e0b3d2709489547c409a92f67dc2bbaf88b4f494b0f459d1d69269531bb
7
+ data.tar.gz: ad7179da66ff954010aca8245180cedf688d49531d6963cfaae92bfa944bc4e240fef3cb41cee471757ab893d2dd1847d2ca6fa0a66c33e76541e10513c2845a
data/.gitignore CHANGED
@@ -26,3 +26,5 @@ teste.rb
26
26
  test-data/
27
27
  jars/*
28
28
  models/*
29
+ jars.zip
30
+ models.zip
data/README.md CHANGED
@@ -18,7 +18,9 @@ Or install it yourself as:
18
18
 
19
19
  $ gem install nlp_toolz
20
20
 
21
- Download jars and model files from [Dropbox](https://www.dropbox.com/sh/1layyjgf5h0wwi3/s2SHAnfVhs) and unzip it in gem folder.
21
+ ~~Download jars and model files from [Dropbox](https://www.dropbox.com/sh/1layyjgf5h0wwi3/s2SHAnfVhs) and unzip it in gem folder.~~
22
+
23
+ Installing `models` and `jars` is now simplified: run `nlp_toolz init` from the command line.
22
24
 
23
25
  ## Usage
24
26
 
data/Rakefile CHANGED
@@ -1,9 +1,11 @@
1
1
  #!/usr/bin/env rake
2
2
  require "bundler/gem_tasks"
3
- require "awesome_print"
4
3
  require 'rspec/core'
5
4
  require 'rspec/core/rake_task'
5
+ require "nlp_toolz"
6
+
6
7
  RSpec::Core::RakeTask.new(:spec) do |spec|
8
+ NlpToolz.check_dependencies
7
9
  spec.pattern = FileList['spec/**/*_spec.rb']
8
10
  end
9
11
 
@@ -14,8 +14,7 @@ include GLI::App
14
14
 
15
15
  # helper methods
16
16
  def get_out(this)
17
- ap this if $stdout.tty?
18
- $stdout.puts this unless $stdout.tty?
17
+ $stdout.puts this
19
18
  end
20
19
 
21
20
  def get_in(input_arg)
@@ -32,6 +31,23 @@ def get_file(name)
32
31
  file.force_encoding("utf-8") unless file.nil?
33
32
  end
34
33
 
34
+ def unzip_file (file, destination)
35
+ Zip::ZipFile.open(file) { |zip_file|
36
+ zip_file.each { |f|
37
+ f_path=File.join(destination, f.name)
38
+ FileUtils.mkdir_p(File.dirname(f_path))
39
+ zip_file.extract(f, f_path) unless File.exist?(f_path)
40
+ }
41
+ }
42
+ end
43
+
44
+ def add_path_in_lanikernel
45
+ file = IO.read(File.join(NlpToolz::HOME,'models','language','lanikernel.ini'))
46
+ file.sub!('WordlistDir=models',"WordlistDir=#{NlpToolz::HOME}/models")
47
+ file.sub!('BlacklistFile=models',"BlacklistFile=#{NlpToolz::HOME}/models")
48
+ file.sub!('MappingFile=models',"MappingFile=#{NlpToolz::HOME}/models")
49
+ File.open(File.join(NlpToolz::HOME,'models','language','lanikernel.ini'), 'w') {|f| f.write(file) }
50
+ end
35
51
 
36
52
  program_desc 'running basic NLP tasks'
37
53
 
@@ -40,7 +56,34 @@ version NlpToolz::VERSION
40
56
  desc 'initial setup'
41
57
  command :init do |c|
42
58
  c.action do |global_options,options,args|
43
- puts 'setting up app'
59
+ unless Dir.exist?(File.join(NlpToolz::HOME,'models')) && Dir.exist?(File.join(NlpToolz::HOME,'jars'))
60
+ gem_home = NlpToolz::HOME
61
+ $stdout.print "download and unzip to: ".green
62
+ $stdout.puts "#{gem_home}".blue
63
+ [
64
+ 'https://dl.dropboxusercontent.com/sh/1layyjgf5h0wwi3/AACw8Y04KnFotOpBkzcfLxmwa/jars.zip',
65
+ 'https://dl.dropboxusercontent.com/sh/1layyjgf5h0wwi3/AADUSMRMVg3n54Djdy9BWYVEa/models.zip',
66
+ ].each do |link|
67
+ loaded_file = link.split('/').last.sub('?dl=0','')
68
+ $stdout.print "download: ".green
69
+ $stdout.puts "#{loaded_file}".blue
70
+
71
+ Schiphol.download(
72
+ link,
73
+ # Default values
74
+ :download_folder => "#{gem_home}",
75
+ :show_progress => true,
76
+ :max_tries => 3
77
+ )
78
+ $stdout.puts "extracting …".green
79
+ unzip_file(File.join(gem_home,loaded_file),gem_home)
80
+ FileUtils.rm(File.join(gem_home,loaded_file))
81
+ end
82
+ $stdout.puts "add abbsolute path to language config".green
83
+ add_path_in_lanikernel
84
+ else
85
+ $stdout.puts "files exist".green
86
+ end
44
87
  end
45
88
  end
46
89
 
@@ -51,6 +94,7 @@ command :sent do |c|
51
94
  c.arg_name '<path/to/file>'
52
95
  c.flag [:f,:file]
53
96
  c.action do |global_options,options,args|
97
+ NlpToolz.check_dependencies
54
98
  input = get_in(options[:f] || args.first)
55
99
  get_out NlpToolz.get_sentences(input)
56
100
  end
@@ -63,6 +107,7 @@ command :parse do |c|
63
107
  c.arg_name '<path/to/file>'
64
108
  c.flag [:f,:file]
65
109
  c.action do |global_options,options,args|
110
+ NlpToolz.check_dependencies
66
111
  input = get_in(options[:f] || args.first)
67
112
  get_out NlpToolz.parse_text(input)
68
113
  end
@@ -75,6 +120,7 @@ command :tag do |c|
75
120
  c.arg_name '<path/to/file>'
76
121
  c.flag [:f,:file]
77
122
  c.action do |global_options,options,args|
123
+ NlpToolz.check_dependencies
78
124
  input = get_in(options[:f] || args.first)
79
125
  get_out NlpToolz.tag_text(input)
80
126
  end
@@ -87,6 +133,7 @@ command :token do |c|
87
133
  c.arg_name '<path/to/file>'
88
134
  c.flag [:f,:file]
89
135
  c.action do |global_options,options,args|
136
+ NlpToolz.check_dependencies
90
137
  input = get_in(options[:f] || args.first)
91
138
  get_out NlpToolz.tokenize_text(input)
92
139
  end
@@ -9,6 +9,9 @@ require "rjb"
9
9
  # external requirements
10
10
  require "awesome_print"
11
11
  require "multi_json"
12
+ # for downloading models and jars
13
+ require "schiphol"
14
+ require "zip/zip"
12
15
 
13
16
  # internal requirements
14
17
  require "nlp_toolz/version"
@@ -17,6 +20,7 @@ require "nlp_toolz/helpers/string_extended"
17
20
  require "nlp_toolz/helpers/tmp_file"
18
21
 
19
22
  # NLP Tools
23
+ require "nlp_toolz/home"
20
24
  require "nlp_toolz/load_jars"
21
25
  require "nlp_toolz/language"
22
26
  require "nlp_toolz/sentences"
@@ -24,29 +28,41 @@ require "nlp_toolz/pos_tags"
24
28
  require "nlp_toolz/tokens"
25
29
  require "nlp_toolz/parser"
26
30
 
31
+
27
32
  module NlpToolz
33
+
34
+
28
35
  module_function
29
36
 
37
+ def check_dependencies
38
+ unless Dir.exist?(File.join(NlpToolz::HOME,'models')) && Dir.exist?(File.join(NlpToolz::HOME,'jars'))
39
+ $stdout.puts "\n--> models and jars not installed,"
40
+ $stdout.puts " install it by running:"
41
+ $stdout.puts "--> $ nlp_toolz init\n".green
42
+ exit
43
+ end
44
+ end
45
+
30
46
  def get_lang(input)
31
47
  NlpToolz::Language.get_language(input)
32
48
  end
33
-
49
+
34
50
  def get_sentences(input,lang = nil)
35
51
  text = NlpToolz::Sentences.new(input,lang)
36
52
  text.split_into_sentences if text.has_model?
37
53
  end
38
-
54
+
39
55
  def tokenize_sentence(input,lang = nil)
40
56
  sentence = NlpToolz::Tokens.new(input,lang)
41
57
  sentence.tokenize
42
58
  end
43
-
59
+
44
60
  def tokenize_text(input,lang = nil)
45
61
  tokenized_text = []
46
62
  get_sentences(input,lang).each do |sentence|
47
63
  tokenized_text << tokenize_sentence(sentence,lang)
48
64
  end
49
-
65
+
50
66
  tokenized_text
51
67
  end
52
68
 
@@ -54,29 +70,30 @@ module NlpToolz
54
70
  sentence = NlpToolz::PosTags.new(input,lang)
55
71
  sentence.get_pos_tags if sentence.has_model?
56
72
  end
57
-
73
+
58
74
  def tag_text(input,lang = nil)
59
75
  tagged_text = []
60
76
  get_sentences(input,lang).each do |sentence|
61
77
  tagged_text << tag_sentence(sentence,lang)
62
78
  end
63
-
79
+
64
80
  tagged_text
65
81
  end
66
-
82
+
67
83
  def parse_sentence(input,lang = nil)
68
84
  text = NlpToolz::Parser.new(input,lang)
69
85
  text.parse_text
70
-
86
+
71
87
  text.parse_hash
72
88
  end
73
-
89
+
74
90
  def parse_text(input,lang = nil)
75
91
  parsed_text = []
76
92
  get_sentences(input,lang).each do |sentence|
77
93
  parsed_text << parse_sentence(sentence,lang)
78
94
  end
79
-
95
+
80
96
  parsed_text
81
97
  end
98
+
82
99
  end
@@ -0,0 +1,3 @@
1
+ module NlpToolz
2
+ HOME = Gem::Specification.find_by_name("nlp_toolz").gem_dir
3
+ end
@@ -7,17 +7,16 @@ module NlpToolz
7
7
 
8
8
  class Language
9
9
 
10
- # load java classes
11
- # Enumeration = Rjb::import("java.util.Enumeration")
12
- HashSet = Rjb::import("java.util.HashSet")
13
- # Hashtable = Rjb::import("java.util.Hashtable")
14
- # Set = Rjb::import("java.util.Set")
10
+ if Dir.exist?(File.join(NlpToolz::HOME,'models')) && Dir.exist?(File.join(NlpToolz::HOME,'jars'))
11
+ # load java classes
12
+ HashSet = Rjb::import("java.util.HashSet")
15
13
 
16
- DataSourceException = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.DataSourceException")
17
- LanIKernel = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.LanIKernel")
18
- Request = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.Request")
19
- RequestException = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.RequestException")
20
- Response = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.Response")
14
+ DataSourceException = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.DataSourceException")
15
+ LanIKernel = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.LanIKernel")
16
+ Request = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.Request")
17
+ RequestException = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.RequestException")
18
+ Response = Rjb::import("de.uni_leipzig.asv.toolbox.jLanI.kernel.Response")
19
+ end
21
20
 
22
21
  def self.get_language(text = nil)
23
22
  return -1 if text.nil? || text.empty?
@@ -33,7 +32,7 @@ module NlpToolz
33
32
 
34
33
  req = Request.new(text, languages, modus, reduce)
35
34
 
36
- LanIKernel.propertyFile = File.join(MODELS, 'language', 'lanikernel')
35
+ LanIKernel.propertyFile = File.join(MODELS,'language','lanikernel')
37
36
  kernel = LanIKernel.getInstance()
38
37
  res = kernel.evaluate(req)
39
38
 
@@ -1,12 +1,7 @@
1
1
  module NlpToolz
2
- MODELS = File.join(File.dirname(__FILE__), '..', '..', "models")
3
- JARS = File.join(File.dirname(__FILE__), '..', '..', "jars")
4
-
5
- # CLASS_PATH = [
6
- # File.join(JARS, "jwnl-1.3.3.jar"),
7
- # File.join(JARS, "opennlp-tools-1.5.3.jar"),
8
- # File.join(JARS, "opennlp-maxent-3.0.3.jar")
9
- # ].join(":")
2
+ CONFIG = File.join(File.dirname(__FILE__), '..', '..', 'config')
3
+ MODELS = File.join(File.dirname(__FILE__), '..', '..', 'models')
4
+ JARS = File.join(File.dirname(__FILE__), '..', '..', 'jars')
10
5
 
11
6
  CLASS_PATH = Dir.glob(File.join(JARS,'*.jar')).join(':')
12
7
 
@@ -8,10 +8,12 @@ module NlpToolz
8
8
 
9
9
  class PosTags
10
10
 
11
- # load java classes
12
- FileInputStream = Rjb::import('java.io.FileInputStream')
13
- POSModel = Rjb::import('opennlp.tools.postag.POSModel')
14
- POSTaggerME = Rjb::import('opennlp.tools.postag.POSTaggerME')
11
+ if Dir.exist?(File.join(NlpToolz::HOME,'models')) && Dir.exist?(File.join(NlpToolz::HOME,'jars'))
12
+ # load java classes
13
+ FileInputStream = Rjb::import('java.io.FileInputStream')
14
+ POSModel = Rjb::import('opennlp.tools.postag.POSModel')
15
+ POSTaggerME = Rjb::import('opennlp.tools.postag.POSTaggerME')
16
+ end
15
17
 
16
18
  attr_accessor :input, :lang, :model, :model_name, :tokenized
17
19
 
@@ -8,10 +8,12 @@ module NlpToolz
8
8
 
9
9
  class Sentences
10
10
 
11
+ if Dir.exist?(File.join(NlpToolz::HOME,'models')) && Dir.exist?(File.join(NlpToolz::HOME,'jars'))
11
12
  # load java classes
12
- FileInputStream = Rjb::import('java.io.FileInputStream')
13
- SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
14
- SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
13
+ FileInputStream = Rjb::import('java.io.FileInputStream')
14
+ SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
15
+ SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
16
+ end
15
17
 
16
18
  attr_accessor :input, :lang, :model, :model_name, :sentences
17
19
 
@@ -7,10 +7,12 @@ module NlpToolz
7
7
 
8
8
  class Tokens
9
9
 
10
- # load java classes
11
- FileInputStream = Rjb::import('java.io.FileInputStream')
12
- TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
13
- TokenizerME = Rjb::import('opennlp.tools.tokenize.TokenizerME')
10
+ if Dir.exist?(File.join(NlpToolz::HOME,'models')) && Dir.exist?(File.join(NlpToolz::HOME,'jars'))
11
+ # load java classes
12
+ FileInputStream = Rjb::import('java.io.FileInputStream')
13
+ TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
14
+ TokenizerME = Rjb::import('opennlp.tools.tokenize.TokenizerME')
15
+ end
14
16
 
15
17
  attr_accessor :input, :lang, :model, :model_name, :tokens
16
18
 
@@ -4,5 +4,5 @@
4
4
  # date: 2012-10-23
5
5
 
6
6
  module NlpToolz
7
- VERSION = "1.1.0"
7
+ VERSION = "1.1.1"
8
8
  end
@@ -39,4 +39,8 @@ Gem::Specification.new do |gem|
39
39
  gem.add_runtime_dependency "multi_json"
40
40
  gem.add_runtime_dependency "gli"
41
41
  gem.add_runtime_dependency "rake"
42
+
43
+ # for downloading models and jars
44
+ gem.add_runtime_dependency "schiphol"
45
+ gem.add_runtime_dependency "rubyzip"
42
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nlp_toolz
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - LeFnord
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-12 00:00:00.000000000 Z
11
+ date: 2014-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -192,6 +192,34 @@ dependencies:
192
192
  - - ">="
193
193
  - !ruby/object:Gem::Version
194
194
  version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: schiphol
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :runtime
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: rubyzip
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - ">="
221
+ - !ruby/object:Gem::Version
222
+ version: '0'
195
223
  description: make NLP tools available, from OpenNLP and BerkeleyParser
196
224
  email:
197
225
  - pscholz.le@gmail.com
@@ -212,6 +240,7 @@ files:
212
240
  - lib/nlp_toolz/helpers/string_extended.rb
213
241
  - lib/nlp_toolz/helpers/tmp_file.rb
214
242
  - lib/nlp_toolz/helpers/url_handler.rb
243
+ - lib/nlp_toolz/home.rb
215
244
  - lib/nlp_toolz/language.rb
216
245
  - lib/nlp_toolz/load_jars.rb
217
246
  - lib/nlp_toolz/parser.rb