nlp_toolz 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 71916455cffe07c8464fb8cc1543d7b8a2ea7205
4
- data.tar.gz: bc30072b7d62770c3e202e0545137056fe5a6164
3
+ metadata.gz: de6bf2ca8c8112151251aa61670ba7af4b4e8d8e
4
+ data.tar.gz: 6ad9029140f59f5f738c57cf5f40481e0e15ed5e
5
5
  SHA512:
6
- metadata.gz: 997d3fc4fb5d9c18546e1ea4c5c8acd19e61ef6979ece0d27cff540cea99c2ecae094fba16a4c3aa25dc05f1fe9282498c228a898b68b4271e493027663e0ba3
7
- data.tar.gz: 42d5ea917f3febe6484a80ab085f0b41515540f841edc2de4b219d06456d7d331a750fb306095336918b4c82f4cd184d1dc6099cd4ff0fd51e2cb487adab9944
6
+ metadata.gz: dac67f653ee26c3ab3e9f304b609570a90387bffa9042ce57e19eedfa26581fc27555265601add717a4a0e3eee1907c24e9da59a47336657ad346ee58478f4bb
7
+ data.tar.gz: 740086a9ee014e4e5b99f9c3ab08981abdea1e2f0dcd3c2150042baa07773e65352ae8091b6ad5399fe1a9d42f646c7ea13c7678949a235f28a5a4fc84cfb608
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Gem Version](https://badge.fury.io/rb/nlp_toolz.png)](http://badge.fury.io/rb/nlp_toolz)
2
+
1
3
  # NlpToolz
2
4
 
3
5
  Basic NLP tools, mostly based on [OpenNLP](http://opennlp.apache.org), at this time `sentence finder`, `tokenizer` and `POS tagger` implemented, plus [Berkeley Parser](http://code.google.com/p/berkeleyparser/).
data/bin/nlp_toolz CHANGED
@@ -37,8 +37,15 @@ program_desc 'running basic NLP tasks'
37
37
 
38
38
  version NlpToolz::VERSION
39
39
 
40
+ desc 'initial setup'
41
+ command :init do |c|
42
+ c.action do |global_options,options,args|
43
+ puts 'setting up app'
44
+ end
45
+ end
46
+
40
47
  desc 'sentence detection'
41
- arg_name 'Describe arguments to sent here'
48
+ arg_name '"text to split into sentences"'
42
49
  command :sent do |c|
43
50
  c.desc 'file input'
44
51
  c.arg_name '<path/to/file>'
@@ -50,7 +57,7 @@ command :sent do |c|
50
57
  end
51
58
 
52
59
  desc 'parsing text'
53
- arg_name 'Describe arguments to parse here'
60
+ arg_name '"text to parse"'
54
61
  command :parse do |c|
55
62
  c.desc 'file input'
56
63
  c.arg_name '<path/to/file>'
@@ -62,7 +69,7 @@ command :parse do |c|
62
69
  end
63
70
 
64
71
  desc 'pos tagging of text'
65
- arg_name 'Describe arguments to tag here'
72
+ arg_name '"text to tag"'
66
73
  command :tag do |c|
67
74
  c.desc 'file input'
68
75
  c.arg_name '<path/to/file>'
@@ -74,7 +81,7 @@ command :tag do |c|
74
81
  end
75
82
 
76
83
  desc 'tokenizing text'
77
- arg_name 'Describe arguments to token here'
84
+ arg_name '"text to tokenize"'
78
85
  command :token do |c|
79
86
  c.desc 'file input'
80
87
  c.arg_name '<path/to/file>'
@@ -1,19 +1,9 @@
1
1
  module Lang
2
2
 
3
3
  include UrlHandler
4
- # get language of input
4
+
5
5
  def get_language(text = nil)
6
- environment = ENV['ENV_NAME'] || 'development'
7
- # ToDo 2013-03-14: respect environment
8
- case environment
9
- when 'development'
10
- # development -> local
11
- # uri = build_url("localhost", 9292, "/langid", nil)
12
- uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
13
- when 'production'
14
- # production
15
- uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
16
- end
6
+ uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
17
7
 
18
8
  if @input
19
9
  asv_response = post_data(URI.escape(@input),uri,{'Content-type'=>'text/plain;charset=utf-8'})
@@ -1,6 +1,7 @@
1
1
  # coding: utf-8
2
2
 
3
3
  class String
4
+
4
5
  # ToDo: check abbr against list of ..
5
6
  def clean_up
6
7
  foo = self.encode('UTF-8', :invalid => :replace, :undef => :replace)
@@ -16,5 +17,5 @@ class String
16
17
  def basename
17
18
  self.split("/").last
18
19
  end
19
- end
20
20
 
21
+ end
@@ -1,6 +1,7 @@
1
1
  require 'tempfile'
2
2
 
3
3
  module TmpFile
4
+
4
5
  module_function
5
6
 
6
7
  def make_tmp_file_from text = nil
@@ -2,10 +2,9 @@ require 'uri'
2
2
  require 'net/http'
3
3
 
4
4
  module UrlHandler
5
- module ClassMethods
6
- end
5
+
6
+ module_function
7
7
 
8
- # instance methods
9
8
  def build_url(host, port, path, query)
10
9
  return URI::HTTP.build({:host => host, :path => path, :query => query}) if port.nil?
11
10
  return URI::HTTP.build({:host => host, :port => port, :path => path, :query => query}) unless port.nil?
@@ -20,7 +19,4 @@ module UrlHandler
20
19
  uri_response
21
20
  end
22
21
 
23
- def self.included(receiver)
24
- receiver.extend ClassMethods
25
- end
26
22
  end
@@ -8,7 +8,7 @@ module NlpToolz
8
8
  File.join(JARS, "opennlp-maxent-3.0.3.jar")
9
9
  ].join(":")
10
10
 
11
- Rjb::load(CLASS_PATH,['-Xmx4096m','-Djava.awt.headless=true'])
11
+ Rjb::load(CLASS_PATH,['-X+C','-Xmx4096m','-Djava.awt.headless=true'])
12
12
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseParallelGC','-XX:+UseParallelOldGC','-Djava.awt.headless=true'])
13
13
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseConcMarkSweepGC','-Djava.awt.headless=true'])
14
14
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseSerialGC','-Djava.awt.headless=true'])
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-12-10
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-12-10
5
5
 
6
6
  module NlpToolz
7
7
 
@@ -11,7 +11,7 @@ module NlpToolz
11
11
  include TmpFile
12
12
 
13
13
  # load java classes
14
- FileInputStream = Rjb::import('java.io.FileInputStream')
14
+ FileInputStream = Rjb::import('java.io.FileInputStream')
15
15
 
16
16
  attr_reader :parsed
17
17
  attr_accessor :input, :lang, :model, :model_name, :parse_hash
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-10-24
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-10-24
5
5
 
6
6
  # ToDo 2012-10-24: add train capabilities
7
7
  module NlpToolz
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-10-23
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-10-23
5
5
 
6
6
  # ToDo 2012-10-24: add train capabilities
7
7
  module NlpToolz
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-11-30
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-11-30
5
5
 
6
6
  module NlpToolz
7
7
 
@@ -10,9 +10,9 @@ module NlpToolz
10
10
  include Lang
11
11
 
12
12
  # load java classes
13
- FileInputStream = Rjb::import('java.io.FileInputStream')
14
- TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
15
- TokenizerME = Rjb::import('opennlp.tools.tokenize.TokenizerME')
13
+ FileInputStream = Rjb::import('java.io.FileInputStream')
14
+ TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
15
+ TokenizerME = Rjb::import('opennlp.tools.tokenize.TokenizerME')
16
16
 
17
17
  attr_accessor :input, :lang, :model, :model_name, :tokens
18
18
 
@@ -1,8 +1,8 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-10-23
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-10-23
5
5
 
6
6
  module NlpToolz
7
- VERSION = "1.0.3"
7
+ VERSION = "1.0.4"
8
8
  end
data/lib/nlp_toolz.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-10-23
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-10-23
5
5
 
6
6
  # for java usage
7
7
  require "rjb"
metadata CHANGED
@@ -1,195 +1,195 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nlp_toolz
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - LeFnord
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-15 00:00:00.000000000 Z
11
+ date: 2013-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: guard
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rb-fsevent
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: guard-rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: guard-bundler
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - '>='
73
+ - - ">="
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: terminal-notifier-guard
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - '>='
87
+ - - ">="
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - '>='
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: yard
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - '>='
101
+ - - ">="
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - '>='
108
+ - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: syntax
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - '>='
115
+ - - ">="
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - '>='
122
+ - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: awesome_print
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
- - - '>='
129
+ - - ">="
130
130
  - !ruby/object:Gem::Version
131
131
  version: '0'
132
132
  type: :development
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - '>='
136
+ - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: rjb
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
- - - '>='
143
+ - - ">="
144
144
  - !ruby/object:Gem::Version
145
145
  version: '0'
146
146
  type: :runtime
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
- - - '>='
150
+ - - ">="
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: multi_json
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
- - - '>='
157
+ - - ">="
158
158
  - !ruby/object:Gem::Version
159
159
  version: '0'
160
160
  type: :runtime
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
- - - '>='
164
+ - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
167
  - !ruby/object:Gem::Dependency
168
168
  name: gli
169
169
  requirement: !ruby/object:Gem::Requirement
170
170
  requirements:
171
- - - '>='
171
+ - - ">="
172
172
  - !ruby/object:Gem::Version
173
173
  version: '0'
174
174
  type: :runtime
175
175
  prerelease: false
176
176
  version_requirements: !ruby/object:Gem::Requirement
177
177
  requirements:
178
- - - '>='
178
+ - - ">="
179
179
  - !ruby/object:Gem::Version
180
180
  version: '0'
181
181
  - !ruby/object:Gem::Dependency
182
182
  name: rake
183
183
  requirement: !ruby/object:Gem::Requirement
184
184
  requirements:
185
- - - '>='
185
+ - - ">="
186
186
  - !ruby/object:Gem::Version
187
187
  version: '0'
188
188
  type: :runtime
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
- - - '>='
192
+ - - ">="
193
193
  - !ruby/object:Gem::Version
194
194
  version: '0'
195
195
  description: make NLP tools available, from OpenNLP and BerkeleyParser
@@ -200,8 +200,8 @@ executables:
200
200
  extensions: []
201
201
  extra_rdoc_files: []
202
202
  files:
203
- - .gitignore
204
- - .rspec
203
+ - ".gitignore"
204
+ - ".rspec"
205
205
  - Gemfile
206
206
  - Guardfile
207
207
  - LICENSE.txt
@@ -237,17 +237,17 @@ require_paths:
237
237
  - lib
238
238
  required_ruby_version: !ruby/object:Gem::Requirement
239
239
  requirements:
240
- - - '>='
240
+ - - ">="
241
241
  - !ruby/object:Gem::Version
242
242
  version: 2.0.0
243
243
  required_rubygems_version: !ruby/object:Gem::Requirement
244
244
  requirements:
245
- - - '>='
245
+ - - ">="
246
246
  - !ruby/object:Gem::Version
247
247
  version: '0'
248
248
  requirements: []
249
249
  rubyforge_project:
250
- rubygems_version: 2.0.3
250
+ rubygems_version: 2.0.14
251
251
  signing_key:
252
252
  specification_version: 4
253
253
  summary: wrapper around the openNLP toolset