nlp_toolz 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 71916455cffe07c8464fb8cc1543d7b8a2ea7205
4
- data.tar.gz: bc30072b7d62770c3e202e0545137056fe5a6164
3
+ metadata.gz: de6bf2ca8c8112151251aa61670ba7af4b4e8d8e
4
+ data.tar.gz: 6ad9029140f59f5f738c57cf5f40481e0e15ed5e
5
5
  SHA512:
6
- metadata.gz: 997d3fc4fb5d9c18546e1ea4c5c8acd19e61ef6979ece0d27cff540cea99c2ecae094fba16a4c3aa25dc05f1fe9282498c228a898b68b4271e493027663e0ba3
7
- data.tar.gz: 42d5ea917f3febe6484a80ab085f0b41515540f841edc2de4b219d06456d7d331a750fb306095336918b4c82f4cd184d1dc6099cd4ff0fd51e2cb487adab9944
6
+ metadata.gz: dac67f653ee26c3ab3e9f304b609570a90387bffa9042ce57e19eedfa26581fc27555265601add717a4a0e3eee1907c24e9da59a47336657ad346ee58478f4bb
7
+ data.tar.gz: 740086a9ee014e4e5b99f9c3ab08981abdea1e2f0dcd3c2150042baa07773e65352ae8091b6ad5399fe1a9d42f646c7ea13c7678949a235f28a5a4fc84cfb608
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Gem Version](https://badge.fury.io/rb/nlp_toolz.png)](http://badge.fury.io/rb/nlp_toolz)
2
+
1
3
  # NlpToolz
2
4
 
3
5
  Basic NLP tools, mostly based on [OpenNLP](http://opennlp.apache.org), at this time `sentence finder`, `tokenizer` and `POS tagger` implemented, plus [Berkeley Parser](http://code.google.com/p/berkeleyparser/).
data/bin/nlp_toolz CHANGED
@@ -37,8 +37,15 @@ program_desc 'running basic NLP tasks'
37
37
 
38
38
  version NlpToolz::VERSION
39
39
 
40
+ desc 'initial setup'
41
+ command :init do |c|
42
+ c.action do |global_options,options,args|
43
+ puts 'setting up app'
44
+ end
45
+ end
46
+
40
47
  desc 'sentence detection'
41
- arg_name 'Describe arguments to sent here'
48
+ arg_name '"text to split into sentences"'
42
49
  command :sent do |c|
43
50
  c.desc 'file input'
44
51
  c.arg_name '<path/to/file>'
@@ -50,7 +57,7 @@ command :sent do |c|
50
57
  end
51
58
 
52
59
  desc 'parsing text'
53
- arg_name 'Describe arguments to parse here'
60
+ arg_name '"text to parse"'
54
61
  command :parse do |c|
55
62
  c.desc 'file input'
56
63
  c.arg_name '<path/to/file>'
@@ -62,7 +69,7 @@ command :parse do |c|
62
69
  end
63
70
 
64
71
  desc 'pos tagging of text'
65
- arg_name 'Describe arguments to tag here'
72
+ arg_name '"text to tag"'
66
73
  command :tag do |c|
67
74
  c.desc 'file input'
68
75
  c.arg_name '<path/to/file>'
@@ -74,7 +81,7 @@ command :tag do |c|
74
81
  end
75
82
 
76
83
  desc 'tokenizing text'
77
- arg_name 'Describe arguments to token here'
84
+ arg_name '"text to tokenize"'
78
85
  command :token do |c|
79
86
  c.desc 'file input'
80
87
  c.arg_name '<path/to/file>'
@@ -1,19 +1,9 @@
1
1
  module Lang
2
2
 
3
3
  include UrlHandler
4
- # get language of input
4
+
5
5
  def get_language(text = nil)
6
- environment = ENV['ENV_NAME'] || 'development'
7
- # ToDo 2013-03-14: respect environment
8
- case environment
9
- when 'development'
10
- # development -> local
11
- # uri = build_url("localhost", 9292, "/langid", nil)
12
- uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
13
- when 'production'
14
- # production
15
- uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
16
- end
6
+ uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
17
7
 
18
8
  if @input
19
9
  asv_response = post_data(URI.escape(@input),uri,{'Content-type'=>'text/plain;charset=utf-8'})
@@ -1,6 +1,7 @@
1
1
  # coding: utf-8
2
2
 
3
3
  class String
4
+
4
5
  # ToDo: check abbr against list of ..
5
6
  def clean_up
6
7
  foo = self.encode('UTF-8', :invalid => :replace, :undef => :replace)
@@ -16,5 +17,5 @@ class String
16
17
  def basename
17
18
  self.split("/").last
18
19
  end
19
- end
20
20
 
21
+ end
@@ -1,6 +1,7 @@
1
1
  require 'tempfile'
2
2
 
3
3
  module TmpFile
4
+
4
5
  module_function
5
6
 
6
7
  def make_tmp_file_from text = nil
@@ -2,10 +2,9 @@ require 'uri'
2
2
  require 'net/http'
3
3
 
4
4
  module UrlHandler
5
- module ClassMethods
6
- end
5
+
6
+ module_function
7
7
 
8
- # instance methods
9
8
  def build_url(host, port, path, query)
10
9
  return URI::HTTP.build({:host => host, :path => path, :query => query}) if port.nil?
11
10
  return URI::HTTP.build({:host => host, :port => port, :path => path, :query => query}) unless port.nil?
@@ -20,7 +19,4 @@ module UrlHandler
20
19
  uri_response
21
20
  end
22
21
 
23
- def self.included(receiver)
24
- receiver.extend ClassMethods
25
- end
26
22
  end
@@ -8,7 +8,7 @@ module NlpToolz
8
8
  File.join(JARS, "opennlp-maxent-3.0.3.jar")
9
9
  ].join(":")
10
10
 
11
- Rjb::load(CLASS_PATH,['-Xmx4096m','-Djava.awt.headless=true'])
11
+ Rjb::load(CLASS_PATH,['-X+C','-Xmx4096m','-Djava.awt.headless=true'])
12
12
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseParallelGC','-XX:+UseParallelOldGC','-Djava.awt.headless=true'])
13
13
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseConcMarkSweepGC','-Djava.awt.headless=true'])
14
14
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseSerialGC','-Djava.awt.headless=true'])
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-12-10
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-12-10
5
5
 
6
6
  module NlpToolz
7
7
 
@@ -11,7 +11,7 @@ module NlpToolz
11
11
  include TmpFile
12
12
 
13
13
  # load java classes
14
- FileInputStream = Rjb::import('java.io.FileInputStream')
14
+ FileInputStream = Rjb::import('java.io.FileInputStream')
15
15
 
16
16
  attr_reader :parsed
17
17
  attr_accessor :input, :lang, :model, :model_name, :parse_hash
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-10-24
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-10-24
5
5
 
6
6
  # ToDo 2012-10-24: add train capabilities
7
7
  module NlpToolz
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-10-23
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-10-23
5
5
 
6
6
  # ToDo 2012-10-24: add train capabilities
7
7
  module NlpToolz
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-11-30
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-11-30
5
5
 
6
6
  module NlpToolz
7
7
 
@@ -10,9 +10,9 @@ module NlpToolz
10
10
  include Lang
11
11
 
12
12
  # load java classes
13
- FileInputStream = Rjb::import('java.io.FileInputStream')
14
- TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
15
- TokenizerME = Rjb::import('opennlp.tools.tokenize.TokenizerME')
13
+ FileInputStream = Rjb::import('java.io.FileInputStream')
14
+ TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
15
+ TokenizerME = Rjb::import('opennlp.tools.tokenize.TokenizerME')
16
16
 
17
17
  attr_accessor :input, :lang, :model, :model_name, :tokens
18
18
 
@@ -1,8 +1,8 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-10-23
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-10-23
5
5
 
6
6
  module NlpToolz
7
- VERSION = "1.0.3"
7
+ VERSION = "1.0.4"
8
8
  end
data/lib/nlp_toolz.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
- # @author: LeFnord
3
- # @email: pscholz.le@gmail.com
4
- # @date: 2012-10-23
2
+ # author: LeFnord
3
+ # email: pscholz.le@gmail.com
4
+ # date: 2012-10-23
5
5
 
6
6
  # for java usage
7
7
  require "rjb"
metadata CHANGED
@@ -1,195 +1,195 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nlp_toolz
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - LeFnord
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-15 00:00:00.000000000 Z
11
+ date: 2013-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: guard
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rb-fsevent
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: guard-rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: guard-bundler
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - '>='
73
+ - - ">="
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: terminal-notifier-guard
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - '>='
87
+ - - ">="
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - '>='
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: yard
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - '>='
101
+ - - ">="
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - '>='
108
+ - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: syntax
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - '>='
115
+ - - ">="
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - '>='
122
+ - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: awesome_print
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
- - - '>='
129
+ - - ">="
130
130
  - !ruby/object:Gem::Version
131
131
  version: '0'
132
132
  type: :development
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - '>='
136
+ - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: rjb
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
- - - '>='
143
+ - - ">="
144
144
  - !ruby/object:Gem::Version
145
145
  version: '0'
146
146
  type: :runtime
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
- - - '>='
150
+ - - ">="
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: multi_json
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
- - - '>='
157
+ - - ">="
158
158
  - !ruby/object:Gem::Version
159
159
  version: '0'
160
160
  type: :runtime
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
- - - '>='
164
+ - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
167
  - !ruby/object:Gem::Dependency
168
168
  name: gli
169
169
  requirement: !ruby/object:Gem::Requirement
170
170
  requirements:
171
- - - '>='
171
+ - - ">="
172
172
  - !ruby/object:Gem::Version
173
173
  version: '0'
174
174
  type: :runtime
175
175
  prerelease: false
176
176
  version_requirements: !ruby/object:Gem::Requirement
177
177
  requirements:
178
- - - '>='
178
+ - - ">="
179
179
  - !ruby/object:Gem::Version
180
180
  version: '0'
181
181
  - !ruby/object:Gem::Dependency
182
182
  name: rake
183
183
  requirement: !ruby/object:Gem::Requirement
184
184
  requirements:
185
- - - '>='
185
+ - - ">="
186
186
  - !ruby/object:Gem::Version
187
187
  version: '0'
188
188
  type: :runtime
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
- - - '>='
192
+ - - ">="
193
193
  - !ruby/object:Gem::Version
194
194
  version: '0'
195
195
  description: make NLP tools available, from OpenNLP and BerkeleyParser
@@ -200,8 +200,8 @@ executables:
200
200
  extensions: []
201
201
  extra_rdoc_files: []
202
202
  files:
203
- - .gitignore
204
- - .rspec
203
+ - ".gitignore"
204
+ - ".rspec"
205
205
  - Gemfile
206
206
  - Guardfile
207
207
  - LICENSE.txt
@@ -237,17 +237,17 @@ require_paths:
237
237
  - lib
238
238
  required_ruby_version: !ruby/object:Gem::Requirement
239
239
  requirements:
240
- - - '>='
240
+ - - ">="
241
241
  - !ruby/object:Gem::Version
242
242
  version: 2.0.0
243
243
  required_rubygems_version: !ruby/object:Gem::Requirement
244
244
  requirements:
245
- - - '>='
245
+ - - ">="
246
246
  - !ruby/object:Gem::Version
247
247
  version: '0'
248
248
  requirements: []
249
249
  rubyforge_project:
250
- rubygems_version: 2.0.3
250
+ rubygems_version: 2.0.14
251
251
  signing_key:
252
252
  specification_version: 4
253
253
  summary: wrapper around the openNLP toolset