nlp_toolz 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/bin/nlp_toolz +11 -4
- data/lib/nlp_toolz/helpers/lang.rb +2 -12
- data/lib/nlp_toolz/helpers/string_extended.rb +2 -1
- data/lib/nlp_toolz/helpers/tmp_file.rb +1 -0
- data/lib/nlp_toolz/helpers/url_handler.rb +2 -6
- data/lib/nlp_toolz/load_jars.rb +1 -1
- data/lib/nlp_toolz/parser.rb +4 -4
- data/lib/nlp_toolz/pos_tags.rb +3 -3
- data/lib/nlp_toolz/sentences.rb +3 -3
- data/lib/nlp_toolz/tokens.rb +6 -6
- data/lib/nlp_toolz/version.rb +4 -4
- data/lib/nlp_toolz.rb +3 -3
- metadata +33 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: de6bf2ca8c8112151251aa61670ba7af4b4e8d8e
|
4
|
+
data.tar.gz: 6ad9029140f59f5f738c57cf5f40481e0e15ed5e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dac67f653ee26c3ab3e9f304b609570a90387bffa9042ce57e19eedfa26581fc27555265601add717a4a0e3eee1907c24e9da59a47336657ad346ee58478f4bb
|
7
|
+
data.tar.gz: 740086a9ee014e4e5b99f9c3ab08981abdea1e2f0dcd3c2150042baa07773e65352ae8091b6ad5399fe1a9d42f646c7ea13c7678949a235f28a5a4fc84cfb608
|
data/README.md
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/nlp_toolz.png)](http://badge.fury.io/rb/nlp_toolz)
|
2
|
+
|
1
3
|
# NlpToolz
|
2
4
|
|
3
5
|
Basic NLP tools, mostly based on [OpenNLP](http://opennlp.apache.org), at this time `sentence finder`, `tokenizer` and `POS tagger` implemented, plus [Berkeley Parser](http://code.google.com/p/berkeleyparser/).
|
data/bin/nlp_toolz
CHANGED
@@ -37,8 +37,15 @@ program_desc 'running basic NLP tasks'
|
|
37
37
|
|
38
38
|
version NlpToolz::VERSION
|
39
39
|
|
40
|
+
desc 'initial setup'
|
41
|
+
command :init do |c|
|
42
|
+
c.action do |global_options,options,args|
|
43
|
+
puts 'setting up app'
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
40
47
|
desc 'sentence detection'
|
41
|
-
arg_name '
|
48
|
+
arg_name '"text to split into sentences"'
|
42
49
|
command :sent do |c|
|
43
50
|
c.desc 'file input'
|
44
51
|
c.arg_name '<path/to/file>'
|
@@ -50,7 +57,7 @@ command :sent do |c|
|
|
50
57
|
end
|
51
58
|
|
52
59
|
desc 'parsing text'
|
53
|
-
arg_name '
|
60
|
+
arg_name '"text to parse"'
|
54
61
|
command :parse do |c|
|
55
62
|
c.desc 'file input'
|
56
63
|
c.arg_name '<path/to/file>'
|
@@ -62,7 +69,7 @@ command :parse do |c|
|
|
62
69
|
end
|
63
70
|
|
64
71
|
desc 'pos tagging of text'
|
65
|
-
arg_name '
|
72
|
+
arg_name '"text to tag"'
|
66
73
|
command :tag do |c|
|
67
74
|
c.desc 'file input'
|
68
75
|
c.arg_name '<path/to/file>'
|
@@ -74,7 +81,7 @@ command :tag do |c|
|
|
74
81
|
end
|
75
82
|
|
76
83
|
desc 'tokenizing text'
|
77
|
-
arg_name '
|
84
|
+
arg_name '"text to tokenize"'
|
78
85
|
command :token do |c|
|
79
86
|
c.desc 'file input'
|
80
87
|
c.arg_name '<path/to/file>'
|
data/lib/nlp_toolz/helpers/lang.rb
CHANGED
@@ -1,19 +1,9 @@
|
|
1
1
|
module Lang
|
2
2
|
|
3
3
|
include UrlHandler
|
4
|
-
|
4
|
+
|
5
5
|
def get_language(text = nil)
|
6
|
-
|
7
|
-
# ToDo 2013-03-14: respect environment
|
8
|
-
case environment
|
9
|
-
when 'development'
|
10
|
-
# development -> local
|
11
|
-
# uri = build_url("localhost", 9292, "/langid", nil)
|
12
|
-
uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
|
13
|
-
when 'production'
|
14
|
-
# production
|
15
|
-
uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
|
16
|
-
end
|
6
|
+
uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
|
17
7
|
|
18
8
|
if @input
|
19
9
|
asv_response = post_data(URI.escape(@input),uri,{'Content-type'=>'text/plain;charset=utf-8'})
|
data/lib/nlp_toolz/helpers/string_extended.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
|
3
3
|
class String
|
4
|
+
|
4
5
|
# ToDo: check abbr against list of ..
|
5
6
|
def clean_up
|
6
7
|
foo = self.encode('UTF-8', :invalid => :replace, :undef => :replace)
|
@@ -16,5 +17,5 @@ class String
|
|
16
17
|
def basename
|
17
18
|
self.split("/").last
|
18
19
|
end
|
19
|
-
end
|
20
20
|
|
21
|
+
end
|
data/lib/nlp_toolz/helpers/url_handler.rb
CHANGED
@@ -2,10 +2,9 @@ require 'uri'
|
|
2
2
|
require 'net/http'
|
3
3
|
|
4
4
|
module UrlHandler
|
5
|
-
|
6
|
-
|
5
|
+
|
6
|
+
module_function
|
7
7
|
|
8
|
-
# instance methods
|
9
8
|
def build_url(host, port, path, query)
|
10
9
|
return URI::HTTP.build({:host => host, :path => path, :query => query}) if port.nil?
|
11
10
|
return URI::HTTP.build({:host => host, :port => port, :path => path, :query => query}) unless port.nil?
|
@@ -20,7 +19,4 @@ module UrlHandler
|
|
20
19
|
uri_response
|
21
20
|
end
|
22
21
|
|
23
|
-
def self.included(receiver)
|
24
|
-
receiver.extend ClassMethods
|
25
|
-
end
|
26
22
|
end
|
data/lib/nlp_toolz/load_jars.rb
CHANGED
@@ -8,7 +8,7 @@ module NlpToolz
|
|
8
8
|
File.join(JARS, "opennlp-maxent-3.0.3.jar")
|
9
9
|
].join(":")
|
10
10
|
|
11
|
-
Rjb::load(CLASS_PATH,['-Xmx4096m','-Djava.awt.headless=true'])
|
11
|
+
Rjb::load(CLASS_PATH,['-X+C','-Xmx4096m','-Djava.awt.headless=true'])
|
12
12
|
# Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseParallelGC','-XX:+UseParallelOldGC','-Djava.awt.headless=true'])
|
13
13
|
# Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseConcMarkSweepGC','-Djava.awt.headless=true'])
|
14
14
|
# Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseSerialGC','-Djava.awt.headless=true'])
|
data/lib/nlp_toolz/parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
#
|
3
|
-
#
|
4
|
-
#
|
2
|
+
# author: LeFnord
|
3
|
+
# email: pscholz.le@gmail.com
|
4
|
+
# date: 2012-12-10
|
5
5
|
|
6
6
|
module NlpToolz
|
7
7
|
|
@@ -11,7 +11,7 @@ module NlpToolz
|
|
11
11
|
include TmpFile
|
12
12
|
|
13
13
|
# load java classes
|
14
|
-
FileInputStream
|
14
|
+
FileInputStream = Rjb::import('java.io.FileInputStream')
|
15
15
|
|
16
16
|
attr_reader :parsed
|
17
17
|
attr_accessor :input, :lang, :model, :model_name, :parse_hash
|
data/lib/nlp_toolz/pos_tags.rb
CHANGED
data/lib/nlp_toolz/sentences.rb
CHANGED
data/lib/nlp_toolz/tokens.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
#
|
3
|
-
#
|
4
|
-
#
|
2
|
+
# author: LeFnord
|
3
|
+
# email: pscholz.le@gmail.com
|
4
|
+
# date: 2012-11-30
|
5
5
|
|
6
6
|
module NlpToolz
|
7
7
|
|
@@ -10,9 +10,9 @@ module NlpToolz
|
|
10
10
|
include Lang
|
11
11
|
|
12
12
|
# load java classes
|
13
|
-
FileInputStream
|
14
|
-
TokenizerModel
|
15
|
-
TokenizerME
|
13
|
+
FileInputStream = Rjb::import('java.io.FileInputStream')
|
14
|
+
TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
|
15
|
+
TokenizerME = Rjb::import('opennlp.tools.tokenize.TokenizerME')
|
16
16
|
|
17
17
|
attr_accessor :input, :lang, :model, :model_name, :tokens
|
18
18
|
|
data/lib/nlp_toolz/version.rb
CHANGED
data/lib/nlp_toolz.rb
CHANGED
metadata
CHANGED
@@ -1,195 +1,195 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nlp_toolz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- LeFnord
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-12-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: guard
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rb-fsevent
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: guard-rspec
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: guard-bundler
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: terminal-notifier-guard
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: yard
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: syntax
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- -
|
115
|
+
- - ">="
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- -
|
122
|
+
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: awesome_print
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- -
|
129
|
+
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
131
|
version: '0'
|
132
132
|
type: :development
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- -
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: rjb
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
142
142
|
requirements:
|
143
|
-
- -
|
143
|
+
- - ">="
|
144
144
|
- !ruby/object:Gem::Version
|
145
145
|
version: '0'
|
146
146
|
type: :runtime
|
147
147
|
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
|
-
- -
|
150
|
+
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
154
|
name: multi_json
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
156
156
|
requirements:
|
157
|
-
- -
|
157
|
+
- - ">="
|
158
158
|
- !ruby/object:Gem::Version
|
159
159
|
version: '0'
|
160
160
|
type: :runtime
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
|
-
- -
|
164
|
+
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
168
|
name: gli
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
170
170
|
requirements:
|
171
|
-
- -
|
171
|
+
- - ">="
|
172
172
|
- !ruby/object:Gem::Version
|
173
173
|
version: '0'
|
174
174
|
type: :runtime
|
175
175
|
prerelease: false
|
176
176
|
version_requirements: !ruby/object:Gem::Requirement
|
177
177
|
requirements:
|
178
|
-
- -
|
178
|
+
- - ">="
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
181
|
- !ruby/object:Gem::Dependency
|
182
182
|
name: rake
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
184
184
|
requirements:
|
185
|
-
- -
|
185
|
+
- - ">="
|
186
186
|
- !ruby/object:Gem::Version
|
187
187
|
version: '0'
|
188
188
|
type: :runtime
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
|
-
- -
|
192
|
+
- - ">="
|
193
193
|
- !ruby/object:Gem::Version
|
194
194
|
version: '0'
|
195
195
|
description: make NLP tools available, from OpenNLP and BerkeleyParser
|
@@ -200,8 +200,8 @@ executables:
|
|
200
200
|
extensions: []
|
201
201
|
extra_rdoc_files: []
|
202
202
|
files:
|
203
|
-
- .gitignore
|
204
|
-
- .rspec
|
203
|
+
- ".gitignore"
|
204
|
+
- ".rspec"
|
205
205
|
- Gemfile
|
206
206
|
- Guardfile
|
207
207
|
- LICENSE.txt
|
@@ -237,17 +237,17 @@ require_paths:
|
|
237
237
|
- lib
|
238
238
|
required_ruby_version: !ruby/object:Gem::Requirement
|
239
239
|
requirements:
|
240
|
-
- -
|
240
|
+
- - ">="
|
241
241
|
- !ruby/object:Gem::Version
|
242
242
|
version: 2.0.0
|
243
243
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
244
244
|
requirements:
|
245
|
-
- -
|
245
|
+
- - ">="
|
246
246
|
- !ruby/object:Gem::Version
|
247
247
|
version: '0'
|
248
248
|
requirements: []
|
249
249
|
rubyforge_project:
|
250
|
-
rubygems_version: 2.0.
|
250
|
+
rubygems_version: 2.0.14
|
251
251
|
signing_key:
|
252
252
|
specification_version: 4
|
253
253
|
summary: wrapper around the openNLP toolset
|