opener-language-identifier 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +138 -0
- data/bin/language-identifier +6 -0
- data/bin/language-identifier-daemon +10 -0
- data/bin/language-identifier-server +8 -0
- data/config.ru +4 -0
- data/core/target/LanguageDetection-0.0.1.jar +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/Command.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/Detector.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/DetectorFactory.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/ErrorCode.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/GenProfile.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/LangDetectException.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/Language.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/util/LangProfile.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/util/Messages.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/util/NGram.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/util/TagExtractor.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/util/messages.properties +128 -0
- data/core/target/classes/org/vicomtech/opennlp/LanguageDetection/CybozuDetector.class +0 -0
- data/core/target/classes/org/vicomtech/opennlp/LanguageDetection/Main.class +0 -0
- data/exec/language-identifier.rb +9 -0
- data/lib/opener/language_identifier.rb +89 -0
- data/lib/opener/language_identifier/cli.rb +139 -0
- data/lib/opener/language_identifier/detector.rb +36 -0
- data/lib/opener/language_identifier/kaf_builder.rb +62 -0
- data/lib/opener/language_identifier/public/markdown.css +283 -0
- data/lib/opener/language_identifier/server.rb +32 -0
- data/lib/opener/language_identifier/version.rb +5 -0
- data/lib/opener/language_identifier/views/index.erb +110 -0
- data/lib/opener/language_identifier/views/result.erb +15 -0
- data/opener-language-identifier.gemspec +37 -0
- metadata +231 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
require 'httpclient'
|
3
|
+
require 'opener/webservice'
|
4
|
+
|
5
|
+
module Opener
|
6
|
+
class LanguageIdentifier
|
7
|
+
##
|
8
|
+
# A basic language identification server powered by Sinatra.
|
9
|
+
#
|
10
|
+
class Server < Webservice
|
11
|
+
set :views, File.expand_path('../views', __FILE__)
|
12
|
+
text_processor LanguageIdentifier
|
13
|
+
accepted_params :input, :kaf
|
14
|
+
|
15
|
+
##
|
16
|
+
# Gets the analyzed output of an input.
|
17
|
+
#
|
18
|
+
# @param [Hash] options The options for the text_processor
|
19
|
+
# @return [String] output the output of the text_processor
|
20
|
+
# @return [Symbol] type the output type of the text_processor
|
21
|
+
#
|
22
|
+
# @raise [RuntimeError] Raised when the tagging process failed.
|
23
|
+
#
|
24
|
+
def analyze(options)
|
25
|
+
processor = text_processor.new(options)
|
26
|
+
output = processor.run(options[:input])
|
27
|
+
|
28
|
+
return output
|
29
|
+
end
|
30
|
+
end # Server
|
31
|
+
end # LanguageIdentifier
|
32
|
+
end # Opener
|
@@ -0,0 +1,110 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
|
5
|
+
<title>Language Detector Webservice</title>
|
6
|
+
</head>
|
7
|
+
<body>
|
8
|
+
<h1>Language Detection Web Service</h1>
|
9
|
+
|
10
|
+
<h2>Example Usage</h2>
|
11
|
+
|
12
|
+
<p>
|
13
|
+
<pre>language-identifier-server start</pre>
|
14
|
+
<pre>curl -d "input=this is an english text&kaf=true" http://localhost:9393 -XPOST</pre>
|
15
|
+
|
16
|
+
outputs:
|
17
|
+
|
18
|
+
<pre><?xml version="1.0" encoding="UTF-8" standalone="yes"?><KAF xml:lang="en"><raw>this is an english text</raw></KAF></pre>
|
19
|
+
</p>
|
20
|
+
|
21
|
+
<h2>Try the webservice</h2>
|
22
|
+
|
23
|
+
<p>* required</p>
|
24
|
+
<p>** When entering a value no response will be displayed in the browser.</p>
|
25
|
+
|
26
|
+
<form action="<%=url("/")%>" method="POST">
|
27
|
+
<div>
|
28
|
+
<label for="text">Type your text here*</label>
|
29
|
+
<br/>
|
30
|
+
|
31
|
+
<textarea name="input" id="text" rows="10" cols="50"></textarea>
|
32
|
+
</div>
|
33
|
+
<div>
|
34
|
+
<label for="kaf">
|
35
|
+
<input type="checkbox" name="kaf" id="kaf"/>
|
36
|
+
Output KAF instead of just the language code
|
37
|
+
</label>
|
38
|
+
<br>
|
39
|
+
<br>
|
40
|
+
</div>
|
41
|
+
<% 10.times do |t| %>
|
42
|
+
<div>
|
43
|
+
<label for="callbacks">Callback URL <%=t+1%>(**)</label>
|
44
|
+
<br />
|
45
|
+
|
46
|
+
<input id="callbacks" type="text" name="callbacks[]" />
|
47
|
+
</div>
|
48
|
+
<% end %>
|
49
|
+
|
50
|
+
|
51
|
+
<div>
|
52
|
+
<label for="error_callback">Error Callback</label>
|
53
|
+
<br />
|
54
|
+
|
55
|
+
<input id="error_callback" type="text" name="error_callback" />
|
56
|
+
</div>
|
57
|
+
|
58
|
+
<input type="submit" value="Submit" />
|
59
|
+
</form>
|
60
|
+
|
61
|
+
<h2>Actions</h2>
|
62
|
+
|
63
|
+
<p>
|
64
|
+
<dl>
|
65
|
+
<dt>POST /</dt>
|
66
|
+
<dd>Detect the language on the input argument. See arguments listing for more options.</dd>
|
67
|
+
<dt>GET /</dt>
|
68
|
+
<dd>Show this page</dd>
|
69
|
+
</dl>
|
70
|
+
</p>
|
71
|
+
|
72
|
+
<h2>Arguments</h2>
|
73
|
+
|
74
|
+
<p> The webservice takes the following arguments: </p>
|
75
|
+
<p>* required</p>
|
76
|
+
|
77
|
+
<dl>
|
78
|
+
<dt>input*</dt>
|
79
|
+
<dd>The input text</dd>
|
80
|
+
<dt>kaf [true | false]</dt>
|
81
|
+
<dd>Output a KAF file with the language in the xml:lang attribute and the text in the raw tag</dd>
|
82
|
+
<dd>Use the extended language list instead of the standard languages</dd>
|
83
|
+
<dt>callbacks</dt>
|
84
|
+
<dd>
|
85
|
+
You can provide a list of callback urls. If you provide callback urls
|
86
|
+
the language identification will run as a background job and a callback
|
87
|
+
with the results will be performed (POST) to the first url in the callback
|
88
|
+
list. The other urls in callback list will be provided in the "callbacks"
|
89
|
+
argument.<br/><br/>
|
90
|
+
Using callback you can chain together several OpeNER webservices in
|
91
|
+
one call. The first will call the second, which will call the third, etc.
|
92
|
+
For more information, see the <a href="http://opener-project.github.io">
|
93
|
+
webservice documentation online</a>.
|
94
|
+
</dd>
|
95
|
+
<dt>error_callback</dt>
|
96
|
+
<dd>URL to notify if errors occur in the background process. The error
|
97
|
+
callback will do a POST with the error message in the 'error' field.</dd>
|
98
|
+
</dt>
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
</dl>
|
103
|
+
|
104
|
+
|
105
|
+
<p>
|
106
|
+
|
107
|
+
</p>
|
108
|
+
|
109
|
+
</body>
|
110
|
+
</html>
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
|
5
|
+
<title>Language Detector Webservice</title>
|
6
|
+
</head>
|
7
|
+
<body>
|
8
|
+
<h1>Output URL</h1>
|
9
|
+
<p>
|
10
|
+
When ready, you can view the result
|
11
|
+
<a href="<%= output_url %>">here</a>
|
12
|
+
</p>
|
13
|
+
|
14
|
+
</body>
|
15
|
+
</html>
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.expand_path('../lib/opener/language_identifier/version', __FILE__)
|
2
|
+
|
3
|
+
Gem::Specification.new do |gem|
|
4
|
+
gem.name = 'opener-language-identifier'
|
5
|
+
gem.version = Opener::LanguageIdentifier::VERSION
|
6
|
+
gem.authors = ['development@olery.com']
|
7
|
+
gem.summary = 'Language identifier for human readable text.'
|
8
|
+
gem.description = gem.summary
|
9
|
+
gem.homepage = "http://opener-project.github.com/"
|
10
|
+
gem.has_rdoc = 'yard'
|
11
|
+
gem.required_ruby_version = '>= 1.9.2'
|
12
|
+
|
13
|
+
gem.files = Dir.glob([
|
14
|
+
'core/target/LanguageDetection-*.jar',
|
15
|
+
'core/target/classes/**/*.*',
|
16
|
+
'exec/**/*',
|
17
|
+
'lib/**/*',
|
18
|
+
'config.ru',
|
19
|
+
'*.gemspec',
|
20
|
+
'README.md'
|
21
|
+
]).select { |file| File.file?(file) }
|
22
|
+
|
23
|
+
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
24
|
+
|
25
|
+
gem.add_dependency 'builder'
|
26
|
+
gem.add_dependency 'puma'
|
27
|
+
gem.add_dependency 'sinatra', '~>1.4.2'
|
28
|
+
gem.add_dependency 'httpclient'
|
29
|
+
gem.add_dependency 'uuidtools'
|
30
|
+
gem.add_dependency 'opener-build-tools'
|
31
|
+
gem.add_dependency 'opener-webservice'
|
32
|
+
gem.add_dependency 'opener-daemons'
|
33
|
+
|
34
|
+
gem.add_development_dependency 'rspec'
|
35
|
+
gem.add_development_dependency 'cucumber'
|
36
|
+
gem.add_development_dependency 'rake'
|
37
|
+
end
|
metadata
ADDED
@@ -0,0 +1,231 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: opener-language-identifier
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 3.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- development@olery.com
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: builder
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - '>='
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '0'
|
25
|
+
prerelease: false
|
26
|
+
type: :runtime
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: puma
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
prerelease: false
|
40
|
+
type: :runtime
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: sinatra
|
43
|
+
version_requirements: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.4.2
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ~>
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.4.2
|
53
|
+
prerelease: false
|
54
|
+
type: :runtime
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: httpclient
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
prerelease: false
|
68
|
+
type: :runtime
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: uuidtools
|
71
|
+
version_requirements: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
requirement: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - '>='
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: '0'
|
81
|
+
prerelease: false
|
82
|
+
type: :runtime
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: opener-build-tools
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
requirement: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - '>='
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '0'
|
95
|
+
prerelease: false
|
96
|
+
type: :runtime
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: opener-webservice
|
99
|
+
version_requirements: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
requirement: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - '>='
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
prerelease: false
|
110
|
+
type: :runtime
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: opener-daemons
|
113
|
+
version_requirements: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
requirement: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - '>='
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
prerelease: false
|
124
|
+
type: :runtime
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rspec
|
127
|
+
version_requirements: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
requirement: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - '>='
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
prerelease: false
|
138
|
+
type: :development
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: cucumber
|
141
|
+
version_requirements: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - '>='
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
requirement: !ruby/object:Gem::Requirement
|
147
|
+
requirements:
|
148
|
+
- - '>='
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '0'
|
151
|
+
prerelease: false
|
152
|
+
type: :development
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rake
|
155
|
+
version_requirements: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
requirement: !ruby/object:Gem::Requirement
|
161
|
+
requirements:
|
162
|
+
- - '>='
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
version: '0'
|
165
|
+
prerelease: false
|
166
|
+
type: :development
|
167
|
+
description: Language identifier for human readable text.
|
168
|
+
email:
|
169
|
+
executables:
|
170
|
+
- language-identifier
|
171
|
+
- language-identifier-server
|
172
|
+
- language-identifier-daemon
|
173
|
+
extensions: []
|
174
|
+
extra_rdoc_files: []
|
175
|
+
files:
|
176
|
+
- core/target/LanguageDetection-0.0.1.jar
|
177
|
+
- core/target/classes/org/vicomtech/opennlp/LanguageDetection/CybozuDetector.class
|
178
|
+
- core/target/classes/org/vicomtech/opennlp/LanguageDetection/Main.class
|
179
|
+
- core/target/classes/com/cybozu/labs/langdetect/ErrorCode.class
|
180
|
+
- core/target/classes/com/cybozu/labs/langdetect/GenProfile.class
|
181
|
+
- core/target/classes/com/cybozu/labs/langdetect/Command.class
|
182
|
+
- core/target/classes/com/cybozu/labs/langdetect/LangDetectException.class
|
183
|
+
- core/target/classes/com/cybozu/labs/langdetect/Language.class
|
184
|
+
- core/target/classes/com/cybozu/labs/langdetect/Detector.class
|
185
|
+
- core/target/classes/com/cybozu/labs/langdetect/DetectorFactory.class
|
186
|
+
- core/target/classes/com/cybozu/labs/langdetect/util/TagExtractor.class
|
187
|
+
- core/target/classes/com/cybozu/labs/langdetect/util/NGram.class
|
188
|
+
- core/target/classes/com/cybozu/labs/langdetect/util/LangProfile.class
|
189
|
+
- core/target/classes/com/cybozu/labs/langdetect/util/Messages.class
|
190
|
+
- core/target/classes/com/cybozu/labs/langdetect/util/messages.properties
|
191
|
+
- exec/language-identifier.rb
|
192
|
+
- lib/opener/language_identifier.rb
|
193
|
+
- lib/opener/language_identifier/kaf_builder.rb
|
194
|
+
- lib/opener/language_identifier/version.rb
|
195
|
+
- lib/opener/language_identifier/server.rb
|
196
|
+
- lib/opener/language_identifier/detector.rb
|
197
|
+
- lib/opener/language_identifier/cli.rb
|
198
|
+
- lib/opener/language_identifier/public/markdown.css
|
199
|
+
- lib/opener/language_identifier/views/index.erb
|
200
|
+
- lib/opener/language_identifier/views/result.erb
|
201
|
+
- config.ru
|
202
|
+
- opener-language-identifier.gemspec
|
203
|
+
- README.md
|
204
|
+
- bin/language-identifier
|
205
|
+
- bin/language-identifier-server
|
206
|
+
- bin/language-identifier-daemon
|
207
|
+
homepage: http://opener-project.github.com/
|
208
|
+
licenses: []
|
209
|
+
metadata: {}
|
210
|
+
post_install_message:
|
211
|
+
rdoc_options: []
|
212
|
+
require_paths:
|
213
|
+
- lib
|
214
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
215
|
+
requirements:
|
216
|
+
- - '>='
|
217
|
+
- !ruby/object:Gem::Version
|
218
|
+
version: 1.9.2
|
219
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
220
|
+
requirements:
|
221
|
+
- - '>='
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
version: '0'
|
224
|
+
requirements: []
|
225
|
+
rubyforge_project:
|
226
|
+
rubygems_version: 2.1.9
|
227
|
+
signing_key:
|
228
|
+
specification_version: 4
|
229
|
+
summary: Language identifier for human readable text.
|
230
|
+
test_files: []
|
231
|
+
has_rdoc: yard
|