opener-language-identifier 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +138 -0
  3. data/bin/language-identifier +6 -0
  4. data/bin/language-identifier-daemon +10 -0
  5. data/bin/language-identifier-server +8 -0
  6. data/config.ru +4 -0
  7. data/core/target/LanguageDetection-0.0.1.jar +0 -0
  8. data/core/target/classes/com/cybozu/labs/langdetect/Command.class +0 -0
  9. data/core/target/classes/com/cybozu/labs/langdetect/Detector.class +0 -0
  10. data/core/target/classes/com/cybozu/labs/langdetect/DetectorFactory.class +0 -0
  11. data/core/target/classes/com/cybozu/labs/langdetect/ErrorCode.class +0 -0
  12. data/core/target/classes/com/cybozu/labs/langdetect/GenProfile.class +0 -0
  13. data/core/target/classes/com/cybozu/labs/langdetect/LangDetectException.class +0 -0
  14. data/core/target/classes/com/cybozu/labs/langdetect/Language.class +0 -0
  15. data/core/target/classes/com/cybozu/labs/langdetect/util/LangProfile.class +0 -0
  16. data/core/target/classes/com/cybozu/labs/langdetect/util/Messages.class +0 -0
  17. data/core/target/classes/com/cybozu/labs/langdetect/util/NGram.class +0 -0
  18. data/core/target/classes/com/cybozu/labs/langdetect/util/TagExtractor.class +0 -0
  19. data/core/target/classes/com/cybozu/labs/langdetect/util/messages.properties +128 -0
  20. data/core/target/classes/org/vicomtech/opennlp/LanguageDetection/CybozuDetector.class +0 -0
  21. data/core/target/classes/org/vicomtech/opennlp/LanguageDetection/Main.class +0 -0
  22. data/exec/language-identifier.rb +9 -0
  23. data/lib/opener/language_identifier.rb +89 -0
  24. data/lib/opener/language_identifier/cli.rb +139 -0
  25. data/lib/opener/language_identifier/detector.rb +36 -0
  26. data/lib/opener/language_identifier/kaf_builder.rb +62 -0
  27. data/lib/opener/language_identifier/public/markdown.css +283 -0
  28. data/lib/opener/language_identifier/server.rb +32 -0
  29. data/lib/opener/language_identifier/version.rb +5 -0
  30. data/lib/opener/language_identifier/views/index.erb +110 -0
  31. data/lib/opener/language_identifier/views/result.erb +15 -0
  32. data/opener-language-identifier.gemspec +37 -0
  33. metadata +231 -0
@@ -0,0 +1,32 @@
1
+ require 'sinatra/base'
2
+ require 'httpclient'
3
+ require 'opener/webservice'
4
+
5
+ module Opener
6
+ class LanguageIdentifier
7
+ ##
8
+ # A basic language identification server powered by Sinatra.
9
+ #
10
+ class Server < Webservice
11
+ set :views, File.expand_path('../views', __FILE__)
12
+ text_processor LanguageIdentifier
13
+ accepted_params :input, :kaf
14
+
15
+ ##
16
+ # Gets the analyzed output of an input.
17
+ #
18
+ # @param [Hash] options The options for the text_processor
19
+ # @return [String] output the output of the text_processor
20
+ # @return [Symbol] type the output type of the text_processor
21
+ #
22
+ # @raise [RuntimeError] Raised when the language identification process failed.
23
+ #
24
+ def analyze(options)
25
+ processor = text_processor.new(options)
26
+ output = processor.run(options[:input])
27
+
28
+ return output
29
+ end
30
+ end # Server
31
+ end # LanguageIdentifier
32
+ end # Opener
@@ -0,0 +1,5 @@
1
+ module Opener
2
+ class LanguageIdentifier
3
+ VERSION = "3.0.0"
4
+ end
5
+ end
@@ -0,0 +1,110 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
5
+ <title>Language Detector Webservice</title>
6
+ </head>
7
+ <body>
8
+ <h1>Language Detection Web Service</h1>
9
+
10
+ <h2>Example Usage</h2>
11
+
12
+ <p>
13
+ <pre>language-identifier-server start</pre>
14
+ <pre>curl -d "input=this is an english text&amp;kaf=true" http://localhost:9393 -XPOST</pre>
15
+
16
+ outputs:
17
+
18
+ <pre>&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;yes&quot;?&gt;&lt;KAF xml:lang=&quot;en&quot;&gt;&lt;raw&gt;this is an english text&lt;/raw&gt;&lt;/KAF&gt;</pre>
19
+ </p>
20
+
21
+ <h2>Try the webservice</h2>
22
+
23
+ <p>* required</p>
24
+ <p>** When entering a value no response will be displayed in the browser.</p>
25
+
26
+ <form action="<%=url("/")%>" method="POST">
27
+ <div>
28
+ <label for="text">Type your text here*</label>
29
+ <br/>
30
+
31
+ <textarea name="input" id="text" rows="10" cols="50"></textarea>
32
+ </div>
33
+ <div>
34
+ <label for="kaf">
35
+ <input type="checkbox" name="kaf" id="kaf"/>
36
+ Output KAF instead of just the language code
37
+ </label>
38
+ <br>
39
+ <br>
40
+ </div>
41
+ <% 10.times do |t| %>
42
+ <div>
43
+ <label for="callbacks_<%=t+1%>">Callback URL <%=t+1%>(**)</label>
44
+ <br />
45
+
46
+ <input id="callbacks_<%=t+1%>" type="text" name="callbacks[]" />
47
+ </div>
48
+ <% end %>
49
+
50
+
51
+ <div>
52
+ <label for="error_callback">Error Callback</label>
53
+ <br />
54
+
55
+ <input id="error_callback" type="text" name="error_callback" />
56
+ </div>
57
+
58
+ <input type="submit" value="Submit" />
59
+ </form>
60
+
61
+ <h2>Actions</h2>
62
+
63
+ <p>
64
+ <dl>
65
+ <dt>POST /</dt>
66
+ <dd>Detect the language of the input argument. See the arguments listing for more options.</dd>
67
+ <dt>GET /</dt>
68
+ <dd>Show this page</dd>
69
+ </dl>
70
+ </p>
71
+
72
+ <h2>Arguments</h2>
73
+
74
+ <p> The webservice takes the following arguments: </p>
75
+ <p>* required</p>
76
+
77
+ <dl>
78
+ <dt>input*</dt>
79
+ <dd>The input text</dd>
80
+ <dt>kaf [true | false]</dt>
81
+ <dd>Output a KAF file with the language in the xml:lang attribute and the text in the raw tag</dd>
82
+ <dd>Use the extended language list instead of the standard languages</dd>
83
+ <dt>callbacks</dt>
84
+ <dd>
85
+ You can provide a list of callback urls. If you provide callback urls
86
+ the language identification will run as a background job and a callback
87
+ with the results will be performed (POST) to the first url in the callback
88
+ list. The other urls in callback list will be provided in the "callbacks"
89
+ argument.<br/><br/>
90
+ Using callback you can chain together several OpeNER webservices in
91
+ one call. The first will call the second, which will call the third, etc.
92
+ For more information, see the <a href="http://opener-project.github.io">
93
+ webservice documentation online</a>.
94
+ </dd>
95
+ <dt>error_callback</dt>
96
+ <dd>URL to notify if errors occur in the background process. The error
97
+ callback will do a POST with the error message in the 'error' field.</dd>
98
+
99
+
100
+
101
+
102
+ </dl>
103
+
104
+
105
+ <p>
106
+
107
+ </p>
108
+
109
+ </body>
110
+ </html>
@@ -0,0 +1,15 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
5
+ <title>Language Detector Webservice</title>
6
+ </head>
7
+ <body>
8
+ <h1>Output URL</h1>
9
+ <p>
10
+ When ready, you can view the result
11
+ <a href="<%= output_url %>">here</a>
12
+ </p>
13
+
14
+ </body>
15
+ </html>
@@ -0,0 +1,37 @@
1
+ require File.expand_path('../lib/opener/language_identifier/version', __FILE__)
2
+
3
+ Gem::Specification.new do |gem|
4
+ gem.name = 'opener-language-identifier'
5
+ gem.version = Opener::LanguageIdentifier::VERSION
6
+ gem.authors = ['development@olery.com']
7
+ gem.summary = 'Language identifier for human readable text.'
8
+ gem.description = gem.summary
9
+ gem.homepage = "http://opener-project.github.com/"
10
+ gem.has_rdoc = 'yard'
11
+ gem.required_ruby_version = '>= 1.9.2'
12
+
13
+ gem.files = Dir.glob([
14
+ 'core/target/LanguageDetection-*.jar',
15
+ 'core/target/classes/**/*.*',
16
+ 'exec/**/*',
17
+ 'lib/**/*',
18
+ 'config.ru',
19
+ '*.gemspec',
20
+ 'README.md'
21
+ ]).select { |file| File.file?(file) }
22
+
23
+ gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
24
+
25
+ gem.add_dependency 'builder'
26
+ gem.add_dependency 'puma'
27
+ gem.add_dependency 'sinatra', '~>1.4.2'
28
+ gem.add_dependency 'httpclient'
29
+ gem.add_dependency 'uuidtools'
30
+ gem.add_dependency 'opener-build-tools'
31
+ gem.add_dependency 'opener-webservice'
32
+ gem.add_dependency 'opener-daemons'
33
+
34
+ gem.add_development_dependency 'rspec'
35
+ gem.add_development_dependency 'cucumber'
36
+ gem.add_development_dependency 'rake'
37
+ end
metadata ADDED
@@ -0,0 +1,231 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: opener-language-identifier
3
+ version: !ruby/object:Gem::Version
4
+ version: 3.0.0
5
+ platform: ruby
6
+ authors:
7
+ - development@olery.com
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: builder
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ prerelease: false
26
+ type: :runtime
27
+ - !ruby/object:Gem::Dependency
28
+ name: puma
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ prerelease: false
40
+ type: :runtime
41
+ - !ruby/object:Gem::Dependency
42
+ name: sinatra
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 1.4.2
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ~>
51
+ - !ruby/object:Gem::Version
52
+ version: 1.4.2
53
+ prerelease: false
54
+ type: :runtime
55
+ - !ruby/object:Gem::Dependency
56
+ name: httpclient
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ prerelease: false
68
+ type: :runtime
69
+ - !ruby/object:Gem::Dependency
70
+ name: uuidtools
71
+ version_requirements: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirement: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ prerelease: false
82
+ type: :runtime
83
+ - !ruby/object:Gem::Dependency
84
+ name: opener-build-tools
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ prerelease: false
96
+ type: :runtime
97
+ - !ruby/object:Gem::Dependency
98
+ name: opener-webservice
99
+ version_requirements: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirement: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ prerelease: false
110
+ type: :runtime
111
+ - !ruby/object:Gem::Dependency
112
+ name: opener-daemons
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirement: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - '>='
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ prerelease: false
124
+ type: :runtime
125
+ - !ruby/object:Gem::Dependency
126
+ name: rspec
127
+ version_requirements: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ requirement: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ prerelease: false
138
+ type: :development
139
+ - !ruby/object:Gem::Dependency
140
+ name: cucumber
141
+ version_requirements: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ requirement: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - '>='
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ prerelease: false
152
+ type: :development
153
+ - !ruby/object:Gem::Dependency
154
+ name: rake
155
+ version_requirements: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ requirement: !ruby/object:Gem::Requirement
161
+ requirements:
162
+ - - '>='
163
+ - !ruby/object:Gem::Version
164
+ version: '0'
165
+ prerelease: false
166
+ type: :development
167
+ description: Language identifier for human readable text.
168
+ email:
169
+ executables:
170
+ - language-identifier
171
+ - language-identifier-server
172
+ - language-identifier-daemon
173
+ extensions: []
174
+ extra_rdoc_files: []
175
+ files:
176
+ - core/target/LanguageDetection-0.0.1.jar
177
+ - core/target/classes/org/vicomtech/opennlp/LanguageDetection/CybozuDetector.class
178
+ - core/target/classes/org/vicomtech/opennlp/LanguageDetection/Main.class
179
+ - core/target/classes/com/cybozu/labs/langdetect/ErrorCode.class
180
+ - core/target/classes/com/cybozu/labs/langdetect/GenProfile.class
181
+ - core/target/classes/com/cybozu/labs/langdetect/Command.class
182
+ - core/target/classes/com/cybozu/labs/langdetect/LangDetectException.class
183
+ - core/target/classes/com/cybozu/labs/langdetect/Language.class
184
+ - core/target/classes/com/cybozu/labs/langdetect/Detector.class
185
+ - core/target/classes/com/cybozu/labs/langdetect/DetectorFactory.class
186
+ - core/target/classes/com/cybozu/labs/langdetect/util/TagExtractor.class
187
+ - core/target/classes/com/cybozu/labs/langdetect/util/NGram.class
188
+ - core/target/classes/com/cybozu/labs/langdetect/util/LangProfile.class
189
+ - core/target/classes/com/cybozu/labs/langdetect/util/Messages.class
190
+ - core/target/classes/com/cybozu/labs/langdetect/util/messages.properties
191
+ - exec/language-identifier.rb
192
+ - lib/opener/language_identifier.rb
193
+ - lib/opener/language_identifier/kaf_builder.rb
194
+ - lib/opener/language_identifier/version.rb
195
+ - lib/opener/language_identifier/server.rb
196
+ - lib/opener/language_identifier/detector.rb
197
+ - lib/opener/language_identifier/cli.rb
198
+ - lib/opener/language_identifier/public/markdown.css
199
+ - lib/opener/language_identifier/views/index.erb
200
+ - lib/opener/language_identifier/views/result.erb
201
+ - config.ru
202
+ - opener-language-identifier.gemspec
203
+ - README.md
204
+ - bin/language-identifier
205
+ - bin/language-identifier-server
206
+ - bin/language-identifier-daemon
207
+ homepage: http://opener-project.github.com/
208
+ licenses: []
209
+ metadata: {}
210
+ post_install_message:
211
+ rdoc_options: []
212
+ require_paths:
213
+ - lib
214
+ required_ruby_version: !ruby/object:Gem::Requirement
215
+ requirements:
216
+ - - '>='
217
+ - !ruby/object:Gem::Version
218
+ version: 1.9.2
219
+ required_rubygems_version: !ruby/object:Gem::Requirement
220
+ requirements:
221
+ - - '>='
222
+ - !ruby/object:Gem::Version
223
+ version: '0'
224
+ requirements: []
225
+ rubyforge_project:
226
+ rubygems_version: 2.1.9
227
+ signing_key:
228
+ specification_version: 4
229
+ summary: Language identifier for human readable text.
230
+ test_files: []
231
+ has_rdoc: yard