opener-language-identifier 3.1.7 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 849ccdcd28088ee1bbb6060b641b0cab043739b5
4
- data.tar.gz: 56d254dff16c2d989182eca81760b576653309ac
3
+ metadata.gz: 1c103a6e78b0e47383c82198173460c555eefe19
4
+ data.tar.gz: 44db5e6da5a34746ef977773fe5bba7b093cd49b
5
5
  SHA512:
6
- metadata.gz: 47b0b2973a35c11e28b81727f32271b29535c5ed34eb1a1f6385abce7b948d831a0b3a2b0ddc37a684b0712ace6e13e32ce7313f830461390fed029bc5c9d5d3
7
- data.tar.gz: 7b6a18125d63fcfb1f511a6218d381c41039328fceb0099eda4fb33fa4c05f5241a9514501a71414e39bcb7a97f174a50955dba950c98fc139c000dee62dde06
6
+ metadata.gz: 8b05e38f97c517b1f4c6527f2d4e941de89e145942aabb2cad675a5938643250a857b37a031896b10f1aad76d553967fe9435c4f9fa956861a1b24419b115fe5
7
+ data.tar.gz: 8a2aada54780b6f0c0c8f47adf68a0477885ea864e59e088c3d9f4a5e8ca9903ac32d7620c9fac58c78d296b7eb717215ffc9bcbe7d796f88c11385c75d20c0a
data/README.md CHANGED
@@ -4,9 +4,7 @@
4
4
 
5
5
  The language identifier takes raw text and tries to figure out what language it
6
6
  was written in. The output can either be a plain-text i18n language code or a
7
- basic KAF document containing the language and raw input text.
8
-
9
- The output of the language identifier can then be used to drive further text
7
+ basic KAF document containing the language and raw input text. The output of the language identifier can then be used to drive further text
10
8
  analysis of, for example, sentiments and/or entities.
11
9
 
12
10
  ## Confused by some terminology?
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
+
2
3
  require_relative '../lib/opener/language_identifier'
3
4
 
4
- cli = Opener::LanguageIdentifier::CLI.new(:args => ARGV)
5
+ cli = Opener::LanguageIdentifier::CLI.new
5
6
 
6
- puts cli.run(STDIN.tty? ? nil : STDIN.read)
7
+ cli.run
@@ -2,9 +2,9 @@
2
2
 
3
3
  require 'opener/daemons'
4
4
 
5
- exec_path = File.expand_path("../../exec/language-identifier.rb", __FILE__)
6
-
7
- Opener::Daemons::Controller.new(
8
- :name => "language-identifier",
9
- :exec_path => exec_path
5
+ controller = Opener::Daemons::Controller.new(
6
+ :name => 'opener-language-identifier',
7
+ :exec_path => File.expand_path("../../exec/language-identifier.rb", __FILE__)
10
8
  )
9
+
10
+ controller.run
@@ -1,8 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'puma/cli'
3
+ require 'opener/webservice'
4
4
 
5
- rack_config = File.expand_path('../../config.ru', __FILE__)
5
+ parser = Opener::Webservice::OptionParser.new(
6
+ 'opener-language-identifier',
7
+ File.expand_path('../../config.ru', __FILE__)
8
+ )
6
9
 
7
- cli = Puma::CLI.new([rack_config] + ARGV)
8
- cli.run
10
+ parser.run
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'opener/daemons'
4
+
4
5
  require_relative '../lib/opener/language_identifier'
5
6
 
6
- options = Opener::Daemons::OptParser.parse!(ARGV)
7
- daemon = Opener::Daemons::Daemon.new(Opener::LanguageIdentifier, options)
7
+ daemon = Opener::Daemons::Daemon.new(Opener::LanguageIdentifier)
8
8
 
9
9
  daemon.start
@@ -1,8 +1,7 @@
1
+ require 'java'
1
2
  require 'open3'
2
- require 'optparse'
3
+ require 'slop'
3
4
  require 'builder'
4
- require 'java'
5
- require 'opener/core'
6
5
 
7
6
  require_relative '../../core/target/LanguageDetection-0.0.1.jar'
8
7
  import 'org.vicomtech.opennlp.LanguageDetection.CybozuDetector'
@@ -29,8 +28,9 @@ module Opener
29
28
  # @return [Hash]
30
29
  #
31
30
  DEFAULT_OPTIONS = {
32
- :args => [],
33
- :kaf => true
31
+ :args => [],
32
+ :kaf => true,
33
+ :probs => false
34
34
  }.freeze
35
35
 
36
36
  ##
@@ -42,6 +42,9 @@ module Opener
42
42
  # @option options [TrueClass|FalseClass] :kaf When set to `true` the
43
43
  # results will be displayed as KAF.
44
44
  #
45
+ # @option options [TrueClass|FalseClass] :probs When set, the probabilities
46
+ # are returned instead of the language/KAF.
47
+ #
45
48
  def initialize(options = {})
46
49
  @options = DEFAULT_OPTIONS.merge(options)
47
50
  @detector = Detector.instance
@@ -63,9 +66,6 @@ module Opener
63
66
  end
64
67
 
65
68
  return output
66
-
67
- rescue Exception => error
68
- return Opener::Core::ErrorLayer.new(input, error.message, self.class).add
69
69
  end
70
70
 
71
71
  alias identify run
@@ -1,138 +1,79 @@
1
1
  module Opener
2
2
  class LanguageIdentifier
3
3
  ##
4
- # CLI wrapper around {Opener::LanguageIdentifier} using OptionParser.
4
+ # CLI wrapper around {Opener::LanguageIdentifier} using Slop.
5
5
  #
6
- # @!attribute [r] options
7
- # @return [Hash]
8
- #
9
- # @!attribute [r] option_parser
10
- # @return [OptionParser]
6
+ # @!attribute [r] parser
7
+ # @return [Slop]
11
8
  #
12
9
  class CLI
13
- attr_reader :options, :option_parser
10
+ attr_reader :parser
11
+
12
+ def initialize
13
+ @parser = configure_slop
14
+ end
14
15
 
15
16
  ##
16
- # @param [Hash] options
17
+ # @param [Array] argv
17
18
  #
18
- def initialize(options = {})
19
- @options = DEFAULT_OPTIONS.merge(options)
19
+ def run(argv = ARGV)
20
+ parser.parse(argv)
21
+ end
20
22
 
21
- @option_parser = OptionParser.new do |opts|
22
- opts.program_name = 'language-identifier'
23
- opts.summary_indent = ' '
23
+ ##
24
+ # @return [Slop]
25
+ #
26
+ def configure_slop
27
+ return Slop.new(:strict => false, :indent => 2, :help => true) do
28
+ banner 'Usage: language-identifier [OPTIONS]'
24
29
 
25
- opts.on('-v', '--version', 'Shows the current version') do
26
- show_version
27
- end
30
+ separator <<-EOF.chomp
28
31
 
29
- opts.on('-k', '--[no-]kaf', 'Output the language as KAF') do |v|
30
- @options[:kaf] = v
31
- end
32
+ About:
32
33
 
33
- opts.on('-p', '--probs', 'Provide probabilities, assumes --no-kaf') do
34
- @options[:kaf] = false
35
- @options[:probs] = true
36
- end
34
+ Language detection for various languages such as English and Dutch. This
35
+ command reads input from STDIN. Output can be a language code as plain text,
36
+ a KAF document containing the input text and language code, or a list of
37
+ probabilities.
38
+
39
+ Example:
37
40
 
38
- opts.separator <<-EOF
39
-
40
- Examples:
41
-
42
- cat example_text.txt | #{opts.program_name} # Basic detection
43
-
44
- Languages:
45
-
46
- * ar Arabic
47
- * bg Bulgarian
48
- * bn Bengali
49
- * cs Czech
50
- * da Danish
51
- * de German
52
- * el Greek
53
- * en English
54
- * es Spanish
55
- * et Estonian
56
- * fa Persian
57
- * fi Finnish
58
- * fr French
59
- * gu Gujarati
60
- * he Hebrew
61
- * hi Hindi
62
- * hr Croatian
63
- * hu Hungarian
64
- * id Indonesian
65
- * it Italian
66
- * ja Japanese
67
- * kn Kannada
68
- * ko Korean
69
- * lt Lithuanian
70
- * lv Latvian
71
- * mk Macedonian
72
- * ml Malayalam
73
- * mr Marathi
74
- * ne Nepali
75
- * nl Dutch
76
- * no Norwegian
77
- * pa Punjabi
78
- * pl Polish
79
- * pt Portuguese
80
- * ro Romanian
81
- * ru Russian
82
- * sk Slovak
83
- * sl Slovene
84
- * so Somali
85
- * sq Albanian
86
- * sv Swedish
87
- * sw Swahili
88
- * ta Tamil
89
- * te Telugu
90
- * th Thai
91
- * tl Tagalog
92
- * tr Turkish
93
- * uk Ukrainian
94
- * ur Urdu
95
- * vi Vietnamese
96
- * zh-cn Simplified Chinese
97
- * zh-tw Traditional Chinese
41
+ cat some_file.kaf | language-identifier
98
42
  EOF
99
43
 
100
- opts.separator ""
101
- opts.separator "Common options:"
102
- # No argument, shows at tail. This will print an options summary.
103
- # Try it and see!
104
- opts.on_tail("-h", "--help", "Show this message.") do
105
- puts opts
106
- exit
44
+ separator "\nOptions:\n"
45
+
46
+ on :v, :version, 'Shows the current version' do
47
+ abort "language-identifier v#{VERSION} on #{RUBY_DESCRIPTION}"
107
48
  end
108
- end
109
- end
110
49
 
111
- ##
112
- # @param [String] input
113
- #
114
- def run(input)
115
- option_parser.parse!(options[:args])
116
- identifier = LanguageIdentifier.new(options)
50
+ on :'no-kaf', 'Disables KAF output'
51
+ on :p, :probs, 'Displays probabilities instead of a language code'
117
52
 
118
- output = identifier.run(input)
119
- puts output
120
- end
53
+ run do |opts, args|
54
+ enable_kaf = true
55
+ enable_probs = false
121
56
 
122
- private
57
+ if opts[:'no-kaf']
58
+ enable_kaf = false
59
+ end
123
60
 
124
- ##
125
- # Shows the help message and exits the program.
126
- #
127
- def show_help
128
- abort option_parser.to_s
129
- end
61
+ if opts[:probs]
62
+ enable_kaf = false
63
+ enable_probs = true
64
+ end
130
65
 
131
- ##
132
- # Shows the version and exits the program.
133
- #
134
- def show_version
135
- abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
66
+ identifier = LanguageIdentifier.new(
67
+ :args => args,
68
+ :kaf => enable_kaf,
69
+ :probs => enable_probs
70
+ )
71
+
72
+ input = STDIN.tty? ? nil : STDIN.read
73
+
74
+ puts identifier.run(input)
75
+ end
76
+ end
136
77
  end
137
78
  end # CLI
138
79
  end # LanguageIdentifier
@@ -4,6 +4,11 @@ import 'org.vicomtech.opennlp.LanguageDetection.CybozuDetector'
4
4
 
5
5
  module Opener
6
6
  class LanguageIdentifier
7
+ ##
8
+ # Singleton class wrapped around the Cybozu detector. The Cybozu code uses
9
+ # the factory pattern and stores a bunch of things on class level. As such
10
+ # the Cybozu code is *not* thread-safe.
11
+ #
7
12
  class Detector
8
13
  attr_reader :options
9
14
 
@@ -1,5 +1,3 @@
1
- require 'sinatra/base'
2
- require 'httpclient'
3
1
  require 'opener/webservice'
4
2
 
5
3
  module Opener
@@ -7,27 +5,11 @@ module Opener
7
5
  ##
8
6
  # A basic language identification server powered by Sinatra.
9
7
  #
10
- class Server < Webservice
8
+ class Server < Opener::Webservice::Server
11
9
  set :views, File.expand_path('../views', __FILE__)
12
- text_processor LanguageIdentifier
13
- accepted_params :input, :kaf, :benchmark
14
10
 
15
- ##
16
- # Gets the Analyzed output of an input.
17
- #
18
- # @param [Hash] options The options for the text_processor
19
- # @return [String] output the output of the text_processor
20
- # @return [Symbol] type the output type ot the text_processor
21
- #
22
- # @raise RunetimeError Raised when the tagging process failed.
23
- #
24
- def analyze(options)
25
- options[:kaf] = true if options[:kaf].nil?
26
- processor = text_processor.new(options)
27
- output = processor.run(options[:input])
28
-
29
- return output
30
- end
11
+ self.text_processor = LanguageIdentifier
12
+ self.accepted_params = [:input, :kaf]
31
13
  end # Server
32
14
  end # LanguageIdentifier
33
15
  end # Opener
@@ -1,5 +1,5 @@
1
1
  module Opener
2
2
  class LanguageIdentifier
3
- VERSION = "3.1.7"
3
+ VERSION = "4.1.0"
4
4
  end
5
5
  end
@@ -32,20 +32,10 @@
32
32
  <div>
33
33
  <label for="kaf">
34
34
  <input type='hidden' value='false' name='kaf'>
35
- <input type="checkbox" name="kaf" id="kaf" checked/>
35
+ <input type="checkbox" name="kaf" id="kaf" checked />
36
36
 
37
37
  Output KAF instead of just the language code
38
38
  </label>
39
-
40
- <br>
41
-
42
- <label for="benchmark">
43
- <input type="checkbox" name="benchmark" />
44
-
45
- Include benchmark output in the KAF
46
- </label>
47
-
48
- <br>
49
39
  <br>
50
40
  </div>
51
41
  <% 10.times do |t| %>
@@ -25,15 +25,12 @@ Gem::Specification.new do |gem|
25
25
 
26
26
  gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
27
27
 
28
+ gem.add_dependency 'opener-daemons', '~> 2.2'
29
+ gem.add_dependency 'opener-webservice', '~> 2.1'
30
+
28
31
  gem.add_dependency 'builder'
29
- gem.add_dependency 'puma'
30
- gem.add_dependency 'sinatra', '~>1.4.2'
31
- gem.add_dependency 'httpclient'
32
- gem.add_dependency 'uuidtools'
33
- gem.add_dependency 'opener-webservice'
34
- gem.add_dependency 'opener-daemons'
35
32
  gem.add_dependency 'nokogiri'
36
- gem.add_dependency 'opener-core', '~> 1.0'
33
+ gem.add_dependency 'slop', '~> 3.5'
37
34
 
38
35
  gem.add_development_dependency 'rspec', '~> 3.0'
39
36
  gem.add_development_dependency 'cucumber'
metadata CHANGED
@@ -1,101 +1,45 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-language-identifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.7
4
+ version: 4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-30 00:00:00.000000000 Z
11
+ date: 2014-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: builder
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - '>='
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- requirement: !ruby/object:Gem::Requirement
21
- requirements:
22
- - - '>='
23
- - !ruby/object:Gem::Version
24
- version: '0'
25
- prerelease: false
26
- type: :runtime
27
- - !ruby/object:Gem::Dependency
28
- name: puma
29
- version_requirements: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - '>='
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- requirement: !ruby/object:Gem::Requirement
35
- requirements:
36
- - - '>='
37
- - !ruby/object:Gem::Version
38
- version: '0'
39
- prerelease: false
40
- type: :runtime
41
- - !ruby/object:Gem::Dependency
42
- name: sinatra
14
+ name: opener-daemons
43
15
  version_requirements: !ruby/object:Gem::Requirement
44
16
  requirements:
45
17
  - - ~>
46
18
  - !ruby/object:Gem::Version
47
- version: 1.4.2
19
+ version: '2.2'
48
20
  requirement: !ruby/object:Gem::Requirement
49
21
  requirements:
50
22
  - - ~>
51
23
  - !ruby/object:Gem::Version
52
- version: 1.4.2
53
- prerelease: false
54
- type: :runtime
55
- - !ruby/object:Gem::Dependency
56
- name: httpclient
57
- version_requirements: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- requirement: !ruby/object:Gem::Requirement
63
- requirements:
64
- - - '>='
65
- - !ruby/object:Gem::Version
66
- version: '0'
67
- prerelease: false
68
- type: :runtime
69
- - !ruby/object:Gem::Dependency
70
- name: uuidtools
71
- version_requirements: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - '>='
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- requirement: !ruby/object:Gem::Requirement
77
- requirements:
78
- - - '>='
79
- - !ruby/object:Gem::Version
80
- version: '0'
24
+ version: '2.2'
81
25
  prerelease: false
82
26
  type: :runtime
83
27
  - !ruby/object:Gem::Dependency
84
28
  name: opener-webservice
85
29
  version_requirements: !ruby/object:Gem::Requirement
86
30
  requirements:
87
- - - '>='
31
+ - - ~>
88
32
  - !ruby/object:Gem::Version
89
- version: '0'
33
+ version: '2.1'
90
34
  requirement: !ruby/object:Gem::Requirement
91
35
  requirements:
92
- - - '>='
36
+ - - ~>
93
37
  - !ruby/object:Gem::Version
94
- version: '0'
38
+ version: '2.1'
95
39
  prerelease: false
96
40
  type: :runtime
97
41
  - !ruby/object:Gem::Dependency
98
- name: opener-daemons
42
+ name: builder
99
43
  version_requirements: !ruby/object:Gem::Requirement
100
44
  requirements:
101
45
  - - '>='
@@ -123,17 +67,17 @@ dependencies:
123
67
  prerelease: false
124
68
  type: :runtime
125
69
  - !ruby/object:Gem::Dependency
126
- name: opener-core
70
+ name: slop
127
71
  version_requirements: !ruby/object:Gem::Requirement
128
72
  requirements:
129
73
  - - ~>
130
74
  - !ruby/object:Gem::Version
131
- version: '1.0'
75
+ version: '3.5'
132
76
  requirement: !ruby/object:Gem::Requirement
133
77
  requirements:
134
78
  - - ~>
135
79
  - !ruby/object:Gem::Version
136
- version: '1.0'
80
+ version: '3.5'
137
81
  prerelease: false
138
82
  type: :runtime
139
83
  - !ruby/object:Gem::Dependency