opener-language-identifier 3.1.7 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 849ccdcd28088ee1bbb6060b641b0cab043739b5
4
- data.tar.gz: 56d254dff16c2d989182eca81760b576653309ac
3
+ metadata.gz: 1c103a6e78b0e47383c82198173460c555eefe19
4
+ data.tar.gz: 44db5e6da5a34746ef977773fe5bba7b093cd49b
5
5
  SHA512:
6
- metadata.gz: 47b0b2973a35c11e28b81727f32271b29535c5ed34eb1a1f6385abce7b948d831a0b3a2b0ddc37a684b0712ace6e13e32ce7313f830461390fed029bc5c9d5d3
7
- data.tar.gz: 7b6a18125d63fcfb1f511a6218d381c41039328fceb0099eda4fb33fa4c05f5241a9514501a71414e39bcb7a97f174a50955dba950c98fc139c000dee62dde06
6
+ metadata.gz: 8b05e38f97c517b1f4c6527f2d4e941de89e145942aabb2cad675a5938643250a857b37a031896b10f1aad76d553967fe9435c4f9fa956861a1b24419b115fe5
7
+ data.tar.gz: 8a2aada54780b6f0c0c8f47adf68a0477885ea864e59e088c3d9f4a5e8ca9903ac32d7620c9fac58c78d296b7eb717215ffc9bcbe7d796f88c11385c75d20c0a
data/README.md CHANGED
@@ -4,9 +4,7 @@
4
4
 
5
5
  The language identifier takes raw text and tries to figure out what language it
6
6
  was written in. The output can either be a plain-text i18n language code or a
7
- basic KAF document containing the language and raw input text.
8
-
9
- The output of the language identifier can then be used to drive further text
7
+ basic KAF document containing the language and raw input text. The output of the language identifier can then be used to drive further text
10
8
  analysis of, for example, sentiments and/or entities.
11
9
 
12
10
  ## Confused by some terminology?
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
+
2
3
  require_relative '../lib/opener/language_identifier'
3
4
 
4
- cli = Opener::LanguageIdentifier::CLI.new(:args => ARGV)
5
+ cli = Opener::LanguageIdentifier::CLI.new
5
6
 
6
- puts cli.run(STDIN.tty? ? nil : STDIN.read)
7
+ cli.run
@@ -2,9 +2,9 @@
2
2
 
3
3
  require 'opener/daemons'
4
4
 
5
- exec_path = File.expand_path("../../exec/language-identifier.rb", __FILE__)
6
-
7
- Opener::Daemons::Controller.new(
8
- :name => "language-identifier",
9
- :exec_path => exec_path
5
+ controller = Opener::Daemons::Controller.new(
6
+ :name => 'opener-language-identifier',
7
+ :exec_path => File.expand_path("../../exec/language-identifier.rb", __FILE__)
10
8
  )
9
+
10
+ controller.run
@@ -1,8 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'puma/cli'
3
+ require 'opener/webservice'
4
4
 
5
- rack_config = File.expand_path('../../config.ru', __FILE__)
5
+ parser = Opener::Webservice::OptionParser.new(
6
+ 'opener-language-identifier',
7
+ File.expand_path('../../config.ru', __FILE__)
8
+ )
6
9
 
7
- cli = Puma::CLI.new([rack_config] + ARGV)
8
- cli.run
10
+ parser.run
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'opener/daemons'
4
+
4
5
  require_relative '../lib/opener/language_identifier'
5
6
 
6
- options = Opener::Daemons::OptParser.parse!(ARGV)
7
- daemon = Opener::Daemons::Daemon.new(Opener::LanguageIdentifier, options)
7
+ daemon = Opener::Daemons::Daemon.new(Opener::LanguageIdentifier)
8
8
 
9
9
  daemon.start
@@ -1,8 +1,7 @@
1
+ require 'java'
1
2
  require 'open3'
2
- require 'optparse'
3
+ require 'slop'
3
4
  require 'builder'
4
- require 'java'
5
- require 'opener/core'
6
5
 
7
6
  require_relative '../../core/target/LanguageDetection-0.0.1.jar'
8
7
  import 'org.vicomtech.opennlp.LanguageDetection.CybozuDetector'
@@ -29,8 +28,9 @@ module Opener
29
28
  # @return [Hash]
30
29
  #
31
30
  DEFAULT_OPTIONS = {
32
- :args => [],
33
- :kaf => true
31
+ :args => [],
32
+ :kaf => true,
33
+ :probs => false
34
34
  }.freeze
35
35
 
36
36
  ##
@@ -42,6 +42,9 @@ module Opener
42
42
  # @option options [TrueClass|FalseClass] :kaf When set to `true` the
43
43
  # results will be displayed as KAF.
44
44
  #
45
+ # @option options [TrueClass|FalseClass] :probs When set the probabilities
46
+ # are returned instead of the language/KAF.
47
+ #
45
48
  def initialize(options = {})
46
49
  @options = DEFAULT_OPTIONS.merge(options)
47
50
  @detector = Detector.instance
@@ -63,9 +66,6 @@ module Opener
63
66
  end
64
67
 
65
68
  return output
66
-
67
- rescue Exception => error
68
- return Opener::Core::ErrorLayer.new(input, error.message, self.class).add
69
69
  end
70
70
 
71
71
  alias identify run
@@ -1,138 +1,79 @@
1
1
  module Opener
2
2
  class LanguageIdentifier
3
3
  ##
4
- # CLI wrapper around {Opener::LanguageIdentifier} using OptionParser.
4
+ # CLI wrapper around {Opener::LanguageIdentifier} using Slop.
5
5
  #
6
- # @!attribute [r] options
7
- # @return [Hash]
8
- #
9
- # @!attribute [r] option_parser
10
- # @return [OptionParser]
6
+ # @!attribute [r] parser
7
+ # @return [Slop]
11
8
  #
12
9
  class CLI
13
- attr_reader :options, :option_parser
10
+ attr_reader :parser
11
+
12
+ def initialize
13
+ @parser = configure_slop
14
+ end
14
15
 
15
16
  ##
16
- # @param [Hash] options
17
+ # @param [Array] argv
17
18
  #
18
- def initialize(options = {})
19
- @options = DEFAULT_OPTIONS.merge(options)
19
+ def run(argv = ARGV)
20
+ parser.parse(argv)
21
+ end
20
22
 
21
- @option_parser = OptionParser.new do |opts|
22
- opts.program_name = 'language-identifier'
23
- opts.summary_indent = ' '
23
+ ##
24
+ # @return [Slop]
25
+ #
26
+ def configure_slop
27
+ return Slop.new(:strict => false, :indent => 2, :help => true) do
28
+ banner 'Usage: language-identifier [OPTIONS]'
24
29
 
25
- opts.on('-v', '--version', 'Shows the current version') do
26
- show_version
27
- end
30
+ separator <<-EOF.chomp
28
31
 
29
- opts.on('-k', '--[no-]kaf', 'Output the language as KAF') do |v|
30
- @options[:kaf] = v
31
- end
32
+ About:
32
33
 
33
- opts.on('-p', '--probs', 'Provide probabilities, assumes --no-kaf') do
34
- @options[:kaf] = false
35
- @options[:probs] = true
36
- end
34
+ Language detection for various languages such as English and Dutch. This
35
+ command reads input from STDIN. Output can be a language code as plain text,
36
+ a KAF document containing the input text and language code, or a list of
37
+ probabilities.
38
+
39
+ Example:
37
40
 
38
- opts.separator <<-EOF
39
-
40
- Examples:
41
-
42
- cat example_text.txt | #{opts.program_name} # Basic detection
43
-
44
- Languages:
45
-
46
- * ar Arabic
47
- * bg Bulgarian
48
- * bn Bengali
49
- * cs Czech
50
- * da Danish
51
- * de German
52
- * el Greek
53
- * en English
54
- * es Spanish
55
- * et Estonian
56
- * fa Persian
57
- * fi Finnish
58
- * fr French
59
- * gu Gujarati
60
- * he Hebrew
61
- * hi Hindi
62
- * hr Croatian
63
- * hu Hungarian
64
- * id Indonesian
65
- * it Italian
66
- * ja Japanese
67
- * kn Kannada
68
- * ko Korean
69
- * lt Lithuanian
70
- * lv Latvian
71
- * mk Macedonian
72
- * ml Malayalam
73
- * mr Marathi
74
- * ne Nepali
75
- * nl Dutch
76
- * no Norwegian
77
- * pa Punjabi
78
- * pl Polish
79
- * pt Portuguese
80
- * ro Romanian
81
- * ru Russian
82
- * sk Slovak
83
- * sl Slovene
84
- * so Somali
85
- * sq Albanian
86
- * sv Swedish
87
- * sw Swahili
88
- * ta Tamil
89
- * te Telugu
90
- * th Thai
91
- * tl Tagalog
92
- * tr Turkish
93
- * uk Ukrainian
94
- * ur Urdu
95
- * vi Vietnamese
96
- * zh-cn Simplified Chinese
97
- * zh-tw Traditional Chinese
41
+ cat some_file.kaf | language-identifier
98
42
  EOF
99
43
 
100
- opts.separator ""
101
- opts.separator "Common options:"
102
- # No argument, shows at tail. This will print an options summary.
103
- # Try it and see!
104
- opts.on_tail("-h", "--help", "Show this message.") do
105
- puts opts
106
- exit
44
+ separator "\nOptions:\n"
45
+
46
+ on :v, :version, 'Shows the current version' do
47
+ abort "language-identifier v#{VERSION} on #{RUBY_DESCRIPTION}"
107
48
  end
108
- end
109
- end
110
49
 
111
- ##
112
- # @param [String] input
113
- #
114
- def run(input)
115
- option_parser.parse!(options[:args])
116
- identifier = LanguageIdentifier.new(options)
50
+ on :'no-kaf', 'Disables KAF output'
51
+ on :p, :probs, 'Displays probabilities instead of a language code'
117
52
 
118
- output = identifier.run(input)
119
- puts output
120
- end
53
+ run do |opts, args|
54
+ enable_kaf = true
55
+ enable_probs = false
121
56
 
122
- private
57
+ if opts[:'no-kaf']
58
+ enable_kaf = false
59
+ end
123
60
 
124
- ##
125
- # Shows the help message and exits the program.
126
- #
127
- def show_help
128
- abort option_parser.to_s
129
- end
61
+ if opts[:probs]
62
+ enable_kaf = false
63
+ enable_probs = true
64
+ end
130
65
 
131
- ##
132
- # Shows the version and exits the program.
133
- #
134
- def show_version
135
- abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
66
+ identifier = LanguageIdentifier.new(
67
+ :args => args,
68
+ :kaf => enable_kaf,
69
+ :probs => enable_probs
70
+ )
71
+
72
+ input = STDIN.tty? ? nil : STDIN.read
73
+
74
+ puts identifier.run(input)
75
+ end
76
+ end
136
77
  end
137
78
  end # CLI
138
79
  end # LanguageIdentifier
@@ -4,6 +4,11 @@ import 'org.vicomtech.opennlp.LanguageDetection.CybozuDetector'
4
4
 
5
5
  module Opener
6
6
  class LanguageIdentifier
7
+ ##
8
+ # Singleton class wrapped around the Cybozu detector. The Cybozu code uses
9
+ # the factory pattern and stores a bunch of things on class level. As such
10
+ # the Cybozu code is *not* thread-safe.
11
+ #
7
12
  class Detector
8
13
  attr_reader :options
9
14
 
@@ -1,5 +1,3 @@
1
- require 'sinatra/base'
2
- require 'httpclient'
3
1
  require 'opener/webservice'
4
2
 
5
3
  module Opener
@@ -7,27 +5,11 @@ module Opener
7
5
  ##
8
6
  # A basic language identification server powered by Sinatra.
9
7
  #
10
- class Server < Webservice
8
+ class Server < Opener::Webservice::Server
11
9
  set :views, File.expand_path('../views', __FILE__)
12
- text_processor LanguageIdentifier
13
- accepted_params :input, :kaf, :benchmark
14
10
 
15
- ##
16
- # Gets the Analyzed output of an input.
17
- #
18
- # @param [Hash] options The options for the text_processor
19
- # @return [String] output the output of the text_processor
20
- # @return [Symbol] type the output type ot the text_processor
21
- #
22
- # @raise RunetimeError Raised when the tagging process failed.
23
- #
24
- def analyze(options)
25
- options[:kaf] = true if options[:kaf].nil?
26
- processor = text_processor.new(options)
27
- output = processor.run(options[:input])
28
-
29
- return output
30
- end
11
+ self.text_processor = LanguageIdentifier
12
+ self.accepted_params = [:input, :kaf]
31
13
  end # Server
32
14
  end # LanguageIdentifier
33
15
  end # Opener
@@ -1,5 +1,5 @@
1
1
  module Opener
2
2
  class LanguageIdentifier
3
- VERSION = "3.1.7"
3
+ VERSION = "4.1.0"
4
4
  end
5
5
  end
@@ -32,20 +32,10 @@
32
32
  <div>
33
33
  <label for="kaf">
34
34
  <input type='hidden' value='false' name='kaf'>
35
- <input type="checkbox" name="kaf" id="kaf" checked/>
35
+ <input type="checkbox" name="kaf" id="kaf" checked />
36
36
 
37
37
  Output KAF instead of just the language code
38
38
  </label>
39
-
40
- <br>
41
-
42
- <label for="benchmark">
43
- <input type="checkbox" name="benchmark" />
44
-
45
- Include benchmark output in the KAF
46
- </label>
47
-
48
- <br>
49
39
  <br>
50
40
  </div>
51
41
  <% 10.times do |t| %>
@@ -25,15 +25,12 @@ Gem::Specification.new do |gem|
25
25
 
26
26
  gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
27
27
 
28
+ gem.add_dependency 'opener-daemons', '~> 2.2'
29
+ gem.add_dependency 'opener-webservice', '~> 2.1'
30
+
28
31
  gem.add_dependency 'builder'
29
- gem.add_dependency 'puma'
30
- gem.add_dependency 'sinatra', '~>1.4.2'
31
- gem.add_dependency 'httpclient'
32
- gem.add_dependency 'uuidtools'
33
- gem.add_dependency 'opener-webservice'
34
- gem.add_dependency 'opener-daemons'
35
32
  gem.add_dependency 'nokogiri'
36
- gem.add_dependency 'opener-core', '~> 1.0'
33
+ gem.add_dependency 'slop', '~> 3.5'
37
34
 
38
35
  gem.add_development_dependency 'rspec', '~> 3.0'
39
36
  gem.add_development_dependency 'cucumber'
metadata CHANGED
@@ -1,101 +1,45 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-language-identifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.7
4
+ version: 4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-30 00:00:00.000000000 Z
11
+ date: 2014-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: builder
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - '>='
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- requirement: !ruby/object:Gem::Requirement
21
- requirements:
22
- - - '>='
23
- - !ruby/object:Gem::Version
24
- version: '0'
25
- prerelease: false
26
- type: :runtime
27
- - !ruby/object:Gem::Dependency
28
- name: puma
29
- version_requirements: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - '>='
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- requirement: !ruby/object:Gem::Requirement
35
- requirements:
36
- - - '>='
37
- - !ruby/object:Gem::Version
38
- version: '0'
39
- prerelease: false
40
- type: :runtime
41
- - !ruby/object:Gem::Dependency
42
- name: sinatra
14
+ name: opener-daemons
43
15
  version_requirements: !ruby/object:Gem::Requirement
44
16
  requirements:
45
17
  - - ~>
46
18
  - !ruby/object:Gem::Version
47
- version: 1.4.2
19
+ version: '2.2'
48
20
  requirement: !ruby/object:Gem::Requirement
49
21
  requirements:
50
22
  - - ~>
51
23
  - !ruby/object:Gem::Version
52
- version: 1.4.2
53
- prerelease: false
54
- type: :runtime
55
- - !ruby/object:Gem::Dependency
56
- name: httpclient
57
- version_requirements: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- requirement: !ruby/object:Gem::Requirement
63
- requirements:
64
- - - '>='
65
- - !ruby/object:Gem::Version
66
- version: '0'
67
- prerelease: false
68
- type: :runtime
69
- - !ruby/object:Gem::Dependency
70
- name: uuidtools
71
- version_requirements: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - '>='
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- requirement: !ruby/object:Gem::Requirement
77
- requirements:
78
- - - '>='
79
- - !ruby/object:Gem::Version
80
- version: '0'
24
+ version: '2.2'
81
25
  prerelease: false
82
26
  type: :runtime
83
27
  - !ruby/object:Gem::Dependency
84
28
  name: opener-webservice
85
29
  version_requirements: !ruby/object:Gem::Requirement
86
30
  requirements:
87
- - - '>='
31
+ - - ~>
88
32
  - !ruby/object:Gem::Version
89
- version: '0'
33
+ version: '2.1'
90
34
  requirement: !ruby/object:Gem::Requirement
91
35
  requirements:
92
- - - '>='
36
+ - - ~>
93
37
  - !ruby/object:Gem::Version
94
- version: '0'
38
+ version: '2.1'
95
39
  prerelease: false
96
40
  type: :runtime
97
41
  - !ruby/object:Gem::Dependency
98
- name: opener-daemons
42
+ name: builder
99
43
  version_requirements: !ruby/object:Gem::Requirement
100
44
  requirements:
101
45
  - - '>='
@@ -123,17 +67,17 @@ dependencies:
123
67
  prerelease: false
124
68
  type: :runtime
125
69
  - !ruby/object:Gem::Dependency
126
- name: opener-core
70
+ name: slop
127
71
  version_requirements: !ruby/object:Gem::Requirement
128
72
  requirements:
129
73
  - - ~>
130
74
  - !ruby/object:Gem::Version
131
- version: '1.0'
75
+ version: '3.5'
132
76
  requirement: !ruby/object:Gem::Requirement
133
77
  requirements:
134
78
  - - ~>
135
79
  - !ruby/object:Gem::Version
136
- version: '1.0'
80
+ version: '3.5'
137
81
  prerelease: false
138
82
  type: :runtime
139
83
  - !ruby/object:Gem::Dependency