opener-tokenizer 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dab5882d292da38d032ad2de7dfb8c289372428b
4
- data.tar.gz: 19ee04d381d370e64d606dc41e3472296980df67
3
+ metadata.gz: a01694d8c3c4cabbeadcee2b2e478ed699a21555
4
+ data.tar.gz: 3513b6bd6fe22ea36edc82eb2248204f24dedc8e
5
5
  SHA512:
6
- metadata.gz: dcdb9e6f44524b5a1a23aa54dea0ed68e4b048b5b0b929c0964aeb576d2eecdc2877a0d054cd73e8afd67ad936016a5e044ae9b95a507219a9f930a7bf4775ec
7
- data.tar.gz: e8660e048b3b58049bed277b207a69a5c763eac89049c251b750e0d4e111bb003030987540758686f175f4cfd2be76959a3534262faf57b5475f7982286a966c
6
+ metadata.gz: 3928380d43ccd980b675562c0a2b19e3ae45ebe410d300314685678b9c2829dac0c66b5cc805fbc4a22ed6267563869fe7fca5bee11ddc78e17cea6cabcec5f6
7
+ data.tar.gz: dc0a947729d4f97f49a919a240c8ab73b8122315add3aeecc65cd8ada3c0474e5faca2d4c0e84f69dce77ad74937f22e7b65932bae146c5592b129d17f5ed42f
data/bin/tokenizer CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  require_relative '../lib/opener/tokenizer'
4
4
 
5
- cli = Opener::Tokenizer::CLI.new(:args => ARGV)
5
+ cli = Opener::Tokenizer::CLI.new
6
6
 
7
- cli.run(STDIN.tty? ? nil : STDIN.read)
7
+ cli.run
@@ -1,7 +1,7 @@
1
1
  require 'opener/tokenizers/base'
2
2
  require 'nokogiri'
3
3
  require 'open3'
4
- require 'optparse'
4
+ require 'slop'
5
5
 
6
6
  require_relative 'tokenizer/version'
7
7
  require_relative 'tokenizer/cli'
@@ -52,35 +52,32 @@ module Opener
52
52
  end
53
53
 
54
54
  ##
55
- # Processes the input and returns an array containing the output of STDOUT,
56
- # STDERR and an object containing process information.
55
+ # Tokenizes the input and returns the results as a KAF document.
57
56
  #
58
57
  # @param [String] input
59
- # @return [Array]
58
+ # @return [String]
60
59
  #
61
60
  def run(input)
62
- begin
63
- if options[:kaf]
64
- language, input = kaf_elements(input)
65
- else
66
- language = options[:language]
67
- end
61
+ if options[:kaf]
62
+ language, input = kaf_elements(input)
63
+ else
64
+ language = options[:language]
65
+ end
68
66
 
69
- unless valid_language?(language)
70
- raise ArgumentError, "The specified language (#{language}) is invalid"
71
- end
67
+ unless valid_language?(language)
68
+ raise ArgumentError, "The specified language (#{language}) is invalid"
69
+ end
72
70
 
73
- kernel = language_constant(language).new(:args => options[:args])
71
+ kernel = language_constant(language).new(:args => options[:args])
74
72
 
75
- stdout, stderr, process = Open3.capture3(
76
- *kernel.command.split(" "),
77
- :stdin_data => input
78
- )
73
+ stdout, stderr, process = Open3.capture3(
74
+ *kernel.command.split(" "),
75
+ :stdin_data => input
76
+ )
79
77
 
80
- raise stderr unless process.success?
78
+ raise stderr unless process.success?
81
79
 
82
- return stdout
83
- end
80
+ return stdout
84
81
  end
85
82
 
86
83
  alias tokenize run
@@ -1,110 +1,92 @@
1
1
  module Opener
2
2
  class Tokenizer
3
3
  ##
4
- # CLI wrapper around {Opener::Tokenizer} using OptionParser.
4
+ # CLI wrapper around {Opener::Tokenizer} using Slop.
5
5
  #
6
- # @!attribute [r] options
7
- # @return [Hash]
8
- # @!attribute [r] option_parser
9
- # @return [OptionParser]
6
+ # @!attribute [r] parser
7
+ # @return [Slop]
10
8
  #
11
9
  class CLI
12
- attr_reader :options, :option_parser
10
+ attr_reader :parser
11
+
12
+ def initialize
13
+ @parser = configure_slop
14
+ end
13
15
 
14
16
  ##
15
- # @param [Hash] options
17
+ # @param [Array] argv
16
18
  #
17
- def initialize(options = {})
18
- @options = DEFAULT_OPTIONS.merge(options)
19
-
20
- @option_parser = OptionParser.new do |opts|
21
- opts.program_name = 'tokenizer'
22
- opts.summary_indent = ' '
23
-
24
- opts.on('-h', '--help', 'Shows this help message') do
25
- show_help
26
- end
27
-
28
- opts.on('-v', '--version', 'Shows the current version') do
29
- show_version
30
- end
19
+ def run(argv = ARGV)
20
+ parser.parse(argv)
21
+ end
31
22
 
32
- opts.on(
33
- '-l',
34
- '--language [VALUE]',
35
- 'Uses this specific language'
36
- ) do |value|
37
- @options[:language] = value
38
- @options[:kaf] = false
39
- end
23
+ ##
24
+ # @return [Slop]
25
+ #
26
+ def configure_slop
27
+ return Slop.new(:strict => false, :indent => 2, :help => true) do
28
+ banner 'Usage: tokenizer [OPTIONS]'
40
29
 
41
- opts.on('-k', '--kaf', 'Treats the input as a KAF document') do
42
- @options[:kaf] = true
43
- end
30
+ separator <<-EOF.chomp
44
31
 
45
- opts.on('-p', '--plain', 'Treats the input as plain text') do
46
- @options[:kaf] = false
47
- end
32
+ About:
48
33
 
49
- opts.separator <<-EOF
34
+ Tokenizer for KAF/plain text documents with support for various languages
35
+ such as Dutch and English. This command reads input from STDIN.
50
36
 
51
37
  Examples:
52
38
 
53
- cat example.txt | #{opts.program_name} -l en # Manually specify the language
54
- cat example.kaf | #{opts.program_name} # Uses the xml:lang attribute
39
+ cat example.txt | tokenizer -l en # Manually specify the language
40
+ cat example.kaf | tokenizer # Uses the xml:lang attribute
55
41
 
56
42
  Languages:
57
43
 
58
- * Dutch (nl)
59
- * English (en)
60
- * French (fr)
61
- * German (de)
62
- * Italian (it)
63
- * Spanish (es)
44
+ * Dutch (nl)
45
+ * English (en)
46
+ * French (fr)
47
+ * German (de)
48
+ * Italian (it)
49
+ * Spanish (es)
64
50
 
65
51
  KAF Input:
66
52
 
67
- If you give a KAF file as an input (-k or --kaf) the language is taken from
68
- the xml:lang attribute inside the file. Else it expects that you give the
69
- language as an argument (-l or --language)
53
+ If you give a KAF file as an input (-k or --kaf) the language is taken from
54
+ the xml:lang attribute inside the file. Else it expects that you give the
55
+ language as an argument (-l or --language)
70
56
 
71
- Sample KAF syntax:
57
+ Example KAF:
72
58
 
73
- <?xml version="1.0" encoding="UTF-8" standalone="no"?>
74
- <KAF version="v1.opener" xml:lang="en">
75
- <raw>This is some text.</raw>
76
- </KAF>
59
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
60
+ <KAF version="v1.opener" xml:lang="en">
61
+ <raw>This is some text.</raw>
62
+ </KAF>
77
63
  EOF
78
- end
79
- end
80
64
 
81
- ##
82
- # @param [String] input
83
- #
84
- def run(input)
85
- option_parser.parse!(options[:args])
65
+ separator "\nOptions:\n"
86
66
 
87
- tokenizer = Tokenizer.new(options)
67
+ on :v, :version, 'Shows the current version' do
68
+ abort "tokenizer v#{VERSION} on #{RUBY_DESCRIPTION}"
69
+ end
88
70
 
89
- stdout, stderr, process = tokenizer.run(input)
71
+ on :l=, :language=, 'A specific language to use',
72
+ :as => String,
73
+ :default => DEFAULT_LANGUAGE
90
74
 
91
- puts stdout
92
- end
75
+ on :k, :kaf, 'Treats the input as a KAF document'
76
+ on :p, :plain, 'Treats the input as plain text'
93
77
 
94
- private
78
+ run do |opts, args|
79
+ tokenizer = Tokenizer.new(
80
+ :args => args,
81
+ :kaf => opts[:plain] ? false : true,
82
+ :language => opts[:language]
83
+ )
95
84
 
96
- ##
97
- # Shows the help message and exits the program.
98
- #
99
- def show_help
100
- abort option_parser.to_s
101
- end
85
+ input = STDIN.tty? ? nil : STDIN.read
102
86
 
103
- ##
104
- # Shows the version and exits the program.
105
- #
106
- def show_version
107
- abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
87
+ puts tokenizer.run(input)
88
+ end
89
+ end
108
90
  end
109
91
  end # CLI
110
92
  end # Tokenizer
@@ -1,5 +1,5 @@
1
1
  module Opener
2
2
  class Tokenizer
3
- VERSION = '2.0.0'
3
+ VERSION = '2.1.0'
4
4
  end
5
5
  end
@@ -24,12 +24,14 @@ Gem::Specification.new do |gem|
24
24
 
25
25
  gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
26
26
 
27
- gem.add_dependency 'nokogiri'
28
27
  gem.add_dependency 'opener-tokenizer-base', '~> 1.0'
29
28
  gem.add_dependency 'opener-webservice', '~> 2.1'
30
29
  gem.add_dependency 'opener-daemons', '~> 2.1'
31
30
  gem.add_dependency 'opener-core', '~> 2.0'
32
31
 
32
+ gem.add_dependency 'nokogiri'
33
+ gem.add_dependency 'slop', '~> 3.5'
34
+
33
35
  gem.add_development_dependency 'rspec'
34
36
  gem.add_development_dependency 'cucumber'
35
37
  gem.add_development_dependency 'pry'
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-24 00:00:00.000000000 Z
11
+ date: 2014-11-26 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: nokogiri
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: opener-tokenizer-base
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +66,34 @@ dependencies:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
68
  version: '2.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: slop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.5'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.5'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rspec
85
99
  requirement: !ruby/object:Gem::Requirement