opener-tokenizer 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dab5882d292da38d032ad2de7dfb8c289372428b
4
- data.tar.gz: 19ee04d381d370e64d606dc41e3472296980df67
3
+ metadata.gz: a01694d8c3c4cabbeadcee2b2e478ed699a21555
4
+ data.tar.gz: 3513b6bd6fe22ea36edc82eb2248204f24dedc8e
5
5
  SHA512:
6
- metadata.gz: dcdb9e6f44524b5a1a23aa54dea0ed68e4b048b5b0b929c0964aeb576d2eecdc2877a0d054cd73e8afd67ad936016a5e044ae9b95a507219a9f930a7bf4775ec
7
- data.tar.gz: e8660e048b3b58049bed277b207a69a5c763eac89049c251b750e0d4e111bb003030987540758686f175f4cfd2be76959a3534262faf57b5475f7982286a966c
6
+ metadata.gz: 3928380d43ccd980b675562c0a2b19e3ae45ebe410d300314685678b9c2829dac0c66b5cc805fbc4a22ed6267563869fe7fca5bee11ddc78e17cea6cabcec5f6
7
+ data.tar.gz: dc0a947729d4f97f49a919a240c8ab73b8122315add3aeecc65cd8ada3c0474e5faca2d4c0e84f69dce77ad74937f22e7b65932bae146c5592b129d17f5ed42f
data/bin/tokenizer CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  require_relative '../lib/opener/tokenizer'
4
4
 
5
- cli = Opener::Tokenizer::CLI.new(:args => ARGV)
5
+ cli = Opener::Tokenizer::CLI.new
6
6
 
7
- cli.run(STDIN.tty? ? nil : STDIN.read)
7
+ cli.run
@@ -1,7 +1,7 @@
1
1
  require 'opener/tokenizers/base'
2
2
  require 'nokogiri'
3
3
  require 'open3'
4
- require 'optparse'
4
+ require 'slop'
5
5
 
6
6
  require_relative 'tokenizer/version'
7
7
  require_relative 'tokenizer/cli'
@@ -52,35 +52,32 @@ module Opener
52
52
  end
53
53
 
54
54
  ##
55
- # Processes the input and returns an array containing the output of STDOUT,
56
- # STDERR and an object containing process information.
55
+ # Tokenizes the input and returns the results as a KAF document.
57
56
  #
58
57
  # @param [String] input
59
- # @return [Array]
58
+ # @return [String]
60
59
  #
61
60
  def run(input)
62
- begin
63
- if options[:kaf]
64
- language, input = kaf_elements(input)
65
- else
66
- language = options[:language]
67
- end
61
+ if options[:kaf]
62
+ language, input = kaf_elements(input)
63
+ else
64
+ language = options[:language]
65
+ end
68
66
 
69
- unless valid_language?(language)
70
- raise ArgumentError, "The specified language (#{language}) is invalid"
71
- end
67
+ unless valid_language?(language)
68
+ raise ArgumentError, "The specified language (#{language}) is invalid"
69
+ end
72
70
 
73
- kernel = language_constant(language).new(:args => options[:args])
71
+ kernel = language_constant(language).new(:args => options[:args])
74
72
 
75
- stdout, stderr, process = Open3.capture3(
76
- *kernel.command.split(" "),
77
- :stdin_data => input
78
- )
73
+ stdout, stderr, process = Open3.capture3(
74
+ *kernel.command.split(" "),
75
+ :stdin_data => input
76
+ )
79
77
 
80
- raise stderr unless process.success?
78
+ raise stderr unless process.success?
81
79
 
82
- return stdout
83
- end
80
+ return stdout
84
81
  end
85
82
 
86
83
  alias tokenize run
@@ -1,110 +1,92 @@
1
1
  module Opener
2
2
  class Tokenizer
3
3
  ##
4
- # CLI wrapper around {Opener::Tokenizer} using OptionParser.
4
+ # CLI wrapper around {Opener::Tokenizer} using Slop.
5
5
  #
6
- # @!attribute [r] options
7
- # @return [Hash]
8
- # @!attribute [r] option_parser
9
- # @return [OptionParser]
6
+ # @!attribute [r] parser
7
+ # @return [Slop]
10
8
  #
11
9
  class CLI
12
- attr_reader :options, :option_parser
10
+ attr_reader :parser
11
+
12
+ def initialize
13
+ @parser = configure_slop
14
+ end
13
15
 
14
16
  ##
15
- # @param [Hash] options
17
+ # @param [Array] argv
16
18
  #
17
- def initialize(options = {})
18
- @options = DEFAULT_OPTIONS.merge(options)
19
-
20
- @option_parser = OptionParser.new do |opts|
21
- opts.program_name = 'tokenizer'
22
- opts.summary_indent = ' '
23
-
24
- opts.on('-h', '--help', 'Shows this help message') do
25
- show_help
26
- end
27
-
28
- opts.on('-v', '--version', 'Shows the current version') do
29
- show_version
30
- end
19
+ def run(argv = ARGV)
20
+ parser.parse(argv)
21
+ end
31
22
 
32
- opts.on(
33
- '-l',
34
- '--language [VALUE]',
35
- 'Uses this specific language'
36
- ) do |value|
37
- @options[:language] = value
38
- @options[:kaf] = false
39
- end
23
+ ##
24
+ # @return [Slop]
25
+ #
26
+ def configure_slop
27
+ return Slop.new(:strict => false, :indent => 2, :help => true) do
28
+ banner 'Usage: tokenizer [OPTIONS]'
40
29
 
41
- opts.on('-k', '--kaf', 'Treats the input as a KAF document') do
42
- @options[:kaf] = true
43
- end
30
+ separator <<-EOF.chomp
44
31
 
45
- opts.on('-p', '--plain', 'Treats the input as plain text') do
46
- @options[:kaf] = false
47
- end
32
+ About:
48
33
 
49
- opts.separator <<-EOF
34
+ Tokenizer for KAF/plain text documents with support for various languages
35
+ such as Dutch and English. This command reads input from STDIN.
50
36
 
51
37
  Examples:
52
38
 
53
- cat example.txt | #{opts.program_name} -l en # Manually specify the language
54
- cat example.kaf | #{opts.program_name} # Uses the xml:lang attribute
39
+ cat example.txt | tokenizer -l en # Manually specify the language
40
+ cat example.kaf | tokenizer # Uses the xml:lang attribute
55
41
 
56
42
  Languages:
57
43
 
58
- * Dutch (nl)
59
- * English (en)
60
- * French (fr)
61
- * German (de)
62
- * Italian (it)
63
- * Spanish (es)
44
+ * Dutch (nl)
45
+ * English (en)
46
+ * French (fr)
47
+ * German (de)
48
+ * Italian (it)
49
+ * Spanish (es)
64
50
 
65
51
  KAF Input:
66
52
 
67
- If you give a KAF file as an input (-k or --kaf) the language is taken from
68
- the xml:lang attribute inside the file. Else it expects that you give the
69
- language as an argument (-l or --language)
53
+ If you give a KAF file as an input (-k or --kaf) the language is taken from
54
+ the xml:lang attribute inside the file. Else it expects that you give the
55
+ language as an argument (-l or --language)
70
56
 
71
- Sample KAF syntax:
57
+ Example KAF:
72
58
 
73
- <?xml version="1.0" encoding="UTF-8" standalone="no"?>
74
- <KAF version="v1.opener" xml:lang="en">
75
- <raw>This is some text.</raw>
76
- </KAF>
59
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
60
+ <KAF version="v1.opener" xml:lang="en">
61
+ <raw>This is some text.</raw>
62
+ </KAF>
77
63
  EOF
78
- end
79
- end
80
64
 
81
- ##
82
- # @param [String] input
83
- #
84
- def run(input)
85
- option_parser.parse!(options[:args])
65
+ separator "\nOptions:\n"
86
66
 
87
- tokenizer = Tokenizer.new(options)
67
+ on :v, :version, 'Shows the current version' do
68
+ abort "tokenizer v#{VERSION} on #{RUBY_DESCRIPTION}"
69
+ end
88
70
 
89
- stdout, stderr, process = tokenizer.run(input)
71
+ on :l=, :language=, 'A specific language to use',
72
+ :as => String,
73
+ :default => DEFAULT_LANGUAGE
90
74
 
91
- puts stdout
92
- end
75
+ on :k, :kaf, 'Treats the input as a KAF document'
76
+ on :p, :plain, 'Treats the input as plain text'
93
77
 
94
- private
78
+ run do |opts, args|
79
+ tokenizer = Tokenizer.new(
80
+ :args => args,
81
+ :kaf => opts[:plain] ? false : true,
82
+ :language => opts[:language]
83
+ )
95
84
 
96
- ##
97
- # Shows the help message and exits the program.
98
- #
99
- def show_help
100
- abort option_parser.to_s
101
- end
85
+ input = STDIN.tty? ? nil : STDIN.read
102
86
 
103
- ##
104
- # Shows the version and exits the program.
105
- #
106
- def show_version
107
- abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
87
+ puts tokenizer.run(input)
88
+ end
89
+ end
108
90
  end
109
91
  end # CLI
110
92
  end # Tokenizer
@@ -1,5 +1,5 @@
1
1
  module Opener
2
2
  class Tokenizer
3
- VERSION = '2.0.0'
3
+ VERSION = '2.1.0'
4
4
  end
5
5
  end
@@ -24,12 +24,14 @@ Gem::Specification.new do |gem|
24
24
 
25
25
  gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
26
26
 
27
- gem.add_dependency 'nokogiri'
28
27
  gem.add_dependency 'opener-tokenizer-base', '~> 1.0'
29
28
  gem.add_dependency 'opener-webservice', '~> 2.1'
30
29
  gem.add_dependency 'opener-daemons', '~> 2.1'
31
30
  gem.add_dependency 'opener-core', '~> 2.0'
32
31
 
32
+ gem.add_dependency 'nokogiri'
33
+ gem.add_dependency 'slop', '~> 3.5'
34
+
33
35
  gem.add_development_dependency 'rspec'
34
36
  gem.add_development_dependency 'cucumber'
35
37
  gem.add_development_dependency 'pry'
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-24 00:00:00.000000000 Z
11
+ date: 2014-11-26 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: nokogiri
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: opener-tokenizer-base
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +66,34 @@ dependencies:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
68
  version: '2.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: slop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.5'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.5'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rspec
85
99
  requirement: !ruby/object:Gem::Requirement