libis-tools 1.0.5-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +16 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +40 -0
  6. data/Gemfile +7 -0
  7. data/README.md +202 -0
  8. data/Rakefile +11 -0
  9. data/bin/libis_tool +5 -0
  10. data/lib/libis-tools.rb +1 -0
  11. data/lib/libis/tools.rb +25 -0
  12. data/lib/libis/tools/assert.rb +52 -0
  13. data/lib/libis/tools/checksum.rb +106 -0
  14. data/lib/libis/tools/cli/cli_helper.rb +189 -0
  15. data/lib/libis/tools/cli/reorg.rb +416 -0
  16. data/lib/libis/tools/command.rb +133 -0
  17. data/lib/libis/tools/command_line.rb +23 -0
  18. data/lib/libis/tools/config.rb +147 -0
  19. data/lib/libis/tools/config_file.rb +85 -0
  20. data/lib/libis/tools/csv.rb +38 -0
  21. data/lib/libis/tools/deep_struct.rb +71 -0
  22. data/lib/libis/tools/extend/array.rb +16 -0
  23. data/lib/libis/tools/extend/empty.rb +7 -0
  24. data/lib/libis/tools/extend/hash.rb +147 -0
  25. data/lib/libis/tools/extend/kernel.rb +25 -0
  26. data/lib/libis/tools/extend/ostruct.rb +3 -0
  27. data/lib/libis/tools/extend/roo.rb +91 -0
  28. data/lib/libis/tools/extend/string.rb +94 -0
  29. data/lib/libis/tools/extend/struct.rb +29 -0
  30. data/lib/libis/tools/extend/symbol.rb +8 -0
  31. data/lib/libis/tools/logger.rb +130 -0
  32. data/lib/libis/tools/mets_dnx.rb +61 -0
  33. data/lib/libis/tools/mets_file.rb +504 -0
  34. data/lib/libis/tools/mets_objects.rb +547 -0
  35. data/lib/libis/tools/parameter.rb +372 -0
  36. data/lib/libis/tools/spreadsheet.rb +196 -0
  37. data/lib/libis/tools/temp_file.rb +42 -0
  38. data/lib/libis/tools/thread_safe.rb +31 -0
  39. data/lib/libis/tools/version.rb +5 -0
  40. data/lib/libis/tools/xml_document.rb +583 -0
  41. data/libis-tools.gemspec +55 -0
  42. data/spec/assert_spec.rb +65 -0
  43. data/spec/checksum_spec.rb +68 -0
  44. data/spec/command_spec.rb +90 -0
  45. data/spec/config_file_spec.rb +83 -0
  46. data/spec/config_spec.rb +113 -0
  47. data/spec/csv_spec.rb +159 -0
  48. data/spec/data/test-headers.csv +2 -0
  49. data/spec/data/test-headers.tsv +2 -0
  50. data/spec/data/test-noheaders.csv +1 -0
  51. data/spec/data/test-noheaders.tsv +1 -0
  52. data/spec/data/test.data +9 -0
  53. data/spec/data/test.xlsx +0 -0
  54. data/spec/data/test.xml +8 -0
  55. data/spec/data/test.yml +2 -0
  56. data/spec/data/test_config.yml +15 -0
  57. data/spec/deep_struct_spec.rb +138 -0
  58. data/spec/logger_spec.rb +165 -0
  59. data/spec/mets_file_spec.rb +223 -0
  60. data/spec/parameter_container_spec.rb +152 -0
  61. data/spec/parameter_spec.rb +148 -0
  62. data/spec/spec_helper.rb +29 -0
  63. data/spec/spreadsheet_spec.rb +1820 -0
  64. data/spec/temp_file_spec.rb +76 -0
  65. data/spec/test.xsd +20 -0
  66. data/spec/thread_safe_spec.rb +64 -0
  67. data/spec/xmldocument_spec.rb +421 -0
  68. data/test/test_helper.rb +7 -0
  69. data/test/webservices/test_ca_item_info.rb +59 -0
  70. data/test/webservices/test_ca_search.rb +35 -0
  71. metadata +437 -0
@@ -0,0 +1,133 @@
1
+ # encoding: utf-8
2
+ require 'timeout'
3
+
4
+ module Libis
5
+ module Tools
6
+
7
+ # This module allows to run an external command safely and returns its output, error messages and status.
8
+ # The run method takes any number of arguments that will be used as command-line arguments. The method returns
9
+ # a Hash with:
10
+ # * :out => an array with lines that were printed on the external program's standard out.
11
+ # * :err => an array with lines that were printed on the external program's standard error.
12
+ # * :status => exit code returned by the external program.
13
+ # * :timeout => true if the command was terminated due to a timeout.
14
+ # * :pid => pid of the command (in case <pid>.log files need to be cleaned up)
15
+ #
16
+ # Optionally an option hash can be appended to the list of arguments with:
17
+ # * :stdin_data => values sent to the command's standard input (optional, nothing sent if not present)
18
+ # * :binmode => if present and true, will set the IO communication to binary data
19
+ # * :timeout => if specified, SIGTERM signal is sent to the command after the number of seconds
20
+ # * :signal => Signal sent to the command instead of the default SIGTERM
21
+ # * :kill_after => if specified, SIGKILL signal is sent after the number of seconds if command is still running
22
+ # after initial signal was sent
23
+ # * any other options will be handed over to the spawn command (e.g. pgroup)
24
+ #
25
+ # Examples:
26
+ #
27
+ # require 'libis/tools/command'
28
+ # result = ::Libis::Tools::Command.run('ls', '-l', File.absolute_path(__FILE__))
29
+ # p result # => {out: [...], err: [...], status: 0}
30
+ #
31
+ # require 'libis/tools/command'
32
+ # include ::Libis::Tools::Command
33
+ # result = run('ls', '-l', File.absolute_path(__FILE__))
34
+ # p result # => {out: [...], err: [...], status: 0}
35
+ #
36
+ # Note that the Command module uses Kernel#spawn with IO pipes internally. All command arguments and remaining options supplied to Command.run are passed to
37
+ # the spawn call. Unfortunately some older JRuby versions have some known issues with running external processes. Please use and
38
+ # test carefully in JRuby environments.
39
module Command

  # Run an external program and return status, stdout and stderr.
  #
  # The program is started with Kernel#spawn; its stdin, stdout and stderr are connected to pipes that are
  # written/read by this method. All pipe ends are closed before the method returns, also when the program
  # could not be started or when a timeout occurred.
  #
  # @param [Array<String>] cmd command name optionally prepended with env and appended with command-line arguments.
  #   A trailing Hash is interpreted as options: :stdin_data, :binmode, :timeout, :signal and :kill_after are
  #   handled here, any other entry is handed over to Kernel#spawn. The caller's Hash is not modified.
  # @return [Hash] a Hash with:
  #     * :status (Integer) - the exit status of the command (nil if the command could not be started)
  #     * :out (Array<String>) - the stdout output of the command
  #     * :err (Array<String>)- the stderr output of the command; when an error was raised while starting or
  #       feeding the command, the error class name and message come first
  #     * :timeout(Boolean) - if true, the command did not return in time
  #     * :pid(Integer) - the command's processID (nil if the command could not be started)
  def self.run(*cmd)

    # Work on a copy: the keys consumed here must not disappear from the caller's Hash.
    spawn_opts = Hash === cmd.last ? cmd.pop.dup : {}
    opts = {
        :stdin_data => spawn_opts.delete(:stdin_data) || '',
        :binmode => spawn_opts.delete(:binmode) || false,
        :timeout => spawn_opts.delete(:timeout),
        :signal => spawn_opts.delete(:signal) || :TERM,
        :kill_after => spawn_opts.delete(:kill_after),
    }
    in_r, in_w = IO.pipe
    out_r, out_w = IO.pipe
    err_r, err_w = IO.pipe
    in_w.sync = true

    if opts[:binmode]
      in_w.binmode
      out_r.binmode
      err_r.binmode
    end

    spawn_opts[:in] = in_r
    spawn_opts[:out] = out_w
    spawn_opts[:err] = err_w

    result = {
        :pid => nil,
        :status => nil,
        :out => [],
        :err => [],
        :timeout => false,
    }

    out_reader = nil
    err_reader = nil
    wait_thr = nil

    begin
      # A nil timeout makes Timeout.timeout simply yield without a time limit.
      Timeout.timeout(opts[:timeout]) do
        result[:pid] = spawn(*cmd, spawn_opts)
        wait_thr = Process.detach(result[:pid])
        # The child owns its copies now; the parent must close its own copies so the readers below see EOF.
        in_r.close
        out_w.close
        err_w.close

        # Drain stdout/stderr in separate threads so the child never blocks on a full pipe.
        out_reader = Thread.new {out_r.read}
        err_reader = Thread.new {err_r.read}

        in_w.write opts[:stdin_data]
        in_w.close

        result[:status] = wait_thr.value
      end

    rescue Timeout::Error
      result[:timeout] = true
      # The pid is only known if the timeout fired after the process had been spawned.
      if result[:pid]
        # A negative pid addresses the whole process group.
        pid = spawn_opts[:pgroup] ? -result[:pid] : result[:pid]
        begin
          Process.kill(opts[:signal], pid)
          if opts[:kill_after] && wait_thr && !wait_thr.join(opts[:kill_after])
            Process.kill(:KILL, pid)
          end
        rescue Errno::ESRCH
          # The process already exited between the timeout and the signal: nothing left to kill.
        end
      end

    rescue StandardError => e
      result[:err] = [e.class.name, e.message]

    ensure
      # Close every pipe end that is still open on our side. This releases the descriptors when spawn failed
      # and guarantees that the child gets EOF on stdin before we wait for it.
      [in_r, in_w, out_w, err_w].each do |io|
        begin
          io.close unless io.closed?
        rescue IOError, SystemCallError
          # Best effort: a failing close must not hide the command result.
        end
      end
      result[:status] = wait_thr.value.exitstatus if wait_thr
      result[:out] += out_reader.value.split("\n").map(&:chomp) if out_reader
      result[:err] += err_reader.value.split("\n").map(&:chomp) if err_reader
      out_r.close unless out_r.closed?
      err_r.close unless err_r.closed?
    end

    result

  end

end
131
+
132
+ end
133
+ end
@@ -0,0 +1,23 @@
1
+ require 'thor'
2
+ require 'tty-prompt'
3
+ require 'tty-config'
4
+
5
+ require 'libis/tools/cli/cli_helper'
6
+ require 'libis/tools/cli/reorg'
7
+
8
+ module Libis
9
+ module Tools
10
+
11
# Thor based command-line front-end of the tools.
#
# The behaviour itself lives in the Cli::Helper and Cli::Reorg modules that are mixed in below.
class CommandLine < Thor

  include Cli::Helper
  include Cli::Reorg

  # Defined on the class itself and handing over to the implementation found higher up the ancestor
  # chain (presumably Cli::Reorg - its source is not visible here).
  def reorg
    super
  end

end
21
+
22
+ end
23
+ end
@@ -0,0 +1,147 @@
1
+ # encoding: utf-8
2
+ require 'singleton'
3
+ require 'yaml'
4
+ require 'erb'
5
+ require 'logging'
6
+
7
+ require_relative 'config_file'
8
+
9
+ module Libis
10
+ module Tools
11
+
12
+ # The Singleton Config class is a convenience class for easy configuration maintenance, loading and saving.
13
+ # It also initializes a default logger and supports creating extra loggers. The logging infrastructure is based on
14
+ # the {http://www.rubydoc.info/gems/logging/Logging ::Logging} gem and supports the {::Libis::Tools::Logger} class.
15
+ #
16
+ # For the configuration parameters, it supports code defaults, loading configurations from multiple YAML files
17
+ # containing ERB statements. The Config class behaves like a Hash/OpenStruct/HashWithIndifferentAccess.
18
+ #
19
+ # The parameters can be accessed by getter/setter method or using the Hash syntax:
20
+ #
21
+ # require 'libis/tools/config'
22
+ # cfg = ::Libis::Tools::Config
23
+ # cfg['my_value'] = 10
24
+ # p cfg.instance.my_value # => 10
25
+ # cfg.instance.my_text = 'abc'
26
+ # p cfg[:my_text] # => 'abc'
27
+ # p cfg.logger.warn('message') # => W, [2015-03-16T12:51:01.180548 #123.456] WARN : message
28
+ #
29
class Config
  include Singleton

  class << self

    private

    # For each configuration parameter, the value can be accessed via the class or the Singleton instance.
    # The class diverts to the instance automatically. When the instance returns itself, the class is
    # returned instead so that call chains stay on the class facade.
    def method_missing(name, *args, &block)
      result = instance.send(name, *args, &block)
      self === result ? self : result
    end

  end

  # Instance method that allows to access the configuration parameters by method.
  #
  # Every unknown message is forwarded to the wrapped configuration object. When that object returns
  # itself, the Config instance is returned instead so the internal object does not leak to the caller.
  def method_missing(name, *args, &block)
    result = config.send(name, *args, &block)
    # The former test (self === config) compared the Config with its ConfigFile and could never be true.
    result.equal?(config) ? self : result
  end

  # Load configuration parameters from a YAML file or Hash.
  #
  # The file paths and Hashes are memorised and loaded again by the {#reload} methods.
  # @param [String,Hash] file_or_hash
  # @return [Config] self
  def <<(file_or_hash)
    sync do
      # The wrapped configuration yields the Hash or absolute file path it loaded; remember it for reloading.
      @config.send('<<', (file_or_hash)) { |data| @sources << data }
      self
    end
  end

  # Load all files and Hashes again.
  #
  # Will not reset the configuration parameters. Parameters set directly on the
  # configuration are kept intact unless they also exist in the files or hashes in which case they will be overwritten.
  # @return [Config] self
  def reload
    sync do
      # Loading a source registers it again, so start from an empty list to avoid duplicates.
      sources = @sources.dup
      @sources.clear
      sources.each { |f| self << f }
      self
    end
  end

  # Clear data and load all files and Hashes again.
  #
  # All configuration parameters are first deleted which means that any parameters
  # added directly (not via file or hash) will no longer be available. Parameters set explicitly that also exist in
  # the files or hashes will be reset to the values in those files and hashes.
  # @return [Config] self
  def reload!
    sync do
      @config.clear!
      reload
    end
  end

  # Clear all data.
  #
  # Not only all configuration parameters are deleted, but also the memorized list of loaded files
  # and hashes is cleared. The root logger is requested afterwards, which gives it the default appender
  # if it has none.
  # @return [Config] self
  def clear!
    sync do
      @config.clear!
      @sources = Array.new
      self.logger
      self
    end
  end

  # Gets the default ::Logging formatter.
  #
  # This is an instance of a layout that prints in the default message format.
  #
  # The default layout prints log lines like this:
  #
  #     <first char of severity>, [<timestamp> #<process-id>.<thread-id] <severity> : <message>
  #
  def get_log_formatter
    # noinspection RubyResolve
    ::Logging::Layouts::Pattern.new(DEFAULT_LOG_LAYOUT_PARAMETERS)
  end

  # Get the ::Logging logger with the given name.
  #
  # @param [String] name logger name; the 'root' logger is used when nil.
  # @param [Object] appenders appenders to assign when the logger has none yet; defaults to a stdout
  #   appender using the default layout.
  # @return the logger
  def logger(name = nil, appenders = nil)
    sync do
      name ||= 'root'
      logger = ::Logging.logger[name]
      if logger.appenders.empty?
        logger.appenders = appenders || ::Logging.appenders.stdout(layout: get_log_formatter)
      end
      logger
    end
  end

  attr_accessor :config, :sources

  protected

  def initialize(hash = nil, opts = {})
    @mutex = ReentrantMutex.new
    @config = ConfigFile.new(hash, opts)
    self.clear!
  end

  # Run the block while holding the instance mutex.
  def sync(&block)
    @mutex.synchronize(&block)
  end

  ::Logging::init
  # noinspection RubyResolve
  DEFAULT_LOG_LAYOUT_PARAMETERS = {
      pattern: "%.1l, [%d #%p.%t] %5l%X{Application}%X{Subject} : %m\n",
      date_pattern: '%Y-%m-%dT%H:%M:%S.%L'
  }

end
146
+ end
147
+ end
@@ -0,0 +1,85 @@
1
+ # encoding: utf-8
2
+ require 'singleton'
3
+ require 'yaml'
4
+ require 'erb'
5
+
6
+ require 'libis/tools/deep_struct'
7
+
8
+ module Libis
9
+ module Tools
10
+
11
+ # The ConfigFile class is a convenience class for interfacing with YAML configuration files. These files can
12
+ # contain ERB statements. An initial hash or file can be loaded during initialization. The class supports loading
13
+ # and saving of files, but note that any ERB statements in the file are lost by performing such a round trip.
14
+ # The class is derived from the DeepStruct class and therefore supports nested hashes and arrays and supports
15
+ # the OpenStruct style of accessors.
16
+ #
17
+ # The parameters can be accessed by getter/setter method or using the Hash syntax:
18
+ #
19
+ # require 'libis/tools/config_file'
20
+ # cfg_file = ::Libis::Tools::ConfigFile.new
21
+ # cfg_file << {foo: 'bar'}
22
+ # cfg_file.my_value = 10
23
+ # p cfg_file[:my_value] # => 10
24
+ # cfg_file[:my_text] = 'abc'
25
+ # p cfg_file['my_text'] # => 'abc'
26
+ # p cfg_file.to_hash # => { :foo => 'bar', 'my_value' => 10, :my_text => 'abc' }
27
+ # cfg_file >> 'my_config.yml'
28
+ #
29
class ConfigFile < DeepStruct

  # Create a new ConfigFile instance. The optional argument can either be a Hash or a String. The argument is
  # converted to a Hash in the same way as done by the {#<<} method.
  #
  # @param [String,Hash] file_or_hash optional String or Hash argument to initialize the data.
  # @param [Hash] opt options handed over to the superclass constructor.
  def initialize(file_or_hash = nil, opt = {})
    super _file_to_hash(file_or_hash), opt
  end

  # Load configuration parameters from a YAML file or Hash.
  #
  # The YAML file can contain ERB syntax values that will be evaluated at loading time. Instead of a YAML file,
  # a Hash can be passed.
  #
  # Note that the method also yields the hash or absolute path to a given block. This is for data management of
  # derived classes such as ::Libis::Tools::Config.
  #
  # @param [String,Hash] file_or_hash optional String or Hash argument to initialize the data.
  def <<(file_or_hash, &block)
    self.merge!(_file_to_hash(file_or_hash, &block))
  end

  # Save configuration parameters in a YAML file.
  #
  # @param [String] file path of the YAML file to save the configuration to.
  def >>(file)
    File.open(file, 'w') { |f| f.write to_hash.to_yaml }
  end

  protected

  # Convert the argument into a Hash.
  #
  # A Hash is returned as is; a String is taken as the path of a YAML file with optional ERB statements.
  # A missing file, an empty or unsupported argument and YAML content that is not a mapping all give an
  # empty Hash. If a block is given, it receives the Hash or the absolute path of the file.
  #
  # @param [String,Hash] file_or_hash
  # @return [Hash]
  # @raise [RuntimeError] if the file cannot be read, its ERB cannot be evaluated or its YAML cannot be parsed.
  def _file_to_hash(file_or_hash)
    return {} if file_or_hash.nil? || (file_or_hash.respond_to?(:empty?) && file_or_hash.empty?)
    hash = case file_or_hash
           when Hash
             yield file_or_hash if block_given?
             file_or_hash
           when String
             return {} unless File.exist?(file_or_hash)
             yield File.absolute_path(file_or_hash) if block_given?
             # noinspection RubyResolve
             begin
               # File.read closes the file again; Kernel#open(...).read left the handle open.
               # NOTE(review): YAML.load runs on ERB-expanded content - only load trusted configuration files.
               YAML.load(ERB.new(File.read(file_or_hash)).result)
             rescue StandardError, ScriptError => e
               # ScriptError covers syntax errors in the ERB code. Signals and SystemExit are deliberately
               # not caught here.
               raise RuntimeError, "Error loading YAML '#{file_or_hash}': #{e.message}"
             end
           else
             {}
           end
    hash = {} unless hash.is_a? Hash
    hash
  end

end
84
+ end
85
+ end
@@ -0,0 +1,38 @@
1
+ require 'csv'
2
+
3
+ module Libis
4
+ module Tools
5
module Csv

  # Open a CSV file and position it on the first data row.
  #
  # The first line of the file is inspected. If it contains all required headers, it is used as header row.
  # If it contains none of them, the file is considered to have no header row and the required headers
  # followed by the optional headers are assigned to the columns by position. If only some of the required
  # headers are found, an error is raised.
  #
  # @param [String] file_name
  # @param [Hash] options the following keys are handled here, all other entries are passed to CSV.open:
  #     * :mode - file open mode. Default: 'rb:UTF-8'
  #     * :required - Array with required header names. Default: empty
  #     * :optional - Array with optional header names. Default: empty
  #     * :col_sep - column separator. Default: ','
  #     * :quote_char - quote character. Default: '"'
  # @return [CSV] Open CSV object
  # @raise [RuntimeError] if only a part of the required headers is present or if the file has fewer columns
  #   than there are required headers.
  def self.open(file_name, options = {})
    options = {
        mode: 'rb:UTF-8',
        required: %w'',
        optional: %w'',
        col_sep: ',',
        quote_char: '"'
    }.merge options
    mode = options.delete(:mode)
    required_headers = options.delete(:required)
    optional_headers = options.delete(:optional)
    options[:headers] = true
    options[:return_headers] = true
    # CSV.open takes its options as keyword arguments; a positional Hash is rejected by Ruby 3.
    csv = CSV.open(file_name, mode, **options)
    line = csv.shift

    if line.nil?
      # Empty file: nothing to check the headers against.
      return csv if required_headers.empty?
      csv.close
      raise RuntimeError, 'CSV does not contain enough columns'
    end

    found_headers = required_headers & line.headers
    return csv if found_headers.size == required_headers.size

    # From here on the first handle is no longer needed; close it before raising or re-opening.
    csv.close
    raise RuntimeError, "CSV headers not found: #{required_headers - found_headers}" unless found_headers.empty?
    raise RuntimeError, 'CSV does not contain enough columns' if required_headers.size > line.size

    # No header row in the file: name the columns by position and open the file again.
    options[:headers] = (required_headers + optional_headers)[0...line.size]
    csv = CSV.open(file_name, mode, **options)
    # Skip the header row that is generated from the supplied header names.
    csv.shift
    csv
  end
end
37
+ end
38
+ end
@@ -0,0 +1,71 @@
1
+ require 'libis/tools/extend/ostruct'
2
+ require 'recursive-open-struct'
3
+
4
+ module Libis
5
+ module Tools
6
+
7
+ # A class that derives from OpenStruct through the RecursiveOpenStruct.
8
+ # By wrapping a Hash recursively, it allows for easy access to the content by method names.
9
+ # A RecursiveOpenStruct is derived from stdlib's OpenStruct, but can be made recursive.
10
+ # DeepStruct enforces this behaviour and adds a clear! method.
11
class DeepStruct < RecursiveOpenStruct
  include Enumerable

  # Build a DeepStruct around a Hash.
  #
  # @param [Hash] hash the initial data structure. nil is treated as an empty Hash; any other non-Hash
  #   value is stored under the :default key.
  # @param [Hash] opts optional configuration options, merged over these defaults:
  #     * recurse_over_arrays: true
  #     * preserve_original_keys: true
  def initialize(hash = {}, opts = {})
    hash ||= {}
    opts ||= {}
    hash = {default: hash} unless hash.is_a? Hash
    defaults = {recurse_over_arrays: true, preserve_original_keys: true}
    super(hash, defaults.merge(opts))
  end

  # Return a copy of this object with the entries of the given Hash-like object set on it.
  # Returns self if the argument does not respond to to_hash.
  def merge(hash)
    return self unless hash.respond_to?(:to_hash)
    entries = hash.to_hash
    copy = self.dup
    deep_copy = lambda do |value|
      DeepDup.new(
          recurse_over_arrays: @recurse_over_arrays,
          preserve_original_keys: @preserve_original_keys
      ).call(value)
    end
    entries.each do |key, value|
      copy[key] = deep_copy.call(value)
    end
    copy
  end

  # Set the entries of the given Hash-like object on this object and return self.
  # Nothing happens if the argument does not respond to to_hash.
  def merge!(hash)
    return self unless hash.respond_to?(:to_hash)
    deep_copy = lambda do |value|
      DeepDup.new(
          recurse_over_arrays: @recurse_over_arrays,
          preserve_original_keys: @preserve_original_keys
      ).call(value)
    end
    hash.to_hash.each do |key, value|
      self[key] = deep_copy.call(value)
    end
    self
  end

  # Tells whether the object responds to a method with the given name.
  def key?(key)
    respond_to?(key)
  end
  alias_method :has_key?, :key?

  # The keys of the internal table.
  def keys
    @table.keys
  end

  # Iterate over the entries; hands the block over to each_pair.
  def each(&block)
    each_pair(&block)
  end

  # Delete all data fields
  def clear!
    @table.keys.each do |field|
      delete_field(field)
    end
    @sub_elements = {}
  end

end
70
+ end
71
+ end