metacrunch 2.2.3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -1
  3. data/Gemfile +11 -13
  4. data/License.txt +1 -1
  5. data/Readme.md +139 -2
  6. data/bin/console +9 -6
  7. data/exe/metacrunch +1 -2
  8. data/lib/metacrunch/cli.rb +62 -14
  9. data/lib/metacrunch/db/reader.rb +27 -0
  10. data/lib/metacrunch/db/writer.rb +23 -0
  11. data/lib/metacrunch/db.rb +8 -0
  12. data/lib/metacrunch/fs/entry.rb +17 -0
  13. data/lib/metacrunch/{file_reader.rb → fs/reader.rb} +9 -10
  14. data/lib/metacrunch/fs.rb +6 -0
  15. data/lib/metacrunch/job/buffer.rb +26 -0
  16. data/lib/metacrunch/job/dsl/option_support.rb +102 -0
  17. data/lib/metacrunch/job/dsl.rb +42 -0
  18. data/lib/metacrunch/job.rb +149 -0
  19. data/lib/metacrunch/test_utils/dummy_callable.rb +14 -0
  20. data/lib/metacrunch/test_utils/dummy_destination.rb +21 -0
  21. data/lib/metacrunch/test_utils/dummy_source.rb +22 -0
  22. data/lib/metacrunch/test_utils.rb +7 -0
  23. data/lib/metacrunch/version.rb +1 -1
  24. data/lib/metacrunch.rb +14 -27
  25. data/metacrunch.gemspec +5 -10
  26. metadata +24 -144
  27. data/lib/metacrunch/cli/base.rb +0 -29
  28. data/lib/metacrunch/cli/command_definition.rb +0 -41
  29. data/lib/metacrunch/cli/command_registry.rb +0 -17
  30. data/lib/metacrunch/cli/main.rb +0 -16
  31. data/lib/metacrunch/command.rb +0 -27
  32. data/lib/metacrunch/file/reader/file_system_fetcher.rb +0 -21
  33. data/lib/metacrunch/file/reader/plain_file_reader.rb +0 -33
  34. data/lib/metacrunch/file/reader/scp_fetcher.rb +0 -56
  35. data/lib/metacrunch/file/reader/tar_file_reader.rb +0 -37
  36. data/lib/metacrunch/file/reader/zip_file_reader.rb +0 -30
  37. data/lib/metacrunch/file/reader.rb +0 -72
  38. data/lib/metacrunch/file/writer/plain_file_writer.rb +0 -19
  39. data/lib/metacrunch/file/writer/tar_file_writer.rb +0 -26
  40. data/lib/metacrunch/file/writer/zip_file_writer.rb +0 -29
  41. data/lib/metacrunch/file/writer.rb +0 -26
  42. data/lib/metacrunch/file.rb +0 -24
  43. data/lib/metacrunch/file_reader_entry.rb +0 -21
  44. data/lib/metacrunch/file_writer.rb +0 -40
  45. data/lib/metacrunch/hash.rb +0 -51
  46. data/lib/metacrunch/parallel.rb +0 -69
  47. data/lib/metacrunch/processor.rb +0 -10
  48. data/lib/metacrunch/snr/field.rb +0 -31
  49. data/lib/metacrunch/snr/section.rb +0 -74
  50. data/lib/metacrunch/snr.rb +0 -117
  51. data/lib/metacrunch/tar_writer.rb +0 -26
  52. data/lib/metacrunch/transformator/transformation/step.rb +0 -45
  53. data/lib/metacrunch/transformator/transformation.rb +0 -48
  54. data/lib/metacrunch/transformator.rb +0 -5
  55. data/lib/metacrunch/transformer/helper.rb +0 -29
  56. data/lib/metacrunch/transformer/step.rb +0 -37
  57. data/lib/metacrunch/transformer.rb +0 -38
@@ -1,33 +0,0 @@
1
- require_relative "../reader"
2
-
3
# Catch-all reader: treats the given path as one regular file (gzip-compressed
# when its name ends in "gz") and yields its whole content as a single
# Metacrunch::File.
class Metacrunch::File::Reader::PlainFileReader
  include Enumerable

  # Accepts every filename, so callers should try this reader last.
  #
  # @param filename [String] ignored
  # @return [true]
  def self.accepts?(filename)
    true
  end

  # @param filename [String] path of the file to read
  def initialize(filename)
    @filename = filename
  end

  # Yields exactly one Metacrunch::File holding the complete file content.
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(__method__) unless block_given?

    io = open_io

    begin
      yield Metacrunch::File.new({
        content: io.read,
        entry_name: File.basename(@filename),
        file_name: @filename,
        is_directory: false
      })
    ensure
      # Close the handle even when reading or the consumer's block raises;
      # previously the descriptor leaked in that case.
      io.close
    end
  end

  private

  # Opens the file, transparently decompressing names that end in "gz".
  def open_io
    if @filename.end_with?("gz")
      Zlib::GzipReader.open(@filename)
    else
      File.open(@filename)
    end
  end
end
@@ -1,56 +0,0 @@
1
- require "etc"
2
- require "net/scp"
3
- require "net/ssh"
4
- require "securerandom"
5
- require_relative "../reader"
6
-
7
# Fetches files addressed by an scp:// URL. The URL path is expanded as a glob
# on the remote host; every match is downloaded into a temporary directory and
# its local path is yielded.
class Metacrunch::File::Reader::ScpFetcher
  include Enumerable

  RECOGNIZED_PROTOCOL_REGEX = /\Ascp:\/\//
  # Placeholder swapped in for "~" while the URL is parsed and swapped back
  # afterwards (presumably because URI() cannot carry a literal "~" path
  # segment the way it is needed here -- TODO confirm).
  TILDE_REPLACEMENT = "/__TILDE__"

  # @param url [String]
  # @return [Boolean] true when the url starts with "scp://"
  def self.accepts?(url)
    !!url[RECOGNIZED_PROTOCOL_REGEX]
  end

  # @param url [String] scp:// URL; user and password may be embedded
  # @param options [Hash] :username / :password override the URL credentials;
  #   the username finally falls back to the local login name
  def initialize(url, options = {})
    URI(url.sub("~", TILDE_REPLACEMENT)).try do |_uri|
      @host = _uri.host
      @password = options[:password] || _uri.password
      @path = _uri.path.sub(TILDE_REPLACEMENT, "~")
      @username = options[:username] || _uri.user || Etc.getlogin
    end
  end

  # Downloads each remote match and yields the local filename. The file is
  # deleted once the block returns. Returns an Enumerator without a block.
  def each
    return enum_for(__method__) unless block_given?

    # Dir.mktmpdir creates a unique directory and removes it (with anything
    # left inside) when the block exits, also on errors. The former
    # mkdir/retry/ensure construct retried without bound and, when mkdir
    # failed for a reason other than EEXIST, called remove_dir(nil) in the
    # ensure clause, hiding the original exception.
    Dir.mktmpdir do |temporary_directory|
      remote_filenames.each do |_remote_filename|
        _local_filename = File.join(temporary_directory, File.basename(_remote_filename))
        Net::SCP.download!(@host, @username, _remote_filename, _local_filename, ssh: { password: @password })
        yield _local_filename
        File.delete(_local_filename)
      end
    end
  end

  private

  # Remote paths matching @path, resolved once via a ruby one-liner executed
  # over SSH and memoized.
  #
  # NOTE(review): @path is interpolated unescaped into a remote shell command;
  # a path containing quotes can inject commands. Escape it (e.g. Shellwords)
  # if URLs can come from untrusted input.
  def remote_filenames
    @remote_filenames ||= [].tap do |_remote_filenames|
      Net::SSH.start(@host, @username, password: @password) do |_ssh|
        _remote_filenames.concat _ssh.exec!("ruby -e \"puts Dir.glob(File.expand_path('#{@path}'))\"").try(:split, "\n") || []
      end
    end
  end
end
@@ -1,37 +0,0 @@
1
- require "rubygems/package"
2
- require_relative "../reader"
3
-
4
# Reads .tar, .tar.gz and .tgz archives and yields one Metacrunch::File per
# non-directory entry.
class Metacrunch::File::Reader::TarFileReader
  include Enumerable

  # @param filename [String]
  # @return [Boolean] true for names ending in .tar, .tar.gz or .tgz
  def self.accepts?(filename)
    !!filename[/\.tar\Z|\.tar\.gz\Z|\.tgz\Z/]
  end

  # @param filename [String] path of the archive
  def initialize(filename)
    @filename = filename
  end

  # Yields every regular entry of the archive. Returns an Enumerator when
  # called without a block.
  def each
    return enum_for(__method__) unless block_given?

    io =
      if @filename.end_with?("gz") # catches tgz and tar.gz
        Zlib::GzipReader.open(@filename)
      else
        # Tar archives are binary data; open without newline translation.
        File.open(@filename, "rb")
      end

    begin
      Gem::Package::TarReader.new(io).each do |_tar_entry|
        next if _tar_entry.directory?

        yield Metacrunch::File.new({
          content: _tar_entry.read,
          entry_name: _tar_entry.full_name,
          file_name: @filename
        })
      end
    ensure
      # Release the handle even if a corrupt archive or the consumer's block
      # raises; it used to leak in that case.
      io.close
    end
  end
end
@@ -1,30 +0,0 @@
1
- require "zip"
2
- require_relative "../reader"
3
-
4
# Reads .zip archives and yields one Metacrunch::File per non-directory entry.
class Metacrunch::File::Reader::ZipFileReader
  include Enumerable

  # @param filename [String]
  # @return [Boolean] true when the name ends in ".zip"
  def self.accepts?(filename)
    filename[/\.zip\Z/] ? true : false
  end

  # @param filename [String] path of the zip archive
  def initialize(filename)
    @filename = filename
  end

  # Walks the archive and yields each file entry; directory entries are
  # skipped. Without a block an Enumerator is returned.
  def each
    return enum_for(__method__) unless block_given?

    Zip::File.open(@filename) do |archive|
      archive.each do |entry|
        next if entry.directory?

        attributes = {
          content: entry.get_input_stream.read,
          entry_name: entry.name,
          file_name: @filename
        }
        yield Metacrunch::File.new(attributes)
      end
    end
  end
end
@@ -1,72 +0,0 @@
1
- require_relative "../file"
2
- require_relative "../processor"
3
-
4
# Pipeline processor that resolves filenames / URLs to concrete files and
# hands them out in chunks of `bulk_size`.
class Metacrunch::File::Reader < Metacrunch::Processor
  require_relative "./reader/file_system_fetcher"
  require_relative "./reader/plain_file_reader"
  require_relative "./reader/scp_fetcher"
  require_relative "./reader/tar_file_reader"
  require_relative "./reader/zip_file_reader"

  include Enumerable

  # @param options [Hash]
  #   :filename / :filenames / :url / :urls  sources; entries without a
  #     "scheme://" prefix are glob-expanded locally and turned into file:// URLs
  #   :bulk_size               number of files appended per #call (default 1)
  #   :force_content_encoding  encoding forced onto each file's content
  #   :username / :password    credentials passed on to the fetchers
  def initialize(options = {})
    @bulk_size = options[:bulk_size].try(:to_i) || 1

    sources = options.values_at(:filename, :filenames, :url, :urls).flatten.compact

    @urls = sources.flat_map do |source|
      next source if source[/\A\w+:\/\//]

      # A glob may match several files, hence flat_map.
      Dir.glob(File.expand_path(source)).map { |path| "file://#{path}" }
    end

    @force_content_encoding = options[:force_content_encoding]
    @password = options[:password]
    @username = options[:username]
  end

  # Appends the next chunk of files to `items`. Once all files have been
  # handed out, asks the pipeline (if any) to terminate.
  def call(items = [], pipeline = nil)
    @chunks_of_entries_enumerator ||= each_slice(@bulk_size) # instance method from Enumerable
    items.concat(@chunks_of_entries_enumerator.next)
  rescue StopIteration
    pipeline.try(:terminate!)
  end

  # Yields every file reachable through the configured URLs. URLs without a
  # matching fetcher are skipped. Returns an Enumerator without a block.
  def each
    return enum_for(__method__) unless block_given?

    @urls.each do |url|
      fetcher_class = [FileSystemFetcher, ScpFetcher].find { |candidate| candidate.accepts?(url) }
      next unless fetcher_class

      fetcher_class.new(url, username: @username, password: @password).each do |filename|
        # PlainFileReader comes last because it accepts any file.
        reader_class = [TarFileReader, ZipFileReader, PlainFileReader].find { |candidate| candidate.accepts?(filename) }
        next unless reader_class

        reader_class.new(filename).each do |file|
          file.content.try(:force_encoding, @force_content_encoding) if @force_content_encoding.present?

          yield file
        end
      end
    end
  end
end
@@ -1,19 +0,0 @@
1
- require_relative "../writer"
2
-
3
# Fallback writer: writes the given content straight into a single file.
class Metacrunch::File::Writer::PlainFileWriter
  class << self
    # Supports every filename, so it should be consulted last.
    #
    # @param filename [String] ignored
    # @return [true]
    def supports?(filename)
      true
    end
  end

  # Opens (and truncates) the target file for writing.
  #
  # @param filename [String]
  def initialize(filename)
    @io = File.open(filename, "w")
  end

  # Closes the underlying file handle.
  def close
    @io.close
  end

  # Writes options[:content] to the file.
  #
  # @param options [Hash] only :content is used
  def write(options = {})
    content = options[:content]
    @io.write(content)
  end
end
@@ -1,26 +0,0 @@
1
- require "rubygems/package"
2
- require "zlib"
3
- require_relative "../writer"
4
-
5
# Writes entries into a tar archive; the archive is gzip-compressed when the
# filename ends in "gz".
class Metacrunch::File::Writer::TarFileWriter
  # @param filename [String]
  # @return [Boolean] true for names ending in .tar, .tar.gz or .tgz
  def self.supports?(filename)
    !!filename[/\.tar\Z|\.tar\.gz\Z|\.tgz\Z/]
  end

  # @param filename [String] path of the archive to create
  def initialize(filename)
    # Tar/gzip output is binary: "wb" avoids newline translation and encoding
    # conversion that plain "w" may apply on some platforms.
    @io = File.open(filename, "wb")
    @io = Zlib::GzipWriter.new(@io) if filename.end_with?("gz")
    @tar_writer = Gem::Package::TarWriter.new(@io)
  end

  # Finishes the archive and closes the underlying stream.
  def close
    @tar_writer.close
  ensure
    # Release the file handle even when finalizing the archive fails.
    @io.close
  end

  # Adds one file entry to the archive.
  #
  # @param options [Hash] :entry_name (path inside the archive) and
  #   :content (String; its bytesize is used as the entry size)
  def write(options = {})
    @tar_writer.add_file_simple(options[:entry_name], 0644, options[:content].bytesize) do |_tar_entry|
      _tar_entry.write(options[:content])
    end
  end
end
@@ -1,29 +0,0 @@
1
- require "zip"
2
- require_relative "../writer"
3
-
4
# Placeholder for a zip archive writer. Only the class-level hooks exist:
# there is no initialize(filename), #write or #close yet.
# TODO: implement instance-level writing of zip entries.
class Metacrunch::File::Writer::ZipFileWriter
  class << self
    # @param filename [String]
    # @return [Boolean] true when the name ends in ".zip"
    def supports?(filename)
      filename[/\.zip\Z/] ? true : false
    end

    # Intentionally empty; accepts any arguments and returns nil.
    def write(*args)
    end
  end
end
@@ -1,26 +0,0 @@
1
- require_relative "../file"
2
- require_relative "../processor"
3
-
4
# Facade that picks a concrete writer (tar, zip or plain) based on the
# filename and forwards #write / #close to it.
class Metacrunch::File::Writer < Metacrunch::Processor
  require_relative "./writer/plain_file_writer"
  require_relative "./writer/tar_file_writer"
  require_relative "./writer/zip_file_writer"

  # @param filename [String] target file; the first writer class whose
  #   supports? returns true for it is instantiated
  def initialize(filename)
    candidates = [TarFileWriter, ZipFileWriter, PlainFileWriter]
    writer_class = candidates.find { |candidate| candidate.supports?(filename) }

    @writer = writer_class && writer_class.new(filename)
  end

  # Closes the selected writer.
  def close
    @writer.close
  end

  # Forwards all arguments to the selected writer's #write.
  def write(*args)
    @writer.write(*args)
  end
end
@@ -1,24 +0,0 @@
1
- require_relative "../metacrunch"
2
-
3
# Value object for one file handled by the readers and writers: its content
# plus the entry name and the name of the file it came from.
class Metacrunch::File
  require_relative "./file/reader"
  require_relative "./file/writer"

  # entry_name equals file_name for plain files
  attr_accessor :content, :entry_name, :file_name

  # @param options [Hash] :content, :entry_name and :file_name; other keys
  #   are ignored
  def initialize(options = {})
    @content, @entry_name, @file_name = options.values_at(:content, :entry_name, :file_name)
  end

  # @return [Hash] the three attributes keyed by symbol
  def to_h
    { content: @content, entry_name: @entry_name, file_name: @file_name }
  end
end
@@ -1,21 +0,0 @@
1
- require_relative "./file_reader"
2
-
3
module Metacrunch
  class FileReader
    # One file produced by FileReader, optionally originating from an archive.
    class Entry

      attr_reader :filename, :archive_filename, :contents

      # @param filename [String] name of the file
      # @param archive_filename [String, nil] name of the containing archive;
      #   blank values are stored as nil
      # @param contents [Object, nil] the file's contents
      def initialize(filename:, archive_filename:nil, contents:nil)
        @filename, @contents = filename, contents
        @archive_filename = archive_filename.presence
      end

      # @return [Boolean] true when an archive filename was given
      def from_archive?
        !@archive_filename.nil?
      end

    end
  end
end
@@ -1,40 +0,0 @@
1
module Metacrunch
  # Writes data to a single file, gzip-compressed on request or when the
  # filename ends in ".gz". The file is opened lazily on the first write.
  class FileWriter

    # Raised when the target file exists and overriding was not requested.
    class FileExistError < RuntimeError ; end

    # @param filename [String] target path (expanded to an absolute path)
    # @param override [Boolean] allow replacing an existing file
    # @param compress [Boolean, nil] true/false forces the mode; nil (default)
    #   derives it from a ".gz" filename suffix
    # @raise [FileExistError] if the file exists and override is false
    def initialize(filename, override: false, compress: nil)
      @path = ::File.expand_path(filename)
      # Only fall back to the suffix when the caller did not decide; the former
      # `compress ||= ...` silently discarded an explicit `compress: false`.
      @compressed = compress.nil? ? @path.end_with?(".gz") : compress

      if ::File.exist?(@path) && !override
        raise FileExistError, "File #{@path} already exists. Set override = true to override the existing file."
      end
    end

    # Writes `data`, or yields the underlying IO when a block is given (in
    # which case `data` is ignored). `options` is currently unused.
    def write(data, options = {})
      if block_given?
        yield(io)
      else
        io.write(data)
      end
    end

    # Flushes buffered data if the file has been opened and is still open.
    def flush
      @io.flush if open?
    end

    # Flushes and closes the file. Safe to call repeatedly or before any write.
    def close
      flush
      @io.close if open?
    end

    private

    # True once the file was opened and has not been closed yet.
    def open?
      !@io.nil? && !@io.closed?
    end

    # Lazily opens the target; compression is used only for a strict `true`.
    def io
      @io ||= (@compressed == true) ? Zlib::GzipWriter.open(@path) : ::File.open(@path, "w")
    end

  end
end
@@ -1,51 +0,0 @@
1
- require_relative "../metacrunch"
2
-
3
module Metacrunch
  # Helpers for accumulating values in (nested) hash-like objects.
  module Hash
    # Adds `value` under a key path inside `object`, creating intermediate
    # containers (instances of object.class) as needed.
    #
    # Call forms:
    #   add(object, key, value)
    #   add(object, "a/b/c", value)      # "/"-separated path in a single String
    #   add(object, key1, key2, value)   # explicit path
    #
    # A String key starting with ":" is converted to a Symbol (":a" => :a).
    # This now applies to every path segment; previously the last segment of
    # a multi-segment path was left as the String ":a".
    #
    # Repeated additions under one key are collected in an Array.
    #
    # @return the value stored under the final key, or `value` itself
    #   (nothing is stored) when `value` is nil or empty
    # @raise [ArgumentError] when fewer than a key and a value are given
    def self.add(object, *args)
      raise ArgumentError, "expected at least a key and a value" if args.length < 2

      value = args.last
      return value if value.nil? || (value.respond_to?(:empty?) && value.empty?)

      keys = args[0..-2]

      # Only a single String key is interpreted as a "/"-separated path.
      if keys.length == 1 && keys.first.is_a?(String) && keys.first.include?("/")
        return add(object, *keys.first.split("/"), value)
      end

      *path, leaf = keys.map { |_key| _normalize_key(_key) }

      target = path.inject(object) do |_memo, _key|
        _memo[_key] ||= object.class.new
      end

      _add(target, leaf, value)
    end

    # Converts ":name" Strings to the Symbol :name; other keys pass through.
    def self._normalize_key(key)
      key.is_a?(String) && key.start_with?(":") ? key[1..-1].to_sym : key
    end

    # Stores `value` under `key`. A missing key takes the value directly (a
    # one-element Array is unwrapped); an existing value is turned into or
    # extended as a flat Array.
    #
    # Kept publicly callable: the bare `private` that used to precede this
    # definition never applied to `def self.` methods.
    #
    # @return the value now stored under `key`
    def self._add(hash, key, value)
      if hash[key].nil?
        hash[key] = value.is_a?(Array) && value.length == 1 ? value.first : value
      elsif hash[key].is_a?(Array)
        (hash[key] << value).flatten!(1)
      else
        (hash[key] = [hash[key], value]).flatten!(1)
      end

      hash[key]
    end
  end
end
@@ -1,69 +0,0 @@
1
module Metacrunch
  # Runs a block for every element of an enumerable, either inline or in
  # forked child processes with an upper bound on concurrent children.
  class Parallel

    # Mixin providing `parallel(enumerable, options) { |value| ... }`.
    module DSL
      def parallel(enumerable, options = {}, &block)
        Parallel.each(enumerable, options, &block)
      end
    end

    # Convenience wrapper: builds an instance and runs it.
    def self.each(enumerable, options = {}, &block)
      new(enumerable, options, &block).call
    end

    # @param enumerable [#each] values to process
    # @param options [Hash]
    #   :in_processes         max. number of concurrent child processes;
    #                         0 (default) runs everything in this process
    #   :on_process_finished  callable invoked once per finished value/child
    # @raise [ArgumentError] without a block, or when enumerable lacks #each
    #   or the callback lacks #call
    def initialize(enumerable, options = {}, &block)
      @enumerable = enumerable
      @callable = block
      @no_of_procs = options[:in_processes] || 0
      @on_process_finished = options[:on_process_finished] || -> {}

      raise ArgumentError, "you must provide a block" unless block_given?
      raise ArgumentError, "enumerable must respond to each" unless @enumerable.respond_to?(:each)
      raise ArgumentError, "on_process_finished must respond to call" unless @on_process_finished.respond_to?(:call)
    end

    # Processes all values. Afterwards (also on error) waits for every
    # remaining child process and fires the callback once per reaped child.
    def call
      @enumerable.each do |item|
        @no_of_procs == 0 ? run_inline(item) : run_forked(item)
      end
    ensure
      Process.waitall.each { @on_process_finished.call }
    end

    private

    # Runs the block in the current process and reports completion.
    def run_inline(item)
      @callable.call(item)
      @on_process_finished.call
    end

    # Runs the block in a child process; when the limit of children is
    # reached, blocks until one of them has terminated.
    def run_forked(item)
      fork_process { @callable.call(item) }
      return unless processes_limit_reached?

      wait_for_some_process_to_terminate
      @on_process_finished.call
    end

    def fork_process(&block)
      @pids ||= []
      @pids.push(fork(&block))
    end

    def processes_limit_reached?
      running = @pids || []
      running.length >= @no_of_procs
    end

    def wait_for_some_process_to_terminate
      finished_pid = Process.wait
      @pids.delete(finished_pid)
    end

  end
end
@@ -1,10 +0,0 @@
1
- require_relative "../metacrunch"
2
-
3
# Base class for pipeline processors. Subclasses override #call.
class Metacrunch::Processor
  # Remembers the options unless @options has already been set.
  #
  # @param options [Hash]
  def initialize(options = {})
    @options ||= options
  end

  # No-op default implementation.
  #
  # @param items [Array] items of the current pipeline run
  # @param pipeline [Object, nil] the calling pipeline
  # @return [nil]
  def call(items = [], pipeline = nil)
    nil
  end
end
@@ -1,31 +0,0 @@
1
module Metacrunch
  class SNR
    class Section
      # A named value inside a section.
      class Field

        attr_reader :name
        attr_accessor :value

        # @param name [Object] field name; must not be nil
        # @param value [Object] field value
        # @raise [ArgumentError] when name is nil
        def initialize(name, value)
          if name.nil?
            raise ArgumentError, "required Field#name not given"
          end

          @name = name
          @value = value
        end

        # ------------------------------------------------------------------------------
        # Serialization
        # ------------------------------------------------------------------------------

        # Serializes the field through the given builder. Values that know how
        # to render themselves (#to_xml) are asked to do so with the field name
        # as root; everything else becomes a simple tag.
        #
        # @param builder [#tag!] XML builder
        def to_xml(builder)
          return builder.tag!(name, value) unless value.respond_to?(:to_xml)

          value.to_xml(root: name, builder: builder, skip_instruct: true)
        end

      end
    end
  end
end
@@ -1,74 +0,0 @@
1
module Metacrunch
  class SNR
    # A named, ordered collection of fields.
    class Section

      attr_reader :name

      # @param name [Object] section name; must not be nil
      # @raise [ArgumentError] when name is nil
      def initialize(name)
        if name.nil?
          raise ArgumentError, "required Section#name not given"
        end

        @name = name
        @fields = []
      end

      # ------------------------------------------------------------------------------
      # Common API
      # ------------------------------------------------------------------------------

      #
      # Adds one field per value. An Array adds a field for each element
      # (and the Array itself is returned); any other value adds a single
      # field, which is returned.
      #
      def add(field_name, value)
        return add_field(Field.new(field_name, value)) unless value.is_a?(Array)

        value.each { |element| add_field(Field.new(field_name, element)) }
      end

      # ------------------------------------------------------------------------------
      # Fields
      # ------------------------------------------------------------------------------

      #
      # Returns the fields of this section, optionally restricted to those
      # with the given name.
      #
      # @param [String, nil] name
      # @return [Array<Metacrunch::SNR::Section::Field>]
      #
      def fields(name = nil)
        return @fields unless name

        @fields.select { |candidate| candidate.name == name }
      end

      #
      # Appends a field to this section.
      #
      # @param [Metacrunch::SNR::Section::Field] field
      # @return [Metacrunch::SNR::Section::Field] the field that was added
      #
      def add_field(field)
        @fields.push(field)
        field
      end

      # ------------------------------------------------------------------------------
      # Serialization
      # ------------------------------------------------------------------------------

      # Emits a tag named after the section that wraps all fields, in
      # insertion order.
      #
      # @param builder [#tag!] XML builder
      def to_xml(builder)
        builder.tag!(name) do
          @fields.each { |member| member.to_xml(builder) }
        end
      end

    end
  end
end