metacrunch 2.2.3 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -1
  3. data/Gemfile +11 -13
  4. data/License.txt +1 -1
  5. data/Readme.md +139 -2
  6. data/bin/console +9 -6
  7. data/exe/metacrunch +1 -2
  8. data/lib/metacrunch/cli.rb +62 -14
  9. data/lib/metacrunch/db/reader.rb +27 -0
  10. data/lib/metacrunch/db/writer.rb +23 -0
  11. data/lib/metacrunch/db.rb +8 -0
  12. data/lib/metacrunch/fs/entry.rb +17 -0
  13. data/lib/metacrunch/{file_reader.rb → fs/reader.rb} +9 -10
  14. data/lib/metacrunch/fs.rb +6 -0
  15. data/lib/metacrunch/job/buffer.rb +26 -0
  16. data/lib/metacrunch/job/dsl/option_support.rb +102 -0
  17. data/lib/metacrunch/job/dsl.rb +42 -0
  18. data/lib/metacrunch/job.rb +149 -0
  19. data/lib/metacrunch/test_utils/dummy_callable.rb +14 -0
  20. data/lib/metacrunch/test_utils/dummy_destination.rb +21 -0
  21. data/lib/metacrunch/test_utils/dummy_source.rb +22 -0
  22. data/lib/metacrunch/test_utils.rb +7 -0
  23. data/lib/metacrunch/version.rb +1 -1
  24. data/lib/metacrunch.rb +14 -27
  25. data/metacrunch.gemspec +5 -10
  26. metadata +24 -144
  27. data/lib/metacrunch/cli/base.rb +0 -29
  28. data/lib/metacrunch/cli/command_definition.rb +0 -41
  29. data/lib/metacrunch/cli/command_registry.rb +0 -17
  30. data/lib/metacrunch/cli/main.rb +0 -16
  31. data/lib/metacrunch/command.rb +0 -27
  32. data/lib/metacrunch/file/reader/file_system_fetcher.rb +0 -21
  33. data/lib/metacrunch/file/reader/plain_file_reader.rb +0 -33
  34. data/lib/metacrunch/file/reader/scp_fetcher.rb +0 -56
  35. data/lib/metacrunch/file/reader/tar_file_reader.rb +0 -37
  36. data/lib/metacrunch/file/reader/zip_file_reader.rb +0 -30
  37. data/lib/metacrunch/file/reader.rb +0 -72
  38. data/lib/metacrunch/file/writer/plain_file_writer.rb +0 -19
  39. data/lib/metacrunch/file/writer/tar_file_writer.rb +0 -26
  40. data/lib/metacrunch/file/writer/zip_file_writer.rb +0 -29
  41. data/lib/metacrunch/file/writer.rb +0 -26
  42. data/lib/metacrunch/file.rb +0 -24
  43. data/lib/metacrunch/file_reader_entry.rb +0 -21
  44. data/lib/metacrunch/file_writer.rb +0 -40
  45. data/lib/metacrunch/hash.rb +0 -51
  46. data/lib/metacrunch/parallel.rb +0 -69
  47. data/lib/metacrunch/processor.rb +0 -10
  48. data/lib/metacrunch/snr/field.rb +0 -31
  49. data/lib/metacrunch/snr/section.rb +0 -74
  50. data/lib/metacrunch/snr.rb +0 -117
  51. data/lib/metacrunch/tar_writer.rb +0 -26
  52. data/lib/metacrunch/transformator/transformation/step.rb +0 -45
  53. data/lib/metacrunch/transformator/transformation.rb +0 -48
  54. data/lib/metacrunch/transformator.rb +0 -5
  55. data/lib/metacrunch/transformer/helper.rb +0 -29
  56. data/lib/metacrunch/transformer/step.rb +0 -37
  57. data/lib/metacrunch/transformer.rb +0 -38
@@ -1,33 +0,0 @@
1
- require_relative "../reader"
2
-
3
- class Metacrunch::File::Reader::PlainFileReader
4
- include Enumerable
5
-
6
- def self.accepts?(filename)
7
- true
8
- end
9
-
10
- def initialize(filename)
11
- @filename = filename
12
- end
13
-
14
- def each
15
- return enum_for(__method__) unless block_given?
16
-
17
- io =
18
- if @filename.end_with?("gz") # catches tgz and tar.gz
19
- Zlib::GzipReader.open(@filename)
20
- else
21
- File.open(@filename)
22
- end
23
-
24
- yield Metacrunch::File.new({
25
- content: io.read,
26
- entry_name: File.basename(@filename),
27
- file_name: @filename,
28
- is_directory: false
29
- })
30
-
31
- io.close
32
- end
33
- end
@@ -1,56 +0,0 @@
1
- require "etc"
2
- require "net/scp"
3
- require "net/ssh"
4
- require "securerandom"
5
- require_relative "../reader"
6
-
7
- class Metacrunch::File::Reader::ScpFetcher
8
- include Enumerable
9
-
10
- RECOGNIZED_PROTOCOL_REGEX = /\Ascp:\/\//
11
- TILDE_REPLACEMENT = "/__TILDE__"
12
-
13
- def self.accepts?(url)
14
- !!url[RECOGNIZED_PROTOCOL_REGEX]
15
- end
16
-
17
- def initialize(url, options = {})
18
- URI(url.sub("~", TILDE_REPLACEMENT)).try do |_uri|
19
- @host = _uri.host
20
- @password = options[:password] || _uri.password
21
- @path = _uri.path.sub(TILDE_REPLACEMENT, "~")
22
- @username = options[:username] || _uri.user || Etc.getlogin
23
- end
24
- end
25
-
26
- def each
27
- return enum_for(__method__) unless block_given?
28
-
29
- begin
30
- begin
31
- Dir.mkdir temporary_directory = File.join(Dir.tmpdir, SecureRandom.hex)
32
- rescue Errno::EEXIST
33
- retry
34
- end
35
-
36
- remote_filenames.each do |_remote_filename|
37
- _local_filename = File.join(temporary_directory, File.basename(_remote_filename))
38
- Net::SCP.download!(@host, @username, _remote_filename, _local_filename, ssh: { password: @password })
39
- yield _local_filename
40
- File.delete(_local_filename)
41
- end
42
- ensure
43
- FileUtils.remove_dir(temporary_directory)
44
- end
45
- end
46
-
47
- private
48
-
49
- def remote_filenames
50
- @remote_filenames ||= [].tap do |_remote_filenames|
51
- Net::SSH.start(@host, @username, password: @password) do |_ssh|
52
- _remote_filenames.concat _ssh.exec!("ruby -e \"puts Dir.glob(File.expand_path('#{@path}'))\"").try(:split, "\n") || []
53
- end
54
- end
55
- end
56
- end
@@ -1,37 +0,0 @@
1
- require "rubygems/package"
2
- require_relative "../reader"
3
-
4
- class Metacrunch::File::Reader::TarFileReader
5
- include Enumerable
6
-
7
- def self.accepts?(filename)
8
- !!filename[/\.tar\Z|\.tar\.gz\Z|\.tgz\Z/]
9
- end
10
-
11
- def initialize(filename)
12
- @filename = filename
13
- end
14
-
15
- def each
16
- return enum_for(__method__) unless block_given?
17
-
18
- io =
19
- if @filename.end_with?("gz") # catches tgz and tar.gz
20
- Zlib::GzipReader.open(@filename)
21
- else
22
- File.open(@filename)
23
- end
24
-
25
- Gem::Package::TarReader.new(io).each do |_tar_entry|
26
- unless _tar_entry.directory?
27
- yield Metacrunch::File.new({
28
- content: _tar_entry.read,
29
- entry_name: _tar_entry.full_name,
30
- file_name: @filename
31
- })
32
- end
33
- end
34
-
35
- io.close
36
- end
37
- end
@@ -1,30 +0,0 @@
1
- require "zip"
2
- require_relative "../reader"
3
-
4
- class Metacrunch::File::Reader::ZipFileReader
5
- include Enumerable
6
-
7
- def self.accepts?(filename)
8
- !!filename[/\.zip\Z/]
9
- end
10
-
11
- def initialize(filename)
12
- @filename = filename
13
- end
14
-
15
- def each
16
- return enum_for(__method__) unless block_given?
17
-
18
- Zip::File.open(@filename) do |_zip_file|
19
- _zip_file.each do |_zip_entry|
20
- unless _zip_entry.directory?
21
- yield Metacrunch::File.new({
22
- content: _zip_entry.get_input_stream.read,
23
- entry_name: _zip_entry.name,
24
- file_name: @filename
25
- })
26
- end
27
- end
28
- end
29
- end
30
- end
@@ -1,72 +0,0 @@
1
- require_relative "../file"
2
- require_relative "../processor"
3
-
4
- class Metacrunch::File::Reader < Metacrunch::Processor
5
- require_relative "./reader/file_system_fetcher"
6
- require_relative "./reader/plain_file_reader"
7
- require_relative "./reader/scp_fetcher"
8
- require_relative "./reader/tar_file_reader"
9
- require_relative "./reader/zip_file_reader"
10
-
11
- include Enumerable
12
-
13
- def initialize(options = {})
14
- @bulk_size = options[:bulk_size].try(:to_i) || 1
15
- @urls = [
16
- options[:filename], options[:filenames],
17
- options[:url], options[:urls]
18
- ]
19
- .flatten
20
- .compact
21
- .map do |_filename_or_url|
22
- if (_url = _filename_or_url)[/\A\w+:\/\//]
23
- _url
24
- else
25
- Dir.glob(File.expand_path(_filename_or_url)).map do |_filename|
26
- "file://#{_filename}"
27
- end
28
- end
29
- end
30
- .flatten # because there might be arrays again because of Dir.glob
31
-
32
- @force_content_encoding = options[:force_content_encoding]
33
- @password = options[:password]
34
- @username = options[:username]
35
- end
36
-
37
- def call(items = [], pipeline = nil)
38
- @chunks_of_entries_enumerator ||= each_slice(@bulk_size) # instance method from Enumerable
39
-
40
- begin
41
- items.concat(@chunks_of_entries_enumerator.next)
42
- rescue StopIteration
43
- pipeline.try(:terminate!)
44
- end
45
- end
46
-
47
- def each
48
- return enum_for(__method__) unless block_given?
49
-
50
- @urls.each do |_url|
51
- [FileSystemFetcher, ScpFetcher].find do |_fetcher|
52
- _fetcher.accepts?(_url)
53
- end
54
- .try do |_appropriate_fetcher|
55
- _appropriate_fetcher.new(_url, username: @username, password: @password).each do |_filename|
56
- [TarFileReader, ZipFileReader, PlainFileReader].find do |_reader| # PlainFileReader as last will read any file
57
- _reader.accepts?(_filename)
58
- end
59
- .try do |_appropriate_reader|
60
- _appropriate_reader.new(_filename).each do |_file|
61
- if @force_content_encoding.present?
62
- _file.content.try(:force_encoding, @force_content_encoding)
63
- end
64
-
65
- yield _file
66
- end
67
- end
68
- end
69
- end
70
- end
71
- end
72
- end
@@ -1,19 +0,0 @@
1
- require_relative "../writer"
2
-
3
- class Metacrunch::File::Writer::PlainFileWriter
4
- def self.supports?(filename)
5
- true
6
- end
7
-
8
- def initialize(filename)
9
- @io = File.open(filename, "w")
10
- end
11
-
12
- def close
13
- @io.close
14
- end
15
-
16
- def write(options = {})
17
- @io.write(options[:content])
18
- end
19
- end
@@ -1,26 +0,0 @@
1
- require "rubygems/package"
2
- require "zlib"
3
- require_relative "../writer"
4
-
5
- class Metacrunch::File::Writer::TarFileWriter
6
- def self.supports?(filename)
7
- !!filename[/\.tar\Z|\.tar\.gz\Z|\.tgz\Z/]
8
- end
9
-
10
- def initialize(filename)
11
- @io = File.open(filename, "w")
12
- @io = Zlib::GzipWriter.new(@io) if filename.end_with?("gz")
13
- @tar_writer = Gem::Package::TarWriter.new(@io)
14
- end
15
-
16
- def close
17
- @tar_writer.close
18
- @io.close
19
- end
20
-
21
- def write(options = {})
22
- @tar_writer.add_file_simple(options[:entry_name], 0644, options[:content].bytesize) do |_tar_entry|
23
- _tar_entry.write(options[:content])
24
- end
25
- end
26
- end
@@ -1,29 +0,0 @@
1
- require "zip"
2
- require_relative "../writer"
3
-
4
- class Metacrunch::File::Writer::ZipFileWriter
5
- def self.supports?(filename)
6
- !!filename[/\.zip\Z/]
7
- end
8
-
9
- def self.write(*args)
10
- end
11
-
12
- =begin
13
- def each
14
- return enum_for(__method__) unless block_given?
15
-
16
- Zip::File.open(@filename) do |_zip_file|
17
- _zip_file.each do |_zip_entry|
18
- unless _zip_entry.directory?
19
- yield Metacrunch::File.new({
20
- content: _zip_entry.get_input_stream.read,
21
- entry_name: _zip_entry.name,
22
- file_name: @filename
23
- })
24
- end
25
- end
26
- end
27
- end
28
- =end
29
- end
@@ -1,26 +0,0 @@
1
- require_relative "../file"
2
- require_relative "../processor"
3
-
4
- class Metacrunch::File::Writer < Metacrunch::Processor
5
- require_relative "./writer/plain_file_writer"
6
- require_relative "./writer/tar_file_writer"
7
- require_relative "./writer/zip_file_writer"
8
-
9
- def initialize(filename)
10
- @writer =
11
- [TarFileWriter, ZipFileWriter, PlainFileWriter].find do |_writer|
12
- _writer.supports?(filename)
13
- end
14
- .try do |_appropriate_writer_class|
15
- _appropriate_writer_class.new(filename)
16
- end
17
- end
18
-
19
- def close
20
- @writer.close
21
- end
22
-
23
- def write(*args)
24
- @writer.write(*args)
25
- end
26
- end
@@ -1,24 +0,0 @@
1
- require_relative "../metacrunch"
2
-
3
- class Metacrunch::File
4
- require_relative "./file/reader"
5
- require_relative "./file/writer"
6
-
7
- attr_accessor :content
8
- attr_accessor :entry_name # equals file_name for plain files
9
- attr_accessor :file_name
10
-
11
- def initialize(options = {})
12
- @content = options[:content]
13
- @entry_name = options[:entry_name]
14
- @file_name = options[:file_name]
15
- end
16
-
17
- def to_h
18
- {
19
- content: @content,
20
- entry_name: @entry_name,
21
- file_name: @file_name
22
- }
23
- end
24
- end
@@ -1,21 +0,0 @@
1
- require_relative "./file_reader"
2
-
3
- module Metacrunch
4
- class FileReader
5
- class Entry
6
-
7
- attr_reader :filename, :archive_filename, :contents
8
-
9
- def initialize(filename:, archive_filename:nil, contents:nil)
10
- @filename = filename
11
- @archive_filename = archive_filename.presence
12
- @contents = contents
13
- end
14
-
15
- def from_archive?
16
- @archive_filename != nil
17
- end
18
-
19
- end
20
- end
21
- end
@@ -1,40 +0,0 @@
1
- module Metacrunch
2
- class FileWriter
3
-
4
- class FileExistError < RuntimeError ; end
5
-
6
-
7
- def initialize(filename, override: false, compress: nil)
8
- @path = ::File.expand_path(filename)
9
- @compressed = (compress ||= @path.ends_with?(".gz"))
10
-
11
- if ::File.exist?(@path) && !override
12
- raise FileExistError, "File #{@path} already exists. Set override = true to override the existing file."
13
- end
14
- end
15
-
16
- def write(data, options = {})
17
- if block_given?
18
- yield(io)
19
- else
20
- io.write(data)
21
- end
22
- end
23
-
24
- def flush
25
- @io.flush if @io
26
- end
27
-
28
- def close
29
- flush
30
- @io.close if @io
31
- end
32
-
33
- private
34
-
35
- def io
36
- @io ||= (@compressed == true) ? Zlib::GzipWriter.open(@path) : ::File.open(@path, "w")
37
- end
38
-
39
- end
40
- end
@@ -1,51 +0,0 @@
1
- require_relative "../metacrunch"
2
-
3
- module Metacrunch::Hash
4
- def self.add(object, *args)
5
- if args.length < 2
6
- raise ArgumentError
7
- else
8
- return args.last if args.last.nil? || (args.last.respond_to?(:empty?) && args.last.empty?)
9
-
10
- if args.length == 2
11
- args_first_is_a_string = args.first.is_a?(String) # memoize
12
-
13
- if args_first_is_a_string && args.first.include?("/")
14
- add(object, *args.first.split("/"), args.last)
15
- else
16
- if args_first_is_a_string && args.first.start_with?(":")
17
- _add(object, args.first[1..-1].to_sym, args.last)
18
- else
19
- _add(object, args.first, args.last)
20
- end
21
- end
22
- else
23
- nested_hash = args[0..-3].inject(object) do |_memo, _key|
24
- if _key.is_a?(String) && _key.start_with?(":")
25
- _key = _key[1..-1].to_sym
26
- end
27
-
28
- _memo[_key] ||= object.class.new
29
- end
30
-
31
- _add(nested_hash, args[-2], args[-1])
32
- end
33
- end
34
- end
35
-
36
- private
37
-
38
- def self._add(hash, key, value)
39
- #if value.is_a?(FalseClass) || (value.respond_to?(:empty?) ? !value.empty? : !!value) # like ActiveSupport implements blank?/present?
40
- if hash[key].nil?
41
- hash[key] = value.is_a?(Array) && value.length == 1 ? value.first : value
42
- elsif hash[key].is_a?(Array)
43
- (hash[key] << value).flatten!(1)
44
- else
45
- (hash[key] = [hash[key], value]).flatten!(1)
46
- end
47
- #end
48
-
49
- hash[key]
50
- end
51
- end
@@ -1,69 +0,0 @@
1
- module Metacrunch
2
- class Parallel
3
-
4
- module DSL
5
- def parallel(enumerable, options = {}, &block)
6
- Parallel.each(enumerable, options, &block)
7
- end
8
- end
9
-
10
- def self.each(enumerable, options = {}, &block)
11
- self.new(enumerable, options, &block).call
12
- end
13
-
14
- def initialize(enumerable, options = {}, &block)
15
- @enumerable = enumerable
16
- @callable = block
17
- @no_of_procs = options[:in_processes] || 0
18
- @on_process_finished = options[:on_process_finished] || -> {}
19
-
20
- unless block_given?
21
- raise ArgumentError, "you must provide a block"
22
- end
23
-
24
- unless @enumerable.respond_to?(:each)
25
- raise ArgumentError, "enumerable must respond to each"
26
- end
27
-
28
- unless @on_process_finished.respond_to?(:call)
29
- raise ArgumentError, "on_process_finished must respond to call"
30
- end
31
- end
32
-
33
- def call
34
- @enumerable.each do |_value|
35
- if @no_of_procs == 0
36
- @callable.call(_value)
37
- @on_process_finished.call
38
- else
39
- fork_process do
40
- @callable.call(_value)
41
- end
42
-
43
- if processes_limit_reached?
44
- wait_for_some_process_to_terminate
45
- @on_process_finished.call
46
- end
47
- end
48
- end
49
- ensure
50
- Process.waitall.each { @on_process_finished.call }
51
- end
52
-
53
- private
54
-
55
- def fork_process(&block)
56
- (@pids ||= []).push(fork(&block))
57
- end
58
-
59
- def processes_limit_reached?
60
- (@pids || []).length >= @no_of_procs
61
- end
62
-
63
- def wait_for_some_process_to_terminate
64
- pid_of_finished_process = Process.wait
65
- @pids.delete(pid_of_finished_process)
66
- end
67
-
68
- end
69
- end
@@ -1,10 +0,0 @@
1
- require_relative "../metacrunch"
2
-
3
- class Metacrunch::Processor
4
- def initialize(options = {})
5
- @options ||= options
6
- end
7
-
8
- def call(items = [], pipeline = nil)
9
- end
10
- end
@@ -1,31 +0,0 @@
1
- module Metacrunch
2
- class SNR
3
- class Section
4
- class Field
5
-
6
- attr_reader :name
7
- attr_accessor :value
8
-
9
- def initialize(name, value)
10
- raise ArgumentError, "required Field#name not given" if name.nil?
11
-
12
- @name = name
13
- @value = value
14
- end
15
-
16
- # ------------------------------------------------------------------------------
17
- # Serialization
18
- # ------------------------------------------------------------------------------
19
-
20
- def to_xml(builder)
21
- if value.respond_to?(:to_xml)
22
- value.to_xml(root: self.name, builder: builder, skip_instruct: true)
23
- else
24
- builder.tag!(self.name, self.value)
25
- end
26
- end
27
-
28
- end
29
- end
30
- end
31
- end
@@ -1,74 +0,0 @@
1
- module Metacrunch
2
- class SNR
3
- class Section
4
-
5
- attr_reader :name
6
-
7
- def initialize(name)
8
- raise ArgumentError, "required Section#name not given" if name.nil?
9
-
10
- @name = name
11
- @fields = []
12
- end
13
-
14
- # ------------------------------------------------------------------------------
15
- # Common API
16
- # ------------------------------------------------------------------------------
17
-
18
- #
19
- # Adds a field
20
- #
21
- def add(field_name, value)
22
- if value.is_a?(Array)
23
- value.each do |_value|
24
- add_field(Field.new(field_name, _value))
25
- end
26
- else
27
- add_field(Field.new(field_name, value))
28
- end
29
- end
30
-
31
- # ------------------------------------------------------------------------------
32
- # Fields
33
- # ------------------------------------------------------------------------------
34
-
35
- #
36
- # Return all fields. A name can be provided to filter fields by name.
37
- #
38
- # @param [String, nil] name
39
- # @return [Array<Metacrunch::SNR::Section::Field>]
40
- #
41
- def fields(name = nil)
42
- if name
43
- @fields.select{|field| field.name == name}
44
- else
45
- @fields
46
- end
47
- end
48
-
49
- #
50
- # Adds a new field to this section.
51
- #
52
- # @param [Metacrunch::SNR::Section::Field] field
53
- # @return [Metacrunch::SNR::Section::Field]
54
- #
55
- def add_field(field)
56
- @fields << field
57
- field
58
- end
59
-
60
- # ------------------------------------------------------------------------------
61
- # Serialization
62
- # ------------------------------------------------------------------------------
63
-
64
- def to_xml(builder)
65
- builder.tag!(self.name) do
66
- @fields.each do |_field|
67
- _field.to_xml(builder)
68
- end
69
- end
70
- end
71
-
72
- end
73
- end
74
- end