metacrunch 2.2.3 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/Gemfile +11 -13
- data/License.txt +1 -1
- data/Readme.md +139 -2
- data/bin/console +9 -6
- data/exe/metacrunch +1 -2
- data/lib/metacrunch/cli.rb +62 -14
- data/lib/metacrunch/db/reader.rb +27 -0
- data/lib/metacrunch/db/writer.rb +23 -0
- data/lib/metacrunch/db.rb +8 -0
- data/lib/metacrunch/fs/entry.rb +17 -0
- data/lib/metacrunch/{file_reader.rb → fs/reader.rb} +9 -10
- data/lib/metacrunch/fs.rb +6 -0
- data/lib/metacrunch/job/buffer.rb +26 -0
- data/lib/metacrunch/job/dsl/option_support.rb +102 -0
- data/lib/metacrunch/job/dsl.rb +42 -0
- data/lib/metacrunch/job.rb +149 -0
- data/lib/metacrunch/test_utils/dummy_callable.rb +14 -0
- data/lib/metacrunch/test_utils/dummy_destination.rb +21 -0
- data/lib/metacrunch/test_utils/dummy_source.rb +22 -0
- data/lib/metacrunch/test_utils.rb +7 -0
- data/lib/metacrunch/version.rb +1 -1
- data/lib/metacrunch.rb +14 -27
- data/metacrunch.gemspec +5 -10
- metadata +24 -144
- data/lib/metacrunch/cli/base.rb +0 -29
- data/lib/metacrunch/cli/command_definition.rb +0 -41
- data/lib/metacrunch/cli/command_registry.rb +0 -17
- data/lib/metacrunch/cli/main.rb +0 -16
- data/lib/metacrunch/command.rb +0 -27
- data/lib/metacrunch/file/reader/file_system_fetcher.rb +0 -21
- data/lib/metacrunch/file/reader/plain_file_reader.rb +0 -33
- data/lib/metacrunch/file/reader/scp_fetcher.rb +0 -56
- data/lib/metacrunch/file/reader/tar_file_reader.rb +0 -37
- data/lib/metacrunch/file/reader/zip_file_reader.rb +0 -30
- data/lib/metacrunch/file/reader.rb +0 -72
- data/lib/metacrunch/file/writer/plain_file_writer.rb +0 -19
- data/lib/metacrunch/file/writer/tar_file_writer.rb +0 -26
- data/lib/metacrunch/file/writer/zip_file_writer.rb +0 -29
- data/lib/metacrunch/file/writer.rb +0 -26
- data/lib/metacrunch/file.rb +0 -24
- data/lib/metacrunch/file_reader_entry.rb +0 -21
- data/lib/metacrunch/file_writer.rb +0 -40
- data/lib/metacrunch/hash.rb +0 -51
- data/lib/metacrunch/parallel.rb +0 -69
- data/lib/metacrunch/processor.rb +0 -10
- data/lib/metacrunch/snr/field.rb +0 -31
- data/lib/metacrunch/snr/section.rb +0 -74
- data/lib/metacrunch/snr.rb +0 -117
- data/lib/metacrunch/tar_writer.rb +0 -26
- data/lib/metacrunch/transformator/transformation/step.rb +0 -45
- data/lib/metacrunch/transformator/transformation.rb +0 -48
- data/lib/metacrunch/transformator.rb +0 -5
- data/lib/metacrunch/transformer/helper.rb +0 -29
- data/lib/metacrunch/transformer/step.rb +0 -37
- data/lib/metacrunch/transformer.rb +0 -38
data/lib/metacrunch/snr.rb
DELETED
@@ -1,117 +0,0 @@
|
|
1
|
-
module Metacrunch
  #
  # A SNR (Simple Normalized Record) is a small container meant to be the
  # target of data normalization routines.
  #
  # A record is made of sections. Every section is identified by its name,
  # which is unique within the record, and holds the fields that carry the
  # actual values.
  #
  # Example: when normalizing MAB XML data for a search engine, the
  # normalized values can be collected in a SNR and serialized with #to_xml.
  #
  class SNR
    require_relative "./snr/section"
    require_relative "./snr/field"

    # ------------------------------------------------------------------------------
    # Common API
    # ------------------------------------------------------------------------------

    #
    # Stores a value under the given field of the given section. A section
    # that does not exist yet is created first.
    #
    # @param [String] section_name
    # @param [String] field_name
    # @param [#to_s] value
    # @return [Metacrunch::SNR::Section] the section the value was added to
    #
    def add(section_name, field_name, value)
      target_section = section(section_name) || Section.new(section_name)
      target_section.add(field_name, value)
      add_section(target_section)
    end

    #
    # Looks up the values of all fields addressed by a path.
    #
    # @param [String] path Section and field name separated by "/",
    #   e.g. "section/field".
    # @return [Array<*>] the field values; empty when the section is unknown
    #   or the path carries no field name.
    #
    def values(path)
      section_name, field_name = path.split("/")
      found = section(section_name)
      return [] unless found && field_name

      found.fields(field_name).map(&:value)
    end

    # ------------------------------------------------------------------------------
    # Sections
    # ------------------------------------------------------------------------------

    #
    # All sections of this record.
    #
    # @return [Array<Metacrunch::SNR::Section>]
    #
    def sections
      sections_struct.values
    end

    #
    # Fetches a section by its name.
    #
    # @param [String] name Name of the section
    # @return [Metacrunch::SNR::Section, nil] the section, or nil when no
    #   section with that name exists.
    #
    def section(name)
      sections_struct[name]
    end

    #
    # Adds or replaces a section. Sections are keyed by name, so adding a
    # section whose name is already taken overrides the existing one.
    #
    # @param [Metacrunch::SNR::Section] section
    # @return [Metacrunch::SNR::Section]
    #
    def add_section(section)
      sections_struct[section.name] = section
    end

    # ------------------------------------------------------------------------------
    # Serialization
    # ------------------------------------------------------------------------------

    #
    # Serializes the record as XML: an XML declaration followed by a root
    # <snr> element into which every section renders itself.
    #
    # @return [String] The SNR as XML string.
    #
    def to_xml
      xml = Builder::XmlMarkup.new(indent: 2)
      xml.instruct!(:xml, encoding: "UTF-8")
      xml.snr do
        sections.each { |sec| sec.to_xml(xml) }
      end
    end

    private

    #
    # Lazily created backing store of the sections, keyed by section name.
    #
    # @return [Hash{String => Metacrunch::SNR::Section}]
    # @private
    #
    def sections_struct
      @sections_struct ||= {}
    end
  end
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require "rubygems/package"
|
2
|
-
|
3
|
-
module Metacrunch
  #
  # A FileWriter that appends entries to a tar archive instead of writing
  # raw bytes.
  #
  class TarWriter < FileWriter
    #
    # Adds one file entry to the archive.
    #
    # @param [String] data Content of the entry. Its bytesize is always used
    #   as the declared size of the entry, even when a block is given.
    # @param [Hash] options
    # @option options [String] :filename Name of the entry inside the
    #   archive (required).
    # @yield [entry_io] When a block is given it receives the entry's IO and
    #   is responsible for writing the content; otherwise `data` is written.
    # @raise [ArgumentError] if the :filename option is blank.
    #
    def write(data, options = {})
      filename = options[:filename]
      raise ArgumentError, "Missing option 'filename'" if filename.blank?

      io.add_file_simple(filename, 0o644, data.bytesize) do |entry_io|
        block_given? ? yield(entry_io) : entry_io.write(data)
      end
    end

    private

    #
    # Wraps the IO provided by the superclass in a tar writer. Both the
    # underlying IO and the tar writer are created once and then reused.
    #
    # @return [Gem::Package::TarWriter]
    #
    def io
      @io ||= super
      @tar_io ||= Gem::Package::TarWriter.new(@io)
    end
  end
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
require_relative "../transformation"
|
2
|
-
|
3
|
-
#
# A single step of a transformation. A step is normally created by its
# transformation and works on that transformation's source and target. It
# can also be created standalone from an options hash, in which case a
# minimal source/target holder takes the place of the transformation.
#
class Metacrunch::Transformator::Transformation::Step
  # Holder used when a step is built without a transformation. It is defined
  # once here because Struct.new creates a new class on every invocation;
  # calling it inside #initialize produced a fresh anonymous class for every
  # standalone step instance.
  StandaloneTransformation = Struct.new(:source, :target)
  private_constant :StandaloneTransformation

  # The transformation (or standalone holder) this step operates on.
  attr_accessor :transformation

  #
  # @param transformation [Object, Hash, nil] the owning transformation. A
  #   Hash passed in this position is treated as `options`.
  # @param options [Hash] only consulted when no transformation is given
  # @option options [Object] :source initial source of the standalone holder
  # @option options [Object] :target initial target of the standalone holder
  #
  def initialize(transformation = nil, options = {})
    # Support Step.new(source: ..., target: ...) without a leading nil.
    if transformation.is_a?(Hash)
      options = transformation
      transformation = nil
    end

    @transformation =
      transformation || StandaloneTransformation.new(options[:source], options[:target])
  end

  #
  # Performs the step. The base implementation does nothing and returns nil.
  #
  def call
  end

  #
  # Each step has transparent access to all methods of its transformation:
  # anything the transformation responds to is forwarded to it.
  #
  def method_missing(method_name, *args, &block)
    if @transformation.respond_to?(method_name)
      @transformation.send(method_name, *args, &block)
    else
      super
    end
  end

  # Keeps respond_to? consistent with the forwarding done in #method_missing.
  def respond_to_missing?(method_name, include_private = false)
    @transformation.respond_to?(method_name) || super
  end

  # avoid method_missing penalty for the most used transformation methods
  def source; @transformation.source; end
  def source=(value); @transformation.source=(value); end
  def target; @transformation.target; end
  def target=(value); @transformation.target=(value); end
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
require_relative "../transformator"
|
2
|
-
|
3
|
-
#
# A transformation runs an ordered list of steps against a source object and
# returns the resulting target. The step list is declared on the class via
# .steps (alias .sequence); entries may be step classes, procs or any other
# object responding to #call.
#
class Metacrunch::Transformator::Transformation
  require_relative "./transformation/step"

  # Object handed to the current (or most recent) #call.
  attr_accessor :source
  # Object the steps write to; returned by #call.
  attr_accessor :target

  class << self
    #
    # Combined reader/writer for the step list of this class.
    #
    # @param value [Array, nil] when given, becomes the new step list
    # @return [Array, nil] the step list; nil if none was ever declared on
    #   this class
    #
    def steps(value = nil)
      return @steps unless value

      @steps = value
    end
    alias_method :sequence, :steps
  end

  #
  # Convenience for `new.call(*args)`.
  #
  def self.call(*args)
    new.call(*args)
  end

  #
  # Requires every .rb file directly inside the given directory.
  #
  # since a transformation can have many steps, writing a "require" for each is tedious
  #
  # @param directory [String] expanded with File.expand_path before globbing
  #
  def self.require_directory(directory)
    # Sorted explicitly so the load order does not depend on the order in
    # which the file system returns directory entries.
    Dir.glob("#{File.expand_path(directory)}/*.rb").sort.each do |filename|
      require filename
    end
  end

  #
  # Builds the step instances for this transformation. Step classes are
  # instantiated once here and the resulting objects are reused by every
  # #call, so instance variables set by a step are retained between calls.
  # Entries that are not classes (e.g. procs) are kept as they are.
  #
  def initialize
    # A class that never declared steps has no list at all (nil); treat that
    # as an empty list instead of failing on nil.flatten.
    declared_steps = self.class.steps || []

    @steps = declared_steps.flatten.map do |step|
      step.is_a?(Class) ? step.new(self) : step
    end
  end

  #
  # Runs all steps in order.
  #
  # @param source [Object] the object to transform
  # @param options [Hash]
  # @option options [Object] :target initial target object
  # @return [Object] the target after the last step has run
  #
  def call(source, options = {})
    @source = source
    @target = options[:target]

    @steps.each do |step|
      # Procs are evaluated in the context of this transformation, all other
      # steps are simply called.
      step.is_a?(Proc) ? instance_exec(&step) : step.call
    end

    @target
  end
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
require_relative "../transformer"
|
2
|
-
|
3
|
-
module Metacrunch
  class Transformer
    #
    # Helper object bound to one transformer. It exposes the transformer's
    # source, target and options by delegating to it on every call.
    #
    class Helper
      # @return [Object] the transformer this helper was created for
      attr_reader :transformer

      # @param transformer [Object] the transformer to delegate to
      def initialize(transformer)
        @transformer = transformer
      end

      # @return [Object] the transformer's current source
      def source
        transformer.source
      end

      # @return [Object] the transformer's current target
      def target
        transformer.target
      end

      # @return [Object] the transformer's options
      def options
        transformer.options
      end
    end
  end
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
require_relative "../transformer"
|
2
|
-
|
3
|
-
module Metacrunch
  class Transformer
    #
    # Base class for a transformation step. Subclasses implement #perform;
    # source, target, options and helper are delegated to the transformer
    # the step was created for.
    #
    class Step
      # @return [Object] the transformer this step belongs to
      attr_reader :transformer

      # @param transformer [Object] the transformer to delegate to
      def initialize(transformer)
        @transformer = transformer
      end

      #
      # Executes the step. Must be overridden by subclasses.
      #
      # @raise [NotImplementedError] always, in this base implementation
      #
      def perform
        raise NotImplementedError, "You must implement .perform() in your rule sub-class"
      end

      # @return [Object] the transformer's current source
      def source
        transformer.source
      end

      # @return [Object] the transformer's current target
      def target
        transformer.target
      end

      # @return [Object] the transformer's options
      def options
        transformer.options
      end

      # @return [Object] the transformer's helper object
      def helper
        transformer.helper
      end
    end
  end
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
module Metacrunch
  #
  # Holds a source, a target and options, and runs transformation steps
  # against them. A step is given either as a block or as a Step class.
  #
  class Transformer
    require_relative "./transformer/step"
    require_relative "./transformer/helper"

    attr_accessor :source, :target, :options
    # The step created by the most recent call to #transform.
    attr_reader :step

    # @param source [Object, nil]
    # @param target [Object, nil]
    # @param options [Hash]
    def initialize(source: nil, target: nil, options: {})
      @source, @target, @options = source, target, options
    end

    #
    # Runs a single step.
    #
    # With a block, a plain Step is created and the block is evaluated in the
    # context of that step. Without a block, `step_class` is instantiated
    # with this transformer and its #perform is called.
    #
    # @param step_class [Class, #to_s, nil] a step class, or a name that is
    #   resolved to one with `constantize`; ignored when a block is given
    # @return [Object] the result of the block or of #perform
    # @raise [ArgumentError] if neither a block nor a step class is given
    #
    def transform(step_class = nil, &block)
      if block_given?
        @step = Step.new(self)
        return @step.instance_eval(&block)
      end

      raise ArgumentError, "You need to provide a STEP or a block" if step_class.nil?

      klass = step_class.is_a?(Class) ? step_class : step_class.to_s.constantize
      @step = klass.new(self)
      @step.perform
    end

    #
    # The helper object of this transformer, created on first use.
    #
    # @return [Metacrunch::Transformer::Helper]
    #
    def helper
      @helper ||= Helper.new(self)
    end

    #
    # Makes the methods of a module available on the helper.
    #
    # The module is included into the helper's class, not into the single
    # helper object, so it becomes available to every instance of that class.
    #
    # @param helper_module [Module]
    # @raise [ArgumentError] if `helper_module` is not a Module
    #
    def register_helper(helper_module)
      raise ArgumentError, "Must be a module" unless helper_module.is_a?(Module)

      helper.class.send(:include, helper_module) # TODO: Benchmark this
    end
  end
end
|