metacrunch 2.2.3 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/Gemfile +11 -13
- data/License.txt +1 -1
- data/Readme.md +139 -2
- data/bin/console +9 -6
- data/exe/metacrunch +1 -2
- data/lib/metacrunch/cli.rb +62 -14
- data/lib/metacrunch/db/reader.rb +27 -0
- data/lib/metacrunch/db/writer.rb +23 -0
- data/lib/metacrunch/db.rb +8 -0
- data/lib/metacrunch/fs/entry.rb +17 -0
- data/lib/metacrunch/{file_reader.rb → fs/reader.rb} +9 -10
- data/lib/metacrunch/fs.rb +6 -0
- data/lib/metacrunch/job/buffer.rb +26 -0
- data/lib/metacrunch/job/dsl/option_support.rb +102 -0
- data/lib/metacrunch/job/dsl.rb +42 -0
- data/lib/metacrunch/job.rb +149 -0
- data/lib/metacrunch/test_utils/dummy_callable.rb +14 -0
- data/lib/metacrunch/test_utils/dummy_destination.rb +21 -0
- data/lib/metacrunch/test_utils/dummy_source.rb +22 -0
- data/lib/metacrunch/test_utils.rb +7 -0
- data/lib/metacrunch/version.rb +1 -1
- data/lib/metacrunch.rb +14 -27
- data/metacrunch.gemspec +5 -10
- metadata +24 -144
- data/lib/metacrunch/cli/base.rb +0 -29
- data/lib/metacrunch/cli/command_definition.rb +0 -41
- data/lib/metacrunch/cli/command_registry.rb +0 -17
- data/lib/metacrunch/cli/main.rb +0 -16
- data/lib/metacrunch/command.rb +0 -27
- data/lib/metacrunch/file/reader/file_system_fetcher.rb +0 -21
- data/lib/metacrunch/file/reader/plain_file_reader.rb +0 -33
- data/lib/metacrunch/file/reader/scp_fetcher.rb +0 -56
- data/lib/metacrunch/file/reader/tar_file_reader.rb +0 -37
- data/lib/metacrunch/file/reader/zip_file_reader.rb +0 -30
- data/lib/metacrunch/file/reader.rb +0 -72
- data/lib/metacrunch/file/writer/plain_file_writer.rb +0 -19
- data/lib/metacrunch/file/writer/tar_file_writer.rb +0 -26
- data/lib/metacrunch/file/writer/zip_file_writer.rb +0 -29
- data/lib/metacrunch/file/writer.rb +0 -26
- data/lib/metacrunch/file.rb +0 -24
- data/lib/metacrunch/file_reader_entry.rb +0 -21
- data/lib/metacrunch/file_writer.rb +0 -40
- data/lib/metacrunch/hash.rb +0 -51
- data/lib/metacrunch/parallel.rb +0 -69
- data/lib/metacrunch/processor.rb +0 -10
- data/lib/metacrunch/snr/field.rb +0 -31
- data/lib/metacrunch/snr/section.rb +0 -74
- data/lib/metacrunch/snr.rb +0 -117
- data/lib/metacrunch/tar_writer.rb +0 -26
- data/lib/metacrunch/transformator/transformation/step.rb +0 -45
- data/lib/metacrunch/transformator/transformation.rb +0 -48
- data/lib/metacrunch/transformator.rb +0 -5
- data/lib/metacrunch/transformer/helper.rb +0 -29
- data/lib/metacrunch/transformer/step.rb +0 -37
- data/lib/metacrunch/transformer.rb +0 -38
data/lib/metacrunch/snr.rb
DELETED
@@ -1,117 +0,0 @@
|
|
1
|
-
module Metacrunch
  #
  # A SNR (Simple Normalized Record) is a small data structure intended as the
  # target resource of data normalization routines.
  #
  # A SNR consists of sections. Each section is uniquely identified by its
  # name and can hold many fields, which store the actual values.
  #
  # A SNR can be serialized to XML (see #to_xml), which makes it easy to feed
  # into existing tools and workflows.
  #
  # Example: when normalizing MAB XML data for a search engine, the normalized
  # values can be collected in a SNR.
  #
  class SNR
    require_relative "./snr/section"
    require_relative "./snr/field"

    # ------------------------------------------------------------------------------
    # Common API
    # ------------------------------------------------------------------------------

    #
    # Adds a field with a value to a section. The section is created first if
    # no section with the given name exists yet.
    #
    # @param [String] section_name
    # @param [String] field_name
    # @param [#to_s] value
    #
    def add(section_name, field_name, value)
      target = section(section_name) || Section.new(section_name)
      target.add(field_name, value)
      add_section(target)
    end

    #
    # Returns the field values found at the given path.
    #
    # @param [String] path A path to the fields separated by /. E.g. section/field
    # @return [Array<*>] the values, or an empty array when the section is
    #   unknown or the path carries no field name
    #
    def values(path)
      section_name, field_name = path.split("/")
      found = section(section_name)
      return [] unless found && field_name

      found.fields(field_name).map(&:value)
    end

    # ------------------------------------------------------------------------------
    # Sections
    # ------------------------------------------------------------------------------

    #
    # Lazily created storage that maps section names to sections.
    #
    # @return [Hash{String => Metacrunch::SNR::Section}]
    # @private
    #
    def sections_struct
      @sections_struct ||= {}
    end
    private :sections_struct

    #
    # Returns all sections.
    #
    # @return [Array<Metacrunch::SNR::Section>]
    #
    def sections
      sections_struct.values
    end

    #
    # Looks up a section by name.
    #
    # @param [String] name Name of the section
    # @return [Metacrunch::SNR::Section, nil] the section, or nil if no section
    #   with the given name exists.
    #
    def section(name)
      sections_struct[name]
    end

    #
    # Adds / replaces a section. The section name is the unique identifier, so
    # adding a section whose name is already taken overrides the existing one.
    #
    # @param [Metacrunch::SNR::Section] section
    # @return [Metacrunch::SNR::Section]
    #
    def add_section(section)
      sections_struct[section.name] = section
    end

    # ------------------------------------------------------------------------------
    # Serialization
    # ------------------------------------------------------------------------------

    #
    # Transforms the SNR into XML. Every section renders itself into the
    # shared builder inside a root <snr> element.
    #
    # @return [String] The SNR as XML string.
    #
    def to_xml
      xml = Builder::XmlMarkup.new(indent: 2)
      xml.instruct!(:xml, :encoding => "UTF-8")
      xml.snr do
        sections.each { |current| current.to_xml(xml) }
      end
    end
  end
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require "rubygems/package"
|
2
|
-
|
3
|
-
module Metacrunch
  #
  # File writer that adds every written payload as one entry of a tar archive.
  #
  class TarWriter < FileWriter
    #
    # Adds one entry to the archive.
    #
    # @param [String] data payload; its byte size is used as the entry size
    # @param [Hash] options
    # @option options [String] :filename name of the entry (required)
    # @yield [io] optional block that receives the entry IO and writes the
    #   content itself instead of `data` being written
    # @raise [ArgumentError] if the :filename option is blank
    #
    def write(data, options = {})
      filename = options[:filename]
      raise ArgumentError, "Missing option 'filename'" if filename.blank?

      io.add_file_simple(filename, 0644, data.bytesize) do |entry_io|
        block_given? ? yield(entry_io) : entry_io.write(data)
      end
    end

    private

    #
    # Memoizes the IO supplied by the superclass and wraps it in a (likewise
    # memoized) Gem::Package::TarWriter.
    #
    # @return [Gem::Package::TarWriter]
    #
    def io
      @io ||= super
      @tar_io ||= Gem::Package::TarWriter.new(@io)
    end
  end
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
require_relative "../transformation"
|
2
|
-
|
3
|
-
#
# A single step of a transformation. A step either belongs to a
# transformation object or, when built stand-alone, carries its own minimal
# source/target holder.
#
class Metacrunch::Transformator::Transformation::Step
  # Minimal source/target holder used when a step is created without a
  # transformation. Defined once so that creating a step does not allocate a
  # new anonymous Struct class each time.
  StandaloneTransformation = Struct.new(:source, :target)

  attr_accessor :transformation

  #
  # @param [Object, Hash, nil] transformation the owning transformation; a
  #   Hash in this position is treated as the options
  # @param [Hash] options only read when no transformation is given
  # @option options [Object] :source
  # @option options [Object] :target
  #
  def initialize(transformation = nil, options = {})
    # Supports Step.new(source: ..., target: ...) without a transformation.
    if transformation.is_a?(Hash)
      options = transformation
      transformation = nil
    end

    @transformation =
      transformation || StandaloneTransformation.new(options[:source], options[:target])
  end

  #
  # Performs the step. The base implementation does nothing.
  #
  def call
  end

  #
  # Each step has transparent access to all methods of its transformation
  #
  def method_missing(method_name, *args, &block)
    if @transformation.respond_to?(method_name)
      @transformation.send(method_name, *args, &block)
    else
      super
    end
  end

  # Mirrors method_missing: only methods the transformation publicly responds
  # to are reported, so include_private is not forwarded to it.
  def respond_to_missing?(method_name, include_private = false)
    @transformation.respond_to?(method_name) || super
  end

  # avoid method_missing penalty for the most used transformation methods
  def source; @transformation.source; end
  def source=(value); @transformation.source=(value); end
  def target; @transformation.target; end
  def target=(value); @transformation.target=(value); end
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
require_relative "../transformator"
|
2
|
-
|
3
|
-
#
# A transformation runs a declared sequence of steps that read from `source`
# and work on `target`.
#
class Metacrunch::Transformator::Transformation
  require_relative "./transformation/step"

  attr_accessor :source
  attr_accessor :target

  class << self
    #
    # Class-level reader/writer for the step sequence.
    #
    # @param [Array, nil] value when given, stored as the new sequence
    # @return [Array, nil] the declared sequence, nil if none was declared
    #
    def steps(value = nil)
      @steps = value if value
      @steps
    end
    alias_method :sequence, :steps
  end

  #
  # Convenience entry point: builds a fresh instance and runs it.
  #
  def self.call(*args)
    new.call(*args)
  end

  # since a transformation can have many steps, writing a "require" for each is tedious
  def self.require_directory(directory)
    Dir.glob("#{File.expand_path(directory)}/*.rb").each do |filename|
      require filename
    end
  end

  def initialize
    # Steps are instantiated once per transformation instance, which means
    # their instance variables are retained across #call invocations on the
    # same instance. A class that declared no steps gets an empty sequence
    # instead of failing on nil.
    @steps = (self.class.steps || []).flatten.map do |step|
      step.is_a?(Class) ? step.new(self) : step
    end
  end

  #
  # Runs all steps in order.
  #
  # @param [Object] source the input made available to the steps
  # @param [Hash] options
  # @option options [Object] :target initial target object
  # @return [Object] the target after all steps ran
  #
  def call(source, options = {})
    @source = source
    @target = options[:target]

    @steps.each do |step|
      # Procs are evaluated in the context of the transformation itself.
      step.is_a?(Proc) ? instance_exec(&step) : step.call
    end

    @target
  end
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
require_relative "../transformer"
|
2
|
-
|
3
|
-
module Metacrunch
  class Transformer
    #
    # Helper object bound to a transformer. It exposes the transformer's
    # source, target and options.
    #
    class Helper
      # @return [Object] the transformer this helper was created for
      attr_reader :transformer

      #
      # @param [Object] transformer object responding to #source, #target
      #   and #options
      #
      def initialize(transformer)
        @transformer = transformer
      end

      # @return [Object] the transformer's current source
      def source
        transformer.source
      end

      # @return [Object] the transformer's current target
      def target
        transformer.target
      end

      # @return [Object] the transformer's options
      def options
        transformer.options
      end
    end
  end
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
require_relative "../transformer"
|
2
|
-
|
3
|
-
module Metacrunch
  class Transformer
    #
    # Base class for transformation steps. Subclasses implement #perform and
    # reach the transformer's state through the delegating readers below.
    #
    class Step
      # @return [Object] the transformer this step was created for
      attr_reader :transformer

      #
      # @param [Object] transformer object responding to #source, #target,
      #   #options and #helper
      #
      def initialize(transformer)
        @transformer = transformer
      end

      #
      # Executes the step. Must be overridden by subclasses.
      #
      # @raise [NotImplementedError] always, in this base implementation
      #
      def perform
        raise NotImplementedError, "You must implement .perform() in your rule sub-class"
      end

      # @return [Object] the transformer's current source
      def source
        transformer.source
      end

      # @return [Object] the transformer's current target
      def target
        transformer.target
      end

      # @return [Object] the transformer's options
      def options
        transformer.options
      end

      # @return [Object] the transformer's helper
      def helper
        transformer.helper
      end
    end
  end
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
module Metacrunch
  #
  # Holds a source, a target and options, and runs transformation steps
  # against them.
  #
  class Transformer
    require_relative "./transformer/step"
    require_relative "./transformer/helper"

    attr_accessor :source, :target, :options
    attr_reader :step

    #
    # @param [Object] source
    # @param [Object] target
    # @param [Hash] options
    #
    def initialize(source: nil, target: nil, options: {})
      @source, @target, @options = source, target, options
    end

    #
    # Runs one step. With a block, a plain Step is created and the block is
    # evaluated in its context. Otherwise the given step class (or its name)
    # is instantiated and #perform is called on it. The step used last is
    # available through #step.
    #
    # @param [Class, #to_s, nil] step_class step class or its name; ignored
    #   when a block is given
    # @return [Object] the result of the block or of #perform
    # @raise [ArgumentError] if neither a block nor a step class is given
    #
    def transform(step_class = nil, &block)
      if block
        @step = Step.new(self)
        return @step.instance_eval(&block)
      end

      raise ArgumentError, "You need to provide a STEP or a block" if step_class.nil?

      klass = step_class.is_a?(Class) ? step_class : step_class.to_s.constantize
      @step = klass.new(self)
      @step.perform
    end

    #
    # @return [Metacrunch::Transformer::Helper] lazily created helper bound
    #   to this transformer
    #
    def helper
      @helper ||= Helper.new(self)
    end

    #
    # Makes the methods of the given module available on the helper. The
    # module is included into the helper's class, not into the single helper
    # instance.
    #
    # @param [Module] helper_module
    # @raise [ArgumentError] if the argument is not a Module
    #
    def register_helper(helper_module)
      unless helper_module.is_a?(Module)
        raise ArgumentError, "Must be a module"
      end

      helper.class.send(:include, helper_module) # TODO: Benchmark this
    end
  end
end
|