metacrunch 2.2.3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -1
  3. data/Gemfile +11 -13
  4. data/License.txt +1 -1
  5. data/Readme.md +139 -2
  6. data/bin/console +9 -6
  7. data/exe/metacrunch +1 -2
  8. data/lib/metacrunch/cli.rb +62 -14
  9. data/lib/metacrunch/db/reader.rb +27 -0
  10. data/lib/metacrunch/db/writer.rb +23 -0
  11. data/lib/metacrunch/db.rb +8 -0
  12. data/lib/metacrunch/fs/entry.rb +17 -0
  13. data/lib/metacrunch/{file_reader.rb → fs/reader.rb} +9 -10
  14. data/lib/metacrunch/fs.rb +6 -0
  15. data/lib/metacrunch/job/buffer.rb +26 -0
  16. data/lib/metacrunch/job/dsl/option_support.rb +102 -0
  17. data/lib/metacrunch/job/dsl.rb +42 -0
  18. data/lib/metacrunch/job.rb +149 -0
  19. data/lib/metacrunch/test_utils/dummy_callable.rb +14 -0
  20. data/lib/metacrunch/test_utils/dummy_destination.rb +21 -0
  21. data/lib/metacrunch/test_utils/dummy_source.rb +22 -0
  22. data/lib/metacrunch/test_utils.rb +7 -0
  23. data/lib/metacrunch/version.rb +1 -1
  24. data/lib/metacrunch.rb +14 -27
  25. data/metacrunch.gemspec +5 -10
  26. metadata +24 -144
  27. data/lib/metacrunch/cli/base.rb +0 -29
  28. data/lib/metacrunch/cli/command_definition.rb +0 -41
  29. data/lib/metacrunch/cli/command_registry.rb +0 -17
  30. data/lib/metacrunch/cli/main.rb +0 -16
  31. data/lib/metacrunch/command.rb +0 -27
  32. data/lib/metacrunch/file/reader/file_system_fetcher.rb +0 -21
  33. data/lib/metacrunch/file/reader/plain_file_reader.rb +0 -33
  34. data/lib/metacrunch/file/reader/scp_fetcher.rb +0 -56
  35. data/lib/metacrunch/file/reader/tar_file_reader.rb +0 -37
  36. data/lib/metacrunch/file/reader/zip_file_reader.rb +0 -30
  37. data/lib/metacrunch/file/reader.rb +0 -72
  38. data/lib/metacrunch/file/writer/plain_file_writer.rb +0 -19
  39. data/lib/metacrunch/file/writer/tar_file_writer.rb +0 -26
  40. data/lib/metacrunch/file/writer/zip_file_writer.rb +0 -29
  41. data/lib/metacrunch/file/writer.rb +0 -26
  42. data/lib/metacrunch/file.rb +0 -24
  43. data/lib/metacrunch/file_reader_entry.rb +0 -21
  44. data/lib/metacrunch/file_writer.rb +0 -40
  45. data/lib/metacrunch/hash.rb +0 -51
  46. data/lib/metacrunch/parallel.rb +0 -69
  47. data/lib/metacrunch/processor.rb +0 -10
  48. data/lib/metacrunch/snr/field.rb +0 -31
  49. data/lib/metacrunch/snr/section.rb +0 -74
  50. data/lib/metacrunch/snr.rb +0 -117
  51. data/lib/metacrunch/tar_writer.rb +0 -26
  52. data/lib/metacrunch/transformator/transformation/step.rb +0 -45
  53. data/lib/metacrunch/transformator/transformation.rb +0 -48
  54. data/lib/metacrunch/transformator.rb +0 -5
  55. data/lib/metacrunch/transformer/helper.rb +0 -29
  56. data/lib/metacrunch/transformer/step.rb +0 -37
  57. data/lib/metacrunch/transformer.rb +0 -38
@@ -1,117 +0,0 @@
1
- module Metacrunch
2
- #
3
- # A SNR object (Simple Normalized Record) is a simple data structure
4
- # that you can use as a target resource when performing data normalization routines.
5
- # A SNR record consists of unique sections. A section is uniquely identified by its
6
- # name. Each section can hold many fields that store the actual values.
7
- #
8
- # A SNR object can be transformed into XML or JSON to allow easy integration into
9
- # existing tools and workflows.
10
- #
11
- # For example: If you normalize MAB XML data for use in a search engine you can
12
- # use a SNR object to store your normalized values.
13
- #
14
- class SNR
15
- require_relative "./snr/section"
16
- require_relative "./snr/field"
17
-
18
- # ------------------------------------------------------------------------------
19
- # Common API
20
- # ------------------------------------------------------------------------------
21
-
22
- #
23
- # Adds a field with a value to a section. If the section with the given name
24
- # doesn't exist it will be created.
25
- #
26
- # @param [String] section_name
27
- # @param [String] field_name
28
- # @param [#to_s] value
29
- #
30
- def add(section_name, field_name, value)
31
- section = self.section(section_name) || Section.new(section_name)
32
- section.add(field_name, value)
33
- add_section(section)
34
- end
35
-
36
- #
37
- # Returns field values for a given path.
38
- #
39
- # @param [String] path A path to the fields separated by /. E.g. section/field
40
- # @return [Array<*>]
41
- #
42
- def values(path)
43
- section_name, field_name = path.split("/")
44
- section = self.section(section_name)
45
- if section && field_name
46
- section.fields(field_name).map{|field| field.value}
47
- else
48
- []
49
- end
50
- end
51
-
52
- # ------------------------------------------------------------------------------
53
- # Sections
54
- # ------------------------------------------------------------------------------
55
-
56
- #
57
- # @return [Hash{String => Metacrunch::SNR::Section}]
58
- # @private
59
- #
60
- def sections_struct
61
- @sections_struct ||= {}
62
- end
63
- private :sections_struct
64
-
65
- #
66
- # Return all sections.
67
- #
68
- # @return [Array<Metacrunch::SNR::Section>]
69
- #
70
- def sections
71
- sections_struct.values
72
- end
73
-
74
- #
75
- # Get section by name.
76
- #
77
- # @param [String] name Name of the section
78
- # @return [Metacrunch::SNR::Section, nil] section by name or nil if a section
79
- # with the given name doesn't exist.
80
- #
81
- def section(name)
82
- sections_struct[name]
83
- end
84
-
85
- #
86
- # Adds / replaces a section. The name of the section is used as a unique identifier.
87
- # Therefore if you add a section with a name that already exists, the new section
88
- # will override the existing one.
89
- #
90
- # @param [Metacrunch::SNR::Section] section
91
- # @return [Metacrunch::SNR::Section]
92
- #
93
- def add_section(section)
94
- sections_struct[section.name] = section
95
- end
96
-
97
- # ------------------------------------------------------------------------------
98
- # Serialization
99
- # ------------------------------------------------------------------------------
100
-
101
- #
102
- # Transforms the SNR into XML.
103
- #
104
- # @return [String] The SNR as XML string.
105
- #
106
- def to_xml
107
- builder = Builder::XmlMarkup.new(indent: 2)
108
- builder.instruct!(:xml, :encoding => "UTF-8")
109
- builder.snr do
110
- sections.each do |_section|
111
- _section.to_xml(builder)
112
- end
113
- end
114
- end
115
-
116
- end
117
- end
@@ -1,26 +0,0 @@
1
- require "rubygems/package"
2
-
3
- module Metacrunch
4
- class TarWriter < FileWriter
5
-
6
- def write(data, options = {})
7
- raise ArgumentError, "Missing option 'filename'" if options[:filename].blank?
8
-
9
- io.add_file_simple(options[:filename], 0644, data.bytesize) do |_io|
10
- if block_given?
11
- yield(_io)
12
- else
13
- _io.write(data)
14
- end
15
- end
16
- end
17
-
18
- private
19
-
20
- def io
21
- @io ||= super
22
- @tar_io ||= Gem::Package::TarWriter.new(@io)
23
- end
24
-
25
- end
26
- end
@@ -1,45 +0,0 @@
1
- require_relative "../transformation"
2
-
3
- class Metacrunch::Transformator::Transformation::Step
4
- attr_accessor :transformation
5
-
6
- def initialize(transformation = nil, options = {})
7
- if transformation.is_a?(Hash)
8
- options = transformation
9
- transformation = nil
10
- end
11
-
12
- if transformation
13
- @transformation = transformation
14
- else
15
- @transformation = Struct.new(:source, :target).new.tap do |_struct|
16
- _struct.source = options[:source]
17
- _struct.target = options[:target]
18
- end
19
- end
20
- end
21
-
22
- def call
23
- end
24
-
25
- #
26
- # Each step has transparent access to all methods of its transformation
27
- #
28
- def method_missing(method_name, *args, &block)
29
- if @transformation.respond_to?(method_name)
30
- @transformation.send(method_name, *args, &block)
31
- else
32
- super
33
- end
34
- end
35
-
36
- def respond_to_missing?(method_name, include_private = false)
37
- @transformation.respond_to?(method_name) || super
38
- end
39
-
40
- # avoid method_missing penalty for the most used transformation methods
41
- def source; @transformation.source; end
42
- def source=(value); @transformation.source=(value); end
43
- def target; @transformation.target; end
44
- def target=(value); @transformation.target=(value); end
45
- end
@@ -1,48 +0,0 @@
1
- require_relative "../transformator"
2
-
3
- class Metacrunch::Transformator::Transformation
4
- require_relative "./transformation/step"
5
-
6
- attr_accessor :source
7
- attr_accessor :target
8
-
9
- class << self
10
- def steps(value = nil)
11
- unless value
12
- @steps
13
- else
14
- @steps = value
15
- end
16
- end
17
- alias_method :sequence, :steps
18
- end
19
-
20
- def self.call(*args)
21
- new.call(*args)
22
- end
23
-
24
- # since a transformation can have many steps, writing a "require" for each is tedious
25
- def self.require_directory(directory)
26
- Dir.glob("#{File.expand_path(directory)}/*.rb").each do |_filename|
27
- require _filename
28
- end
29
- end
30
-
31
- def initialize
32
- # steps are instantiated once per transformation instance, which means that their instance variables are retained across calls
33
- @steps = self.class.steps.flatten.map do |_step|
34
- _step.is_a?(Class) ? _step.new(self) : _step
35
- end
36
- end
37
-
38
- def call(source, options = {})
39
- @source = source
40
- @target = options[:target]
41
-
42
- @steps.each do |_step|
43
- _step.is_a?(Proc) ? instance_exec(&_step) : _step.call
44
- end
45
-
46
- return @target
47
- end
48
- end
@@ -1,5 +0,0 @@
1
- require_relative "../metacrunch"
2
-
3
- module Metacrunch::Transformator
4
- require_relative "./transformator/transformation"
5
- end
@@ -1,29 +0,0 @@
1
- require_relative "../transformer"
2
-
3
- module Metacrunch
4
- class Transformer
5
- class Helper
6
-
7
- def initialize(transformer)
8
- @transformer = transformer
9
- end
10
-
11
- def transformer
12
- @transformer
13
- end
14
-
15
- def source
16
- transformer.source
17
- end
18
-
19
- def target
20
- transformer.target
21
- end
22
-
23
- def options
24
- transformer.options
25
- end
26
-
27
- end
28
- end
29
- end
@@ -1,37 +0,0 @@
1
- require_relative "../transformer"
2
-
3
- module Metacrunch
4
- class Transformer
5
- class Step
6
-
7
- def initialize(transformer)
8
- @transformer = transformer
9
- end
10
-
11
- def perform
12
- raise NotImplementedError, "You must implement .perform() in your rule sub-class"
13
- end
14
-
15
- def transformer
16
- @transformer
17
- end
18
-
19
- def source
20
- transformer.source
21
- end
22
-
23
- def target
24
- transformer.target
25
- end
26
-
27
- def options
28
- transformer.options
29
- end
30
-
31
- def helper
32
- transformer.helper
33
- end
34
-
35
- end
36
- end
37
- end
@@ -1,38 +0,0 @@
1
- module Metacrunch
2
- class Transformer
3
- require_relative "./transformer/step"
4
- require_relative "./transformer/helper"
5
-
6
- attr_accessor :source, :target, :options
7
- attr_reader :step
8
-
9
-
10
- def initialize(source:nil, target:nil, options: {})
11
- @source = source
12
- @target = target
13
- @options = options
14
- end
15
-
16
- def transform(step_class = nil, &block)
17
- if block_given?
18
- @step = Step.new(self)
19
- @step.instance_eval(&block)
20
- else
21
- raise ArgumentError, "You need to provide a STEP or a block" if step_class.nil?
22
- clazz = step_class.is_a?(Class) ? step_class : step_class.to_s.constantize
23
- @step = clazz.new(self)
24
- @step.perform
25
- end
26
- end
27
-
28
- def helper
29
- @helper ||= Helper.new(self)
30
- end
31
-
32
- def register_helper(helper_module)
33
- raise ArgumentError, "Must be a module" unless helper_module.is_a?(Module)
34
- helper.class.send(:include, helper_module) # TODO: Benchmark this
35
- end
36
-
37
- end
38
- end