metacrunch 2.2.3 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -1
  3. data/Gemfile +11 -13
  4. data/License.txt +1 -1
  5. data/Readme.md +139 -2
  6. data/bin/console +9 -6
  7. data/exe/metacrunch +1 -2
  8. data/lib/metacrunch/cli.rb +62 -14
  9. data/lib/metacrunch/db/reader.rb +27 -0
  10. data/lib/metacrunch/db/writer.rb +23 -0
  11. data/lib/metacrunch/db.rb +8 -0
  12. data/lib/metacrunch/fs/entry.rb +17 -0
  13. data/lib/metacrunch/{file_reader.rb → fs/reader.rb} +9 -10
  14. data/lib/metacrunch/fs.rb +6 -0
  15. data/lib/metacrunch/job/buffer.rb +26 -0
  16. data/lib/metacrunch/job/dsl/option_support.rb +102 -0
  17. data/lib/metacrunch/job/dsl.rb +42 -0
  18. data/lib/metacrunch/job.rb +149 -0
  19. data/lib/metacrunch/test_utils/dummy_callable.rb +14 -0
  20. data/lib/metacrunch/test_utils/dummy_destination.rb +21 -0
  21. data/lib/metacrunch/test_utils/dummy_source.rb +22 -0
  22. data/lib/metacrunch/test_utils.rb +7 -0
  23. data/lib/metacrunch/version.rb +1 -1
  24. data/lib/metacrunch.rb +14 -27
  25. data/metacrunch.gemspec +5 -10
  26. metadata +24 -144
  27. data/lib/metacrunch/cli/base.rb +0 -29
  28. data/lib/metacrunch/cli/command_definition.rb +0 -41
  29. data/lib/metacrunch/cli/command_registry.rb +0 -17
  30. data/lib/metacrunch/cli/main.rb +0 -16
  31. data/lib/metacrunch/command.rb +0 -27
  32. data/lib/metacrunch/file/reader/file_system_fetcher.rb +0 -21
  33. data/lib/metacrunch/file/reader/plain_file_reader.rb +0 -33
  34. data/lib/metacrunch/file/reader/scp_fetcher.rb +0 -56
  35. data/lib/metacrunch/file/reader/tar_file_reader.rb +0 -37
  36. data/lib/metacrunch/file/reader/zip_file_reader.rb +0 -30
  37. data/lib/metacrunch/file/reader.rb +0 -72
  38. data/lib/metacrunch/file/writer/plain_file_writer.rb +0 -19
  39. data/lib/metacrunch/file/writer/tar_file_writer.rb +0 -26
  40. data/lib/metacrunch/file/writer/zip_file_writer.rb +0 -29
  41. data/lib/metacrunch/file/writer.rb +0 -26
  42. data/lib/metacrunch/file.rb +0 -24
  43. data/lib/metacrunch/file_reader_entry.rb +0 -21
  44. data/lib/metacrunch/file_writer.rb +0 -40
  45. data/lib/metacrunch/hash.rb +0 -51
  46. data/lib/metacrunch/parallel.rb +0 -69
  47. data/lib/metacrunch/processor.rb +0 -10
  48. data/lib/metacrunch/snr/field.rb +0 -31
  49. data/lib/metacrunch/snr/section.rb +0 -74
  50. data/lib/metacrunch/snr.rb +0 -117
  51. data/lib/metacrunch/tar_writer.rb +0 -26
  52. data/lib/metacrunch/transformator/transformation/step.rb +0 -45
  53. data/lib/metacrunch/transformator/transformation.rb +0 -48
  54. data/lib/metacrunch/transformator.rb +0 -5
  55. data/lib/metacrunch/transformer/helper.rb +0 -29
  56. data/lib/metacrunch/transformer/step.rb +0 -37
  57. data/lib/metacrunch/transformer.rb +0 -38
@@ -1,117 +0,0 @@
1
- module Metacrunch
2
- #
3
- # A SNR object (Simple Normalized Record) is a simple data structure
4
- # that you can use as a target resource when performing data normalization routines.
5
- # A SNR record consists of unique sections. A section is uniquely identified by its
6
- # name. Each section can hold many fields that store the actual values.
7
- #
8
- # A SNR object can be transformed into XML or JSON to allow easy integration into
9
- # existing tools and workflows.
10
- #
11
- # For example: If you normalize MAB XML data for use in a search engine you can
12
- # use a SNR object to store your normalized values.
13
- #
14
- class SNR
15
- require_relative "./snr/section"
16
- require_relative "./snr/field"
17
-
18
- # ------------------------------------------------------------------------------
19
- # Common API
20
- # ------------------------------------------------------------------------------
21
-
22
- #
23
- # Adds a field with a value to a section. If the section with the given name
24
- # doesn't exist it will be created.
25
- #
26
- # @param [String] section_name
27
- # @param [String] field_name
28
- # @param [#to_s] value
29
- #
30
- def add(section_name, field_name, value)
31
- section = self.section(section_name) || Section.new(section_name)
32
- section.add(field_name, value)
33
- add_section(section)
34
- end
35
-
36
- #
37
- # Returns field values for a given path.
38
- #
39
- # @param [String] path A path to the fields separated by /. E.g. section/field
40
- # @return [Array<*>]
41
- #
42
- def values(path)
43
- section_name, field_name = path.split("/")
44
- section = self.section(section_name)
45
- if section && field_name
46
- section.fields(field_name).map{|field| field.value}
47
- else
48
- []
49
- end
50
- end
51
-
52
- # ------------------------------------------------------------------------------
53
- # Sections
54
- # ------------------------------------------------------------------------------
55
-
56
- #
57
- # @return [Hash{String => Metacrunch::SNR::Section}]
58
- # @private
59
- #
60
- def sections_struct
61
- @sections_struct ||= {}
62
- end
63
- private :sections_struct
64
-
65
- #
66
- # Return all sections.
67
- #
68
- # @return [Array<Metacrunch::SNR::Section>]
69
- #
70
- def sections
71
- sections_struct.values
72
- end
73
-
74
- #
75
- # Get section by name.
76
- #
77
- # @param [String] name Name of the section
78
- # @return [Metacrunch::SNR::Section, nil] section by name or nil if a section
79
- # with the given name doesn't exist.
80
- #
81
- def section(name)
82
- sections_struct[name]
83
- end
84
-
85
- #
86
- # Adds / replaces a section. The name of the section is used as a unique identifier.
87
- # Therefore if you add a section with a name that already exists, the new section
88
- # will override the existing one.
89
- #
90
- # @param [Metacrunch::SNR::Section] section
91
- # @return [Metacrunch::SNR::Section]
92
- #
93
- def add_section(section)
94
- sections_struct[section.name] = section
95
- end
96
-
97
- # ------------------------------------------------------------------------------
98
- # Serialization
99
- # ------------------------------------------------------------------------------
100
-
101
- #
102
- # Transforms the SNR into XML.
103
- #
104
- # @return [String] The SNR as XML string.
105
- #
106
- def to_xml
107
- builder = Builder::XmlMarkup.new(indent: 2)
108
- builder.instruct!(:xml, :encoding => "UTF-8")
109
- builder.snr do
110
- sections.each do |_section|
111
- _section.to_xml(builder)
112
- end
113
- end
114
- end
115
-
116
- end
117
- end
@@ -1,26 +0,0 @@
1
- require "rubygems/package"
2
-
3
- module Metacrunch
4
- class TarWriter < FileWriter
5
-
6
- def write(data, options = {})
7
- raise ArgumentError, "Missing option 'filename'" if options[:filename].blank?
8
-
9
- io.add_file_simple(options[:filename], 0644, data.bytesize) do |_io|
10
- if block_given?
11
- yield(_io)
12
- else
13
- _io.write(data)
14
- end
15
- end
16
- end
17
-
18
- private
19
-
20
- def io
21
- @io ||= super
22
- @tar_io ||= Gem::Package::TarWriter.new(@io)
23
- end
24
-
25
- end
26
- end
@@ -1,45 +0,0 @@
1
- require_relative "../transformation"
2
-
3
- class Metacrunch::Transformator::Transformation::Step
4
- attr_accessor :transformation
5
-
6
- def initialize(transformation = nil, options = {})
7
- if transformation.is_a?(Hash)
8
- options = transformation
9
- transformation = nil
10
- end
11
-
12
- if transformation
13
- @transformation = transformation
14
- else
15
- @transformation = Struct.new(:source, :target).new.tap do |_struct|
16
- _struct.source = options[:source]
17
- _struct.target = options[:target]
18
- end
19
- end
20
- end
21
-
22
- def call
23
- end
24
-
25
- #
26
- # Each step has transparent access to all methods of its transformation
27
- #
28
- def method_missing(method_name, *args, &block)
29
- if @transformation.respond_to?(method_name)
30
- @transformation.send(method_name, *args, &block)
31
- else
32
- super
33
- end
34
- end
35
-
36
- def respond_to_missing?(method_name, include_private = false)
37
- @transformation.respond_to?(method_name) || super
38
- end
39
-
40
- # avoid method_missing penalty for the most used transformation methods
41
- def source; @transformation.source; end
42
- def source=(value); @transformation.source=(value); end
43
- def target; @transformation.target; end
44
- def target=(value); @transformation.target=(value); end
45
- end
@@ -1,48 +0,0 @@
1
- require_relative "../transformator"
2
-
3
- class Metacrunch::Transformator::Transformation
4
- require_relative "./transformation/step"
5
-
6
- attr_accessor :source
7
- attr_accessor :target
8
-
9
- class << self
10
- def steps(value = nil)
11
- unless value
12
- @steps
13
- else
14
- @steps = value
15
- end
16
- end
17
- alias_method :sequence, :steps
18
- end
19
-
20
- def self.call(*args)
21
- new.call(*args)
22
- end
23
-
24
- # since a transformation can have many steps, writing a "require" for each is tedious
25
- def self.require_directory(directory)
26
- Dir.glob("#{File.expand_path(directory)}/*.rb").each do |_filename|
27
- require _filename
28
- end
29
- end
30
-
31
- def initialize
32
- # steps are instantiated once, which means that their instance variables are retained across calls
33
- @steps = self.class.steps.flatten.map do |_step|
34
- _step.is_a?(Class) ? _step.new(self) : _step
35
- end
36
- end
37
-
38
- def call(source, options = {})
39
- @source = source
40
- @target = options[:target]
41
-
42
- @steps.each do |_step|
43
- _step.is_a?(Proc) ? instance_exec(&_step) : _step.call
44
- end
45
-
46
- return @target
47
- end
48
- end
@@ -1,5 +0,0 @@
1
- require_relative "../metacrunch"
2
-
3
- module Metacrunch::Transformator
4
- require_relative "./transformator/transformation"
5
- end
@@ -1,29 +0,0 @@
1
- require_relative "../transformer"
2
-
3
- module Metacrunch
4
- class Transformer
5
- class Helper
6
-
7
- def initialize(transformer)
8
- @transformer = transformer
9
- end
10
-
11
- def transformer
12
- @transformer
13
- end
14
-
15
- def source
16
- transformer.source
17
- end
18
-
19
- def target
20
- transformer.target
21
- end
22
-
23
- def options
24
- transformer.options
25
- end
26
-
27
- end
28
- end
29
- end
@@ -1,37 +0,0 @@
1
- require_relative "../transformer"
2
-
3
- module Metacrunch
4
- class Transformer
5
- class Step
6
-
7
- def initialize(transformer)
8
- @transformer = transformer
9
- end
10
-
11
- def perform
12
- raise NotImplementedError, "You must implement .perform() in your rule sub-class"
13
- end
14
-
15
- def transformer
16
- @transformer
17
- end
18
-
19
- def source
20
- transformer.source
21
- end
22
-
23
- def target
24
- transformer.target
25
- end
26
-
27
- def options
28
- transformer.options
29
- end
30
-
31
- def helper
32
- transformer.helper
33
- end
34
-
35
- end
36
- end
37
- end
@@ -1,38 +0,0 @@
1
- module Metacrunch
2
- class Transformer
3
- require_relative "./transformer/step"
4
- require_relative "./transformer/helper"
5
-
6
- attr_accessor :source, :target, :options
7
- attr_reader :step
8
-
9
-
10
- def initialize(source:nil, target:nil, options: {})
11
- @source = source
12
- @target = target
13
- @options = options
14
- end
15
-
16
- def transform(step_class = nil, &block)
17
- if block_given?
18
- @step = Step.new(self)
19
- @step.instance_eval(&block)
20
- else
21
- raise ArgumentError, "You need to provide a STEP or a block" if step_class.nil?
22
- clazz = step_class.is_a?(Class) ? step_class : step_class.to_s.constantize
23
- @step = clazz.new(self)
24
- @step.perform
25
- end
26
- end
27
-
28
- def helper
29
- @helper ||= Helper.new(self)
30
- end
31
-
32
- def register_helper(helper_module)
33
- raise ArgumentError, "Must be a module" unless helper_module.is_a?(Module)
34
- helper.class.send(:include, helper_module) # TODO: Benchmark this
35
- end
36
-
37
- end
38
- end