metacrunch 3.1.4 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,140 +3,121 @@ module Metacrunch
3
3
  require_relative "job/dsl"
4
4
  require_relative "job/buffer"
5
5
 
6
- attr_reader :builder, :args
6
+ attr_reader :dsl
7
7
 
8
8
  class << self
9
- def define(file_content = nil, filename: nil, args: nil, number_of_processes: 1, process_index: 0, &block)
10
- self.new(file_content, filename: filename, args: args, number_of_processes: number_of_processes, process_index: process_index, &block)
9
+ def define(file_content = nil, &block)
10
+ self.new(file_content, &block)
11
11
  end
12
12
  end
13
13
 
14
- def initialize(file_content = nil, filename: nil, args: nil, number_of_processes: 1, process_index: 0, &block)
15
- @builder = Dsl.new(self)
16
- @args = args
17
- @number_of_processes = number_of_processes
18
- @process_index = process_index
14
+ def initialize(file_content = nil, &block)
15
+ @dsl = Dsl.new(self)
19
16
 
20
17
  if file_content
21
- @builder.instance_eval(file_content, filename || "")
18
+ @dsl.instance_eval(file_content, "Check your metacrunch Job at Line")
22
19
  elsif block_given?
23
- @builder.instance_eval(&block)
20
+ @dsl.instance_eval(&block)
24
21
  end
25
22
  end
26
23
 
27
- def sources
28
- @sources ||= []
24
+ def source
25
+ @source
29
26
  end
30
27
 
31
- def add_source(source)
28
+ def source=(source)
32
29
  ensure_source!(source)
33
- sources << source
30
+ @source = source
34
31
  end
35
32
 
36
- def destinations
37
- @destinations ||= []
33
+ def destination
34
+ @destination
38
35
  end
39
36
 
40
- def add_destination(destination)
37
+ def destination=(destination)
41
38
  ensure_destination!(destination)
42
- destinations << destination
39
+ @destination = destination
43
40
  end
44
41
 
45
- def pre_processes
46
- @pre_processes ||= []
42
+ def pre_process
43
+ @pre_process
47
44
  end
48
45
 
49
- def add_pre_process(callable = nil, &block)
50
- add_callable_or_block(pre_processes, callable, &block)
46
+ def pre_process=(callable)
47
+ ensure_callable!(callable)
48
+ @pre_process = callable
51
49
  end
52
50
 
53
- def post_processes
54
- @post_processes ||= []
51
+ def post_process
52
+ @post_process
55
53
  end
56
54
 
57
- def add_post_process(callable = nil, &block)
58
- add_callable_or_block(post_processes, callable, &block)
55
+ def post_process=(callable)
56
+ ensure_callable!(callable)
57
+ @post_process = callable
59
58
  end
60
59
 
61
60
  def transformations
62
61
  @transformations ||= []
63
62
  end
64
63
 
65
- def add_transformation(callable = nil, &block)
66
- add_callable_or_block(transformations, callable, &block)
67
- end
64
+ def add_transformation(callable, buffer_size: nil)
65
+ ensure_callable!(callable)
66
+
67
+ if buffer_size && buffer_size.to_i > 0
68
+ transformations << Metacrunch::Job::Buffer.new(buffer_size)
69
+ end
68
70
 
69
- def add_transformation_buffer(size)
70
- transformations << Metacrunch::Job::Buffer.new(size)
71
+ transformations << callable
71
72
  end
72
73
 
73
74
  def run
74
- run_pre_processes
75
- run_transformations
76
- run_post_processes
77
- self
78
- end
75
+ run_pre_process
79
76
 
80
- private
77
+ if source
78
+ # Run transformation for each data object available in source
79
+ source.each do |data|
80
+ data = run_transformations(data)
81
+ write_destination(data)
82
+ end
81
83
 
82
- def add_callable_or_block(array, callable, &block)
83
- if block_given?
84
- array << block
85
- elsif callable
86
- ensure_callable!(callable)
87
- array << callable
84
+ # Run all transformations a last time to flush existing buffers
85
+ data = run_transformations(data = nil, flush_buffers: true)
86
+ write_destination(data)
87
+
88
+ # Close destination
89
+ destination.close if destination
88
90
  end
89
- end
90
91
 
91
- def ensure_callable!(object)
92
- raise ArgumentError, "#{object} does't respond to #call." unless object.respond_to?(:call)
92
+ run_post_process
93
+
94
+ self
93
95
  end
94
96
 
97
+ private
98
+
95
99
  def ensure_source!(object)
96
- raise ArgumentError, "#{object} does't respond to #each." unless object.respond_to?(:each)
100
+ raise ArgumentError, "#{object} doesn't respond to #each." unless object.respond_to?(:each)
97
101
  end
98
102
 
99
103
  def ensure_destination!(object)
100
- raise ArgumentError, "#{object} does't respond to #write." unless object.respond_to?(:write)
101
- raise ArgumentError, "#{object} does't respond to #close." unless object.respond_to?(:close)
104
+ raise ArgumentError, "#{object} doesn't respond to #write." unless object.respond_to?(:write)
105
+ raise ArgumentError, "#{object} doesn't respond to #close." unless object.respond_to?(:close)
102
106
  end
103
107
 
104
- def run_pre_processes
105
- pre_processes.each(&:call)
108
+ def ensure_callable!(object)
109
+ raise ArgumentError, "#{object} doesn't respond to #call." unless object.respond_to?(:call)
106
110
  end
107
111
 
108
- def run_post_processes
109
- post_processes.each(&:call)
112
+ def run_pre_process
113
+ pre_process.call if pre_process
110
114
  end
111
115
 
112
- def run_transformations
113
- sources.each do |source|
114
- # Setup parallel processing
115
- if @number_of_processes > 1
116
- if source.class.included_modules.include?(Metacrunch::ParallelProcessableReader)
117
- source.set_parallel_process_options(
118
- number_of_processes: @number_of_processes,
119
- process_index: @process_index
120
- )
121
- else
122
- raise RuntimeError, "source does't support parallel processing"
123
- end
124
- end
125
-
126
- # sources are expected to respond to `each`
127
- source.each do |data|
128
- run_transformations_and_write_destinations(data)
129
- end
130
-
131
- # Run all transformations a last time to flush possible buffers
132
- run_transformations_and_write_destinations(nil, flush_buffers: true)
133
- end
134
-
135
- # destination implementations are expected to respond to `close`
136
- destinations.each(&:close)
116
+ def run_post_process
117
+ post_process.call if post_process
137
118
  end
138
119
 
139
- def run_transformations_and_write_destinations(data, flush_buffers: false)
120
+ def run_transformations(data, flush_buffers: false)
140
121
  transformations.each do |transformation|
141
122
  if transformation.is_a?(Buffer)
142
123
  if data.present?
@@ -150,11 +131,11 @@ module Metacrunch
150
131
  end
151
132
  end
152
133
 
153
- if data.present?
154
- destinations.each do |destination|
155
- destination.write(data) # destinations are expected to respond to `write(data)`
156
- end
157
- end
134
+ data
135
+ end
136
+
137
+ def write_destination(data)
138
+ destination.write(data) if destination
158
139
  end
159
140
 
160
141
  end
@@ -1,38 +1,34 @@
1
1
  module Metacrunch
2
2
  class Job::Dsl
3
- require_relative "dsl/option_support"
3
+ require_relative "dsl/options"
4
4
 
5
5
  def initialize(job)
6
6
  @_job = job
7
7
  end
8
8
 
9
9
  def source(source)
10
- @_job.add_source(source)
10
+ @_job.source = source
11
11
  end
12
12
 
13
13
  def destination(destination)
14
- @_job.add_destination(destination)
14
+ @_job.destination = destination
15
15
  end
16
16
 
17
- def pre_process(callable = nil, &block)
18
- @_job.add_pre_process(callable, &block)
17
+ def pre_process(callable)
18
+ @_job.pre_process = callable
19
19
  end
20
20
 
21
- def post_process(callable = nil, &block)
22
- @_job.add_post_process(callable, &block)
21
+ def post_process(callable)
22
+ @_job.post_process = callable
23
23
  end
24
24
 
25
- def transformation_buffer(size)
26
- @_job.add_transformation_buffer(size)
27
- end
28
-
29
- def transformation(callable = nil, &block)
30
- @_job.add_transformation(callable, &block)
25
+ def transformation(callable, buffer_size: nil)
26
+ @_job.add_transformation(callable, buffer_size: buffer_size)
31
27
  end
32
28
 
33
29
  def options(require_args: false, &block)
34
30
  if block_given?
35
- @_options = OptionSupport.new.register_options(@_job.args, require_args: require_args, &block)
31
+ @_options = Options.new(require_args: require_args, &block).options
36
32
  else
37
33
  @_options ||= {}
38
34
  end
@@ -0,0 +1,80 @@
1
+ module Metacrunch
2
+ class Job::Dsl::Options
3
+ require_relative "options/dsl"
4
+
5
+ attr_reader :options
6
+
7
+ def initialize(argv = ARGV, require_args: false, &block)
8
+ @options = {}
9
+ dsl.instance_eval(&block)
10
+
11
+ dsl.options.each do |key, opt_def|
12
+ # Set default value
13
+ @options[key] = opt_def[:default]
14
+
15
+ # Register with OptionParser
16
+ if opt_def[:args].present?
17
+ option = parser.define(*opt_def[:args]) { |value| @options[key] = value }
18
+
19
+ option.desc << "REQUIRED" if opt_def[:required]
20
+ option.desc << "DEFAULT: #{opt_def[:default]}" if opt_def[:default].present?
21
+
22
+ parser_options[key] = option
23
+ end
24
+ end
25
+
26
+ # Finally parse CLI options with OptionParser
27
+ parser.parse!(argv)
28
+
29
+ # Make sure required options are present
30
+ ensure_required_options!(@options)
31
+
32
+ # Make sure args are present if required
33
+ ensure_required_args!(argv) if require_args
34
+ rescue OptionParser::ParseError => e
35
+ error(e.message)
36
+ end
37
+
38
+ private
39
+
40
+ def parser
41
+ @parser ||= OptionParser.new do |opts|
42
+ opts.banner = <<-BANNER.strip_heredoc
43
+ #{ColorizedString["Job options:"].bold}
44
+ BANNER
45
+ end
46
+ end
47
+
48
+ def dsl
49
+ @dsl ||= Dsl.new
50
+ end
51
+
52
+ def parser_options
53
+ @parser_options ||= {}
54
+ end
55
+
56
+ def error(message)
57
+ puts ColorizedString["Error: #{message}\n"].red.bold
58
+ puts parser.help
59
+ exit(1)
60
+ end
61
+
62
+ def ensure_required_options!(options)
63
+ dsl.options.each do |key, opt_def|
64
+ if opt_def[:required] && options[key].blank?
65
+ long_option = parser_options[key].long.try(:[], 0)
66
+ short_option = parser_options[key].short.try(:[], 0)
67
+
68
+ error("Required job option `#{long_option || short_option}` missing.")
69
+ end
70
+ end
71
+ end
72
+
73
+ def ensure_required_args!(argv)
74
+ if argv.blank?
75
+ error("Required ARGS are missing.")
76
+ end
77
+ end
78
+
79
+ end
80
+ end
@@ -0,0 +1,21 @@
1
+ module Metacrunch
2
+ class Job::Dsl::Options::Dsl
3
+
4
+ def add(name, *args, default: nil, required: false)
5
+ if default && required
6
+ raise ArgumentError, "You can't use `default` and `required` option at the same time."
7
+ end
8
+
9
+ options[name.to_sym] = {
10
+ args: args,
11
+ default: default,
12
+ required: required
13
+ }
14
+ end
15
+
16
+ def options
17
+ @options ||= {}
18
+ end
19
+
20
+ end
21
+ end
@@ -1,3 +1,3 @@
1
1
  module Metacrunch
2
- VERSION = "3.1.4"
2
+ VERSION = "4.0.1"
3
3
  end
data/metacrunch.gemspec CHANGED
@@ -7,7 +7,6 @@ Gem::Specification.new do |spec|
7
7
  spec.name = "metacrunch"
8
8
  spec.version = Metacrunch::VERSION
9
9
  spec.authors = ["René Sprotte", "Michael Sievers", "Marcel Otto"]
10
- spec.email = "r.sprotte@ub.uni-paderborn.de"
11
10
  spec.summary = %q{Data processing and ETL toolkit for Ruby}
12
11
  spec.homepage = "http://github.com/ubpb/metacrunch"
13
12
  spec.license = "MIT"
@@ -17,9 +16,6 @@ Gem::Specification.new do |spec|
17
16
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
18
17
  spec.require_paths = ["lib"]
19
18
 
20
- spec.add_dependency "activesupport", ">= 4.2", "< 5.1"
21
- spec.add_dependency "colorize", ">= 0.8"
22
- spec.add_dependency "parallel", "~> 1.9"
23
- spec.add_dependency "sequel", "~> 4.33"
24
- spec.add_dependency "redis", "~> 3.3"
19
+ spec.add_dependency "activesupport", ">= 5.1.0"
20
+ spec.add_dependency "colorize", ">= 0.8.1"
25
21
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metacrunch
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.4
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - René Sprotte
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2017-02-09 00:00:00.000000000 Z
13
+ date: 2017-09-25 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activesupport
@@ -18,78 +18,30 @@ dependencies:
18
18
  requirements:
19
19
  - - ">="
20
20
  - !ruby/object:Gem::Version
21
- version: '4.2'
22
- - - "<"
23
- - !ruby/object:Gem::Version
24
- version: '5.1'
21
+ version: 5.1.0
25
22
  type: :runtime
26
23
  prerelease: false
27
24
  version_requirements: !ruby/object:Gem::Requirement
28
25
  requirements:
29
26
  - - ">="
30
27
  - !ruby/object:Gem::Version
31
- version: '4.2'
32
- - - "<"
33
- - !ruby/object:Gem::Version
34
- version: '5.1'
28
+ version: 5.1.0
35
29
  - !ruby/object:Gem::Dependency
36
30
  name: colorize
37
31
  requirement: !ruby/object:Gem::Requirement
38
32
  requirements:
39
33
  - - ">="
40
34
  - !ruby/object:Gem::Version
41
- version: '0.8'
35
+ version: 0.8.1
42
36
  type: :runtime
43
37
  prerelease: false
44
38
  version_requirements: !ruby/object:Gem::Requirement
45
39
  requirements:
46
40
  - - ">="
47
41
  - !ruby/object:Gem::Version
48
- version: '0.8'
49
- - !ruby/object:Gem::Dependency
50
- name: parallel
51
- requirement: !ruby/object:Gem::Requirement
52
- requirements:
53
- - - "~>"
54
- - !ruby/object:Gem::Version
55
- version: '1.9'
56
- type: :runtime
57
- prerelease: false
58
- version_requirements: !ruby/object:Gem::Requirement
59
- requirements:
60
- - - "~>"
61
- - !ruby/object:Gem::Version
62
- version: '1.9'
63
- - !ruby/object:Gem::Dependency
64
- name: sequel
65
- requirement: !ruby/object:Gem::Requirement
66
- requirements:
67
- - - "~>"
68
- - !ruby/object:Gem::Version
69
- version: '4.33'
70
- type: :runtime
71
- prerelease: false
72
- version_requirements: !ruby/object:Gem::Requirement
73
- requirements:
74
- - - "~>"
75
- - !ruby/object:Gem::Version
76
- version: '4.33'
77
- - !ruby/object:Gem::Dependency
78
- name: redis
79
- requirement: !ruby/object:Gem::Requirement
80
- requirements:
81
- - - "~>"
82
- - !ruby/object:Gem::Version
83
- version: '3.3'
84
- type: :runtime
85
- prerelease: false
86
- version_requirements: !ruby/object:Gem::Requirement
87
- requirements:
88
- - - "~>"
89
- - !ruby/object:Gem::Version
90
- version: '3.3'
42
+ version: 0.8.1
91
43
  description:
92
- email: r.sprotte@ub.uni-paderborn.de
44
+ email:
93
45
  executables:
94
46
  - metacrunch
95
47
  extensions: []
@@ -106,21 +58,11 @@ files:
106
58
  - exe/metacrunch
107
59
  - lib/metacrunch.rb
108
60
  - lib/metacrunch/cli.rb
109
- - lib/metacrunch/db.rb
110
- - lib/metacrunch/db/reader.rb
111
- - lib/metacrunch/db/writer.rb
112
- - lib/metacrunch/fs.rb
113
- - lib/metacrunch/fs/entry.rb
114
- - lib/metacrunch/fs/reader.rb
115
61
  - lib/metacrunch/job.rb
116
62
  - lib/metacrunch/job/buffer.rb
117
63
  - lib/metacrunch/job/dsl.rb
118
- - lib/metacrunch/job/dsl/option_support.rb
119
- - lib/metacrunch/parallel_processable_reader.rb
120
- - lib/metacrunch/redis.rb
121
- - lib/metacrunch/redis/queue_reader.rb
122
- - lib/metacrunch/redis/queue_writer.rb
123
- - lib/metacrunch/redis/writer.rb
64
+ - lib/metacrunch/job/dsl/options.rb
65
+ - lib/metacrunch/job/dsl/options/dsl.rb
124
66
  - lib/metacrunch/test_utils.rb
125
67
  - lib/metacrunch/test_utils/dummy_callable.rb
126
68
  - lib/metacrunch/test_utils/dummy_destination.rb
@@ -147,7 +89,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
147
89
  version: '0'
148
90
  requirements: []
149
91
  rubyforge_project:
150
- rubygems_version: 2.5.2
92
+ rubygems_version: 2.6.11
151
93
  signing_key:
152
94
  specification_version: 4
153
95
  summary: Data processing and ETL toolkit for Ruby