metacrunch 3.1.4 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -2
- data/Gemfile +5 -11
- data/Rakefile +1 -0
- data/Readme.md +98 -90
- data/lib/metacrunch.rb +0 -5
- data/lib/metacrunch/cli.rb +22 -61
- data/lib/metacrunch/job.rb +65 -84
- data/lib/metacrunch/job/dsl.rb +10 -14
- data/lib/metacrunch/job/dsl/options.rb +80 -0
- data/lib/metacrunch/job/dsl/options/dsl.rb +21 -0
- data/lib/metacrunch/version.rb +1 -1
- data/metacrunch.gemspec +2 -6
- metadata +10 -68
- data/lib/metacrunch/db.rb +0 -8
- data/lib/metacrunch/db/reader.rb +0 -33
- data/lib/metacrunch/db/writer.rb +0 -55
- data/lib/metacrunch/fs.rb +0 -6
- data/lib/metacrunch/fs/entry.rb +0 -17
- data/lib/metacrunch/fs/reader.rb +0 -63
- data/lib/metacrunch/job/dsl/option_support.rb +0 -102
- data/lib/metacrunch/parallel_processable_reader.rb +0 -21
- data/lib/metacrunch/redis.rb +0 -8
- data/lib/metacrunch/redis/queue_reader.rb +0 -43
- data/lib/metacrunch/redis/queue_writer.rb +0 -39
- data/lib/metacrunch/redis/writer.rb +0 -33
data/lib/metacrunch/job.rb
CHANGED
@@ -3,140 +3,121 @@ module Metacrunch
|
|
3
3
|
require_relative "job/dsl"
|
4
4
|
require_relative "job/buffer"
|
5
5
|
|
6
|
-
attr_reader :
|
6
|
+
attr_reader :dsl
|
7
7
|
|
8
8
|
class << self
|
9
|
-
def define(file_content = nil,
|
10
|
-
self.new(file_content,
|
9
|
+
def define(file_content = nil, &block)
|
10
|
+
self.new(file_content, &block)
|
11
11
|
end
|
12
12
|
end
|
13
13
|
|
14
|
-
def initialize(file_content = nil,
|
15
|
-
@
|
16
|
-
@args = args
|
17
|
-
@number_of_processes = number_of_processes
|
18
|
-
@process_index = process_index
|
14
|
+
def initialize(file_content = nil, &block)
|
15
|
+
@dsl = Dsl.new(self)
|
19
16
|
|
20
17
|
if file_content
|
21
|
-
@
|
18
|
+
@dsl.instance_eval(file_content, "Check your metacrunch Job at Line")
|
22
19
|
elsif block_given?
|
23
|
-
@
|
20
|
+
@dsl.instance_eval(&block)
|
24
21
|
end
|
25
22
|
end
|
26
23
|
|
27
|
-
def
|
28
|
-
@
|
24
|
+
def source
|
25
|
+
@source
|
29
26
|
end
|
30
27
|
|
31
|
-
def
|
28
|
+
def source=(source)
|
32
29
|
ensure_source!(source)
|
33
|
-
|
30
|
+
@source = source
|
34
31
|
end
|
35
32
|
|
36
|
-
def
|
37
|
-
@
|
33
|
+
def destination
|
34
|
+
@destination
|
38
35
|
end
|
39
36
|
|
40
|
-
def
|
37
|
+
def destination=(destination)
|
41
38
|
ensure_destination!(destination)
|
42
|
-
|
39
|
+
@destination = destination
|
43
40
|
end
|
44
41
|
|
45
|
-
def
|
46
|
-
@
|
42
|
+
def pre_process
|
43
|
+
@pre_process
|
47
44
|
end
|
48
45
|
|
49
|
-
def
|
50
|
-
|
46
|
+
def pre_process=(callable)
|
47
|
+
ensure_callable!(callable)
|
48
|
+
@pre_process = callable
|
51
49
|
end
|
52
50
|
|
53
|
-
def
|
54
|
-
@
|
51
|
+
def post_process
|
52
|
+
@post_process
|
55
53
|
end
|
56
54
|
|
57
|
-
def
|
58
|
-
|
55
|
+
def post_process=(callable)
|
56
|
+
ensure_callable!(callable)
|
57
|
+
@post_process = callable
|
59
58
|
end
|
60
59
|
|
61
60
|
def transformations
|
62
61
|
@transformations ||= []
|
63
62
|
end
|
64
63
|
|
65
|
-
def add_transformation(callable
|
66
|
-
|
67
|
-
|
64
|
+
def add_transformation(callable, buffer_size: nil)
|
65
|
+
ensure_callable!(callable)
|
66
|
+
|
67
|
+
if buffer_size && buffer_size.to_i > 0
|
68
|
+
transformations << Metacrunch::Job::Buffer.new(buffer_size)
|
69
|
+
end
|
68
70
|
|
69
|
-
|
70
|
-
transformations << Metacrunch::Job::Buffer.new(size)
|
71
|
+
transformations << callable
|
71
72
|
end
|
72
73
|
|
73
74
|
def run
|
74
|
-
|
75
|
-
run_transformations
|
76
|
-
run_post_processes
|
77
|
-
self
|
78
|
-
end
|
75
|
+
run_pre_process
|
79
76
|
|
80
|
-
|
77
|
+
if source
|
78
|
+
# Run transformation for each data object available in source
|
79
|
+
source.each do |data|
|
80
|
+
data = run_transformations(data)
|
81
|
+
write_destination(data)
|
82
|
+
end
|
81
83
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
84
|
+
# Run all transformations a last time to flush existing buffers
|
85
|
+
data = run_transformations(data = nil, flush_buffers: true)
|
86
|
+
write_destination(data)
|
87
|
+
|
88
|
+
# Close destination
|
89
|
+
destination.close if destination
|
88
90
|
end
|
89
|
-
end
|
90
91
|
|
91
|
-
|
92
|
-
|
92
|
+
run_post_process
|
93
|
+
|
94
|
+
self
|
93
95
|
end
|
94
96
|
|
97
|
+
private
|
98
|
+
|
95
99
|
def ensure_source!(object)
|
96
|
-
raise ArgumentError, "#{object}
|
100
|
+
raise ArgumentError, "#{object} doesn't respond to #each." unless object.respond_to?(:each)
|
97
101
|
end
|
98
102
|
|
99
103
|
def ensure_destination!(object)
|
100
|
-
raise ArgumentError, "#{object}
|
101
|
-
raise ArgumentError, "#{object}
|
104
|
+
raise ArgumentError, "#{object} doesn't respond to #write." unless object.respond_to?(:write)
|
105
|
+
raise ArgumentError, "#{object} doesn't respond to #close." unless object.respond_to?(:close)
|
102
106
|
end
|
103
107
|
|
104
|
-
def
|
105
|
-
|
108
|
+
def ensure_callable!(object)
|
109
|
+
raise ArgumentError, "#{object} doesn't respond to #call." unless object.respond_to?(:call)
|
106
110
|
end
|
107
111
|
|
108
|
-
def
|
109
|
-
|
112
|
+
def run_pre_process
|
113
|
+
pre_process.call if pre_process
|
110
114
|
end
|
111
115
|
|
112
|
-
def
|
113
|
-
|
114
|
-
# Setup parallel processing
|
115
|
-
if @number_of_processes > 1
|
116
|
-
if source.class.included_modules.include?(Metacrunch::ParallelProcessableReader)
|
117
|
-
source.set_parallel_process_options(
|
118
|
-
number_of_processes: @number_of_processes,
|
119
|
-
process_index: @process_index
|
120
|
-
)
|
121
|
-
else
|
122
|
-
raise RuntimeError, "source does't support parallel processing"
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
# sources are expected to respond to `each`
|
127
|
-
source.each do |data|
|
128
|
-
run_transformations_and_write_destinations(data)
|
129
|
-
end
|
130
|
-
|
131
|
-
# Run all transformations a last time to flush possible buffers
|
132
|
-
run_transformations_and_write_destinations(nil, flush_buffers: true)
|
133
|
-
end
|
134
|
-
|
135
|
-
# destination implementations are expected to respond to `close`
|
136
|
-
destinations.each(&:close)
|
116
|
+
def run_post_process
|
117
|
+
post_process.call if post_process
|
137
118
|
end
|
138
119
|
|
139
|
-
def
|
120
|
+
def run_transformations(data, flush_buffers: false)
|
140
121
|
transformations.each do |transformation|
|
141
122
|
if transformation.is_a?(Buffer)
|
142
123
|
if data.present?
|
@@ -150,11 +131,11 @@ module Metacrunch
|
|
150
131
|
end
|
151
132
|
end
|
152
133
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
134
|
+
data
|
135
|
+
end
|
136
|
+
|
137
|
+
def write_destination(data)
|
138
|
+
destination.write(data) if destination
|
158
139
|
end
|
159
140
|
|
160
141
|
end
|
data/lib/metacrunch/job/dsl.rb
CHANGED
@@ -1,38 +1,34 @@
|
|
1
1
|
module Metacrunch
|
2
2
|
class Job::Dsl
|
3
|
-
require_relative "dsl/
|
3
|
+
require_relative "dsl/options"
|
4
4
|
|
5
5
|
def initialize(job)
|
6
6
|
@_job = job
|
7
7
|
end
|
8
8
|
|
9
9
|
def source(source)
|
10
|
-
@_job.
|
10
|
+
@_job.source = source
|
11
11
|
end
|
12
12
|
|
13
13
|
def destination(destination)
|
14
|
-
@_job.
|
14
|
+
@_job.destination = destination
|
15
15
|
end
|
16
16
|
|
17
|
-
def pre_process(callable
|
18
|
-
@_job.
|
17
|
+
def pre_process(callable)
|
18
|
+
@_job.pre_process = callable
|
19
19
|
end
|
20
20
|
|
21
|
-
def post_process(callable
|
22
|
-
@_job.
|
21
|
+
def post_process(callable)
|
22
|
+
@_job.post_process = callable
|
23
23
|
end
|
24
24
|
|
25
|
-
def
|
26
|
-
@_job.
|
27
|
-
end
|
28
|
-
|
29
|
-
def transformation(callable = nil, &block)
|
30
|
-
@_job.add_transformation(callable, &block)
|
25
|
+
def transformation(callable, buffer_size: nil)
|
26
|
+
@_job.add_transformation(callable, buffer_size: buffer_size)
|
31
27
|
end
|
32
28
|
|
33
29
|
def options(require_args: false, &block)
|
34
30
|
if block_given?
|
35
|
-
@_options =
|
31
|
+
@_options = Options.new(require_args: require_args, &block).options
|
36
32
|
else
|
37
33
|
@_options ||= {}
|
38
34
|
end
|
data/lib/metacrunch/job/dsl/options.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
module Metacrunch
|
2
|
+
class Job::Dsl::Options
|
3
|
+
require_relative "options/dsl"
|
4
|
+
|
5
|
+
attr_reader :options
|
6
|
+
|
7
|
+
def initialize(argv = ARGV, require_args: false, &block)
|
8
|
+
@options = {}
|
9
|
+
dsl.instance_eval(&block)
|
10
|
+
|
11
|
+
dsl.options.each do |key, opt_def|
|
12
|
+
# Set default value
|
13
|
+
@options[key] = opt_def[:default]
|
14
|
+
|
15
|
+
# Register with OptionParser
|
16
|
+
if opt_def[:args].present?
|
17
|
+
option = parser.define(*opt_def[:args]) { |value| @options[key] = value }
|
18
|
+
|
19
|
+
option.desc << "REQUIRED" if opt_def[:required]
|
20
|
+
option.desc << "DEFAULT: #{opt_def[:default]}" if opt_def[:default].present?
|
21
|
+
|
22
|
+
parser_options[key] = option
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# Finally parse CLI options with OptionParser
|
27
|
+
parser.parse!(argv)
|
28
|
+
|
29
|
+
# Make sure required options are present
|
30
|
+
ensure_required_options!(@options)
|
31
|
+
|
32
|
+
# Make sure args are present if required
|
33
|
+
ensure_required_args!(argv) if require_args
|
34
|
+
rescue OptionParser::ParseError => e
|
35
|
+
error(e.message)
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def parser
|
41
|
+
@parser ||= OptionParser.new do |opts|
|
42
|
+
opts.banner = <<-BANNER.strip_heredoc
|
43
|
+
#{ColorizedString["Job options:"].bold}
|
44
|
+
BANNER
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def dsl
|
49
|
+
@dsl ||= Dsl.new
|
50
|
+
end
|
51
|
+
|
52
|
+
def parser_options
|
53
|
+
@parser_options ||= {}
|
54
|
+
end
|
55
|
+
|
56
|
+
def error(message)
|
57
|
+
puts ColorizedString["Error: #{message}\n"].red.bold
|
58
|
+
puts parser.help
|
59
|
+
exit(1)
|
60
|
+
end
|
61
|
+
|
62
|
+
def ensure_required_options!(options)
|
63
|
+
dsl.options.each do |key, opt_def|
|
64
|
+
if opt_def[:required] && options[key].blank?
|
65
|
+
long_option = parser_options[key].long.try(:[], 0)
|
66
|
+
short_option = parser_options[key].short.try(:[], 0)
|
67
|
+
|
68
|
+
error("Required job option `#{long_option || short_option}` missing.")
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def ensure_required_args!(argv)
|
74
|
+
if argv.blank?
|
75
|
+
error("Required ARGS are missing.")
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
end
|
data/lib/metacrunch/job/dsl/options/dsl.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module Metacrunch
|
2
|
+
class Job::Dsl::Options::Dsl
|
3
|
+
|
4
|
+
def add(name, *args, default: nil, required: false)
|
5
|
+
if default && required
|
6
|
+
raise ArgumentError, "You can't use `default` and `required` option at the same time."
|
7
|
+
end
|
8
|
+
|
9
|
+
options[name.to_sym] = {
|
10
|
+
args: args,
|
11
|
+
default: default,
|
12
|
+
required: required
|
13
|
+
}
|
14
|
+
end
|
15
|
+
|
16
|
+
def options
|
17
|
+
@options ||= {}
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|
data/lib/metacrunch/version.rb
CHANGED
data/metacrunch.gemspec
CHANGED
@@ -7,7 +7,6 @@ Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "metacrunch"
|
8
8
|
spec.version = Metacrunch::VERSION
|
9
9
|
spec.authors = ["René Sprotte", "Michael Sievers", "Marcel Otto"]
|
10
|
-
spec.email = "r.sprotte@ub.uni-paderborn.de"
|
11
10
|
spec.summary = %q{Data processing and ETL toolkit for Ruby}
|
12
11
|
spec.homepage = "http://github.com/ubpb/metacrunch"
|
13
12
|
spec.license = "MIT"
|
@@ -17,9 +16,6 @@ Gem::Specification.new do |spec|
|
|
17
16
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
18
17
|
spec.require_paths = ["lib"]
|
19
18
|
|
20
|
-
spec.add_dependency "activesupport", ">=
|
21
|
-
spec.add_dependency "colorize", ">= 0.8"
|
22
|
-
spec.add_dependency "parallel", "~> 1.9"
|
23
|
-
spec.add_dependency "sequel", "~> 4.33"
|
24
|
-
spec.add_dependency "redis", "~> 3.3"
|
19
|
+
spec.add_dependency "activesupport", ">= 5.1.0"
|
20
|
+
spec.add_dependency "colorize", ">= 0.8.1"
|
25
21
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2017-
|
13
|
+
date: 2017-09-25 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activesupport
|
@@ -18,78 +18,30 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - ">="
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
22
|
-
- - "<"
|
23
|
-
- !ruby/object:Gem::Version
|
24
|
-
version: '5.1'
|
21
|
+
version: 5.1.0
|
25
22
|
type: :runtime
|
26
23
|
prerelease: false
|
27
24
|
version_requirements: !ruby/object:Gem::Requirement
|
28
25
|
requirements:
|
29
26
|
- - ">="
|
30
27
|
- !ruby/object:Gem::Version
|
31
|
-
version:
|
32
|
-
- - "<"
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version: '5.1'
|
28
|
+
version: 5.1.0
|
35
29
|
- !ruby/object:Gem::Dependency
|
36
30
|
name: colorize
|
37
31
|
requirement: !ruby/object:Gem::Requirement
|
38
32
|
requirements:
|
39
33
|
- - ">="
|
40
34
|
- !ruby/object:Gem::Version
|
41
|
-
version:
|
35
|
+
version: 0.8.1
|
42
36
|
type: :runtime
|
43
37
|
prerelease: false
|
44
38
|
version_requirements: !ruby/object:Gem::Requirement
|
45
39
|
requirements:
|
46
40
|
- - ">="
|
47
41
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
49
|
-
- !ruby/object:Gem::Dependency
|
50
|
-
name: parallel
|
51
|
-
requirement: !ruby/object:Gem::Requirement
|
52
|
-
requirements:
|
53
|
-
- - "~>"
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: '1.9'
|
56
|
-
type: :runtime
|
57
|
-
prerelease: false
|
58
|
-
version_requirements: !ruby/object:Gem::Requirement
|
59
|
-
requirements:
|
60
|
-
- - "~>"
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '1.9'
|
63
|
-
- !ruby/object:Gem::Dependency
|
64
|
-
name: sequel
|
65
|
-
requirement: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '4.33'
|
70
|
-
type: :runtime
|
71
|
-
prerelease: false
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
requirements:
|
74
|
-
- - "~>"
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
version: '4.33'
|
77
|
-
- !ruby/object:Gem::Dependency
|
78
|
-
name: redis
|
79
|
-
requirement: !ruby/object:Gem::Requirement
|
80
|
-
requirements:
|
81
|
-
- - "~>"
|
82
|
-
- !ruby/object:Gem::Version
|
83
|
-
version: '3.3'
|
84
|
-
type: :runtime
|
85
|
-
prerelease: false
|
86
|
-
version_requirements: !ruby/object:Gem::Requirement
|
87
|
-
requirements:
|
88
|
-
- - "~>"
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
version: '3.3'
|
42
|
+
version: 0.8.1
|
91
43
|
description:
|
92
|
-
email:
|
44
|
+
email:
|
93
45
|
executables:
|
94
46
|
- metacrunch
|
95
47
|
extensions: []
|
@@ -106,21 +58,11 @@ files:
|
|
106
58
|
- exe/metacrunch
|
107
59
|
- lib/metacrunch.rb
|
108
60
|
- lib/metacrunch/cli.rb
|
109
|
-
- lib/metacrunch/db.rb
|
110
|
-
- lib/metacrunch/db/reader.rb
|
111
|
-
- lib/metacrunch/db/writer.rb
|
112
|
-
- lib/metacrunch/fs.rb
|
113
|
-
- lib/metacrunch/fs/entry.rb
|
114
|
-
- lib/metacrunch/fs/reader.rb
|
115
61
|
- lib/metacrunch/job.rb
|
116
62
|
- lib/metacrunch/job/buffer.rb
|
117
63
|
- lib/metacrunch/job/dsl.rb
|
118
|
-
- lib/metacrunch/job/dsl/
|
119
|
-
- lib/metacrunch/
|
120
|
-
- lib/metacrunch/redis.rb
|
121
|
-
- lib/metacrunch/redis/queue_reader.rb
|
122
|
-
- lib/metacrunch/redis/queue_writer.rb
|
123
|
-
- lib/metacrunch/redis/writer.rb
|
64
|
+
- lib/metacrunch/job/dsl/options.rb
|
65
|
+
- lib/metacrunch/job/dsl/options/dsl.rb
|
124
66
|
- lib/metacrunch/test_utils.rb
|
125
67
|
- lib/metacrunch/test_utils/dummy_callable.rb
|
126
68
|
- lib/metacrunch/test_utils/dummy_destination.rb
|
@@ -147,7 +89,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
89
|
version: '0'
|
148
90
|
requirements: []
|
149
91
|
rubyforge_project:
|
150
|
-
rubygems_version: 2.
|
92
|
+
rubygems_version: 2.6.11
|
151
93
|
signing_key:
|
152
94
|
specification_version: 4
|
153
95
|
summary: Data processing and ETL toolkit for Ruby
|