metacrunch 3.1.4 → 4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -2
- data/Gemfile +5 -11
- data/Rakefile +1 -0
- data/Readme.md +98 -90
- data/lib/metacrunch.rb +0 -5
- data/lib/metacrunch/cli.rb +22 -61
- data/lib/metacrunch/job.rb +65 -84
- data/lib/metacrunch/job/dsl.rb +10 -14
- data/lib/metacrunch/job/dsl/options.rb +80 -0
- data/lib/metacrunch/job/dsl/options/dsl.rb +21 -0
- data/lib/metacrunch/version.rb +1 -1
- data/metacrunch.gemspec +2 -6
- metadata +10 -68
- data/lib/metacrunch/db.rb +0 -8
- data/lib/metacrunch/db/reader.rb +0 -33
- data/lib/metacrunch/db/writer.rb +0 -55
- data/lib/metacrunch/fs.rb +0 -6
- data/lib/metacrunch/fs/entry.rb +0 -17
- data/lib/metacrunch/fs/reader.rb +0 -63
- data/lib/metacrunch/job/dsl/option_support.rb +0 -102
- data/lib/metacrunch/parallel_processable_reader.rb +0 -21
- data/lib/metacrunch/redis.rb +0 -8
- data/lib/metacrunch/redis/queue_reader.rb +0 -43
- data/lib/metacrunch/redis/queue_writer.rb +0 -39
- data/lib/metacrunch/redis/writer.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a75f1157f466513ad8b721d4aada832173503ef
|
4
|
+
data.tar.gz: 97872d8c9ee3c7ed78f5cfda03d5535c94d06b53
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92f378e2f694693d17e593a7d4d37aa6e23c7b7e88d659a675772950b38bea37f2462bfdbda951b1ac316fae531b0c96c6e6714484e5047e8b9e6ccaa6558e28
|
7
|
+
data.tar.gz: b0bc8b77912abb87fecd6d756911d6eec7802cdf20a8d81d51b75f775889eda798decef8d069abf75fa9979e61d42be6f1423da83bebafb64d2550aa78c37f87
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -3,21 +3,15 @@ source "https://rubygems.org"
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development do
|
6
|
-
gem "bundler",
|
7
|
-
gem "rake",
|
8
|
-
gem "rspec",
|
9
|
-
gem "simplecov", ">= 0.11.0"
|
10
|
-
gem "sqlite3", ">= 1.3.11", platform: :ruby
|
11
|
-
gem "jdbc-sqlite3", ">= 3.8", platform: :jruby
|
6
|
+
gem "bundler", ">= 1.15"
|
7
|
+
gem "rake", ">= 12.1"
|
8
|
+
gem "rspec", ">= 3.5.0", "< 4.0.0"
|
12
9
|
|
13
10
|
if !ENV["CI"]
|
14
|
-
gem "
|
15
|
-
gem "pry-byebug", ">= 3.3.0", platform: :ruby
|
16
|
-
gem "pry-rescue", ">= 1.4.2", platform: :ruby
|
17
|
-
gem "pry-state", ">= 0.1.7", platform: :ruby
|
11
|
+
gem "pry-byebug", ">= 3.5.0"
|
18
12
|
end
|
19
13
|
end
|
20
14
|
|
21
15
|
group :test do
|
22
|
-
gem "
|
16
|
+
gem "simplecov", ">= 0.15.0"
|
23
17
|
end
|
data/Rakefile
CHANGED
data/Readme.md
CHANGED
@@ -8,7 +8,6 @@ metacrunch
|
|
8
8
|
metacrunch is a simple and lightweight data processing and ETL ([Extract-Transform-Load](http://en.wikipedia.org/wiki/Extract,_transform,_load))
|
9
9
|
toolkit for Ruby.
|
10
10
|
|
11
|
-
**NOTE: THIS README IS FOR THE MASTER BRANCH. CHECK THE [RELEASES-PAGE](https://github.com/ubpb/metacrunch/releases) TO SEE THE README FOR THE RELEVANT RELEASES**
|
12
11
|
|
13
12
|
Installation
|
14
13
|
------------
|
@@ -17,15 +16,17 @@ Installation
|
|
17
16
|
$ gem install metacrunch
|
18
17
|
```
|
19
18
|
|
19
|
+
*Note: When upgrading from metacrunch 3.x, there are some breaking changes you need to address. See the [notes below](#upgrading) for details.*
|
20
|
+
|
20
21
|
|
21
22
|
Creating ETL jobs
|
22
23
|
-----------------
|
23
24
|
|
24
|
-
The basic idea behind an ETL job in metacrunch is the concept of a data processing pipeline. Each ETL job reads data from
|
25
|
+
The basic idea behind an ETL job in metacrunch is the concept of a data processing pipeline. Each ETL job reads data from a **source** (extract step), runs one or more **transformations** (transform step) on the data and finally loads the transformed data to a **destination** (load step).
|
25
26
|
|
26
|
-
metacrunch
|
27
|
+
metacrunch gives you a simple DSL ([Domain-specific language](https://en.wikipedia.org/wiki/Domain-specific_language)) to define and run ETL jobs in Ruby. Just create a text file with the extension `.metacrunch` and [run it](#running-etl-jobs) with the provided `metacrunch` CLI command. *Note: The file extension doesn't really matter, but you should avoid `.rb` so that job files are not loaded by mistake by another Ruby component.*
|
27
28
|
|
28
|
-
Let's walk through the main steps of creating ETL jobs with metacrunch. For a collection of working examples check out our [metacrunch-demo](https://github.com/ubpb/metacrunch-demo)
|
29
|
+
Let's walk through the main steps of creating ETL jobs with metacrunch. For a collection of working examples check out our [metacrunch-demo](https://github.com/ubpb/metacrunch-demo) repository.
|
29
30
|
|
30
31
|
#### It's Ruby
|
31
32
|
|
@@ -49,80 +50,91 @@ require "SomeGem"
|
|
49
50
|
require_relative "./some/other/ruby/file"
|
50
51
|
```
|
51
52
|
|
52
|
-
#### Defining
|
53
|
+
#### Defining a source
|
53
54
|
|
54
|
-
A source
|
55
|
+
A source is an object that reads data (e.g. from a file or an external system) into the metacrunch processing pipeline. Implementing sources is easy – a source can be any Ruby object that responds to `#each`. For more information on how to implement sources [see notes below](#implementing-sources).
|
55
56
|
|
56
|
-
You must declare
|
57
|
+
You must declare a source to allow a job to run.
|
57
58
|
|
58
59
|
```ruby
|
59
60
|
# File: my_etl_job.metacrunch
|
60
61
|
|
61
|
-
source
|
62
|
+
source [1,2,3,4]
|
63
|
+
# or ...
|
64
|
+
source Metacrunch::File::Source.new(ARGV)
|
65
|
+
# or ...
|
62
66
|
source MySource.new
|
63
67
|
```
|
64
68
|
|
65
|
-
This example uses a build-in file reader source. To learn more about the build-in sources see [notes below](#built-in-sources-and-destinations).
|
66
|
-
|
67
69
|
#### Defining transformations
|
68
70
|
|
69
|
-
To process, transform or manipulate data use the `#transformation` hook. A transformation
|
71
|
+
To process, transform or manipulate data use the `#transformation` hook. A transformation is implemented with a `callable` object (any Ruby object that responds to `#call`, e.g. a lambda). To learn more about transformations check the section about [implementing transformations](#implementing-transformations) below.
|
70
72
|
|
71
|
-
The current data object (the object
|
73
|
+
The current data object (the last object yielded by the source) will be passed to the first transformation as a parameter. The return value of a transformation will then be passed to the next transformation and so on.
|
72
74
|
|
73
|
-
If you return nil the current data object will be dismissed and the next transformation
|
75
|
+
If you return `nil` the current data object will be dismissed and the next transformation won't be called.
|
74
76
|
|
75
77
|
```ruby
|
76
78
|
# File: my_etl_job.metacrunch
|
77
79
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
# Do your data transformation process here.
|
80
|
+
# Array implements #each and therefore is a valid source
|
81
|
+
source [1,2,3,4,5,6,7,8,9]
|
82
82
|
|
83
|
+
# A transformation is implemented with a `callable` object (any
|
84
|
+
# object that responds to #call).
|
85
|
+
# Lambdas respond to #call
|
86
|
+
transformation ->(number) {
|
87
|
+
# Called for each data object that has been read by a source.
|
83
88
|
# You must return the data to keep it in the pipeline. Dismiss the
|
84
89
|
# data conditionally by returning nil.
|
85
|
-
|
86
|
-
|
90
|
+
number if number.odd?
|
91
|
+
}
|
87
92
|
|
88
|
-
|
89
|
-
|
90
|
-
transformation ->(data) {
|
91
|
-
# Lambdas responds to #call
|
93
|
+
transformation ->(odd_number) {
|
94
|
+
odd_number * 2
|
92
95
|
}
|
93
96
|
|
94
|
-
# MyTransformation
|
97
|
+
# MyTransformation implements #call
|
95
98
|
transformation MyTransformation.new
|
96
99
|
```
|
97
100
|
|
98
|
-
####
|
101
|
+
#### Using a transformation buffer
|
99
102
|
|
100
|
-
|
103
|
+
Sometimes it is useful to buffer data between transformation steps to allow a transformation to work on larger bulks of data. metacrunch uses a simple transformation buffer to achieve this.
|
104
|
+
|
105
|
+
To use a transformation buffer pass the buffer size as an option to the transformation.
|
101
106
|
|
102
107
|
```ruby
|
103
108
|
# File: my_etl_job.metacrunch
|
104
109
|
|
105
|
-
|
110
|
+
source 1..95 # A range responds to #each and is a valid source
|
111
|
+
|
112
|
+
transformation ->(bulk) {
|
113
|
+
# this transformation is called when the buffer
|
114
|
+
# is filled with 10 objects or if the source has
|
115
|
+
# yielded the last data object.
|
116
|
+
# bulk would be: [1,...,10], [11,...,20], ..., [91,...,95]
|
117
|
+
}, buffer_size: 10
|
106
118
|
```
|
107
119
|
|
108
|
-
|
120
|
+
#### Defining a destination
|
109
121
|
|
110
|
-
|
122
|
+
A destination is an object that writes the transformed data to an external system. Implementing destinations is easy – [see notes below](#implementing-destinations). A destination receives the return value from the last transformation as a parameter if the return value from the last transformation was not `nil`.
|
111
123
|
|
112
|
-
|
113
|
-
`#pre_process` hook. To run arbitrary after the last transformation use
|
114
|
-
`#post_process`. Like transformations, `#post_process` and `#pre_process` can be called with a block, a lambda or a (callable) object.
|
124
|
+
Using destinations is optional. In most cases using the last transformation to write the data to an external system is fine. Destinations are useful if the required code is more complex.
|
115
125
|
|
116
126
|
```ruby
|
117
|
-
|
118
|
-
# Called before the first transformation
|
119
|
-
end
|
127
|
+
# File: my_etl_job.metacrunch
|
120
128
|
|
121
|
-
|
122
|
-
|
123
|
-
|
129
|
+
destination MyDestination.new
|
130
|
+
```
|
131
|
+
|
132
|
+
#### Pre/Post process
|
133
|
+
|
134
|
+
To run arbitrary code before the first transformation is run on the first data object use the `#pre_process` hook. To run arbitrary code after the last transformation is run on the last data object use `#post_process`. Like transformations, `#post_process` and `#pre_process` must be implemented using a `callable` object.
|
124
135
|
|
125
|
-
|
136
|
+
```ruby
|
137
|
+
pre_process -> {
|
126
138
|
# Lambdas respond to #call
|
127
139
|
}
|
128
140
|
|
@@ -130,42 +142,60 @@ pre_process ->() {
|
|
130
142
|
post_process MyCallable.new
|
131
143
|
```
|
132
144
|
|
133
|
-
#### Defining options
|
145
|
+
#### Defining job options
|
134
146
|
|
135
|
-
metacrunch has build-in support to parameterize
|
147
|
+
metacrunch has built-in support to parameterize jobs. Using the `options` hook you can declare options that can be set/overridden by the CLI when [running your jobs](#running-etl-jobs).
|
136
148
|
|
137
149
|
```ruby
|
150
|
+
# File: my_etl_job.metacrunch
|
151
|
+
|
138
152
|
options do
|
139
|
-
add :
|
153
|
+
add :log_level, "-l", "--log-level LEVEL", "Log level (debug,info,warn,error)", default: "info"
|
140
154
|
add :database_url, "-d", "--database URL", "Database connection URL", required: true
|
141
155
|
end
|
156
|
+
|
157
|
+
# Prints out 'info'
|
158
|
+
echo options[:log_level]
|
142
159
|
```
|
143
160
|
|
144
|
-
In this example we declare two options `
|
161
|
+
In this example we declare two options `log_level` and `database_url`. `log_level` defaults to `info`, whereas `database_url` has no default and is required. In your job file you can access the option values using the `options` Hash. E.g. `options[:log_level]`.
|
145
162
|
|
146
163
|
To set/override these options use the command line.
|
147
164
|
|
148
165
|
```
|
149
|
-
$ bundle exec metacrunch my_etl_job.metacrunch
|
166
|
+
$ bundle exec metacrunch my_etl_job.metacrunch --log-level debug
|
150
167
|
```
|
151
168
|
|
152
|
-
This will set the `options[:
|
169
|
+
This will set the `options[:log_level]` to `debug`.
|
153
170
|
|
154
171
|
To get a list of available options for a job, use `--help` on the command line.
|
155
172
|
|
156
173
|
```
|
157
|
-
$ bundle exec metacrunch my_etl_job.metacrunch
|
174
|
+
$ bundle exec metacrunch my_etl_job.metacrunch --help
|
158
175
|
|
159
|
-
Usage: metacrunch
|
176
|
+
Usage: metacrunch [options] JOB_FILE [job-options] [ARGS]
|
160
177
|
Job options:
|
161
|
-
-
|
162
|
-
DEFAULT:
|
178
|
+
-l, --log-level LEVEL Log level (debug,info,warn,error)
|
179
|
+
DEFAULT: info
|
163
180
|
-d, --database URL Database connection URL
|
164
181
|
REQUIRED
|
165
182
|
```
|
166
183
|
|
167
184
|
To learn more about defining options take a look at the [reference below](#defining-job-options).
|
168
185
|
|
186
|
+
#### Require non-option arguments
|
187
|
+
|
188
|
+
All non-option arguments that get passed to the job when running are available in the `ARGV` constant. If your job requires such arguments (e.g. if you work with a list of files) you can require them.
|
189
|
+
|
190
|
+
```ruby
|
191
|
+
# File: my_etl_job.metacrunch
|
192
|
+
|
193
|
+
options(require_args: true) do
|
194
|
+
# ...
|
195
|
+
end
|
196
|
+
|
197
|
+
```
|
198
|
+
|
169
199
|
|
170
200
|
Running ETL jobs
|
171
201
|
----------------
|
@@ -185,21 +215,19 @@ If you use [Bundler](http://bundler.io) to manage dependencies for your jobs mak
|
|
185
215
|
$ bundle exec metacrunch my_etl_job.metacrunch
|
186
216
|
```
|
187
217
|
|
188
|
-
Depending on your environment `bundle exec` may not be required (e.g. you have rubygems-bundler installed) but we recommend using it whenever you have a Gemfile you like to use. When using Bundler make sure to add `gem "metacrunch"` to the Gemfile.
|
189
|
-
|
190
|
-
To pass options to the job, separate job options from the metacrunch command options using the `@@` separator.
|
218
|
+
Depending on your environment `bundle exec` may not be required (e.g. if you have rubygems-bundler installed) but we recommend using it whenever you have a Gemfile you would like to use. When using Bundler make sure to add `gem "metacrunch"` to the Gemfile.
|
191
219
|
|
192
|
-
Use the following syntax
|
220
|
+
Use the following syntax to run a metacrunch job
|
193
221
|
|
194
222
|
```
|
195
|
-
$ [bundle exec] metacrunch [COMMAND_OPTIONS] JOB_FILE [
|
223
|
+
$ [bundle exec] metacrunch [COMMAND_OPTIONS] JOB_FILE [JOB_OPTIONS] [JOB_ARGS...]
|
196
224
|
```
|
197
225
|
|
198
226
|
|
199
227
|
Implementing sources
|
200
228
|
--------------------
|
201
229
|
|
202
|
-
A source
|
230
|
+
A metacrunch source is any Ruby object that responds to the `each` method that yields data objects one by one.
|
203
231
|
|
204
232
|
The data is usually a `Hash` instance, but could be other structures as long as the rest of your pipeline is expecting it.
|
205
233
|
|
@@ -245,29 +273,9 @@ source MyCsvSource.new("my_data.csv")
|
|
245
273
|
Implementing transformations
|
246
274
|
----------------------------
|
247
275
|
|
248
|
-
|
249
|
-
|
250
|
-
### Transformations as a block
|
251
|
-
|
252
|
-
When using the block syntax the current data row will be passed as a parameter.
|
253
|
-
|
254
|
-
```ruby
|
255
|
-
# File: my_etl_job.metacrunch
|
256
|
-
|
257
|
-
transformation do |data|
|
258
|
-
# DO YOUR TRANSFORMATION HERE
|
259
|
-
data = ...
|
260
|
-
|
261
|
-
# Make sure to return the data to keep it in the pipeline. Dismiss the
|
262
|
-
# data conditionally by returning nil.
|
263
|
-
data
|
264
|
-
end
|
265
|
-
|
266
|
-
```
|
276
|
+
A metacrunch transformation is implemented as a `callable` object. A `callable` in Ruby is any object that responds to the `call` method.
|
267
277
|
|
268
|
-
|
269
|
-
|
270
|
-
Procs and Lambdas in Ruby respond to `call`. They can be used to implement transformations similar to blocks.
|
278
|
+
Procs and Lambdas in Ruby respond to `call`. They can be used to implement transformations inline.
|
271
279
|
|
272
280
|
```ruby
|
273
281
|
# File: my_etl_job.metacrunch
|
@@ -306,7 +314,7 @@ transformation MyTransformation.new
|
|
306
314
|
Implementing destinations
|
307
315
|
-------------------------
|
308
316
|
|
309
|
-
A destination
|
317
|
+
A destination is any Ruby object that responds to `write(data)` and `close`.
|
310
318
|
|
311
319
|
Like sources you are encouraged to implement destinations as classes.
|
312
320
|
|
@@ -337,21 +345,21 @@ destination MyDestination.new
|
|
337
345
|
|
338
346
|
```
|
339
347
|
|
348
|
+
Upgrading
|
349
|
+
---------
|
340
350
|
|
341
|
-
|
342
|
-
---------------------------------
|
343
|
-
|
344
|
-
TBD.
|
345
|
-
|
346
|
-
Defining job dependencies
|
347
|
-
-------------------------
|
348
|
-
|
349
|
-
TBD.
|
351
|
+
#### 3.x -> 4.x
|
350
352
|
|
351
|
-
|
352
|
-
--------------------
|
353
|
+
When upgrading from metacrunch 3.x, there are some breaking changes you need to address.
|
353
354
|
|
354
|
-
|
355
|
+
* There is now only one `source` and `destination`. If you have more than one in your job file the last definition will be used.
|
356
|
+
* There is no `transformation_buffer` anymore. Instead set `buffer_size` as an option to `transformation`.
|
357
|
+
* `transformation`, `pre_process` and `post_process` can't be implemented using a block anymore. Always use a `callable` (E.g. Lambda, Proc or any object responding to `#call`).
|
358
|
+
* When running jobs via the CLI you do not need to separate the arguments passed to metacrunch from the arguments passed to the job with `@@`.
|
359
|
+
* The `args` function used to get the non-option arguments passed to a job has been removed. Use `ARGV` instead.
|
360
|
+
* `Metacrunch::Db` classes have been moved into the [metacrunch-db](https://github.com/ubpb/metacrunch-db) gem package.
|
361
|
+
* `Metacrunch::Redis` classes have been moved into the [metacrunch-redis](https://github.com/ubpb/metacrunch-redis) gem package.
|
362
|
+
* `Metacrunch::File` classes have been moved into the [metacrunch-file](https://github.com/ubpb/metacrunch-file) gem package.
|
355
363
|
|
356
364
|
License
|
357
365
|
-------
|
data/lib/metacrunch.rb
CHANGED
@@ -1,14 +1,9 @@
|
|
1
1
|
require "active_support"
|
2
2
|
require "active_support/core_ext"
|
3
3
|
require "colorized_string"
|
4
|
-
require "parallel"
|
5
4
|
|
6
5
|
module Metacrunch
|
7
6
|
require_relative "metacrunch/version"
|
8
7
|
require_relative "metacrunch/cli"
|
9
8
|
require_relative "metacrunch/job"
|
10
|
-
require_relative "metacrunch/parallel_processable_reader"
|
11
|
-
require_relative "metacrunch/fs"
|
12
|
-
require_relative "metacrunch/db"
|
13
|
-
require_relative "metacrunch/redis"
|
14
9
|
end
|
data/lib/metacrunch/cli.rb
CHANGED
@@ -2,21 +2,32 @@ require "optparse"
|
|
2
2
|
|
3
3
|
module Metacrunch
|
4
4
|
class Cli
|
5
|
-
ARGS_SEPERATOR = "@@"
|
6
5
|
|
7
6
|
def run
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
# Parse global options in order
|
8
|
+
job_argv = global_parser.order(ARGV)
|
9
|
+
# The first of the unparsed arguments is by definition the filename
|
10
|
+
# of the job.
|
11
|
+
job_file = job_argv[0]
|
12
|
+
# Manipulate ARGV so that the option handling for the job can work
|
13
|
+
ARGV.clear
|
14
|
+
job_argv[1..-1]&.each {|arg| ARGV << arg}
|
15
|
+
# Delete the old separator symbol for backward compatibility
|
16
|
+
ARGV.delete_if{|arg| arg == "@@"}
|
17
|
+
# Finally run the job
|
18
|
+
run!(job_file)
|
19
|
+
rescue OptionParser::ParseError => e
|
20
|
+
error(e.message)
|
11
21
|
end
|
12
22
|
|
13
23
|
private
|
24
|
+
|
14
25
|
def global_parser
|
15
26
|
@global_parser ||= OptionParser.new do |opts|
|
16
27
|
opts.banner = <<-BANNER.strip_heredoc
|
17
28
|
#{ColorizedString["Usage:"].bold}
|
18
29
|
|
19
|
-
metacrunch [options] JOB_FILE
|
30
|
+
metacrunch [options] JOB_FILE [job-options] [ARGS...]
|
20
31
|
|
21
32
|
#{ColorizedString["Options:"].bold}
|
22
33
|
BANNER
|
@@ -24,22 +35,9 @@ module Metacrunch
|
|
24
35
|
opts.on("-v", "--version", "Show metacrunch version and exit") do
|
25
36
|
show_version
|
26
37
|
end
|
27
|
-
|
28
|
-
opts.on("-n INTEGER", "--number-of-processes INTEGER", Integer, "Number of parallel processes to run the job. Source needs to support this. DEFAULT: 1") do |n|
|
29
|
-
error("--number-of-procs must be > 0") if n <= 0
|
30
|
-
global_options[:number_of_processes] = n
|
31
|
-
end
|
32
|
-
|
33
|
-
opts.separator "\n"
|
34
38
|
end
|
35
39
|
end
|
36
40
|
|
37
|
-
def global_options
|
38
|
-
@global_options ||= {
|
39
|
-
number_of_processes: 1
|
40
|
-
}
|
41
|
-
end
|
42
|
-
|
43
41
|
def show_version
|
44
42
|
puts Metacrunch::VERSION
|
45
43
|
exit(0)
|
@@ -51,32 +49,13 @@ module Metacrunch
|
|
51
49
|
exit(0)
|
52
50
|
end
|
53
51
|
|
54
|
-
def
|
55
|
-
|
56
|
-
if index == 0
|
57
|
-
[]
|
58
|
-
else
|
59
|
-
@global_argv ||= index ? ARGV[0..index-1] : ARGV
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def job_argv
|
64
|
-
index = ARGV.index(ARGS_SEPERATOR)
|
65
|
-
@job_argv ||= index ? ARGV[index+1..-1] : nil
|
66
|
-
end
|
67
|
-
|
68
|
-
def run!(job_files)
|
69
|
-
if job_files.first == "run"
|
70
|
-
puts ColorizedString["WARN: Using 'run' is deprecated. Just use 'metacrunch [options] JOB_FILE @@ [job-options] [ARGS...]'\n"].yellow.bold
|
71
|
-
job_files = job_files[1..-1]
|
72
|
-
end
|
73
|
-
|
74
|
-
if job_files.empty?
|
52
|
+
def run!(job_file)
|
53
|
+
if job_file.blank?
|
75
54
|
error "You need to provide a job file."
|
76
|
-
elsif
|
77
|
-
error "
|
55
|
+
elsif !File.exists?(job_file)
|
56
|
+
error "The file `#{job_file}` doesn't exist."
|
78
57
|
else
|
79
|
-
job_filename = File.expand_path(
|
58
|
+
job_filename = File.expand_path(job_file)
|
80
59
|
dir = File.dirname(job_filename)
|
81
60
|
|
82
61
|
Dir.chdir(dir) do
|
@@ -86,25 +65,7 @@ module Metacrunch
|
|
86
65
|
end
|
87
66
|
|
88
67
|
def run_job!(job_filename)
|
89
|
-
|
90
|
-
process_indicies = (0..(global_options[:number_of_processes] - 1)).to_a
|
91
|
-
|
92
|
-
Parallel.each(process_indicies) do |process_index|
|
93
|
-
Metacrunch::Job.define(
|
94
|
-
File.read(job_filename),
|
95
|
-
filename: job_filename,
|
96
|
-
args: job_argv,
|
97
|
-
number_of_processes: global_options[:number_of_processes],
|
98
|
-
process_index: process_index
|
99
|
-
).run
|
100
|
-
end
|
101
|
-
else
|
102
|
-
Metacrunch::Job.define(
|
103
|
-
File.read(job_filename),
|
104
|
-
filename: job_filename,
|
105
|
-
args: job_argv
|
106
|
-
).run
|
107
|
-
end
|
68
|
+
Metacrunch::Job.define(File.read(job_filename)).run
|
108
69
|
end
|
109
70
|
|
110
71
|
end
|