bricolage 5.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +4 -0
- data/bin/bricolage +6 -0
- data/bin/bricolage-jobnet +6 -0
- data/jobclass/create.rb +21 -0
- data/jobclass/exec.rb +17 -0
- data/jobclass/insert-delta.rb +31 -0
- data/jobclass/insert.rb +33 -0
- data/jobclass/load.rb +39 -0
- data/jobclass/my-export.rb +40 -0
- data/jobclass/my-migrate.rb +103 -0
- data/jobclass/noop.rb +13 -0
- data/jobclass/rebuild-drop.rb +37 -0
- data/jobclass/rebuild-rename.rb +49 -0
- data/jobclass/s3-put.rb +19 -0
- data/jobclass/sql.rb +29 -0
- data/jobclass/td-delete.rb +20 -0
- data/jobclass/td-export.rb +30 -0
- data/jobclass/unload.rb +30 -0
- data/jobclass/wait-file.rb +48 -0
- data/lib/bricolage/application.rb +260 -0
- data/lib/bricolage/commandutils.rb +52 -0
- data/lib/bricolage/configloader.rb +126 -0
- data/lib/bricolage/context.rb +108 -0
- data/lib/bricolage/datasource.rb +144 -0
- data/lib/bricolage/eventhandlers.rb +47 -0
- data/lib/bricolage/exception.rb +47 -0
- data/lib/bricolage/filedatasource.rb +42 -0
- data/lib/bricolage/filesystem.rb +165 -0
- data/lib/bricolage/genericdatasource.rb +37 -0
- data/lib/bricolage/job.rb +212 -0
- data/lib/bricolage/jobclass.rb +98 -0
- data/lib/bricolage/jobfile.rb +100 -0
- data/lib/bricolage/jobflow.rb +389 -0
- data/lib/bricolage/jobnetrunner.rb +264 -0
- data/lib/bricolage/jobresult.rb +74 -0
- data/lib/bricolage/logger.rb +52 -0
- data/lib/bricolage/mysqldatasource.rb +223 -0
- data/lib/bricolage/parameters.rb +653 -0
- data/lib/bricolage/postgresconnection.rb +78 -0
- data/lib/bricolage/psqldatasource.rb +449 -0
- data/lib/bricolage/resource.rb +68 -0
- data/lib/bricolage/rubyjobclass.rb +42 -0
- data/lib/bricolage/s3datasource.rb +144 -0
- data/lib/bricolage/script.rb +120 -0
- data/lib/bricolage/sqlstatement.rb +351 -0
- data/lib/bricolage/taskqueue.rb +156 -0
- data/lib/bricolage/tddatasource.rb +116 -0
- data/lib/bricolage/variables.rb +208 -0
- data/lib/bricolage/version.rb +4 -0
- data/lib/bricolage.rb +8 -0
- data/libexec/sqldump +9 -0
- data/libexec/sqldump.Darwin +0 -0
- data/libexec/sqldump.Linux +0 -0
- data/test/all.rb +3 -0
- data/test/home/config/development/database.yml +57 -0
- data/test/home/config/development/password.yml +2 -0
- data/test/home/subsys/separated.job +1 -0
- data/test/home/subsys/separated.sql +1 -0
- data/test/home/subsys/unified.jobnet +1 -0
- data/test/home/subsys/unified.sql.job +5 -0
- data/test/test_filesystem.rb +19 -0
- data/test/test_parameters.rb +401 -0
- data/test/test_variables.rb +114 -0
- metadata +192 -0
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
require 'bricolage/datasource'
|
|
2
|
+
require 'bricolage/s3datasource'
|
|
3
|
+
require 'bricolage/sqlstatement'
|
|
4
|
+
require 'bricolage/commandutils'
|
|
5
|
+
require 'bricolage/postgresconnection'
|
|
6
|
+
require 'bricolage/exception'
|
|
7
|
+
|
|
8
|
+
module Bricolage
|
|
9
|
+
|
|
10
|
+
class PSQLDataSource < DataSource
|
|
11
|
+
declare_type 'psql'
|
|
12
|
+
|
|
13
|
+
include CommandUtils
|
|
14
|
+
|
|
15
|
+
# Builds a psql data source from connection settings.
#
# Each setting is stored in an instance variable (username is kept as
# @user).  Raises ParameterError when host, port, database or username
# is missing, or when neither pgpass nor password is given.
def initialize(host: 'localhost', port: 5439, database: 'dev',
               username: ENV['LOGNAME'], password: nil, pgpass: nil,
               encoding: nil, psql: 'psql', tmpdir: Dir.tmpdir)
  @host, @port, @database, @user = host, port, database, username
  @password, @pgpass = password, pgpass
  @encoding, @psql, @tmpdir = encoding, psql, tmpdir

  raise ParameterError, "missing psql host" unless @host
  raise ParameterError, "missing psql port" unless @port
  raise ParameterError, "missing psql database" unless @database
  raise ParameterError, "missing psql username" unless @user
  # Either credential source is enough.
  raise ParameterError, "missing psql password" unless @pgpass || @password
end
|
|
42
|
+
|
|
43
|
+
attr_reader :host
|
|
44
|
+
attr_reader :port
|
|
45
|
+
attr_reader :database
|
|
46
|
+
attr_reader :user
|
|
47
|
+
|
|
48
|
+
# Returns a new PSQLTask constructed with this data source.
def new_task
  PSQLTask.new(self)
end
|
|
51
|
+
|
|
52
|
+
# Yields to the caller's block and returns its value.
# Deliberately does not call #open.
def open_for_batch
  yield
end
|
|
56
|
+
|
|
57
|
+
# Runs +source+ through the psql command-line client.
#
# The text is handed to make_tmpfile and the yielded path is passed to
# psql with -f; ON_ERROR_STOP is set to true.  When the process status
# is not successful, the last "ERROR:" message matched on stderr is
# passed along as the result message.
# Returns the value of JobResult.for_process_status.
def execute(source)
  make_tmpfile(source, tmpdir: @tmpdir) do |path|
    argv = [
      @psql, "--no-psqlrc", "--host=#{@host}", "--port=#{@port}",
      "--username=#{@user}", @database,
      '--echo-all',
      '-v', 'ON_ERROR_STOP=true',
      '-f', path,
      '--no-password'
    ]
    status = command(*argv, env: get_psql_env)
    message = status.success? ? nil : retrieve_last_match_from_stderr(/^psql:.*?:\d+: ERROR: (.*)/, 1)
    JobResult.for_process_status(status, message)
  end
end
|
|
70
|
+
|
|
71
|
+
# Returns the environment Hash passed to psql: PGPASSFILE when a pgpass
# path is set, otherwise PGPASSWORD when a password is set, otherwise
# an empty Hash.
def get_psql_env
  return { "PGPASSFILE" => @pgpass } if @pgpass
  return { "PGPASSWORD" => @password } if @password
  {}
end
|
|
80
|
+
|
|
81
|
+
#
|
|
82
|
+
# Ruby Library Interface
|
|
83
|
+
#
|
|
84
|
+
|
|
85
|
+
# Returns the configured password; when none is set, reads it from the
# pgpass file for @user and caches it in @password.
# FIXME: same user must not exist
def password
  return @password if @password
  @password = read_password_from_pgpass(@pgpass, @user)
end
|
|
89
|
+
|
|
90
|
+
# Looks up the password for +user+ in the pgpass file at +path+.
#
# Returns the field that follows ":<user>:" in the first place the
# pattern matches.  The user name is regexp-escaped so that names with
# characters such as "." or "+" only match literally.
# Only the user name is compared; host, port and database fields are
# not checked here.
# Raises ParameterError when nothing matches.
def read_password_from_pgpass(path, user)
  pattern = /:#{Regexp.escape(user.to_s)}:([^:\r\n]+)$/
  File.read(path).slice(pattern, 1) or
      raise ParameterError, "could not read password: #{path}, #{user}"
end
|
|
94
|
+
|
|
95
|
+
# Opens a PG connection with this data source's settings and yields it
# wrapped in a PostgresConnection.  The raw connection is closed when
# the block returns or raises.  Returns the block's value.
def open(&block)
  conn = nil
  begin
    conn = PG::Connection.open(host: @host, port: @port, dbname: @database, user: @user, password: password)
    yield PostgresConnection.new(conn, self, logger)
  ensure
    # conn is still nil if the connection attempt itself failed.
    conn.close if conn
  end
end
|
|
101
|
+
|
|
102
|
+
# Opens a connection via #open, calls its #query with +query+, and
# returns that result.
def query(query)
  open do |conn|
    conn.query(query)
  end
end
|
|
105
|
+
|
|
106
|
+
# Opens a connection via #open and calls its #drop_table with +name+.
def drop_table(name)
  open do |conn|
    conn.drop_table(name)
  end
end
|
|
109
|
+
|
|
110
|
+
# Opens a connection via #open and calls its #drop_table_force with
# +name+.
def drop_table_force(name)
  open do |conn|
    conn.drop_table_force(name)
  end
end
|
|
113
|
+
|
|
114
|
+
# Opens a connection via #open and calls its #select with +table+,
# forwarding the caller's block.
def select(table, &block)
  open do |conn|
    conn.select(table, &block)
  end
end
|
|
117
|
+
|
|
118
|
+
# Opens a connection via #open and calls its #vacuum with +table+.
def vacuum(table)
  open do |conn|
    conn.vacuum(table)
  end
end
|
|
121
|
+
|
|
122
|
+
# Opens a connection via #open and calls its #vacuum_sort_only with
# +table+.
def vacuum_sort_only(table)
  open do |conn|
    conn.vacuum_sort_only(table)
  end
end
|
|
125
|
+
|
|
126
|
+
# Opens a connection via #open and calls its #analyze with +table+.
def analyze(table)
  open do |conn|
    conn.analyze(table)
  end
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# Reopened to give every data source a default answer: it is not
# usable as a bulk-load source unless a subclass says otherwise.
class DataSource # reopen
  def redshift_loader_source?
    false
  end
end
|
|
136
|
+
|
|
137
|
+
# Reopened to mark S3 data sources as usable bulk-load sources.
class S3DataSource # reopen
  def redshift_loader_source?
    true
  end
end
|
|
142
|
+
|
|
143
|
+
# DynamoDB is not supported yet.
|
|
144
|
+
|
|
145
|
+
class PSQLTask < DataSourceTask
|
|
146
|
+
# Wraps +stmt+ in a SQLAction and passes it to #add.
def exec(stmt)
  add(SQLAction.new(stmt))
end
|
|
149
|
+
|
|
150
|
+
# Yields the statement of every action reported by #each_action.
def each_statement
  each_action { |action| yield action.statement }
end
|
|
155
|
+
|
|
156
|
+
# Action that carries a single SQL statement.
class SQLAction < Action
  attr_reader :statement

  # #bind is forwarded to the wrapped statement.
  def_delegator '@statement', :bind

  def initialize(stmt)
    @statement = stmt
  end
end
|
|
165
|
+
|
|
166
|
+
# override
|
|
167
|
+
# Returns the psql script for this task: a '\timing on' line followed,
# for each statement, by a blank line, an optional "-- <location>"
# comment and the statement's stripped source.
def source
  StringIO.new.tap { |out|
    out.puts '\timing on'
    each_statement do |stmt|
      out.puts
      out.puts "-- #{stmt.location}" if stmt.location
      out.puts stmt.stripped_source
    end
  }.string
end
|
|
177
|
+
|
|
178
|
+
# override
|
|
179
|
+
# Executes the script returned by #source on the data source (@ds)
# and returns the result of that call.
def run
  @ds.execute(source)
end
|
|
182
|
+
|
|
183
|
+
# Executes the script returned by #explain_source on the data source
# (@ds) and returns the result of that call.
def run_explain
  @ds.execute(explain_source)
end
|
|
186
|
+
|
|
187
|
+
# Returns a script in which each statement whose kind passes
# #support_explain? is prefixed with "explain"; every other statement
# is wrapped in a /* ... */ comment.
def explain_source
  StringIO.new.tap { |out|
    each_statement do |stmt|
      out.puts
      out.puts "-- #{stmt.location}" if stmt.location
      line =
        if support_explain?(stmt.kind)
          "explain #{stmt.stripped_source}"
        else
          "/* #{stmt.stripped_source} */"
        end
      out.puts line
    end
  }.string
end
|
|
200
|
+
|
|
201
|
+
# True when +statement_kind+ is one of 'select', 'insert', 'update' or
# 'delete'; false otherwise.
def support_explain?(statement_kind)
  %w[select insert update delete].include?(statement_kind)
end
|
|
207
|
+
|
|
208
|
+
# Adds a statement that creates +target+ with a single int column.
# ON_ERROR_STOP is switched off around the create and back on after it.
def create_dummy_table(target)
  sql = "\\set ON_ERROR_STOP false\n" \
        "create table #{target} (x int);\n" \
        "\\set ON_ERROR_STOP true\n"
  exec(SQLStatement.for_string(sql))
end
|
|
215
|
+
|
|
216
|
+
# Adds a "drop table ... cascade" statement for +target_table+.
def drop(target_table)
  exec(SQLStatement.for_string("drop table #{target_table} cascade;"))
end
|
|
219
|
+
|
|
220
|
+
# Calls #drop with the '${dest_table}' placeholder when +enabled+ is
# truthy; otherwise does nothing and returns nil.
def drop_if(enabled)
  return unless enabled
  drop('${dest_table}')
end
|
|
223
|
+
|
|
224
|
+
# Adds a "drop table ... cascade" statement for +target_table+ with
# ON_ERROR_STOP switched off around it and back on afterwards.
def drop_force(target_table)
  sql = "\\set ON_ERROR_STOP false\n" \
        "drop table #{target_table} cascade;\n" \
        "\\set ON_ERROR_STOP true\n"
  exec(SQLStatement.for_string(sql))
end
|
|
231
|
+
|
|
232
|
+
# Calls #drop_force with the '${dest_table}' placeholder when
# +enabled+ is truthy; otherwise does nothing and returns nil.
def drop_force_if(enabled)
  return unless enabled
  drop_force('${dest_table}')
end
|
|
235
|
+
|
|
236
|
+
# Adds a "truncate" statement for +target+ when +enabled+ is truthy;
# otherwise does nothing and returns nil.
def truncate_if(enabled, target = '${dest_table}')
  return unless enabled
  exec(SQLStatement.for_string("truncate #{target};"))
end
|
|
239
|
+
|
|
240
|
+
# Adds an "alter table ... rename to ..." statement from +src+ to
# +dest+.
def rename_table(src, dest)
  exec(SQLStatement.for_string("alter table #{src} rename to #{dest};"))
end
|
|
243
|
+
|
|
244
|
+
# Adds a "vacuum" statement for +target+ when +enable_vacuum+ is
# truthy, or a "vacuum sort only" statement when only
# +enable_vacuum_sort+ is truthy.  Returns nil when neither is set.
def vacuum_if(enable_vacuum, enable_vacuum_sort, target = '${dest_table}')
  return exec(SQLStatement.for_string("vacuum #{target};")) if enable_vacuum
  return exec(SQLStatement.for_string("vacuum sort only #{target};")) if enable_vacuum_sort
  nil
end
|
|
251
|
+
|
|
252
|
+
# Adds an "analyze" statement for +target+ when +enabled+ is truthy;
# otherwise does nothing and returns nil.
def analyze_if(enabled, target = '${dest_table}')
  return unless enabled
  exec(SQLStatement.for_string("analyze #{target};"))
end
|
|
255
|
+
|
|
256
|
+
# Adds a "grant <privilege> on <on> to <to>" statement.
def grant(privilege:, on:, to:)
  sql = "grant #{privilege} on #{on} to #{to};"
  exec(SQLStatement.for_string(sql))
end
|
|
259
|
+
|
|
260
|
+
# Option names #grant_if accepts; all of them are required.
GRANT_OPTS = %w[privilege to]

# Calls #grant on +target+ using the string-keyed +opts+ Hash.
#
# Does nothing when +opts+ is nil, false or empty.  Raises
# ParameterError when +opts+ has a key outside GRANT_OPTS or lacks one
# of them.
def grant_if(opts, target)
  return if !opts || opts.empty?

  unknown_keys = opts.keys - GRANT_OPTS
  raise ParameterError, "unknown grant options: #{unknown_keys.inspect}" unless unknown_keys.empty?
  missing_keys = GRANT_OPTS - opts.keys
  raise ParameterError, %Q(missing grant options: #{missing_keys.inspect}) unless missing_keys.empty?

  # String keys become the keyword arguments of #grant.
  args = opts.each_with_object({on: target}) { |(k, v), acc| acc[k.intern] = v }
  grant(**args)
end
|
|
275
|
+
|
|
276
|
+
# Adds 'begin transaction;', yields so the caller can add statements,
# then adds 'commit;'.
def transaction
  exec(SQLStatement.for_string('begin transaction;'))
  yield
  exec(SQLStatement.for_string('commit;'))
end
|
|
281
|
+
|
|
282
|
+
# Adds the statement built by #copy_statement from the same arguments.
def load(src_ds, src_path, dest_table, format, jsonpath, opts)
  sql = copy_statement(src_ds, src_path, dest_table, format, jsonpath, opts)
  exec(SQLStatement.for_string(sql))
end
|
|
285
|
+
|
|
286
|
+
# Builds the text of a "copy" statement that loads +dest_table+ from
# +src_path+ on +src_ds+.
#
# The source URL and credential string come from +src_ds+, the format
# clause from #format_option, and each entry of +opts+ is appended on
# its own line.  Raises ParameterError when +src_ds+ does not report
# itself as a redshift loader source.
def copy_statement(src_ds, src_path, dest_table, format, jsonpath, opts)
  raise ParameterError, "input data source does not support redshift as bulk loading source: #{src_ds.name}" unless src_ds.redshift_loader_source?

  StringIO.new.tap { |sql|
    sql.puts "copy #{dest_table}"
    sql.puts "from '#{src_ds.url(src_path)}'"
    sql.puts "credentials '#{src_ds.credential_string}'"
    sql.puts format_option(format, src_ds, jsonpath)
    opts.each { |opt| sql.puts opt.to_s }
    sql.puts ';'
  }.string
end
|
|
301
|
+
|
|
302
|
+
# Returns the format clause for a copy statement.
#
# 'tsv' gives a tab delimiter clause.  'json' gives json 'auto', or
# the URL of +jsonpath+ on +src_ds+ when a jsonpath is given.
# Any other format raises ParameterError.
def format_option(fmt, src_ds, jsonpath)
  if 'tsv' == fmt
    %q(delimiter '\t')
  elsif 'json' == fmt
    if jsonpath
      "json '#{src_ds.url(jsonpath)}'"
    else
      %q(json 'auto')
    end
  else
    raise ParameterError, "unsupported format: #{fmt}"
  end
end
|
|
312
|
+
|
|
313
|
+
# Adds the statement built by #unload_statement from the same
# arguments.
def unload(stmt, dest_ds, dest_path, format, opts)
  exec(unload_statement(stmt, dest_ds, dest_path, format, opts))
end
|
|
316
|
+
|
|
317
|
+
# Wraps the query of +stmt+ in an "unload" statement that writes to
# +dest_path+ on +dest_ds+.
#
# The query text goes through #format_query before being quoted.
# Returns a new SQLStatement that keeps the location and declarations
# of +stmt+.
def unload_statement(stmt, dest_ds, dest_path, format, opts)
  text = StringIO.new.tap { |sql|
    sql.puts "unload ('#{format_query(stmt.stripped_raw_content)}')"
    sql.puts "to '#{dest_ds.url(dest_path)}'"
    sql.puts "credentials '#{dest_ds.credential_string}'"
    sql.puts unload_format_option(format, dest_ds)
    opts.each { |opt| sql.puts opt.to_s }
    sql.puts ';'
  }.string
  SQLStatement.new(StringResource.new(text, stmt.location), stmt.declarations)
end
|
|
330
|
+
|
|
331
|
+
# Returns the format clause for an unload statement.
#
# Only 'tsv' is accepted (tab delimiter).  Any other +format+ raises
# ParameterError naming the rejected format.  +ds+ is not used.
def unload_format_option(format, ds)
  case format
  when 'tsv'
    %q(delimiter '\t')
  else
    # The message must interpolate the +format+ parameter; no local
    # named `fmt` exists in this method.
    raise ParameterError, "unsupported format: #{format}"
  end
end
|
|
339
|
+
|
|
340
|
+
# Prepares +query+ for embedding inside a single-quoted SQL literal:
# drops text from a line-leading "--" to the end of that line, strips
# the ends, joins lines with single spaces, and backslash-escapes
# single quotes.
def format_query(query)
  without_comments = query.gsub(/^--.*/, '')
  one_line = without_comments.strip.gsub(/[ \t]*\n[ \t]*/, ' ')
  # "\\\\'" is backslash + quote once gsub has processed the replacement.
  one_line.gsub("'", "\\\\'")
end
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
# An ordered list of load options.  #to_s renders one option per line.
class PSQLLoadOptions
  class << self
    # Builds an option list from a Hash of name => value pairs or from
    # a "name=value, name, ..." String.
    # Raises ParameterError for any other argument type.
    def parse(opts)
      case opts
      when Hash then filter_values(opts)
      when String then parse_string(opts)   # FIXME: remove
      else raise ParameterError, "unsupported value type for load options: #{opts.class}"
      end
    end

    private

    # Converts a Hash into options, accepting only String, Integer,
    # true, false and nil values.  Names are converted with #to_s.
    def filter_values(opts)
      list = []
      opts.each do |key, value|
        case value
        when String, Integer, true, false, nil
          list.push Option.new(key.to_s, value)
        else
          # Name the offending option instead of raising an empty message.
          raise ParameterError, "unsupported load option value for #{key}: #{value.inspect}"
        end
      end
      new(list)
    end

    # Parses the comma-separated "name=value" form.  A bare name (no
    # "=") becomes a true-valued option.
    def parse_string(str)
      return new unless str
      list = []
      str.split(',').each do |opt_pair|
        opt, value = opt_pair.strip.split('=', 2)
        list.push Option.new(opt, parse_value(value, opt))
      end
      new(list)
    end

    # Converts the text of one value: nil/'true' => true,
    # 'false' => false, all-digit text => Integer, other text unchanged.
    def parse_value(value, opt)
      case value
      when nil, 'true', true then true
      when 'false', false then false
      when /\A\d+\z/ then value.to_i
      when String then value
      else
        raise ParameterError, "unsupported load option value for #{opt}: #{value.inspect}"
      end
    end
  end

  def initialize(opts = [])
    @opts = opts
  end

  # Yields each Option in order.
  def each(&block)
    @opts.each(&block)
  end

  # Returns a new option list in which each name => value pair of
  # +pairs+ replaces the option with the same name, or is appended.
  # Names are compared as strings, so :maxerror overrides 'maxerror'.
  def merge(pairs)
    h = {}
    @opts.each do |opt|
      h[opt.name.to_s] = opt
    end
    pairs.each do |key, value|
      name = key.to_s
      h[name] = Option.new(name, value)
    end
    self.class.new(h.values)
  end

  # Renders every option on its own line.
  def to_s
    buf = StringIO.new
    each do |opt|
      buf.puts opt
    end
    buf.string
  end

  # A single name/value load option.
  class Option
    def initialize(name, value)
      @name = name
      @value = value
    end

    attr_reader :name
    attr_reader :value

    # Make polymorphic?
    #
    # Renders the option; raises ParameterError for a value type that
    # has no rendering (nil included).
    def to_s
      case @value
      when true     # acceptanydate
        @name
      when false    # compupdate false
        "#{@name} false"
      when 'on', 'off'
        "#{@name} #{@value}"
      when String   # json 'auto'
        "#{@name} '#{@value}'"
      when Integer  # maxerror 10
        "#{@name} #{@value}"
      else
        raise ParameterError, "unsupported type of option value for #{@name}: #{@value.inspect}"
      end
    end
  end
end
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
require 'bricolage/exception'
|
|
2
|
+
|
|
3
|
+
module Bricolage

  # Base class for named text resources.  Subclasses provide #content.
  class Resource
    # Iterates over the lines of #content.
    def each_line(&block)
      content.each_line(&block)
    end
  end

  # A resource whose content is read from a file on first use.
  class FileResource < Resource
    def initialize(path, name = nil)
      @path = path
      @name = name
    end

    attr_reader :path

    # The explicit name if one was given, otherwise the path.
    def name
      @name || @path
    end

    # Reads the file once and caches its text.
    # Raises ParameterError when the file cannot be read.
    def content
      @content ||= File.read(@path)
    rescue SystemCallError => err
      raise ParameterError, "could not open a file: #{@path}: #{err.message}"
    end

    def inspect
      "\#<#{self.class} #{@path}>"
    end

    # Two file resources are equal when their paths are equal.
    def ==(other)
      return false unless other.kind_of?(FileResource)
      @path == other.path
    end

    alias eql? ==

    def hash
      @path.hash
    end
  end

  # A resource backed by an in-memory string.
  class StringResource < Resource
    def initialize(content, name = '(string)')
      @content = content
      @name = name
    end

    attr_reader :content
    attr_reader :name

    # Shows at most the first 20 characters of the content.
    def inspect
      "\#<#{self.class} #{(@content.size > 20 ? @content[0, 20] : @content).inspect}>"
    end

    # Two string resources are equal when name and content are equal.
    def ==(other)
      return false unless other.kind_of?(StringResource)
      @name == other.name and @content == other.content
    end

    # Hash lookup uses eql? together with #hash, so both must agree
    # with ==.
    alias eql? ==

    def hash
      @name.hash ^ @content.hash
    end
  end

end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
require 'bricolage/jobclass'
|
|
2
|
+
|
|
3
|
+
module Bricolage
  # Base class for job classes written as plain Ruby classes.
  # A subclass calls job_class_id and overrides #run.
  class RubyJobClass
    # Defines a JobClass under +id+ and wires this class's hooks into
    # it via define_job_class.
    def self.job_class_id(id)
      wrapper = self
      # `self` inside the block is whatever JobClass.define evaluates
      # the block against; it is handed to define_job_class as the job
      # class.
      JobClass.define(id) { wrapper.define_job_class(self) }
    end

    # Registers the parameters, declarations and script hooks of this
    # class on +job_class+.
    def self.define_job_class(job_class)
      job_class.parameters { |params| parameters(params) }
      job_class.declarations { |params| declarations(params) }
      job_class.script { |params, script| script(params, script) }
    end

    # Hook; does nothing by default.
    def self.parameters(params)
    end

    # Hook; does nothing by default.
    def self.declarations(params)
    end

    # Instantiates the job with +params+ and adds a 'ruby job' action
    # that calls its #run.  The action block returns nil as the job
    # result.
    def self.script(params, script)
      ruby_job = new(params)
      script.task(params.generic_ds) do |task|
        task.action('ruby job') do
          ruby_job.run
          nil   # job result
        end
      end
    end

    def initialize(params, *args)
    end

    # Must be overridden by subclasses; raises otherwise.
    def run
      raise "bricolage: error: #{self.class}\#run is not overridden"
    end
  end
end
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
require 'bricolage/datasource'
|
|
2
|
+
require 'bricolage/commandutils'
|
|
3
|
+
require 'aws-sdk'
|
|
4
|
+
require 'stringio'
|
|
5
|
+
|
|
6
|
+
module Bricolage
|
|
7
|
+
|
|
8
|
+
class S3DataSource < DataSource
|
|
9
|
+
declare_type 's3'
|
|
10
|
+
|
|
11
|
+
# Builds an S3 data source.
#
# An empty +prefix+ is stored as nil.  When +s3cfg+ is given, the file
# is loaded with #load_configurations and kept in @configurations;
# otherwise @configurations is nil.
def initialize(endpoint: 's3-ap-northeast-1.amazonaws.com',
               bucket: nil, prefix: nil,
               access_key_id: nil, secret_access_key: nil,
               s3cfg: nil)
  @endpoint, @bucket = endpoint, bucket
  blank_prefix = prefix && prefix.empty?
  @prefix = blank_prefix ? nil : prefix
  @access_key_id, @secret_access_key = access_key_id, secret_access_key
  @s3cfg = s3cfg
  @configurations = (load_configurations(@s3cfg) if @s3cfg)
end
|
|
23
|
+
|
|
24
|
+
attr_reader :endpoint
|
|
25
|
+
attr_reader :bucket
|
|
26
|
+
attr_reader :prefix
|
|
27
|
+
|
|
28
|
+
# Returns a new S3Task constructed with this data source.
def new_task
  S3Task.new(self)
end
|
|
31
|
+
|
|
32
|
+
# For Redshift
|
|
33
|
+
# Returns the access key and secret key formatted as
# "aws_access_key_id=...;aws_secret_access_key=...".
def credential_string
  key_id = access_key
  secret = secret_key
  "aws_access_key_id=#{key_id};aws_secret_access_key=#{secret}"
end
|
|
36
|
+
|
|
37
|
+
# Returns the configured access key id, falling back to the
# 'access_key' entry of the s3cfg configuration.
def access_key
  return @access_key_id if @access_key_id
  get_config('access_key')
end
|
|
40
|
+
|
|
41
|
+
# Returns the configured secret access key, falling back to the
# 'secret_key' entry of the s3cfg configuration.
def secret_key
  return @secret_access_key if @secret_access_key
  get_config('secret_key')
end
|
|
44
|
+
|
|
45
|
+
# Returns the s3cfg entry for +key+.
#
# Raises ParameterError when the entry is missing or empty, and also
# when no s3cfg file was loaded at all (@configurations is nil).
def get_config(key)
  value = @configurations && @configurations[key]
  value or raise ParameterError, "missing s3cfg entry: #{key}"
end
|
|
48
|
+
|
|
49
|
+
# Reads "key = value" lines from the file at +path+ into a Hash.
#
# Lines that do not start with a word followed by "=" are skipped.
# Keys and values are stripped; an empty value is stored as nil.
def load_configurations(path)
  File.foreach(path).each_with_object({}) do |line, conf|
    next unless line =~ /\A\s*\w+\s*=\s*/
    name, raw = line.split('=', 2)
    text = raw.strip
    conf[name.strip] = text.empty? ? nil : text
  end
end
|
|
61
|
+
|
|
62
|
+
#
|
|
63
|
+
# Ruby Interface
|
|
64
|
+
#
|
|
65
|
+
|
|
66
|
+
# Returns the AWS::S3 client for this data source, creating it on
# first use and caching it in @client.
def client
  return @client if @client
  @client = AWS::S3.new(s3_endpoint: endpoint, access_key_id: access_key, secret_access_key: secret_key)
end
|
|
69
|
+
|
|
70
|
+
# Returns the object collection of this data source's bucket.
def objects
  s3_bucket = client.buckets[bucket]
  s3_bucket.objects
end
|
|
73
|
+
|
|
74
|
+
# Returns the bucket objects under the key prefix built by #path from
# +rel+.
def objects_with_prefix(rel, no_prefix: false)
  key_prefix = path(rel, no_prefix: no_prefix)
  objects.with_prefix(key_prefix)
end
|
|
77
|
+
|
|
78
|
+
# Returns the bucket object whose key is built by #path from +rel+.
def object(rel, no_prefix: false)
  key = path(rel, no_prefix: no_prefix)
  objects[key]
end
|
|
81
|
+
|
|
82
|
+
# Returns the s3:// URL of +rel+ in this data source's bucket, using
# #path to build the key.
def url(rel, no_prefix: false)
  key = path(rel, no_prefix: no_prefix)
  "s3://#{@bucket}/#{key}"
end
|
|
85
|
+
|
|
86
|
+
# Builds the object key for +rel+.
#
# The data source prefix is prepended unless +no_prefix+ is true or
# no prefix is set.  Every run of slashes is collapsed to a single
# slash and a leading slash is removed.
def path(rel, no_prefix: false)
  raw = (no_prefix || !prefix) ? rel.to_s : "#{@prefix}/#{rel}"
  # Collapse first so that a leading "//" is removed completely.
  raw.squeeze('/').sub(%r{\A/}, '')
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Task that uploads local files to an S3 data source.
class S3Task < DataSourceTask
  # Adds a Put action copying +src+ (a path or glob pattern) to +dest+.
  # The action's check_arguments is called first unless +check_args+
  # is false.
  def put(src, dest, check_args: true)
    action = Put.new(src, dest)
    action.check_arguments if check_args
    add(action)
  end

  # Uploads every file matching the source pattern.
  class Put < Action
    def initialize(src, dest)
      @src = src
      @dest = dest
    end

    # Files matching the source pattern; globbed once and cached.
    def source_files
      @source_files ||= Dir.glob(@src)
    end

    # True when the pattern matched exactly one file and that file is
    # the pattern itself.
    def single_source?
      source_files.size == 1 && source_files.first == @src
    end

    # Yields each source file with its destination.  A destination
    # ending in "/" gets the file's basename appended; any other
    # destination is used unchanged for every file.
    def each_src_dest
      source_files.each do |src|
        dest =
          if @dest.to_s.end_with?('/')
            "#{@dest}/#{File.basename(src)}"
          else
            @dest
          end
        yield src, dest
      end
    end

    # The equivalent "aws s3 cp" command line; #source and the log
    # message in #run are built from it.
    def command_line(src, dest)
      "aws s3 cp #{src} #{ds.url(dest)}"
    end

    # Currently performs no checks.
    def check_arguments
    end

    # One command line per file to be uploaded.
    def source
      StringIO.new.tap { |out|
        each_src_dest { |src, dest| out.puts command_line(src, dest) }
      }.string
    end

    # Writes every matched file to its destination object.
    # Raises JobFailure when the pattern matches nothing.
    def run
      raise JobFailure, "no such file: #{@src}" if source_files.empty?
      each_src_dest do |src, dest|
        ds.logger.info command_line(src, dest)
        ds.object(dest).write(file: src)
      end
      nil
    end
  end
end
|
|
143
|
+
|
|
144
|
+
end
|