bricolage 5.8.7 → 5.9.0
- checksums.yaml +4 -4
- data/README.md +17 -2
- data/jobclass/streaming_load.rb +451 -0
- data/lib/bricolage/version.rb +1 -1
- data/test/home/Gemfile +2 -0
- data/test/home/Gemfile.lock +54 -0
- data/test/home/config/development/database.yml +5 -2
- data/test/home/data/20141002-1355_00.txt +2 -0
- data/test/home/data/20141002-1355_01.txt +2 -0
- data/test/home/data/20141002-1355_02.txt +2 -0
- data/test/home/put.sh +12 -0
- data/test/home/revert.sh +12 -0
- data/test/home/subsys/load_test.ct +6 -0
- data/test/home/subsys/load_test.job +12 -0
- metadata +11 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5d8fe450bc754d0074c361ebdec9b3f0119cf440
+  data.tar.gz: a12ba4bf06eb22f598e4d8e7df8b1028c84e0c52
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 801a0ba6a167068141d446e2d8e598ef6bcafcfec591c9011dcedf8aa126afdfb2dd6b830dfc9786f530e2608a48d5c69faee6b510c1b0e833feca6ae661df6a
+  data.tar.gz: cbb2b228f85c74f5b1a4f83a9d55eddcccfbcf9d9ea91ab5b2e4564614a11ecb512222448bf2e71eebbb17237eb792368e780f5ca3104f63e3278543cf8bce28
data/README.md
CHANGED
@@ -1,4 +1,19 @@
-
+Bricolage
 =========
 
-SQL batch framework
+SQL batch framework for Redshift.
+
+This software is written in working time in Cookpad, Inc.
+
+
+License
+-------
+
+MIT license.
+See LICENSES file for details.
+
+
+Author
+------
+
+Minero Aoki
data/jobclass/streaming_load.rb
ADDED
@@ -0,0 +1,451 @@
require 'bricolage/rubyjobclass'
require 'bricolage/psqldatasource'
require 'bricolage/exception'
require 'json'
require 'socket'

class StreamingLoadJobClass < RubyJobClass
  job_class_id 'streaming_load'

  def StreamingLoadJobClass.parameters(params)
    super
    params.add Bricolage::DataSourceParam.new('sql', 'redshift-ds', 'Redshift data source.')
    params.add Bricolage::DestTableParam.new(optional: false)
    params.add Bricolage::DestTableParam.new('work-table', optional: true)
    params.add Bricolage::DestTableParam.new('log-table', optional: true)
    params.add Bricolage::KeyValuePairsParam.new('load-options', 'OPTIONS', 'Loader options.',
        optional: true, default: Bricolage::PSQLLoadOptions.new,
        value_handler: lambda {|value, ctx, vars| Bricolage::PSQLLoadOptions.parse(value) })
    params.add Bricolage::DataSourceParam.new('s3', 's3-ds', 'S3 data source.')
    params.add Bricolage::StringParam.new('queue-path', 'S3_PATH', 'S3 path for data file queue.')
    params.add Bricolage::StringParam.new('persistent-path', 'S3_PATH', 'S3 path for persistent data file store.')
    params.add Bricolage::StringParam.new('file-name', 'PATTERN', 'name pattern of target data file.')
    params.add Bricolage::OptionalBoolParam.new('noop', 'Does not change any data.')
    params.add Bricolage::OptionalBoolParam.new('load-only', 'Just issues COPY statement to work table and quit. No INSERT, no dequeue, no load log.')
    params.add Bricolage::OptionalBoolParam.new('dequeue-only', 'Dequeues already loaded files.')
  end

  def StreamingLoadJobClass.declarations(params)
    Bricolage::Declarations.new(
      'dest_table' => nil,
      'work_table' => nil,
      'log_table' => nil
    )
  end

  def initialize(params)
    @loader = make_loader(params)
    @load_only = params['load-only']
    @dequeue_only = params['dequeue-only']
  end

  def run
    @loader.dequeue_loaded_files unless @load_only
    return nil if @dequeue_only
    @loader.load
    nil
  end

  def make_loader(params)
    ds = params['redshift-ds']
    RedshiftStreamingLoader.new(
      data_source: ds,
      queue: make_s3_queue(params),
      table: string(params['dest-table']),
      work_table: string(params['work-table']),
      log_table: string(params['log-table']),
      load_options: params['load-options'],
      logger: ds.logger,
      noop: params['noop'],
      load_only: params['load-only']
    )
  end

  def make_s3_queue(params)
    ds = params['s3-ds']
    S3Queue.new(
      data_source: ds,
      queue_path: params['queue-path'],
      persistent_path: params['persistent-path'],
      file_name: params['file-name'],
      logger: ds.logger
    )
  end

  def string(obj)
    obj ? obj.to_s : nil
  end

  class RedshiftStreamingLoader
    def initialize(data_source:, queue:,
        table:, work_table: nil, log_table: nil, load_options: nil,
        logger:, noop: false, load_only: false)
      @ds = data_source
      @src = queue
      @table = table
      @work_table = work_table
      @log_table = log_table
      @load_options = load_options
      @logger = logger
      @noop = noop
      @load_only = load_only

      @start_time = Time.now
      @end_time = nil
      @job_process_id = "#{@start_time.strftime('%Y%m%d-%H%M%S')}.#{Socket.gethostname}.#{Process.pid}"
    end

    def load
      load_in_parallel
    end

    def log_basic_info
      @logger.info "start_time: #{@start_time}"
      @logger.info "job_process_id: #{@job_process_id}"
      @logger.info "queue: #{@src.queue_url}"
    end

    def load_in_parallel
      log_basic_info
      @logger.info 'load with manifest'
      objects = @src.queued_objects
      if objects.empty?
        @logger.info 'no target data files; exit'
        return
      end
      create_manifest_file(objects) {|manifest_url|
        @ds.open {|conn|
          init_work_table conn
          execute_update conn, copy_manifest_statement(manifest_url, @src.credential_string)
          @logger.info "load succeeded: #{manifest_url}" unless @noop
          commit conn, objects
        }
        dequeue_all objects
      }
    end

    def load_in_sequential
      log_basic_info
      @logger.info 'load each objects sequentially'
      objects = @src.queued_objects
      @ds.open {|conn|
        init_work_table(conn)
        objects.each do |obj|
          @logger.info "load: #{obj.url}"
          execute_update conn, copy_file_statement(obj)
          @logger.info "load succeeded: #{obj.url}" unless @noop
        end
        commit conn, objects
      }
      dequeue_all objects
    end

    def commit(conn, objects)
      @end_time = Time.now
      return if @load_only
      transaction(conn) {
        commit_work_table conn
        write_load_logs conn, objects
      }
    end

    def dequeue_loaded_files
      @logger.info "dequeue start"
      objects = @src.queued_objects
      @ds.open {|conn|
        objects.each do |obj|
          if loaded_object?(conn, obj)
            obj.dequeue(@noop)
          end
        end
      }
    end

    private

    def init_work_table(conn)
      return unless @work_table
      execute_update conn, "truncate #{@work_table};"
    end

    def commit_work_table(conn)
      return unless @work_table
      execute_update conn, "insert into #{@table} select * from #{@work_table};"
      # keep work table records for tracing
    end

    def copy_file_statement(obj)
      %Q(
        copy #{load_target_table} from '#{obj.url}'
        credentials '#{obj.credential_string}'
        #{@load_options}
      ;).gsub(/\s+/, ' ').strip
    end

    def create_manifest_file(objects)
      manifest_name = "manifest-#{@job_process_id}.json"
      @logger.info "creating manifest: #{manifest_name}"
      @logger.info "manifest:\n" + make_manifest_json(objects)
      url = @src.put_control_file(manifest_name, make_manifest_json(objects), noop: @noop)
      yield url
      @src.remove_control_file(File.basename(url), noop: @noop)
    end

    def make_manifest_json(objects)
      ents = objects.map {|obj|
        { "url" => obj.url, "mandatory" => true }
      }
      JSON.pretty_generate({ "entries" => ents })
    end

    def copy_manifest_statement(manifest_url, credential_string)
      %Q(
        copy #{load_target_table}
        from '#{manifest_url}'
        credentials '#{credential_string}'
        manifest
        #{@load_options}
      ;).gsub(/\s+/, ' ').strip
    end

    def load_target_table
      @work_table || @table
    end

    def write_load_logs(conn, objects)
      return unless @log_table
      make_load_logs(objects).each do |record|
        write_load_log conn, record
      end
    end

    def make_load_logs(objects)
      objects.map {|obj| make_load_log(obj) }
    end

    def make_load_log(obj)
      LoadLogRecord.new(@job_process_id, @start_time, @end_time, @table, obj.url)
    end

    LoadLogRecord = Struct.new(:job_process_id, :start_time, :end_time, :target_table, :data_file)

    def write_load_log(conn, record)
      return unless @log_table
      execute_update(conn, <<-EndSQL.gsub(/^\s+/, '').strip)
        insert into #{@log_table}
        ( job_process_id
        , start_time
        , end_time
        , target_table
        , data_file
        )
        values
        ( #{sql_string record.job_process_id}
        , #{sql_timestamp record.start_time}
        , #{sql_timestamp record.end_time}
        , #{sql_string record.target_table}
        , #{sql_string record.data_file}
        )
        ;
      EndSQL
    end

    def loaded_object?(conn, obj)
      rs = conn.execute("select count(*) as c from #{@log_table} where data_file = #{sql_string obj.url}")
      rs.first['c'].to_i > 0
    end

    def sql_timestamp(time)
      %Q(timestamp '#{time.strftime('%Y-%m-%d %H:%M:%S')}')
    end

    def sql_string(str)
      escaped = str.gsub("'", "''")
      %Q('#{escaped}')
    end

    def transaction(conn)
      execute_update conn, 'begin transaction'
      yield
      execute_update conn, 'commit'
    end

    def execute_update(conn, sql)
      if @noop
        log_query(sql)
      else
        conn.execute(sql)
      end
    end

    def log_query(sql)
      @logger.info "[#{@ds.name}] #{mask_secrets(sql)}"
    end

    def mask_secrets(log)
      log.gsub(/\bcredentials\s+'.*?'/mi, "credentials '****'")
    end

    def dequeue_all(objects)
      return if @load_only
      objects.each do |obj|
        obj.dequeue(@noop)
      end
    end
  end

  class S3Queue
    def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
      @ds = data_source
      @queue_path = queue_path
      @persistent_path = persistent_path
      @file_name = file_name
      @logger = logger
    end

    def credential_string
      @ds.credential_string
    end

    attr_reader :queue_path

    def queue_url
      @ds.url(@queue_path)
    end

    def object_url(key)
      @ds.url(key, no_prefix: true)
    end

    def control_file_url(name)
      @ds.url(control_file_path(name))
    end

    def put_control_file(name, data, noop: false)
      @logger.info "s3 put: #{control_file_url(name)}"
      @ds.object(control_file_path(name)).write(data) unless noop
      control_file_url(name)
    end

    def remove_control_file(name, noop: false)
      @logger.info "s3 delete: #{control_file_url(name)}"
      @ds.object(control_file_path(name)).delete unless noop
    end

    def control_file_path(name)
      "#{queue_path}/#{name}"
    end

    def consume_each(noop: false, &block)
      each do |obj|
        yield obj and obj.save(noop: noop)
      end
    end

    def each(&block)
      queued_objects.each(&block)
    end

    def queue_directory
      @ds.objects_with_prefix(queue_path)
    end

    def queued_file_nodes
      queue_directory.as_tree.children.select {|node|
        node.leaf? and
        node.key[-1, 1] != '/' and
        target_file_name?(File.basename(node.key))
      }
    end

    def queued_objects
      queued_file_nodes.map {|node| LoadableObject.new(self, node, @logger) }
    end

    def target_file_name?(name)
      file_name_pattern =~ name
    end

    def persistent_path(name)
      @ds.path("#{format_path(@persistent_path, name)}/#{name}")
    end

    def format_path(template, basename)
      m = file_name_pattern.match(basename) or
        raise ParameterError, "file name does not match the pattern: #{basename.inspect}"
      template.gsub(/%./) {|op|
        case op
        when '%Y' then m[:year]
        when '%m' then m[:month]
        when '%d' then m[:day]
        when '%H' then m[:hour]
        when '%M' then m[:minute]
        when '%S' then m[:second]
        when '%N' then m[:nanosecond]
        when '%Q' then m[:seq]
        else raise ParameterError, "unknown time format in s3.file_name config: #{op}"
        end
      }
    end

    def file_name_pattern
      @file_name_pattern ||= compile_name_pattern(@file_name)
    end

    def compile_name_pattern(template)
      pattern = template.gsub(/[^%]+|%\d*./) {|op|
        case op
        when '%Y' then '(?<year>\\d{4})'
        when '%m' then '(?<month>\\d{2})'
        when '%d' then '(?<day>\\d{2})'
        when '%H' then '(?<hour>\\d{2})'
        when '%M' then '(?<minute>\\d{2})'
        when '%S' then '(?<second>\\d{2})'
        when /\A%(\d+)N\z/ then "(?<nanosecond>\\d{#{$1}})"
        when '%Q' then '(?<seq>\\d+)'
        when '%%' then '%'
        when /\A%/ then raise ParameterError, "unknown time format in s3.file_name config: #{op}"
        else Regexp.quote(op)
        end
      }
      Regexp.compile("\\A#{pattern}\\z")
    end
  end

  class LoadableObject
    def initialize(s3queue, node, logger)
      @s3queue = s3queue
      @node = node
      @logger = logger
    end

    def credential_string
      @s3queue.credential_string
    end

    def path
      @node.key
    end

    def basename
      File.basename(path)
    end

    def url
      @s3queue.object_url(path)
    end

    def save(noop = false)
      @logger.info "s3 move: #{path} -> #{save_path}"
      return if noop
      @node.object.move_to save_path
      @logger.info "file saved"
    end

    alias dequeue save

    def save_path
      @s3queue.persistent_path(basename)
    end
  end
end
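A note on the file-name handling added above: compile_name_pattern turns the job's file-name template into a Regexp with named groups, and format_path uses the captured fields to expand persistent-path. The standalone sketch below is a simplified re-implementation for illustration only (it supports just the %Y/%m/%d/%H/%M/%Q operators, not the gem's full API) and uses the pattern and paths from the test job load_test.job shown further down:

# Simplified illustration of S3Queue#compile_name_pattern / #format_path.
# Not part of bricolage; handles only the operators the test job uses.
def compile_name_pattern(template)
  pattern = template.gsub(/[^%]+|%./) {|op|
    case op
    when '%Y' then '(?<year>\d{4})'
    when '%m' then '(?<month>\d{2})'
    when '%d' then '(?<day>\d{2})'
    when '%H' then '(?<hour>\d{2})'
    when '%M' then '(?<minute>\d{2})'
    when '%Q' then '(?<seq>\d+)'
    else Regexp.quote(op)
    end
  }
  Regexp.compile("\\A#{pattern}\\z")
end

re = compile_name_pattern('%Y%m%d-%H%M_%Q.txt')
m  = re.match('20141002-1355_00.txt')
fields = { '%Y' => m[:year], '%m' => m[:month], '%d' => m[:day], '%H' => m[:hour] }
puts 'save/year=%Y/month=%m/day=%d/hour=%H'.gsub(/%./) {|op| fields[op] }
# => save/year=2014/month=10/day=02/hour=13

Queued files that do not match the pattern are skipped by target_file_name?, and format_path raises ParameterError when asked to save such a file.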
data/lib/bricolage/version.rb
CHANGED
data/test/home/Gemfile
ADDED
data/test/home/Gemfile.lock
ADDED
@@ -0,0 +1,54 @@
PATH
  remote: ../..
  specs:
    bricolage (5.8.7)
      aws-sdk (< 2)
      mysql2
      pg
      td

GEM
  remote: https://rubygems.org/
  specs:
    aws-sdk (1.64.0)
      aws-sdk-v1 (= 1.64.0)
    aws-sdk-v1 (1.64.0)
      json (~> 1.4)
      nokogiri (>= 1.4.4)
    fluent-logger (0.4.10)
      msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
      yajl-ruby (~> 1.0)
    hirb (0.7.3)
    httpclient (2.5.3.3)
    json (1.8.2)
    mini_portile (0.6.2)
    msgpack (0.5.12)
    mysql2 (0.3.18)
    nokogiri (1.6.6.2)
      mini_portile (~> 0.6.0)
    parallel (0.6.5)
    pg (0.18.2)
    rubyzip (0.9.9)
    td (0.11.10)
      hirb (>= 0.4.5)
      msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
      parallel (~> 0.6.1)
      rubyzip (~> 0.9.9)
      td-client (~> 0.8.70)
      td-logger (~> 0.3.21)
      yajl-ruby (~> 1.1)
    td-client (0.8.70)
      httpclient (>= 2.4.0, < 2.6.0)
      json (>= 1.7.6)
      msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
    td-logger (0.3.24)
      fluent-logger (~> 0.4.9)
      msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
      td-client (~> 0.8.66)
    yajl-ruby (1.2.1)

PLATFORMS
  ruby

DEPENDENCIES
  bricolage!
data/test/home/config/development/database.yml
CHANGED
@@ -1,6 +1,6 @@
 sql: &redshift_parameters
   type: psql
-  host: <%= ENV['RSPROXYHOST'] ? 'localhost' : (ENV['RSHOST'] || 'cookpad-dwh-
+  host: <%= ENV['RSPROXYHOST'] ? 'localhost' : (ENV['RSHOST'] || 'cookpad-dwh-301.cbzwjkf60uat.ap-northeast-1.redshift.amazonaws.com') %>
   port: <%= ENV['RSPROXYPORT'] || ENV['RSPORT'] || 5439 %>
   database: production
   username: tabemirudev
@@ -16,7 +16,7 @@ sql_prod:
 
 sql_app:
   type: psql
-  host: localhost #cookpad-dwh-
+  host: localhost #cookpad-dwh-301.cbzwjkf60uat.ap-northeast-1.redshift.amazonaws.com
   port: 5445
   database: production
   username: tabemiru_batch
@@ -44,9 +44,12 @@ td_search_log:
 
 s3:
   type: s3
+  endpoint: "s3-ap-northeast-1.amazonaws.com"
   bucket: tabemiru-data.ap-northeast-1
   prefix: "/dev"
   s3cfg: <%= user_home_relative_path '.s3cfg' %>
+  access_key_id: "AKIAI5R5BL2W73PR3FXQ"
+  secret_access_key: "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"
 
 mysql:
   type: mysql
data/test/home/put.sh
ADDED
data/test/home/revert.sh
ADDED
@@ -0,0 +1,12 @@
#!/bin/sh

for name
in \
    20141002-1355_00.txt \
    20141002-1355_01.txt \
    20141002-1355_02.txt
do
  aws s3 mv \
    s3://tabemiru-data.ap-northeast-1/tmp/save/year=2014/month=10/day=02/hour=13/$name \
    s3://tabemiru-data.ap-northeast-1/tmp/queue/$name
done
data/test/home/subsys/load_test.job
ADDED
@@ -0,0 +1,12 @@
class: streaming_load

s3-ds: s3
queue-path: queue
persistent-path: save/year=%Y/month=%m/day=%d/hour=%H
file-name: "%Y%m%d-%H%M_%Q.txt"

redshift-ds: sql
dest-table: load_test
work-table: load_test_wk
log-table: load_test_l
load-options: "delimiter '\\t'"
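To tie this job definition back to the loader above: with these parameters the job collects queued files matching file-name, writes a Redshift manifest into the queue path, and issues a single COPY ... MANIFEST into the work table (load_test_wk) before inserting into load_test. The sketch below is illustrative only; the bucket and tmp/queue prefix are taken from revert.sh, the manifest name is a hypothetical value in the job_process_id format, and credentials are shown masked the way mask_secrets logs them:

require 'json'

# Roughly what make_manifest_json emits for the three queued test files.
# URLs are illustrative; the real ones come from the S3 data source config.
files = %w[20141002-1355_00.txt 20141002-1355_01.txt 20141002-1355_02.txt]
entries = files.map {|name|
  { "url" => "s3://tabemiru-data.ap-northeast-1/tmp/queue/#{name}", "mandatory" => true }
}
puts JSON.pretty_generate({ "entries" => entries })

# Shape of the statement built by copy_manifest_statement
# (hypothetical manifest name; credentials masked as in the logs).
manifest = "s3://tabemiru-data.ap-northeast-1/tmp/queue/manifest-20141002-140000.host.1234.json"
puts "copy load_test_wk from '#{manifest}' credentials '****' manifest delimiter '\\t' ;"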
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bricolage
 version: !ruby/object:Gem::Version
-  version: 5.
+  version: 5.9.0
 platform: ruby
 authors:
 - Minero Aoki
@@ -117,6 +117,7 @@ files:
 - jobclass/rebuild-rename.rb
 - jobclass/s3-put.rb
 - jobclass/sql.rb
+- jobclass/streaming_load.rb
 - jobclass/td-delete.rb
 - jobclass/td-export.rb
 - jobclass/unload.rb
@@ -156,8 +157,17 @@ files:
 - libexec/sqldump.Darwin
 - libexec/sqldump.Linux
 - test/all.rb
+- test/home/Gemfile
+- test/home/Gemfile.lock
 - test/home/config/development/database.yml
 - test/home/config/development/password.yml
+- test/home/data/20141002-1355_00.txt
+- test/home/data/20141002-1355_01.txt
+- test/home/data/20141002-1355_02.txt
+- test/home/put.sh
+- test/home/revert.sh
+- test/home/subsys/load_test.ct
+- test/home/subsys/load_test.job
 - test/home/subsys/separated.job
 - test/home/subsys/separated.sql
 - test/home/subsys/unified.jobnet