bricolage 5.8.7 → 5.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -2
- data/jobclass/streaming_load.rb +451 -0
- data/lib/bricolage/version.rb +1 -1
- data/test/home/Gemfile +2 -0
- data/test/home/Gemfile.lock +54 -0
- data/test/home/config/development/database.yml +5 -2
- data/test/home/data/20141002-1355_00.txt +2 -0
- data/test/home/data/20141002-1355_01.txt +2 -0
- data/test/home/data/20141002-1355_02.txt +2 -0
- data/test/home/put.sh +12 -0
- data/test/home/revert.sh +12 -0
- data/test/home/subsys/load_test.ct +6 -0
- data/test/home/subsys/load_test.job +12 -0
- metadata +11 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5d8fe450bc754d0074c361ebdec9b3f0119cf440
|
|
4
|
+
data.tar.gz: a12ba4bf06eb22f598e4d8e7df8b1028c84e0c52
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 801a0ba6a167068141d446e2d8e598ef6bcafcfec591c9011dcedf8aa126afdfb2dd6b830dfc9786f530e2608a48d5c69faee6b510c1b0e833feca6ae661df6a
|
|
7
|
+
data.tar.gz: cbb2b228f85c74f5b1a4f83a9d55eddcccfbcf9d9ea91ab5b2e4564614a11ecb512222448bf2e71eebbb17237eb792368e780f5ca3104f63e3278543cf8bce28
|
data/README.md
CHANGED
|
@@ -1,4 +1,19 @@
|
|
|
1
|
-
|
|
1
|
+
Bricolage
|
|
2
2
|
=========
|
|
3
3
|
|
|
4
|
-
SQL batch framework
|
|
4
|
+
SQL batch framework for Redshift.
|
|
5
|
+
|
|
6
|
+
This software is written in working time in Cookpad, Inc.
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
License
|
|
10
|
+
-------
|
|
11
|
+
|
|
12
|
+
MIT license.
|
|
13
|
+
See LICENSES file for details.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
Author
|
|
17
|
+
------
|
|
18
|
+
|
|
19
|
+
Minero Aoki
|
data/jobclass/streaming_load.rb
ADDED
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
require 'bricolage/rubyjobclass'
|
|
2
|
+
require 'bricolage/psqldatasource'
|
|
3
|
+
require 'bricolage/exception'
|
|
4
|
+
require 'json'
|
|
5
|
+
require 'socket'
|
|
6
|
+
|
|
7
|
+
class StreamingLoadJobClass < RubyJobClass
|
|
8
|
+
job_class_id 'streaming_load'
|
|
9
|
+
|
|
10
|
+
def StreamingLoadJobClass.parameters(params)
|
|
11
|
+
super
|
|
12
|
+
params.add Bricolage::DataSourceParam.new('sql', 'redshift-ds', 'Redshift data source.')
|
|
13
|
+
params.add Bricolage::DestTableParam.new(optional: false)
|
|
14
|
+
params.add Bricolage::DestTableParam.new('work-table', optional: true)
|
|
15
|
+
params.add Bricolage::DestTableParam.new('log-table', optional: true)
|
|
16
|
+
params.add Bricolage::KeyValuePairsParam.new('load-options', 'OPTIONS', 'Loader options.',
|
|
17
|
+
optional: true, default: Bricolage::PSQLLoadOptions.new,
|
|
18
|
+
value_handler: lambda {|value, ctx, vars| Bricolage::PSQLLoadOptions.parse(value) })
|
|
19
|
+
params.add Bricolage::DataSourceParam.new('s3', 's3-ds', 'S3 data source.')
|
|
20
|
+
params.add Bricolage::StringParam.new('queue-path', 'S3_PATH', 'S3 path for data file queue.')
|
|
21
|
+
params.add Bricolage::StringParam.new('persistent-path', 'S3_PATH', 'S3 path for persistent data file store.')
|
|
22
|
+
params.add Bricolage::StringParam.new('file-name', 'PATTERN', 'name pattern of target data file.')
|
|
23
|
+
params.add Bricolage::OptionalBoolParam.new('noop', 'Does not change any data.')
|
|
24
|
+
params.add Bricolage::OptionalBoolParam.new('load-only', 'Just issues COPY statement to work table and quit. No INSERT, no dequeue, no load log.')
|
|
25
|
+
params.add Bricolage::OptionalBoolParam.new('dequeue-only', 'Dequeues already loaded files.')
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def StreamingLoadJobClass.declarations(params)
|
|
29
|
+
Bricolage::Declarations.new(
|
|
30
|
+
'dest_table' => nil,
|
|
31
|
+
'work_table' => nil,
|
|
32
|
+
'log_table' => nil
|
|
33
|
+
)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def initialize(params)
|
|
37
|
+
@loader = make_loader(params)
|
|
38
|
+
@load_only = params['load-only']
|
|
39
|
+
@dequeue_only = params['dequeue-only']
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def run
|
|
43
|
+
@loader.dequeue_loaded_files unless @load_only
|
|
44
|
+
return nil if @dequeue_only
|
|
45
|
+
@loader.load
|
|
46
|
+
nil
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def make_loader(params)
|
|
50
|
+
ds = params['redshift-ds']
|
|
51
|
+
RedshiftStreamingLoader.new(
|
|
52
|
+
data_source: ds,
|
|
53
|
+
queue: make_s3_queue(params),
|
|
54
|
+
table: string(params['dest-table']),
|
|
55
|
+
work_table: string(params['work-table']),
|
|
56
|
+
log_table: string(params['log-table']),
|
|
57
|
+
load_options: params['load-options'],
|
|
58
|
+
logger: ds.logger,
|
|
59
|
+
noop: params['noop'],
|
|
60
|
+
load_only: params['load-only']
|
|
61
|
+
)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def make_s3_queue(params)
|
|
65
|
+
ds = params['s3-ds']
|
|
66
|
+
S3Queue.new(
|
|
67
|
+
data_source: ds,
|
|
68
|
+
queue_path: params['queue-path'],
|
|
69
|
+
persistent_path: params['persistent-path'],
|
|
70
|
+
file_name: params['file-name'],
|
|
71
|
+
logger: ds.logger
|
|
72
|
+
)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def string(obj)
|
|
76
|
+
obj ? obj.to_s : nil
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
class RedshiftStreamingLoader
|
|
80
|
+
def initialize(data_source:, queue:,
|
|
81
|
+
table:, work_table: nil, log_table: nil, load_options: nil,
|
|
82
|
+
logger:, noop: false, load_only: false)
|
|
83
|
+
@ds = data_source
|
|
84
|
+
@src = queue
|
|
85
|
+
@table = table
|
|
86
|
+
@work_table = work_table
|
|
87
|
+
@log_table = log_table
|
|
88
|
+
@load_options = load_options
|
|
89
|
+
@logger = logger
|
|
90
|
+
@noop = noop
|
|
91
|
+
@load_only = load_only
|
|
92
|
+
|
|
93
|
+
@start_time = Time.now
|
|
94
|
+
@end_time = nil
|
|
95
|
+
@job_process_id = "#{@start_time.strftime('%Y%m%d-%H%M%S')}.#{Socket.gethostname}.#{Process.pid}"
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def load
|
|
99
|
+
load_in_parallel
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def log_basic_info
|
|
103
|
+
@logger.info "start_time: #{@start_time}"
|
|
104
|
+
@logger.info "job_process_id: #{@job_process_id}"
|
|
105
|
+
@logger.info "queue: #{@src.queue_url}"
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def load_in_parallel
|
|
109
|
+
log_basic_info
|
|
110
|
+
@logger.info 'load with manifest'
|
|
111
|
+
objects = @src.queued_objects
|
|
112
|
+
if objects.empty?
|
|
113
|
+
@logger.info 'no target data files; exit'
|
|
114
|
+
return
|
|
115
|
+
end
|
|
116
|
+
create_manifest_file(objects) {|manifest_url|
|
|
117
|
+
@ds.open {|conn|
|
|
118
|
+
init_work_table conn
|
|
119
|
+
execute_update conn, copy_manifest_statement(manifest_url, @src.credential_string)
|
|
120
|
+
@logger.info "load succeeded: #{manifest_url}" unless @noop
|
|
121
|
+
commit conn, objects
|
|
122
|
+
}
|
|
123
|
+
dequeue_all objects
|
|
124
|
+
}
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def load_in_sequential
|
|
128
|
+
log_basic_info
|
|
129
|
+
@logger.info 'load each objects sequentially'
|
|
130
|
+
objects = @src.queued_objects
|
|
131
|
+
@ds.open {|conn|
|
|
132
|
+
init_work_table(conn)
|
|
133
|
+
objects.each do |obj|
|
|
134
|
+
@logger.info "load: #{obj.url}"
|
|
135
|
+
execute_update conn, copy_file_statement(obj)
|
|
136
|
+
@logger.info "load succeeded: #{obj.url}" unless @noop
|
|
137
|
+
end
|
|
138
|
+
commit conn, objects
|
|
139
|
+
}
|
|
140
|
+
dequeue_all objects
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def commit(conn, objects)
|
|
144
|
+
@end_time = Time.now
|
|
145
|
+
return if @load_only
|
|
146
|
+
transaction(conn) {
|
|
147
|
+
commit_work_table conn
|
|
148
|
+
write_load_logs conn, objects
|
|
149
|
+
}
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def dequeue_loaded_files
|
|
153
|
+
@logger.info "dequeue start"
|
|
154
|
+
objects = @src.queued_objects
|
|
155
|
+
@ds.open {|conn|
|
|
156
|
+
objects.each do |obj|
|
|
157
|
+
if loaded_object?(conn, obj)
|
|
158
|
+
obj.dequeue(@noop)
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
}
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
private
|
|
165
|
+
|
|
166
|
+
def init_work_table(conn)
|
|
167
|
+
return unless @work_table
|
|
168
|
+
execute_update conn, "truncate #{@work_table};"
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
def commit_work_table(conn)
|
|
172
|
+
return unless @work_table
|
|
173
|
+
execute_update conn, "insert into #{@table} select * from #{@work_table};"
|
|
174
|
+
# keep work table records for tracing
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def copy_file_statement(obj)
|
|
178
|
+
%Q(
|
|
179
|
+
copy #{load_target_table} from '#{obj.url}'
|
|
180
|
+
credentials '#{obj.credential_string}'
|
|
181
|
+
#{@load_options}
|
|
182
|
+
;).gsub(/\s+/, ' ').strip
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
def create_manifest_file(objects)
|
|
186
|
+
manifest_name = "manifest-#{@job_process_id}.json"
|
|
187
|
+
@logger.info "creating manifest: #{manifest_name}"
|
|
188
|
+
@logger.info "manifest:\n" + make_manifest_json(objects)
|
|
189
|
+
url = @src.put_control_file(manifest_name, make_manifest_json(objects), noop: @noop)
|
|
190
|
+
yield url
|
|
191
|
+
@src.remove_control_file(File.basename(url), noop: @noop)
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
def make_manifest_json(objects)
|
|
195
|
+
ents = objects.map {|obj|
|
|
196
|
+
{ "url" => obj.url, "mandatory" => true }
|
|
197
|
+
}
|
|
198
|
+
JSON.pretty_generate({ "entries" => ents })
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
def copy_manifest_statement(manifest_url, credential_string)
|
|
202
|
+
%Q(
|
|
203
|
+
copy #{load_target_table}
|
|
204
|
+
from '#{manifest_url}'
|
|
205
|
+
credentials '#{credential_string}'
|
|
206
|
+
manifest
|
|
207
|
+
#{@load_options}
|
|
208
|
+
;).gsub(/\s+/, ' ').strip
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def load_target_table
|
|
212
|
+
@work_table || @table
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def write_load_logs(conn, objects)
|
|
216
|
+
return unless @log_table
|
|
217
|
+
make_load_logs(objects).each do |record|
|
|
218
|
+
write_load_log conn, record
|
|
219
|
+
end
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
def make_load_logs(objects)
|
|
223
|
+
objects.map {|obj| make_load_log(obj) }
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
def make_load_log(obj)
|
|
227
|
+
LoadLogRecord.new(@job_process_id, @start_time, @end_time, @table, obj.url)
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
LoadLogRecord = Struct.new(:job_process_id, :start_time, :end_time, :target_table, :data_file)
|
|
231
|
+
|
|
232
|
+
def write_load_log(conn, record)
|
|
233
|
+
return unless @log_table
|
|
234
|
+
execute_update(conn, <<-EndSQL.gsub(/^\s+/, '').strip)
|
|
235
|
+
insert into #{@log_table}
|
|
236
|
+
( job_process_id
|
|
237
|
+
, start_time
|
|
238
|
+
, end_time
|
|
239
|
+
, target_table
|
|
240
|
+
, data_file
|
|
241
|
+
)
|
|
242
|
+
values
|
|
243
|
+
( #{sql_string record.job_process_id}
|
|
244
|
+
, #{sql_timestamp record.start_time}
|
|
245
|
+
, #{sql_timestamp record.end_time}
|
|
246
|
+
, #{sql_string record.target_table}
|
|
247
|
+
, #{sql_string record.data_file}
|
|
248
|
+
)
|
|
249
|
+
;
|
|
250
|
+
EndSQL
|
|
251
|
+
end
|
|
252
|
+
|
|
253
|
+
def loaded_object?(conn, obj)
|
|
254
|
+
rs = conn.execute("select count(*) as c from #{@log_table} where data_file = #{sql_string obj.url}")
|
|
255
|
+
rs.first['c'].to_i > 0
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
def sql_timestamp(time)
|
|
259
|
+
%Q(timestamp '#{time.strftime('%Y-%m-%d %H:%M:%S')}')
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
def sql_string(str)
|
|
263
|
+
escaped = str.gsub("'", "''")
|
|
264
|
+
%Q('#{escaped}')
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
def transaction(conn)
|
|
268
|
+
execute_update conn, 'begin transaction'
|
|
269
|
+
yield
|
|
270
|
+
execute_update conn, 'commit'
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
def execute_update(conn, sql)
|
|
274
|
+
if @noop
|
|
275
|
+
log_query(sql)
|
|
276
|
+
else
|
|
277
|
+
conn.execute(sql)
|
|
278
|
+
end
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
def log_query(sql)
|
|
282
|
+
@logger.info "[#{@ds.name}] #{mask_secrets(sql)}"
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
def mask_secrets(log)
|
|
286
|
+
log.gsub(/\bcredentials\s+'.*?'/mi, "credentials '****'")
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
def dequeue_all(objects)
|
|
290
|
+
return if @load_only
|
|
291
|
+
objects.each do |obj|
|
|
292
|
+
obj.dequeue(@noop)
|
|
293
|
+
end
|
|
294
|
+
end
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
class S3Queue
|
|
298
|
+
def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
|
|
299
|
+
@ds = data_source
|
|
300
|
+
@queue_path = queue_path
|
|
301
|
+
@persistent_path = persistent_path
|
|
302
|
+
@file_name = file_name
|
|
303
|
+
@logger = logger
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
def credential_string
|
|
307
|
+
@ds.credential_string
|
|
308
|
+
end
|
|
309
|
+
|
|
310
|
+
attr_reader :queue_path
|
|
311
|
+
|
|
312
|
+
def queue_url
|
|
313
|
+
@ds.url(@queue_path)
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
def object_url(key)
|
|
317
|
+
@ds.url(key, no_prefix: true)
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
def control_file_url(name)
|
|
321
|
+
@ds.url(control_file_path(name))
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
def put_control_file(name, data, noop: false)
|
|
325
|
+
@logger.info "s3 put: #{control_file_url(name)}"
|
|
326
|
+
@ds.object(control_file_path(name)).write(data) unless noop
|
|
327
|
+
control_file_url(name)
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
def remove_control_file(name, noop: false)
|
|
331
|
+
@logger.info "s3 delete: #{control_file_url(name)}"
|
|
332
|
+
@ds.object(control_file_path(name)).delete unless noop
|
|
333
|
+
end
|
|
334
|
+
|
|
335
|
+
def control_file_path(name)
|
|
336
|
+
"#{queue_path}/#{name}"
|
|
337
|
+
end
|
|
338
|
+
|
|
339
|
+
def consume_each(noop: false, &block)
|
|
340
|
+
each do |obj|
|
|
341
|
+
yield obj and obj.save(noop: noop)
|
|
342
|
+
end
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
def each(&block)
|
|
346
|
+
queued_objects.each(&block)
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
def queue_directory
|
|
350
|
+
@ds.objects_with_prefix(queue_path)
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
def queued_file_nodes
|
|
354
|
+
queue_directory.as_tree.children.select {|node|
|
|
355
|
+
node.leaf? and
|
|
356
|
+
node.key[-1, 1] != '/' and
|
|
357
|
+
target_file_name?(File.basename(node.key))
|
|
358
|
+
}
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
def queued_objects
|
|
362
|
+
queued_file_nodes.map {|node| LoadableObject.new(self, node, @logger) }
|
|
363
|
+
end
|
|
364
|
+
|
|
365
|
+
def target_file_name?(name)
|
|
366
|
+
file_name_pattern =~ name
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
def persistent_path(name)
|
|
370
|
+
@ds.path("#{format_path(@persistent_path, name)}/#{name}")
|
|
371
|
+
end
|
|
372
|
+
|
|
373
|
+
def format_path(template, basename)
|
|
374
|
+
m = file_name_pattern.match(basename) or
|
|
375
|
+
raise ParameterError, "file name does not match the pattern: #{basename.inspect}"
|
|
376
|
+
template.gsub(/%./) {|op|
|
|
377
|
+
case op
|
|
378
|
+
when '%Y' then m[:year]
|
|
379
|
+
when '%m' then m[:month]
|
|
380
|
+
when '%d' then m[:day]
|
|
381
|
+
when '%H' then m[:hour]
|
|
382
|
+
when '%M' then m[:minute]
|
|
383
|
+
when '%S' then m[:second]
|
|
384
|
+
when '%N' then m[:nanosecond]
|
|
385
|
+
when '%Q' then m[:seq]
|
|
386
|
+
else raise ParameterError, "unknown time format in s3.file_name config: #{op}"
|
|
387
|
+
end
|
|
388
|
+
}
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
def file_name_pattern
|
|
392
|
+
@file_name_pattern ||= compile_name_pattern(@file_name)
|
|
393
|
+
end
|
|
394
|
+
|
|
395
|
+
def compile_name_pattern(template)
|
|
396
|
+
pattern = template.gsub(/[^%]+|%\d*./) {|op|
|
|
397
|
+
case op
|
|
398
|
+
when '%Y' then '(?<year>\\d{4})'
|
|
399
|
+
when '%m' then '(?<month>\\d{2})'
|
|
400
|
+
when '%d' then '(?<day>\\d{2})'
|
|
401
|
+
when '%H' then '(?<hour>\\d{2})'
|
|
402
|
+
when '%M' then '(?<minute>\\d{2})'
|
|
403
|
+
when '%S' then '(?<second>\\d{2})'
|
|
404
|
+
when /\A%(\d+)N\z/ then "(?<nanosecond>\\d{#{$1}})"
|
|
405
|
+
when '%Q' then '(?<seq>\\d+)'
|
|
406
|
+
when '%%' then '%'
|
|
407
|
+
when /\A%/ then raise ParameterError, "unknown time format in s3.file_name config: #{op}"
|
|
408
|
+
else Regexp.quote(op)
|
|
409
|
+
end
|
|
410
|
+
}
|
|
411
|
+
Regexp.compile("\\A#{pattern}\\z")
|
|
412
|
+
end
|
|
413
|
+
end
|
|
414
|
+
|
|
415
|
+
class LoadableObject
|
|
416
|
+
def initialize(s3queue, node, logger)
|
|
417
|
+
@s3queue = s3queue
|
|
418
|
+
@node = node
|
|
419
|
+
@logger = logger
|
|
420
|
+
end
|
|
421
|
+
|
|
422
|
+
def credential_string
|
|
423
|
+
@s3queue.credential_string
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
def path
|
|
427
|
+
@node.key
|
|
428
|
+
end
|
|
429
|
+
|
|
430
|
+
def basename
|
|
431
|
+
File.basename(path)
|
|
432
|
+
end
|
|
433
|
+
|
|
434
|
+
def url
|
|
435
|
+
@s3queue.object_url(path)
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
def save(noop = false)
|
|
439
|
+
@logger.info "s3 move: #{path} -> #{save_path}"
|
|
440
|
+
return if noop
|
|
441
|
+
@node.object.move_to save_path
|
|
442
|
+
@logger.info "file saved"
|
|
443
|
+
end
|
|
444
|
+
|
|
445
|
+
alias dequeue save
|
|
446
|
+
|
|
447
|
+
def save_path
|
|
448
|
+
@s3queue.persistent_path(basename)
|
|
449
|
+
end
|
|
450
|
+
end
|
|
451
|
+
end
|
data/lib/bricolage/version.rb
CHANGED
data/test/home/Gemfile
ADDED
data/test/home/Gemfile.lock
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: ../..
|
|
3
|
+
specs:
|
|
4
|
+
bricolage (5.8.7)
|
|
5
|
+
aws-sdk (< 2)
|
|
6
|
+
mysql2
|
|
7
|
+
pg
|
|
8
|
+
td
|
|
9
|
+
|
|
10
|
+
GEM
|
|
11
|
+
remote: https://rubygems.org/
|
|
12
|
+
specs:
|
|
13
|
+
aws-sdk (1.64.0)
|
|
14
|
+
aws-sdk-v1 (= 1.64.0)
|
|
15
|
+
aws-sdk-v1 (1.64.0)
|
|
16
|
+
json (~> 1.4)
|
|
17
|
+
nokogiri (>= 1.4.4)
|
|
18
|
+
fluent-logger (0.4.10)
|
|
19
|
+
msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
|
20
|
+
yajl-ruby (~> 1.0)
|
|
21
|
+
hirb (0.7.3)
|
|
22
|
+
httpclient (2.5.3.3)
|
|
23
|
+
json (1.8.2)
|
|
24
|
+
mini_portile (0.6.2)
|
|
25
|
+
msgpack (0.5.12)
|
|
26
|
+
mysql2 (0.3.18)
|
|
27
|
+
nokogiri (1.6.6.2)
|
|
28
|
+
mini_portile (~> 0.6.0)
|
|
29
|
+
parallel (0.6.5)
|
|
30
|
+
pg (0.18.2)
|
|
31
|
+
rubyzip (0.9.9)
|
|
32
|
+
td (0.11.10)
|
|
33
|
+
hirb (>= 0.4.5)
|
|
34
|
+
msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
|
35
|
+
parallel (~> 0.6.1)
|
|
36
|
+
rubyzip (~> 0.9.9)
|
|
37
|
+
td-client (~> 0.8.70)
|
|
38
|
+
td-logger (~> 0.3.21)
|
|
39
|
+
yajl-ruby (~> 1.1)
|
|
40
|
+
td-client (0.8.70)
|
|
41
|
+
httpclient (>= 2.4.0, < 2.6.0)
|
|
42
|
+
json (>= 1.7.6)
|
|
43
|
+
msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
|
44
|
+
td-logger (0.3.24)
|
|
45
|
+
fluent-logger (~> 0.4.9)
|
|
46
|
+
msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
|
47
|
+
td-client (~> 0.8.66)
|
|
48
|
+
yajl-ruby (1.2.1)
|
|
49
|
+
|
|
50
|
+
PLATFORMS
|
|
51
|
+
ruby
|
|
52
|
+
|
|
53
|
+
DEPENDENCIES
|
|
54
|
+
bricolage!
|
data/test/home/config/development/database.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
sql: &redshift_parameters
|
|
2
2
|
type: psql
|
|
3
|
-
host: <%= ENV['RSPROXYHOST'] ? 'localhost' : (ENV['RSHOST'] || 'cookpad-dwh-
|
|
3
|
+
host: <%= ENV['RSPROXYHOST'] ? 'localhost' : (ENV['RSHOST'] || 'cookpad-dwh-301.cbzwjkf60uat.ap-northeast-1.redshift.amazonaws.com') %>
|
|
4
4
|
port: <%= ENV['RSPROXYPORT'] || ENV['RSPORT'] || 5439 %>
|
|
5
5
|
database: production
|
|
6
6
|
username: tabemirudev
|
|
@@ -16,7 +16,7 @@ sql_prod:
|
|
|
16
16
|
|
|
17
17
|
sql_app:
|
|
18
18
|
type: psql
|
|
19
|
-
host: localhost #cookpad-dwh-
|
|
19
|
+
host: localhost #cookpad-dwh-301.cbzwjkf60uat.ap-northeast-1.redshift.amazonaws.com
|
|
20
20
|
port: 5445
|
|
21
21
|
database: production
|
|
22
22
|
username: tabemiru_batch
|
|
@@ -44,9 +44,12 @@ td_search_log:
|
|
|
44
44
|
|
|
45
45
|
s3:
|
|
46
46
|
type: s3
|
|
47
|
+
endpoint: "s3-ap-northeast-1.amazonaws.com"
|
|
47
48
|
bucket: tabemiru-data.ap-northeast-1
|
|
48
49
|
prefix: "/dev"
|
|
49
50
|
s3cfg: <%= user_home_relative_path '.s3cfg' %>
|
|
51
|
+
access_key_id: "AKIAI5R5BL2W73PR3FXQ"
|
|
52
|
+
secret_access_key: "Mj9/cwWuruUO0qHsYFI2911G31PQaIj3+eJTBxyO"
|
|
50
53
|
|
|
51
54
|
mysql:
|
|
52
55
|
type: mysql
|
data/test/home/put.sh
ADDED
data/test/home/revert.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/bin/sh
|
|
2
|
+
|
|
3
|
+
for name
|
|
4
|
+
in \
|
|
5
|
+
20141002-1355_00.txt \
|
|
6
|
+
20141002-1355_01.txt \
|
|
7
|
+
20141002-1355_02.txt
|
|
8
|
+
do
|
|
9
|
+
aws s3 mv \
|
|
10
|
+
s3://tabemiru-data.ap-northeast-1/tmp/save/year=2014/month=10/day=02/hour=13/$name \
|
|
11
|
+
s3://tabemiru-data.ap-northeast-1/tmp/queue/$name
|
|
12
|
+
done
|
data/test/home/subsys/load_test.job
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
class: streaming_load
|
|
2
|
+
|
|
3
|
+
s3-ds: s3
|
|
4
|
+
queue-path: queue
|
|
5
|
+
persistent-path: save/year=%Y/month=%m/day=%d/hour=%H
|
|
6
|
+
file-name: "%Y%m%d-%H%M_%Q.txt"
|
|
7
|
+
|
|
8
|
+
redshift-ds: sql
|
|
9
|
+
dest-table: load_test
|
|
10
|
+
work-table: load_test_wk
|
|
11
|
+
log-table: load_test_l
|
|
12
|
+
load-options: "delimiter '\\t'"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bricolage
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 5.8.7
|
|
4
|
+
version: 5.9.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Minero Aoki
|
|
@@ -117,6 +117,7 @@ files:
|
|
|
117
117
|
- jobclass/rebuild-rename.rb
|
|
118
118
|
- jobclass/s3-put.rb
|
|
119
119
|
- jobclass/sql.rb
|
|
120
|
+
- jobclass/streaming_load.rb
|
|
120
121
|
- jobclass/td-delete.rb
|
|
121
122
|
- jobclass/td-export.rb
|
|
122
123
|
- jobclass/unload.rb
|
|
@@ -156,8 +157,17 @@ files:
|
|
|
156
157
|
- libexec/sqldump.Darwin
|
|
157
158
|
- libexec/sqldump.Linux
|
|
158
159
|
- test/all.rb
|
|
160
|
+
- test/home/Gemfile
|
|
161
|
+
- test/home/Gemfile.lock
|
|
159
162
|
- test/home/config/development/database.yml
|
|
160
163
|
- test/home/config/development/password.yml
|
|
164
|
+
- test/home/data/20141002-1355_00.txt
|
|
165
|
+
- test/home/data/20141002-1355_01.txt
|
|
166
|
+
- test/home/data/20141002-1355_02.txt
|
|
167
|
+
- test/home/put.sh
|
|
168
|
+
- test/home/revert.sh
|
|
169
|
+
- test/home/subsys/load_test.ct
|
|
170
|
+
- test/home/subsys/load_test.job
|
|
161
171
|
- test/home/subsys/separated.job
|
|
162
172
|
- test/home/subsys/separated.sql
|
|
163
173
|
- test/home/subsys/unified.jobnet
|