bricolage-streamingload 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/bricolage-streaming-loader +2 -2
- data/lib/bricolage/sqsmock.rb +0 -1
- data/lib/bricolage/streamingload/dispatcher.rb +2 -1
- data/lib/bricolage/streamingload/job.rb +387 -0
- data/lib/bricolage/streamingload/{loaderparams.rb → jobparams.rb} +14 -39
- data/lib/bricolage/streamingload/manifest.rb +7 -1
- data/lib/bricolage/streamingload/objectbuffer.rb +0 -3
- data/lib/bricolage/streamingload/task.rb +5 -68
- data/lib/bricolage/streamingload/{loaderservice.rb → taskhandler.rb} +102 -61
- data/lib/bricolage/streamingload/version.rb +1 -1
- data/test/streamingload/test_dispatcher.rb +6 -6
- data/test/streamingload/test_job.rb +438 -0
- metadata +8 -9
- data/lib/bricolage/nulllogger.rb +0 -20
- data/lib/bricolage/snsdatasource.rb +0 -40
- data/lib/bricolage/streamingload/loader.rb +0 -158
@@ -1,158 +0,0 @@
|
|
1
|
-
require 'bricolage/streamingload/loaderparams'
|
2
|
-
require 'bricolage/streamingload/manifest'
|
3
|
-
require 'bricolage/sqlutils'
|
4
|
-
require 'socket'
|
5
|
-
require 'json'
|
6
|
-
|
7
|
-
module Bricolage
|
8
|
-
|
9
|
-
module StreamingLoad
|
10
|
-
|
11
|
-
class Loader
|
12
|
-
|
13
|
-
include SQLUtils
|
14
|
-
|
15
|
-
# Builds a Loader for +task+.
#
# The task's parameters are obtained from LoaderParams.load(ctx, task) and
# passed to Loader.new together with the control data source and the logger.
#
# ctx::     context object forwarded to LoaderParams.load
# ctl_ds::  control data source handed to the new loader
# task::    task object forwarded to LoaderParams.load
# logger::  logger handed to the new loader
#
# Returns the new Loader instance.
def self.load_from_file(ctx, ctl_ds, task, logger:)
  new(ctl_ds, LoaderParams.load(ctx, task), logger: logger)
end
|
19
|
-
|
20
|
-
# Stores the collaborators and derives the identifier of this loader process.
#
# ctl_ds::  control data source, kept in @ctl_ds
# params::  loader parameters, kept in @params
# logger::  logger, kept in @logger
def initialize(ctl_ds, params, logger:)
  @ctl_ds, @params, @logger = ctl_ds, params, logger
  # "<hostname>-<pid>"; assign_task writes it to strload_jobs.process_id.
  @process_id = [Socket.gethostname, Process.pid].join('-')
end
|
26
|
-
|
27
|
-
# Runs the load for this loader's task.
#
# First tries to register a job through assign_task. When no job id comes
# back (the task was already executed by another loader) nothing is loaded.
# Otherwise a connection is opened on @params.ds, remembered in @connection
# and do_load is run while that connection is open.
def execute
  @job_id = assign_task
  return unless @job_id # task already executed by other loader

  @params.ds.open do |connection|
    @connection = connection
    do_load
  end
end
|
35
|
-
|
36
|
-
# Registers a 'running' job row for this loader's task on the control
# data source and returns the job_id produced by the INSERT ... RETURNING.
#
# The row is only inserted when @params.force? is true or when strload_jobs
# has no row for the task yet; otherwise the statement inserts nothing.
def assign_task
  @ctl_ds.open do |conn|
    insert_sql = <<-EndSQL
        insert into strload_jobs
            ( task_id
            , process_id
            , status
            , start_time
            )
        select
            task_id
            , #{s @process_id}
            , 'running'
            , current_timestamp
        from
            strload_tasks
        where
            task_id = #{@params.task_id}
            and (#{@params.force?} or task_id not in (select task_id from strload_jobs))
        returning job_id
        ;
    EndSQL
    # Return from the method itself: the value handed back by #open is not relied on.
    return conn.query_value(insert_sql)
  end
end
|
61
|
-
|
62
|
-
# Performs the load for the assigned job.
#
# Creates the manifest for the job's object URLs, then either
# * loads into the work table inside a transaction, runs commit_work_table, or
# * loads straight into the destination table,
# and finally records success through commit_job_result.
#
# Every exception (not only StandardError) is recorded via write_job_error
# and then re-raised: JobFailure is stored as 'failure', anything else as 'error'.
def do_load
  manifest = ManifestFile.create(
    @params.ctl_bucket,
    job_id: @job_id,
    object_urls: @params.object_urls,
    logger: @logger
  )
  if @params.enable_work_table?
    @connection.transaction do |txn|
      # NOTE: This transaction ends with truncation, this DELETE does nothing
      # from the second time.  So don't worry about DELETE cost here.
      @connection.execute("delete from #{@params.work_table}")
      load_objects @params.work_table, manifest, @params.load_options_string
      commit_work_table txn, @params
    end
  else
    load_objects @params.dest_table, manifest, @params.load_options_string
  end
  commit_job_result
rescue Exception => ex
  write_job_error((ex.is_a?(JobFailure) ? 'failure' : 'error'), ex.message)
  raise
end
|
84
|
-
|
85
|
-
# Issues a COPY statement on @connection that loads the objects listed in
# +manifest+ into +dest_table+, then logs the manifest URL.
#
# dest_table::  name of the table to copy into (interpolated as-is)
# manifest::    object responding to #url and #credential_string
# options::     extra COPY options appended verbatim (may be empty or nil)
#
# The statement is assembled clause by clause and joined with single spaces.
# Interpolated values are never whitespace-normalized, so runs of whitespace
# inside quoted literals (URL, credentials, option values such as a tab
# delimiter) reach the database unchanged.
def load_objects(dest_table, manifest, options)
  clauses = [
    "copy #{dest_table}",
    "from #{s(manifest.url)}",
    "credentials #{s(manifest.credential_string)}",
    'manifest',
    'statupdate false',
    'compupdate false'
  ]
  extra_options = options.to_s.strip
  clauses << extra_options unless extra_options.empty?
  clauses << ';'
  @connection.execute(clauses.join(' '))
  @logger.info "load succeeded: #{manifest.url}"
end
|
98
|
-
|
99
|
-
# Finishes a work-table load: executes params.sql_source on @connection,
# then asks the transaction to truncate params.work_table and commit.
#
# txn::     transaction object responding to #truncate_and_commit
# params::  object responding to #sql_source and #work_table
def commit_work_table(txn, params)
  source_sql = params.sql_source
  @connection.execute(source_sql)
  work_table = params.work_table
  txn.truncate_and_commit(work_table)
end
|
103
|
-
|
104
|
-
# Records a successful job on the control data source.
#
# Within a single transaction the job row is marked 'success' (with an
# empty message) and the loaded flag of the job's objects is updated.
def commit_job_result
  @end_time = Time.now
  @ctl_ds.open do |conn|
    conn.transaction do
      write_job_result conn, 'success', ''
      update_loaded_flag conn
    end
  end
end
|
113
|
-
|
114
|
-
# Sets loaded = true on every strload_objects row that belongs to the task
# of the current job (@job_id), using the given control connection.
#
# connection::  connection responding to #execute
def update_loaded_flag(connection)
  update_sql = <<-EndSQL
      update
          strload_objects
      set
          loaded = true
      where
          object_id in (
            select
                object_id
            from
                strload_task_objects
            where task_id = (select task_id from strload_jobs where job_id = #{@job_id})
          )
      ;
  EndSQL
  connection.execute(update_sql)
end
|
131
|
-
|
132
|
-
MAX_MESSAGE_LENGTH = 1000
|
133
|
-
|
134
|
-
# Records an abnormal job termination: logs a one-line summary of +message+
# as a warning and stores it, together with +status+, through write_job_result.
#
# status::   status value to store (do_load passes 'failure' or 'error')
# message::  error text; only its first line, stripped and cut to
#            MAX_MESSAGE_LENGTH characters, is stored
def write_job_error(status, message)
  @end_time = Time.now
  # An empty (or nil) message has no first line. Fall back to '' instead of
  # calling #strip on nil, which would raise while the caller is already
  # handling the real error and leave the job row without a final status.
  summary = message.to_s.lines.first.to_s.strip
  @logger.warn summary
  @ctl_ds.open {|conn|
    write_job_result conn, status, summary[0, MAX_MESSAGE_LENGTH]
  }
end
|
141
|
-
|
142
|
-
# Stores the outcome of the current job (@job_id) in strload_jobs: sets
# status, finish_time (current_timestamp) and message on the job's row.
#
# connection::  control connection responding to #execute
# status::      status string, quoted with #s before interpolation
# message::     message string, quoted with #s before interpolation
def write_job_result(connection, status, message)
  result_sql = <<-EndSQL
      update
          strload_jobs
      set
          (status, finish_time, message) = (#{s status}, current_timestamp, #{s message})
      where
          job_id = #{@job_id}
      ;
  EndSQL
  connection.execute(result_sql)
end
|
153
|
-
|
154
|
-
end
|
155
|
-
|
156
|
-
end
|
157
|
-
|
158
|
-
end
|