bricolage-streamingload 0.7.1 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,158 +0,0 @@
1
- require 'bricolage/streamingload/loaderparams'
2
- require 'bricolage/streamingload/manifest'
3
- require 'bricolage/sqlutils'
4
- require 'socket'
5
- require 'json'
6
-
7
- module Bricolage
8
-
9
- module StreamingLoad
10
-
11
- class Loader
12
-
13
- include SQLUtils
14
-
15
# Builds a loader for +task+.
#
# The task's parameters are obtained from LoaderParams.load(ctx, task) and
# handed to the constructor together with the control data source and logger.
def self.load_from_file(ctx, ctl_ds, task, logger:)
  new(ctl_ds, LoaderParams.load(ctx, task), logger: logger)
end
19
-
20
# Sets up a loader.
#
# ctl_ds :: control data source, stored as @ctl_ds
# params :: load parameters, stored as @params
# logger :: logger, stored as @logger
#
# Also derives @process_id as "<hostname>-<pid>" of the current process.
def initialize(ctl_ds, params, logger:)
  @ctl_ds, @params, @logger = ctl_ds, params, logger
  @process_id = [Socket.gethostname, Process.pid].join('-')
end
26
-
27
# Claims a job for the task and, if one was obtained, runs the load on a
# connection opened from @params.ds.
#
# Returns nil without loading when assign_task yields no job ID.
def execute
  @job_id = assign_task
  # No job ID: the task was already executed by another loader.
  return unless @job_id

  @params.ds.open do |conn|
    @connection = conn
    do_load
  end
end
35
-
36
# Registers a 'running' job row in strload_jobs for @params.task_id on the
# control data source and returns the job_id reported by the insert.
#
# The insert only selects the task when @params.force? is true or the task
# has no row in strload_jobs yet; otherwise no row is inserted (execute treats
# a missing job ID as "already executed by another loader").
def assign_task
  @ctl_ds.open do |conn|
    return conn.query_value(<<-EndSQL)
      insert into strload_jobs
          ( task_id
          , process_id
          , status
          , start_time
          )
      select
          task_id
          , #{s @process_id}
          , 'running'
          , current_timestamp
      from
          strload_tasks
      where
          task_id = #{@params.task_id}
          and (#{@params.force?} or task_id not in (select task_id from strload_jobs))
      returning job_id
      ;
    EndSQL
  end
end
61
-
62
# Loads the task's objects and records the outcome.
#
# A manifest is created for @params.object_urls; the objects are then loaded
# either through the work table (inside a transaction) or directly into the
# destination table, and the job is marked successful.
#
# Any exception is recorded on the job row ('failure' for JobFailure,
# 'error' for everything else) and then re-raised.
def do_load
  manifest = ManifestFile.create(
    @params.ctl_bucket,
    job_id: @job_id,
    object_urls: @params.object_urls,
    logger: @logger
  )
  if @params.enable_work_table?
    @connection.transaction do |txn|
      # NOTE: This transaction ends with truncation, this DELETE does nothing
      # from the second time. So don't worry about DELETE cost here.
      @connection.execute("delete from #{@params.work_table}")
      load_objects(@params.work_table, manifest, @params.load_options_string)
      commit_work_table(txn, @params)
    end
  else
    load_objects(@params.dest_table, manifest, @params.load_options_string)
  end
  commit_job_result
rescue JobFailure => ex
  write_job_error('failure', ex.message)
  raise
rescue Exception => ex
  # Rescues Exception (not just StandardError) so the status is written for
  # every kind of abort; the exception is always re-raised.
  write_job_error('error', ex.message)
  raise
end
84
-
85
# Runs a COPY into +dest_table+ from the manifest and logs success.
#
# dest_table :: name of the table to copy into
# manifest   :: object providing #url and #credential_string
# options    :: extra COPY options, appended verbatim
#
# The statement is collapsed to a single line (runs of whitespace become one
# space) before it is executed.
def load_objects(dest_table, manifest, options)
  copy_stmt = <<-EndSQL.strip.gsub(/\s+/, ' ')
    copy #{dest_table}
    from #{s manifest.url}
    credentials #{s manifest.credential_string}
    manifest
    statupdate false
    compupdate false
    #{options}
    ;
  EndSQL
  @connection.execute(copy_stmt)
  @logger.info "load succeeded: #{manifest.url}"
end
98
-
99
# Executes params.sql_source on the current connection, then asks the
# transaction to truncate params.work_table and commit.
def commit_work_table(txn, params)
  @connection.execute params.sql_source
  txn.truncate_and_commit params.work_table
end
103
-
104
# Marks the job as successful and flags its objects as loaded, both inside
# one transaction on the control data source. Also stamps @end_time.
def commit_job_result
  @end_time = Time.now
  @ctl_ds.open do |conn|
    conn.transaction do
      write_job_result(conn, 'success', '')
      update_loaded_flag(conn)
    end
  end
end
113
-
114
# Sets loaded = true on every strload_objects row whose object_id is listed
# in strload_task_objects for the task of the current job (@job_id).
def update_loaded_flag(connection)
  sql = <<-EndSQL
    update
        strload_objects
    set
        loaded = true
    where
        object_id in (
          select
              object_id
          from
              strload_task_objects
          where task_id = (select task_id from strload_jobs where job_id = #{@job_id})
        )
    ;
  EndSQL
  connection.execute(sql)
end
131
-
132
# Maximum number of characters of an error message stored with a job.
MAX_MESSAGE_LENGTH = 1000

# Records an unsuccessful job outcome.
#
# status  :: status string to store on the job row (e.g. 'failure', 'error')
# message :: error message; only its first line is logged and stored, and the
#            stored text is stripped and cut to MAX_MESSAGE_LENGTH characters
#
# Stamps @end_time, logs the first line as a warning, and writes the result
# through write_job_result on a connection opened from @ctl_ds.
def write_job_error(status, message)
  @end_time = Time.now
  # An exception can carry an empty (or nil) message, and "".lines.first is
  # nil. Normalize to a string so this handler never raises NoMethodError and
  # masks the error being reported.
  first_line = message.to_s.lines.first.to_s
  @logger.warn first_line
  @ctl_ds.open do |conn|
    write_job_result conn, status, first_line.strip[0, MAX_MESSAGE_LENGTH]
  end
end
141
-
142
# Stores the final status and message on the strload_jobs row of the current
# job (@job_id); finish_time is set to the database's current_timestamp.
#
# connection :: connection to run the update on
# status     :: status string to store
# message    :: message string to store
def write_job_result(connection, status, message)
  sql = <<-EndSQL
    update
        strload_jobs
    set
        (status, finish_time, message) = (#{s status}, current_timestamp, #{s message})
    where
        job_id = #{@job_id}
    ;
  EndSQL
  connection.execute(sql)
end
153
-
154
- end
155
-
156
- end
157
-
158
- end