bricolage-streamingload 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,158 +0,0 @@
1
- require 'bricolage/streamingload/loaderparams'
2
- require 'bricolage/streamingload/manifest'
3
- require 'bricolage/sqlutils'
4
- require 'socket'
5
- require 'json'
6
-
7
- module Bricolage
8
-
9
- module StreamingLoad
10
-
11
- class Loader
12
-
13
- include SQLUtils
14
-
15
# Factory: loads the parameters for +task+ via LoaderParams.load and passes
# them, together with the control data source and the logger, to +new+.
def self.load_from_file(ctx, ctl_ds, task, logger:)
  loader_params = LoaderParams.load(ctx, task)
  new(ctl_ds, loader_params, logger: logger)
end
19
-
20
# Keeps the control data source, the load parameters and the logger, and
# derives this loader's process identifier as "<hostname>-<pid>".
def initialize(ctl_ds, params, logger:)
  @ctl_ds, @params, @logger = ctl_ds, params, logger
  @process_id = [Socket.gethostname, Process.pid].join('-')
end
26
-
27
# Claims the task through assign_task and, if a job id came back, runs
# do_load with @connection set to a connection opened from @params.ds.
def execute
  @job_id = assign_task
  # No job id means the task was already executed by another loader.
  return unless @job_id

  @params.ds.open do |conn|
    @connection = conn
    do_load
  end
end
35
-
36
# Inserts a 'running' row for this loader's task into strload_jobs and
# returns the value of the INSERT ... RETURNING job_id query.
#
# The INSERT only selects the task when @params.force? is true or when no
# row for the task exists in strload_jobs yet; the caller treats a falsy
# return value as "task already executed by another loader".
def assign_task
  @ctl_ds.open do |conn|
    insert_job_sql = <<-EndSQL
        insert into strload_jobs
            ( task_id
            , process_id
            , status
            , start_time
            )
        select
            task_id
            , #{s @process_id}
            , 'running'
            , current_timestamp
        from
            strload_tasks
        where
            task_id = #{@params.task_id}
            and (#{@params.force?} or task_id not in (select task_id from strload_jobs))
        returning job_id
        ;
    EndSQL
    # Non-local return: leaves assign_task itself with the query result.
    return conn.query_value(insert_job_sql)
  end
end
61
-
62
# Creates the manifest for this job, loads the objects (through the work
# table when @params.enable_work_table? is true, otherwise straight into the
# destination table) and then records the job as successful.
#
# Any exception is recorded through write_job_error ('failure' for
# JobFailure, 'error' for everything else) and then re-raised.
def do_load
  manifest = ManifestFile.create(
    @params.ctl_bucket,
    job_id: @job_id,
    object_urls: @params.object_urls,
    logger: @logger
  )
  if @params.enable_work_table?
    @connection.transaction do |txn|
      # NOTE: this transaction ends with a truncation, so from the second
      # run on this DELETE finds nothing to remove; its cost is negligible.
      @connection.execute("delete from #{@params.work_table}")
      load_objects @params.work_table, manifest, @params.load_options_string
      commit_work_table txn, @params
    end
  else
    load_objects @params.dest_table, manifest, @params.load_options_string
  end
  # Both paths finish by recording the successful job.
  commit_job_result
rescue JobFailure => ex
  write_job_error 'failure', ex.message
  raise
rescue Exception => ex
  # Exception (not only StandardError) is caught here; it is recorded and
  # immediately re-raised, never swallowed.
  write_job_error 'error', ex.message
  raise
end
84
-
85
# Runs a COPY of the manifest's objects into +dest_table+ on @connection and
# logs the manifest URL afterwards.
#
# +options+ is appended verbatim to the statement. The whole statement has
# its whitespace runs collapsed to single spaces before it is executed.
def load_objects(dest_table, manifest, options)
  clauses = [
    "copy #{dest_table}",
    "from #{s manifest.url}",
    "credentials #{s manifest.credential_string}",
    'manifest',
    'statupdate false',
    'compupdate false',
    "#{options}",
    ';'
  ]
  copy_sql = clauses.join("\n").strip.gsub(/\s+/, ' ')
  @connection.execute(copy_sql)
  @logger.info "load succeeded: #{manifest.url}"
end
98
-
99
# Executes the params' SQL source on @connection, then has the transaction
# truncate the work table and commit.
def commit_work_table(txn, params)
  source_sql = params.sql_source
  @connection.execute(source_sql)
  work_table = params.work_table
  txn.truncate_and_commit(work_table)
end
103
-
104
# Records a successful job: inside one transaction on the control data
# source, writes the 'success' result (with an empty message) and marks the
# job's objects as loaded.
def commit_job_result
  @end_time = Time.now
  @ctl_ds.open do |conn|
    conn.transaction do
      write_job_result(conn, 'success', '')
      update_loaded_flag(conn)
    end
  end
end
113
-
114
# Sets loaded = true on every strload_objects row whose object_id is listed
# in strload_task_objects for the task that owns the current job (@job_id).
def update_loaded_flag(connection)
  mark_loaded_sql = <<-EndSQL
      update
          strload_objects
      set
          loaded = true
      where
          object_id in (
            select
                object_id
            from
                strload_task_objects
            where task_id = (select task_id from strload_jobs where job_id = #{@job_id})
          )
      ;
  EndSQL
  connection.execute(mark_loaded_sql)
end
131
-
132
# Upper bound, in characters, for the message stored with a job result.
MAX_MESSAGE_LENGTH = 1000

# Records a failed job: logs the first line of +message+ as a warning and
# stores +status+ with that line (stripped, cut to MAX_MESSAGE_LENGTH
# characters) through write_job_result on the control data source.
#
# An empty (or nil) message is recorded as an empty string. This method runs
# inside the rescue handlers of do_load, so it must not raise on an
# exception whose message is empty: that would mask the original failure
# and leave the job row unfinished.
def write_job_error(status, message)
  @end_time = Time.now
  # String#lines returns [] for "", so .first may be nil; to_s guards that.
  first_line = message.to_s.lines.first.to_s
  @logger.warn first_line
  @ctl_ds.open { |conn|
    write_job_result conn, status, first_line.strip[0, MAX_MESSAGE_LENGTH]
  }
end
141
-
142
# Updates the strload_jobs row of the current job (@job_id) with the given
# status and message, and sets finish_time to the database's
# current_timestamp.
def write_job_result(connection, status, message)
  finish_job_sql = <<-EndSQL
      update
          strload_jobs
      set
          (status, finish_time, message) = (#{s status}, current_timestamp, #{s message})
      where
          job_id = #{@job_id}
      ;
  EndSQL
  connection.execute(finish_job_sql)
end
153
-
154
- end
155
-
156
- end
157
-
158
- end