workflow-archiver 1.3.2 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/dor/archiver_version.rb +2 -5
- data/lib/dor/workflow_archiver.rb +62 -130
- data/lib/workflow-archiver.rb +1 -0
- metadata +6 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e07be8143cac430dfa7166ad246e81d64c85558
|
4
|
+
data.tar.gz: 23612c1949726b163ad89534727679fbb6da3968
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 779182bd0fd4cdabb6c4480aa7ceb5aa102bfe33c60a00317ea2b98ec4d8e62e2caef69c4cdc784ccd5cea3e91960fe2d077b076bccdb47c02c73af796755fd6
|
7
|
+
data.tar.gz: 679c197f47b3c409a0f61f3350eae177bfc230e9ae9b9723b090a328f2a87be711c4b26cafbb7680ce280f2b42a45d1e4ed708db48be7f1e1f4aaed9641ac0c0
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.0.0
|
data/lib/dor/archiver_version.rb
CHANGED
@@ -1,18 +1,16 @@
|
|
1
1
|
require 'rest_client'
|
2
2
|
require 'confstruct'
|
3
3
|
require 'lyber_core'
|
4
|
-
require '
|
4
|
+
require 'sequel'
|
5
5
|
|
6
6
|
module Dor
|
7
|
-
|
8
7
|
# Holds the parameters about the workflow rows that need to be deleted
|
9
8
|
ArchiveCriteria = Struct.new(:repository, :druid, :datastream, :version) do
|
10
9
|
# @param [Array<Hash>] List of objects returned from {WorkflowArchiver#find_completed_objects}. It expects the following keys in the hash
|
11
|
-
# "REPOSITORY", "DRUID", "DATASTREAM". Note they are all caps strings, not symbols
|
12
10
|
def setup_from_query(row_hash)
|
13
|
-
self.repository = row_hash[
|
14
|
-
self.druid = row_hash[
|
15
|
-
self.datastream = row_hash[
|
11
|
+
self.repository = row_hash[:repository]
|
12
|
+
self.druid = row_hash[:druid]
|
13
|
+
self.datastream = row_hash[:datastream]
|
16
14
|
set_current_version
|
17
15
|
self
|
18
16
|
end
|
@@ -21,29 +19,27 @@ module Dor
|
|
21
19
|
# @return [Hash] Maps struct member names (excluding version) to non-nil member values
|
22
20
|
def to_bind_hash
|
23
21
|
h = {}
|
24
|
-
members.reject{|mem| mem =~ /version/}.each do |m|
|
25
|
-
h[m
|
22
|
+
members.reject { |mem| mem =~ /version/ }.each do |m|
|
23
|
+
h[m] = send(m) if send(m)
|
26
24
|
end
|
27
25
|
h
|
28
26
|
end
|
29
27
|
|
30
28
|
def set_current_version
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
self.version = '1'
|
38
|
-
end
|
29
|
+
self.version = RestClient.get WorkflowArchiver.config.dor_service_uri + "/dor/v1/objects/#{druid}/versions/current"
|
30
|
+
rescue RestClient::InternalServerError => ise
|
31
|
+
raise unless ise.inspect =~ /Unable to find.*in fedora/
|
32
|
+
LyberCore::Log.warn ise.inspect.to_s
|
33
|
+
LyberCore::Log.warn "Moving workflow rows with version set to '1'"
|
34
|
+
self.version = '1'
|
39
35
|
end
|
40
36
|
end
|
41
37
|
|
42
38
|
class WorkflowArchiver
|
43
|
-
WF_COLUMNS = %w(
|
39
|
+
WF_COLUMNS = %w(id druid datastream process status error_msg error_txt datetime attempts lifecycle elapsed repository note priority lane_id)
|
44
40
|
|
45
41
|
# These attributes mostly used for testing
|
46
|
-
attr_reader :
|
42
|
+
attr_reader :errors
|
47
43
|
|
48
44
|
def self.config
|
49
45
|
@@conf ||= Confstruct::Configuration.new
|
@@ -52,63 +48,33 @@ module Dor
|
|
52
48
|
# Sets up logging and connects to the database. By default it reads values from constants:
|
53
49
|
# WORKFLOW_DB_LOGIN, WORKFLOW_DB_PASSWORD, WORKFLOW_DB_URI, DOR_SERVICE_URI but can be overridden with the opts Hash
|
54
50
|
# @param [Hash] opts Options to override database parameters
|
55
|
-
# @option opts [String] :login ('WORKFLOW_DB_LOGIN') Database login id
|
56
|
-
# @option opts [String] :password ('WORKFLOW_DB_PASSWORD') Database password
|
57
51
|
# @option opts [String] :db_uri ('WORKFLOW_DB_URI') Database uri
|
58
52
|
# @option opts [String] :wf_table ('workflow') Name of the active workflow table
|
59
53
|
# @option opts [String] :wfa_table ('workflow_archive') Name of the workflow archive table
|
60
|
-
# @option opts [String] :dor_service_uri ('DOR_SERVICE_URI') URI of the DOR Rest service
|
61
54
|
# @option opts [Integer] :retry_delay (5) Number of seconds to sleep between retries of database operations
|
62
|
-
def initialize(opts={})
|
63
|
-
@
|
64
|
-
@
|
65
|
-
@
|
66
|
-
@
|
67
|
-
@
|
68
|
-
@workflow_archive_table = (opts.include?(:wfa_table) ? opts[:wfa_table] : "workflow_archive")
|
69
|
-
@retry_delay = (opts.include?(:retry_delay) ? opts[:retry_delay] : 5)
|
70
|
-
|
55
|
+
def initialize(opts = {})
|
56
|
+
@conn = opts[:db_connection]
|
57
|
+
@db_uri = opts.fetch(:db_uri, WorkflowArchiver.config.db_uri).freeze
|
58
|
+
@workflow_table = opts.include?(:wf_table) ? opts[:wf_table] : 'workflow'
|
59
|
+
@workflow_archive_table = opts.include?(:wfa_table) ? opts[:wfa_table] : 'workflow_archive'
|
60
|
+
@retry_delay = opts.include?(:retry_delay) ? opts[:retry_delay] : 5
|
71
61
|
# initialize some counters
|
72
62
|
@errors = 0
|
73
63
|
@archived = 0
|
74
64
|
end
|
75
65
|
|
76
|
-
def
|
77
|
-
|
78
|
-
@conn = OCI8.new(@login, @password, $odb_pool)
|
79
|
-
@conn.autocommit = false
|
66
|
+
def conn
|
67
|
+
@conn ||= Sequel.connect(@db_uri)
|
80
68
|
end
|
81
69
|
|
82
|
-
|
83
|
-
$odb_pool.destroy if($odb_pool)
|
84
|
-
end
|
85
|
-
|
86
|
-
def bind_and_exec_sql(sql, workflow_info)
|
87
|
-
# LyberCore::Log.debug("Executing: #{sql}")
|
88
|
-
cursor = @conn.parse(sql)
|
89
|
-
|
90
|
-
workflow_info.to_bind_hash.each do |k, v|
|
91
|
-
param = ":#{k}"
|
92
|
-
#LyberCore::Log.debug("Setting: #{param} #{v}")
|
93
|
-
cursor.bind_param(param, v)
|
94
|
-
end
|
95
|
-
|
96
|
-
num_rows = cursor.exec
|
97
|
-
unless num_rows > 0
|
98
|
-
raise "Expected more than 0 rows to be updated"
|
99
|
-
end
|
100
|
-
ensure
|
101
|
-
cursor.close
|
102
|
-
end
|
103
|
-
|
104
|
-
# @return String The columns appended with comma and newline
|
70
|
+
# @return [String] The columns appended with comma and newline
|
105
71
|
def wf_column_string
|
106
|
-
WF_COLUMNS.
|
72
|
+
WF_COLUMNS.join(",\n")
|
107
73
|
end
|
108
74
|
|
109
|
-
# @return String The columns prepended with 'w.' and appended with comma and newline
|
75
|
+
# @return [String] The columns prepended with 'w.' and appended with comma and newline
|
110
76
|
def wf_archive_column_string
|
111
|
-
WF_COLUMNS.
|
77
|
+
WF_COLUMNS.map { |col| "#{@workflow_table}.#{col}" }.join(",\n")
|
112
78
|
end
|
113
79
|
|
114
80
|
# Use this as a one-shot method to archive all the steps of an object's particular datastream
|
@@ -120,17 +86,14 @@ module Dor
|
|
120
86
|
# @param [String] version
|
121
87
|
def archive_one_datastream(repository, druid, datastream, version)
|
122
88
|
criteria = [ArchiveCriteria.new(repository, druid, datastream, version)]
|
123
|
-
connect_to_db
|
124
89
|
archive_rows criteria
|
125
|
-
ensure
|
126
|
-
@conn.logoff if(@conn)
|
127
90
|
end
|
128
91
|
|
129
92
|
# Copies rows from the workflow table to the workflow_archive table, then deletes the rows from workflow
|
130
93
|
# Both operations must complete, or they get rolled back
|
131
94
|
# @param [Array<ArchiveCriteria>] objs List of objects returned from {#find_completed_objects} and mapped to an array of ArchiveCriteria objects.
|
132
95
|
def archive_rows(objs)
|
133
|
-
|
96
|
+
objs.each do |obj|
|
134
97
|
tries = 0
|
135
98
|
begin
|
136
99
|
tries += 1
|
@@ -138,11 +101,8 @@ module Dor
|
|
138
101
|
@archived += 1
|
139
102
|
rescue => e
|
140
103
|
LyberCore::Log.error "Rolling back transaction due to: #{e.inspect}\n" << e.backtrace.join("\n") << "\n!!!!!!!!!!!!!!!!!!"
|
141
|
-
|
142
|
-
|
143
|
-
# Retry this druid up to 3 times
|
144
|
-
if tries < 3
|
145
|
-
LyberCore::Log.error " Retrying archive operation in #{@retry_delay.to_s} seconds..."
|
104
|
+
if tries < 3 # Retry this druid up to 3 times
|
105
|
+
LyberCore::Log.error " Retrying archive operation in #{@retry_delay} seconds..."
|
146
106
|
sleep @retry_delay
|
147
107
|
retry
|
148
108
|
end
|
@@ -150,55 +110,55 @@ module Dor
|
|
150
110
|
|
151
111
|
@errors += 1
|
152
112
|
if @errors >= 3
|
153
|
-
LyberCore::Log.fatal(
|
113
|
+
LyberCore::Log.fatal('Too many errors. Archiving halted')
|
154
114
|
break
|
155
115
|
end
|
156
116
|
end
|
157
|
-
|
158
117
|
end # druids.each
|
159
118
|
end
|
160
119
|
|
161
120
|
# @param [ArchiveCriteria] workflow_info contains parameters on the workflow rows to archive
|
162
121
|
def do_one_archive(workflow_info)
|
163
122
|
LyberCore::Log.info "Archiving #{workflow_info.inspect}"
|
164
|
-
|
165
|
-
|
166
|
-
copy_sql =<<-EOSQL
|
123
|
+
copy_sql = <<-EOSQL
|
167
124
|
insert into #{@workflow_archive_table} (
|
168
|
-
#{wf_column_string}
|
169
|
-
|
125
|
+
#{wf_column_string},
|
126
|
+
version
|
170
127
|
)
|
171
128
|
select
|
172
|
-
#{wf_archive_column_string}
|
173
|
-
#{workflow_info.version} as
|
174
|
-
from #{@workflow_table}
|
175
|
-
where
|
176
|
-
and
|
129
|
+
#{wf_archive_column_string},
|
130
|
+
#{workflow_info.version} as version
|
131
|
+
from #{@workflow_table}
|
132
|
+
where #{@workflow_table}.druid = :druid
|
133
|
+
and #{@workflow_table}.datastream = :datastream
|
177
134
|
EOSQL
|
178
135
|
|
179
|
-
delete_sql = "delete #{@workflow_table} where druid = :
|
136
|
+
delete_sql = "delete from #{@workflow_table} where druid = :druid and datastream = :datastream "
|
180
137
|
|
181
138
|
if(workflow_info.repository)
|
182
|
-
copy_sql
|
183
|
-
delete_sql
|
139
|
+
copy_sql += "and #{@workflow_table}.repository = :repository"
|
140
|
+
delete_sql += 'and repository = :repository'
|
184
141
|
else
|
185
|
-
copy_sql
|
186
|
-
delete_sql
|
142
|
+
copy_sql += "and #{@workflow_table}.repository IS NULL"
|
143
|
+
delete_sql += 'and repository IS NULL'
|
187
144
|
end
|
188
145
|
|
189
|
-
|
146
|
+
conn.transaction do
|
147
|
+
conn.run Sequel::SQL::PlaceholderLiteralString.new(copy_sql, workflow_info.to_bind_hash)
|
190
148
|
|
191
|
-
|
192
|
-
bind_and_exec_sql(delete_sql, workflow_info)
|
149
|
+
LyberCore::Log.debug ' Removing old workflow rows'
|
193
150
|
|
194
|
-
|
151
|
+
conn.run Sequel::SQL::PlaceholderLiteralString.new(delete_sql, workflow_info.to_bind_hash)
|
152
|
+
end
|
195
153
|
end
|
196
154
|
|
197
155
|
# Finds objects where all workflow steps are complete
|
198
|
-
#
|
199
|
-
#
|
156
|
+
# @return [Array<Hash{String=>String}>] each hash returned has the following keys:
|
157
|
+
# {"REPOSITORY"=>"dor", "DRUID"=>"druid:345", "DATASTREAM"=>"googleScannedBookWF"}
|
200
158
|
def find_completed_objects
|
201
|
-
|
159
|
+
return to_enum(:find_completed_objects) unless block_given?
|
160
|
+
|
161
|
+
completed_query = <<-EOSQL
|
202
162
|
select distinct repository, datastream, druid
|
203
163
|
from workflow w1
|
204
164
|
where w1.status in ('completed', 'skipped')
|
@@ -213,18 +173,15 @@ module Dor
|
|
213
173
|
)
|
214
174
|
EOSQL
|
215
175
|
|
216
|
-
|
217
|
-
|
218
|
-
while r = cursor.fetch_hash
|
219
|
-
rows << r
|
176
|
+
conn.fetch(completed_query) do |row|
|
177
|
+
yield row
|
220
178
|
end
|
221
|
-
rows
|
222
179
|
end
|
223
180
|
|
224
181
|
# @param [Array<Hash>] rows result from #find_completed_objects
|
225
182
|
# @return [Array<ArchiveCriteria>] each result mapped to an ArchiveCriteria object
|
226
183
|
def map_result_to_criteria(rows)
|
227
|
-
|
184
|
+
rows.lazy.map do |r|
|
228
185
|
begin
|
229
186
|
ArchiveCriteria.new.setup_from_query(r)
|
230
187
|
rescue => e
|
@@ -232,47 +189,22 @@ module Dor
|
|
232
189
|
LyberCore::Log.error("#{e.inspect}\n" + e.backtrace.join("\n"))
|
233
190
|
nil
|
234
191
|
end
|
235
|
-
end
|
236
|
-
criteria.reject {|c| c.nil?}
|
237
|
-
end
|
238
|
-
|
239
|
-
def simple_sql_exec(sql)
|
240
|
-
@conn.exec(sql)
|
241
|
-
rescue Exception => e
|
242
|
-
LyberCore::Log.warn "Ignoring error: #{e.message}\n while trying to execute: " << sql
|
243
|
-
end
|
244
|
-
|
245
|
-
def with_indexing_disabled(&block)
|
246
|
-
simple_sql_exec("drop index ds_wf_ar_bitmap_idx")
|
247
|
-
simple_sql_exec("drop index repo_wf_ar_bitmap_idx")
|
248
|
-
yield
|
249
|
-
ensure
|
250
|
-
simple_sql_exec("create bitmap index ds_wf_ar_bitmap_idx on workflow_archive (datastream)")
|
251
|
-
simple_sql_exec("create bitmap index repo_wf_ar_bitmap_idx on workflow_archive (repository)")
|
192
|
+
end.reject { |r| r.nil? }
|
252
193
|
end
|
253
194
|
|
254
195
|
# Does the work of finding completed objects and archiving the rows
|
255
196
|
def archive
|
256
|
-
connect_to_db
|
257
197
|
objs = find_completed_objects
|
258
198
|
|
259
|
-
if objs.
|
260
|
-
LyberCore::Log.info
|
199
|
+
if objs.none?
|
200
|
+
LyberCore::Log.info 'Nothing to archive'
|
261
201
|
exit true
|
262
202
|
end
|
263
|
-
|
264
|
-
LyberCore::Log.info "Found #{objs.size.to_s} completed workflows"
|
265
|
-
objs = objs.first(600) # FIXME: temporarily limit objs processed until we fix cron not to fire if job still running
|
266
|
-
|
203
|
+
LyberCore::Log.info "Found #{objs.size} completed workflows"
|
267
204
|
archiving_criteria = map_result_to_criteria(objs)
|
268
|
-
|
205
|
+
archive_rows(archiving_criteria)
|
269
206
|
|
270
|
-
LyberCore::Log.info "DONE! Processed #{@archived.to_s} objects with #{@errors.to_s} errors" if
|
271
|
-
ensure
|
272
|
-
@conn.logoff
|
273
|
-
destroy_pool
|
207
|
+
LyberCore::Log.info "DONE! Processed #{@archived.to_s} objects with #{@errors.to_s} errors" if @errors < 3
|
274
208
|
end
|
275
|
-
|
276
209
|
end
|
277
|
-
|
278
210
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'dor/workflow_archiver'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: workflow-archiver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Willy Mene
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lyber-core
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: sequel
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -95,21 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: active-fedora
|
98
|
+
name: sqlite3
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|
114
100
|
requirements:
|
115
101
|
- - ">="
|
@@ -132,6 +118,7 @@ files:
|
|
132
118
|
- VERSION
|
133
119
|
- lib/dor/archiver_version.rb
|
134
120
|
- lib/dor/workflow_archiver.rb
|
121
|
+
- lib/workflow-archiver.rb
|
135
122
|
homepage:
|
136
123
|
licenses: []
|
137
124
|
metadata: {}
|
@@ -151,7 +138,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
151
138
|
version: 1.3.6
|
152
139
|
requirements: []
|
153
140
|
rubyforge_project:
|
154
|
-
rubygems_version: 2.
|
141
|
+
rubygems_version: 2.4.5.1
|
155
142
|
signing_key:
|
156
143
|
specification_version: 4
|
157
144
|
summary: Enables archiving of DOR workflows
|