workflow-archiver 1.3.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/dor/archiver_version.rb +2 -5
- data/lib/dor/workflow_archiver.rb +62 -130
- data/lib/workflow-archiver.rb +1 -0
- metadata +6 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e07be8143cac430dfa7166ad246e81d64c85558
|
4
|
+
data.tar.gz: 23612c1949726b163ad89534727679fbb6da3968
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 779182bd0fd4cdabb6c4480aa7ceb5aa102bfe33c60a00317ea2b98ec4d8e62e2caef69c4cdc784ccd5cea3e91960fe2d077b076bccdb47c02c73af796755fd6
|
7
|
+
data.tar.gz: 679c197f47b3c409a0f61f3350eae177bfc230e9ae9b9723b090a328f2a87be711c4b26cafbb7680ce280f2b42a45d1e4ed708db48be7f1e1f4aaed9641ac0c0
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.0.0
|
data/lib/dor/archiver_version.rb
CHANGED
@@ -1,18 +1,16 @@
|
|
1
1
|
require 'rest_client'
|
2
2
|
require 'confstruct'
|
3
3
|
require 'lyber_core'
|
4
|
-
require '
|
4
|
+
require 'sequel'
|
5
5
|
|
6
6
|
module Dor
|
7
|
-
|
8
7
|
# Holds the parameters describing the workflow rows that need to be deleted
|
9
8
|
ArchiveCriteria = Struct.new(:repository, :druid, :datastream, :version) do
|
10
9
|
# @param [Array<Hash>] List of objects returned from {WorkflowArchiver#find_completed_objects}. It expects the following keys in the hash
|
11
|
-
# "REPOSITORY", "DRUID", "DATASTREAM". Note they are all caps strings, not symbols
|
12
10
|
def setup_from_query(row_hash)
|
13
|
-
self.repository = row_hash[
|
14
|
-
self.druid = row_hash[
|
15
|
-
self.datastream = row_hash[
|
11
|
+
self.repository = row_hash[:repository]
|
12
|
+
self.druid = row_hash[:druid]
|
13
|
+
self.datastream = row_hash[:datastream]
|
16
14
|
set_current_version
|
17
15
|
self
|
18
16
|
end
|
@@ -21,29 +19,27 @@ module Dor
|
|
21
19
|
# @return [Hash] Maps member names (as symbols, excluding version) to their non-nil values
|
22
20
|
def to_bind_hash
|
23
21
|
h = {}
|
24
|
-
members.reject{|mem| mem =~ /version/}.each do |m|
|
25
|
-
h[m
|
22
|
+
members.reject { |mem| mem =~ /version/ }.each do |m|
|
23
|
+
h[m] = send(m) if send(m)
|
26
24
|
end
|
27
25
|
h
|
28
26
|
end
|
29
27
|
|
30
28
|
def set_current_version
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
self.version = '1'
|
38
|
-
end
|
29
|
+
self.version = RestClient.get WorkflowArchiver.config.dor_service_uri + "/dor/v1/objects/#{druid}/versions/current"
|
30
|
+
rescue RestClient::InternalServerError => ise
|
31
|
+
raise unless ise.inspect =~ /Unable to find.*in fedora/
|
32
|
+
LyberCore::Log.warn ise.inspect.to_s
|
33
|
+
LyberCore::Log.warn "Moving workflow rows with version set to '1'"
|
34
|
+
self.version = '1'
|
39
35
|
end
|
40
36
|
end
|
41
37
|
|
42
38
|
class WorkflowArchiver
|
43
|
-
WF_COLUMNS = %w(
|
39
|
+
WF_COLUMNS = %w(id druid datastream process status error_msg error_txt datetime attempts lifecycle elapsed repository note priority lane_id)
|
44
40
|
|
45
41
|
# These attributes mostly used for testing
|
46
|
-
attr_reader :
|
42
|
+
attr_reader :errors
|
47
43
|
|
48
44
|
def self.config
|
49
45
|
@@conf ||= Confstruct::Configuration.new
|
@@ -52,63 +48,33 @@ module Dor
|
|
52
48
|
# Sets up logging and connects to the database. By default it reads values from constants:
|
53
49
|
# WORKFLOW_DB_LOGIN, WORKFLOW_DB_PASSWORD, WORKFLOW_DB_URI, DOR_SERVICE_URI but can be overridden with the opts Hash
|
54
50
|
# @param [Hash] opts Options to override database parameters
|
55
|
-
# @option opts [String] :login ('WORKFLOW_DB_LOGIN') Database login id
|
56
|
-
# @option opts [String] :password ('WORKFLOW_DB_PASSWORD') Database password
|
57
51
|
# @option opts [String] :db_uri ('WORKFLOW_DB_URI') Database uri
|
58
52
|
# @option opts [String] :wf_table ('workflow') Name of the active workflow table
|
59
53
|
# @option opts [String] :wfa_table ('workflow_archive') Name of the workflow archive table
|
60
|
-
# @option opts [String] :dor_service_uri ('DOR_SERVICE_URI') URI of the DOR Rest service
|
61
54
|
# @option opts [Integer] :retry_delay (5) Number of seconds to sleep between retries of database operations
|
62
|
-
def initialize(opts={})
|
63
|
-
@
|
64
|
-
@
|
65
|
-
@
|
66
|
-
@
|
67
|
-
@
|
68
|
-
@workflow_archive_table = (opts.include?(:wfa_table) ? opts[:wfa_table] : "workflow_archive")
|
69
|
-
@retry_delay = (opts.include?(:retry_delay) ? opts[:retry_delay] : 5)
|
70
|
-
|
55
|
+
def initialize(opts = {})
|
56
|
+
@conn = opts[:db_connection]
|
57
|
+
@db_uri = opts.fetch(:db_uri, WorkflowArchiver.config.db_uri).freeze
|
58
|
+
@workflow_table = opts.include?(:wf_table) ? opts[:wf_table] : 'workflow'
|
59
|
+
@workflow_archive_table = opts.include?(:wfa_table) ? opts[:wfa_table] : 'workflow_archive'
|
60
|
+
@retry_delay = opts.include?(:retry_delay) ? opts[:retry_delay] : 5
|
71
61
|
# initialize some counters
|
72
62
|
@errors = 0
|
73
63
|
@archived = 0
|
74
64
|
end
|
75
65
|
|
76
|
-
def
|
77
|
-
|
78
|
-
@conn = OCI8.new(@login, @password, $odb_pool)
|
79
|
-
@conn.autocommit = false
|
66
|
+
def conn
|
67
|
+
@conn ||= Sequel.connect(@db_uri)
|
80
68
|
end
|
81
69
|
|
82
|
-
|
83
|
-
$odb_pool.destroy if($odb_pool)
|
84
|
-
end
|
85
|
-
|
86
|
-
def bind_and_exec_sql(sql, workflow_info)
|
87
|
-
# LyberCore::Log.debug("Executing: #{sql}")
|
88
|
-
cursor = @conn.parse(sql)
|
89
|
-
|
90
|
-
workflow_info.to_bind_hash.each do |k, v|
|
91
|
-
param = ":#{k}"
|
92
|
-
#LyberCore::Log.debug("Setting: #{param} #{v}")
|
93
|
-
cursor.bind_param(param, v)
|
94
|
-
end
|
95
|
-
|
96
|
-
num_rows = cursor.exec
|
97
|
-
unless num_rows > 0
|
98
|
-
raise "Expected more than 0 rows to be updated"
|
99
|
-
end
|
100
|
-
ensure
|
101
|
-
cursor.close
|
102
|
-
end
|
103
|
-
|
104
|
-
# @return String The columns appended with comma and newline
|
70
|
+
# @return [String] The columns appended with comma and newline
|
105
71
|
def wf_column_string
|
106
|
-
WF_COLUMNS.
|
72
|
+
WF_COLUMNS.join(",\n")
|
107
73
|
end
|
108
74
|
|
109
|
-
# @return String The columns prepended with 'w.' and appended with comma and newline
|
75
|
+
# @return [String] The columns, each prefixed with the workflow table name and '.', joined with comma and newline
|
110
76
|
def wf_archive_column_string
|
111
|
-
WF_COLUMNS.
|
77
|
+
WF_COLUMNS.map { |col| "#{@workflow_table}.#{col}" }.join(",\n")
|
112
78
|
end
|
113
79
|
|
114
80
|
# Use this as a one-shot method to archive all the steps of an object's particular datastream
|
@@ -120,17 +86,14 @@ module Dor
|
|
120
86
|
# @param [String] version
|
121
87
|
def archive_one_datastream(repository, druid, datastream, version)
|
122
88
|
criteria = [ArchiveCriteria.new(repository, druid, datastream, version)]
|
123
|
-
connect_to_db
|
124
89
|
archive_rows criteria
|
125
|
-
ensure
|
126
|
-
@conn.logoff if(@conn)
|
127
90
|
end
|
128
91
|
|
129
92
|
# Copies rows from the workflow table to the workflow_archive table, then deletes the rows from workflow
|
130
93
|
# Both operations must complete, or they get rolled back
|
131
94
|
# @param [Array<ArchiveCriteria>] objs List of objects returned from {#find_completed_objects} and mapped to an array of ArchiveCriteria objects.
|
132
95
|
def archive_rows(objs)
|
133
|
-
|
96
|
+
objs.each do |obj|
|
134
97
|
tries = 0
|
135
98
|
begin
|
136
99
|
tries += 1
|
@@ -138,11 +101,8 @@ module Dor
|
|
138
101
|
@archived += 1
|
139
102
|
rescue => e
|
140
103
|
LyberCore::Log.error "Rolling back transaction due to: #{e.inspect}\n" << e.backtrace.join("\n") << "\n!!!!!!!!!!!!!!!!!!"
|
141
|
-
|
142
|
-
|
143
|
-
# Retry this druid up to 3 times
|
144
|
-
if tries < 3
|
145
|
-
LyberCore::Log.error " Retrying archive operation in #{@retry_delay.to_s} seconds..."
|
104
|
+
if tries < 3 # Retry this druid up to 3 times
|
105
|
+
LyberCore::Log.error " Retrying archive operation in #{@retry_delay} seconds..."
|
146
106
|
sleep @retry_delay
|
147
107
|
retry
|
148
108
|
end
|
@@ -150,55 +110,55 @@ module Dor
|
|
150
110
|
|
151
111
|
@errors += 1
|
152
112
|
if @errors >= 3
|
153
|
-
LyberCore::Log.fatal(
|
113
|
+
LyberCore::Log.fatal('Too many errors. Archiving halted')
|
154
114
|
break
|
155
115
|
end
|
156
116
|
end
|
157
|
-
|
158
117
|
end # druids.each
|
159
118
|
end
|
160
119
|
|
161
120
|
# @param [ArchiveCriteria] workflow_info contains parameters on the workflow rows to archive
|
162
121
|
def do_one_archive(workflow_info)
|
163
122
|
LyberCore::Log.info "Archiving #{workflow_info.inspect}"
|
164
|
-
|
165
|
-
|
166
|
-
copy_sql =<<-EOSQL
|
123
|
+
copy_sql = <<-EOSQL
|
167
124
|
insert into #{@workflow_archive_table} (
|
168
|
-
#{wf_column_string}
|
169
|
-
|
125
|
+
#{wf_column_string},
|
126
|
+
version
|
170
127
|
)
|
171
128
|
select
|
172
|
-
#{wf_archive_column_string}
|
173
|
-
#{workflow_info.version} as
|
174
|
-
from #{@workflow_table}
|
175
|
-
where
|
176
|
-
and
|
129
|
+
#{wf_archive_column_string},
|
130
|
+
#{workflow_info.version} as version
|
131
|
+
from #{@workflow_table}
|
132
|
+
where #{@workflow_table}.druid = :druid
|
133
|
+
and #{@workflow_table}.datastream = :datastream
|
177
134
|
EOSQL
|
178
135
|
|
179
|
-
delete_sql = "delete #{@workflow_table} where druid = :
|
136
|
+
delete_sql = "delete from #{@workflow_table} where druid = :druid and datastream = :datastream "
|
180
137
|
|
181
138
|
if(workflow_info.repository)
|
182
|
-
copy_sql
|
183
|
-
delete_sql
|
139
|
+
copy_sql += "and #{@workflow_table}.repository = :repository"
|
140
|
+
delete_sql += 'and repository = :repository'
|
184
141
|
else
|
185
|
-
copy_sql
|
186
|
-
delete_sql
|
142
|
+
copy_sql += "and #{@workflow_table}.repository IS NULL"
|
143
|
+
delete_sql += 'and repository IS NULL'
|
187
144
|
end
|
188
145
|
|
189
|
-
|
146
|
+
conn.transaction do
|
147
|
+
conn.run Sequel::SQL::PlaceholderLiteralString.new(copy_sql, workflow_info.to_bind_hash)
|
190
148
|
|
191
|
-
|
192
|
-
bind_and_exec_sql(delete_sql, workflow_info)
|
149
|
+
LyberCore::Log.debug ' Removing old workflow rows'
|
193
150
|
|
194
|
-
|
151
|
+
conn.run Sequel::SQL::PlaceholderLiteralString.new(delete_sql, workflow_info.to_bind_hash)
|
152
|
+
end
|
195
153
|
end
|
196
154
|
|
197
155
|
# Finds objects where all workflow steps are complete
|
198
|
-
#
|
199
|
-
#
|
156
|
+
# @return [Enumerator<Hash>] (when no block is given; otherwise each hash is yielded) each hash has the following symbol keys:
|
157
|
+
# {:repository=>"dor", :druid=>"druid:345", :datastream=>"googleScannedBookWF"}
|
200
158
|
def find_completed_objects
|
201
|
-
|
159
|
+
return to_enum(:find_completed_objects) unless block_given?
|
160
|
+
|
161
|
+
completed_query = <<-EOSQL
|
202
162
|
select distinct repository, datastream, druid
|
203
163
|
from workflow w1
|
204
164
|
where w1.status in ('completed', 'skipped')
|
@@ -213,18 +173,15 @@ module Dor
|
|
213
173
|
)
|
214
174
|
EOSQL
|
215
175
|
|
216
|
-
|
217
|
-
|
218
|
-
while r = cursor.fetch_hash
|
219
|
-
rows << r
|
176
|
+
conn.fetch(completed_query) do |row|
|
177
|
+
yield row
|
220
178
|
end
|
221
|
-
rows
|
222
179
|
end
|
223
180
|
|
224
181
|
# @param [Array<Hash>] rows result from #find_completed_objects
|
225
182
|
# @return [Array<ArchiveCriteria>] each result mapped to an ArchiveCriteria object
|
226
183
|
def map_result_to_criteria(rows)
|
227
|
-
|
184
|
+
rows.lazy.map do |r|
|
228
185
|
begin
|
229
186
|
ArchiveCriteria.new.setup_from_query(r)
|
230
187
|
rescue => e
|
@@ -232,47 +189,22 @@ module Dor
|
|
232
189
|
LyberCore::Log.error("#{e.inspect}\n" + e.backtrace.join("\n"))
|
233
190
|
nil
|
234
191
|
end
|
235
|
-
end
|
236
|
-
criteria.reject {|c| c.nil?}
|
237
|
-
end
|
238
|
-
|
239
|
-
def simple_sql_exec(sql)
|
240
|
-
@conn.exec(sql)
|
241
|
-
rescue Exception => e
|
242
|
-
LyberCore::Log.warn "Ignoring error: #{e.message}\n while trying to execute: " << sql
|
243
|
-
end
|
244
|
-
|
245
|
-
def with_indexing_disabled(&block)
|
246
|
-
simple_sql_exec("drop index ds_wf_ar_bitmap_idx")
|
247
|
-
simple_sql_exec("drop index repo_wf_ar_bitmap_idx")
|
248
|
-
yield
|
249
|
-
ensure
|
250
|
-
simple_sql_exec("create bitmap index ds_wf_ar_bitmap_idx on workflow_archive (datastream)")
|
251
|
-
simple_sql_exec("create bitmap index repo_wf_ar_bitmap_idx on workflow_archive (repository)")
|
192
|
+
end.reject { |r| r.nil? }
|
252
193
|
end
|
253
194
|
|
254
195
|
# Does the work of finding completed objects and archiving the rows
|
255
196
|
def archive
|
256
|
-
connect_to_db
|
257
197
|
objs = find_completed_objects
|
258
198
|
|
259
|
-
if objs.
|
260
|
-
LyberCore::Log.info
|
199
|
+
if objs.none?
|
200
|
+
LyberCore::Log.info 'Nothing to archive'
|
261
201
|
exit true
|
262
202
|
end
|
263
|
-
|
264
|
-
LyberCore::Log.info "Found #{objs.size.to_s} completed workflows"
|
265
|
-
objs = objs.first(600) # FIXME: temporarily limit objs processed until we fix cron not to fire if job still running
|
266
|
-
|
203
|
+
LyberCore::Log.info "Found #{objs.size} completed workflows"
|
267
204
|
archiving_criteria = map_result_to_criteria(objs)
|
268
|
-
|
205
|
+
archive_rows(archiving_criteria)
|
269
206
|
|
270
|
-
LyberCore::Log.info "DONE! Processed #{@archived.to_s} objects with #{@errors.to_s} errors" if
|
271
|
-
ensure
|
272
|
-
@conn.logoff
|
273
|
-
destroy_pool
|
207
|
+
LyberCore::Log.info "DONE! Processed #{@archived.to_s} objects with #{@errors.to_s} errors" if @errors < 3
|
274
208
|
end
|
275
|
-
|
276
209
|
end
|
277
|
-
|
278
210
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'dor/workflow_archiver'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: workflow-archiver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Willy Mene
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lyber-core
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: sequel
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -95,21 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: active-fedora
|
98
|
+
name: sqlite3
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|
114
100
|
requirements:
|
115
101
|
- - ">="
|
@@ -132,6 +118,7 @@ files:
|
|
132
118
|
- VERSION
|
133
119
|
- lib/dor/archiver_version.rb
|
134
120
|
- lib/dor/workflow_archiver.rb
|
121
|
+
- lib/workflow-archiver.rb
|
135
122
|
homepage:
|
136
123
|
licenses: []
|
137
124
|
metadata: {}
|
@@ -151,7 +138,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
151
138
|
version: 1.3.6
|
152
139
|
requirements: []
|
153
140
|
rubyforge_project:
|
154
|
-
rubygems_version: 2.
|
141
|
+
rubygems_version: 2.4.5.1
|
155
142
|
signing_key:
|
156
143
|
specification_version: 4
|
157
144
|
summary: Enables archiving of DOR workflows
|