workflow-archiver 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/VERSION +2 -0
- data/lib/dor/archiver_version.rb +11 -0
- data/lib/dor/workflow_archiver.rb +274 -0
- metadata +130 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OGY0MWY1NjlmNmVlMzI3ZmRiMzg5ZThkNzhjMzFlYzQyMzdlZTVmOQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MGNlY2I2OTlhNGYzNWRhODQ4N2I2YmEwMzc4Y2NjMTkzMzViNzg4Zg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NDkyYjQ5ZmFmOTA2ZDEyMmNjOTU4Njk0ZTJkYTkxNzllMjIxMmIxNmUxZjY4
|
10
|
+
NmI5YTQ0OTQ2ZWZhMjhkY2MxZTZiMWYwZjRlYzEzYTBmOWEyNzY5OWE3N2My
|
11
|
+
NjQ4ZTI4ODllODM4OWQ4YWM5NDk4OGVhOGJkZDljMDliZGNlMzM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZDkzMjY1YzdmNjE4NDAwMTQ1MGE1NzU0NGMxOGQ2NWJmOTA1MGI5MDhiYjRh
|
14
|
+
MzY4YTlmZTRmOTBkMTQ1YjI4OTcxZTA4NzA5NjVjMzU1ZWJiNGQ3ODk0ZTM1
|
15
|
+
Mzk3OTIxMzA0ZTJiNWExYzkyODgwMGQ1YjViMmYzNGJjNGVmNzY=
|
data/lib/dor/workflow_archiver.rb
ADDED
@@ -0,0 +1,274 @@
|
|
1
|
+
require 'rest_client'
|
2
|
+
require 'oci8'
|
3
|
+
|
4
|
+
module Dor
|
5
|
+
|
6
|
+
# Holds the parameters about the workflow rows that need to be deleted.
# repository, druid and datastream select the rows; version is the object
# version that gets recorded with the archived rows.
ArchiveCriteria = Struct.new(:repository, :druid, :datastream, :version) do
  # Fills this criteria in from one result row, then looks up the object's current version.
  # @param [Hash] row_hash a single row as returned from {WorkflowArchiver#find_completed_objects}.
  #   It expects the following keys in the hash: "REPOSITORY", "DRUID", "DATASTREAM".
  #   Note they are all caps strings, not symbols
  # @return [ArchiveCriteria] self
  def setup_from_query(row_hash)
    self.repository = row_hash["REPOSITORY"]
    self.druid = row_hash["DRUID"]
    self.datastream = row_hash["DATASTREAM"]
    set_current_version
    self
  end

  # Removes version from list of members, then picks out non nil members and builds a hash of column_name => column_value
  # @return [Hash] Maps upper-cased member names to their non-nil values
  def to_bind_hash
    binds = {}
    members.each do |member|
      # version is never bound; the archiver writes it into the SQL text itself
      next if member =~ /version/

      value = self[member]
      binds[member.upcase] = value if value
    end
    binds
  end

  # Sets version to whatever the DOR service reports as the current version of the druid.
  # When the service answers with an InternalServerError whose description says the object
  # could not be found in fedora, a warning is logged and version falls back to '1'.
  # @raise [RestClient::InternalServerError] for any other internal server error
  def set_current_version
    self.version = RestClient.get(WorkflowArchiver.config.dor_service_uri + "/dor/v1/objects/#{druid}/versions/current")
  rescue RestClient::InternalServerError => ise
    raise unless ise.inspect =~ /Unable to find.*in fedora/
    LyberCore::Log.warn ise.inspect
    LyberCore::Log.warn "Moving workflow rows with version set to '1'"
    self.version = '1'
  end
end
|
39
|
+
|
40
|
+
# Copies the workflow rows of completed objects from the active workflow table
# into the workflow archive table and deletes them from the active table,
# working through an OCI8 database connection.
class WorkflowArchiver
  # Columns copied from the workflow table into the archive table (VERSION is added separately)
  WF_COLUMNS = %w(ID DRUID DATASTREAM PROCESS STATUS ERROR_MSG ERROR_TXT DATETIME ATTEMPTS LIFECYCLE ELAPSED REPOSITORY NOTE PRIORITY)

  # These attributes mostly used for testing
  attr_reader :conn, :errors

  # @return [Confstruct::Configuration] configuration shared by every archiver, created on first use
  def self.config
    @@conf ||= Confstruct::Configuration.new
  end

  # Sets up the connection parameters and counters. By default it reads values from
  # WorkflowArchiver.config (db_login, db_password, db_uri, dor_service_uri) but each one
  # can be overridden with the opts Hash. No database connection is opened here; see #connect_to_db
  # @param [Hash] opts Options to override database parameters
  # @option opts [String] :login (config.db_login) Database login id
  # @option opts [String] :password (config.db_password) Database password
  # @option opts [String] :db_uri (config.db_uri) Database uri
  # @option opts [String] :wf_table ('workflow') Name of the active workflow table
  # @option opts [String] :wfa_table ('workflow_archive') Name of the workflow archive table
  # @option opts [String] :dor_service_uri (config.dor_service_uri) URI of the DOR Rest service
  # @option opts [Integer] :retry_delay (5) Number of seconds to sleep between retries of database operations
  def initialize(opts={})
    # fetch with a block keeps the "key present, even with a nil value, wins" behaviour
    @login                  = opts.fetch(:login)           { WorkflowArchiver.config.db_login }
    @password               = opts.fetch(:password)        { WorkflowArchiver.config.db_password }
    @db_uri                 = opts.fetch(:db_uri)          { WorkflowArchiver.config.db_uri }
    @dor_service_uri        = opts.fetch(:dor_service_uri) { WorkflowArchiver.config.dor_service_uri }
    @workflow_table         = opts.fetch(:wf_table, "workflow")
    @workflow_archive_table = opts.fetch(:wfa_table, "workflow_archive")
    @retry_delay            = opts.fetch(:retry_delay, 5)

    # initialize some counters
    @errors = 0
    @archived = 0
  end

  # Opens a connection through the process-wide connection pool (created on first use)
  # and turns autocommit off so that copy + delete can be committed together.
  def connect_to_db
    $odb_pool ||= OCI8::ConnectionPool.new(1, 5, 2, @login, @password, @db_uri)
    @conn = OCI8.new(@login, @password, $odb_pool)
    @conn.autocommit = false
  end

  # Destroys the process-wide connection pool, if there is one.
  def destroy_pool
    return unless $odb_pool

    $odb_pool.destroy
    # Forget the destroyed pool so that a later connect_to_db builds a fresh one
    $odb_pool = nil
  end

  # Parses the statement, binds every entry of workflow_info.to_bind_hash as ":KEY" and executes it.
  # @param [String] sql statement containing the bind placeholders
  # @param [ArchiveCriteria] workflow_info supplies the bind values
  # @raise [RuntimeError] when the value returned by the cursor's exec is not greater than 0
  def bind_and_exec_sql(sql, workflow_info)
    # LyberCore::Log.debug("Executing: #{sql}")
    cursor = @conn.parse(sql)

    workflow_info.to_bind_hash.each do |k, v|
      param = ":#{k}"
      #LyberCore::Log.debug("Setting: #{param} #{v}")
      cursor.bind_param(param, v)
    end

    num_rows = cursor.exec
    raise "Expected more than 0 rows to be updated" unless num_rows > 0
  ensure
    # cursor is still nil when parse itself failed; don't hide that error behind a NoMethodError
    cursor.close if cursor
  end

  # @return String The columns appended with comma and newline
  def wf_column_string
    WF_COLUMNS.map { |col| "#{col},\n" }.join
  end

  # @return String The columns prepended with 'w.' and appended with comma and newline
  def wf_archive_column_string
    WF_COLUMNS.map { |col| "w.#{col},\n" }.join
  end

  # Use this as a one-shot method to archive all the steps of an object's particular datastream
  # It will connect to the database, archive the rows, then logoff. Assumes caller will set version (like the Dor REST service)
  # @note Caller of this method must handle destroying of the connection pool
  # @param [String] repository
  # @param [String] druid
  # @param [String] datastream
  # @param [String] version
  def archive_one_datastream(repository, druid, datastream, version)
    criteria = [ArchiveCriteria.new(repository, druid, datastream, version)]
    connect_to_db
    archive_rows criteria
  ensure
    @conn.logoff if @conn
  end

  # Copies rows from the workflow table to the workflow_archive table, then deletes the rows from workflow
  # Both operations must complete, or they get rolled back.
  # Each object is tried up to 3 times; once 3 objects in total have failed, archiving stops.
  # @param [Array<ArchiveCriteria>] objs List of objects returned from {#find_completed_objects} and mapped to an array of ArchiveCriteria objects.
  def archive_rows(objs)
    # Array() would break a lone Struct apart into its member values, so wrap that case by hand
    list = objs.is_a?(Struct) ? [objs] : Array(objs)

    list.each do |obj|
      tries = 0
      begin
        tries += 1
        do_one_archive(obj)
        @archived += 1
      rescue => e
        LyberCore::Log.error "Rolling back transaction due to: #{e.inspect}\n" << e.backtrace.join("\n") << "\n!!!!!!!!!!!!!!!!!!"
        @conn.rollback

        # Retry this druid up to 3 times
        if tries < 3
          LyberCore::Log.error "  Retrying archive operation in #{@retry_delay} seconds..."
          sleep @retry_delay
          retry
        end
        LyberCore::Log.error "  Too many retries.  Giving up on #{obj.inspect}"

        @errors += 1
        if @errors >= 3
          LyberCore::Log.fatal("Too many errors. Archiving halted")
          break
        end
      end
    end
  end

  # Copies the matching rows into the archive table, deletes them from the workflow table
  # and commits. Rolling back on failure is left to the caller (see #archive_rows).
  # @param [ArchiveCriteria] workflow_info contains parameters on the workflow rows to archive
  # @raise [ArgumentError] when workflow_info.version is not a base-10 integer
  def do_one_archive(workflow_info)
    LyberCore::Log.info "Archiving #{workflow_info.inspect}"

    # The version ends up in the SQL text itself (it is not a bind variable) and may come
    # straight from a caller or from a REST response, so only let a plain integer through.
    version = Integer(workflow_info.version.to_s, 10)

    copy_sql = <<-EOSQL
      insert into #{@workflow_archive_table} (
        #{wf_column_string}
        VERSION
      )
      select
        #{wf_archive_column_string}
        #{version} as VERSION
      from #{@workflow_table} w
      where w.druid =    :DRUID
      and w.datastream = :DATASTREAM
    EOSQL

    delete_sql = "delete #{@workflow_table} where druid = :DRUID and datastream = :DATASTREAM "

    # A nil repository is left out of the bind hash, so it has to be matched with IS NULL
    if workflow_info.repository
      copy_sql << "and w.repository = :REPOSITORY"
      delete_sql << "and repository = :REPOSITORY"
    else
      copy_sql << "and w.repository IS NULL"
      delete_sql << "and repository IS NULL"
    end

    bind_and_exec_sql(copy_sql, workflow_info)

    LyberCore::Log.debug "  Removing old workflow rows"
    bind_and_exec_sql(delete_sql, workflow_info)

    @conn.commit
  end

  # Finds objects where all workflow steps are complete
  # Returns an array of hashes, each hash having the following keys:
  # {"REPOSITORY"=>"dor", "DRUID"=>"druid:345", "DATASTREAM"=>"googleScannedBookWF"}
  def find_completed_objects
    # NOTE(review): w1.repository = w2.repository never matches rows whose repository is NULL,
    # so for those rows the "not exists" check cannot see unfinished steps - confirm whether
    # NULL repositories occur in the workflow table.
    completed_query = <<-EOSQL
      select distinct repository, datastream, druid
      from #{@workflow_table} w1
      where w1.status in ('completed', 'skipped')
      and not exists
      (
         select *
         from #{@workflow_table} w2
         where w1.repository = w2.repository
         and w1.datastream = w2.datastream
         and w1.druid = w2.druid
         and w2.status not in ('completed', 'skipped')
      )
    EOSQL

    rows = []
    cursor = @conn.exec(completed_query)
    begin
      while (row = cursor.fetch_hash)
        rows << row
      end
    ensure
      cursor.close
    end
    rows
  end

  # Rows whose criteria cannot be built are logged and left out of the result.
  # @param [Array<Hash>] rows result from #find_completed_objects
  # @return [Array<ArchiveCriteria>] each result mapped to an ArchiveCriteria object
  def map_result_to_criteria(rows)
    criteria = rows.map do |r|
      begin
        ArchiveCriteria.new.setup_from_query(r)
      rescue => e
        LyberCore::Log.error("Skipping archiving of #{r['DRUID']}")
        LyberCore::Log.error("#{e.inspect}\n" + e.backtrace.join("\n"))
        nil
      end
    end
    criteria.compact
  end

  # Executes the statement as-is; any StandardError is logged as a warning and otherwise ignored.
  # @param [String] sql statement to execute
  def simple_sql_exec(sql)
    @conn.exec(sql)
  rescue StandardError => e
    # Only ordinary errors are ignored; interrupts and exit requests still propagate
    LyberCore::Log.warn "Ignoring error: #{e.message}\n while trying to execute: " << sql
  end

  # Drops the two bitmap indexes, runs the given block, then recreates the indexes on the
  # archive table - also when the block raises. Failures of the index statements themselves
  # are only logged (see #simple_sql_exec).
  def with_indexing_disabled(&block)
    simple_sql_exec("drop index ds_wf_ar_bitmap_idx")
    simple_sql_exec("drop index repo_wf_ar_bitmap_idx")
    yield
  ensure
    simple_sql_exec("create bitmap index ds_wf_ar_bitmap_idx on #{@workflow_archive_table} (datastream)")
    simple_sql_exec("create bitmap index repo_wf_ar_bitmap_idx on #{@workflow_archive_table} (repository)")
  end

  # Does the work of finding completed objects and archiving the rows.
  # This method does not call #connect_to_db itself, yet it always logs off and destroys the
  # connection pool when it is done. When nothing is found it ends the process via `exit true`.
  def archive
    objs = find_completed_objects

    if objs.empty?
      LyberCore::Log.info "Nothing to archive"
      exit true
    end

    LyberCore::Log.info "Found #{objs.size} completed workflows"

    archiving_criteria = map_result_to_criteria(objs)
    with_indexing_disabled { archive_rows(archiving_criteria) }

    LyberCore::Log.info "DONE! Processed #{@archived} objects with #{@errors} errors" if @errors < 3
  ensure
    # @conn is nil when no connection was ever opened
    @conn.logoff if @conn
    destroy_pool
  end

end
|
273
|
+
|
274
|
+
end
|
metadata
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: workflow-archiver
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.2.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Willy Mene
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-07-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: lyber-core
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rest-client
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ruby-oci8
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: active-fedora
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Can be used standalone or used as a library
|
98
|
+
email:
|
99
|
+
- wmene@stanford.edu
|
100
|
+
executables: []
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- VERSION
|
105
|
+
- lib/dor/archiver_version.rb
|
106
|
+
- lib/dor/workflow_archiver.rb
|
107
|
+
homepage:
|
108
|
+
licenses: []
|
109
|
+
metadata: {}
|
110
|
+
post_install_message:
|
111
|
+
rdoc_options: []
|
112
|
+
require_paths:
|
113
|
+
- lib
|
114
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - ! '>='
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
119
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ! '>='
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: 1.3.6
|
124
|
+
requirements: []
|
125
|
+
rubyforge_project:
|
126
|
+
rubygems_version: 2.2.2
|
127
|
+
signing_key:
|
128
|
+
specification_version: 4
|
129
|
+
summary: Enables archiving of DOR workflows
|
130
|
+
test_files: []
|