workflow-archiver 1.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OGY0MWY1NjlmNmVlMzI3ZmRiMzg5ZThkNzhjMzFlYzQyMzdlZTVmOQ==
5
+ data.tar.gz: !binary |-
6
+ MGNlY2I2OTlhNGYzNWRhODQ4N2I2YmEwMzc4Y2NjMTkzMzViNzg4Zg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NDkyYjQ5ZmFmOTA2ZDEyMmNjOTU4Njk0ZTJkYTkxNzllMjIxMmIxNmUxZjY4
10
+ NmI5YTQ0OTQ2ZWZhMjhkY2MxZTZiMWYwZjRlYzEzYTBmOWEyNzY5OWE3N2My
11
+ NjQ4ZTI4ODllODM4OWQ4YWM5NDk4OGVhOGJkZDljMDliZGNlMzM=
12
+ data.tar.gz: !binary |-
13
+ ZDkzMjY1YzdmNjE4NDAwMTQ1MGE1NzU0NGMxOGQ2NWJmOTA1MGI5MDhiYjRh
14
+ MzY4YTlmZTRmOTBkMTQ1YjI4OTcxZTA4NzA5NjVjMzU1ZWJiNGQ3ODk0ZTM1
15
+ Mzk3OTIxMzA0ZTJiNWExYzkyODgwMGQ1YjViMmYzNGJjNGVmNzY=
data/VERSION ADDED
@@ -0,0 +1,2 @@
1
+ 1.2.2
2
+
@@ -0,0 +1,11 @@
1
module Dor
  # Define the helper and the constant only once, even if this file gets loaded again.
  unless Dor.const_defined? :ARCHIVER_VERSION
    # Reads the gem version from the VERSION file located two directories above this file.
    # The result is memoized on the module.
    # @return [String] the version string with all surrounding whitespace removed
    def self.archiver_version
      # strip (rather than chomp) so that a VERSION file ending in a blank line
      # does not leave a trailing newline inside the version string
      @archiver_version ||= File.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION')).strip
    end

    ARCHIVER_VERSION = self.archiver_version
  end
end
@@ -0,0 +1,274 @@
1
+ require 'rest_client'
2
+ require 'oci8'
3
+
4
+ module Dor
5
+
6
# Holds the parameters describing the workflow rows that need to be archived and deleted
ArchiveCriteria = Struct.new(:repository, :druid, :datastream, :version) do
  # Fills in repository, druid and datastream from one query row, then looks up the current version.
  # @param [Hash] row_hash One row returned from {WorkflowArchiver#find_completed_objects}. It is read with the keys
  #   "REPOSITORY", "DRUID", "DATASTREAM". Note they are all caps strings, not symbols
  # @return [ArchiveCriteria] self
  def setup_from_query(row_hash)
    %w(REPOSITORY DRUID DATASTREAM).each do |column|
      self[column.downcase] = row_hash[column]
    end
    set_current_version
    self
  end

  # Leaves out the version member, then builds a hash of column_name => column_value from the remaining non-nil members
  # @return [Hash] Maps column names (member names passed through swapcase, i.e. ALL caps) to non-nil column values
  def to_bind_hash
    bindable = members.reject { |name| name =~ /version/ }
    bindable.inject({}) do |binds, name|
      value = self[name]
      binds[name.swapcase] = value if value
      binds
    end
  end

  # Asks the DOR service for the current version of the druid and stores the response in version.
  # When the service answers with an internal server error saying the object cannot be found in fedora,
  # the version falls back to '1'; any other internal server error is re-raised.
  def set_current_version
    url = WorkflowArchiver.config.dor_service_uri + "/dor/v1/objects/#{druid}/versions/current"
    self.version = RestClient.get(url)
  rescue RestClient::InternalServerError => ise
    details = ise.inspect
    raise unless details =~ /Unable to find.*in fedora/
    LyberCore::Log.warn "#{details}"
    LyberCore::Log.warn "Moving workflow rows with version set to '1'"
    self.version = '1'
  end
end
39
+
40
+ class WorkflowArchiver
41
# Column names copied from the workflow table into the archive table, in insert order.
# Frozen so the shared list cannot be altered by accident at runtime.
WF_COLUMNS = %w(ID DRUID DATASTREAM PROCESS STATUS ERROR_MSG ERROR_TXT DATETIME ATTEMPTS LIFECYCLE ELAPSED REPOSITORY NOTE PRIORITY).freeze
42
+
43
+ # These attributes mostly used for testing
44
+ attr_reader :conn, :errors
45
+
46
# Returns the class-wide configuration object, creating an empty one on first access.
# @return [Confstruct::Configuration]
def self.config
  @@conf = Confstruct::Configuration.new unless defined?(@@conf) && @@conf
  @@conf
end
49
+
50
# Stores the database and service settings and resets the counters. No connection is opened here.
# Login, password, database uri and DOR service uri default to the values held by WorkflowArchiver.config
# (db_login, db_password, db_uri, dor_service_uri) but can be overridden with the opts Hash
# @param [Hash] opts Options to override database parameters
# @option opts [String] :login (config.db_login) Database login id
# @option opts [String] :password (config.db_password) Database password
# @option opts [String] :db_uri (config.db_uri) Database uri
# @option opts [String] :wf_table ('workflow') Name of the active workflow table
# @option opts [String] :wfa_table ('workflow_archive') Name of the workflow archive table
# @option opts [String] :dor_service_uri (config.dor_service_uri) URI of the DOR Rest service
# @option opts [Integer] :retry_delay (5) Number of seconds to sleep between retries of database operations
def initialize(opts={})
  # fetch with a block only consults the configuration when the key is absent;
  # a key that is present with a nil value is kept as nil
  @login                  = opts.fetch(:login) { WorkflowArchiver.config.db_login }
  @password               = opts.fetch(:password) { WorkflowArchiver.config.db_password }
  @db_uri                 = opts.fetch(:db_uri) { WorkflowArchiver.config.db_uri }
  @dor_service_uri        = opts.fetch(:dor_service_uri) { WorkflowArchiver.config.dor_service_uri }
  @workflow_table         = opts.fetch(:wf_table) { "workflow" }
  @workflow_archive_table = opts.fetch(:wfa_table) { "workflow_archive" }
  @retry_delay            = opts.fetch(:retry_delay) { 5 }

  # counters reported at the end of an archive run
  @errors = 0
  @archived = 0
end
73
+
74
# Opens a database connection through the process-wide connection pool ($odb_pool),
# creating the pool on first use, and turns autocommit off on the new connection.
# NOTE(review): the pool arguments 1, 5, 2 are presumably min, max and increment — confirm against ruby-oci8.
def connect_to_db
  unless $odb_pool
    $odb_pool = OCI8::ConnectionPool.new(1, 5, 2, @login, @password, @db_uri)
  end

  @conn = OCI8.new(@login, @password, $odb_pool)
  @conn.autocommit = false
end
79
+
80
# Destroys the process-wide connection pool, if one exists, and forgets it.
# The global is cleared so that a later connect_to_db builds a fresh pool
# instead of reusing the destroyed one.
# @return the result of the pool's destroy call, or nil when there was no pool
def destroy_pool
  pool = $odb_pool
  $odb_pool = nil
  pool.destroy if pool
end
83
+
84
# Parses the statement, binds every entry of workflow_info.to_bind_hash as ":KEY" and executes it.
# @param [String] sql statement containing the named bind placeholders
# @param [#to_bind_hash] workflow_info supplies the bind names and values
# @raise [RuntimeError] when the statement affected no rows
def bind_and_exec_sql(sql, workflow_info)
  # LyberCore::Log.debug("Executing: #{sql}")
  cursor = @conn.parse(sql)

  workflow_info.to_bind_hash.each do |column, value|
    # LyberCore::Log.debug("Setting: :#{column} #{value}")
    cursor.bind_param(":#{column}", value)
  end

  num_rows = cursor.exec
  raise "Expected more than 0 rows to be updated" unless num_rows > 0
ensure
  # cursor is nil when parse itself failed; closing unconditionally would
  # replace the real error with a NoMethodError
  cursor.close if cursor
end
101
+
102
# @return [String] Every entry of WF_COLUMNS followed by a comma and a newline
def wf_column_string
  WF_COLUMNS.map { |col| "#{col},\n" }.join
end
106
+
107
# @return [String] Every entry of WF_COLUMNS prefixed with 'w.' and followed by a comma and a newline
def wf_archive_column_string
  WF_COLUMNS.map { |col| "w.#{col},\n" }.join
end
111
+
112
# One-shot helper that archives all the steps of a single datastream of one object.
# It connects to the database, archives the rows, then logs off. The caller supplies the version
# (like the Dor REST service does); no version lookup happens here.
# @note Caller of this method must handle destroying of the connection pool
# @param [String] repository
# @param [String] druid
# @param [String] datastream
# @param [String] version
def archive_one_datastream(repository, druid, datastream, version)
  connect_to_db
  archive_rows([ArchiveCriteria.new(repository, druid, datastream, version)])
ensure
  # log off whatever connection exists, even when connecting or archiving failed
  @conn.logoff if @conn
end
126
+
127
# Archives each object in turn via do_one_archive. A failing object is rolled back and attempted
# up to 3 times in total, sleeping @retry_delay seconds between attempts; after that it is counted
# as an error. Once the error count reaches 3 the remaining objects are not processed.
# @param [Array<ArchiveCriteria>] objs List of objects returned from {#find_completed_objects} and mapped to an array of ArchiveCriteria objects.
def archive_rows(objs)
  Array(objs).each do |criteria|
    attempts = 0
    begin
      attempts += 1
      do_one_archive(criteria)
      @archived += 1
    rescue => err
      LyberCore::Log.error "Rolling back transaction due to: #{err.inspect}\n#{err.backtrace.join("\n")}\n!!!!!!!!!!!!!!!!!!"
      @conn.rollback

      # Retry this object until it has been attempted 3 times
      if attempts < 3
        LyberCore::Log.error " Retrying archive operation in #{@retry_delay} seconds..."
        sleep @retry_delay
        retry
      end

      LyberCore::Log.error " Too many retries. Giving up on #{criteria.inspect}"
      @errors += 1
      next unless @errors >= 3

      LyberCore::Log.fatal("Too many errors. Archiving halted")
      break
    end
  end
end
158
+
159
# Copies the matching rows from the workflow table into the archive table, deletes them from the
# workflow table and commits. Nothing is committed when either statement raises.
# @param [ArchiveCriteria] workflow_info contains parameters on the workflow rows to archive
# @raise [ArgumentError] when the version is not a plain numeric literal
def do_one_archive(workflow_info)
  LyberCore::Log.info "Archiving #{workflow_info.inspect}"

  # The version is written straight into the SQL text (it is not a bind variable), so only a
  # plain number is accepted; anything else is rejected before a statement is built.
  version = workflow_info.version.to_s.strip
  unless version =~ /\A\d+(\.\d+)?\z/
    raise ArgumentError, "Refusing to archive #{workflow_info.druid}: version #{version.inspect} is not numeric"
  end

  copy_sql = <<-EOSQL
    insert into #{@workflow_archive_table} (
      #{wf_column_string}
      VERSION
    )
    select
      #{wf_archive_column_string}
      #{version} as VERSION
    from #{@workflow_table} w
    where w.druid = :DRUID
    and w.datastream = :DATASTREAM
  EOSQL

  delete_sql = "delete #{@workflow_table} where druid = :DRUID and datastream = :DATASTREAM "

  # A nil repository has no bind value, so it has to be matched with IS NULL instead
  repository_clause = workflow_info.repository ? "repository = :REPOSITORY" : "repository IS NULL"
  copy_sql << "and w.#{repository_clause}"
  delete_sql << "and #{repository_clause}"

  bind_and_exec_sql(copy_sql, workflow_info)

  LyberCore::Log.debug " Removing old workflow rows"
  bind_and_exec_sql(delete_sql, workflow_info)

  @conn.commit
end
194
+
195
# Finds objects where all workflow steps in the active workflow table are complete
# (status 'completed' or 'skipped').
# Requires an open connection in @conn.
# @return [Array<Hash>] one hash per object, each having the following keys:
#   {"REPOSITORY"=>"dor", "DRUID"=>"druid:345", "DATASTREAM"=>"googleScannedBookWF"}
def find_completed_objects
  # The table name comes from @workflow_table so the search looks at the same table that the
  # archive and delete statements operate on.
  # Repositories are compared null-safely: "null = null" is never true in SQL, so without the
  # extra branch an object with a null repository would count as complete even with unfinished steps.
  completed_query = <<-EOSQL
    select distinct repository, datastream, druid
    from #{@workflow_table} w1
    where w1.status in ('completed', 'skipped')
    and not exists
    (
      select *
      from #{@workflow_table} w2
      where (w1.repository = w2.repository or (w1.repository is null and w2.repository is null))
      and w1.datastream = w2.datastream
      and w1.druid = w2.druid
      and w2.status not in ('completed', 'skipped')
    )
  EOSQL

  rows = []
  cursor = @conn.exec(completed_query)
  begin
    while (row = cursor.fetch_hash)
      rows << row
    end
  ensure
    # release the cursor even when fetching fails part way through
    cursor.close
  end
  rows
end
221
+
222
# Builds an ArchiveCriteria for every row. A row whose setup raises is logged and left out.
# @param [Array<Hash>] rows result from #find_completed_objects
# @return [Array<ArchiveCriteria>] each result mapped to an ArchiveCriteria object
def map_result_to_criteria(rows)
  rows.inject([]) do |criteria, row|
    begin
      built = ArchiveCriteria.new.setup_from_query(row)
      criteria << built unless built.nil?
    rescue => err
      LyberCore::Log.error("Skipping archiving of #{row['DRUID']}")
      LyberCore::Log.error("#{err.inspect}\n" + err.backtrace.join("\n"))
    end
    criteria
  end
end
236
+
237
# Executes a statement on the current connection on a best-effort basis: an error raised by the
# statement is logged as a warning and otherwise ignored.
# Only StandardError is swallowed, so interrupts and exit requests still propagate.
# @param [String] sql the statement to run
# @return the result of @conn.exec, or the logger's return value when the statement failed
def simple_sql_exec(sql)
  @conn.exec(sql)
rescue StandardError => e
  LyberCore::Log.warn "Ignoring error: #{e.message}\n while trying to execute: " << sql
end
242
+
243
# Drops the two bitmap indexes, runs the given block, then creates the indexes again on
# workflow_archive. The create statements run even when the block raises.
# @yield the work to perform while the indexes are absent
# @return the value of the block
def with_indexing_disabled(&block)
  %w(ds_wf_ar_bitmap_idx repo_wf_ar_bitmap_idx).each do |index_name|
    simple_sql_exec("drop index #{index_name}")
  end
  yield
ensure
  [%w(ds_wf_ar_bitmap_idx datastream), %w(repo_wf_ar_bitmap_idx repository)].each do |index_name, column|
    simple_sql_exec("create bitmap index #{index_name} on workflow_archive (#{column})")
  end
end
251
+
252
# Does the work of finding completed objects and archiving their rows.
# Expects @conn to be connected already; this method does not open a connection itself.
# When nothing is found the process exits with a success status (exit true).
# In every case the connection is logged off (when there is one) and the pool is destroyed.
def archive
  objs = find_completed_objects

  if objs.empty?
    LyberCore::Log.info "Nothing to archive"
    exit true
  end

  LyberCore::Log.info "Found #{objs.size} completed workflows"

  archiving_criteria = map_result_to_criteria(objs)
  with_indexing_disabled { archive_rows(archiving_criteria) }

  # stay quiet once 3 or more errors have accumulated
  LyberCore::Log.info "DONE! Processed #{@archived} objects with #{@errors} errors" if @errors < 3
ensure
  begin
    # guard against a missing connection so cleanup does not hide the real error
    @conn.logoff if @conn
  ensure
    # the pool is torn down even when logging off fails
    destroy_pool
  end
end
271
+
272
+ end
273
+
274
+ end
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: workflow-archiver
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.2.2
5
+ platform: ruby
6
+ authors:
7
+ - Willy Mene
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-07-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: lyber-core
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rest-client
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-oci8
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: active-fedora
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Can be used standalone or used as a library
98
+ email:
99
+ - wmene@stanford.edu
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - VERSION
105
+ - lib/dor/archiver_version.rb
106
+ - lib/dor/workflow_archiver.rb
107
+ homepage:
108
+ licenses: []
109
+ metadata: {}
110
+ post_install_message:
111
+ rdoc_options: []
112
+ require_paths:
113
+ - lib
114
+ required_ruby_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ! '>='
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ! '>='
122
+ - !ruby/object:Gem::Version
123
+ version: 1.3.6
124
+ requirements: []
125
+ rubyforge_project:
126
+ rubygems_version: 2.2.2
127
+ signing_key:
128
+ specification_version: 4
129
+ summary: Enables archiving of DOR workflows
130
+ test_files: []