workflow-archiver 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OGY0MWY1NjlmNmVlMzI3ZmRiMzg5ZThkNzhjMzFlYzQyMzdlZTVmOQ==
5
+ data.tar.gz: !binary |-
6
+ MGNlY2I2OTlhNGYzNWRhODQ4N2I2YmEwMzc4Y2NjMTkzMzViNzg4Zg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NDkyYjQ5ZmFmOTA2ZDEyMmNjOTU4Njk0ZTJkYTkxNzllMjIxMmIxNmUxZjY4
10
+ NmI5YTQ0OTQ2ZWZhMjhkY2MxZTZiMWYwZjRlYzEzYTBmOWEyNzY5OWE3N2My
11
+ NjQ4ZTI4ODllODM4OWQ4YWM5NDk4OGVhOGJkZDljMDliZGNlMzM=
12
+ data.tar.gz: !binary |-
13
+ ZDkzMjY1YzdmNjE4NDAwMTQ1MGE1NzU0NGMxOGQ2NWJmOTA1MGI5MDhiYjRh
14
+ MzY4YTlmZTRmOTBkMTQ1YjI4OTcxZTA4NzA5NjVjMzU1ZWJiNGQ3ODk0ZTM1
15
+ Mzk3OTIxMzA0ZTJiNWExYzkyODgwMGQ1YjViMmYzNGJjNGVmNzY=
data/VERSION ADDED
@@ -0,0 +1,2 @@
1
+ 1.2.2
2
+
@@ -0,0 +1,11 @@
1
# Exposes the gem version, read from the VERSION file two directories above this file.
module Dor
  # Guard so that loading this file more than once does not reassign the constant.
  unless const_defined?(:ARCHIVER_VERSION)
    # @return [String] contents of the VERSION file with one trailing line break removed (memoized)
    def self.archiver_version
      @archiver_version ||= begin
        version_file = File.join(File.dirname(__FILE__), '..', '..', 'VERSION')
        File.read(version_file).chomp
      end
    end

    ARCHIVER_VERSION = archiver_version
  end
end
@@ -0,0 +1,274 @@
1
+ require 'rest_client'
2
+ require 'oci8'
3
+
4
+ module Dor
5
+
6
+   # Holds the parameters that identify the workflow rows to be archived and then deleted
7
+ ArchiveCriteria = Struct.new(:repository, :druid, :datastream, :version) do
8
+ # @param [Array<Hash>] List of objects returned from {WorkflowArchiver#find_completed_objects}. It expects the following keys in the hash
9
+ # "REPOSITORY", "DRUID", "DATASTREAM". Note they are all caps strings, not symbols
10
+ def setup_from_query(row_hash)
11
+ self.repository = row_hash["REPOSITORY"]
12
+ self.druid = row_hash["DRUID"]
13
+ self.datastream = row_hash["DATASTREAM"]
14
+ set_current_version
15
+ self
16
+ end
17
+
18
+ # Removes version from list of members, then picks out non nil members and builds a hash of column_name => column_value
19
+ # @return [Hash] Maps column names (in ALL caps) to non-nil column values
20
+ def to_bind_hash
21
+ h = {}
22
+ members.reject{|mem| mem =~ /version/}.each do |m|
23
+ h[m.swapcase] = self.send(m) if(self.send(m))
24
+ end
25
+ h
26
+ end
27
+
28
+ def set_current_version
29
+ begin
30
+ self.version = RestClient.get WorkflowArchiver.config.dor_service_uri + "/dor/v1/objects/#{self.druid}/versions/current"
31
+ rescue RestClient::InternalServerError => ise
32
+ raise unless(ise.inspect =~ /Unable to find.*in fedora/)
33
+ LyberCore::Log.warn "#{ise.inspect}"
34
+ LyberCore::Log.warn "Moving workflow rows with version set to '1'"
35
+ self.version = '1'
36
+ end
37
+ end
38
+ end
39
+
40
+ class WorkflowArchiver
41
+ WF_COLUMNS = %w(ID DRUID DATASTREAM PROCESS STATUS ERROR_MSG ERROR_TXT DATETIME ATTEMPTS LIFECYCLE ELAPSED REPOSITORY NOTE PRIORITY)
42
+
43
+ # These attributes mostly used for testing
44
+ attr_reader :conn, :errors
45
+
46
+ def WorkflowArchiver.config
47
+ @@conf ||= Confstruct::Configuration.new
48
+ end
49
+
50
+ # Sets up logging and connects to the database. By default it reads values from constants:
51
+ # WORKFLOW_DB_LOGIN, WORKFLOW_DB_PASSWORD, WORKFLOW_DB_URI, DOR_SERVICE_URI but can be overriden with the opts Hash
52
+ # @param [Hash] opts Options to override database parameters
53
+ # @option opts [String] :login ('WORKFLOW_DB_LOGIN') Database login id
54
+ # @option opts [String] :password ('WORKFLOW_DB_PASSWORD') Database password
55
+ # @option opts [String] :db_uri ('WORKFLOW_DB_URI') Database uri
56
+ # @option opts [String] :wf_table ('workflow') Name of the active workflow table
57
+ # @option opts [String] :wfa_table ('workflow_archive') Name of the workflow archive table
58
+ # @option opts [String] :dor_service_uri ('DOR_SERVICE_URI') URI of the DOR Rest service
59
+ # @option opts [Integer] :retry_delay (5) Number of seconds to sleep between retries of database operations
60
+ def initialize(opts={})
61
+ @login = (opts.include?(:login) ? opts[:login] : WorkflowArchiver.config.db_login)
62
+ @password = (opts.include?(:password) ? opts[:password] : WorkflowArchiver.config.db_password)
63
+ @db_uri = (opts.include?(:db_uri) ? opts[:db_uri] : WorkflowArchiver.config.db_uri)
64
+ @dor_service_uri = (opts.include?(:dor_service_uri) ? opts[:dor_service_uri] : WorkflowArchiver.config.dor_service_uri)
65
+ @workflow_table = (opts.include?(:wf_table) ? opts[:wf_table] : "workflow")
66
+ @workflow_archive_table = (opts.include?(:wfa_table) ? opts[:wfa_table] : "workflow_archive")
67
+ @retry_delay = (opts.include?(:retry_delay) ? opts[:retry_delay] : 5)
68
+
69
+ # initialize some counters
70
+ @errors = 0
71
+ @archived = 0
72
+ end
73
+
74
+ def connect_to_db
75
+ $odb_pool ||= OCI8::ConnectionPool.new(1, 5, 2, @login, @password, @db_uri)
76
+ @conn = OCI8.new(@login, @password, $odb_pool)
77
+ @conn.autocommit = false
78
+ end
79
+
80
+ def destroy_pool
81
+ $odb_pool.destroy if($odb_pool)
82
+ end
83
+
84
+ def bind_and_exec_sql(sql, workflow_info)
85
+ # LyberCore::Log.debug("Executing: #{sql}")
86
+ cursor = @conn.parse(sql)
87
+
88
+ workflow_info.to_bind_hash.each do |k, v|
89
+ param = ":#{k}"
90
+ #LyberCore::Log.debug("Setting: #{param} #{v}")
91
+ cursor.bind_param(param, v)
92
+ end
93
+
94
+ num_rows = cursor.exec
95
+ unless num_rows > 0
96
+ raise "Expected more than 0 rows to be updated"
97
+ end
98
+ ensure
99
+ cursor.close
100
+ end
101
+
102
+ # @return String The columns appended with comma and newline
103
+ def wf_column_string
104
+ WF_COLUMNS.inject('') { |str, col| str << col << ",\n"}
105
+ end
106
+
107
+ # @return String The columns prepended with 'w.' and appended with comma and newline
108
+ def wf_archive_column_string
109
+ WF_COLUMNS.inject('') { |str, col| str << 'w.' << col << ",\n"}
110
+ end
111
+
112
+ # Use this as a one-shot method to archive all the steps of an object's particular datastream
113
+ # It will connect to the database, archive the rows, then logoff. Assumes caller will set version (like the Dor REST service)
114
+ # @note Caller of this method must handle destroying of the connection pool
115
+ # @param [String] repository
116
+ # @param [String] druid
117
+ # @param [String] datastream
118
+ # @param [String] version
119
+ def archive_one_datastream(repository, druid, datastream, version)
120
+ criteria = [ArchiveCriteria.new(repository, druid, datastream, version)]
121
+ connect_to_db
122
+ archive_rows criteria
123
+ ensure
124
+ @conn.logoff if(@conn)
125
+ end
126
+
127
+ # Copies rows from the workflow table to the workflow_archive table, then deletes the rows from workflow
128
+ # Both operations must complete, or they get rolled back
129
+ # @param [Array<ArchiveCriteria>] objs List of objects returned from {#find_completed_objects} and mapped to an array of ArchiveCriteria objects.
130
+ def archive_rows(objs)
131
+ Array(objs).each do |obj|
132
+ tries = 0
133
+ begin
134
+ tries += 1
135
+ do_one_archive(obj)
136
+ @archived += 1
137
+ rescue => e
138
+ LyberCore::Log.error "Rolling back transaction due to: #{e.inspect}\n" << e.backtrace.join("\n") << "\n!!!!!!!!!!!!!!!!!!"
139
+ @conn.rollback
140
+
141
+ # Retry this druid up to 3 times
142
+ if tries < 3
143
+ LyberCore::Log.error " Retrying archive operation in #{@retry_delay.to_s} seconds..."
144
+ sleep @retry_delay
145
+ retry
146
+ end
147
+ LyberCore::Log.error " Too many retries. Giving up on #{obj.inspect}"
148
+
149
+ @errors += 1
150
+ if @errors >= 3
151
+ LyberCore::Log.fatal("Too many errors. Archiving halted")
152
+ break
153
+ end
154
+ end
155
+
156
+ end # druids.each
157
+ end
158
+
159
+ # @param [ArchiveCriteria] workflow_info contains paramaters on the workflow rows to archive
160
+ def do_one_archive(workflow_info)
161
+ LyberCore::Log.info "Archiving #{workflow_info.inspect}"
162
+
163
+
164
+ copy_sql =<<-EOSQL
165
+ insert into #{@workflow_archive_table} (
166
+ #{wf_column_string}
167
+ VERSION
168
+ )
169
+ select
170
+ #{wf_archive_column_string}
171
+ #{workflow_info.version} as VERSION
172
+ from #{@workflow_table} w
173
+ where w.druid = :DRUID
174
+ and w.datastream = :DATASTREAM
175
+ EOSQL
176
+
177
+ delete_sql = "delete #{@workflow_table} where druid = :DRUID and datastream = :DATASTREAM "
178
+
179
+ if(workflow_info.repository)
180
+ copy_sql << "and w.repository = :REPOSITORY"
181
+ delete_sql << "and repository = :REPOSITORY"
182
+ else
183
+ copy_sql << "and w.repository IS NULL"
184
+ delete_sql << "and repository IS NULL"
185
+ end
186
+
187
+ bind_and_exec_sql(copy_sql, workflow_info)
188
+
189
+ LyberCore::Log.debug " Removing old workflow rows"
190
+ bind_and_exec_sql(delete_sql, workflow_info)
191
+
192
+ @conn.commit
193
+ end
194
+
195
+ # Finds objects where all workflow steps are complete
196
+ # Returns an array of hashes, each hash having the following keys:
197
+ # {"REPOSITORY"=>"dor", "DRUID"=>"druid:345", "DATASTREAM"=>"googleScannedBookWF"}
198
+ def find_completed_objects
199
+ completed_query =<<-EOSQL
200
+ select distinct repository, datastream, druid
201
+ from workflow w1
202
+ where w1.status in ('completed', 'skipped')
203
+ and not exists
204
+ (
205
+ select *
206
+ from workflow w2
207
+ where w1.repository = w2.repository
208
+ and w1.datastream = w2.datastream
209
+ and w1.druid = w2.druid
210
+ and w2.status not in ('completed', 'skipped')
211
+ )
212
+ EOSQL
213
+
214
+ rows = []
215
+ cursor = @conn.exec(completed_query)
216
+ while r = cursor.fetch_hash
217
+ rows << r
218
+ end
219
+ rows
220
+ end
221
+
222
+ # @param [Array<Hash>] rows result from #find_completed_objects
223
+ # @return [Array<ArchiveCriteria>] each result mapped to an ArchiveCriteria object
224
+ def map_result_to_criteria(rows)
225
+ criteria = rows.map do |r|
226
+ begin
227
+ ArchiveCriteria.new.setup_from_query(r)
228
+ rescue => e
229
+ LyberCore::Log.error("Skipping archiving of #{r['DRUID']}")
230
+ LyberCore::Log.error("#{e.inspect}\n" + e.backtrace.join("\n"))
231
+ nil
232
+ end
233
+ end
234
+ criteria.reject {|c| c.nil?}
235
+ end
236
+
237
+ def simple_sql_exec(sql)
238
+ @conn.exec(sql)
239
+ rescue Exception => e
240
+ LyberCore::Log.warn "Ignoring error: #{e.message}\n while trying to execute: " << sql
241
+ end
242
+
243
+ def with_indexing_disabled(&block)
244
+ simple_sql_exec("drop index ds_wf_ar_bitmap_idx")
245
+ simple_sql_exec("drop index repo_wf_ar_bitmap_idx")
246
+ yield
247
+ ensure
248
+ simple_sql_exec("create bitmap index ds_wf_ar_bitmap_idx on workflow_archive (datastream)")
249
+ simple_sql_exec("create bitmap index repo_wf_ar_bitmap_idx on workflow_archive (repository)")
250
+ end
251
+
252
+ # Does the work of finding completed objects and archiving the rows
253
+ def archive
254
+ objs = find_completed_objects
255
+
256
+ if objs.size == 0
257
+ LyberCore::Log.info "Nothing to archive"
258
+ exit true
259
+ end
260
+
261
+ LyberCore::Log.info "Found #{objs.size.to_s} completed workflows"
262
+
263
+ archiving_criteria = map_result_to_criteria(objs)
264
+ with_indexing_disabled { archive_rows(archiving_criteria) }
265
+
266
+ LyberCore::Log.info "DONE! Processed #{@archived.to_s} objects with #{@errors.to_s} errors" if(@errors < 3 )
267
+ ensure
268
+ @conn.logoff
269
+ destroy_pool
270
+ end
271
+
272
+ end
273
+
274
+ end
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: workflow-archiver
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.2.2
5
+ platform: ruby
6
+ authors:
7
+ - Willy Mene
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-07-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: lyber-core
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rest-client
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-oci8
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: active-fedora
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Can be used standalone or used as a library
98
+ email:
99
+ - wmene@stanford.edu
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - VERSION
105
+ - lib/dor/archiver_version.rb
106
+ - lib/dor/workflow_archiver.rb
107
+ homepage:
108
+ licenses: []
109
+ metadata: {}
110
+ post_install_message:
111
+ rdoc_options: []
112
+ require_paths:
113
+ - lib
114
+ required_ruby_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ! '>='
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ! '>='
122
+ - !ruby/object:Gem::Version
123
+ version: 1.3.6
124
+ requirements: []
125
+ rubyforge_project:
126
+ rubygems_version: 2.2.2
127
+ signing_key:
128
+ specification_version: 4
129
+ summary: Enables archiving of DOR workflows
130
+ test_files: []