workflow-archiver 1.3.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a0db4761d9140127396c162359225c3facbdff59
4
- data.tar.gz: f87202e1789fdb753487445d382c98cb29e8a870
3
+ metadata.gz: 2e07be8143cac430dfa7166ad246e81d64c85558
4
+ data.tar.gz: 23612c1949726b163ad89534727679fbb6da3968
5
5
  SHA512:
6
- metadata.gz: c41b25566683e1dc64120ace58c89db480de9ec57a87f7d58751362c012c7f02279c7de75bfcd62418970086bfb1718dc027b9e3b6fc023dec951a9fecceec2b
7
- data.tar.gz: 1dda723f38294bc01ed7b8d67beb61b0b08943a5d516de9e19924cce26dd78ff116d9175bf2b3b5f71634f8bcf143350fa76867031e0e2f946c5dfa0180e6365
6
+ metadata.gz: 779182bd0fd4cdabb6c4480aa7ceb5aa102bfe33c60a00317ea2b98ec4d8e62e2caef69c4cdc784ccd5cea3e91960fe2d077b076bccdb47c02c73af796755fd6
7
+ data.tar.gz: 679c197f47b3c409a0f61f3350eae177bfc230e9ae9b9723b090a328f2a87be711c4b26cafbb7680ce280f2b42a45d1e4ed708db48be7f1e1f4aaed9641ac0c0
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.3.2
1
+ 2.0.0
@@ -3,9 +3,6 @@ module Dor
3
3
  def self.archiver_version
4
4
  @archiver_version ||= File.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION')).chomp
5
5
  end
6
-
7
- ARCHIVER_VERSION = self.archiver_version
8
- end
9
-
10
-
6
+ ARCHIVER_VERSION = archiver_version
7
+ end
11
8
  end
@@ -1,18 +1,16 @@
1
1
  require 'rest_client'
2
2
  require 'confstruct'
3
3
  require 'lyber_core'
4
- require 'oci8'
4
+ require 'sequel'
5
5
 
6
6
  module Dor
7
-
8
7
  # Holds the parameters about the workflow rows that need to be deleted
9
8
  ArchiveCriteria = Struct.new(:repository, :druid, :datastream, :version) do
10
9
  # @param [Array<Hash>] List of objects returned from {WorkflowArchiver#find_completed_objects}. It expects the following keys in the hash
11
- # "REPOSITORY", "DRUID", "DATASTREAM". Note they are all caps strings, not symbols
12
10
  def setup_from_query(row_hash)
13
- self.repository = row_hash["REPOSITORY"]
14
- self.druid = row_hash["DRUID"]
15
- self.datastream = row_hash["DATASTREAM"]
11
+ self.repository = row_hash[:repository]
12
+ self.druid = row_hash[:druid]
13
+ self.datastream = row_hash[:datastream]
16
14
  set_current_version
17
15
  self
18
16
  end
@@ -21,29 +19,27 @@ module Dor
21
19
  # @return [Hash] Maps column names (as lowercase symbols, e.g. :druid) to non-nil column values
22
20
  def to_bind_hash
23
21
  h = {}
24
- members.reject{|mem| mem =~ /version/}.each do |m|
25
- h[m.swapcase] = self.send(m) if(self.send(m))
22
+ members.reject { |mem| mem =~ /version/ }.each do |m|
23
+ h[m] = send(m) if send(m)
26
24
  end
27
25
  h
28
26
  end
29
27
 
30
28
  def set_current_version
31
- begin
32
- self.version = RestClient.get WorkflowArchiver.config.dor_service_uri + "/dor/v1/objects/#{self.druid}/versions/current"
33
- rescue RestClient::InternalServerError => ise
34
- raise unless(ise.inspect =~ /Unable to find.*in fedora/)
35
- LyberCore::Log.warn "#{ise.inspect}"
36
- LyberCore::Log.warn "Moving workflow rows with version set to '1'"
37
- self.version = '1'
38
- end
29
+ self.version = RestClient.get WorkflowArchiver.config.dor_service_uri + "/dor/v1/objects/#{druid}/versions/current"
30
+ rescue RestClient::InternalServerError => ise
31
+ raise unless ise.inspect =~ /Unable to find.*in fedora/
32
+ LyberCore::Log.warn ise.inspect.to_s
33
+ LyberCore::Log.warn "Moving workflow rows with version set to '1'"
34
+ self.version = '1'
39
35
  end
40
36
  end
41
37
 
42
38
  class WorkflowArchiver
43
- WF_COLUMNS = %w(ID DRUID DATASTREAM PROCESS STATUS ERROR_MSG ERROR_TXT DATETIME ATTEMPTS LIFECYCLE ELAPSED REPOSITORY NOTE PRIORITY LANE_ID)
39
+ WF_COLUMNS = %w(id druid datastream process status error_msg error_txt datetime attempts lifecycle elapsed repository note priority lane_id)
44
40
 
45
41
  # These attributes mostly used for testing
46
- attr_reader :conn, :errors
42
+ attr_reader :errors
47
43
 
48
44
  def self.config
49
45
  @@conf ||= Confstruct::Configuration.new
@@ -52,63 +48,33 @@ module Dor
52
48
  # Sets up logging and connects to the database. By default it reads values from constants:
53
49
  # WORKFLOW_DB_LOGIN, WORKFLOW_DB_PASSWORD, WORKFLOW_DB_URI, DOR_SERVICE_URI but can be overridden with the opts Hash
54
50
  # @param [Hash] opts Options to override database parameters
55
- # @option opts [String] :login ('WORKFLOW_DB_LOGIN') Database login id
56
- # @option opts [String] :password ('WORKFLOW_DB_PASSWORD') Database password
57
51
  # @option opts [String] :db_uri ('WORKFLOW_DB_URI') Database uri
58
52
  # @option opts [String] :wf_table ('workflow') Name of the active workflow table
59
53
  # @option opts [String] :wfa_table ('workflow_archive') Name of the workflow archive table
60
- # @option opts [String] :dor_service_uri ('DOR_SERVICE_URI') URI of the DOR Rest service
61
54
  # @option opts [Integer] :retry_delay (5) Number of seconds to sleep between retries of database operations
62
- def initialize(opts={})
63
- @login = (opts.include?(:login) ? opts[:login] : WorkflowArchiver.config.db_login)
64
- @password = (opts.include?(:password) ? opts[:password] : WorkflowArchiver.config.db_password)
65
- @db_uri = (opts.include?(:db_uri) ? opts[:db_uri] : WorkflowArchiver.config.db_uri)
66
- @dor_service_uri = (opts.include?(:dor_service_uri) ? opts[:dor_service_uri] : WorkflowArchiver.config.dor_service_uri)
67
- @workflow_table = (opts.include?(:wf_table) ? opts[:wf_table] : "workflow")
68
- @workflow_archive_table = (opts.include?(:wfa_table) ? opts[:wfa_table] : "workflow_archive")
69
- @retry_delay = (opts.include?(:retry_delay) ? opts[:retry_delay] : 5)
70
-
55
+ def initialize(opts = {})
56
+ @conn = opts[:db_connection]
57
+ @db_uri = opts.fetch(:db_uri, WorkflowArchiver.config.db_uri).freeze
58
+ @workflow_table = opts.include?(:wf_table) ? opts[:wf_table] : 'workflow'
59
+ @workflow_archive_table = opts.include?(:wfa_table) ? opts[:wfa_table] : 'workflow_archive'
60
+ @retry_delay = opts.include?(:retry_delay) ? opts[:retry_delay] : 5
71
61
  # initialize some counters
72
62
  @errors = 0
73
63
  @archived = 0
74
64
  end
75
65
 
76
- def connect_to_db
77
- $odb_pool ||= OCI8::ConnectionPool.new(1, 5, 2, @login, @password, @db_uri)
78
- @conn = OCI8.new(@login, @password, $odb_pool)
79
- @conn.autocommit = false
66
+ def conn
67
+ @conn ||= Sequel.connect(@db_uri)
80
68
  end
81
69
 
82
- def destroy_pool
83
- $odb_pool.destroy if($odb_pool)
84
- end
85
-
86
- def bind_and_exec_sql(sql, workflow_info)
87
- # LyberCore::Log.debug("Executing: #{sql}")
88
- cursor = @conn.parse(sql)
89
-
90
- workflow_info.to_bind_hash.each do |k, v|
91
- param = ":#{k}"
92
- #LyberCore::Log.debug("Setting: #{param} #{v}")
93
- cursor.bind_param(param, v)
94
- end
95
-
96
- num_rows = cursor.exec
97
- unless num_rows > 0
98
- raise "Expected more than 0 rows to be updated"
99
- end
100
- ensure
101
- cursor.close
102
- end
103
-
104
- # @return String The columns appended with comma and newline
70
+ # @return [String] The columns joined with comma and newline
105
71
  def wf_column_string
106
- WF_COLUMNS.inject('') { |str, col| str << col << ",\n"}
72
+ WF_COLUMNS.join(",\n")
107
73
  end
108
74
 
109
- # @return String The columns prepended with 'w.' and appended with comma and newline
75
+ # @return [String] The columns prefixed with the workflow table name and joined with comma and newline
110
76
  def wf_archive_column_string
111
- WF_COLUMNS.inject('') { |str, col| str << 'w.' << col << ",\n"}
77
+ WF_COLUMNS.map { |col| "#{@workflow_table}.#{col}" }.join(",\n")
112
78
  end
113
79
 
114
80
  # Use this as a one-shot method to archive all the steps of an object's particular datastream
@@ -120,17 +86,14 @@ module Dor
120
86
  # @param [String] version
121
87
  def archive_one_datastream(repository, druid, datastream, version)
122
88
  criteria = [ArchiveCriteria.new(repository, druid, datastream, version)]
123
- connect_to_db
124
89
  archive_rows criteria
125
- ensure
126
- @conn.logoff if(@conn)
127
90
  end
128
91
 
129
92
  # Copies rows from the workflow table to the workflow_archive table, then deletes the rows from workflow
130
93
  # Both operations must complete, or they get rolled back
131
94
  # @param [Array<ArchiveCriteria>] objs List of objects returned from {#find_completed_objects} and mapped to an array of ArchiveCriteria objects.
132
95
  def archive_rows(objs)
133
- Array(objs).each do |obj|
96
+ objs.each do |obj|
134
97
  tries = 0
135
98
  begin
136
99
  tries += 1
@@ -138,11 +101,8 @@ module Dor
138
101
  @archived += 1
139
102
  rescue => e
140
103
  LyberCore::Log.error "Rolling back transaction due to: #{e.inspect}\n" << e.backtrace.join("\n") << "\n!!!!!!!!!!!!!!!!!!"
141
- @conn.rollback
142
-
143
- # Retry this druid up to 3 times
144
- if tries < 3
145
- LyberCore::Log.error " Retrying archive operation in #{@retry_delay.to_s} seconds..."
104
+ if tries < 3 # Retry this druid up to 3 times
105
+ LyberCore::Log.error " Retrying archive operation in #{@retry_delay} seconds..."
146
106
  sleep @retry_delay
147
107
  retry
148
108
  end
@@ -150,55 +110,55 @@ module Dor
150
110
 
151
111
  @errors += 1
152
112
  if @errors >= 3
153
- LyberCore::Log.fatal("Too many errors. Archiving halted")
113
+ LyberCore::Log.fatal('Too many errors. Archiving halted')
154
114
  break
155
115
  end
156
116
  end
157
-
158
117
  end # druids.each
159
118
  end
160
119
 
161
120
  # @param [ArchiveCriteria] workflow_info contains parameters on the workflow rows to archive
162
121
  def do_one_archive(workflow_info)
163
122
  LyberCore::Log.info "Archiving #{workflow_info.inspect}"
164
-
165
-
166
- copy_sql =<<-EOSQL
123
+ copy_sql = <<-EOSQL
167
124
  insert into #{@workflow_archive_table} (
168
- #{wf_column_string}
169
- VERSION
125
+ #{wf_column_string},
126
+ version
170
127
  )
171
128
  select
172
- #{wf_archive_column_string}
173
- #{workflow_info.version} as VERSION
174
- from #{@workflow_table} w
175
- where w.druid = :DRUID
176
- and w.datastream = :DATASTREAM
129
+ #{wf_archive_column_string},
130
+ #{workflow_info.version} as version
131
+ from #{@workflow_table}
132
+ where #{@workflow_table}.druid = :druid
133
+ and #{@workflow_table}.datastream = :datastream
177
134
  EOSQL
178
135
 
179
- delete_sql = "delete #{@workflow_table} where druid = :DRUID and datastream = :DATASTREAM "
136
+ delete_sql = "delete from #{@workflow_table} where druid = :druid and datastream = :datastream "
180
137
 
181
138
  if(workflow_info.repository)
182
- copy_sql << "and w.repository = :REPOSITORY"
183
- delete_sql << "and repository = :REPOSITORY"
139
+ copy_sql += "and #{@workflow_table}.repository = :repository"
140
+ delete_sql += 'and repository = :repository'
184
141
  else
185
- copy_sql << "and w.repository IS NULL"
186
- delete_sql << "and repository IS NULL"
142
+ copy_sql += "and #{@workflow_table}.repository IS NULL"
143
+ delete_sql += 'and repository IS NULL'
187
144
  end
188
145
 
189
- bind_and_exec_sql(copy_sql, workflow_info)
146
+ conn.transaction do
147
+ conn.run Sequel::SQL::PlaceholderLiteralString.new(copy_sql, workflow_info.to_bind_hash)
190
148
 
191
- LyberCore::Log.debug " Removing old workflow rows"
192
- bind_and_exec_sql(delete_sql, workflow_info)
149
+ LyberCore::Log.debug ' Removing old workflow rows'
193
150
 
194
- @conn.commit
151
+ conn.run Sequel::SQL::PlaceholderLiteralString.new(delete_sql, workflow_info.to_bind_hash)
152
+ end
195
153
  end
196
154
 
197
155
  # Finds objects where all workflow steps are complete
198
- # Returns an array of hashes, each hash having the following keys:
199
- # {"REPOSITORY"=>"dor", "DRUID"=>"druid:345", "DATASTREAM"=>"googleScannedBookWF"}
156
+ # @return [Enumerator<Hash{Symbol=>String}>] when no block is given; otherwise yields each row. Each row hash has the following keys:
157
+ # {:repository=>"dor", :druid=>"druid:345", :datastream=>"googleScannedBookWF"}
200
158
  def find_completed_objects
201
- completed_query =<<-EOSQL
159
+ return to_enum(:find_completed_objects) unless block_given?
160
+
161
+ completed_query = <<-EOSQL
202
162
  select distinct repository, datastream, druid
203
163
  from workflow w1
204
164
  where w1.status in ('completed', 'skipped')
@@ -213,18 +173,15 @@ module Dor
213
173
  )
214
174
  EOSQL
215
175
 
216
- rows = []
217
- cursor = @conn.exec(completed_query)
218
- while r = cursor.fetch_hash
219
- rows << r
176
+ conn.fetch(completed_query) do |row|
177
+ yield row
220
178
  end
221
- rows
222
179
  end
223
180
 
224
181
  # @param [Array<Hash>] rows result from #find_completed_objects
225
182
  # @return [Array<ArchiveCriteria>] each result mapped to an ArchiveCriteria object
226
183
  def map_result_to_criteria(rows)
227
- criteria = rows.map do |r|
184
+ rows.lazy.map do |r|
228
185
  begin
229
186
  ArchiveCriteria.new.setup_from_query(r)
230
187
  rescue => e
@@ -232,47 +189,22 @@ module Dor
232
189
  LyberCore::Log.error("#{e.inspect}\n" + e.backtrace.join("\n"))
233
190
  nil
234
191
  end
235
- end
236
- criteria.reject {|c| c.nil?}
237
- end
238
-
239
- def simple_sql_exec(sql)
240
- @conn.exec(sql)
241
- rescue Exception => e
242
- LyberCore::Log.warn "Ignoring error: #{e.message}\n while trying to execute: " << sql
243
- end
244
-
245
- def with_indexing_disabled(&block)
246
- simple_sql_exec("drop index ds_wf_ar_bitmap_idx")
247
- simple_sql_exec("drop index repo_wf_ar_bitmap_idx")
248
- yield
249
- ensure
250
- simple_sql_exec("create bitmap index ds_wf_ar_bitmap_idx on workflow_archive (datastream)")
251
- simple_sql_exec("create bitmap index repo_wf_ar_bitmap_idx on workflow_archive (repository)")
192
+ end.reject { |r| r.nil? }
252
193
  end
253
194
 
254
195
  # Does the work of finding completed objects and archiving the rows
255
196
  def archive
256
- connect_to_db
257
197
  objs = find_completed_objects
258
198
 
259
- if objs.size == 0
260
- LyberCore::Log.info "Nothing to archive"
199
+ if objs.none?
200
+ LyberCore::Log.info 'Nothing to archive'
261
201
  exit true
262
202
  end
263
-
264
- LyberCore::Log.info "Found #{objs.size.to_s} completed workflows"
265
- objs = objs.first(600) # FIXME: temporarily limit objs processed until we fix cron not to fire if job still running
266
-
203
+ LyberCore::Log.info "Found #{objs.size} completed workflows"
267
204
  archiving_criteria = map_result_to_criteria(objs)
268
- with_indexing_disabled { archive_rows(archiving_criteria) }
205
+ archive_rows(archiving_criteria)
269
206
 
270
- LyberCore::Log.info "DONE! Processed #{@archived.to_s} objects with #{@errors.to_s} errors" if(@errors < 3 )
271
- ensure
272
- @conn.logoff
273
- destroy_pool
207
+ LyberCore::Log.info "DONE! Processed #{@archived.to_s} objects with #{@errors.to_s} errors" if @errors < 3
274
208
  end
275
-
276
209
  end
277
-
278
210
  end
@@ -0,0 +1 @@
1
+ require 'dor/workflow_archiver'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: workflow-archiver
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.2
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Willy Mene
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-30 00:00:00.000000000 Z
11
+ date: 2016-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lyber-core
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: ruby-oci8
42
+ name: sequel
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -95,21 +95,7 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
- name: coveralls
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: active-fedora
98
+ name: sqlite3
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
101
  - - ">="
@@ -132,6 +118,7 @@ files:
132
118
  - VERSION
133
119
  - lib/dor/archiver_version.rb
134
120
  - lib/dor/workflow_archiver.rb
121
+ - lib/workflow-archiver.rb
135
122
  homepage:
136
123
  licenses: []
137
124
  metadata: {}
@@ -151,7 +138,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
151
138
  version: 1.3.6
152
139
  requirements: []
153
140
  rubyforge_project:
154
- rubygems_version: 2.6.13
141
+ rubygems_version: 2.4.5.1
155
142
  signing_key:
156
143
  specification_version: 4
157
144
  summary: Enables archiving of DOR workflows