workflow-archiver 1.3.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a0db4761d9140127396c162359225c3facbdff59
4
- data.tar.gz: f87202e1789fdb753487445d382c98cb29e8a870
3
+ metadata.gz: 2e07be8143cac430dfa7166ad246e81d64c85558
4
+ data.tar.gz: 23612c1949726b163ad89534727679fbb6da3968
5
5
  SHA512:
6
- metadata.gz: c41b25566683e1dc64120ace58c89db480de9ec57a87f7d58751362c012c7f02279c7de75bfcd62418970086bfb1718dc027b9e3b6fc023dec951a9fecceec2b
7
- data.tar.gz: 1dda723f38294bc01ed7b8d67beb61b0b08943a5d516de9e19924cce26dd78ff116d9175bf2b3b5f71634f8bcf143350fa76867031e0e2f946c5dfa0180e6365
6
+ metadata.gz: 779182bd0fd4cdabb6c4480aa7ceb5aa102bfe33c60a00317ea2b98ec4d8e62e2caef69c4cdc784ccd5cea3e91960fe2d077b076bccdb47c02c73af796755fd6
7
+ data.tar.gz: 679c197f47b3c409a0f61f3350eae177bfc230e9ae9b9723b090a328f2a87be711c4b26cafbb7680ce280f2b42a45d1e4ed708db48be7f1e1f4aaed9641ac0c0
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.3.2
1
+ 2.0.0
@@ -3,9 +3,6 @@ module Dor
3
3
  def self.archiver_version
4
4
  @archiver_version ||= File.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION')).chomp
5
5
  end
6
-
7
- ARCHIVER_VERSION = self.archiver_version
8
- end
9
-
10
-
6
+ ARCHIVER_VERSION = archiver_version
7
+ end
11
8
  end
@@ -1,18 +1,16 @@
1
1
  require 'rest_client'
2
2
  require 'confstruct'
3
3
  require 'lyber_core'
4
- require 'oci8'
4
+ require 'sequel'
5
5
 
6
6
  module Dor
7
-
8
7
  # Holds the parameters about the workflow rows that need to be deleted
9
8
  ArchiveCriteria = Struct.new(:repository, :druid, :datastream, :version) do
10
9
  # @param [Array<Hash>] List of objects returned from {WorkflowArchiver#find_completed_objects}. It expects the following keys in the hash
11
- # "REPOSITORY", "DRUID", "DATASTREAM". Note they are all caps strings, not symbols
12
10
  def setup_from_query(row_hash)
13
- self.repository = row_hash["REPOSITORY"]
14
- self.druid = row_hash["DRUID"]
15
- self.datastream = row_hash["DATASTREAM"]
11
+ self.repository = row_hash[:repository]
12
+ self.druid = row_hash[:druid]
13
+ self.datastream = row_hash[:datastream]
16
14
  set_current_version
17
15
  self
18
16
  end
@@ -21,29 +19,27 @@ module Dor
21
19
  # @return [Hash] Maps column names (in ALL caps) to non-nil column values
22
20
  def to_bind_hash
23
21
  h = {}
24
- members.reject{|mem| mem =~ /version/}.each do |m|
25
- h[m.swapcase] = self.send(m) if(self.send(m))
22
+ members.reject { |mem| mem =~ /version/ }.each do |m|
23
+ h[m] = send(m) if send(m)
26
24
  end
27
25
  h
28
26
  end
29
27
 
30
28
  def set_current_version
31
- begin
32
- self.version = RestClient.get WorkflowArchiver.config.dor_service_uri + "/dor/v1/objects/#{self.druid}/versions/current"
33
- rescue RestClient::InternalServerError => ise
34
- raise unless(ise.inspect =~ /Unable to find.*in fedora/)
35
- LyberCore::Log.warn "#{ise.inspect}"
36
- LyberCore::Log.warn "Moving workflow rows with version set to '1'"
37
- self.version = '1'
38
- end
29
+ self.version = RestClient.get WorkflowArchiver.config.dor_service_uri + "/dor/v1/objects/#{druid}/versions/current"
30
+ rescue RestClient::InternalServerError => ise
31
+ raise unless ise.inspect =~ /Unable to find.*in fedora/
32
+ LyberCore::Log.warn ise.inspect.to_s
33
+ LyberCore::Log.warn "Moving workflow rows with version set to '1'"
34
+ self.version = '1'
39
35
  end
40
36
  end
41
37
 
42
38
  class WorkflowArchiver
43
- WF_COLUMNS = %w(ID DRUID DATASTREAM PROCESS STATUS ERROR_MSG ERROR_TXT DATETIME ATTEMPTS LIFECYCLE ELAPSED REPOSITORY NOTE PRIORITY LANE_ID)
39
+ WF_COLUMNS = %w(id druid datastream process status error_msg error_txt datetime attempts lifecycle elapsed repository note priority lane_id)
44
40
 
45
41
  # These attributes mostly used for testing
46
- attr_reader :conn, :errors
42
+ attr_reader :errors
47
43
 
48
44
  def self.config
49
45
  @@conf ||= Confstruct::Configuration.new
@@ -52,63 +48,33 @@ module Dor
52
48
  # Sets up logging and connects to the database. By default it reads values from constants:
53
49
  # WORKFLOW_DB_LOGIN, WORKFLOW_DB_PASSWORD, WORKFLOW_DB_URI, DOR_SERVICE_URI but can be overridden with the opts Hash
54
50
  # @param [Hash] opts Options to override database parameters
55
- # @option opts [String] :login ('WORKFLOW_DB_LOGIN') Database login id
56
- # @option opts [String] :password ('WORKFLOW_DB_PASSWORD') Database password
57
51
  # @option opts [String] :db_uri ('WORKFLOW_DB_URI') Database uri
58
52
  # @option opts [String] :wf_table ('workflow') Name of the active workflow table
59
53
  # @option opts [String] :wfa_table ('workflow_archive') Name of the workflow archive table
60
- # @option opts [String] :dor_service_uri ('DOR_SERVICE_URI') URI of the DOR Rest service
61
54
  # @option opts [Integer] :retry_delay (5) Number of seconds to sleep between retries of database operations
62
- def initialize(opts={})
63
- @login = (opts.include?(:login) ? opts[:login] : WorkflowArchiver.config.db_login)
64
- @password = (opts.include?(:password) ? opts[:password] : WorkflowArchiver.config.db_password)
65
- @db_uri = (opts.include?(:db_uri) ? opts[:db_uri] : WorkflowArchiver.config.db_uri)
66
- @dor_service_uri = (opts.include?(:dor_service_uri) ? opts[:dor_service_uri] : WorkflowArchiver.config.dor_service_uri)
67
- @workflow_table = (opts.include?(:wf_table) ? opts[:wf_table] : "workflow")
68
- @workflow_archive_table = (opts.include?(:wfa_table) ? opts[:wfa_table] : "workflow_archive")
69
- @retry_delay = (opts.include?(:retry_delay) ? opts[:retry_delay] : 5)
70
-
55
+ def initialize(opts = {})
56
+ @conn = opts[:db_connection]
57
+ @db_uri = opts.fetch(:db_uri, WorkflowArchiver.config.db_uri).freeze
58
+ @workflow_table = opts.include?(:wf_table) ? opts[:wf_table] : 'workflow'
59
+ @workflow_archive_table = opts.include?(:wfa_table) ? opts[:wfa_table] : 'workflow_archive'
60
+ @retry_delay = opts.include?(:retry_delay) ? opts[:retry_delay] : 5
71
61
  # initialize some counters
72
62
  @errors = 0
73
63
  @archived = 0
74
64
  end
75
65
 
76
- def connect_to_db
77
- $odb_pool ||= OCI8::ConnectionPool.new(1, 5, 2, @login, @password, @db_uri)
78
- @conn = OCI8.new(@login, @password, $odb_pool)
79
- @conn.autocommit = false
66
+ def conn
67
+ @conn ||= Sequel.connect(@db_uri)
80
68
  end
81
69
 
82
- def destroy_pool
83
- $odb_pool.destroy if($odb_pool)
84
- end
85
-
86
- def bind_and_exec_sql(sql, workflow_info)
87
- # LyberCore::Log.debug("Executing: #{sql}")
88
- cursor = @conn.parse(sql)
89
-
90
- workflow_info.to_bind_hash.each do |k, v|
91
- param = ":#{k}"
92
- #LyberCore::Log.debug("Setting: #{param} #{v}")
93
- cursor.bind_param(param, v)
94
- end
95
-
96
- num_rows = cursor.exec
97
- unless num_rows > 0
98
- raise "Expected more than 0 rows to be updated"
99
- end
100
- ensure
101
- cursor.close
102
- end
103
-
104
- # @return String The columns appended with comma and newline
70
+ # @return [String] The columns appended with comma and newline
105
71
  def wf_column_string
106
- WF_COLUMNS.inject('') { |str, col| str << col << ",\n"}
72
+ WF_COLUMNS.join(",\n")
107
73
  end
108
74
 
109
- # @return String The columns prepended with 'w.' and appended with comma and newline
75
+ # @return [String] The columns prepended with 'w.' and appended with comma and newline
110
76
  def wf_archive_column_string
111
- WF_COLUMNS.inject('') { |str, col| str << 'w.' << col << ",\n"}
77
+ WF_COLUMNS.map { |col| "#{@workflow_table}.#{col}" }.join(",\n")
112
78
  end
113
79
 
114
80
  # Use this as a one-shot method to archive all the steps of an object's particular datastream
@@ -120,17 +86,14 @@ module Dor
120
86
  # @param [String] version
121
87
  def archive_one_datastream(repository, druid, datastream, version)
122
88
  criteria = [ArchiveCriteria.new(repository, druid, datastream, version)]
123
- connect_to_db
124
89
  archive_rows criteria
125
- ensure
126
- @conn.logoff if(@conn)
127
90
  end
128
91
 
129
92
  # Copies rows from the workflow table to the workflow_archive table, then deletes the rows from workflow
130
93
  # Both operations must complete, or they get rolled back
131
94
  # @param [Array<ArchiveCriteria>] objs List of objects returned from {#find_completed_objects} and mapped to an array of ArchiveCriteria objects.
132
95
  def archive_rows(objs)
133
- Array(objs).each do |obj|
96
+ objs.each do |obj|
134
97
  tries = 0
135
98
  begin
136
99
  tries += 1
@@ -138,11 +101,8 @@ module Dor
138
101
  @archived += 1
139
102
  rescue => e
140
103
  LyberCore::Log.error "Rolling back transaction due to: #{e.inspect}\n" << e.backtrace.join("\n") << "\n!!!!!!!!!!!!!!!!!!"
141
- @conn.rollback
142
-
143
- # Retry this druid up to 3 times
144
- if tries < 3
145
- LyberCore::Log.error " Retrying archive operation in #{@retry_delay.to_s} seconds..."
104
+ if tries < 3 # Retry this druid up to 3 times
105
+ LyberCore::Log.error " Retrying archive operation in #{@retry_delay} seconds..."
146
106
  sleep @retry_delay
147
107
  retry
148
108
  end
@@ -150,55 +110,55 @@ module Dor
150
110
 
151
111
  @errors += 1
152
112
  if @errors >= 3
153
- LyberCore::Log.fatal("Too many errors. Archiving halted")
113
+ LyberCore::Log.fatal('Too many errors. Archiving halted')
154
114
  break
155
115
  end
156
116
  end
157
-
158
117
  end # druids.each
159
118
  end
160
119
 
161
120
  # @param [ArchiveCriteria] workflow_info contains parameters on the workflow rows to archive
162
121
  def do_one_archive(workflow_info)
163
122
  LyberCore::Log.info "Archiving #{workflow_info.inspect}"
164
-
165
-
166
- copy_sql =<<-EOSQL
123
+ copy_sql = <<-EOSQL
167
124
  insert into #{@workflow_archive_table} (
168
- #{wf_column_string}
169
- VERSION
125
+ #{wf_column_string},
126
+ version
170
127
  )
171
128
  select
172
- #{wf_archive_column_string}
173
- #{workflow_info.version} as VERSION
174
- from #{@workflow_table} w
175
- where w.druid = :DRUID
176
- and w.datastream = :DATASTREAM
129
+ #{wf_archive_column_string},
130
+ #{workflow_info.version} as version
131
+ from #{@workflow_table}
132
+ where #{@workflow_table}.druid = :druid
133
+ and #{@workflow_table}.datastream = :datastream
177
134
  EOSQL
178
135
 
179
- delete_sql = "delete #{@workflow_table} where druid = :DRUID and datastream = :DATASTREAM "
136
+ delete_sql = "delete from #{@workflow_table} where druid = :druid and datastream = :datastream "
180
137
 
181
138
  if(workflow_info.repository)
182
- copy_sql << "and w.repository = :REPOSITORY"
183
- delete_sql << "and repository = :REPOSITORY"
139
+ copy_sql += "and #{@workflow_table}.repository = :repository"
140
+ delete_sql += 'and repository = :repository'
184
141
  else
185
- copy_sql << "and w.repository IS NULL"
186
- delete_sql << "and repository IS NULL"
142
+ copy_sql += "and #{@workflow_table}.repository IS NULL"
143
+ delete_sql += 'and repository IS NULL'
187
144
  end
188
145
 
189
- bind_and_exec_sql(copy_sql, workflow_info)
146
+ conn.transaction do
147
+ conn.run Sequel::SQL::PlaceholderLiteralString.new(copy_sql, workflow_info.to_bind_hash)
190
148
 
191
- LyberCore::Log.debug " Removing old workflow rows"
192
- bind_and_exec_sql(delete_sql, workflow_info)
149
+ LyberCore::Log.debug ' Removing old workflow rows'
193
150
 
194
- @conn.commit
151
+ conn.run Sequel::SQL::PlaceholderLiteralString.new(delete_sql, workflow_info.to_bind_hash)
152
+ end
195
153
  end
196
154
 
197
155
  # Finds objects where all workflow steps are complete
198
- # Returns an array of hashes, each hash having the following keys:
199
- # {"REPOSITORY"=>"dor", "DRUID"=>"druid:345", "DATASTREAM"=>"googleScannedBookWF"}
156
+ # @return [Array<Hash{String=>String}>] each hash returned has the following keys:
157
+ # {"REPOSITORY"=>"dor", "DRUID"=>"druid:345", "DATASTREAM"=>"googleScannedBookWF"}
200
158
  def find_completed_objects
201
- completed_query =<<-EOSQL
159
+ return to_enum(:find_completed_objects) unless block_given?
160
+
161
+ completed_query = <<-EOSQL
202
162
  select distinct repository, datastream, druid
203
163
  from workflow w1
204
164
  where w1.status in ('completed', 'skipped')
@@ -213,18 +173,15 @@ module Dor
213
173
  )
214
174
  EOSQL
215
175
 
216
- rows = []
217
- cursor = @conn.exec(completed_query)
218
- while r = cursor.fetch_hash
219
- rows << r
176
+ conn.fetch(completed_query) do |row|
177
+ yield row
220
178
  end
221
- rows
222
179
  end
223
180
 
224
181
  # @param [Array<Hash>] rows result from #find_completed_objects
225
182
  # @return [Array<ArchiveCriteria>] each result mapped to an ArchiveCriteria object
226
183
  def map_result_to_criteria(rows)
227
- criteria = rows.map do |r|
184
+ rows.lazy.map do |r|
228
185
  begin
229
186
  ArchiveCriteria.new.setup_from_query(r)
230
187
  rescue => e
@@ -232,47 +189,22 @@ module Dor
232
189
  LyberCore::Log.error("#{e.inspect}\n" + e.backtrace.join("\n"))
233
190
  nil
234
191
  end
235
- end
236
- criteria.reject {|c| c.nil?}
237
- end
238
-
239
- def simple_sql_exec(sql)
240
- @conn.exec(sql)
241
- rescue Exception => e
242
- LyberCore::Log.warn "Ignoring error: #{e.message}\n while trying to execute: " << sql
243
- end
244
-
245
- def with_indexing_disabled(&block)
246
- simple_sql_exec("drop index ds_wf_ar_bitmap_idx")
247
- simple_sql_exec("drop index repo_wf_ar_bitmap_idx")
248
- yield
249
- ensure
250
- simple_sql_exec("create bitmap index ds_wf_ar_bitmap_idx on workflow_archive (datastream)")
251
- simple_sql_exec("create bitmap index repo_wf_ar_bitmap_idx on workflow_archive (repository)")
192
+ end.reject { |r| r.nil? }
252
193
  end
253
194
 
254
195
  # Does the work of finding completed objects and archiving the rows
255
196
  def archive
256
- connect_to_db
257
197
  objs = find_completed_objects
258
198
 
259
- if objs.size == 0
260
- LyberCore::Log.info "Nothing to archive"
199
+ if objs.none?
200
+ LyberCore::Log.info 'Nothing to archive'
261
201
  exit true
262
202
  end
263
-
264
- LyberCore::Log.info "Found #{objs.size.to_s} completed workflows"
265
- objs = objs.first(600) # FIXME: temporarily limit objs processed until we fix cron not to fire if job still running
266
-
203
+ LyberCore::Log.info "Found #{objs.size} completed workflows"
267
204
  archiving_criteria = map_result_to_criteria(objs)
268
- with_indexing_disabled { archive_rows(archiving_criteria) }
205
+ archive_rows(archiving_criteria)
269
206
 
270
- LyberCore::Log.info "DONE! Processed #{@archived.to_s} objects with #{@errors.to_s} errors" if(@errors < 3 )
271
- ensure
272
- @conn.logoff
273
- destroy_pool
207
+ LyberCore::Log.info "DONE! Processed #{@archived.to_s} objects with #{@errors.to_s} errors" if @errors < 3
274
208
  end
275
-
276
209
  end
277
-
278
210
  end
@@ -0,0 +1 @@
1
+ require 'dor/workflow_archiver'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: workflow-archiver
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.2
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Willy Mene
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-30 00:00:00.000000000 Z
11
+ date: 2016-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lyber-core
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: ruby-oci8
42
+ name: sequel
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -95,21 +95,7 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
- name: coveralls
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: active-fedora
98
+ name: sqlite3
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
101
  - - ">="
@@ -132,6 +118,7 @@ files:
132
118
  - VERSION
133
119
  - lib/dor/archiver_version.rb
134
120
  - lib/dor/workflow_archiver.rb
121
+ - lib/workflow-archiver.rb
135
122
  homepage:
136
123
  licenses: []
137
124
  metadata: {}
@@ -151,7 +138,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
151
138
  version: 1.3.6
152
139
  requirements: []
153
140
  rubyforge_project:
154
- rubygems_version: 2.6.13
141
+ rubygems_version: 2.4.5.1
155
142
  signing_key:
156
143
  specification_version: 4
157
144
  summary: Enables archiving of DOR workflows