cdmbl 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cdmbl/batch_deleter.rb +12 -11
- data/lib/cdmbl/batch_deleter_worker.rb +13 -8
- data/lib/cdmbl/default_solr.rb +1 -1
- data/lib/cdmbl/oai_deletables.rb +3 -3
- data/lib/cdmbl/tasks/delete.rake +12 -0
- data/lib/cdmbl/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec2dc41e2bed58f7f93388d3327fa4b45812ab33
|
4
|
+
data.tar.gz: e7d0b8c9b54fce9f35978beafe515f9e17f5441d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3ed793f0f5cf45571dd89e03649938fbc07a54a05a03942e0646803a8b3270c3c0ae8e97e4b095038cc8634591c2f0897d192ff7bb2c7f9e28415ca2e92c3d2b
|
7
|
+
data.tar.gz: 1aa8224e92bdb48bf875e9b4a188db4d06e003f7e0b95b616aff100bb4dd653c12aeb0a6929e74cdd5dfe7e2197a64daa9d762b6e2aacb9ba0afb0ea9ec9a67e
|
data/lib/cdmbl/batch_deleter.rb
CHANGED
@@ -8,7 +8,7 @@ module CDMBL
|
|
8
8
|
:oai_deletables_klass
|
9
9
|
def initialize(prefix: '',
|
10
10
|
start: 0,
|
11
|
-
batch_size:
|
11
|
+
batch_size: 10,
|
12
12
|
oai_client: :missing_oai_client,
|
13
13
|
solr_client: :missing_solr_client,
|
14
14
|
oai_deletables_klass: OaiDeletables)
|
@@ -28,24 +28,25 @@ module CDMBL
|
|
28
28
|
start + batch_size >= num_found
|
29
29
|
end
|
30
30
|
|
31
|
-
private
|
32
|
-
|
33
31
|
def deletables
|
34
|
-
oai_deletables_klass.new(identifiers:
|
35
|
-
|
36
|
-
|
32
|
+
@deletables ||= oai_deletables_klass.new(identifiers: ids,
|
33
|
+
oai_client: oai_client,
|
34
|
+
prefix: prefix).deletables
|
37
35
|
end
|
38
36
|
|
39
|
-
|
40
|
-
|
37
|
+
private
|
38
|
+
|
39
|
+
|
40
|
+
def ids
|
41
|
+
results.fetch('response', {}).fetch('docs', {}).map { |doc| doc['id'] }
|
41
42
|
end
|
42
43
|
|
43
44
|
def num_found
|
44
|
-
|
45
|
+
results.fetch('response', {}).fetch('numFound', 0)
|
45
46
|
end
|
46
47
|
|
47
|
-
def
|
48
|
-
@
|
48
|
+
def results
|
49
|
+
@results ||= solr_client.ids(start: start)
|
49
50
|
end
|
50
51
|
end
|
51
52
|
end
|
data/lib/cdmbl/batch_deleter_worker.rb
CHANGED
@@ -3,31 +3,36 @@ module CDMBL
|
|
3
3
|
class BatchDeleterWorker
|
4
4
|
include Sidekiq::Worker
|
5
5
|
attr_reader :start, :prefix, :oai_url, :solr_url
|
6
|
+
attr_accessor :batch_deleter_klass, :oai_client, :solr_client
|
7
|
+
sidekiq_options :backtrace => true
|
6
8
|
def perform(start = 0, prefix = '', oai_url = '', solr_url = '')
|
7
9
|
@start = start
|
8
10
|
@prefix = prefix
|
9
11
|
@oai_url = oai_url
|
10
12
|
@solr_url = solr_url
|
11
13
|
delete!
|
14
|
+
batch_deleter
|
12
15
|
end
|
13
16
|
|
14
17
|
private
|
15
18
|
|
19
|
+
def batch_deleter_klass
|
20
|
+
@batch_deleter_klass ||= BatchDeleter
|
21
|
+
end
|
22
|
+
|
16
23
|
def delete!
|
17
24
|
batch_deleter.delete!
|
18
25
|
unless batch_deleter.last_batch?
|
19
|
-
|
20
|
-
prefix: prefix,
|
21
|
-
oai_url: oai_url,
|
22
|
-
solr_url: solr_url)
|
26
|
+
BatchDeleterWorker.perform_async start + 1, prefix, oai_url, solr_url
|
23
27
|
end
|
24
28
|
end
|
25
29
|
|
26
30
|
def batch_deleter
|
27
|
-
@
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
@batch_deleter ||=
|
32
|
+
batch_deleter_klass.new(start: start,
|
33
|
+
prefix: prefix,
|
34
|
+
solr_client: solr_client,
|
35
|
+
oai_client: oai_client)
|
31
36
|
end
|
32
37
|
|
33
38
|
def solr_client
|
data/lib/cdmbl/default_solr.rb
CHANGED
data/lib/cdmbl/oai_deletables.rb
CHANGED
@@ -14,7 +14,7 @@ module CDMBL
|
|
14
14
|
|
15
15
|
def deletables
|
16
16
|
identifiers.select do |id|
|
17
|
-
|
17
|
+
record_missing? to_oai_id(id)
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
@@ -36,8 +36,8 @@ module CDMBL
|
|
36
36
|
id.split(':')
|
37
37
|
end
|
38
38
|
|
39
|
-
def
|
40
|
-
oai_record_klass.new(oai_client: oai_client,
|
39
|
+
def record_missing?(identifier)
|
40
|
+
!oai_record_klass.new(oai_client: oai_client,
|
41
41
|
identifier: identifier).record_exists?
|
42
42
|
end
|
43
43
|
end
|
data/lib/cdmbl/tasks/delete.rake
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'cdmbl'
|
2
|
+
|
3
|
+
namespace :cdmbl do
|
4
|
+
desc "delete all records that aren't in a given OAI endpoint"
|
5
|
+
task :delete_batch, [:start, :prefix, :oai_url, :solr_url] do |t, args|
|
6
|
+
CDMBL::BatchDeleterWorker.perform_async(args[:start].to_i,
|
7
|
+
args[:prefix],
|
8
|
+
args[:oai_url],
|
9
|
+
args[:solr_url])
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
data/lib/cdmbl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -231,6 +231,7 @@ files:
|
|
231
231
|
- lib/cdmbl/oai_set_lookup.rb
|
232
232
|
- lib/cdmbl/rake_task.rb
|
233
233
|
- lib/cdmbl/record_transformer.rb
|
234
|
+
- lib/cdmbl/tasks/delete.rake
|
234
235
|
- lib/cdmbl/tasks/etl.rake
|
235
236
|
- lib/cdmbl/transformer.rb
|
236
237
|
- lib/cdmbl/version.rb
|