cdmbl 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cdmbl/batch_deleter.rb +51 -0
- data/lib/cdmbl/batch_deleter_worker.rb +41 -0
- data/lib/cdmbl/default_solr.rb +12 -1
- data/lib/cdmbl/extractor.rb +2 -6
- data/lib/cdmbl/oai_client.rb +24 -0
- data/lib/cdmbl/oai_deletables.rb +44 -0
- data/lib/cdmbl/oai_get_record.rb +30 -0
- data/lib/cdmbl/oai_request.rb +10 -2
- data/lib/cdmbl/version.rb +1 -1
- data/lib/cdmbl.rb +6 -1
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c42251d98410bacf0c610a536e9514e0e4a6662
|
4
|
+
data.tar.gz: 77fb5f56972cf1bb3b7e31ad8be7188cdf4c5fda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 49d38e10d6871c4d4e4e249624b543787f6476aaebb680748babf822932127eac82bdb81259177b7b9e1e543b816ac012e4d08735bf882a57e3829c1175efb13
|
7
|
+
data.tar.gz: 8d99175077e75f997efb3c0233eeed32d1be2142403d3a566452471faa3b13926573a2c4cba19540dfab2328455175e8c19369ce61d5db6fe2becf5df1de3995
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module CDMBL
|
2
|
+
class BatchDeleter
|
3
|
+
attr_reader :prefix,
|
4
|
+
:start,
|
5
|
+
:batch_size,
|
6
|
+
:oai_client,
|
7
|
+
:solr_client,
|
8
|
+
:oai_deletables_klass
|
9
|
+
def initialize(prefix: '',
|
10
|
+
start: 0,
|
11
|
+
batch_size: 200,
|
12
|
+
oai_client: :missing_oai_client,
|
13
|
+
solr_client: :missing_solr_client,
|
14
|
+
oai_deletables_klass: OaiDeletables)
|
15
|
+
@prefix = prefix
|
16
|
+
@start = start
|
17
|
+
@batch_size = batch_size
|
18
|
+
@oai_client = oai_client
|
19
|
+
@solr_client = solr_client
|
20
|
+
@oai_deletables_klass = oai_deletables_klass
|
21
|
+
end
|
22
|
+
|
23
|
+
def delete!
|
24
|
+
solr_client.delete deletables
|
25
|
+
end
|
26
|
+
|
27
|
+
def last_batch?
|
28
|
+
start + batch_size >= num_found
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def deletables
|
34
|
+
oai_deletables_klass.new(identifiers: docs,
|
35
|
+
prefix: prefix,
|
36
|
+
oai_client: oai_client).deletables
|
37
|
+
end
|
38
|
+
|
39
|
+
def docs
|
40
|
+
ids.fetch('response', {}).fetch('docs', {})
|
41
|
+
end
|
42
|
+
|
43
|
+
def num_found
|
44
|
+
ids.fetch('response', {}).fetch('numFound', 0)
|
45
|
+
end
|
46
|
+
|
47
|
+
def ids
|
48
|
+
@ids ||= solr_client.ids(start: start)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
module CDMBL
|
3
|
+
class BatchDeleterWorker
|
4
|
+
include Sidekiq::Worker
|
5
|
+
attr_reader :start, :prefix, :oai_url, :solr_url
|
6
|
+
def perform(start = 0, prefix = '', oai_url = '', solr_url = '')
|
7
|
+
@start = start
|
8
|
+
@prefix = prefix
|
9
|
+
@oai_url = oai_url
|
10
|
+
@solr_url = solr_url
|
11
|
+
delete!
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def delete!
|
17
|
+
batch_deleter.delete!
|
18
|
+
unless batch_deleter.last_batch?
|
19
|
+
BatchDeleteWorker.perform_async(start: start + 1,
|
20
|
+
prefix: prefix,
|
21
|
+
oai_url: oai_url,
|
22
|
+
solr_url: solr_url)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def batch_deleter
|
27
|
+
@deleter ||= BatchDeleter.new(start: start,
|
28
|
+
prefix: prefix,
|
29
|
+
solr_client: solr_client,
|
30
|
+
oai_client: oai_client)
|
31
|
+
end
|
32
|
+
|
33
|
+
def solr_client
|
34
|
+
@solr_client ||= CDMBL::Solr.new(url: solr_url)
|
35
|
+
end
|
36
|
+
|
37
|
+
def oai_client
|
38
|
+
@oai_client ||= OaiClient.new base_url: oai_url
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/cdmbl/default_solr.rb
CHANGED
@@ -4,11 +4,22 @@ module CDMBL
|
|
4
4
|
# Communicate with Solr: add / delete stuff
|
5
5
|
class DefaultSolr
|
6
6
|
attr_reader :url, :client
|
7
|
-
def initialize(url: 'http://localhost:8983', client: RSolr)
|
7
|
+
def initialize(url: 'http://localhost:8983/solr/core-here', client: RSolr)
|
8
8
|
@url = url
|
9
9
|
@client = client
|
10
10
|
end
|
11
11
|
|
12
|
+
def ids(start: 0)
|
13
|
+
connection.get('select',
|
14
|
+
:params => { :q => '*:*',
|
15
|
+
:defType => 'edismax',
|
16
|
+
:fl => '',
|
17
|
+
:rows => 200,
|
18
|
+
:start => start
|
19
|
+
}
|
20
|
+
)
|
21
|
+
end
|
22
|
+
|
12
23
|
def connection
|
13
24
|
@connection ||= client.connect url: url
|
14
25
|
end
|
data/lib/cdmbl/extractor.rb
CHANGED
@@ -29,7 +29,7 @@ module CDMBL
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def set_lookup
|
32
|
-
oai_set_lookup.new(oai_sets:
|
32
|
+
oai_set_lookup.new(oai_sets: sets).keyed
|
33
33
|
end
|
34
34
|
|
35
35
|
def deletable_ids
|
@@ -67,11 +67,7 @@ module CDMBL
|
|
67
67
|
end
|
68
68
|
|
69
69
|
def oai_identifiers
|
70
|
-
|
71
|
-
end
|
72
|
-
|
73
|
-
def to_hash(xml)
|
74
|
-
Hash.from_xml(xml)
|
70
|
+
identifiers
|
75
71
|
end
|
76
72
|
end
|
77
73
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'json'
|
2
|
+
module CDMBL
|
3
|
+
class OaiClient
|
4
|
+
attr_reader :base_url, :http_client
|
5
|
+
def initialize(base_url: '', http_client: Net::HTTP)
|
6
|
+
@base_url = base_url
|
7
|
+
@http_client = http_client
|
8
|
+
end
|
9
|
+
|
10
|
+
def request(query)
|
11
|
+
hashify get("#{base_url}?#{query}")
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def get(url)
|
17
|
+
http_client.get_response(URI(url)).body
|
18
|
+
end
|
19
|
+
|
20
|
+
def hashify(xml)
|
21
|
+
Hash.from_xml(xml)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
module CDMBL
|
3
|
+
class OaiDeletables
|
4
|
+
attr_reader :identifiers, :oai_record_klass, :oai_client, :prefix
|
5
|
+
def initialize(identifiers: [],
|
6
|
+
prefix: '',
|
7
|
+
oai_client: OaiClient.new,
|
8
|
+
oai_record_klass: OaiGetRecord)
|
9
|
+
@identifiers = identifiers
|
10
|
+
@prefix = prefix
|
11
|
+
@oai_client = oai_client
|
12
|
+
@oai_record_klass = oai_record_klass
|
13
|
+
end
|
14
|
+
|
15
|
+
def deletables
|
16
|
+
identifiers.select do |id|
|
17
|
+
record_exists? to_oai_id(id)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def to_oai_id(id)
|
24
|
+
"#{prefix}#{collection(id)}/#{id(id)}"
|
25
|
+
end
|
26
|
+
|
27
|
+
def id(id)
|
28
|
+
id_parts(id).last
|
29
|
+
end
|
30
|
+
|
31
|
+
def collection(id)
|
32
|
+
id_parts(id).first
|
33
|
+
end
|
34
|
+
|
35
|
+
def id_parts(id)
|
36
|
+
id.split(':')
|
37
|
+
end
|
38
|
+
|
39
|
+
def record_exists?(identifier)
|
40
|
+
oai_record_klass.new(oai_client: oai_client,
|
41
|
+
identifier: identifier).record_exists?
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'json'
|
2
|
+
module CDMBL
|
3
|
+
# Request a single item from an OAI endpoint
|
4
|
+
# identifier should be forward slash delimited: collection/identifier
|
5
|
+
class OaiGetRecord
|
6
|
+
attr_reader :identifier, :oai_client
|
7
|
+
def initialize(identifier: '', oai_client: OaiClient.new)
|
8
|
+
@identifier = identifier
|
9
|
+
@oai_client = oai_client
|
10
|
+
end
|
11
|
+
|
12
|
+
def record_exists?
|
13
|
+
(/The value of the identifier argument is unknown/ =~ record_errors) == nil
|
14
|
+
end
|
15
|
+
|
16
|
+
def record
|
17
|
+
@record ||= oai_client.request query
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def record_errors
|
23
|
+
record.fetch('OAI_PMH', {}).fetch('error', '')
|
24
|
+
end
|
25
|
+
|
26
|
+
def query
|
27
|
+
"verb=GetRecord&identifier=#{identifier}&metadataPrefix=oai_dc"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/cdmbl/oai_request.rb
CHANGED
@@ -1,16 +1,24 @@
|
|
1
|
+
require 'json'
|
1
2
|
module CDMBL
|
2
3
|
class OaiRequest
|
3
|
-
attr_reader :base_uri,
|
4
|
+
attr_reader :base_uri,
|
5
|
+
:resumption_token,
|
6
|
+
:client,
|
7
|
+
:from,
|
8
|
+
:set,
|
9
|
+
:identifier
|
4
10
|
def initialize(base_uri: '',
|
5
11
|
resumption_token: false,
|
6
12
|
from: false,
|
7
13
|
set: false,
|
14
|
+
identifier: '',
|
8
15
|
client: Net::HTTP)
|
9
16
|
@base_uri = base_uri
|
10
17
|
@resumption_token = resumption_token
|
11
18
|
@client = client
|
12
19
|
@from = (from) ? "&from=#{from}" : ''
|
13
20
|
@set = (set) ? "&set=#{set}" : ''
|
21
|
+
@identifier = identifier
|
14
22
|
end
|
15
23
|
|
16
24
|
def identifiers
|
@@ -37,7 +45,7 @@ module CDMBL
|
|
37
45
|
|
38
46
|
def request(location)
|
39
47
|
CDMBL::OaiNotification.call!(location)
|
40
|
-
client.get_response(URI(location)).body
|
48
|
+
Hash.from_xml(client.get_response(URI(location)).body)
|
41
49
|
end
|
42
50
|
end
|
43
51
|
end
|
data/lib/cdmbl/version.rb
CHANGED
data/lib/cdmbl.rb
CHANGED
@@ -16,4 +16,9 @@ require 'cdmbl/default_cdm_notification'
|
|
16
16
|
require 'cdmbl/default_oai_notification'
|
17
17
|
require 'cdmbl/default_loader_notification'
|
18
18
|
require 'cdmbl/hooks'
|
19
|
-
require 'cdmbl/oai_filter'
|
19
|
+
require 'cdmbl/oai_filter'
|
20
|
+
require 'cdmbl/oai_client'
|
21
|
+
require 'cdmbl/oai_get_record'
|
22
|
+
require 'cdmbl/oai_deletables'
|
23
|
+
require 'cdmbl/batch_deleter'
|
24
|
+
require 'cdmbl/batch_deleter_worker'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -208,6 +208,8 @@ files:
|
|
208
208
|
- bin/setup
|
209
209
|
- cdmbl.gemspec
|
210
210
|
- lib/cdmbl.rb
|
211
|
+
- lib/cdmbl/batch_deleter.rb
|
212
|
+
- lib/cdmbl/batch_deleter_worker.rb
|
211
213
|
- lib/cdmbl/default_cdm_notification.rb
|
212
214
|
- lib/cdmbl/default_completed_callback.rb
|
213
215
|
- lib/cdmbl/default_loader_notification.rb
|
@@ -221,7 +223,10 @@ files:
|
|
221
223
|
- lib/cdmbl/formatters.rb
|
222
224
|
- lib/cdmbl/hooks.rb
|
223
225
|
- lib/cdmbl/loader.rb
|
226
|
+
- lib/cdmbl/oai_client.rb
|
227
|
+
- lib/cdmbl/oai_deletables.rb
|
224
228
|
- lib/cdmbl/oai_filter.rb
|
229
|
+
- lib/cdmbl/oai_get_record.rb
|
225
230
|
- lib/cdmbl/oai_request.rb
|
226
231
|
- lib/cdmbl/oai_set_lookup.rb
|
227
232
|
- lib/cdmbl/rake_task.rb
|