cdmbl 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cdmbl/batch_deleter.rb +51 -0
- data/lib/cdmbl/batch_deleter_worker.rb +41 -0
- data/lib/cdmbl/default_solr.rb +12 -1
- data/lib/cdmbl/extractor.rb +2 -6
- data/lib/cdmbl/oai_client.rb +24 -0
- data/lib/cdmbl/oai_deletables.rb +44 -0
- data/lib/cdmbl/oai_get_record.rb +30 -0
- data/lib/cdmbl/oai_request.rb +10 -2
- data/lib/cdmbl/version.rb +1 -1
- data/lib/cdmbl.rb +6 -1
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c42251d98410bacf0c610a536e9514e0e4a6662
|
4
|
+
data.tar.gz: 77fb5f56972cf1bb3b7e31ad8be7188cdf4c5fda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 49d38e10d6871c4d4e4e249624b543787f6476aaebb680748babf822932127eac82bdb81259177b7b9e1e543b816ac012e4d08735bf882a57e3829c1175efb13
|
7
|
+
data.tar.gz: 8d99175077e75f997efb3c0233eeed32d1be2142403d3a566452471faa3b13926573a2c4cba19540dfab2328455175e8c19369ce61d5db6fe2becf5df1de3995
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module CDMBL
  # Removes one batch of stale records from Solr.
  #
  # A page of documents is requested from the Solr client starting at +start+,
  # handed to +oai_deletables_klass+ to decide which of them are deletable,
  # and the result is passed to +solr_client.delete+.
  class BatchDeleter
    attr_reader :prefix,
                :start,
                :batch_size,
                :oai_client,
                :solr_client,
                :oai_deletables_klass

    # @param prefix [String] forwarded unchanged to +oai_deletables_klass+
    # @param start [Integer] offset passed to +solr_client.ids+
    # @param batch_size [Integer] size of a batch, used by #last_batch?
    # @param oai_client [#request] forwarded unchanged to +oai_deletables_klass+
    # @param solr_client [#ids, #delete] Solr wrapper
    # @param oai_deletables_klass [Class] responds to
    #   +new(identifiers:, prefix:, oai_client:)+ and +#deletables+
    def initialize(prefix: '',
                   start: 0,
                   batch_size: 200,
                   oai_client: :missing_oai_client,
                   solr_client: :missing_solr_client,
                   oai_deletables_klass: OaiDeletables)
      @prefix = prefix
      @start = start
      @batch_size = batch_size
      @oai_client = oai_client
      @solr_client = solr_client
      @oai_deletables_klass = oai_deletables_klass
    end

    # Delete this batch's deletable records from Solr.
    #
    # @return whatever +solr_client.delete+ returns
    def delete!
      solr_client.delete deletables
    end

    # @return [Boolean] true when this batch reaches or passes the number of
    #   documents Solr reported
    def last_batch?
      start + batch_size >= num_found
    end

    private

    def deletables
      oai_deletables_klass.new(identifiers: docs,
                               prefix: prefix,
                               oai_client: oai_client).deletables
    end

    # Documents of the current page. Falls back to an empty list (not a Hash)
    # so a response without 'docs' still yields a collection of identifiers.
    def docs
      ids.fetch('response', {}).fetch('docs', [])
    end

    def num_found
      ids.fetch('response', {}).fetch('numFound', 0)
    end

    # Memoized so the Solr query runs once per batch.
    # NOTE(review): only +start+ is sent to the Solr client; +batch_size+ is not,
    # so the page size actually fetched is decided by the client - keep the two
    # values in sync.
    def ids
      @ids ||= solr_client.ids(start: start)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
module CDMBL
  # Sidekiq worker that runs one BatchDeleter batch and, unless that batch was
  # the last one, enqueues itself again for the next offset.
  class BatchDeleterWorker
    include Sidekiq::Worker
    attr_reader :start, :prefix, :oai_url, :solr_url

    # @param start [Integer] offset of the batch to process
    # @param prefix [String] passed through to BatchDeleter
    # @param oai_url [String] base URL given to OaiClient
    # @param solr_url [String] URL given to CDMBL::Solr
    def perform(start = 0, prefix = '', oai_url = '', solr_url = '')
      @start = start
      @prefix = prefix
      @oai_url = oai_url
      @solr_url = solr_url
      delete!
    end

    private

    def delete!
      batch_deleter.delete!
      return if batch_deleter.last_batch?

      # Re-enqueue with positional arguments (matching #perform's signature)
      # and move the offset forward by a whole batch, which is how
      # BatchDeleter#last_batch? interprets +start+.
      self.class.perform_async(start + batch_deleter.batch_size,
                               prefix,
                               oai_url,
                               solr_url)
    end

    def batch_deleter
      @deleter ||= BatchDeleter.new(start: start,
                                    prefix: prefix,
                                    solr_client: solr_client,
                                    oai_client: oai_client)
    end

    def solr_client
      @solr_client ||= CDMBL::Solr.new(url: solr_url)
    end

    def oai_client
      @oai_client ||= OaiClient.new base_url: oai_url
    end
  end
end
|
data/lib/cdmbl/default_solr.rb
CHANGED
@@ -4,11 +4,22 @@ module CDMBL
|
|
4
4
|
# Communicate with Solr: add / delete stuff
|
5
5
|
class DefaultSolr
|
6
6
|
attr_reader :url, :client
|
7
|
-
def initialize(url: 'http://localhost:8983', client: RSolr)
|
7
|
+
def initialize(url: 'http://localhost:8983/solr/core-here', client: RSolr)
|
8
8
|
@url = url
|
9
9
|
@client = client
|
10
10
|
end
|
11
11
|
|
12
|
+
# Request one page of documents from the 'select' handler, matching
# every document ('*:*').
#
# @param start [Integer] offset of the first document to return
# @param rows [Integer] maximum number of documents to return; defaults to
#   the previously hard-coded 200
# @return whatever +connection.get+ returns for the query
def ids(start: 0, rows: 200)
  connection.get('select',
                 params: {
                   q: '*:*',
                   defType: 'edismax',
                   fl: '',
                   rows: rows,
                   start: start
                 })
end
|
22
|
+
|
12
23
|
def connection
|
13
24
|
@connection ||= client.connect url: url
|
14
25
|
end
|
data/lib/cdmbl/extractor.rb
CHANGED
@@ -29,7 +29,7 @@ module CDMBL
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def set_lookup
|
32
|
-
oai_set_lookup.new(oai_sets:
|
32
|
+
oai_set_lookup.new(oai_sets: sets).keyed
|
33
33
|
end
|
34
34
|
|
35
35
|
def deletable_ids
|
@@ -67,11 +67,7 @@ module CDMBL
|
|
67
67
|
end
|
68
68
|
|
69
69
|
def oai_identifiers
|
70
|
-
|
71
|
-
end
|
72
|
-
|
73
|
-
def to_hash(xml)
|
74
|
-
Hash.from_xml(xml)
|
70
|
+
identifiers
|
75
71
|
end
|
76
72
|
end
|
77
73
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'json'
|
2
|
+
module CDMBL
  # Minimal OAI client: issues a GET against the endpoint and returns the
  # XML response converted to a Hash.
  class OaiClient
    attr_reader :base_url, :http_client

    # @param base_url [String] endpoint URL, without a query string
    # @param http_client [#get_response] object used to perform the GET
    def initialize(base_url: '', http_client: Net::HTTP)
      @http_client = http_client
      @base_url = base_url
    end

    # @param query [String] query string appended to +base_url+ after a '?'
    # @return the result of +Hash.from_xml+ on the response body
    def request(query)
      endpoint = URI("#{base_url}?#{query}")
      xml = http_client.get_response(endpoint).body
      Hash.from_xml(xml)
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
module CDMBL
  # Filters a list of identifiers down to those that have no record at the
  # OAI endpoint any more, i.e. the ones that are safe to delete.
  class OaiDeletables
    attr_reader :identifiers, :oai_record_klass, :oai_client, :prefix

    # @param identifiers [Array] identifiers to check; each one is split on ':'
    #   NOTE(review): assumes every identifier is a String shaped like
    #   "collection:id" - confirm against what the caller passes in.
    # @param prefix [String] text prepended to each generated OAI identifier
    # @param oai_client [#request] client handed to +oai_record_klass+
    # @param oai_record_klass [Class] responds to
    #   +new(oai_client:, identifier:)+ and +#record_exists?+
    def initialize(identifiers: [],
                   prefix: '',
                   oai_client: OaiClient.new,
                   oai_record_klass: OaiGetRecord)
      @identifiers = identifiers
      @prefix = prefix
      @oai_client = oai_client
      @oai_record_klass = oai_record_klass
    end

    # Identifiers whose record is missing from the OAI endpoint. Identifiers
    # that still resolve to a record are kept out of the result, so live
    # records are never reported as deletable.
    #
    # @return [Array] subset of +identifiers+
    def deletables
      identifiers.reject { |id| record_exists?(to_oai_id(id)) }
    end

    private

    # Builds "<prefix><first part>/<last part>" from a ':'-delimited id.
    def to_oai_id(id)
      parts = id.split(':')
      "#{prefix}#{parts.first}/#{parts.last}"
    end

    def record_exists?(identifier)
      oai_record_klass.new(oai_client: oai_client,
                           identifier: identifier).record_exists?
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'json'
|
2
|
+
module CDMBL
  # Looks up a single item at an OAI endpoint with the GetRecord verb.
  # The identifier should be forward-slash delimited: collection/identifier
  class OaiGetRecord
    attr_reader :identifier, :oai_client

    # @param identifier [String] identifier placed in the GetRecord query
    # @param oai_client [#request] client that performs the request
    def initialize(identifier: '', oai_client: OaiClient.new)
      @oai_client = oai_client
      @identifier = identifier
    end

    # @return [Boolean] false only when the response's error text reports an
    #   unknown identifier
    def record_exists?
      unknown_identifier = /The value of the identifier argument is unknown/
      (unknown_identifier =~ record_errors).nil?
    end

    # The response for this identifier; requested once, then reused.
    def record
      @record ||= oai_client.request(query)
    end

    private

    # Error text of the response, or '' when there is none.
    def record_errors
      oai_pmh = record.fetch('OAI_PMH', {})
      oai_pmh.fetch('error', '')
    end

    def query
      "verb=GetRecord&identifier=#{identifier}&metadataPrefix=oai_dc"
    end
  end
end
|
data/lib/cdmbl/oai_request.rb
CHANGED
@@ -1,16 +1,24 @@
|
|
1
|
+
require 'json'
|
1
2
|
module CDMBL
|
2
3
|
class OaiRequest
|
3
|
-
attr_reader :base_uri,
|
4
|
+
attr_reader :base_uri,
|
5
|
+
:resumption_token,
|
6
|
+
:client,
|
7
|
+
:from,
|
8
|
+
:set,
|
9
|
+
:identifier
|
4
10
|
def initialize(base_uri: '',
|
5
11
|
resumption_token: false,
|
6
12
|
from: false,
|
7
13
|
set: false,
|
14
|
+
identifier: '',
|
8
15
|
client: Net::HTTP)
|
9
16
|
@base_uri = base_uri
|
10
17
|
@resumption_token = resumption_token
|
11
18
|
@client = client
|
12
19
|
@from = (from) ? "&from=#{from}" : ''
|
13
20
|
@set = (set) ? "&set=#{set}" : ''
|
21
|
+
@identifier = identifier
|
14
22
|
end
|
15
23
|
|
16
24
|
def identifiers
|
@@ -37,7 +45,7 @@ module CDMBL
|
|
37
45
|
|
38
46
|
def request(location)
|
39
47
|
CDMBL::OaiNotification.call!(location)
|
40
|
-
client.get_response(URI(location)).body
|
48
|
+
Hash.from_xml(client.get_response(URI(location)).body)
|
41
49
|
end
|
42
50
|
end
|
43
51
|
end
|
data/lib/cdmbl/version.rb
CHANGED
data/lib/cdmbl.rb
CHANGED
@@ -16,4 +16,9 @@ require 'cdmbl/default_cdm_notification'
|
|
16
16
|
require 'cdmbl/default_oai_notification'
|
17
17
|
require 'cdmbl/default_loader_notification'
|
18
18
|
require 'cdmbl/hooks'
|
19
|
-
require 'cdmbl/oai_filter'
|
19
|
+
require 'cdmbl/oai_filter'
|
20
|
+
require 'cdmbl/oai_client'
|
21
|
+
require 'cdmbl/oai_get_record'
|
22
|
+
require 'cdmbl/oai_deletables'
|
23
|
+
require 'cdmbl/batch_deleter'
|
24
|
+
require 'cdmbl/batch_deleter_worker'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -208,6 +208,8 @@ files:
|
|
208
208
|
- bin/setup
|
209
209
|
- cdmbl.gemspec
|
210
210
|
- lib/cdmbl.rb
|
211
|
+
- lib/cdmbl/batch_deleter.rb
|
212
|
+
- lib/cdmbl/batch_deleter_worker.rb
|
211
213
|
- lib/cdmbl/default_cdm_notification.rb
|
212
214
|
- lib/cdmbl/default_completed_callback.rb
|
213
215
|
- lib/cdmbl/default_loader_notification.rb
|
@@ -221,7 +223,10 @@ files:
|
|
221
223
|
- lib/cdmbl/formatters.rb
|
222
224
|
- lib/cdmbl/hooks.rb
|
223
225
|
- lib/cdmbl/loader.rb
|
226
|
+
- lib/cdmbl/oai_client.rb
|
227
|
+
- lib/cdmbl/oai_deletables.rb
|
224
228
|
- lib/cdmbl/oai_filter.rb
|
229
|
+
- lib/cdmbl/oai_get_record.rb
|
225
230
|
- lib/cdmbl/oai_request.rb
|
226
231
|
- lib/cdmbl/oai_set_lookup.rb
|
227
232
|
- lib/cdmbl/rake_task.rb
|