cdmbl 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 992abfe17a0e27829cf5d531cac21938997e301e
4
- data.tar.gz: 9ef14dcdefb7fbdd428bdf5427af74ff5e30d9c5
3
+ metadata.gz: 5c42251d98410bacf0c610a536e9514e0e4a6662
4
+ data.tar.gz: 77fb5f56972cf1bb3b7e31ad8be7188cdf4c5fda
5
5
  SHA512:
6
- metadata.gz: f6849f88abd6eb0ffd2d5e2606bc5c9e85a583e3f5e2f3502799288455749f47c5a70afd2a4ebb2ca9941913ac460149e04930b78916c36332f81f8b3b6056ba
7
- data.tar.gz: 5ec0e748e237dfca5f576472eb24a96bda23447b937a7f130683783c9311762b8b5d8868bfff7d71971475d20d453c8eaa5a3d96496b57fe0f645252b8ff8e61
6
+ metadata.gz: 49d38e10d6871c4d4e4e249624b543787f6476aaebb680748babf822932127eac82bdb81259177b7b9e1e543b816ac012e4d08735bf882a57e3829c1175efb13
7
+ data.tar.gz: 8d99175077e75f997efb3c0233eeed32d1be2142403d3a566452471faa3b13926573a2c4cba19540dfab2328455175e8c19369ce61d5db6fe2becf5df1de3995
@@ -0,0 +1,51 @@
1
module CDMBL
  # Deletes one batch of documents from Solr.
  #
  # A page of Solr results is fetched through +solr_client.ids+, handed to an
  # +oai_deletables_klass+ instance to decide which entries are deletable, and
  # the result is passed to +solr_client.delete+.
  #
  # NOTE(review): the entries under response/docs are forwarded untouched as
  # +identifiers+. If the Solr client returns each doc as a Hash (for example
  # {"id" => "col:1"}) rather than a plain id String, the collaborator has to
  # cope with that shape -- confirm against the Solr client in use.
  class BatchDeleter
    attr_reader :prefix,
                :start,
                :batch_size,
                :oai_client,
                :solr_client,
                :oai_deletables_klass

    # @param prefix [String] prepended to identifiers by the deletables class
    # @param start [Integer] offset of this batch within the Solr result set
    # @param batch_size [Integer] number of documents assumed per batch; only
    #   used by #last_batch? (it is not forwarded to +solr_client.ids+)
    # @param oai_client [#request] client handed to the deletables class
    # @param solr_client [#ids, #delete] Solr wrapper
    # @param oai_deletables_klass [Class] built with +identifiers:+, +prefix:+
    #   and +oai_client:+; must respond to +deletables+
    def initialize(prefix: '',
                   start: 0,
                   batch_size: 200,
                   oai_client: :missing_oai_client,
                   solr_client: :missing_solr_client,
                   oai_deletables_klass: OaiDeletables)
      @prefix               = prefix
      @start                = start
      @batch_size           = batch_size
      @oai_client           = oai_client
      @solr_client          = solr_client
      @oai_deletables_klass = oai_deletables_klass
    end

    # Passes this batch's deletable identifiers to +solr_client.delete+.
    def delete!
      solr_client.delete deletables
    end

    # @return [Boolean] true when this batch reaches the end of the result
    #   set, i.e. start + batch_size covers the reported numFound
    def last_batch?
      start + batch_size >= num_found
    end

    private

    def deletables
      oai_deletables_klass.new(identifiers: docs,
                               prefix: prefix,
                               oai_client: oai_client).deletables
    end

    # Falls back to an empty Array (not a Hash): the value is consumed as a
    # list of identifiers.
    def docs
      response.fetch('docs', [])
    end

    def num_found
      response.fetch('numFound', 0)
    end

    def response
      ids.fetch('response', {})
    end

    # Memoized so #delete! and #last_batch? share a single Solr request.
    def ids
      @ids ||= solr_client.ids(start: start)
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'sidekiq'
2
module CDMBL
  # Sidekiq worker that runs one BatchDeleter batch and, unless that batch was
  # the last one, enqueues itself again for the next batch.
  class BatchDeleterWorker
    include Sidekiq::Worker

    attr_reader :start, :prefix, :oai_url, :solr_url

    # @param start [Integer] Solr offset of the batch to process
    # @param prefix [String] identifier prefix forwarded to BatchDeleter
    # @param oai_url [String] base URL used to build the OAI client
    # @param solr_url [String] URL used to build the Solr client
    def perform(start = 0, prefix = '', oai_url = '', solr_url = '')
      @start    = start
      @prefix   = prefix
      @oai_url  = oai_url
      @solr_url = solr_url
      delete!
    end

    private

    def delete!
      batch_deleter.delete!
      return if batch_deleter.last_batch?

      # #perform takes positional arguments, so the follow-up job is enqueued
      # positionally (a keyword hash would arrive as the single +start+ value).
      self.class.perform_async(next_start, prefix, oai_url, solr_url)
    end

    # Moves forward by a whole batch rather than by a single document.
    # NOTE(review): if deletions become visible in Solr between jobs, the
    # offsets of the remaining documents shift -- confirm that paging by
    # offset fits the commit strategy in use.
    def next_start
      start + batch_deleter.batch_size
    end

    def batch_deleter
      @deleter ||= BatchDeleter.new(start: start,
                                    prefix: prefix,
                                    solr_client: solr_client,
                                    oai_client: oai_client)
    end

    def solr_client
      @solr_client ||= CDMBL::Solr.new(url: solr_url)
    end

    def oai_client
      @oai_client ||= OaiClient.new base_url: oai_url
    end
  end
end
@@ -4,11 +4,22 @@ module CDMBL
4
4
  # Communicate with Solr: add / delete stuff
5
5
  class DefaultSolr
6
6
  attr_reader :url, :client
7
- def initialize(url: 'http://localhost:8983', client: RSolr)
7
+ def initialize(url: 'http://localhost:8983/solr/core-here', client: RSolr)
8
8
  @url = url
9
9
  @client = client
10
10
  end
11
11
 
12
# Runs a match-all ('*:*') select query and returns the connection's response.
#
# @param start [Integer] offset of the first document to return
# @param rows [Integer] page size; defaults to 200, the value that used to be
#   hard-coded here
# @return whatever +connection.get+ returns for the select request
def ids(start: 0, rows: 200)
  connection.get('select',
                 params: {
                   q: '*:*',
                   defType: 'edismax',
                   fl: '',
                   rows: rows,
                   start: start
                 })
end
22
+
12
23
  def connection
13
24
  @connection ||= client.connect url: url
14
25
  end
@@ -29,7 +29,7 @@ module CDMBL
29
29
  end
30
30
 
31
31
  def set_lookup
32
- oai_set_lookup.new(oai_sets: to_hash(sets)).keyed
32
+ oai_set_lookup.new(oai_sets: sets).keyed
33
33
  end
34
34
 
35
35
  def deletable_ids
@@ -67,11 +67,7 @@ module CDMBL
67
67
  end
68
68
 
69
69
  def oai_identifiers
70
- to_hash(identifiers)
71
- end
72
-
73
- def to_hash(xml)
74
- Hash.from_xml(xml)
70
+ identifiers
75
71
  end
76
72
  end
77
73
  end
@@ -0,0 +1,24 @@
1
+ require 'json'
2
# Net::HTTP is the default http_client below; require it here so this file
# does not depend on another file having loaded it first.
require 'net/http'

module CDMBL
  # Minimal OAI client: issues a GET for "<base_url>?<query>" and converts the
  # XML response body into a Hash.
  class OaiClient
    attr_reader :base_url, :http_client

    # @param base_url [String] URL of the OAI endpoint, without query string
    # @param http_client [#get_response] called with a URI; the returned
    #   object must respond to +body+
    def initialize(base_url: '', http_client: Net::HTTP)
      @base_url    = base_url
      @http_client = http_client
    end

    # @param query [String] query string, e.g. "verb=GetRecord&identifier=..."
    #   (interpolated as given, not escaped here)
    # @return [Hash] the response body converted with Hash.from_xml
    def request(query)
      hashify get("#{base_url}?#{query}")
    end

    private

    def get(url)
      http_client.get_response(URI(url)).body
    end

    # Hash.from_xml is not part of core Ruby; it must already be loaded by the
    # host application (presumably via ActiveSupport -- confirm).
    def hashify(xml)
      Hash.from_xml(xml)
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'sidekiq'
2
module CDMBL
  # Given a list of local identifiers, works out which ones no longer have a
  # record at the OAI endpoint and can therefore be deleted.
  #
  # Each identifier is expected to be a colon-delimited String such as
  # "collection:number"; it is turned into "<prefix>collection/number" before
  # being looked up.
  class OaiDeletables
    attr_reader :identifiers, :oai_record_klass, :oai_client, :prefix

    # @param identifiers [Array<String>] colon-delimited identifiers
    # @param prefix [String] prepended to every OAI identifier
    # @param oai_client [#request] client handed to the record class
    # @param oai_record_klass [Class] built with +oai_client:+ and
    #   +identifier:+; must respond to +record_exists?+
    def initialize(identifiers: [],
                   prefix: '',
                   oai_client: OaiClient.new,
                   oai_record_klass: OaiGetRecord)
      @identifiers      = identifiers
      @prefix           = prefix
      @oai_client       = oai_client
      @oai_record_klass = oai_record_klass
    end

    # Only identifiers whose record can NOT be found are deletable; records
    # that still exist upstream are kept.
    #
    # @return [Array<String>] the subset of +identifiers+ with no OAI record
    def deletables
      identifiers.reject { |id| record_exists?(to_oai_id(id)) }
    end

    private

    # "collection:number" => "<prefix>collection/number". With more than two
    # colon-separated parts, the first and the last part are used.
    def to_oai_id(id)
      parts = id.split(':')
      "#{prefix}#{parts.first}/#{parts.last}"
    end

    def record_exists?(identifier)
      oai_record_klass.new(oai_client: oai_client,
                           identifier: identifier).record_exists?
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'json'
2
module CDMBL
  # Request a single item from an OAI endpoint.
  # identifier should be forward slash delimited: collection/identifier
  class OaiGetRecord
    # Error text that marks an identifier the endpoint does not know.
    UNKNOWN_IDENTIFIER = /The value of the identifier argument is unknown/.freeze
    private_constant :UNKNOWN_IDENTIFIER

    attr_reader :identifier, :oai_client

    # @param identifier [String] OAI identifier to look up
    # @param oai_client [#request] called with the query string; expected to
    #   return a Hash
    def initialize(identifier: '', oai_client: OaiClient.new)
      @identifier = identifier
      @oai_client = oai_client
    end

    # A record counts as existing unless the response carries the
    # "identifier ... is unknown" error; any other error (or none at all)
    # is treated as "exists".
    #
    # @return [Boolean]
    def record_exists?
      # to_s keeps the match safe when the error payload is not a plain
      # String (e.g. an Array when several errors are present).
      (record_errors.to_s =~ UNKNOWN_IDENTIFIER).nil?
    end

    # @return [Hash] the memoized GetRecord response from the client
    def record
      @record ||= oai_client.request query
    end

    private

    def record_errors
      record.fetch('OAI_PMH', {}).fetch('error', '')
    end

    def query
      "verb=GetRecord&identifier=#{identifier}&metadataPrefix=oai_dc"
    end
  end
end
@@ -1,16 +1,24 @@
1
+ require 'json'
1
2
  module CDMBL
2
3
  class OaiRequest
3
- attr_reader :base_uri, :resumption_token, :client, :from, :set
4
+ attr_reader :base_uri,
5
+ :resumption_token,
6
+ :client,
7
+ :from,
8
+ :set,
9
+ :identifier
4
10
  def initialize(base_uri: '',
5
11
  resumption_token: false,
6
12
  from: false,
7
13
  set: false,
14
+ identifier: '',
8
15
  client: Net::HTTP)
9
16
  @base_uri = base_uri
10
17
  @resumption_token = resumption_token
11
18
  @client = client
12
19
  @from = (from) ? "&from=#{from}" : ''
13
20
  @set = (set) ? "&set=#{set}" : ''
21
+ @identifier = identifier
14
22
  end
15
23
 
16
24
  def identifiers
@@ -37,7 +45,7 @@ module CDMBL
37
45
 
38
46
  def request(location)
39
47
  CDMBL::OaiNotification.call!(location)
40
- client.get_response(URI(location)).body
48
+ Hash.from_xml(client.get_response(URI(location)).body)
41
49
  end
42
50
  end
43
51
  end
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/cdmbl.rb CHANGED
@@ -16,4 +16,9 @@ require 'cdmbl/default_cdm_notification'
16
16
  require 'cdmbl/default_oai_notification'
17
17
  require 'cdmbl/default_loader_notification'
18
18
  require 'cdmbl/hooks'
19
- require 'cdmbl/oai_filter'
19
+ require 'cdmbl/oai_filter'
20
+ require 'cdmbl/oai_client'
21
+ require 'cdmbl/oai_get_record'
22
+ require 'cdmbl/oai_deletables'
23
+ require 'cdmbl/batch_deleter'
24
+ require 'cdmbl/batch_deleter_worker'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-06-05 00:00:00.000000000 Z
11
+ date: 2017-06-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hash_at_path
@@ -208,6 +208,8 @@ files:
208
208
  - bin/setup
209
209
  - cdmbl.gemspec
210
210
  - lib/cdmbl.rb
211
+ - lib/cdmbl/batch_deleter.rb
212
+ - lib/cdmbl/batch_deleter_worker.rb
211
213
  - lib/cdmbl/default_cdm_notification.rb
212
214
  - lib/cdmbl/default_completed_callback.rb
213
215
  - lib/cdmbl/default_loader_notification.rb
@@ -221,7 +223,10 @@ files:
221
223
  - lib/cdmbl/formatters.rb
222
224
  - lib/cdmbl/hooks.rb
223
225
  - lib/cdmbl/loader.rb
226
+ - lib/cdmbl/oai_client.rb
227
+ - lib/cdmbl/oai_deletables.rb
224
228
  - lib/cdmbl/oai_filter.rb
229
+ - lib/cdmbl/oai_get_record.rb
225
230
  - lib/cdmbl/oai_request.rb
226
231
  - lib/cdmbl/oai_set_lookup.rb
227
232
  - lib/cdmbl/rake_task.rb