cdmbl 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 992abfe17a0e27829cf5d531cac21938997e301e
4
- data.tar.gz: 9ef14dcdefb7fbdd428bdf5427af74ff5e30d9c5
3
+ metadata.gz: 5c42251d98410bacf0c610a536e9514e0e4a6662
4
+ data.tar.gz: 77fb5f56972cf1bb3b7e31ad8be7188cdf4c5fda
5
5
  SHA512:
6
- metadata.gz: f6849f88abd6eb0ffd2d5e2606bc5c9e85a583e3f5e2f3502799288455749f47c5a70afd2a4ebb2ca9941913ac460149e04930b78916c36332f81f8b3b6056ba
7
- data.tar.gz: 5ec0e748e237dfca5f576472eb24a96bda23447b937a7f130683783c9311762b8b5d8868bfff7d71971475d20d453c8eaa5a3d96496b57fe0f645252b8ff8e61
6
+ metadata.gz: 49d38e10d6871c4d4e4e249624b543787f6476aaebb680748babf822932127eac82bdb81259177b7b9e1e543b816ac012e4d08735bf882a57e3829c1175efb13
7
+ data.tar.gz: 8d99175077e75f997efb3c0233eeed32d1be2142403d3a566452471faa3b13926573a2c4cba19540dfab2328455175e8c19369ce61d5db6fe2becf5df1de3995
@@ -0,0 +1,51 @@
1
module CDMBL
  # Removes one page ("batch") of records from Solr.
  #
  # A page of documents is requested from the Solr client starting at
  # +start+; the identifiers found there are handed to
  # +oai_deletables_klass+, and whatever that object reports as deletable is
  # passed to +solr_client.delete+.
  class BatchDeleter
    attr_reader :prefix,
                :start,
                :batch_size,
                :oai_client,
                :solr_client,
                :oai_deletables_klass

    # @param prefix [String] forwarded to the deletables object
    # @param start [Integer] offset of the first Solr document in this batch
    # @param batch_size [Integer] number of documents per batch; only used by
    #   {#last_batch?} here, so it has to match the page size the Solr client
    #   actually requests
    # @param oai_client [Object] forwarded to the deletables object
    # @param solr_client [Object] must respond to +ids(start:)+ and +delete+
    # @param oai_deletables_klass [Class] built with +identifiers:+,
    #   +prefix:+ and +oai_client:+; must respond to +deletables+
    def initialize(prefix: '',
                   start: 0,
                   batch_size: 200,
                   oai_client: :missing_oai_client,
                   solr_client: :missing_solr_client,
                   oai_deletables_klass: OaiDeletables)
      @prefix               = prefix
      @start                = start
      @batch_size           = batch_size
      @oai_client           = oai_client
      @solr_client          = solr_client
      @oai_deletables_klass = oai_deletables_klass
    end

    # Deletes this batch's deletable identifiers from Solr.
    #
    # @return whatever +solr_client.delete+ returns
    def delete!
      solr_client.delete deletables
    end

    # @return [Boolean] true when this batch reaches or passes the total
    #   number of documents reported by Solr
    def last_batch?
      start + batch_size >= num_found
    end

    private

    def deletables
      oai_deletables_klass.new(identifiers: identifiers,
                               prefix: prefix,
                               oai_client: oai_client).deletables
    end

    # Plain identifier strings for this batch. Solr returns each document as
    # a Hash of fields, while the deletables object works on strings, so the
    # id is pulled out of Hash documents; anything else passes through.
    # NOTE(review): assumes the Solr unique key field is 'id' - confirm
    # against the index schema.
    def identifiers
      docs.map { |doc| doc.is_a?(Hash) ? doc['id'] : doc }.compact
    end

    # Documents in this page; an empty Array (not a Hash) when the response
    # carries none, so callers can always treat it as a list.
    def docs
      response.fetch('docs', [])
    end

    def num_found
      response.fetch('numFound', 0)
    end

    def response
      ids.fetch('response', {})
    end

    # Memoized so delete! and last_batch? share a single Solr request.
    def ids
      @ids ||= solr_client.ids(start: start)
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'sidekiq'
2
module CDMBL
  # Sidekiq worker that runs one BatchDeleter batch and, unless that batch
  # was the last one, enqueues itself again for the next batch.
  class BatchDeleterWorker
    include Sidekiq::Worker
    attr_reader :start, :prefix, :oai_url, :solr_url

    # @param start [Integer] Solr offset of the batch to process
    # @param prefix [String] forwarded to the batch deleter
    # @param oai_url [String] base URL handed to the OAI client
    # @param solr_url [String] URL handed to the Solr client
    def perform(start = 0, prefix = '', oai_url = '', solr_url = '')
      @start    = start
      @prefix   = prefix
      @oai_url  = oai_url
      @solr_url = solr_url
      delete!
    end

    private

    def delete!
      batch_deleter.delete!
      return if batch_deleter.last_batch?

      # Arguments are passed positionally, in the same order #perform
      # declares them; a keyword hash would arrive as the single +start+
      # argument.
      BatchDeleterWorker.perform_async(next_start, prefix, oai_url, solr_url)
    end

    # Advance by a whole batch so the step matches the window that
    # BatchDeleter#last_batch? measures against.
    def next_start
      start + batch_deleter.batch_size
    end

    def batch_deleter
      @deleter ||= BatchDeleter.new(start: start,
                                    prefix: prefix,
                                    solr_client: solr_client,
                                    oai_client: oai_client)
    end

    def solr_client
      @solr_client ||= CDMBL::Solr.new(url: solr_url)
    end

    def oai_client
      @oai_client ||= OaiClient.new base_url: oai_url
    end
  end
end
@@ -4,11 +4,22 @@ module CDMBL
4
4
  # Communicate with Solr: add / delete stuff
5
5
  class DefaultSolr
6
6
  attr_reader :url, :client
7
- def initialize(url: 'http://localhost:8983', client: RSolr)
7
+ def initialize(url: 'http://localhost:8983/solr/core-here', client: RSolr)
8
8
  @url = url
9
9
  @client = client
10
10
  end
11
11
 
12
# Requests one page of documents matching every record ('*:*') through the
# Solr 'select' handler.
#
# @param start [Integer] offset of the first document to return
# @param rows [Integer] page size; defaults to the previously hard-coded 200
#   so existing callers are unaffected
# @return whatever +connection.get+ returns
# NOTE(review): 'fl' is sent as an empty string - presumably meant to limit
# the returned fields; confirm what Solr returns for an empty field list.
def ids(start: 0, rows: 200)
  connection.get('select',
                 params: {
                   q: '*:*',
                   defType: 'edismax',
                   fl: '',
                   rows: rows,
                   start: start
                 })
end
22
+
12
23
  def connection
13
24
  @connection ||= client.connect url: url
14
25
  end
@@ -29,7 +29,7 @@ module CDMBL
29
29
  end
30
30
 
31
31
  def set_lookup
32
- oai_set_lookup.new(oai_sets: to_hash(sets)).keyed
32
+ oai_set_lookup.new(oai_sets: sets).keyed
33
33
  end
34
34
 
35
35
  def deletable_ids
@@ -67,11 +67,7 @@ module CDMBL
67
67
  end
68
68
 
69
69
  def oai_identifiers
70
- to_hash(identifiers)
71
- end
72
-
73
- def to_hash(xml)
74
- Hash.from_xml(xml)
70
+ identifiers
75
71
  end
76
72
  end
77
73
  end
@@ -0,0 +1,24 @@
1
+ require 'json'
2
module CDMBL
  # Minimal client for an OAI endpoint: issues a GET for
  # "<base_url>?<query>" and returns the XML response body converted to a
  # Hash.
  class OaiClient
    attr_reader :base_url, :http_client

    # @param base_url [String] endpoint URL, without the query string
    # @param http_client [#get_response] object used to perform the request
    def initialize(base_url: '', http_client: Net::HTTP)
      @http_client = http_client
      @base_url = base_url
    end

    # @param query [String] raw query string appended after '?'
    # @return the response body as parsed by Hash.from_xml
    # NOTE(review): Hash.from_xml is not core Ruby - presumably supplied by
    # ActiveSupport; confirm it is loaded wherever this class is used.
    def request(query)
      endpoint = "#{base_url}?#{query}"
      Hash.from_xml(fetch_body(endpoint))
    end

    private

    # Performs the GET and hands back only the response body.
    def fetch_body(endpoint)
      response = http_client.get_response(URI(endpoint))
      response.body
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'sidekiq'
2
module CDMBL
  # Works out which identifiers can be deleted: those whose record can no
  # longer be found through the OAI endpoint.
  class OaiDeletables
    attr_reader :identifiers, :oai_record_klass, :oai_client, :prefix

    # @param identifiers [Array<String>] colon-delimited ids, e.g. "coll:123"
    # @param prefix [String] prepended when building the OAI identifier
    # @param oai_client [Object] forwarded to each record lookup
    # @param oai_record_klass [Class] built with +oai_client:+ and
    #   +identifier:+; must respond to +record_exists?+
    def initialize(identifiers: [],
                   prefix: '',
                   oai_client: OaiClient.new,
                   oai_record_klass: OaiGetRecord)
      @identifiers      = identifiers
      @prefix           = prefix
      @oai_client       = oai_client
      @oai_record_klass = oai_record_klass
    end

    # @return [Array<String>] the identifiers (in their original, colon
    #   delimited form) for which no OAI record exists. Identifiers whose
    #   record still exists are kept out of the result so that live records
    #   are never handed on for deletion.
    def deletables
      identifiers.reject do |id|
        record_exists? to_oai_id(id)
      end
    end

    private

    # "coll:123" => "<prefix>coll/123". The first and last colon-separated
    # segments are used; any segments in between are ignored.
    def to_oai_id(id)
      parts = id.split(':')
      "#{prefix}#{parts.first}/#{parts.last}"
    end

    def record_exists?(identifier)
      oai_record_klass.new(oai_client: oai_client,
                           identifier: identifier).record_exists?
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'json'
2
module CDMBL
  # Request a single item from an OAI endpoint.
  # identifier should be forward slash delimited: collection/identifier
  class OaiGetRecord
    # Error text that marks an identifier as unknown to the endpoint.
    UNKNOWN_IDENTIFIER = /The value of the identifier argument is unknown/

    attr_reader :identifier, :oai_client

    # @param identifier [String] identifier placed in the GetRecord query
    # @param oai_client [#request] object that performs the request and
    #   returns the response as a Hash
    def initialize(identifier: '', oai_client: OaiClient.new)
      @identifier = identifier
      @oai_client = oai_client
    end

    # @return [Boolean] false only when the response's error text says the
    #   identifier is unknown; true otherwise, including when the response
    #   carries no error at all
    def record_exists?
      # The parsed error is not guaranteed to be a String (it may be nil, or
      # a collection when several errors come back), and matching a Regexp
      # against such a value raises TypeError - so match its string form.
      (UNKNOWN_IDENTIFIER =~ record_errors.to_s).nil?
    end

    # @return the parsed GetRecord response; requested once and memoized
    def record
      @record ||= oai_client.request query
    end

    private

    def record_errors
      record.fetch('OAI_PMH', {}).fetch('error', '')
    end

    def query
      "verb=GetRecord&identifier=#{identifier}&metadataPrefix=oai_dc"
    end
  end
end
@@ -1,16 +1,24 @@
1
+ require 'json'
1
2
  module CDMBL
2
3
  class OaiRequest
3
- attr_reader :base_uri, :resumption_token, :client, :from, :set
4
+ attr_reader :base_uri,
5
+ :resumption_token,
6
+ :client,
7
+ :from,
8
+ :set,
9
+ :identifier
4
10
  def initialize(base_uri: '',
5
11
  resumption_token: false,
6
12
  from: false,
7
13
  set: false,
14
+ identifier: '',
8
15
  client: Net::HTTP)
9
16
  @base_uri = base_uri
10
17
  @resumption_token = resumption_token
11
18
  @client = client
12
19
  @from = (from) ? "&from=#{from}" : ''
13
20
  @set = (set) ? "&set=#{set}" : ''
21
+ @identifier = identifier
14
22
  end
15
23
 
16
24
  def identifiers
@@ -37,7 +45,7 @@ module CDMBL
37
45
 
38
46
  def request(location)
39
47
  CDMBL::OaiNotification.call!(location)
40
- client.get_response(URI(location)).body
48
+ Hash.from_xml(client.get_response(URI(location)).body)
41
49
  end
42
50
  end
43
51
  end
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/cdmbl.rb CHANGED
@@ -16,4 +16,9 @@ require 'cdmbl/default_cdm_notification'
16
16
  require 'cdmbl/default_oai_notification'
17
17
  require 'cdmbl/default_loader_notification'
18
18
  require 'cdmbl/hooks'
19
- require 'cdmbl/oai_filter'
19
+ require 'cdmbl/oai_filter'
20
+ require 'cdmbl/oai_client'
21
+ require 'cdmbl/oai_get_record'
22
+ require 'cdmbl/oai_deletables'
23
+ require 'cdmbl/batch_deleter'
24
+ require 'cdmbl/batch_deleter_worker'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-06-05 00:00:00.000000000 Z
11
+ date: 2017-06-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hash_at_path
@@ -208,6 +208,8 @@ files:
208
208
  - bin/setup
209
209
  - cdmbl.gemspec
210
210
  - lib/cdmbl.rb
211
+ - lib/cdmbl/batch_deleter.rb
212
+ - lib/cdmbl/batch_deleter_worker.rb
211
213
  - lib/cdmbl/default_cdm_notification.rb
212
214
  - lib/cdmbl/default_completed_callback.rb
213
215
  - lib/cdmbl/default_loader_notification.rb
@@ -221,7 +223,10 @@ files:
221
223
  - lib/cdmbl/formatters.rb
222
224
  - lib/cdmbl/hooks.rb
223
225
  - lib/cdmbl/loader.rb
226
+ - lib/cdmbl/oai_client.rb
227
+ - lib/cdmbl/oai_deletables.rb
224
228
  - lib/cdmbl/oai_filter.rb
229
+ - lib/cdmbl/oai_get_record.rb
225
230
  - lib/cdmbl/oai_request.rb
226
231
  - lib/cdmbl/oai_set_lookup.rb
227
232
  - lib/cdmbl/rake_task.rb