cdmdexer 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/cdmdexer.gemspec +7 -6
- data/lib/cdmdexer/etl_worker.rb +10 -1
- data/lib/cdmdexer/oai_request.rb +22 -5
- data/lib/cdmdexer/version.rb +1 -1
- metadata +28 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea1e0a2b54ce8d063a2d84d18987d662ef583abf044142356a46d5bcd85790f5
|
4
|
+
data.tar.gz: a9012a9ae4fcee9bdd37314a0acd5c19238c60691ff60a95668329c89e5c650b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf5450e86279e1e3a16fc8aa862f69ac865047bb4a9dc1a067a547152437c9d849acc2cf78f1107b664d4b930c8140913bad5cb0d21a35ada2e720465427a187
|
7
|
+
data.tar.gz: 5ae4ef6681742ec8bd883a388d571c08057e58f23044b1c09278b305159add9aec4ddde656024c813e0afb7c4427bab06d81286874c4c7fe104a2a937134d40e
|
data/cdmdexer.gemspec
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'cdmdexer/version'
|
5
6
|
|
@@ -9,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
9
10
|
spec.authors = ['chadfennell']
|
10
11
|
spec.email = ['fenne035@umn.edu']
|
11
12
|
|
12
|
-
spec.summary =
|
13
|
+
spec.summary = 'Load CONTENTdm data into a Solr Index. CDMDEXER expects to run inside a Rails application.'
|
13
14
|
spec.license = 'MIT'
|
14
15
|
|
15
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
@@ -17,11 +18,11 @@ Gem::Specification.new do |spec|
|
|
17
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
18
19
|
spec.require_paths = ['lib']
|
19
20
|
|
20
|
-
spec.add_dependency 'hash_at_path', '~> 0.1'
|
21
21
|
spec.add_dependency 'contentdm_api', '~> 0.5.0'
|
22
|
+
spec.add_dependency 'hash_at_path', '~> 0.1.6'
|
23
|
+
spec.add_dependency 'rsolr', '~> 2.0'
|
22
24
|
spec.add_dependency 'sidekiq', '>= 3.5'
|
23
25
|
spec.add_dependency 'titleize', '~> 1.4'
|
24
|
-
spec.add_dependency 'rsolr', '~> 2.0'
|
25
26
|
# CDMDEXER expects to run in a rails app, but just to avoid adding
|
26
27
|
# another external dependency for XML processing, we rely on activesupport's
|
27
28
|
# Has.to_jsonl feature for testing and to allow this gem to function
|
@@ -29,7 +30,7 @@ Gem::Specification.new do |spec|
|
|
29
30
|
spec.add_dependency 'rails', '>= 5.2'
|
30
31
|
|
31
32
|
spec.add_development_dependency 'bundler', '~> 1.12'
|
32
|
-
spec.add_development_dependency 'rake', '~> 12.0'
|
33
33
|
spec.add_development_dependency 'minitest', '~> 5.0'
|
34
|
+
spec.add_development_dependency 'rake', '~> 12.0'
|
34
35
|
spec.add_development_dependency 'yard', '~> 0.9.0'
|
35
36
|
end
|
data/lib/cdmdexer/etl_worker.rb
CHANGED
@@ -38,11 +38,20 @@ module CDMDEXER
|
|
38
38
|
@resumption_token = config.fetch('resumption_token', nil)
|
39
39
|
@batch_size = config.fetch('batch_size', 5).to_i
|
40
40
|
@is_recursive = config.fetch('is_recursive', true)
|
41
|
+
after_date = config.fetch('after_date', false)
|
41
42
|
|
42
43
|
@oai_request = oai_request_klass.new(
|
43
44
|
endpoint_url: oai_endpoint,
|
44
45
|
resumption_token: resumption_token,
|
45
|
-
set_spec: config.fetch('set_spec', nil)
|
46
|
+
set_spec: config.fetch('set_spec', nil),
|
47
|
+
# Optionally only select records that have been updated after a
|
48
|
+
# certain date. You may need to manually update a parent record
|
49
|
+
# after updating a child in order to signify to the indexer that
|
50
|
+
# some record in the parent's children has been updated. This indexer
|
51
|
+
# expects to only see parent records in the OAI responses.
|
52
|
+
# The default here is to skip indexing based on date.
|
53
|
+
# Rails example for getting a date: `after_date: 2.weeks.ago`
|
54
|
+
after_date: after_date
|
46
55
|
)
|
47
56
|
|
48
57
|
run_batch!
|
data/lib/cdmdexer/oai_request.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'hash_at_path'
|
2
4
|
require 'json'
|
5
|
+
require 'time'
|
3
6
|
|
4
7
|
module CDMDEXER
|
5
8
|
# Light wrapper around OAI requests
|
@@ -10,16 +13,19 @@ module CDMDEXER
|
|
10
13
|
attr_reader :endpoint_url,
|
11
14
|
:resumption_token,
|
12
15
|
:client,
|
13
|
-
:set_spec
|
16
|
+
:set_spec,
|
17
|
+
:after_date
|
14
18
|
|
15
19
|
def initialize(endpoint_url: '',
|
16
20
|
resumption_token: nil,
|
17
21
|
set_spec: nil,
|
18
|
-
client: Net::HTTP
|
22
|
+
client: Net::HTTP,
|
23
|
+
after_date: false)
|
19
24
|
@endpoint_url = endpoint_url
|
20
25
|
@resumption_token = resumption_token
|
21
26
|
@client = client
|
22
27
|
@set_spec = set_spec ? "&set=#{set_spec}" : ''
|
28
|
+
@after_date = after_date
|
23
29
|
end
|
24
30
|
|
25
31
|
def records
|
@@ -44,12 +50,23 @@ module CDMDEXER
|
|
44
50
|
end
|
45
51
|
|
46
52
|
def deletable_ids
|
47
|
-
records.select
|
48
|
-
|
53
|
+
records.select do |record|
|
54
|
+
if record['status'] == 'deleted'
|
55
|
+
after_date ? Time.parse(record['datestamp']) >= after_date : true
|
56
|
+
end
|
57
|
+
end.map { |record| record[:id] }
|
49
58
|
end
|
50
59
|
|
51
60
|
def updatables
|
52
|
-
records.reject
|
61
|
+
records.reject do |record|
|
62
|
+
if record['status'] == 'deleted'
|
63
|
+
true
|
64
|
+
elsif after_date && Time.parse(record['datestamp']) < after_date
|
65
|
+
true
|
66
|
+
else
|
67
|
+
false
|
68
|
+
end
|
69
|
+
end
|
53
70
|
end
|
54
71
|
|
55
72
|
private
|
data/lib/cdmdexer/version.rb
CHANGED
metadata
CHANGED
@@ -1,85 +1,85 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmdexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.21.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: contentdm_api
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 0.5.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 0.5.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: hash_at_path
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 0.1.6
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
40
|
+
version: 0.1.6
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: rsolr
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '2.0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '2.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: sidekiq
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '3.5'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
68
|
+
version: '3.5'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: titleize
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
75
|
+
version: '1.4'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '1.4'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rails
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -109,33 +109,33 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '1.12'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: minitest
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
117
|
+
version: '5.0'
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
124
|
+
version: '5.0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
126
|
+
name: rake
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
131
|
+
version: '12.0'
|
132
132
|
type: :development
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
138
|
+
version: '12.0'
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: yard
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -217,7 +217,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
217
217
|
- !ruby/object:Gem::Version
|
218
218
|
version: '0'
|
219
219
|
requirements: []
|
220
|
-
rubygems_version: 3.0.
|
220
|
+
rubygems_version: 3.0.6
|
221
221
|
signing_key:
|
222
222
|
specification_version: 4
|
223
223
|
summary: Load CONTENTdm data into a Solr Index. CDMDEXER expects to run inside a Rails
|