purl_fetcher-client 0.5.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/.rubocop.yml +5 -0
- data/Gemfile +4 -1
- data/README.md +20 -2
- data/Rakefile +3 -3
- data/bin/console +4 -3
- data/lib/purl_fetcher/client/direct_upload_request.rb +39 -0
- data/lib/purl_fetcher/client/direct_upload_response.rb +13 -0
- data/lib/purl_fetcher/client/reader.rb +22 -51
- data/lib/purl_fetcher/client/upload_files.rb +70 -0
- data/lib/purl_fetcher/client/version.rb +2 -2
- data/lib/purl_fetcher/client.rb +87 -10
- data/purl_fetcher-client.gemspec +15 -17
- metadata +27 -39
- data/lib/purl_fetcher/client/deletes_reader.rb +0 -21
- data/lib/purl_fetcher/client/public_xml_record.rb +0 -205
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb1a08fc0010f80045db352cb2bfb2735dd2b7447513ba366281cc4d488895e4
|
4
|
+
data.tar.gz: a2213eda4272694dcf1e889ed9185956b0ba1da24e98056e2360a242e9eb6a0b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f8a0200ff798158ca1b5c58c120c94b1cb3e8f172cec1c28b30b1a1cae47291483dc5d681467a17f08d7ef650a14bf9fffc0c8d33895fd36565e2b2ea7e70ac9
|
7
|
+
data.tar.gz: 4600d53676d35892f3485f9cbc280afb4cc1fd8cedd9028794376cb20b9a49ad6d3981279e84e89b56549b87173fdc106aaa09f459d1d461a72675b2c2c2ca12
|
data/.github/workflows/ruby.yml
CHANGED
data/.rubocop.yml
ADDED
data/Gemfile
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
source "https://rubygems.org"
|
2
2
|
|
3
|
-
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
3
|
+
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
4
4
|
|
5
5
|
# Specify your gem's dependencies in purl_fetcher-client.gemspec
|
6
6
|
gemspec
|
7
|
+
|
8
|
+
gem "rubocop-rails-omakase", require: false, group: [ :development ]
|
9
|
+
gem "debug"
|
data/README.md
CHANGED
@@ -22,7 +22,25 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
-
|
25
|
+
### Uploading a file
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
PurlFetcher::Client.configure(url:'http://127.0.0.1:3000', token: 'abc123')
|
29
|
+
|
30
|
+
PurlFetcher::Client::UploadFiles.upload(
|
31
|
+
file_metadata: {
|
32
|
+
'file1.txt' => PurlFetcher::Client::DirectUploadRequest.new(
|
33
|
+
checksum: '123',
|
34
|
+
byte_size: 10_000,
|
35
|
+
content_type: 'image/tiff',
|
36
|
+
filename: 'image.tiff'
|
37
|
+
)
|
38
|
+
},
|
39
|
+
filepath_map: {
|
40
|
+
'file1.txt' => File.expand_path('Gemfile.lock')
|
41
|
+
}
|
42
|
+
)
|
43
|
+
```
|
26
44
|
|
27
45
|
## Development
|
28
46
|
|
@@ -36,4 +54,4 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERN
|
|
36
54
|
|
37
55
|
## Code of Conduct
|
38
56
|
|
39
|
-
Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/
|
57
|
+
Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/main/CODE_OF_CONDUCT.md).
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'purl_fetcher/client'
|
5
|
+
require 'debug'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "purl_fetcher/client"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start(__FILE__)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "digest"
|
4
|
+
|
5
|
+
module PurlFetcher
|
6
|
+
class Client
|
7
|
+
# This models the JSON that we send to the server.
|
8
|
+
DirectUploadRequest = Data.define(:checksum, :byte_size, :content_type, :filename) do
|
9
|
+
def self.from_file(hexdigest:, byte_size:, file_name:, content_type:)
|
10
|
+
new(checksum: hex_to_base64_digest(hexdigest),
|
11
|
+
byte_size: byte_size,
|
12
|
+
content_type: clean_content_type(content_type),
|
13
|
+
filename: file_name)
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_h
|
17
|
+
{
|
18
|
+
blob: { filename: filename, byte_size: byte_size, checksum: checksum,
|
19
|
+
content_type: self.class.clean_content_type(content_type) }
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
def to_json(*_args)
|
24
|
+
JSON.generate(to_h)
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.clean_content_type(content_type)
|
28
|
+
return "application/octet-stream" if content_type.blank?
|
29
|
+
|
30
|
+
# ActiveStorage is expecting "application/x-stata-dta" not "application/x-stata-dta;version=14"
|
31
|
+
content_type.split(";").first
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.hex_to_base64_digest(hexdigest)
|
35
|
+
[ [ hexdigest ].pack("H*") ].pack("m0")
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PurlFetcher
|
4
|
+
class Client
|
5
|
+
DirectUploadResponse = Data.define(:id, :key, :checksum, :byte_size, :content_type,
|
6
|
+
:filename, :metadata, :created_at, :direct_upload,
|
7
|
+
:signed_id, :service_name) do
|
8
|
+
def with_filename(filename)
|
9
|
+
self.class.new(**deconstruct_keys(nil).merge(filename:))
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -1,77 +1,48 @@
|
|
1
1
|
class PurlFetcher::Client::Reader
|
2
2
|
include Enumerable
|
3
|
-
attr_reader :
|
3
|
+
attr_reader :host, :conn, :range
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@
|
7
|
-
@
|
5
|
+
def initialize(host: "https://purl-fetcher.stanford.edu", conn: nil)
|
6
|
+
@host = host
|
7
|
+
@conn = conn || Faraday.new(host) do |f|
|
8
|
+
f.response :json
|
9
|
+
end
|
8
10
|
@range = {}
|
9
11
|
end
|
10
12
|
|
11
|
-
def each
|
12
|
-
return to_enum(:each) unless block_given?
|
13
|
-
|
14
|
-
changes(first_modified: first_modified, target: target).each do |change, meta|
|
15
|
-
next unless target.nil? || (change['true_targets'] && change['true_targets'].include?(target))
|
16
|
-
|
17
|
-
public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
|
18
|
-
|
19
|
-
yield public_xml, change, self
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
13
|
def collection_members(druid)
|
24
14
|
return to_enum(:collection_members, druid) unless block_given?
|
25
15
|
|
26
|
-
paginated_get("/collections/druid:#{druid.
|
27
|
-
yield
|
16
|
+
paginated_get("/collections/druid:#{druid.delete_prefix('druid:')}/purls", "purls").each do |obj, _meta|
|
17
|
+
yield obj["druid"].delete_prefix("druid:")
|
28
18
|
end
|
29
19
|
end
|
30
20
|
|
31
21
|
private
|
32
22
|
|
33
|
-
def first_modified
|
34
|
-
settings['purl_fetcher.first_modified']
|
35
|
-
end
|
36
|
-
|
37
|
-
def target
|
38
|
-
settings['purl_fetcher.target']
|
39
|
-
end
|
40
|
-
|
41
|
-
##
|
42
|
-
# @return [Enumerator]
|
43
|
-
def changes(params = {})
|
44
|
-
paginated_get('/docs/changes', 'changes', params)
|
45
|
-
end
|
46
|
-
|
47
|
-
##
|
48
|
-
# @return [Enumerator]
|
49
|
-
def deletes(params = {})
|
50
|
-
paginated_get('/docs/deletes', 'deletes', params)
|
51
|
-
end
|
52
|
-
|
53
23
|
##
|
54
24
|
# @return [Hash] a parsed JSON hash
|
55
|
-
def
|
56
|
-
|
57
|
-
end
|
25
|
+
def fetch(path, params)
|
26
|
+
response = conn.get(path, params: params)
|
58
27
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
28
|
+
unless response.success?
|
29
|
+
if defined?(Honeybadger)
|
30
|
+
Honeybadger.context({ path:, params:, response_code: response.code, body: response.body })
|
31
|
+
end
|
32
|
+
raise PurlFetcher::Client::ResponseError, "Unsuccessful response from purl-fetcher"
|
64
33
|
end
|
34
|
+
|
35
|
+
response.body
|
65
36
|
end
|
66
37
|
|
67
38
|
##
|
68
39
|
# For performance, an enumerable object is returned.
|
69
40
|
#
|
70
41
|
# @example operating on each of the results as they come in
|
71
|
-
# paginated_get('/docs/
|
42
|
+
# paginated_get('/docs/collections/druid:123', 'purls').map { |v| puts v.inspect }
|
72
43
|
#
|
73
44
|
# @example getting all of the results and converting to an array
|
74
|
-
# paginated_get('/docs/
|
45
|
+
# paginated_get('/docs/collections/druid:123', 'purls').to_a
|
75
46
|
#
|
76
47
|
# @return [Enumerator] an enumerable object
|
77
48
|
def paginated_get(path, accessor, options = {})
|
@@ -83,8 +54,8 @@ class PurlFetcher::Client::Reader
|
|
83
54
|
total = 0
|
84
55
|
|
85
56
|
loop do
|
86
|
-
data =
|
87
|
-
@range = data[
|
57
|
+
data = fetch(path, { per_page: per_page, page: page }.merge(params))
|
58
|
+
@range = data["range"]
|
88
59
|
|
89
60
|
total += data[accessor].length
|
90
61
|
|
@@ -92,7 +63,7 @@ class PurlFetcher::Client::Reader
|
|
92
63
|
yielder.yield element, self
|
93
64
|
end
|
94
65
|
|
95
|
-
page = data[
|
66
|
+
page = data["pages"]["next_page"]
|
96
67
|
|
97
68
|
break if page.nil? || total >= max
|
98
69
|
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PurlFetcher
|
4
|
+
class Client
|
5
|
+
# The file uploading part of a transfer
|
6
|
+
class UploadFiles
|
7
|
+
# @param [Hash<String,DirectUploadRequest>] file_metadata map of relative filepaths to file metadata
|
8
|
+
# @param [Hash<String,String>] filepath_map map of relative filepaths to absolute filepaths
|
9
|
+
def self.upload(file_metadata:, filepath_map:)
|
10
|
+
new(file_metadata: file_metadata, filepath_map: filepath_map).upload
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param [Hash<String,DirectUploadRequest>] file_metadata map of relative filepaths to file metadata
|
14
|
+
# @param [Hash<String,String>] filepath_map map of relative filepaths to absolute filepaths
|
15
|
+
def initialize(file_metadata:, filepath_map:)
|
16
|
+
@file_metadata = file_metadata
|
17
|
+
@filepath_map = filepath_map
|
18
|
+
end
|
19
|
+
|
20
|
+
# @return [Array<DirectUploadResponse>] the responses from the server for the uploads
|
21
|
+
def upload
|
22
|
+
file_metadata.map do |filepath, metadata|
|
23
|
+
direct_upload(metadata.to_json).tap do |response|
|
24
|
+
# ActiveStorage modifies the filename provided in response, so setting here with the relative filename
|
25
|
+
response = response.with_filename(filepath)
|
26
|
+
upload_file(response)
|
27
|
+
logger.info("Upload of #{filepath} complete")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
attr_reader :file_metadata, :filepath_map
|
35
|
+
|
36
|
+
def logger
|
37
|
+
Client.config.logger
|
38
|
+
end
|
39
|
+
|
40
|
+
def client
|
41
|
+
Client.instance
|
42
|
+
end
|
43
|
+
|
44
|
+
def path
|
45
|
+
"/v1/direct_uploads"
|
46
|
+
end
|
47
|
+
|
48
|
+
def direct_upload(metadata_json)
|
49
|
+
logger.info("Starting an upload request: #{metadata_json}")
|
50
|
+
response = client.post(path: path, body: metadata_json)
|
51
|
+
|
52
|
+
logger.info("Response from server: #{response}")
|
53
|
+
DirectUploadResponse.new(**response.symbolize_keys)
|
54
|
+
end
|
55
|
+
|
56
|
+
def upload_file(response)
|
57
|
+
logger.info("Uploading `#{response.filename}' to #{response.direct_upload.fetch('url')}")
|
58
|
+
|
59
|
+
client.put(
|
60
|
+
path: response.direct_upload.fetch("url"),
|
61
|
+
body: ::File.open(filepath_map[response.filename]),
|
62
|
+
headers: {
|
63
|
+
"content-type" => response.content_type,
|
64
|
+
"content-length" => response.byte_size.to_s
|
65
|
+
}
|
66
|
+
)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
data/lib/purl_fetcher/client.rb
CHANGED
@@ -1,15 +1,92 @@
|
|
1
|
+
require "active_support"
|
2
|
+
require "active_support/core_ext"
|
3
|
+
require "faraday"
|
4
|
+
require "singleton"
|
5
|
+
require "logger"
|
6
|
+
|
1
7
|
require "purl_fetcher/client/version"
|
2
|
-
require
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
end
|
8
|
+
require "purl_fetcher/client/reader"
|
9
|
+
require "purl_fetcher/client/upload_files"
|
10
|
+
require "purl_fetcher/client/direct_upload_request"
|
11
|
+
require "purl_fetcher/client/direct_upload_response"
|
7
12
|
|
8
13
|
module PurlFetcher
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
#
|
14
|
+
class Client
|
15
|
+
# General error originating in PurlFetcher::Client
|
16
|
+
class Error < StandardError; end
|
17
|
+
|
18
|
+
# Raised when the response from the server is not successful
|
19
|
+
class ResponseError < Error; end
|
20
|
+
|
21
|
+
include Singleton
|
22
|
+
class << self
|
23
|
+
def configure(url:, logger: default_logger, token: nil)
|
24
|
+
instance.config = Config.new(
|
25
|
+
url: url,
|
26
|
+
logger: logger,
|
27
|
+
token: token
|
28
|
+
)
|
29
|
+
|
30
|
+
instance
|
31
|
+
end
|
32
|
+
|
33
|
+
def default_logger
|
34
|
+
Logger.new($stdout)
|
35
|
+
end
|
36
|
+
|
37
|
+
delegate :config, to: :instance
|
38
|
+
end
|
39
|
+
|
40
|
+
attr_accessor :config
|
41
|
+
|
42
|
+
# Send a POST request
|
43
|
+
# @param path [String] the path for the API request
|
44
|
+
# @param body [String] the body of the POST request
|
45
|
+
def post(path:, body:)
|
46
|
+
response = connection.post(path) do |request|
|
47
|
+
request.body = body
|
48
|
+
end
|
49
|
+
|
50
|
+
raise "unexpected response: #{response.status} #{response.body}" unless response.success?
|
51
|
+
|
52
|
+
response.body
|
53
|
+
end
|
54
|
+
|
55
|
+
# Send a PUT request
|
56
|
+
# @param path [String] the path for the API request
|
57
|
+
# @param body [String] the body of the PUT request
|
58
|
+
# @param headers [Hash] extra headers to add to the SDR API request
|
59
|
+
def put(path:, body:, headers: {})
|
60
|
+
response = connection.put(path) do |request|
|
61
|
+
request.body = body
|
62
|
+
request.headers = default_headers.merge(headers)
|
63
|
+
end
|
64
|
+
|
65
|
+
raise "unexpected response: #{response.status} #{response.body}" unless response.success?
|
66
|
+
|
67
|
+
response.body
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
Config = Data.define(:url, :logger, :token)
|
73
|
+
|
74
|
+
def connection
|
75
|
+
Faraday.new(
|
76
|
+
url: config.url,
|
77
|
+
headers: default_headers
|
78
|
+
) do |conn|
|
79
|
+
conn.response :json
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def default_headers
|
84
|
+
{
|
85
|
+
accept: "application/json",
|
86
|
+
content_type: "application/json"
|
87
|
+
}.tap do |headers|
|
88
|
+
headers[:authorization] = "Bearer #{config.token}" if config.token
|
89
|
+
end
|
90
|
+
end
|
14
91
|
end
|
15
92
|
end
|
data/purl_fetcher-client.gemspec
CHANGED
@@ -1,32 +1,30 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path("../lib", __FILE__)
|
1
|
+
lib = File.expand_path('lib', __dir__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
3
|
+
require 'purl_fetcher/client/version'
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
6
|
+
spec.name = 'purl_fetcher-client'
|
8
7
|
spec.version = PurlFetcher::Client::VERSION
|
9
|
-
spec.authors = ["Chris Beer"]
|
10
|
-
spec.email = ["cabeer@stanford.edu"]
|
8
|
+
spec.authors = [ "Chris Beer" ]
|
9
|
+
spec.email = [ "cabeer@stanford.edu" ]
|
11
10
|
|
12
11
|
spec.summary = 'Traject-compatible reader implementation for streaming data from purl-fetcher'
|
13
12
|
|
14
13
|
# Specify which files should be added to the gem when it is released.
|
15
14
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
16
|
-
spec.files = Dir.chdir(File.expand_path(
|
15
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
17
16
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
17
|
end
|
19
|
-
spec.bindir =
|
18
|
+
spec.bindir = 'exe'
|
20
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
21
|
-
spec.require_paths = ["lib"]
|
20
|
+
spec.require_paths = [ "lib" ]
|
22
21
|
|
23
|
-
spec.add_dependency '
|
24
|
-
spec.add_dependency '
|
25
|
-
spec.add_dependency 'stanford-mods'
|
26
|
-
spec.add_dependency 'dor-rights-auth'
|
27
|
-
spec.add_dependency 'mods_display', '>= 1.0.0.alpha1'
|
22
|
+
spec.add_dependency 'activesupport'
|
23
|
+
spec.add_dependency 'faraday', '~> 2.1'
|
28
24
|
|
29
|
-
spec.add_development_dependency
|
30
|
-
spec.add_development_dependency
|
31
|
-
spec.add_development_dependency
|
25
|
+
spec.add_development_dependency 'bundler'
|
26
|
+
spec.add_development_dependency 'debug'
|
27
|
+
spec.add_development_dependency 'rake'
|
28
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
29
|
+
spec.add_development_dependency 'webmock'
|
32
30
|
end
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: purl_fetcher-client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Beer
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
@@ -25,27 +25,27 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: faraday
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '2.1'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '2.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
|
-
type: :
|
48
|
+
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
@@ -53,13 +53,13 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: debug
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
-
type: :
|
62
|
+
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
@@ -67,35 +67,35 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: rake
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
76
|
-
type: :
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
82
|
+
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: rspec
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
89
|
+
version: '3.0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
96
|
+
version: '3.0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: webmock
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -108,20 +108,6 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: rspec
|
113
|
-
requirement: !ruby/object:Gem::Requirement
|
114
|
-
requirements:
|
115
|
-
- - "~>"
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '3.0'
|
118
|
-
type: :development
|
119
|
-
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - "~>"
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: '3.0'
|
125
111
|
description:
|
126
112
|
email:
|
127
113
|
- cabeer@stanford.edu
|
@@ -132,6 +118,7 @@ files:
|
|
132
118
|
- ".github/workflows/ruby.yml"
|
133
119
|
- ".gitignore"
|
134
120
|
- ".rspec"
|
121
|
+
- ".rubocop.yml"
|
135
122
|
- CODE_OF_CONDUCT.md
|
136
123
|
- Gemfile
|
137
124
|
- README.md
|
@@ -139,9 +126,10 @@ files:
|
|
139
126
|
- bin/console
|
140
127
|
- bin/setup
|
141
128
|
- lib/purl_fetcher/client.rb
|
142
|
-
- lib/purl_fetcher/client/
|
143
|
-
- lib/purl_fetcher/client/
|
129
|
+
- lib/purl_fetcher/client/direct_upload_request.rb
|
130
|
+
- lib/purl_fetcher/client/direct_upload_response.rb
|
144
131
|
- lib/purl_fetcher/client/reader.rb
|
132
|
+
- lib/purl_fetcher/client/upload_files.rb
|
145
133
|
- lib/purl_fetcher/client/version.rb
|
146
134
|
- purl_fetcher-client.gemspec
|
147
135
|
homepage:
|
@@ -162,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
162
150
|
- !ruby/object:Gem::Version
|
163
151
|
version: '0'
|
164
152
|
requirements: []
|
165
|
-
rubygems_version: 3.
|
153
|
+
rubygems_version: 3.4.19
|
166
154
|
signing_key:
|
167
155
|
specification_version: 4
|
168
156
|
summary: Traject-compatible reader implementation for streaming data from purl-fetcher
|
@@ -1,21 +0,0 @@
|
|
1
|
-
class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
|
2
|
-
# Enumerate objects that should be deleted.
|
3
|
-
def each
|
4
|
-
return to_enum(:each) unless block_given?
|
5
|
-
|
6
|
-
deletes(first_modified: first_modified).each do |change|
|
7
|
-
|
8
|
-
public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
|
9
|
-
|
10
|
-
yield public_xml, change, self
|
11
|
-
end
|
12
|
-
|
13
|
-
changes(first_modified: first_modified, target: target).each do |change|
|
14
|
-
public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
|
15
|
-
|
16
|
-
next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
|
17
|
-
|
18
|
-
yield public_xml, change, self
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
@@ -1,205 +0,0 @@
|
|
1
|
-
require 'nokogiri'
|
2
|
-
require 'stanford-mods'
|
3
|
-
require 'mods_display'
|
4
|
-
require 'dor/rights_auth'
|
5
|
-
|
6
|
-
module PurlFetcher::Client
|
7
|
-
class PublicXmlRecord
|
8
|
-
attr_reader :druid, :options
|
9
|
-
|
10
|
-
def self.fetch(url)
|
11
|
-
if defined?(JRUBY_VERSION)
|
12
|
-
response = Manticore.get(url)
|
13
|
-
response.body if response.code == 200
|
14
|
-
else
|
15
|
-
response = HTTP.get(url)
|
16
|
-
response.body if response.status.ok?
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def initialize(druid, options = {})
|
21
|
-
@druid = druid
|
22
|
-
@options = options
|
23
|
-
end
|
24
|
-
|
25
|
-
def searchworks_id
|
26
|
-
catkey.nil? ? druid : catkey
|
27
|
-
end
|
28
|
-
|
29
|
-
# @return catkey value from the DOR identity_metadata, or nil if there is no catkey
|
30
|
-
def catkey
|
31
|
-
get_value(public_xml_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']"))
|
32
|
-
end
|
33
|
-
|
34
|
-
# @return objectLabel value from the DOR identity_metadata, or nil if there is no barcode
|
35
|
-
def label
|
36
|
-
get_value(public_xml_doc.xpath('/publicObject/identityMetadata/objectLabel'))
|
37
|
-
end
|
38
|
-
|
39
|
-
def get_value(node)
|
40
|
-
(node && node.first) ? node.first.content : nil
|
41
|
-
end
|
42
|
-
|
43
|
-
def stanford_mods
|
44
|
-
@smods_rec ||= Stanford::Mods::Record.new.tap do |smods_rec|
|
45
|
-
smods_rec.from_str(mods.to_s)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def mods_display
|
50
|
-
@mods_display ||= ModsDisplay::HTML.new(stanford_mods)
|
51
|
-
end
|
52
|
-
|
53
|
-
def public_xml
|
54
|
-
@public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
|
55
|
-
end
|
56
|
-
|
57
|
-
def public_xml?
|
58
|
-
!!public_xml
|
59
|
-
end
|
60
|
-
|
61
|
-
def public_xml_doc
|
62
|
-
@public_xml_doc ||= Nokogiri::XML(public_xml)
|
63
|
-
end
|
64
|
-
|
65
|
-
def mods
|
66
|
-
@mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
|
67
|
-
public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
|
68
|
-
else
|
69
|
-
if defined?(Honeybadger)
|
70
|
-
Honeybadger.notify(
|
71
|
-
'Unable to find MODS in the public xml; falling back to stand-along mods document',
|
72
|
-
context: { druid: druid }
|
73
|
-
)
|
74
|
-
end
|
75
|
-
|
76
|
-
Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
# @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
|
81
|
-
def is_collection
|
82
|
-
object_type_nodes = public_xml_doc.xpath('//objectType')
|
83
|
-
object_type_nodes.find_index { |n| %w(collection set).include? n.text.downcase }
|
84
|
-
end
|
85
|
-
|
86
|
-
# value is used to tell SearchWorks UI app of specific display needs for objects
|
87
|
-
# this comes from the <thumb> element in publicXML or the first image found (as parsed by discovery-indexer)
|
88
|
-
# @return [String] filename or nil if none found
|
89
|
-
def thumb
|
90
|
-
return if is_collection
|
91
|
-
encoded_thumb if %w(book image manuscript map webarchive-seed).include?(dor_content_type)
|
92
|
-
end
|
93
|
-
|
94
|
-
# the value of the type attribute for a DOR object's contentMetadata
|
95
|
-
# more info about these values is here:
|
96
|
-
# https://consul.stanford.edu/display/chimera/DOR+content+types%2C+resource+types+and+interpretive+metadata
|
97
|
-
# https://consul.stanford.edu/display/chimera/Summary+of+Content+Types%2C+Resource+Types+and+their+behaviors
|
98
|
-
# @return [String]
|
99
|
-
def dor_content_type
|
100
|
-
public_xml_doc.xpath('//contentMetadata/@type').text
|
101
|
-
end
|
102
|
-
|
103
|
-
# the thumbnail in publicXML, falling back to the first image if no thumb node is found
|
104
|
-
# @return [String] thumb filename with druid prepended, e.g. oo000oo0001/filename withspace.jp2
|
105
|
-
def parse_thumb
|
106
|
-
unless public_xml_doc.nil?
|
107
|
-
thumb = public_xml_doc.xpath('//thumb')
|
108
|
-
# first try and parse what is in the thumb node of publicXML, but fallback to the first image if needed
|
109
|
-
if thumb.size == 1
|
110
|
-
thumb.first.content
|
111
|
-
elsif thumb.size == 0 && parse_sw_image_ids.size > 0
|
112
|
-
parse_sw_image_ids.first
|
113
|
-
else
|
114
|
-
nil
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
# the druid and id attribute of resource/file and objectId and fileId of the
|
120
|
-
# resource/externalFile elements that match the image, page, or thumb resource type, including extension
|
121
|
-
# Also, prepends the corresponding druid and / specifically for Searchworks use
|
122
|
-
# @return [Array<String>] filenames
|
123
|
-
def parse_sw_image_ids
|
124
|
-
public_xml_doc.xpath('//resource[@type="page" or @type="image" or @type="thumb"]').map do |node|
|
125
|
-
node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid.gsub('druid:','')}/" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
|
126
|
-
"#{y.attributes['objectId'].text.split(':').last}" + "/" + "#{y.attributes['fileId']}"
|
127
|
-
end
|
128
|
-
end.flatten
|
129
|
-
end
|
130
|
-
|
131
|
-
def collections
|
132
|
-
@collections ||= predicate_druids('isMemberOfCollection').map do |druid|
|
133
|
-
PublicXmlRecord.new(druid, options)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
def constituents
|
138
|
-
@constituents ||= predicate_druids('isConstituentOf').map do |druid|
|
139
|
-
PublicXmlRecord.new(druid, options)
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
def items(&block)
|
144
|
-
return [] unless is_collection
|
145
|
-
|
146
|
-
purl_fetcher_client.collection_members(druid, &block)
|
147
|
-
end
|
148
|
-
|
149
|
-
# the thumbnail in publicXML properly URI encoded, including the slash separator
|
150
|
-
# @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
|
151
|
-
def encoded_thumb
|
152
|
-
thumb=parse_thumb
|
153
|
-
return unless thumb
|
154
|
-
thumb_druid=thumb.split('/').first # the druid (before the first slash)
|
155
|
-
thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
|
156
|
-
"#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
|
157
|
-
end
|
158
|
-
|
159
|
-
# get the druids from predicate relationships in rels-ext from public_xml
|
160
|
-
# @return [Array<String>, nil] the druids (e.g. ww123yy1234) from the rdf:resource of the predicate relationships, or nil if none
|
161
|
-
def predicate_druids(predicate, predicate_ns = 'info:fedora/fedora-system:def/relations-external#')
|
162
|
-
ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'pred_ns' => predicate_ns }
|
163
|
-
xpth = "/publicObject/rdf:RDF/rdf:Description/pred_ns:#{predicate}/@rdf:resource"
|
164
|
-
pred_nodes = public_xml_doc.xpath(xpth, ns_hash)
|
165
|
-
pred_nodes.reject { |n| n.value.empty? }.map do |n|
|
166
|
-
n.value.split('druid:').last
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
def druid_tree
|
171
|
-
druid.match(/(..)(...)(..)(....)/).captures.join('/')
|
172
|
-
end
|
173
|
-
|
174
|
-
def rights_xml
|
175
|
-
@rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
|
176
|
-
end
|
177
|
-
|
178
|
-
def rights
|
179
|
-
@rights ||= ::Dor::RightsAuth.parse(rights_xml)
|
180
|
-
end
|
181
|
-
|
182
|
-
def public?
|
183
|
-
rights.world_unrestricted?
|
184
|
-
end
|
185
|
-
|
186
|
-
def stanford_only?
|
187
|
-
rights.stanford_only_unrestricted?
|
188
|
-
end
|
189
|
-
|
190
|
-
def purl_base_url
|
191
|
-
options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
|
192
|
-
end
|
193
|
-
|
194
|
-
def purl_fetcher_api_endpoint
|
195
|
-
options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
|
196
|
-
end
|
197
|
-
|
198
|
-
def purl_fetcher_client
|
199
|
-
@purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
|
200
|
-
nil,
|
201
|
-
'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
|
202
|
-
)
|
203
|
-
end
|
204
|
-
end
|
205
|
-
end
|