purl_fetcher-client 0.5.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/.rubocop.yml +5 -0
- data/Gemfile +4 -1
- data/README.md +20 -2
- data/Rakefile +3 -3
- data/bin/console +4 -3
- data/lib/purl_fetcher/client/direct_upload_request.rb +39 -0
- data/lib/purl_fetcher/client/direct_upload_response.rb +13 -0
- data/lib/purl_fetcher/client/reader.rb +22 -51
- data/lib/purl_fetcher/client/upload_files.rb +70 -0
- data/lib/purl_fetcher/client/version.rb +2 -2
- data/lib/purl_fetcher/client.rb +87 -10
- data/purl_fetcher-client.gemspec +15 -17
- metadata +27 -39
- data/lib/purl_fetcher/client/deletes_reader.rb +0 -21
- data/lib/purl_fetcher/client/public_xml_record.rb +0 -205
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb1a08fc0010f80045db352cb2bfb2735dd2b7447513ba366281cc4d488895e4
|
4
|
+
data.tar.gz: a2213eda4272694dcf1e889ed9185956b0ba1da24e98056e2360a242e9eb6a0b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f8a0200ff798158ca1b5c58c120c94b1cb3e8f172cec1c28b30b1a1cae47291483dc5d681467a17f08d7ef650a14bf9fffc0c8d33895fd36565e2b2ea7e70ac9
|
7
|
+
data.tar.gz: 4600d53676d35892f3485f9cbc280afb4cc1fd8cedd9028794376cb20b9a49ad6d3981279e84e89b56549b87173fdc106aaa09f459d1d461a72675b2c2c2ca12
|
data/.github/workflows/ruby.yml
CHANGED
data/.rubocop.yml
ADDED
data/Gemfile
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
source "https://rubygems.org"
|
2
2
|
|
3
|
-
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
3
|
+
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
4
4
|
|
5
5
|
# Specify your gem's dependencies in purl_fetcher-client.gemspec
|
6
6
|
gemspec
|
7
|
+
|
8
|
+
gem "rubocop-rails-omakase", require: false, group: [ :development ]
|
9
|
+
gem "debug"
|
data/README.md
CHANGED
@@ -22,7 +22,25 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
-
|
25
|
+
### Uploading a file
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
PurlFetcher::Client.configure(url:'http://127.0.0.1:3000', token: 'abc123')
|
29
|
+
|
30
|
+
PurlFetcher::Client::UploadFiles.upload(
|
31
|
+
file_metadata: {
|
32
|
+
'file1.txt' => PurlFetcher::Client::DirectUploadRequest.new(
|
33
|
+
checksum: '123',
|
34
|
+
byte_size: 10_000,
|
35
|
+
content_type: 'image/tiff',
|
36
|
+
filename: 'image.tiff'
|
37
|
+
)
|
38
|
+
},
|
39
|
+
filepath_map: {
|
40
|
+
'file1.txt' => File.expand_path('Gemfile.lock')
|
41
|
+
}
|
42
|
+
)
|
43
|
+
```
|
26
44
|
|
27
45
|
## Development
|
28
46
|
|
@@ -36,4 +54,4 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERN
|
|
36
54
|
|
37
55
|
## Code of Conduct
|
38
56
|
|
39
|
-
Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/
|
57
|
+
Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/main/CODE_OF_CONDUCT.md).
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'purl_fetcher/client'
|
5
|
+
require 'debug'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "purl_fetcher/client"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start(__FILE__)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "digest"
|
4
|
+
|
5
|
+
module PurlFetcher
|
6
|
+
class Client
|
7
|
+
# This models the JSON that we send to the server.
|
8
|
+
DirectUploadRequest = Data.define(:checksum, :byte_size, :content_type, :filename) do
|
9
|
+
def self.from_file(hexdigest:, byte_size:, file_name:, content_type:)
|
10
|
+
new(checksum: hex_to_base64_digest(hexdigest),
|
11
|
+
byte_size: byte_size,
|
12
|
+
content_type: clean_content_type(content_type),
|
13
|
+
filename: file_name)
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_h
|
17
|
+
{
|
18
|
+
blob: { filename: filename, byte_size: byte_size, checksum: checksum,
|
19
|
+
content_type: self.class.clean_content_type(content_type) }
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
def to_json(*_args)
|
24
|
+
JSON.generate(to_h)
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.clean_content_type(content_type)
|
28
|
+
return "application/octet-stream" if content_type.blank?
|
29
|
+
|
30
|
+
# ActiveStorage is expecting "application/x-stata-dta" not "application/x-stata-dta;version=14"
|
31
|
+
content_type.split(";").first
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.hex_to_base64_digest(hexdigest)
|
35
|
+
[ [ hexdigest ].pack("H*") ].pack("m0")
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PurlFetcher
|
4
|
+
class Client
|
5
|
+
DirectUploadResponse = Data.define(:id, :key, :checksum, :byte_size, :content_type,
|
6
|
+
:filename, :metadata, :created_at, :direct_upload,
|
7
|
+
:signed_id, :service_name) do
|
8
|
+
def with_filename(filename)
|
9
|
+
self.class.new(**deconstruct_keys(nil).merge(filename:))
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -1,77 +1,48 @@
|
|
1
1
|
class PurlFetcher::Client::Reader
|
2
2
|
include Enumerable
|
3
|
-
attr_reader :
|
3
|
+
attr_reader :host, :conn, :range
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@
|
7
|
-
@
|
5
|
+
def initialize(host: "https://purl-fetcher.stanford.edu", conn: nil)
|
6
|
+
@host = host
|
7
|
+
@conn = conn || Faraday.new(host) do |f|
|
8
|
+
f.response :json
|
9
|
+
end
|
8
10
|
@range = {}
|
9
11
|
end
|
10
12
|
|
11
|
-
def each
|
12
|
-
return to_enum(:each) unless block_given?
|
13
|
-
|
14
|
-
changes(first_modified: first_modified, target: target).each do |change, meta|
|
15
|
-
next unless target.nil? || (change['true_targets'] && change['true_targets'].include?(target))
|
16
|
-
|
17
|
-
public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
|
18
|
-
|
19
|
-
yield public_xml, change, self
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
13
|
def collection_members(druid)
|
24
14
|
return to_enum(:collection_members, druid) unless block_given?
|
25
15
|
|
26
|
-
paginated_get("/collections/druid:#{druid.
|
27
|
-
yield
|
16
|
+
paginated_get("/collections/druid:#{druid.delete_prefix('druid:')}/purls", "purls").each do |obj, _meta|
|
17
|
+
yield obj["druid"].delete_prefix("druid:")
|
28
18
|
end
|
29
19
|
end
|
30
20
|
|
31
21
|
private
|
32
22
|
|
33
|
-
def first_modified
|
34
|
-
settings['purl_fetcher.first_modified']
|
35
|
-
end
|
36
|
-
|
37
|
-
def target
|
38
|
-
settings['purl_fetcher.target']
|
39
|
-
end
|
40
|
-
|
41
|
-
##
|
42
|
-
# @return [Enumerator]
|
43
|
-
def changes(params = {})
|
44
|
-
paginated_get('/docs/changes', 'changes', params)
|
45
|
-
end
|
46
|
-
|
47
|
-
##
|
48
|
-
# @return [Enumerator]
|
49
|
-
def deletes(params = {})
|
50
|
-
paginated_get('/docs/deletes', 'deletes', params)
|
51
|
-
end
|
52
|
-
|
53
23
|
##
|
54
24
|
# @return [Hash] a parsed JSON hash
|
55
|
-
def
|
56
|
-
|
57
|
-
end
|
25
|
+
def fetch(path, params)
|
26
|
+
response = conn.get(path, params: params)
|
58
27
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
28
|
+
unless response.success?
|
29
|
+
if defined?(Honeybadger)
|
30
|
+
Honeybadger.context({ path:, params:, response_code: response.code, body: response.body })
|
31
|
+
end
|
32
|
+
raise PurlFetcher::Client::ResponseError, "Unsuccessful response from purl-fetcher"
|
64
33
|
end
|
34
|
+
|
35
|
+
response.body
|
65
36
|
end
|
66
37
|
|
67
38
|
##
|
68
39
|
# For performance, an enumerable object is returned.
|
69
40
|
#
|
70
41
|
# @example operating on each of the results as they come in
|
71
|
-
# paginated_get('/docs/
|
42
|
+
# paginated_get('/docs/collections/druid:123', 'purls').map { |v| puts v.inspect }
|
72
43
|
#
|
73
44
|
# @example getting all of the results and converting to an array
|
74
|
-
# paginated_get('/docs/
|
45
|
+
# paginated_get('/docs/collections/druid:123', 'purls').to_a
|
75
46
|
#
|
76
47
|
# @return [Enumerator] an enumerable object
|
77
48
|
def paginated_get(path, accessor, options = {})
|
@@ -83,8 +54,8 @@ class PurlFetcher::Client::Reader
|
|
83
54
|
total = 0
|
84
55
|
|
85
56
|
loop do
|
86
|
-
data =
|
87
|
-
@range = data[
|
57
|
+
data = fetch(path, { per_page: per_page, page: page }.merge(params))
|
58
|
+
@range = data["range"]
|
88
59
|
|
89
60
|
total += data[accessor].length
|
90
61
|
|
@@ -92,7 +63,7 @@ class PurlFetcher::Client::Reader
|
|
92
63
|
yielder.yield element, self
|
93
64
|
end
|
94
65
|
|
95
|
-
page = data[
|
66
|
+
page = data["pages"]["next_page"]
|
96
67
|
|
97
68
|
break if page.nil? || total >= max
|
98
69
|
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PurlFetcher
|
4
|
+
class Client
|
5
|
+
# The file uploading part of a transfer
|
6
|
+
class UploadFiles
|
7
|
+
# @param [Hash<String,DirectUploadRequest>] file_metadata map of relative filepaths to file metadata
|
8
|
+
# @param [Hash<String,String>] filepath_map map of relative filepaths to absolute filepaths
|
9
|
+
def self.upload(file_metadata:, filepath_map:)
|
10
|
+
new(file_metadata: file_metadata, filepath_map: filepath_map).upload
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param [Hash<String,DirectUploadRequest>] file_metadata map of relative filepaths to file metadata
|
14
|
+
# @param [Hash<String,String>] filepath_map map of relative filepaths to absolute filepaths
|
15
|
+
def initialize(file_metadata:, filepath_map:)
|
16
|
+
@file_metadata = file_metadata
|
17
|
+
@filepath_map = filepath_map
|
18
|
+
end
|
19
|
+
|
20
|
+
# @return [Array<DirectUploadResponse>] the responses from the server for the uploads
|
21
|
+
def upload
|
22
|
+
file_metadata.map do |filepath, metadata|
|
23
|
+
direct_upload(metadata.to_json).tap do |response|
|
24
|
+
# ActiveStorage modifies the filename provided in response, so setting here with the relative filename
|
25
|
+
response = response.with_filename(filepath)
|
26
|
+
upload_file(response)
|
27
|
+
logger.info("Upload of #{filepath} complete")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
attr_reader :file_metadata, :filepath_map
|
35
|
+
|
36
|
+
def logger
|
37
|
+
Client.config.logger
|
38
|
+
end
|
39
|
+
|
40
|
+
def client
|
41
|
+
Client.instance
|
42
|
+
end
|
43
|
+
|
44
|
+
def path
|
45
|
+
"/v1/direct_uploads"
|
46
|
+
end
|
47
|
+
|
48
|
+
def direct_upload(metadata_json)
|
49
|
+
logger.info("Starting an upload request: #{metadata_json}")
|
50
|
+
response = client.post(path: path, body: metadata_json)
|
51
|
+
|
52
|
+
logger.info("Response from server: #{response}")
|
53
|
+
DirectUploadResponse.new(**response.symbolize_keys)
|
54
|
+
end
|
55
|
+
|
56
|
+
def upload_file(response)
|
57
|
+
logger.info("Uploading `#{response.filename}' to #{response.direct_upload.fetch('url')}")
|
58
|
+
|
59
|
+
client.put(
|
60
|
+
path: response.direct_upload.fetch("url"),
|
61
|
+
body: ::File.open(filepath_map[response.filename]),
|
62
|
+
headers: {
|
63
|
+
"content-type" => response.content_type,
|
64
|
+
"content-length" => response.byte_size.to_s
|
65
|
+
}
|
66
|
+
)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
data/lib/purl_fetcher/client.rb
CHANGED
@@ -1,15 +1,92 @@
|
|
1
|
+
require "active_support"
|
2
|
+
require "active_support/core_ext"
|
3
|
+
require "faraday"
|
4
|
+
require "singleton"
|
5
|
+
require "logger"
|
6
|
+
|
1
7
|
require "purl_fetcher/client/version"
|
2
|
-
require
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
end
|
8
|
+
require "purl_fetcher/client/reader"
|
9
|
+
require "purl_fetcher/client/upload_files"
|
10
|
+
require "purl_fetcher/client/direct_upload_request"
|
11
|
+
require "purl_fetcher/client/direct_upload_response"
|
7
12
|
|
8
13
|
module PurlFetcher
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
#
|
14
|
+
class Client
|
15
|
+
# General error originating in PurlFetcher::Client
|
16
|
+
class Error < StandardError; end
|
17
|
+
|
18
|
+
# Raised when the response from the server is not successful
|
19
|
+
class ResponseError < Error; end
|
20
|
+
|
21
|
+
include Singleton
|
22
|
+
class << self
|
23
|
+
def configure(url:, logger: default_logger, token: nil)
|
24
|
+
instance.config = Config.new(
|
25
|
+
url: url,
|
26
|
+
logger: logger,
|
27
|
+
token: token
|
28
|
+
)
|
29
|
+
|
30
|
+
instance
|
31
|
+
end
|
32
|
+
|
33
|
+
def default_logger
|
34
|
+
Logger.new($stdout)
|
35
|
+
end
|
36
|
+
|
37
|
+
delegate :config, to: :instance
|
38
|
+
end
|
39
|
+
|
40
|
+
attr_accessor :config
|
41
|
+
|
42
|
+
# Send a POST request
|
43
|
+
# @param path [String] the path for the API request
|
44
|
+
# @param body [String] the body of the POST request
|
45
|
+
def post(path:, body:)
|
46
|
+
response = connection.post(path) do |request|
|
47
|
+
request.body = body
|
48
|
+
end
|
49
|
+
|
50
|
+
raise "unexpected response: #{response.status} #{response.body}" unless response.success?
|
51
|
+
|
52
|
+
response.body
|
53
|
+
end
|
54
|
+
|
55
|
+
# Send a PUT request
|
56
|
+
# @param path [String] the path for the API request
|
57
|
+
# @param body [String] the body of the PUT request
|
58
|
+
# @param headers [Hash] extra headers to add to the SDR API request
|
59
|
+
def put(path:, body:, headers: {})
|
60
|
+
response = connection.put(path) do |request|
|
61
|
+
request.body = body
|
62
|
+
request.headers = default_headers.merge(headers)
|
63
|
+
end
|
64
|
+
|
65
|
+
raise "unexpected response: #{response.status} #{response.body}" unless response.success?
|
66
|
+
|
67
|
+
response.body
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
Config = Data.define(:url, :logger, :token)
|
73
|
+
|
74
|
+
def connection
|
75
|
+
Faraday.new(
|
76
|
+
url: config.url,
|
77
|
+
headers: default_headers
|
78
|
+
) do |conn|
|
79
|
+
conn.response :json
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def default_headers
|
84
|
+
{
|
85
|
+
accept: "application/json",
|
86
|
+
content_type: "application/json"
|
87
|
+
}.tap do |headers|
|
88
|
+
headers[:authorization] = "Bearer #{config.token}" if config.token
|
89
|
+
end
|
90
|
+
end
|
14
91
|
end
|
15
92
|
end
|
data/purl_fetcher-client.gemspec
CHANGED
@@ -1,32 +1,30 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path("../lib", __FILE__)
|
1
|
+
lib = File.expand_path('lib', __dir__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
3
|
+
require 'purl_fetcher/client/version'
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
6
|
+
spec.name = 'purl_fetcher-client'
|
8
7
|
spec.version = PurlFetcher::Client::VERSION
|
9
|
-
spec.authors = ["Chris Beer"]
|
10
|
-
spec.email = ["cabeer@stanford.edu"]
|
8
|
+
spec.authors = [ "Chris Beer" ]
|
9
|
+
spec.email = [ "cabeer@stanford.edu" ]
|
11
10
|
|
12
11
|
spec.summary = 'Traject-compatible reader implementation for streaming data from purl-fetcher'
|
13
12
|
|
14
13
|
# Specify which files should be added to the gem when it is released.
|
15
14
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
16
|
-
spec.files = Dir.chdir(File.expand_path(
|
15
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
17
16
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
17
|
end
|
19
|
-
spec.bindir =
|
18
|
+
spec.bindir = 'exe'
|
20
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
21
|
-
spec.require_paths = ["lib"]
|
20
|
+
spec.require_paths = [ "lib" ]
|
22
21
|
|
23
|
-
spec.add_dependency '
|
24
|
-
spec.add_dependency '
|
25
|
-
spec.add_dependency 'stanford-mods'
|
26
|
-
spec.add_dependency 'dor-rights-auth'
|
27
|
-
spec.add_dependency 'mods_display', '>= 1.0.0.alpha1'
|
22
|
+
spec.add_dependency 'activesupport'
|
23
|
+
spec.add_dependency 'faraday', '~> 2.1'
|
28
24
|
|
29
|
-
spec.add_development_dependency
|
30
|
-
spec.add_development_dependency
|
31
|
-
spec.add_development_dependency
|
25
|
+
spec.add_development_dependency 'bundler'
|
26
|
+
spec.add_development_dependency 'debug'
|
27
|
+
spec.add_development_dependency 'rake'
|
28
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
29
|
+
spec.add_development_dependency 'webmock'
|
32
30
|
end
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: purl_fetcher-client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Beer
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
@@ -25,27 +25,27 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: faraday
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '2.1'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '2.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
|
-
type: :
|
48
|
+
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
@@ -53,13 +53,13 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: debug
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
-
type: :
|
62
|
+
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
@@ -67,35 +67,35 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: rake
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
76
|
-
type: :
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
82
|
+
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: rspec
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
89
|
+
version: '3.0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
96
|
+
version: '3.0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: webmock
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -108,20 +108,6 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: rspec
|
113
|
-
requirement: !ruby/object:Gem::Requirement
|
114
|
-
requirements:
|
115
|
-
- - "~>"
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '3.0'
|
118
|
-
type: :development
|
119
|
-
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - "~>"
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: '3.0'
|
125
111
|
description:
|
126
112
|
email:
|
127
113
|
- cabeer@stanford.edu
|
@@ -132,6 +118,7 @@ files:
|
|
132
118
|
- ".github/workflows/ruby.yml"
|
133
119
|
- ".gitignore"
|
134
120
|
- ".rspec"
|
121
|
+
- ".rubocop.yml"
|
135
122
|
- CODE_OF_CONDUCT.md
|
136
123
|
- Gemfile
|
137
124
|
- README.md
|
@@ -139,9 +126,10 @@ files:
|
|
139
126
|
- bin/console
|
140
127
|
- bin/setup
|
141
128
|
- lib/purl_fetcher/client.rb
|
142
|
-
- lib/purl_fetcher/client/
|
143
|
-
- lib/purl_fetcher/client/
|
129
|
+
- lib/purl_fetcher/client/direct_upload_request.rb
|
130
|
+
- lib/purl_fetcher/client/direct_upload_response.rb
|
144
131
|
- lib/purl_fetcher/client/reader.rb
|
132
|
+
- lib/purl_fetcher/client/upload_files.rb
|
145
133
|
- lib/purl_fetcher/client/version.rb
|
146
134
|
- purl_fetcher-client.gemspec
|
147
135
|
homepage:
|
@@ -162,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
162
150
|
- !ruby/object:Gem::Version
|
163
151
|
version: '0'
|
164
152
|
requirements: []
|
165
|
-
rubygems_version: 3.
|
153
|
+
rubygems_version: 3.4.19
|
166
154
|
signing_key:
|
167
155
|
specification_version: 4
|
168
156
|
summary: Traject-compatible reader implementation for streaming data from purl-fetcher
|
@@ -1,21 +0,0 @@
|
|
1
|
-
class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
|
2
|
-
# Enumerate objects that should be deleted.
|
3
|
-
def each
|
4
|
-
return to_enum(:each) unless block_given?
|
5
|
-
|
6
|
-
deletes(first_modified: first_modified).each do |change|
|
7
|
-
|
8
|
-
public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
|
9
|
-
|
10
|
-
yield public_xml, change, self
|
11
|
-
end
|
12
|
-
|
13
|
-
changes(first_modified: first_modified, target: target).each do |change|
|
14
|
-
public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
|
15
|
-
|
16
|
-
next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
|
17
|
-
|
18
|
-
yield public_xml, change, self
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
@@ -1,205 +0,0 @@
|
|
1
|
-
require 'nokogiri'
|
2
|
-
require 'stanford-mods'
|
3
|
-
require 'mods_display'
|
4
|
-
require 'dor/rights_auth'
|
5
|
-
|
6
|
-
module PurlFetcher::Client
|
7
|
-
class PublicXmlRecord
|
8
|
-
attr_reader :druid, :options
|
9
|
-
|
10
|
-
def self.fetch(url)
|
11
|
-
if defined?(JRUBY_VERSION)
|
12
|
-
response = Manticore.get(url)
|
13
|
-
response.body if response.code == 200
|
14
|
-
else
|
15
|
-
response = HTTP.get(url)
|
16
|
-
response.body if response.status.ok?
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def initialize(druid, options = {})
|
21
|
-
@druid = druid
|
22
|
-
@options = options
|
23
|
-
end
|
24
|
-
|
25
|
-
def searchworks_id
|
26
|
-
catkey.nil? ? druid : catkey
|
27
|
-
end
|
28
|
-
|
29
|
-
# @return catkey value from the DOR identity_metadata, or nil if there is no catkey
|
30
|
-
def catkey
|
31
|
-
get_value(public_xml_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']"))
|
32
|
-
end
|
33
|
-
|
34
|
-
# @return objectLabel value from the DOR identity_metadata, or nil if there is no barcode
|
35
|
-
def label
|
36
|
-
get_value(public_xml_doc.xpath('/publicObject/identityMetadata/objectLabel'))
|
37
|
-
end
|
38
|
-
|
39
|
-
def get_value(node)
|
40
|
-
(node && node.first) ? node.first.content : nil
|
41
|
-
end
|
42
|
-
|
43
|
-
def stanford_mods
|
44
|
-
@smods_rec ||= Stanford::Mods::Record.new.tap do |smods_rec|
|
45
|
-
smods_rec.from_str(mods.to_s)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def mods_display
|
50
|
-
@mods_display ||= ModsDisplay::HTML.new(stanford_mods)
|
51
|
-
end
|
52
|
-
|
53
|
-
def public_xml
|
54
|
-
@public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
|
55
|
-
end
|
56
|
-
|
57
|
-
def public_xml?
|
58
|
-
!!public_xml
|
59
|
-
end
|
60
|
-
|
61
|
-
def public_xml_doc
|
62
|
-
@public_xml_doc ||= Nokogiri::XML(public_xml)
|
63
|
-
end
|
64
|
-
|
65
|
-
def mods
|
66
|
-
@mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
|
67
|
-
public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
|
68
|
-
else
|
69
|
-
if defined?(Honeybadger)
|
70
|
-
Honeybadger.notify(
|
71
|
-
'Unable to find MODS in the public xml; falling back to stand-along mods document',
|
72
|
-
context: { druid: druid }
|
73
|
-
)
|
74
|
-
end
|
75
|
-
|
76
|
-
Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
# @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
|
81
|
-
def is_collection
|
82
|
-
object_type_nodes = public_xml_doc.xpath('//objectType')
|
83
|
-
object_type_nodes.find_index { |n| %w(collection set).include? n.text.downcase }
|
84
|
-
end
|
85
|
-
|
86
|
-
# value is used to tell SearchWorks UI app of specific display needs for objects
|
87
|
-
# this comes from the <thumb> element in publicXML or the first image found (as parsed by discovery-indexer)
|
88
|
-
# @return [String] filename or nil if none found
|
89
|
-
def thumb
|
90
|
-
return if is_collection
|
91
|
-
encoded_thumb if %w(book image manuscript map webarchive-seed).include?(dor_content_type)
|
92
|
-
end
|
93
|
-
|
94
|
-
# the value of the type attribute for a DOR object's contentMetadata
|
95
|
-
# more info about these values is here:
|
96
|
-
# https://consul.stanford.edu/display/chimera/DOR+content+types%2C+resource+types+and+interpretive+metadata
|
97
|
-
# https://consul.stanford.edu/display/chimera/Summary+of+Content+Types%2C+Resource+Types+and+their+behaviors
|
98
|
-
# @return [String]
|
99
|
-
def dor_content_type
|
100
|
-
public_xml_doc.xpath('//contentMetadata/@type').text
|
101
|
-
end
|
102
|
-
|
103
|
-
# the thumbnail in publicXML, falling back to the first image if no thumb node is found
|
104
|
-
# @return [String] thumb filename with druid prepended, e.g. oo000oo0001/filename withspace.jp2
|
105
|
-
def parse_thumb
|
106
|
-
unless public_xml_doc.nil?
|
107
|
-
thumb = public_xml_doc.xpath('//thumb')
|
108
|
-
# first try and parse what is in the thumb node of publicXML, but fallback to the first image if needed
|
109
|
-
if thumb.size == 1
|
110
|
-
thumb.first.content
|
111
|
-
elsif thumb.size == 0 && parse_sw_image_ids.size > 0
|
112
|
-
parse_sw_image_ids.first
|
113
|
-
else
|
114
|
-
nil
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
# the druid and id attribute of resource/file and objectId and fileId of the
|
120
|
-
# resource/externalFile elements that match the image, page, or thumb resource type, including extension
|
121
|
-
# Also, prepends the corresponding druid and / specifically for Searchworks use
|
122
|
-
# @return [Array<String>] filenames
|
123
|
-
def parse_sw_image_ids
|
124
|
-
public_xml_doc.xpath('//resource[@type="page" or @type="image" or @type="thumb"]').map do |node|
|
125
|
-
node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid.gsub('druid:','')}/" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
|
126
|
-
"#{y.attributes['objectId'].text.split(':').last}" + "/" + "#{y.attributes['fileId']}"
|
127
|
-
end
|
128
|
-
end.flatten
|
129
|
-
end
|
130
|
-
|
131
|
-
def collections
|
132
|
-
@collections ||= predicate_druids('isMemberOfCollection').map do |druid|
|
133
|
-
PublicXmlRecord.new(druid, options)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
def constituents
|
138
|
-
@constituents ||= predicate_druids('isConstituentOf').map do |druid|
|
139
|
-
PublicXmlRecord.new(druid, options)
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
def items(&block)
|
144
|
-
return [] unless is_collection
|
145
|
-
|
146
|
-
purl_fetcher_client.collection_members(druid, &block)
|
147
|
-
end
|
148
|
-
|
149
|
-
# the thumbnail in publicXML properly URI encoded, including the slash separator
|
150
|
-
# @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
|
151
|
-
def encoded_thumb
|
152
|
-
thumb=parse_thumb
|
153
|
-
return unless thumb
|
154
|
-
thumb_druid=thumb.split('/').first # the druid (before the first slash)
|
155
|
-
thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
|
156
|
-
"#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
|
157
|
-
end
|
158
|
-
|
159
|
-
# get the druids from predicate relationships in rels-ext from public_xml
|
160
|
-
# @return [Array<String>, nil] the druids (e.g. ww123yy1234) from the rdf:resource of the predicate relationships, or nil if none
|
161
|
-
def predicate_druids(predicate, predicate_ns = 'info:fedora/fedora-system:def/relations-external#')
|
162
|
-
ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'pred_ns' => predicate_ns }
|
163
|
-
xpth = "/publicObject/rdf:RDF/rdf:Description/pred_ns:#{predicate}/@rdf:resource"
|
164
|
-
pred_nodes = public_xml_doc.xpath(xpth, ns_hash)
|
165
|
-
pred_nodes.reject { |n| n.value.empty? }.map do |n|
|
166
|
-
n.value.split('druid:').last
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
def druid_tree
|
171
|
-
druid.match(/(..)(...)(..)(....)/).captures.join('/')
|
172
|
-
end
|
173
|
-
|
174
|
-
def rights_xml
|
175
|
-
@rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
|
176
|
-
end
|
177
|
-
|
178
|
-
def rights
|
179
|
-
@rights ||= ::Dor::RightsAuth.parse(rights_xml)
|
180
|
-
end
|
181
|
-
|
182
|
-
def public?
|
183
|
-
rights.world_unrestricted?
|
184
|
-
end
|
185
|
-
|
186
|
-
def stanford_only?
|
187
|
-
rights.stanford_only_unrestricted?
|
188
|
-
end
|
189
|
-
|
190
|
-
def purl_base_url
|
191
|
-
options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
|
192
|
-
end
|
193
|
-
|
194
|
-
def purl_fetcher_api_endpoint
|
195
|
-
options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
|
196
|
-
end
|
197
|
-
|
198
|
-
def purl_fetcher_client
|
199
|
-
@purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
|
200
|
-
nil,
|
201
|
-
'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
|
202
|
-
)
|
203
|
-
end
|
204
|
-
end
|
205
|
-
end
|