purl_fetcher-client 0.5.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b80b63b29ffbed55055e58d2d063f4496f84015b1130a6908e82e4d91a18c94d
4
- data.tar.gz: 3d9691d8e9939f17a8d4f3a66a03bb3644463a3ffcc9604b8558242614f4e0a6
3
+ metadata.gz: cb1a08fc0010f80045db352cb2bfb2735dd2b7447513ba366281cc4d488895e4
4
+ data.tar.gz: a2213eda4272694dcf1e889ed9185956b0ba1da24e98056e2360a242e9eb6a0b
5
5
  SHA512:
6
- metadata.gz: '0484a2254521f201fed8d95cf65ab85a3310111c6be46c6d13ecb4e7c40cec3d807861b42c47dc389f34c242918733dce7c21b66beb872d062112bbfe8674dd0'
7
- data.tar.gz: ff5cdc4fe8d78eedeba1594e8aa47e87bfb6186337d4f1042f928a56f0a6b854487e7acfba5398e306401a123cb710ae0ac4bc6b9b11b157a81d20550b789570
6
+ metadata.gz: f8a0200ff798158ca1b5c58c120c94b1cb3e8f172cec1c28b30b1a1cae47291483dc5d681467a17f08d7ef650a14bf9fffc0c8d33895fd36565e2b2ea7e70ac9
7
+ data.tar.gz: 4600d53676d35892f3485f9cbc280afb4cc1fd8cedd9028794376cb20b9a49ad6d3981279e84e89b56549b87173fdc106aaa09f459d1d461a72675b2c2c2ca12
@@ -2,9 +2,9 @@ name: CI
2
2
 
3
3
  on:
4
4
  push:
5
- branches: [ master ]
5
+ branches: [ main ]
6
6
  pull_request:
7
- branches: [ master ]
7
+ branches: [ main ]
8
8
 
9
9
  jobs:
10
10
  tests:
data/.rubocop.yml ADDED
@@ -0,0 +1,5 @@
1
+ inherit_gem:
2
+ rubocop-rails-omakase: rubocop.yml
3
+
4
+ AllCops:
5
+ TargetRubyVersion: 3.2
data/Gemfile CHANGED
@@ -1,6 +1,9 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
4
 
5
5
  # Specify your gem's dependencies in purl_fetcher-client.gemspec
6
6
  gemspec
7
+
8
+ gem "rubocop-rails-omakase", require: false, group: [ :development ]
9
+ gem "debug"
data/README.md CHANGED
@@ -22,7 +22,25 @@ Or install it yourself as:
22
22
 
23
23
  ## Usage
24
24
 
25
- TODO: Write usage instructions here
25
+ ### Uploading a file
26
+
27
+ ```ruby
28
+ PurlFetcher::Client.configure(url:'http://127.0.0.1:3000', token: 'abc123')
29
+
30
+ PurlFetcher::Client::UploadFiles.upload(
31
+ file_metadata: {
32
+ 'file1.txt' => PurlFetcher::Client::DirectUploadRequest.new(
33
+ checksum: '123',
34
+ byte_size: 10_000,
35
+ content_type: 'image/tiff',
36
+ filename: 'image.tiff'
37
+ )
38
+ },
39
+ filepath_map: {
40
+ 'file1.txt' => File.expand_path('Gemfile.lock')
41
+ }
42
+ )
43
+ ```
26
44
 
27
45
  ## Development
28
46
 
@@ -36,4 +54,4 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERN
36
54
 
37
55
  ## Code of Conduct
38
56
 
39
- Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/master/CODE_OF_CONDUCT.md).
57
+ Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/main/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
data/bin/console CHANGED
@@ -1,7 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "purl_fetcher/client"
3
+ require 'bundler/setup'
4
+ require 'purl_fetcher/client'
5
+ require 'debug'
5
6
 
6
7
  # You can add fixtures and/or initialization code here to make experimenting
7
8
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +11,5 @@ require "purl_fetcher/client"
10
11
  # require "pry"
11
12
  # Pry.start
12
13
 
13
- require "irb"
14
+ require 'irb'
14
15
  IRB.start(__FILE__)
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
+ module PurlFetcher
6
+ class Client
7
+ # This models the JSON that we send to the server.
8
+ DirectUploadRequest = Data.define(:checksum, :byte_size, :content_type, :filename) do
9
+ def self.from_file(hexdigest:, byte_size:, file_name:, content_type:)
10
+ new(checksum: hex_to_base64_digest(hexdigest),
11
+ byte_size: byte_size,
12
+ content_type: clean_content_type(content_type),
13
+ filename: file_name)
14
+ end
15
+
16
+ def to_h
17
+ {
18
+ blob: { filename: filename, byte_size: byte_size, checksum: checksum,
19
+ content_type: self.class.clean_content_type(content_type) }
20
+ }
21
+ end
22
+
23
+ def to_json(*_args)
24
+ JSON.generate(to_h)
25
+ end
26
+
27
+ def self.clean_content_type(content_type)
28
+ return "application/octet-stream" if content_type.blank?
29
+
30
+ # ActiveStorage is expecting "application/x-stata-dta" not "application/x-stata-dta;version=14"
31
+ content_type.split(";").first
32
+ end
33
+
34
+ def self.hex_to_base64_digest(hexdigest)
35
+ [ [ hexdigest ].pack("H*") ].pack("m0")
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PurlFetcher
4
+ class Client
5
+ DirectUploadResponse = Data.define(:id, :key, :checksum, :byte_size, :content_type,
6
+ :filename, :metadata, :created_at, :direct_upload,
7
+ :signed_id, :service_name) do
8
+ def with_filename(filename)
9
+ self.class.new(**deconstruct_keys(nil).merge(filename:))
10
+ end
11
+ end
12
+ end
13
+ end
@@ -1,77 +1,48 @@
1
1
  class PurlFetcher::Client::Reader
2
2
  include Enumerable
3
- attr_reader :input_stream, :settings, :range
3
+ attr_reader :host, :conn, :range
4
4
 
5
- def initialize(input_stream, settings = {})
6
- @settings = settings
7
- @input_stream = input_stream
5
+ def initialize(host: "https://purl-fetcher.stanford.edu", conn: nil)
6
+ @host = host
7
+ @conn = conn || Faraday.new(host) do |f|
8
+ f.response :json
9
+ end
8
10
  @range = {}
9
11
  end
10
12
 
11
- def each
12
- return to_enum(:each) unless block_given?
13
-
14
- changes(first_modified: first_modified, target: target).each do |change, meta|
15
- next unless target.nil? || (change['true_targets'] && change['true_targets'].include?(target))
16
-
17
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
18
-
19
- yield public_xml, change, self
20
- end
21
- end
22
-
23
13
  def collection_members(druid)
24
14
  return to_enum(:collection_members, druid) unless block_given?
25
15
 
26
- paginated_get("/collections/druid:#{druid.sub(/^druid:/, '')}/purls", 'purls').each do |obj, _meta|
27
- yield PurlFetcher::Client::PublicXmlRecord.new(obj['druid'].sub('druid:', ''), settings), obj, self
16
+ paginated_get("/collections/druid:#{druid.delete_prefix('druid:')}/purls", "purls").each do |obj, _meta|
17
+ yield obj["druid"].delete_prefix("druid:")
28
18
  end
29
19
  end
30
20
 
31
21
  private
32
22
 
33
- def first_modified
34
- settings['purl_fetcher.first_modified']
35
- end
36
-
37
- def target
38
- settings['purl_fetcher.target']
39
- end
40
-
41
- ##
42
- # @return [Enumerator]
43
- def changes(params = {})
44
- paginated_get('/docs/changes', 'changes', params)
45
- end
46
-
47
- ##
48
- # @return [Enumerator]
49
- def deletes(params = {})
50
- paginated_get('/docs/deletes', 'deletes', params)
51
- end
52
-
53
23
  ##
54
24
  # @return [Hash] a parsed JSON hash
55
- def get(path, params = {})
56
- JSON.parse(fetch(settings.fetch('purl_fetcher.api_endpoint', 'https://purl-fetcher.stanford.edu') + path, params))
57
- end
25
+ def fetch(path, params)
26
+ response = conn.get(path, params: params)
58
27
 
59
- def fetch(url, params)
60
- if defined?(Manticore)
61
- Manticore.get(url, query: params).body
62
- else
63
- HTTP.get(url, params: params).body
28
+ unless response.success?
29
+ if defined?(Honeybadger)
30
+ Honeybadger.context({ path:, params:, response_code: response.code, body: response.body })
31
+ end
32
+ raise PurlFetcher::Client::ResponseError, "Unsuccessful response from purl-fetcher"
64
33
  end
34
+
35
+ response.body
65
36
  end
66
37
 
67
38
  ##
68
39
  # For performance, an enumerable object is returned.
69
40
  #
70
41
  # @example operating on each of the results as they come in
71
- # paginated_get('/docs/changes', 'changes').map { |v| puts v.inspect }
42
+ # paginated_get('/collections/druid:123/purls', 'purls').map { |v| puts v.inspect }
72
43
  #
73
44
  # @example getting all of the results and converting to an array
74
- # paginated_get('/docs/changes', 'changes').to_a
45
+ # paginated_get('/collections/druid:123/purls', 'purls').to_a
75
46
  #
76
47
  # @return [Enumerator] an enumerable object
77
48
  def paginated_get(path, accessor, options = {})
@@ -83,8 +54,8 @@ class PurlFetcher::Client::Reader
83
54
  total = 0
84
55
 
85
56
  loop do
86
- data = get(path, { per_page: per_page, page: page }.merge(params))
87
- @range = data['range']
57
+ data = fetch(path, { per_page: per_page, page: page }.merge(params))
58
+ @range = data["range"]
88
59
 
89
60
  total += data[accessor].length
90
61
 
@@ -92,7 +63,7 @@ class PurlFetcher::Client::Reader
92
63
  yielder.yield element, self
93
64
  end
94
65
 
95
- page = data['pages']['next_page']
66
+ page = data["pages"]["next_page"]
96
67
 
97
68
  break if page.nil? || total >= max
98
69
  end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PurlFetcher
4
+ class Client
5
+ # The file uploading part of a transfer
6
+ class UploadFiles
7
+ # @param [Hash<String,DirectUploadRequest>] file_metadata map of relative filepaths to file metadata
8
+ # @param [Hash<String,String>] filepath_map map of relative filepaths to absolute filepaths
9
+ def self.upload(file_metadata:, filepath_map:)
10
+ new(file_metadata: file_metadata, filepath_map: filepath_map).upload
11
+ end
12
+
13
+ # @param [Hash<String,DirectUploadRequest>] file_metadata map of relative filepaths to file metadata
14
+ # @param [Hash<String,String>] filepath_map map of relative filepaths to absolute filepaths
15
+ def initialize(file_metadata:, filepath_map:)
16
+ @file_metadata = file_metadata
17
+ @filepath_map = filepath_map
18
+ end
19
+
20
+ # @return [Array<DirectUploadResponse>] the responses from the server for the uploads
21
+ def upload
22
+ file_metadata.map do |filepath, metadata|
23
+ direct_upload(metadata.to_json).tap do |response|
24
+ # ActiveStorage modifies the filename provided in response, so setting here with the relative filename
25
+ response = response.with_filename(filepath)
26
+ upload_file(response)
27
+ logger.info("Upload of #{filepath} complete")
28
+ end
29
+ end
30
+ end
31
+
32
+ private
33
+
34
+ attr_reader :file_metadata, :filepath_map
35
+
36
+ def logger
37
+ Client.config.logger
38
+ end
39
+
40
+ def client
41
+ Client.instance
42
+ end
43
+
44
+ def path
45
+ "/v1/direct_uploads"
46
+ end
47
+
48
+ def direct_upload(metadata_json)
49
+ logger.info("Starting an upload request: #{metadata_json}")
50
+ response = client.post(path: path, body: metadata_json)
51
+
52
+ logger.info("Response from server: #{response}")
53
+ DirectUploadResponse.new(**response.symbolize_keys)
54
+ end
55
+
56
+ def upload_file(response)
57
+ logger.info("Uploading `#{response.filename}' to #{response.direct_upload.fetch('url')}")
58
+
59
+ client.put(
60
+ path: response.direct_upload.fetch("url"),
61
+ body: ::File.open(filepath_map[response.filename]),
62
+ headers: {
63
+ "content-type" => response.content_type,
64
+ "content-length" => response.byte_size.to_s
65
+ }
66
+ )
67
+ end
68
+ end
69
+ end
70
+ end
@@ -1,5 +1,5 @@
1
1
  module PurlFetcher
2
- module Client
3
- VERSION = "0.5.0"
2
+ class Client
3
+ VERSION = "1.1.0"
4
4
  end
5
5
  end
@@ -1,15 +1,92 @@
1
+ require "active_support"
2
+ require "active_support/core_ext"
3
+ require "faraday"
4
+ require "singleton"
5
+ require "logger"
6
+
1
7
  require "purl_fetcher/client/version"
2
- require 'http'
3
- begin
4
- require 'manticore' if defined? JRUBY_VERSION
5
- rescue LoadError
6
- end
8
+ require "purl_fetcher/client/reader"
9
+ require "purl_fetcher/client/upload_files"
10
+ require "purl_fetcher/client/direct_upload_request"
11
+ require "purl_fetcher/client/direct_upload_response"
7
12
 
8
13
  module PurlFetcher
9
- module Client
10
- require 'purl_fetcher/client/public_xml_record'
11
- require 'purl_fetcher/client/reader'
12
- require 'purl_fetcher/client/deletes_reader'
13
- # Your code goes here...
14
+ class Client
15
+ # General error originating in PurlFetcher::Client
16
+ class Error < StandardError; end
17
+
18
+ # Raised when the response from the server is not successful
19
+ class ResponseError < Error; end
20
+
21
+ include Singleton
22
+ class << self
23
+ def configure(url:, logger: default_logger, token: nil)
24
+ instance.config = Config.new(
25
+ url: url,
26
+ logger: logger,
27
+ token: token
28
+ )
29
+
30
+ instance
31
+ end
32
+
33
+ def default_logger
34
+ Logger.new($stdout)
35
+ end
36
+
37
+ delegate :config, to: :instance
38
+ end
39
+
40
+ attr_accessor :config
41
+
42
+ # Send a POST request
43
+ # @param path [String] the path for the API request
44
+ # @param body [String] the body of the POST request
45
+ def post(path:, body:)
46
+ response = connection.post(path) do |request|
47
+ request.body = body
48
+ end
49
+
50
+ raise "unexpected response: #{response.status} #{response.body}" unless response.success?
51
+
52
+ response.body
53
+ end
54
+
55
+ # Send a PUT request
56
+ # @param path [String] the path for the API request
57
+ # @param body [String] the body of the PUT request
58
+ # @param headers [Hash] extra headers to add to the request
59
+ def put(path:, body:, headers: {})
60
+ response = connection.put(path) do |request|
61
+ request.body = body
62
+ request.headers = default_headers.merge(headers)
63
+ end
64
+
65
+ raise "unexpected response: #{response.status} #{response.body}" unless response.success?
66
+
67
+ response.body
68
+ end
69
+
70
+ private
71
+
72
+ Config = Data.define(:url, :logger, :token)
73
+
74
+ def connection
75
+ Faraday.new(
76
+ url: config.url,
77
+ headers: default_headers
78
+ ) do |conn|
79
+ conn.response :json
80
+ end
81
+ end
82
+
83
+ def default_headers
84
+ {
85
+ accept: "application/json",
86
+ content_type: "application/json"
87
+ }.tap do |headers|
88
+ headers[:authorization] = "Bearer #{config.token}" if config.token
89
+ end
90
+ end
14
91
  end
15
92
  end
@@ -1,32 +1,30 @@
1
-
2
- lib = File.expand_path("../lib", __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require "purl_fetcher/client/version"
3
+ require 'purl_fetcher/client/version'
5
4
 
6
5
  Gem::Specification.new do |spec|
7
- spec.name = "purl_fetcher-client"
6
+ spec.name = 'purl_fetcher-client'
8
7
  spec.version = PurlFetcher::Client::VERSION
9
- spec.authors = ["Chris Beer"]
10
- spec.email = ["cabeer@stanford.edu"]
8
+ spec.authors = [ "Chris Beer" ]
9
+ spec.email = [ "cabeer@stanford.edu" ]
11
10
 
12
11
  spec.summary = 'Traject-compatible reader implementation for streaming data from purl-fetcher'
13
12
 
14
13
  # Specify which files should be added to the gem when it is released.
15
14
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
16
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
15
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
17
16
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
17
  end
19
- spec.bindir = "exe"
18
+ spec.bindir = 'exe'
20
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
- spec.require_paths = ["lib"]
20
+ spec.require_paths = [ "lib" ]
22
21
 
23
- spec.add_dependency 'http'
24
- spec.add_dependency 'nokogiri'
25
- spec.add_dependency 'stanford-mods'
26
- spec.add_dependency 'dor-rights-auth'
27
- spec.add_dependency 'mods_display', '>= 1.0.0.alpha1'
22
+ spec.add_dependency 'activesupport'
23
+ spec.add_dependency 'faraday', '~> 2.1'
28
24
 
29
- spec.add_development_dependency "bundler"
30
- spec.add_development_dependency "rake"
31
- spec.add_development_dependency "rspec", "~> 3.0"
25
+ spec.add_development_dependency 'bundler'
26
+ spec.add_development_dependency 'debug'
27
+ spec.add_development_dependency 'rake'
28
+ spec.add_development_dependency 'rspec', '~> 3.0'
29
+ spec.add_development_dependency 'webmock'
32
30
  end
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purl_fetcher-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Beer
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-13 00:00:00.000000000 Z
11
+ date: 2024-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: http
14
+ name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
@@ -25,27 +25,27 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: nokogiri
28
+ name: faraday
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '2.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '2.1'
41
41
  - !ruby/object:Gem::Dependency
42
- name: stanford-mods
42
+ name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
- type: :runtime
48
+ type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
@@ -53,13 +53,13 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: dor-rights-auth
56
+ name: debug
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
- type: :runtime
62
+ type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
@@ -67,35 +67,35 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: mods_display
70
+ name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
74
74
  - !ruby/object:Gem::Version
75
- version: 1.0.0.alpha1
76
- type: :runtime
75
+ version: '0'
76
+ type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
- version: 1.0.0.alpha1
82
+ version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: bundler
84
+ name: rspec
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '0'
89
+ version: '3.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">="
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '0'
96
+ version: '3.0'
97
97
  - !ruby/object:Gem::Dependency
98
- name: rake
98
+ name: webmock
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - ">="
@@ -108,20 +108,6 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: rspec
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '3.0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '3.0'
125
111
  description:
126
112
  email:
127
113
  - cabeer@stanford.edu
@@ -132,6 +118,7 @@ files:
132
118
  - ".github/workflows/ruby.yml"
133
119
  - ".gitignore"
134
120
  - ".rspec"
121
+ - ".rubocop.yml"
135
122
  - CODE_OF_CONDUCT.md
136
123
  - Gemfile
137
124
  - README.md
@@ -139,9 +126,10 @@ files:
139
126
  - bin/console
140
127
  - bin/setup
141
128
  - lib/purl_fetcher/client.rb
142
- - lib/purl_fetcher/client/deletes_reader.rb
143
- - lib/purl_fetcher/client/public_xml_record.rb
129
+ - lib/purl_fetcher/client/direct_upload_request.rb
130
+ - lib/purl_fetcher/client/direct_upload_response.rb
144
131
  - lib/purl_fetcher/client/reader.rb
132
+ - lib/purl_fetcher/client/upload_files.rb
145
133
  - lib/purl_fetcher/client/version.rb
146
134
  - purl_fetcher-client.gemspec
147
135
  homepage:
@@ -162,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
150
  - !ruby/object:Gem::Version
163
151
  version: '0'
164
152
  requirements: []
165
- rubygems_version: 3.2.32
153
+ rubygems_version: 3.4.19
166
154
  signing_key:
167
155
  specification_version: 4
168
156
  summary: Traject-compatible reader implementation for streaming data from purl-fetcher
@@ -1,21 +0,0 @@
1
- class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
2
- # Enumerate objects that should be deleted.
3
- def each
4
- return to_enum(:each) unless block_given?
5
-
6
- deletes(first_modified: first_modified).each do |change|
7
-
8
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
9
-
10
- yield public_xml, change, self
11
- end
12
-
13
- changes(first_modified: first_modified, target: target).each do |change|
14
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
15
-
16
- next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
17
-
18
- yield public_xml, change, self
19
- end
20
- end
21
- end
@@ -1,205 +0,0 @@
1
- require 'nokogiri'
2
- require 'stanford-mods'
3
- require 'mods_display'
4
- require 'dor/rights_auth'
5
-
6
- module PurlFetcher::Client
7
- class PublicXmlRecord
8
- attr_reader :druid, :options
9
-
10
- def self.fetch(url)
11
- if defined?(JRUBY_VERSION)
12
- response = Manticore.get(url)
13
- response.body if response.code == 200
14
- else
15
- response = HTTP.get(url)
16
- response.body if response.status.ok?
17
- end
18
- end
19
-
20
- def initialize(druid, options = {})
21
- @druid = druid
22
- @options = options
23
- end
24
-
25
- def searchworks_id
26
- catkey.nil? ? druid : catkey
27
- end
28
-
29
- # @return catkey value from the DOR identity_metadata, or nil if there is no catkey
30
- def catkey
31
- get_value(public_xml_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']"))
32
- end
33
-
34
- # @return objectLabel value from the DOR identity_metadata, or nil if there is no barcode
35
- def label
36
- get_value(public_xml_doc.xpath('/publicObject/identityMetadata/objectLabel'))
37
- end
38
-
39
- def get_value(node)
40
- (node && node.first) ? node.first.content : nil
41
- end
42
-
43
- def stanford_mods
44
- @smods_rec ||= Stanford::Mods::Record.new.tap do |smods_rec|
45
- smods_rec.from_str(mods.to_s)
46
- end
47
- end
48
-
49
- def mods_display
50
- @mods_display ||= ModsDisplay::HTML.new(stanford_mods)
51
- end
52
-
53
- def public_xml
54
- @public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
55
- end
56
-
57
- def public_xml?
58
- !!public_xml
59
- end
60
-
61
- def public_xml_doc
62
- @public_xml_doc ||= Nokogiri::XML(public_xml)
63
- end
64
-
65
- def mods
66
- @mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
67
- public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
68
- else
69
- if defined?(Honeybadger)
70
- Honeybadger.notify(
71
- 'Unable to find MODS in the public xml; falling back to stand-along mods document',
72
- context: { druid: druid }
73
- )
74
- end
75
-
76
- Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
77
- end
78
- end
79
-
80
- # @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
81
- def is_collection
82
- object_type_nodes = public_xml_doc.xpath('//objectType')
83
- object_type_nodes.find_index { |n| %w(collection set).include? n.text.downcase }
84
- end
85
-
86
- # value is used to tell SearchWorks UI app of specific display needs for objects
87
- # this comes from the <thumb> element in publicXML or the first image found (as parsed by discovery-indexer)
88
- # @return [String] filename or nil if none found
89
- def thumb
90
- return if is_collection
91
- encoded_thumb if %w(book image manuscript map webarchive-seed).include?(dor_content_type)
92
- end
93
-
94
- # the value of the type attribute for a DOR object's contentMetadata
95
- # more info about these values is here:
96
- # https://consul.stanford.edu/display/chimera/DOR+content+types%2C+resource+types+and+interpretive+metadata
97
- # https://consul.stanford.edu/display/chimera/Summary+of+Content+Types%2C+Resource+Types+and+their+behaviors
98
- # @return [String]
99
- def dor_content_type
100
- public_xml_doc.xpath('//contentMetadata/@type').text
101
- end
102
-
103
- # the thumbnail in publicXML, falling back to the first image if no thumb node is found
104
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001/filename withspace.jp2
105
- def parse_thumb
106
- unless public_xml_doc.nil?
107
- thumb = public_xml_doc.xpath('//thumb')
108
- # first try and parse what is in the thumb node of publicXML, but fallback to the first image if needed
109
- if thumb.size == 1
110
- thumb.first.content
111
- elsif thumb.size == 0 && parse_sw_image_ids.size > 0
112
- parse_sw_image_ids.first
113
- else
114
- nil
115
- end
116
- end
117
- end
118
-
119
- # the druid and id attribute of resource/file and objectId and fileId of the
120
- # resource/externalFile elements that match the image, page, or thumb resource type, including extension
121
- # Also, prepends the corresponding druid and / specifically for Searchworks use
122
- # @return [Array<String>] filenames
123
- def parse_sw_image_ids
124
- public_xml_doc.xpath('//resource[@type="page" or @type="image" or @type="thumb"]').map do |node|
125
- node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid.gsub('druid:','')}/" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
126
- "#{y.attributes['objectId'].text.split(':').last}" + "/" + "#{y.attributes['fileId']}"
127
- end
128
- end.flatten
129
- end
130
-
131
- def collections
132
- @collections ||= predicate_druids('isMemberOfCollection').map do |druid|
133
- PublicXmlRecord.new(druid, options)
134
- end
135
- end
136
-
137
- def constituents
138
- @constituents ||= predicate_druids('isConstituentOf').map do |druid|
139
- PublicXmlRecord.new(druid, options)
140
- end
141
- end
142
-
143
- def items(&block)
144
- return [] unless is_collection
145
-
146
- purl_fetcher_client.collection_members(druid, &block)
147
- end
148
-
149
- # the thumbnail in publicXML properly URI encoded, including the slash separator
150
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
151
- def encoded_thumb
152
- thumb=parse_thumb
153
- return unless thumb
154
- thumb_druid=thumb.split('/').first # the druid (before the first slash)
155
- thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
156
- "#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
157
- end
158
-
159
- # get the druids from predicate relationships in rels-ext from public_xml
160
- # @return [Array<String>, nil] the druids (e.g. ww123yy1234) from the rdf:resource of the predicate relationships, or nil if none
161
- def predicate_druids(predicate, predicate_ns = 'info:fedora/fedora-system:def/relations-external#')
162
- ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'pred_ns' => predicate_ns }
163
- xpth = "/publicObject/rdf:RDF/rdf:Description/pred_ns:#{predicate}/@rdf:resource"
164
- pred_nodes = public_xml_doc.xpath(xpth, ns_hash)
165
- pred_nodes.reject { |n| n.value.empty? }.map do |n|
166
- n.value.split('druid:').last
167
- end
168
- end
169
-
170
- def druid_tree
171
- druid.match(/(..)(...)(..)(....)/).captures.join('/')
172
- end
173
-
174
- def rights_xml
175
- @rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
176
- end
177
-
178
- def rights
179
- @rights ||= ::Dor::RightsAuth.parse(rights_xml)
180
- end
181
-
182
- def public?
183
- rights.world_unrestricted?
184
- end
185
-
186
- def stanford_only?
187
- rights.stanford_only_unrestricted?
188
- end
189
-
190
- def purl_base_url
191
- options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
192
- end
193
-
194
- def purl_fetcher_api_endpoint
195
- options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
196
- end
197
-
198
- def purl_fetcher_client
199
- @purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
200
- nil,
201
- 'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
202
- )
203
- end
204
- end
205
- end