purl_fetcher-client 0.5.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b80b63b29ffbed55055e58d2d063f4496f84015b1130a6908e82e4d91a18c94d
4
- data.tar.gz: 3d9691d8e9939f17a8d4f3a66a03bb3644463a3ffcc9604b8558242614f4e0a6
3
+ metadata.gz: cb1a08fc0010f80045db352cb2bfb2735dd2b7447513ba366281cc4d488895e4
4
+ data.tar.gz: a2213eda4272694dcf1e889ed9185956b0ba1da24e98056e2360a242e9eb6a0b
5
5
  SHA512:
6
- metadata.gz: '0484a2254521f201fed8d95cf65ab85a3310111c6be46c6d13ecb4e7c40cec3d807861b42c47dc389f34c242918733dce7c21b66beb872d062112bbfe8674dd0'
7
- data.tar.gz: ff5cdc4fe8d78eedeba1594e8aa47e87bfb6186337d4f1042f928a56f0a6b854487e7acfba5398e306401a123cb710ae0ac4bc6b9b11b157a81d20550b789570
6
+ metadata.gz: f8a0200ff798158ca1b5c58c120c94b1cb3e8f172cec1c28b30b1a1cae47291483dc5d681467a17f08d7ef650a14bf9fffc0c8d33895fd36565e2b2ea7e70ac9
7
+ data.tar.gz: 4600d53676d35892f3485f9cbc280afb4cc1fd8cedd9028794376cb20b9a49ad6d3981279e84e89b56549b87173fdc106aaa09f459d1d461a72675b2c2c2ca12
@@ -2,9 +2,9 @@ name: CI
2
2
 
3
3
  on:
4
4
  push:
5
- branches: [ master ]
5
+ branches: [ main ]
6
6
  pull_request:
7
- branches: [ master ]
7
+ branches: [ main ]
8
8
 
9
9
  jobs:
10
10
  tests:
data/.rubocop.yml ADDED
@@ -0,0 +1,5 @@
1
+ inherit_gem:
2
+ rubocop-rails-omakase: rubocop.yml
3
+
4
+ AllCops:
5
+ TargetRubyVersion: 3.2
data/Gemfile CHANGED
@@ -1,6 +1,9 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
4
 
5
5
  # Specify your gem's dependencies in purl_fetcher-client.gemspec
6
6
  gemspec
7
+
8
+ gem "rubocop-rails-omakase", require: false, group: [ :development ]
9
+ gem "debug"
data/README.md CHANGED
@@ -22,7 +22,25 @@ Or install it yourself as:
22
22
 
23
23
  ## Usage
24
24
 
25
- TODO: Write usage instructions here
25
+ ### Uploading a file
26
+
27
+ ```ruby
28
+ PurlFetcher::Client.configure(url:'http://127.0.0.1:3000', token: 'abc123')
29
+
30
+ PurlFetcher::Client::UploadFiles.upload(
31
+ file_metadata: {
32
+ 'file1.txt' => PurlFetcher::Client::DirectUploadRequest.new(
33
+ checksum: '123',
34
+ byte_size: 10_000,
35
+ content_type: 'image/tiff',
36
+ filename: 'image.tiff'
37
+ )
38
+ },
39
+ filepath_map: {
40
+ 'file1.txt' => File.expand_path('Gemfile.lock')
41
+ }
42
+ )
43
+ ```
26
44
 
27
45
  ## Development
28
46
 
@@ -36,4 +54,4 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERN
36
54
 
37
55
  ## Code of Conduct
38
56
 
39
- Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/master/CODE_OF_CONDUCT.md).
57
+ Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/main/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
data/bin/console CHANGED
@@ -1,7 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "purl_fetcher/client"
3
+ require 'bundler/setup'
4
+ require 'purl_fetcher/client'
5
+ require 'debug'
5
6
 
6
7
  # You can add fixtures and/or initialization code here to make experimenting
7
8
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +11,5 @@ require "purl_fetcher/client"
10
11
  # require "pry"
11
12
  # Pry.start
12
13
 
13
- require "irb"
14
+ require 'irb'
14
15
  IRB.start(__FILE__)
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
+ module PurlFetcher
6
+ class Client
7
+ # This models the JSON that we send to the server.
8
+ DirectUploadRequest = Data.define(:checksum, :byte_size, :content_type, :filename) do
9
+ def self.from_file(hexdigest:, byte_size:, file_name:, content_type:)
10
+ new(checksum: hex_to_base64_digest(hexdigest),
11
+ byte_size: byte_size,
12
+ content_type: clean_content_type(content_type),
13
+ filename: file_name)
14
+ end
15
+
16
+ def to_h
17
+ {
18
+ blob: { filename: filename, byte_size: byte_size, checksum: checksum,
19
+ content_type: self.class.clean_content_type(content_type) }
20
+ }
21
+ end
22
+
23
+ def to_json(*_args)
24
+ JSON.generate(to_h)
25
+ end
26
+
27
+ def self.clean_content_type(content_type)
28
+ return "application/octet-stream" if content_type.blank?
29
+
30
+ # ActiveStorage is expecting "application/x-stata-dta" not "application/x-stata-dta;version=14"
31
+ content_type.split(";").first
32
+ end
33
+
34
+ def self.hex_to_base64_digest(hexdigest)
35
+ [ [ hexdigest ].pack("H*") ].pack("m0")
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PurlFetcher
4
+ class Client
5
+ DirectUploadResponse = Data.define(:id, :key, :checksum, :byte_size, :content_type,
6
+ :filename, :metadata, :created_at, :direct_upload,
7
+ :signed_id, :service_name) do
8
+ def with_filename(filename)
9
+ self.class.new(**deconstruct_keys(nil).merge(filename:))
10
+ end
11
+ end
12
+ end
13
+ end
@@ -1,77 +1,48 @@
1
1
  class PurlFetcher::Client::Reader
2
2
  include Enumerable
3
- attr_reader :input_stream, :settings, :range
3
+ attr_reader :host, :conn, :range
4
4
 
5
- def initialize(input_stream, settings = {})
6
- @settings = settings
7
- @input_stream = input_stream
5
+ def initialize(host: "https://purl-fetcher.stanford.edu", conn: nil)
6
+ @host = host
7
+ @conn = conn || Faraday.new(host) do |f|
8
+ f.response :json
9
+ end
8
10
  @range = {}
9
11
  end
10
12
 
11
- def each
12
- return to_enum(:each) unless block_given?
13
-
14
- changes(first_modified: first_modified, target: target).each do |change, meta|
15
- next unless target.nil? || (change['true_targets'] && change['true_targets'].include?(target))
16
-
17
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
18
-
19
- yield public_xml, change, self
20
- end
21
- end
22
-
23
13
  def collection_members(druid)
24
14
  return to_enum(:collection_members, druid) unless block_given?
25
15
 
26
- paginated_get("/collections/druid:#{druid.sub(/^druid:/, '')}/purls", 'purls').each do |obj, _meta|
27
- yield PurlFetcher::Client::PublicXmlRecord.new(obj['druid'].sub('druid:', ''), settings), obj, self
16
+ paginated_get("/collections/druid:#{druid.delete_prefix('druid:')}/purls", "purls").each do |obj, _meta|
17
+ yield obj["druid"].delete_prefix("druid:")
28
18
  end
29
19
  end
30
20
 
31
21
  private
32
22
 
33
- def first_modified
34
- settings['purl_fetcher.first_modified']
35
- end
36
-
37
- def target
38
- settings['purl_fetcher.target']
39
- end
40
-
41
- ##
42
- # @return [Enumerator]
43
- def changes(params = {})
44
- paginated_get('/docs/changes', 'changes', params)
45
- end
46
-
47
- ##
48
- # @return [Enumerator]
49
- def deletes(params = {})
50
- paginated_get('/docs/deletes', 'deletes', params)
51
- end
52
-
53
23
  ##
54
24
  # @return [Hash] a parsed JSON hash
55
- def get(path, params = {})
56
- JSON.parse(fetch(settings.fetch('purl_fetcher.api_endpoint', 'https://purl-fetcher.stanford.edu') + path, params))
57
- end
25
+ def fetch(path, params)
26
+ response = conn.get(path, params: params)
58
27
 
59
- def fetch(url, params)
60
- if defined?(Manticore)
61
- Manticore.get(url, query: params).body
62
- else
63
- HTTP.get(url, params: params).body
28
+ unless response.success?
29
+ if defined?(Honeybadger)
30
+ Honeybadger.context({ path:, params:, response_code: response.code, body: response.body })
31
+ end
32
+ raise PurlFetcher::Client::ResponseError, "Unsuccessful response from purl-fetcher"
64
33
  end
34
+
35
+ response.body
65
36
  end
66
37
 
67
38
  ##
68
39
  # For performance, an enumerable object is returned.
69
40
  #
70
41
  # @example operating on each of the results as they come in
71
- # paginated_get('/docs/changes', 'changes').map { |v| puts v.inspect }
42
+ # paginated_get('/docs/collections/druid:123', 'purls').map { |v| puts v.inspect }
72
43
  #
73
44
  # @example getting all of the results and converting to an array
74
- # paginated_get('/docs/changes', 'changes').to_a
45
+ # paginated_get('/docs/collections/druid:123', 'purls').to_a
75
46
  #
76
47
  # @return [Enumerator] an enumerable object
77
48
  def paginated_get(path, accessor, options = {})
@@ -83,8 +54,8 @@ class PurlFetcher::Client::Reader
83
54
  total = 0
84
55
 
85
56
  loop do
86
- data = get(path, { per_page: per_page, page: page }.merge(params))
87
- @range = data['range']
57
+ data = fetch(path, { per_page: per_page, page: page }.merge(params))
58
+ @range = data["range"]
88
59
 
89
60
  total += data[accessor].length
90
61
 
@@ -92,7 +63,7 @@ class PurlFetcher::Client::Reader
92
63
  yielder.yield element, self
93
64
  end
94
65
 
95
- page = data['pages']['next_page']
66
+ page = data["pages"]["next_page"]
96
67
 
97
68
  break if page.nil? || total >= max
98
69
  end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PurlFetcher
4
+ class Client
5
+ # The file uploading part of a transfer
6
+ class UploadFiles
7
+ # @param [Hash<String,DirectUploadRequest>] file_metadata map of relative filepaths to file metadata
8
+ # @param [Hash<String,String>] filepath_map map of relative filepaths to absolute filepaths
9
+ def self.upload(file_metadata:, filepath_map:)
10
+ new(file_metadata: file_metadata, filepath_map: filepath_map).upload
11
+ end
12
+
13
+ # @param [Hash<String,DirectUploadRequest>] file_metadata map of relative filepaths to file metadata
14
+ # @param [Hash<String,String>] filepath_map map of relative filepaths to absolute filepaths
15
+ def initialize(file_metadata:, filepath_map:)
16
+ @file_metadata = file_metadata
17
+ @filepath_map = filepath_map
18
+ end
19
+
20
+ # @return [Array<DirectUploadResponse>] the responses from the server for the uploads
21
+ def upload
22
+ file_metadata.map do |filepath, metadata|
23
+ direct_upload(metadata.to_json).tap do |response|
24
+ # ActiveStorage modifies the filename provided in response, so setting here with the relative filename
25
+ response = response.with_filename(filepath)
26
+ upload_file(response)
27
+ logger.info("Upload of #{filepath} complete")
28
+ end
29
+ end
30
+ end
31
+
32
+ private
33
+
34
+ attr_reader :file_metadata, :filepath_map
35
+
36
+ def logger
37
+ Client.config.logger
38
+ end
39
+
40
+ def client
41
+ Client.instance
42
+ end
43
+
44
+ def path
45
+ "/v1/direct_uploads"
46
+ end
47
+
48
+ def direct_upload(metadata_json)
49
+ logger.info("Starting an upload request: #{metadata_json}")
50
+ response = client.post(path: path, body: metadata_json)
51
+
52
+ logger.info("Response from server: #{response}")
53
+ DirectUploadResponse.new(**response.symbolize_keys)
54
+ end
55
+
56
+ def upload_file(response)
57
+ logger.info("Uploading `#{response.filename}' to #{response.direct_upload.fetch('url')}")
58
+
59
+ client.put(
60
+ path: response.direct_upload.fetch("url"),
61
+ body: ::File.open(filepath_map[response.filename]),
62
+ headers: {
63
+ "content-type" => response.content_type,
64
+ "content-length" => response.byte_size.to_s
65
+ }
66
+ )
67
+ end
68
+ end
69
+ end
70
+ end
@@ -1,5 +1,5 @@
1
1
  module PurlFetcher
2
- module Client
3
- VERSION = "0.5.0"
2
+ class Client
3
+ VERSION = "1.1.0"
4
4
  end
5
5
  end
@@ -1,15 +1,92 @@
1
+ require "active_support"
2
+ require "active_support/core_ext"
3
+ require "faraday"
4
+ require "singleton"
5
+ require "logger"
6
+
1
7
  require "purl_fetcher/client/version"
2
- require 'http'
3
- begin
4
- require 'manticore' if defined? JRUBY_VERSION
5
- rescue LoadError
6
- end
8
+ require "purl_fetcher/client/reader"
9
+ require "purl_fetcher/client/upload_files"
10
+ require "purl_fetcher/client/direct_upload_request"
11
+ require "purl_fetcher/client/direct_upload_response"
7
12
 
8
13
  module PurlFetcher
9
- module Client
10
- require 'purl_fetcher/client/public_xml_record'
11
- require 'purl_fetcher/client/reader'
12
- require 'purl_fetcher/client/deletes_reader'
13
- # Your code goes here...
14
+ class Client
15
+ # General error originating in PurlFetcher::Client
16
+ class Error < StandardError; end
17
+
18
+ # Raised when the response from the server is not successful
19
+ class ResponseError < Error; end
20
+
21
+ include Singleton
22
+ class << self
23
+ def configure(url:, logger: default_logger, token: nil)
24
+ instance.config = Config.new(
25
+ url: url,
26
+ logger: logger,
27
+ token: token
28
+ )
29
+
30
+ instance
31
+ end
32
+
33
+ def default_logger
34
+ Logger.new($stdout)
35
+ end
36
+
37
+ delegate :config, to: :instance
38
+ end
39
+
40
+ attr_accessor :config
41
+
42
+ # Send a POST request
43
+ # @param path [String] the path for the API request
44
+ # @param body [String] the body of the POST request
45
+ def post(path:, body:)
46
+ response = connection.post(path) do |request|
47
+ request.body = body
48
+ end
49
+
50
+ raise "unexpected response: #{response.status} #{response.body}" unless response.success?
51
+
52
+ response.body
53
+ end
54
+
55
+ # Send a PUT request
56
+ # @param path [String] the path for the API request
57
+ # @param body [String] the body of the PUT request
58
+ # @param headers [Hash] extra headers to add to the request
59
+ def put(path:, body:, headers: {})
60
+ response = connection.put(path) do |request|
61
+ request.body = body
62
+ request.headers = default_headers.merge(headers)
63
+ end
64
+
65
+ raise "unexpected response: #{response.status} #{response.body}" unless response.success?
66
+
67
+ response.body
68
+ end
69
+
70
+ private
71
+
72
+ Config = Data.define(:url, :logger, :token)
73
+
74
+ def connection
75
+ Faraday.new(
76
+ url: config.url,
77
+ headers: default_headers
78
+ ) do |conn|
79
+ conn.response :json
80
+ end
81
+ end
82
+
83
+ def default_headers
84
+ {
85
+ accept: "application/json",
86
+ content_type: "application/json"
87
+ }.tap do |headers|
88
+ headers[:authorization] = "Bearer #{config.token}" if config.token
89
+ end
90
+ end
14
91
  end
15
92
  end
@@ -1,32 +1,30 @@
1
-
2
- lib = File.expand_path("../lib", __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require "purl_fetcher/client/version"
3
+ require 'purl_fetcher/client/version'
5
4
 
6
5
  Gem::Specification.new do |spec|
7
- spec.name = "purl_fetcher-client"
6
+ spec.name = 'purl_fetcher-client'
8
7
  spec.version = PurlFetcher::Client::VERSION
9
- spec.authors = ["Chris Beer"]
10
- spec.email = ["cabeer@stanford.edu"]
8
+ spec.authors = [ "Chris Beer" ]
9
+ spec.email = [ "cabeer@stanford.edu" ]
11
10
 
12
11
  spec.summary = 'Traject-compatible reader implementation for streaming data from purl-fetcher'
13
12
 
14
13
  # Specify which files should be added to the gem when it is released.
15
14
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
16
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
15
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
17
16
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
17
  end
19
- spec.bindir = "exe"
18
+ spec.bindir = 'exe'
20
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
- spec.require_paths = ["lib"]
20
+ spec.require_paths = [ "lib" ]
22
21
 
23
- spec.add_dependency 'http'
24
- spec.add_dependency 'nokogiri'
25
- spec.add_dependency 'stanford-mods'
26
- spec.add_dependency 'dor-rights-auth'
27
- spec.add_dependency 'mods_display', '>= 1.0.0.alpha1'
22
+ spec.add_dependency 'activesupport'
23
+ spec.add_dependency 'faraday', '~> 2.1'
28
24
 
29
- spec.add_development_dependency "bundler"
30
- spec.add_development_dependency "rake"
31
- spec.add_development_dependency "rspec", "~> 3.0"
25
+ spec.add_development_dependency 'bundler'
26
+ spec.add_development_dependency 'debug'
27
+ spec.add_development_dependency 'rake'
28
+ spec.add_development_dependency 'rspec', '~> 3.0'
29
+ spec.add_development_dependency 'webmock'
32
30
  end
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purl_fetcher-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Beer
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-13 00:00:00.000000000 Z
11
+ date: 2024-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: http
14
+ name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
@@ -25,27 +25,27 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: nokogiri
28
+ name: faraday
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '2.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '2.1'
41
41
  - !ruby/object:Gem::Dependency
42
- name: stanford-mods
42
+ name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
- type: :runtime
48
+ type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
@@ -53,13 +53,13 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: dor-rights-auth
56
+ name: debug
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
- type: :runtime
62
+ type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
@@ -67,35 +67,35 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: mods_display
70
+ name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
74
74
  - !ruby/object:Gem::Version
75
- version: 1.0.0.alpha1
76
- type: :runtime
75
+ version: '0'
76
+ type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
- version: 1.0.0.alpha1
82
+ version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: bundler
84
+ name: rspec
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '0'
89
+ version: '3.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">="
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '0'
96
+ version: '3.0'
97
97
  - !ruby/object:Gem::Dependency
98
- name: rake
98
+ name: webmock
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - ">="
@@ -108,20 +108,6 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: rspec
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '3.0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '3.0'
125
111
  description:
126
112
  email:
127
113
  - cabeer@stanford.edu
@@ -132,6 +118,7 @@ files:
132
118
  - ".github/workflows/ruby.yml"
133
119
  - ".gitignore"
134
120
  - ".rspec"
121
+ - ".rubocop.yml"
135
122
  - CODE_OF_CONDUCT.md
136
123
  - Gemfile
137
124
  - README.md
@@ -139,9 +126,10 @@ files:
139
126
  - bin/console
140
127
  - bin/setup
141
128
  - lib/purl_fetcher/client.rb
142
- - lib/purl_fetcher/client/deletes_reader.rb
143
- - lib/purl_fetcher/client/public_xml_record.rb
129
+ - lib/purl_fetcher/client/direct_upload_request.rb
130
+ - lib/purl_fetcher/client/direct_upload_response.rb
144
131
  - lib/purl_fetcher/client/reader.rb
132
+ - lib/purl_fetcher/client/upload_files.rb
145
133
  - lib/purl_fetcher/client/version.rb
146
134
  - purl_fetcher-client.gemspec
147
135
  homepage:
@@ -162,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
150
  - !ruby/object:Gem::Version
163
151
  version: '0'
164
152
  requirements: []
165
- rubygems_version: 3.2.32
153
+ rubygems_version: 3.4.19
166
154
  signing_key:
167
155
  specification_version: 4
168
156
  summary: Traject-compatible reader implementation for streaming data from purl-fetcher
@@ -1,21 +0,0 @@
1
- class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
2
- # Enumerate objects that should be deleted.
3
- def each
4
- return to_enum(:each) unless block_given?
5
-
6
- deletes(first_modified: first_modified).each do |change|
7
-
8
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
9
-
10
- yield public_xml, change, self
11
- end
12
-
13
- changes(first_modified: first_modified, target: target).each do |change|
14
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
15
-
16
- next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
17
-
18
- yield public_xml, change, self
19
- end
20
- end
21
- end
@@ -1,205 +0,0 @@
1
- require 'nokogiri'
2
- require 'stanford-mods'
3
- require 'mods_display'
4
- require 'dor/rights_auth'
5
-
6
- module PurlFetcher::Client
7
- class PublicXmlRecord
8
- attr_reader :druid, :options
9
-
10
- def self.fetch(url)
11
- if defined?(JRUBY_VERSION)
12
- response = Manticore.get(url)
13
- response.body if response.code == 200
14
- else
15
- response = HTTP.get(url)
16
- response.body if response.status.ok?
17
- end
18
- end
19
-
20
- def initialize(druid, options = {})
21
- @druid = druid
22
- @options = options
23
- end
24
-
25
- def searchworks_id
26
- catkey.nil? ? druid : catkey
27
- end
28
-
29
- # @return catkey value from the DOR identity_metadata, or nil if there is no catkey
30
- def catkey
31
- get_value(public_xml_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']"))
32
- end
33
-
34
- # @return objectLabel value from the DOR identity_metadata, or nil if there is no barcode
35
- def label
36
- get_value(public_xml_doc.xpath('/publicObject/identityMetadata/objectLabel'))
37
- end
38
-
39
- def get_value(node)
40
- (node && node.first) ? node.first.content : nil
41
- end
42
-
43
- def stanford_mods
44
- @smods_rec ||= Stanford::Mods::Record.new.tap do |smods_rec|
45
- smods_rec.from_str(mods.to_s)
46
- end
47
- end
48
-
49
- def mods_display
50
- @mods_display ||= ModsDisplay::HTML.new(stanford_mods)
51
- end
52
-
53
- def public_xml
54
- @public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
55
- end
56
-
57
- def public_xml?
58
- !!public_xml
59
- end
60
-
61
- def public_xml_doc
62
- @public_xml_doc ||= Nokogiri::XML(public_xml)
63
- end
64
-
65
- def mods
66
- @mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
67
- public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
68
- else
69
- if defined?(Honeybadger)
70
- Honeybadger.notify(
71
- 'Unable to find MODS in the public xml; falling back to stand-along mods document',
72
- context: { druid: druid }
73
- )
74
- end
75
-
76
- Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
77
- end
78
- end
79
-
80
- # @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
81
- def is_collection
82
- object_type_nodes = public_xml_doc.xpath('//objectType')
83
- object_type_nodes.find_index { |n| %w(collection set).include? n.text.downcase }
84
- end
85
-
86
- # value is used to tell SearchWorks UI app of specific display needs for objects
87
- # this comes from the <thumb> element in publicXML or the first image found (as parsed by discovery-indexer)
88
- # @return [String] filename or nil if none found
89
- def thumb
90
- return if is_collection
91
- encoded_thumb if %w(book image manuscript map webarchive-seed).include?(dor_content_type)
92
- end
93
-
94
- # the value of the type attribute for a DOR object's contentMetadata
95
- # more info about these values is here:
96
- # https://consul.stanford.edu/display/chimera/DOR+content+types%2C+resource+types+and+interpretive+metadata
97
- # https://consul.stanford.edu/display/chimera/Summary+of+Content+Types%2C+Resource+Types+and+their+behaviors
98
- # @return [String]
99
- def dor_content_type
100
- public_xml_doc.xpath('//contentMetadata/@type').text
101
- end
102
-
103
- # the thumbnail in publicXML, falling back to the first image if no thumb node is found
104
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001/filename withspace.jp2
105
- def parse_thumb
106
- unless public_xml_doc.nil?
107
- thumb = public_xml_doc.xpath('//thumb')
108
- # first try and parse what is in the thumb node of publicXML, but fallback to the first image if needed
109
- if thumb.size == 1
110
- thumb.first.content
111
- elsif thumb.size == 0 && parse_sw_image_ids.size > 0
112
- parse_sw_image_ids.first
113
- else
114
- nil
115
- end
116
- end
117
- end
118
-
119
- # the druid and id attribute of resource/file and objectId and fileId of the
120
- # resource/externalFile elements that match the image, page, or thumb resource type, including extension
121
- # Also, prepends the corresponding druid and / specifically for Searchworks use
122
- # @return [Array<String>] filenames
123
- def parse_sw_image_ids
124
- public_xml_doc.xpath('//resource[@type="page" or @type="image" or @type="thumb"]').map do |node|
125
- node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid.gsub('druid:','')}/" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
126
- "#{y.attributes['objectId'].text.split(':').last}" + "/" + "#{y.attributes['fileId']}"
127
- end
128
- end.flatten
129
- end
130
-
131
- def collections
132
- @collections ||= predicate_druids('isMemberOfCollection').map do |druid|
133
- PublicXmlRecord.new(druid, options)
134
- end
135
- end
136
-
137
- def constituents
138
- @constituents ||= predicate_druids('isConstituentOf').map do |druid|
139
- PublicXmlRecord.new(druid, options)
140
- end
141
- end
142
-
143
- def items(&block)
144
- return [] unless is_collection
145
-
146
- purl_fetcher_client.collection_members(druid, &block)
147
- end
148
-
149
- # the thumbnail in publicXML properly URI encoded, including the slash separator
150
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
151
- def encoded_thumb
152
- thumb=parse_thumb
153
- return unless thumb
154
- thumb_druid=thumb.split('/').first # the druid (before the first slash)
155
- thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
156
- "#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
157
- end
158
-
159
- # get the druids from predicate relationships in rels-ext from public_xml
160
- # @return [Array<String>, nil] the druids (e.g. ww123yy1234) from the rdf:resource of the predicate relationships, or nil if none
161
- def predicate_druids(predicate, predicate_ns = 'info:fedora/fedora-system:def/relations-external#')
162
- ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'pred_ns' => predicate_ns }
163
- xpth = "/publicObject/rdf:RDF/rdf:Description/pred_ns:#{predicate}/@rdf:resource"
164
- pred_nodes = public_xml_doc.xpath(xpth, ns_hash)
165
- pred_nodes.reject { |n| n.value.empty? }.map do |n|
166
- n.value.split('druid:').last
167
- end
168
- end
169
-
170
- def druid_tree
171
- druid.match(/(..)(...)(..)(....)/).captures.join('/')
172
- end
173
-
174
- def rights_xml
175
- @rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
176
- end
177
-
178
- def rights
179
- @rights ||= ::Dor::RightsAuth.parse(rights_xml)
180
- end
181
-
182
- def public?
183
- rights.world_unrestricted?
184
- end
185
-
186
- def stanford_only?
187
- rights.stanford_only_unrestricted?
188
- end
189
-
190
- def purl_base_url
191
- options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
192
- end
193
-
194
- def purl_fetcher_api_endpoint
195
- options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
196
- end
197
-
198
- def purl_fetcher_client
199
- @purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
200
- nil,
201
- 'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
202
- )
203
- end
204
- end
205
- end