stash-sword 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +42 -0
- data/.rubocop.yml +25 -0
- data/.ruby-version +1 -0
- data/.travis.yml +2 -0
- data/.yardopts +3 -0
- data/Gemfile +6 -0
- data/LICENSE.md +22 -0
- data/README.md +4 -0
- data/Rakefile +49 -0
- data/examples/example.rb +26 -0
- data/examples/uploads/example.zip +0 -0
- data/examples/uploads/example/lorem-ipsum.txt +7 -0
- data/examples/uploads/example/mrt-datacite.xml +22 -0
- data/examples/uploads/example/mrt-dc.xml +13 -0
- data/examples/uploads/example/stash-wrapper.xml +56 -0
- data/lib/stash/sword.rb +39 -0
- data/lib/stash/sword/client.rb +132 -0
- data/lib/stash/sword/deposit_receipt.rb +44 -0
- data/lib/stash/sword/header_utils.rb +42 -0
- data/lib/stash/sword/http_helper.rb +120 -0
- data/lib/stash/sword/iri.rb +12 -0
- data/lib/stash/sword/log_utils.rb +39 -0
- data/lib/stash/sword/module_info.rb +12 -0
- data/lib/stash/sword/namespace.rb +30 -0
- data/lib/stash/sword/sequence_io.rb +105 -0
- data/notes/Dash_Submission_To_Merritt.txt +40 -0
- data/notes/service-document.xml +15 -0
- data/spec/.rubocop.yml +10 -0
- data/spec/data/deposit_receipt_merritt.xml +25 -0
- data/spec/data/deposit_receipt_spec.xml +58 -0
- data/spec/rspec_custom_matchers.rb +118 -0
- data/spec/spec_helper.rb +33 -0
- data/spec/unit/stash/sword2/client_spec.rb +110 -0
- data/spec/unit/stash/sword2/deposit_receipt_spec.rb +48 -0
- data/spec/unit/stash/sword2/http_helper_get_spec.rb +131 -0
- data/spec/unit/stash/sword2/http_helper_post_spec.rb +143 -0
- data/spec/unit/stash/sword2/http_helper_put_spec.rb +143 -0
- data/spec/unit/stash/sword2/log_spec.rb +23 -0
- data/spec/unit/stash/sword2/namespaces_spec.rb +31 -0
- data/spec/unit/stash/sword2/sequence_io_spec.rb +153 -0
- data/stash-sword.gemspec +47 -0
- metadata +279 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'xml/mapping_extensions'
|
2
|
+
require 'stash/sword/namespace'
|
3
|
+
|
4
|
+
module Stash
|
5
|
+
module Sword
|
6
|
+
|
7
|
+
# XML mapping for a single `link` element. Exposes the element's `rel`,
# `href` and (optional) `type` attributes as accessors.
class Link
  include ::XML::Mapping

  # Link relation, read from the `rel` attribute
  text_node(:rel, '@rel')
  # Link target, read from the `href` attribute
  uri_node(:href, '@href')
  # Media type, read from the `type` attribute; nil when absent
  mime_type_node(:type, '@type', default_value: nil)
end
|
14
|
+
|
15
|
+
# XML mapping for an Atom `entry` element returned as a deposit receipt.
# Provides lookup of the entry's `link` children by relation, plus
# shortcuts for the `edit-media`, `edit` and SWORD `add` relations.
class DepositReceipt
  include ::XML::MappingExtensions::Namespaced

  root_element_name 'entry'
  namespace Namespace::ATOM.value

  array_node :links, 'link', class: Link, default_value: []

  # Finds the first link with the specified relation.
  #
  # @param rel [#to_s, nil] the relation to look for; converted to a
  #   string before comparison
  # @return [Link, nil] the first matching link, or nil if there is none
  def link(rel:)
    rel = rel.to_s if rel
    links.find { |l| l.rel == rel }
  end

  # @return [URI, nil] the `href` of the `edit-media` link, or nil if
  #   the receipt has no such link
  def em_iri
    href_for('edit-media')
  end

  # @return [URI, nil] the `href` of the `edit` link, or nil if the
  #   receipt has no such link
  def edit_iri
    href_for('edit')
  end

  # @return [URI, nil] the `href` of the
  #   `http://purl.org/net/sword/terms/add` link, or nil if the receipt
  #   has no such link
  def se_iri
    # The guard must test this link, not the edit-media link: the two
    # can be present or absent independently of each other.
    href_for(URI('http://purl.org/net/sword/terms/add'))
  end

  private

  # Returns the `href` of the first link with the given relation, or nil.
  def href_for(rel)
    found = link(rel: rel)
    found.href if found
  end
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Stash
|
2
|
+
module Sword
|
3
|
+
# Mixin that builds the header hashes used for create and update requests.
# The including object supplies the `On-Behalf-Of` value through the
# `@on_behalf_of` instance variable.
module HeaderUtils

  SIMPLE_ZIP = 'http://purl.org/net/sword/package/SimpleZip'.freeze
  APPLICATION_ZIP = 'application/zip'.freeze
  MULTIPART_RELATED_ATOM_XML = 'multipart/related; type="application/atom+xml"'.freeze

  attr_reader :on_behalf_of

  # Headers for a request whose body is the zipfile itself.
  #
  # @param zipfile [String] path to the zipfile; read to compute its MD5
  # @param slug [String] value for the `Slug` header
  # @return [Hash<String, String>] the request headers
  def create_request_headers(zipfile, slug)
    filename = File.basename(zipfile)
    checksum = Digest::MD5.file(zipfile).to_s

    headers = {}
    headers['Content-Type'] = APPLICATION_ZIP
    headers['Content-Disposition'] = "attachment; filename=#{filename}"
    headers['Packaging'] = SIMPLE_ZIP
    headers['Content-MD5'] = checksum
    headers['On-Behalf-Of'] = on_behalf_of
    headers['Slug'] = slug
    headers
  end

  # Headers for a multipart request with the given body stream.
  #
  # @param stream [#size] the request body; its size becomes `Content-Length`
  # @param boundary [String] the multipart boundary
  # @return [Hash<String, String>] the request headers
  def update_request_headers(stream, boundary)
    content_type = "#{MULTIPART_RELATED_ATOM_XML}; boundary=\"#{boundary}\""

    headers = {}
    headers['Content-Length'] = stream.size.to_s
    headers['Content-Type'] = content_type
    headers['On-Behalf-Of'] = on_behalf_of
    headers['MIME-Version'] = '1.0'
    headers
  end

  # MIME headers for the zipfile part of a multipart request.
  #
  # @param zipfile [String] path to the zipfile; read to compute its MD5
  # @return [Hash<String, String>] the MIME part headers
  def update_mime_headers(zipfile)
    filename = File.basename(zipfile)
    checksum = Digest::MD5.file(zipfile).to_s

    headers = {}
    headers['Content-Type'] = APPLICATION_ZIP
    headers['Content-Disposition'] = "attachment; name=payload; filename=\"#{filename}\""
    headers['Packaging'] = SIMPLE_ZIP
    headers['Content-MD5'] = checksum
    headers['MIME-Version'] = '1.0'
    headers
  end
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'rest-client'
|
3
|
+
require 'uri'
|
4
|
+
require 'stash/sword/log_utils'
|
5
|
+
|
6
|
+
module Stash
|
7
|
+
module Sword
|
8
|
+
# Utility class simplifying GET, POST and PUT requests for HTTP/HTTPS
# resources. GET requests are made with `Net::HTTP`; POST and PUT
# requests are delegated to `RestClient`.
class HTTPHelper

  include LogUtils

  # The default number of redirects to follow before erroring out.
  DEFAULT_MAX_REDIRECTS = 5

  # @return [String] the User-Agent string to send when making requests
  attr_accessor :user_agent

  # @return [Integer] the number of redirects to follow before erroring out
  attr_accessor :redirect_limit

  # @return [String] the HTTP Basic Authentication username
  attr_reader :username

  # @return [String] the HTTP Basic Authentication password
  attr_reader :password

  # Creates a new `HTTPHelper`
  #
  # @param user_agent [String] the User-Agent string to send when making requests
  # @param username [String, nil] the HTTP Basic Authentication username
  # @param password [String, nil] the HTTP Basic Authentication password
  # @param redirect_limit [Integer] the number of redirects to follow before erroring out
  #   (defaults to {DEFAULT_MAX_REDIRECTS})
  def initialize(user_agent:, username: nil, password: nil, redirect_limit: DEFAULT_MAX_REDIRECTS)
    @user_agent = user_agent
    @redirect_limit = redirect_limit
    @username = username
    @password = password
  end

  # Gets the content of the specified URI as a string.
  # @param uri [URI] the URI to download
  # @param limit [Integer] the number of redirects to follow (defaults to {#redirect_limit})
  # @return [String] the content of the URI
  # @raise [RuntimeError] if the redirect limit is exceeded, a redirect
  #   carries no `Location` header, or the response is neither a success,
  #   an informational response, nor a redirect
  def get(uri:, limit: redirect_limit)
    do_get(uri, limit) do |success|
      # The explicit return makes the body the result of #get. Without
      # it, #get would evaluate to whatever the enclosing
      # `Net::HTTP.start` call in #do_get returns, not to the value of
      # this block.
      return success.body
    end
  end

  # Posts the specified payload to the specified URI.
  # @param uri [URI, String] the URI to post to
  # @param payload the request body, passed through to `RestClient`
  # @param headers [Hash] additional request headers
  # @param limit [Integer] the number of redirects to follow (defaults to {#redirect_limit})
  # @return the result of `RestClient::Request.execute`
  def post(uri:, payload:, headers: {}, limit: redirect_limit)
    do_post_or_put(method: :post, uri: uri, payload: payload, headers: headers, limit: limit)
  end

  # Puts the specified payload to the specified URI.
  # @param uri [URI, String] the URI to put to
  # @param payload the request body, passed through to `RestClient`
  # @param headers [Hash] additional request headers
  # @param limit [Integer] the number of redirects to follow (defaults to {#redirect_limit})
  # @return the result of `RestClient::Request.execute`
  def put(uri:, payload:, headers: {}, limit: redirect_limit)
    do_post_or_put(method: :put, uri: uri, payload: payload, headers: headers, limit: limit)
  end

  private

  # Headers added to every POST/PUT request.
  def default_headers
    {
      'User-Agent' => user_agent,
      'Content-Transfer-Encoding' => 'binary'
    }.freeze
  end

  # Builds the request options, logs them at debug level, and executes
  # the request with `RestClient`.
  def do_post_or_put(method:, uri:, payload:, headers:, limit:)
    options = request_options(headers, limit, method, payload, uri)
    log_hash(options)
    RestClient::Request.execute(**options)
  end

  # Assembles the option hash for `RestClient::Request.execute`.
  # Credentials are only included when set.
  def request_options(headers, limit, method, payload, uri)
    options = {
      method: method,
      url: uri.to_s,
      payload: payload,
      # NOTE: default headers win over caller-supplied headers with the same key
      headers: headers.merge(default_headers),
      max_redirects: limit
    }
    options[:user] = username if username
    options[:password] = password if password
    options
  end

  # Performs a GET, yielding the response on success and re-issuing the
  # request (with a decremented limit) on informational or redirect
  # responses.
  # TODO: Consider rewriting with RestClient
  def do_get(uri, limit, &block) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    raise "Redirect limit (#{redirect_limit}) exceeded retrieving URI #{uri}" if limit <= 0
    req = Net::HTTP::Get.new(uri, 'User-Agent' => user_agent)
    req.basic_auth(username, password) if username && password
    Net::HTTP.start(uri.hostname, uri.port, use_ssl: (uri.scheme == 'https')) do |http|
      http.request(req) do |response|
        case response
        when Net::HTTPSuccess
          yield(response)
        when Net::HTTPInformation, Net::HTTPRedirection
          do_get(redirect_uri_for(response, uri), limit - 1, &block)
        else
          raise "Error #{response.code}: #{response.message} retrieving URI #{uri}"
        end
      end
    end
  end

  # Determines the URI to request next: the original URI for an
  # informational response, otherwise the `Location` header, resolved
  # against the original URI when it is relative.
  def redirect_uri_for(response, original_uri)
    return original_uri if response.is_a?(Net::HTTPInformation)

    location = response['location']
    # Fail with a meaningful message instead of the opaque ArgumentError
    # that URI(nil) would raise.
    raise "Error #{response.code}: no Location header in redirect from URI #{original_uri}" unless location

    new_uri = URI(location)
    new_uri.relative? ? original_uri + location : new_uri
  end
end
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'typesafe_enum'
|
3
|
+
|
4
|
+
module Stash
|
5
|
+
module Sword
|
6
|
+
# Enumeration of the `http://purl.org/net/sword/package/` IRIs used by
# this gem. Each instance's value is a `URI`.
class IRI < TypesafeEnum::Base
  # Declared in this order: BINARY first, then SIMPLE_ZIP.
  {
    BINARY: 'http://purl.org/net/sword/package/Binary',
    SIMPLE_ZIP: 'http://purl.org/net/sword/package/SimpleZip'
  }.each do |key, iri|
    new(key, URI(iri))
  end
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Stash
|
2
|
+
module Sword
|
3
|
+
# Mixin with helpers for writing responses, errors and hashes to the
# gem-wide logger.
module LogUtils
  # @return the logger returned by `Stash::Sword.log`
  def log
    ::Stash::Sword.log
  end

  # Logs an error. When the error carries a response, the response's
  # code, headers and body are logged; otherwise the error itself is
  # logged so that its details are not lost.
  #
  # @param e [Object] the error to log, typically an exception
  def log_error(e)
    # An error may respond to :response and still return nil, so check
    # the value and not only the method.
    response = e.respond_to?(:response) ? e.response : nil
    if response
      log.error(response_to_log_msg(response))
    else
      log.error("Unable to log response: #{e.inspect}")
    end
  end

  # Formats a response as a multi-line message delimited by dashed rules.
  #
  # @param response [#code, #headers, #body] the response to format
  # @return [String] the formatted message
  def response_to_log_msg(response)
    [
      '-----------------------------------------------------',
      "code: #{response.code}",
      'headers:', hash_to_log_msg(response.headers),
      "body:\n#{response.body}",
      '-----------------------------------------------------'
    ].join("\n")
  end

  # Logs the specified hash at debug level.
  #
  # @param hash [Hash] the hash to log
  def log_hash(hash)
    msg = hash_to_log_msg(hash)
    log.debug(msg)
  end

  # Formats a hash as one `key: value` line per entry. Hash values are
  # expanded one level, each nested entry on its own tab-indented line.
  #
  # @param hash [Hash] the hash to format
  # @return [String] the formatted message
  def hash_to_log_msg(hash)
    hash.map do |k, v|
      value = v.is_a?(Hash) ? v.map { |k2, v2| "\n\t#{k2}: #{v2}" }.join : v
      "#{k}: #{value}"
    end.join("\n")
  end
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Stash
  module Sword
    # Gem name
    NAME = 'stash-sword'.freeze

    # Gem version
    VERSION = '0.1.0'.freeze

    # Copyright notice for the gem
    COPYRIGHT = 'Copyright (c) 2016 The Regents of the University of California'.freeze
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'typesafe_enum'
|
2
|
+
require 'xml/mapping_extensions'
|
3
|
+
|
4
|
+
module Stash
|
5
|
+
module Sword
|
6
|
+
# Enumeration of the XML namespaces used by this gem. Each instance's
# value is an `XML::MappingExtensions::Namespace`.
class Namespace < TypesafeEnum::Base
  NS = XML::MappingExtensions::Namespace
  private_constant :NS

  new(:SWORD, NS.new(uri: 'http://purl.org/net/sword/'))
  new(:SWORD_TERMS, NS.new(uri: 'http://purl.org/net/sword/terms/', prefix: 'sword'))
  new(:SWORD_PACKAGE, NS.new(uri: 'http://purl.org/net/sword/package'))
  new(:SWORD_ERROR, NS.new(uri: 'http://purl.org/net/sword/error'))
  new(:SWORD_STATE, NS.new(uri: 'http://purl.org/net/sword/state'))
  new(:ATOM_PUB, NS.new(uri: 'http://www.w3.org/2007/app', prefix: 'app'))
  new(:ATOM, NS.new(uri: 'http://www.w3.org/2005/Atom', prefix: 'atom'))
  new(:DC_TERMS, NS.new(uri: 'http://purl.org/dc/terms/', prefix: 'dcterms'))
  new(:RDF, NS.new(uri: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', prefix: 'rdf'))
  new(:OAI_ORE, NS.new(uri: 'http://www.openarchives.org/ore/terms/', prefix: 'ore'))

  # @return the `uri` of the wrapped namespace value
  def uri
    value.uri
  end

  # @return the `prefix` of the wrapped namespace value
  def prefix
    value.prefix
  end
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
module Stash
|
4
|
+
module Sword
|
5
|
+
# A read-only `IO`-like that concatenates a sequence of strings or IOs.
# Inputs are consumed in order; each input is closed (if it responds to
# `close`) once the sequence has moved past it.
class SequenceIO

  # Creates a new {SequenceIO} concatenating the specified input sources.
  # Strings are wrapped internally as `StringIO`.
  #
  # @param inputs [Enumerable<String, IO>] an array of strings and/or IOs to
  #   concatenate. A single string or IO is treated as a one-element array.
  def initialize(inputs)
    inputs = [inputs] unless inputs.respond_to?(:[]) && inputs.respond_to?(:map)
    @inputs = to_ios(inputs)
    binmode if any_binmode(@inputs)
    @index = 0
    @input = @inputs.first
  end

  # The combined size of all inputs. Computed on first use and cached.
  #
  # @return [Integer] the sum of the sizes of all inputs
  # @raise [RuntimeError] if any input does not respond to `size`
  def size
    @size ||= inputs.inject(0) do |sum, input|
      raise "input #{input} does not respond to :size" unless input.respond_to?(:size)
      sum + input.size
    end
  end

  # Reads from the concatenated inputs.
  #
  # @param length [Integer, nil] the maximum number of bytes to read, or
  #   nil to read everything that remains
  # @param outbuf [String, nil] an optional buffer; it is cleared and
  #   then filled with the data read
  # @return [String, nil] the data read. Returns nil when the combined
  #   size is zero, or when a positive `length` is requested and all
  #   inputs are exhausted (matching `IO#read`, so that
  #   `while (chunk = io.read(n))` loops terminate).
  def read(length = nil, outbuf = nil)
    # use <= instead of == to get around https://github.com/bbatsov/rubocop/issues/3131
    return nil if size <= 0
    outbuf = outbuf ? outbuf.clear : ''
    if length
      read_segment(length, outbuf)
      # End of input: signal EOF with nil, not an endless run of ''
      return nil if length > 0 && outbuf.empty?
    else
      read_fully(outbuf)
    end
    outbuf
  end

  # Puts every input that supports it into binary mode.
  #
  # @return [SequenceIO] self
  def binmode
    return self if binmode?
    inputs.each do |input|
      input.binmode if input.respond_to?(:binmode)
    end
    @binmode = true
    self
  end

  # @return [Boolean, nil] truthy once {#binmode} has been applied
  def binmode?
    @binmode
  end

  # Closes all remaining inputs.
  def close
    next_input! until input.nil?
  end

  # @return [Boolean] true once every input has been consumed or closed
  def closed?
    input.nil? && index >= inputs.length
  end

  private

  # The input currently being read, or nil when all are exhausted
  attr_reader :input
  # The position of the current input in {#inputs}
  attr_reader :index
  # All inputs, as IO-likes
  attr_reader :inputs

  # Appends everything that remains in all inputs to the buffer.
  def read_fully(buffer)
    until input.nil?
      buffer << input.read(nil)
      next_input!
    end
  end

  # Appends up to `length` bytes to the buffer, moving on to the next
  # input whenever the current one returns less than was asked for.
  def read_segment(length, buffer)
    remaining = length
    while input && remaining > 0
      chunk = input.read(remaining)
      if chunk
        buffer << chunk
        remaining -= chunk.length
      end
      # A short (or nil) read means the current input is exhausted
      next_input! if remaining > 0
    end
  end

  # Closes the current input and advances to the next one, if any.
  # TODO: Array.pop! or something
  def next_input!
    input.close if input && input.respond_to?(:close)
    @index += 1
    @input = index < inputs.length ? inputs[index] : nil
  end

  # Wraps anything that is not already readable in a `StringIO`.
  def to_ios(inputs)
    inputs.map do |input|
      input.respond_to?(:read) ? input : StringIO.new(input.to_s)
    end
  end

  # @return [Boolean] whether any of the IOs is already in binary mode
  def any_binmode(ios)
    ios.any? { |io| io.respond_to?(:binmode?) && io.binmode? }
  end
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
Status of submission is tracked - merritt_response {PROCESSING, “User not authorized for Merritt submission”,
|
2
|
+
|
3
|
+
How is dublincore called?
|
4
|
+
|
5
|
+
Review & Submit button —> record.review()
|
6
|
+
creates datacite.xml file and places it in the uploads/<local_id>/ directory
|
7
|
+
|
8
|
+
Submit button —> record.send_archive_to_merritt
|
9
|
+
|
10
|
+
Set up the user_email request headers
|
11
|
+
Create a new thread
|
12
|
+
Record.Generate Merritt Zip()
|
13
|
+
Record.Send Archive to Merritt
|
14
|
+
Create a submission Log
|
15
|
+
|
16
|
+
|
17
|
+
If submission successful, remove files from local storage and add logging information
|
18
|
+
Record.Purge_Files()
|
19
|
+
|
20
|
+
Generate_Merritt_Zip()
|
21
|
+
set file path to uploads_dir - this is a linked directory shared/uploads so it will stay persistent across releases
|
22
|
+
local_id is used to name the uploads directory for a dataset.
|
23
|
+
calls self.review
|
24
|
+
calls self.dublincore
|
25
|
+
calls self.dataone
|
26
|
+
creates the zip file
|
27
|
+
purges all temp files, etc
|
28
|
+
|
29
|
+
Record.Send_Archive_to_merritt
|
30
|
+
gets info to send in curl command from MERRITT_CONFIG and user and campus
|
31
|
+
sends curl command - no notification sent if no email
|
32
|
+
No DOI/EZID is created here; Merritt does this.
|
33
|
+
returns sys_output
|
34
|
+
|
35
|
+
|
36
|
+
SWORD - Slug identifier is the local_id
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
if @user_email.nil?
|
41
|
sys_output = "curl --insecure --verbose -u #{merritt_username}:#{merritt_password} -F \"file=@./#{DATASHARE_CONFIG['uploads_dir']}/#{self.local_id}/#{self.local_id}.zip\" -F \"type=container\" -F \"submitter=Dash/#{external_id}\" -F \"responseForm=xml\" -F \"profile=#{merritt_profile}\" -F \"localIdentifier=#{self.local_id}\" #{merritt_endpoint} 2>&1"
|
1
42
|
else
|
2
43
|
sys_output = "curl --insecure --verbose -u #{merritt_username}:#{merritt_password} -F \"file=@./#{DATASHARE_CONFIG['uploads_dir']}/#{self.local_id}/#{self.local_id}.zip\" -F \"notification=#{@user_email}\" -F \"type=container\" -F \"submitter=Dash/#{external_id}\" -F \"responseForm=xml\" -F \"profile=#{merritt_profile}\" -F \"localIdentifier=#{self.local_id}\" #{merritt_endpoint} 2>&1"
|
3
44
|
end
|