stash-sword 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +42 -0
- data/.rubocop.yml +25 -0
- data/.ruby-version +1 -0
- data/.travis.yml +2 -0
- data/.yardopts +3 -0
- data/Gemfile +6 -0
- data/LICENSE.md +22 -0
- data/README.md +4 -0
- data/Rakefile +49 -0
- data/examples/example.rb +26 -0
- data/examples/uploads/example.zip +0 -0
- data/examples/uploads/example/lorem-ipsum.txt +7 -0
- data/examples/uploads/example/mrt-datacite.xml +22 -0
- data/examples/uploads/example/mrt-dc.xml +13 -0
- data/examples/uploads/example/stash-wrapper.xml +56 -0
- data/lib/stash/sword.rb +39 -0
- data/lib/stash/sword/client.rb +132 -0
- data/lib/stash/sword/deposit_receipt.rb +44 -0
- data/lib/stash/sword/header_utils.rb +42 -0
- data/lib/stash/sword/http_helper.rb +120 -0
- data/lib/stash/sword/iri.rb +12 -0
- data/lib/stash/sword/log_utils.rb +39 -0
- data/lib/stash/sword/module_info.rb +12 -0
- data/lib/stash/sword/namespace.rb +30 -0
- data/lib/stash/sword/sequence_io.rb +105 -0
- data/notes/Dash_Submission_To_Merritt.txt +40 -0
- data/notes/service-document.xml +15 -0
- data/spec/.rubocop.yml +10 -0
- data/spec/data/deposit_receipt_merritt.xml +25 -0
- data/spec/data/deposit_receipt_spec.xml +58 -0
- data/spec/rspec_custom_matchers.rb +118 -0
- data/spec/spec_helper.rb +33 -0
- data/spec/unit/stash/sword2/client_spec.rb +110 -0
- data/spec/unit/stash/sword2/deposit_receipt_spec.rb +48 -0
- data/spec/unit/stash/sword2/http_helper_get_spec.rb +131 -0
- data/spec/unit/stash/sword2/http_helper_post_spec.rb +143 -0
- data/spec/unit/stash/sword2/http_helper_put_spec.rb +143 -0
- data/spec/unit/stash/sword2/log_spec.rb +23 -0
- data/spec/unit/stash/sword2/namespaces_spec.rb +31 -0
- data/spec/unit/stash/sword2/sequence_io_spec.rb +153 -0
- data/stash-sword.gemspec +47 -0
- metadata +279 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'xml/mapping_extensions'
|
2
|
+
require 'stash/sword/namespace'
|
3
|
+
|
4
|
+
module Stash
|
5
|
+
module Sword
|
6
|
+
|
7
|
+
# XML mapping for a single `link` element, as collected by
# {DepositReceipt#links}. All three fields are read from attributes
# of the element.
class Link
  include ::XML::Mapping

  # The `rel` attribute, mapped as plain text.
  text_node(:rel, '@rel')

  # The `href` attribute, mapped with `uri_node`.
  uri_node(:href, '@href')

  # The `type` attribute, mapped with `mime_type_node`; defaults to `nil`.
  mime_type_node(:type, '@type', default_value: nil)
end
|
14
|
+
|
15
|
+
# XML mapping for a deposit receipt: an `entry` element in the Atom
# namespace. Only the `link` children are mapped; the convenience
# accessors below look up well-known link relations among them.
class DepositReceipt
  include ::XML::MappingExtensions::Namespaced

  root_element_name 'entry'
  namespace Namespace::ATOM.value

  array_node :links, 'link', class: Link, default_value: []

  # Finds the first link whose `rel` equals the given relation.
  #
  # @param rel [#to_s, nil] the relation to look for; a non-nil value is
  #   converted with `to_s` before comparison, so a `URI` may be passed
  # @return [Link, nil] the first matching link, or `nil` if there is none
  def link(rel:)
    rel = rel.to_s if rel
    links.find { |l| l.rel == rel }
  end

  # @return [Object, nil] the `href` of the `edit-media` link, or `nil`
  #   if the receipt has no such link
  def em_iri
    href_for('edit-media')
  end

  # @return [Object, nil] the `href` of the `edit` link, or `nil` if the
  #   receipt has no such link
  def edit_iri
    href_for('edit')
  end

  # @return [Object, nil] the `href` of the
  #   `http://purl.org/net/sword/terms/add` link, or `nil` if the receipt
  #   has no such link
  def se_iri
    # The presence check must be on this link itself. It previously tested
    # `em_iri`, which raised NoMethodError when only the edit-media link was
    # present and returned nil when only this link was present.
    href_for(URI('http://purl.org/net/sword/terms/add'))
  end

  private

  # Returns the `href` of the link with the given relation, or `nil` when
  # no such link exists.
  def href_for(rel)
    found = link(rel: rel)
    found.href if found
  end
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Stash
|
2
|
+
module Sword
|
3
|
+
# Mixin that builds the header hashes for deposit requests. Including
# classes are expected to set `@on_behalf_of`, which is exposed through
# the `on_behalf_of` reader and sent as the `On-Behalf-Of` header.
module HeaderUtils
  SIMPLE_ZIP = 'http://purl.org/net/sword/package/SimpleZip'.freeze
  APPLICATION_ZIP = 'application/zip'.freeze
  MULTIPART_RELATED_ATOM_XML = 'multipart/related; type="application/atom+xml"'.freeze

  attr_reader :on_behalf_of

  # Builds the request headers for sending a zip file as the request body.
  #
  # @param zipfile [String] path to the zip file; it is read to compute
  #   the MD5 digest
  # @param slug [String] value for the `Slug` header
  # @return [Hash<String, String>] the request headers
  def create_request_headers(zipfile, slug)
    filename = File.basename(zipfile)
    md5_hex = Digest::MD5.file(zipfile).to_s
    {
      'Content-Type' => APPLICATION_ZIP,
      'Content-Disposition' => "attachment; filename=#{filename}",
      'Packaging' => SIMPLE_ZIP,
      'Content-MD5' => md5_hex,
      'On-Behalf-Of' => on_behalf_of,
      'Slug' => slug
    }
  end

  # Builds the request headers for a multipart/related request.
  #
  # @param stream [#size] the request body; its `size` becomes the
  #   `Content-Length`
  # @param boundary [String] the multipart boundary string
  # @return [Hash<String, String>] the request headers
  def update_request_headers(stream, boundary)
    content_length = stream.size.to_s
    content_type = "#{MULTIPART_RELATED_ATOM_XML}; boundary=\"#{boundary}\""
    {
      'Content-Length' => content_length,
      'Content-Type' => content_type,
      'On-Behalf-Of' => on_behalf_of,
      'MIME-Version' => '1.0'
    }
  end

  # Builds the MIME headers for the zip payload part.
  #
  # @param zipfile [String] path to the zip file; it is read to compute
  #   the MD5 digest
  # @return [Hash<String, String>] the MIME part headers
  def update_mime_headers(zipfile)
    filename = File.basename(zipfile)
    md5_hex = Digest::MD5.file(zipfile).to_s
    {
      'Content-Type' => APPLICATION_ZIP,
      'Content-Disposition' => "attachment; name=payload; filename=\"#{filename}\"",
      'Packaging' => SIMPLE_ZIP,
      'Content-MD5' => md5_hex,
      'MIME-Version' => '1.0'
    }
  end
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'rest-client'
|
3
|
+
require 'uri'
|
4
|
+
require 'stash/sword/log_utils'
|
5
|
+
|
6
|
+
module Stash
|
7
|
+
module Sword
|
8
|
+
# Utility class simplifying GET, POST and PUT requests for HTTP/HTTPS resources.
|
9
|
+
class HTTPHelper
  include LogUtils

  # The default number of redirects to follow before erroring out.
  DEFAULT_MAX_REDIRECTS = 5

  # @return [String] the User-Agent string to send when making requests
  attr_accessor :user_agent

  # @return [Integer] the number of redirects to follow before erroring out
  attr_accessor :redirect_limit

  # @return [String, nil] the HTTP Basic Authentication username
  attr_reader :username

  # @return [String, nil] the HTTP Basic Authentication password
  attr_reader :password

  # Creates a new `HTTPHelper`
  #
  # @param user_agent [String] the User-Agent string to send when making requests
  # @param username [String, nil] the HTTP Basic Authentication username, if any
  # @param password [String, nil] the HTTP Basic Authentication password, if any
  # @param redirect_limit [Integer] the number of redirects to follow before erroring out
  #   (defaults to {DEFAULT_MAX_REDIRECTS})
  def initialize(user_agent:, username: nil, password: nil, redirect_limit: DEFAULT_MAX_REDIRECTS)
    @user_agent = user_agent
    @username = username
    @password = password
    @redirect_limit = redirect_limit
  end

  # Gets the content of the specified URI as a string.
  # @param uri [URI] the URI to download
  # @param limit [Integer] the number of redirects to follow (defaults to {#redirect_limit})
  # @return [String] the content of the URI
  def get(uri:, limit: redirect_limit)
    # The explicit `return` is deliberate: it exits `get` with the body from
    # inside the block. Without it, `get` would return the value of `do_get`,
    # which comes from `Net::HTTP#request` (the response object, not the
    # block's value).
    do_get(uri, limit) { |success| return success.body }
  end

  # Posts the specified payload to the specified URI.
  #
  # @param uri [URI, String] the target; converted with `to_s`
  # @param payload [Object] the request body, passed through to RestClient
  # @param headers [Hash] additional request headers
  # @param limit [Integer] the maximum number of redirects (defaults to {#redirect_limit})
  # @return the result of `RestClient::Request.execute`
  def post(uri:, payload:, headers: {}, limit: redirect_limit)
    do_post_or_put(method: :post, uri: uri, payload: payload, headers: headers, limit: limit)
  end

  # Puts the specified payload to the specified URI.
  #
  # @param uri [URI, String] the target; converted with `to_s`
  # @param payload [Object] the request body, passed through to RestClient
  # @param headers [Hash] additional request headers
  # @param limit [Integer] the maximum number of redirects (defaults to {#redirect_limit})
  # @return the result of `RestClient::Request.execute`
  def put(uri:, payload:, headers: {}, limit: redirect_limit)
    do_post_or_put(method: :put, uri: uri, payload: payload, headers: headers, limit: limit)
  end

  private

  # Headers sent with every POST/PUT request.
  def default_headers
    { 'User-Agent' => user_agent, 'Content-Transfer-Encoding' => 'binary' }.freeze
  end

  # Builds the RestClient options, logs them at debug level, and executes
  # the request.
  def do_post_or_put(method:, uri:, payload:, headers:, limit:)
    request_args = request_options(headers, limit, method, payload, uri)
    log_hash(request_args)
    RestClient::Request.execute(**request_args)
  end

  # Assembles the options hash for `RestClient::Request.execute`. The
  # default headers are merged over the caller's headers, so on a key
  # clash the default wins. Credentials are added only when set.
  def request_options(headers, limit, method, payload, uri)
    credentials = {}
    credentials[:user] = username if username
    credentials[:password] = password if password
    {
      method: method,
      url: uri.to_s,
      payload: payload,
      headers: headers.merge(default_headers),
      max_redirects: limit
    }.merge(credentials)
  end

  # TODO: Consider rewriting with RestClient
  #
  # Performs a GET with Net::HTTP, yielding the response on success and
  # recursing (with a decremented limit) on informational or redirect
  # responses. Any other response raises.
  def do_get(uri, limit, &block) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    raise "Redirect limit (#{redirect_limit}) exceeded retrieving URI #{uri}" unless limit > 0

    request = Net::HTTP::Get.new(uri, 'User-Agent' => user_agent)
    request.basic_auth(username, password) if username && password
    use_ssl = (uri.scheme == 'https')
    Net::HTTP.start(uri.hostname, uri.port, use_ssl: use_ssl) do |http|
      http.request(request) do |response|
        if response.is_a?(Net::HTTPSuccess)
          yield(response)
        elsif response.is_a?(Net::HTTPInformation) || response.is_a?(Net::HTTPRedirection)
          do_get(redirect_uri_for(response, uri), limit - 1, &block)
        else
          raise "Error #{response.code}: #{response.message} retrieving URI #{uri}"
        end
      end
    end
  end

  # Determines the URI to request next: the same URI again for an
  # informational response, otherwise the `location` header, resolved
  # against the original URI when it is relative.
  def redirect_uri_for(response, original_uri)
    return original_uri if response.is_a?(Net::HTTPInformation)

    location = response['location']
    target = URI(location)
    target.relative? ? original_uri + location : target
  end
end
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'typesafe_enum'
|
3
|
+
|
4
|
+
module Stash
|
5
|
+
module Sword
|
6
|
+
# Enumeration of packaging-format IRIs; each instance's value is a `URI`.
class IRI < TypesafeEnum::Base
  # http://purl.org/net/sword/package/Binary
  new(:BINARY, URI('http://purl.org/net/sword/package/Binary'))

  # http://purl.org/net/sword/package/SimpleZip
  new(:SIMPLE_ZIP, URI('http://purl.org/net/sword/package/SimpleZip'))
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Stash
|
2
|
+
module Sword
|
3
|
+
# Mixin with helpers for writing responses and hashes to the shared
# `Stash::Sword` logger.
module LogUtils
  # @return the logger returned by `Stash::Sword.log`
  def log
    ::Stash::Sword.log
  end

  # Logs an error. When the error exposes a `response`, that response is
  # logged in full; otherwise only a fixed placeholder message is logged.
  #
  # @param e [Object] the error to log
  def log_error(e)
    message = e.respond_to?(:response) ? response_to_log_msg(e.response) : 'Unable to log response'
    log.error(message)
  end

  # Formats a response as a multi-line block bracketed by rule lines.
  #
  # @param response [#code, #headers, #body] the response to format
  # @return [String] the formatted message
  def response_to_log_msg(response)
    rule = '-----------------------------------------------------'
    parts = [rule, "code: #{response.code}", 'headers:']
    parts << hash_to_log_msg(response.headers)
    parts << "body:\n#{response.body}"
    parts << rule
    parts.join("\n")
  end

  # Logs the given hash at debug level.
  #
  # @param hash [Hash] the hash to log
  def log_hash(hash)
    log.debug(hash_to_log_msg(hash))
  end

  # Formats a hash as one `key: value` line per entry. A value that is
  # itself a hash is expanded one level, each entry on its own
  # tab-indented line.
  #
  # @param hash [Hash] the hash to format
  # @return [String] the formatted message
  def hash_to_log_msg(hash)
    lines = hash.map do |key, val|
      rendered = val
      rendered = val.map { |k2, v2| "\n\t#{k2}: #{v2}" }.join if val.is_a?(Hash)
      "#{key}: #{rendered}"
    end
    lines.join("\n")
  end
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Stash
  module Sword
    # Gem name
    NAME = 'stash-sword'.freeze

    # Gem version
    VERSION = '0.1.0'.freeze

    # Copyright notice for the gem
    COPYRIGHT = 'Copyright (c) 2016 The Regents of the University of California'.freeze
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'typesafe_enum'
|
2
|
+
require 'xml/mapping_extensions'
|
3
|
+
|
4
|
+
module Stash
|
5
|
+
module Sword
|
6
|
+
# Enumeration of the XML namespaces used by this gem. Each instance's
# value is an `XML::MappingExtensions::Namespace`.
class Namespace < TypesafeEnum::Base
  # Local shorthand for the namespace value class; not part of the API.
  NS = XML::MappingExtensions::Namespace
  private_constant :NS

  new(:SWORD, NS.new(uri: 'http://purl.org/net/sword/'))
  new(:SWORD_TERMS, NS.new(uri: 'http://purl.org/net/sword/terms/', prefix: 'sword'))
  new(:SWORD_PACKAGE, NS.new(uri: 'http://purl.org/net/sword/package'))
  new(:SWORD_ERROR, NS.new(uri: 'http://purl.org/net/sword/error'))
  new(:SWORD_STATE, NS.new(uri: 'http://purl.org/net/sword/state'))
  new(:ATOM_PUB, NS.new(uri: 'http://www.w3.org/2007/app', prefix: 'app'))
  new(:ATOM, NS.new(uri: 'http://www.w3.org/2005/Atom', prefix: 'atom'))
  new(:DC_TERMS, NS.new(uri: 'http://purl.org/dc/terms/', prefix: 'dcterms'))
  new(:RDF, NS.new(uri: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', prefix: 'rdf'))
  new(:OAI_ORE, NS.new(uri: 'http://www.openarchives.org/ore/terms/', prefix: 'ore'))

  # @return the `uri` of the wrapped namespace value
  def uri
    value.uri
  end

  # @return the `prefix` of the wrapped namespace value
  def prefix
    value.prefix
  end
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
module Stash
|
4
|
+
module Sword
|
5
|
+
# A read-only `IO`-like that concatenates a sequence of strings or IOs.
|
6
|
+
class SequenceIO
  # Creates a new {SequenceIO} concatenating the specified input sources.
  # Strings are wrapped internally as `StringIO`. A single input that is
  # not array-like is treated as a one-element sequence.
  #
  # @param inputs [Enumerable<String, IO>] an array of strings and/or IOs to
  #   concatenate
  def initialize(inputs)
    inputs = [inputs] unless inputs.respond_to?(:[]) && inputs.respond_to?(:map)
    @inputs = to_ios(inputs)
    # if any source is already binary, switch every source to binary mode
    binmode if any_binmode(@inputs)
    @index = 0
    @input = @inputs[index] unless inputs.empty?
  end

  # The total size of all inputs. Computed once, on first call, and
  # memoized.
  #
  # @return [Integer] the sum of the sizes of all inputs
  # @raise [RuntimeError] if any input does not respond to `size`
  def size
    @size ||= inputs.inject(0) do |sum, input|
      raise "input #{input} does not respond to :size" unless input.respond_to?(:size)
      sum + input.size
    end
  end

  # Reads from the concatenated inputs, moving on to (and closing) each
  # input in turn as it is exhausted.
  #
  # @param length [Integer, nil] the maximum number of bytes to read, or
  #   `nil` to read everything that remains
  # @param outbuf [String, nil] an optional buffer; it is cleared and then
  #   receives the data
  # @return [String, nil] the data read. Returns `nil` when the total size
  #   of the sequence is zero, and also (as `IO#read` does) when a positive
  #   `length` is requested at end of input. Reading with no length, or a
  #   length of zero, at end of input returns an empty string.
  def read(length = nil, outbuf = nil)
    # use <= instead of == to get around https://github.com/bbatsov/rubocop/issues/3131
    return nil if size <= 0
    outbuf = outbuf ? outbuf.clear : ''
    if length
      read_segment(length, outbuf)
      # An empty result for a positive length can only mean every remaining
      # input is exhausted. Signal end of input with nil so that loops of
      # the form `while (chunk = io.read(n))` terminate.
      return nil if length > 0 && outbuf.empty?
    else
      read_fully(outbuf)
    end
    outbuf
  end

  # Puts every input that supports it into binary mode.
  #
  # @return [SequenceIO] self
  def binmode
    return self if binmode?
    inputs.each do |input|
      input.binmode if input.respond_to?(:binmode)
    end
    @binmode = true
    self
  end

  # @return [Boolean, nil] truthy once {#binmode} has been applied
  def binmode?
    @binmode
  end

  # Closes all remaining inputs that respond to `close`.
  def close
    next_input! until input.nil?
  end

  # @return [Boolean] whether every input has been consumed or closed
  def closed?
    input.nil? && index >= inputs.length
  end

  private

  attr_reader :input
  attr_reader :index
  attr_reader :inputs

  # Appends everything remaining in every remaining input to `buffer`.
  def read_fully(buffer)
    until input.nil?
      buffer << input.read(nil)
      next_input!
    end
  end

  # Appends up to `length` bytes to `buffer`, advancing to the next input
  # whenever the current one yields less than what is still wanted.
  def read_segment(length, buffer)
    remaining = length
    while input && remaining > 0
      chunk = input.read(remaining)
      if chunk
        buffer << chunk
        remaining -= chunk.length
      end
      # a short (or nil) read means the current input is exhausted
      next_input! if remaining > 0
    end
  end

  # TODO: Array.pop! or something
  #
  # Closes the current input (if it can be closed) and advances to the
  # next one, or to `nil` when there are no more.
  def next_input!
    input.close if input && input.respond_to?(:close)
    @index += 1
    @input = index < inputs.length ? inputs[index] : nil
  end

  # Wraps anything that does not respond to `read` in a `StringIO`.
  def to_ios(inputs)
    inputs.map do |input|
      input.respond_to?(:read) ? input : StringIO.new(input.to_s)
    end
  end

  # Whether any of the given IOs reports itself as being in binary mode.
  def any_binmode(ios)
    ios.any? { |io| io.respond_to?(:binmode?) && io.binmode? }
  end
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
Status of submission is tracked - merritt_response {PROCESSING, “User not authorized for Merritt submission”,
|
2
|
+
|
3
|
+
How is dublincore called?
|
4
|
+
|
5
|
+
Review & Submit button —> record.review()
|
6
|
+
creates datacite.xml file and places it in the uploads/<local_id>/ directory
|
7
|
+
|
8
|
+
Submit button —> record.send_archive_to_merritt
|
9
|
+
|
10
|
+
Set up the user_email request headers
|
11
|
+
Create a new thread
|
12
|
+
Record.Generate Merritt Zip()
|
13
|
+
Record.Send Archive to Merritt
|
14
|
+
Create a submission Log
|
15
|
+
|
16
|
+
|
17
|
+
If submission successful, remove files from local storage and add logging information
|
18
|
+
Record.Purge_Files()
|
19
|
+
|
20
|
+
Generate_Merritt_Zip()
|
21
|
+
set file path to uploads_dir - this is a linked directory shared/uploads so it will stay persistent across releases
|
22
|
+
local_id is used to name the uploads directory for a dataset.
|
23
|
+
calls self.review
|
24
|
+
calls self.dublincore
|
25
|
+
calls self.dataone
|
26
|
+
creates the zip file
|
27
|
+
purges all temp files, etc
|
28
|
+
|
29
|
+
Record.Send_Archive_to_merritt
|
30
|
+
gets info to send in curl command from MERRITT_CONFIG and user and camps
|
31
|
+
sends curl command - no notification sent if no email
|
32
|
+
No DOI/EZID is created at this point. Merritt does this.
|
33
|
+
returns sys_output
|
34
|
+
|
35
|
+
|
36
|
+
SWORD - Slug identifier is the local_id
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
if @user_email.nil?
|
41
|
sys_output = "curl --insecure --verbose -u #{merritt_username}:#{merritt_password} -F \"file=@./#{DATASHARE_CONFIG['uploads_dir']}/#{self.local_id}/#{self.local_id}.zip\" -F \"type=container\" -F \"submitter=Dash/#{external_id}\" -F \"responseForm=xml\" -F \"profile=#{merritt_profile}\" -F \"localIdentifier=#{self.local_id}\" #{merritt_endpoint} 2>&1"
|
1
42
|
else
|
2
43
|
sys_output = "curl --insecure --verbose -u #{merritt_username}:#{merritt_password} -F \"file=@./#{DATASHARE_CONFIG['uploads_dir']}/#{self.local_id}/#{self.local_id}.zip\" -F \"notification=#{@user_email}\" -F \"type=container\" -F \"submitter=Dash/#{external_id}\" -F \"responseForm=xml\" -F \"profile=#{merritt_profile}\" -F \"localIdentifier=#{self.local_id}\" #{merritt_endpoint} 2>&1"
|
3
44
|
end
|