stash-sword 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +42 -0
  3. data/.rubocop.yml +25 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +2 -0
  6. data/.yardopts +3 -0
  7. data/Gemfile +6 -0
  8. data/LICENSE.md +22 -0
  9. data/README.md +4 -0
  10. data/Rakefile +49 -0
  11. data/examples/example.rb +26 -0
  12. data/examples/uploads/example.zip +0 -0
  13. data/examples/uploads/example/lorem-ipsum.txt +7 -0
  14. data/examples/uploads/example/mrt-datacite.xml +22 -0
  15. data/examples/uploads/example/mrt-dc.xml +13 -0
  16. data/examples/uploads/example/stash-wrapper.xml +56 -0
  17. data/lib/stash/sword.rb +39 -0
  18. data/lib/stash/sword/client.rb +132 -0
  19. data/lib/stash/sword/deposit_receipt.rb +44 -0
  20. data/lib/stash/sword/header_utils.rb +42 -0
  21. data/lib/stash/sword/http_helper.rb +120 -0
  22. data/lib/stash/sword/iri.rb +12 -0
  23. data/lib/stash/sword/log_utils.rb +39 -0
  24. data/lib/stash/sword/module_info.rb +12 -0
  25. data/lib/stash/sword/namespace.rb +30 -0
  26. data/lib/stash/sword/sequence_io.rb +105 -0
  27. data/notes/Dash_Submission_To_Merritt.txt +40 -0
  28. data/notes/service-document.xml +15 -0
  29. data/spec/.rubocop.yml +10 -0
  30. data/spec/data/deposit_receipt_merritt.xml +25 -0
  31. data/spec/data/deposit_receipt_spec.xml +58 -0
  32. data/spec/rspec_custom_matchers.rb +118 -0
  33. data/spec/spec_helper.rb +33 -0
  34. data/spec/unit/stash/sword2/client_spec.rb +110 -0
  35. data/spec/unit/stash/sword2/deposit_receipt_spec.rb +48 -0
  36. data/spec/unit/stash/sword2/http_helper_get_spec.rb +131 -0
  37. data/spec/unit/stash/sword2/http_helper_post_spec.rb +143 -0
  38. data/spec/unit/stash/sword2/http_helper_put_spec.rb +143 -0
  39. data/spec/unit/stash/sword2/log_spec.rb +23 -0
  40. data/spec/unit/stash/sword2/namespaces_spec.rb +31 -0
  41. data/spec/unit/stash/sword2/sequence_io_spec.rb +153 -0
  42. data/stash-sword.gemspec +47 -0
  43. metadata +279 -0
@@ -0,0 +1,44 @@
1
+ require 'xml/mapping_extensions'
2
+ require 'stash/sword/namespace'
3
+
4
+ module Stash
5
+ module Sword
6
+
7
# Maps a single Atom `link` element, exposing its `rel`, `href` and `type`
# attributes.
class Link
  include ::XML::Mapping

  # Link relation, read from the `rel` attribute as plain text
  text_node(:rel, '@rel')
  # Link target, read from the `href` attribute
  uri_node(:href, '@href')
  # Media type, read from the `type` attribute; nil when the attribute is absent
  mime_type_node(:type, '@type', default_value: nil)
end
14
+
15
+ class DepositReceipt
16
+ include ::XML::MappingExtensions::Namespaced
17
+
18
+ root_element_name 'entry'
19
+ namespace Namespace::ATOM.value
20
+
21
+ array_node :links, 'link', class: Link, default_value: []
22
+
23
# Finds the first link whose `rel` matches the given relation.
#
# @param rel [#to_s, nil] the relation to look for; converted with `to_s`
#   before comparison unless it is nil
# @return [Link, nil] the first matching link, or nil if none matches
def link(rel:)
  wanted = rel && rel.to_s
  links.find { |candidate| candidate.rel == wanted }
end
27
+
28
# @return [Object, nil] the `href` of the `edit-media` link, or nil if the
#   receipt has no such link
def em_iri
  media_link = link(rel: 'edit-media')
  media_link ? media_link.href : nil
end
32
+
33
# @return [Object, nil] the `href` of the `edit` link, or nil if the
#   receipt has no such link
def edit_iri
  editing_link = link(rel: 'edit')
  editing_link ? editing_link.href : nil
end
37
+
38
# Looks up the link whose relation is
# `http://purl.org/net/sword/terms/add`.
#
# The result depends only on that link; whether the receipt also carries an
# `edit-media` link is irrelevant here.
#
# @return [Object, nil] the `href` of the matching link, or nil if the
#   receipt has no such link
def se_iri
  add_link = link(rel: URI('http://purl.org/net/sword/terms/add'))
  add_link.href if add_link
end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,42 @@
1
+ module Stash
2
+ module Sword
3
# Builders for the header hashes sent with create and update requests.
# The including object is expected to set `@on_behalf_of`.
module HeaderUtils

  SIMPLE_ZIP = 'http://purl.org/net/sword/package/SimpleZip'.freeze
  APPLICATION_ZIP = 'application/zip'.freeze
  MULTIPART_RELATED_ATOM_XML = 'multipart/related; type="application/atom+xml"'.freeze

  # The value sent as the `On-Behalf-Of` header
  attr_reader :on_behalf_of

  # Headers for a request whose body is the zipfile itself.
  #
  # @param zipfile [String] path to the zipfile; its basename and MD5
  #   checksum are placed in the headers
  # @param slug [Object] the value of the `Slug` header
  # @return [Hash<String, Object>] the request headers
  def create_request_headers(zipfile, slug)
    filename = File.basename(zipfile)
    checksum = Digest::MD5.file(zipfile).hexdigest
    {
      'Content-Type' => APPLICATION_ZIP,
      'Content-Disposition' => "attachment; filename=#{filename}",
      'Packaging' => SIMPLE_ZIP,
      'Content-MD5' => checksum,
      'On-Behalf-Of' => on_behalf_of,
      'Slug' => slug
    }
  end

  # Headers for a multipart request.
  #
  # @param stream [#size] the request body; its `size` becomes `Content-Length`
  # @param boundary [String] the multipart boundary
  # @return [Hash<String, Object>] the request headers
  def update_request_headers(stream, boundary)
    content_type = "#{MULTIPART_RELATED_ATOM_XML}; boundary=\"#{boundary}\""
    {
      'Content-Length' => stream.size.to_s,
      'Content-Type' => content_type,
      'On-Behalf-Of' => on_behalf_of,
      'MIME-Version' => '1.0'
    }
  end

  # MIME headers describing the zipfile as a payload part.
  #
  # @param zipfile [String] path to the zipfile; its basename and MD5
  #   checksum are placed in the headers
  # @return [Hash<String, String>] the MIME headers
  def update_mime_headers(zipfile)
    filename = File.basename(zipfile)
    checksum = Digest::MD5.file(zipfile).hexdigest
    {
      'Content-Type' => APPLICATION_ZIP,
      'Content-Disposition' => "attachment; name=payload; filename=\"#{filename}\"",
      'Packaging' => SIMPLE_ZIP,
      'Content-MD5' => checksum,
      'MIME-Version' => '1.0'
    }
  end
end
41
+ end
42
+ end
@@ -0,0 +1,120 @@
1
+ require 'net/http'
2
+ require 'rest-client'
3
+ require 'uri'
4
+ require 'stash/sword/log_utils'
5
+
6
+ module Stash
7
+ module Sword
8
+ # Utility class simplifying GET, POST and PUT requests for HTTP/HTTPS resources.
9
+ class HTTPHelper
10
+
11
+ include LogUtils
12
+
13
+ # The default number of redirects to follow before erroring out.
14
+ DEFAULT_MAX_REDIRECTS = 5
15
+
16
+ # @return [String] the User-Agent string to send when making requests
17
+ attr_accessor :user_agent
18
+
19
+ # @return [Integer] the number of redirects to follow before erroring out
20
+ attr_accessor :redirect_limit
21
+
22
+ # @return [String] the HTTP Basic Authentication username
23
+ attr_reader :username
24
+
25
+ # @return [String] the HTTP Basic Authentication password
26
+ attr_reader :password
27
+
28
# Creates a new `HTTPHelper`
#
# @param user_agent [String] the User-Agent string to send when making requests
# @param username [String, nil] the HTTP Basic Authentication username, if any
# @param password [String, nil] the HTTP Basic Authentication password, if any
# @param redirect_limit [Integer] the number of redirects to follow before erroring out
#   (defaults to {DEFAULT_MAX_REDIRECTS})
def initialize(user_agent:, username: nil, password: nil, redirect_limit: DEFAULT_MAX_REDIRECTS)
  @user_agent, @redirect_limit = user_agent, redirect_limit
  @username, @password = username, password
end
39
+
40
# Gets the content of the specified URI as a string.
#
# @param uri [URI] the URI to download
# @param limit [Integer] the number of redirects to follow (defaults to {#redirect_limit})
# @return [String] the body of the successful response
def get(uri:, limit: redirect_limit)
  # The `return` is deliberate: it leaves #get with the response body from
  # inside the block, instead of relying on whatever value do_get itself
  # evaluates to.
  do_get(uri, limit) { |response| return response.body }
end
51
+
52
# Posts the specified payload to the specified URI.
#
# @param uri [URI] the URI to post to
# @param payload [Object] the request body
# @param headers [Hash] additional request headers
# @param limit [Integer] the number of redirects to follow (defaults to {#redirect_limit})
# @return [Object] the result of the underlying request
def post(uri:, payload:, headers: {}, limit: redirect_limit)
  do_post_or_put(
    method: :post,
    uri: uri,
    payload: payload,
    headers: headers,
    limit: limit
  )
end
56
+
57
# Puts the specified payload to the specified URI.
#
# @param uri [URI] the URI to put to
# @param payload [Object] the request body
# @param headers [Hash] additional request headers
# @param limit [Integer] the number of redirects to follow (defaults to {#redirect_limit})
# @return [Object] the result of the underlying request
def put(uri:, payload:, headers: {}, limit: redirect_limit)
  do_post_or_put(
    method: :put,
    uri: uri,
    payload: payload,
    headers: headers,
    limit: limit
  )
end
61
+
62
+ private
63
+
64
# Headers merged into every POST/PUT request.
#
# @return [Hash<String, Object>] a frozen hash holding the `User-Agent` and
#   `Content-Transfer-Encoding` headers
def default_headers
  defaults = {}
  defaults['User-Agent'] = user_agent
  defaults['Content-Transfer-Encoding'] = 'binary'
  defaults.freeze
end
70
+
71
# Builds the request options, logs them, and executes the request through
# RestClient.
#
# @param method [Symbol] the HTTP method (`:post` or `:put`)
# @param uri [URI] the target URI
# @param payload [Object] the request body
# @param headers [Hash] caller-supplied request headers
# @param limit [Integer] the maximum number of redirects to follow
# @return [Object] whatever `RestClient::Request.execute` returns
def do_post_or_put(method:, uri:, payload:, headers:, limit:)
  request = request_options(headers, limit, method, payload, uri)
  log_hash(request)
  RestClient::Request.execute(**request)
end
76
+
77
# Assembles the options hash handed to `RestClient::Request.execute`.
#
# The default headers are merged over the caller's headers, so a default
# wins when both define the same key. Credentials are only included when
# they are set.
#
# @param headers [Hash] caller-supplied request headers
# @param limit [Integer] the maximum number of redirects to follow
# @param method [Symbol] the HTTP method
# @param payload [Object] the request body
# @param uri [#to_s] the target URI
# @return [Hash<Symbol, Object>] the request options
def request_options(headers, limit, method, payload, uri)
  base = {
    method: method,
    url: uri.to_s,
    payload: payload,
    headers: headers.merge(default_headers),
    max_redirects: limit
  }
  base.tap do |opts|
    opts[:user] = username if username
    opts[:password] = password if password
  end
end
89
+
90
# TODO: Consider rewriting with RestClient
#
# Issues a GET request with Net::HTTP and yields the first successful
# response to the given block. Informational and redirection responses
# are followed recursively, each hop using up one unit of `limit`; any
# other response raises.
#
# @param uri [URI] the URI to request
# @param limit [Integer] the number of requests still allowed
# @yield [response] the successful response
# @raise [RuntimeError] if `limit` is exhausted or the response is neither
#   a success, an informational response, nor a redirect
def do_get(uri, limit, &block) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  raise "Redirect limit (#{redirect_limit}) exceeded retrieving URI #{uri}" if limit <= 0

  request = Net::HTTP::Get.new(uri, 'User-Agent' => user_agent)
  # Basic auth is only sent when both credentials are present
  request.basic_auth(username, password) if username && password
  secure = uri.scheme == 'https'
  Net::HTTP.start(uri.hostname, uri.port, use_ssl: secure) do |connection|
    connection.request(request) do |response|
      case response
      when Net::HTTPSuccess
        yield(response)
      when Net::HTTPInformation, Net::HTTPRedirection
        do_get(redirect_uri_for(response, uri), limit - 1, &block)
      else
        raise "Error #{response.code}: #{response.message} retrieving URI #{uri}"
      end
    end
  end
end
108
+
109
# Works out which URI to request next after a non-final response.
#
# @param response [Net::HTTPResponse] the informational or redirect response
# @param original_uri [URI] the URI that produced the response
# @return [URI] `original_uri` itself for an informational response;
#   otherwise the `location` header, resolved against `original_uri` when
#   it is relative
def redirect_uri_for(response, original_uri)
  return original_uri if response.is_a?(Net::HTTPInformation)

  target = response['location']
  parsed = URI(target)
  if parsed.relative?
    original_uri + target
  else
    parsed
  end
end
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,12 @@
1
+ require 'uri'
2
+ require 'typesafe_enum'
3
+
4
+ module Stash
5
+ module Sword
6
# Enumerated package IRIs, each holding its URI as the enum value.
class IRI < TypesafeEnum::Base

  new(:BINARY, URI('http://purl.org/net/sword/package/Binary'))
  new(:SIMPLE_ZIP, URI('http://purl.org/net/sword/package/SimpleZip'))
end
11
+ end
12
+ end
@@ -0,0 +1,39 @@
1
+ module Stash
2
+ module Sword
3
# Logging helpers that write to the logger exposed by `::Stash::Sword.log`.
module LogUtils
  # @return [Object] the logger returned by `::Stash::Sword.log`
  def log
    ::Stash::Sword.log
  end

  # Logs the response attached to an error at error level. Errors without
  # a `response` method produce a fixed placeholder message instead.
  #
  # @param e [Object] the error to log
  def log_error(e)
    message = e.respond_to?(:response) ? response_to_log_msg(e.response) : 'Unable to log response'
    log.error(message)
  end

  # Formats a response as a multi-line message framed by separator lines.
  #
  # @param response [#code, #headers, #body] the response to format
  # @return [String] the formatted message
  def response_to_log_msg(response)
    separator = '-----------------------------------------------------'
    sections = [
      separator,
      "code: #{response.code}",
      'headers:',
      hash_to_log_msg(response.headers),
      "body:\n#{response.body}",
      separator
    ]
    sections.join("\n")
  end

  # Logs the hash at debug level, one `key: value` entry per line.
  #
  # @param hash [Hash] the hash to log
  def log_hash(hash)
    log.debug(hash_to_log_msg(hash))
  end

  # Renders a hash as newline-separated `key: value` lines. A value that is
  # itself a hash is expanded one level, each entry on its own tab-indented
  # line.
  #
  # @param hash [Hash] the hash to render
  # @return [String] the rendered text
  def hash_to_log_msg(hash)
    entries = hash.map do |key, value|
      rendered = value
      rendered = value.map { |inner_key, inner_value| "\n\t#{inner_key}: #{inner_value}" }.join if value.is_a?(Hash)
      "#{key}: #{rendered}"
    end
    entries.join("\n")
  end
end
38
+ end
39
+ end
@@ -0,0 +1,12 @@
1
module Stash
  module Sword
    # Gem name
    NAME = 'stash-sword'.freeze

    # Gem version
    VERSION = '0.1.0'.freeze

    # Copyright notice for the gem
    COPYRIGHT = 'Copyright (c) 2016 The Regents of the University of California'.freeze
  end
end
+ end
@@ -0,0 +1,30 @@
1
+ require 'typesafe_enum'
2
+ require 'xml/mapping_extensions'
3
+
4
+ module Stash
5
+ module Sword
6
# Enumerated XML namespaces; each enum value is an
# `XML::MappingExtensions::Namespace` built from a URI and an optional prefix.
class Namespace < TypesafeEnum::Base
  NS = XML::MappingExtensions::Namespace
  private_constant(:NS)

  # Enum key => arguments for NS.new, listed in declaration order
  {
    SWORD: { uri: 'http://purl.org/net/sword/' },
    SWORD_TERMS: { uri: 'http://purl.org/net/sword/terms/', prefix: 'sword' },
    SWORD_PACKAGE: { uri: 'http://purl.org/net/sword/package' },
    SWORD_ERROR: { uri: 'http://purl.org/net/sword/error' },
    SWORD_STATE: { uri: 'http://purl.org/net/sword/state' },
    ATOM_PUB: { uri: 'http://www.w3.org/2007/app', prefix: 'app' },
    ATOM: { uri: 'http://www.w3.org/2005/Atom', prefix: 'atom' },
    DC_TERMS: { uri: 'http://purl.org/dc/terms/', prefix: 'dcterms' },
    RDF: { uri: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', prefix: 'rdf' },
    OAI_ORE: { uri: 'http://www.openarchives.org/ore/terms/', prefix: 'ore' }
  }.each do |key, ns_args|
    new(key, NS.new(**ns_args))
  end

  # @return [Object] the `uri` of the wrapped namespace value
  def uri
    value.uri
  end

  # @return [Object] the `prefix` of the wrapped namespace value
  def prefix
    value.prefix
  end
end
29
+ end
30
+ end
@@ -0,0 +1,105 @@
1
+ require 'stringio'
2
+
3
+ module Stash
4
+ module Sword
5
# A read-only `IO`-like that concatenates a sequence of strings or IOs and
# presents them as one continuous stream.
#
# Inputs are consumed in order. Each input that responds to `close` is
# closed as soon as the stream moves past it, including IOs passed in by
# the caller.
class SequenceIO

  # Creates a new {SequenceIO} concatenating the specified input sources.
  # Anything that does not respond to `read` is converted with `to_s` and
  # wrapped internally as a `StringIO`.
  #
  # @param inputs [Enumerable<String, IO>] an array of strings and/or IOs to
  #   concatenate; a single value that is not array-like is treated as a
  #   one-element sequence
  def initialize(inputs)
    inputs = [inputs] unless inputs.respond_to?(:[]) && inputs.respond_to?(:map)
    @inputs = to_ios(inputs)
    binmode if any_binmode(@inputs)
    @index = 0
    # nil when there are no inputs at all
    @input = @inputs[@index]
  end

  # @return [Integer] the sum of the sizes of all inputs (memoized)
  # @raise [RuntimeError] if any input does not respond to `size`
  def size
    @size ||= inputs.inject(0) do |sum, input|
      raise "input #{input} does not respond to :size" unless input.respond_to?(:size)
      sum + input.size
    end
  end

  # Reads from the concatenated inputs, following the `IO#read` convention
  # for end of stream: with a positive `length` it returns nil once nothing
  # is left, so `while (chunk = io.read(n))` loops terminate; with no
  # length (or a length of zero) it returns an empty string.
  #
  # @param length [Integer, nil] the maximum number of bytes to read, or nil
  #   to read everything that remains
  # @param outbuf [String, nil] an optional buffer; it is cleared and then
  #   receives the data
  # @return [String, nil] the data read; nil if the total size is zero, or
  #   if a positive `length` was requested at end of stream
  def read(length = nil, outbuf = nil)
    # use <= instead of == to get around https://github.com/bbatsov/rubocop/issues/3131
    return nil if size <= 0
    outbuf = outbuf ? outbuf.clear : ''
    if length
      read_segment(length, outbuf)
      return nil if length > 0 && outbuf.empty?
    else
      read_fully(outbuf)
    end
    outbuf
  end

  # Puts every input that supports it into binary mode.
  #
  # @return [SequenceIO] self
  def binmode
    return self if binmode?
    inputs.each do |input|
      input.binmode if input.respond_to?(:binmode)
    end
    @binmode = true
    self
  end

  # @return [Boolean, nil] true once {#binmode} has been applied, nil before
  def binmode?
    @binmode
  end

  # Closes the current input and every input after it.
  def close
    next_input! until input.nil?
  end

  # @return [Boolean] true once every input has been consumed or closed
  def closed?
    input.nil? && index >= inputs.length
  end

  private

  attr_reader :input
  attr_reader :index
  attr_reader :inputs

  # Appends everything that remains, across all inputs, to the buffer.
  def read_fully(buffer)
    until input.nil?
      buffer << input.read(nil)
      next_input!
    end
  end

  # Appends up to `length` bytes to the buffer, moving on to the next input
  # whenever the current one returns fewer bytes than requested.
  def read_segment(length, buffer)
    remaining = length
    while input && remaining > 0
      chunk = input.read(remaining)
      if chunk
        buffer << chunk
        remaining -= chunk.length
      end
      # A short (or nil) read means the current input is exhausted
      next_input! if remaining > 0
    end
  end

  # Closes the current input (if it can be closed) and advances to the
  # next one, or to nil once the sequence is exhausted.
  # TODO: Array.pop! or something
  def next_input!
    input.close if input && input.respond_to?(:close)
    @index += 1
    @input = index < inputs.length ? inputs[index] : nil
  end

  # Wraps anything that cannot be read from in a StringIO.
  def to_ios(inputs)
    inputs.map do |input|
      input.respond_to?(:read) ? input : StringIO.new(input.to_s)
    end
  end

  # @return [Boolean] true if any of the IOs reports being in binary mode
  def any_binmode(ios)
    ios.any? { |io| io.respond_to?(:binmode?) && io.binmode? }
  end
end
104
+ end
105
+ end
@@ -0,0 +1,40 @@
1
+ Status of submission is tracked - merritt_response {PROCESSING, “User not authorized for Merritt submission”, …}
2
+
3
+ How is dublincore called?
4
+
5
+ Review & Submit button —> record.review()
6
+ creates datacite.xml file and places it in the uploads/<local_id>/ directory
7
+
8
+ Submit button —> record.send_archive_to_merritt
9
+
10
+ Set up the user_email request headers
11
+ Create a new thread
12
+ Record.Generate Merritt Zip()
13
+ Record.Send Archive to Merritt
14
+ Create a submission Log
15
+
16
+
17
+ If submission successful, remove files from local storage and add logging information
18
+ Record.Purge_Files()
19
+
20
+ Generate_Merritt_Zip()
21
+ set file path to uploads_dir - this is a linked directory shared/uploads so it will stay persistent across releases
22
+ local_id is used to name the uploads directory for a dataset.
23
+ calls self.review
24
+ calls self.dublincore
25
+ calls self.dataone
26
+ creates the zip file
27
+ purges all temp files, etc
28
+
29
+ Record.Send_Archive_to_merritt
30
+ gets info to send in curl command from MERRITT_CONFIG and user and camps
31
+ sends curl command - no notification sent if no email
32
+ No DOI/EZID is created at this point; Merritt does this.
33
+ returns sys_output
34
+
35
+
36
+ SWORD - Slug identifier is the local_id
37
+
38
+
39
+
40
+ if @user_email.nil?
41
  sys_output = "curl --insecure --verbose -u #{merritt_username}:#{merritt_password} -F \"file=@./#{DATASHARE_CONFIG['uploads_dir']}/#{self.local_id}/#{self.local_id}.zip\" -F \"type=container\" -F \"submitter=Dash/#{external_id}\" -F \"responseForm=xml\" -F \"profile=#{merritt_profile}\" -F \"localIdentifier=#{self.local_id}\" #{merritt_endpoint} 2>&1"
1
42
  else
2
43
  sys_output = "curl --insecure --verbose -u #{merritt_username}:#{merritt_password} -F \"file=@./#{DATASHARE_CONFIG['uploads_dir']}/#{self.local_id}/#{self.local_id}.zip\" -F \"notification=#{@user_email}\" -F \"type=container\" -F \"submitter=Dash/#{external_id}\" -F \"responseForm=xml\" -F \"profile=#{merritt_profile}\" -F \"localIdentifier=#{self.local_id}\" #{merritt_endpoint} 2>&1"
3
44
  end