stash-sword 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +42 -0
  3. data/.rubocop.yml +25 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +2 -0
  6. data/.yardopts +3 -0
  7. data/Gemfile +6 -0
  8. data/LICENSE.md +22 -0
  9. data/README.md +4 -0
  10. data/Rakefile +49 -0
  11. data/examples/example.rb +26 -0
  12. data/examples/uploads/example.zip +0 -0
  13. data/examples/uploads/example/lorem-ipsum.txt +7 -0
  14. data/examples/uploads/example/mrt-datacite.xml +22 -0
  15. data/examples/uploads/example/mrt-dc.xml +13 -0
  16. data/examples/uploads/example/stash-wrapper.xml +56 -0
  17. data/lib/stash/sword.rb +39 -0
  18. data/lib/stash/sword/client.rb +132 -0
  19. data/lib/stash/sword/deposit_receipt.rb +44 -0
  20. data/lib/stash/sword/header_utils.rb +42 -0
  21. data/lib/stash/sword/http_helper.rb +120 -0
  22. data/lib/stash/sword/iri.rb +12 -0
  23. data/lib/stash/sword/log_utils.rb +39 -0
  24. data/lib/stash/sword/module_info.rb +12 -0
  25. data/lib/stash/sword/namespace.rb +30 -0
  26. data/lib/stash/sword/sequence_io.rb +105 -0
  27. data/notes/Dash_Submission_To_Merritt.txt +40 -0
  28. data/notes/service-document.xml +15 -0
  29. data/spec/.rubocop.yml +10 -0
  30. data/spec/data/deposit_receipt_merritt.xml +25 -0
  31. data/spec/data/deposit_receipt_spec.xml +58 -0
  32. data/spec/rspec_custom_matchers.rb +118 -0
  33. data/spec/spec_helper.rb +33 -0
  34. data/spec/unit/stash/sword2/client_spec.rb +110 -0
  35. data/spec/unit/stash/sword2/deposit_receipt_spec.rb +48 -0
  36. data/spec/unit/stash/sword2/http_helper_get_spec.rb +131 -0
  37. data/spec/unit/stash/sword2/http_helper_post_spec.rb +143 -0
  38. data/spec/unit/stash/sword2/http_helper_put_spec.rb +143 -0
  39. data/spec/unit/stash/sword2/log_spec.rb +23 -0
  40. data/spec/unit/stash/sword2/namespaces_spec.rb +31 -0
  41. data/spec/unit/stash/sword2/sequence_io_spec.rb +153 -0
  42. data/stash-sword.gemspec +47 -0
  43. metadata +279 -0
@@ -0,0 +1,44 @@
1
+ require 'xml/mapping_extensions'
2
+ require 'stash/sword/namespace'
3
+
4
+ module Stash
5
+ module Sword
6
+
7
+ class Link
8
+ include ::XML::Mapping
9
+
10
+ text_node :rel, '@rel'
11
+ uri_node :href, '@href'
12
+ mime_type_node :type, '@type', default_value: nil
13
+ end
14
+
15
+ class DepositReceipt
16
+ include ::XML::MappingExtensions::Namespaced
17
+
18
+ root_element_name 'entry'
19
+ namespace Namespace::ATOM.value
20
+
21
+ array_node :links, 'link', class: Link, default_value: []
22
+
23
+ def link(rel:)
24
+ rel = rel.to_s if rel
25
+ links.find { |l| l.rel == rel }
26
+ end
27
+
28
+ def em_iri
29
+ em_iri = link(rel: 'edit-media')
30
+ em_iri.href if em_iri
31
+ end
32
+
33
+ def edit_iri
34
+ edit_iri = link(rel: 'edit')
35
+ edit_iri.href if edit_iri
36
+ end
37
+
38
+ def se_iri
39
+ se_iri = link(rel: URI('http://purl.org/net/sword/terms/add'))
40
+ se_iri.href if em_iri
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,42 @@
1
+ module Stash
2
+ module Sword
3
+ module HeaderUtils
4
+
5
+ SIMPLE_ZIP = 'http://purl.org/net/sword/package/SimpleZip'.freeze
6
+ APPLICATION_ZIP = 'application/zip'.freeze
7
+ MULTIPART_RELATED_ATOM_XML = 'multipart/related; type="application/atom+xml"'.freeze
8
+
9
+ attr_reader :on_behalf_of
10
+
11
+ def create_request_headers(zipfile, slug)
12
+ {
13
+ 'Content-Type' => APPLICATION_ZIP,
14
+ 'Content-Disposition' => "attachment; filename=#{File.basename(zipfile)}",
15
+ 'Packaging' => SIMPLE_ZIP,
16
+ 'Content-MD5' => Digest::MD5.file(zipfile).to_s,
17
+ 'On-Behalf-Of' => on_behalf_of,
18
+ 'Slug' => slug
19
+ }
20
+ end
21
+
22
+ def update_request_headers(stream, boundary)
23
+ {
24
+ 'Content-Length' => stream.size.to_s,
25
+ 'Content-Type' => "#{MULTIPART_RELATED_ATOM_XML}; boundary=\"#{boundary}\"",
26
+ 'On-Behalf-Of' => on_behalf_of,
27
+ 'MIME-Version' => '1.0'
28
+ }
29
+ end
30
+
31
+ def update_mime_headers(zipfile)
32
+ {
33
+ 'Content-Type' => APPLICATION_ZIP,
34
+ 'Content-Disposition' => "attachment; name=payload; filename=\"#{File.basename(zipfile)}\"",
35
+ 'Packaging' => SIMPLE_ZIP,
36
+ 'Content-MD5' => Digest::MD5.file(zipfile).to_s,
37
+ 'MIME-Version' => '1.0'
38
+ }
39
+ end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,120 @@
1
+ require 'net/http'
2
+ require 'rest-client'
3
+ require 'uri'
4
+ require 'stash/sword/log_utils'
5
+
6
+ module Stash
7
+ module Sword
8
+ # Utility class simplifying GET, POST, and PUT requests for HTTP/HTTPS resources.
9
+ class HTTPHelper
10
+
11
+ include LogUtils
12
+
13
+ # The default number of redirects to follow before erroring out.
14
+ DEFAULT_MAX_REDIRECTS = 5
15
+
16
+ # @return [String] the User-Agent string to send when making requests
17
+ attr_accessor :user_agent
18
+
19
+ # @return [Integer] the number of redirects to follow before erroring out
20
+ attr_accessor :redirect_limit
21
+
22
+ # @return [String] the HTTP Basic Authentication username
23
+ attr_reader :username
24
+
25
+ # @return [String] the HTTP Basic Authentication password
26
+ attr_reader :password
27
+
28
+ # Creates a new `HTTPHelper`
29
+ #
30
+ # @param user_agent [String] the User-Agent string to send when making requests
31
+ # @param redirect_limit [Integer] the number of redirects to follow before erroring out
32
+ # (defaults to {DEFAULT_MAX_REDIRECTS})
33
+ def initialize(user_agent:, username: nil, password: nil, redirect_limit: DEFAULT_MAX_REDIRECTS)
34
+ @user_agent = user_agent
35
+ @redirect_limit = redirect_limit
36
+ @username = username
37
+ @password = password
38
+ end
39
+
40
+ # Gets the content of the specified URI as a string.
41
+ # @param uri [URI] the URI to download
42
+ # @param limit [Integer] the number of redirects to follow (defaults to {#redirect_limit})
43
+ # @return [String] the content of the URI
44
+ def get(uri:, limit: redirect_limit)
45
+ do_get(uri, limit) do |success|
46
+ # not 100% clear why we need an explicit return here; it
47
+ # doesn't show up in unit tests but it does in example.rb
48
+ return success.body
49
+ end
50
+ end
51
+
52
+ # Posts the specified payload string to the specified URI.
53
+ def post(uri:, payload:, headers: {}, limit: redirect_limit)
54
+ do_post_or_put(method: :post, uri: uri, payload: payload, headers: headers, limit: limit)
55
+ end
56
+
57
+ # Puts the specified payload string to the specified URI.
58
+ def put(uri:, payload:, headers: {}, limit: redirect_limit)
59
+ do_post_or_put(method: :put, uri: uri, payload: payload, headers: headers, limit: limit)
60
+ end
61
+
62
+ private
63
+
64
+ def default_headers
65
+ {
66
+ 'User-Agent' => user_agent,
67
+ 'Content-Transfer-Encoding' => 'binary'
68
+ }.freeze
69
+ end
70
+
71
+ def do_post_or_put(method:, uri:, payload:, headers:, limit:)
72
+ options = request_options(headers, limit, method, payload, uri)
73
+ log_hash(options)
74
+ RestClient::Request.execute(**options)
75
+ end
76
+
77
+ def request_options(headers, limit, method, payload, uri)
78
+ options = {
79
+ method: method,
80
+ url: uri.to_s,
81
+ payload: payload,
82
+ headers: headers.merge(default_headers),
83
+ max_redirects: limit
84
+ }
85
+ options[:user] = username if username
86
+ options[:password] = password if password
87
+ options
88
+ end
89
+
90
+ # TODO: Consider rewriting with RestClient
91
+ def do_get(uri, limit, &block) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
92
+ raise "Redirect limit (#{redirect_limit}) exceeded retrieving URI #{uri}" if limit <= 0
93
+ req = Net::HTTP::Get.new(uri, 'User-Agent' => user_agent)
94
+ req.basic_auth(username, password) if username && password
95
+ Net::HTTP.start(uri.hostname, uri.port, use_ssl: (uri.scheme == 'https')) do |http|
96
+ http.request(req) do |response|
97
+ case response
98
+ when Net::HTTPSuccess
99
+ yield(response)
100
+ when Net::HTTPInformation, Net::HTTPRedirection
101
+ do_get(redirect_uri_for(response, uri), limit - 1, &block)
102
+ else
103
+ raise "Error #{response.code}: #{response.message} retrieving URI #{uri}"
104
+ end
105
+ end
106
+ end
107
+ end
108
+
109
+ def redirect_uri_for(response, original_uri)
110
+ if response.is_a?(Net::HTTPInformation)
111
+ original_uri
112
+ else
113
+ location = response['location']
114
+ new_uri = URI(location)
115
+ new_uri.relative? ? original_uri + location : new_uri
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,12 @@
1
+ require 'uri'
2
+ require 'typesafe_enum'
3
+
4
+ module Stash
5
+ module Sword
6
+ class IRI < TypesafeEnum::Base
7
+
8
+ new :BINARY, URI('http://purl.org/net/sword/package/Binary')
9
+ new :SIMPLE_ZIP, URI('http://purl.org/net/sword/package/SimpleZip')
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,39 @@
1
+ module Stash
2
+ module Sword
3
+ module LogUtils
4
+ def log
5
+ ::Stash::Sword.log
6
+ end
7
+
8
+ def log_error(e)
9
+ if e.respond_to?(:response)
10
+ log.error(response_to_log_msg(e.response))
11
+ else
12
+ log.error('Unable to log response')
13
+ end
14
+ end
15
+
16
+ def response_to_log_msg(response)
17
+ [
18
+ '-----------------------------------------------------',
19
+ "code: #{response.code}",
20
+ 'headers:', hash_to_log_msg(response.headers),
21
+ "body:\n#{response.body}",
22
+ '-----------------------------------------------------'
23
+ ].join("\n")
24
+ end
25
+
26
+ def log_hash(hash)
27
+ msg = hash_to_log_msg(hash)
28
+ log.debug(msg)
29
+ end
30
+
31
+ def hash_to_log_msg(hash)
32
+ hash.map do |k, v|
33
+ value = v.is_a?(Hash) ? v.map { |k2, v2| "\n\t#{k2}: #{v2}" }.join : v
34
+ "#{k}: #{value}"
35
+ end.join("\n")
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,12 @@
1
+ module Stash
2
+ module Sword
3
+ # The name of this gem
4
+ NAME = 'stash-sword'.freeze
5
+
6
+ # The version of this gem
7
+ VERSION = '0.1.0'.freeze
8
+
9
+ # The copyright notice for this gem
10
+ COPYRIGHT = 'Copyright (c) 2016 The Regents of the University of California'.freeze
11
+ end
12
+ end
@@ -0,0 +1,30 @@
1
+ require 'typesafe_enum'
2
+ require 'xml/mapping_extensions'
3
+
4
+ module Stash
5
+ module Sword
6
+ class Namespace < TypesafeEnum::Base
7
+ NS = XML::MappingExtensions::Namespace
8
+ private_constant(:NS)
9
+
10
+ new :SWORD, NS.new(uri: 'http://purl.org/net/sword/')
11
+ new :SWORD_TERMS, NS.new(uri: 'http://purl.org/net/sword/terms/', prefix: 'sword')
12
+ new :SWORD_PACKAGE, NS.new(uri: 'http://purl.org/net/sword/package')
13
+ new :SWORD_ERROR, NS.new(uri: 'http://purl.org/net/sword/error')
14
+ new :SWORD_STATE, NS.new(uri: 'http://purl.org/net/sword/state')
15
+ new :ATOM_PUB, NS.new(uri: 'http://www.w3.org/2007/app', prefix: 'app')
16
+ new :ATOM, NS.new(uri: 'http://www.w3.org/2005/Atom', prefix: 'atom')
17
+ new :DC_TERMS, NS.new(uri: 'http://purl.org/dc/terms/', prefix: 'dcterms')
18
+ new :RDF, NS.new(uri: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', prefix: 'rdf')
19
+ new :OAI_ORE, NS.new(uri: 'http://www.openarchives.org/ore/terms/', prefix: 'ore')
20
+
21
+ def uri
22
+ value.uri
23
+ end
24
+
25
+ def prefix
26
+ value.prefix
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,105 @@
1
+ require 'stringio'
2
+
3
+ module Stash
4
+ module Sword
5
+ # A read-only `IO`-like that concatenates a sequence of strings or IOs.
6
+ class SequenceIO
7
+
8
+ # Creates a new {SequenceIO} concatenating the specified input sources.
9
+ # Strings are wrapped internally as `StringIO`.
10
+ #
11
+ # @param inputs [Enumerable<String, IO>] an array of strings and/or IOs to
12
+ # concatenate
13
+ def initialize(inputs)
14
+ inputs = [inputs] unless inputs.respond_to?(:[]) && inputs.respond_to?(:map)
15
+ @inputs = to_ios(inputs)
16
+ binmode if any_binmode(@inputs)
17
+ @index = 0
18
+ @input = @inputs[index] unless inputs.empty?
19
+ end
20
+
21
+ def size
22
+ @size ||= inputs.inject(0) do |sum, input|
23
+ raise "input #{input} does not respond to :size" unless input.respond_to?(:size)
24
+ sum + input.size
25
+ end
26
+ end
27
+
28
+ def read(length = nil, outbuf = nil)
29
+ # use <= instead of == to get around https://github.com/bbatsov/rubocop/issues/3131
30
+ return nil if size <= 0
31
+ outbuf = outbuf ? outbuf.clear : ''
32
+ length ? read_segment(length, outbuf) : read_fully(outbuf)
33
+ outbuf
34
+ end
35
+
36
+ def binmode
37
+ return self if binmode?
38
+ inputs.each do |input|
39
+ input.binmode if input.respond_to?(:binmode)
40
+ end
41
+ @binmode = true
42
+ self
43
+ end
44
+
45
+ def binmode?
46
+ @binmode
47
+ end
48
+
49
+ def close
50
+ next_input! until input.nil?
51
+ end
52
+
53
+ def closed?
54
+ input.nil? && index >= inputs.length
55
+ end
56
+
57
+ private
58
+
59
+ attr_reader :input
60
+ attr_reader :index
61
+ attr_reader :inputs
62
+
63
+ def read_fully(buffer)
64
+ until input.nil?
65
+ buffer << input.read(nil)
66
+ next_input!
67
+ end
68
+ end
69
+
70
+ def read_segment(length, buffer)
71
+ return unless input && length > 0
72
+
73
+ remaining = length
74
+ if (result = input.read(length))
75
+ buffer << result
76
+ remaining = length - result.length
77
+ end
78
+ return unless remaining > 0
79
+
80
+ next_input!
81
+ read_segment(remaining, buffer)
82
+ end
83
+
84
+ # TODO: Array.pop! or something
85
+ def next_input!
86
+ input.close if input && input.respond_to?(:close)
87
+ @index += 1
88
+ @input = index < inputs.length ? inputs[index] : nil
89
+ end
90
+
91
+ def to_ios(inputs)
92
+ inputs.map do |input|
93
+ input.respond_to?(:read) ? input : StringIO.new(input.to_s)
94
+ end
95
+ end
96
+
97
+ def any_binmode(ios)
98
+ ios.each do |io|
99
+ return true if io.respond_to?(:binmode?) && io.binmode?
100
+ end
101
+ false
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,40 @@
1
+ Status of submission is tracked - merritt_response {PROCESSING, “User not authorized for Merritt submission”,
2
+
3
+ How is dublincore called?
4
+
5
+ Review & Submit button —> record.review()
6
+ creates datacite.xml file and places it in the uploads/<local_id>/ directory
7
+
8
+ Submit button —> record.send_archive_to_merritt
9
+
10
+ Set up the user_email request headers
11
+ Create a new thread
12
+ Record.Generate Merritt Zip()
13
+ Record.Send Archive to Merritt
14
+ Create a submission Log
15
+
16
+
17
+ If submission successful, remove files from local storage and add logging information
18
+ Record.Purge_Files()
19
+
20
+ Generate_Merritt_Zip()
21
+ set file path to uploads_dir - this is a linked directory shared/uploads so it will stay persistent across releases
22
+ local_id is used to name the uploads directory for a dataset.
23
+ calls self.review
24
+ calls self.dublincore
25
+ calls self.dataone
26
+ creates the zip file
27
+ purges all temp files, etc
28
+
29
+ Record.Send_Archive_to_merritt
30
+ gets info to send in curl command from MERRITT_CONFIG and user and camps
31
+ sends curl command - no notification sent if no email
32
+ No DOI/EZID is created here; Merritt does this.
33
+ returns sys_output
34
+
35
+
36
+ SWORD - Slug identifier is the local_id
37
+
38
+
39
+
40
+ if @user_email.nil?
41
  sys_output = "curl --insecure --verbose -u #{merritt_username}:#{merritt_password} -F \"file=@./#{DATASHARE_CONFIG['uploads_dir']}/#{self.local_id}/#{self.local_id}.zip\" -F \"type=container\" -F \"submitter=Dash/#{external_id}\" -F \"responseForm=xml\" -F \"profile=#{merritt_profile}\" -F \"localIdentifier=#{self.local_id}\" #{merritt_endpoint} 2>&1"
1
42
  else
2
43
  sys_output = "curl --insecure --verbose -u #{merritt_username}:#{merritt_password} -F \"file=@./#{DATASHARE_CONFIG['uploads_dir']}/#{self.local_id}/#{self.local_id}.zip\" -F \"notification=#{@user_email}\" -F \"type=container\" -F \"submitter=Dash/#{external_id}\" -F \"responseForm=xml\" -F \"profile=#{merritt_profile}\" -F \"localIdentifier=#{self.local_id}\" #{merritt_endpoint} 2>&1"
3
44
  end