stash-sword 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +42 -0
  3. data/.rubocop.yml +25 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +2 -0
  6. data/.yardopts +3 -0
  7. data/Gemfile +6 -0
  8. data/LICENSE.md +22 -0
  9. data/README.md +4 -0
  10. data/Rakefile +49 -0
  11. data/examples/example.rb +26 -0
  12. data/examples/uploads/example.zip +0 -0
  13. data/examples/uploads/example/lorem-ipsum.txt +7 -0
  14. data/examples/uploads/example/mrt-datacite.xml +22 -0
  15. data/examples/uploads/example/mrt-dc.xml +13 -0
  16. data/examples/uploads/example/stash-wrapper.xml +56 -0
  17. data/lib/stash/sword.rb +39 -0
  18. data/lib/stash/sword/client.rb +132 -0
  19. data/lib/stash/sword/deposit_receipt.rb +44 -0
  20. data/lib/stash/sword/header_utils.rb +42 -0
  21. data/lib/stash/sword/http_helper.rb +120 -0
  22. data/lib/stash/sword/iri.rb +12 -0
  23. data/lib/stash/sword/log_utils.rb +39 -0
  24. data/lib/stash/sword/module_info.rb +12 -0
  25. data/lib/stash/sword/namespace.rb +30 -0
  26. data/lib/stash/sword/sequence_io.rb +105 -0
  27. data/notes/Dash_Submission_To_Merritt.txt +40 -0
  28. data/notes/service-document.xml +15 -0
  29. data/spec/.rubocop.yml +10 -0
  30. data/spec/data/deposit_receipt_merritt.xml +25 -0
  31. data/spec/data/deposit_receipt_spec.xml +58 -0
  32. data/spec/rspec_custom_matchers.rb +118 -0
  33. data/spec/spec_helper.rb +33 -0
  34. data/spec/unit/stash/sword2/client_spec.rb +110 -0
  35. data/spec/unit/stash/sword2/deposit_receipt_spec.rb +48 -0
  36. data/spec/unit/stash/sword2/http_helper_get_spec.rb +131 -0
  37. data/spec/unit/stash/sword2/http_helper_post_spec.rb +143 -0
  38. data/spec/unit/stash/sword2/http_helper_put_spec.rb +143 -0
  39. data/spec/unit/stash/sword2/log_spec.rb +23 -0
  40. data/spec/unit/stash/sword2/namespaces_spec.rb +31 -0
  41. data/spec/unit/stash/sword2/sequence_io_spec.rb +153 -0
  42. data/stash-sword.gemspec +47 -0
  43. metadata +279 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3e0a9bb5934ab1313698dde7e68177a63ffb0597
4
+ data.tar.gz: 2ad46f630eb85d4349e24c8bf866607358c5ae4c
5
+ SHA512:
6
+ metadata.gz: 66c005b599f1359ad1fe9884a065d4a96619af9764b791c034f8c5f197f413ef50a2d08c0f654ad662496aae6c387cee9f2d9ebed96ae9503a996930dfcebaae
7
+ data.tar.gz: 124321ab9db2a6d31f43f16fb717f595337f92778219f45521adccb74d63af3e8ee26d5afbe65c078164f9175374c4aaa1435522a9809e124934d995addadc57
data/.gitignore ADDED
@@ -0,0 +1,42 @@
1
+ # Ruby defaults
2
+
3
+ /.bundle/
4
+ /.yardoc
5
+ /Gemfile.lock
6
+ /_yardoc/
7
+ /coverage/
8
+ /doc/
9
+ /pkg/
10
+ /spec/reports/
11
+ /tmp/
12
+ *.bundle
13
+ *.so
14
+ *.o
15
+ *.a
16
+ mkmf.log
17
+
18
+ # Database
19
+
20
+ db/*.sqlite3
21
+
22
+ # Logs
23
+
24
+ /log/
25
+
26
+ # IntelliJ
27
+
28
+ *.iml
29
+ *.ipr
30
+ *.iws
31
+ *.ids
32
+ .rakeTasks
33
+
34
+ # Emacs
35
+
36
+ *~
37
+ \#*
38
+ .#*
39
+
40
+ # Mac OS
41
+
42
+ .DS_Store
data/.rubocop.yml ADDED
@@ -0,0 +1,25 @@
1
+ # Ruby version
2
+ AllCops:
3
+ TargetRubyVersion: 2.2
4
+ Exclude:
5
+ - 'old/**/*'
6
+
7
+ # Disable line-length check; it's too easy for the cure to be worse than the disease
8
+ Metrics/LineLength:
9
+ Enabled: False
10
+
11
+ # Disable problematic module documentation check (see https://github.com/bbatsov/rubocop/issues/947)
12
+ Style/Documentation:
13
+ Enabled: false
14
+
15
+ # Allow one line around class body (Style/EmptyLines will still disallow two or more)
16
+ Style/EmptyLinesAroundClassBody:
17
+ Enabled: false
18
+
19
+ # Allow one line around module body (Style/EmptyLines will still disallow two or more)
20
+ Style/EmptyLinesAroundModuleBody:
21
+ Enabled: false
22
+
23
+ # Allow one line around block body (Style/EmptyLines will still disallow two or more)
24
+ Style/EmptyLinesAroundBlockBody:
25
+ Enabled: false
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.3
data/.travis.yml ADDED
@@ -0,0 +1,2 @@
1
+ language: ruby
2
+
data/.yardopts ADDED
@@ -0,0 +1,3 @@
1
+ --markup-provider=redcarpet
2
+ --markup=markdown
3
+ --default-return ''
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ # gem 'sword2ruby', git: 'https://github.com/dmolesUC3/sword2ruby.git'
6
+ # gem 'multipart-post', git: 'https://github.com/dmolesUC3/multipart-post'
data/LICENSE.md ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 The Regents of the University of California
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,4 @@
1
+ # stash-sword
2
+
3
+ A minimal SWORD 2.0 connector providing those features needed for Stash.
4
+
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ # ------------------------------------------------------------
2
+ # RSpec
3
+
4
+ require 'rspec/core'
5
+ require 'rspec/core/rake_task'
6
+
7
+ namespace :spec do
8
+
9
+ desc 'Run all unit tests'
10
+ RSpec::Core::RakeTask.new(:unit) do |task|
11
+ task.rspec_opts = %w(--color --format documentation --order default)
12
+ task.pattern = 'unit/**/*_spec.rb'
13
+ end
14
+
15
+ task all: [:unit]
16
+ end
17
+
18
+ desc 'Run all tests'
19
+ task spec: 'spec:all'
20
+
21
+ # ------------------------------------------------------------
22
+ # Coverage
23
+
24
+ desc 'Run all unit tests with coverage'
25
+ task :coverage do
26
+ ENV['COVERAGE'] = 'true'
27
+ Rake::Task['spec:unit'].execute
28
+ end
29
+
30
+ # ------------------------------------------------------------
31
+ # RuboCop
32
+
33
+ require 'rubocop/rake_task'
34
+ RuboCop::RakeTask.new
35
+
36
+ # ------------------------------------------------------------
37
+ # TODOs
38
+
39
+ desc 'List TODOs (from spec/todo.rb)'
40
+ RSpec::Core::RakeTask.new(:todo) do |task|
41
+ task.rspec_opts = %w(--color --format documentation --order default)
42
+ task.pattern = 'todo.rb'
43
+ end
44
+
45
+ # ------------------------------------------------------------
46
+ # Defaults
47
+
48
+ desc 'Run unit tests, check test coverage, run acceptance tests, check code style'
49
+ task default: [:coverage, :rubocop]
data/examples/example.rb ADDED
@@ -0,0 +1,26 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require 'stash/sword'
4
+
5
+ include Stash::Sword
6
+
7
+ username, password, collection = ARGV
8
+
9
+ client = Client.new(
10
+ username: username,
11
+ password: password,
12
+ collection_uri: URI("http://uc3-mrtsword-dev.cdlib.org:39001/mrtsword/collection/#{collection}")
13
+ )
14
+
15
+ doi = "doi:10.5072/FK#{Time.now.to_i}"
16
+ zipfile = File.expand_path('../uploads/example.zip', __FILE__)
17
+
18
+ receipt = client.create(doi: doi, zipfile: zipfile)
19
+ em_iri = receipt.em_iri
20
+ se_iri = receipt.se_iri
21
+
22
+ puts "em_iri: #{em_iri}"
23
+ puts "se_iri: #{se_iri}"
24
+
25
+ code = client.update(se_iri: se_iri, zipfile: zipfile)
26
+ puts "update response code: #{code}"
data/examples/uploads/example.zip ADDED
Binary file
data/examples/uploads/example/lorem-ipsum.txt ADDED
@@ -0,0 +1,7 @@
1
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
2
+ eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
3
+ minim veniam, quis nostrud exercitation ullamco laboris nisi ut
4
+ aliquip ex ea commodo consequat. Duis aute irure dolor in
5
+ reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
6
+ pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
7
+ culpa qui officia deserunt mollit anim id est laborum.
data/examples/uploads/example/mrt-datacite.xml ADDED
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <resource xmlns='http://datacite.org/schema/kernel-3' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd'>
3
+ <identifier identifierType='DOI'>10.5072/FK20160408dmoles</identifier>
4
+ <creators>
5
+ <creator>
6
+ <creatorName>Moles, David</creatorName>
7
+ </creator>
8
+ </creators>
9
+ <titles>
10
+ <title xml:lang='en'>Merrit SWORD upload test</title>
11
+ </titles>
12
+ <publisher>UC Office of the President</publisher>
13
+ <publicationYear>2016</publicationYear>
14
+ <subjects>
15
+ <subject xml:lang='en'>Data literacy</subject>
16
+ </subjects>
17
+ <language>en</language>
18
+ <resourceType resourceTypeGeneral='Dataset'>Text</resourceType>
19
+ <descriptions>
20
+ <description xml:lang='en' descriptionType='Abstract'>Lorem ipsum dolor sit amet.</description>
21
+ </descriptions>
22
+ </resource>
data/examples/uploads/example/mrt-dc.xml ADDED
@@ -0,0 +1,13 @@
1
+ <?xml version="1.0"?>
2
+ <qualifieddc xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xsi:noNamespaceSchemaLocation="http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd">
3
+ <dc:creator>Moles, David</dc:creator>
4
+ <dc:title>Merrit SWORD upload test</dc:title>
5
+ <dc:publisher>UC Office of the President</dc:publisher>
6
+ <dc:date>2016</dc:date>
7
+ <dc:subject>Data literacy</dc:subject>
8
+ <dc:format>Collection</dc:format>
9
+ <dcterms:extent>446</dcterms:extent>
10
+ <dc:rights>Creative Commons Attribution 4.0 International (CC-BY 4.0)</dc:rights>
11
+ <dcterms:license xsi:type="dcterms:URI">https://creativecommons.org/licenses/by/4.0/</dcterms:license>
12
+ <dc:description>Lorem ipsum dolor sit amet.</dc:description>
13
+ </qualifieddc>
data/examples/uploads/example/stash-wrapper.xml ADDED
@@ -0,0 +1,56 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <st:stash_wrapper xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
+ xsi:schemaLocation="http://dash.cdlib.org/stash_wrapper/ http://dash.cdlib.org/stash_wrapper/stash_wrapper.xsd"
4
+ xmlns:st="http://dash.cdlib.org/stash_wrapper/">
5
+ <st:identifier type="DOI">10.5072/FK20160408dmoles</st:identifier>
6
+ <st:stash_administrative>
7
+ <st:version>
8
+ <st:version_number>1</st:version_number>
9
+ <st:date>2016-04-08</st:date>
10
+ </st:version>
11
+ <st:license>
12
+ <st:name>Creative Commons Attribution 4.0 International (CC-BY)</st:name>
13
+ <st:uri>https://creativecommons.org/licenses/by/4.0/legalcode</st:uri>
14
+ </st:license>
15
+ <st:embargo>
16
+ <st:type>download</st:type>
17
+ <st:period>1 year</st:period>
18
+ <st:start>2015-04-08</st:start>
19
+ <st:end>2016-04-08</st:end>
20
+ </st:embargo>
21
+ <st:inventory num_files="1">
22
+ <st:file>
23
+ <st:pathname>lorem-ipsum.txt</st:pathname>
24
+ <st:size unit="B">446</st:size>
25
+ <st:mime_type>text/plain</st:mime_type>
26
+ </st:file>
27
+ </st:inventory>
28
+ </st:stash_administrative>
29
+ <st:stash_descriptive>
30
+ <dcs:resource xmlns:dcs="http://datacite.org/schema/kernel-3"
31
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
32
+ xsi:schemaLocation="http://datacite.org/schema/kernel-3
33
+ http://schema.datacite.org/meta/kernel-3/metadata.xsd">
34
+ <dcs:identifier identifierType="DOI">doi:10.5072/FK20160408dmoles</dcs:identifier>
35
+ <dcs:creators>
36
+ <dcs:creator>
37
+ <dcs:creatorName>Moles, David</dcs:creatorName>
38
+ </dcs:creator>
39
+ </dcs:creators>
40
+ <dcs:titles>
41
+ <dcs:title>Merrit SWORD upload test</dcs:title>
42
+ </dcs:titles>
43
+ <dcs:publisher>UC Office of the President</dcs:publisher>
44
+ <dcs:publicationYear>2016</dcs:publicationYear>
45
+ <dcs:subjects>
46
+ <dcs:subject>Data literacy</dcs:subject>
47
+ </dcs:subjects>
48
+ <dcs:resourceType resourceTypeGeneral="Dataset">Text</dcs:resourceType>
49
+ <dcs:descriptions>
50
+ <dcs:description descriptionType="Abstract">
51
+ Lorem ipsum dolor sit amet.
52
+ </dcs:description>
53
+ </dcs:descriptions>
54
+ </dcs:resource>
55
+ </st:stash_descriptive>
56
+ </st:stash_wrapper>
data/lib/stash/sword.rb ADDED
@@ -0,0 +1,39 @@
1
+ require 'uri'
2
+ require 'logger'
3
+
4
+ module Stash
5
+ # Default log level, chosen from RAILS_ENV: DEBUG for 'test', INFO for 'development', WARN otherwise
6
+ # TODO: Make this configurable
7
+ LOG_LEVEL = case ENV['RAILS_ENV'].to_s.downcase
8
+ when 'test'
9
+ Logger::DEBUG
10
+ when 'development'
11
+ Logger::INFO
12
+ else
13
+ Logger::WARN
14
+ end
15
+
16
+ module Sword
17
+ # Require every .rb file in the sword/ directory next to this file, in sorted order
18
+ Dir.glob(File.expand_path('../sword/*.rb', __FILE__)).sort.each(&method(:require))
19
+ # The shared logger; created on first use, writing to $stdout
20
+ def self.log
21
+ @log ||= new_logger(logdev: $stdout)
22
+ end
23
+ # Replaces the shared logger with a new one writing to the given log device
24
+ def self.log_device=(value)
25
+ @log = new_logger(logdev: value)
26
+ end
27
+ # Builds a Logger on `logdev` (shift_age and shift_size are passed through to Logger.new) that formats each entry as UTC time, severity, -progname-, message
28
+ def self.new_logger(logdev:, level: Stash::LOG_LEVEL, shift_age: 10, shift_size: 1024 * 1024)
29
+ logger = Logger.new(logdev, shift_age, shift_size)
30
+ logger.level = level
31
+ logger.formatter = proc do |severity, datetime, progname, msg|
32
+ "#{datetime.to_time.utc} #{severity} -#{progname}- #{msg}\n"
33
+ end
34
+ logger
35
+ end
36
+
37
+ private_class_method :new_logger
38
+ end
39
+ end
data/lib/stash/sword/client.rb ADDED
@@ -0,0 +1,132 @@
1
+ require 'digest'
2
+ require 'uri'
3
+ require 'stash/sword/header_utils'
4
+ require 'stash/sword/log_utils'
5
+ require 'stash/sword/http_helper'
6
+ require 'stash/sword/sequence_io'
7
+
8
+ module Stash
9
+ module Sword
10
+ class Client
11
+ include HeaderUtils, LogUtils
12
+
13
+ EOL = "\r\n".freeze
14
+
15
+ attr_reader :collection_uri
16
+ attr_reader :username
17
+ attr_reader :password
18
+ attr_reader :helper
19
+
20
+ # Creates a new {Client} for the specified collection URI, with the specified credentials.
21
+ #
22
+ # @param collection_uri [URI, String] The collection URI
23
+ # @param username [String] the username
24
+ # @param password [String] the password
25
+ # @param on_behalf_of [String, nil] the user for whom the original sword package was deposited on behalf of.
26
+ # Defaults to `username`.
27
+ def initialize(collection_uri:, username:, password:, on_behalf_of: nil, helper: nil)
28
+ raise 'no collection URI provided' unless collection_uri
29
+ raise 'no username provided' unless username
30
+ raise 'no password provided' unless password
31
+ @collection_uri = to_uri(collection_uri)
32
+ @username = username
33
+ @password = password
34
+ @on_behalf_of = on_behalf_of || username
35
+ @helper = helper || HTTPHelper.new(username: username, password: password, user_agent: "stash-sword #{VERSION}")
36
+ end
37
+
38
+ # Creates a new resource for the specified DOI with the specified zipfile
39
+ #
40
+ # @param doi [String] the DOI
41
+ # @param zipfile [String] the zipfile path
42
+ # @return [DepositReceipt] the deposit receipt
43
+ def create(doi:, zipfile:)
44
+ log.debug("Stash::Sword::Client.create(doi: #{doi}, zipfile: #{zipfile})")
45
+ uri = collection_uri.to_s
46
+ response = do_post(uri, zipfile, create_request_headers(zipfile, doi))
47
+ receipt_from(response)
48
+ rescue => e
49
+ log_error(e)
50
+ raise
51
+ end
52
+
53
+ # Updates a resource with a new zipfile
54
+ #
55
+ # @param edit_iri [URI, String] the Atom Edit-IRI
56
+ # @param zipfile [String] the zipfile path
57
+ def update(edit_iri:, zipfile:)
58
+ log.debug("Stash::Sword::Client.update(edit_iri: #{edit_iri}, zipfile: #{zipfile})")
59
+ uri = to_uri(edit_iri).to_s
60
+ response = maybe_redirect(do_put(uri, zipfile))
61
+ log.debug(response_to_log_msg(response))
62
+ response.code # TODO: what if anything should we return here?
63
+ rescue => e
64
+ log_error(e)
65
+ raise
66
+ end
67
+
68
+ private
69
+
70
+ def maybe_redirect(response)
71
+ return response unless [301, 302, 307].include?(response.code)
72
+ log.debug(response_to_log_msg(response))
73
+ log.debug("Response code #{response.code}; redirecting")
74
+ response.follow_get_redirection
75
+ end
76
+
77
+ def receipt_from(response)
78
+ log.debug(response_to_log_msg(response))
79
+
80
+ body = response.body.strip
81
+ return DepositReceipt.parse_xml(body) unless body.empty?
82
+
83
+ receipt_from_location(response)
84
+ end
85
+
86
+ def receipt_from_location(response)
87
+ log.debug('Desposit receipt not provided in SWORD response body')
88
+ edit_iri = response.headers[:location]
89
+ return nil unless edit_iri
90
+
91
+ log.debug("Retrieving deposit receipt from Location header Edit-IRI: #{edit_iri}")
92
+ body = helper.get(to_uri(edit_iri))
93
+ return nil unless body
94
+
95
+ DepositReceipt.parse_xml(body)
96
+ end
97
+
98
+ def do_post(uri, zipfile, headers)
99
+ File.open(zipfile, 'rb') do |file|
100
+ return helper.post(uri: uri, payload: file, headers: headers)
101
+ end
102
+ end
103
+
104
+ def do_put(uri, zipfile)
105
+ boundary = "========#{Time.now.to_i}=="
106
+ stream = stream_for(zipfile: File.open(zipfile, 'rb'), boundary: boundary)
107
+ begin
108
+ return helper.put(uri: uri, headers: update_request_headers(stream, boundary), payload: stream)
109
+ ensure
110
+ stream.close
111
+ end
112
+ end
113
+
114
+ def stream_for(zipfile:, boundary:)
115
+ content = []
116
+ # strictly speaking, do we need an Atom <entry/> first?
117
+ content << "--#{boundary}#{EOL}"
118
+ update_mime_headers(zipfile).each { |k, v| content << "#{k}: #{v}#{EOL}" }
119
+ content << EOL
120
+ content << zipfile
121
+ content << "--#{boundary}--#{EOL}"
122
+ SequenceIO.new(content).binmode
123
+ end
124
+
125
+ def to_uri(url)
126
+ ::XML::MappingExtensions.to_uri(url)
127
+ end
128
+ protected :to_uri
129
+
130
+ end
131
+ end
132
+ end