stash-sword 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +42 -0
  3. data/.rubocop.yml +25 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +2 -0
  6. data/.yardopts +3 -0
  7. data/Gemfile +6 -0
  8. data/LICENSE.md +22 -0
  9. data/README.md +4 -0
  10. data/Rakefile +49 -0
  11. data/examples/example.rb +26 -0
  12. data/examples/uploads/example.zip +0 -0
  13. data/examples/uploads/example/lorem-ipsum.txt +7 -0
  14. data/examples/uploads/example/mrt-datacite.xml +22 -0
  15. data/examples/uploads/example/mrt-dc.xml +13 -0
  16. data/examples/uploads/example/stash-wrapper.xml +56 -0
  17. data/lib/stash/sword.rb +39 -0
  18. data/lib/stash/sword/client.rb +132 -0
  19. data/lib/stash/sword/deposit_receipt.rb +44 -0
  20. data/lib/stash/sword/header_utils.rb +42 -0
  21. data/lib/stash/sword/http_helper.rb +120 -0
  22. data/lib/stash/sword/iri.rb +12 -0
  23. data/lib/stash/sword/log_utils.rb +39 -0
  24. data/lib/stash/sword/module_info.rb +12 -0
  25. data/lib/stash/sword/namespace.rb +30 -0
  26. data/lib/stash/sword/sequence_io.rb +105 -0
  27. data/notes/Dash_Submission_To_Merritt.txt +40 -0
  28. data/notes/service-document.xml +15 -0
  29. data/spec/.rubocop.yml +10 -0
  30. data/spec/data/deposit_receipt_merritt.xml +25 -0
  31. data/spec/data/deposit_receipt_spec.xml +58 -0
  32. data/spec/rspec_custom_matchers.rb +118 -0
  33. data/spec/spec_helper.rb +33 -0
  34. data/spec/unit/stash/sword2/client_spec.rb +110 -0
  35. data/spec/unit/stash/sword2/deposit_receipt_spec.rb +48 -0
  36. data/spec/unit/stash/sword2/http_helper_get_spec.rb +131 -0
  37. data/spec/unit/stash/sword2/http_helper_post_spec.rb +143 -0
  38. data/spec/unit/stash/sword2/http_helper_put_spec.rb +143 -0
  39. data/spec/unit/stash/sword2/log_spec.rb +23 -0
  40. data/spec/unit/stash/sword2/namespaces_spec.rb +31 -0
  41. data/spec/unit/stash/sword2/sequence_io_spec.rb +153 -0
  42. data/stash-sword.gemspec +47 -0
  43. metadata +279 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3e0a9bb5934ab1313698dde7e68177a63ffb0597
4
+ data.tar.gz: 2ad46f630eb85d4349e24c8bf866607358c5ae4c
5
+ SHA512:
6
+ metadata.gz: 66c005b599f1359ad1fe9884a065d4a96619af9764b791c034f8c5f197f413ef50a2d08c0f654ad662496aae6c387cee9f2d9ebed96ae9503a996930dfcebaae
7
+ data.tar.gz: 124321ab9db2a6d31f43f16fb717f595337f92778219f45521adccb74d63af3e8ee26d5afbe65c078164f9175374c4aaa1435522a9809e124934d995addadc57
data/.gitignore ADDED
@@ -0,0 +1,42 @@
1
+ # Ruby defaults
2
+
3
+ /.bundle/
4
+ /.yardoc
5
+ /Gemfile.lock
6
+ /_yardoc/
7
+ /coverage/
8
+ /doc/
9
+ /pkg/
10
+ /spec/reports/
11
+ /tmp/
12
+ *.bundle
13
+ *.so
14
+ *.o
15
+ *.a
16
+ mkmf.log
17
+
18
+ # Database
19
+
20
+ db/*.sqlite3
21
+
22
+ # Logs
23
+
24
+ /log/
25
+
26
+ # IntelliJ
27
+
28
+ *.iml
29
+ *.ipr
30
+ *.iws
31
+ *.ids
32
+ .rakeTasks
33
+
34
+ # Emacs
35
+
36
+ *~
37
+ \#*
38
+ .#*
39
+
40
+ # Mac OS
41
+
42
+ .DS_Store
data/.rubocop.yml ADDED
@@ -0,0 +1,25 @@
1
+ # Ruby version
2
+ AllCops:
3
+ TargetRubyVersion: 2.2
4
+ Exclude:
5
+ - 'old/**/*'
6
+
7
+ # Disable line-length check; it's too easy for the cure to be worse than the disease
8
+ Metrics/LineLength:
9
+ Enabled: False
10
+
11
+ # Disable problematic module documentation check (see https://github.com/bbatsov/rubocop/issues/947)
12
+ Style/Documentation:
13
+ Enabled: false
14
+
15
+ # Allow one line around class body (Style/EmptyLines will still disallow two or more)
16
+ Style/EmptyLinesAroundClassBody:
17
+ Enabled: false
18
+
19
+ # Allow one line around module body (Style/EmptyLines will still disallow two or more)
20
+ Style/EmptyLinesAroundModuleBody:
21
+ Enabled: false
22
+
23
+ # Allow one line around block body (Style/EmptyLines will still disallow two or more)
24
+ Style/EmptyLinesAroundBlockBody:
25
+ Enabled: false
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.3
data/.travis.yml ADDED
@@ -0,0 +1,2 @@
1
+ language: ruby
2
+
data/.yardopts ADDED
@@ -0,0 +1,3 @@
1
+ --markup-provider=redcarpet
2
+ --markup=markdown
3
+ --default-return ''
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ # gem 'sword2ruby', git: 'https://github.com/dmolesUC3/sword2ruby.git'
6
+ # gem 'multipart-post', git: 'https://github.com/dmolesUC3/multipart-post'
data/LICENSE.md ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 The Regents of the University of California
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,4 @@
1
+ # stash-sword
2
+
3
+ A minimal SWORD 2.0 connector providing those features needed for Stash.
4
+
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ # ------------------------------------------------------------
2
+ # RSpec
3
+
4
+ require 'rspec/core'
5
+ require 'rspec/core/rake_task'
6
+
7
+ namespace :spec do
8
+
9
+ desc 'Run all unit tests'
10
+ RSpec::Core::RakeTask.new(:unit) do |task|
11
+ task.rspec_opts = %w(--color --format documentation --order default)
12
+ task.pattern = 'unit/**/*_spec.rb'
13
+ end
14
+
15
+ task all: [:unit]
16
+ end
17
+
18
+ desc 'Run all tests'
19
+ task spec: 'spec:all'
20
+
21
+ # ------------------------------------------------------------
22
+ # Coverage
23
+
24
+ desc 'Run all unit tests with coverage'
25
+ task :coverage do
26
+ ENV['COVERAGE'] = 'true'
27
+ Rake::Task['spec:unit'].execute
28
+ end
29
+
30
+ # ------------------------------------------------------------
31
+ # RuboCop
32
+
33
+ require 'rubocop/rake_task'
34
+ RuboCop::RakeTask.new
35
+
36
+ # ------------------------------------------------------------
37
+ # TODOs
38
+
39
+ desc 'List TODOs (from spec/todo.rb)'
40
+ RSpec::Core::RakeTask.new(:todo) do |task|
41
+ task.rspec_opts = %w(--color --format documentation --order default)
42
+ task.pattern = 'todo.rb'
43
+ end
44
+
45
+ # ------------------------------------------------------------
46
+ # Defaults
47
+
48
+ desc 'Run unit tests, check test coverage, run acceptance tests, check code style'
49
+ task default: [:coverage, :rubocop]
@@ -0,0 +1,26 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require 'stash/sword'
4
+
5
+ include Stash::Sword
6
+
7
+ username, password, collection = ARGV
8
+
9
+ client = Client.new(
10
+ username: username,
11
+ password: password,
12
+ collection_uri: URI("http://uc3-mrtsword-dev.cdlib.org:39001/mrtsword/collection/#{collection}")
13
+ )
14
+
15
+ doi = "doi:10.5072/FK#{Time.now.to_i}"
16
+ zipfile = File.expand_path('../uploads/example.zip', __FILE__)
17
+
18
+ receipt = client.create(doi: doi, zipfile: zipfile)
19
+ em_iri = receipt.em_iri
20
+ se_iri = receipt.se_iri
21
+
22
+ puts "em_iri: #{em_iri}"
23
+ puts "se_iri: #{se_iri}"
24
+
25
+ code = client.update(edit_iri: se_iri, zipfile: zipfile) # Client#update requires the edit_iri: keyword; se_iri: raises ArgumentError
26
+ puts "update response code: #{code}"
Binary file
@@ -0,0 +1,7 @@
1
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
2
+ eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
3
+ minim veniam, quis nostrud exercitation ullamco laboris nisi ut
4
+ aliquip ex ea commodo consequat. Duis aute irure dolor in
5
+ reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
6
+ pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
7
+ culpa qui officia deserunt mollit anim id est laborum.
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <resource xmlns='http://datacite.org/schema/kernel-3' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd'>
3
+ <identifier identifierType='DOI'>10.5072/FK20160408dmoles</identifier>
4
+ <creators>
5
+ <creator>
6
+ <creatorName>Moles, David</creatorName>
7
+ </creator>
8
+ </creators>
9
+ <titles>
10
+ <title xml:lang='en'>Merrit SWORD upload test</title>
11
+ </titles>
12
+ <publisher>UC Office of the President</publisher>
13
+ <publicationYear>2016</publicationYear>
14
+ <subjects>
15
+ <subject xml:lang='en'>Data literacy</subject>
16
+ </subjects>
17
+ <language>en</language>
18
+ <resourceType resourceTypeGeneral='Dataset'>Text</resourceType>
19
+ <descriptions>
20
+ <description xml:lang='en' descriptionType='Abstract'>Lorem ipsum dolor sit amet.</description>
21
+ </descriptions>
22
+ </resource>
@@ -0,0 +1,13 @@
1
+ <?xml version="1.0"?>
2
+ <qualifieddc xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xsi:noNamespaceSchemaLocation="http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd">
3
+ <dc:creator>Moles, David</dc:creator>
4
+ <dc:title>Merrit SWORD upload test</dc:title>
5
+ <dc:publisher>UC Office of the President</dc:publisher>
6
+ <dc:date>2016</dc:date>
7
+ <dc:subject>Data literacy</dc:subject>
8
+ <dc:format>Collection</dc:format>
9
+ <dcterms:extent>446</dcterms:extent>
10
+ <dc:rights>Creative Commons Attribution 4.0 International (CC-BY 4.0)</dc:rights>
11
+ <dcterms:license xsi:type="dcterms:URI">https://creativecommons.org/licenses/by/4.0/</dcterms:license>
12
+ <dc:description>Lorem ipsum dolor sit amet.</dc:description>
13
+ </qualifieddc>
@@ -0,0 +1,56 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <st:stash_wrapper xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
+ xsi:schemaLocation="http://dash.cdlib.org/stash_wrapper/ http://dash.cdlib.org/stash_wrapper/stash_wrapper.xsd"
4
+ xmlns:st="http://dash.cdlib.org/stash_wrapper/">
5
+ <st:identifier type="DOI">10.5072/FK20160408dmoles</st:identifier>
6
+ <st:stash_administrative>
7
+ <st:version>
8
+ <st:version_number>1</st:version_number>
9
+ <st:date>2016-04-08</st:date>
10
+ </st:version>
11
+ <st:license>
12
+ <st:name>Creative Commons Attribution 4.0 International (CC-BY)</st:name>
13
+ <st:uri>https://creativecommons.org/licenses/by/4.0/legalcode</st:uri>
14
+ </st:license>
15
+ <st:embargo>
16
+ <st:type>download</st:type>
17
+ <st:period>1 year</st:period>
18
+ <st:start>2015-04-08</st:start>
19
+ <st:end>2016-04-08</st:end>
20
+ </st:embargo>
21
+ <st:inventory num_files="1">
22
+ <st:file>
23
+ <st:pathname>lorem-ipsum.txt</st:pathname>
24
+ <st:size unit="B">446</st:size>
25
+ <st:mime_type>text/plain</st:mime_type>
26
+ </st:file>
27
+ </st:inventory>
28
+ </st:stash_administrative>
29
+ <st:stash_descriptive>
30
+ <dcs:resource xmlns:dcs="http://datacite.org/schema/kernel-3"
31
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
32
+ xsi:schemaLocation="http://datacite.org/schema/kernel-3
33
+ http://schema.datacite.org/meta/kernel-3/metadata.xsd">
34
+ <dcs:identifier identifierType="DOI">doi:10.5072/FK20160408dmoles</dcs:identifier>
35
+ <dcs:creators>
36
+ <dcs:creator>
37
+ <dcs:creatorName>Moles, David</dcs:creatorName>
38
+ </dcs:creator>
39
+ </dcs:creators>
40
+ <dcs:titles>
41
+ <dcs:title>Merrit SWORD upload test</dcs:title>
42
+ </dcs:titles>
43
+ <dcs:publisher>UC Office of the President</dcs:publisher>
44
+ <dcs:publicationYear>2016</dcs:publicationYear>
45
+ <dcs:subjects>
46
+ <dcs:subject>Data literacy</dcs:subject>
47
+ </dcs:subjects>
48
+ <dcs:resourceType resourceTypeGeneral="Dataset">Text</dcs:resourceType>
49
+ <dcs:descriptions>
50
+ <dcs:description descriptionType="Abstract">
51
+ Lorem ipsum dolor sit amet.
52
+ </dcs:description>
53
+ </dcs:descriptions>
54
+ </dcs:resource>
55
+ </st:stash_descriptive>
56
+ </st:stash_wrapper>
@@ -0,0 +1,39 @@
1
+ require 'uri'
2
+ require 'logger'
3
+
4
+ module Stash
5
+
6
+ # TODO: Make this configurable
7
+ LOG_LEVEL = case ENV['RAILS_ENV'].to_s.downcase
8
+ when 'test'
9
+ Logger::DEBUG
10
+ when 'development'
11
+ Logger::INFO
12
+ else
13
+ Logger::WARN
14
+ end
15
+
16
+ module Sword
17
+
18
+ Dir.glob(File.expand_path('../sword/*.rb', __FILE__)).sort.each(&method(:require))
19
+
20
+ def self.log
21
+ @log ||= new_logger(logdev: $stdout)
22
+ end
23
+
24
+ def self.log_device=(value)
25
+ @log = new_logger(logdev: value)
26
+ end
27
+
28
+ def self.new_logger(logdev:, level: Stash::LOG_LEVEL, shift_age: 10, shift_size: 1024 * 1024)
29
+ logger = Logger.new(logdev, shift_age, shift_size)
30
+ logger.level = level
31
+ logger.formatter = proc do |severity, datetime, progname, msg|
32
+ "#{datetime.to_time.utc} #{severity} -#{progname}- #{msg}\n"
33
+ end
34
+ logger
35
+ end
36
+
37
+ private_class_method :new_logger
38
+ end
39
+ end
@@ -0,0 +1,132 @@
1
+ require 'digest'
2
+ require 'uri'
3
+ require 'stash/sword/header_utils'
4
+ require 'stash/sword/log_utils'
5
+ require 'stash/sword/http_helper'
6
+ require 'stash/sword/sequence_io'
7
+
8
+ module Stash
9
+ module Sword
10
+ class Client
11
+ include HeaderUtils, LogUtils
12
+
13
+ EOL = "\r\n".freeze
14
+
15
+ attr_reader :collection_uri
16
+ attr_reader :username
17
+ attr_reader :password
18
+ attr_reader :helper
19
+
20
+ # Creates a new {Client} for the specified collection URI, with the specified credentials.
21
+ #
22
+ # @param collection_uri [URI, String] The collection URI
23
+ # @param username [String] the username
24
+ # @param password [String] the password
25
+ # @param on_behalf_of [String, nil] the user on whose behalf the SWORD package is deposited.
26
+ # Defaults to `username`.
27
+ def initialize(collection_uri:, username:, password:, on_behalf_of: nil, helper: nil)
28
+ raise 'no collection URI provided' unless collection_uri
29
+ raise 'no username provided' unless username
30
+ raise 'no password provided' unless password
31
+ @collection_uri = to_uri(collection_uri)
32
+ @username = username
33
+ @password = password
34
+ @on_behalf_of = on_behalf_of || username
35
+ @helper = helper || HTTPHelper.new(username: username, password: password, user_agent: "stash-sword #{VERSION}")
36
+ end
37
+
38
+ # Creates a new resource for the specified DOI with the specified zipfile
39
+ #
40
+ # @param doi [String] the DOI
41
+ # @param zipfile [String] the zipfile path
42
+ # @return [DepositReceipt] the deposit receipt
43
+ def create(doi:, zipfile:)
44
+ log.debug("Stash::Sword::Client.create(doi: #{doi}, zipfile: #{zipfile})")
45
+ uri = collection_uri.to_s
46
+ response = do_post(uri, zipfile, create_request_headers(zipfile, doi))
47
+ receipt_from(response)
48
+ rescue => e
49
+ log_error(e)
50
+ raise
51
+ end
52
+
53
+ # Updates a resource with a new zipfile
54
+ #
55
+ # @param edit_iri [URI, String] the Atom Edit-IRI
56
+ # @param zipfile [String] the zipfile path
57
+ def update(edit_iri:, zipfile:)
58
+ log.debug("Stash::Sword::Client.update(edit_iri: #{edit_iri}, zipfile: #{zipfile})")
59
+ uri = to_uri(edit_iri).to_s
60
+ response = maybe_redirect(do_put(uri, zipfile))
61
+ log.debug(response_to_log_msg(response))
62
+ response.code # TODO: what if anything should we return here?
63
+ rescue => e
64
+ log_error(e)
65
+ raise
66
+ end
67
+
68
+ private
69
+
70
+ def maybe_redirect(response)
71
+ return response unless [301, 302, 307].include?(response.code)
72
+ log.debug(response_to_log_msg(response))
73
+ log.debug("Response code #{response.code}; redirecting")
74
+ response.follow_get_redirection
75
+ end
76
+
77
+ def receipt_from(response)
78
+ log.debug(response_to_log_msg(response))
79
+
80
+ body = response.body.strip
81
+ return DepositReceipt.parse_xml(body) unless body.empty?
82
+
83
+ receipt_from_location(response)
84
+ end
85
+
86
+ def receipt_from_location(response)
87
+ log.debug('Desposit receipt not provided in SWORD response body')
88
+ edit_iri = response.headers[:location]
89
+ return nil unless edit_iri
90
+
91
+ log.debug("Retrieving deposit receipt from Location header Edit-IRI: #{edit_iri}")
92
+ body = helper.get(to_uri(edit_iri))
93
+ return nil unless body
94
+
95
+ DepositReceipt.parse_xml(body)
96
+ end
97
+
98
+ def do_post(uri, zipfile, headers)
99
+ File.open(zipfile, 'rb') do |file|
100
+ return helper.post(uri: uri, payload: file, headers: headers)
101
+ end
102
+ end
103
+
104
+ def do_put(uri, zipfile)
105
+ boundary = "========#{Time.now.to_i}=="
106
+ stream = stream_for(zipfile: File.open(zipfile, 'rb'), boundary: boundary)
107
+ begin
108
+ return helper.put(uri: uri, headers: update_request_headers(stream, boundary), payload: stream)
109
+ ensure
110
+ stream.close
111
+ end
112
+ end
113
+
114
+ def stream_for(zipfile:, boundary:)
115
+ content = []
116
+ # strictly speaking, do we need an Atom <entry/> first?
117
+ content << "--#{boundary}#{EOL}"
118
+ update_mime_headers(zipfile).each { |k, v| content << "#{k}: #{v}#{EOL}" }
119
+ content << EOL
120
+ content << zipfile
121
+ content << "--#{boundary}--#{EOL}"
122
+ SequenceIO.new(content).binmode
123
+ end
124
+
125
+ def to_uri(url)
126
+ ::XML::MappingExtensions.to_uri(url)
127
+ end
128
+ protected :to_uri
129
+
130
+ end
131
+ end
132
+ end