dataverse 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.env.template +3 -0
- data/.gitignore +16 -0
- data/.rspec +3 -0
- data/.travis.yml +6 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +62 -0
- data/LICENSE.txt +21 -0
- data/README.md +598 -0
- data/Rakefile +8 -0
- data/bin/console +19 -0
- data/bin/setup +8 -0
- data/dataverse.gemspec +31 -0
- data/lib/dataverse.rb +7 -0
- data/lib/dataverse/base.rb +124 -0
- data/lib/dataverse/dataset.rb +376 -0
- data/lib/dataverse/dataverse.rb +157 -0
- data/lib/dataverse/errors.rb +27 -0
- data/lib/dataverse/version.rb +5 -0
- metadata +80 -0
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# Interactive console for experimenting with the gem.
# Loads variables from a .env file first (dotenv), then the bundle, then the gem.
require 'dotenv/load'

require 'bundler/setup'
require 'dataverse'

# Pretty printer made available inside the console session.
require 'awesome_print'

# Pry is the default REPL; set IRB_CONSOLE in the environment to get IRB instead.
if ENV['IRB_CONSOLE']
  require 'irb'
  IRB.start
else
  require 'pry'
  Pry.start
end
|
data/bin/setup
ADDED
data/dataverse.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true

require_relative "lib/dataverse/version"

# Packaging metadata for the dataverse gem.
Gem::Specification.new do |spec|
  spec.name        = "dataverse"
  spec.version     = Dataverse::VERSION
  spec.authors     = ["Kris Dekeyser"]
  spec.email       = ["kris.dekeyser@libis.be"]

  spec.summary     = "Dataverse API."
  spec.description = "Dataverse.org API wrapper."
  spec.homepage    = "https://rubygems.org/gems/dataverse"
  spec.license     = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.metadata["homepage_uri"]    = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/libis/dataverse_api"
  spec.metadata["changelog_uri"]   = "https://github.com/libis/dataverse_api/CHANGELOG.md"

  # Ship every file tracked by git except the test suites.
  # `git ls-files -z` lists the tracked files separated by NUL bytes.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.grep_v(%r{\A(?:test|spec|features)/})
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "rest-client", "~> 2.0"
end
|
data/lib/dataverse.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rest-client'
|
4
|
+
require 'json'
|
5
|
+
require 'rexml/document'
|
6
|
+
|
7
|
+
require 'forwardable'
|
8
|
+
|
9
|
+
module Dataverse
|
10
|
+
# Shared behaviour of the API resource classes: keeps the (frozen) data
# obtained from the API, delegates read access to it and performs the HTTP
# calls through RestClient.
class Base
  extend Forwardable

  # @return [Object] the frozen data last stored by #init
  attr_reader :api_data

  def_delegators :@api_data, :[], :fetch, :keys, :dig

  # Reloads the data through #get_data and stores it again.
  def refresh
    init(get_data)
  end

  # Resources are equal when they hold equal API data. Anything that is not
  # a Base compares unequal instead of raising NoMethodError.
  def ==(other)
    other.is_a?(Base) && api_data == other.api_data
  end

  # Same as #== so resources behave consistently as Hash keys.
  def eql?(other)
    self == other
  end

  # Hash code derived from the API data, consistent with #eql?.
  def hash
    api_data.hash
  end

  # Performs a request against the installation configured through the
  # environment variables API_URL and API_TOKEN.
  #
  # @param url [String] path relative to API_URL (a leading '/' is dropped)
  # @param method [Symbol] HTTP verb handed to RestClient
  # @param headers [Hash] extra request headers (not modified)
  # @param params [Hash] query parameters
  # @param body [Object, nil] payload; a Hash is sent as JSON and a
  #   REXML::Document as XML text when the content type matches
  # @param format [Symbol] :api, :json, :xml, :raw, :response, :status;
  #   any other value returns the raw response body
  # @param block [Proc, nil] streaming handler passed on as :block_response;
  #   when given it overrides format
  # @param options [Hash] extra RestClient::Request options (not modified)
  # @return [Object] depends on format: the 'data' member for :api, parsed
  #   JSON for :json, a REXML::Document for :xml, the response object for
  #   :raw/:block/:response, the HTTP code for :status
  # @raise [Error] when the environment is not configured or the API
  #   reports a status other than 'OK'
  def self.api_call(url, method: :get, headers: {}, params: {}, body: nil, format: :api, block: nil, options: {})
    unless ENV.key?('API_URL') && ENV.key?('API_TOKEN')
      raise Error.new("Set environment variables 'API_URL' and 'API_TOKEN'")
    end

    # Work on copies so hashes owned by the caller are never modified.
    headers = headers.dup
    options = options.dup

    url = ENV['API_URL'].chomp('/') + '/' + url.sub(%r{\A/}, '')

    headers['X-Dataverse-key'] = ENV['API_TOKEN']
    headers[:params] = params unless params.empty?

    format = :block if block

    case format
    when :xml
      headers[:accept] = :xml
      headers[:content_type] ||= :xml
    when :api, :json
      headers[:accept] = :json
      headers[:content_type] ||= :json
    when :raw
      options[:raw_response] = true
    when :block
      options[:block_response] = block
    end

    body = body.to_json if body.is_a?(Hash) && headers[:content_type] == :json
    if body.is_a?(REXML::Document) && headers[:content_type] == :xml
      # Document#write without a target prints to $stdout; serialise into a
      # String so the XML is actually sent as the payload.
      xml = +''
      body.write(xml)
      body = xml
    end

    response = RestClient::Request.execute(
      method: method,
      url: url,
      headers: headers,
      payload: body,
      **options
    )

    case format
    when :api
      data = JSON.parse(response.body)
      raise Error.new(data['message']) unless data['status'] == 'OK'
      data['data']
    when :xml
      REXML::Document.new(response.body)
    when :json
      JSON.parse(response.body)
    when :raw, :block, :response
      response
    when :status
      response.code
    else
      response.body
    end
  rescue RestClient::Exception => e
    # When the server answered with an API error document, surface its
    # message as an Error whose backtrace starts at the first frame inside
    # this library that is not a call/api_call wrapper.
    if e.http_body =~ /^\s*{\s*"status"\s*:\s*"ERROR"\s*,\s*"message"\s*:\s*"/
      regex = /lib\/dataverse\/(?!.*:in\s*`.*(api_)?call'$)/
      raise Error.new(JSON.parse(e.http_body)['message'],
                      backtrace: e.backtrace.drop_while { |line| !regex.match?(line) })
    end
    raise
  end

  protected

  # Stores and freezes the data backing this resource.
  def init(data)
    @api_data = data
    @api_data.freeze
  end

  # Source of the data used by #refresh; here simply the current data.
  def get_data
    @api_data
  end

  # Instance-level shortcut for the class-level api_call.
  def api_call(url, **args)
    self.class.api_call(url, **args)
  end
end
|
118
|
+
end
|
119
|
+
|
120
|
+
# if log = ENV['RESTCLIENT_LOG']
|
121
|
+
# RestClient.log = STDOUT if log.upcase == 'STDOUT'
|
122
|
+
# RestClient.log = STDERR if log.upcase == 'STDERR'
|
123
|
+
# RestClient.log = log
|
124
|
+
# end
|
@@ -0,0 +1,376 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base'
|
4
|
+
|
5
|
+
module Dataverse
|
6
|
+
class Dataset < Base
|
7
|
+
|
8
|
+
attr_reader :id
|
9
|
+
|
10
|
+
# Builds a Dataset for the given identifier.
#
# @param id [Object] identifier used in the `datasets/<id>` API paths
# @return [Dataset]
def self.id(id)
  Dataset.new(id)
end
|
13
|
+
|
14
|
+
# Looks a dataset up by persistent identifier and wraps the 'id' the API
# reports for it.
#
# @param pid [String] persistent identifier sent as 'persistentId'
# @return [Dataset]
def self.pid(pid)
  lookup = { 'persistentId' => pid }
  Dataset.new(api_call('datasets/:persistentId', params: lookup)['id'])
end
|
18
|
+
|
19
|
+
# Creates a new dataset in a dataverse.
#
# Delegates to the class-level `parse` builder of this file; the previous
# target `new_dataset` is not defined anywhere in this class or in Base.
#
# @param data [Hash, String] dataset description (see `parse`)
# @param dataverse [Object] target dataverse (object or identifier)
# @return [Dataset]
def self.create(data:, dataverse:)
  parse(dataverse, data)
end
|
22
|
+
|
23
|
+
# Imports an existing dataset, with its persistent identifier, into a
# dataverse.
#
# Delegates to the class-level `parse` builder of this file; the previous
# target `new_dataset` is not defined anywhere in this class or in Base.
#
# @param data [Hash, String] dataset description (see `parse`)
# @param dataverse [Object] target dataverse (object or identifier)
# @param pid [String] persistent identifier of the imported dataset
# @param publish [Boolean] passed on as the `publish` option
# @param ddi [Boolean] passed on as the `ddi` option
# @return [Dataset]
def self.import(data:, dataverse:, pid:, publish: false, ddi: false)
  parse(dataverse, data, import: pid, publish: publish, ddi: ddi)
end
|
26
|
+
|
27
|
+
# Deletes the draft version of the dataset and drops it from the local caches.
#
# @return [Object] the 'message' member of the API answer
# @raise [Error] when the dataset has no draft version
def delete
  raise Error.new 'Can only delete draft version' unless draft_version

  # Make sure the memoized version list exists before :draft is removed from it.
  versions
  result = call('versions/:draft', method: :delete)
  @version_data.delete(:draft)
  @metadata.delete(:draft)
  @files.delete(:draft)
  @version_numbers&.delete(:draft)
  # Without any published version nothing is left: reset to empty data.
  init({}) if published_versions.empty?
  result['message']
end
|
38
|
+
|
39
|
+
# Submits the dataset for review (POST to 'submitForReview').
#
# @return [Object] the answer of the API call
def submit
  # The verb must be the symbol :post; a bare `post` is an undefined name.
  call('submitForReview', method: :post)
end
|
42
|
+
|
43
|
+
# Returns the dataset to its author (POST to 'returnToAuthor').
#
# @param reason [Object] request body explaining the rejection
# @return [Object] the answer of the API call
def reject(reason)
  # The verb must be the symbol :post; a bare `post` is an undefined name.
  call('returnToAuthor', method: :post, body: reason)
end
|
46
|
+
|
47
|
+
# Publishes the dataset as a major or minor release.
#
# @param major [Boolean] true for a 'major' release, false for 'minor'
# @return [String, nil] a message for HTTP status 200 or 202, nil for any
#   other status
def publish(major: true)
  release_type = major ? 'major' : 'minor'
  status = call('actions/:publish', method: :post, params: {type: release_type}, format: :status)
  case status
  when 200 then "Dataset #{pid} published"
  when 202 then "Dataset #{pid} waiting for review"
  end
end
|
54
|
+
|
55
|
+
# Performs an API call relative to this dataset ('datasets/<id>/<url>').
#
# @param url [String] path below the dataset
# @param args [Hash] options passed on to api_call unchanged
# @return [Object] whatever api_call returns
def call(url, **args)
  dataset_path = "datasets/#{id}/#{url}"
  api_call(dataset_path, **args)
end
|
58
|
+
|
59
|
+
# Persistent identifier recorded in the data of the given version.
#
# @param version [Object] version selector, :latest by default
# @return [Object] value stored under 'datasetPersistentId'
# @raise [KeyError] when the version data has no such entry
def pid(version: :latest)
  details = version_data(version)
  details.fetch('datasetPersistentId')
end
|
62
|
+
|
63
|
+
# Storage size of the dataset, taken from the first run of digits and commas
# in the 'message' of the 'storagesize' answer.
#
# Fixes two defects: the arguments were separated by '.' instead of ','
# (a syntax error) and the query parameter was misspelled 'includCached'.
#
# @return [Integer] size with the thousands separators removed
def size
  data = call('storagesize', params: {includeCached: 'true'})
  data['message'][/[,\d]+/].delete(',').to_i
end
|
67
|
+
|
68
|
+
# All selectors that can be resolved for this dataset (memoized): :latest,
# :published when something has been released, :draft when a draft is
# loaded, followed by the released version numbers.
#
# @return [Array]
def versions
  @version_numbers ||= begin
    draft = draft_version
    released = published_versions
    aliases = released.empty? ? [:latest] : [:latest, :published]
    aliases + [draft].compact + released
  end
end
|
75
|
+
|
76
|
+
# @return [Symbol, nil] :draft when draft data is loaded, nil otherwise
def draft_version
  :draft if @version_data.key?(:draft)
end
|
79
|
+
|
80
|
+
# Numbers of all versions the API reports as 'RELEASED' (memoized).
# Each number is "<versionNumber>.<versionMinorNumber>" converted to a Float.
#
# @return [Array<Float>]
def published_versions
  @published_versions ||= call('versions')
    .select { |entry| entry['versionState'] == 'RELEASED' }
    .map { |entry| "#{entry['versionNumber']}.#{entry['versionMinorNumber']}".to_f }
end
|
86
|
+
|
87
|
+
# Resolves a version selector without raising when it cannot be found.
#
# @param version [Object] selector, :latest by default
# @return [Object, nil] result of resolve_version, nil when unknown
def version(version = :latest)
  resolve_version(version, raise_if_not_found: false)
end
|
90
|
+
|
91
|
+
# Title stored in the metadata of the given version.
#
# @param version [Object] version selector, :latest by default
# @return [Object] value of the 'title' field
# @raise [KeyError] when the metadata has no 'title'
def title(version: :latest)
  fields = metadata(version: version)
  fields.fetch('title')
end
|
94
|
+
|
95
|
+
# Name of the first author listed in the metadata of the given version.
#
# @param version [Object] version selector, :latest by default
# @return [Object] 'authorName' of the first 'author' entry
# @raise [KeyError] when 'author' or 'authorName' is missing
def author(version: :latest)
  first_author = metadata(version: version).fetch('author').first
  first_author.fetch('authorName')
end
|
98
|
+
|
99
|
+
# Last update time of the given version, converted to local time.
#
# @param version [Object] version selector, :latest by default
# @return [Time] parsed from the 'lastUpdateTime' entry
def updated(version: :latest)
  timestamp = version_data(version).fetch('lastUpdateTime')
  Time.parse(timestamp).getlocal
end
|
102
|
+
|
103
|
+
# Creation time of the given version, converted to local time.
#
# @param version [Object] version selector, :latest by default
# @return [Time] parsed from the 'createTime' entry
def created(version: :latest)
  timestamp = version_data(version).fetch('createTime')
  Time.parse(timestamp).getlocal
end
|
106
|
+
|
107
|
+
# Release time of the given version, converted to local time.
#
# @param version [Object] version selector, :published by default
# @return [Time, nil] nil when the version data has no 'releaseTime' entry
def published(version: :published)
  if version_data(version).has_key?('releaseTime')
    released_at = version_data(version).fetch('releaseTime')
    Time.parse(released_at).getlocal
  end
end
|
111
|
+
|
112
|
+
# Names of the metadata fields available for the given version.
#
# @param version [Object] version selector, :latest by default
# @return [Array] the metadata keys, or an empty array without metadata
def metadata_fields(version: :latest)
  fields = metadata(version: version)
  fields&.keys || []
end
|
115
|
+
|
116
|
+
MD_TYPES_XML=['ddi', 'oai_ddi', 'dcterms', 'oai_dc', 'Datacite', 'oai_datacite']
|
117
|
+
MD_TYPES_JSON=['schema.org', 'OAI_ORE', 'dataverse_json']
|
118
|
+
MD_TYPES=['rdm', 'raw'] + MD_TYPES_JSON + MD_TYPES_XML
|
119
|
+
|
120
|
+
# Exports the metadata of the dataset in one of the MD_TYPES formats.
#
# 'rdm' and 'raw' are answered locally by rdm_data / raw_data; every other
# known type is requested from 'datasets/export' as XML or JSON.
#
# @param md_type [String, Symbol] one of MD_TYPES
# @return [Object, nil] nil when the dataset has no published version
# @raise [Error] for an unknown metadata type
def export_metadata(md_type)
  return nil unless version(:published)

  type_name = md_type.to_s
  return rdm_data if type_name == 'rdm'
  return raw_data if type_name == 'raw'

  format =
    if MD_TYPES_XML.include?(type_name)
      :xml
    elsif MD_TYPES_JSON.include?(type_name)
      :json
    else
      raise Error.new("Unknown metadata format: '#{md_type}'")
    end
  api_call('datasets/export', params: {exporter: md_type, persistentId: pid}, format: format)
end
|
136
|
+
|
137
|
+
# Dataset data, version data, packed metadata and files of one version
# combined into a single Hash.
#
# @param version [Object] version selector, :published by default
# @return [Hash, nil] nil when the version cannot be resolved
def rdm_data(version: :published)
  return nil unless version(version)

  combined = api_data.merge(version_data(version))
  combined = combined.merge('metadata' => metadata(version: version))
  combined.merge('files' => files(version: version))
end
|
144
|
+
|
145
|
+
# Dataset and version data together with the metadata blocks as delivered by
# the API, wrapped in a 'datasetVersion' Hash.
#
# The relative paths no longer start with '/', which produced a double slash
# ('datasets/<id>//versions/...') once `call` prefixed the dataset path; they
# now have the same shape as the path used by download_size.
#
# @param version [Object] version selector, :latest by default
# @param with_files [Boolean] also fetch the file list of the version
# @return [Hash] { 'datasetVersion' => Hash }
def raw_data(version: :latest, with_files: false)
  # merge already returns a new Hash, so api_data itself stays untouched.
  result = api_data.merge(version_data(resolve_version(version)))
  version_path = "versions/#{version_string(version)}"
  result['metadataBlocks'] = call("#{version_path}/metadata")
  result['files'] = call("#{version_path}/files") if with_files
  { 'datasetVersion' => result }
end
|
151
|
+
|
152
|
+
# Packed metadata of the given version.
#
# @param version [Object] version selector, :latest by default
# @return [Hash] empty when nothing is stored for the resolved version
def metadata(version: :latest)
  key = resolve_version(version)
  @metadata[key] || {}
end
|
155
|
+
|
156
|
+
# Packed file list of the given version.
#
# @param version [Object] version selector, :latest by default
# @return [Array] empty when nothing is stored for the resolved version
def files(version: :latest)
  key = resolve_version(version)
  @files[key] || []
end
|
159
|
+
|
160
|
+
# Download size of the given version, taken from the first run of digits and
# commas in the 'message' of the 'downloadsize' answer.
#
# @param version [Object] version selector, :latest by default
# @return [Integer] size with the thousands separators removed
def download_size(version: :latest)
  answer = call("versions/#{version_string(version)}/downloadsize")
  digits = answer['message'][/[,\d]+/]
  digits.delete(',').to_i
end
|
164
|
+
|
165
|
+
# Streams the files of the dataset into a local file.
#
# The target is opened in binary mode ('wb') so the received bytes are
# written unchanged on every platform; File.open closes it when the block
# ends, so no explicit close is needed.
#
# @param filename [String] path of the file to write
# @param version [Object, nil] version selector; nil requests the default
#   download without a '/versions/...' suffix
# @return [Integer, false] number of bytes written, or false when
#   response.value raises Net::HTTPServerException
# @raise [Error] when the requested version does not resolve to a string
def download(filename = 'dataverse_files.zip', version: nil)
  if version
    v = version_string(version)
    raise Error.new("Version '#{version}' does not exist") unless v
    version = v
  end
  File.open(filename, 'wb') do |f|
    size = 0
    # Must stay a proc: `return false` leaves #download itself while the
    # request is still running underneath it.
    block = proc do |response|
      begin
        response.value
        response.read_body do |chunk|
          size += chunk.size
          f.write chunk
        end
      rescue Net::HTTPServerException
        return false
      end
    end
    url = 'access/dataset/:persistentId'
    url += "/versions/#{version}" if version
    params = {persistentId: pid}
    api_call(url, params: params, block: block)
    size
  end
end
|
190
|
+
|
191
|
+
protected
|
192
|
+
|
193
|
+
# Remembers the identifier and loads the dataset through get_data.
#
# @param id [Object] identifier used in the `datasets/<id>` API paths
def initialize(id)
  @id = id
  loaded = get_data
  init(loaded)
end
|
197
|
+
|
198
|
+
# Clears every per-version cache, then hands the processed dataset data to
# the parent implementation.
#
# @param data [Hash, nil] dataset data, typically as returned by get_data
def init(data)
  @version_data, @metadata, @files = {}, {}, {}
  @version_numbers = @published_versions = nil
  super(process_data(data))
end
|
206
|
+
|
207
|
+
# Fetches the dataset from the API ('datasets/<id>').
#
# @return [Object] the answer of api_call
def get_data
  dataset_path = "datasets/#{id}"
  api_call(dataset_path)
end
|
210
|
+
|
211
|
+
# Turns a version selector into the key under which the version is cached,
# loading the version from the API when it is not cached yet.
#
# @param version [Object] textual or symbolic alias (draft, latest,
#   published, latest-published), or a number / numeric string
# @param raise_if_not_found [Boolean] raise instead of returning nil
# @return [Symbol, Float, nil] cache key, nil when unknown and not raising
# @raise [VersionError] when the version is unknown and raising is requested
def resolve_version(version, raise_if_not_found: true)
  requested = version

  # Normalise textual aliases; numbers and other strings become floats.
  version =
    case version
    when ':draft', 'draft' then :draft
    when ':latest', 'latest' then :latest
    when ':published', 'published', ':latest-published', 'latest-published' then :published
    when Numeric, String then version.to_f
    else version
    end

  # Replace the moving aliases by a concrete version.
  if version == :latest
    version = draft_version || published_versions.max
  elsif version == :published
    version = published_versions.max
  end

  return version if @version_data.keys.include?(version)

  # Not cached yet: only versions the dataset actually has may be fetched.
  version = versions.find { |known| known == version }
  raise VersionError.new(requested) if version.nil? && raise_if_not_found
  return nil unless version

  process_version_data(call("versions/#{version}"))
  version
end
|
244
|
+
|
245
|
+
# Version selector in the textual form used inside API paths: symbols get a
# ':' prefix, numbers are converted with to_s, anything else is returned as is.
#
# @param version [Object] version selector
# @return [String, Object]
def version_string(version)
  resolved = resolve_version(version)
  return ":#{resolved}" if resolved.is_a?(Symbol)
  return resolved.to_s if resolved.is_a?(Numeric)

  resolved
end
|
256
|
+
|
257
|
+
# Cached data of the given version, with the 'id' key renamed to 'versionId'.
#
# @param version [Object] version selector
# @return [Hash] a new Hash; the cached data is not modified
def version_data(version)
  # The result was previously assigned to a local that was never read.
  @version_data[resolve_version(version)].transform_keys { |k| k == 'id' ? 'versionId' : k }
end
|
260
|
+
|
261
|
+
private
|
262
|
+
|
263
|
+
# Splits the 'latestVersion' part off the dataset data and caches it.
#
# @param data [Hash, nil] dataset data; 'latestVersion' is removed from it
# @return [Hash] the remaining dataset data, or {} for nil / empty input
def process_data(data)
  return {} if data.nil? || data.empty?

  process_version_data(data.delete('latestVersion'))
  data
end
|
269
|
+
|
270
|
+
# Packs the metadata blocks and files of one version and caches them,
# together with the remaining version data, under the version's number.
#
# @param data [Hash] version data; 'metadataBlocks' and 'files' are removed
# @return [Symbol, Float] the key under which the version was stored
def process_version_data(data)
  packed_metadata = pack_metadata(data.delete('metadataBlocks'))
  packed_files = pack_files(data.delete('files'))
  get_version_number(data).tap do |number|
    store_data(number, data, packed_metadata, packed_files)
  end
end
|
277
|
+
|
278
|
+
# Cache key of a version: :draft for drafts, "<major>.<minor>" as a Float
# for released versions.
#
# @param data [Hash] version data with 'versionState' and, for released
#   versions, 'versionNumber' and 'versionMinorNumber'
# @return [Symbol, Float]
# @raise [Error] for any other version state
def get_version_number(data)
  case data['versionState']
  when 'DRAFT'
    :draft
  when 'RELEASED'
    "#{data['versionNumber']}.#{data['versionMinorNumber']}".to_f
  else
    # The message was missing the closing quote around the state.
    raise Error.new("Unsupported version state: '#{data['versionState']}'")
  end
end
|
288
|
+
|
289
|
+
# Freezes the three parts of a version and caches them under its key.
#
# @param version [Symbol, Float] cache key
# @param data [Hash] version data
# @param metadata [Hash] packed metadata
# @param files [Array] packed file list
# @return [Array] the frozen file list
def store_data(version, data, metadata, files)
  [data, metadata, files].each(&:freeze)
  @version_data[version] = data
  @metadata[version] = metadata
  @files[version] = files
end
|
294
|
+
|
295
|
+
# Flattens the metadata blocks into one Hash keyed by field type name.
#
# @param metadata [Hash] blocks, each with a 'fields' list
# @return [Hash] 'typeName' => value as produced by field_to_value
def pack_metadata(metadata)
  metadata.values.each_with_object({}) do |block, packed|
    block['fields'].each do |field|
      packed[field['typeName']] = field_to_value(field)
    end
  end
end
|
304
|
+
|
305
|
+
# Flattens every file entry by merging its nested 'dataFile' details into
# the entry itself.
#
# @param files [Array<Hash>] file entries; 'dataFile' is removed from each
# @return [Array<Hash>] the flattened entries
def pack_files(files)
  files.map do |entry|
    details = entry.delete('dataFile')
    entry.merge(details)
  end
end
|
311
|
+
|
312
|
+
# Extracts the plain value of a metadata field.
#
# @param field [Hash] field with 'typeClass' and 'value'
# @return [Object] the value itself for primitive and controlled vocabulary
#   fields, the result of compound_to_value for compound fields
# @raise [Error] for any other type class
def field_to_value(field)
  type_class = field['typeClass']
  case type_class
  when 'primitive', 'controlledVocabulary'
    field['value']
  when 'compound'
    compound_to_value(field['value'])
  else
    raise Error.new("Unsupported typeClass: '#{type_class}'")
  end
end
|
324
|
+
|
325
|
+
# Converts the value of a compound field into plain Hashes.
#
# @param data [Array, Hash] a list of compound values, or one compound value
#   whose members are fields themselves
# @return [Array<Hash>, Hash] member 'typeName' => value from field_to_value
def compound_to_value(data)
  return data.map { |entry| compound_to_value(entry) } if data.is_a?(Array)

  data.values.each_with_object({}) do |member, packed|
    packed[member['typeName']] = field_to_value(member)
  end
end
|
333
|
+
|
334
|
+
# Posts a dataset description to a dataverse and wraps the created dataset.
#
# @param dataverse [Object] a Dataverse object (its id is used) or an
#   identifier used as is in the request path
# @param data [Hash, String, File, StringIO] dataset description: a Hash
#   (sent as JSON), the name of an existing file, or a JSON string (any
#   string when ddi is set)
# @param import [String, nil] persistent identifier; when given the request
#   goes to the ':import' endpoint with this value as 'pid'
# @param publish [Boolean] sent as the 'release' parameter ('yes' / 'no')
# @param ddi [Boolean] send the data with an XML content type
# @return [Dataset] wrapper for the 'id' reported by the API
# @raise [Error] when data cannot be turned into a readable stream
def self.parse(dataverse, data, import: nil, publish: false, ddi: false)
  dataverse = dataverse.id if dataverse.is_a?(Dataverse)

  data = StringIO.new(data.to_json) if data.is_a?(Hash)

  if data.is_a?(String)
    begin
      if File.exist?(data)
        data = File.open(data, 'r')
      elsif ddi || JSON::parse(data)
        data = StringIO.new(data)
      end
    rescue JSON::ParserError, SystemCallError
      # Invalid JSON or an unreadable file: reported as unparsable data
      # below. (`File`, listed here before, is not an exception class and
      # could never match.)
      data = nil
    end
  end

  unless data.is_a?(File) || data.is_a?(StringIO)
    raise Error.new("Data could not be parsed. Should be a Hash, filename or JSON string.")
  end

  url = "dataverses/#{dataverse}/datasets"
  url += '/:import' if import

  params = {release: publish ? 'yes' : 'no'}
  params[:pid] = import if import

  headers = {content_type: :json}
  headers[:content_type] = :xml if ddi

  # The API answer is no longer echoed to stdout (leftover debug output).
  result = api_call(url, method: :post, headers: headers, body: data, params: params)

  Dataset.id(result['id'])
ensure
  data.close if data.is_a?(File)
end
|
374
|
+
|
375
|
+
end
|
376
|
+
end
|