dataverse 0.1.0
- checksums.yaml +7 -0
- data/.env.template +3 -0
- data/.gitignore +16 -0
- data/.rspec +3 -0
- data/.travis.yml +6 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +62 -0
- data/LICENSE.txt +21 -0
- data/README.md +598 -0
- data/Rakefile +8 -0
- data/bin/console +19 -0
- data/bin/setup +8 -0
- data/dataverse.gemspec +31 -0
- data/lib/dataverse.rb +7 -0
- data/lib/dataverse/base.rb +124 -0
- data/lib/dataverse/dataset.rb +376 -0
- data/lib/dataverse/dataverse.rb +157 -0
- data/lib/dataverse/errors.rb +27 -0
- data/lib/dataverse/version.rb +5 -0
- metadata +80 -0
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,19 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'dotenv/load'

require "bundler/setup"
require "dataverse"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

require 'awesome_print'
if ENV['IRB_CONSOLE']
  require "irb"
  IRB.start
else
  require "pry"
  Pry.start
end
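Note that bin/console loads dotenv before the gem itself, so API credentials can live in a local .env file; running `IRB_CONSOLE=1 bin/console` drops into IRB instead of the default Pry session.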
data/bin/setup
ADDED
data/dataverse.gemspec
ADDED
@@ -0,0 +1,31 @@
# frozen_string_literal: true

require_relative "lib/dataverse/version"

Gem::Specification.new do |spec|
  spec.name          = "dataverse"
  spec.version       = Dataverse::VERSION
  spec.authors       = ["Kris Dekeyser"]
  spec.email         = ["kris.dekeyser@libis.be"]

  spec.summary       = "Dataverse API."
  spec.description   = "Dataverse.org API wrapper."
  spec.homepage      = "https://rubygems.org/gems/dataverse"
  spec.license       = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/libis/dataverse_api"
  spec.metadata["changelog_uri"] = "https://github.com/libis/dataverse_api/CHANGELOG.md"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "rest-client", "~> 2.0"
end
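To consume this release from another project, a standard Bundler declaration is enough; the pessimistic constraint below is an assumption, not part of the gem itself:

gem 'dataverse', '~> 0.1.0'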
data/lib/dataverse.rb
ADDED
data/lib/dataverse/base.rb
ADDED
@@ -0,0 +1,124 @@
# frozen_string_literal: true

require 'rest-client'
require 'json'
require 'rexml/document'

require 'forwardable'

module Dataverse
  class Base
    extend Forwardable

    attr_reader :api_data

    def_delegators :@api_data, :[], :fetch, :keys, :dig

    def refresh
      init(get_data)
    end

    protected

    def init(data)
      @api_data = data
      @api_data.freeze
    end

    def get_data
      @api_data
    end

    public

    def ==(other)
      self.api_data == other.api_data
    end

    def eql?(other)
      self == other
    end

    def hash
      api_data.hash
    end

    protected

    def api_call(url, **args)
      self.class.api_call(url, **args)
    end

    def self.api_call(url, method: :get, headers: {}, params: {}, body: nil, format: :api, block: nil, options: {})
      unless ENV.has_key?('API_URL') && ENV.has_key?('API_TOKEN')
        raise Error.new("Set environment variables 'API_URL' and 'API_TOKEN'")
      end

      url = ENV['API_URL'].chomp('/') + '/' + url.sub(/^\//, '')

      headers['X-Dataverse-key'] = ENV['API_TOKEN']
      headers[:params] = params unless params.empty?

      format = :block if block

      case format
      when :xml
        headers[:accept] = :xml
        headers[:content_type] ||= :xml
      when :api, :json
        headers[:accept] = :json
        headers[:content_type] ||= :json
      when :raw
        options[:raw_response] = true
      when :block
        options[:block_response] = block
      end

      body = body.to_json if body.is_a?(Hash) && headers[:content_type] == :json
      body = body.write if body.is_a?(REXML::Document) && headers[:content_type] == :xml

      response = RestClient::Request.execute(
        method: method,
        url: url,
        headers: headers,
        payload: body,
        # log: STDOUT,
        **options
      )

      case format
      when :api
        data = JSON.parse(response.body)
        raise Error.new(data['message']) unless data['status'] == 'OK'
        return data['data']
      when :xml
        REXML::Document.new(response.body)
      when :json
        return JSON.parse(response.body)
      when :raw, :block, :response
        return response
      when :status
        return response.code
      else
        return response.body
      end

    rescue RestClient::Exception => e
      if e.http_body =~ /^\s*{\s*"status"\s*:\s*"ERROR"\s*,\s*"message"\s*:\s*"/
        regex = /lib\/dataverse\/(?!.*:in\s*`.*(api_)?call'$)/
        raise Error.new(JSON.parse(e.http_body)['message'],
                        backtrace: e.backtrace.drop_while { |x| !regex.match?(x) })
      end
      raise
    end

  end
end

# if log = ENV['RESTCLIENT_LOG']
#   RestClient.log = STDOUT if log.upcase == 'STDOUT'
#   RestClient.log = STDERR if log.upcase == 'STDERR'
#   RestClient.log = log
# end
data/lib/dataverse/dataset.rb
ADDED
@@ -0,0 +1,376 @@
# frozen_string_literal: true

require_relative 'base'

module Dataverse
  class Dataset < Base

    attr_reader :id

    def self.id(id)
      Dataset.new(id)
    end

    def self.pid(pid)
      data = api_call('datasets/:persistentId', params: {'persistentId' => pid})
      Dataset.new(data['id'])
    end

    def self.create(data:, dataverse:)
      new_dataset(dataverse, data)
    end

    def self.import(data:, dataverse:, pid:, publish: false, ddi: false)
      new_dataset(dataverse, data, import: pid, publish: publish, ddi: ddi)
    end

    def delete
      raise Error.new('Can only delete draft version') unless draft_version
      versions
      result = call('versions/:draft', method: :delete)
      @version_data.delete(:draft)
      @metadata.delete(:draft)
      @files.delete(:draft)
      @version_numbers&.delete(:draft)
      init({}) if published_versions.empty?
      result['message']
    end

    def submit
      call('submitForReview', method: :post)
    end

    def reject(reason)
      call('returnToAuthor', method: :post, body: reason)
    end

    def publish(major: true)
      result = call('actions/:publish', method: :post,
                    params: {type: major ? 'major' : 'minor'}, format: :status)
      return "Dataset #{pid} published" if result == 200
      return "Dataset #{pid} waiting for review" if result == 202
    end

    def call(url, **args)
      api_call("datasets/#{id}/#{url}", **args)
    end

    def pid(version: :latest)
      version_data(version).fetch('datasetPersistentId')
    end

    def size
      data = call("storagesize", params: {includeCached: 'true'})
      data['message'][/[,\d]+/].delete(',').to_i
    end

    def versions
      @version_numbers ||= begin
        data = [:latest, :published] + [draft_version].compact + published_versions
        data.delete(:published) unless published_versions.size > 0
        data
      end
    end

    def draft_version
      return :draft if @version_data.keys.include?(:draft)
    end

    def published_versions
      @published_versions ||= call('versions').map do |x|
        next unless x['versionState'] == 'RELEASED'
        "#{x['versionNumber']}.#{x['versionMinorNumber']}".to_f
      end.compact
    end

    def version(version = :latest)
      resolve_version(version, raise_if_not_found: false)
    end

    def title(version: :latest)
      metadata(version: version).fetch('title')
    end

    def author(version: :latest)
      metadata(version: version).fetch('author').first.fetch('authorName')
    end

    def updated(version: :latest)
      Time.parse(version_data(version).fetch('lastUpdateTime')).getlocal
    end

    def created(version: :latest)
      Time.parse(version_data(version).fetch('createTime')).getlocal
    end

    def published(version: :published)
      return nil unless version_data(version).has_key?('releaseTime')
      Time.parse(version_data(version).fetch('releaseTime')).getlocal
    end

    def metadata_fields(version: :latest)
      metadata(version: version)&.keys || []
    end

    MD_TYPES_XML  = ['ddi', 'oai_ddi', 'dcterms', 'oai_dc', 'Datacite', 'oai_datacite']
    MD_TYPES_JSON = ['schema.org', 'OAI_ORE', 'dataverse_json']
    MD_TYPES      = ['rdm', 'raw'] + MD_TYPES_JSON + MD_TYPES_XML

    def export_metadata(md_type)
      return nil unless version(:published)
      format = case md_type.to_s
               when *MD_TYPES_XML
                 :xml
               when *MD_TYPES_JSON
                 :json
               when 'rdm'
                 return rdm_data
               when 'raw'
                 return raw_data
               else
                 raise Error.new("Unknown metadata format: '#{md_type}'")
               end
      api_call('datasets/export', params: {exporter: md_type, persistentId: pid}, format: format)
    end

    def rdm_data(version: :published)
      return nil unless version(version)
      api_data
        .merge(version_data(version))
        .merge('metadata' => metadata(version: version))
        .merge('files' => files(version: version))
    end

    def raw_data(version: :latest, with_files: false)
      result = api_data.dup.merge(version_data(resolve_version(version)))
      result['metadataBlocks'] = call("/versions/#{version_string(version)}/metadata")
      result['files'] = call("/versions/#{version_string(version)}/files") if with_files
      { 'datasetVersion' => result }
    end

    def metadata(version: :latest)
      @metadata[resolve_version(version)] || {}
    end

    def files(version: :latest)
      @files[resolve_version(version)] || []
    end

    def download_size(version: :latest)
      data = call("versions/#{version_string(version)}/downloadsize")
      data['message'][/[,\d]+/].delete(',').to_i
    end

    def download(filename = 'dataverse_files.zip', version: nil)
      if version
        v = version_string(version)
        raise Error.new("Version '#{version}' does not exist") unless v
        version = v
      end
      File.open(filename, 'w') do |f|
        size = 0
        block = proc do |response|
          response.value
          response.read_body do |chunk|
            size += chunk.size
            f.write chunk
          end
        rescue Net::HTTPServerException
          return false
        end
        url = 'access/dataset/:persistentId'
        url += "/versions/#{version}" if version
        params = {persistentId: pid}
        api_call(url, params: params, block: block)
        f.close
        size
      end
    end

    protected

    def initialize(id)
      @id = id
      init(get_data)
    end

    def init(data)
      @version_data = {}
      @metadata = {}
      @files = {}
      @version_numbers = nil
      @published_versions = nil
      super(process_data(data))
    end

    def get_data
      api_call("datasets/#{id}")
    end

    def resolve_version(version, raise_if_not_found: true)
      _version = version

      version = case version
                when ':draft', 'draft'
                  :draft
                when ':latest', 'latest'
                  :latest
                when ':published', 'published', ':latest-published', 'latest-published'
                  :published
                when Numeric, String
                  version.to_f
                else
                  version
                end

      case version
      when :latest
        version = draft_version || published_versions.max
      when :published
        version = published_versions.max
      end

      unless @version_data.keys.include?(version)
        version = versions.find { |x| x == version }
        raise VersionError.new(_version) if version.nil? && raise_if_not_found
        return nil unless version
        data = call("versions/#{version}")
        process_version_data(data)
      end

      version
    end

    def version_string(version)
      v = resolve_version(version)
      case v
      when Symbol
        ":#{v}"
      when Numeric
        v.to_s
      else
        v
      end
    end

    def version_data(version)
      @version_data[resolve_version(version)].transform_keys { |k| k == 'id' ? 'versionId' : k }
    end

    private

    def process_data(data)
      return {} if data.nil? || data.empty?
      version_data = data.delete('latestVersion')
      process_version_data(version_data)
      data
    end

    def process_version_data(data)
      metadata = pack_metadata(data.delete('metadataBlocks'))
      files = pack_files(data.delete('files'))
      version = get_version_number(data)
      store_data(version, data, metadata, files)
      version
    end

    def get_version_number(data)
      case data['versionState']
      when 'DRAFT'
        :draft
      when 'RELEASED'
        "#{data['versionNumber']}.#{data['versionMinorNumber']}".to_f
      else
        raise Error.new("Unsupported version state: '#{data['versionState']}'")
      end
    end

    def store_data(version, data, metadata, files)
      @version_data[version] = data.freeze
      @metadata[version] = metadata.freeze
      @files[version] = files.freeze
    end

    def pack_metadata(metadata)
      data = {}
      metadata.each_value do |block|
        block['fields'].each do |field|
          data[field['typeName']] = field_to_value(field)
        end
      end
      data
    end

    def pack_files(files)
      files.map do |file|
        detail = file.delete('dataFile')
        file.merge(detail)
      end
    end

    def field_to_value(field)
      case field['typeClass']
      when 'primitive'
        field['value']
      when 'controlledVocabulary'
        field['value']
      when 'compound'
        compound_to_value(field['value'])
      else
        raise Error.new("Unsupported typeClass: '#{field['typeClass']}'")
      end
    end

    def compound_to_value(data)
      return data.map { |x| compound_to_value(x) } if data.is_a?(Array)
      hash = {}
      data.values.each do |v|
        hash[v['typeName']] = field_to_value(v)
      end
      hash
    end

    def self.new_dataset(dataverse, data, import: nil, publish: false, ddi: false)
      dataverse = dataverse.id if dataverse.is_a?(Dataverse)

      data = StringIO.new(data.to_json) if data.is_a?(Hash)

      if data.is_a?(String)
        begin
          if File.exist?(data)
            data = File.open(data, 'r')
          elsif ddi || JSON::parse(data)
            data = StringIO.new(data)
          end
        rescue JSON::ParserError
          data = nil
        end
      end

      unless data.is_a?(File) || data.is_a?(StringIO)
        raise Error.new("Data could not be parsed. Should be a Hash, filename or JSON string.")
      end

      url = "dataverses/#{dataverse}/datasets"
      url += '/:import' if import

      params = {release: publish ? 'yes' : 'no'}
      params[:pid] = import if import

      headers = {content_type: :json}
      headers[:content_type] = :xml if ddi

      result = api_call(url, method: :post, headers: headers, body: data, params: params)

      return Dataset.id(result['id'])

    ensure
      data.close if data.is_a?(File)
    end

  end
end