dryad2dataverse 0.7.11a0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dryad2dataverse/__init__.py +14 -12
- dryad2dataverse/auth.py +94 -0
- dryad2dataverse/config.py +180 -0
- dryad2dataverse/data/dryad2dataverse_config.yml +126 -0
- dryad2dataverse/handlers.py +6 -2
- dryad2dataverse/monitor.py +146 -140
- dryad2dataverse/scripts/dryadd.py +210 -293
- dryad2dataverse/serializer.py +129 -140
- dryad2dataverse/transfer.py +296 -396
- {dryad2dataverse-0.7.11a0.dist-info → dryad2dataverse-0.8.0.dist-info}/METADATA +4 -3
- dryad2dataverse-0.8.0.dist-info/RECORD +14 -0
- dryad2dataverse/constants.py +0 -45
- dryad2dataverse-0.7.11a0.dist-info/RECORD +0 -12
- {dryad2dataverse-0.7.11a0.dist-info → dryad2dataverse-0.8.0.dist-info}/WHEEL +0 -0
- {dryad2dataverse-0.7.11a0.dist-info → dryad2dataverse-0.8.0.dist-info}/entry_points.txt +0 -0
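
The headline change in this release: transfer.py (like monitor.py and serializer.py) no longer reads its settings from the removed dryad2dataverse/constants.py module. Configuration now arrives via the new config.py and auth.py modules and is passed to Transfer as keyword arguments, which the new Transfer.check_kwargs() validates. A minimal migration sketch, assuming the seven required keys that check_kwargs() enforces; the Serializer call and all values are illustrative, since neither appears in this diff:

    # Hypothetical 0.8.0 usage; values and the Serializer signature are placeholders.
    from dryad2dataverse import serializer, transfer

    settings = {'max_upload': 3221225472,              # bytes; assumed limit
                'tempfile_location': '/tmp/dryad',
                'dv_url': 'https://dataverse.example.edu',
                'api_key': 'xxxxxxxx-xxxx',
                'dv_contact_email': 'admin@example.edu',
                'dv_contact_name': 'Data Admin',
                'target': 'dryad'}                     # target collection short name

    study = serializer.Serializer('doi:10.5061/dryad.example')  # illustrative
    trans = transfer.Transfer(study, **settings)
    # Omitting any required key raises
    # exceptions.Dryad2DataverseError('Required parameter missing: <key>').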
dryad2dataverse/transfer.py
CHANGED
@@ -2,11 +2,12 @@
 This module handles data downloads and uploads from a Dryad instance to a Dataverse instance
 '''
 
-#
+#pylint: disable=invalid-name #Maybe one day
 import hashlib
 import io
 import json
 import logging
+import pathlib
 import os
 import time
 import traceback
@@ -17,11 +18,10 @@ import requests
 from requests.adapters import HTTPAdapter
 from requests_toolbelt.multipart.encoder import MultipartEncoder
 
-from dryad2dataverse import constants
+from dryad2dataverse import config
 from dryad2dataverse import exceptions
 from dryad2dataverse import USERAGENT
 
-USER_AGENT = {'User-agent': USERAGENT}
 LOGGER = logging.getLogger(__name__)
 URL_LOGGER = logging.getLogger('urllib3')
 
@@ -39,15 +39,37 @@ class Transfer():
     Transfers metadata and data files from a
     Dryad installation to Dataverse installation.
     '''
-    def __init__(self, dryad):
+    #pylint: disable=too-many-instance-attributes
+    def __init__(self, dryad, **kwargs):
         '''
         Creates a dryad2dataverse.transfer.Transfer instance.
 
-
-
-
-
-
+        Parameters
+        ----------
+        dryad : dryad2dataverse.serializer.Serializer
+
+        **kwargs
+            Normally this would be a dryad2dataverse.constants.Config instance
+
+        Notes
+        -----
+        Minimum kwargs for function:
+        max_upload : int
+            Maximum size in bytes
+        tempfile_location : str
+            Path to temporary directory
+        dv_url : str
+            Base URL of dataverse instance
+        api_key : str
+            API key for Dataverse user
+        dv_contact_email : str
+            Contact email address for Dataverse record
+        dv_contact_name : str
+            Contact name
+        target : str
+            Target collection short name
+        '''
+        self.kwargs = kwargs
         self.dryad = dryad
         self._fileJson = None
         self._files = [list(f) for f in self.dryad.files]
@@ -57,39 +79,46 @@ class Transfer():
         self.dvStudy = None
         self.jsonFlag = None #Whether or not new json uploaded
         self.session = requests.Session()
-        self.session.mount('https://', HTTPAdapter(max_retries=constants.RETRY_STRATEGY))
+        self.session.mount('https://', HTTPAdapter(max_retries=config.RETRY_STRATEGY))
+        self.check_kwargs()
+
+    def check_kwargs(self):
+        '''
+        Verify sufficient information
+        '''
+        required = ['max_upload',
+                    'tempfile_location',
+                    'dv_url',
+                    'api_key',
+                    'dv_contact_email',
+                    'dv_contact_name',
+                    'target']
+        keys = self.kwargs.keys()
+        for val in required:
+            if val not in keys:
+                try:
+                    raise exceptions.Dryad2DataverseError(f'Required parameter missing: {val}')
+                except exceptions.Dryad2DataverseError as err:
+                    LOGGER.exception(err)
+                    raise
 
-    def _del__(self):
-        '''Expunges files from
+    def _del__(self):
+        '''Expunges files from temporary file on deletion'''
+        tmp = pathlib.Path(self.kwargs['tempfile_location']).expanduser().absolute()
         for f in self.files:
-            if
-            os.remove(
+            if pathlib.Path(tmp, f[1]).exists():
+                os.remove(pathlib.Path(tmp, f[1]))
 
-    def test_api_key(self, url=None, apikey=None):
+    def test_api_key(self):
         '''
         Tests for an expired API key and raises
         dryad2dataverse.exceptions.Dryad2dataverseBadApiKeyError
         the API key is bad. Ignores other HTTP errors.
-
-        ----------------------------------------
-        Parameters:
-
-        url : str
-            — Base URL to Dataverse installation.
-            Defaults to dryad2dataverse.constants.DVURL
-
-        apikey : str
-            — Default dryad2dataverse.constants.APIKEY.
-
-        ----------------------------------------
         '''
         #API validity check appears to come before a PID validity check
         params = {'persistentId': 'doi:000/000/000'} # PID is irrelevant
-
-
-        headers = {'X-Dataverse-key': apikey if apikey else constants.APIKEY}
-        headers.update(USER_AGENT)
-        bad_test = self.session.get(f'{url}/api/datasets/:persistentId',
+        headers = {'X-Dataverse-key': self.kwargs['api_key'], 'User-agent': USERAGENT}
+        bad_test = self.session.get(f'{self.kwargs["dv_url"]}/api/datasets/:persistentId',
                                     headers=headers,
                                     params=params)
         #There's an extra space in the message which Harvard
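
test_api_key() now takes no arguments, reading dv_url and api_key from the kwargs supplied at construction. The probe itself is just a GET with a throwaway persistent ID; a standalone sketch of the same check, with placeholder URL and key:

    import requests

    # Minimal sketch of the API-key probe; the package raises
    # exceptions.DataverseBadApiKeyError instead of RuntimeError.
    resp = requests.get('https://dataverse.example.edu/api/datasets/:persistentId',
                        headers={'X-Dataverse-key': 'xxxxxxxx-xxxx'},
                        params={'persistentId': 'doi:000/000/000'})  # PID is irrelevant
    if resp.json().get('message', '').startswith('Bad api key'):
        raise RuntimeError('Bad API key')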
@@ -97,10 +126,8 @@ class Transfer():
         if bad_test.json().get('message').startswith('Bad api key'):
             try:
                 raise exceptions.DataverseBadApiKeyError('Bad API key')
-            except exceptions.DataverseBadApiKeyError as e:
-                LOGGER.
-                LOGGER.exception(e)
-                #LOGGER.exception(traceback.format_exc()) #not really necessary
+            except exceptions.DataverseBadApiKeyError as err:
+                LOGGER.exception(err)
                 raise
         try: #other errors
             bad_test.raise_for_status()
@@ -124,7 +151,7 @@ class Transfer():
         Returns datavese authentication header dict.
         ie: `{X-Dataverse-key' : 'APIKEYSTRING'}`
         '''
-        return {'X-Dataverse-key' : constants.APIKEY}
+        return {'X-Dataverse-key' : self.kwargs['api_key']}
 
     @property
     def fileJson(self):
@@ -162,16 +189,15 @@ class Transfer():
         return self.dryad.doi
 
     @staticmethod
-    def _dryad_file_id(url):
+    def _dryad_file_id(url:str):
         '''
         Returns Dryad fileID from dryad file download URL as integer.
-        ----------------------------------------
-        Parameters:
 
+        Parameters
+        ----------
         url : str
-
+            Dryad file URL in format
             'https://datadryad.org/api/v2/files/385820/download'.
-        ----------------------------------------
         '''
         fid = url.strip('/download')
         fid = int(fid[fid.rfind('/')+1:])
@@ -182,67 +208,52 @@ class Transfer():
         '''
         Returns Dataverse authentication header as dict.
 
-
-
-
+        Parameters
+        ----------
         apikey : str
-
-        ----------------------------------------
+            Dataverse API key.
         '''
         return {'X-Dataverse-key' : apikey}
 
-
-
-                         d_type='distributionDate',
-                         apikey=None):
+    def set_correct_date(self, hdl=None,
+                         d_type='distributionDate'):
         '''
         Sets "correct" publication date for Dataverse.
 
-
+        Parameters
+        ----------
+        hdl : str
+            Persistent indentifier for Dataverse study.
+            Defaults to Transfer.dvpid (which can be None if the
+            study has not yet been uploaded).
+        d_type : str
+            Date type. One of 'distributionDate', 'productionDate',
+            `dateOfDeposit'. Default 'distributionDate'.
+
+        Notes
+        -----
+        self.kwargs are normally read from dryad2dataverse.config.Config
+        instances.
+
+        dryad2dataverse.serializer maps Dryad 'publicationDate'
         to Dataverse 'distributionDate' (see serializer.py ~line 675).
 
         Dataverse citation date default is ":publicationDate". See
         Dataverse API reference:
-        https://guides.dataverse.org/en/4.20/api/native-api.html#id54
-
-        ----------------------------------------
-        Parameters:
-
-        url : str
-            — Base URL to Dataverse installation.
-            Defaults to dryad2dataverse.constants.DVURL
-
-        hdl : str
-            — Persistent indentifier for Dataverse study.
-            Defaults to Transfer.dvpid (which can be None if the
-            study has not yet been uploaded).
+        <https://guides.dataverse.org/en/4.20/api/native-api.html#id54>.
 
-        d_type : str
-            — Date type. One of 'distributionDate', 'productionDate',
-            'dateOfDeposit'. Default 'distributionDate'.
-
-        apikey : str
-            — Default dryad2dataverse.constants.APIKEY.
-        ----------------------------------------
         '''
         try:
-            if not url:
-                url = constants.DVURL
             if not hdl:
                 hdl = self.dvpid
-            headers =
-
-                headers = {'X-Dataverse-key' : apikey}
-            else:
-                headers = {'X-Dataverse-key' : constants.APIKEY}
-
-            headers.update(USER_AGENT)
+            headers ={'X-Dataverse-key': self.kwargs['api_key'],
+                      'User-agent': USERAGENT}
             params = {'persistentId': hdl}
-            set_date = self.session.put(f'{
+            set_date = self.session.put(f'{self.kwargs["dv_url"]}/api/'
+                                        'datasets/:persistentId/citationdate',
                                         headers=headers,
                                         data=d_type,
-                                        params=params
-                                        timeout=45)
+                                        params=params)
             set_date.raise_for_status()
 
         except (requests.exceptions.HTTPError,
@@ -252,78 +263,59 @@ class Transfer():
             LOGGER.warning(err)
             LOGGER.warning(set_date.text)
 
-    def upload_study(self,
+    def upload_study(self, **kwargs):
         '''
         Uploads Dryad study metadata to target Dataverse or updates existing.
         Supplying a `targetDv` kwarg creates a new study and supplying a
         `dvpid` kwarg updates a currently existing Dataverse study.
 
-
-
-
-        url : str
-            — URL of Dataverse instance. Defaults to constants.DVURL.
-
-        apikey : str
-            — API key of user. Defaults to contants.APIKEY.
-
-        timeout : int
-            — timeout on POST request.
-
-        **KEYWORD ARGUMENTS**
-
-        One of these is required. Supplying both or neither raises a NoTargetError
+        **kwargs : dict
+            Normally this is one of the two parameters below
 
+        Other parameters
+        ----------------
         targetDv : str
-
+            Short name of target dataverse. Required if new dataset.
             Specify as targetDV=value.
-
-
-            — Dataverse persistent ID (for updating metadata).
+        dvpid : str
+            Dataverse persistent ID (for updating metadata).
             This is not required for new uploads, specify as dvpid=value
 
-
+        Notes
+        -----
+        One of targetDv or dvpid is required.
         '''
-
-            url = constants.DVURL
-        if not apikey:
-            apikey = constants.APIKEY
-        headers = {'X-Dataverse-key' : apikey}
-        headers.update(USER_AGENT)
+        headers = {'X-Dataverse-key': self.kwargs['api_key'], 'User-agent': USERAGENT}
         targetDv = kwargs.get('targetDv')
         dvpid = kwargs.get('dvpid')
         #dryFid = kwargs.get('dryFid') #Why did I put this here?
         if not targetDv and not dvpid:
             try:
                 raise exceptions.NoTargetError('You must supply one of targetDv \
-
-
-            except exceptions.NoTargetError as e:
-                LOGGER.
-                LOGGER.exception(e)
+                                               (target dataverse) \
+                                               or dvpid (Dataverse persistent ID)')
+            except exceptions.NoTargetError as err:
+                LOGGER.exception(err)
                 raise
 
         if targetDv and dvpid:
-
-
-
-
-            raise
+            msg = 'Supply only one of targetDv or dvpid'
+            LOGGER.exception(msg)
+            raise exceptions.Dryad2DataverseError(msg)
+
         if not dvpid:
-            endpoint = f'{
+            endpoint = f'{self.kwargs["dv_url"]}/api/dataverses/{targetDv}/datasets'
             upload = self.session.post(endpoint,
                                        headers=headers,
-                                       json=self.dryad.dvJson
-                                       timeout=timeout)
+                                       json=self.dryad.dvJson)
             LOGGER.debug(upload.text)
         else:
-            endpoint = f'{
+            endpoint = f'{self.kwargs["dv_url"]}/api/datasets/:persistentId/versions/:draft'
             params = {'persistentId':dvpid}
             #Yes, dataverse uses *different* json for edits
             upload = self.session.put(endpoint, params=params,
                                       headers=headers,
-                                      json=self.dryad.dvJson['datasetVersion']
-                                      timeout=timeout)
+                                      json=self.dryad.dvJson['datasetVersion'])
             #self._dvrecord = upload.json()
             LOGGER.debug(upload.text)
 
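
With connection details moved into self.kwargs, upload_study() needs only routing information: targetDv to create a new dataset, or dvpid to update an existing one. A usage sketch (identifiers illustrative):

    trans.upload_study(targetDv='dryad')              # POST a new draft dataset
    trans.upload_study(dvpid='doi:10.80240/FK2/XYZ')  # or PUT updated metadata
    # Neither kwarg raises NoTargetError; both together raise Dryad2DataverseError.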
@@ -331,20 +323,16 @@ class Transfer():
             updata = upload.json()
             self.dvStudy = updata
             if updata.get('status') != 'OK':
+                msg = ('Status return is not OK.'
+                       f'{upload.status_code}: '
+                       f'{upload.reason}. '
+                       f'{upload.request.url} '
+                       f'{upload.text}')
                 try:
-                    raise exceptions.DataverseUploadError(
-
-
-                                                          f'{upload.request.url} '
-                                                          f'{upload.text}'))
-                except exceptions.DataverseUploadError as e:
-                    LOGGER.exception(e)
+                    raise exceptions.DataverseUploadError(msg)
+                except exceptions.DataverseUploadError as err:
+                    LOGGER.exception(err)
                     LOGGER.exception(traceback.format_exc())
-                    raise exceptions.DataverseUploadError(('Status return is not OK.'
-                                                           f'{upload.status_code}: '
-                                                           f'{upload.reason}. '
-                                                           f'{upload.request.url} '
-                                                           f'{upload.text}'))
             upload.raise_for_status()
         except Exception as e: # Only accessible via non-requests exception
             LOGGER.exception(e)
@@ -362,15 +350,12 @@ class Transfer():
         '''
         Returns the hex digest of a file (formerly just md5sum).
 
-
-
-
+        Parameters
+        ----------
         infile : str
-
-
-
-            — Digest type
-        ----------------------------------------
+            Complete path to target file.
+        dig_type : Union[str, None]
+            Digest type
         '''
         #From Ryan Scherle
         #When Dryad calculates a digest, it only uses MD5.
@@ -401,55 +386,44 @@ class Transfer():
                 fblock = m.read(blocksize)
                 curvalue = HASHTABLE[dig_type](fblock, curvalue)
             return curvalue
+        LOGGER.exception('Unable to determine hash type for %s: %s', infile, dig_type)
         raise exceptions.HashError(f'Unable to determine hash type for{infile}: {dig_type}')
 
 
-    def download_file(self, url=None, filename=None,
-                      size=None, chk=None,
+    def download_file(self, url=None, filename=None,
+                      size=None, chk=None, **kwargs):
         '''
-        Downloads a file via requests streaming and saves to
-
-
-        ----------------------------------------
-        Required keyword arguments:
+        Downloads a file via requests streaming and saves to the
+        the defined temporary file directory.
+        Returns checksum on success and an exception on failure.
 
+        Parameters
+        ----------
         url : str
-
-
+            URL of download.
         filename : str
-
-
-        timeout : int
-            — Requests timeout.
-
-        tmp : str
-            — Temporary directory for downloads.
-            Defaults to dryad2dataverse.constants.TMP.
-
+            Output file name.
         size : int
-
-            Defaults to dryad2dataverse.constants.MAX_UPLOAD.
-
-        digest_type: str
-            — checksum type (ie, md5, sha-256, etc)
-
+            Reported file size in bytes.
         chk : str
-
-
+            checksum of file (if available and known).
+        kwargs : dict
+
+        Other parameters
+        ----------------
+        digest_type : str
+            checksum type (ie, md5, sha-256, etc)
         '''
+        #pylint: disable=too-many-branches
         LOGGER.debug('Start download sequence')
-        LOGGER.debug('MAX SIZE = %s', constants.MAX_UPLOAD)
+        LOGGER.debug('MAX SIZE = %s', self.kwargs['max_upload'])
         LOGGER.debug('Filename: %s, size=%s', filename, size)
-
-        tmp = constants.TMP
-        if tmp.endswith(os.sep):
-            tmp = tmp[:-1]
-
+        tmp = pathlib.Path(self.kwargs['tempfile_location']).expanduser().absolute()
         if size:
-            if size > constants.MAX_UPLOAD:
+            if size > self.kwargs['max_upload']:
                 #TOO BIG
                 LOGGER.warning('%s: File %s exceeds '
-                               'Dataverse
+                               'Dataverse maximum upload size. Skipping download.',
                                self.doi, filename)
                 md5 = 'this_file_is_too_big_to_upload__' #HA HA
                 for i in self._files:
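
_check_md5() folds the file into a running digest block by block, dispatching on the Dryad-reported digest type through HASHTABLE (whose entries sit outside this diff). The per-block pattern is roughly equivalent to this self-contained hashlib sketch; the function name and block size are illustrative:

    import hashlib

    def file_digest(path, algo='md5', blocksize=2**16):
        # Fold the file into the hash one block at a time so large
        # downloads never need to fit in memory.
        hasher = hashlib.new(algo)
        with open(path, 'rb') as fobj:
            for block in iter(lambda: fobj.read(blocksize), b''):
                hasher.update(block)
        return hasher.hexdigest()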
@@ -458,27 +432,28 @@ class Transfer():
                 LOGGER.debug('Stop download sequence with large file skip')
                 return md5
         try:
-            down = self.session.get(url,
+            down = self.session.get(url, stream=True,
+                                    headers=config.Config.update_headers(**self.kwargs))
             down.raise_for_status()
-            with open(
+            with open(pathlib.Path(tmp,filename), 'wb') as fi:
                 for chunk in down.iter_content(chunk_size=8192):
                     fi.write(chunk)
 
             #verify size
             #https://stackoverflow.com/questions/2104080/how-can-i-check-file-size-in-python'
             if size:
-                checkSize = os.stat(
+                checkSize = os.stat(pathlib.Path(tmp,filename)).st_size
                 if checkSize != size:
                     try:
-                        raise exceptions.DownloadSizeError('Download size does not
-                                                           'reported size')
+                        raise exceptions.DownloadSizeError('Download size does not '
+                                                           'match reported size')
                     except exceptions.DownloadSizeError as e:
                         LOGGER.exception(e)
                         raise
             #now check the md5
             md5 = None
             if chk and kwargs.get('digest_type') in HASHTABLE:
-                md5 = Transfer._check_md5(
+                md5 = Transfer._check_md5(pathlib.Path(tmp,filename),
                                           kwargs['digest_type'])
                 if md5 != chk:
                     try:
@@ -497,27 +472,27 @@ class Transfer():
                 requests.exceptions.ConnectionError) as err:
             LOGGER.critical('Unable to download %s', url)
             LOGGER.exception(err)
-            raise
+            raise
+        except Exception as err:
+            LOGGER.exception(err)
+            raise
 
     def download_files(self, files=None):
         '''
         Bulk downloader for files.
 
-
-
-
+        Parameters
+        ----------
         files : list
-
-
-
-
-            The md5 sum should be the last member of the tuple.
-
-            Defaults to self.files.
+            Items in list can be tuples or list with a minimum of:
+            `(dryaddownloadurl, filenamewithoutpath, [md5sum])`
+            The md5 sum should be the last member of the tuple.
+            Defaults to self.files.
 
-
-
-
+        Notes
+        -----
+        Normally used without arguments to download all the associated
+        files with a Dryad study.
         '''
         if not files:
             files = self.files
@@ -534,7 +509,7 @@ class Transfer():
                 LOGGER.exception('Unable to download file with info %s\n%s', f, e)
                 raise
 
-    def file_lock_check(self, study, dv_url=None, apikey=None, count=0):
+    def file_lock_check(self, study, count=0):
         '''
         Checks for a study lock
 
@@ -543,37 +518,21 @@ class Transfer():
         halts file ingest, there should be no locks on a
         Dataverse study before performing a data file upload.
 
-
-
-
+        Parameters
+        ----------
         study : str
-
-
-        dv_url : str
-            — URL to base Dataverse installation.
-
-        apikey : str
-            — API key for user.
-            If not present authorization defaults to self.auth.
-
+            Persistent indentifer of study.
         count : int
-
-
-
-        '''
-        if dv_url.endswith('/'):
-            dv_url = dv_url[:-1]
-        if apikey:
-            headers = {'X-Dataverse-key': apikey}
-        else:
-            headers = self.auth
-
-        headers.update(USER_AGENT)
+            Number of times the function has been called. Logs
+            lock messages only on 0.
+        '''
+        headers = {'X-Dataverse-key': self.kwargs['api_key'], 'User-agent': USERAGENT}
         params = {'persistentId': study}
         try:
-            lock_status = self.session.get(f'{dv_url}
+            lock_status = self.session.get(f'{self.kwargs["dv_url"]}'
+                                           '/api/datasets/:persistentId/locks',
                                            headers=headers,
-                                           params=params
+                                           params=params)
             lock_status.raise_for_status()
             if lock_status.json().get('data'):
                 if count == 0:
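
file_lock_check() is built for polling: it stays truthy while locks remain, and the count argument keeps it from re-logging lock messages after the first pass. upload_file(), later in this diff, drives it in a loop equivalent to this sketch (persistent ID illustrative):

    import time

    count = 0
    while trans.file_lock_check('doi:10.80240/FK2/XYZ', count):
        time.sleep(15)  # the same 15-second interval upload_file uses
        count += 1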
@@ -589,7 +548,7 @@ class Transfer():
             #return True #Should I raise here?
             raise
 
-    def force_notab_unlock(self, study, dv_url=None, apikey=None):
+    def force_notab_unlock(self, study):
         '''
         Checks for a study lock and forcibly unlocks and uningests
         to prevent tabular file processing. Required if mime and filename
@@ -597,39 +556,31 @@ class Transfer():
 
         **Forcible unlocks require a superuser API key.**
 
-
-
-
+        Parameters
+        ----------
         study : str
-
-
-
-            — URL to base Dataverse installation.
-
-        apikey : str
-            — API key for user.
-            If not present authorization defaults to self.auth.
-        ----------------------------------------
-        '''
-        if dv_url.endswith('/'):
-            dv_url = dv_url[:-1]
-        if apikey:
-            headers = {'X-Dataverse-key': apikey}
-        else:
-            headers = self.auth
-
-        headers.update(USER_AGENT)
+            Persistent indentifer of study.
+        '''
+        headers = {'X-Dataverse-key': self.kwargs['api_key'], 'User-agent': USERAGENT}
         params = {'persistentId': study}
-        lock_status = self.session.get(f'{dv_url}/api/datasets/:persistentId/locks',
+        lock_status = self.session.get(f'{self.kwargs["dv_url"]}/api/datasets/:persistentId/locks',
                                        headers=headers,
-                                       params=params
-
+                                       params=params)
+        try:
+            lock_status.raise_for_status()
+        except (requests.exceptions.HTTPError,
+                requests.exceptions.ConnectionError) as err:
+            LOGGER.exception(err)
+            raise
+        except Exception as err:
+            LOGGER.exception(err)
+            raise
         if lock_status.json()['data']:
             LOGGER.warning('Study %s has been locked', study)
             LOGGER.warning('Lock info:\n%s', lock_status.json())
-            force_unlock = self.session.delete(f'{dv_url}/api/
-
-
+            force_unlock = self.session.delete(f'{self.kwargs["dv_url"]}/api/'
+                                               'datasets/:persistentId/locks',
+                                               params=params, headers=headers)
             force_unlock.raise_for_status()
             LOGGER.warning('Lock removed for %s', study)
             LOGGER.warning('Lock status:\n %s', force_unlock.json())
@@ -655,86 +606,68 @@ class Transfer():
                     hashtype=None,
                     #md5=None, studyId=None, dest=None,
                     digest=None, studyId=None, dest=None,
-                    fprefix=None, force_unlock=False, timeout=300):
+                    fprefix=None, force_unlock=False):
         '''
         Uploads file to Dataverse study. Returns a tuple of the
         dryadFid (or None) and Dataverse JSON from the POST request.
         Failures produce JSON with different status messages
-        rather than raising an exception
-
-
-
-
+        rather than raising an exception, unless it's some
+        horrendous failure whereupon you will get an actual
+        exception.
+
+        Parameters
+        ----------
+        dryadURL : str
+            Dryad download URL
         filename : str
-
-
+            Filename (not including path).
         mimetype : str
-
-
+            Mimetype of file.
         size : int
-
-
+            Size in bytes.
         studyId : str
-
-
-
-        dest : str
-            — Destination dataverse installation url.
-            Defaults to constants.DVURL.
+            Persistent Dataverse study identifier.
+            Defaults to Transfer.dvpid.
         hashtype: str
             original Dryad hash type
-
-        #md5 : str
-        digest
-            — md5 checksum for file.
-
         fprefix : str
-
-
-        timeout : int
-            - Timeout in seconds for POST request. Default 300.
-
+            Path to file, not including a trailing slash.
         dryadUrl : str
-
-
-
+            Dryad download URL if you want to include a Dryad file id.
         force_unlock : bool
-
-
-
-
-
-
-
-
-
-        #
-        #TODONE remove above
+            Attempt forcible unlock instead of waiting for tabular
+            file processing.
+            Defaults to False.
+            The Dataverse `/locks` endpoint blocks POST and DELETE requests
+            from non-superusers (undocumented as of 31 March 2021).
+            **Forcible unlock requires a superuser API key.**
+        '''
+        #pylint: disable = consider-using-with, too-many-arguments, too-many-positional-arguments
+        #pylint:disable=too-many-locals, too-many-branches, too-many-statements
+        #Fix the arguments one day
         if not studyId:
             studyId = self.dvpid
-
-
-        if not fprefix:
-            fprefix = constants.TMP
+        dest = self.kwargs['dv_url']
+        fprefix = pathlib.Path(self.kwargs['tempfile_location']).expanduser().absolute()
         if dryadUrl:
             fid = dryadUrl.strip('/download')
             fid = int(fid[fid.rfind('/')+1:])
         else:
             fid = 0 #dummy fid for non-Dryad use
         params = {'persistentId' : studyId}
-        upfile = fprefix
+        upfile = pathlib.Path(fprefix, filename[:])
         badExt = filename[filename.rfind('.'):].lower()
         #Descriptions are technically possible, although how to add
         #them is buried in Dryad's API documentation
         dv4meta = {'label' : filename[:], 'description' : descr}
         #if mimetype == 'application/zip' or filename.lower().endswith('.zip'):
-        if mimetype == 'application/zip' or badExt in
+        if mimetype == 'application/zip' or badExt in self.kwargs.get('notab',[]):
             mimetype = 'application/octet-stream' # stop unzipping automatically
             filename += '.NOPROCESS' # Also screw with their naming convention
             #debug log about file names to see what is up with XSLX
             #see doi:10.5061/dryad.z8w9ghxb6
             LOGGER.debug('File renamed to %s for upload', filename)
-        if size >= constants.MAX_UPLOAD:
+        if size >= self.kwargs['max_upload']:
             fail = (fid, {'status' : 'Failure: MAX_UPLOAD size exceeded'})
             self.fileUpRecord.append(fail)
             LOGGER.warning('%s: File %s of '
@@ -748,14 +681,21 @@ class Transfer():
         ctype = {'Content-type' : multi.content_type}
         tmphead = self.auth.copy()
         tmphead.update(ctype)
-        tmphead.update(USER_AGENT)
+        tmphead.update({'User-agent':USERAGENT})
         url = dest + '/api/datasets/:persistentId/add'
-
-        upload = self.session.post(url, params=params,
+        upload = self.session.post(url, params=params,
                                    headers=tmphead,
-                                   data=multi
-
+                                   data=multi)
+        try:
             upload.raise_for_status()
+
+        except (requests.exceptions.HTTPError,
+                requests.exceptions.ConnectionError):
+            LOGGER.critical('Error %s: %s, upload.status_code, upload.reason')
+            return (fid, {'status' : f'Failure: Reason - {upload.status_code}: {upload.reason}'})
+
+
+        try:
             self.fileUpRecord.append((fid, upload.json()))
             upmd5 = upload.json()['data']['files'][0]['dataFile']['checksum']['value']
             #Dataverse hash type
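
The file POST streams a multipart body through requests_toolbelt's MultipartEncoder rather than buffering the file in memory. A reduced standalone sketch of the same call shape against the Dataverse native add-file endpoint; URL, key, identifiers, and file are placeholders:

    import json
    import requests
    from requests_toolbelt.multipart.encoder import MultipartEncoder

    multi = MultipartEncoder(
        fields={'file': ('data.csv.NOPROCESS',  # renamed to sidestep tabular ingest
                         open('/tmp/dryad/data.csv', 'rb'),
                         'application/octet-stream'),
                'jsonData': json.dumps({'label': 'data.csv', 'description': ''})})
    resp = requests.post('https://dataverse.example.edu/api/datasets/:persistentId/add',
                         params={'persistentId': 'doi:10.80240/FK2/XYZ'},
                         headers={'X-Dataverse-key': 'xxxxxxxx-xxxx',
                                  'Content-type': multi.content_type},
                         data=multi)
    resp.raise_for_status()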
@@ -773,11 +713,11 @@ class Transfer():
                 #if md5 and (upmd5 != md5):
                 if upmd5 != comparator:
                     try:
-                        raise exceptions.HashError(f'{_type} mismatch:\nlocal:
+                        raise exceptions.HashError(f'{_type} mismatch:\nlocal: '
+                                                   f'{comparator}\nuploaded: {upmd5}')
                     except exceptions.HashError as e:
                         LOGGER.exception(e)
-
-
+                        return (fid, {'status': e})
             #Make damn sure that the study isn't locked because of
             #tab file processing
             ##SPSS files still process despite spoofing MIME and extension
@@ -787,12 +727,12 @@ class Transfer():
             #fid not required for unlock
             #self.force_notab_unlock(studyId, dest, fid)
             if force_unlock:
-                self.force_notab_unlock(studyId, dest)
+                self.force_notab_unlock(studyId)
             else:
                 count = 0
                 wait = True
                 while wait:
-                    wait = self.file_lock_check(studyId,
+                    wait = self.file_lock_check(studyId, count)
                     if wait:
                         time.sleep(15) # Don't hit it too often
                         count += 1
@@ -800,51 +740,40 @@ class Transfer():
 
             return (fid, upload.json())
 
-        except
+        except requests.exceptions.JSONDecodeError as e:
+            LOGGER.warning('JSON error with upload')
             LOGGER.exception(e)
-
-
-
-
-
-
-            LOGGER.exception(e)
-            LOGGER.warning(upload.text)
-            return (fid, {'status' : f'Failure: Reason {upload.reason}'})
+            return (fid, {'status' : f'Failure: Reason {upload.reason}'})
+
+        #It can crash later
+        except Exception as f_plus: #pylint: disable=broad-except
+            LOGGER.exception(f_plus)
+            return (fid, {'status' : f'Failure: Reason: {f_plus}'})
 
     def upload_files(self, files=None, pid=None, fprefix=None, force_unlock=False):
         '''
         Uploads multiple files to study with persistentId pid.
         Returns a list of the original tuples plus JSON responses.
 
-
-
-
+        Parameters
+        ----------
         files : list
-
+            List contains tuples with
             (dryadDownloadURL, filename, mimetype, size).
-
         pid : str
-
-
-
-        fprefix : str
-            — File location prefix.
-            Defaults to dryad2dataverse.constants.TMP
-
+            Defaults to self.dvpid, which is generated by calling
+            dryad2dataverse.transfer.Transfer.upload_study().
         force_unlock : bool
-
-
-
-
-
-
-        ----------------------------------------
+            Attempt forcible unlock instead of waiting for tabular
+            file processing.
+            Defaults to False.
+            The Dataverse `/locks` endpoint blocks POST and DELETE requests
+            from non-superusers (undocumented as of 31 March 2021).
+            **Forcible unlock requires a superuser API key.**
         '''
         if not files:
             files = self.files
-
-        fprefix = constants.TMP
+        fprefix = pathlib.Path(self.kwargs['tempfile_location']).expanduser().absolute()
         out = []
         for f in files:
             #out.append(self.upload_file(f[0], f[1], f[2], f[3],
@@ -860,24 +789,17 @@ class Transfer():
         '''
         Uploads Dryad json as a separate file for archival purposes.
 
-
-
-
+        Parameters
+        ----------
         studyId : str
-
-
-
-
-
-        dest : str
-            — Base URL for transfer.
-            Default dryad2datavese.constants.DVURL
-        ----------------------------------------
+            Dataverse persistent identifier.
+            Default dryad2dataverse.transfer.Transfer.dvpid,
+            which is only generated on
+            dryad2dataverse.transfer.Transfer.upload_study()
         '''
         if not studyId:
             studyId = self.dvpid
-
-        dest = constants.DVURL
+        dest = self.kwargs['dv_url']
         if not self.jsonFlag:
             url = dest + '/api/datasets/:persistentId/add'
             pack = io.StringIO(json.dumps(self.dryad.dryadJson))
@@ -905,7 +827,6 @@ class Transfer():
             except (requests.exceptions.HTTPError,
                     requests.exceptions.ConnectionError) as err:
                 LOGGER.error('Unable to upload Dryad JSON to %s', studyId)
-                LOGGER.error('ERROR message: %s', meta.text)
                 LOGGER.exception(err)
                 #And further checking as to what is happening
                 self.fileUpRecord.append((0, {'status':'Failure: Unable to upload Dryad JSON'}))
@@ -914,8 +835,9 @@ class Transfer():
             except Exception as err:
                 LOGGER.error('Unable to upload Dryad JSON')
                 LOGGER.exception(err)
+                raise
 
-    def delete_dv_file(self, dvfid, dvurl=None, key=None):
+    def delete_dv_file(self, dvfid)->bool:
         #WTAF curl -u $API_TOKEN: -X DELETE
         #https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/123
 
@@ -926,54 +848,32 @@ class Transfer():
 
         Returns 1 on success (204 response), or 0 on other response.
 
-
-
-
-        dvurl : str
-            — Base URL of dataverse instance.
-            Defaults to dryad2dataverse.constants.DVURL.
-
+        Parameters
+        ----------
         dvfid : str
-
-        ----------------------------------------
+            Dataverse file ID number.
         '''
-        if not dvurl:
-            dvurl = constants.DVURL
-        if not key:
-            key = constants.APIKEY
-
-        delme = self.session.delete(f'{dvurl}/dvn/api/data-deposit/v1.1/swordv2/edit-media'
+        delme = self.session.delete(f'{self.kwargs["dv_url"]}/'
+                                    'dvn/api/data-deposit/v1.1/swordv2/edit-media'
                                     f'/file/{dvfid}',
-                                    auth=(key, ''))
+                                    auth=(self.kwargs['api_key'], ''))
         if delme.status_code == 204:
             self.fileDelRecord.append(dvfid)
             return 1
         return 0
 
-    def delete_dv_files(self, dvfids=None, dvurl=None, key=None):
+    def delete_dv_files(self, dvfids=None):
         '''
         Deletes all files in list of Dataverse file ids from
         a Dataverse installation.
 
-
-
-
+        Parameters
+        ----------
         dvfids : list
-
-
-
-        dvurl : str
-            — Base URL of Dataverse. Defaults to dryad2dataverse.constants.DVURL.
-
-        key : str
-            — API key for Dataverse. Defaults to dryad2dataverse.constants.APIKEY.
-        ----------------------------------------
+            List of Dataverse file ids.
+            Defaults to dryad2dataverse.transfer.Transfer.fileDelRecord.
         '''
         #if not dvfids:
         #    dvfids = self.fileDelRecord
-        if not dvurl:
-            dvurl = constants.DVURL
-        if not key:
-            key = constants.APIKEY
         for fid in dvfids:
-            self.delete_dv_file(fid, dvurl, key)
+            self.delete_dv_file(fid)