dryad2dataverse 0.7.11a0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
@@ -2,11 +2,12 @@
  This module handles data downloads and uploads from a Dryad instance to a Dataverse instance
  '''

- #TODO harmonize headers instead of hideous copypasta
+ #pylint: disable=invalid-name #Maybe one day
  import hashlib
  import io
  import json
  import logging
+ import pathlib
  import os
  import time
  import traceback
@@ -17,11 +18,10 @@ import requests
  from requests.adapters import HTTPAdapter
  from requests_toolbelt.multipart.encoder import MultipartEncoder

- from dryad2dataverse import constants
+ from dryad2dataverse import config
  from dryad2dataverse import exceptions
  from dryad2dataverse import USERAGENT

- USER_AGENT = {'User-agent': USERAGENT}
  LOGGER = logging.getLogger(__name__)
  URL_LOGGER = logging.getLogger('urllib3')

@@ -39,15 +39,37 @@ class Transfer():
  Transfers metadata and data files from a
  Dryad installation to Dataverse installation.
  '''
- def __init__(self, dryad):
+ #pylint: disable=too-many-instance-attributes
+ def __init__(self, dryad, **kwargs):
  '''
  Creates a dryad2dataverse.transfer.Transfer instance.

- ----------------------------------------
- Parameters:
- dryad : dryad2dataverse.serializer.Serializer instance
- ----------------------------------------
- '''
+ Parameters
+ ----------
+ dryad : dryad2dataverse.serializer.Serializer
+
+ **kwargs
+ Normally this would be a dryad2dataverse.config.Config instance
+
+ Notes
+ -----
+ Minimum kwargs for function:
+ max_upload : int
+ Maximum size in bytes
+ tempfile_location : str
+ Path to temporary directory
+ dv_url : str
+ Base URL of dataverse instance
+ api_key : str
+ API key for Dataverse user
+ dv_contact_email : str
+ Contact email address for Dataverse record
+ dv_contact_name : str
+ Contact name
+ target : str
+ Target collection short name
+ '''
+ self.kwargs = kwargs
  self.dryad = dryad
  self._fileJson = None
  self._files = [list(f) for f in self.dryad.files]
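For orientation, here is a minimal construction sketch under the new kwargs-based configuration introduced above; the DOI, URL, key, and contact values are placeholders, and the Serializer call shape is assumed rather than taken from this diff.

```python
# Sketch only: assumes dryad2dataverse 0.8.x and a Serializer that takes a DOI.
from dryad2dataverse import serializer, transfer

study = serializer.Serializer('doi:10.5061/dryad.example')  # hypothetical DOI

trans = transfer.Transfer(
    study,
    max_upload=3221225472,                   # bytes; placeholder limit
    tempfile_location='/tmp/dryad',          # temporary download directory
    dv_url='https://dataverse.example.edu',  # base URL of the Dataverse instance
    api_key='xxxxxxxx-xxxx-xxxx',            # placeholder Dataverse API key
    dv_contact_email='curator@example.edu',
    dv_contact_name='Data Curator',
    target='dryad',                          # target collection short name
)
# check_kwargs() runs inside __init__ and raises Dryad2DataverseError
# if any of the seven required keys is missing.
```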
@@ -57,39 +79,46 @@ class Transfer():
  self.dvStudy = None
  self.jsonFlag = None #Whether or not new json uploaded
  self.session = requests.Session()
- self.session.mount('https://', HTTPAdapter(max_retries=constants.RETRY_STRATEGY))
+ self.session.mount('https://', HTTPAdapter(max_retries=config.RETRY_STRATEGY))
+ self.check_kwargs()
+
+ def check_kwargs(self):
+ '''
+ Verify sufficient information
+ '''
+ required = ['max_upload',
+ 'tempfile_location',
+ 'dv_url',
+ 'api_key',
+ 'dv_contact_email',
+ 'dv_contact_name',
+ 'target']
+ keys = self.kwargs.keys()
+ for val in required:
+ if val not in keys:
+ try:
+ raise exceptions.Dryad2DataverseError(f'Required parameter missing: {val}')
+ except exceptions.Dryad2DataverseError as err:
+ LOGGER.exception(err)
+ raise

- def _del__(self): #TODONE: Change name to __del__ to make a destructor
- '''Expunges files from constants.TMP on deletion'''
+ def _del__(self):
+ '''Expunges files from the temporary directory on deletion'''
+ tmp = pathlib.Path(self.kwargs['tempfile_location']).expanduser().absolute()
  for f in self.files:
- if os.path.exists(f'{constants.TMP}{os.sep}{f[1]}'):
- os.remove(f'{constants.TMP}{os.sep}{f[1]}')
+ if pathlib.Path(tmp, f[1]).exists():
+ os.remove(pathlib.Path(tmp, f[1]))

- def test_api_key(self, url=None, apikey=None):
+ def test_api_key(self):
  '''
  Tests for an expired API key and raises
  dryad2dataverse.exceptions.DataverseBadApiKeyError if
  the API key is bad. Ignores other HTTP errors.
-
- ----------------------------------------
- Parameters:
-
- url : str
- — Base URL to Dataverse installation.
- Defaults to dryad2dataverse.constants.DVURL
-
- apikey : str
- — Default dryad2dataverse.constants.APIKEY.
-
- ----------------------------------------
  '''
  #API validity check appears to come before a PID validity check
  params = {'persistentId': 'doi:000/000/000'} # PID is irrelevant
- if not url:
- url = constants.DVURL
- headers = {'X-Dataverse-key': apikey if apikey else constants.APIKEY}
- headers.update(USER_AGENT)
- bad_test = self.session.get(f'{url}/api/datasets/:persistentId',
+ headers = {'X-Dataverse-key': self.kwargs['api_key'], 'User-agent': USERAGENT}
+ bad_test = self.session.get(f'{self.kwargs["dv_url"]}/api/datasets/:persistentId',
  headers=headers,
  params=params)
  #There's an extra space in the message which Harvard
@@ -97,10 +126,8 @@ class Transfer():
  if bad_test.json().get('message').startswith('Bad api key'):
  try:
  raise exceptions.DataverseBadApiKeyError('Bad API key')
- except exceptions.DataverseBadApiKeyError as e:
- LOGGER.critical('API key has expired or is otherwise invalid')
- LOGGER.exception(e)
- #LOGGER.exception(traceback.format_exc()) #not really necessary
+ except exceptions.DataverseBadApiKeyError as err:
+ LOGGER.exception(err)
  raise
  try: #other errors
  bad_test.raise_for_status()
@@ -124,7 +151,7 @@ class Transfer():
  Returns dataverse authentication header dict.
  i.e.: `{'X-Dataverse-key' : 'APIKEYSTRING'}`
  '''
- return {'X-Dataverse-key' : constants.APIKEY}
+ return {'X-Dataverse-key' : self.kwargs['api_key']}

  @property
  def fileJson(self):
@@ -162,16 +189,15 @@ class Transfer():
  return self.dryad.doi

  @staticmethod
- def _dryad_file_id(url):
+ def _dryad_file_id(url:str):
  '''
  Returns Dryad fileID from dryad file download URL as integer.
- ----------------------------------------
- Parameters:

+ Parameters
+ ----------
  url : str
- Dryad file URL in format
+ Dryad file URL in format
  'https://datadryad.org/api/v2/files/385820/download'.
- ----------------------------------------
  '''
  fid = url.strip('/download')
  fid = int(fid[fid.rfind('/')+1:])
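A worked example of the ID extraction above, using the URL shape from the docstring. Note that str.strip removes a *set* of characters rather than a literal suffix; that is harmless here only because the trailing digits are not in the stripped set.

```python
url = 'https://datadryad.org/api/v2/files/385820/download'

fid = url.strip('/download')        # strips chars from {'/','d','o','w','n','l','a'}
fid = int(fid[fid.rfind('/')+1:])   # at both ends, leaving '.../385820'
assert fid == 385820

# On Python 3.9+ an exact-suffix spelling avoids the subtlety:
# fid = int(url.removesuffix('/download').rsplit('/', 1)[-1])
```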
@@ -182,67 +208,52 @@ class Transfer():
  '''
  Returns Dataverse authentication header as dict.

- ----------------------------------------
- Parameters:
-
+ Parameters
+ ----------
  apikey : str
- Dataverse API key.
- ----------------------------------------
+ Dataverse API key.
  '''
  return {'X-Dataverse-key' : apikey}

- #@staticmethod
- def set_correct_date(self, url=None, hdl=None,
- d_type='distributionDate',
- apikey=None):
+ def set_correct_date(self, hdl=None,
+ d_type='distributionDate'):
  '''
  Sets "correct" publication date for Dataverse.

- Note: dryad2dataverse.serializer maps Dryad 'publicationDate'
+ Parameters
+ ----------
+ hdl : str
+ Persistent identifier for Dataverse study.
+ Defaults to Transfer.dvpid (which can be None if the
+ study has not yet been uploaded).
+ d_type : str
+ Date type. One of 'distributionDate', 'productionDate',
+ 'dateOfDeposit'. Default 'distributionDate'.
+
+ Notes
+ -----
+ self.kwargs are normally read from dryad2dataverse.config.Config
+ instances.
+
+ dryad2dataverse.serializer maps Dryad 'publicationDate'
  to Dataverse 'distributionDate' (see serializer.py ~line 675).

  Dataverse citation date default is ":publicationDate". See
  Dataverse API reference:
- https://guides.dataverse.org/en/4.20/api/native-api.html#id54.
-
- ----------------------------------------
- Parameters:
-
- url : str
- — Base URL to Dataverse installation.
- Defaults to dryad2dataverse.constants.DVURL
-
- hdl : str
- — Persistent indentifier for Dataverse study.
- Defaults to Transfer.dvpid (which can be None if the
- study has not yet been uploaded).
+ <https://guides.dataverse.org/en/4.20/api/native-api.html#id54>.

- d_type : str
- — Date type. One of 'distributionDate', 'productionDate',
- 'dateOfDeposit'. Default 'distributionDate'.
-
- apikey : str
- — Default dryad2dataverse.constants.APIKEY.
- ----------------------------------------
  '''
  try:
- if not url:
- url = constants.DVURL
  if not hdl:
  hdl = self.dvpid
- headers = {'X-Dataverse-key' : apikey}
- if apikey:
- headers = {'X-Dataverse-key' : apikey}
- else:
- headers = {'X-Dataverse-key' : constants.APIKEY}
-
- headers.update(USER_AGENT)
+ headers = {'X-Dataverse-key': self.kwargs['api_key'],
+ 'User-agent': USERAGENT}
  params = {'persistentId': hdl}
- set_date = self.session.put(f'{url}/api/datasets/:persistentId/citationdate',
+ set_date = self.session.put(f'{self.kwargs["dv_url"]}/api/'
+ 'datasets/:persistentId/citationdate',
  headers=headers,
  data=d_type,
- params=params,
- timeout=45)
+ params=params)
  set_date.raise_for_status()

  except (requests.exceptions.HTTPError,
@@ -252,78 +263,59 @@ class Transfer():
  LOGGER.warning(err)
  LOGGER.warning(set_date.text)

- def upload_study(self, url=None, apikey=None, timeout=45, **kwargs):
+ def upload_study(self, **kwargs):
  '''
  Uploads Dryad study metadata to target Dataverse or updates existing.
  Supplying a `targetDv` kwarg creates a new study and supplying a
  `dvpid` kwarg updates a currently existing Dataverse study.

- ----------------------------------------
- Parameters:
-
- url : str
- — URL of Dataverse instance. Defaults to constants.DVURL.
-
- apikey : str
- — API key of user. Defaults to contants.APIKEY.
-
- timeout : int
- — timeout on POST request.
-
- **KEYWORD ARGUMENTS**
-
- One of these is required. Supplying both or neither raises a NoTargetError
+ **kwargs : dict
+ Normally this is one of the two parameters below

+ Other parameters
+ ----------------
  targetDv : str
- Short name of target dataverse. Required if new dataset.
+ Short name of target dataverse. Required if new dataset.
  Specify as targetDv=value.
-
- dvpid = str
- — Dataverse persistent ID (for updating metadata).
+ dvpid : str
+ Dataverse persistent ID (for updating metadata).
  This is not required for new uploads, specify as dvpid=value

- ----------------------------------------
+ Notes
+ -----
+ One of targetDv or dvpid is required.
  '''
- if not url:
- url = constants.DVURL
- if not apikey:
- apikey = constants.APIKEY
- headers = {'X-Dataverse-key' : apikey}
- headers.update(USER_AGENT)
+ headers = {'X-Dataverse-key': self.kwargs['api_key'], 'User-agent': USERAGENT}
  targetDv = kwargs.get('targetDv')
  dvpid = kwargs.get('dvpid')
  #dryFid = kwargs.get('dryFid') #Why did I put this here?
  if not targetDv and not dvpid:
  try:
  raise exceptions.NoTargetError('You must supply one of targetDv \
- (target dataverse) \
- or dvpid (Dataverse persistent ID)')
- except exceptions.NoTargetError as e:
- LOGGER.error('No target dataverse or dvpid supplied')
- LOGGER.exception(e)
+ (target dataverse) \
+ or dvpid (Dataverse persistent ID)')
+ except exceptions.NoTargetError as err:
+ LOGGER.exception(err)
  raise

  if targetDv and dvpid:
- try:
- raise ValueError('Supply only one of targetDv or dvpid')
- except ValueError as e:
- LOGGER.exception(e)
- raise
+ msg = 'Supply only one of targetDv or dvpid'
+ LOGGER.exception(msg)
+ raise exceptions.Dryad2DataverseError(msg)
+
  if not dvpid:
- endpoint = f'{url}/api/dataverses/{targetDv}/datasets'
+ endpoint = f'{self.kwargs["dv_url"]}/api/dataverses/{targetDv}/datasets'
  upload = self.session.post(endpoint,
  headers=headers,
- json=self.dryad.dvJson,
- timeout=timeout)
+ json=self.dryad.dvJson)
  LOGGER.debug(upload.text)
  else:
- endpoint = f'{url}/api/datasets/:persistentId/versions/:draft'
+ endpoint = f'{self.kwargs["dv_url"]}/api/datasets/:persistentId/versions/:draft'
  params = {'persistentId':dvpid}
  #Yes, dataverse uses *different* json for edits
  upload = self.session.put(endpoint, params=params,
  headers=headers,
- json=self.dryad.dvJson['datasetVersion'],
- timeout=timeout)
+ json=self.dryad.dvJson['datasetVersion'])
  #self._dvrecord = upload.json()
  LOGGER.debug(upload.text)

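A hedged usage sketch of the two mutually exclusive call patterns documented above; the collection name and persistent ID are placeholders.

```python
# New dataset: POSTs the serializer's dvJson to /api/dataverses/{targetDv}/datasets.
trans.upload_study(targetDv='dryad')          # placeholder collection short name
print(trans.dvpid)                            # persistent ID generated by the upload

# Metadata update: PUTs the datasetVersion JSON to the :draft endpoint.
trans.upload_study(dvpid='doi:10.80240/FK2/AB12CD')  # placeholder persistent ID

# Supplying neither kwarg raises NoTargetError; supplying both raises
# Dryad2DataverseError.
```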
@@ -331,20 +323,16 @@ class Transfer():
  updata = upload.json()
  self.dvStudy = updata
  if updata.get('status') != 'OK':
+ msg = ('Status return is not OK. '
+ f'{upload.status_code}: '
+ f'{upload.reason}. '
+ f'{upload.request.url} '
+ f'{upload.text}')
  try:
- raise exceptions.DataverseUploadError(('Status return is not OK.'
- f'{upload.status_code}: '
- f'{upload.reason}. '
- f'{upload.request.url} '
- f'{upload.text}'))
- except exceptions.DataverseUploadError as e:
- LOGGER.exception(e)
+ raise exceptions.DataverseUploadError(msg)
+ except exceptions.DataverseUploadError as err:
+ LOGGER.exception(err)
  LOGGER.exception(traceback.format_exc())
- raise exceptions.DataverseUploadError(('Status return is not OK.'
- f'{upload.status_code}: '
- f'{upload.reason}. '
- f'{upload.request.url} '
- f'{upload.text}'))
  upload.raise_for_status()
  except Exception as e: # Only accessible via non-requests exception
  LOGGER.exception(e)
@@ -362,15 +350,12 @@ class Transfer():
  '''
  Returns the hex digest of a file (formerly just md5sum).

- ----------------------------------------
- Parameters:
-
+ Parameters
+ ----------
  infile : str
- Complete path to target file.
-
- dig_type : str or None
- — Digest type
- ----------------------------------------
+ Complete path to target file.
+ dig_type : Union[str, None]
+ Digest type
  '''
  #From Ryan Scherle
  #When Dryad calculates a digest, it only uses MD5.
@@ -401,55 +386,44 @@ class Transfer():
  fblock = m.read(blocksize)
  curvalue = HASHTABLE[dig_type](fblock, curvalue)
  return curvalue
+ LOGGER.exception('Unable to determine hash type for %s: %s', infile, dig_type)
  raise exceptions.HashError(f'Unable to determine hash type for {infile}: {dig_type}')


- def download_file(self, url=None, filename=None, tmp=None,
- size=None, chk=None, timeout=45, **kwargs):
+ def download_file(self, url=None, filename=None,
+ size=None, chk=None, **kwargs):
  '''
- Downloads a file via requests streaming and saves to constants.TMP.
- returns checksum on success and an exception on failure.
-
- ----------------------------------------
- Required keyword arguments:
+ Downloads a file via requests streaming and saves to the
+ defined temporary file directory.
+ Returns checksum on success and an exception on failure.

+ Parameters
+ ----------
  url : str
- URL of download.
-
+ URL of download.
  filename : str
- Output file name.
-
- timeout : int
- — Requests timeout.
-
- tmp : str
- — Temporary directory for downloads.
- Defaults to dryad2dataverse.constants.TMP.
-
+ Output file name.
  size : int
- Reported file size in bytes.
- Defaults to dryad2dataverse.constants.MAX_UPLOAD.
-
- digest_type: str
- — checksum type (ie, md5, sha-256, etc)
-
+ Reported file size in bytes.
  chk : str
- checksum of file (if available and known).
- ----------------------------------------
+ checksum of file (if available and known).
+ kwargs : dict
+
+ Other parameters
+ ----------------
+ digest_type : str
+ checksum type (i.e. md5, sha-256, etc.)
  '''
+ #pylint: disable=too-many-branches
  LOGGER.debug('Start download sequence')
- LOGGER.debug('MAX SIZE = %s', constants.MAX_UPLOAD)
+ LOGGER.debug('MAX SIZE = %s', self.kwargs['max_upload'])
  LOGGER.debug('Filename: %s, size=%s', filename, size)
- if not tmp:
- tmp = constants.TMP
- if tmp.endswith(os.sep):
- tmp = tmp[:-1]
-
+ tmp = pathlib.Path(self.kwargs['tempfile_location']).expanduser().absolute()
  if size:
- if size > constants.MAX_UPLOAD:
+ if size > self.kwargs['max_upload']:
  #TOO BIG
  LOGGER.warning('%s: File %s exceeds '
- 'Dataverse MAX_UPLOAD size. Skipping download.',
+ 'Dataverse maximum upload size. Skipping download.',
  self.doi, filename)
  md5 = 'this_file_is_too_big_to_upload__' #HA HA
  for i in self._files:
@@ -458,27 +432,28 @@ class Transfer():
  LOGGER.debug('Stop download sequence with large file skip')
  return md5
  try:
- down = self.session.get(url, timeout=timeout, stream=True)
+ down = self.session.get(url, stream=True,
+ headers=config.Config.update_headers(**self.kwargs))
  down.raise_for_status()
- with open(f'{tmp}{os.sep}{filename}', 'wb') as fi:
+ with open(pathlib.Path(tmp,filename), 'wb') as fi:
  for chunk in down.iter_content(chunk_size=8192):
  fi.write(chunk)

  #verify size
  #https://stackoverflow.com/questions/2104080/how-can-i-check-file-size-in-python'
  if size:
- checkSize = os.stat(f'{tmp}{os.sep}{filename}').st_size
+ checkSize = os.stat(pathlib.Path(tmp,filename)).st_size
  if checkSize != size:
  try:
- raise exceptions.DownloadSizeError('Download size does not match '
- 'reported size')
+ raise exceptions.DownloadSizeError('Download size does not '
+ 'match reported size')
  except exceptions.DownloadSizeError as e:
  LOGGER.exception(e)
  raise
  #now check the md5
  md5 = None
  if chk and kwargs.get('digest_type') in HASHTABLE:
- md5 = Transfer._check_md5(f'{tmp}{os.sep}{filename}',
+ md5 = Transfer._check_md5(pathlib.Path(tmp,filename),
  kwargs['digest_type'])
  if md5 != chk:
  try:
@@ -497,27 +472,27 @@ class Transfer():
  requests.exceptions.ConnectionError) as err:
  LOGGER.critical('Unable to download %s', url)
  LOGGER.exception(err)
- raise exceptions.DataverseDownloadError
+ raise
+ except Exception as err:
+ LOGGER.exception(err)
+ raise

  def download_files(self, files=None):
  '''
  Bulk downloader for files.

- ----------------------------------------
- Parameters:
-
+ Parameters
+ ----------
  files : list
- Items in list can be tuples or list with a minimum of:
-
- (dryaddownloadurl, filenamewithoutpath, [md5sum])
-
- The md5 sum should be the last member of the tuple.
-
- Defaults to self.files.
+ Items in list can be tuples or lists with a minimum of:
+ `(dryaddownloadurl, filenamewithoutpath, [md5sum])`
+ The md5 sum should be the last member of the tuple.
+ Defaults to self.files.

- Normally used without arguments to download all the associated
- files with a Dryad study.
- ----------------------------------------
+ Notes
+ -----
+ Normally used without arguments to download all the files
+ associated with a Dryad study.
  '''
  if not files:
  files = self.files
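In ordinary use the bulk downloader runs without arguments, per the Notes above; an explicit list would follow the documented tuple layout (the URL and checksum below are placeholders).

```python
# Download everything attached to the Dryad study into tempfile_location;
# reported sizes and, when known, checksums are verified along the way.
trans.download_files()

# An explicit list is also accepted; minimum tuple per the docstring,
# with the md5 sum last:
# trans.download_files([('https://datadryad.org/api/v2/files/385820/download',
#                        'data.csv', 'd41d8cd98f00b204e9800998ecf8427e')])
```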
@@ -534,7 +509,7 @@ class Transfer():
  LOGGER.exception('Unable to download file with info %s\n%s', f, e)
  raise

- def file_lock_check(self, study, dv_url, apikey=None, count=0):
+ def file_lock_check(self, study, count=0):
  '''
  Checks for a study lock

@@ -543,37 +518,21 @@ class Transfer():
  halts file ingest, there should be no locks on a
  Dataverse study before performing a data file upload.

- ----------------------------------------
- Parameters:
-
+ Parameters
+ ----------
  study : str
- Persistent indentifer of study.
-
- dv_url : str
- — URL to base Dataverse installation.
-
- apikey : str
- — API key for user.
- If not present authorization defaults to self.auth.
-
+ Persistent identifier of study.
  count : int
- Number of times the function has been called. Logs
- lock messages only on 0.
- ----------------------------------------
- '''
- if dv_url.endswith('/'):
- dv_url = dv_url[:-1]
- if apikey:
- headers = {'X-Dataverse-key': apikey}
- else:
- headers = self.auth
-
- headers.update(USER_AGENT)
+ Number of times the function has been called. Logs
+ lock messages only on 0.
+ '''
+ headers = {'X-Dataverse-key': self.kwargs['api_key'], 'User-agent': USERAGENT}
  params = {'persistentId': study}
  try:
- lock_status = self.session.get(f'{dv_url}/api/datasets/:persistentId/locks',
+ lock_status = self.session.get(f'{self.kwargs["dv_url"]}'
+ '/api/datasets/:persistentId/locks',
  headers=headers,
- params=params, timeout=300)
+ params=params)
  lock_status.raise_for_status()
  if lock_status.json().get('data'):
  if count == 0:
@@ -589,7 +548,7 @@ class Transfer():
  #return True #Should I raise here?
  raise

- def force_notab_unlock(self, study, dv_url, apikey=None):
+ def force_notab_unlock(self, study):
  '''
  Checks for a study lock and forcibly unlocks and uningests
  to prevent tabular file processing. Required if mime and filename
@@ -597,39 +556,31 @@ class Transfer():

  **Forcible unlocks require a superuser API key.**

- ----------------------------------------
- Parameters:
-
+ Parameters
+ ----------
  study : str
- Persistent indentifer of study.
-
- dv_url : str
- — URL to base Dataverse installation.
-
- apikey : str
- — API key for user.
- If not present authorization defaults to self.auth.
- ----------------------------------------
- '''
- if dv_url.endswith('/'):
- dv_url = dv_url[:-1]
- if apikey:
- headers = {'X-Dataverse-key': apikey}
- else:
- headers = self.auth
-
- headers.update(USER_AGENT)
+ Persistent identifier of study.
+ '''
+ headers = {'X-Dataverse-key': self.kwargs['api_key'], 'User-agent': USERAGENT}
  params = {'persistentId': study}
- lock_status = self.session.get(f'{dv_url}/api/datasets/:persistentId/locks',
+ lock_status = self.session.get(f'{self.kwargs["dv_url"]}/api/datasets/:persistentId/locks',
  headers=headers,
- params=params, timeout=300)
- lock_status.raise_for_status()
+ params=params)
+ try:
+ lock_status.raise_for_status()
+ except (requests.exceptions.HTTPError,
+ requests.exceptions.ConnectionError) as err:
+ LOGGER.exception(err)
+ raise
+ except Exception as err:
+ LOGGER.exception(err)
+ raise
  if lock_status.json()['data']:
  LOGGER.warning('Study %s has been locked', study)
  LOGGER.warning('Lock info:\n%s', lock_status.json())
- force_unlock = self.session.delete(f'{dv_url}/api/datasets/:persistentId/locks',
- params=params, headers=headers,
- timeout=300)
+ force_unlock = self.session.delete(f'{self.kwargs["dv_url"]}/api/'
+ 'datasets/:persistentId/locks',
+ params=params, headers=headers)
  force_unlock.raise_for_status()
  LOGGER.warning('Lock removed for %s', study)
  LOGGER.warning('Lock status:\n %s', force_unlock.json())
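The two lock strategies above can also be driven by hand; a sketch with a placeholder study PID (the forcible path requires a superuser key, and the truthy return of file_lock_check while locked is inferred from the uploader's own loop).

```python
import time

pid = 'doi:10.80240/FK2/AB12CD'        # placeholder study persistent ID

# Polling strategy: file_lock_check() returns a truthy value while the
# study is locked, so wait out tabular ingest the way upload_file() does.
count = 0
while trans.file_lock_check(pid, count):
    time.sleep(15)                     # same cadence as the uploader
    count += 1

# Superuser-only alternative: drop the lock and uningest immediately.
# trans.force_notab_unlock(pid)
```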
@@ -655,86 +606,68 @@ class Transfer():
  hashtype=None,
  #md5=None, studyId=None, dest=None,
  digest=None, studyId=None, dest=None,
- fprefix=None, force_unlock=False, timeout=300):
+ fprefix=None, force_unlock=False):
  '''
  Uploads file to Dataverse study. Returns a tuple of the
  dryadFid (or None) and Dataverse JSON from the POST request.
  Failures produce JSON with different status messages
- rather than raising an exception.
-
- ----------------------------------------
- Parameters:
-
+ rather than raising an exception, unless it's some
+ horrendous failure whereupon you will get an actual
+ exception.
+
+ Parameters
+ ----------
+ dryadURL : str
+ Dryad download URL
  filename : str
- Filename (not including path).
-
+ Filename (not including path).
  mimetype : str
- Mimetype of file.
-
+ Mimetype of file.
  size : int
- Size in bytes.
-
+ Size in bytes.
  studyId : str
- Persistent Dataverse study identifier.
- Defaults to Transfer.dvpid.
-
- dest : str
- — Destination dataverse installation url.
- Defaults to constants.DVURL.
+ Persistent Dataverse study identifier.
+ Defaults to Transfer.dvpid.
  hashtype: str
  original Dryad hash type
-
- #md5 : str
- digest
- — md5 checksum for file.
-
  fprefix : str
- Path to file, not including a trailing slash.
-
- timeout : int
- - Timeout in seconds for POST request. Default 300.
-
+ Path to file, not including a trailing slash.
  dryadUrl : str
- - Dryad download URL if you want to include a Dryad file id.
-
-
+ Dryad download URL if you want to include a Dryad file id.
  force_unlock : bool
- Attempt forcible unlock instead of waiting for tabular
- file processing.
- Defaults to False.
- The Dataverse `/locks` endpoint blocks POST and DELETE requests
- from non-superusers (undocumented as of 31 March 2021).
- **Forcible unlock requires a superuser API key.**
-
- ----------------------------------------
- '''
- #return locals()
- #TODONE remove above
+ Attempt forcible unlock instead of waiting for tabular
+ file processing.
+ Defaults to False.
+ The Dataverse `/locks` endpoint blocks POST and DELETE requests
+ from non-superusers (undocumented as of 31 March 2021).
+ **Forcible unlock requires a superuser API key.**
+ '''
+ #pylint: disable = consider-using-with, too-many-arguments, too-many-positional-arguments
+ #pylint:disable=too-many-locals, too-many-branches, too-many-statements
+ #Fix the arguments one day
  if not studyId:
  studyId = self.dvpid
- if not dest:
- dest = constants.DVURL
- if not fprefix:
- fprefix = constants.TMP
+ dest = self.kwargs['dv_url']
+ fprefix = pathlib.Path(self.kwargs['tempfile_location']).expanduser().absolute()
  if dryadUrl:
  fid = dryadUrl.strip('/download')
  fid = int(fid[fid.rfind('/')+1:])
  else:
  fid = 0 #dummy fid for non-Dryad use
  params = {'persistentId' : studyId}
- upfile = fprefix + os.sep + filename[:]
+ upfile = pathlib.Path(fprefix, filename[:])
  badExt = filename[filename.rfind('.'):].lower()
  #Descriptions are technically possible, although how to add
  #them is buried in Dryad's API documentation
  dv4meta = {'label' : filename[:], 'description' : descr}
  #if mimetype == 'application/zip' or filename.lower().endswith('.zip'):
- if mimetype == 'application/zip' or badExt in constants.NOTAB:
+ if mimetype == 'application/zip' or badExt in self.kwargs.get('notab',[]):
  mimetype = 'application/octet-stream' # stop unzipping automatically
  filename += '.NOPROCESS' # Also screw with their naming convention
  #debug log about file names to see what is up with XLSX
  #see doi:10.5061/dryad.z8w9ghxb6
  LOGGER.debug('File renamed to %s for upload', filename)
- if size >= constants.MAX_UPLOAD:
+ if size >= self.kwargs['max_upload']:
  fail = (fid, {'status' : 'Failure: MAX_UPLOAD size exceeded'})
  self.fileUpRecord.append(fail)
  LOGGER.warning('%s: File %s of '
@@ -748,14 +681,21 @@ class Transfer():
  ctype = {'Content-type' : multi.content_type}
  tmphead = self.auth.copy()
  tmphead.update(ctype)
- tmphead.update(USER_AGENT)
+ tmphead.update({'User-agent':USERAGENT})
  url = dest + '/api/datasets/:persistentId/add'
- try:
- upload = self.session.post(url, params=params,
+ upload = self.session.post(url, params=params,
  headers=tmphead,
- data=multi, timeout=timeout)
- #print(upload.text)
+ data=multi)
+ try:
  upload.raise_for_status()
+
+ except (requests.exceptions.HTTPError,
+ requests.exceptions.ConnectionError):
+ LOGGER.critical('Error %s: %s', upload.status_code, upload.reason)
+ return (fid, {'status' : f'Failure: Reason - {upload.status_code}: {upload.reason}'})
+
+
+ try:
  self.fileUpRecord.append((fid, upload.json()))
  upmd5 = upload.json()['data']['files'][0]['dataFile']['checksum']['value']
  #Dataverse hash type
@@ -773,11 +713,11 @@ class Transfer():
  #if md5 and (upmd5 != md5):
  if upmd5 != comparator:
  try:
- raise exceptions.HashError(f'{_type} mismatch:\nlocal: {comparator}\nuploaded: {upmd5}')
+ raise exceptions.HashError(f'{_type} mismatch:\nlocal: '
+ f'{comparator}\nuploaded: {upmd5}')
  except exceptions.HashError as e:
  LOGGER.exception(e)
- raise
-
+ return (fid, {'status': e})
  #Make damn sure that the study isn't locked because of
  #tab file processing
  ##SPSS files still process despite spoofing MIME and extension
@@ -787,12 +727,12 @@ class Transfer():
  #fid not required for unlock
  #self.force_notab_unlock(studyId, dest, fid)
  if force_unlock:
- self.force_notab_unlock(studyId, dest)
+ self.force_notab_unlock(studyId)
  else:
  count = 0
  wait = True
  while wait:
- wait = self.file_lock_check(studyId, dest, count=count)
+ wait = self.file_lock_check(studyId, count)
  if wait:
  time.sleep(15) # Don't hit it too often
  count += 1
@@ -800,51 +740,40 @@ class Transfer():

  return (fid, upload.json())

- except Exception as e:
+ except requests.exceptions.JSONDecodeError as e:
+ LOGGER.warning('JSON error with upload')
  LOGGER.exception(e)
- try:
- reason = upload.json()['message']
- LOGGER.warning(upload.json())
- return (fid, {'status' : f'Failure: {reason}'})
- except Exception as e:
- LOGGER.warning('Further exceptions!')
- LOGGER.exception(e)
- LOGGER.warning(upload.text)
- return (fid, {'status' : f'Failure: Reason {upload.reason}'})
+ return (fid, {'status' : f'Failure: Reason {upload.reason}'})
+
+ #It can crash later
+ except Exception as f_plus: #pylint: disable=broad-except
+ LOGGER.exception(f_plus)
+ return (fid, {'status' : f'Failure: Reason: {f_plus}'})

  def upload_files(self, files=None, pid=None, fprefix=None, force_unlock=False):
  '''
  Uploads multiple files to study with persistentId pid.
  Returns a list of the original tuples plus JSON responses.

- ----------------------------------------
- Parameters:
-
+ Parameters
+ ----------
  files : list
- List contains tuples with
+ List contains tuples with
  (dryadDownloadURL, filename, mimetype, size).
-
  pid : str
- Defaults to self.dvpid, which is generated by calling
- dryad2dataverse.transfer.Transfer.upload_study().
-
- fprefix : str
- — File location prefix.
- Defaults to dryad2dataverse.constants.TMP
-
+ Defaults to self.dvpid, which is generated by calling
+ dryad2dataverse.transfer.Transfer.upload_study().
  force_unlock : bool
- Attempt forcible unlock instead of waiting for tabular
- file processing.
- Defaults to False.
- The Dataverse `/locks` endpoint blocks POST and DELETE requests
- from non-superusers (undocumented as of 31 March 2021).
- **Forcible unlock requires a superuser API key.**
- ----------------------------------------
+ Attempt forcible unlock instead of waiting for tabular
+ file processing.
+ Defaults to False.
+ The Dataverse `/locks` endpoint blocks POST and DELETE requests
+ from non-superusers (undocumented as of 31 March 2021).
+ **Forcible unlock requires a superuser API key.**
  '''
  if not files:
  files = self.files
- if not fprefix:
- fprefix = constants.TMP
+ fprefix = pathlib.Path(self.kwargs['tempfile_location']).expanduser().absolute()
  out = []
  for f in files:
  #out.append(self.upload_file(f[0], f[1], f[2], f[3],
@@ -860,24 +789,17 @@ class Transfer():
  '''
  Uploads Dryad json as a separate file for archival purposes.

- ----------------------------------------
- Parameters:
-
+ Parameters
+ ----------
  studyId : str
- Dataverse persistent identifier.
- Default dryad2dataverse.transfer.Transfer.dvpid,
- which is only generated on
- dryad2dataverse.transfer.Transfer.upload_study()
-
- dest : str
- — Base URL for transfer.
- Default dryad2datavese.constants.DVURL
- ----------------------------------------
+ Dataverse persistent identifier.
+ Default dryad2dataverse.transfer.Transfer.dvpid,
+ which is only generated on
+ dryad2dataverse.transfer.Transfer.upload_study()
  '''
  if not studyId:
  studyId = self.dvpid
- if not dest:
- dest = constants.DVURL
+ dest = self.kwargs['dv_url']
  if not self.jsonFlag:
  url = dest + '/api/datasets/:persistentId/add'
  pack = io.StringIO(json.dumps(self.dryad.dryadJson))
@@ -905,7 +827,6 @@ class Transfer():
  except (requests.exceptions.HTTPError,
  requests.exceptions.ConnectionError) as err:
  LOGGER.error('Unable to upload Dryad JSON to %s', studyId)
- LOGGER.error('ERROR message: %s', meta.text)
  LOGGER.exception(err)
  #And further checking as to what is happening
  self.fileUpRecord.append((0, {'status':'Failure: Unable to upload Dryad JSON'}))
@@ -914,8 +835,9 @@ class Transfer():
  except Exception as err:
  LOGGER.error('Unable to upload Dryad JSON')
  LOGGER.exception(err)
+ raise

- def delete_dv_file(self, dvfid, dvurl=None, key=None):
+ def delete_dv_file(self, dvfid)->bool:
  #WTAF curl -u $API_TOKEN: -X DELETE
  #https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/123

@@ -926,54 +848,32 @@ class Transfer():

  Returns 1 on success (204 response), or 0 on other response.

- ----------------------------------------
- Parameters:
-
- dvurl : str
- — Base URL of dataverse instance.
- Defaults to dryad2dataverse.constants.DVURL.
-
+ Parameters
+ ----------
  dvfid : str
- Dataverse file ID number.
- ----------------------------------------
+ Dataverse file ID number.
  '''
- if not dvurl:
- dvurl = constants.DVURL
- if not key:
- key = constants.APIKEY
-
- delme = self.session.delete(f'{dvurl}/dvn/api/data-deposit/v1.1/swordv2/edit-media'
+ delme = self.session.delete(f'{self.kwargs["dv_url"]}/'
+ 'dvn/api/data-deposit/v1.1/swordv2/edit-media'
  f'/file/{dvfid}',
- auth=(key, ''))
+ auth=(self.kwargs['api_key'], ''))
  if delme.status_code == 204:
  self.fileDelRecord.append(dvfid)
  return 1
  return 0

- def delete_dv_files(self, dvfids=None, dvurl=None, key=None):
+ def delete_dv_files(self, dvfids=None):
  '''
  Deletes all files in list of Dataverse file ids from
  a Dataverse installation.

- ----------------------------------------
- Parameters:
-
+ Parameters
+ ----------
  dvfids : list
- List of Dataverse file ids.
- Defaults to dryad2dataverse.transfer.Transfer.fileDelRecord.
-
- dvurl : str
- — Base URL of Dataverse. Defaults to dryad2dataverse.constants.DVURL.
-
- key : str
- — API key for Dataverse. Defaults to dryad2dataverse.constants.APIKEY.
- ----------------------------------------
+ List of Dataverse file ids.
+ Defaults to dryad2dataverse.transfer.Transfer.fileDelRecord.
  '''
  #if not dvfids:
  # dvfids = self.fileDelRecord
- if not dvurl:
- dvurl = constants.DVURL
- if not key:
- key = constants.APIKEY
  for fid in dvfids:
- self.delete_dv_file(fid, dvurl, key)
+ self.delete_dv_file(fid)
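
Finally, a cleanup sketch against the SWORD delete endpoint wrapped above; the file IDs are placeholders. Note that dvfids has no effective default in practice, since the fallback to fileDelRecord is commented out.

```python
# delete_dv_file() returns 1 on HTTP 204 and records the ID in fileDelRecord.
if trans.delete_dv_file('12345'):      # placeholder Dataverse file ID
    print('deleted')

# Bulk variant; pass the list explicitly (see note above).
trans.delete_dv_files(dvfids=['12346', '12347'])
```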