dryad2dataverse 0.6.2__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,6 @@ Modules included:
23
23
  dryad2dataverse.exceptions : Custom exceptions.
24
24
  '''
25
25
 
26
- VERSION = (0, 6, 2)
26
+ VERSION = (0, 7, 1)
27
27
 
28
28
  __version__ = '.'.join([str(x) for x in VERSION])
@@ -202,7 +202,11 @@ class Serializer():
202
202
  files = page['_embedded'].get('stash:files')
203
203
  if files:
204
204
  for f in files:
205
- downLink = f['_links']['stash:file-download']['href']
205
+ #This broke with this commit:
206
+ # https://github.com/datadryad/dryad-app/commit/b8a333ba34b14e55cbc1d7ed5aa4451e0f41db66
207
+
208
+ #downLink = f['_links']['stash:file-download']['href']
209
+ downLink = f['_links']['stash:download']['href']
206
210
  downLink = f'{constants.DRYURL}{downLink}'
207
211
  name = f['path']
208
212
  mimeType = f['mimeType']
@@ -9,7 +9,9 @@ import logging
9
9
  import os
10
10
  import time
11
11
  import traceback
12
+ import zlib #crc32, adler32
12
13
 
14
+ import Crypto.Hash.MD2 #md2
13
15
  import requests
14
16
  from requests.adapters import HTTPAdapter
15
17
  from requests_toolbelt.multipart.encoder import MultipartEncoder
@@ -20,6 +22,15 @@ from dryad2dataverse import exceptions
20
22
  LOGGER = logging.getLogger(__name__)
21
23
  URL_LOGGER = logging.getLogger('urllib3')
22
24
 
25
+ HASHTABLE = {'adler-32' : zlib.adler32, #zlib?
26
+ 'crc-32' : zlib.crc32, #zlib
27
+ 'md2' : Crypto.Hash.MD2, #insecure
28
+ 'md5' : hashlib.md5,
29
+ 'sha-1' : hashlib.sha1,
30
+ 'sha-256' : hashlib.sha256,
31
+ 'sha-384' : hashlib.sha384,
32
+ 'sha-512': hashlib.sha512}
33
+
23
34
  class Transfer():
24
35
  '''
25
36
  Transfers metadata and data files from a
@@ -342,7 +353,7 @@ class Transfer():
342
353
  return self.dvpid
343
354
 
344
355
  @staticmethod
345
- def _check_md5(infile):
356
+ def _check_md5(infile, dig_type):
346
357
  '''
347
358
  Returns the md5 checksum of a file.
348
359
 
@@ -351,25 +362,51 @@ class Transfer():
351
362
 
352
363
  infile : str
353
364
  — Complete path to target file.
365
+
366
+ dig_type : str or None
367
+ — Digest type
354
368
  ----------------------------------------
355
369
  '''
370
+ #From Ryan Scherle
371
+ #When Dryad calculates a digest, it only uses MD5.
372
+ #But if you have precomputed some other type of digest, we should accept it.
373
+ #The list of allowed values is:
374
+ #('adler-32','crc-32','md2','md5','sha-1','sha-256','sha-384','sha-512')
375
+ #hashlib doesn't support adler-32, crc-32, md2
376
+
356
377
  blocksize = 2**16
378
+ #Well, this is inelegant
357
379
  with open(infile, 'rb') as m:
358
- fmd5 = hashlib.md5()
359
- fblock = m.read(blocksize)
360
- while fblock:
361
- fmd5.update(fblock)
380
+ #fmd5 = hashlib.md5()
381
+ ## var name kept for posterity. Maybe refactor
382
+ if dig_type in ['sha-1', 'sha-256', 'sha-384', 'sha-512', 'md5', 'md2']:
383
+ if dig_type == 'md2':
384
+ fmd5 = Crypto.Hash.MD2.new()
385
+ else:
386
+ fmd5 = HASHTABLE[dig_type]()
387
+ fblock = m.read(blocksize)
388
+ while fblock:
389
+ fmd5.update(fblock)
390
+ fblock = m.read(blocksize)
391
+ return fmd5.hexdigest()
392
+ if dig_type in ['adler-32', 'crc-32']:
362
393
  fblock = m.read(blocksize)
363
- return fmd5.hexdigest()
394
+ curvalue = HASHTABLE[dig_type](fblock)
395
+ while fblock:
396
+ fblock = m.read(blocksize)
397
+ curvalue = HASHTABLE[dig_type](fblock, curvalue)
398
+ return curvalue
399
+ raise exceptions.HashError(f'Unable to determine hash type for{infile}: {dig_type}')
364
400
 
365
- def download_file(self, url, filename, tmp=None,
366
- size=None, chk=None, timeout=45):
401
+
402
+ def download_file(self, url=None, filename=None, tmp=None,
403
+ size=None, chk=None, timeout=45, **kwargs):
367
404
  '''
368
405
  Downloads a file via requests streaming and saves to constants.TMP.
369
- returns md5sum on success and an exception on failure.
406
+ returns checksum on success and an exception on failure.
370
407
 
371
408
  ----------------------------------------
372
- Parameters:
409
+ Required keyword arguments:
373
410
 
374
411
  url : str
375
412
  — URL of download.
@@ -388,8 +425,11 @@ class Transfer():
388
425
  — Reported file size in bytes.
389
426
  Defaults to dryad2dataverse.constants.MAX_UPLOAD.
390
427
 
428
+ digest_type: str
429
+ — checksum type (ie, md5, sha-256, etc)
430
+
391
431
  chk : str
392
- - md5 sum of file (if available and known).
432
+ — checksum of file (if available and known).
393
433
  ----------------------------------------
394
434
  '''
395
435
  LOGGER.debug('Start download sequence')
@@ -430,11 +470,13 @@ class Transfer():
430
470
  LOGGER.exception(e)
431
471
  raise
432
472
  #now check the md5
433
- md5 = Transfer._check_md5(f'{tmp}{os.sep}{filename}')
434
- if chk:
473
+ md5 = None
474
+ if chk and kwargs.get('digest_type') in HASHTABLE:
475
+ md5 = Transfer._check_md5(f'{tmp}{os.sep}{filename}',
476
+ kwargs['digest_type'])
435
477
  if md5 != chk:
436
478
  try:
437
- raise exceptions.HashError('Hex digest mismatch: {md5} : {chk}')
479
+ raise exceptions.HashError(f'Hex digest mismatch: {md5} : {chk}')
438
480
  #is this really what I want to do on a bad checksum?
439
481
  except exceptions.HashError as e:
440
482
  LOGGER.exception(e)
@@ -474,7 +516,13 @@ class Transfer():
474
516
  files = self.files
475
517
  try:
476
518
  for f in files:
477
- self.download_file(f[0], f[1], size=f[3], chk=f[-1])
519
+ self.download_file(url=f[0],
520
+ filename=f[1],
521
+ mimetype=f[2],
522
+ size=f[3],
523
+ descr=f[4],
524
+ digest_type=f[5],
525
+ chk=f[-1])
478
526
  except exceptions.DataverseDownloadError as e:
479
527
  LOGGER.exception('Unable to download file with info %s\n%s', f, e)
480
528
  raise
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dryad2dataverse
3
- Version: 0.6.2
3
+ Version: 0.7.1
4
4
  Summary: Utility for copying and syncing data from a Dryad data repository to a Dataverse repository
5
5
  Author-email: Paul Lesack <paul.lesack@ubc.ca>
6
6
  Project-URL: Homepage, https://ubc-library-rc.github.io/dryad2dataverse
@@ -21,6 +21,7 @@ Requires-Dist: certifi >=2022.12.7
21
21
  Requires-Dist: charset-normalizer >=2.0.4
22
22
  Requires-Dist: chardet >=3.0.4
23
23
  Requires-Dist: idna >=2.10
24
+ Requires-Dist: pycryptodome >=3.20.0
24
25
  Requires-Dist: requests >=2.26.0
25
26
  Requires-Dist: requests-toolbelt >=0.9.1
26
27
  Requires-Dist: urllib3 >=1.26.6
@@ -0,0 +1,13 @@
1
+ dryad2dataverse/__init__.py,sha256=OM5T0B0PkVEvsHStz5XY4bdEAmWI3P_r3wy6xo_vQF4,712
2
+ dryad2dataverse/constants.py,sha256=ZfD2N0f742nnP8NPUV0QsDdVVAbrW-3Py8Lg9al1Z5c,1429
3
+ dryad2dataverse/exceptions.py,sha256=oIP1_fSEvLF3HpK6gOYb05vUisY-IAxwXZDeNoAvCPM,1008
4
+ dryad2dataverse/handlers.py,sha256=Xb0vvs1HE92qaK6g-Gu3eyHkLrSwU0-RQjLcl6FZPUY,1487
5
+ dryad2dataverse/monitor.py,sha256=qj-jXl3_SJBPof1qKOiSHQCvc083PHH2afkl-y_qQAU,26267
6
+ dryad2dataverse/serializer.py,sha256=1RKfV6flLCR61s51MqY-AUMIMOoLrxai3Wi9cOGtt1s,33706
7
+ dryad2dataverse/transfer.py,sha256=W8b-sHTjClM-CzVx_OrBAsrZ5hQ1-XmTpq1qB4PFlVA,36233
8
+ dryad2dataverse/scripts/dryadd.py,sha256=chQVEAYWTHvKa5QZH0PIj1EgBlJO4qd1Xw2vkf1c_i8,26291
9
+ dryad2dataverse-0.7.1.dist-info/METADATA,sha256=fttUJsmVA2e0to1oE5JJc2qH-DQO8ByR7LpbOVEdurc,3297
10
+ dryad2dataverse-0.7.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
11
+ dryad2dataverse-0.7.1.dist-info/entry_points.txt,sha256=9kBsBa5SivAtfAox__vZGL7H-HI7Vd-jGztCh_eIJEc,63
12
+ dryad2dataverse-0.7.1.dist-info/top_level.txt,sha256=0X45AghpKfL69Oc51sRddeiHtq8o-OyOhFX3AMal6YI,16
13
+ dryad2dataverse-0.7.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.2)
2
+ Generator: bdist_wheel (0.43.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,13 +0,0 @@
1
- dryad2dataverse/__init__.py,sha256=j0GU7htgnQj_RNsQmz3ni5uBcUgipdqhwsvpakMaWFM,712
2
- dryad2dataverse/constants.py,sha256=ZfD2N0f742nnP8NPUV0QsDdVVAbrW-3Py8Lg9al1Z5c,1429
3
- dryad2dataverse/exceptions.py,sha256=oIP1_fSEvLF3HpK6gOYb05vUisY-IAxwXZDeNoAvCPM,1008
4
- dryad2dataverse/handlers.py,sha256=Xb0vvs1HE92qaK6g-Gu3eyHkLrSwU0-RQjLcl6FZPUY,1487
5
- dryad2dataverse/monitor.py,sha256=qj-jXl3_SJBPof1qKOiSHQCvc083PHH2afkl-y_qQAU,26267
6
- dryad2dataverse/serializer.py,sha256=fM5owzRfdZb799fTg45n0iqzBWZSUHpTp2pXnuKg_z0,33476
7
- dryad2dataverse/transfer.py,sha256=tfuSSfOsXTCMEJ_K65J0hyOI9O_5GFUHEpPyNQGXVbs,34125
8
- dryad2dataverse/scripts/dryadd.py,sha256=chQVEAYWTHvKa5QZH0PIj1EgBlJO4qd1Xw2vkf1c_i8,26291
9
- dryad2dataverse-0.6.2.dist-info/METADATA,sha256=t7WcbZ40n3nZOHrQYTGCl32Fi2tCuLEhtKus_PKYMTg,3260
10
- dryad2dataverse-0.6.2.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
11
- dryad2dataverse-0.6.2.dist-info/entry_points.txt,sha256=9kBsBa5SivAtfAox__vZGL7H-HI7Vd-jGztCh_eIJEc,63
12
- dryad2dataverse-0.6.2.dist-info/top_level.txt,sha256=0X45AghpKfL69Oc51sRddeiHtq8o-OyOhFX3AMal6YI,16
13
- dryad2dataverse-0.6.2.dist-info/RECORD,,