dryad2dataverse 0.6.1__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dryad2dataverse/__init__.py +1 -1
- dryad2dataverse/scripts/dryadd.py +2 -2
- dryad2dataverse/serializer.py +5 -1
- dryad2dataverse/transfer.py +63 -15
- {dryad2dataverse-0.6.1.dist-info → dryad2dataverse-0.7.1.dist-info}/METADATA +9 -8
- dryad2dataverse-0.7.1.dist-info/RECORD +13 -0
- {dryad2dataverse-0.6.1.dist-info → dryad2dataverse-0.7.1.dist-info}/WHEEL +1 -1
- dryad2dataverse-0.6.1.dist-info/RECORD +0 -13
- {dryad2dataverse-0.6.1.dist-info → dryad2dataverse-0.7.1.dist-info}/entry_points.txt +0 -0
- {dryad2dataverse-0.6.1.dist-info → dryad2dataverse-0.7.1.dist-info}/top_level.txt +0 -0
dryad2dataverse/__init__.py
CHANGED
|
@@ -27,7 +27,7 @@ import dryad2dataverse.serializer
|
|
|
27
27
|
import dryad2dataverse.transfer
|
|
28
28
|
from dryad2dataverse.handlers import SSLSMTPHandler
|
|
29
29
|
|
|
30
|
-
VERSION = (0, 5,
|
|
30
|
+
VERSION = (0, 5, 3)
|
|
31
31
|
__version__ = '.'.join([str(x) for x in VERSION])
|
|
32
32
|
|
|
33
33
|
DRY = 'https://datadryad.org/api/v2'
|
|
@@ -509,7 +509,7 @@ def verbo(verbosity:bool, **kwargs)->None:
|
|
|
509
509
|
for key, value in kwargs.items():
|
|
510
510
|
print(f'{key}: {value}')
|
|
511
511
|
|
|
512
|
-
def main(log='/var/log/dryadd.log', level=logging.
|
|
512
|
+
def main(log='/var/log/dryadd.log', level=logging.WARNING):
|
|
513
513
|
'''
|
|
514
514
|
Main Dryad transfer daemon
|
|
515
515
|
|
dryad2dataverse/serializer.py
CHANGED
|
@@ -202,7 +202,11 @@ class Serializer():
|
|
|
202
202
|
files = page['_embedded'].get('stash:files')
|
|
203
203
|
if files:
|
|
204
204
|
for f in files:
|
|
205
|
-
|
|
205
|
+
#This broke with this commit:
|
|
206
|
+
# https://github.com/datadryad/dryad-app/commit/b8a333ba34b14e55cbc1d7ed5aa4451e0f41db66
|
|
207
|
+
|
|
208
|
+
#downLink = f['_links']['stash:file-download']['href']
|
|
209
|
+
downLink = f['_links']['stash:download']['href']
|
|
206
210
|
downLink = f'{constants.DRYURL}{downLink}'
|
|
207
211
|
name = f['path']
|
|
208
212
|
mimeType = f['mimeType']
|
dryad2dataverse/transfer.py
CHANGED
|
@@ -9,7 +9,9 @@ import logging
|
|
|
9
9
|
import os
|
|
10
10
|
import time
|
|
11
11
|
import traceback
|
|
12
|
+
import zlib #crc32, adler32
|
|
12
13
|
|
|
14
|
+
import Crypto.Hash.MD2 #md2
|
|
13
15
|
import requests
|
|
14
16
|
from requests.adapters import HTTPAdapter
|
|
15
17
|
from requests_toolbelt.multipart.encoder import MultipartEncoder
|
|
@@ -20,6 +22,15 @@ from dryad2dataverse import exceptions
|
|
|
20
22
|
LOGGER = logging.getLogger(__name__)
|
|
21
23
|
URL_LOGGER = logging.getLogger('urllib3')
|
|
22
24
|
|
|
25
|
+
HASHTABLE = {'adler-32' : zlib.adler32, #zlib?
|
|
26
|
+
'crc-32' : zlib.crc32, #zlib
|
|
27
|
+
'md2' : Crypto.Hash.MD2, #insecure
|
|
28
|
+
'md5' : hashlib.md5,
|
|
29
|
+
'sha-1' : hashlib.sha1,
|
|
30
|
+
'sha-256' : hashlib.sha256,
|
|
31
|
+
'sha-384' : hashlib.sha384,
|
|
32
|
+
'sha-512': hashlib.sha512}
|
|
33
|
+
|
|
23
34
|
class Transfer():
|
|
24
35
|
'''
|
|
25
36
|
Transfers metadata and data files from a
|
|
@@ -342,7 +353,7 @@ class Transfer():
|
|
|
342
353
|
return self.dvpid
|
|
343
354
|
|
|
344
355
|
@staticmethod
|
|
345
|
-
def _check_md5(infile):
|
|
356
|
+
def _check_md5(infile, dig_type):
|
|
346
357
|
'''
|
|
347
358
|
Returns the md5 checksum of a file.
|
|
348
359
|
|
|
@@ -351,25 +362,51 @@ class Transfer():
|
|
|
351
362
|
|
|
352
363
|
infile : str
|
|
353
364
|
— Complete path to target file.
|
|
365
|
+
|
|
366
|
+
dig_type : str or None
|
|
367
|
+
— Digest type
|
|
354
368
|
----------------------------------------
|
|
355
369
|
'''
|
|
370
|
+
#From Ryan Scherle
|
|
371
|
+
#When Dryad calculates a digest, it only uses MD5.
|
|
372
|
+
#But if you have precomputed some other type of digest, we should accept it.
|
|
373
|
+
#The list of allowed values is:
|
|
374
|
+
#('adler-32','crc-32','md2','md5','sha-1','sha-256','sha-384','sha-512')
|
|
375
|
+
#hashlib doesn't support adler-32, crc-32, md2
|
|
376
|
+
|
|
356
377
|
blocksize = 2**16
|
|
378
|
+
#Well, this is inelegant
|
|
357
379
|
with open(infile, 'rb') as m:
|
|
358
|
-
fmd5 = hashlib.md5()
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
380
|
+
#fmd5 = hashlib.md5()
|
|
381
|
+
## var name kept for posterity. Maybe refactor
|
|
382
|
+
if dig_type in ['sha-1', 'sha-256', 'sha-384', 'sha-512', 'md5', 'md2']:
|
|
383
|
+
if dig_type == 'md2':
|
|
384
|
+
fmd5 = Crypto.Hash.MD2.new()
|
|
385
|
+
else:
|
|
386
|
+
fmd5 = HASHTABLE[dig_type]()
|
|
387
|
+
fblock = m.read(blocksize)
|
|
388
|
+
while fblock:
|
|
389
|
+
fmd5.update(fblock)
|
|
390
|
+
fblock = m.read(blocksize)
|
|
391
|
+
return fmd5.hexdigest()
|
|
392
|
+
if dig_type in ['adler-32', 'crc-32']:
|
|
362
393
|
fblock = m.read(blocksize)
|
|
363
|
-
|
|
394
|
+
curvalue = HASHTABLE[dig_type](fblock)
|
|
395
|
+
while fblock:
|
|
396
|
+
fblock = m.read(blocksize)
|
|
397
|
+
curvalue = HASHTABLE[dig_type](fblock, curvalue)
|
|
398
|
+
return curvalue
|
|
399
|
+
raise exceptions.HashError(f'Unable to determine hash type for{infile}: {dig_type}')
|
|
364
400
|
|
|
365
|
-
|
|
366
|
-
|
|
401
|
+
|
|
402
|
+
def download_file(self, url=None, filename=None, tmp=None,
|
|
403
|
+
size=None, chk=None, timeout=45, **kwargs):
|
|
367
404
|
'''
|
|
368
405
|
Downloads a file via requests streaming and saves to constants.TMP.
|
|
369
|
-
returns
|
|
406
|
+
returns checksum on success and an exception on failure.
|
|
370
407
|
|
|
371
408
|
----------------------------------------
|
|
372
|
-
|
|
409
|
+
Required keyword arguments:
|
|
373
410
|
|
|
374
411
|
url : str
|
|
375
412
|
— URL of download.
|
|
@@ -388,8 +425,11 @@ class Transfer():
|
|
|
388
425
|
— Reported file size in bytes.
|
|
389
426
|
Defaults to dryad2dataverse.constants.MAX_UPLOAD.
|
|
390
427
|
|
|
428
|
+
digest_type: str
|
|
429
|
+
— checksum type (ie, md5, sha-256, etc)
|
|
430
|
+
|
|
391
431
|
chk : str
|
|
392
|
-
|
|
432
|
+
— checksum of file (if available and known).
|
|
393
433
|
----------------------------------------
|
|
394
434
|
'''
|
|
395
435
|
LOGGER.debug('Start download sequence')
|
|
@@ -430,11 +470,13 @@ class Transfer():
|
|
|
430
470
|
LOGGER.exception(e)
|
|
431
471
|
raise
|
|
432
472
|
#now check the md5
|
|
433
|
-
md5 =
|
|
434
|
-
if chk:
|
|
473
|
+
md5 = None
|
|
474
|
+
if chk and kwargs.get('digest_type') in HASHTABLE:
|
|
475
|
+
md5 = Transfer._check_md5(f'{tmp}{os.sep}{filename}',
|
|
476
|
+
kwargs['digest_type'])
|
|
435
477
|
if md5 != chk:
|
|
436
478
|
try:
|
|
437
|
-
raise exceptions.HashError('Hex digest mismatch: {md5} : {chk}')
|
|
479
|
+
raise exceptions.HashError(f'Hex digest mismatch: {md5} : {chk}')
|
|
438
480
|
#is this really what I want to do on a bad checksum?
|
|
439
481
|
except exceptions.HashError as e:
|
|
440
482
|
LOGGER.exception(e)
|
|
@@ -474,7 +516,13 @@ class Transfer():
|
|
|
474
516
|
files = self.files
|
|
475
517
|
try:
|
|
476
518
|
for f in files:
|
|
477
|
-
self.download_file(f[0],
|
|
519
|
+
self.download_file(url=f[0],
|
|
520
|
+
filename=f[1],
|
|
521
|
+
mimetype=f[2],
|
|
522
|
+
size=f[3],
|
|
523
|
+
descr=f[4],
|
|
524
|
+
digest_type=f[5],
|
|
525
|
+
chk=f[-1])
|
|
478
526
|
except exceptions.DataverseDownloadError as e:
|
|
479
527
|
LOGGER.exception('Unable to download file with info %s\n%s', f, e)
|
|
480
528
|
raise
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dryad2dataverse
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: Utility for copying and syncing data from a Dryad data repository to a Dataverse repository
|
|
5
5
|
Author-email: Paul Lesack <paul.lesack@ubc.ca>
|
|
6
6
|
Project-URL: Homepage, https://ubc-library-rc.github.io/dryad2dataverse
|
|
@@ -17,13 +17,14 @@ Classifier: Topic :: Internet :: WWW/HTTP :: Site Management
|
|
|
17
17
|
Classifier: Topic :: Utilities
|
|
18
18
|
Requires-Python: >=3.6
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
|
-
Requires-Dist: certifi
|
|
21
|
-
Requires-Dist: charset-normalizer
|
|
22
|
-
Requires-Dist: chardet
|
|
23
|
-
Requires-Dist: idna
|
|
24
|
-
Requires-Dist:
|
|
25
|
-
Requires-Dist: requests
|
|
26
|
-
Requires-Dist:
|
|
20
|
+
Requires-Dist: certifi >=2022.12.7
|
|
21
|
+
Requires-Dist: charset-normalizer >=2.0.4
|
|
22
|
+
Requires-Dist: chardet >=3.0.4
|
|
23
|
+
Requires-Dist: idna >=2.10
|
|
24
|
+
Requires-Dist: pycryptodome >=3.20.0
|
|
25
|
+
Requires-Dist: requests >=2.26.0
|
|
26
|
+
Requires-Dist: requests-toolbelt >=0.9.1
|
|
27
|
+
Requires-Dist: urllib3 >=1.26.6
|
|
27
28
|
|
|
28
29
|
# dryad2dataverse
|
|
29
30
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
dryad2dataverse/__init__.py,sha256=OM5T0B0PkVEvsHStz5XY4bdEAmWI3P_r3wy6xo_vQF4,712
|
|
2
|
+
dryad2dataverse/constants.py,sha256=ZfD2N0f742nnP8NPUV0QsDdVVAbrW-3Py8Lg9al1Z5c,1429
|
|
3
|
+
dryad2dataverse/exceptions.py,sha256=oIP1_fSEvLF3HpK6gOYb05vUisY-IAxwXZDeNoAvCPM,1008
|
|
4
|
+
dryad2dataverse/handlers.py,sha256=Xb0vvs1HE92qaK6g-Gu3eyHkLrSwU0-RQjLcl6FZPUY,1487
|
|
5
|
+
dryad2dataverse/monitor.py,sha256=qj-jXl3_SJBPof1qKOiSHQCvc083PHH2afkl-y_qQAU,26267
|
|
6
|
+
dryad2dataverse/serializer.py,sha256=1RKfV6flLCR61s51MqY-AUMIMOoLrxai3Wi9cOGtt1s,33706
|
|
7
|
+
dryad2dataverse/transfer.py,sha256=W8b-sHTjClM-CzVx_OrBAsrZ5hQ1-XmTpq1qB4PFlVA,36233
|
|
8
|
+
dryad2dataverse/scripts/dryadd.py,sha256=chQVEAYWTHvKa5QZH0PIj1EgBlJO4qd1Xw2vkf1c_i8,26291
|
|
9
|
+
dryad2dataverse-0.7.1.dist-info/METADATA,sha256=fttUJsmVA2e0to1oE5JJc2qH-DQO8ByR7LpbOVEdurc,3297
|
|
10
|
+
dryad2dataverse-0.7.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
11
|
+
dryad2dataverse-0.7.1.dist-info/entry_points.txt,sha256=9kBsBa5SivAtfAox__vZGL7H-HI7Vd-jGztCh_eIJEc,63
|
|
12
|
+
dryad2dataverse-0.7.1.dist-info/top_level.txt,sha256=0X45AghpKfL69Oc51sRddeiHtq8o-OyOhFX3AMal6YI,16
|
|
13
|
+
dryad2dataverse-0.7.1.dist-info/RECORD,,
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
dryad2dataverse/__init__.py,sha256=LNerhTEGz21nqTNhmnfFp0IpVzANJnAUjy1ls5qmyQc,712
|
|
2
|
-
dryad2dataverse/constants.py,sha256=ZfD2N0f742nnP8NPUV0QsDdVVAbrW-3Py8Lg9al1Z5c,1429
|
|
3
|
-
dryad2dataverse/exceptions.py,sha256=oIP1_fSEvLF3HpK6gOYb05vUisY-IAxwXZDeNoAvCPM,1008
|
|
4
|
-
dryad2dataverse/handlers.py,sha256=Xb0vvs1HE92qaK6g-Gu3eyHkLrSwU0-RQjLcl6FZPUY,1487
|
|
5
|
-
dryad2dataverse/monitor.py,sha256=qj-jXl3_SJBPof1qKOiSHQCvc083PHH2afkl-y_qQAU,26267
|
|
6
|
-
dryad2dataverse/serializer.py,sha256=fM5owzRfdZb799fTg45n0iqzBWZSUHpTp2pXnuKg_z0,33476
|
|
7
|
-
dryad2dataverse/transfer.py,sha256=tfuSSfOsXTCMEJ_K65J0hyOI9O_5GFUHEpPyNQGXVbs,34125
|
|
8
|
-
dryad2dataverse/scripts/dryadd.py,sha256=RMsXK2shWjQzNjicD4v9dpYPH0ctSmXHqC0gZ3_x_Dk,26291
|
|
9
|
-
dryad2dataverse-0.6.1.dist-info/METADATA,sha256=Jz8fH-Dp7E-RbvxfSQR7iwv3PNDDURiAqAb-aQkmhGA,3274
|
|
10
|
-
dryad2dataverse-0.6.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
11
|
-
dryad2dataverse-0.6.1.dist-info/entry_points.txt,sha256=9kBsBa5SivAtfAox__vZGL7H-HI7Vd-jGztCh_eIJEc,63
|
|
12
|
-
dryad2dataverse-0.6.1.dist-info/top_level.txt,sha256=0X45AghpKfL69Oc51sRddeiHtq8o-OyOhFX3AMal6YI,16
|
|
13
|
-
dryad2dataverse-0.6.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|