dryad2dataverse 0.7.1__py3-none-any.whl → 0.7.5__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -22,7 +22,9 @@ Modules included:
22
22
 
23
23
  dryad2dataverse.exceptions : Custom exceptions.
24
24
  '''
25
+ import sys
25
26
 
26
- VERSION = (0, 7, 1)
27
-
27
+ VERSION = (0, 7, 5)
28
28
  __version__ = '.'.join([str(x) for x in VERSION])
29
+ USERAGENT = (f'dryad2dataverse/v{__version__} ({sys.platform.capitalize()}); '
30
+ f'Python {sys.version[:sys.version.find("(")-1]}')
@@ -325,7 +325,10 @@ class Monitor():
325
325
  # because of code duplication below.
326
326
  for f in oldFiles:
327
327
  #Download links are not persistent. Be warned
328
- downLink = f['_links']['stash:file-download']['href']
328
+ try:
329
+ downLink = f['_links']['stash:file-download']['href']
330
+ except KeyError:
331
+ downLink = f['_links']['stash:download']['href']
329
332
  downLink = f'{constants.DRYURL}{downLink}'
330
333
  name = f['path']
331
334
  mimeType = f['mimeType']
@@ -14,6 +14,7 @@ import glob
14
14
  import logging
15
15
  import logging.handlers
16
16
  import os
17
+ import pathlib
17
18
  import shutil
18
19
  import smtplib
19
20
  import sys
@@ -27,7 +28,7 @@ import dryad2dataverse.serializer
27
28
  import dryad2dataverse.transfer
28
29
  from dryad2dataverse.handlers import SSLSMTPHandler
29
30
 
30
- VERSION = (0, 5, 3)
31
+ VERSION = (0, 6, 2)
31
32
  __version__ = '.'.join([str(x) for x in VERSION])
32
33
 
33
34
  DRY = 'https://datadryad.org/api/v2'
@@ -372,6 +373,21 @@ def argp():
372
373
  type=int,
373
374
  dest='warn',
374
375
  default=15)
376
+ parser.add_argument('--testmode-on',
377
+ help=('Turn on test mode. '
378
+ 'Number of transfers will be limited '
379
+ 'to the value in --testmode-limit '
380
+ 'or 5 if you don\'t set --testmode-limit '),
381
+ action='store_true',
382
+ dest='testmode')
383
+ parser.add_argument('--testmode-limit',
384
+ help=('Test mode - only transfer first [n] '
385
+ 'of the total number of (new) records. Old ones will '
386
+ 'still be updated, though. '
387
+ 'Default: 5'),
388
+ type=int,
389
+ default=5,
390
+ dest='testlimit')
375
391
  parser.add_argument('--version', action='version',
376
392
  version='%(prog)s '+__version__
377
393
  +'; dryad2dataverse '+
@@ -416,7 +432,8 @@ def email_log(mailhost, fromaddr, toaddrs, credentials, port=465, secure=(),
416
432
  '''
417
433
  #pylint: disable=too-many-arguments
418
434
  #Because consistency is for suckers and yahoo requires full hostname
419
- subject = 'Dryad to Dataverse transfer error'
435
+ #subject = 'Dryad to Dataverse transfer error'
436
+ subject = 'Dryad to Dataverse logger message'
420
437
  elog = logging.getLogger('email_log')
421
438
  mailer = SSLSMTPHandler(mailhost=(mailhost, port),
422
439
  fromaddr=fromaddr,
@@ -491,11 +508,11 @@ def checkwarn(val:int, **kwargs) -> None:
491
508
  mess = ('Large number of updates detected. '
492
509
  f'{val} new studies exceeds threshold of {kwargs.get("warn", 0)}. '
493
510
  'Program execution halted.')
494
- subject = ('Dryad to Dataverse large update warning')
511
+ subject = 'Dryad to Dataverse large update warning'
495
512
  for logme in kwargs.get('loggers'):
496
513
  logme.warning(mess)
497
514
  notify(msgtxt=(subject, mess),
498
- **vars(kwargs))
515
+ **kwargs)
499
516
  sys.exit()
500
517
 
501
518
  def verbo(verbosity:bool, **kwargs)->None:
@@ -536,9 +553,18 @@ def main(log='/var/log/dryadd.log', level=logging.WARNING):
536
553
  monitor = dryad2dataverse.monitor.Monitor(args.dbase)
537
554
  #copy the database to make a backup, because paranoia is your friend
538
555
  if os.path.exists(dryad2dataverse.constants.DBASE):
539
- shutil.copyfile(dryad2dataverse.constants.DBASE,
540
- dryad2dataverse.constants.DBASE+'.'+
541
- datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
556
+ bu_db = pathlib.Path(dryad2dataverse.constants.DBASE)
557
+ try:
558
+ shutil.copyfile( bu_db,
559
+ pathlib.Path(bu_db.parent,
560
+ bu_db.stem + '_' +
561
+ datetime.datetime.now().strftime('%Y-%m-%d-%H%M') +
562
+ bu_db.suffix)
563
+ )
564
+ except FileNotFoundError:
565
+ print(dryad2dataverse.constants.DBASE)
566
+ print(bu_db)
567
+ sys.exit()
542
568
  #list comprehension includes untimestamped dbase name, hence 2+
543
569
  fnames = glob.glob(os.path.abspath(dryad2dataverse.constants.DBASE)
544
570
  +'*')
@@ -554,15 +580,24 @@ def main(log='/var/log/dryadd.log', level=logging.WARNING):
554
580
  logger.info('Total new files: %s', len(updates))
555
581
  elog.info('Total new files: %s', len(updates))
556
582
 
557
- checkwarn(val=len(updates),
583
+ checkwarn(val=len(updates) if not args.testmode else
584
+ min(args.testlimit, len(updates)),
558
585
  loggers=[logger],
559
586
  **vars(args))
587
+ if args.testmode:
588
+ logger.warning('Test mode is ON - number of updates limited to %s', args.testlimit)
589
+ elog.warning('Test mode is ON - number of updates limited to %s', args.testlimit)
560
590
 
561
591
  #update all the new files
562
592
  verbo(args.verbosity, **{'Total to process': len(updates)})
593
+
563
594
  try:
564
595
  count = 0
596
+ testcount = 0
565
597
  for doi in updates:
598
+ if args.testmode and (testcount >= args.testlimit):
599
+ logger.info('Test limit of %s reached', args.testlimit)
600
+ break
566
601
  count += 1
567
602
  logger.info('Start processing %s of %s', count, len(updates))
568
603
  logger.info('DOI: %s, Dryad URL: https://datadryad.org/stash/dataset/%s',
@@ -608,6 +643,7 @@ def main(log='/var/log/dryadd.log', level=logging.WARNING):
608
643
  transfer.set_correct_date()
609
644
  notify(new_content(study),
610
645
  **vars(args))
646
+ testcount+=1
611
647
 
612
648
  elif update_type == 'updated':
613
649
  logger.info('Updated metadata: %s', doi[0])
@@ -673,3 +709,7 @@ def main(log='/var/log/dryadd.log', level=logging.WARNING):
673
709
 
674
710
  if __name__ == '__main__':
675
711
  main()
712
+ _parser = argp()
713
+ _args = _parser.parse_args()
714
+ print('This is what you would have done had you actually run this')
715
+ print(_args)
@@ -11,11 +11,13 @@ import requests
11
11
  from requests.adapters import HTTPAdapter
12
12
 
13
13
  from dryad2dataverse import constants
14
+ from dryad2dataverse import USERAGENT
14
15
 
15
16
  LOGGER = logging.getLogger(__name__)
16
17
  #Connection monitoring as per
17
18
  #https://stackoverflow.com/questions/16337511/log-all-requests-from-the-python-requests-module
18
19
  URL_LOGGER = logging.getLogger('urllib3')
20
+ USER_AGENT = {'User-agent': USERAGENT}
19
21
 
20
22
  class Serializer():
21
23
  '''
@@ -72,6 +74,7 @@ class Serializer():
72
74
  try:
73
75
  headers = {'accept':'application/json',
74
76
  'Content-Type':'application/json'}
77
+ headers.update(USER_AGENT)
75
78
  doiClean = urllib.parse.quote(self.doi, safe='')
76
79
  resp = self.session.get(f'{url}/api/v2/datasets/{doiClean}',
77
80
  headers=headers, timeout=timeout)
@@ -164,6 +167,7 @@ class Serializer():
164
167
  self._fileJson = []
165
168
  headers = {'accept':'application/json',
166
169
  'Content-Type':'application/json'}
170
+ headers.update(USER_AGENT)
167
171
  fileList = self.session.get(f'{constants.DRYURL}/api/v2/versions/{self.id}/files',
168
172
  headers=headers,
169
173
  timeout=timeout)
@@ -2,6 +2,7 @@
2
2
  This module handles data downloads and uploads from a Dryad instance to a Dataverse instance
3
3
  '''
4
4
 
5
+ #TODO harmonize headers instead of hideous copypasta
5
6
  import hashlib
6
7
  import io
7
8
  import json
@@ -18,7 +19,9 @@ from requests_toolbelt.multipart.encoder import MultipartEncoder
18
19
 
19
20
  from dryad2dataverse import constants
20
21
  from dryad2dataverse import exceptions
22
+ from dryad2dataverse import USERAGENT
21
23
 
24
+ USER_AGENT = {'User-agent': USERAGENT}
22
25
  LOGGER = logging.getLogger(__name__)
23
26
  URL_LOGGER = logging.getLogger('urllib3')
24
27
 
@@ -85,6 +88,7 @@ class Transfer():
85
88
  if not url:
86
89
  url = constants.DVURL
87
90
  headers = {'X-Dataverse-key': apikey if apikey else constants.APIKEY}
91
+ headers.update(USER_AGENT)
88
92
  bad_test = self.session.get(f'{url}/api/datasets/:persistentId',
89
93
  headers=headers,
90
94
  params=params)
@@ -232,6 +236,7 @@ class Transfer():
232
236
  else:
233
237
  headers = {'X-Dataverse-key' : constants.APIKEY}
234
238
 
239
+ headers.update(USER_AGENT)
235
240
  params = {'persistentId': hdl}
236
241
  set_date = self.session.put(f'{url}/api/datasets/:persistentId/citationdate',
237
242
  headers=headers,
@@ -284,7 +289,7 @@ class Transfer():
284
289
  if not apikey:
285
290
  apikey = constants.APIKEY
286
291
  headers = {'X-Dataverse-key' : apikey}
287
-
292
+ headers.update(USER_AGENT)
288
293
  targetDv = kwargs.get('targetDv')
289
294
  dvpid = kwargs.get('dvpid')
290
295
  #dryFid = kwargs.get('dryFid') #Why did I put this here?
@@ -355,7 +360,7 @@ class Transfer():
355
360
  @staticmethod
356
361
  def _check_md5(infile, dig_type):
357
362
  '''
358
- Returns the md5 checksum of a file.
363
+ Returns the hex digest of a file (formerly just md5sum).
359
364
 
360
365
  ----------------------------------------
361
366
  Parameters:
@@ -442,6 +447,7 @@ class Transfer():
442
447
 
443
448
  if size:
444
449
  if size > constants.MAX_UPLOAD:
450
+ #TOO BIG
445
451
  LOGGER.warning('%s: File %s exceeds '
446
452
  'Dataverse MAX_UPLOAD size. Skipping download.',
447
453
  self.doi, filename)
@@ -485,6 +491,7 @@ class Transfer():
485
491
  if url == i[0]:
486
492
  i[-1] = md5
487
493
  LOGGER.debug('Complete download sequence')
494
+ #This doesn't actually return an md5, just the hash value
488
495
  return md5
489
496
  except (requests.exceptions.HTTPError,
490
497
  requests.exceptions.ConnectionError) as err:
@@ -560,6 +567,8 @@ class Transfer():
560
567
  headers = {'X-Dataverse-key': apikey}
561
568
  else:
562
569
  headers = self.auth
570
+
571
+ headers.update(USER_AGENT)
563
572
  params = {'persistentId': study}
564
573
  try:
565
574
  lock_status = self.session.get(f'{dv_url}/api/datasets/:persistentId/locks',
@@ -608,6 +617,8 @@ class Transfer():
608
617
  headers = {'X-Dataverse-key': apikey}
609
618
  else:
610
619
  headers = self.auth
620
+
621
+ headers.update(USER_AGENT)
611
622
  params = {'persistentId': study}
612
623
  lock_status = self.session.get(f'{dv_url}/api/datasets/:persistentId/locks',
613
624
  headers=headers,
@@ -641,7 +652,9 @@ class Transfer():
641
652
 
642
653
  def upload_file(self, dryadUrl=None, filename=None,
643
654
  mimetype=None, size=None, descr=None,
644
- md5=None, studyId=None, dest=None,
655
+ hashtype=None,
656
+ #md5=None, studyId=None, dest=None,
657
+ digest=None, studyId=None, dest=None,
645
658
  fprefix=None, force_unlock=False, timeout=300):
646
659
  '''
647
660
  Uploads file to Dataverse study. Returns a tuple of the
@@ -668,8 +681,11 @@ class Transfer():
668
681
  dest : str
669
682
  — Destination dataverse installation url.
670
683
  Defaults to constants.DVURL.
684
+ hashtype: str
685
+ original Dryad hash type
671
686
 
672
- md5 : str
687
+ #md5 : str
688
+ digest
673
689
  — md5 checksum for file.
674
690
 
675
691
  fprefix : str
@@ -692,6 +708,8 @@ class Transfer():
692
708
 
693
709
  ----------------------------------------
694
710
  '''
711
+ #return locals()
712
+ #TODONE remove above
695
713
  if not studyId:
696
714
  studyId = self.dvpid
697
715
  if not dest:
@@ -730,6 +748,7 @@ class Transfer():
730
748
  ctype = {'Content-type' : multi.content_type}
731
749
  tmphead = self.auth.copy()
732
750
  tmphead.update(ctype)
751
+ tmphead.update(USER_AGENT)
733
752
  url = dest + '/api/datasets/:persistentId/add'
734
753
  try:
735
754
  upload = self.session.post(url, params=params,
@@ -739,9 +758,22 @@ class Transfer():
739
758
  upload.raise_for_status()
740
759
  self.fileUpRecord.append((fid, upload.json()))
741
760
  upmd5 = upload.json()['data']['files'][0]['dataFile']['checksum']['value']
742
- if md5 and upmd5 != md5:
761
+ #Dataverse hash type
762
+ _type = upload.json()['data']['files'][0]['dataFile']['checksum']['type']
763
+ if _type.lower() != hashtype.lower():
764
+ comparator = self._check_md5(upfile, _type.lower())
765
+ else:
766
+ comparator = digest
767
+ #if hashtype.lower () != 'md5':
768
+ # #get an md5 because dataverse uses md5s. Or most of them do anyway.
769
+ # #One day this will be rewritten properly.
770
+ # md5 = self._check_md5(filename, 'md5')
771
+ #else:
772
+ # md5 = digest
773
+ #if md5 and (upmd5 != md5):
774
+ if upmd5 != comparator:
743
775
  try:
744
- raise exceptions.HashError(f'md5sum mismatch:\nlocal: {md5}\nuploaded: {upmd5}')
776
+ raise exceptions.HashError(f'{_type} mismatch:\nlocal: {comparator}\nuploaded: {upmd5}')
745
777
  except exceptions.HashError as e:
746
778
  LOGGER.exception(e)
747
779
  raise
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dryad2dataverse
3
- Version: 0.7.1
3
+ Version: 0.7.5
4
4
  Summary: Utility for copying and syncing data from a Dryad data repository to a Dataverse repository
5
5
  Author-email: Paul Lesack <paul.lesack@ubc.ca>
6
6
  Project-URL: Homepage, https://ubc-library-rc.github.io/dryad2dataverse
@@ -20,7 +20,7 @@ Description-Content-Type: text/markdown
20
20
  Requires-Dist: certifi >=2022.12.7
21
21
  Requires-Dist: charset-normalizer >=2.0.4
22
22
  Requires-Dist: chardet >=3.0.4
23
- Requires-Dist: idna >=2.10
23
+ Requires-Dist: idna >=2.10.0
24
24
  Requires-Dist: pycryptodome >=3.20.0
25
25
  Requires-Dist: requests >=2.26.0
26
26
  Requires-Dist: requests-toolbelt >=0.9.1
@@ -0,0 +1,13 @@
1
+ dryad2dataverse/__init__.py,sha256=RfOrzdCc9OUYlQfBAapkPy_D_SyHziQTob_N5EqAKEM,865
2
+ dryad2dataverse/constants.py,sha256=ZfD2N0f742nnP8NPUV0QsDdVVAbrW-3Py8Lg9al1Z5c,1429
3
+ dryad2dataverse/exceptions.py,sha256=oIP1_fSEvLF3HpK6gOYb05vUisY-IAxwXZDeNoAvCPM,1008
4
+ dryad2dataverse/handlers.py,sha256=Xb0vvs1HE92qaK6g-Gu3eyHkLrSwU0-RQjLcl6FZPUY,1487
5
+ dryad2dataverse/monitor.py,sha256=KOyWCpPTZLYRStB-RN0e5kgHTfbxHsByD72K1VtEPP8,26406
6
+ dryad2dataverse/serializer.py,sha256=DoIjHYKtoH047X5Gd-WUdoLpL-kvTtSAPg-lUElCx8c,33865
7
+ dryad2dataverse/transfer.py,sha256=83tju_o4DSgSkF7JDLsgTpAwm03b0CMb0OjcKAEACuY,37548
8
+ dryad2dataverse/scripts/dryadd.py,sha256=i_y5V0dLz32_nYUFsKfqD_Nz0HIsr4dLV-PQKKH1GhM,28126
9
+ dryad2dataverse-0.7.5.dist-info/METADATA,sha256=9VL4v5kgaGXdCGMV8x3kUS7LbLdZBJXDy7ZDw_Ybvb4,3299
10
+ dryad2dataverse-0.7.5.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
11
+ dryad2dataverse-0.7.5.dist-info/entry_points.txt,sha256=9kBsBa5SivAtfAox__vZGL7H-HI7Vd-jGztCh_eIJEc,63
12
+ dryad2dataverse-0.7.5.dist-info/top_level.txt,sha256=0X45AghpKfL69Oc51sRddeiHtq8o-OyOhFX3AMal6YI,16
13
+ dryad2dataverse-0.7.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (75.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,13 +0,0 @@
1
- dryad2dataverse/__init__.py,sha256=OM5T0B0PkVEvsHStz5XY4bdEAmWI3P_r3wy6xo_vQF4,712
2
- dryad2dataverse/constants.py,sha256=ZfD2N0f742nnP8NPUV0QsDdVVAbrW-3Py8Lg9al1Z5c,1429
3
- dryad2dataverse/exceptions.py,sha256=oIP1_fSEvLF3HpK6gOYb05vUisY-IAxwXZDeNoAvCPM,1008
4
- dryad2dataverse/handlers.py,sha256=Xb0vvs1HE92qaK6g-Gu3eyHkLrSwU0-RQjLcl6FZPUY,1487
5
- dryad2dataverse/monitor.py,sha256=qj-jXl3_SJBPof1qKOiSHQCvc083PHH2afkl-y_qQAU,26267
6
- dryad2dataverse/serializer.py,sha256=1RKfV6flLCR61s51MqY-AUMIMOoLrxai3Wi9cOGtt1s,33706
7
- dryad2dataverse/transfer.py,sha256=W8b-sHTjClM-CzVx_OrBAsrZ5hQ1-XmTpq1qB4PFlVA,36233
8
- dryad2dataverse/scripts/dryadd.py,sha256=chQVEAYWTHvKa5QZH0PIj1EgBlJO4qd1Xw2vkf1c_i8,26291
9
- dryad2dataverse-0.7.1.dist-info/METADATA,sha256=fttUJsmVA2e0to1oE5JJc2qH-DQO8ByR7LpbOVEdurc,3297
10
- dryad2dataverse-0.7.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
11
- dryad2dataverse-0.7.1.dist-info/entry_points.txt,sha256=9kBsBa5SivAtfAox__vZGL7H-HI7Vd-jGztCh_eIJEc,63
12
- dryad2dataverse-0.7.1.dist-info/top_level.txt,sha256=0X45AghpKfL69Oc51sRddeiHtq8o-OyOhFX3AMal6YI,16
13
- dryad2dataverse-0.7.1.dist-info/RECORD,,