dryad2dataverse 0.7.11a0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,22 +2,22 @@
2
2
  Serializes Dryad study JSON to Dataverse JSON, as well as
3
3
  producing associated file information.
4
4
  '''
5
-
6
-
7
5
  import logging
8
6
  import urllib.parse
9
7
 
10
8
  import requests
11
9
  from requests.adapters import HTTPAdapter
12
10
 
13
- from dryad2dataverse import constants
14
- from dryad2dataverse import USERAGENT
11
+ from dryad2dataverse import config
12
+ import dryad2dataverse.auth
15
13
 
16
14
  LOGGER = logging.getLogger(__name__)
17
15
  #Connection monitoring as per
18
16
  #https://stackoverflow.com/questions/16337511/log-all-requests-from-the-python-requests-module
19
17
  URL_LOGGER = logging.getLogger('urllib3')
20
- USER_AGENT = {'User-agent': USERAGENT}
18
+
19
+ #pylint: disable=invalid-name, line-too-long
20
+ #Note: Metadata downloads do not (as of 2026-01) require authentication
21
21
 
22
22
  class Serializer():
23
23
  '''
@@ -27,20 +27,44 @@ class Serializer():
27
27
  <img src="https://licensebuttons.net/p/zero/1.0/88x31.png" title="Creative Commons CC0 1.0 Universal Public Domain Dedication. " style="display:none" onload="this.style.display='inline'" />
28
28
  <a href="http://creativecommons.org/publicdomain/zero/1.0" title="Creative Commons CC0 1.0 Universal Public Domain Dedication. " target="_blank">CC0 1.0</a>
29
29
  </p>'''
30
-
31
- def __init__(self, doi):
30
+
31
+ def __init__(self, doi:str, **kwargs):
32
32
  '''
33
33
  Creates Dryad study metadata instance.
34
34
 
35
- ----------------------------------------
36
- Parameters:
35
+ Parameters
36
+ ----------
37
+ doi : str
38
+ DOI of Dryad study. Required for downloading.
39
+ eg: 'doi:10.5061/dryad.2rbnzs7jp'
40
+
41
+ kwargs : dict
42
+ Other keyword parameters
43
+
44
+ Other parameters
45
+ ----------------
46
+ token : dryad2dataverse.auth.Token
47
+ If present, will use authenticated API
48
+
49
+ Notes
50
+ -----
51
+ Unpacking a dryad2dataverse.config.Config instance holding
52
+ global setup should give all of the
53
+ required kwargs. ie, Serializer(doi, **config_instance)
37
54
 
38
- doi : str
39
- — DOI of Dryad study. Required for downloading.
40
- eg: 'doi:10.5061/dryad.2rbnzs7jp'
41
- ----------------------------------------
42
55
  '''
43
56
  self.doi = doi
57
+ self.kwargs = kwargs
58
+ self.kwargs['dry_url'] = kwargs.get('dry_url', 'https://datadryad.org')
59
+ self.kwargs['api_path'] = kwargs.get('api_path', '/api/v2')
60
+ self.kwargs['max_upload'] = kwargs.get('max_upload', 3221225472)
61
+ self.kwargs['dv_contact_name'] = kwargs.get('dv_contact_name')
62
+ self.kwargs['dv_contact_email'] = kwargs.get('dv_contact_email')
63
+ if self.kwargs.get('token'):
64
+ if not isinstance(self.kwargs['token'],dryad2dataverse.auth.Token):
65
+ raise ValueError('Token must be a dryad2dataverse.auth.Token instance')
66
+ #Don't need timeout if have RETRY_STRATEGY
67
+ self.kwargs['timeout'] = kwargs.get('timeout', 100)
44
68
  self._dryadJson = None
45
69
  self._fileJson = None
46
70
  self._dvJson = None
@@ -49,35 +73,28 @@ class Serializer():
49
73
  self.dvpid = None
50
74
  self.session = requests.Session()
51
75
  self.session.mount('https://',
52
- HTTPAdapter(max_retries=constants.RETRY_STRATEGY))
76
+ HTTPAdapter(max_retries=config.RETRY_STRATEGY))
53
77
  LOGGER.debug('Creating Serializer instance object')
54
78
 
55
- def fetch_record(self, url=None, timeout=45):
79
+ def fetch_record(self, url=None) :
56
80
  '''
57
81
  Fetches Dryad study record JSON from Dryad V2 API at
58
82
  https://datadryad.org/api/v2/datasets/.
59
83
  Saves to self._dryadJson. Querying Serializer.dryadJson
60
84
  will call this function automatically.
61
85
 
62
- ----------------------------------------
63
- Parameters:
64
-
86
+ Parameters
87
+ ----------
65
88
  url : str
66
- Dryad instance base URL (eg: 'https://datadryad.org').
67
-
68
- timeout : int
69
- — Timeout in seconds. Default 45.
70
- ----------------------------------------
89
+ Dryad instance base URL (eg: 'https://datadryad.org').
71
90
  '''
72
91
  if not url:
73
- url = constants.DRYURL
92
+ url = self.kwargs['dry_url']
74
93
  try:
75
- headers = {'accept':'application/json',
76
- 'Content-Type':'application/json'}
77
- headers.update(USER_AGENT)
94
+ headers = config.Config.update_headers(**self.kwargs)
78
95
  doiClean = urllib.parse.quote(self.doi, safe='')
79
- resp = self.session.get(f'{url}/api/v2/datasets/{doiClean}',
80
- headers=headers, timeout=timeout)
96
+ resp = self.session.get(f'{url}{self.kwargs["api_path"]}/datasets/{doiClean}',
97
+ headers=headers, timeout=self.kwargs['timeout'])
81
98
  resp.raise_for_status()
82
99
  self._dryadJson = resp.json()
83
100
  except (requests.exceptions.HTTPError,
@@ -118,12 +135,10 @@ class Serializer():
118
135
  If supplying it, make sure it's correct or you will run into trouble
119
136
  with processing later.
120
137
 
121
- ----------------------------------------
122
- Parameters:
123
-
138
+ Parameters
139
+ ----------
124
140
  value : dict
125
- Dryad JSON.
126
-
141
+ Dryad JSON.
127
142
  '''
128
143
  if value:
129
144
  self._dryadJson = value
@@ -131,9 +146,10 @@ class Serializer():
131
146
  self.fetch_record()
132
147
 
133
148
  @property
134
- def embargo(self):
149
+ def embargo(self)->bool:
135
150
  '''
136
151
  Check embargo status. Returns boolean True if embargoed.
152
+
137
153
  '''
138
154
  if self.dryadJson.get('curationStatus') == 'Embargoed':
139
155
  return True
@@ -148,39 +164,31 @@ class Serializer():
148
164
  return self._dvJson
149
165
 
150
166
  @property
151
- def fileJson(self, timeout=45):
167
+ def fileJson(self):
152
168
  '''
153
169
  Returns a list of file JSONs from call to Dryad API /files/{id},
154
170
  where the ID is parsed from the Dryad JSON. Dryad file listings
155
171
  are paginated, so the return consists of a list of dicts, one
156
172
  per page.
157
-
158
- ----------------------------------------
159
- Parameters:
160
-
161
- timeout : int
162
- — Request timeout in seconds.
163
- ----------------------------------------
164
173
  '''
165
174
  if not self._fileJson:
166
175
  try:
167
176
  self._fileJson = []
168
- headers = {'accept':'application/json',
169
- 'Content-Type':'application/json'}
170
- headers.update(USER_AGENT)
171
- fileList = self.session.get(f'{constants.DRYURL}/api/v2/versions/{self.id}/files',
177
+ headers = config.Config.update_headers(**self.kwargs)
178
+ fileList = self.session.get(f'{self.kwargs["dry_url"]}'
179
+ f'{self.kwargs["api_path"]}/versions/{self.id}/files',
172
180
  headers=headers,
173
- timeout=timeout)
181
+ timeout=self.kwargs['timeout'])
174
182
  fileList.raise_for_status()
175
183
  #total = fileList.json()['total'] #Not needed
176
184
  lastPage = fileList.json()['_links']['last']['href']
177
185
  pages = int(lastPage[lastPage.rfind('=')+1:])
178
186
  self._fileJson.append(fileList.json())
179
187
  for i in range(2, pages+1):
180
- fileCont = self.session.get(f'{constants.DRYURL}/api/v2'
188
+ fileCont = self.session.get(f'{self.kwargs["dry_url"]}{self.kwargs["api_path"]}'
181
189
  f'/versions/{self.id}/files?page={i}',
182
190
  headers=headers,
183
- timeout=timeout)
191
+ timeout=self.kwargs['timeout'])
184
192
  fileCont.raise_for_status()
185
193
  self._fileJson.append(fileCont.json())
186
194
  except Exception as e:
@@ -189,7 +197,7 @@ class Serializer():
189
197
  return self._fileJson
190
198
 
191
199
  @property
192
- def files(self):
200
+ def files(self)->list:
193
201
  '''
194
202
  Returns a list of tuples with:
195
203
 
@@ -211,7 +219,7 @@ class Serializer():
211
219
 
212
220
  #downLink = f['_links']['stash:file-download']['href']
213
221
  downLink = f['_links']['stash:download']['href']
214
- downLink = f'{constants.DRYURL}{downLink}'
222
+ downLink = f'{self.kwargs["dry_url"]}{downLink}'
215
223
  name = f['path']
216
224
  mimeType = f['mimeType']
217
225
  size = f['size']
@@ -235,22 +243,13 @@ class Serializer():
235
243
  return out
236
244
 
237
245
  @property
238
- def oversize(self, maxsize=None):
246
+ def oversize(self):
239
247
  '''
240
248
  Returns a list of Dryad files whose size value
241
249
  exceeds maxsize. Maximum size defaults to
242
- dryad2dataverse.constants.MAX_UPLOAD
243
-
244
- ----------------------------------------
245
- Parameters:
246
-
247
- maxsize : int
248
- — Size in bytes in which to flag as oversize.
249
- Defaults to constants.MAX_UPLOAD.
250
- ----------------------------------------
250
+ dryad2dataverse.config.MAX_UPLOAD
251
251
  '''
252
- if not maxsize:
253
- maxsize = constants.MAX_UPLOAD
252
+ maxsize = self.kwargs['max_upload']
254
253
  toobig = []
255
254
  for f in self.files:
256
255
  if f[3] >= maxsize:
@@ -264,19 +263,17 @@ class Serializer():
264
263
  Creates wrapper around single or multiple Dataverse JSON objects.
265
264
  Returns a dict *without* the Dataverse 'value' key'.
266
265
 
267
- ----------------------------------------
268
- Parameters:
269
-
266
+ Parameters
267
+ ----------
270
268
  typeName : str
271
- Dataverse typeName (eg: 'author').
269
+ Dataverse typeName (eg: 'author').
272
270
 
273
271
  multiple : boolean
274
- "Multiple" value in Dataverse JSON.
272
+ "Multiple" value in Dataverse JSON.
275
273
 
276
274
  typeClass : str
277
- Dataverse typeClass. Usually one of 'compound', 'primitive,
275
+ Dataverse typeClass. Usually one of 'compound', 'primitive,
278
276
  'controlledVocabulary').
279
- ----------------------------------------
280
277
  '''
281
278
  return {'typeName':typeName, 'multiple':multiple,
282
279
  'typeClass':typeClass}
@@ -293,29 +290,32 @@ class Serializer():
293
290
  Suitable for generalized conversions. Only provides fields with
294
291
  multiple: False and typeclass:Primitive
295
292
 
296
- ----------------------------------------
297
- Parameters:
293
+ Parameters
294
+ ----------
295
+ kwargs : dict
296
+ Dict from Dataverse JSON segment
298
297
 
298
+ Other parameters
299
+ ----------------
299
300
  dvField : str
300
- Dataverse output field
301
+ Dataverse output field
301
302
 
302
303
  dryField : str
303
- Dryad JSON field to convert
304
+ Dryad JSON field to convert
304
305
 
305
306
  inJson : dict
306
- Dryad JSON **segment** to convert
307
+ Dryad JSON **segment** to convert
307
308
 
308
309
  addJSON : dict (optional)
309
- any other JSON required to complete (cf ISNI)
310
+ any other JSON required to complete (cf ISNI)
310
311
 
311
312
  rType : str
312
- 'dict' (default) or 'list'.
313
+ 'dict' (default) or 'list'.
313
314
  Returns 'value' field as dict value or list.
314
315
 
315
316
  pNotes : str
316
317
  Notes to be prepended to list type values.
317
318
  No trailing space required.
318
- ----------------------------------------
319
319
  '''
320
320
 
321
321
  dvField = kwargs.get('dvField')
@@ -359,12 +359,11 @@ class Serializer():
359
359
  '''
360
360
  Produces required author json fields.
361
361
  This is a special case, requiring concatenation of several fields.
362
- ----------------------------------------
363
- Parameters:
364
362
 
363
+ Parameters
364
+ ----------
365
365
  author : dict
366
- dryad['author'] JSON segment.
367
- ----------------------------------------
366
+ dryad['author'] JSON segment.
368
367
  '''
369
368
  first = author.get('firstName')
370
369
  last = author.get('lastName')
@@ -381,13 +380,12 @@ class Serializer():
381
380
  Produces the insane keyword structure Dataverse JSON segment
382
381
  from a list of words.
383
382
 
384
- ----------------------------------------
385
- Parameters:
386
-
387
- args : list with str elements
388
- Generally input is Dryad JSON 'keywords', ie *Dryad['keywords'].
389
- Don't forget to expand the list using *.
390
- ----------------------------------------
383
+ Parameters
384
+ ----------
385
+ args : list
386
+ List with elements as strings.
387
+ Generally input is Dryad JSON 'keywords', ie *Dryad['keywords'].
388
+ Don't forget to expand the list using *.
391
389
  '''
392
390
  outlist = []
393
391
  for arg in args:
@@ -402,12 +400,10 @@ class Serializer():
402
400
  Returns formatted notes field with Dryad JSON values that
403
401
  don't really fit anywhere into the Dataverse JSON.
404
402
 
405
- ----------------------------------------
406
- Parameters:
407
-
403
+ Parameters
404
+ ----------
408
405
  dryJson : dict
409
- Dryad JSON as dict.
410
- ----------------------------------------
406
+ Dryad JSON as dict.
411
407
  '''
412
408
  notes = ''
413
409
  #these fields should be concatenated into notes
@@ -423,34 +419,24 @@ class Serializer():
423
419
  'storageSize',
424
420
  'visibility',
425
421
  'skipEmails']
422
+ lookup = {'versionNumber': '<b>Dryad version number:</b>',
423
+ 'versionStatus': '<b>Version status:</b>',
424
+ 'manuscriptNumber': '<b>Manuscript number:</b>',
425
+ 'curationStatus': '<b>Dryad curation status:</b>',
426
+ 'preserveCurationStatus': '<b>Dryad preserve curation status:</b>',
427
+ 'invoiceId': '<b>Invoice ID:</b>',
428
+ 'sharingLink': '<b>Sharing link:</b>',
429
+ 'loosenValidation': '<b>Loosen validation:</b>',
430
+ 'skipDataciteUpdate': '<b>Skip Datacite update:</b>',
431
+ 'storageSize': '<b>Storage size:</b>',
432
+ 'visibility': '<b>Visibility:</b>',
433
+ 'skipEmails': '<b>Skip emails:</b>'}
426
434
  for note in notable:
427
435
  text = dryJson.get(note)
428
436
  if text:
429
437
  text = str(text).strip()
430
- if note == 'versionNumber':
431
- text = f'<b>Dryad version number:</b> {text}'
432
- if note == 'versionStatus':
433
- text = f'<b>Version status:</b> {text}'
434
- if note == 'manuscriptNumber':
435
- text = f'<b>Manuscript number:</b> {text}'
436
- if note == 'curationStatus':
437
- text = f'<b>Dryad curation status:</b> {text}'
438
- if note == 'preserveCurationStatus':
439
- text = f'<b>Dryad preserve curation status:</b> {text}'
440
- if note == 'invoiceId':
441
- text = f'<b>Invoice ID:</b> {text}'
442
- if note == 'sharingLink':
443
- text = f'<b>Sharing link:</b> {text}'
444
- if note == 'loosenValidation':
445
- text = f'<b>Loosen validation:</b> {text}'
446
- if note == 'skipDataciteUpdate':
447
- text = f'<b>Skip Datacite update:</b> {text}'
448
- if note == 'storageSize':
449
- text = f'<b>Storage size:</b> {text}'
450
- if note == 'visibility':
451
- text = f'<b>Visibility:</b> {text}'
452
- if note == 'skipEmails':
453
- text = f'<b>Skip emails:</b> {text}'
438
+ if note in lookup:
439
+ text = f'{lookup.get(note)} {text}'
454
440
 
455
441
  notes += f'<p>{text}</p>\n'
456
442
  concat = {'typeName':'notesText',
@@ -465,12 +451,16 @@ class Serializer():
465
451
  Makes a Dataverse bounding box from appropriate coordinates.
466
452
  Returns Dataverse JSON segment as dict.
467
453
 
468
- ----------------------------------------
469
- Parameters:
454
+ Parameters
455
+ ----------
456
+ north : float
457
+ south : float
458
+ east : float
459
+ west : float
470
460
 
471
- north, south, east, west : float
472
- — Coordinates in decimal degrees.
473
- ----------------------------------------
461
+ Notes
462
+ -----
463
+ Coordinates in decimal degrees.
474
464
  '''
475
465
  names = ['north', 'south', 'east', 'west']
476
466
  points = [str(x) for x in [north, south, east, west]]
@@ -482,7 +472,8 @@ class Serializer():
482
472
  out.append(Serializer._convert_generic(inJson=coord[1],
483
473
  dvField=coord[0],
484
474
  #dryField='north'))
485
- dryField=[k for k in coord[1].keys()][0]))
475
+ #dryField=[k for k in coord[1].keys()][0]))
476
+ dryField=list(coord[1].keys())[0]))
486
477
  return out
487
478
 
488
479
  @staticmethod
@@ -490,12 +481,10 @@ class Serializer():
490
481
  '''
491
482
  Outputs Dataverse geospatial metadata block.
492
483
 
493
- ----------------------------------------
494
- Parameters:
495
-
484
+ Parameters
485
+ ----------
496
486
  dryJson : dict
497
- Dryad json as dict.
498
- ----------------------------------------
487
+ Dryad json as dict.
499
488
  '''
500
489
  if dryJson.get('locations'):
501
490
  #out = {}
@@ -552,30 +541,30 @@ class Serializer():
552
541
 
553
542
  def _assemble_json(self, dryJson=None, dvContact=None,
554
543
  dvEmail=None, defContact=True):
544
+ #pylint: disable = too-many-statements, too-many-locals, too-many-branches
555
545
  '''
546
+
556
547
  Assembles Dataverse json from Dryad JSON components.
557
548
  Dataverse JSON is a nightmare, so this function is too.
558
549
 
559
- ----------------------------------------
560
- Parameters:
561
-
550
+ Parameters
551
+ ----------
562
552
  dryJson : dict
563
- Dryad json as dict.
553
+ Dryad json as dict.
564
554
 
565
555
  dvContact : str
566
- Default Dataverse contact name.
556
+ Default Dataverse contact name.
567
557
 
568
558
  dvEmail : str
569
- Default Dataverse 4 contact email address.
559
+ Default Dataverse 4 contact email address.
570
560
 
571
561
  defContact : boolean
572
- Flag to include default contact information with record.
573
- ----------------------------------------
562
+ Flag to include default contact information with record.
574
563
  '''
575
564
  if not dvContact:
576
- dvContact = constants.DV_CONTACT_NAME
565
+ dvContact = self.kwargs['dv_contact_name']
577
566
  if not dvEmail:
578
- dvEmail = constants.DV_CONTACT_EMAIL
567
+ dvEmail = self.kwargs['dv_contact_email']
579
568
  if not dryJson:
580
569
  dryJson = self.dryadJson
581
570
  LOGGER.debug(dryJson)