dryad2dataverse 0.7.11a0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dryad2dataverse/__init__.py +14 -12
- dryad2dataverse/auth.py +94 -0
- dryad2dataverse/config.py +180 -0
- dryad2dataverse/data/dryad2dataverse_config.yml +127 -0
- dryad2dataverse/handlers.py +6 -2
- dryad2dataverse/monitor.py +146 -140
- dryad2dataverse/scripts/dryadd.py +224 -291
- dryad2dataverse/serializer.py +129 -140
- dryad2dataverse/transfer.py +296 -396
- {dryad2dataverse-0.7.11a0.dist-info → dryad2dataverse-0.8.1.dist-info}/METADATA +4 -3
- dryad2dataverse-0.8.1.dist-info/RECORD +14 -0
- dryad2dataverse/constants.py +0 -45
- dryad2dataverse-0.7.11a0.dist-info/RECORD +0 -12
- {dryad2dataverse-0.7.11a0.dist-info → dryad2dataverse-0.8.1.dist-info}/WHEEL +0 -0
- {dryad2dataverse-0.7.11a0.dist-info → dryad2dataverse-0.8.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
#! python
|
|
2
1
|
'''
|
|
3
2
|
Dryad daemon for monitoring and automatically uploading studies associated with a particular ROR
|
|
4
3
|
|
|
@@ -23,27 +22,109 @@ import textwrap
|
|
|
23
22
|
import time
|
|
24
23
|
|
|
25
24
|
import requests
|
|
25
|
+
import yaml
|
|
26
|
+
from requests.adapters import HTTPAdapter
|
|
27
|
+
|
|
26
28
|
import dryad2dataverse
|
|
29
|
+
import dryad2dataverse.auth
|
|
30
|
+
import dryad2dataverse.config
|
|
27
31
|
import dryad2dataverse.monitor
|
|
28
32
|
import dryad2dataverse.serializer
|
|
29
33
|
import dryad2dataverse.transfer
|
|
30
34
|
from dryad2dataverse.handlers import SSLSMTPHandler
|
|
31
35
|
|
|
32
|
-
|
|
33
|
-
|
|
36
|
+
DEFAULT_LOCATIONS = {'ios': '~/.config/dryad2dataverse',
|
|
37
|
+
'linux' : '~/.config/dryad2dataverse',
|
|
38
|
+
'darwin': '~/Library/Application Support/dryad2dataverse',
|
|
39
|
+
'win32' : 'AppData/Roaming/dryad2dataverse',
|
|
40
|
+
'cygwin' : '~/.config/dryad2dataverse'}
|
|
34
41
|
|
|
35
|
-
|
|
42
|
+
def argp():
|
|
43
|
+
'''
|
|
44
|
+
Argument parser
|
|
45
|
+
'''
|
|
46
|
+
description = ('Dryad to Dataverse importer/monitor. '
|
|
47
|
+
'All arguments enclosed by square brackets are OPTIONAL for '
|
|
48
|
+
'and are used for overriding defaults and/or providing sensitive'
|
|
49
|
+
'information.'
|
|
50
|
+
)
|
|
36
51
|
|
|
52
|
+
epilog = textwrap.dedent(
|
|
53
|
+
'''
|
|
54
|
+
**Dryad configuration file**
|
|
55
|
+
|
|
56
|
+
All dryadd options can be included in the file, but you can
|
|
57
|
+
also specify the Dryad secret and Dataverse API key with other
|
|
58
|
+
options.
|
|
59
|
+
|
|
60
|
+
If this file is not specified,
|
|
61
|
+
then the configuration file at the default location will
|
|
62
|
+
be used.
|
|
63
|
+
|
|
64
|
+
**Dryad secret**
|
|
65
|
+
|
|
66
|
+
The dryadd program requires both an application and a secret to use.
|
|
67
|
+
App IDs and secrets are provided by Dryad and can only
|
|
68
|
+
be obtained directly from them at http://datadryad.org.
|
|
69
|
+
The app id and secret are used to create a bearer token
|
|
70
|
+
for API authentication.
|
|
71
|
+
|
|
72
|
+
Use this option if you have not stored the secret
|
|
73
|
+
in the configuration file or wish to override it.
|
|
74
|
+
|
|
75
|
+
**Dataverse API key**
|
|
76
|
+
|
|
77
|
+
The Dataverse API is required in order to upload both
|
|
78
|
+
metadata and data. While administrator-level keys
|
|
79
|
+
are recommended, any key which grants upload privileges
|
|
80
|
+
should be sufficient (note: not covered by warranty).
|
|
81
|
+
|
|
82
|
+
Use this option if you have not stored the key in the
|
|
83
|
+
configuration file or wish to override it.
|
|
84
|
+
''').strip()
|
|
85
|
+
parser = argparse.ArgumentParser(description=description,
|
|
86
|
+
formatter_class=argparse.RawTextHelpFormatter,
|
|
87
|
+
epilog=epilog)
|
|
88
|
+
parser.add_argument('-c', '--config-file',
|
|
89
|
+
help=textwrap.dedent(
|
|
90
|
+
f'''
|
|
91
|
+
Dryad configuration file.
|
|
92
|
+
Default:
|
|
93
|
+
{DEFAULT_LOCATIONS[sys.platform]}/dryad2dataverse_config.yml
|
|
94
|
+
''').strip(),
|
|
95
|
+
required=False,
|
|
96
|
+
default=f'{DEFAULT_LOCATIONS[sys.platform]}/dryad2dataverse_config.yml',
|
|
97
|
+
dest='config')
|
|
98
|
+
parser.add_argument('-s', '--secret',
|
|
99
|
+
help='Secret for Dryad API.',
|
|
100
|
+
required=False,
|
|
101
|
+
dest='secret')
|
|
102
|
+
parser.add_argument('-k', '--api-key',
|
|
103
|
+
help='Dataverse API key',
|
|
104
|
+
required=False,
|
|
105
|
+
dest='api_key')
|
|
106
|
+
parser.add_argument('-v', '--verbosity',
|
|
107
|
+
help='Verbose output',
|
|
108
|
+
required=False,
|
|
109
|
+
action='store_true')
|
|
110
|
+
parser.add_argument('--version', action='version',
|
|
111
|
+
version='dryad2dataverse ' + dryad2dataverse.__version__,
|
|
112
|
+
help='Show version number and exit')
|
|
37
113
|
|
|
38
|
-
|
|
114
|
+
return parser
|
|
39
115
|
|
|
40
|
-
def new_content(serial):
|
|
116
|
+
def new_content(serial, **kwargs):
|
|
41
117
|
'''
|
|
42
118
|
Creates content for new study upload message (potentially redundant
|
|
43
119
|
with Dataverse emailer).
|
|
44
|
-
|
|
120
|
+
|
|
121
|
+
Parameters
|
|
122
|
+
----------
|
|
123
|
+
serial : dryad2dataverse.serializer.Serializer
|
|
124
|
+
**kwargs
|
|
125
|
+
Keyword arguments. Just pass dryad2dataverse.config.Config
|
|
45
126
|
'''
|
|
46
|
-
dv_link = (
|
|
127
|
+
dv_link = (kwargs['dv_url'] +
|
|
47
128
|
'/dataset.xhtml?persistentId=' +
|
|
48
129
|
serial.dvpid +
|
|
49
130
|
'&version=DRAFT')
|
|
@@ -57,13 +138,18 @@ def new_content(serial):
|
|
|
57
138
|
\n{serial.oversize}'
|
|
58
139
|
return (subject, content)
|
|
59
140
|
|
|
60
|
-
def changed_content(serial, monitor):
|
|
141
|
+
def changed_content(serial, monitor, **kwargs):
|
|
61
142
|
'''
|
|
62
143
|
Creates content for file update message.
|
|
63
|
-
|
|
64
|
-
|
|
144
|
+
|
|
145
|
+
Parameters
|
|
146
|
+
----------
|
|
147
|
+
serial : dryad2dataverse.serializer.Serializer
|
|
148
|
+
monitor : dryad2dataverse.monitor.Monitor
|
|
149
|
+
**kwargs
|
|
150
|
+
Keyword arguments. Just pass dryad2dataverse.config.Config
|
|
65
151
|
'''
|
|
66
|
-
dv_link = (
|
|
152
|
+
dv_link = (kwargs['dv_url'] +
|
|
67
153
|
'/dataset.xhtml?persistentId=' +
|
|
68
154
|
serial.dvpid +
|
|
69
155
|
'&version=DRAFT')
|
|
@@ -146,14 +232,14 @@ def notify(msgtxt, width=100, **kwargs):
|
|
|
146
232
|
|
|
147
233
|
msg = Em()
|
|
148
234
|
msg['Subject'] = msgtxt[0]
|
|
149
|
-
msg['From'] = kwargs['
|
|
235
|
+
msg['From'] = kwargs['sending_email']
|
|
150
236
|
msg['To'] = kwargs['recipients']
|
|
151
237
|
content = __clean_msg(msgtxt[1], max(width, 1000))
|
|
152
238
|
msg.set_content(content)
|
|
153
239
|
|
|
154
|
-
server = smtplib.SMTP_SSL(kwargs['
|
|
240
|
+
server = smtplib.SMTP_SSL(kwargs['smtp_server'], kwargs.get('ssl_port', 465))
|
|
155
241
|
|
|
156
|
-
server.login(kwargs['
|
|
242
|
+
server.login(kwargs['sending_email_username'], kwargs['email_send_password'])
|
|
157
243
|
#To must be split. See
|
|
158
244
|
#https://stackoverflow.com/questions/8856117/
|
|
159
245
|
#how-to-send-email-to-multiple-recipients-using-python-smtplib
|
|
@@ -196,15 +282,11 @@ def __bad_dates(rectuple: tuple, mod_date: str) -> tuple:
|
|
|
196
282
|
return tuple(records)
|
|
197
283
|
return rectuple
|
|
198
284
|
|
|
199
|
-
def get_records(
|
|
285
|
+
def get_records(mod_date=None, verbosity=True, **kwargs):
|
|
200
286
|
'''
|
|
201
287
|
returns a tuple of ((doi, metadata), ...). Dryad searches return complete
|
|
202
288
|
study metadata from the search, surprisingly.
|
|
203
289
|
|
|
204
|
-
ror : str
|
|
205
|
-
ROR string including http. To find your ROR, see
|
|
206
|
-
https://ror.org/
|
|
207
|
-
|
|
208
290
|
mod_date : str
|
|
209
291
|
UTC datetime string in the format suitable for the Dryad API.
|
|
210
292
|
eg. 2021-01-21T21:42:40Z
|
|
@@ -214,32 +296,32 @@ def get_records(ror: 'str', mod_date=None, verbosity=True, timeout=100):
|
|
|
214
296
|
verbosity : bool
|
|
215
297
|
Output some data to stdout
|
|
216
298
|
|
|
217
|
-
|
|
218
|
-
|
|
299
|
+
**kwargs
|
|
300
|
+
Keyword arguments. Just unpack dryad2dataverse.config.Config
|
|
219
301
|
'''
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
302
|
+
|
|
303
|
+
session = requests.Session()
|
|
304
|
+
session.mount('https://', HTTPAdapter(max_retries=dryad2dataverse.config.RETRY_STRATEGY))
|
|
305
|
+
headers = dryad2dataverse.config.Config.update_headers(**kwargs)
|
|
223
306
|
per_page = 1
|
|
224
|
-
params = {'affiliation' : ror,
|
|
307
|
+
params = {'affiliation' : kwargs['ror'],
|
|
225
308
|
'per_page' : per_page}
|
|
226
309
|
if mod_date:
|
|
227
310
|
params['modifiedSince'] = mod_date
|
|
228
|
-
stud =
|
|
229
|
-
params=params
|
|
311
|
+
stud = session.get(f'{kwargs["dry_url"]}{kwargs["api_path"]}/search', headers=headers,
|
|
312
|
+
params=params)
|
|
230
313
|
records = []
|
|
231
314
|
total = stud.json()['total']
|
|
232
315
|
if verbosity:
|
|
233
|
-
print(f'Total Records: {total}')
|
|
316
|
+
print(f'Total Records: {total}', file=sys.stdout)
|
|
234
317
|
params['per_page'] = 100
|
|
235
318
|
for data in range(total//100+1):
|
|
236
319
|
if verbosity:
|
|
237
|
-
print(f'Records page: {data+1}')
|
|
320
|
+
print(f'Records page: {data+1}', file=sys.stdout)
|
|
238
321
|
params['page'] = data+1
|
|
239
|
-
stud =
|
|
322
|
+
stud = session.get(f'{kwargs["dry_url"]}{kwargs["api_path"]}/search',
|
|
240
323
|
headers=headers,
|
|
241
|
-
params=params
|
|
242
|
-
timeout=timeout)
|
|
324
|
+
params=params)
|
|
243
325
|
time.sleep(10) # don't overload their system with API calls
|
|
244
326
|
stud.raise_for_status()
|
|
245
327
|
records += stud.json()['_embedded']['stash:datasets']
|
|
@@ -251,186 +333,6 @@ def get_records(ror: 'str', mod_date=None, verbosity=True, timeout=100):
|
|
|
251
333
|
for x in records),
|
|
252
334
|
mod_date)
|
|
253
335
|
|
|
254
|
-
def argp():
|
|
255
|
-
'''
|
|
256
|
-
Argument parser
|
|
257
|
-
'''
|
|
258
|
-
description = ('Dryad to Dataverse importer/monitor. '
|
|
259
|
-
'All arguments NOT enclosed by square brackets are required for '
|
|
260
|
-
'the script to run but some may already have defaults, specified '
|
|
261
|
-
'by "Default". '
|
|
262
|
-
'The "optional arguments" below refers to the use of the option switch, '
|
|
263
|
-
'(like -u), meaning "not a positional argument."'
|
|
264
|
-
)
|
|
265
|
-
parser = argparse.ArgumentParser(description=description)
|
|
266
|
-
parser.add_argument('-u', '--dv-url',
|
|
267
|
-
help='Destination Dataverse root url. '
|
|
268
|
-
'Default: https://borealisdata.ca',
|
|
269
|
-
required=False,
|
|
270
|
-
default='https://borealisdata.ca',
|
|
271
|
-
dest='url')
|
|
272
|
-
parser.add_argument('-k', '--key',
|
|
273
|
-
help='REQUIRED: API key for dataverse user',
|
|
274
|
-
required=True,
|
|
275
|
-
dest='key')
|
|
276
|
-
parser.add_argument('-t', '--target',
|
|
277
|
-
help='REQUIRED: Target dataverse short name',
|
|
278
|
-
required=True,
|
|
279
|
-
dest='target')
|
|
280
|
-
parser.add_argument('-e', '--email',
|
|
281
|
-
help='REQUIRED: Email address '
|
|
282
|
-
'which sends update notifications. ie: '
|
|
283
|
-
'"user@website.invalid".',
|
|
284
|
-
required=True,
|
|
285
|
-
dest='email')
|
|
286
|
-
parser.add_argument('-s', '--user',
|
|
287
|
-
help=('REQUIRED: User name for SMTP server. Check '
|
|
288
|
-
'your server for details. '),
|
|
289
|
-
required=True,
|
|
290
|
-
dest='user')
|
|
291
|
-
parser.add_argument('-r', '--recipient',
|
|
292
|
-
help='REQUIRED: Recipient(s) of email notification. '
|
|
293
|
-
'Separate addresses with spaces',
|
|
294
|
-
required=True,
|
|
295
|
-
nargs='+',
|
|
296
|
-
dest='recipients')
|
|
297
|
-
parser.add_argument('-p', '--pwd',
|
|
298
|
-
help='REQUIRED: Password for sending email account. '
|
|
299
|
-
'Enclose in single quotes to avoid OS errors with special '
|
|
300
|
-
'characters.',
|
|
301
|
-
required=True,
|
|
302
|
-
dest='pwd')
|
|
303
|
-
parser.add_argument('--server',
|
|
304
|
-
help='Mail server for sending account. '
|
|
305
|
-
'Default: smtp.mail.yahoo.com',
|
|
306
|
-
required=False,
|
|
307
|
-
default='smtp.mail.yahoo.com',
|
|
308
|
-
dest='mailserv')
|
|
309
|
-
parser.add_argument('--port',
|
|
310
|
-
help='Mail server port. Default: 465. '
|
|
311
|
-
'Mail is sent using SSL.',
|
|
312
|
-
required=False,
|
|
313
|
-
type=int,
|
|
314
|
-
#default=587,
|
|
315
|
-
default=465,
|
|
316
|
-
dest='port')
|
|
317
|
-
parser.add_argument('-c', '--contact',
|
|
318
|
-
help='REQUIRED: Contact email address for Dataverse records. '
|
|
319
|
-
'Must pass Dataverse email validation rules (so "test@test.invalid" '
|
|
320
|
-
'is not acceptable).',
|
|
321
|
-
required=True,
|
|
322
|
-
dest='contact')
|
|
323
|
-
parser.add_argument('-n', '--contact-name',
|
|
324
|
-
help='REQUIRED: Contact name for Dataverse records',
|
|
325
|
-
required=True,
|
|
326
|
-
dest='cname')
|
|
327
|
-
parser.add_argument('-v', '--verbosity',
|
|
328
|
-
help='Verbose output',
|
|
329
|
-
required=False,
|
|
330
|
-
action='store_true')
|
|
331
|
-
parser.add_argument('-i', '--ror',
|
|
332
|
-
help='REQUIRED: Institutional ROR URL. '
|
|
333
|
-
'Eg: "https://ror.org/03rmrcq20". This identifies the '
|
|
334
|
-
'institution in Dryad repositories.',
|
|
335
|
-
required=True,
|
|
336
|
-
dest='ror')
|
|
337
|
-
parser.add_argument('--tmpfile',
|
|
338
|
-
help='Temporary file location. Default: /tmp',
|
|
339
|
-
required=False,
|
|
340
|
-
dest='tmp')
|
|
341
|
-
parser.add_argument('--db',
|
|
342
|
-
help='Tracking database location and name. '
|
|
343
|
-
'Default: $HOME/dryad_dataverse_monitor.sqlite3',
|
|
344
|
-
required=False,
|
|
345
|
-
dest='dbase')
|
|
346
|
-
parser.add_argument('--log',
|
|
347
|
-
help='Complete path to log. '
|
|
348
|
-
'Default: /var/log/dryadd.log',
|
|
349
|
-
required=False,
|
|
350
|
-
dest='log',
|
|
351
|
-
default='/var/log/dryadd.log')
|
|
352
|
-
parser.add_argument('--loglevel',
|
|
353
|
-
help='Log level of server rotating log. Choose one of '
|
|
354
|
-
'debug, info, warning, error or critical. '
|
|
355
|
-
'Note: case sensitive. '
|
|
356
|
-
'Default: logging.warning.',
|
|
357
|
-
required=False,
|
|
358
|
-
dest='loglevel',
|
|
359
|
-
default='warning',
|
|
360
|
-
choices=['debug', 'info', 'warning','error','critical'])
|
|
361
|
-
parser.add_argument('--email-loglevel',
|
|
362
|
-
help='Log level of email log. Choose one of '
|
|
363
|
-
'debug, info, warning, error or critical. '
|
|
364
|
-
'Note: case sensitive. '
|
|
365
|
-
'Default: warning',
|
|
366
|
-
required=False,
|
|
367
|
-
dest='email_loglevel',
|
|
368
|
-
default='warning',
|
|
369
|
-
choices=['debug', 'info', 'warning','error','critical'])
|
|
370
|
-
parser.add_argument('-l', '--no_force_unlock',
|
|
371
|
-
help='No forcible file unlock. Required '
|
|
372
|
-
'if /lock endpint is restricted',
|
|
373
|
-
required=False,
|
|
374
|
-
action='store_false',
|
|
375
|
-
dest='force_unlock')
|
|
376
|
-
parser.add_argument('-x', '--exclude',
|
|
377
|
-
help='Exclude these DOIs. Separate by spaces',
|
|
378
|
-
required=False,
|
|
379
|
-
default=[],
|
|
380
|
-
nargs='+',
|
|
381
|
-
dest='exclude')
|
|
382
|
-
parser.add_argument('-b', '--num-backups',
|
|
383
|
-
help=('Number of database backups to keep. '
|
|
384
|
-
'Default 3'),
|
|
385
|
-
required=False,
|
|
386
|
-
type=int,
|
|
387
|
-
default=3)
|
|
388
|
-
parser.add_argument('-w', '--warn-too-many',
|
|
389
|
-
help=('Warn and halt execution if abnormally large '
|
|
390
|
-
'number of updates present.'),
|
|
391
|
-
action='store_true',)
|
|
392
|
-
parser.add_argument('--warn-threshold',
|
|
393
|
-
help=('Do not transfer studies if number of updates '
|
|
394
|
-
'is greater than or equal to this number. '
|
|
395
|
-
'Default: 15'),
|
|
396
|
-
type=int,
|
|
397
|
-
dest='warn',
|
|
398
|
-
default=15)
|
|
399
|
-
parser.add_argument('--testmode-on',
|
|
400
|
-
help=('Turn on test mode. '
|
|
401
|
-
'Number of transfers will be limited '
|
|
402
|
-
'to the value in --testmode-limit '
|
|
403
|
-
'or 5 if you don\'t set --testmode-limit '),
|
|
404
|
-
action='store_true',
|
|
405
|
-
dest='testmode')
|
|
406
|
-
parser.add_argument('--testmode-limit',
|
|
407
|
-
help=('Test mode - only transfer first [n] '
|
|
408
|
-
'of the total number of (new) records. Old ones will '
|
|
409
|
-
'still be updated, though. '
|
|
410
|
-
'Default: 5'),
|
|
411
|
-
type=int,
|
|
412
|
-
default=5,
|
|
413
|
-
dest='testlimit')
|
|
414
|
-
parser.add_argument('--version', action='version',
|
|
415
|
-
version='dryad2dataverse ' + dryad2dataverse.__version__,
|
|
416
|
-
help='Show version number and exit')
|
|
417
|
-
|
|
418
|
-
return parser
|
|
419
|
-
|
|
420
|
-
def set_constants(args):
|
|
421
|
-
'''
|
|
422
|
-
Set the appropriate dryad2dataverse "constants"
|
|
423
|
-
'''
|
|
424
|
-
dryad2dataverse.constants.DV_CONTACT_EMAIL = args.contact
|
|
425
|
-
dryad2dataverse.constants.DV_CONTACT_ = args.contact
|
|
426
|
-
dryad2dataverse.constants.APIKEY = args.key
|
|
427
|
-
if args.url:
|
|
428
|
-
dryad2dataverse.constants.DVURL = args.url
|
|
429
|
-
if args.dbase:
|
|
430
|
-
dryad2dataverse.constants.DBASE = args.dbase
|
|
431
|
-
if args.tmp:
|
|
432
|
-
dryad2dataverse.constants.TMP = args.tmp
|
|
433
|
-
|
|
434
336
|
def email_log(mailhost, fromaddr, toaddrs, credentials, port=465, secure=(),
|
|
435
337
|
level=logging.WARNING, timeout=100):
|
|
436
338
|
'''
|
|
@@ -453,7 +355,7 @@ def email_log(mailhost, fromaddr, toaddrs, credentials, port=465, secure=(),
|
|
|
453
355
|
level : int
|
|
454
356
|
logging level. Default logging.WARNING
|
|
455
357
|
'''
|
|
456
|
-
#pylint: disable=too-many-arguments
|
|
358
|
+
#pylint: disable=too-many-arguments, too-many-positional-arguments
|
|
457
359
|
#Because consistency is for suckers and yahoo requires full hostname
|
|
458
360
|
#subject = 'Dryad to Dataverse transfer error'
|
|
459
361
|
subject = 'Dryad to Dataverse logger message'
|
|
@@ -488,7 +390,9 @@ def rotating_log(path, level):
|
|
|
488
390
|
#python-logging-disable-logging-from-imported-modules
|
|
489
391
|
for name in ['dryad2dataverse.serializer',
|
|
490
392
|
'dryad2dataverse.transfer',
|
|
491
|
-
'dryad2dataverse.monitor'
|
|
393
|
+
'dryad2dataverse.monitor',
|
|
394
|
+
'dryad2dataverse.auth',
|
|
395
|
+
'dryad2dataverse.config']:
|
|
492
396
|
logging.getLogger(name).setLevel(level)
|
|
493
397
|
rotator = logging.handlers.RotatingFileHandler(filename=path,
|
|
494
398
|
maxBytes=10*1024**2,
|
|
@@ -527,11 +431,12 @@ def checkwarn(val:int, **kwargs) -> None:
|
|
|
527
431
|
'''
|
|
528
432
|
if not kwargs.get('warn_too_many'):
|
|
529
433
|
return
|
|
530
|
-
if val >= kwargs.get('
|
|
434
|
+
if val >= kwargs.get('warning_threshold',0):
|
|
531
435
|
mess = ('Large number of updates detected. '
|
|
532
|
-
f'{val} new studies exceeds threshold of {kwargs.get("
|
|
436
|
+
f'{val} new studies exceeds threshold of {kwargs.get("warning_threshold", 0)}. '
|
|
533
437
|
'Program execution halted.')
|
|
534
|
-
|
|
438
|
+
print(mess, file=sys.stderr)
|
|
439
|
+
subject = 'Dryad2Dataverse large update warning'
|
|
535
440
|
for logme in kwargs.get('loggers'):
|
|
536
441
|
logme.warning(mess)
|
|
537
442
|
notify(msgtxt=(subject, mess),
|
|
@@ -547,7 +452,7 @@ def verbo(verbosity:bool, **kwargs)->None:
|
|
|
547
452
|
'''
|
|
548
453
|
if verbosity:
|
|
549
454
|
for key, value in kwargs.items():
|
|
550
|
-
print(f'{key}: {value}')
|
|
455
|
+
print(f'{key}: {value}', file=sys.stdout)
|
|
551
456
|
|
|
552
457
|
def anonymizer(args: argparse.Namespace) -> dict:
|
|
553
458
|
'''
|
|
@@ -555,10 +460,7 @@ def anonymizer(args: argparse.Namespace) -> dict:
|
|
|
555
460
|
with cleaner values.
|
|
556
461
|
'''
|
|
557
462
|
clean_me = args.__dict__.copy()#Don't work on the real thing!
|
|
558
|
-
cleanser = {x : 'REDACTED' for x in ['
|
|
559
|
-
'key', 'mailserve',
|
|
560
|
-
'pwd', 'recipients',
|
|
561
|
-
'user']}
|
|
463
|
+
cleanser = {x : 'REDACTED' for x in ['secret', 'api_key']}
|
|
562
464
|
clean_me.update(cleanser)
|
|
563
465
|
return clean_me
|
|
564
466
|
|
|
@@ -573,78 +475,111 @@ def bulklog(message, *logfuncs):
|
|
|
573
475
|
for log in logfuncs:
|
|
574
476
|
log('%s', message)
|
|
575
477
|
|
|
576
|
-
def
|
|
478
|
+
def test_config(cfile:pathlib.Path):
|
|
577
479
|
'''
|
|
578
|
-
|
|
480
|
+
Ensure that the config file can be loaded, and if not
|
|
481
|
+
raise a helpful error because it can't be logged yet
|
|
579
482
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
483
|
+
Parameters
|
|
484
|
+
----------
|
|
485
|
+
cfile : pathlib.Path
|
|
486
|
+
Config yaml file
|
|
584
487
|
'''
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
set_constants(args)
|
|
597
|
-
|
|
598
|
-
logger = rotating_log(args.log,
|
|
599
|
-
level=logging.getLevelName(args.loglevel.upper()))
|
|
488
|
+
try:
|
|
489
|
+
with open(cfile.expanduser().absolute(), encoding='utf-8') as y:
|
|
490
|
+
yaml.safe_load(y)
|
|
491
|
+
except yaml.YAMLError as e:
|
|
492
|
+
print('Configuration file error', file=sys.stdout)
|
|
493
|
+
print(e, file=sys.stderr)
|
|
494
|
+
sys.exit()
|
|
495
|
+
#this shouldn't happen ever, but just in case
|
|
496
|
+
except FileNotFoundError as e:
|
|
497
|
+
print(e, file=sys.stderr)
|
|
498
|
+
sys.exit()
|
|
600
499
|
|
|
601
|
-
elog = email_log(args.mailserv, args.email, args.recipients,
|
|
602
|
-
(args.user, args.pwd), port=args.port,
|
|
603
|
-
level = logging.getLevelName(args.email_loglevel.upper()))
|
|
604
500
|
|
|
605
501
|
|
|
502
|
+
def main():
|
|
503
|
+
'''
|
|
504
|
+
Primary function
|
|
505
|
+
'''
|
|
506
|
+
#pylint: disable=too-many-branches, too-many-locals, too-many-statements
|
|
507
|
+
args = argp().parse_args()
|
|
508
|
+
configfile = pathlib.Path(args.config).expanduser().absolute()
|
|
509
|
+
if not configfile.exists():
|
|
510
|
+
print(('Config file not found. Creating it at '
|
|
511
|
+
f'{str(configfile)} and exiting.') , file=sys.stderr)
|
|
512
|
+
config = dryad2dataverse.config.Config(configfile.parent, configfile.name)
|
|
513
|
+
sys.exit()
|
|
514
|
+
else:
|
|
515
|
+
config = dryad2dataverse.config.Config(configfile.parent, configfile.name)
|
|
516
|
+
test_config(configfile)
|
|
517
|
+
for val in ['api_key', 'secret']:
|
|
518
|
+
if getattr(args,val):
|
|
519
|
+
config[val] = getattr(args,val)
|
|
520
|
+
try:
|
|
521
|
+
config.validate()
|
|
522
|
+
except ValueError as e:
|
|
523
|
+
print(e, file=sys.stderr)
|
|
524
|
+
sys.exit()
|
|
525
|
+
config['token'] = dryad2dataverse.auth.Token(**config)
|
|
526
|
+
|
|
527
|
+
logpath = pathlib.Path(config['log']).expanduser().absolute()
|
|
528
|
+
logpath.parent.mkdir(parents=True, exist_ok=True)
|
|
529
|
+
|
|
530
|
+
logger = rotating_log(logpath,
|
|
531
|
+
level=logging.getLevelName(config['loglevel'].upper()))
|
|
532
|
+
elog = email_log(config['smtp_server'],
|
|
533
|
+
config['sending_email'],
|
|
534
|
+
config['recipients'],
|
|
535
|
+
(config['sending_email_username'], config['email_send_password']),
|
|
536
|
+
port=config['ssl_port'],
|
|
537
|
+
level = logging.getLevelName(config['email_loglevel'].upper()))
|
|
606
538
|
logger.info('Beginning update process')
|
|
607
539
|
for logme in [elog, logger]:
|
|
608
|
-
logme.debug('Command line arguments: %s' , pprint.
|
|
540
|
+
logme.debug('Command line arguments: %s' , pprint.pformat(anonymizer(args)))
|
|
609
541
|
|
|
610
|
-
monitor = dryad2dataverse.monitor.Monitor(
|
|
542
|
+
monitor = dryad2dataverse.monitor.Monitor(**config)
|
|
611
543
|
#copy the database to make a backup, because paranoia is your friend
|
|
612
|
-
|
|
613
|
-
|
|
544
|
+
db_full = pathlib.Path(config['dbase']).expanduser().absolute()
|
|
545
|
+
if db_full.exists():
|
|
614
546
|
try:
|
|
615
|
-
shutil.copyfile(
|
|
616
|
-
pathlib.Path(
|
|
617
|
-
|
|
547
|
+
shutil.copyfile(db_full,
|
|
548
|
+
pathlib.Path(db_full.parent,
|
|
549
|
+
db_full.stem + '_' +
|
|
618
550
|
datetime.datetime.now().strftime('%Y-%m-%d-%H%M') +
|
|
619
|
-
|
|
551
|
+
db_full.suffix)
|
|
620
552
|
)
|
|
621
553
|
except FileNotFoundError:
|
|
622
|
-
|
|
623
|
-
|
|
554
|
+
for _ in [logger, elog]:
|
|
555
|
+
_.exception('Database not found: %s', config['dbase'])
|
|
556
|
+
print(f'Database not found: {config["dbase"]}', file=sys.stderr)
|
|
624
557
|
sys.exit()
|
|
625
558
|
#list comprehension includes untimestamped dbase name, hence 2+
|
|
626
|
-
fnames = glob.glob(
|
|
627
|
-
|
|
628
|
-
fnames.remove(
|
|
559
|
+
fnames = glob.glob((str(pathlib.Path(db_full.parent,
|
|
560
|
+
db_full.stem + '*' + db_full.suffix))))
|
|
561
|
+
fnames.remove(str(db_full))
|
|
629
562
|
fnames.sort(reverse=True)
|
|
630
|
-
fnames = fnames[
|
|
563
|
+
fnames = fnames[config['number_of_backups']:]
|
|
631
564
|
for fil in fnames:
|
|
632
565
|
os.remove(fil)
|
|
566
|
+
logger.info('Deleted database backup: %s', fil)
|
|
633
567
|
logger.info('Last update time: %s', monitor.lastmod)
|
|
634
568
|
#get all updates since the last update check
|
|
635
|
-
updates = get_records(
|
|
636
|
-
verbosity=args.verbosity
|
|
569
|
+
updates = get_records(monitor.lastmod,
|
|
570
|
+
verbosity=args.verbosity,
|
|
571
|
+
**config)
|
|
637
572
|
logger.info('Total new files: %s', len(updates))
|
|
638
573
|
elog.info('Total new files: %s', len(updates))
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
min(args.testlimit, len(updates)),
|
|
574
|
+
checkwarn(val=len(updates) if not config['test_mode'] else
|
|
575
|
+
min(config['test_mode_limit'], len(updates)),
|
|
642
576
|
loggers=[logger],
|
|
643
|
-
**
|
|
644
|
-
if args.testmode:
|
|
645
|
-
logger.warning('Test mode is ON - number of updates limited to %s', args.testlimit)
|
|
646
|
-
elog.warning('Test mode is ON - number of updates limited to %s', args.testlimit)
|
|
577
|
+
**config)
|
|
647
578
|
|
|
579
|
+
if config['test_mode']:
|
|
580
|
+
for _ in [logger, elog]:
|
|
581
|
+
_.warning('Test mode is ON - number of updates limited to %s',
|
|
582
|
+
config['test_mode_limit'])
|
|
648
583
|
#update all the new files
|
|
649
584
|
verbo(args.verbosity, **{'Total to process': len(updates)})
|
|
650
585
|
|
|
@@ -652,8 +587,8 @@ def main():
|
|
|
652
587
|
count = 0
|
|
653
588
|
testcount = 0
|
|
654
589
|
for doi in updates:
|
|
655
|
-
if
|
|
656
|
-
logger.info('Test limit of %s reached',
|
|
590
|
+
if config['test_mode'] and (testcount >= config['test_mode_limit']):
|
|
591
|
+
logger.info('Test limit of %s reached', config['test_mode_limit'])
|
|
657
592
|
break
|
|
658
593
|
count += 1
|
|
659
594
|
logger.info('Start processing %s of %s', count, len(updates))
|
|
@@ -661,11 +596,12 @@ def main():
|
|
|
661
596
|
doi[0], doi[0])
|
|
662
597
|
if not updates:
|
|
663
598
|
break #no new files in this case
|
|
664
|
-
|
|
599
|
+
#use get in this case because people *will* have nothing to exclude
|
|
600
|
+
if doi[0] in config.get('exclude_list',[]):
|
|
665
601
|
logger.warning('Skipping excluded doi: %s', doi[0])
|
|
666
602
|
continue
|
|
667
603
|
#Create study object
|
|
668
|
-
study = dryad2dataverse.serializer.Serializer(doi[0])
|
|
604
|
+
study = dryad2dataverse.serializer.Serializer(doi[0], **config)
|
|
669
605
|
#verbose output
|
|
670
606
|
verbo(args.verbosity,
|
|
671
607
|
**{'Processing': count,
|
|
@@ -685,21 +621,21 @@ def main():
|
|
|
685
621
|
update_type = monitor.status(study)['status']
|
|
686
622
|
verbo(args.verbosity, **{'Status': update_type})
|
|
687
623
|
#create a transfer object to copy the files over
|
|
688
|
-
transfer = dryad2dataverse.transfer.Transfer(study)
|
|
624
|
+
transfer = dryad2dataverse.transfer.Transfer(study, **config)
|
|
689
625
|
transfer.test_api_key()
|
|
690
626
|
#Now start the action
|
|
691
627
|
if update_type == 'new':
|
|
692
628
|
logger.info('New study: %s, %s', doi[0], doi[1]['title'])
|
|
693
629
|
logger.info('Uploading study metadata')
|
|
694
|
-
transfer.upload_study(targetDv=
|
|
630
|
+
transfer.upload_study(targetDv=config['target'])
|
|
695
631
|
#New files are in now in monitor.diff_files()['add']
|
|
696
632
|
#with 2 Feb 2022 API change
|
|
697
633
|
#so we can ignore them here
|
|
698
634
|
logger.info('Uploading Dryad JSON metadata')
|
|
699
635
|
transfer.upload_json()
|
|
700
636
|
transfer.set_correct_date()
|
|
701
|
-
notify(new_content(study),
|
|
702
|
-
**
|
|
637
|
+
notify(new_content(study, **config),
|
|
638
|
+
**config)
|
|
703
639
|
testcount+=1
|
|
704
640
|
|
|
705
641
|
elif update_type == 'updated':
|
|
@@ -707,12 +643,11 @@ def main():
|
|
|
707
643
|
logger.info('Updating metadata')
|
|
708
644
|
transfer.upload_study(dvpid=study.dvpid)
|
|
709
645
|
#remove old JSON files
|
|
710
|
-
|
|
711
|
-
transfer.delete_dv_files(rem)
|
|
646
|
+
transfer.delete_dv_files(monitor.get_json_dvfids(study))
|
|
712
647
|
transfer.upload_json()
|
|
713
648
|
transfer.set_correct_date()
|
|
714
|
-
notify(changed_content(study, monitor),
|
|
715
|
-
**
|
|
649
|
+
notify(changed_content(study, monitor, **config),
|
|
650
|
+
**config)
|
|
716
651
|
|
|
717
652
|
#new, identical, updated, lastmodsame
|
|
718
653
|
elif update_type in ('unchanged', 'lastmodsame'):
|
|
@@ -732,7 +667,7 @@ def main():
|
|
|
732
667
|
transfer.download_files(diff['add'])
|
|
733
668
|
#now send them to Dataverse
|
|
734
669
|
transfer.upload_files(diff['add'], pid=study.dvpid,
|
|
735
|
-
force_unlock=
|
|
670
|
+
force_unlock=config['force_unlock'])
|
|
736
671
|
#Update the tracking database for that record
|
|
737
672
|
monitor.update(transfer)
|
|
738
673
|
|
|
@@ -745,13 +680,14 @@ def main():
|
|
|
745
680
|
elog.info('Completed update process')
|
|
746
681
|
finished = ('Dryad to Dataverse transfers completed',
|
|
747
682
|
('Dryad to Dataverse transfer daemon has completed.\n'
|
|
748
|
-
f'Log available at: {
|
|
749
|
-
notify(finished, **
|
|
683
|
+
f'Log available at: {config["log"]}'))
|
|
684
|
+
notify(finished, **config)
|
|
750
685
|
|
|
751
686
|
except dryad2dataverse.exceptions.DataverseBadApiKeyError as api_err:
|
|
752
687
|
logger.exception(api_err)
|
|
753
688
|
elog.exception(api_err)
|
|
754
|
-
print(f'Error: {api_err}. Exiting. For details see log at {args.log}.'
|
|
689
|
+
print(f'Error: {api_err}. Exiting. For details see log at {args.log}.',
|
|
690
|
+
file=sys.stderr)
|
|
755
691
|
sys.exit()#graceful exit is graceful
|
|
756
692
|
|
|
757
693
|
except Exception as err: # pylint: disable=broad-except
|
|
@@ -761,12 +697,9 @@ def main():
|
|
|
761
697
|
logger.exception('%s\nCritical failure with DOI: %s : %s\n%s', err,
|
|
762
698
|
doi[0], doi[1]['title'], doi[1].get('sharingLink'),
|
|
763
699
|
stack_info=True, exc_info=True)
|
|
764
|
-
print(f'Error: {err}. Exiting. For details see log at {
|
|
700
|
+
print(f'Error: {err}. Exiting. For details see log at {config["log"]}.',
|
|
701
|
+
file=sys.stderr)
|
|
765
702
|
sys.exit()
|
|
766
703
|
|
|
767
704
|
if __name__ == '__main__':
|
|
768
705
|
main()
|
|
769
|
-
_parser = argp()
|
|
770
|
-
_args = _parser.parse_args()
|
|
771
|
-
print('This is what you would have done had you actually run this')
|
|
772
|
-
print(_args)
|