dryad2dataverse 0.7.11a0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dryad2dataverse/__init__.py +14 -12
- dryad2dataverse/auth.py +94 -0
- dryad2dataverse/config.py +180 -0
- dryad2dataverse/data/dryad2dataverse_config.yml +126 -0
- dryad2dataverse/handlers.py +6 -2
- dryad2dataverse/monitor.py +146 -140
- dryad2dataverse/scripts/dryadd.py +210 -293
- dryad2dataverse/serializer.py +129 -140
- dryad2dataverse/transfer.py +296 -396
- {dryad2dataverse-0.7.11a0.dist-info → dryad2dataverse-0.8.0.dist-info}/METADATA +4 -3
- dryad2dataverse-0.8.0.dist-info/RECORD +14 -0
- dryad2dataverse/constants.py +0 -45
- dryad2dataverse-0.7.11a0.dist-info/RECORD +0 -12
- {dryad2dataverse-0.7.11a0.dist-info → dryad2dataverse-0.8.0.dist-info}/WHEEL +0 -0
- {dryad2dataverse-0.7.11a0.dist-info → dryad2dataverse-0.8.0.dist-info}/entry_points.txt +0 -0
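The bulk of the change is in `dryad2dataverse/scripts/dryadd.py`, shown in the diff below. The 0.7.x interface, which required roughly twenty command-line switches for API keys, SMTP credentials, the institutional ROR, and database/log paths, is replaced by a YAML configuration file read by the new `dryad2dataverse.config` module plus a bearer token minted by the new `dryad2dataverse.auth` module; only the Dryad secret and the Dataverse API key remain as optional command-line overrides. A minimal sketch of the resulting flow, pieced together from the `main()` changes in the diff (the config path is the Linux default from `DEFAULT_LOCATIONS`; the DOI is a placeholder, not a value from the package):

```python
# Sketch assembled from the dryadd.py diff below; not an official example.
import pathlib

import dryad2dataverse.auth
import dryad2dataverse.config
import dryad2dataverse.monitor
import dryad2dataverse.serializer
import dryad2dataverse.transfer

# 0.8.0 reads its settings from a YAML file instead of ~20 CLI flags
configfile = pathlib.Path('~/.config/dryad2dataverse/dryad2dataverse_config.yml').expanduser()
config = dryad2dataverse.config.Config(configfile.parent, configfile.name)

# the Dryad app id/secret in the config are exchanged for a bearer token
config['token'] = dryad2dataverse.auth.Token(**config)

# every component now takes the unpacked config instead of module-level constants
monitor = dryad2dataverse.monitor.Monitor(**config)
study = dryad2dataverse.serializer.Serializer('doi:10.5061/dryad.xxxxx', **config)  # placeholder DOI
transfer = dryad2dataverse.transfer.Transfer(study, **config)
```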
```diff
--- dryad2dataverse/scripts/dryadd.py (0.7.11a0)
+++ dryad2dataverse/scripts/dryadd.py (0.8.0)
@@ -1,4 +1,3 @@
-#! python
 '''
 Dryad daemon for monitoring and automatically uploading studies associated with a particular ROR
 
@@ -23,27 +22,109 @@ import textwrap
 import time
 
 import requests
+import yaml
+from requests.adapters import HTTPAdapter
+
 import dryad2dataverse
+import dryad2dataverse.auth
+import dryad2dataverse.config
 import dryad2dataverse.monitor
 import dryad2dataverse.serializer
 import dryad2dataverse.transfer
 from dryad2dataverse.handlers import SSLSMTPHandler
 
-
-
+DEFAULT_LOCATIONS = {'ios': '~/.config/dryad2dataverse',
+                     'linux' : '~/.config/dryad2dataverse',
+                     'darwin': '~/Library/Application Support/dryad2dataverse',
+                     'win32' : 'AppData/Roaming/dryad2dataverse',
+                     'cygwin' : '~/.config/dryad2dataverse'}
 
-
+def argp():
+    '''
+    Argument parser
+    '''
+    description = ('Dryad to Dataverse importer/monitor. '
+                   'All arguments enclosed by square brackets are OPTIONAL for '
+                   'and are used for overriding defaults and/or providing sensitive'
+                   'information.'
+                   )
 
+    epilog = textwrap.dedent(
+        '''
+        **Dryad configuration file**
+
+        All dryadd options can be included in the file, but you can
+        also specify the Dryad secret and Dataverse API key with other
+        options.
+
+        If this file is not specified,
+        then the configuration file at the default location will
+        be used.
+
+        **Dryad secret**
+
+        The dryadd program requires both an application and a secret to use.
+        App IDs and secrets are provided by Dryad and can only
+        be obtained directly from them at http://datadryad.org.
+        The app id and secret are used to create a bearer token
+        for API authentication.
+
+        Use this option if you have not stored the secret
+        in the configuration file or wish to override it.
+
+        **Dataverse API key**
+
+        The Dataverse API is required in order to upload both
+        metadata and data. While administrator-level keys
+        are recommended, any key which grants upload privileges
+        should be sufficient (note: not covered by warranty).
+
+        Use this option if you have not stored the key in the
+        configuration file or wish to override it.
+        ''').strip()
+    parser = argparse.ArgumentParser(description=description,
+                                     formatter_class=argparse.RawTextHelpFormatter,
+                                     epilog=epilog)
+    parser.add_argument('-c', '--config-file',
+                        help=textwrap.dedent(
+                            f'''
+                            Dryad configuration file.
+                            Default:
+                            {DEFAULT_LOCATIONS[sys.platform]}/dryad2dataverse_config.yml
+                            ''').strip(),
+                        required=False,
+                        default=f'{DEFAULT_LOCATIONS[sys.platform]}/dryad2dataverse_config.yml',
+                        dest='config')
+    parser.add_argument('-s', '--secret',
+                        help='Secret for Dryad API.',
+                        required=False,
+                        dest='secret')
+    parser.add_argument('-k', '--api-key',
+                        help='Dataverse API key',
+                        required=False,
+                        dest='api_key')
+    parser.add_argument('-v', '--verbosity',
+                        help='Verbose output',
+                        required=False,
+                        action='store_true')
+    parser.add_argument('--version', action='version',
+                        version='dryad2dataverse ' + dryad2dataverse.__version__,
+                        help='Show version number and exit')
 
-
+    return parser
 
-def new_content(serial):
+def new_content(serial, **kwargs):
     '''
     Creates content for new study upload message (potentially redundant
     with Dataverse emailer).
-
+
+    Parameters
+    ----------
+    serial : dryad2dataverse.serializer.Serializer
+    **kwargs
+        Keyword arguments. Just pass dryad2dataverse.config.Config
     '''
-    dv_link = (
+    dv_link = (kwargs['dv_url'] +
               '/dataset.xhtml?persistentId=' +
               serial.dvpid +
               '&version=DRAFT')
@@ -57,13 +138,18 @@ def new_content(serial):
 \n{serial.oversize}'
     return (subject, content)
 
-def changed_content(serial, monitor):
+def changed_content(serial, monitor, **kwargs):
     '''
     Creates content for file update message.
-
-
+
+    Parameters
+    ----------
+    serial : dryad2dataverse.serializer.Serializer
+    monitor : dryad2dataverse.monitor.Monitor
+    **kwargs
+        Keyword arguments. Just pass dryad2dataverse.config.Config
     '''
-    dv_link = (
+    dv_link = (kwargs['dv_url'] +
               '/dataset.xhtml?persistentId=' +
               serial.dvpid +
               '&version=DRAFT')
@@ -146,14 +232,14 @@ def notify(msgtxt, width=100, **kwargs):
 
     msg = Em()
     msg['Subject'] = msgtxt[0]
-    msg['From'] = kwargs['
+    msg['From'] = kwargs['sending_email']
     msg['To'] = kwargs['recipients']
     content = __clean_msg(msgtxt[1], max(width, 1000))
     msg.set_content(content)
 
-    server = smtplib.SMTP_SSL(kwargs['
+    server = smtplib.SMTP_SSL(kwargs['smtp_server'], kwargs.get('ssl_port', 465))
 
-    server.login(kwargs['
+    server.login(kwargs['sending_email_username'], kwargs['email_send_password'])
     #To must be split. See
     #https://stackoverflow.com/questions/8856117/
     #how-to-send-email-to-multiple-recipients-using-python-smtplib
@@ -196,15 +282,11 @@ def __bad_dates(rectuple: tuple, mod_date: str) -> tuple:
         return tuple(records)
     return rectuple
 
-def get_records(
+def get_records(mod_date=None, verbosity=True, **kwargs):
     '''
     returns a tuple of ((doi, metadata), ...). Dryad searches return complete
     study metadata from the search, surprisingly.
 
-    ror : str
-        ROR string including http. To find your ROR, see
-        https://ror.org/
-
     mod_date : str
         UTC datetime string in the format suitable for the Dryad API.
         eg. 2021-01-21T21:42:40Z
@@ -214,32 +296,32 @@ def get_records(ror: 'str', mod_date=None, verbosity=True, timeout=100):
     verbosity : bool
         Output some data to stdout
 
-
-
+    **kwargs
+        Keyword arguments. Just unpack dryad2dataverse.config.Config
     '''
-
-
-
+
+    session = requests.Session()
+    session.mount('https://', HTTPAdapter(max_retries=dryad2dataverse.config.RETRY_STRATEGY))
+    headers = dryad2dataverse.config.Config.update_headers(**kwargs)
     per_page = 1
-    params = {'affiliation' : ror,
+    params = {'affiliation' : kwargs['ror'],
               'per_page' : per_page}
     if mod_date:
         params['modifiedSince'] = mod_date
-    stud =
-                       params=params
+    stud = session.get(f'{kwargs["dry_url"]}{kwargs["api_path"]}/search', headers=headers,
+                       params=params)
     records = []
     total = stud.json()['total']
     if verbosity:
-        print(f'Total Records: {total}')
+        print(f'Total Records: {total}', file=sys.stdout)
     params['per_page'] = 100
     for data in range(total//100+1):
         if verbosity:
-            print(f'Records page: {data+1}')
+            print(f'Records page: {data+1}', file=sys.stdout)
         params['page'] = data+1
-        stud =
+        stud = session.get(f'{kwargs["dry_url"]}{kwargs["api_path"]}/search',
                            headers=headers,
-                           params=params
-                           timeout=timeout)
+                           params=params)
         time.sleep(10) # don't overload their system with API calls
         stud.raise_for_status()
         records += stud.json()['_embedded']['stash:datasets']
@@ -251,186 +333,6 @@ def get_records(ror: 'str', mod_date=None, verbosity=True, timeout=100):
                     for x in records),
                    mod_date)
 
-def argp():
-    '''
-    Argument parser
-    '''
-    description = ('Dryad to Dataverse importer/monitor. '
-                   'All arguments NOT enclosed by square brackets are required for '
-                   'the script to run but some may already have defaults, specified '
-                   'by "Default". '
-                   'The "optional arguments" below refers to the use of the option switch, '
-                   '(like -u), meaning "not a positional argument."'
-                   )
-    parser = argparse.ArgumentParser(description=description)
-    parser.add_argument('-u', '--dv-url',
-                        help='Destination Dataverse root url. '
-                        'Default: https://borealisdata.ca',
-                        required=False,
-                        default='https://borealisdata.ca',
-                        dest='url')
-    parser.add_argument('-k', '--key',
-                        help='REQUIRED: API key for dataverse user',
-                        required=True,
-                        dest='key')
-    parser.add_argument('-t', '--target',
-                        help='REQUIRED: Target dataverse short name',
-                        required=True,
-                        dest='target')
-    parser.add_argument('-e', '--email',
-                        help='REQUIRED: Email address '
-                        'which sends update notifications. ie: '
-                        '"user@website.invalid".',
-                        required=True,
-                        dest='email')
-    parser.add_argument('-s', '--user',
-                        help=('REQUIRED: User name for SMTP server. Check '
-                              'your server for details. '),
-                        required=True,
-                        dest='user')
-    parser.add_argument('-r', '--recipient',
-                        help='REQUIRED: Recipient(s) of email notification. '
-                        'Separate addresses with spaces',
-                        required=True,
-                        nargs='+',
-                        dest='recipients')
-    parser.add_argument('-p', '--pwd',
-                        help='REQUIRED: Password for sending email account. '
-                        'Enclose in single quotes to avoid OS errors with special '
-                        'characters.',
-                        required=True,
-                        dest='pwd')
-    parser.add_argument('--server',
-                        help='Mail server for sending account. '
-                        'Default: smtp.mail.yahoo.com',
-                        required=False,
-                        default='smtp.mail.yahoo.com',
-                        dest='mailserv')
-    parser.add_argument('--port',
-                        help='Mail server port. Default: 465. '
-                        'Mail is sent using SSL.',
-                        required=False,
-                        type=int,
-                        #default=587,
-                        default=465,
-                        dest='port')
-    parser.add_argument('-c', '--contact',
-                        help='REQUIRED: Contact email address for Dataverse records. '
-                        'Must pass Dataverse email validation rules (so "test@test.invalid" '
-                        'is not acceptable).',
-                        required=True,
-                        dest='contact')
-    parser.add_argument('-n', '--contact-name',
-                        help='REQUIRED: Contact name for Dataverse records',
-                        required=True,
-                        dest='cname')
-    parser.add_argument('-v', '--verbosity',
-                        help='Verbose output',
-                        required=False,
-                        action='store_true')
-    parser.add_argument('-i', '--ror',
-                        help='REQUIRED: Institutional ROR URL. '
-                        'Eg: "https://ror.org/03rmrcq20". This identifies the '
-                        'institution in Dryad repositories.',
-                        required=True,
-                        dest='ror')
-    parser.add_argument('--tmpfile',
-                        help='Temporary file location. Default: /tmp',
-                        required=False,
-                        dest='tmp')
-    parser.add_argument('--db',
-                        help='Tracking database location and name. '
-                        'Default: $HOME/dryad_dataverse_monitor.sqlite3',
-                        required=False,
-                        dest='dbase')
-    parser.add_argument('--log',
-                        help='Complete path to log. '
-                        'Default: /var/log/dryadd.log',
-                        required=False,
-                        dest='log',
-                        default='/var/log/dryadd.log')
-    parser.add_argument('--loglevel',
-                        help='Log level of server rotating log. Choose one of '
-                        'debug, info, warning, error or critical. '
-                        'Note: case sensitive. '
-                        'Default: logging.warning.',
-                        required=False,
-                        dest='loglevel',
-                        default='warning',
-                        choices=['debug', 'info', 'warning','error','critical'])
-    parser.add_argument('--email-loglevel',
-                        help='Log level of email log. Choose one of '
-                        'debug, info, warning, error or critical. '
-                        'Note: case sensitive. '
-                        'Default: warning',
-                        required=False,
-                        dest='email_loglevel',
-                        default='warning',
-                        choices=['debug', 'info', 'warning','error','critical'])
-    parser.add_argument('-l', '--no_force_unlock',
-                        help='No forcible file unlock. Required '
-                        'if /lock endpint is restricted',
-                        required=False,
-                        action='store_false',
-                        dest='force_unlock')
-    parser.add_argument('-x', '--exclude',
-                        help='Exclude these DOIs. Separate by spaces',
-                        required=False,
-                        default=[],
-                        nargs='+',
-                        dest='exclude')
-    parser.add_argument('-b', '--num-backups',
-                        help=('Number of database backups to keep. '
-                              'Default 3'),
-                        required=False,
-                        type=int,
-                        default=3)
-    parser.add_argument('-w', '--warn-too-many',
-                        help=('Warn and halt execution if abnormally large '
-                              'number of updates present.'),
-                        action='store_true',)
-    parser.add_argument('--warn-threshold',
-                        help=('Do not transfer studies if number of updates '
-                              'is greater than or equal to this number. '
-                              'Default: 15'),
-                        type=int,
-                        dest='warn',
-                        default=15)
-    parser.add_argument('--testmode-on',
-                        help=('Turn on test mode. '
-                              'Number of transfers will be limited '
-                              'to the value in --testmode-limit '
-                              'or 5 if you don\'t set --testmode-limit '),
-                        action='store_true',
-                        dest='testmode')
-    parser.add_argument('--testmode-limit',
-                        help=('Test mode - only transfer first [n] '
-                              'of the total number of (new) records. Old ones will '
-                              'still be updated, though. '
-                              'Default: 5'),
-                        type=int,
-                        default=5,
-                        dest='testlimit')
-    parser.add_argument('--version', action='version',
-                        version='dryad2dataverse ' + dryad2dataverse.__version__,
-                        help='Show version number and exit')
-
-    return parser
-
-def set_constants(args):
-    '''
-    Set the appropriate dryad2dataverse "constants"
-    '''
-    dryad2dataverse.constants.DV_CONTACT_EMAIL = args.contact
-    dryad2dataverse.constants.DV_CONTACT_ = args.contact
-    dryad2dataverse.constants.APIKEY = args.key
-    if args.url:
-        dryad2dataverse.constants.DVURL = args.url
-    if args.dbase:
-        dryad2dataverse.constants.DBASE = args.dbase
-    if args.tmp:
-        dryad2dataverse.constants.TMP = args.tmp
-
 def email_log(mailhost, fromaddr, toaddrs, credentials, port=465, secure=(),
               level=logging.WARNING, timeout=100):
     '''
@@ -453,7 +355,7 @@ def email_log(mailhost, fromaddr, toaddrs, credentials, port=465, secure=(),
     level : int
         logging level. Default logging.WARNING
     '''
-    #pylint: disable=too-many-arguments
+    #pylint: disable=too-many-arguments, too-many-positional-arguments
     #Because consistency is for suckers and yahoo requires full hostname
     #subject = 'Dryad to Dataverse transfer error'
     subject = 'Dryad to Dataverse logger message'
@@ -488,7 +390,9 @@ def rotating_log(path, level):
     #python-logging-disable-logging-from-imported-modules
     for name in ['dryad2dataverse.serializer',
                  'dryad2dataverse.transfer',
-                 'dryad2dataverse.monitor'
+                 'dryad2dataverse.monitor',
+                 'dryad2dataverse.auth',
+                 'dryad2dataverse.config']:
         logging.getLogger(name).setLevel(level)
     rotator = logging.handlers.RotatingFileHandler(filename=path,
                                                    maxBytes=10*1024**2,
@@ -527,11 +431,12 @@ def checkwarn(val:int, **kwargs) -> None:
     '''
     if not kwargs.get('warn_too_many'):
         return
-    if val >= kwargs.get('
+    if val >= kwargs.get('warning_threshold',0):
         mess = ('Large number of updates detected. '
-                f'{val} new studies exceeds threshold of {kwargs.get("
+                f'{val} new studies exceeds threshold of {kwargs.get("warning_threshold", 0)}. '
                 'Program execution halted.')
-
+        print(mess, file=sys.stderr)
+        subject = 'Dryad2Dataverse large update warning'
         for logme in kwargs.get('loggers'):
             logme.warning(mess)
         notify(msgtxt=(subject, mess),
@@ -547,7 +452,7 @@ def verbo(verbosity:bool, **kwargs)->None:
     '''
     if verbosity:
         for key, value in kwargs.items():
-            print(f'{key}: {value}')
+            print(f'{key}: {value}', file=sys.stdout)
 
 def anonymizer(args: argparse.Namespace) -> dict:
     '''
@@ -555,10 +460,7 @@ def anonymizer(args: argparse.Namespace) -> dict:
     with cleaner values.
     '''
     clean_me = args.__dict__.copy()#Don't work on the real thing!
-    cleanser = {x : 'REDACTED' for x in ['
-                                         'key', 'mailserve',
-                                         'pwd', 'recipients',
-                                         'user']}
+    cleanser = {x : 'REDACTED' for x in ['secret', 'api_key']}
     clean_me.update(cleanser)
     return clean_me
 
@@ -573,78 +475,95 @@ def bulklog(message, *logfuncs):
     for log in logfuncs:
         log('%s', message)
 
-def
+def test_config(cfile:pathlib.Path):
     '''
-
+    Ensure that the config file can be loaded, and if not
+    raise a helpful error because it can't be logged yet
 
-
-
-
-
+    Parameters
+    ----------
+    cfile : pathlib.Path
+        Config yaml file
     '''
-
-
-
-
-
-
-
-    logpath = pathlib.Path(args.log)
-    if not logpath.parent.exists():
-        os.makedirs(logpath.parent)
-
-    set_constants(args)
-
-    logger = rotating_log(args.log,
-                          level=logging.getLevelName(args.loglevel.upper()))
-
-    elog = email_log(args.mailserv, args.email, args.recipients,
-                     (args.user, args.pwd), port=args.port,
-                     level = logging.getLevelName(args.email_loglevel.upper()))
-
+    try:
+        with open(cfile.expanduser().absolute(), encoding='utf-8') as y:
+            yaml.safe_load(y)
+    except yaml.YAMLError as e:
+        print('Configuration file error', file=sys.stdout)
+        print(e, file=sys.stderr)
+        sys.exit()
 
+def main():
+    '''
+    Primary function
+    '''
+    #pylint: disable=too-many-branches, too-many-locals, too-many-statements
+    args = argp().parse_args()
+    configfile = pathlib.Path(args.config)
+    test_config(configfile)
+    config = dryad2dataverse.config.Config(configfile.parent, configfile.name)
+    for val in ['api_key', 'secret']:
+        if getattr(args,val):
+            config[val] = getattr(args,val)
+    #TODONE remove this for prod
+    config['token'] = dryad2dataverse.auth.Token(**config)
+
+    logpath = pathlib.Path(config['log']).expanduser().absolute()
+    logpath.parent.mkdir(parents=True, exist_ok=True)
+
+    logger = rotating_log(logpath,
+                          level=logging.getLevelName(config['loglevel'].upper()))
+    elog = email_log(config['smtp_server'],
+                     config['sending_email'],
+                     config['recipients'],
+                     (config['sending_email_username'], config['email_send_password']),
+                     port=config['ssl_port'],
+                     level = logging.getLevelName(config['email_loglevel'].upper()))
     logger.info('Beginning update process')
     for logme in [elog, logger]:
-        logme.debug('Command line arguments: %s' , pprint.
+        logme.debug('Command line arguments: %s' , pprint.pformat(anonymizer(args)))
 
-    monitor = dryad2dataverse.monitor.Monitor(
+    monitor = dryad2dataverse.monitor.Monitor(**config)
     #copy the database to make a backup, because paranoia is your friend
-
-
+    db_full = pathlib.Path(config['dbase']).expanduser().absolute()
+    if db_full.exists():
         try:
-            shutil.copyfile(
-                pathlib.Path(
-
+            shutil.copyfile(db_full,
+                            pathlib.Path(db_full.parent,
+                                         db_full.stem + '_' +
                                          datetime.datetime.now().strftime('%Y-%m-%d-%H%M') +
-
+                                         db_full.suffix)
                             )
         except FileNotFoundError:
-
-
+            for _ in [logger, elog]:
+                _.exception('Database not found: %s', config['dbase'])
+            print(f'Database not found: {config["dbase"]}', file=sys.stderr)
             sys.exit()
     #list comprehension includes untimestamped dbase name, hence 2+
-    fnames = glob.glob(
-
-    fnames.remove(
+    fnames = glob.glob((str(pathlib.Path(db_full.parent,
+                                         db_full.stem + '*' + db_full.suffix))))
+    fnames.remove(str(db_full))
     fnames.sort(reverse=True)
-    fnames = fnames[
+    fnames = fnames[config['number_of_backups']:]
     for fil in fnames:
         os.remove(fil)
+        logger.info('Deleted database backup: %s', fil)
     logger.info('Last update time: %s', monitor.lastmod)
     #get all updates since the last update check
-    updates = get_records(
-                          verbosity=args.verbosity
+    updates = get_records(monitor.lastmod,
+                          verbosity=args.verbosity,
+                          **config)
     logger.info('Total new files: %s', len(updates))
     elog.info('Total new files: %s', len(updates))
-
-
-              min(args.testlimit, len(updates)),
+    checkwarn(val=len(updates) if not config['test_mode'] else
+              min(config['test_mode_limit'], len(updates)),
               loggers=[logger],
-              **
-    if args.testmode:
-        logger.warning('Test mode is ON - number of updates limited to %s', args.testlimit)
-        elog.warning('Test mode is ON - number of updates limited to %s', args.testlimit)
+              **config)
 
+    if config['test_mode']:
+        for _ in [logger, elog]:
+            _.warning('Test mode is ON - number of updates limited to %s',
+                      config['test_mode_limit'])
     #update all the new files
     verbo(args.verbosity, **{'Total to process': len(updates)})
 
@@ -652,8 +571,8 @@ def main():
     count = 0
     testcount = 0
     for doi in updates:
-        if
-            logger.info('Test limit of %s reached',
+        if config['test_mode'] and (testcount >= config['test_mode_limit']):
+            logger.info('Test limit of %s reached', config['test_mode_limit'])
             break
         count += 1
         logger.info('Start processing %s of %s', count, len(updates))
@@ -661,11 +580,12 @@ def main():
                     doi[0], doi[0])
         if not updates:
             break #no new files in this case
-
+        #use get in this case because people *will* have nothing to exclude
+        if doi[0] in config.get('exclude_list',[]):
             logger.warning('Skipping excluded doi: %s', doi[0])
             continue
         #Create study object
-        study = dryad2dataverse.serializer.Serializer(doi[0])
+        study = dryad2dataverse.serializer.Serializer(doi[0], **config)
         #verbose output
         verbo(args.verbosity,
               **{'Processing': count,
@@ -685,21 +605,21 @@ def main():
         update_type = monitor.status(study)['status']
         verbo(args.verbosity, **{'Status': update_type})
         #create a transfer object to copy the files over
-        transfer = dryad2dataverse.transfer.Transfer(study)
+        transfer = dryad2dataverse.transfer.Transfer(study, **config)
         transfer.test_api_key()
         #Now start the action
         if update_type == 'new':
             logger.info('New study: %s, %s', doi[0], doi[1]['title'])
             logger.info('Uploading study metadata')
-            transfer.upload_study(targetDv=
+            transfer.upload_study(targetDv=config['target'])
             #New files are in now in monitor.diff_files()['add']
             #with 2 Feb 2022 API change
             #so we can ignore them here
             logger.info('Uploading Dryad JSON metadata')
             transfer.upload_json()
             transfer.set_correct_date()
-            notify(new_content(study),
-                   **
+            notify(new_content(study, **config),
+                   **config)
             testcount+=1
 
         elif update_type == 'updated':
@@ -707,12 +627,11 @@ def main():
             logger.info('Updating metadata')
             transfer.upload_study(dvpid=study.dvpid)
             #remove old JSON files
-
-            transfer.delete_dv_files(rem)
+            transfer.delete_dv_files(monitor.get_json_dvfids(study))
             transfer.upload_json()
             transfer.set_correct_date()
-            notify(changed_content(study, monitor),
-                   **
+            notify(changed_content(study, monitor, **config),
+                   **config)
 
             #new, identical, updated, lastmodsame
         elif update_type in ('unchanged', 'lastmodsame'):
@@ -732,7 +651,7 @@ def main():
             transfer.download_files(diff['add'])
             #now send them to Dataverse
             transfer.upload_files(diff['add'], pid=study.dvpid,
-                                  force_unlock=
+                                  force_unlock=config['force_unlock'])
             #Update the tracking database for that record
             monitor.update(transfer)
 
@@ -745,13 +664,14 @@ def main():
         elog.info('Completed update process')
         finished = ('Dryad to Dataverse transfers completed',
                     ('Dryad to Dataverse transfer daemon has completed.\n'
-                     f'Log available at: {
-        notify(finished, **
+                     f'Log available at: {config["log"]}'))
+        notify(finished, **config)
 
     except dryad2dataverse.exceptions.DataverseBadApiKeyError as api_err:
         logger.exception(api_err)
         elog.exception(api_err)
-        print(f'Error: {api_err}. Exiting. For details see log at {args.log}.'
+        print(f'Error: {api_err}. Exiting. For details see log at {args.log}.',
+              file=sys.stderr)
         sys.exit()#graceful exit is graceful
 
     except Exception as err: # pylint: disable=broad-except
@@ -761,12 +681,9 @@ def main():
         logger.exception('%s\nCritical failure with DOI: %s : %s\n%s', err,
                          doi[0], doi[1]['title'], doi[1].get('sharingLink'),
                          stack_info=True, exc_info=True)
-        print(f'Error: {err}. Exiting. For details see log at {
+        print(f'Error: {err}. Exiting. For details see log at {config["log"]}.',
+              file=sys.stderr)
         sys.exit()
 
 if __name__ == '__main__':
     main()
-    _parser = argp()
-    _args = _parser.parse_args()
-    print('This is what you would have done had you actually run this')
-    print(_args)
```
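One smaller change visible in the `get_records()` hunks above: Dryad API requests now go through a `requests.Session` mounted with an `HTTPAdapter` whose retry policy is `dryad2dataverse.config.RETRY_STRATEGY`, and the host and API path come from the config (`dry_url`, `api_path`) instead of being hard-coded. A standalone sketch of that pattern, assuming illustrative `Retry` settings and the public Dryad API host (neither is taken from the package):

```python
# Standalone sketch of the retrying-session pattern adopted by get_records();
# the Retry values and the URL are assumptions, not the package's
# RETRY_STRATEGY or its configured dry_url/api_path.
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

retry_strategy = Retry(total=5,
                       backoff_factor=1,
                       status_forcelist=[429, 500, 502, 503, 504])
session = requests.Session()
session.mount('https://', HTTPAdapter(max_retries=retry_strategy))

# 'https://ror.org/03rmrcq20' is the example ROR from the old --ror help text
params = {'affiliation': 'https://ror.org/03rmrcq20', 'per_page': 100}
resp = session.get('https://datadryad.org/api/v2/search', params=params)  # assumed host/path
resp.raise_for_status()
print(resp.json()['total'])
```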