dryad2dataverse 0.7.11a0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,14 +8,14 @@ The monitor's primary function is to allow for state checking
8
8
  for Dryad studies so that files and studies aren't downloaded
9
9
  unnecessarily.
10
10
  '''
11
-
11
+ #pylint: disable=invalid-name
12
12
  import copy
13
- import logging
13
+ import datetime
14
14
  import json
15
+ import logging
16
+ import pathlib
15
17
  import sqlite3
16
- import datetime
17
18
 
18
- from dryad2dataverse import constants
19
19
  from dryad2dataverse import exceptions
20
20
 
21
21
  LOGGER = logging.getLogger(__name__)
@@ -26,82 +26,88 @@ class Monitor():
26
26
  Dryad files can be monitored and updated over time. Monitor is a singleton,
27
27
  but is not thread-safe.
28
28
  '''
29
- __instance = None
30
-
31
- def __new__(cls, dbase=None, *args, **kwargs):
29
+ def __new__(cls, *args, **kwargs):
32
30
  '''
33
31
  Creates a new singleton instance of Monitor.
34
32
 
35
- Also creates a database if existing database is not present.
36
-
37
- ----------------------------------------
38
- Parameters:
33
+ Parameters
34
+ ----------
35
+ *args
36
+ **kwargs
37
+ '''
38
+ if not hasattr(cls, 'inst'):
39
+ cls.inst = super().__new__(cls)
40
+ #This ensures only the first set of kwargs (on instantiation)
41
+ #are used.
42
+ cls.init = 0
43
+ cls.kwargs = kwargs
44
+ if not cls.kwargs.get('dbase'):
45
+ try:
46
+ cls.kwargs['dbase'] = args[0]
47
+ except ValueError as e:
48
+ raise KeyError from e
49
+ cls.conn = sqlite3.connect(pathlib.Path(cls.kwargs['dbase']).expanduser().absolute())
50
+ cls.cursor = cls.conn.cursor()
51
+ LOGGER.info('Open database %s', cls.kwargs['dbase'])
52
+ return cls.inst
39
53
 
54
+ def __init__(self, *args, **kwargs):
55
+ '''
56
+ Initialize singleton instance of Monitor
57
+
58
+ Parameters
59
+ ----------
60
+ *args
61
+ Positional arguments. Only the first is used
62
+ **kwargs
63
+ Keyword arguments. Only dbase is used, and it overwrites args[0] if present
64
+
65
+ Notes
66
+ -----
67
+ Normally you would just pass a dryad2dataverse.config.Config object,
68
+ ie. Monitor(**config)
69
+
70
+ These keyword parameters are required at a minimum, and are included as part of a
71
+ Config instance.
40
72
  dbase : str
41
- Path to sqlite3 database. That is:
42
- /path/to/file.sqlite3
43
- ----------------------------------------
73
+ Path to dryad2dataverse monitor database
74
+ dry_url : str
75
+ Dryad base URL
44
76
  '''
45
- if cls.__instance is None:
46
- cls.__instance = super(Monitor, cls).__new__(cls)
47
- cls.__instance.__initialized = False
48
- cls.dbase = dbase
49
- if not cls.dbase:
50
- cls.dbase = constants.DBASE
51
- cls.conn = sqlite3.Connection(cls.dbase)
52
- cls.cursor = cls.conn.cursor()
77
+ #pylint: disable=unused-argument
78
+ #arguments are parsed in __new__ to make a singleton
79
+ #but they need to be passed in __init__
80
+ if not self.init:
81
+
82
+ conn = sqlite3.connect(pathlib.Path(self.kwargs['dbase']).expanduser().absolute())
83
+ cursor = conn.cursor()
53
84
  create = ['CREATE TABLE IF NOT EXISTS dryadStudy \
54
- (uid INTEGER PRIMARY KEY AUTOINCREMENT, \
55
- doi TEXT, lastmoddate TEXT, dryadjson TEXT, \
56
- dvjson TEXT);',
57
- 'CREATE TABLE IF NOT EXISTS dryadFiles \
58
- (dryaduid INTEGER REFERENCES dryadStudy (uid), \
59
- dryfilesjson TEXT);',
60
- 'CREATE TABLE IF NOT EXISTS dvStudy \
61
- (dryaduid INTEGER references dryadStudy (uid), \
62
- dvpid TEXT);',
63
- 'CREATE TABLE IF NOT EXISTS dvFiles \
64
- (dryaduid INTEGER references dryadStudy (uid), \
65
- dryfid INT, \
66
- drymd5 TEXT, dvfid TEXT, dvmd5 TEXT, \
67
- dvfilejson TEXT);',
68
- 'CREATE TABLE IF NOT EXISTS lastcheck \
69
- (checkdate TEXT);',
70
- 'CREATE TABLE IF NOT EXISTS failed_uploads \
71
- (dryaduid INTEGER references dryadstudy (uid), \
72
- dryfid INT, status TEXT);'
85
+ (uid INTEGER PRIMARY KEY AUTOINCREMENT, \
86
+ doi TEXT, lastmoddate TEXT, dryadjson TEXT, \
87
+ dvjson TEXT);',
88
+ 'CREATE TABLE IF NOT EXISTS dryadFiles \
89
+ (dryaduid INTEGER REFERENCES dryadStudy (uid), \
90
+ dryfilesjson TEXT);',
91
+ 'CREATE TABLE IF NOT EXISTS dvStudy \
92
+ (dryaduid INTEGER references dryadStudy (uid), \
93
+ dvpid TEXT);',
94
+ 'CREATE TABLE IF NOT EXISTS dvFiles \
95
+ (dryaduid INTEGER references dryadStudy (uid), \
96
+ dryfid INT, \
97
+ drymd5 TEXT, dvfid TEXT, dvmd5 TEXT, \
98
+ dvfilejson TEXT);',
99
+ 'CREATE TABLE IF NOT EXISTS lastcheck \
100
+ (checkdate TEXT);',
101
+ 'CREATE TABLE IF NOT EXISTS failed_uploads \
102
+ (dryaduid INTEGER references dryadstudy (uid), \
103
+ dryfid INT, status TEXT);'
73
104
  ]
74
105
 
75
106
  for line in create:
76
- cls.cursor.execute(line)
77
- cls.conn.commit()
78
- LOGGER.info('Using database %s', cls.dbase)
79
-
80
- return cls.__instance
81
-
82
- def __init__(self, dbase=None, *args, **kwargs):
83
- # remove args and kwargs when you find out how init interacts with new.
84
- '''
85
- Initialize the Monitor instance if not instantiated already (ie, Monitor
86
- is a singleton).
87
-
88
- ----------------------------------------
89
- Parameters:
90
-
91
- dbase : str
92
- — Complete path to desired location of tracking database
93
- (eg: /tmp/test.db).
94
-
95
- Defaults to dryad2dataverse.constants.DBASE.
96
- ----------------------------------------
97
- '''
98
- if self.__initialized:
99
- return
100
- self.__initialized = True
101
- if not dbase:
102
- self.dbase = constants.DBASE
103
- else:
104
- self.dbase = dbase
107
+ cursor.execute(line)
108
+ conn.commit()
109
+ conn.close()
110
+ self.init = 1
105
111
 
106
112
  def __del__(self):
107
113
  '''
@@ -121,31 +127,40 @@ class Monitor():
121
127
  return last_mod[0][0]
122
128
  return None
123
129
 
124
- def status(self, serial):
130
+ def status(self, serial)->dict:
125
131
  '''
126
132
  Returns a dictionary with keys 'status' and 'dvpid' and 'notes'.
133
+
134
+ Parameters
135
+ ----------
136
+ serial : dryad2dataverse.serializer.Serializer
137
+
138
+ Returns
139
+ -------
127
140
  `{status :'updated', 'dvpid':'doi://some/ident'}`.
128
141
 
142
+ Notes
143
+ ------
129
144
  `status` is one of 'new', 'identical', 'lastmodsame',
130
145
  'updated'
131
146
 
132
- 'new' is a completely new file.
147
+ 'new' is a completely new file.
133
148
 
134
- 'identical' The metadata from Dryad is *identical* to the last time
135
- the check was run.
149
+ 'identical' The metadata from Dryad is *identical* to the last time
150
+ the check was run.
136
151
 
137
- 'lastmodsame' Dryad lastModificationDate == last modification date
138
- in database AND output JSON is different.
139
- This can indicate a Dryad
140
- API output change, reindexing or something else.
141
- But the lastModificationDate
142
- is supposed to be an indicator of meaningful change, so this option
143
- exists so you can decide what to do given this option
152
+ 'lastmodsame' Dryad lastModificationDate == last modification date
153
+ in database AND output JSON is different.
154
+ This can indicate a Dryad
155
+ API output change, reindexing or something else.
156
+ But the lastModificationDate
157
+ is supposed to be an indicator of meaningful change, so this option
158
+ exists so you can decide what to do given this option
144
159
 
145
- 'updated' Indicates changes to lastModificationDate
160
+ 'updated' Indicates changes to lastModificationDate
146
161
 
147
- Note that Dryad constantly changes their API output, so the changes
148
- may not actually be meaningful.
162
+ Note that Dryad constantly changes their API output, so the changes
163
+ may not actually be meaningful.
149
164
 
150
165
  `dvpid` is a Dataverse persistent identifier.
151
166
  `None` in the case of status='new'
@@ -155,12 +170,6 @@ class Monitor():
155
170
  not `new` or `identical`. Note that Dryad has no way to indicate *both*
156
171
  a file and metadata change, so this value reflects only the *last* change
157
172
  in the Dryad state.
158
-
159
- ----------------------------------------
160
- Parameters:
161
-
162
- serial : dryad2dataverse.serializer instance
163
- ----------------------------------------
164
173
  '''
165
174
  # Last mod date is indicator of change.
166
175
  # From email w/Ryan Scherle 10 Nov 2020
@@ -199,13 +208,11 @@ class Monitor():
199
208
  dryaduid = ?', (dryaduid,))
200
209
  dvpid = self.cursor.fetchall()[-1][0]
201
210
  serial.dvpid = dvpid
202
- except TypeError:
203
- try:
204
- raise exceptions.DatabaseError
205
- except exceptions.DatabaseError as e:
206
- LOGGER.error('Dryad DOI : %s. Error finding Dataverse PID', doi)
207
- LOGGER.exception(e)
208
- raise
211
+ except TypeError as exc:
212
+ LOGGER.error('Dryad DOI : %s. Error finding Dataverse PID', doi)
213
+ LOGGER.exception(exc)
214
+ raise exceptions.DatabaseError from exc
215
+
209
216
  newfile = copy.deepcopy(serial.dryadJson)
210
217
  testfile = copy.deepcopy(json.loads(result[-1][3]))
211
218
  if newfile == testfile:
@@ -220,23 +227,25 @@ class Monitor():
220
227
  '''
221
228
  Analyzes differences in metadata between current serializer
222
229
  instance and last updated serializer instance.
223
- Returns a list of field changes consisting of:
224
230
 
231
+ Parameters
232
+ ----------
233
+ serial : dryad2dataverse.serializer.Serializer
234
+
235
+ Returns
236
+ -------
237
+ Returns a list of field changes consisting of:
225
238
  [{key: (old_value, new_value)}] or None if no changes.
226
239
 
240
+ Notes
241
+ -----
227
242
  For example:
228
-
229
243
  ```
230
244
  [{'title':
231
245
  ('Cascading effects of algal warming in a freshwater community',
232
246
  'Cascading effects of algal warming in a freshwater community theatre')}
233
247
  ]
234
248
  ```
235
- ----------------------------------------
236
- Parameters:
237
-
238
- serial : dryad2dataverse.serializer.Serializer instance
239
- ----------------------------------------
240
249
  '''
241
250
  if self.status(serial)['status'] == 'updated':
242
251
  self.cursor.execute('SELECT dryadjson from dryadStudy \
@@ -261,10 +270,12 @@ class Monitor():
261
270
  Assumes name, mimeType, size, descr all unchanged, which is not
262
271
  necessarily a valid assumption
263
272
 
264
- oldFiles: list or tuple:
273
+ Parameters
274
+ ----------
275
+ oldFiles : Union[list, tuple]
265
276
  (name, mimeType, size, descr, digestType, digest)
266
277
 
267
- newFiles: list or tuple:
278
+ newFiles : Union[list, tuple]
268
279
  (name, mimeType, size, descr, digestType, digest)
269
280
  '''
270
281
  hash_change = []
@@ -294,12 +305,12 @@ class Monitor():
294
305
  `{'add':[dryadfiletuples], 'delete':[dryadfiletuples],
295
306
  'hash_change': [dryadfiletuples]}`
296
307
 
297
- ----------------------------------------
298
- Parameters:
299
-
300
- serial : dryad2dataverse.serializer.Serializer instance
301
- ----------------------------------------
308
+ Parameters
309
+ ----------
310
+ serial : dryad2dataverse.serializer.Serializer
302
311
  '''
312
+ #pylint: disable=too-many-locals
313
+
303
314
  diffReport = {}
304
315
  if self.status(serial)['status'] == 'new':
305
316
  #do we want to show what needs to be added?
@@ -329,7 +340,7 @@ class Monitor():
329
340
  downLink = f['_links']['stash:file-download']['href']
330
341
  except KeyError:
331
342
  downLink = f['_links']['stash:download']['href']
332
- downLink = f'{constants.DRYURL}{downLink}'
343
+ downLink = f'{self.kwargs.get("dry_url", "https://datadryad.org")}{downLink}'
333
344
  name = f['path']
334
345
  mimeType = f['mimeType']
335
346
  size = f['size']
@@ -379,13 +390,11 @@ class Monitor():
379
390
  file download link. Normally used for determining dataverse
380
391
  file ids for *deletion* in case of dryad file changes.
381
392
 
382
- ----------------------------------------
383
- Parameters:
384
-
393
+ Parameters
394
+ ----------
385
395
  url : str
386
- *Dryad* file URL in form of
387
- 'https://datadryad.org/api/v2/files/385819/download'.
388
- ----------------------------------------
396
+ *Dryad* file URL in form of
397
+ 'https://datadryad.org/api/v2/files/385819/download'.
389
398
  '''
390
399
  fid = url[url.rfind('/', 0, -10)+1:].strip('/download')
391
400
  try:
@@ -413,11 +422,10 @@ class Monitor():
413
422
  dryad2dataverse.monitor.Monitor.diff_files['delete']
414
423
  to discover Dataverse file ids for deletion.
415
424
 
416
- ----------------------------------------
417
- Parameters:
418
-
425
+ Parameters
426
+ ----------
419
427
  filelist : list
420
- List of Dryad file tuples: eg:
428
+ List of Dryad file tuples: eg:
421
429
 
422
430
  ```
423
431
  [('https://datadryad.org/api/v2/files/385819/download',
@@ -427,7 +435,6 @@ class Monitor():
427
435
  'Readme_ACG_Mortality.txt',
428
436
  'text/plain', 1350)]
429
437
  ```
430
- ----------------------------------------
431
438
  '''
432
439
  fids = []
433
440
  for f in filelist:
@@ -435,18 +442,20 @@ class Monitor():
435
442
  return fids
436
443
  # return [self.get_dv_fid(f[0]) for f in filelist]
437
444
 
438
- def get_json_dvfids(self, serial):
445
+ def get_json_dvfids(self, serial)->list:
439
446
  '''
440
447
  Return a list of Dataverse file ids for Dryad JSONs which were
441
448
  uploaded to Dataverse.
442
449
  Normally used to discover the file IDs to remove Dryad JSONs
443
450
  which have changed.
444
451
 
445
- ----------------------------------------
446
- Parameters:
452
+ Parameters
453
+ ----------
454
+ serial : dryad2dataverse.serializer.Serializer
447
455
 
448
- serial : dryad2dataverse.serializer.Serializer instance
449
- ----------------------------------------
456
+ Returns
457
+ -------
458
+ list
450
459
  '''
451
460
  self.cursor.execute('SELECT max(uid) FROM dryadStudy WHERE doi=?',
452
461
  (serial.doi,))
@@ -471,12 +480,11 @@ class Monitor():
471
480
  This method should be called after all transfers are completed,
472
481
  including Dryad JSON updates, as the last action for transfer.
473
482
 
474
- ----------------------------------------
475
- Parameters:
476
-
477
- transfer : dryad2dataverse.transfer.Transfer instance
478
- ----------------------------------------
483
+ Parameters
484
+ ----------
485
+ transfer : dryad2dataverse.transfer.Transfer
479
486
  '''
487
+ #pylint: disable=too-many-branches, too-many-statements, too-many-locals
480
488
  # get the pre-update dryad uid in case we need it.
481
489
  self.cursor.execute('SELECT max(uid) FROM dryadStudy WHERE doi = ?',
482
490
  (transfer.dryad.dryadJson['identifier'],))
@@ -612,14 +620,12 @@ class Monitor():
612
620
  for subsequent checking for updates. To query last modification time,
613
621
  use the dryad2dataverse.monitor.Monitor.lastmod attribute.
614
622
 
615
- ----------------------------------------
616
- Parameters:
617
-
623
+ Parameters
624
+ ----------
618
625
  curdate : str
619
- UTC datetime string in the format suitable for the Dryad API.
620
- eg. 2021-01-21T21:42:40Z
621
- or .strftime('%Y-%m-%dT%H:%M:%SZ').
622
- ----------------------------------------
626
+ UTC datetime string in the format suitable for the Dryad API.
627
+ eg. 2021-01-21T21:42:40Z
628
+ or .strftime('%Y-%m-%dT%H:%M:%SZ').
623
629
  '''
624
630
  #Dryad API uses Zulu time
625
631
  if not curdate: