dryad2dataverse 0.7.10__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,30 +1,32 @@
1
1
  '''
2
2
  Dryad to Dataverse utilities. No modules are loaded by default, so
3
3
 
4
- >>> import dryad2dataverse
4
+ `>>> import dryad2dataverse`
5
5
 
6
6
  will work, but will have no effect.
7
7
 
8
8
  Modules included:
9
9
 
10
- dryad2dataverse.constants : "Constants" for all modules. URLs, API keys,
11
- etc are all here.
10
+ * **dryad2dataverse.config** : Configuration for all modules. URLs, API keys,
11
+ etc are all here.
12
+ Base configurations are read out of a yaml file in ./data
12
13
 
13
- dryad2dataverse.serializer : Download and serialize Dryad
14
- JSON to Dataverse JSON.
14
+ * **dryad2dataverse.serializer** : Download and serialize Dryad
15
+ JSON to Dataverse JSON.
15
16
 
16
- dryad2dataverse.transfer : metadata and file transfer
17
- utilities.
17
+ * **dryad2dataverse.transfer** : metadata and file transfer
18
+ utilities.
18
19
 
19
- dryad2dataverse.monitor : Monitoring and database tools
20
- for maintaining a pipeline to Dataverse without unnecessary
21
- downloading and file duplication.
20
+ * **dryad2dataverse.monitor** : Monitoring and database tools
21
+ for maintaining a pipeline to Dataverse without unnecessary
22
+ downloading and file duplication.
22
23
 
23
- dryad2dataverse.exceptions : Custom exceptions.
24
+ * **dryad2dataverse.exceptions** : Custom exceptions.
24
25
  '''
26
+
25
27
  import sys
26
28
 
27
- VERSION = (0, 7, 10)
29
+ VERSION = (0, 8, 0)
28
30
  __version__ = '.'.join([str(x) for x in VERSION])
29
31
  USERAGENT = (f'dryad2dataverse/v{__version__} ({sys.platform.capitalize()}); '
30
32
  f'Python {sys.version[:sys.version.find("(")-1]}')
@@ -0,0 +1,94 @@
1
+ '''
2
+ Handles authentication and bearer tokens using
3
+ Dryad's application ID and secret
4
+ '''
5
+ import datetime
6
+ import logging
7
+ import requests
8
+ from dryad2dataverse import USERAGENT
9
+
10
+ LOGGER = logging.getLogger(__name__)
11
+
12
class Token:
    '''
    Self updating bearer token generator
    '''
    def __init__(self, **kwargs):
        '''
        Store the credentials needed to request a bearer token.

        No network request is made here; the token is fetched lazily
        the first time the `token` property is read.

        Parameters
        ----------
        **kwargs
            Must include required keyword arguments as below
        dry_url : str
            Dryad base url (eg: https://datadryad.org)
        app_id : str
            Dryad application ID
        secret : str
            Application secret

        Other parameters
        ----------------
        timeout : int
            timeout in seconds (default 100)

        Raises
        ------
        KeyError
            If `app_id` or `secret` is not supplied.
        '''
        self.kwargs = kwargs
        self.path = '/oauth/token'
        self.data = {'client_id': kwargs['app_id'],
                     'client_secret': kwargs['secret'],
                     'grant_type': 'client_credentials'}
        self.headers = {'User-agent': USERAGENT,
                        'charset': 'UTF-8'}
        self.timeout = kwargs.get('timeout', 100)
        #ISO-style UTC expiry string; populated by check_token_valid()
        self.expiry_time = None
        self.__token_info = None

    def get_bearer_token(self):
        '''
        Obtain a brand new bearer token and store the decoded JSON response.

        Raises
        ------
        requests.exceptions.RequestException
            Any request failure (including HTTP error statuses) is
            logged and then re-raised unchanged.
        '''
        try:
            tokenr = requests.post(f"{self.kwargs['dry_url']}{self.path}",
                                   headers=self.headers,
                                   data=self.data,
                                   timeout=self.timeout)
            tokenr.raise_for_status()
            self.__token_info = tokenr.json()

        #HTTPError is a subclass of RequestException, so one clause covers both
        except requests.exceptions.RequestException as err:
            LOGGER.exception('HTTP Error:, %s', err)
            #Bare raise keeps the original traceback intact
            raise

    def check_token_valid(self)->bool:
        '''
        Checks to see if token is still valid

        Expiry is computed as `created_at` + `expires_in` from the stored
        token response and recorded in `self.expiry_time`. All arithmetic
        is done with timezone-aware UTC datetimes so that the trailing 'Z'
        in `expiry_time` is accurate regardless of the machine's local
        time zone.

        Returns
        -------
        bool
            True if the current time is not past the expiry time.
        '''
        utc = datetime.timezone.utc
        #assumes created_at is epoch seconds and expires_in is a number
        #of seconds, which is how they are consumed here
        created = datetime.datetime.fromtimestamp(self.__token_info['created_at'],
                                                  tz=utc)
        expiry = created + datetime.timedelta(seconds=self.__token_info['expires_in'])
        self.expiry_time = expiry.strftime('%Y-%m-%dT%H:%M:%SZ')
        return datetime.datetime.now(tz=utc) <= expiry

    @property
    def token(self)->str:
        '''
        Return only a valid token

        Fetches a token if none is held, and fetches a replacement if
        the held one has expired.
        '''
        if not self.__token_info:
            self.get_bearer_token()
        if not self.check_token_valid():
            self.get_bearer_token()
        return self.__token_info['access_token']

    @property
    def auth_header(self)->dict:
        '''
        Return valid authorization header

        Reading this property reads `token`, so it may trigger a
        token request.
        '''
        return {'Accept': 'application/json',
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {self.token}'}
@@ -0,0 +1,180 @@
1
+ '''
2
+ This module contains the information that configures all the parameters
3
+ required to transfer data from Dryad to Dataverse.
4
+
5
+ "Constants" may be a bit strong, but the only constant is the
6
+ presence of change.
7
+ '''
8
+ import logging
9
+ import pathlib
10
+ import importlib.resources
11
+ import sys
12
+
13
+ from typing import Union
14
+ #from requests.packages.urllib3.util.retry import Retry
15
+ #Above causes Pylint error. WHY?
16
+ #Because it's a fake path and just a pointer. See requests source
17
+ from urllib3.util import Retry
18
+ import yaml
19
+
20
+ from dryad2dataverse import USERAGENT
21
+
22
+ LOGGER = logging.getLogger(__name__)
23
+ #Requests session retry strategy in case of bad connections
24
+ #See :https://findwork.dev/blog/
25
+ #advanced-usage-python-requests-timeouts-retries-hooks/#retry-on-failure
26
+ #also
27
+ #https://stackoverflow.com/questions/15431044/
28
+ #can-i-set-max-retries-for-requests-request
29
+ RETRY_STRATEGY = Retry(total=10,
30
+ status_forcelist=[429, 500, 502, 503, 504],
31
+ allowed_methods=['HEAD', 'GET', 'OPTIONS',
32
+ 'POST', 'PUT'],
33
+ backoff_factor=1)
34
+
35
+ #Variable listings from previous versions of this file
36
+ #that are now included in Constants
37
+ #
38
+ ##used in dryad2dataverse.serializer
39
+ #DRYURL = 'https://datadryad.org'
40
+ #TMP = '/tmp'
41
+ #
42
+ ##used in dryad2dataverse.transfer
43
+ #DVURL = 'https://borealisdata.ca'
44
+ #APIKEY = None
45
+ #MAX_UPLOAD = 3221225472 #Max 3GB upload
46
+ #DV_CONTACT_EMAIL = None
47
+ #DV_CONTACT_NAME = None
48
+ #NOTAB = ['.sav', '.por', '.zip', '.csv', '.tsv', '.dta',
49
+ # '.rdata', '.xslx', '.xls']
50
+ #
51
+ ##used in dryad2dataverse.monitor
52
+ #HOME = os.path.expanduser('~')
53
+ #DBASE = pathlib.Path(HOME, 'dryad_dataverse_monitor.sqlite3')
54
+
55
+ class Config(dict):
56
+ '''
57
+ Holds all the information about dryad2dataverse parameters
58
+ '''
59
+ def __init__(self, cpath: Union[pathlib.Path, str]=None,
60
+ fname:str=None,
61
+ force:bool=False):
62
+ '''
63
+ Initalize
64
+
65
+ Parameters
66
+ ----------
67
+ force : bool
68
+ Force writing a new config file
69
+ '''
70
+ self.cpath = cpath
71
+ self.fname = fname
72
+ self.force = force
73
+ self.default_locations = {'ios': '~/.config/dryad2dataverse',
74
+ 'linux' : '~/.config/dryad2dataverse',
75
+ 'darwin': '~/Library/Application Support/dryad2dataverse',
76
+ 'win32' : 'AppData/Roaming/dryad2dataverse',
77
+ 'cygwin' : '~/.config/dryad2dataverse'}
78
+
79
+ #Use read() instead of yaml.safe_load.read_text() so that
80
+ #comments are preserved
81
+ with open(importlib.resources.files(
82
+ 'dryad2dataverse.data').joinpath(
83
+ 'dryad2dataverse_config.yml'), mode='r',
84
+ encoding='utf-8') as w:
85
+ self.template = w.read()
86
+
87
+ if not self.cpath:
88
+ self.cpath = self.default_locations[sys.platform]
89
+ if not self.fname:
90
+ self.fname = 'dryad2dataverse_config.yml'
91
+ self.configfile = pathlib.Path(self.cpath, self.fname).expanduser()
92
+
93
+ if self.make_config_template():
94
+ self.load_config()
95
+ else:
96
+ raise FileNotFoundError(f'Can\'t find {self.configfile}')
97
+
98
+ @classmethod
99
+ def update_headers(cls,
100
+ inheader:Union[None, dict]=None,
101
+ **kwargs)->dict:
102
+ '''
103
+ Update headers with user agent and token information (if present)
104
+
105
+ Parameters
106
+ ----------
107
+ inheader : dict
108
+ Existing header if present
109
+
110
+ **kwargs
111
+ Keyword arguments, one of which should be 'token' containing
112
+ a dryad2dataverse.auth.Token instance
113
+ '''
114
+ if not kwargs:
115
+ kwargs = {}
116
+ if not inheader:
117
+ inheader = {}
118
+ headers = {'Accept':'application/json',
119
+ 'Content-Type':'application/json'}
120
+ headers.update({'User-agent' : USERAGENT})
121
+ if kwargs.get('token'):
122
+ headers.update(kwargs['token'].auth_header)
123
+ headers.update(inheader)
124
+ return headers
125
+
126
+ def make_config_template(self):
127
+ '''
128
+ Make a default config if one does not exist
129
+ Returns
130
+ -------
131
+ True if created
132
+ False if not
133
+ '''
134
+ if self.configfile.exists() and not self.force:
135
+ return 1
136
+ if not self.configfile.parent.exists():
137
+ self.configfile.parent.mkdir(parents=True)
138
+ with open(self.configfile, 'w', encoding='utf-8') as f:
139
+ f.write(self.template)
140
+ if self.configfile.exists():
141
+ return 1
142
+ return 0
143
+
144
+ def load_config(self):
145
+ '''
146
+ Loads the config to a dict
147
+ '''
148
+ try:
149
+ with open(self.configfile, 'r', encoding='utf-8') as f:
150
+ self.update(yaml.safe_load(f))
151
+ except yaml.YAMLError as e:
152
+ LOGGER.exception('Unable to load config file, %s', e)
153
+ sys.exit()
154
+
155
+ def overwrite(self):
156
+ '''
157
+ Overwrite the config file with current contents.
158
+
159
+ Note that this will remove the comments from the YAML file.
160
+ '''
161
+ with open(self.configfile, 'w', encoding='utf-8') as w:
162
+ yaml.safe_dump(self, w)
163
+
164
+ def validate(self):
165
+ '''
166
+ Ensure all keys have values
167
+ '''
168
+ can_be_false = ['force_unlock', 'test_mode']
169
+ badkey = [k for k, v in self.items() if not v]
170
+ for rm in can_be_false:
171
+ badkey.remove(rm)#It can be false
172
+ listkeys = {k:v for k,v in self.items() if isinstance(v, list)}
173
+ for k, v in listkeys.items():
174
+ for sub_v in v:
175
+ if not sub_v:
176
+ badkey.append(k)
177
+ break
178
+ if badkey:
179
+ raise ValueError('Null values in configuration. '
180
+ f'See:\n{"\n".join([str(_) for _ in badkey])}')
@@ -0,0 +1,126 @@
1
+ #Sample configuration file dryad2dataverse
2
+ #It will *not* work unless you fill it in, because both
3
+ #Dryad and Dataverse require user information.
4
+
5
+ #------
6
+ #Dryad configuration
7
+ #------
8
+ #Dryad base URL
9
+ dry_url: https://datadryad.org
10
+ #API path
11
+ api_path: /api/v2
12
+ #Application ID (contact Dryad to get an institutional account)
13
+ app_id: null
14
+ #Secret key, should have come with your application ID.
15
+ secret: null
16
+
17
+ #------
18
+ #Dataverse configuration
19
+ #------
20
+ #Base url of Dataverse instance (eg: https://borealisdata.ca)
21
+ dv_url: null
22
+ #Dataverse API KEY
23
+ apikey: null
24
+ #Maximum upload size in bytes (contact Dataverse administrator for value if unknown)
25
+ max_upload: 3221225472
26
+ #Contact email address for Dataverse record, eg: research.data@test.invalid
27
+ dv_contact_email: null
28
+ #Contact name associated with the address (like, say, "[University] Research Data Services")
29
+ dv_contact_name: null
30
+ #Dataverse target collection shortname
31
+ target: Null
32
+ #To stop conversion to tabular data, add extensions here. Tabular processing can cause
33
+ #problems and the original files were not processed that way. It is recommended to
34
+ #keep this as is and add more if required.
35
+ notab:
36
+ - .sav
37
+ - .por
38
+ - .zip
39
+ - .csv
40
+ - .tsv
41
+ - .dta
42
+ - .rdata
43
+ - .xlsx
44
+ - .xls
45
+
46
+ #------
47
+ #Monitoring configuration
48
+ #------
49
+ #Location of persistent database which tracks transfers over time.
50
+ #If you ever move the database, you must change this to the new location or everything will be transferred again
51
+ dbase: ~/dryad_dataverse_monitor.sqlite3
52
+
53
+ #------
54
+ #Transfer information
55
+ #------
56
+ #Institutional ROR. Find your ROR here: https://ror.org/search
57
+ ror: null
58
+
59
+ #Location of temporarily downloaded files. This doesn't default to the normal
60
+ #temp file location because the files can be gigantic, and so is manually specified
61
+ tempfile_location: /tmp
62
+
63
+ #Email address which sends update notifications.
64
+ #Note, OAuth2 is not supported. Yahoo is free
65
+ #and you may as well use it
66
+ sending_email: null
67
+ #Account username. Check provider for details
68
+ sending_email_username: null
69
+ #Account password. Check provider for details; may be different than
70
+ #an ordinary account if using an application
71
+ email_send_password: null
72
+ #SMTP server configuration
73
+ smtp_server: smtp.mail.yahoo.com
74
+ #Mail is sent using SSL; check with provider for details
75
+ ssl_port: 465
76
+ #List of email addresses that will receive notifications
77
+ recipients:
78
+ - null
79
+ #location of dryadd log
80
+ #include full file name: eg: /var/log/dryadd.log
81
+ log: null
82
+
83
+ #level at which to write a log message. Select from:
84
+ # debug, info, warning, error or critical
85
+ loglevel: warning
86
+ #level at which to send an email message about problems.
87
+ #Same levels as above, obviously.
88
+ email_loglevel: warning
89
+
90
+ #Forcible file unlock. Forcible file unlocking requires admin privileges in Dataverse.
91
+ #Normally you wouldn't need to change this.
92
+ force_unlock: false
93
+ #Number of database backups to keep
94
+ number_of_backups: 3
95
+
96
+ #------
97
+ #Troubleshooting options
98
+ #------
99
+ #Warn if too many new updates. Occasionally, Dryad will change their
100
+ #"persistent" IDs and then everything looks new, which causes everything
101
+ #to be loaded again. It's recommended that this be "true" to stop an accidental
102
+ #complete reingest
103
+ warn_too_many: true
104
+ #Number of new Dryad surveys which will trigger a warning and stop execution.
105
+ #This is to prevent accidentally ingesting thousands of surveys if you
106
+ #misconfigure something
107
+ warning_threshold: 15
108
+ #Force dryadd into test mode
109
+ test_mode: false
110
+ #Test mode - only transfer first [n] of the total number of (new) records.
111
+ #Old ones will still be updated, though
112
+ test_mode_limit: 5
113
+
114
+
115
+ #------
116
+ #Exclusion list
117
+ #------
118
+ #Dryad DOIs to exclude from transfers. This is usually because the files in the
119
+ #study are too large to be ingested into Dataverse, but may also be used for
120
+ #studies with errors or any other reason
121
+ #
122
+ #IMPORTANT!
123
+ #
124
+ #Uncomment below and add dois in place of null, one per line.
125
+ #exclude_list:
126
+ #- null
@@ -12,9 +12,13 @@ class SSLSMTPHandler(SMTPHandler):
12
12
  An SSL handler for logging.handlers
13
13
  '''
14
14
  def emit(self, record:logging.LogRecord):
15
- """
15
+ '''
16
16
  Emit a record while using an SSL mail server.
17
- """
17
+
18
+ Parameters
19
+ ----------
20
+ record : logging.LogRecord
21
+ '''
18
22
  #Praise be to
19
23
  #https://stackoverflow.com/questions/36937461/
20
24
  #how-can-i-send-an-email-using-python-loggings-