pyxetabase 4.0.0.dev56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxetabase might be problematic. Click here for more details.

Files changed (36)
  1. pyxetabase/__init__.py +0 -0
  2. pyxetabase/commons.py +347 -0
  3. pyxetabase/exceptions.py +8 -0
  4. pyxetabase/opencga_client.py +344 -0
  5. pyxetabase/opencga_config.py +211 -0
  6. pyxetabase/rest_clients/__init__.py +0 -0
  7. pyxetabase/rest_clients/_parent_rest_clients.py +144 -0
  8. pyxetabase/rest_clients/admin_client.py +179 -0
  9. pyxetabase/rest_clients/alignment_client.py +373 -0
  10. pyxetabase/rest_clients/clinical_analysis_client.py +1216 -0
  11. pyxetabase/rest_clients/cohort_client.py +349 -0
  12. pyxetabase/rest_clients/cvdb_client.py +2285 -0
  13. pyxetabase/rest_clients/disease_panel_client.py +345 -0
  14. pyxetabase/rest_clients/family_client.py +355 -0
  15. pyxetabase/rest_clients/federation_client.py +133 -0
  16. pyxetabase/rest_clients/file_client.py +710 -0
  17. pyxetabase/rest_clients/ga4gh_client.py +86 -0
  18. pyxetabase/rest_clients/individual_client.py +435 -0
  19. pyxetabase/rest_clients/job_client.py +416 -0
  20. pyxetabase/rest_clients/meta_client.py +85 -0
  21. pyxetabase/rest_clients/organization_client.py +216 -0
  22. pyxetabase/rest_clients/project_client.py +128 -0
  23. pyxetabase/rest_clients/sample_client.py +446 -0
  24. pyxetabase/rest_clients/study_client.py +462 -0
  25. pyxetabase/rest_clients/user_client.py +212 -0
  26. pyxetabase/rest_clients/user_tool_client.py +471 -0
  27. pyxetabase/rest_clients/variant_client.py +1378 -0
  28. pyxetabase/rest_clients/variant_operation_client.py +718 -0
  29. pyxetabase/rest_clients/workflow_client.py +263 -0
  30. pyxetabase/rest_response.py +220 -0
  31. pyxetabase/retry.py +57 -0
  32. pyxetabase-4.0.0.dev56.dist-info/METADATA +159 -0
  33. pyxetabase-4.0.0.dev56.dist-info/RECORD +36 -0
  34. pyxetabase-4.0.0.dev56.dist-info/WHEEL +5 -0
  35. pyxetabase-4.0.0.dev56.dist-info/licenses/LICENSE +202 -0
  36. pyxetabase-4.0.0.dev56.dist-info/top_level.txt +1 -0
pyxetabase/__init__.py ADDED
File without changes
pyxetabase/commons.py ADDED
@@ -0,0 +1,347 @@
1
+ import sys
2
+ import threading
3
+ from time import sleep
4
+ import warnings
5
+
6
+ import requests
7
+
8
+ from pyxetabase.exceptions import OpencgaInvalidToken, OpencgaAuthorisationError
9
+
10
+ try:
11
+ from Queue import Queue
12
+ except ImportError:
13
+ from queue import Queue
14
+
15
+ _CALL_BATCH_SIZE = 2000
16
+ _NUM_THREADS_DEFAULT = 4
17
+
18
+
19
def deprecated(func):
    """Decorator that emits a DeprecationWarning each time *func* is called.

    The warning filter is forced to 'always' only for the duration of the
    warning so repeated calls keep warning, and the caller's global filter
    configuration is restored afterwards (the previous implementation reset
    it to 'default', discarding any user-configured filters).

    :param func: the callable to mark as deprecated
    :return: a wrapper with func's metadata (name, docstring) preserved
    """
    import functools

    @functools.wraps(func)
    def new_func(*args, **kwargs):
        with warnings.catch_warnings():
            warnings.simplefilter('always', DeprecationWarning)
            warnings.warn('Call to deprecated function "{}".'.format(func.__name__),
                          category=DeprecationWarning, stacklevel=2)
        return func(*args, **kwargs)
    return new_func
28
+
29
+
30
def snake_to_camel_case(text):
    """Convert a snake_case identifier to camelCase."""
    first, *rest = text.split('_')
    return first + ''.join(word.title() for word in rest)
34
+
35
+
36
+ def _create_rest_url(host, version, sid, category, resource, subcategory=None, query_id=None,
37
+ second_query_id=None, options=None):
38
+ """Creates the URL for querying the REST service"""
39
+
40
+ # Creating the basic URL
41
+ url = ('/'.join([host,
42
+ 'webservices/rest',
43
+ version,
44
+ category
45
+ ]))
46
+
47
+ # If subcategory is queried, query_id can be absent
48
+ if query_id is not None:
49
+ url += '/' + query_id
50
+
51
+ if subcategory is not None:
52
+ url += '/' + subcategory
53
+
54
+ if second_query_id is not None:
55
+ url += '/' + second_query_id
56
+
57
+ url += '/' + resource
58
+
59
+ header = {"Accept-Encoding": "gzip"}
60
+ if sid is not None:
61
+ header['Authorization'] = 'Bearer {}'.format(sid)
62
+
63
+ # Checking optional params
64
+ if options is not None:
65
+ opts = []
66
+ for k, v in options.items():
67
+ k = snake_to_camel_case(k)
68
+ if k == 'debug':
69
+ continue
70
+ if isinstance(v, list):
71
+ opts.append(k + '=' + ','.join(map(str, v)))
72
+ else:
73
+ opts.append(k + '=' + str(v))
74
+ if opts:
75
+ url += '?' + '&'.join(opts)
76
+ return url, header
77
+
78
+
79
def _fetch(config, sid, category, resource, method, subcategory=None, query_id=None,
           second_query_id=None, data=None, options=None):
    """Query the REST service, paginating until exhaustion or the 'limit' option.

    Results are requested in pages of 1000. When a query (or any id within a
    comma-separated *query_id*) still has more results, the server is queried
    again with an increased 'skip' until every id is exhausted or the
    caller-supplied 'limit' has been reached.

    :param config: client configuration; must expose 'host', 'version',
        'cookies' and 'tlsAllowInvalidCertificates'
    :param sid: session token sent as a Bearer Authorization header, or None
    :param category: main REST category (e.g. the resource collection)
    :param resource: REST action appended at the end of the URL path
    :param method: one of 'get', 'post' or 'delete'
    :param subcategory: optional nested category in the URL path
    :param query_id: optional id (or comma-separated ids) to query
    :param second_query_id: optional second id in the URL path
    :param data: JSON body for 'post' requests
    :param options: extra query parameters ('skip', 'limit', 'debug', ...)
    :return: merged JSON response dict, or raw bytes for octet-stream replies
    :raises OpencgaInvalidToken: on HTTP 401
    :raises OpencgaAuthorisationError: on HTTP 403
    :raises requests.ConnectionError: after 100 consecutive gateway time-outs
    """
    final_response = None

    # Setting up skip and limit default parameters
    call_skip = 0
    call_limit = 1000
    max_limit = None
    if options is None:
        opts = {'skip': call_skip, 'limit': call_limit}
    else:
        opts = options.copy()  # Do not modify original data!
        if 'skip' not in opts:
            opts['skip'] = call_skip
        # If 'limit' is specified, a maximum of 'limit' results will be returned
        if 'limit' in opts:
            max_limit = int(opts['limit'])
        # Server must be always queried for results in groups of 1000
        opts['limit'] = call_limit

    # If there is a query_id, the next variables will be used
    total_id_list = []    # All initial ids
    next_id_list = []     # Ids which should be queried again for more results
    next_id_indexes = []  # Ids position in the final response
    if query_id is not None:
        total_id_list = query_id.split(',')

    # If some query has more than 'call_limit' results, the server will be
    # queried again to retrieve the next 'call_limit' results
    call = True
    current_query_id = None   # Current REST query
    current_id_list = None    # Current list of ids
    time_out_counter = 0      # Number of times a query is repeated due to time-out
    while call:
        # Check 'limit' parameter if there is a maximum limit of results
        if max_limit is not None and max_limit <= call_limit:
            opts['limit'] = max_limit

        # Updating query_id and list of ids to query
        if query_id is not None:
            if current_query_id is None:
                # First iteration: query every id
                current_query_id = query_id
                current_id_list = total_id_list
                current_id_indexes = range(len(total_id_list))
            else:
                # Later iterations: only the ids that still have results
                current_query_id = ','.join(next_id_list)
                current_id_list = next_id_list
                current_id_indexes = next_id_indexes

        # Retrieving url
        url, header = _create_rest_url(host=config.host,
                                       version=config.version,
                                       category=category,
                                       sid=sid,
                                       subcategory=subcategory,
                                       query_id=current_query_id,
                                       second_query_id=second_query_id,
                                       resource=resource,
                                       options=opts)

        # DEBUG param
        if opts is not None and 'debug' in opts and opts['debug']:
            sys.stderr.write(url + '\n')

        # Getting REST response; each request is retried once after a one
        # second pause when the connection fails
        if method == 'get':
            try:
                r = requests.get(url, headers=header, cookies=config.cookies, verify=not config.tlsAllowInvalidCertificates)
            except requests.exceptions.ConnectionError:
                sleep(1)
                r = requests.get(url, headers=header, cookies=config.cookies, verify=not config.tlsAllowInvalidCertificates)
        elif method == 'post':
            try:
                r = requests.post(url, json=data, headers=header, cookies=config.cookies, verify=not config.tlsAllowInvalidCertificates)
            except requests.exceptions.ConnectionError:
                sleep(1)
                r = requests.post(url, json=data, headers=header, cookies=config.cookies, verify=not config.tlsAllowInvalidCertificates)
        elif method == 'delete':
            try:
                r = requests.delete(url, headers=header, cookies=config.cookies, verify=not config.tlsAllowInvalidCertificates)
            except requests.exceptions.ConnectionError:
                sleep(1)
                r = requests.delete(url, headers=header, cookies=config.cookies, verify=not config.tlsAllowInvalidCertificates)
        else:
            raise NotImplementedError('method: ' + method + ' not implemented.')

        if r.status_code == 504:  # Gateway Time-out
            if time_out_counter == 99:
                msg = 'Server not responding in time'
                raise requests.ConnectionError(msg)
            time_out_counter += 1
            continue
        time_out_counter = 0

        if r.status_code == 401:
            raise OpencgaInvalidToken(r.content)
        elif r.status_code == 403:
            raise OpencgaAuthorisationError(r.content)
        elif r.status_code != 200:
            raise Exception(r.content)

        # NOTE(review): exact match on Content-Type — a header such as
        # 'application/json;charset=utf-8' would fall through to the
        # ValueError branch; confirm against the server's actual headers
        if r.headers['Content-Type'] == 'application/json':
            try:
                response = r.json()

                # TODO Remove deprecated response and result in future release. Added for backwards compatibility
                if 'response' in response:
                    response['responses'] = response['response']
                    for query_result in response['responses']:
                        if 'result' in query_result:
                            query_result['results'] = query_result['result']

            except ValueError:
                raise ValueError('Bad JSON format retrieved from server')
        elif r.headers['Content-Type'] == 'application/octet-stream':
            # Binary downloads are returned raw, without pagination
            return r.content
        else:
            raise ValueError('Unexpected content type retrieved from server ("{}"): "{}"'.format(
                r.headers['Content-Type'], r.content)
            )

        # Setting up final_response
        if final_response is None:
            final_response = response
        # Concatenating results
        else:
            if query_id is not None:
                for index, res in enumerate(response['responses']):
                    id_index = current_id_indexes[index]
                    # BUG FIX: results for each id live inside the 'responses'
                    # list; the previous code indexed the top-level response
                    # dict with an integer, which raised KeyError on any
                    # multi-page multi-id query (compare the else branch below)
                    final_response['responses'][id_index]['results'] += res['results']
            else:
                final_response['responses'][0]['results'] += response['responses'][0]['results']

        if query_id is not None:
            # Checking which ids are completely retrieved
            next_id_list = []
            next_id_indexes = []
            for index, res in enumerate(response['responses']):
                # A full page means this id may still have more results
                if res['numResults'] == call_limit:
                    next_id_list.append(current_id_list[index])
                    next_id_indexes.append(current_id_indexes[index])
            # Ending REST calling when there are no more ids to retrieve
            if not next_id_list:
                call = False
        else:
            # Ending REST calling when there are no more results to retrieve
            if response['responses'][0]['numResults'] != call_limit:
                call = False

        # Skipping the first 'limit' results to retrieve the next ones
        opts['skip'] += call_limit

        # Subtracting the number of returned results from the maximum goal
        if max_limit is not None:
            max_limit -= call_limit
            # When 'limit' is 0 returns all the results. So, break the loop if 0
            if max_limit == 0:
                break

    return final_response
241
+
242
+
243
def _worker(queue, results, config, sid, category, resource, method, subcategory=None,
            second_query_id=None, data=None, options=None):
    """Thread body: consume (index, id-batch) jobs from *queue* forever.

    Each fetched response is stored at its batch's original position in
    *results*, so the caller can merge them in order.
    """
    while True:
        # Block until a job is available
        index, query_id = queue.get()
        # Run the REST query for this batch and park the answer in the slot
        # matching the batch's original order
        results[index] = _fetch(config=config, sid=sid, category=category,
                                subcategory=subcategory, resource=resource,
                                method=method, data=data, query_id=query_id,
                                second_query_id=second_query_id, options=options)
        # Tell the queue this job is finished so queue.join() can return
        queue.task_done()
257
+
258
+
259
def merge_query_responses(query_response_list):
    """Merge several query responses (one per id batch) into a single response.

    The first element of *query_response_list* is used as the base and is
    mutated in place; events, times, differing params and per-query results
    from the remaining responses are folded into it.

    :param query_response_list: non-empty list of parsed REST response dicts,
        each with 'events', 'time', 'params' and 'responses' keys
    :return: the merged response dict (same object as query_response_list[0])
    """
    final_response = query_response_list[0]
    for query_response in query_response_list[1:]:
        final_response['events'] += query_response['events']
        final_response['time'] += query_response['time']

        # Differing parameter values are concatenated as comma-separated strings
        for key in query_response['params']:
            if final_response['params'][key] != query_response['params'][key]:
                final_response['params'][key] += ',' + query_response['params'][key]

        for j, query_result in enumerate(query_response['responses']):
            if len(final_response['responses']) - 1 < j:
                # BUG FIX: the original did "final_response['responses'] += []",
                # a no-op that left the list short and made the indexing below
                # raise IndexError; grow the list with a fresh dict instead
                final_response['responses'].append({})
            for key in query_result:
                if key not in final_response['responses'][j]:
                    final_response['responses'][j][key] = query_result[key]
                elif isinstance(query_result[key], (int, list)):
                    # Counts and result lists are additive; values of other
                    # types (e.g. strings) keep the first batch's value
                    final_response['responses'][j][key] += query_result[key]
    return final_response
281
+
282
+
283
def execute(config, sid, category, resource, method, subcategory=None, query_id=None,
            second_query_id=None, data=None, options=None):
    """Run a REST query, fanning out over worker threads for large id lists.

    Up to _CALL_BATCH_SIZE ids are handled by a single direct call; larger
    id lists are split into batches that are fetched concurrently and then
    merged back into one response.
    """
    # Normalise query_id to a comma-separated string (accepts lists and ints)
    if query_id is not None:
        if isinstance(query_id, list):
            query_id = ','.join(str(item) for item in query_id)
        else:
            query_id = str(query_id)

    # Small queries go straight through on the calling thread
    if query_id is None or len(query_id.split(',')) <= _CALL_BATCH_SIZE:
        return _fetch(config=config, sid=sid, category=category, subcategory=subcategory,
                      resource=resource, method=method, data=data, query_id=query_id,
                      second_query_id=second_query_id, options=options)

    num_threads = _NUM_THREADS_DEFAULT
    if options is not None and 'num_threads' in options:
        num_threads = options['num_threads']

    # Split the ids into comma-joined batches of at most _CALL_BATCH_SIZE
    id_list = query_id.split(',')
    id_batches = [','.join(id_list[start:start + _CALL_BATCH_SIZE])
                  for start in range(0, len(id_list), _CALL_BATCH_SIZE)]

    # Unbounded job queue plus one pre-sized slot per batch so workers can
    # store their responses in the original batch order
    job_queue = Queue(maxsize=0)
    batch_results = [''] * len(id_batches)

    worker_kwargs = {'queue': job_queue,
                     'results': batch_results,
                     'config': config,
                     'sid': sid,
                     'category': category,
                     'subcategory': subcategory,
                     'second_query_id': second_query_id,
                     'resource': resource,
                     'method': method,
                     'data': data,
                     'options': options}
    for _ in range(num_threads):
        worker = threading.Thread(target=_worker, kwargs=worker_kwargs)
        # Daemon threads let the main program exit eventually even if a
        # worker never finishes
        worker.daemon = True
        worker.start()

    # Enqueue every batch together with its slot index
    for index, batch in enumerate(id_batches):
        job_queue.put((index, batch))

    # Block until every queued batch has been processed
    job_queue.join()

    # Fold all the per-batch responses into one final response
    return merge_query_responses(batch_results)
346
+
347
+
@@ -0,0 +1,8 @@
1
class OpencgaAuthorisationError(Exception):
    """Raised when the server rejects a request as forbidden (HTTP 403)."""

    def __init__(self, message):
        super(OpencgaAuthorisationError, self).__init__(message)
4
+
5
+
6
class OpencgaInvalidToken(Exception):
    """Raised when the server rejects the session token (HTTP 401)."""

    def __init__(self, message):
        super(OpencgaInvalidToken, self).__init__(message)