cloudos-cli 2.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloudos_cli/__init__.py +11 -0
- cloudos_cli/__main__.py +1297 -0
- cloudos_cli/_version.py +1 -0
- cloudos_cli/clos.py +726 -0
- cloudos_cli/jobs/__init__.py +8 -0
- cloudos_cli/jobs/job.py +555 -0
- cloudos_cli/queue/__init__.py +8 -0
- cloudos_cli/queue/queue.py +139 -0
- cloudos_cli/utils/__init__.py +9 -0
- cloudos_cli/utils/errors.py +32 -0
- cloudos_cli/utils/requests.py +75 -0
- cloudos_cli-2.17.0.dist-info/LICENSE +674 -0
- cloudos_cli-2.17.0.dist-info/METADATA +1060 -0
- cloudos_cli-2.17.0.dist-info/RECORD +41 -0
- cloudos_cli-2.17.0.dist-info/WHEEL +5 -0
- cloudos_cli-2.17.0.dist-info/entry_points.txt +2 -0
- cloudos_cli-2.17.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/functions_for_pytest.py +7 -0
- tests/test_clos/__init__.py +0 -0
- tests/test_clos/test_create_cromwell_header.py +35 -0
- tests/test_clos/test_cromwell_switch.py +77 -0
- tests/test_clos/test_detect_workflow.py +47 -0
- tests/test_clos/test_get_cromwell_status.py +77 -0
- tests/test_clos/test_get_curated_workflow_list.py +72 -0
- tests/test_clos/test_get_job_list.py +79 -0
- tests/test_clos/test_get_job_status.py +75 -0
- tests/test_clos/test_get_project_list.py +74 -0
- tests/test_clos/test_get_user_info.py +68 -0
- tests/test_clos/test_get_workflow_list.py +87 -0
- tests/test_clos/test_is_module.py +48 -0
- tests/test_clos/test_process_job_list.py +74 -0
- tests/test_clos/test_process_project_list.py +36 -0
- tests/test_clos/test_process_workflow_list.py +36 -0
- tests/test_clos/test_wait_job_completion.py +40 -0
- tests/test_clos/test_workflow_import.py +77 -0
- tests/test_jobs/__init__.py +0 -0
- tests/test_jobs/test_convert_nextflow_to_json.py +104 -0
- tests/test_jobs/test_project_id.py +67 -0
- tests/test_jobs/test_send_job.py +84 -0
- tests/test_jobs/test_workflow_id.py +67 -0
cloudos_cli/clos.py
ADDED
|
@@ -0,0 +1,726 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is the main class of the package.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
import time
|
|
7
|
+
import json
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from cloudos_cli.utils.errors import BadRequestException
|
|
10
|
+
from cloudos_cli.utils.requests import retry_requests_get, retry_requests_post
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
# GLOBAL VARS
# Terminal job statuses as reported by the CloudOS jobs API. These are the
# states at which polling (see wait_job_completion) can stop.
JOB_COMPLETED = 'completed'
JOB_FAILED = 'failed'
JOB_ABORTED = 'aborted'
|
18
|
+
|
|
19
|
+
@dataclass
class Cloudos:
    """A simple class to contain the required connection information.

    Parameters
    ----------
    cloudos_url : string
        The CloudOS service url.
    apikey : string
        Your CloudOS API key.
    cromwell_token : string
        Cromwell server token. If None, apikey will be used instead.
    """
    # Base URL of the CloudOS deployment (no trailing slash expected by the
    # URL-building code below).
    cloudos_url: str
    # Personal API key, sent in the 'apikey' request header.
    apikey: str
    # Cromwell-specific bearer token; when None, _create_cromwell_header
    # falls back to the personal API key.
    cromwell_token: str
|
|
35
|
+
|
|
36
|
+
def get_job_status(self, j_id, verify=True):
|
|
37
|
+
"""Get job status from CloudOS.
|
|
38
|
+
|
|
39
|
+
Parameters
|
|
40
|
+
----------
|
|
41
|
+
j_id : string
|
|
42
|
+
The CloudOS job id of the job just launched.
|
|
43
|
+
verify: [bool|string]
|
|
44
|
+
Whether to use SSL verification or not. Alternatively, if
|
|
45
|
+
a string is passed, it will be interpreted as the path to
|
|
46
|
+
the SSL certificate file.
|
|
47
|
+
|
|
48
|
+
Returns
|
|
49
|
+
-------
|
|
50
|
+
r : requests.models.Response
|
|
51
|
+
The server response
|
|
52
|
+
"""
|
|
53
|
+
cloudos_url = self.cloudos_url
|
|
54
|
+
apikey = self.apikey
|
|
55
|
+
headers = {
|
|
56
|
+
"Content-type": "application/json",
|
|
57
|
+
"apikey": apikey
|
|
58
|
+
}
|
|
59
|
+
r = retry_requests_get("{}/api/v1/jobs/{}".format(cloudos_url,
|
|
60
|
+
j_id),
|
|
61
|
+
headers=headers, verify=verify)
|
|
62
|
+
if r.status_code >= 400:
|
|
63
|
+
raise BadRequestException(r)
|
|
64
|
+
return r
|
|
65
|
+
|
|
66
|
+
def wait_job_completion(self, job_id, wait_time=3600, request_interval=30, verbose=False,
|
|
67
|
+
verify=True):
|
|
68
|
+
"""Checks job status from CloudOS and wait for its complation.
|
|
69
|
+
|
|
70
|
+
Parameters
|
|
71
|
+
----------
|
|
72
|
+
j_id : string
|
|
73
|
+
The CloudOS job id of the job just launched.
|
|
74
|
+
wait_time : int
|
|
75
|
+
Max time to wait (in seconds) to job completion.
|
|
76
|
+
request_interval : int
|
|
77
|
+
Time interval (in seconds) to request job status.
|
|
78
|
+
verbose : bool
|
|
79
|
+
Whether to output status on every request or not.
|
|
80
|
+
verify: [bool|string]
|
|
81
|
+
Whether to use SSL verification or not. Alternatively, if
|
|
82
|
+
a string is passed, it will be interpreted as the path to
|
|
83
|
+
the SSL certificate file.
|
|
84
|
+
|
|
85
|
+
Returns
|
|
86
|
+
-------
|
|
87
|
+
: dict
|
|
88
|
+
A dict with three elements collected from the job status: 'name', 'id', 'status'.
|
|
89
|
+
"""
|
|
90
|
+
j_url = f'{self.cloudos_url}/app/jobs/{job_id}'
|
|
91
|
+
elapsed = 0
|
|
92
|
+
j_status_h_old = ''
|
|
93
|
+
# make sure user doesn't surpass the wait time
|
|
94
|
+
if request_interval > wait_time:
|
|
95
|
+
request_interval = wait_time
|
|
96
|
+
while elapsed < wait_time:
|
|
97
|
+
j_status = self.get_job_status(job_id, verify)
|
|
98
|
+
j_status_content = json.loads(j_status.content)
|
|
99
|
+
j_status_h = j_status_content["status"]
|
|
100
|
+
j_name = j_status_content["name"]
|
|
101
|
+
if j_status_h == JOB_COMPLETED:
|
|
102
|
+
if verbose:
|
|
103
|
+
print(f'\tYour job "{j_name}" (ID: {job_id}) took {elapsed} seconds to complete ' +
|
|
104
|
+
'successfully.')
|
|
105
|
+
return {'name': j_name, 'id': job_id, 'status': j_status_h}
|
|
106
|
+
elif j_status_h == JOB_FAILED:
|
|
107
|
+
if verbose:
|
|
108
|
+
print(f'\tYour job "{j_name}" (ID: {job_id}) took {elapsed} seconds to fail.')
|
|
109
|
+
return {'name': j_name, 'id': job_id, 'status': j_status_h}
|
|
110
|
+
elif j_status_h == JOB_ABORTED:
|
|
111
|
+
if verbose:
|
|
112
|
+
print(f'\tYour job "{j_name}" (ID: {job_id}) took {elapsed} seconds to abort.')
|
|
113
|
+
return {'name': j_name, 'id': job_id, 'status': j_status_h}
|
|
114
|
+
else:
|
|
115
|
+
elapsed += request_interval
|
|
116
|
+
if j_status_h != j_status_h_old:
|
|
117
|
+
if verbose:
|
|
118
|
+
print(f'\tYour current job "{j_name}" (ID: {job_id}) status is: {j_status_h}.')
|
|
119
|
+
j_status_h_old = j_status_h
|
|
120
|
+
time.sleep(request_interval)
|
|
121
|
+
j_status = self.get_job_status(job_id, verify)
|
|
122
|
+
j_status_content = json.loads(j_status.content)
|
|
123
|
+
j_status_h = j_status_content["status"]
|
|
124
|
+
j_name = j_status_content["name"]
|
|
125
|
+
if j_status_h != JOB_COMPLETED and verbose:
|
|
126
|
+
print(f'\tYour current job "{j_name}" (ID: {job_id}) status is: {j_status_h}. The ' +
|
|
127
|
+
f'selected wait-time of {wait_time} was exceeded. Please, ' +
|
|
128
|
+
'consider to set a longer wait-time.')
|
|
129
|
+
print('\tTo further check your job status you can either go to ' +
|
|
130
|
+
f'{j_url} or use the following command:\n' +
|
|
131
|
+
'\tcloudos job status \\\n' +
|
|
132
|
+
'\t\t--apikey $MY_API_KEY \\\n' +
|
|
133
|
+
f'\t\t--cloudos-url {self.cloudos_url} \\\n' +
|
|
134
|
+
f'\t\t--job-id {job_id}\n')
|
|
135
|
+
return {'name': j_name, 'id': job_id, 'status': j_status_h}
|
|
136
|
+
|
|
137
|
+
def _create_cromwell_header(self):
|
|
138
|
+
"""Generates cromwell header.
|
|
139
|
+
|
|
140
|
+
This methods is responsible for using personal API key instead of
|
|
141
|
+
specific Cromwell API when the later is not provided.
|
|
142
|
+
|
|
143
|
+
Returns
|
|
144
|
+
-------
|
|
145
|
+
headers : dict
|
|
146
|
+
The correct headers based on using cromwell specific token or
|
|
147
|
+
personal API key.
|
|
148
|
+
"""
|
|
149
|
+
if self.cromwell_token is None:
|
|
150
|
+
headers = {
|
|
151
|
+
"Accept": "application/json",
|
|
152
|
+
"apikey": self.apikey
|
|
153
|
+
}
|
|
154
|
+
else:
|
|
155
|
+
headers = {
|
|
156
|
+
"Accept": "application/json",
|
|
157
|
+
"Authorization": f'Bearer {self.cromwell_token}'
|
|
158
|
+
}
|
|
159
|
+
return headers
|
|
160
|
+
|
|
161
|
+
def get_cromwell_status(self, workspace_id, verify=True):
|
|
162
|
+
"""Get Cromwell server status from CloudOS.
|
|
163
|
+
|
|
164
|
+
Parameters
|
|
165
|
+
----------
|
|
166
|
+
workspace_id : string
|
|
167
|
+
The CloudOS workspace id from to check the Cromwell status.
|
|
168
|
+
verify: [bool|string]
|
|
169
|
+
Whether to use SSL verification or not. Alternatively, if
|
|
170
|
+
a string is passed, it will be interpreted as the path to
|
|
171
|
+
the SSL certificate file.
|
|
172
|
+
|
|
173
|
+
Returns
|
|
174
|
+
-------
|
|
175
|
+
r : requests.models.Response
|
|
176
|
+
The server response
|
|
177
|
+
"""
|
|
178
|
+
cloudos_url = self.cloudos_url
|
|
179
|
+
headers = self._create_cromwell_header()
|
|
180
|
+
r = retry_requests_get("{}/api/v1/cromwell?teamId={}".format(cloudos_url,
|
|
181
|
+
workspace_id),
|
|
182
|
+
headers=headers, verify=verify)
|
|
183
|
+
if r.status_code >= 400:
|
|
184
|
+
raise BadRequestException(r)
|
|
185
|
+
return r
|
|
186
|
+
|
|
187
|
+
    def cromwell_switch(self, workspace_id, action, verify=True):
        """Restart Cromwell server.

        Parameters
        ----------
        workspace_id : string
            The CloudOS workspace id in which restart/stop Cromwell status.
        action : string [restart|stop]
            The action to perform.
        verify: [bool|string]
            Whether to use SSL verification or not. Alternatively, if
            a string is passed, it will be interpreted as the path to
            the SSL certificate file.

        Returns
        -------
        r : requests.models.Response
            The server response
        """
        cloudos_url = self.cloudos_url
        # Auth header: Cromwell token when configured, else personal API key.
        headers = self._create_cromwell_header()
        # NOTE(review): unlike the GET endpoints this uses requests.put
        # directly, without the retry wrapper — presumably because the
        # restart/stop action should not be retried automatically; confirm.
        r = requests.put("{}/api/v1/cromwell/{}?teamId={}".format(cloudos_url,
                                                                  action,
                                                                  workspace_id),
                         headers=headers, verify=verify)
        if r.status_code >= 400:
            raise BadRequestException(r)
        return r
|
|
215
|
+
|
|
216
|
+
    def get_job_list(self, workspace_id, last_n_jobs=30, page=1, archived=False,
                     verify=True):
        """Get jobs from a CloudOS workspace.

        Parameters
        ----------
        workspace_id : string
            The CloudOS workspace id from to collect the jobs.
        last_n_jobs : [int | 'all']
            How many of the last jobs from the user to retrieve. You can specify a
            very large int or 'all' to get all user's jobs.
        page : int
            Response page to get.
        archived : bool
            When True, only the archived jobs are retrieved.
        verify: [bool|string]
            Whether to use SSL verification or not. Alternatively, if
            a string is passed, it will be interpreted as the path to
            the SSL certificate file.

        Returns
        -------
        r : list
            A list of dicts, each corresponding to a jobs from the user and the workspace.
        """
        headers = {
            "Content-type": "application/json",
            "apikey": self.apikey
        }
        # The API expects the archived flag as a lowercase string.
        if archived:
            archived_status = "true"
        else:
            archived_status = "false"
        r = retry_requests_get("{}/api/v2/jobs?teamId={}&page={}&archived.status={}".format(
            self.cloudos_url, workspace_id, page, archived_status),
            headers=headers, verify=verify)
        if r.status_code >= 400:
            raise BadRequestException(r)
        content = json.loads(r.content)
        # Number of jobs returned by this page of the response.
        n_jobs = len(content['jobs'])
        if last_n_jobs == 'all':
            jobs_to_get = n_jobs
        elif last_n_jobs > 0:
            # Jobs still needed after consuming this page (may go negative
            # when the page overshoots the request).
            jobs_to_get = last_n_jobs - n_jobs
        else:
            raise TypeError("[ERROR] Please select an int > 0 or 'all' for 'last_n_jobs'")
        if jobs_to_get == 0 or n_jobs == 0:
            # Either the request is exactly satisfied or the server returned
            # an empty page (no further pages) — stop recursing.
            return content['jobs']
        if jobs_to_get > 0:
            # More jobs needed: recurse into the next page. With 'all', the
            # recursion only ends when an empty page comes back.
            if last_n_jobs == 'all':
                next_to_get = 'all'
            else:
                next_to_get = jobs_to_get
            return content['jobs'] + self.get_job_list(workspace_id, last_n_jobs=next_to_get,
                                                       page=page+1, archived=archived,
                                                       verify=verify)
        if jobs_to_get < 0:
            # This page overshot the request: trim the surplus from the end.
            return content['jobs'][:jobs_to_get]
|
|
274
|
+
|
|
275
|
+
@staticmethod
|
|
276
|
+
def process_job_list(r, all_fields=False):
|
|
277
|
+
"""Process a job list from a self.get_job_list call.
|
|
278
|
+
|
|
279
|
+
Parameters
|
|
280
|
+
----------
|
|
281
|
+
r : list
|
|
282
|
+
A list of dicts, each corresponding to a job from the user and the workspace.
|
|
283
|
+
all_fields : bool. Default=False
|
|
284
|
+
Whether to return a reduced version of the DataFrame containing
|
|
285
|
+
only the selected columns or the full DataFrame.
|
|
286
|
+
|
|
287
|
+
Returns
|
|
288
|
+
-------
|
|
289
|
+
df : pandas.DataFrame
|
|
290
|
+
A DataFrame with the requested columns from the jobs.
|
|
291
|
+
"""
|
|
292
|
+
COLUMNS = ['_id',
|
|
293
|
+
'team',
|
|
294
|
+
'name',
|
|
295
|
+
'parameters',
|
|
296
|
+
'status',
|
|
297
|
+
'startTime',
|
|
298
|
+
'endTime',
|
|
299
|
+
'createdAt',
|
|
300
|
+
'updatedAt',
|
|
301
|
+
'computeCostSpent',
|
|
302
|
+
'masterInstanceStorageCost',
|
|
303
|
+
'user.id',
|
|
304
|
+
'workflow._id',
|
|
305
|
+
'workflow.name',
|
|
306
|
+
'workflow.description',
|
|
307
|
+
'workflow.createdAt',
|
|
308
|
+
'workflow.updatedAt',
|
|
309
|
+
'workflow.workflowType',
|
|
310
|
+
'project._id',
|
|
311
|
+
'project.name',
|
|
312
|
+
'project.createdAt',
|
|
313
|
+
'project.updatedAt'
|
|
314
|
+
]
|
|
315
|
+
df_full = pd.json_normalize(r)
|
|
316
|
+
if all_fields:
|
|
317
|
+
df = df_full
|
|
318
|
+
else:
|
|
319
|
+
df = df_full.loc[:, COLUMNS]
|
|
320
|
+
return df
|
|
321
|
+
|
|
322
|
+
    def get_curated_workflow_list(self, workspace_id, get_all=True, page=1, verify=True):
        """Get all the curated workflows from a CloudOS workspace.

        Parameters
        ----------
        workspace_id : string
            The CloudOS workspace id from to collect the workflows.
        get_all : bool
            Whether to get all available curated workflows or just the indicated page.
        page : int
            The page number to retrieve, from the paginated response.
        verify: [bool|string]
            Whether to use SSL verification or not. Alternatively, if
            a string is passed, it will be interpreted as the path to
            the SSL certificate file.

        Returns
        -------
        r : list
            A list of dicts, each corresponding to a workflow.
        """
        headers = {
            "Content-type": "application/json",
            "apikey": self.apikey
        }
        r = retry_requests_get(
            "{}/api/v3/workflows?search=&groups[]=curated&groups[]=featured&groups[]=predefined&page={}&teamId={}".format(
                self.cloudos_url, page, workspace_id),
            headers=headers, verify=verify)
        if r.status_code >= 400:
            raise BadRequestException(r)
        content = json.loads(r.content)
        if get_all:
            # Workflows in this page vs. the total advertised by the server's
            # pagination metadata.
            workflows_collected = len(content['workflows'])
            workflows_to_get = content['paginationMetadata']['Pagination-Count']
            if workflows_to_get <= workflows_collected or workflows_collected == 0:
                return content['workflows']
            if workflows_to_get > workflows_collected:
                # NOTE(review): each recursive call compares the TOTAL count
                # against its own page size only, so for multi-page results the
                # recursion appears to end only when an empty page is returned
                # (one extra request) — confirm against the API's pagination.
                return content['workflows'] + self.get_curated_workflow_list(workspace_id,
                                                                             get_all=True,
                                                                             page=page+1,
                                                                             verify=verify)
        else:
            return content['workflows']
|
|
366
|
+
|
|
367
|
+
def get_workflow_list(self, workspace_id, verify=True, get_all=True,
|
|
368
|
+
page=1, page_size=10, max_page_size=1000,
|
|
369
|
+
archived_status=False):
|
|
370
|
+
"""Get all the workflows from a CloudOS workspace.
|
|
371
|
+
|
|
372
|
+
Parameters
|
|
373
|
+
----------
|
|
374
|
+
workspace_id : string
|
|
375
|
+
The CloudOS workspace id from to collect the workflows.
|
|
376
|
+
verify : [bool|string]
|
|
377
|
+
Whether to use SSL verification or not. Alternatively, if
|
|
378
|
+
a string is passed, it will be interpreted as the path to
|
|
379
|
+
the SSL certificate file.
|
|
380
|
+
get_all : bool
|
|
381
|
+
Whether to get all available curated workflows or just the
|
|
382
|
+
indicated page.
|
|
383
|
+
page : int
|
|
384
|
+
The page number to retrieve, from the paginated response.
|
|
385
|
+
page_size : int
|
|
386
|
+
The number of workflows by page. From 1 to 1000.
|
|
387
|
+
max_page_size : int
|
|
388
|
+
Max page size defined by the API server. It is currently 1000.
|
|
389
|
+
archived_status : bool
|
|
390
|
+
Whether to retrieve archived workflows or not.
|
|
391
|
+
|
|
392
|
+
Returns
|
|
393
|
+
-------
|
|
394
|
+
r : list
|
|
395
|
+
A list of dicts, each corresponding to a workflow.
|
|
396
|
+
"""
|
|
397
|
+
headers = {
|
|
398
|
+
"Content-type": "application/json",
|
|
399
|
+
"apikey": self.apikey
|
|
400
|
+
}
|
|
401
|
+
archived_status = str(archived_status).lower()
|
|
402
|
+
r = retry_requests_get(
|
|
403
|
+
"{}/api/v3/workflows?teamId={}&pageSize={}&page={}&archived.status={}".format(
|
|
404
|
+
self.cloudos_url, workspace_id, page_size, page, archived_status),
|
|
405
|
+
headers=headers, verify=verify)
|
|
406
|
+
if r.status_code >= 400:
|
|
407
|
+
raise BadRequestException(r)
|
|
408
|
+
content = json.loads(r.content)
|
|
409
|
+
if get_all:
|
|
410
|
+
total_workflows = content['paginationMetadata']['Pagination-Count']
|
|
411
|
+
if total_workflows <= max_page_size:
|
|
412
|
+
r = retry_requests_get(
|
|
413
|
+
"{}/api/v3/workflows?teamId={}&pageSize={}&page={}&archived.status={}".format(
|
|
414
|
+
self.cloudos_url, workspace_id, total_workflows, 1, archived_status),
|
|
415
|
+
headers=headers, verify=verify)
|
|
416
|
+
if r.status_code >= 400:
|
|
417
|
+
raise BadRequestException(r)
|
|
418
|
+
return json.loads(r.content)['workflows']
|
|
419
|
+
else:
|
|
420
|
+
n_pages = (total_workflows // max_page_size) + int((total_workflows % max_page_size) > 0)
|
|
421
|
+
for p in range(n_pages):
|
|
422
|
+
p += 1
|
|
423
|
+
r = retry_requests_get(
|
|
424
|
+
"{}/api/v3/workflows?teamId={}&pageSize={}&page={}&archived.status={}".format(
|
|
425
|
+
self.cloudos_url, workspace_id, max_page_size, p, archived_status),
|
|
426
|
+
headers=headers, verify=verify)
|
|
427
|
+
if r.status_code >= 400:
|
|
428
|
+
raise BadRequestException(r)
|
|
429
|
+
if p == 1:
|
|
430
|
+
all_content = json.loads(r.content)['workflows']
|
|
431
|
+
else:
|
|
432
|
+
all_content += json.loads(r.content)['workflows']
|
|
433
|
+
return all_content
|
|
434
|
+
else:
|
|
435
|
+
return content['workflows']
|
|
436
|
+
|
|
437
|
+
@staticmethod
|
|
438
|
+
def process_workflow_list(r, all_fields=False):
|
|
439
|
+
"""Process a server response from a self.get_workflow_list call.
|
|
440
|
+
|
|
441
|
+
Parameters
|
|
442
|
+
----------
|
|
443
|
+
r : list
|
|
444
|
+
A list of dicts, each corresponding to a workflow.
|
|
445
|
+
all_fields : bool. Default=False
|
|
446
|
+
Whether to return a reduced version of the DataFrame containing
|
|
447
|
+
only the selected columns or the full DataFrame.
|
|
448
|
+
|
|
449
|
+
Returns
|
|
450
|
+
-------
|
|
451
|
+
df : pandas.DataFrame
|
|
452
|
+
A DataFrame with the requested columns from the workflows.
|
|
453
|
+
"""
|
|
454
|
+
COLUMNS = ['_id',
|
|
455
|
+
'name',
|
|
456
|
+
'archived.status',
|
|
457
|
+
'mainFile',
|
|
458
|
+
'workflowType',
|
|
459
|
+
'group',
|
|
460
|
+
'repository.name',
|
|
461
|
+
'repository.platform',
|
|
462
|
+
'repository.url',
|
|
463
|
+
'repository.isPrivate'
|
|
464
|
+
]
|
|
465
|
+
df_full = pd.json_normalize(r)
|
|
466
|
+
if all_fields:
|
|
467
|
+
df = df_full
|
|
468
|
+
else:
|
|
469
|
+
present_columns = []
|
|
470
|
+
for column in COLUMNS:
|
|
471
|
+
if column in df_full.columns:
|
|
472
|
+
present_columns.append(column)
|
|
473
|
+
df = df_full.loc[:, present_columns]
|
|
474
|
+
return df
|
|
475
|
+
|
|
476
|
+
def detect_workflow(self, workflow_name, workspace_id, verify=True):
|
|
477
|
+
"""Detects workflow type: nextflow or wdl.
|
|
478
|
+
|
|
479
|
+
Parameters
|
|
480
|
+
----------
|
|
481
|
+
workflow_name : string
|
|
482
|
+
Name of the workflow.
|
|
483
|
+
workspace_id : string
|
|
484
|
+
The CloudOS workspace id from to collect the workflows.
|
|
485
|
+
verify: [bool|string]
|
|
486
|
+
Whether to use SSL verification or not. Alternatively, if
|
|
487
|
+
a string is passed, it will be interpreted as the path to
|
|
488
|
+
the SSL certificate file.
|
|
489
|
+
|
|
490
|
+
Returns
|
|
491
|
+
-------
|
|
492
|
+
wt : string ['nextflow'|'wdl']
|
|
493
|
+
The workflow type detected
|
|
494
|
+
"""
|
|
495
|
+
my_workflows_r = self.get_workflow_list(workspace_id, verify=verify)
|
|
496
|
+
my_workflows = self.process_workflow_list(my_workflows_r)
|
|
497
|
+
wt_all = my_workflows.loc[
|
|
498
|
+
(my_workflows['name'] == workflow_name) & (my_workflows['archived.status'] == False),
|
|
499
|
+
'workflowType']
|
|
500
|
+
if len(wt_all) == 0:
|
|
501
|
+
raise ValueError(f'No workflow found with name: {workflow_name}')
|
|
502
|
+
wt = wt_all.unique()
|
|
503
|
+
if len(wt) > 1:
|
|
504
|
+
raise ValueError(f'More than one workflow type detected for {workflow_name}: {wt}')
|
|
505
|
+
return str(wt[0])
|
|
506
|
+
|
|
507
|
+
def is_module(self, workflow_name, workspace_id, verify=True):
|
|
508
|
+
"""Detects whether the workflow is a system module or not.
|
|
509
|
+
|
|
510
|
+
System modules use fixed queues, so this check is important to
|
|
511
|
+
properly manage queue selection.
|
|
512
|
+
|
|
513
|
+
Parameters
|
|
514
|
+
----------
|
|
515
|
+
workflow_name : string
|
|
516
|
+
Name of the workflow.
|
|
517
|
+
workspace_id : string
|
|
518
|
+
The CloudOS workspace id from to collect the workflows.
|
|
519
|
+
verify: [bool|string]
|
|
520
|
+
Whether to use SSL verification or not. Alternatively, if
|
|
521
|
+
a string is passed, it will be interpreted as the path to
|
|
522
|
+
the SSL certificate file.
|
|
523
|
+
|
|
524
|
+
Returns
|
|
525
|
+
-------
|
|
526
|
+
bool
|
|
527
|
+
True, if the workflow is a system module, false otherwise.
|
|
528
|
+
"""
|
|
529
|
+
my_workflows_r = self.get_workflow_list(workspace_id, verify=verify)
|
|
530
|
+
my_workflows = self.process_workflow_list(my_workflows_r)
|
|
531
|
+
group = my_workflows.loc[
|
|
532
|
+
(my_workflows['name'] == workflow_name) & (my_workflows['archived.status'] == False),
|
|
533
|
+
'group']
|
|
534
|
+
if len(group) == 0:
|
|
535
|
+
raise ValueError(f'No workflow found with name: {workflow_name}')
|
|
536
|
+
if len(group) > 1:
|
|
537
|
+
raise ValueError(f'More than one workflow found with name: {workflow_name}')
|
|
538
|
+
module_groups = ['system-tools',
|
|
539
|
+
'data-factory-data-connection-etl',
|
|
540
|
+
'data-factory',
|
|
541
|
+
'data-factory-omics-etl',
|
|
542
|
+
'drug-discovery',
|
|
543
|
+
'data-factory-omics-insights',
|
|
544
|
+
'intermediate'
|
|
545
|
+
]
|
|
546
|
+
if group.values[0] in module_groups:
|
|
547
|
+
return True
|
|
548
|
+
else:
|
|
549
|
+
return False
|
|
550
|
+
|
|
551
|
+
def get_project_list(self, workspace_id, verify=True):
|
|
552
|
+
"""Get all the project from a CloudOS workspace.
|
|
553
|
+
|
|
554
|
+
Parameters
|
|
555
|
+
----------
|
|
556
|
+
workspace_id : string
|
|
557
|
+
The CloudOS workspace id from to collect the projects.
|
|
558
|
+
verify: [bool|string]
|
|
559
|
+
Whether to use SSL verification or not. Alternatively, if
|
|
560
|
+
a string is passed, it will be interpreted as the path to
|
|
561
|
+
the SSL certificate file.
|
|
562
|
+
|
|
563
|
+
Returns
|
|
564
|
+
-------
|
|
565
|
+
r : requests.models.Response
|
|
566
|
+
The server response
|
|
567
|
+
"""
|
|
568
|
+
headers = {
|
|
569
|
+
"Content-type": "application/json",
|
|
570
|
+
"apikey": self.apikey
|
|
571
|
+
}
|
|
572
|
+
r = retry_requests_get("{}/api/v1/projects?teamId={}".format(self.cloudos_url, workspace_id),
|
|
573
|
+
headers=headers, verify=verify)
|
|
574
|
+
if r.status_code >= 400:
|
|
575
|
+
raise BadRequestException(r)
|
|
576
|
+
return r
|
|
577
|
+
|
|
578
|
+
@staticmethod
|
|
579
|
+
def process_project_list(r, all_fields=False):
|
|
580
|
+
"""Process a server response from a self.get_project_list call.
|
|
581
|
+
|
|
582
|
+
Parameters
|
|
583
|
+
----------
|
|
584
|
+
r : requests.models.Response
|
|
585
|
+
The server response. There are two types of responses:
|
|
586
|
+
- A list with 2 elements: 'total' and 'projects', being 'projects' a list of dicts,
|
|
587
|
+
one for each project.
|
|
588
|
+
- A list of dicts, one for each project.
|
|
589
|
+
all_fields : bool. Default=False
|
|
590
|
+
Whether to return a reduced version of the DataFrame containing
|
|
591
|
+
only the selected columns or the full DataFrame.
|
|
592
|
+
|
|
593
|
+
Returns
|
|
594
|
+
-------
|
|
595
|
+
df : pandas.DataFrame
|
|
596
|
+
A DataFrame with the requested columns from the projects.
|
|
597
|
+
"""
|
|
598
|
+
COLUMNS = ['_id',
|
|
599
|
+
'name',
|
|
600
|
+
'user.id',
|
|
601
|
+
'user.name',
|
|
602
|
+
'user.surname',
|
|
603
|
+
'user.email',
|
|
604
|
+
'createdAt',
|
|
605
|
+
'updatedAt',
|
|
606
|
+
'workflowCount',
|
|
607
|
+
'jobCount',
|
|
608
|
+
'notebookSessionCount'
|
|
609
|
+
]
|
|
610
|
+
my_projects = json.loads(r.content)
|
|
611
|
+
if 'projects' in my_projects:
|
|
612
|
+
my_projects = my_projects['projects']
|
|
613
|
+
df_full = pd.json_normalize(my_projects)
|
|
614
|
+
if all_fields:
|
|
615
|
+
df = df_full
|
|
616
|
+
else:
|
|
617
|
+
df = df_full.loc[:, COLUMNS]
|
|
618
|
+
return df
|
|
619
|
+
|
|
620
|
+
def workflow_import(self, workspace_id, workflow_url, workflow_name,
|
|
621
|
+
repository_project_id, workflow_docs_link='',
|
|
622
|
+
repository_id=None, verify=True):
|
|
623
|
+
"""Imports workflows to CloudOS.
|
|
624
|
+
|
|
625
|
+
Parameters
|
|
626
|
+
----------
|
|
627
|
+
workspace_id : string
|
|
628
|
+
The CloudOS workspace id from to collect the projects.
|
|
629
|
+
workflow_url : string
|
|
630
|
+
The URL of the workflow. Only Github or Bitbucket are allowed.
|
|
631
|
+
workflow_name : string
|
|
632
|
+
A name for the imported pipeline in CloudOS.
|
|
633
|
+
repository_project_id : int
|
|
634
|
+
The repository project ID.
|
|
635
|
+
workflow_docs_link : string
|
|
636
|
+
Link to the documentation URL.
|
|
637
|
+
repository_id : int
|
|
638
|
+
The repository ID. Only required for GitHub repositories.
|
|
639
|
+
verify: [bool|string]
|
|
640
|
+
Whether to use SSL verification or not. Alternatively, if
|
|
641
|
+
a string is passed, it will be interpreted as the path to
|
|
642
|
+
the SSL certificate file.
|
|
643
|
+
|
|
644
|
+
returns
|
|
645
|
+
-------
|
|
646
|
+
workflow_id : string
|
|
647
|
+
The newly imported worflow ID.
|
|
648
|
+
"""
|
|
649
|
+
platform_url = workflow_url.split('/')[2].split('.')[0]
|
|
650
|
+
repository_name = workflow_url.split('/')[-1]
|
|
651
|
+
if platform_url == 'github':
|
|
652
|
+
platform = 'github'
|
|
653
|
+
repository_project = workflow_url.split('/')[3]
|
|
654
|
+
if repository_id is None:
|
|
655
|
+
raise ValueError('Please, specify --repository-id when importing a GitHub repository')
|
|
656
|
+
elif platform_url == 'bitbucket':
|
|
657
|
+
platform = 'bitbucketServer'
|
|
658
|
+
repository_project = workflow_url.split('/')[4]
|
|
659
|
+
repository_id = repository_name
|
|
660
|
+
else:
|
|
661
|
+
raise ValueError(f'Your repository platform is not supported: {platform_url}. ' +
|
|
662
|
+
'Please use either GitHub or BitbucketServer.')
|
|
663
|
+
repository_name = workflow_url.split('/')[-1]
|
|
664
|
+
|
|
665
|
+
data = {
|
|
666
|
+
"workflowType": "nextflow",
|
|
667
|
+
"repository": {
|
|
668
|
+
"platform": platform,
|
|
669
|
+
"repositoryId": repository_id,
|
|
670
|
+
"name": repository_name,
|
|
671
|
+
"owner": {
|
|
672
|
+
"login": repository_project,
|
|
673
|
+
"id": repository_project_id},
|
|
674
|
+
"isPrivate": True,
|
|
675
|
+
"url": workflow_url,
|
|
676
|
+
"commit": "",
|
|
677
|
+
"branch": ""
|
|
678
|
+
},
|
|
679
|
+
"name": workflow_name,
|
|
680
|
+
"description": "",
|
|
681
|
+
"isPublic": False,
|
|
682
|
+
"mainFile": "main.nf",
|
|
683
|
+
"defaultContainer": None,
|
|
684
|
+
"processes": [],
|
|
685
|
+
"docsLink": workflow_docs_link,
|
|
686
|
+
"team": workspace_id
|
|
687
|
+
}
|
|
688
|
+
headers = {
|
|
689
|
+
"Content-type": "application/json",
|
|
690
|
+
"apikey": self.apikey
|
|
691
|
+
}
|
|
692
|
+
r = retry_requests_post("{}/api/v1/workflows?teamId={}".format(self.cloudos_url,
|
|
693
|
+
workspace_id),
|
|
694
|
+
json=data, headers=headers, verify=verify)
|
|
695
|
+
if r.status_code == 401:
|
|
696
|
+
raise ValueError('It seems your API key is not authorised. Please check if ' +
|
|
697
|
+
'your workspace has support for importing workflows using cloudos-cli')
|
|
698
|
+
elif r.status_code >= 400:
|
|
699
|
+
raise BadRequestException(r)
|
|
700
|
+
content = json.loads(r.content)
|
|
701
|
+
return content['_id']
|
|
702
|
+
|
|
703
|
+
def get_user_info(self, verify=True):
|
|
704
|
+
"""Gets user information from users/me endpoint
|
|
705
|
+
|
|
706
|
+
Parameters
|
|
707
|
+
----------
|
|
708
|
+
verify: [bool|string]
|
|
709
|
+
Whether to use SSL verification or not. Alternatively, if
|
|
710
|
+
a string is passed, it will be interpreted as the path to
|
|
711
|
+
the SSL certificate file.
|
|
712
|
+
|
|
713
|
+
Returns
|
|
714
|
+
-------
|
|
715
|
+
r : requests.models.Response.content
|
|
716
|
+
The server response content
|
|
717
|
+
"""
|
|
718
|
+
headers = {
|
|
719
|
+
"Content-type": "application/json",
|
|
720
|
+
"apikey": self.apikey
|
|
721
|
+
}
|
|
722
|
+
r = retry_requests_get("{}/api/v1/users/me".format(self.cloudos_url),
|
|
723
|
+
headers=headers, verify=verify)
|
|
724
|
+
if r.status_code >= 400:
|
|
725
|
+
raise BadRequestException(r)
|
|
726
|
+
return json.loads(r.content)
|