cloudos-cli 2.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. cloudos_cli/__init__.py +11 -0
  2. cloudos_cli/__main__.py +1297 -0
  3. cloudos_cli/_version.py +1 -0
  4. cloudos_cli/clos.py +726 -0
  5. cloudos_cli/jobs/__init__.py +8 -0
  6. cloudos_cli/jobs/job.py +555 -0
  7. cloudos_cli/queue/__init__.py +8 -0
  8. cloudos_cli/queue/queue.py +139 -0
  9. cloudos_cli/utils/__init__.py +9 -0
  10. cloudos_cli/utils/errors.py +32 -0
  11. cloudos_cli/utils/requests.py +75 -0
  12. cloudos_cli-2.17.0.dist-info/LICENSE +674 -0
  13. cloudos_cli-2.17.0.dist-info/METADATA +1060 -0
  14. cloudos_cli-2.17.0.dist-info/RECORD +41 -0
  15. cloudos_cli-2.17.0.dist-info/WHEEL +5 -0
  16. cloudos_cli-2.17.0.dist-info/entry_points.txt +2 -0
  17. cloudos_cli-2.17.0.dist-info/top_level.txt +2 -0
  18. tests/__init__.py +0 -0
  19. tests/functions_for_pytest.py +7 -0
  20. tests/test_clos/__init__.py +0 -0
  21. tests/test_clos/test_create_cromwell_header.py +35 -0
  22. tests/test_clos/test_cromwell_switch.py +77 -0
  23. tests/test_clos/test_detect_workflow.py +47 -0
  24. tests/test_clos/test_get_cromwell_status.py +77 -0
  25. tests/test_clos/test_get_curated_workflow_list.py +72 -0
  26. tests/test_clos/test_get_job_list.py +79 -0
  27. tests/test_clos/test_get_job_status.py +75 -0
  28. tests/test_clos/test_get_project_list.py +74 -0
  29. tests/test_clos/test_get_user_info.py +68 -0
  30. tests/test_clos/test_get_workflow_list.py +87 -0
  31. tests/test_clos/test_is_module.py +48 -0
  32. tests/test_clos/test_process_job_list.py +74 -0
  33. tests/test_clos/test_process_project_list.py +36 -0
  34. tests/test_clos/test_process_workflow_list.py +36 -0
  35. tests/test_clos/test_wait_job_completion.py +40 -0
  36. tests/test_clos/test_workflow_import.py +77 -0
  37. tests/test_jobs/__init__.py +0 -0
  38. tests/test_jobs/test_convert_nextflow_to_json.py +104 -0
  39. tests/test_jobs/test_project_id.py +67 -0
  40. tests/test_jobs/test_send_job.py +84 -0
  41. tests/test_jobs/test_workflow_id.py +67 -0
"""
Functions and classes related to jobs.
"""

from .job import Job

# Export the class this package actually re-exports. The previous value
# ('job') named the submodule, not the imported class, so
# `from cloudos_cli.jobs import *` did not expose `Job`.
__all__ = ['Job']
@@ -0,0 +1,555 @@
1
+ """
2
+ This is the main class to create jobs.
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Union
7
+ import json
8
+ from cloudos_cli.clos import Cloudos
9
+ from cloudos_cli.utils.errors import BadRequestException
10
+ from cloudos_cli.utils.requests import retry_requests_post
11
+
12
+
13
@dataclass
class Job(Cloudos):
    """Class to store and operate jobs.

    Parameters
    ----------
    cloudos_url : string
        The CloudOS service url.
    apikey : string
        Your CloudOS API key.
    cromwell_token : string
        Cromwell server token.
    workspace_id : string
        The specific Cloudos workspace id.
    project_name : string
        The name of a CloudOS project.
    workflow_name : string
        The name of a CloudOS workflow or pipeline.
    verify: [bool|string]
        Whether to use SSL verification or not. Alternatively, if
        a string is passed, it will be interpreted as the path to
        the SSL certificate file.
    mainfile : string
        The name of the mainFile used by the workflow. Required for WDL pipelines as different
        mainFiles could be loaded for a single pipeline.
    importsfile : string
        The name of the importsFile used by the workflow. Optional and only used for WDL pipelines
        as different importsFiles could be loaded for a single pipeline.
    repository_platform : string
        The name of the repository platform of the workflow.
    project_id : string
        The CloudOS project id for a given project name.
    workflow_id : string
        The CloudOS workflow id for a given workflow_name.
    """
    # NOTE(review): cloudos_url, apikey and cromwell_token are documented above
    # but not declared here — presumably they are dataclass fields inherited
    # from Cloudos; confirm against clos.py.
    workspace_id: str
    project_name: str
    workflow_name: str
    verify: Union[bool, str] = True
    mainfile: str = None
    importsfile: str = None
    repository_platform: str = 'github'
    # project_id and workflow_id are re-declared as properties below. After
    # the class body executes, the class attribute is the *property object*
    # itself, so the dataclass-generated __init__ passes that property object
    # to the setter whenever the caller omits the argument. The setters use
    # isinstance(v, property) as the "not supplied by user" sentinel and then
    # resolve the id via the CloudOS API.
    project_id: str = None
    workflow_id: str = None

    @property
    def project_id(self) -> str:
        # Backing attribute set by the setter (first assignment happens
        # inside the dataclass-generated __init__).
        return self._project_id

    @project_id.setter
    def project_id(self, v) -> None:
        if isinstance(v, property):
            # Fetch the value as not defined by user.
            self._project_id = self.fetch_cloudos_id(
                self.apikey,
                self.cloudos_url,
                'projects',
                self.workspace_id,
                self.project_name,
                verify=self.verify)
        else:
            # Let the user define the value.
            self._project_id = v

    @property
    def workflow_id(self) -> str:
        return self._workflow_id

    @workflow_id.setter
    def workflow_id(self, v) -> None:
        if isinstance(v, property):
            # Fetch the value as not defined by user. mainfile/importsfile
            # disambiguate WDL workflows sharing the same name.
            self._workflow_id = self.fetch_cloudos_id(
                self.apikey,
                self.cloudos_url,
                'workflows',
                self.workspace_id,
                self.workflow_name,
                self.mainfile,
                self.importsfile,
                self.repository_platform,
                self.verify)
        else:
            # Let the user define the value.
            self._workflow_id = v
+
99
+ def fetch_cloudos_id(self,
100
+ apikey,
101
+ cloudos_url,
102
+ resource,
103
+ workspace_id,
104
+ name,
105
+ mainfile=None,
106
+ importsfile=None,
107
+ repository_platform='github',
108
+ verify=True):
109
+ """Fetch the cloudos id for a given name.
110
+
111
+ Paramters
112
+ ---------
113
+ apikey : string
114
+ Your CloudOS API key
115
+ cloudos_url : string
116
+ The CloudOS service url.
117
+ resource : string
118
+ The resource you want to fetch from. E.g.: projects.
119
+ workspace_id : string
120
+ The specific Cloudos workspace id.
121
+ name : string
122
+ The name of a CloudOS resource element.
123
+ mainfile : string
124
+ The name of the mainFile used by the workflow. Only used when resource == 'workflows'.
125
+ Required for WDL pipelines as different mainFiles could be loaded for a single
126
+ pipeline.
127
+ importsfile : string
128
+ The name of the importsFile used by the workflow. Optional and only used for WDL pipelines
129
+ as different importsFiles could be loaded for a single pipeline.
130
+ repository_platform : string
131
+ The name of the repository platform of the workflow resides.
132
+ verify: [bool|string]
133
+ Whether to use SSL verification or not. Alternatively, if
134
+ a string is passed, it will be interpreted as the path to
135
+ the SSL certificate file.
136
+
137
+ Returns
138
+ -------
139
+ project_id : string
140
+ The CloudOS project id for a given project name.
141
+ """
142
+ allowed_resources = ['projects', 'workflows']
143
+ if resource not in allowed_resources:
144
+ raise ValueError('Your specified resource is not supported. ' +
145
+ f'Use one of the following: {allowed_resources}')
146
+ if resource == 'workflows':
147
+ content = self.get_workflow_list(workspace_id, verify=verify)
148
+ for element in content:
149
+ if (element["name"] == name and
150
+ element["repository"]["platform"] == repository_platform and
151
+ not element["archived"]["status"]):
152
+ if mainfile is None:
153
+ return element["_id"]
154
+ elif element["mainFile"] == mainfile:
155
+ if importsfile is None and "importsFile" not in element.keys():
156
+ return element["_id"]
157
+ elif "importsFile" in element.keys() and element["importsFile"] == importsfile:
158
+ return element["_id"]
159
+ elif resource == 'projects':
160
+ r = self.get_project_list(workspace_id, verify=verify)
161
+ content = json.loads(r.content)
162
+ # New API projects endpoint spec
163
+ if type(content) is dict:
164
+ for element in content["projects"]:
165
+ if element["name"] == name:
166
+ return element["_id"]
167
+ # Old API projects endpoint spec added for backwards compatibility
168
+ elif type(content) is list:
169
+ for element in content:
170
+ if element["name"] == name:
171
+ return element["_id"]
172
+ if mainfile is not None:
173
+ raise ValueError(f'[ERROR] A workflow named \'{name}\' with a mainFile \'{mainfile}\'' +
174
+ f' and an importsFile \'{importsfile}\' was not found')
175
+ else:
176
+ raise ValueError(f'[ERROR] No {name} element in {resource} was found')
177
+
178
    def convert_nextflow_to_json(self,
                                 job_config,
                                 parameter,
                                 example_parameters,
                                 git_commit,
                                 git_tag,
                                 project_id,
                                 workflow_id,
                                 job_name,
                                 resumable,
                                 save_logs,
                                 batch,
                                 job_queue_id,
                                 nextflow_profile,
                                 nextflow_version,
                                 instance_type,
                                 instance_disk,
                                 storage_mode,
                                 lustre_size,
                                 execution_platform,
                                 hpc_id,
                                 workflow_type,
                                 cromwell_id,
                                 cost_limit,
                                 use_mountpoints,
                                 docker_login):
        """Converts a nextflow.config file into a json formatted dict.

        Parameters
        ----------
        job_config : string
            Path to a nextflow.config file with parameters scope.
        parameter : tuple
            Tuple of strings indicating the parameters to pass to the pipeline call.
            They are in the following form: ('param1=param1val', 'param2=param2val', ...)
        example_parameters : list
            A list of dicts, with the parameters required for the API request in JSON format.
            It is typically used to run curated pipelines using the already available
            example parameters.
        git_commit : string
            The exact commit of the pipeline to use. Equivalent to -r
            option in Nextflow. If not specified, the last commit of the
            default branch will be used.
        git_tag : string
            The tag of the pipeline to use. If not specified, the last
            commit of the default branch will be used.
        project_id : string
            The CloudOS project id for a given project name.
        workflow_id : string
            The CloudOS workflow id for a given workflow_name.
        job_name : string.
            The name to assign to the job.
        resumable: bool
            Whether to create a resumable job or not.
        save_logs : bool
            Whether to save job logs or not.
        batch: bool
            Whether to create a batch job or an ignite one.
        job_queue_id : string
            Job queue Id to use in the batch job.
        nextflow_profile: string
            A comma separated string with the profiles to be used.
        nextflow_version: string
            Nextflow version to use when executing the workflow in CloudOS.
        instance_type : string
            Name of the instance type to be used for the job master node, for example for AWS EC2 c5.xlarge
        instance_disk : int
            The disk space of the instance, in GB.
        storage_mode : string
            Either 'lustre' or 'regular'. Indicates if the user wants to select regular
            or lustre storage.
        lustre_size : int
            The lustre storage to be used when --storage-mode=lustre, in GB. It should be 1200 or
            a multiple of it.
        execution_platform : string ['aws'|'azure'|'hpc']
            The execution platform implemented in your CloudOS.
        hpc_id : string
            The ID of your HPC in CloudOS.
        workflow_type : str
            The type of workflow to run. Either 'nextflow' or 'wdl'.
        cromwell_id : str
            Cromwell server ID.
        cost_limit : float
            Job cost limit. -1 means no cost limit.
        use_mountpoints : bool
            Whether to use or not AWS S3 mountpoint for quicker file staging.
        docker_login : bool
            Whether to use private docker images, provided the users have linked their docker.io accounts.

        Returns
        -------
        params : dict
            A JSON formatted dict.

        Raises
        ------
        ValueError
            If no parameter source at all is given, the config file has
            malformed entries, --git-tag and --git-commit are both set, or
            the storage options are invalid.
        """
        workflow_params = []
        if workflow_type == 'wdl':
            # This is required as non-resumable jobs fails always using WDL workflows.
            resumable = True
        # At least one source of pipeline parameters must be provided.
        if (
            nextflow_profile is None and
            job_config is None and
            len(parameter) == 0 and
            len(example_parameters) == 0
        ):
            raise ValueError('No --job-config, --nextflow_profile, --parameter or ' +
                             '--example_parameters were specified,' +
                             ' please use at least one of these options.')
        # WDL has no profiles, so a profile alone is not enough for WDL runs.
        if workflow_type == 'wdl' and job_config is None and len(parameter) == 0:
            raise ValueError('No --job-config or --parameter were provided. At least one of ' +
                             'these are required for WDL workflows.')
        if job_config is not None:
            # Line-oriented scan of the config: `reading` is True while we are
            # inside a block whose opening line mentions 'params'; a bare '}'
            # line closes it.
            with open(job_config, 'r') as p:
                reading = False
                for p_l in p:
                    if 'params' in p_l.lower():
                        reading = True
                    else:
                        if reading:
                            if workflow_type == 'wdl':
                                # Keep quotes for WDL: values are raw JSON-ish text.
                                p_l_strip = p_l.strip().replace(
                                    ' ', '')
                            else:
                                # For Nextflow, also drop surrounding quotes.
                                p_l_strip = p_l.strip().replace(
                                    ' ', '').replace('\"', '').replace('\'', '')
                            if len(p_l_strip) == 0:
                                continue
                            elif p_l_strip[0] == '/' or p_l_strip[0] == '#':
                                # Skip comment lines ('//' or '#').
                                continue
                            elif p_l_strip == '}':
                                reading = False
                            else:
                                # 'name=value'; values may themselves contain '='.
                                p_list = p_l_strip.split('=')
                                p_name = p_list[0]
                                p_value = '='.join(p_list[1:])
                                if len(p_list) < 2:
                                    raise ValueError('Please, specify your ' +
                                                     'parameters in ' +
                                                     f'{job_config} using ' +
                                                     'the \'=\' as spacer. ' +
                                                     'E.g: name = my_name')
                                elif workflow_type == 'wdl':
                                    # WDL parameters carry no CLI prefix.
                                    param = {"prefix": "",
                                             "name": p_name,
                                             "parameterKind": "textValue",
                                             "textValue": p_value}
                                    workflow_params.append(param)
                                else:
                                    # Nextflow parameters are passed as '--name value'.
                                    param = {"prefix": "--",
                                             "name": p_name,
                                             "parameterKind": "textValue",
                                             "textValue": p_value}
                                    workflow_params.append(param)
            if len(workflow_params) == 0:
                raise ValueError(f'The {job_config} file did not contain any ' +
                                 'valid parameter')
        if len(parameter) > 0:
            for p in parameter:
                p_split = p.split('=')
                if len(p_split) < 2:
                    raise ValueError('Please, specify -p / --parameter using a single \'=\' ' +
                                     'as spacer. E.g: input=value')
                p_name = p_split[0]
                p_value = '='.join(p_split[1:])
                if workflow_type == 'wdl':
                    param = {"prefix": "",
                             "name": p_name,
                             "parameterKind": "textValue",
                             "textValue": p_value}
                    workflow_params.append(param)
                else:
                    param = {"prefix": "--",
                             "name": p_name,
                             "parameterKind": "textValue",
                             "textValue": p_value}
                    workflow_params.append(param)
            if len(workflow_params) == 0:
                raise ValueError(f'The provided parameters are not valid: {parameter}')
        if len(example_parameters) > 0:
            # Curated-pipeline example parameters are already API-shaped dicts.
            for example_param in example_parameters:
                workflow_params.append(example_param)
        if git_tag is not None and git_commit is not None:
            raise ValueError('Please, specify none or only one of --git-tag' +
                             ' or --git-commit options but not both.')
        if git_commit is not None:
            revision_block = {
                "commit": git_commit,
                "isLatest": False
            }
        elif git_tag is not None:
            revision_block = {
                "tag": git_tag,
                "isLatest": False
            }
        else:
            # Empty string presumably signals "use latest default-branch
            # revision" to the API — TODO confirm against the CloudOS API spec.
            revision_block = ""
        if storage_mode == "lustre":
            print('\n[WARNING] Lustre storage has been selected. Please, be sure that this kind of ' +
                  'storage is available in your CloudOS workspace.\n')
            # Lustre filesystems are provisioned in 1200 GB increments.
            if lustre_size % 1200:
                raise ValueError('Please, specify a lustre storage size of 1200 or a multiple of it. ' +
                                 f'{lustre_size} is not a valid number.')
        if storage_mode not in ['lustre', 'regular']:
            raise ValueError('Please, use either \'lustre\' or \'regular\' for --storage-mode ' +
                             f'{storage_mode} is not allowed')
        # Assemble the JSON payload expected by POST /api/v1/jobs.
        params = {
            "parameters": workflow_params,
            "project": project_id,
            "workflow": workflow_id,
            "name": job_name,
            "nextflowVersion": nextflow_version,
            "resumable": resumable,
            "saveProcessLogs": save_logs,
            "batch": {
                "dockerLogin": docker_login,
                "enabled": batch,
                "jobQueue": job_queue_id
            },
            "cromwellCloudResources": cromwell_id,
            "executionPlatform": execution_platform,
            "hpc": hpc_id,
            "storageSizeInGb": instance_disk,
            "execution": {
                "computeCostLimit": cost_limit,
                "optim": "test"
            },
            "lusterFsxStorageSizeInGb": lustre_size,
            "storageMode": storage_mode,
            "revision": revision_block,
            "profile": nextflow_profile,
            "instanceType": instance_type,
            "usesFusionFileSystem": use_mountpoints
        }
        if execution_platform != 'hpc':
            # HPC jobs have no cloud master instance; all others request an
            # on-demand (non-spot) master node.
            params['masterInstance'] = {
                "requestedInstance": {
                    "type": instance_type,
                    "asSpot": False
                }
            }
        return params
418
+
419
+ def send_job(self,
420
+ job_config=None,
421
+ parameter=(),
422
+ example_parameters=[],
423
+ git_commit=None,
424
+ git_tag=None,
425
+ job_name='new_job',
426
+ resumable=False,
427
+ save_logs=True,
428
+ batch=True,
429
+ job_queue_id=None,
430
+ nextflow_profile=None,
431
+ nextflow_version='22.10.8',
432
+ instance_type='c5.xlarge',
433
+ instance_disk=500,
434
+ storage_mode='regular',
435
+ lustre_size=1200,
436
+ execution_platform='aws',
437
+ hpc_id=None,
438
+ workflow_type='nextflow',
439
+ cromwell_id=None,
440
+ cost_limit=30.0,
441
+ use_mountpoints=False,
442
+ docker_login=False,
443
+ verify=True):
444
+ """Send a job to CloudOS.
445
+
446
+ Parameters
447
+ ----------
448
+ job_config : string
449
+ Path to a nextflow.config file with parameters scope.
450
+ parameter : tuple
451
+ Tuple of strings indicating the parameters to pass to the pipeline call.
452
+ They are in the following form: ('param1=param1val', 'param2=param2val', ...)
453
+ example_parameters : list
454
+ A list of dicts, with the parameters required for the API request in JSON format.
455
+ It is typically used to run curated pipelines using the already available
456
+ example parameters.
457
+ git_commit : string
458
+ The exact commit of the pipeline to use. Equivalent to -r
459
+ option in Nextflow. If not specified, the last commit of the
460
+ default branch will be used.
461
+ git_tag : string
462
+ The tag of the pipeline to use. If not specified, the last
463
+ commit of the default branch will be used.
464
+ job_name : string
465
+ The name to assign to the job.
466
+ resumable : bool
467
+ Whether to create a resumable job or not.
468
+ save_logs : bool
469
+ Whether to save job logs or not.
470
+ batch: bool
471
+ Whether to create a batch job or an ignite one.
472
+ job_queue_id : string
473
+ Job queue Id to use in the batch job.
474
+ nextflow_profile: string
475
+ A comma separated string with the profiles to be used.
476
+ nextflow_version: string
477
+ Nextflow version to use when executing the workflow in CloudOS.
478
+ instance_type : string
479
+ Name of the instance type to be used for the job master node, for example for AWS EC2 c5.xlarge
480
+ instance_disk : int
481
+ The disk space of the instance, in GB.
482
+ storage_mode : string
483
+ Either 'lustre' or 'regular'. Indicates if the user wants to select regular
484
+ or lustre storage.
485
+ lustre_size : int
486
+ The lustre storage to be used when --storage-mode=lustre, in GB. It should be 1200 or
487
+ a multiple of it.
488
+ execution_platform : string ['aws'|'azure'|'hpc']
489
+ The execution platform implemented in your CloudOS.
490
+ hpc_id : string
491
+ The ID of your HPC in CloudOS.
492
+ workflow_type : str
493
+ The type of workflow to run. Either 'nextflow' or 'wdl'.
494
+ cromwell_id : str
495
+ Cromwell server ID.
496
+ cost_limit : float
497
+ Job cost limit. -1 means no cost limit.
498
+ use_mountpoints : bool
499
+ Whether to use or not AWS S3 mountpoint for quicker file staging.
500
+ docker_login : bool
501
+ Whether to use private docker images, provided the users have linked their docker.io accounts.
502
+ verify: [bool|string]
503
+ Whether to use SSL verification or not. Alternatively, if
504
+ a string is passed, it will be interpreted as the path to
505
+ the SSL certificate file.
506
+
507
+ Returns
508
+ -------
509
+ j_id : string
510
+ The CloudOS job id of the job just launched.
511
+ """
512
+ apikey = self.apikey
513
+ cloudos_url = self.cloudos_url
514
+ workspace_id = self.workspace_id
515
+ workflow_id = self.workflow_id
516
+ project_id = self.project_id
517
+ # Prepare api request for CloudOS to run a job
518
+ headers = {
519
+ "Content-type": "application/json",
520
+ "apikey": apikey
521
+ }
522
+ params = self.convert_nextflow_to_json(job_config,
523
+ parameter,
524
+ example_parameters,
525
+ git_commit,
526
+ git_tag,
527
+ project_id,
528
+ workflow_id,
529
+ job_name,
530
+ resumable,
531
+ save_logs,
532
+ batch,
533
+ job_queue_id,
534
+ nextflow_profile,
535
+ nextflow_version,
536
+ instance_type,
537
+ instance_disk,
538
+ storage_mode,
539
+ lustre_size,
540
+ execution_platform,
541
+ hpc_id,
542
+ workflow_type,
543
+ cromwell_id,
544
+ cost_limit,
545
+ use_mountpoints,
546
+ docker_login)
547
+ r = retry_requests_post("{}/api/v1/jobs?teamId={}".format(cloudos_url,
548
+ workspace_id),
549
+ data=json.dumps(params), headers=headers, verify=verify)
550
+ if r.status_code >= 400:
551
+ raise BadRequestException(r)
552
+ j_id = json.loads(r.content)["_id"]
553
+ print('\tJob successfully launched to CloudOS, please check the ' +
554
+ f'following link: {cloudos_url}/app/jobs/{j_id}')
555
+ return j_id
@@ -0,0 +1,8 @@
"""
Functions and classes related to job queues.
"""

from .queue import Queue

# Export the class this package actually re-exports. The previous value
# ('queue') named the submodule, not the imported class, so
# `from cloudos_cli.queue import *` did not expose `Queue`.
__all__ = ['Queue']