cloudos-cli 2.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. cloudos_cli/__init__.py +11 -0
  2. cloudos_cli/__main__.py +1297 -0
  3. cloudos_cli/_version.py +1 -0
  4. cloudos_cli/clos.py +726 -0
  5. cloudos_cli/jobs/__init__.py +8 -0
  6. cloudos_cli/jobs/job.py +555 -0
  7. cloudos_cli/queue/__init__.py +8 -0
  8. cloudos_cli/queue/queue.py +139 -0
  9. cloudos_cli/utils/__init__.py +9 -0
  10. cloudos_cli/utils/errors.py +32 -0
  11. cloudos_cli/utils/requests.py +75 -0
  12. cloudos_cli-2.17.0.dist-info/LICENSE +674 -0
  13. cloudos_cli-2.17.0.dist-info/METADATA +1060 -0
  14. cloudos_cli-2.17.0.dist-info/RECORD +41 -0
  15. cloudos_cli-2.17.0.dist-info/WHEEL +5 -0
  16. cloudos_cli-2.17.0.dist-info/entry_points.txt +2 -0
  17. cloudos_cli-2.17.0.dist-info/top_level.txt +2 -0
  18. tests/__init__.py +0 -0
  19. tests/functions_for_pytest.py +7 -0
  20. tests/test_clos/__init__.py +0 -0
  21. tests/test_clos/test_create_cromwell_header.py +35 -0
  22. tests/test_clos/test_cromwell_switch.py +77 -0
  23. tests/test_clos/test_detect_workflow.py +47 -0
  24. tests/test_clos/test_get_cromwell_status.py +77 -0
  25. tests/test_clos/test_get_curated_workflow_list.py +72 -0
  26. tests/test_clos/test_get_job_list.py +79 -0
  27. tests/test_clos/test_get_job_status.py +75 -0
  28. tests/test_clos/test_get_project_list.py +74 -0
  29. tests/test_clos/test_get_user_info.py +68 -0
  30. tests/test_clos/test_get_workflow_list.py +87 -0
  31. tests/test_clos/test_is_module.py +48 -0
  32. tests/test_clos/test_process_job_list.py +74 -0
  33. tests/test_clos/test_process_project_list.py +36 -0
  34. tests/test_clos/test_process_workflow_list.py +36 -0
  35. tests/test_clos/test_wait_job_completion.py +40 -0
  36. tests/test_clos/test_workflow_import.py +77 -0
  37. tests/test_jobs/__init__.py +0 -0
  38. tests/test_jobs/test_convert_nextflow_to_json.py +104 -0
  39. tests/test_jobs/test_project_id.py +67 -0
  40. tests/test_jobs/test_send_job.py +84 -0
  41. tests/test_jobs/test_workflow_id.py +67 -0
cloudos_cli-2.17.0.dist-info/METADATA (new file, +1060 -0):
Metadata-Version: 2.2
Name: cloudos_cli
Version: 2.17.0
Summary: Python package for interacting with CloudOS
Home-page: https://github.com/lifebit-ai/cloudos-cli
Author: David Piñeyro
Author-email: david.pineyro@lifebit.ai
Classifier: Programming Language :: Python :: 3
Classifier: Operating System :: POSIX :: Linux
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: click>=8.0.1
Requires-Dist: pandas>=1.3.4
Requires-Dist: numpy==1.26.4
Requires-Dist: requests>=2.26.0
Provides-Extra: test
Requires-Dist: pytest; extra == "test"
Requires-Dist: mock; extra == "test"
Requires-Dist: responses; extra == "test"
Requires-Dist: requests_mock; extra == "test"
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: provides-extra
Dynamic: requires-dist
Dynamic: requires-python
Dynamic: summary

# cloudos-cli

[![CI_tests](https://github.com/lifebit-ai/cloudos-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/lifebit-ai/cloudos-cli/actions/workflows/ci.yml)

Python package for interacting with CloudOS

## Requirements

The package requires Python >= 3.7 and the following python packages:

```
click>=8.0.1
pandas>=1.3.4
numpy==1.26.4
requests>=2.26.0
```

## Installation

### Docker image

It is recommended to install the package as a Docker image, using the provided `Dockerfile`
and `environment.yml` files.

To run the existing docker image at `quay.io`:

```bash
docker run --rm -it quay.io/lifebitaiorg/cloudos-cli:latest
```

### From PyPI

The package is also available from [PyPI](https://pypi.org/project/cloudos-cli/):

```bash
pip install cloudos-cli
```

### From GitHub

You will need Python >= 3.7 and pip installed.

Clone the repo and install it using pip:

```bash
git clone https://github.com/lifebit-ai/cloudos-cli
cd cloudos-cli
pip install -r requirements.txt
pip install .
```

> NOTE: To be able to call the `cloudos` executable, ensure that the local clone of the `cloudos-cli` folder is included in the `PATH` variable, using for example the command `export PATH="/absolute/path/to/cloudos-cli:$PATH"`.

## Usage

The package is meant to be used both as a CLI tool and as a regular package to
import into your own scripts.

### Usage as a Command Line Interface tool

To get general information about the tool:

```bash
cloudos --help
```
```console
Usage: cloudos [OPTIONS] COMMAND [ARGS]...

CloudOS python package: a package for interacting with CloudOS.

Options:
--version Show the version and exit.
--help Show this message and exit.

Commands:
cromwell Cromwell server functionality: check status, start and stop.
job CloudOS job functionality: run and check jobs in CloudOS.
project CloudOS project functionality: list projects in CloudOS.
workflow CloudOS workflow functionality: list workflows in CloudOS.
```

This lists the implemented commands. Each command has its own subcommands, each
with its own `--help`:

```bash
cloudos job run --help
```
```console
Options:
-k, --apikey TEXT Your CloudOS API key [required]
-c, --cloudos-url TEXT The CloudOS url you are trying to access to.
Default=https://cloudos.lifebit.ai.
--workspace-id TEXT The specific CloudOS workspace id.
[required]
--project-name TEXT The name of a CloudOS project. [required]
--workflow-name TEXT The name of a CloudOS workflow or pipeline.
[required]
--job-config TEXT A config file similar to a nextflow.config
file, but only with the parameters to use
with your job.
-p, --parameter TEXT A single parameter to pass to the job call.
It should be in the following form:
parameter_name=parameter_value. E.g.: -p
input=s3://path_to_my_file. You can use this
option as many times as parameters you want
to include.
--nextflow-profile TEXT A comma separated string indicating the
nextflow profile/s to use with your job.
--nextflow-version [22.10.8|24.04.4|latest]
Nextflow version to use when executing the
workflow in CloudOS. Please, note that
versions above 22.10.8 are only DSL2
compatible. Default=22.10.8.
--git-commit TEXT The exact whole 40 character commit hash to
run for the selected pipeline. If not
specified it defaults to the last commit of
the default branch.
--git-tag TEXT The tag to run for the selected pipeline. If
not specified it defaults to the last commit
of the default branch.
--job-name TEXT The name of the job. Default=new_job.
--resumable Whether to make the job able to be resumed
or not.
--do-not-save-logs Avoids process log saving. If you select
this option, your job process logs will not
be stored.
--spot [Deprecated in 2.11.0] This option has been
deprecated and has no effect. Spot instances
are no longer available in CloudOS.
--batch [Deprecated in 2.7.0] Since v2.7.0, the
default executor is AWSbatch so there is no
need to use this flag. It is maintained for
backwards compatibility.
--ignite This flag allows running ignite executor if
available. Please, note that ignite executor
is being deprecated and may not be available
in your CloudOS.
--job-queue TEXT Name of the job queue to use with a batch
job.
--instance-type TEXT The type of execution platform compute
instance to use. Default=c5.xlarge(aws)|Stan
dard_D4as_v4(azure).
--instance-disk INTEGER The amount of disk storage to configure.
Default=500.
--storage-mode TEXT Either 'lustre' or 'regular'. Indicates if
the user wants to select regular or lustre
storage. Default=regular.
--lustre-size INTEGER The lustre storage to be used when
--storage-mode=lustre, in GB. It should be
1200 or a multiple of it. Default=1200.
--wait-completion Whether to wait to job completion and report
final job status.
--wait-time INTEGER Max time to wait (in seconds) to job
completion. Default=3600.
--wdl-mainfile TEXT For WDL workflows, which mainFile (.wdl) is
configured to use.
--wdl-importsfile TEXT For WDL workflows, which importsFile (.zip)
is configured to use.
-t, --cromwell-token TEXT Specific Cromwell server authentication
token. Currently, not necessary as apikey
can be used instead, but maintained for
backwards compatibility.
--repository-platform TEXT Name of the repository platform of the
workflow. Default=github.
--execution-platform [aws|azure|hpc]
Name of the execution platform implemented
in your CloudOS. Default=aws.
--hpc-id TEXT ID of your HPC, only applicable when
--execution-platform=hpc.
Default=660fae20f93358ad61e0104b
--cost-limit FLOAT Add a cost limit to your job. Default=30.0
(For no cost limit please use -1).
--accelerate-file-staging Enables AWS S3 mountpoint for quicker file
staging.
--use-private-docker-repository
Allows to use private docker repository for
running jobs. The Docker user account has to
be already linked to CloudOS.
--verbose Whether to print information messages or
not.
--request-interval INTEGER Time interval to request (in seconds) the
job status. For large jobs is important to
use a high number to make fewer requests so
that is not considered spamming by the API.
Default=30.
--disable-ssl-verification Disable SSL certificate verification.
Please, remember that this option is not
generally recommended for security reasons.
--ssl-cert TEXT Path to your SSL certificate file.
--help Show this message and exit.
```

#### Send a job to CloudOS

First, configure your local environment to simplify parameter input. We will
submit a small toy example that is already available.

```bash
MY_API_KEY="xxxxx"
CLOUDOS="https://cloudos.lifebit.ai"
WORKSPACE_ID="xxxxx"
PROJECT_NAME="API jobs"
WORKFLOW_NAME="rnatoy"
JOB_PARAMS="cloudos/examples/rnatoy.config"
```

As you can see, a file with the job parameters is used to configure the
job. This file could be a regular `nextflow.config` file or any file with the
following structure:

```
params {
    reads = s3://lifebit-featured-datasets/pipelines/rnatoy-data
    genome = s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.Ggal71.500bpflank.fa
    annot = s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.bed.gff
}
```

To submit our job:

```bash
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME" \
    --workflow-name $WORKFLOW_NAME \
    --job-config $JOB_PARAMS \
    --resumable
```

In addition, parameters can also be specified using the command-line option `-p` (or `--parameter`). For instance,
the previous command is equivalent to:

```bash
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME" \
    --workflow-name $WORKFLOW_NAME \
    --parameter reads=s3://lifebit-featured-datasets/pipelines/rnatoy-data \
    --parameter genome=s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.Ggal71.500bpflank.fa \
    --parameter annot=s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.bed.gff \
    --resumable
```

> NOTE: the `--job-config` and `--parameter` options are fully compatible and complementary, so you can use a
`--job-config` file and add extra parameters using `--parameter` in the same call.

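For example, here is a minimal sketch combining both options: it reuses the config file above and adds one extra parameter on the command line (`my_extra_param=some_value` is a hypothetical parameter, shown only for illustration):

```bash
# my_extra_param is a hypothetical parameter, added on top of the config file
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME" \
    --workflow-name $WORKFLOW_NAME \
    --job-config $JOB_PARAMS \
    --parameter my_extra_param=some_value \
    --resumable
```
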
If everything went well, you should see something like:

```console
Executing run...
Job successfully launched to CloudOS, please check the following link: https://cloudos.lifebit.ai/app/jobs/62c83a1191fe06013b7ef355
Your assigned job id is: 62c83a1191fe06013b7ef355
Your current job status is: initializing
To further check your job status you can either go to https://cloudos.lifebit.ai/app/jobs/62c83a1191fe06013b7ef355 or use the following command:
cloudos job status \
    --apikey $MY_API_KEY \
    --cloudos-url https://cloudos.lifebit.ai \
    --job-id 62c83a1191fe06013b7ef355
```

As you can see, the current status is `initializing`. This will change
while the job progresses. To check the status, just run the suggested
command.

Another option is to set the `--wait-completion` parameter, which runs the same
job but waits for its completion:

```bash
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME" \
    --workflow-name $WORKFLOW_NAME \
    --job-config $JOB_PARAMS \
    --resumable \
    --wait-completion
```

When setting this parameter, you can also set `--request-interval` to a bigger number (default is 30s) if the job is quite large. This ensures that the status requests are not sent too close to each other and flagged as spam by the API.

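For example, a large job could be submitted with a longer wait time and a doubled request interval (both values are illustrative):

```bash
# 7200 and 60 are illustrative values for a long-running job
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME" \
    --workflow-name $WORKFLOW_NAME \
    --job-config $JOB_PARAMS \
    --wait-completion \
    --wait-time 7200 \
    --request-interval 60
```
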
If the job takes less than `--wait-time` (3600 seconds by default), the
previous command should have an output similar to:

```console
Executing run...
Job successfully launched to CloudOS, please check the following link: https://cloudos.lifebit.ai/app/jobs/62c83a6191fe06013b7ef363
Your assigned job id is: 62c83a6191fe06013b7ef363
Please, wait until job completion or max wait time of 3600 seconds is reached.
Your current job status is: initializing.
Your current job status is: running.
Your job took 420 seconds to complete successfully.
```

#### Executor support

CloudOS uses the [AWS batch](https://www.nextflow.io/docs/latest/executor.html?highlight=executors#aws-batch) executor by default.
You can choose the AWS batch queue to use from the ones available in your workspace
(see [here](#get-a-list-of-the-available-job-queues)) by passing its name with the `--job-queue` parameter.
If none is specified, the most recent suitable queue in your workspace is selected by default.
Example command:

```bash
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME" \
    --workflow-name $WORKFLOW_NAME \
    --job-config $JOB_PARAMS \
    --resumable
```

> Note: from cloudos-cli 2.7.0, the default executor is AWS batch. The previous Apache [ignite](https://www.nextflow.io/docs/latest/ignite.html#apache-ignite)
> executor is being progressively removed from CloudOS, so it will most likely not be available in your CloudOS. cloudos-cli still supports ignite during this
> period via the `--ignite` flag of the `cloudos job run` command. Please note that if you use the `--ignite` flag in a CloudOS without ignite support,
> the command will fail.

#### Azure execution platform support

CloudOS can also be configured to use Microsoft Azure compute platforms.
If your CloudOS is configured to use Azure, you will need to take the following into consideration:

- When sending jobs to CloudOS using the `cloudos job run` or `cloudos job run-curated-examples` commands, please use the option `--execution-platform azure`.
- Since there are no AWS batch queues in Azure, the `cloudos queue list` command is not available.

Other than that, `cloudos-cli` works very similarly. For instance, this is a typical job submission command:

```bash
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME" \
    --workflow-name $WORKFLOW_NAME \
    --job-config $JOB_PARAMS \
    --resumable \
    --execution-platform azure
```

#### HPC execution support

CloudOS is also prepared to use an HPC compute infrastructure. In that case, take the following into account when submitting jobs with the `cloudos job run` command:

- Use the following parameter: `--execution-platform hpc`.
- Indicate the HPC ID using: `--hpc-id XXXX`.

Example command:

```bash
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME" \
    --workflow-name $WORKFLOW_NAME \
    --job-config $JOB_PARAMS \
    --execution-platform hpc \
    --hpc-id $YOUR_HPC_ID
```

Please note that HPC execution does not support the following parameters, all of which will be ignored:

- `--job-queue`
- `--resumable` | `--do-not-save-logs`
- `--instance-type` | `--instance-disk` | `--cost-limit`
- `--storage-mode` | `--lustre-size`
- `--wdl-mainfile` | `--wdl-importsfile` | `--cromwell-token`

#### Check job status

To check the status of a submitted job, just use the suggested command:

```bash
cloudos job status \
    --apikey $MY_API_KEY \
    --cloudos-url $CLOUDOS \
    --job-id 62c83a1191fe06013b7ef355
```

The expected output should be something similar to:

```console
Executing status...
Your current job status is: completed

To further check your job status you can either go to https://cloudos.lifebit.ai/app/jobs/62c83a1191fe06013b7ef355 or repeat the command you just used.
```

#### Get a list of your jobs from a CloudOS workspace

You can get a summary of your last 30 submitted jobs (or another number of most recent jobs, using the `--last-n-jobs n`
parameter) in two different formats:

- CSV: a table with a minimal predefined set of columns by default, or all the
available columns when using the `--all-fields` argument.
- JSON: all the available information from your jobs, in JSON format.

To get a list of your last 30 jobs submitted to a given workspace, in CSV format, use
the following command:

```bash
cloudos job list \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --output-format csv \
    --all-fields
```

The expected output is something similar to:

```console
Executing list...
Job list collected with a total of 30 jobs.
Job list saved to joblist.csv
```

In addition, a file named `joblist.csv` is created.

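As a quick sketch, you can inspect which columns were saved using standard shell tools (assuming the default `joblist.csv` file name):

```bash
# print the CSV header, one column name per line
head -n 1 joblist.csv | tr ',' '\n'
```
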
To get the same information, but for all your jobs and in JSON format, use the following command:

```bash
cloudos job list \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --last-n-jobs all \
    --output-format json
```
```console
Executing list...
Job list collected with a total of 276 jobs.
Job list saved to joblist.json
```

#### Get a list of all available workflows from a CloudOS workspace

You can get a summary of all the available workspace workflows in two different formats:
- CSV: a table with a minimal predefined set of columns by default, or all the
available columns when using the `--all-fields` parameter.
- JSON: all the available information from workflows, in JSON format.

To get a CSV table with all the available workflows for a given workspace, use
the following command:

```bash
cloudos workflow list \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --output-format csv \
    --all-fields
```

The expected output is something similar to:

```console
Executing list...
Workflow list collected with a total of 609 workflows.
Workflow list saved to workflow_list.csv
```

To get the same information, but in JSON format, use the following command:

```bash
cloudos workflow list \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --output-format json
```

```console
Executing list...
Workflow list collected with a total of 609 workflows.
Workflow list saved to workflow_list.json
```

Normally, the collected workflows are those found in the "WORKSPACE TOOLS" section in CloudOS.
With the `--curated` flag, only "CURATED PIPELINES & TOOLS" workflows are collected instead.

```bash
cloudos workflow list \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --curated
```
```console
Executing list...
Workflow list collected with a total of 73 workflows.
Workflow list saved to workflow_list.csv
```

#### Import a Nextflow workflow to a CloudOS workspace

You can import new workflows to your CloudOS workspaces. The only requirements are:

- The workflow is a Nextflow pipeline.
- The workflow repository is located on GitHub or a Bitbucket server.
- If your repository is private, you have access to the repository and you have linked your GitHub or Bitbucket server account to CloudOS.
- You have obtained the `repository_id` and the `repository_project_id` (see below).

**How to get `repository_id` and `repository_project_id` from a GitHub repository**

**Option 1: searching in the page source code**

1. Go to the repository URL. Right-click on the page to open the context menu and click on "View Page Source".

![Github Repo right click](docs/github_right_click.png)

2. For collecting the `repository_project_id`, search for the `octolytics-dimension-user_id` string in the source code. The `content` value is your `repository_project_id` (`30871219` in the example image).

![Github Repo owner id](docs/github_user_id.png)

3. For collecting the `repository_id`, search for the `octolytics-dimension-repository_id` string in the source code. The `content` value is your `repository_id` (`122059362` in the example image).

![Github Repo id](docs/github_repository_id.png)

**Option 2: using the GitHub CLI**

If you have access to the repository, you can use the following tools to collect the required values:

- [gh](https://cli.github.com/)
- [jq](https://jqlang.github.io/jq/download/)

For collecting the `repository_project_id`:

```
# If your repo URL is https://github.com/lifebit-ai/DeepVariant
OWNER="lifebit-ai"
REPO="DeepVariant"
repository_project_id=$(gh api -H "Accept: application/vnd.github+json" repos/$OWNER/$REPO | jq .owner.id)
echo $repository_project_id
30871219
```

For collecting the `repository_id`:

```
# If your repo URL is https://github.com/lifebit-ai/DeepVariant
OWNER="lifebit-ai"
REPO="DeepVariant"
repository_id=$(gh api -H "Accept: application/vnd.github+json" repos/$OWNER/$REPO | jq .id)
echo $repository_id
122059362
```

**How to get `repository_project_id` from a Bitbucket server repository**

For Bitbucket server repositories, only the `repository_project_id` is required. To collect it:

**Option 1: using the REST API from your browser**

1. Create a REST API URL from your repo URL by adding `/rest/api/latest` to the URL:

```
Original URL: https://bitbucket.com/projects/MYPROJECT/repos/my-repo
REST API URL: https://bitbucket.com/rest/api/latest/projects/MYPROJECT/repos/my-repo
```

> IMPORTANT NOTE: Please do not use the "clone" URL provided by Bitbucket (the one with the `.git` extension) as your original repository URL; use the actual browser URL, removing the trailing `/browse`.

2. Open the REST API URL in a browser; it will return a JSON output.

3. Your `repository_project_id` is the value of the `project.id` field.

![bitbucket project id](docs/bitbucket_project_id.png)

**Option 2: using cURL**

If you have access to the repository, you can use the following tools to collect the required value:

- [cURL](https://curl.se/)
- [jq](https://jqlang.github.io/jq/download/)

For collecting the `repository_project_id`:

```
BITBUCKET_TOKEN="xxx"
repository_project_id=$(curl https://bitbucket.com/rest/api/latest/projects/MYPROJECT/repos/my-repo -H "Authorization: Bearer $BITBUCKET_TOKEN" | jq .project.id)
echo $repository_project_id
1234
```

#### Usage of the workflow import command

To import GitHub workflows to CloudOS, you can use the following command:

```bash
# Example workflow to import: https://github.com/lifebit-ai/DeepVariant
WORKFLOW_URL="https://github.com/lifebit-ai/DeepVariant"

# You will need the repository_project_id and repository_id values explained above
REPOSITORY_PROJECT_ID=30871219
REPOSITORY_ID=122059362

cloudos workflow import \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --workflow-url $WORKFLOW_URL \
    --workflow-name "new_name_for_the_github_workflow" \
    --repository-project-id $REPOSITORY_PROJECT_ID \
    --repository-id $REPOSITORY_ID
```

The expected output will be:

```console
CloudOS workflow functionality: list and import workflows.

Executing workflow import...

[Message] Only Nextflow workflows are currently supported.

Workflow test_import_github_3 was imported successfully with the following ID: 6616a8cb454b09bbb3d9dc20
```

Optionally, you can add a link to your workflow documentation by providing the URL with the `--workflow-docs-link` parameter. E.g.:

```bash
cloudos workflow import \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --workflow-url $WORKFLOW_URL \
    --workflow-name "new_name_for_the_github_workflow" \
    --workflow-docs-link "https://github.com/lifebit-ai/DeepVariant/blob/master/README.md" \
    --repository-project-id $REPOSITORY_PROJECT_ID \
    --repository-id $REPOSITORY_ID
```

To import Bitbucket server workflows, the `--repository-id` parameter is not required:

```bash
WORKFLOW_URL="https://bitbucket.com/projects/MYPROJECT/repos/my-repo"

# You will need only the repository_project_id
REPOSITORY_PROJECT_ID=1234

cloudos workflow import \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --workflow-url $WORKFLOW_URL \
    --workflow-name "new_name_for_the_bitbucket_workflow" \
    --repository-project-id $REPOSITORY_PROJECT_ID
```

> NOTE: please take into account that importing workflows using cloudos-cli is not yet available in all CloudOS workspaces. If you try to use this feature in a workspace without support, you will get the following error message: `It seems your API key is not authorised. Please check if your workspace has support for importing workflows using cloudos-cli`.

#### Get a list of all available projects from a CloudOS workspace

Similarly to the `workflows` functionality, you can get a summary of all the available workspace
projects in two different formats:
- CSV: a table with a minimal predefined set of columns by default, or all the
available columns when using the `--all-fields` parameter.
- JSON: all the available information from projects, in JSON format.

To get a CSV table with all the available projects for a given workspace, use
the following command:

```bash
cloudos project list \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --output-format csv \
    --all-fields
```

The expected output is something similar to:

```console
Executing list...
Workflow list collected with a total of 320 projects.
Workflow list saved to project_list.csv
```

#### Run all Curated Workflows with example parameters

In the "Pipelines" section in CloudOS, there is a special type of workflow called "CURATED PIPELINES & TOOLS". These workflows are
curated and maintained by our team, and some of them can also be tested using example parameters. The following CLI
functionality runs all of these curated workflows with example parameters.

The following example will launch all the workspace curated workflows with example parameters:

```bash
cloudos job run-curated-examples \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME"
```

```console
All 39 curated job launched successfully!
```

You can also wait for all jobs to complete and get a final summary of their statuses using the `--wait-completion` flag:

```bash
cloudos job run-curated-examples \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name "$PROJECT_NAME" \
    --wait-completion
```

> NOTE: currently, this command only runs Nextflow curated workflows.

#### Get a list of the available job queues

Job queues are required for running jobs using the AWS batch executor. The available job queues in your CloudOS workspace are
listed in the "Compute Resources" section in "Settings".
You can get a summary of all the available workspace job queues in two formats:
- CSV: a table with a selection of the available job queue information. Alternatively, you can
get all the information using the `--all-fields` flag.
- JSON: all the available information from job queues, in JSON format.

Example command: getting all available job queues in JSON format.

```bash
cloudos queue list \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --output-format json \
    --output-basename "available_queues"
```

```console
Executing list...
Job queue list collected with a total of 5 queues.
Job queue list saved to available_queues.json
```

> NOTE: the queue name that is visible in CloudOS, and that has to be used in combination with the `--job-queue` parameter, is
the one in the `label` field.

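As a sketch, and assuming the saved JSON is an array of queue objects, you can list the usable queue names with [jq](https://jqlang.github.io/jq/download/):

```bash
# assumes the JSON output is an array of queue objects with a 'label' field
jq -r '.[].label' available_queues.json
```
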
##### Job queues for platform workflows

Platform workflows, i.e., those provided by CloudOS in your workspace as modules, run on separate, specific AWS batch queues.
CloudOS will automatically assign the valid queue, so the user should not specify any queue using the `--job-queue` parameter.
Any attempt to use this parameter will be ignored. Examples of such platform workflows are the "System Tools" and "Data Factory" workflows.

### WDL pipeline support

#### Cromwell server managing

To run WDL pipelines, a Cromwell server must be running in CloudOS. This server can
be accessed to check its status, and to start or stop it, using the following commands:

```bash
# Check Cromwell status
cloudos cromwell status \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID
```

```console
Executing status...
Current Cromwell server status is: Stopped
```

```bash
# Cromwell start
cloudos cromwell start \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID
```

```console
Starting Cromwell server...
Current Cromwell server status is: Initializing

Current Cromwell server status is: Running
```

```bash
# Cromwell stop
cloudos cromwell stop \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID
```

```console
Stopping Cromwell server...
Current Cromwell server status is: Stopped
```

#### Running WDL workflows

To run WDL workflows, the `cloudos job run` command can be used normally, adding two extra
parameters:

- `--wdl-mainfile`: name of the mainFile (*.wdl) file used by the CloudOS workflow.
- `--wdl-importsfile` [Optional]: name of the workflow imports file (importsFile, *.zip).

All the rest of the `cloudos job run` functionality is available.

> NOTE: WDL does not support `profiles`, so the `--nextflow-profile` option is not
available. Instead, use `--job-config` and/or `--parameter`. The format of the job config file is
expected to be the same as for Nextflow pipelines.

Example of a job config file for WDL workflows:

```
params {
    test.hello.name = aasdajdad
    test.bye.nameTwo = asijdadads
    test.number.x = 2
    test.greeter.morning = true
    test.wf_hello_in = bomba
    test.arrayTest = ["lala"]
    test.mapTest = {"some":"props"}
}
```

> NOTE: when using the `--parameter` option, if the value needs quotes (`"`) you will need to escape them.
E.g.: `--parameter test.arrayTest=[\"lala\"]`

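For instance, a minimal sketch passing the escaped array value from the config above directly on the command line (using the same variables as in the full example below):

```bash
# the escaped quotes are required so the array value reaches CloudOS intact
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name $PROJECT_NAME \
    --workflow-name "$WORKFLOW_NAME" \
    --wdl-mainfile $MAINFILE \
    --wdl-importsfile $IMPORTSFILE \
    --parameter test.arrayTest=[\"lala\"] \
    --wait-completion
```
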
```bash
# Configure variables
MY_API_KEY="xxxxx"
CLOUDOS="https://cloudos.lifebit.ai"
WORKSPACE_ID="xxxxx"
PROJECT_NAME="wdl-test"
WORKFLOW_NAME="wdl- test"
MAINFILE="hello.wdl"
IMPORTSFILE="imports_7mb.zip"
JOB_PARAMS="cloudos/examples/wdl.config"

# Run job
cloudos job run \
    --cloudos-url $CLOUDOS \
    --apikey $MY_API_KEY \
    --workspace-id $WORKSPACE_ID \
    --project-name $PROJECT_NAME \
    --workflow-name "$WORKFLOW_NAME" \
    --wdl-mainfile $MAINFILE \
    --wdl-importsfile $IMPORTSFILE \
    --job-config $JOB_PARAMS \
    --wait-completion
```

```console
Executing run...
WDL workflow detected

Current Cromwell server status is: Stopped

Starting Cromwell server...

Current Cromwell server status is: Initializing


Current Cromwell server status is: Running

*******************************************************************************
[WARNING] Cromwell server is now running. Plase, remember to stop it when your
job finishes. You can use the following command:
cloudos cromwell stop \
    --cromwell-token $CROMWELL_TOKEN \
    --cloudos-url $CLOUDOS \
    --workspace-id $WORKSPACE_ID
*******************************************************************************

Job successfully launched to CloudOS, please check the following link: ****
Your assigned job id is: ****
Please, wait until job completion or max wait time of 3600 seconds is reached.
Your current job status is: initializing.
Your current job status is: running.
Your job took 60 seconds to complete successfully.
```

### Import the functionality to your own python scripts

To illustrate how to import the package and use its functionality inside
your own python scripts, we will submit a job and check its
status from inside a python script.

Again, we will set up the environment to ease the work:

```python
import cloudos_cli.jobs.job as jb
import json


# GLOBAL VARS.
apikey = 'xxxxx'
cloudos_url = 'https://cloudos.lifebit.ai'
workspace_id = 'xxxxx'
project_name = 'API jobs'
workflow_name = 'rnatoy'
job_config = 'cloudos/examples/rnatoy.config'
```

First, create the `Job` object:

```python
j = jb.Job(cloudos_url, apikey, None, workspace_id, project_name, workflow_name)
print(j)
```

Then, send the job:

```python
j_id = j.send_job(job_config)
```

To check the status:

```python
j_status = j.get_job_status(j_id)
j_status_h = json.loads(j_status.content)["status"]
print(j_status_h)
```

The status will change while your job progresses, so to check again just
repeat the above code.

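If you prefer not to repeat the call manually, here is a minimal polling sketch (the 30-second interval and the set of transient status names are illustrative, based on the statuses shown earlier):

```python
import time

# Poll the job status until it leaves its transient states
# ('initializing' and 'running' are the transient statuses shown earlier)
while True:
    j_status = j.get_job_status(j_id)
    j_status_h = json.loads(j_status.content)["status"]
    print(j_status_h)
    if j_status_h not in ('initializing', 'running'):
        break
    time.sleep(30)
```
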
You can also collect your last 30 submitted jobs for a given workspace using the
following code.

```python
my_jobs_r = j.get_job_list(workspace_id)
my_jobs = j.process_job_list(my_jobs_r)
print(my_jobs)
```

Or inspect all the available workflows for a given workspace using the
following code.

```python
my_workflows_r = j.get_workflow_list(workspace_id)
my_workflows = j.process_workflow_list(my_workflows_r)
print(my_workflows)
```

Similarly, you can inspect all the available projects for a given workspace using the
following code.

```python
my_projects_r = j.get_project_list(workspace_id)
my_projects = j.process_project_list(my_projects_r)
print(my_projects)
```

#### Running WDL pipelines using your own scripts

You can even run WDL pipelines. First check the Cromwell server status and restart it if it is Stopped:

```python
import cloudos_cli.clos as cl
import cloudos_cli.jobs.job as jb
import json


# GLOBAL VARS.
apikey = 'xxxxx'
cloudos_url = 'https://cloudos.lifebit.ai'
workspace_id = 'xxxxx'
project_name = 'wdl-test'
workflow_name = 'wdl- test'
mainfile = 'hello.wdl'
importsfile = 'imports_7mb.zip'
job_config = 'cloudos/examples/wdl.config'

# First create the cloudos object (named 'clos' so it does not shadow the imported module)
clos = cl.Cloudos(cloudos_url, apikey, None)

# Then, check Cromwell status
c_status = clos.get_cromwell_status(workspace_id)
c_status_h = json.loads(c_status.content)["status"]
print(c_status_h)

# Start Cromwell server
clos.cromwell_switch(workspace_id, 'restart')

# Check again Cromwell status (wait until status: 'Running')
c_status = clos.get_cromwell_status(workspace_id)
c_status_h = json.loads(c_status.content)["status"]
print(c_status_h)

# Send a job (wait until job has status: 'Completed')
j = jb.Job(cloudos_url, apikey, None, workspace_id, project_name, workflow_name, True, mainfile,
           importsfile)
j_id = j.send_job(job_config, workflow_type='wdl', cromwell_id=json.loads(c_status.content)["_id"])
j_status = j.get_job_status(j_id)
j_status_h = json.loads(j_status.content)["status"]
print(j_status_h)

# Stop Cromwell server
clos.cromwell_switch(workspace_id, 'stop')

# Check again Cromwell status
c_status = clos.get_cromwell_status(workspace_id)
c_status_h = json.loads(c_status.content)["status"]
print(c_status_h)
```

### Unit testing

Unit tests require 4 additional packages:

```
pytest>=6.2.5
requests-mock>=1.9.3
responses>=0.21.0
mock>=3.0.5
```

Command to run tests from the `cloudos-cli` main folder:

```
python -m pytest -s -v
```
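
To iterate more quickly on a subset of the tests, you can point pytest at a single test directory or file from the file list above, for example:

```
python -m pytest tests/test_jobs -s -v
```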