cloudos-cli 2.36.0__tar.gz → 2.37.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/PKG-INFO +46 -1
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/README.md +45 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/__main__.py +20 -33
- cloudos_cli-2.37.0/cloudos_cli/_version.py +1 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/jobs/job.py +98 -8
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/utils/__init__.py +2 -1
- cloudos_cli-2.37.0/cloudos_cli/utils/array_job.py +254 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli.egg-info/PKG-INFO +46 -1
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli.egg-info/SOURCES.txt +1 -0
- cloudos_cli-2.36.0/cloudos_cli/_version.py +0 -1
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/LICENSE +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/__init__.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/clos.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/configure/__init__.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/configure/configure.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/datasets/__init__.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/datasets/datasets.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/import_wf/__init__.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/import_wf/import_wf.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/jobs/__init__.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/queue/__init__.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/queue/queue.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/utils/cloud.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/utils/details.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/utils/errors.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/utils/requests.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli/utils/resources.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli.egg-info/dependency_links.txt +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli.egg-info/entry_points.txt +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli.egg-info/requires.txt +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/cloudos_cli.egg-info/top_level.txt +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/setup.cfg +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/setup.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/tests/__init__.py +0 -0
- {cloudos_cli-2.36.0 → cloudos_cli-2.37.0}/tests/functions_for_pytest.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cloudos_cli
|
|
3
|
-
Version: 2.36.0
|
|
3
|
+
Version: 2.37.0
|
|
4
4
|
Summary: Python package for interacting with CloudOS
|
|
5
5
|
Home-page: https://github.com/lifebit-ai/cloudos-cli
|
|
6
6
|
Author: David Piñeyro
|
|
@@ -512,6 +512,51 @@ This assumes the interpreter is available on the container’s $PATH. If not, yo
|
|
|
512
512
|
|
|
513
513
|
These options provide flexibility for configuring and running bash array jobs, allowing to tailor the execution for specific requirements.
|
|
514
514
|
|
|
515
|
+
#### Use multiple projects for files in `--parameter` option
|
|
516
|
+
|
|
517
|
+
The `--parameter` option can specify an input file located in a different project than the one given by `--project-name`. The files can only be located inside the project's `Data` subfolder, not `Cohorts` or `Analyses Results`. The accepted structures for different parameter projects are:
|
|
518
|
+
- `-p/--parameter "--file=<project>/Data/file.txt"`
|
|
519
|
+
- `-p/--parameter "--file=<project>/Data/subfolder/file.txt"`
|
|
520
|
+
- `-p/--parameter "--file=Data/subfolder/file.txt"` (the same project as `--project-name`)
|
|
521
|
+
- `-p/--parameter "--file=<project>/Data/subfolder/*.txt"`
|
|
522
|
+
- `-p/--parameter "--file=<project>/Data/*.txt"`
|
|
523
|
+
- `-p/--parameter "--file=Data/*.txt"` (the same project as `--project-name`)
|
|
524
|
+
|
|
525
|
+
The project should be specified at the beginning of the file path. For example:
|
|
526
|
+
|
|
527
|
+
```console
|
|
528
|
+
cloudos bash array-job \
|
|
529
|
+
-p file=Data/input.csv
|
|
530
|
+
...
|
|
531
|
+
```
|
|
532
|
+
This will point to the global project, specified with `--project-name`. In contrast:
|
|
533
|
+
|
|
534
|
+
```console
|
|
535
|
+
cloudos bash array-job \
|
|
536
|
+
-p data=Data/input.csv \
|
|
537
|
+
-p exp=PROJECT_EXPRESSION/Data/input.csv \
|
|
538
|
+
--project-name "ADIPOSE"
|
|
539
|
+
...
|
|
540
|
+
```
|
|
541
|
+
For parameter `exp`, this will point to a project named `PROJECT_EXPRESSION` in the File Explorer, while the `data` parameter will be found in the global project `ADIPOSE`.
|
|
542
|
+
|
|
543
|
+
Apart from files, the parameter can also take glob patterns, for example:
|
|
544
|
+
|
|
545
|
+
```console
|
|
546
|
+
cloudos bash array-job \
|
|
547
|
+
-p data=Data/input.csv \
|
|
548
|
+
-p exp="PROJECT_EXPRESSION/Data/*.csv" \
|
|
549
|
+
--project-name "ADIPOSE"
|
|
550
|
+
...
|
|
551
|
+
```
|
|
552
|
+
This will take all files with the `csv` extension in the specified folder.
|
|
553
|
+
|
|
554
|
+
> [!NOTE]
|
|
555
|
+
> When specifying glob patterns, it is best to wrap them in double quotes so that the shell does not try to expand the pattern locally, e.g. `-p exp="PROJECT_EXPRESSION/Data/*.csv"`.
|
|
556
|
+
|
|
557
|
+
> [!NOTE]
|
|
558
|
+
> Project names in the `--parameter` option can be written with or without a leading forward slash `/`. The following are equivalent: `-p data=/PROJECT1/Data/input.csv` and `-p data=PROJECT1/Data/input.csv`.
|
|
559
|
+
|
|
515
560
|
#### Get path to logs of job from CloudOS
|
|
516
561
|
|
|
517
562
|
Get the path to "Nextflow logs", "Nextflow standard output", and "trace" files. It can be used only on your user's jobs, with any status.
|
|
@@ -477,6 +477,51 @@ This assumes the interpreter is available on the container’s $PATH. If not, yo
|
|
|
477
477
|
|
|
478
478
|
These options provide flexibility for configuring and running bash array jobs, allowing to tailor the execution for specific requirements.
|
|
479
479
|
|
|
480
|
+
#### Use multiple projects for files in `--parameter` option
|
|
481
|
+
|
|
482
|
+
The `--parameter` option can specify an input file located in a different project than the one given by `--project-name`. The files can only be located inside the project's `Data` subfolder, not `Cohorts` or `Analyses Results`. The accepted structures for different parameter projects are:
|
|
483
|
+
- `-p/--parameter "--file=<project>/Data/file.txt"`
|
|
484
|
+
- `-p/--parameter "--file=<project>/Data/subfolder/file.txt"`
|
|
485
|
+
- `-p/--parameter "--file=Data/subfolder/file.txt"` (the same project as `--project-name`)
|
|
486
|
+
- `-p/--parameter "--file=<project>/Data/subfolder/*.txt"`
|
|
487
|
+
- `-p/--parameter "--file=<project>/Data/*.txt"`
|
|
488
|
+
- `-p/--parameter "--file=Data/*.txt"` (the same project as `--project-name`)
|
|
489
|
+
|
|
490
|
+
The project should be specified at the beginning of the file path. For example:
|
|
491
|
+
|
|
492
|
+
```console
|
|
493
|
+
cloudos bash array-job \
|
|
494
|
+
-p file=Data/input.csv
|
|
495
|
+
...
|
|
496
|
+
```
|
|
497
|
+
This will point to the global project, specified with `--project-name`. In contrast:
|
|
498
|
+
|
|
499
|
+
```console
|
|
500
|
+
cloudos bash array-job \
|
|
501
|
+
-p data=Data/input.csv \
|
|
502
|
+
-p exp=PROJECT_EXPRESSION/Data/input.csv \
|
|
503
|
+
--project-name "ADIPOSE"
|
|
504
|
+
...
|
|
505
|
+
```
|
|
506
|
+
For parameter `exp`, this will point to a project named `PROJECT_EXPRESSION` in the File Explorer, while the `data` parameter will be found in the global project `ADIPOSE`.
|
|
507
|
+
|
|
508
|
+
Apart from files, the parameter can also take glob patterns, for example:
|
|
509
|
+
|
|
510
|
+
```console
|
|
511
|
+
cloudos bash array-job \
|
|
512
|
+
-p data=Data/input.csv \
|
|
513
|
+
-p exp="PROJECT_EXPRESSION/Data/*.csv" \
|
|
514
|
+
--project-name "ADIPOSE"
|
|
515
|
+
...
|
|
516
|
+
```
|
|
517
|
+
This will take all files with the `csv` extension in the specified folder.
|
|
518
|
+
|
|
519
|
+
> [!NOTE]
|
|
520
|
+
> When specifying glob patterns, it is best to wrap them in double quotes so that the shell does not try to expand the pattern locally, e.g. `-p exp="PROJECT_EXPRESSION/Data/*.csv"`.
|
|
521
|
+
|
|
522
|
+
> [!NOTE]
|
|
523
|
+
> Project names in the `--parameter` option can be written with or without a leading forward slash `/`. The following are equivalent: `-p data=/PROJECT1/Data/input.csv` and `-p data=PROJECT1/Data/input.csv`.
|
|
524
|
+
|
|
480
525
|
#### Get path to logs of job from CloudOS
|
|
481
526
|
|
|
482
527
|
Get the path to "Nextflow logs", "Nextflow standard output", and "trace" files. It can be used only on your user's jobs, with any status.
|
|
@@ -16,6 +16,7 @@ from rich.table import Table
|
|
|
16
16
|
from cloudos_cli.datasets import Datasets
|
|
17
17
|
from cloudos_cli.utils.resources import ssl_selector, format_bytes
|
|
18
18
|
from rich.style import Style
|
|
19
|
+
from cloudos_cli.utils.array_job import generate_datasets_for_project
|
|
19
20
|
from cloudos_cli.utils.details import get_path
|
|
20
21
|
|
|
21
22
|
|
|
@@ -2159,7 +2160,7 @@ def run_bash_job(ctx,
|
|
|
2159
2160
|
hpc_id=None,
|
|
2160
2161
|
cost_limit=cost_limit,
|
|
2161
2162
|
verify=verify_ssl,
|
|
2162
|
-
command=command,
|
|
2163
|
+
command={"command": command},
|
|
2163
2164
|
cpus=cpus,
|
|
2164
2165
|
memory=memory)
|
|
2165
2166
|
|
|
@@ -2219,7 +2220,12 @@ def run_bash_job(ctx,
|
|
|
2219
2220
|
help=('A single parameter to pass to the job call. It should be in the ' +
|
|
2220
2221
|
'following form: parameter_name=parameter_value. E.g.: ' +
|
|
2221
2222
|
'-p --test=value or -p -test=value or -p test=value. You can use this option as many ' +
|
|
2222
|
-
'times as parameters you want to include.'
|
|
2223
|
+
'times as parameters you want to include. ' +
|
|
2224
|
+
'For parameters pointing to a file, the format expected is ' +
|
|
2225
|
+
'parameter_name=<project>/Data/parameter_value. The parameter value must be a ' +
|
|
2226
|
+
'file located in the `Data` subfolder. If no <project> is specified, it defaults to ' +
|
|
2227
|
+
'the project specified by the profile or --project-name parameter. ' +
|
|
2228
|
+
'E.g.: -p "--file=Data/file.txt" or "--file=<project>/Data/folder/file.txt"'))
|
|
2223
2229
|
@click.option('--job-name',
|
|
2224
2230
|
help='The name of the job. Default=new_job.',
|
|
2225
2231
|
default='new_job')
|
|
@@ -2405,35 +2411,6 @@ def run_bash_array_job(ctx,
|
|
|
2405
2411
|
"|": { "api": "%7C", "file": "|" }
|
|
2406
2412
|
}
|
|
2407
2413
|
|
|
2408
|
-
# Setup datasets
|
|
2409
|
-
try:
|
|
2410
|
-
ds = Datasets(
|
|
2411
|
-
cloudos_url=cloudos_url,
|
|
2412
|
-
apikey=apikey,
|
|
2413
|
-
workspace_id=workspace_id,
|
|
2414
|
-
project_name=array_file_project,
|
|
2415
|
-
verify=verify_ssl,
|
|
2416
|
-
cromwell_token=None
|
|
2417
|
-
)
|
|
2418
|
-
if custom_script_project is not None:
|
|
2419
|
-
# If a custom script project is specified, create a new Datasets object for it
|
|
2420
|
-
# This allows the user to run custom scripts in a different project
|
|
2421
|
-
ds_custom = Datasets(
|
|
2422
|
-
cloudos_url=cloudos_url,
|
|
2423
|
-
apikey=apikey,
|
|
2424
|
-
workspace_id=workspace_id,
|
|
2425
|
-
project_name=custom_script_project,
|
|
2426
|
-
verify=verify_ssl,
|
|
2427
|
-
cromwell_token=None
|
|
2428
|
-
)
|
|
2429
|
-
except BadRequestException as e:
|
|
2430
|
-
if 'Forbidden' in str(e):
|
|
2431
|
-
print('[Error] It seems your call is not authorised. Please check if ' +
|
|
2432
|
-
'your workspace is restricted by Airlock and if your API key is valid.')
|
|
2433
|
-
sys.exit(1)
|
|
2434
|
-
else:
|
|
2435
|
-
raise e
|
|
2436
|
-
|
|
2437
2414
|
# setup important options for the job
|
|
2438
2415
|
if do_not_save_logs:
|
|
2439
2416
|
save_logs = False
|
|
@@ -2453,7 +2430,12 @@ def run_bash_array_job(ctx,
|
|
|
2453
2430
|
repository_platform=repository_platform, verify=verify_ssl)
|
|
2454
2431
|
|
|
2455
2432
|
# retrieve columns
|
|
2456
|
-
r = j.retrieve_cols_from_array_file(
|
|
2433
|
+
r = j.retrieve_cols_from_array_file(
|
|
2434
|
+
array_file,
|
|
2435
|
+
generate_datasets_for_project(cloudos_url, apikey, workspace_id, project_name, verify_ssl),
|
|
2436
|
+
separators[separator]['api'],
|
|
2437
|
+
verify_ssl
|
|
2438
|
+
)
|
|
2457
2439
|
|
|
2458
2440
|
if not disable_column_check:
|
|
2459
2441
|
columns = json.loads(r.content).get("headers", None)
|
|
@@ -2470,7 +2452,12 @@ def run_bash_array_job(ctx,
|
|
|
2470
2452
|
columns = []
|
|
2471
2453
|
|
|
2472
2454
|
# setup parameters for the job
|
|
2473
|
-
cmd = j.setup_params_array_file(
|
|
2455
|
+
cmd = j.setup_params_array_file(
|
|
2456
|
+
custom_script_path,
|
|
2457
|
+
generate_datasets_for_project(cloudos_url, apikey, workspace_id, custom_script_project, verify_ssl),
|
|
2458
|
+
command,
|
|
2459
|
+
separators[separator]['file']
|
|
2460
|
+
)
|
|
2474
2461
|
|
|
2475
2462
|
# check columns in the array file vs parameters added
|
|
2476
2463
|
if not disable_column_check and array_parameter:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '2.37.0'
|
|
@@ -10,6 +10,8 @@ from cloudos_cli.utils.errors import BadRequestException
|
|
|
10
10
|
from cloudos_cli.utils.requests import retry_requests_post, retry_requests_get
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
import base64
|
|
13
|
+
from cloudos_cli.utils.array_job import classify_pattern, get_file_or_folder_id, extract_project
|
|
14
|
+
import os
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
@dataclass
|
|
@@ -382,14 +384,8 @@ class Job(Cloudos):
|
|
|
382
384
|
p_name = p_split[0]
|
|
383
385
|
p_value = '='.join(p_split[1:])
|
|
384
386
|
if workflow_type == 'docker':
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
parameter_kind = "textValue"
|
|
388
|
-
param = {"prefix": prefix,
|
|
389
|
-
"name": p_name.lstrip('-'),
|
|
390
|
-
"parameterKind": parameter_kind,
|
|
391
|
-
"textValue": p_value}
|
|
392
|
-
workflow_params.append(param)
|
|
387
|
+
# will differentiate between text, data items and glob patterns
|
|
388
|
+
workflow_params.append(self.docker_workflow_param_processing(p, self.project_name))
|
|
393
389
|
elif workflow_type == 'wdl':
|
|
394
390
|
param = {"prefix": "",
|
|
395
391
|
"name": p_name,
|
|
@@ -834,3 +830,97 @@ class Job(Cloudos):
|
|
|
834
830
|
}
|
|
835
831
|
|
|
836
832
|
return ap_param
|
|
833
|
+
|
|
834
|
+
def docker_workflow_param_processing(self, param, project_name):
|
|
835
|
+
"""
|
|
836
|
+
Processes a Docker workflow parameter and determines its type and associated metadata.
|
|
837
|
+
|
|
838
|
+
Parameters
|
|
839
|
+
----------
|
|
840
|
+
param : str
|
|
841
|
+
The parameter string in the format '--param_name=value'.
|
|
842
|
+
It can represent a file path, a glob pattern, or a simple text value.
|
|
843
|
+
project_name : str
|
|
844
|
+
The name of the current project to use if no specific project is extracted from the parameter.
|
|
845
|
+
|
|
846
|
+
Returns:
|
|
847
|
+
dict: A dictionary containing the processed parameter details. The structure of the dictionary depends on the type of the parameter:
|
|
848
|
+
- For glob patterns:
|
|
849
|
+
{
|
|
850
|
+
"name": str, # Parameter name without leading dashes.
|
|
851
|
+
"prefix": str, # Prefix ('--' or '-') based on the parameter format.
|
|
852
|
+
"globPattern": str, # The glob pattern extracted from the parameter.
|
|
853
|
+
"parameterKind": str, # Always "globPattern".
|
|
854
|
+
"folder": str # Folder ID associated with the glob pattern.
|
|
855
|
+
- For file paths:
|
|
856
|
+
{
|
|
857
|
+
"name": str, # Parameter name without leading dashes.
|
|
858
|
+
"prefix": str, # Prefix ('--' or '-') based on the parameter format.
|
|
859
|
+
"parameterKind": str, # Always "dataItem".
|
|
860
|
+
"dataItem": {
|
|
861
|
+
"kind": str, # Always "File".
|
|
862
|
+
"item": str # File ID associated with the file path.
|
|
863
|
+
- For text values:
|
|
864
|
+
{
|
|
865
|
+
"name": str, # Parameter name without leading dashes.
|
|
866
|
+
"prefix": str, # Prefix ('--' or '-') based on the parameter format.
|
|
867
|
+
"parameterKind": str, # Always "textValue".
|
|
868
|
+
"textValue": str # The text value extracted from the parameter.
|
|
869
|
+
|
|
870
|
+
Notes
|
|
871
|
+
-----
|
|
872
|
+
- The function uses helper methods `extract_project`, `classify_pattern`, and `get_file_or_folder_id` to process the parameter.
|
|
873
|
+
- If the parameter represents a file path or glob pattern, the function retrieves the corresponding file or folder ID from the cloud workspace.
|
|
874
|
+
- If the parameter does not match any specific pattern or file extension, it is treated as a simple text value.
|
|
875
|
+
"""
|
|
876
|
+
|
|
877
|
+
# split '--param_name=example_test'
|
|
878
|
+
# name -> '--param_name'
|
|
879
|
+
# rest -> 'example_test'
|
|
880
|
+
name, rest = param.split('=', 1)
|
|
881
|
+
|
|
882
|
+
# e.g. "/Project/Subproject/file.csv", project is "Project"
|
|
883
|
+
# e.g "Data/input.csv", project is '', leaving the global project name
|
|
884
|
+
# e.g "-p --test=value", project is ''
|
|
885
|
+
project, file_path = extract_project(rest)
|
|
886
|
+
current_project = project if project != '' else project_name
|
|
887
|
+
|
|
888
|
+
# e.g. "/Project/Subproject/file.csv"
|
|
889
|
+
command_path = Path(file_path)
|
|
890
|
+
command_dir = str(command_path.parent)
|
|
891
|
+
command_name = command_path.name
|
|
892
|
+
_, ext = os.path.splitext(command_name)
|
|
893
|
+
prefix = "--" if name.startswith('--') else ("-" if name.startswith('-') else "")
|
|
894
|
+
if classify_pattern(rest) in ["regex", "glob"]:
|
|
895
|
+
if not (file_path.startswith('/Data') or file_path.startswith('Data')):
|
|
896
|
+
raise ValueError("[ERROR] The file path inside the project must start with '/Data' or 'Data'. ")
|
|
897
|
+
|
|
898
|
+
folder = get_file_or_folder_id(self.cloudos_url, self.apikey, self.workspace_id, current_project, self.verify, command_dir, command_name, is_file=False)
|
|
899
|
+
return {
|
|
900
|
+
"name": f"{name.lstrip('-')}",
|
|
901
|
+
"prefix": f"{prefix}",
|
|
902
|
+
'globPattern': command_name,
|
|
903
|
+
"parameterKind": "globPattern",
|
|
904
|
+
"folder": f"{folder}"
|
|
905
|
+
}
|
|
906
|
+
elif ext:
|
|
907
|
+
if not (file_path.startswith('/Data') or file_path.startswith('Data')):
|
|
908
|
+
raise ValueError("[ERROR] The file path inside the project must start with '/Data' or 'Data'. ")
|
|
909
|
+
|
|
910
|
+
file = get_file_or_folder_id(self.cloudos_url, self.apikey, self.workspace_id, current_project, self.verify, command_dir, command_name, is_file=True)
|
|
911
|
+
return {
|
|
912
|
+
"name": f"{name.lstrip('-')}",
|
|
913
|
+
"prefix": f"{prefix}",
|
|
914
|
+
"parameterKind": "dataItem",
|
|
915
|
+
"dataItem": {
|
|
916
|
+
"kind": "File",
|
|
917
|
+
"item": f"{file}"
|
|
918
|
+
}
|
|
919
|
+
}
|
|
920
|
+
else:
|
|
921
|
+
return {
|
|
922
|
+
"name": f"{name.lstrip('-')}",
|
|
923
|
+
"prefix": f"{prefix}",
|
|
924
|
+
"parameterKind": "textValue",
|
|
925
|
+
"textValue": f"{rest}"
|
|
926
|
+
}
|
|
@@ -7,6 +7,7 @@ from .requests import retry_requests_get, retry_requests_post, retry_requests_pu
|
|
|
7
7
|
from .resources import format_bytes, ssl_selector
|
|
8
8
|
from .cloud import find_cloud
|
|
9
9
|
from .cloud import find_cloud
|
|
10
|
+
from .array_job import is_valid_regex, is_glob_pattern, is_probably_regex, classify_pattern, generate_datasets_for_project, get_file_or_folder_id
|
|
10
11
|
from .details import get_path
|
|
11
12
|
|
|
12
|
-
__all__ = ['errors', 'requests', 'resources', 'cloud', 'details']
|
|
13
|
+
__all__ = ['errors', 'requests', 'resources', 'cloud', 'details', 'array_job']
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import sys
|
|
3
|
+
from cloudos_cli.utils.errors import BadRequestException
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def is_valid_regex(s):
|
|
7
|
+
"""
|
|
8
|
+
Validates whether the given string is a valid regular expression.
|
|
9
|
+
|
|
10
|
+
Parameters
|
|
11
|
+
----------
|
|
12
|
+
s : str
|
|
13
|
+
The string to be checked as a regular expression.
|
|
14
|
+
|
|
15
|
+
Returns
|
|
16
|
+
-------
|
|
17
|
+
bool
|
|
18
|
+
True if the string is a valid regular expression, False otherwise.
|
|
19
|
+
"""
|
|
20
|
+
try:
|
|
21
|
+
re.compile(s)
|
|
22
|
+
return True
|
|
23
|
+
except re.error:
|
|
24
|
+
return False
|
|
25
|
+
|
|
26
|
+
def is_glob_pattern(s):
|
|
27
|
+
"""
|
|
28
|
+
Check if a given string contains glob pattern characters.
|
|
29
|
+
|
|
30
|
+
Glob patterns are commonly used for filename matching and include
|
|
31
|
+
special characters such as '*', '?', and '['.
|
|
32
|
+
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
s : str
|
|
36
|
+
The string to check for glob pattern characters.
|
|
37
|
+
|
|
38
|
+
Returns
|
|
39
|
+
-------
|
|
40
|
+
bool
|
|
41
|
+
True if the string contains any glob pattern characters, otherwise False.
|
|
42
|
+
"""
|
|
43
|
+
return any(char in s for char in "*?[")
|
|
44
|
+
|
|
45
|
+
def is_probably_regex(s):
|
|
46
|
+
"""
|
|
47
|
+
Determines if a given string is likely a regular expression.
|
|
48
|
+
|
|
49
|
+
This function checks whether the input string matches common patterns
|
|
50
|
+
that are indicative of regular expressions. It first validates the
|
|
51
|
+
string using `is_valid_regex(s)` and then searches for specific regex
|
|
52
|
+
indicators such as quantifiers, character classes, anchors, and
|
|
53
|
+
alternation.
|
|
54
|
+
|
|
55
|
+
Parameters
|
|
56
|
+
----------
|
|
57
|
+
s : str
|
|
58
|
+
The string to evaluate.
|
|
59
|
+
|
|
60
|
+
Returns
|
|
61
|
+
-------
|
|
62
|
+
bool
|
|
63
|
+
True if the string is likely a regular expression, False otherwise.
|
|
64
|
+
|
|
65
|
+
Notes
|
|
66
|
+
-----
|
|
67
|
+
The function assumes the existence of `is_valid_regex(s)` which
|
|
68
|
+
validates whether the input string is a valid regex.
|
|
69
|
+
"""
|
|
70
|
+
if not is_valid_regex(s):
|
|
71
|
+
return False
|
|
72
|
+
|
|
73
|
+
# Patterns that usually indicate actual regex use (not just file names)
|
|
74
|
+
regex_indicators = [
|
|
75
|
+
r"\.\*", r"\.\+", r"\\[dws]", r"\[[^\]]+\]", r"\([^\)]+\)",
|
|
76
|
+
r"\{\d+(,\d*)?\}", r"\^", r"\$", r"\|"
|
|
77
|
+
]
|
|
78
|
+
return any(re.search(pat, s) for pat in regex_indicators)
|
|
79
|
+
|
|
80
|
+
def classify_pattern(s):
|
|
81
|
+
"""
|
|
82
|
+
Classifies a given string pattern into one of three categories: "regex", "glob", or "exact".
|
|
83
|
+
|
|
84
|
+
Parameters
|
|
85
|
+
----------
|
|
86
|
+
s : str
|
|
87
|
+
The string pattern to classify.
|
|
88
|
+
|
|
89
|
+
Returns
|
|
90
|
+
-------
|
|
91
|
+
str: A string indicating the type of pattern:
|
|
92
|
+
- "regex" if the pattern is likely a regular expression.
|
|
93
|
+
- "glob" if the pattern matches glob-style syntax.
|
|
94
|
+
- "exact" if the pattern does not match regex or glob syntax.
|
|
95
|
+
"""
|
|
96
|
+
if is_probably_regex(s):
|
|
97
|
+
return "regex"
|
|
98
|
+
elif is_glob_pattern(s):
|
|
99
|
+
return "glob"
|
|
100
|
+
else:
|
|
101
|
+
return "exact"
|
|
102
|
+
|
|
103
|
+
def generate_datasets_for_project(cloudos_url, apikey, workspace_id, project_name, verify_ssl):
|
|
104
|
+
"""
|
|
105
|
+
Generate datasets for a specified project in a CloudOS workspace.
|
|
106
|
+
|
|
107
|
+
This function initializes a `Datasets` object for the given project and handles
|
|
108
|
+
potential errors such as missing project elements or unauthorized API calls.
|
|
109
|
+
|
|
110
|
+
Parameters
|
|
111
|
+
----------
|
|
112
|
+
cloudos_url : str
|
|
113
|
+
The URL of the CloudOS instance.
|
|
114
|
+
apikey : str
|
|
115
|
+
The API key for authentication.
|
|
116
|
+
workspace_id : str
|
|
117
|
+
The ID of the workspace where the project resides.
|
|
118
|
+
project_name : str
|
|
119
|
+
The name of the project for which datasets are generated.
|
|
120
|
+
verify_ssl : bool
|
|
121
|
+
Whether to verify SSL certificates during API calls.
|
|
122
|
+
|
|
123
|
+
Returns
|
|
124
|
+
-------
|
|
125
|
+
Datasets
|
|
126
|
+
An instance of the `Datasets` class initialized for the specified project.
|
|
127
|
+
|
|
128
|
+
Raises
|
|
129
|
+
------
|
|
130
|
+
ValueError
|
|
131
|
+
If the specified project is not found in the workspace.
|
|
132
|
+
BadRequestException
|
|
133
|
+
If the API call is unauthorized or encounters other issues.
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
# this avoids circular import error if import is added at the top
|
|
137
|
+
from cloudos_cli.datasets import Datasets
|
|
138
|
+
try:
|
|
139
|
+
ds = Datasets(
|
|
140
|
+
cloudos_url=cloudos_url,
|
|
141
|
+
apikey=apikey,
|
|
142
|
+
workspace_id=workspace_id,
|
|
143
|
+
project_name=project_name,
|
|
144
|
+
verify=verify_ssl,
|
|
145
|
+
cromwell_token=None
|
|
146
|
+
)
|
|
147
|
+
except ValueError:
|
|
148
|
+
print(f"[ERROR] No {project_name} element in projects was found")
|
|
149
|
+
sys.exit(1)
|
|
150
|
+
|
|
151
|
+
except BadRequestException as e:
|
|
152
|
+
if 'Forbidden' in str(e):
|
|
153
|
+
print('[Error] It seems your call is not authorised. Please check if ' +
|
|
154
|
+
'your workspace is restricted by Airlock and if your API key is valid.')
|
|
155
|
+
sys.exit(1)
|
|
156
|
+
else:
|
|
157
|
+
raise e
|
|
158
|
+
|
|
159
|
+
return ds
|
|
160
|
+
|
|
161
|
+
def get_file_or_folder_id(cloudos_url, apikey, workspace_id, project_name, verify_ssl, command_dir, command_name, is_file=True):
|
|
162
|
+
"""Retrieve the ID of a specific file or folder within a CloudOS workspace.
|
|
163
|
+
|
|
164
|
+
Parameters
|
|
165
|
+
----------
|
|
166
|
+
cloudos_url : str
|
|
167
|
+
The base URL of the CloudOS API.
|
|
168
|
+
apikey : str
|
|
169
|
+
The API key for authenticating requests to the CloudOS API.
|
|
170
|
+
workspace_id : str
|
|
171
|
+
The ID of the workspace containing the project.
|
|
172
|
+
project_name : str
|
|
173
|
+
The name of the project within the workspace.
|
|
174
|
+
verify_ssl : bool
|
|
175
|
+
Whether to verify SSL certificates for the API requests.
|
|
176
|
+
name : str
|
|
177
|
+
The name of the file or folder whose ID is to be retrieved.
|
|
178
|
+
is_file : bool, optional
|
|
179
|
+
Whether to retrieve a file ID (True) or folder ID (False). Default is True.
|
|
180
|
+
|
|
181
|
+
Returns
|
|
182
|
+
-------
|
|
183
|
+
str: The ID of the specified file or folder.
|
|
184
|
+
|
|
185
|
+
Raises
|
|
186
|
+
------
|
|
187
|
+
ValueError
|
|
188
|
+
If the specified file or folder is not found.
|
|
189
|
+
Exception
|
|
190
|
+
If there is an error during the API interaction or data retrieval.
|
|
191
|
+
|
|
192
|
+
Notes
|
|
193
|
+
-----
|
|
194
|
+
- This function uses the `generate_datasets_for_project` function to create a Datasets object for the specified project.
|
|
195
|
+
- The `list_folder_content` method is used for files, and `list_project_content` is used for folders.
|
|
196
|
+
- The function assumes that the IDs are stored in the `"_id"` field of the metadata.
|
|
197
|
+
"""
|
|
198
|
+
# create a Datasets() class
|
|
199
|
+
ds = generate_datasets_for_project(cloudos_url, apikey, workspace_id, project_name, verify_ssl)
|
|
200
|
+
|
|
201
|
+
if is_file:
|
|
202
|
+
# get all files from a folder
|
|
203
|
+
content = ds.list_folder_content(command_dir)
|
|
204
|
+
for file in content['files']:
|
|
205
|
+
if file.get("name") == command_name:
|
|
206
|
+
return file.get("_id", '')
|
|
207
|
+
raise ValueError(f"File '{command_name}' not found in directory '{command_dir}'.")
|
|
208
|
+
else:
|
|
209
|
+
# get all folders from the project
|
|
210
|
+
# check if the command_dir has a sub-folder
|
|
211
|
+
if len(command_dir.split("/")) > 1:
|
|
212
|
+
# get the first folder which is just below the project
|
|
213
|
+
folders = ds.list_folder_content(command_dir.split("/")[0])
|
|
214
|
+
# use the last folder as is listed in the first folder
|
|
215
|
+
folder_to_search = command_dir.split("/")[-1]
|
|
216
|
+
else:
|
|
217
|
+
folders = ds.list_project_content()
|
|
218
|
+
folder_to_search = command_dir
|
|
219
|
+
|
|
220
|
+
for folder in folders['folders']:
|
|
221
|
+
if folder.get("name") == folder_to_search:
|
|
222
|
+
return folder.get("_id", '')
|
|
223
|
+
raise ValueError(f"Folder '{folder_to_search}' not found in project.")
|
|
224
|
+
|
|
225
|
+
def extract_project(path):
|
|
226
|
+
"""
|
|
227
|
+
Extracts the project name and the remaining path from a given file path.
|
|
228
|
+
|
|
229
|
+
The function assumes that a "project" exists if the path contains at least three parts
|
|
230
|
+
when split by slashes. If the path has fewer than three parts, the project name is
|
|
231
|
+
considered empty, and the entire path is returned as the remaining path.
|
|
232
|
+
|
|
233
|
+
Parameters
|
|
234
|
+
----------
|
|
235
|
+
path : str
|
|
236
|
+
The file path to process.
|
|
237
|
+
|
|
238
|
+
Returns
|
|
239
|
+
-------
|
|
240
|
+
tuple: A tuple containing:
|
|
241
|
+
- str: The project name (empty string if no project exists).
|
|
242
|
+
- str: The remaining path after the project name.
|
|
243
|
+
"""
|
|
244
|
+
# Strip slashes and split the path
|
|
245
|
+
parts = path.strip("/").split("/")
|
|
246
|
+
# A "project" exists only if there are at least 3 parts
|
|
247
|
+
# globs needs more than 3 parts i.e. PROJECT/Data/Downloads/*.csv
|
|
248
|
+
if (len(parts) >= 3 and not is_glob_pattern(path)) or \
|
|
249
|
+
(len(parts) > 3 and is_glob_pattern(path)):
|
|
250
|
+
# Return the first part as project name and the rest as remaining path
|
|
251
|
+
return parts[0], "/".join(parts[1:])
|
|
252
|
+
else:
|
|
253
|
+
# project is empty, use the project_name of the function
|
|
254
|
+
return "", "/".join(parts)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cloudos_cli
|
|
3
|
-
Version: 2.36.0
|
|
3
|
+
Version: 2.37.0
|
|
4
4
|
Summary: Python package for interacting with CloudOS
|
|
5
5
|
Home-page: https://github.com/lifebit-ai/cloudos-cli
|
|
6
6
|
Author: David Piñeyro
|
|
@@ -512,6 +512,51 @@ This assumes the interpreter is available on the container’s $PATH. If not, yo
|
|
|
512
512
|
|
|
513
513
|
These options provide flexibility for configuring and running bash array jobs, allowing to tailor the execution for specific requirements.
|
|
514
514
|
|
|
515
|
+
#### Use multiple projects for files in `--parameter` option
|
|
516
|
+
|
|
517
|
+
The `--parameter` option can specify an input file located in a different project than the one given by `--project-name`. The files can only be located inside the project's `Data` subfolder, not `Cohorts` or `Analyses Results`. The accepted structures for different parameter projects are:
|
|
518
|
+
- `-p/--parameter "--file=<project>/Data/file.txt"`
|
|
519
|
+
- `-p/--parameter "--file=<project>/Data/subfolder/file.txt"`
|
|
520
|
+
- `-p/--parameter "--file=Data/subfolder/file.txt"` (the same project as `--project-name`)
|
|
521
|
+
- `-p/--parameter "--file=<project>/Data/subfolder/*.txt"`
|
|
522
|
+
- `-p/--parameter "--file=<project>/Data/*.txt"`
|
|
523
|
+
- `-p/--parameter "--file=Data/*.txt"` (the same project as `--project-name`)
|
|
524
|
+
|
|
525
|
+
The project should be specified at the beginning of the file path. For example:
|
|
526
|
+
|
|
527
|
+
```console
|
|
528
|
+
cloudos bash array-job \
|
|
529
|
+
-p file=Data/input.csv
|
|
530
|
+
...
|
|
531
|
+
```
|
|
532
|
+
This will point to the global project, specified with `--project-name`. In contrast:
|
|
533
|
+
|
|
534
|
+
```console
|
|
535
|
+
cloudos bash array-job \
|
|
536
|
+
-p data=Data/input.csv \
|
|
537
|
+
-p exp=PROJECT_EXPRESSION/Data/input.csv \
|
|
538
|
+
--project-name "ADIPOSE"
|
|
539
|
+
...
|
|
540
|
+
```
|
|
541
|
+
For parameter `exp`, this will point to a project named `PROJECT_EXPRESSION` in the File Explorer, while the `data` parameter will be found in the global project `ADIPOSE`.
|
|
542
|
+
|
|
543
|
+
Apart from files, the parameter can also take glob patterns, for example:
|
|
544
|
+
|
|
545
|
+
```console
|
|
546
|
+
cloudos bash array-job \
|
|
547
|
+
-p data=Data/input.csv \
|
|
548
|
+
-p exp="PROJECT_EXPRESSION/Data/*.csv" \
|
|
549
|
+
--project-name "ADIPOSE"
|
|
550
|
+
...
|
|
551
|
+
```
|
|
552
|
+
This will take all files with the `csv` extension in the specified folder.
|
|
553
|
+
|
|
554
|
+
> [!NOTE]
|
|
555
|
+
> When specifying glob patterns, it is best to wrap them in double quotes so that the shell does not try to expand the pattern locally, e.g. `-p exp="PROJECT_EXPRESSION/Data/*.csv"`.
|
|
556
|
+
|
|
557
|
+
> [!NOTE]
|
|
558
|
+
> Project names in the `--parameter` option can be written with or without a leading forward slash `/`. The following are equivalent: `-p data=/PROJECT1/Data/input.csv` and `-p data=PROJECT1/Data/input.csv`.
|
|
559
|
+
|
|
515
560
|
#### Get path to logs of job from CloudOS
|
|
516
561
|
|
|
517
562
|
Get the path to "Nextflow logs", "Nextflow standard output", and "trace" files. It can be used only on your user's jobs, with any status.
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = '2.36.0'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|