pybioos 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioos/__about__.py +1 -1
- bioos/bioos_workflow.py +28 -5
- bioos/errors.py +6 -3
- bioos/resource/workflows.py +235 -10
- bioos/workflow_info.py +208 -0
- {pybioos-0.0.14.dist-info → pybioos-0.0.16.dist-info}/METADATA +10 -12
- {pybioos-0.0.14.dist-info → pybioos-0.0.16.dist-info}/RECORD +11 -10
- {pybioos-0.0.14.dist-info → pybioos-0.0.16.dist-info}/WHEEL +1 -1
- {pybioos-0.0.14.dist-info → pybioos-0.0.16.dist-info}/entry_points.txt +0 -1
- {pybioos-0.0.14.dist-info → pybioos-0.0.16.dist-info}/LICENSE +0 -0
- {pybioos-0.0.14.dist-info → pybioos-0.0.16.dist-info}/top_level.txt +0 -0
bioos/__about__.py
CHANGED
bioos/bioos_workflow.py
CHANGED
|
@@ -4,12 +4,24 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
import re
|
|
6
6
|
import time
|
|
7
|
+
from typing import Dict, Any
|
|
7
8
|
|
|
8
9
|
import pandas as pd
|
|
9
10
|
|
|
10
11
|
from bioos import bioos
|
|
11
12
|
from bioos.errors import NotFoundError, ParameterError
|
|
12
13
|
|
|
14
|
+
def uniquify_columns(cols: list[str]) -> list[str]:
    """Shorten dotted column names to their last segment and make them unique.

    Each name is reduced to the text after its final ``.``. The first
    occurrence of a short name is kept as-is; later occurrences get a numeric
    suffix (``fastq`` -> ``fastq_1`` -> ``fastq_2``).

    A suffixed name is never allowed to clash with a name that has already
    been emitted (for example a real column whose short name is ``fastq_1``);
    the suffix keeps increasing until an unused name is found.

    Args:
        cols: Column names, typically ``workflow.task.input`` style keys.

    Returns:
        A list of the same length as ``cols`` containing pairwise-distinct
        short names, in the original order.
    """
    last_suffix: dict[str, int] = {}  # highest suffix tried so far per base
    taken: set[str] = set()           # every name already emitted
    out: list[str] = []
    for col in cols:
        base = col.split(".")[-1]
        suffix = last_suffix.get(base, 0)
        name = base
        # Advance the suffix until the candidate does not clash with any
        # emitted name, including ones that were literal column names.
        while name in taken:
            suffix += 1
            name = f"{base}_{suffix}"
        last_suffix[base] = suffix
        taken.add(name)
        out.append(name)
    return out
|
|
13
25
|
|
|
14
26
|
def recognize_files_from_input_json(workflow_input_json: dict) -> dict:
|
|
15
27
|
putative_files = {}
|
|
@@ -168,6 +180,9 @@ class Bioos_workflow:
|
|
|
168
180
|
force_reupload: bool = False):
|
|
169
181
|
if not os.path.isfile(input_json_file):
|
|
170
182
|
raise ParameterError('Input_json_file is not found.')
|
|
183
|
+
#给每一个data_model加一个uuid,保证不重复
|
|
184
|
+
if data_model_name == "dm":
|
|
185
|
+
data_model_name = f"dm_{int(time.time())}"
|
|
171
186
|
|
|
172
187
|
input_json = json.load(open(input_json_file))
|
|
173
188
|
self.logger.info("Load json input successfully.")
|
|
@@ -231,7 +246,8 @@ class Bioos_workflow:
|
|
|
231
246
|
df[id_col] = [f"tmp_{x}" for x in list(range(len(df)))]
|
|
232
247
|
df = df.reindex(columns=columns)
|
|
233
248
|
columns = [key.split(".")[-1] for key in df.columns.to_list()]
|
|
234
|
-
df.columns = pd.Index(columns)
|
|
249
|
+
#df.columns = pd.Index(columns)
|
|
250
|
+
df.columns = pd.Index(uniquify_columns(df.columns.to_list()))
|
|
235
251
|
|
|
236
252
|
# write data models
|
|
237
253
|
self.ws.data_models.write({data_model_name: df.applymap(str)},
|
|
@@ -243,6 +259,7 @@ class Bioos_workflow:
|
|
|
243
259
|
for key, _ in unupdate_dict.items():
|
|
244
260
|
unupdate_dict[key] = f'this.{key.split(".")[-1]}'
|
|
245
261
|
|
|
262
|
+
|
|
246
263
|
self.params_submit["inputs"] = json.dumps(unupdate_dict)
|
|
247
264
|
self.params_submit["data_model_name"] = data_model_name
|
|
248
265
|
self.params_submit["row_ids"] = df[id_col].to_list()
|
|
@@ -254,7 +271,7 @@ class Bioos_workflow:
|
|
|
254
271
|
self.logger.info("Build params dict successfully.")
|
|
255
272
|
return self.params_submit
|
|
256
273
|
|
|
257
|
-
def postprocess(self, download=False):
|
|
274
|
+
def postprocess(self, download=False,download_dir="."):
|
|
258
275
|
# 假设全部执行完毕
|
|
259
276
|
# 对运行完成的目录进行下载
|
|
260
277
|
# 证实bioos包只能对文件的list进行下载,不支持文件夹
|
|
@@ -268,9 +285,10 @@ class Bioos_workflow:
|
|
|
268
285
|
|
|
269
286
|
files.append(file)
|
|
270
287
|
|
|
271
|
-
if download:
|
|
288
|
+
if download and files:
|
|
289
|
+
os.makedirs(download_dir, exist_ok=True)
|
|
272
290
|
try:
|
|
273
|
-
self.ws.files.download(files,
|
|
291
|
+
self.ws.files.download(files, download_dir, flatten=False)
|
|
274
292
|
except Exception as e:
|
|
275
293
|
print(f'Some file can not download. \n {e}')
|
|
276
294
|
|
|
@@ -357,6 +375,11 @@ def bioos_workflow():
|
|
|
357
375
|
"--download_results",
|
|
358
376
|
action='store_true',
|
|
359
377
|
help="Download the submission run result files to local current path.")
|
|
378
|
+
parser.add_argument(
|
|
379
|
+
"--download_dir",
|
|
380
|
+
type=str,
|
|
381
|
+
default=".",
|
|
382
|
+
help="本地保存下载结果的目录(默认当前目录)")
|
|
360
383
|
|
|
361
384
|
parsed_args = parser.parse_args()
|
|
362
385
|
|
|
@@ -395,5 +418,5 @@ def bioos_workflow():
|
|
|
395
418
|
print(bw.runs)
|
|
396
419
|
|
|
397
420
|
bw.logger.info("Start to postprocess.")
|
|
398
|
-
bw.postprocess(download=parsed_args.download_results)
|
|
421
|
+
bw.postprocess(download=parsed_args.download_results,download_dir = parsed_args.download_dir)
|
|
399
422
|
bw.logger.info("Postprocess finished.")
|
bioos/errors.py
CHANGED
|
@@ -21,7 +21,7 @@ class EnvironmentConfigurationError(ConfigurationError):
|
|
|
21
21
|
|
|
22
22
|
def __init__(self, env: str):
|
|
23
23
|
"""Initialize the EnvironmentConfigurationError .
|
|
24
|
-
|
|
24
|
+
|
|
25
25
|
:param env: environment name of the configuration
|
|
26
26
|
:type env: str
|
|
27
27
|
"""
|
|
@@ -51,13 +51,16 @@ class ParameterError(Exception):
|
|
|
51
51
|
"""Exception indicating a required parameter not valid
|
|
52
52
|
"""
|
|
53
53
|
|
|
54
|
-
def __init__(self, name: str):
|
|
54
|
+
def __init__(self, name: str, msg: str = None):
|
|
55
55
|
"""Initialize the ParameterError .
|
|
56
56
|
|
|
57
57
|
:param name: name of the parameter
|
|
58
58
|
:type name: str
|
|
59
59
|
"""
|
|
60
|
-
self.
|
|
60
|
+
self.name = name
|
|
61
|
+
self.message = f"parameter '{name}' invalid / not found"
|
|
62
|
+
if msg:
|
|
63
|
+
self.message += f": {msg}"
|
|
61
64
|
super().__init__(self.message)
|
|
62
65
|
|
|
63
66
|
|
bioos/resource/workflows.py
CHANGED
|
@@ -3,7 +3,7 @@ import os
|
|
|
3
3
|
import zipfile
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from io import BytesIO
|
|
6
|
-
from typing import List
|
|
6
|
+
from typing import List, Dict, Optional, Any
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
from cachetools import TTLCache, cached
|
|
@@ -417,7 +417,7 @@ class WorkflowResource(metaclass=SingletonType):
|
|
|
417
417
|
raise ParameterError("name", name)
|
|
418
418
|
|
|
419
419
|
if language != "WDL":
|
|
420
|
-
raise ParameterError("language", language)
|
|
420
|
+
raise ParameterError("language", f"Unsupported language: '{language}'. Only 'WDL' is supported.")
|
|
421
421
|
|
|
422
422
|
if source.startswith("http://") or source.startswith("https://"):
|
|
423
423
|
params = {
|
|
@@ -432,7 +432,50 @@ class WorkflowResource(metaclass=SingletonType):
|
|
|
432
432
|
}
|
|
433
433
|
if token:
|
|
434
434
|
params["Token"] = token
|
|
435
|
-
|
|
435
|
+
return Config.service().create_workflow(params)
|
|
436
|
+
elif os.path.isdir(source):
|
|
437
|
+
# 扫描文件夹中的所有 WDL 文件,并构建相对路径
|
|
438
|
+
# 用 source 来检验上传的是否是文件夹
|
|
439
|
+
source_files = []
|
|
440
|
+
for root, _, files in os.walk(source):
|
|
441
|
+
for file in files:
|
|
442
|
+
if file.endswith('.wdl'):
|
|
443
|
+
full_path = os.path.join(root, file)
|
|
444
|
+
relative_path = os.path.relpath(full_path, source) # 获取文件相对于source的相对路径。
|
|
445
|
+
source_files.append({
|
|
446
|
+
"name": relative_path, # 使用相对路径
|
|
447
|
+
"originFile": open(full_path, "rb").read()
|
|
448
|
+
})
|
|
449
|
+
|
|
450
|
+
if not source_files:
|
|
451
|
+
raise ParameterError("source", "No WDL files found in the specified folder")
|
|
452
|
+
|
|
453
|
+
# 确保主工作流路径是相对路径
|
|
454
|
+
if main_workflow_path:
|
|
455
|
+
if not os.path.exists(main_workflow_path):
|
|
456
|
+
raise ParameterError("main_workflow_path", f"Main workflow file {main_workflow_path} not found")
|
|
457
|
+
main_relative = os.path.relpath(main_workflow_path, source)
|
|
458
|
+
else:
|
|
459
|
+
main_relative = None
|
|
460
|
+
|
|
461
|
+
zip_base64 = zip_files(source_files, "base64")
|
|
462
|
+
|
|
463
|
+
params = {
|
|
464
|
+
"WorkspaceID": self.workspace_id,
|
|
465
|
+
"Name": name,
|
|
466
|
+
"Description": description,
|
|
467
|
+
"Language": language,
|
|
468
|
+
"SourceType": "file",
|
|
469
|
+
"Content": zip_base64,
|
|
470
|
+
}
|
|
471
|
+
if main_relative:
|
|
472
|
+
params["MainWorkflowPath"] = os.path.basename(main_relative)
|
|
473
|
+
if token:
|
|
474
|
+
params["Token"] = token
|
|
475
|
+
|
|
476
|
+
return Config.service().create_workflow(params)
|
|
477
|
+
#单文件上传
|
|
478
|
+
elif os.path.isfile(source) and source.endswith('.wdl'):
|
|
436
479
|
source_files = [{
|
|
437
480
|
"name": os.path.basename(source),
|
|
438
481
|
"originFile": open(source, "rb").read()
|
|
@@ -450,10 +493,9 @@ class WorkflowResource(metaclass=SingletonType):
|
|
|
450
493
|
"Content": zip_base64,
|
|
451
494
|
"MainWorkflowPath": main_workflow_path,
|
|
452
495
|
}
|
|
496
|
+
return Config.service().create_workflow(params)
|
|
453
497
|
else:
|
|
454
|
-
raise ParameterError("source", source)
|
|
455
|
-
|
|
456
|
-
return Config.service().create_workflow(params)
|
|
498
|
+
raise ParameterError("source",f"Workflow source '{source}' does not exist.")
|
|
457
499
|
|
|
458
500
|
def list(self) -> DataFrame:
|
|
459
501
|
"""Lists all workflows' information .
|
|
@@ -506,17 +548,62 @@ class WorkflowResource(metaclass=SingletonType):
|
|
|
506
548
|
|
|
507
549
|
|
|
508
550
|
class Workflow(metaclass=SingletonType):
|
|
551
|
+
"""Represents a workflow in Bio-OS.
|
|
552
|
+
|
|
553
|
+
This class encapsulates all the information and operations related to a workflow,
|
|
554
|
+
including its metadata, inputs, outputs, and execution capabilities.
|
|
555
|
+
"""
|
|
509
556
|
|
|
510
557
|
def __init__(self,
|
|
511
558
|
name: str,
|
|
512
559
|
workspace_id: str,
|
|
513
560
|
bucket: str,
|
|
514
561
|
check: bool = False):
|
|
562
|
+
"""Initialize a workflow instance.
|
|
563
|
+
|
|
564
|
+
Args:
|
|
565
|
+
name: The name of the workflow
|
|
566
|
+
workspace_id: The ID of the workspace containing this workflow
|
|
567
|
+
bucket: The S3 bucket associated with this workflow
|
|
568
|
+
check: Whether to check the workflow existence immediately
|
|
569
|
+
"""
|
|
515
570
|
self.name = name
|
|
516
571
|
self.workspace_id = workspace_id
|
|
517
572
|
self.bucket = bucket
|
|
573
|
+
self._description: str = ""
|
|
574
|
+
self._create_time: int = 0
|
|
575
|
+
self._update_time: int = 0
|
|
576
|
+
self._language: str = "WDL"
|
|
577
|
+
self._source: str = ""
|
|
578
|
+
self._tag: str = ""
|
|
579
|
+
self._token: Optional[str] = None
|
|
580
|
+
self._main_workflow_path: str = ""
|
|
581
|
+
self._status: Dict[str, Optional[str]] = {"Phase": "", "Message": None}
|
|
582
|
+
self._inputs: List[Dict[str, Any]] = []
|
|
583
|
+
self._outputs: List[Dict[str, Any]] = []
|
|
584
|
+
self._owner_name: str = ""
|
|
585
|
+
self._graph: str = ""
|
|
586
|
+
self._source_type: str = ""
|
|
587
|
+
|
|
518
588
|
if check:
|
|
519
|
-
self.
|
|
589
|
+
self.sync()
|
|
590
|
+
|
|
591
|
+
def __repr__(self):
    """Render the workflow's main attributes as a ``WorkflowInfo:`` text block."""
    # Attribute names double as the keys of the rendered mapping; the order
    # here is the order in which they are read and displayed.
    shown = (
        "id",
        "name",
        "description",
        "language",
        "source",
        "tag",
        "main_workflow_path",
        "status",
        "owner_name",
        "create_time",
        "update_time",
    )
    info_dict = dict_str({key: getattr(self, key) for key in shown})
    return f"WorkflowInfo:\n{info_dict}"
|
|
520
607
|
|
|
521
608
|
@property
|
|
522
609
|
@cached(cache=TTLCache(maxsize=100, ttl=1))
|
|
@@ -532,6 +619,140 @@ class Workflow(metaclass=SingletonType):
|
|
|
532
619
|
raise ParameterError("name")
|
|
533
620
|
return res["ID"].iloc[0]
|
|
534
621
|
|
|
622
|
+
@property
def description(self) -> str:
    """Return the workflow description, calling ``sync()`` first if it is empty."""
    if self._description:
        return self._description
    # Nothing stored yet (or the stored value is empty): refresh, then re-read.
    self.sync()
    return self._description
|
|
628
|
+
|
|
629
|
+
@property
def create_time(self) -> int:
    """Return the workflow creation timestamp, calling ``sync()`` first if it is 0."""
    stamp = self._create_time
    if not stamp:
        self.sync()
        stamp = self._create_time
    return stamp
|
|
635
|
+
|
|
636
|
+
@property
def update_time(self) -> int:
    """Return the workflow last-update timestamp, calling ``sync()`` first if it is 0."""
    if self._update_time:
        return self._update_time
    self.sync()
    return self._update_time
|
|
642
|
+
|
|
643
|
+
@property
def language(self) -> str:
    """Return the workflow language (e.g. WDL), calling ``sync()`` first if it is empty.

    NOTE(review): ``__init__`` seeds ``_language`` with "WDL", so this
    accessor does not trigger a sync by itself -- confirm that is intended.
    """
    lang = self._language
    if not lang:
        self.sync()
        lang = self._language
    return lang
|
|
649
|
+
|
|
650
|
+
@property
def source(self) -> str:
    """Return the workflow source location, calling ``sync()`` first if it is empty."""
    if self._source:
        return self._source
    self.sync()
    return self._source
|
|
656
|
+
|
|
657
|
+
@property
def tag(self) -> str:
    """Return the workflow version tag, calling ``sync()`` first if it is empty."""
    current = self._tag
    if not current:
        self.sync()
        current = self._tag
    return current
|
|
663
|
+
|
|
664
|
+
@property
def token(self) -> Optional[str]:
    """Return the workflow access token, or ``None`` when there is none.

    ``sync()`` is invoked on every access for as long as the stored token
    is falsy (``None`` or empty).
    """
    if self._token:
        return self._token
    self.sync()
    return self._token
|
|
670
|
+
|
|
671
|
+
@property
def main_workflow_path(self) -> str:
    """Return the main workflow file path, calling ``sync()`` first if it is empty."""
    path = self._main_workflow_path
    if not path:
        self.sync()
        path = self._main_workflow_path
    return path
|
|
677
|
+
|
|
678
|
+
@property
def status(self) -> Dict[str, Optional[str]]:
    """Return the workflow status mapping, calling ``sync()`` first if no phase is known.

    The mapping is expected to carry ``"Phase"`` and ``"Message"`` keys.
    A mapping without a ``"Phase"`` key is treated the same as an empty
    phase (a sync is attempted) instead of raising ``KeyError``.
    """
    # .get() rather than ["Phase"]: the mapping stored by sync() comes
    # straight from the service response and may not contain the key.
    if not self._status.get("Phase"):
        self.sync()
    return self._status
|
|
684
|
+
@property
def inputs(self) -> List[Dict[str, Any]]:
    """Return the workflow input parameter list, calling ``sync()`` first if it is empty."""
    if self._inputs:
        return self._inputs
    self.sync()
    return self._inputs
|
|
690
|
+
|
|
691
|
+
@property
def outputs(self) -> List[Dict[str, Any]]:
    """Return the workflow output parameter list, calling ``sync()`` first if it is empty."""
    declared = self._outputs
    if not declared:
        self.sync()
        declared = self._outputs
    return declared
|
|
697
|
+
|
|
698
|
+
@property
def owner_name(self) -> str:
    """Return the workflow owner's name, calling ``sync()`` first if it is empty."""
    if self._owner_name:
        return self._owner_name
    self.sync()
    return self._owner_name
|
|
704
|
+
|
|
705
|
+
@property
def graph(self) -> str:
    """Return the workflow graph representation, calling ``sync()`` first if it is empty."""
    rendered = self._graph
    if not rendered:
        self.sync()
        rendered = self._graph
    return rendered
|
|
711
|
+
|
|
712
|
+
@property
def source_type(self) -> str:
    """Return the workflow source type, calling ``sync()`` first if it is empty."""
    if self._source_type:
        return self._source_type
    self.sync()
    return self._source_type
|
|
718
|
+
|
|
719
|
+
@cached(cache=TTLCache(maxsize=100, ttl=1))
def sync(self):
    """Refresh this workflow's cached attributes from the remote service.

    Looks the workflow up by name in the workspace listing, then requests
    the record for that ID and copies its fields onto the private ``_*``
    attributes. The call is wrapped in a ``TTLCache`` with ``ttl=1``.

    Raises:
        ParameterError: No workflow with this name exists in the workspace.
        NotFoundError: The detail query did not return exactly one record.
    """
    res = WorkflowResource(self.workspace_id).list().query(
        f"Name=='{self.name}'")
    if res.empty:
        raise ParameterError("name")

    # Get detailed workflow information
    params = {
        'WorkspaceID': self.workspace_id,
        'Filter': {
            'IDs': [res["ID"].iloc[0]]
        }
    }
    # A response without "Items" (or with a null value) is treated as an
    # empty result so it surfaces as NotFoundError rather than a TypeError
    # from len(None).
    workflows = Config.service().list_workflows(params).get('Items') or []
    if len(workflows) != 1:
        raise NotFoundError("workflow", self.name)

    detail = workflows[0]

    # Update all properties
    self._description = detail.get("Description", "")
    self._create_time = detail.get("CreateTime", 0)
    self._update_time = detail.get("UpdateTime", 0)
    self._language = detail.get("Language", "WDL")
    self._source = detail.get("Source", "")
    self._tag = detail.get("Tag", "")
    self._token = detail.get("Token")
    self._main_workflow_path = detail.get("MainWorkflowPath", "")
    # Fall back to the placeholder when Status is missing or null so that
    # readers of _status always get a mapping.
    self._status = detail.get("Status") or {"Phase": "", "Message": None}
    self._inputs = detail.get("Inputs", [])
    self._outputs = detail.get("Outputs", [])
    self._owner_name = detail.get("OwnerName", "")
    self._graph = detail.get("Graph", "")
    self._source_type = detail.get("SourceType", "")
|
|
755
|
+
|
|
535
756
|
@property
|
|
536
757
|
@cached(cache=TTLCache(maxsize=100, ttl=1))
|
|
537
758
|
def get_cluster(self):
|
|
@@ -552,11 +773,14 @@ class Workflow(metaclass=SingletonType):
|
|
|
552
773
|
return info['ID']
|
|
553
774
|
raise NotFoundError("cluster", "workflow")
|
|
554
775
|
|
|
555
|
-
def query_data_model_id(self, name: str) ->
|
|
776
|
+
def query_data_model_id(self, name: str) -> str:
|
|
556
777
|
"""Gets the id of given data_models among those accessible
|
|
557
778
|
|
|
558
|
-
:
|
|
559
|
-
|
|
779
|
+
Args:
|
|
780
|
+
name: The name of the data model
|
|
781
|
+
|
|
782
|
+
Returns:
|
|
783
|
+
str: The ID of the data model, or empty string if not found
|
|
560
784
|
"""
|
|
561
785
|
res = DataModelResource(self.workspace_id).list(). \
|
|
562
786
|
query(f"Name=='{name}'")
|
|
@@ -640,3 +864,4 @@ class Workflow(metaclass=SingletonType):
|
|
|
640
864
|
submission_id = Config.service().create_submission(params).get("ID")
|
|
641
865
|
|
|
642
866
|
return Submission(self.workspace_id, submission_id).runs
|
|
867
|
+
|
bioos/workflow_info.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
from typing import Dict, Any, List, Optional
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
from bioos import bioos
|
|
5
|
+
from bioos.config import Config
|
|
6
|
+
from bioos.errors import NotFoundError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class WorkflowInfo:
    """Query helper for Bio-OS workflow information."""

    def __init__(self, ak: str, sk: str, endpoint: str = "https://bio-top.miracle.ac.cn"):
        """Store the credentials and apply them to the global Bio-OS ``Config``.

        Args:
            ak: Bio-OS access key.
            sk: Bio-OS secret key.
            endpoint: Bio-OS API endpoint; defaults to
                https://bio-top.miracle.ac.cn.
        """
        self.ak = ak
        self.sk = sk
        self.endpoint = endpoint
        # Configure Bio-OS
        Config.set_access_key(ak)
        Config.set_secret_key(sk)
        Config.set_endpoint(endpoint)

    @staticmethod
    def _fmt_default(raw: Any) -> Optional[str]:
        """Format a ``Default`` field value into a human-readable string.

        - ``None``                      -> ``None``
        - int / float                   -> ``123`` / ``1.23``
        - bool                          -> ``true`` / ``false``
        - string that reads as a bool   -> lower-cased ``true`` / ``false``
        - string that parses as a number -> returned unchanged
        - any other string              -> ``"value"`` (wrapped in double quotes)
        - anything else                 -> ``str(raw)``

        Args:
            raw: The raw default value.

        Returns:
            The formatted default, or ``None`` when there is no default.
        """
        if raw is None:
            return None
        # bool is a subclass of int, so this also turns True/False into
        # 'true' / 'false'.
        if isinstance(raw, (int, float)):
            return str(raw).lower()
        if not isinstance(raw, str):
            return str(raw)

        lowered = raw.lower()
        if lowered in {"true", "false"}:
            return lowered
        # Strings that parse as a number are shown without quotes.
        for parse in (int, float):
            try:
                parse(raw)
            except ValueError:
                continue
            return raw
        return f"\"{raw}\""

    def get_workspace_id(self, workspace_name: str) -> str:
        """Look up a workspace ID by workspace name.

        Args:
            workspace_name: Name of the workspace.

        Returns:
            str: The workspace ID.

        Raises:
            NotFoundError: The name does not match exactly one workspace.
        """
        df = bioos.list_workspaces()
        ser = df[df.Name == workspace_name].ID
        if len(ser) != 1:
            raise NotFoundError("Workspace", workspace_name)
        return ser.to_list()[0]

    def get_workflow(self, workspace_name: str, workflow_name: str):
        """Get a workflow object.

        Args:
            workspace_name: Name of the workspace.
            workflow_name: Name of the workflow.

        Returns:
            The object returned by ``ws.workflow(name=workflow_name)``.

        Raises:
            NotFoundError: The workspace could not be resolved.
        """
        workspace_id = self.get_workspace_id(workspace_name)
        ws = bioos.workspace(workspace_id)
        return ws.workflow(name=workflow_name)

    def list_workflows(self, workspace_name: str) -> List[Dict[str, Any]]:
        """List all workflows in a workspace.

        Args:
            workspace_name: Name of the workspace.

        Returns:
            Whatever ``ws.list_workflows()`` returns for that workspace.
        """
        workspace_id = self.get_workspace_id(workspace_name)
        ws = bioos.workspace(workspace_id)
        return ws.list_workflows()

    def get_workflow_inputs(self, workspace_name: str, workflow_name: str) -> Dict[str, str]:
        """Build an input-parameter template for a workflow.

        Args:
            workspace_name: Name of the workspace.
            workflow_name: Name of the workflow.

        Returns:
            Dict[str, str]: One entry per workflow input, shaped as::

                {
                    "param_name": "Type (optional, default = value)",  # optional input
                    "param_name": "Type"                               # required input
                }

            ``Type`` is the parameter type (String, Int, File, ...),
            ``optional`` marks an optional parameter, and ``value`` is the
            default as formatted by ``_fmt_default``. On any failure the
            result is ``{"error": "<message>"}`` instead.
        """
        try:
            wf = self.get_workflow(workspace_name, workflow_name)
            result = {}

            for item in wf.inputs:
                type_str = item.get("Type", "")
                optional = item.get("Optional", False)
                default = self._fmt_default(item.get("Default"))

                if optional:
                    value = f"{type_str} (optional" + (
                        f", default = {default})" if default is not None else ")")
                else:
                    value = type_str

                result[item["Name"]] = value

            return result

        # Deliberate catch-all (NotFoundError included): callers receive an
        # error dict rather than an exception.
        except Exception as e:
            return {"error": str(e)}

    def get_workflow_outputs(self, workspace_name: str, workflow_name: str) -> Dict[str, str]:
        """Get the output parameters of a workflow.

        Args:
            workspace_name: Name of the workspace.
            workflow_name: Name of the workflow.

        Returns:
            Dict[str, str]: Output name mapped to output type, or
            ``{"error": "<message>"}`` on any failure.
        """
        try:
            wf = self.get_workflow(workspace_name, workflow_name)
            return {output["Name"]: output["Type"] for output in wf.outputs}
        # Deliberate catch-all (NotFoundError included): callers receive an
        # error dict rather than an exception.
        except Exception as e:
            return {"error": str(e)}

    def get_workflow_metadata(self, workspace_name: str, workflow_name: str) -> Dict[str, Any]:
        """Get the metadata of a workflow.

        Args:
            workspace_name: Name of the workspace.
            workflow_name: Name of the workflow.

        Returns:
            Dict[str, Any]: Workflow metadata with the keys:
                - name: workflow name
                - description: workflow description
                - language: workflow language
                - source: workflow source
                - tag: version tag
                - status: workflow status
                - owner_name: owner
                - create_time: creation time
                - update_time: last update time
                - main_workflow_path: main workflow file path
                - source_type: source type
            or ``{"error": "<message>"}`` on any failure.
        """
        try:
            wf = self.get_workflow(workspace_name, workflow_name)
            return {
                "name": wf.name,
                "description": wf.description,
                "language": wf.language,
                "source": wf.source,
                "tag": wf.tag,
                "status": wf.status,
                "owner_name": wf.owner_name,
                "create_time": wf.create_time,
                "update_time": wf.update_time,
                "main_workflow_path": wf.main_workflow_path,
                "source_type": wf.source_type
            }
        # Deliberate catch-all (NotFoundError included): callers receive an
        # error dict rather than an exception.
        except Exception as e:
            return {"error": str(e)}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pybioos
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.16
|
|
4
4
|
Summary: BioOS SDK for Python
|
|
5
5
|
Home-page: https://github.com/GBA-BI/pybioos
|
|
6
6
|
Author: Jilong Liu
|
|
@@ -13,15 +13,13 @@ Classifier: Intended Audience :: Science/Research
|
|
|
13
13
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
14
14
|
Classifier: Topic :: Scientific/Engineering :: Interface Engine/Protocol Translator
|
|
15
15
|
License-File: LICENSE
|
|
16
|
-
Requires-Dist: volcengine
|
|
17
|
-
Requires-Dist: tabulate
|
|
18
|
-
Requires-Dist: click
|
|
19
|
-
Requires-Dist: pandas
|
|
20
|
-
Requires-Dist: tos
|
|
21
|
-
Requires-Dist: cachetools
|
|
22
|
-
Requires-Dist: typing-extensions
|
|
23
|
-
Requires-Dist: apscheduler
|
|
24
|
-
Requires-Dist: colorama
|
|
25
|
-
|
|
26
|
-
UNKNOWN
|
|
16
|
+
Requires-Dist: volcengine >=1.0.61
|
|
17
|
+
Requires-Dist: tabulate >=0.8.10
|
|
18
|
+
Requires-Dist: click >=8.0.0
|
|
19
|
+
Requires-Dist: pandas >=1.3.0
|
|
20
|
+
Requires-Dist: tos ==2.5.6
|
|
21
|
+
Requires-Dist: cachetools >=5.2.0
|
|
22
|
+
Requires-Dist: typing-extensions >=4.4.0
|
|
23
|
+
Requires-Dist: apscheduler >=3.10.4
|
|
24
|
+
Requires-Dist: colorama >=0.4.6
|
|
27
25
|
|
|
@@ -1,14 +1,15 @@
|
|
|
1
|
-
bioos/__about__.py,sha256=
|
|
1
|
+
bioos/__about__.py,sha256=TRnHZsxI7bVBqxAdnBiKbEhtnboUzdISbOREW1kz-io,56
|
|
2
2
|
bioos/__init__.py,sha256=4GZKi13lDTD25YBkGakhZyEQZWTER_OWQMNPoH_UM2c,22
|
|
3
3
|
bioos/bioos.py,sha256=fHzOb1l5wYxw6NVYYZDiFcgk4V28BAgWEc3ev12reWs,2409
|
|
4
|
-
bioos/bioos_workflow.py,sha256=
|
|
4
|
+
bioos/bioos_workflow.py,sha256=BgooweeFT4O7SyProup2gOVSeXDb3pXWge--PD7J-lE,15232
|
|
5
5
|
bioos/bw_import.py,sha256=lQk_ch_tTz8l4bnWniOzWZ1IxI6ZvKlaASkNMsdDGfA,5697
|
|
6
6
|
bioos/bw_import_status_check.py,sha256=sJuso2SAfZWvPzypnGge25Ayv5PsSGRXqSNNwIhNu-E,2794
|
|
7
7
|
bioos/bw_status_check.py,sha256=FVilkawRA7GD1JXUBeaR28W1DfN9bAzYBIAjqi4JIno,2916
|
|
8
8
|
bioos/config.py,sha256=CvFabYqV1BkFWO8fnr5vBf6xNtNzA8hAEVeEIbvAOm8,4307
|
|
9
|
-
bioos/errors.py,sha256=
|
|
9
|
+
bioos/errors.py,sha256=p0fH6JSMYBjul88lMJ7PPwGNh4SYg62-7VMNuUXWl-E,2540
|
|
10
10
|
bioos/get_submission_logs.py,sha256=jUtT8Vic8h_VOcqrqJsTBSonve64RjbKNAyp0wUtIpg,3934
|
|
11
11
|
bioos/log.py,sha256=twiCvf5IgJB7uvzANwBluSlztJN8ZrxbGZUBGlZ0vps,3204
|
|
12
|
+
bioos/workflow_info.py,sha256=bE9h1pE7Co8icS-J5WVHfSAbjef9EtLhpWjbHQcnFfc,6838
|
|
12
13
|
bioos/internal/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
13
14
|
bioos/internal/tos.py,sha256=0R6YN2lxjjZsuMfv0yLSkBmz_LqmzQGb8GagnUMc8EY,12264
|
|
14
15
|
bioos/models/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
@@ -17,7 +18,7 @@ bioos/resource/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
|
17
18
|
bioos/resource/data_models.py,sha256=enKp8yyQI8IbRqe--0Xtyg1XzOwQQPQzoQsx_hNuZ6E,5089
|
|
18
19
|
bioos/resource/files.py,sha256=1HY0IHvq8H843VM2XZIHDdCuXXNcMrlEFhSNqWXmFzE,8456
|
|
19
20
|
bioos/resource/utility.py,sha256=emY7qVLLLvGmQYlVj-_bLAxU7i1GfQOUybdRkfEDwVA,1300
|
|
20
|
-
bioos/resource/workflows.py,sha256=
|
|
21
|
+
bioos/resource/workflows.py,sha256=RZkREd7EzyRqk3gP09HSzeuI3i7Cn7VMhqjp4jj_cjg,29418
|
|
21
22
|
bioos/resource/workspaces.py,sha256=Gmr8y_sjK7TQbhMhQ_7rxqR1KFcwU72I95YYCFrrLBQ,3995
|
|
22
23
|
bioos/service/BioOsService.py,sha256=HuYUEwomHCLpA1MYgVqGyWAQWHM-_BHB-jmy9VsOlnQ,6724
|
|
23
24
|
bioos/service/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
@@ -34,9 +35,9 @@ bioos/tests/workspaces.py,sha256=LuuRrTs2XqfE5mGQyJNl9RBtuMb4NZHBJFoO8HMZVYQ,522
|
|
|
34
35
|
bioos/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
36
|
bioos/utils/common_tools.py,sha256=fgMoE_-qZjgfQtUj_pmCTyYDtbJasyfH4Gm3VQsbgBQ,1651
|
|
36
37
|
bioos/utils/workflows.py,sha256=zRbwTUigoM5V5LFOgzQPm3kwxt5Ogz95OFfefJc6Fjo,133
|
|
37
|
-
pybioos-0.0.
|
|
38
|
-
pybioos-0.0.
|
|
39
|
-
pybioos-0.0.
|
|
40
|
-
pybioos-0.0.
|
|
41
|
-
pybioos-0.0.
|
|
42
|
-
pybioos-0.0.
|
|
38
|
+
pybioos-0.0.16.dist-info/LICENSE,sha256=cPkGXsgfPgEhIns7Lt3Avxx0Uy-VbdsoP8jvNGuj3cE,1063
|
|
39
|
+
pybioos-0.0.16.dist-info/METADATA,sha256=9M9p2P6lr9kiUB1KIQ15MZPIADDBFhw8YD1Zjho1oBk,803
|
|
40
|
+
pybioos-0.0.16.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
41
|
+
pybioos-0.0.16.dist-info/entry_points.txt,sha256=O-8Gg-zLuYcnOYbx1BMqAgEsoEZfKwtM19cUctHuYus,327
|
|
42
|
+
pybioos-0.0.16.dist-info/top_level.txt,sha256=llpzydkKVDSaWZgz3bsTUsQmhoQpc_JcRJg2-H-5a2U,6
|
|
43
|
+
pybioos-0.0.16.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|