pybioos 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pybioos might be problematic. Click here for more details.

bioos/__about__.py ADDED
@@ -0,0 +1,4 @@
1
+ # coding:utf-8
2
+
3
+ # Package version
4
+ __version__ = "0.0.3"
bioos/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.0.3"
bioos/bioos.py ADDED
@@ -0,0 +1,90 @@
1
+ from pandas import DataFrame
2
+ from volcengine.const.Const import REGION_CN_NORTH1
3
+
4
+ from bioos.config import Config
5
+ from bioos.resource.utility import UtilityResource
6
+ from bioos.resource.workspaces import Workspace
7
+
8
+
9
def status() -> Config.LoginInfo:
    """Report the login information currently held by the SDK.

    *Example*:
    ::

        bioos.status()

    :return: Read-only view of the configured endpoint, region, keys and login status
    :rtype: Config.LoginInfo
    """
    current_login = Config.login_info()
    return current_login
21
+
22
+
23
def login(endpoint: str,
          access_key: str,
          secret_key: str,
          region: str = REGION_CN_NORTH1) -> bool:
    """Log in to the given endpoint with an access key / secret key pair.

    **If bioos sdk runs inside the miracle private cloud env** such as on a notebook under a
    workspace, the login procedure will be finished automatically.

    **If bioos sdk runs outside the miracle cloud env** such as on user's local machine,
    the login procedure should be explicitly executed.

    *Example*:
    ::

        bioos.login(endpoint="https://cloud.xxxxx.xxx.cn",access_key="xxxxxxxx",secret_key="xxxxxxxx")

    :param endpoint: The environment to be logged in
    :type endpoint: str
    :param access_key: The specified account's access key
    :type access_key: str
    :param secret_key: Corresponding secret key of the access key
    :type secret_key: str
    :param region: The region to be logged in
    :type region: str
    :return: ``True`` when the reported login status is "Already logged in"
    :rtype: bool
    """
    # Keys are stored first, then the endpoint, then the region; the call
    # order is kept exactly as it was because the setters may (re)build the
    # underlying service.
    Config.set_access_key(access_key)
    Config.set_secret_key(secret_key)
    Config.set_endpoint(endpoint)
    Config.set_region(region)

    current_status = Config.login_info().login_status
    return current_status == "Already logged in"
56
+
57
+
58
def list_workspaces() -> DataFrame:
    """Lists all workspaces in the login environment.

    *Example*:
    ::

        bioos.list_workspaces()

    :return: One row per workspace record returned by the service; an empty
        DataFrame when the response carries no ``Items``
    :rtype: DataFrame
    """
    # NOTE(review): PageSize 0 presumably asks for every workspace in a single
    # page -- confirm against the service API.
    response = Config.service().list_workspaces({"PageSize": 0})

    # "Items" may be missing or None; DataFrame.from_records cannot take None,
    # so fall back to an empty record list.
    records = response.get("Items") or []
    return DataFrame.from_records(records)
71
+
72
+
73
def workspace(id_: str) -> Workspace:  # entry point for working with a workspace
    """Build the workspace object for the given workspace id.

    :param id_: Workspace id
    :type id_: str
    :return: Specified workspace object
    :rtype: Workspace
    """
    selected = Workspace(id_)
    return selected
82
+
83
+
84
def utility() -> UtilityResource:
    """Create the resource object that groups the common tools.

    :return: Tool collection Resource object
    :rtype: UtilityResource
    """
    tools = UtilityResource()
    return tools
@@ -0,0 +1,284 @@
1
+ import argparse
2
+ import json
3
+ import logging
4
+ import os
5
+ import time
6
+
7
+ import pandas as pd
8
+
9
+ from bioos import bioos
10
+ from bioos.errors import NotFoundError
11
+
12
+
13
def recognize_files_from_input_json(workflow_input_json: dict) -> dict:
    """Pick out the inputs whose value looks like a local file path.

    A value is kept when its string form contains ``/`` and it neither starts
    with ``s3`` nor contains ``registry-vpc``. Only absolute paths are
    supported by this version.

    :param workflow_input_json: Mapping of workflow input name to value
    :type workflow_input_json: dict
    :return: The subset of entries considered to be local files
    :rtype: dict
    """

    def _looks_like_local_path(raw) -> bool:
        text = str(raw)
        if text.startswith("s3") or "registry-vpc" in text:
            return False
        return "/" in text

    return {
        name: raw
        for name, raw in workflow_input_json.items()
        if _looks_like_local_path(raw)
    }
29
+
30
+
31
def get_logger():
    """Return this module's logger, configured for DEBUG output on a stream handler.

    The logger is also published as the module-level global ``LOGGER``.
    The stream handler is attached only when the logger has no handler yet,
    so calling this function repeatedly (once per ``Bioos_workflow``) does not
    duplicate every log line.

    :return: The configured module logger
    :rtype: logging.Logger
    """
    global LOGGER

    LOGGER = logging.getLogger(__name__)
    LOGGER.setLevel(logging.DEBUG)

    # logging.getLogger returns the same object on every call, so handlers
    # accumulate unless we guard the registration.
    if not LOGGER.handlers:
        formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")

        handler = logging.StreamHandler()
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(formatter)
        LOGGER.addHandler(handler)

    return LOGGER
44
+
45
+
46
+ # 引入langchain,利用dash起一个界面?
47
+ # 将启明的内容整合进来
48
+ # 与Workspace SPEC做映射
49
class Bioos_workflow:
    """Drive one Bio-OS workflow: upload inputs, submit, monitor and download.

    Attributes set along the way:

    * ``logger`` -- module logger from ``get_logger()``
    * ``ws`` / ``wf`` -- the resolved workspace and workflow objects
    * ``params_submit`` -- keyword arguments built by :meth:`preprocess`
    * ``runs`` -- runs returned by :meth:`submit_workflow_bioosapi`
      (an empty list until a submission has been made)
    """

    def __init__(self, workspace_name: str, workflow_name: str) -> None:
        """Resolve the workspace by name and bind the named workflow.

        :param workspace_name: Name of the target workspace
        :type workspace_name: str
        :param workflow_name: Name of the target workflow inside that workspace
        :type workflow_name: str
        :raises NotFoundError: when the name does not match exactly one workspace
        """
        self.logger = get_logger()
        # Defined up front so monitor/postprocess do not fail with
        # AttributeError when called before a submission.
        self.runs = []

        # get workspace id
        df = bioos.list_workspaces()
        # An empty listing has no columns at all; treat it as "not found"
        # instead of failing on the attribute access below.
        if "Name" not in df.columns or "ID" not in df.columns:
            raise NotFoundError("Workspace", workspace_name)
        ser = df[df.Name == workspace_name].ID
        if len(ser) != 1:
            raise NotFoundError("Workspace", workspace_name)
        workspace_id = ser.to_list()[0]

        self.ws = bioos.workspace(workspace_id)
        self.wf = self.ws.workflow(name=workflow_name)

    # TODO: a mechanism is needed to infer the upload destination, driven by
    # the WES endpoint configuration.
    def input_provision(self, workflow_input_json: dict):
        """Upload local input files and rewrite their entries to S3 URLs.

        The input is standard WDL JSON. It exists in a single ``{}`` and a
        batch ``[{}]`` form; this method handles one single-form dict.
        The dict is updated in place and also returned.

        :param workflow_input_json: Mapping of workflow input name to value
        :type workflow_input_json: dict
        :return: The same dict, with local paths replaced by S3 locations
        :rtype: dict
        """
        # need to support different source and target

        # find files
        putative_files = recognize_files_from_input_json(workflow_input_json)

        # upload files
        update_dict = {}
        for key, value in putative_files.items():
            target = f"input_provision/{os.path.basename(value)}"
            # NOTE: when several rows (samples) reference the same file it may
            # be uploaded more than once; checking for existence could avoid it.
            # TODO: validate that the local file exists.
            # The target handed to upload() is a prefix, not a full key.
            self.logger.info(f"Start upload {value}.")
            self.ws.files.upload(value,
                                 target="input_provision/",
                                 flatten=True)
            self.logger.info(f"Finish upload {value}.")

            s3_location = self.ws.files.s3_urls(target)[0]
            update_dict[key] = s3_location

        # update json
        workflow_input_json.update(update_dict)
        return workflow_input_json

    def output_provision(self):
        """Placeholder; output provisioning is not implemented."""
        pass

    def preprocess(
        self,
        input_json_file: str,
        data_model_name: str = "dm",
        submission_desc: str = "Submit by pybioos",
        call_caching: bool = True,
    ):
        """Load the input JSON, upload files, write the data model and build submit params.

        :param input_json_file: Path of the workflow input JSON (a dict or a list of dicts)
        :type input_json_file: str
        :param data_model_name: Name of the data model to write
        :type data_model_name: str
        :param submission_desc: Description attached to the submission
        :type submission_desc: str
        :param call_caching: Whether call caching is requested for the submission
        :type call_caching: bool
        :return: Keyword arguments later passed to the workflow's ``submit``
        :rtype: dict
        :raises ValueError: when the JSON file contains an empty list
        """
        # Use a context manager so the file handle is closed deterministically.
        with open(input_json_file) as handle:
            input_json = json.load(handle)
        self.logger.info("Load json input successfully.")

        # Normalize the single form into the list form.
        if isinstance(input_json, list):
            inputs_list = input_json
        else:
            inputs_list = [
                input_json,
            ]

        # Fail before anything is uploaded or written; otherwise an empty
        # list only surfaces later as an IndexError.
        if not inputs_list:
            raise ValueError(
                f"No inputs found in '{input_json_file}': expected a dict or a non-empty list of dicts"
            )

        # Provision every row and collect the updated inputs.
        inputs_list_update = []
        for input_dict in inputs_list:
            input_dict_update = self.input_provision(input_dict)
            inputs_list_update.append(input_dict_update)

        # Build the data model and upload it.
        # TODO: the content of the id column still needs proper handling.
        df = pd.DataFrame(inputs_list_update)
        id_col = f"{data_model_name}_id"
        columns = [
            id_col,
        ]
        columns.extend(df.columns)
        df[id_col] = [f"tmp_{x}" for x in range(len(df))]
        df = df.reindex(columns=columns)
        # Column names keep only the last dotted component of each input key.
        columns = [key.split(".")[-1] for key in df.columns.to_list()]
        df.columns = pd.Index(columns)

        # NOTE: each newly uploaded data model may need to be renamed.
        # It has been confirmed that only all-str DataFrames are supported.
        self.ws.data_models.write({data_model_name: df.map(str)}, force=True)
        self.logger.info("Set data model successfully.")

        # Build the input structure expected by the API: every input refers
        # to the data model column of the same (short) name.
        unupdate_dict = {
            key: f'this.{key.split(".")[-1]}'
            for key in inputs_list[0]
        }

        self.params_submit = {
            "inputs": json.dumps(unupdate_dict),
            "outputs": "{}",
            "data_model_name": data_model_name,
            "row_ids": df[id_col].to_list(),
            "submission_desc": submission_desc,
            "call_caching": call_caching,
        }
        self.logger.info("Build submission params successfully.")

        return self.params_submit

    def postprocess(self, download=False):
        """Collect the workspace files belonging to the submitted runs and optionally download them.

        :param download: When true, download the collected files into the current directory
        :type download: bool
        """
        # Assumes every run has finished.
        # The bioos package can only download a list of files, not folders.
        # ws.files.list cannot take a start path; this needs improvement.
        # TODO: something should poll run status periodically, record it, and
        # post-process each newly finished run.
        files = []
        for file in self.ws.files.list().key:
            # Several runs can share one submission; test membership once so
            # the same file is not queued for download repeatedly.
            if not any(run.submission in file for run in self.runs):
                continue
            print(file)
            if "%" in file:
                continue
            files.append(file)

        if download:
            try:
                self.ws.files.download(files, ".", flatten=False)
            except Exception:
                # Download stays best-effort, but keep the cause visible and
                # let KeyboardInterrupt/SystemExit propagate.
                self.logger.exception("Download failed.")
                print('Some file can not download.')

        self.logger.info("Download finish.")

    def submit_workflow_bioosapi(self):
        """Submit the workflow with the params built by :meth:`preprocess`.

        :return: The runs returned by the workflow's ``submit``
        """
        self.runs = self.wf.submit(**self.params_submit)
        self.logger.info("Submit workflow run successfully.")
        return self.runs

    def monitor_workflow(self):
        """Refresh every tracked run via ``run.sync()`` and return the runs.

        :return: The refreshed runs
        :rtype: list
        """
        # TODO: check whether wf offers a query method for this.
        runs = []
        for run in self.runs:
            run.sync()
            runs.append(run)

        self.runs = runs
        return self.runs
192
+
193
+
194
def bioos_workflow():
    """Command-line entry point: log in, submit a workflow and optionally monitor/download.

    Options are read from ``sys.argv``. The credentials, workspace name,
    workflow name and input JSON are mandatory; they are declared as required
    so a missing one is reported by argparse instead of failing later with an
    unrelated error.
    """

    # argparse
    parser = argparse.ArgumentParser(
        description="Bio-OS instance platform workflow submitter program.")
    parser.add_argument("--endpoint",
                        type=str,
                        help="Bio-OS instance platform endpoint",
                        default="https://bio-top.miracle.ac.cn")
    parser.add_argument(
        "--ak",
        type=str,
        required=True,
        help="Access_key for your Bio-OS instance platform account.")
    parser.add_argument(
        "--sk",
        type=str,
        required=True,
        help="Secret_key for your Bio-OS instance platform account.")

    parser.add_argument("--workspace_name",
                        type=str,
                        required=True,
                        help="Target workspace name.")
    parser.add_argument("--workflow_name",
                        type=str,
                        required=True,
                        help="Target workflow name.")
    parser.add_argument(
        "--input_json",
        type=str,
        required=True,
        help="The input_json file in Cromwell Womtools format.")
    parser.add_argument(
        "--data_model_name",
        type=str,
        help=
        "Intended name for the generated data_model on the Bio-OS instance platform workspace page.",
        default="dm")
    parser.add_argument(
        "--call_caching",
        action='store_true',
        help="Call_caching for the submission run.",
    )
    parser.add_argument('--submission_desc',
                        type=str,
                        help="Description for the submission run.",
                        default="Submit by pybioos.")

    parser.add_argument(
        "--monitor",
        action='store_true',
        help="Moniter the status of submission run until finishment.")
    parser.add_argument(
        "--monitor_interval",
        type=int,
        default=600,
        help="Time interval for query the status for the submission runs.")
    parser.add_argument(
        "--download_results",
        action='store_true',
        help="Download the submission run result files to local current path.")

    parsed_args = parser.parse_args()

    # login and submit
    bioos.login(endpoint=parsed_args.endpoint,
                access_key=parsed_args.ak,
                secret_key=parsed_args.sk)
    bw = Bioos_workflow(workspace_name=parsed_args.workspace_name,
                        workflow_name=parsed_args.workflow_name)
    bw.preprocess(input_json_file=parsed_args.input_json,
                  data_model_name=parsed_args.data_model_name,
                  submission_desc=parsed_args.submission_desc,
                  call_caching=parsed_args.call_caching)
    bw.submit_workflow_bioosapi()

    # monitor
    def all_runs_done() -> bool:
        # A run counts as finished once its status is Succeeded or Failed.
        return all(run.status in ("Succeeded", "Failed") for run in bw.runs)

    if parsed_args.monitor or parsed_args.download_results:
        while not all_runs_done():
            bw.logger.info("Monitoring submission run.")
            print(bw.runs)
            time.sleep(parsed_args.monitor_interval)
            bw.monitor_workflow()

        # NOTE(review): fixed one-minute pause before post-processing,
        # presumably to let result files settle -- confirm it is still needed.
        time.sleep(60)
        bw.postprocess(download=parsed_args.download_results)
bioos/config.py ADDED
@@ -0,0 +1,147 @@
1
+ import os
2
+
3
+ from typing_extensions import Literal
4
+ from volcengine.const.Const import REGION_CN_NORTH1
5
+
6
+ from bioos.errors import ConfigurationError
7
+ from bioos.log import PyLogger
8
+ from bioos.service.BioOsService import BioOsService
9
+
10
+ LOGIN_STATUS = Literal['Already logged in', 'Not logged in']
11
+
12
+
13
class Config:
    """Process-wide Bio-OS connection settings, stored as class attributes.

    The access key, secret key and endpoint are seeded from the
    ``VOLC_ACCESSKEY``, ``VOLC_SECRETKEY`` and ``BIOOS_ENDPOINT`` environment
    variables when the class is created, and can be overridden through the
    ``set_*`` class methods. The service client is created lazily.
    """
    # Service client; stays None until _init_service() succeeds.
    _service: BioOsService = None
    _access_key: str = os.environ.get('VOLC_ACCESSKEY')
    _secret_key: str = os.environ.get('VOLC_SECRETKEY')
    _endpoint: str = os.environ.get('BIOOS_ENDPOINT')
    _region: str = REGION_CN_NORTH1
    Logger = PyLogger

    class LoginInfo:
        """[Only Read]Record the current login information .
        """

        @property
        def access_key(self):
            """Returns the Login AccessKey .
            """
            return Config._access_key

        @property
        def secret_key(self) -> str:
            """Returns the Login SecretKey .
            """
            return Config._secret_key

        @property
        def endpoint(self) -> str:
            """Returns the Login Endpoint .
            """
            return Config._endpoint

        @property
        def region(self) -> str:
            """Returns the Login Region .
            """
            return Config._region

        @property
        def login_status(self) -> LOGIN_STATUS:
            """Return the login status .

            The status is probed with a live service call on every access.

            :return: Login status: 'Already logged in' or 'Not logged in'
            :rtype: str
            """
            try:
                Config._ping_func()
            except ConfigurationError:
                # A required setting is missing; nothing worth logging.
                return "Not logged in"
            except Exception as e:
                Config.Logger.error(e)
                return "Not logged in"
            return "Already logged in"

        def __repr__(self):
            # Never echo the secret key itself: this text is what
            # bioos.status() shows in notebooks and what ends up in logs.
            masked_secret = "******" if self.secret_key else self.secret_key
            return f"{self.login_status}\n" \
                   f"endpoint: {self.endpoint}\n" \
                   f"access_key: {self.access_key}\n" \
                   f"secret_key: {masked_secret}\n" \
                   f"region: {self.region}"

    @classmethod
    def _same_endpoint(cls):
        """Tell whether the existing service's scheme://host equals the configured endpoint."""
        return (cls._service.service_info.scheme + "://" +
                cls._service.service_info.host) == cls._endpoint

    @classmethod
    def _same_region(cls):
        """Tell whether the existing service's credential region equals the configured region."""
        return cls._service.service_info.credentials.region == cls._region

    @classmethod
    def service(cls):
        """Return the service client, creating it on first use.

        :raises ConfigurationError: when a required setting is missing
        """
        if cls._service:
            return cls._service
        cls._init_service()
        return cls._service

    @classmethod
    def _ping_func(cls):
        """Issue a workspace listing; whether it succeeds tells if login worked."""
        if not cls._service:
            cls._init_service()
        cls._service.list_workspaces({})

    @classmethod
    def login_info(cls):
        """Return a fresh read-only view of the current login settings."""
        return Config.LoginInfo()

    @classmethod
    def set_access_key(cls, access_key: str):
        """Store the access key and push it to the service if one already exists."""
        cls._access_key = access_key
        if cls._service:
            cls._service.set_ak(cls._access_key)

    @classmethod
    def set_secret_key(cls, secret_key: str):
        """Store the secret key and push it to the service if one already exists."""
        cls._secret_key = secret_key
        if cls._service:
            cls._service.set_sk(cls._secret_key)

    @classmethod
    def set_endpoint(cls, endpoint: str):
        """Store the endpoint and (re)build the service unless it already targets it.

        :raises ConfigurationError: when a required setting is missing
        """
        cls._endpoint = endpoint
        if cls._service and cls._same_endpoint():
            return

        cls._init_service()

    @classmethod
    def set_region(cls, region: str):
        """Store the region and (re)build the service unless it already targets it.

        :raises ConfigurationError: when a required setting is missing
        """
        cls._region = region
        if cls._service and cls._same_region():
            return

        cls._init_service()

    @classmethod
    def _init_service(cls):
        """Create the service client from the stored settings.

        Does nothing when a service already exists for the same region and
        endpoint.

        :raises ConfigurationError: when endpoint, region, access key or secret key is unset
        """
        if cls._service and cls._same_region() and cls._same_endpoint():
            return

        if not cls._endpoint:
            raise ConfigurationError('ENDPOINT')

        if not cls._region:
            raise ConfigurationError('REGION')

        if not cls._access_key:
            raise ConfigurationError('ACCESS_KEY')

        if not cls._secret_key:
            raise ConfigurationError('SECRET_KEY')

        # cls._service keeps the logged-in state and is the entry point for
        # every downstream call.
        cls._service = BioOsService(
            endpoint=cls._endpoint,
            region=cls._region)
        cls._service.set_ak(cls._access_key)
        cls._service.set_sk(cls._secret_key)
bioos/errors.py ADDED
@@ -0,0 +1,89 @@
1
+ # coding:utf-8
2
class ConfigurationError(Exception):
    """Exception indicating a required configuration not set .
    """

    def __init__(self, conf: str):
        """Initialize the ConfigurationError .

        :param conf: name of the configuration
        :type conf: str
        """

        self.conf = conf
        self.message = "configuration '{}' must be set".format(conf)
        super().__init__(self.message)


class EnvironmentConfigurationError(ConfigurationError):
    """Exception indicating a required configuration **environment** not set .
    """

    def __init__(self, env: str):
        """Initialize the EnvironmentConfigurationError .

        :param env: environment name of the configuration
        :type env: str
        """

        # The parent initializer always builds its own
        # "configuration '...' must be set" text, so run it first and then
        # replace the message; otherwise the environment message ends up
        # wrapped inside the configuration one.
        super().__init__(env)
        self.env = env
        self.message = "environment '{}' must be set".format(env)
        self.args = (self.message,)
32
+
33
+
34
class NotFoundError(Exception):
    """Raised when a named object cannot be found."""

    def __init__(self, typ: str, name: str):
        """Build the "<typ> '<name>' not found" message.

        :param typ: object type, e.g. Table Workflow
        :type typ: str
        :param name: object name
        :type name: str
        """
        text = f"{typ} '{name}' not found"
        self.message = text
        super().__init__(text)
48
+
49
+
50
class ParameterError(Exception):
    """Raised when a required parameter is invalid or missing."""

    def __init__(self, name: str):
        """Build the "parameter '<name>' invalid / not found" message.

        :param name: name of the parameter
        :type name: str
        """
        text = f"parameter '{name}' invalid / not found"
        self.message = text
        super().__init__(text)
62
+
63
+
64
class ConflictError(Exception):
    """Exception indicating a conflict
    """

    def __init__(self, name: str, msg: str = ""):
        """Initialize the ConflictError .

        :param name: name of the parameter
        :type name: str
        :param msg: conflict reason; appended after a colon when non-empty
        :type msg: str
        """
        # Build the base text first and append the reason separately. A single
        # `a + b if msg else ""` expression applies the condition to the whole
        # concatenation and yields an empty message when no reason is given.
        self.message = "parameter '{}' conflicts".format(name)
        if msg:
            self.message += f": {msg}"
        super().__init__(self.message)
79
+
80
+
81
class NotLoggedInError(Exception):
    """Raised when an operation needs a login that has not happened yet."""

    def __init__(self):
        """Set the fixed message pointing the user to ``bioos.login``."""
        text = "not logged in yet, please call bioos.login to login"
        self.message = text
        super().__init__(text)
@@ -0,0 +1 @@
1
+