pybioos 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pybioos might be problematic. Click here for more details.
- bioos/__about__.py +1 -1
- bioos/bioos_workflow.py +101 -6
- bioos/resource/workflows.py +36 -34
- {pybioos-0.0.6.dist-info → pybioos-0.0.7.dist-info}/METADATA +1 -1
- {pybioos-0.0.6.dist-info → pybioos-0.0.7.dist-info}/RECORD +9 -9
- {pybioos-0.0.6.dist-info → pybioos-0.0.7.dist-info}/LICENSE +0 -0
- {pybioos-0.0.6.dist-info → pybioos-0.0.7.dist-info}/WHEEL +0 -0
- {pybioos-0.0.6.dist-info → pybioos-0.0.7.dist-info}/entry_points.txt +0 -0
- {pybioos-0.0.6.dist-info → pybioos-0.0.7.dist-info}/top_level.txt +0 -0
bioos/__about__.py
CHANGED
bioos/bioos_workflow.py
CHANGED
|
@@ -2,12 +2,13 @@ import argparse
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
+
import re
|
|
5
6
|
import time
|
|
6
7
|
|
|
7
8
|
import pandas as pd
|
|
8
9
|
|
|
9
10
|
from bioos import bioos
|
|
10
|
-
from bioos.errors import NotFoundError
|
|
11
|
+
from bioos.errors import NotFoundError, ParameterError
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
def recognize_files_from_input_json(workflow_input_json: dict) -> dict:
|
|
@@ -159,6 +160,100 @@ class Bioos_workflow:
|
|
|
159
160
|
|
|
160
161
|
return self.params_submit
|
|
161
162
|
|
|
163
|
+
def preprocess2(self,
                input_json_file: str,
                data_model_name: str = "dm",
                submission_desc: str = "Submit by pybioos",
                call_caching: bool = True,
                force_reupload: bool = False):
    """Build ``self.params_submit`` from a workflow input JSON file.

    Steps performed:

    1. Load ``input_json_file``.
    2. Treat every JSON string that contains a ``/`` and names an existing
       local file as a file to upload.
    3. Upload each such file under ``input_provision/`` (skipped when the
       target key is already listed there, unless ``force_reupload``) and
       substitute the local path in the inputs with the first URL returned
       by ``self.ws.files.s3_urls``.
    4. If the JSON document is a list (batch mode), write the rows to the
       data model ``data_model_name`` and submit ``this.<column>``
       references; otherwise (singleton mode) submit the inputs as-is.

    :param input_json_file: Path of the local workflow input JSON file.
    :param data_model_name: Name of the data model written in batch mode.
    :param submission_desc: Description attached to the submission.
    :param call_caching: Value stored under ``"call_caching"``.
    :param force_reupload: Upload files even if the target key is already
        listed under ``input_provision``.
    :return: The parameter dict, which is also stored on
        ``self.params_submit``.
    :raises ParameterError: If ``input_json_file`` does not exist, or the
        batch list is empty.
    """
    if not os.path.isfile(input_json_file):
        raise ParameterError('Input_json_file is not found.')

    # Context manager so the handle is closed even if parsing fails.
    with open(input_json_file, encoding="utf-8") as handle:
        input_json = json.load(handle)
    self.logger.info("Load json input successfully.")

    # Work on the serialized form so nested values are covered as well.
    input_json_str = json.dumps(input_json)

    # Capture quoted strings containing "/", then test if the file exists.
    # Sorted so the log output and the upload order are deterministic.
    quoted_tokens = re.findall(r'''"[-_\w./:]+?/[-_\w./:]+?"''',
                               input_json_str)
    putative_files = sorted({
        token.strip('"\'')
        for token in quoted_tokens
        if os.path.isfile(token.strip('"\''))
    })

    file_str = ''.join(f'\t{path}\n' for path in putative_files)
    self.logger.info(
        f"Putative files need to upload includes:\n{file_str}")

    # Provision upload and file path replacement.
    df = self.ws.files.list('input_provision')
    uploaded_files = set() if df.empty else set(df.key.to_list())
    for putative_file in putative_files:
        # NOTE(review): the target is derived from the basename only, so
        # two local files with the same name in different directories map
        # to the same target key -- confirm this is acceptable.
        target = f"input_provision/{os.path.basename(putative_file)}"

        if not force_reupload and target in uploaded_files:
            self.logger.info(
                f"Skip target site already existed file {putative_file}.")
        else:
            self.logger.info(f"Start upload {putative_file}.")
            self.ws.files.upload(putative_file,
                                 target="input_provision/",
                                 flatten=True)
            self.logger.info(f"Finish upload {putative_file}.")

        s3_location = self.ws.files.s3_urls(target)[0]
        # Replace the complete quoted JSON token literally. Using the path
        # as a regular expression would let "." match any character and
        # would also rewrite longer paths that merely start with this one
        # (e.g. "x.bam" inside "x.bam.bai"). json.dumps keeps the
        # replacement correctly escaped.
        input_json_str = input_json_str.replace(json.dumps(putative_file),
                                                json.dumps(s3_location))

    # Start building params_submit.
    self.params_submit = {
        "outputs": "{}",
        "submission_desc": submission_desc,
        "call_caching": call_caching,
    }

    # Decide whether the input JSON is a batch or a singleton submission.
    input_json = json.loads(input_json_str)
    if isinstance(input_json, list):  # batch mode
        self.logger.info("Batch mode found.")

        inputs_list = input_json
        if not inputs_list:
            # Fail before touching the data model instead of crashing with
            # an IndexError after it has been written.
            raise ParameterError('Input_json_file contains an empty list.')

        # Build the data model: one row per batch entry, id column first.
        df = pd.DataFrame(inputs_list)
        id_col = f"{data_model_name}_id"
        columns = [id_col]
        columns.extend(df.columns)
        df[id_col] = [f"tmp_{x}" for x in range(len(df))]
        df = df.reindex(columns=columns)
        # Column names keep only the last dotted component of each key.
        df.columns = pd.Index(
            [key.split(".")[-1] for key in df.columns.to_list()])

        # Write data models.
        self.ws.data_models.write({data_model_name: df.map(str)},
                                  force=True)
        self.logger.info("Set data model successfully.")

        # Match the batch syntax of Bio-OS: every input key of the first
        # entry refers to the data-model column of the same short name.
        # A new dict is built so the parsed input is not mutated.
        unupdate_dict = {
            key: f'this.{key.split(".")[-1]}'
            for key in inputs_list[0]
        }

        self.params_submit["inputs"] = json.dumps(unupdate_dict)
        self.params_submit["data_model_name"] = data_model_name
        self.params_submit["row_ids"] = df[id_col].to_list()

    else:  # singleton mode
        self.logger.info("Singleton mode found.")
        self.params_submit["inputs"] = json.dumps(input_json)

    self.logger.info("Build params dict successfully.")
    return self.params_submit
|
|
256
|
+
|
|
162
257
|
def postprocess(self, download=False):
|
|
163
258
|
# 假设全部执行完毕
|
|
164
259
|
# 对运行完成的目录进行下载
|
|
@@ -266,11 +361,11 @@ def bioos_workflow():
|
|
|
266
361
|
secret_key=parsed_args.sk)
|
|
267
362
|
bw = Bioos_workflow(workspace_name=parsed_args.workspace_name,
|
|
268
363
|
workflow_name=parsed_args.workflow_name)
|
|
269
|
-
bw.
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
364
|
+
bw.preprocess2(input_json_file=parsed_args.input_json,
|
|
365
|
+
data_model_name=parsed_args.data_model_name,
|
|
366
|
+
submission_desc=parsed_args.submission_desc,
|
|
367
|
+
call_caching=parsed_args.call_caching,
|
|
368
|
+
force_reupload=parsed_args.force_reupload)
|
|
274
369
|
bw.submit_workflow_bioosapi()
|
|
275
370
|
|
|
276
371
|
# monitor
|
bioos/resource/workflows.py
CHANGED
|
@@ -292,10 +292,11 @@ class Submission(metaclass=SingletonType): # 与run class行为相同
|
|
|
292
292
|
'WorkspaceID':
|
|
293
293
|
self.workspace_id,
|
|
294
294
|
}).get("Items")
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
295
|
+
if "DataModelID" in item.keys():
|
|
296
|
+
for model in models:
|
|
297
|
+
if model["ID"] == item["DataModelID"]:
|
|
298
|
+
self.data_model = model.get("Name")
|
|
299
|
+
break
|
|
299
300
|
|
|
300
301
|
self.call_cache = item.get("ExposedOptions").get("ReadFromCache")
|
|
301
302
|
self.outputs = item.get("Outputs")
|
|
@@ -513,9 +514,14 @@ class Workflow(metaclass=SingletonType):
|
|
|
513
514
|
return ""
|
|
514
515
|
return res["ID"].iloc[0]
|
|
515
516
|
|
|
516
|
-
def submit(self,
|
|
517
|
-
|
|
518
|
-
|
|
517
|
+
def submit(self,
|
|
518
|
+
inputs: str,
|
|
519
|
+
outputs: str,
|
|
520
|
+
submission_desc: str,
|
|
521
|
+
call_caching: bool,
|
|
522
|
+
submission_name_suffix: str = "",
|
|
523
|
+
row_ids: List[str] = [],
|
|
524
|
+
data_model_name: str = '') -> List[Run]:
|
|
519
525
|
"""Submit an existed workflow.
|
|
520
526
|
|
|
521
527
|
*Example*:
|
|
@@ -547,44 +553,40 @@ class Workflow(metaclass=SingletonType):
|
|
|
547
553
|
:return: Result Runs corresponding to submitted workflows
|
|
548
554
|
:rtype: List[Run]
|
|
549
555
|
"""
|
|
550
|
-
|
|
551
|
-
raise ParameterError("row_ids")
|
|
556
|
+
|
|
552
557
|
if not inputs and not is_json(inputs):
|
|
553
558
|
raise ParameterError('inputs')
|
|
554
559
|
if not outputs and not is_json(outputs):
|
|
555
560
|
raise ParameterError('outputs')
|
|
556
|
-
|
|
557
|
-
data_model_id = self.query_data_model_id(data_model_name)
|
|
558
|
-
if not data_model_id:
|
|
559
|
-
raise ParameterError("data_model_name")
|
|
560
|
-
|
|
561
561
|
if not submission_name_suffix:
|
|
562
562
|
submission_name_suffix = datetime.now().strftime(
|
|
563
563
|
'%Y-%m-%d-%H-%M-%S')
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
self.get_cluster,
|
|
567
|
-
'WorkspaceID':
|
|
568
|
-
self.
|
|
569
|
-
'
|
|
570
|
-
|
|
571
|
-
'
|
|
572
|
-
|
|
573
|
-
'Description':
|
|
574
|
-
submission_desc,
|
|
575
|
-
'DataModelID':
|
|
576
|
-
data_model_id,
|
|
577
|
-
'DataModelRowIDs':
|
|
578
|
-
row_ids,
|
|
579
|
-
'Inputs':
|
|
580
|
-
inputs,
|
|
564
|
+
|
|
565
|
+
params = {
|
|
566
|
+
"ClusterID": self.get_cluster,
|
|
567
|
+
'WorkspaceID': self.workspace_id,
|
|
568
|
+
'WorkflowID': self.id,
|
|
569
|
+
'Name': workflows.submission_name(self.name,
|
|
570
|
+
submission_name_suffix),
|
|
571
|
+
'Description': submission_desc,
|
|
572
|
+
'Inputs': inputs,
|
|
581
573
|
'ExposedOptions': {
|
|
582
574
|
"ReadFromCache": call_caching,
|
|
583
575
|
# TODO this may change in the future
|
|
584
576
|
"ExecutionRootDir": f"s3://{self.bucket}"
|
|
585
577
|
},
|
|
586
|
-
'Outputs':
|
|
587
|
-
|
|
588
|
-
|
|
578
|
+
'Outputs': outputs,
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
# It is batch mode when data_model_name and row_ids are specified.
|
|
582
|
+
if data_model_name and row_ids:
|
|
583
|
+
data_model_id = self.query_data_model_id(data_model_name)
|
|
584
|
+
if not data_model_id:
|
|
585
|
+
raise ParameterError("data_model_name")
|
|
586
|
+
|
|
587
|
+
params['DataModelID'] = data_model_id
|
|
588
|
+
params['DataModelRowIDs'] = row_ids
|
|
589
|
+
|
|
590
|
+
submission_id = Config.service().create_submission(params).get("ID")
|
|
589
591
|
|
|
590
592
|
return Submission(self.workspace_id, submission_id).runs
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
bioos/__about__.py,sha256=
|
|
1
|
+
bioos/__about__.py,sha256=okNssbP5M6Wx_3bJKWb7uNW9yTV_ylNEEhx1uaNcEAc,56
|
|
2
2
|
bioos/__init__.py,sha256=4GZKi13lDTD25YBkGakhZyEQZWTER_OWQMNPoH_UM2c,22
|
|
3
3
|
bioos/bioos.py,sha256=fHzOb1l5wYxw6NVYYZDiFcgk4V28BAgWEc3ev12reWs,2409
|
|
4
|
-
bioos/bioos_workflow.py,sha256=
|
|
4
|
+
bioos/bioos_workflow.py,sha256=ZhlgIqsCMUBnNvzW1hQIlagJtZpSeDVFp99yXpIU358,14122
|
|
5
5
|
bioos/config.py,sha256=CvFabYqV1BkFWO8fnr5vBf6xNtNzA8hAEVeEIbvAOm8,4307
|
|
6
6
|
bioos/errors.py,sha256=Lzz2rkjDOTR2X9CnVkmsmqeOgmNqbi46WAxnC6LEGm0,2459
|
|
7
7
|
bioos/log.py,sha256=twiCvf5IgJB7uvzANwBluSlztJN8ZrxbGZUBGlZ0vps,3204
|
|
@@ -13,7 +13,7 @@ bioos/resource/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
|
13
13
|
bioos/resource/data_models.py,sha256=enKp8yyQI8IbRqe--0Xtyg1XzOwQQPQzoQsx_hNuZ6E,5089
|
|
14
14
|
bioos/resource/files.py,sha256=zMxsLg1CfZyrgvMesphMz-qROU-38fE_E8XtN9DlWtE,8281
|
|
15
15
|
bioos/resource/utility.py,sha256=emY7qVLLLvGmQYlVj-_bLAxU7i1GfQOUybdRkfEDwVA,1300
|
|
16
|
-
bioos/resource/workflows.py,sha256=
|
|
16
|
+
bioos/resource/workflows.py,sha256=A-fEUqPGuCmqZNNaW1zc8cozFJoQcg8bzXSyI2ZLypM,19724
|
|
17
17
|
bioos/resource/workspaces.py,sha256=Gmr8y_sjK7TQbhMhQ_7rxqR1KFcwU72I95YYCFrrLBQ,3995
|
|
18
18
|
bioos/service/BioOsService.py,sha256=HuYUEwomHCLpA1MYgVqGyWAQWHM-_BHB-jmy9VsOlnQ,6724
|
|
19
19
|
bioos/service/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
@@ -30,9 +30,9 @@ bioos/tests/workspaces.py,sha256=LuuRrTs2XqfE5mGQyJNl9RBtuMb4NZHBJFoO8HMZVYQ,522
|
|
|
30
30
|
bioos/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
31
|
bioos/utils/common_tools.py,sha256=fgMoE_-qZjgfQtUj_pmCTyYDtbJasyfH4Gm3VQsbgBQ,1651
|
|
32
32
|
bioos/utils/workflows.py,sha256=zRbwTUigoM5V5LFOgzQPm3kwxt5Ogz95OFfefJc6Fjo,133
|
|
33
|
-
pybioos-0.0.
|
|
34
|
-
pybioos-0.0.
|
|
35
|
-
pybioos-0.0.
|
|
36
|
-
pybioos-0.0.
|
|
37
|
-
pybioos-0.0.
|
|
38
|
-
pybioos-0.0.
|
|
33
|
+
pybioos-0.0.7.dist-info/LICENSE,sha256=cPkGXsgfPgEhIns7Lt3Avxx0Uy-VbdsoP8jvNGuj3cE,1063
|
|
34
|
+
pybioos-0.0.7.dist-info/METADATA,sha256=a-HFZWmOAZg1U9jIc5v2I-1mh7hJHA25pZbEeU3uPu8,770
|
|
35
|
+
pybioos-0.0.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
36
|
+
pybioos-0.0.7.dist-info/entry_points.txt,sha256=8TRx1zyu7ja3x5RNaeFxeiYTj_-tiWydbuSulxi3TM0,59
|
|
37
|
+
pybioos-0.0.7.dist-info/top_level.txt,sha256=llpzydkKVDSaWZgz3bsTUsQmhoQpc_JcRJg2-H-5a2U,6
|
|
38
|
+
pybioos-0.0.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|