pybioos 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of pybioos might be problematic. Click here for more details.

bioos/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # coding:utf-8
2
2
 
3
3
  # Package version
4
- __version__ = "0.0.5"
4
+ __version__ = "0.0.7"
bioos/bioos_workflow.py CHANGED
@@ -2,12 +2,13 @@ import argparse
2
2
  import json
3
3
  import logging
4
4
  import os
5
+ import re
5
6
  import time
6
7
 
7
8
  import pandas as pd
8
9
 
9
10
  from bioos import bioos
10
- from bioos.errors import NotFoundError
11
+ from bioos.errors import NotFoundError, ParameterError
11
12
 
12
13
 
13
14
  def recognize_files_from_input_json(workflow_input_json: dict) -> dict:
@@ -159,6 +160,100 @@ class Bioos_workflow:
159
160
 
160
161
  return self.params_submit
161
162
 
163
+ def preprocess2(self,
164
+ input_json_file: str,
165
+ data_model_name: str = "dm",
166
+ submission_desc: str = "Submit by pybioos",
167
+ call_caching: bool = True,
168
+ force_reupload: bool = False):
169
+ if not os.path.isfile(input_json_file):
170
+ raise ParameterError('Input_json_file is not found.')
171
+
172
+ input_json = json.load(open(input_json_file))
173
+ self.logger.info("Load json input successfully.")
174
+
175
+ # putative files
176
+ input_json_str = json.dumps(input_json)
177
+
178
+ # capture strings containing "/" the test if the file exists
179
+ putative_files = [
180
+ s.strip('"\'') for s in re.findall(
181
+ r'''"[-_\w./:]+?/[-_\w./:]+?"''', input_json_str)
182
+ if os.path.isfile(s.strip('"\''))
183
+ ]
184
+
185
+ putative_files = set(putative_files)
186
+ file_str = ''
187
+ for putative_file in putative_files:
188
+ file_str = file_str + '\t' + putative_file + '\n'
189
+
190
+ self.logger.info(
191
+ f"Putative files need to upload includes:\n{file_str}")
192
+
193
+ # provision upload and file path replace
194
+ df = self.ws.files.list('input_provision')
195
+ uploaded_files = [] if df.empty else df.key.to_list()
196
+ for putative_file in putative_files:
197
+ target = f"input_provision/{os.path.basename(putative_file)}"
198
+
199
+ if not force_reupload and target in uploaded_files:
200
+ self.logger.info(
201
+ f"Skip target site already existed file {putative_file}.")
202
+ else:
203
+ self.logger.info(f"Start upload {putative_file}.")
204
+ self.ws.files.upload(putative_file,
205
+ target="input_provision/",
206
+ flatten=True)
207
+ self.logger.info(f"Finish upload {putative_file}.")
208
+ s3_location = self.ws.files.s3_urls(target)[0]
209
+ input_json_str = re.sub(putative_file, s3_location, input_json_str)
210
+
211
+ # start build params_submit
212
+ self.params_submit = {
213
+ "outputs": "{}",
214
+ "submission_desc": submission_desc,
215
+ "call_caching": call_caching,
216
+ }
217
+
218
+ # if the input json is a batch or singleton submission
219
+ input_json = json.loads(input_json_str)
220
+ if isinstance(input_json, list): # batch mode
221
+ self.logger.info("Batch mode found.")
222
+
223
+ # build data model for batch mode
224
+ inputs_list = input_json
225
+ df = pd.DataFrame(inputs_list)
226
+ id_col = f"{data_model_name}_id"
227
+ columns = [
228
+ id_col,
229
+ ]
230
+ columns.extend(df.columns)
231
+ df[id_col] = [f"tmp_{x}" for x in list(range(len(df)))]
232
+ df = df.reindex(columns=columns)
233
+ columns = [key.split(".")[-1] for key in df.columns.to_list()]
234
+ df.columns = pd.Index(columns)
235
+
236
+ # write data models
237
+ self.ws.data_models.write({data_model_name: df.map(str)},
238
+ force=True)
239
+ self.logger.info("Set data model successfully.")
240
+
241
+ # match the batch sytax of Bio-OS
242
+ unupdate_dict = inputs_list[0]
243
+ for key, _ in unupdate_dict.items():
244
+ unupdate_dict[key] = f'this.{key.split(".")[-1]}'
245
+
246
+ self.params_submit["inputs"] = json.dumps(unupdate_dict)
247
+ self.params_submit["data_model_name"] = data_model_name
248
+ self.params_submit["row_ids"] = df[id_col].to_list()
249
+
250
+ else: # singleton mode
251
+ self.logger.info("Singleton mode found.")
252
+ self.params_submit["inputs"] = json.dumps(input_json)
253
+
254
+ self.logger.info("Build params dict successfully.")
255
+ return self.params_submit
256
+
162
257
  def postprocess(self, download=False):
163
258
  # 假设全部执行完毕
164
259
  # 对运行完成的目录进行下载
@@ -266,11 +361,11 @@ def bioos_workflow():
266
361
  secret_key=parsed_args.sk)
267
362
  bw = Bioos_workflow(workspace_name=parsed_args.workspace_name,
268
363
  workflow_name=parsed_args.workflow_name)
269
- bw.preprocess(input_json_file=parsed_args.input_json,
270
- data_model_name=parsed_args.data_model_name,
271
- submission_desc=parsed_args.submission_desc,
272
- call_caching=parsed_args.call_caching,
273
- force_reupload=parsed_args.force_reupload)
364
+ bw.preprocess2(input_json_file=parsed_args.input_json,
365
+ data_model_name=parsed_args.data_model_name,
366
+ submission_desc=parsed_args.submission_desc,
367
+ call_caching=parsed_args.call_caching,
368
+ force_reupload=parsed_args.force_reupload)
274
369
  bw.submit_workflow_bioosapi()
275
370
 
276
371
  # moniter
@@ -291,4 +386,9 @@ def bioos_workflow():
291
386
  bw.monitor_workflow()
292
387
 
293
388
  time.sleep(60)
389
+ bw.logger.info("Submission finished. Print final status for runs.")
390
+ print(bw.runs)
391
+
392
+ bw.logger.info("Start to postprocess.")
294
393
  bw.postprocess(download=parsed_args.download_results)
394
+ bw.logger.info("Postprocess finished.")
bioos/resource/workflows.py CHANGED
@@ -292,10 +292,11 @@ class Submission(metaclass=SingletonType): # 与run class行为相同
292
292
  'WorkspaceID':
293
293
  self.workspace_id,
294
294
  }).get("Items")
295
- for model in models:
296
- if model["ID"] == item["DataModelID"]:
297
- self.data_model = model.get("Name")
298
- break
295
+ if "DataModelID" in item.keys():
296
+ for model in models:
297
+ if model["ID"] == item["DataModelID"]:
298
+ self.data_model = model.get("Name")
299
+ break
299
300
 
300
301
  self.call_cache = item.get("ExposedOptions").get("ReadFromCache")
301
302
  self.outputs = item.get("Outputs")
@@ -513,9 +514,14 @@ class Workflow(metaclass=SingletonType):
513
514
  return ""
514
515
  return res["ID"].iloc[0]
515
516
 
516
- def submit(self, data_model_name: str, row_ids: List[str], inputs: str, outputs: str,
517
- submission_desc: str, call_caching: bool, submission_name_suffix: str = "") \
518
- -> List[Run]:
517
+ def submit(self,
518
+ inputs: str,
519
+ outputs: str,
520
+ submission_desc: str,
521
+ call_caching: bool,
522
+ submission_name_suffix: str = "",
523
+ row_ids: List[str] = [],
524
+ data_model_name: str = '') -> List[Run]:
519
525
  """Submit an existed workflow.
520
526
 
521
527
  *Example*:
@@ -547,44 +553,40 @@ class Workflow(metaclass=SingletonType):
547
553
  :return: Result Runs corresponding to submitted workflows
548
554
  :rtype: List[Run]
549
555
  """
550
- if not row_ids:
551
- raise ParameterError("row_ids")
556
+
552
557
  if not inputs and not is_json(inputs):
553
558
  raise ParameterError('inputs')
554
559
  if not outputs and not is_json(outputs):
555
560
  raise ParameterError('outputs')
556
-
557
- data_model_id = self.query_data_model_id(data_model_name)
558
- if not data_model_id:
559
- raise ParameterError("data_model_name")
560
-
561
561
  if not submission_name_suffix:
562
562
  submission_name_suffix = datetime.now().strftime(
563
563
  '%Y-%m-%d-%H-%M-%S')
564
- submission_id = Config.service().create_submission({
565
- "ClusterID":
566
- self.get_cluster,
567
- 'WorkspaceID':
568
- self.workspace_id,
569
- 'WorkflowID':
570
- self.id,
571
- 'Name':
572
- workflows.submission_name(self.name, submission_name_suffix),
573
- 'Description':
574
- submission_desc,
575
- 'DataModelID':
576
- data_model_id,
577
- 'DataModelRowIDs':
578
- row_ids,
579
- 'Inputs':
580
- inputs,
564
+
565
+ params = {
566
+ "ClusterID": self.get_cluster,
567
+ 'WorkspaceID': self.workspace_id,
568
+ 'WorkflowID': self.id,
569
+ 'Name': workflows.submission_name(self.name,
570
+ submission_name_suffix),
571
+ 'Description': submission_desc,
572
+ 'Inputs': inputs,
581
573
  'ExposedOptions': {
582
574
  "ReadFromCache": call_caching,
583
575
  # TODO this may change in the future
584
576
  "ExecutionRootDir": f"s3://{self.bucket}"
585
577
  },
586
- 'Outputs':
587
- outputs,
588
- }).get("ID")
578
+ 'Outputs': outputs,
579
+ }
580
+
581
+ # It is batch mode when data_model_name and row_ids are specified.
582
+ if data_model_name and row_ids:
583
+ data_model_id = self.query_data_model_id(data_model_name)
584
+ if not data_model_id:
585
+ raise ParameterError("data_model_name")
586
+
587
+ params['DataModelID'] = data_model_id
588
+ params['DataModelRowIDs'] = row_ids
589
+
590
+ submission_id = Config.service().create_submission(params).get("ID")
589
591
 
590
592
  return Submission(self.workspace_id, submission_id).runs
pybioos-0.0.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybioos
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: BioOS SDK for Python
5
5
  Home-page: https://github.com/GBA-BI/pybioos
6
6
  Author: Jilong Liu
pybioos-0.0.7.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
1
- bioos/__about__.py,sha256=AbfDZKyySPiWBjcsh8N93m6C2wXayn43ZS5G_U3Cx5A,56
1
+ bioos/__about__.py,sha256=okNssbP5M6Wx_3bJKWb7uNW9yTV_ylNEEhx1uaNcEAc,56
2
2
  bioos/__init__.py,sha256=4GZKi13lDTD25YBkGakhZyEQZWTER_OWQMNPoH_UM2c,22
3
3
  bioos/bioos.py,sha256=fHzOb1l5wYxw6NVYYZDiFcgk4V28BAgWEc3ev12reWs,2409
4
- bioos/bioos_workflow.py,sha256=QOyoFXFkE8dMAuTUT5hf8vXy2l4EEt5YzztsBu_s9Fk,10070
4
+ bioos/bioos_workflow.py,sha256=ZhlgIqsCMUBnNvzW1hQIlagJtZpSeDVFp99yXpIU358,14122
5
5
  bioos/config.py,sha256=CvFabYqV1BkFWO8fnr5vBf6xNtNzA8hAEVeEIbvAOm8,4307
6
6
  bioos/errors.py,sha256=Lzz2rkjDOTR2X9CnVkmsmqeOgmNqbi46WAxnC6LEGm0,2459
7
7
  bioos/log.py,sha256=twiCvf5IgJB7uvzANwBluSlztJN8ZrxbGZUBGlZ0vps,3204
@@ -13,7 +13,7 @@ bioos/resource/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
13
13
  bioos/resource/data_models.py,sha256=enKp8yyQI8IbRqe--0Xtyg1XzOwQQPQzoQsx_hNuZ6E,5089
14
14
  bioos/resource/files.py,sha256=zMxsLg1CfZyrgvMesphMz-qROU-38fE_E8XtN9DlWtE,8281
15
15
  bioos/resource/utility.py,sha256=emY7qVLLLvGmQYlVj-_bLAxU7i1GfQOUybdRkfEDwVA,1300
16
- bioos/resource/workflows.py,sha256=06e9KS3Bm73mh9U6cDI7g8TmReM62EcgIjAxPGaqXaw,19541
16
+ bioos/resource/workflows.py,sha256=A-fEUqPGuCmqZNNaW1zc8cozFJoQcg8bzXSyI2ZLypM,19724
17
17
  bioos/resource/workspaces.py,sha256=Gmr8y_sjK7TQbhMhQ_7rxqR1KFcwU72I95YYCFrrLBQ,3995
18
18
  bioos/service/BioOsService.py,sha256=HuYUEwomHCLpA1MYgVqGyWAQWHM-_BHB-jmy9VsOlnQ,6724
19
19
  bioos/service/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
@@ -30,9 +30,9 @@ bioos/tests/workspaces.py,sha256=LuuRrTs2XqfE5mGQyJNl9RBtuMb4NZHBJFoO8HMZVYQ,522
30
30
  bioos/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
31
  bioos/utils/common_tools.py,sha256=fgMoE_-qZjgfQtUj_pmCTyYDtbJasyfH4Gm3VQsbgBQ,1651
32
32
  bioos/utils/workflows.py,sha256=zRbwTUigoM5V5LFOgzQPm3kwxt5Ogz95OFfefJc6Fjo,133
33
- pybioos-0.0.5.dist-info/LICENSE,sha256=cPkGXsgfPgEhIns7Lt3Avxx0Uy-VbdsoP8jvNGuj3cE,1063
34
- pybioos-0.0.5.dist-info/METADATA,sha256=NiirXZl6Vyh3dnxRhrgMit-xyu6NCNdR5kIHeKM6nMM,770
35
- pybioos-0.0.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
36
- pybioos-0.0.5.dist-info/entry_points.txt,sha256=8TRx1zyu7ja3x5RNaeFxeiYTj_-tiWydbuSulxi3TM0,59
37
- pybioos-0.0.5.dist-info/top_level.txt,sha256=llpzydkKVDSaWZgz3bsTUsQmhoQpc_JcRJg2-H-5a2U,6
38
- pybioos-0.0.5.dist-info/RECORD,,
33
+ pybioos-0.0.7.dist-info/LICENSE,sha256=cPkGXsgfPgEhIns7Lt3Avxx0Uy-VbdsoP8jvNGuj3cE,1063
34
+ pybioos-0.0.7.dist-info/METADATA,sha256=a-HFZWmOAZg1U9jIc5v2I-1mh7hJHA25pZbEeU3uPu8,770
35
+ pybioos-0.0.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
36
+ pybioos-0.0.7.dist-info/entry_points.txt,sha256=8TRx1zyu7ja3x5RNaeFxeiYTj_-tiWydbuSulxi3TM0,59
37
+ pybioos-0.0.7.dist-info/top_level.txt,sha256=llpzydkKVDSaWZgz3bsTUsQmhoQpc_JcRJg2-H-5a2U,6
38
+ pybioos-0.0.7.dist-info/RECORD,,