pybiolib 1.1.1747__py3-none-any.whl → 1.1.2193__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75):
  1. biolib/__init__.py +18 -5
  2. biolib/_data_record/data_record.py +278 -0
  3. biolib/_internal/data_record/__init__.py +1 -0
  4. biolib/_internal/data_record/data_record.py +97 -0
  5. biolib/_internal/data_record/remote_storage_endpoint.py +38 -0
  6. biolib/_internal/file_utils.py +77 -0
  7. biolib/_internal/fuse_mount/__init__.py +1 -0
  8. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  9. biolib/_internal/http_client.py +42 -23
  10. biolib/_internal/lfs/__init__.py +1 -0
  11. biolib/_internal/libs/__init__.py +1 -0
  12. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  13. biolib/_internal/push_application.py +22 -37
  14. biolib/_internal/runtime.py +19 -0
  15. biolib/_internal/types/__init__.py +4 -0
  16. biolib/_internal/types/app.py +9 -0
  17. biolib/_internal/types/data_record.py +40 -0
  18. biolib/_internal/types/experiment.py +10 -0
  19. biolib/_internal/types/resource.py +14 -0
  20. biolib/_internal/types/typing.py +7 -0
  21. biolib/_internal/utils/__init__.py +18 -0
  22. biolib/_runtime/runtime.py +80 -0
  23. biolib/api/__init__.py +1 -0
  24. biolib/api/client.py +39 -17
  25. biolib/app/app.py +40 -72
  26. biolib/app/search_apps.py +8 -12
  27. biolib/biolib_api_client/api_client.py +22 -10
  28. biolib/biolib_api_client/app_types.py +2 -1
  29. biolib/biolib_api_client/biolib_app_api.py +1 -1
  30. biolib/biolib_api_client/biolib_job_api.py +6 -0
  31. biolib/biolib_api_client/job_types.py +4 -4
  32. biolib/biolib_api_client/lfs_types.py +8 -2
  33. biolib/biolib_binary_format/remote_endpoints.py +12 -10
  34. biolib/biolib_binary_format/utils.py +41 -4
  35. biolib/cli/__init__.py +6 -2
  36. biolib/cli/auth.py +58 -0
  37. biolib/cli/data_record.py +80 -0
  38. biolib/cli/download_container.py +3 -1
  39. biolib/cli/init.py +1 -0
  40. biolib/cli/lfs.py +45 -11
  41. biolib/cli/push.py +1 -1
  42. biolib/cli/run.py +3 -2
  43. biolib/cli/start.py +1 -0
  44. biolib/compute_node/cloud_utils/cloud_utils.py +15 -18
  45. biolib/compute_node/job_worker/cache_state.py +1 -1
  46. biolib/compute_node/job_worker/executors/docker_executor.py +134 -114
  47. biolib/compute_node/job_worker/job_storage.py +3 -4
  48. biolib/compute_node/job_worker/job_worker.py +31 -15
  49. biolib/compute_node/remote_host_proxy.py +75 -70
  50. biolib/compute_node/webserver/webserver_types.py +0 -1
  51. biolib/experiments/experiment.py +75 -44
  52. biolib/jobs/job.py +125 -47
  53. biolib/jobs/job_result.py +46 -21
  54. biolib/jobs/types.py +1 -1
  55. biolib/runtime/__init__.py +14 -1
  56. biolib/sdk/__init__.py +29 -5
  57. biolib/typing_utils.py +2 -7
  58. biolib/user/sign_in.py +10 -14
  59. biolib/utils/__init__.py +1 -1
  60. biolib/utils/app_uri.py +11 -4
  61. biolib/utils/cache_state.py +2 -2
  62. biolib/utils/seq_util.py +38 -30
  63. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
  64. pybiolib-1.1.2193.dist-info/RECORD +123 -0
  65. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +1 -1
  66. biolib/biolib_api_client/biolib_account_api.py +0 -8
  67. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -34
  68. biolib/experiments/types.py +0 -9
  69. biolib/lfs/__init__.py +0 -6
  70. biolib/lfs/utils.py +0 -237
  71. biolib/runtime/results.py +0 -20
  72. pybiolib-1.1.1747.dist-info/RECORD +0 -108
  73. /biolib/{lfs → _internal/lfs}/cache.py +0 -0
  74. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
  75. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0
biolib/utils/seq_util.py CHANGED
@@ -1,27 +1,26 @@
1
1
  import re
2
2
  from io import BufferedIOBase
3
- from biolib.typing_utils import List, Optional, Dict, Union
4
3
 
5
- allowed_sequence_chars = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.")
6
- def find_invalid_sequence_characters(sequence):
7
- invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
8
- return invalid_chars
4
+ from biolib.typing_utils import Dict, List, Optional, Union
5
+
9
6
 
10
7
  class SeqUtilRecord:
11
- def __init__(self,
12
- sequence: str,
13
- sequence_id: str,
14
- description: Optional['str'],
15
- properties: Optional[Dict[str, str]] = None):
8
+ def __init__(
9
+ self,
10
+ sequence: str,
11
+ sequence_id: str,
12
+ description: Optional['str'],
13
+ properties: Optional[Dict[str, str]] = None,
14
+ ):
16
15
  self.sequence = sequence
17
16
  self.id = sequence_id # pylint: disable=invalid-name
18
17
  self.description = description
19
18
 
20
19
  if properties:
21
- disallowed_pattern = re.compile(r"[=\[\]\n]")
20
+ disallowed_pattern = re.compile(r'[=\[\]\n]')
22
21
  for key, value in properties.items():
23
- assert not bool(disallowed_pattern.search(key)), "Key cannot contain characters =[] and newline"
24
- assert not bool(disallowed_pattern.search(value)), "Value cannot contain characters =[] and newline"
22
+ assert not bool(disallowed_pattern.search(key)), 'Key cannot contain characters =[] and newline'
23
+ assert not bool(disallowed_pattern.search(value)), 'Value cannot contain characters =[] and newline'
25
24
  self.properties = properties
26
25
  else:
27
26
  self.properties = {}
@@ -33,24 +32,24 @@ class SeqUtilRecord:
33
32
  class SeqUtil:
34
33
  @staticmethod
35
34
  def parse_fasta(
36
- input_file: Union[str, BufferedIOBase] = None,
35
+ input_file: Union[str, BufferedIOBase, None] = None,
37
36
  default_header: Optional[str] = None,
38
37
  allow_any_sequence_characters: bool = False,
39
- allow_empty_sequence: bool = False,
38
+ allow_empty_sequence: bool = True,
40
39
  file_name: Optional[str] = None,
41
40
  ) -> List[SeqUtilRecord]:
42
41
  if input_file is None:
43
42
  if file_name:
44
43
  input_file = file_name
45
44
  else:
46
- raise ValueError("input_file must be a file name (str) or a BufferedIOBase object")
45
+ raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
47
46
  if isinstance(input_file, str):
48
- with open(input_file, 'r') as file_handle:
47
+ with open(input_file) as file_handle:
49
48
  data = file_handle.read().strip()
50
49
  elif isinstance(input_file, BufferedIOBase):
51
50
  data = input_file.read().decode('utf-8')
52
51
  else:
53
- raise ValueError("input_file must be a file name (str) or a BufferedIOBase object")
52
+ raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
54
53
  if not data:
55
54
  return []
56
55
 
@@ -66,9 +65,9 @@ class SeqUtil:
66
65
  raise Exception(f'No header line found in FASTA file "{file_name}"')
67
66
 
68
67
  splitted = []
69
- tmp_data = ""
68
+ tmp_data = ''
70
69
  for line in data.splitlines():
71
- if line.startswith(">"):
70
+ if line.startswith('>'):
72
71
  if tmp_data:
73
72
  splitted.append(tmp_data)
74
73
  tmp_data = line[1:].strip() + '\n'
@@ -84,23 +83,20 @@ class SeqUtil:
84
83
  sequence_data_splitted = sequence_data.strip().split('\n')
85
84
  header_line = sequence_data_splitted[0].split()
86
85
  sequence_id = header_line[0]
87
- description = sequence_data_splitted[0][len(sequence_id):].strip()
88
- sequence = "".join([seq.strip().upper() for seq in sequence_data_splitted[1:]])
86
+ description = sequence_data_splitted[0][len(sequence_id) :].strip()
87
+ sequence = ''.join([seq.strip() for seq in sequence_data_splitted[1:]])
89
88
 
90
89
  if not allow_any_sequence_characters:
91
- invalid_sequence_characters = find_invalid_sequence_characters(sequence)
90
+ invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
92
91
  if len(invalid_sequence_characters) > 0:
93
92
  raise Exception(
94
93
  f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
95
94
  )
96
95
  if not allow_empty_sequence and len(sequence) == 0:
97
- raise Exception(
98
- f'Error: No sequence found for fasta entry {sequence_id}'
99
- )
96
+ raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
97
+
98
+ parsed_sequences.append(SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description))
100
99
 
101
- parsed_sequences.append(
102
- SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description)
103
- )
104
100
  return parsed_sequences
105
101
 
106
102
  @staticmethod
@@ -111,5 +107,17 @@ class SeqUtil:
111
107
  if record.properties:
112
108
  for key, value in record.properties.items():
113
109
  optional_description += f' [{key}={value}]'
114
- sequence = '\n'.join(record.sequence[i:i + 80] for i in range(0, len(record.sequence), 80))
110
+ sequence = '\n'.join(record.sequence[i : i + 80] for i in range(0, len(record.sequence), 80))
115
111
  file_handle.write(f'>{record.id}{optional_description}\n{sequence}\n')
112
+
113
+ @staticmethod
114
+ def _find_invalid_sequence_characters(sequence: str) -> List[str]:
115
+ allowed_sequence_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.')
116
+ invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
117
+ return invalid_chars
118
+
119
+ @staticmethod
120
+ def _find_invalid_sequence_id_characters(sequence: str) -> List[str]:
121
+ allowed_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.:*#')
122
+ invalid_chars = [char for char in sequence if char not in allowed_chars]
123
+ return invalid_chars
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.1.1747
3
+ Version: 1.1.2193
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -0,0 +1,123 @@
1
+ LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
2
+ README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
3
+ biolib/__init__.py,sha256=_tThyzISH81yS9KXP_X3qEiKXmsIp5XOBcJIODfLVnc,4338
4
+ biolib/_data_record/data_record.py,sha256=CoyYRse5VdUBhQzzPfR9BkytgOsM-IZxkfMX1kyRnPk,12589
5
+ biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ biolib/_internal/data_record/__init__.py,sha256=fGdME6JGRU_2VxpJbYpGXYndjN-feUkmKY4fuMyq3cg,76
7
+ biolib/_internal/data_record/data_record.py,sha256=YmaAABR57goDCE8-rKb2j0FPMSbDtRPCm_HhT3mM074,4299
8
+ biolib/_internal/data_record/remote_storage_endpoint.py,sha256=eCptuZ4DMAPnaNCVDvpWXwXGI6Jac9U1N5dqU8Cj95Q,1732
9
+ biolib/_internal/file_utils.py,sha256=4jT6j7bB21c0JNn5BfnyWQib_zt0CVtJ_TiOFOStRcE,2604
10
+ biolib/_internal/fuse_mount/__init__.py,sha256=B_tM6RM2dBw-vbpoHJC4X3tOAaN1H2RDvqYJOw3xFwg,55
11
+ biolib/_internal/fuse_mount/experiment_fuse_mount.py,sha256=08aUdEq_bvqLBft_gSLjOClKDy5sBnMts1RfJf7AP_U,7012
12
+ biolib/_internal/http_client.py,sha256=DdooXei93JKGYGV4aQmzue_oFzvHkozg2UCxgk9dfDM,5081
13
+ biolib/_internal/lfs/__init__.py,sha256=gSWo_xg61UniYgD7yNYxeT4I9uaXBCBSi3_nmZjnPpE,35
14
+ biolib/_internal/lfs/cache.py,sha256=pQS2np21rdJ6I3DpoOutnzPHpLOZgUIS8TMltUJk_k4,2226
15
+ biolib/_internal/libs/__init__.py,sha256=Jdf4tNPqe_oIIf6zYml6TiqhL_02Vyqwge6IELrAFhw,98
16
+ biolib/_internal/libs/fusepy/__init__.py,sha256=AWDzNFS-XV_5yKb0Qx7kggIhPzq1nj_BZS5y2Nso08k,41944
17
+ biolib/_internal/push_application.py,sha256=8P7eXvySn7CRp5XBDkO3xjTGixS8g7-jD-_iwzM_XDI,10020
18
+ biolib/_internal/runtime.py,sha256=9pZ3s3L7LGxdqOgnHh1KK3Jjyn_9MjhQmKHI-6hMT3U,448
19
+ biolib/_internal/types/__init__.py,sha256=11ZucS8jKeLGAAswXyKI7FH2KLHd6T9Sh8ZK2Ar3jlk,152
20
+ biolib/_internal/types/app.py,sha256=Mz2QGD_jESX-K9JYnLWPo4YA__Q_1FQQTk9pvidCohU,118
21
+ biolib/_internal/types/data_record.py,sha256=AHoIiwVqeHj0HozQxFRAyxk-d3XJgLWno4ic1z9eTrQ,865
22
+ biolib/_internal/types/experiment.py,sha256=D94iBdn2nS92lRW-TOs1a2WKXJD5ZtmzL4ypggKX2ys,176
23
+ biolib/_internal/types/resource.py,sha256=G-vPkZoe4Um6FPxsQZtRzAlbSW5sDW4NFkbjn21I3V4,372
24
+ biolib/_internal/types/typing.py,sha256=D4EKKEe7kDx0K6lJi-H_XLtk-8w6nu2fdqn9bvzI-Xo,288
25
+ biolib/_internal/utils/__init__.py,sha256=p5vsIFyu-zYqBgdSMfwW9NC_jk7rXvvCbV4Bzd3As7c,630
26
+ biolib/_runtime/runtime.py,sha256=daYxzIpRoW4k-HJFu2BMXeylYSlCXn3-SqdSriCFnKw,2770
27
+ biolib/api/__init__.py,sha256=mQ4u8FijqyLzjYMezMUUbbBGNB3iFmkNdjXnWPZ7Jlw,138
28
+ biolib/api/client.py,sha256=FRpdH5aI187b_I_4HUNi680v4iOP65z5f2RcUo8D8MA,3559
29
+ biolib/app/__init__.py,sha256=cdPtcfb_U-bxb9iSL4fCEq2rpD9OjkyY4W-Zw60B0LI,37
30
+ biolib/app/app.py,sha256=P2RwaDAskUHzlciuTJUroqUocRwoyOLT6YbgMyCRRDI,8484
31
+ biolib/app/search_apps.py,sha256=K4a41f5XIWth2BWI7OffASgIsD0ko8elCax8YL2igaY,1470
32
+ biolib/biolib_api_client/__init__.py,sha256=E5EMa19wJoblwSdQPYrxc_BtIeRsAuO0L_jQweWw-Yk,182
33
+ biolib/biolib_api_client/api_client.py,sha256=ciNx4ybpyKG5LEf4KQdGEz13r0jTxImyQat4_HDecD0,7373
34
+ biolib/biolib_api_client/app_types.py,sha256=FxSr4UqfnMhLe34p8bm02wsC3g1Jz8iaing5tRKDOQI,2442
35
+ biolib/biolib_api_client/auth.py,sha256=kjm0ZHnH3I8so3su2sZbBxNHYp-ZUdrZ5lwQ0K36RSw,949
36
+ biolib/biolib_api_client/biolib_app_api.py,sha256=DndlVxrNTes6DOaWyMINLGZQCRMWVvR7gwt5HVlyf5Y,4240
37
+ biolib/biolib_api_client/biolib_job_api.py,sha256=7bKfav3-12ewXkEUoLdCmbWdebW8148kxfGJW9SsXZI,7125
38
+ biolib/biolib_api_client/common_types.py,sha256=RH-1KNHqUF-EkTpfPOSTt5Mq1GPdfju_cqXDesscO1I,123
39
+ biolib/biolib_api_client/job_types.py,sha256=Dl4NhU2xpgpXV-7YIoDf6WL63SLR5bni55OX8x5539M,1300
40
+ biolib/biolib_api_client/lfs_types.py,sha256=joZWP6-sa5_Ug_6xIp5fHAgEo_bqLE3rbleQocZtDcg,339
41
+ biolib/biolib_api_client/user_state.py,sha256=XcgWV-MgVk88mIlMmnu8yHxMu8OCaw8o0tk7TVo5Hcg,637
42
+ biolib/biolib_binary_format/__init__.py,sha256=HMl5SdX_VUWE4OQzi4Jf_yFvC7b0bSPOGPHYi9dWM2Q,185
43
+ biolib/biolib_binary_format/base_bbf_package.py,sha256=vxRV4iKy0dXeDOlFWnMFI0hGnDBYDH5Cgh5gAfuObt8,959
44
+ biolib/biolib_binary_format/file_in_container.py,sha256=j1eEPRxf_ew8I6G8sDiiZZxn4Wx1ppagfM9K8zTDG4U,3591
45
+ biolib/biolib_binary_format/module_input.py,sha256=led2QhHeec_ymBPw5uEn3_3vJKI-1T8zrFQGqwEWLMY,2788
46
+ biolib/biolib_binary_format/module_output_v2.py,sha256=J5ZO5gCSeudpE12EVDrjYrNTS2DwgszY-SVXT7Qjuyg,5913
47
+ biolib/biolib_binary_format/remote_endpoints.py,sha256=V48mwOj3eAQAKp-8DjtWUdEKUyC0WKc1pEiKTmtjrJY,1651
48
+ biolib/biolib_binary_format/remote_stream_seeker.py,sha256=uyi6kJBU1C1DWfiuR0kRUQIY7nalG7ocgwgngd3Ul4U,1999
49
+ biolib/biolib_binary_format/saved_job.py,sha256=nFHVFRNTNcAFGODLSiBntCtMk55QKwreUq6qLX80dI4,1125
50
+ biolib/biolib_binary_format/stdout_and_stderr.py,sha256=WfUUJFFCBrtfXjuWIaRPiWCpuBLxfko68oxoTKhrwx8,1023
51
+ biolib/biolib_binary_format/system_exception.py,sha256=T3iL4_cSHAHim3RSDPS8Xyb1mfteaJBZonSXuRltc28,853
52
+ biolib/biolib_binary_format/system_status_update.py,sha256=aOELuQ0k-GtpaZTUxYd0GFomP_OInmrK585y6fuQuKE,1191
53
+ biolib/biolib_binary_format/utils.py,sha256=ra_plrh_Z10u98O2gW9uW2qzscQZCfq91SOznmDTY64,5170
54
+ biolib/biolib_docker_client/__init__.py,sha256=aBfA6mtWSI5dBEfNNMD6bIZzCPloW4ghKm0wqQiljdo,1481
55
+ biolib/biolib_download_container.py,sha256=8TmBV8iv3bCvkNlHa1SSsc4zl0wX_eaxhfnW5rvFIh8,1779
56
+ biolib/biolib_errors.py,sha256=5m4lK2l39DafpoXBImEBD4EPH3ayXBX0JgtPzmGClow,689
57
+ biolib/biolib_logging.py,sha256=J3E5H_LL5k6ZUim2C8gqN7E6lCBZMTpO4tnMpOPwG9U,2854
58
+ biolib/cli/__init__.py,sha256=0v3c_J-U0k46c5ZWeQjLG_kTaKDJm81LBxQpDO2B_aI,1286
59
+ biolib/cli/auth.py,sha256=rpWGmXs6Fz6CGrO9K8ibPRszOdXG78Vig_boKaVCD9A,2082
60
+ biolib/cli/data_record.py,sha256=08JbZkFWKMo0PrnhhG0jQEKnNW7pPLti9cOw8s1TWfI,3344
61
+ biolib/cli/download_container.py,sha256=HIZVHOPmslGE5M2Dsp9r2cCkAEJx__vcsDz5Wt5LRos,483
62
+ biolib/cli/init.py,sha256=wQOfii_au-d30Hp7DdH-WVw-WVraKvA_zY4za1w7DE8,821
63
+ biolib/cli/lfs.py,sha256=z2qHUwink85mv9yDgifbVKkVwuyknGhMDTfly_gLKJM,4151
64
+ biolib/cli/push.py,sha256=TFi7O9tJ3zFe0VmtVTV3Vh9_xIMHnrc41xxcaBKU46g,813
65
+ biolib/cli/run.py,sha256=BbvXLQ-XibjQ71Y2d4URMH_8dflNVwM0i3TIWhw_u_c,1634
66
+ biolib/cli/runtime.py,sha256=Xv-nrma5xX8NidWcvbUKcUvuN5TCarZa4A8mPVmF-z0,361
67
+ biolib/cli/start.py,sha256=rg8VVY8rboFhf1iQo3zE3WA5oh_R1VWWfYJEO1gMReY,1737
68
+ biolib/compute_node/.gitignore,sha256=GZdZ4g7HftqfOfasFpBC5zV1YQAbht1a7EzcXD6f3zg,45
69
+ biolib/compute_node/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
+ biolib/compute_node/cloud_utils/__init__.py,sha256=VZSScLqaz5tg_gpMvWgwkAu9Qf-vgW_QHRoDOaAmU44,67
71
+ biolib/compute_node/cloud_utils/cloud_utils.py,sha256=_iaKelsmQLLwbDKVXZMXPFZayZPH9iHXc4NISFP9uzk,7462
72
+ biolib/compute_node/job_worker/__init__.py,sha256=ipdPWaABKYrltxny15e2kK8PWdEE7VzXbkKK6wM_zDk,71
73
+ biolib/compute_node/job_worker/cache_state.py,sha256=MwjSRzcJJ_4jybqvBL4xdgnDYSIiw4s90pNn83Netoo,4830
74
+ biolib/compute_node/job_worker/cache_types.py,sha256=ajpLy8i09QeQS9dEqTn3T6NVNMY_YsHQkSD5nvIHccQ,818
75
+ biolib/compute_node/job_worker/docker_image_cache.py,sha256=ansHIkJIq_EMW1nZNlW-RRLVVeKWTbzNICYaOHpKiRE,7460
76
+ biolib/compute_node/job_worker/executors/__init__.py,sha256=bW6t1qi3PZTlHM4quaTLa8EI4ALTCk83cqcVJfJfJfE,145
77
+ biolib/compute_node/job_worker/executors/docker_executor.py,sha256=2H7GooL0oAifPcbie0unatB4fRoHyqbsr6S91uagc_g,27952
78
+ biolib/compute_node/job_worker/executors/docker_types.py,sha256=VhsU1DKtJjx_BbCkVmiPZPH4ROiL1ygW1Y_s1Kbpa2o,216
79
+ biolib/compute_node/job_worker/executors/tars/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
+ biolib/compute_node/job_worker/executors/types.py,sha256=yP5gG39hr-DLnw9bOE--VHi-1arDbIYiGuV1rlTbbHI,1466
81
+ biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py,sha256=_cvEiZbOwfkv6fYmfrvdi_FVviIEYr_dSClQcOQaUWM,1198
82
+ biolib/compute_node/job_worker/job_max_runtime_timer_thread.py,sha256=K_xgz7IhiIjpLlXRk8sqaMyLoApcidJkgu29sJX0gb8,1174
83
+ biolib/compute_node/job_worker/job_storage.py,sha256=LNkklckDLbYgCHsK5FGrEK75Kw-H4f4JcTCAtuE9His,4035
84
+ biolib/compute_node/job_worker/job_worker.py,sha256=fuWoYJo9HOqLmWl8yeCXh0mhT4ebbkrWac-BVb58khs,28842
85
+ biolib/compute_node/job_worker/large_file_system.py,sha256=XXqRlVtYhs-Ji9zQGIk5KQPXFO_Q5jJH0nnlw4GkeMY,10461
86
+ biolib/compute_node/job_worker/mappings.py,sha256=Z48Kg4nbcOvsT2-9o3RRikBkqflgO4XeaWxTGz-CNvI,2499
87
+ biolib/compute_node/job_worker/utilization_reporter_thread.py,sha256=7tm5Yk9coqJ9VbEdnO86tSXI0iM0omwIyKENxdxiVXk,8575
88
+ biolib/compute_node/job_worker/utils.py,sha256=wgxcIA8yAhUPdCwyvuuJ0JmreyWmmUoBO33vWtG60xg,1282
89
+ biolib/compute_node/remote_host_proxy.py,sha256=CNWJLXXYm8DGujxEJIsg1wUKFoSgU0nhwdmjMn5gelE,14690
90
+ biolib/compute_node/socker_listener_thread.py,sha256=T5_UikA3MB9bD5W_dckYLPTgixh72vKUlgbBvj9dbM0,1601
91
+ biolib/compute_node/socket_sender_thread.py,sha256=YgamPHeUm2GjMFGx8qk-99WlZhEs-kAb3q_2O6qByig,971
92
+ biolib/compute_node/utils.py,sha256=M7i_WTyxbFM3Lri9RWZ_8FeQNYrQIWpKGLfp2I55oeY,4677
93
+ biolib/compute_node/webserver/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
94
+ biolib/compute_node/webserver/gunicorn_flask_application.py,sha256=jPfR_YvNBekLUXWo_vHFV-FIwlb8s8tacKmGHvh93qc,914
95
+ biolib/compute_node/webserver/webserver.py,sha256=15PkRyhtdtSgFDxa0z78aPO4ciZURsFqJYi-HtUmZF8,6494
96
+ biolib/compute_node/webserver/webserver_types.py,sha256=2t8EaFKESnves3BA_NBdnS2yAdo1qwamCFHiSt888nE,380
97
+ biolib/compute_node/webserver/webserver_utils.py,sha256=XWvwYPbWNR3qS0FYbLLp-MDDfVk0QdaAmg3xPrT0H2s,4234
98
+ biolib/compute_node/webserver/worker_thread.py,sha256=26tG73TADnOcXsAr7Iyf6smrLlCqB4x-vvmpUb8WqnA,11569
99
+ biolib/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
+ biolib/experiments/experiment.py,sha256=jIRixmQm3Gq9YdJ3I0-rE1vFukXqq6U4zXehFOJ1yZk,7614
101
+ biolib/jobs/__init__.py,sha256=aIb2H2DHjQbM2Bs-dysFijhwFcL58Blp0Co0gimED3w,32
102
+ biolib/jobs/job.py,sha256=OfG8cLd3AjGjiMWRlJRZdVVbLsRWSX-OM5nxJhR6mPQ,19136
103
+ biolib/jobs/job_result.py,sha256=rALHiKYNaC9lHi_JJqBob1RubzNLwG9Z386kwRJjd2M,5885
104
+ biolib/jobs/types.py,sha256=qhadtH2KDC2WUOOqPiwke0YgtQY4FtuB71Stekq1k48,970
105
+ biolib/runtime/__init__.py,sha256=MlRepA11n2H-3plB5rzWyyHK2JmP6PiaP3i6x3vt0mg,506
106
+ biolib/sdk/__init__.py,sha256=amVp_jMxi2nqCcTsmL2aKUNGCAH3Yk4EzAnps9d1VH8,1928
107
+ biolib/tables.py,sha256=acH7VjwAbadLo8P84FSnKEZxCTVsF5rEg9VPuxElNs8,872
108
+ biolib/templates/__init__.py,sha256=Yx62sSyDCDesRQDQgmbDsLpfgEh93fWE8r9u4g2azXk,36
109
+ biolib/templates/example_app.py,sha256=EB3E3RT4SeO_ii5nVQqJpi5KDGNE_huF1ub-e5ZFveE,715
110
+ biolib/typing_utils.py,sha256=ntzrlyTkUaO2OtccLYzCAGztGdca0WT5fikJUmSkT-Y,148
111
+ biolib/user/__init__.py,sha256=Db5wtxLfFz3ID9TULSSTo77csw9tO6RtxMRvV5cqKEE,39
112
+ biolib/user/sign_in.py,sha256=XTAmRPKfmg7VAaB8cT5wcmfxoPXeHqY8LmDiADF7zbw,2064
113
+ biolib/utils/__init__.py,sha256=fwjciJyJicvYyZcVTzfDBgD0SKY13DeXqvTeG4qZIy8,5548
114
+ biolib/utils/app_uri.py,sha256=Yq_-_VGugQhMMo6mM5f0G9yNlLkr0WK4j0Nrf3FE4xQ,2171
115
+ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3100
116
+ biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
117
+ biolib/utils/seq_util.py,sha256=ZQFcaE37B2dtucN2zDjOmdya_X0ITc1zBFZJNQY13XA,5183
118
+ biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
119
+ pybiolib-1.1.2193.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
120
+ pybiolib-1.1.2193.dist-info/METADATA,sha256=NCFsHsCnbnOhw1O2L-polZfFa2pNdtA89TfBW7f_o7E,1508
121
+ pybiolib-1.1.2193.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
122
+ pybiolib-1.1.2193.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
123
+ pybiolib-1.1.2193.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.8.1
2
+ Generator: poetry-core 1.9.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,8 +0,0 @@
1
- import biolib.api
2
-
3
- class BiolibAccountApi:
4
-
5
- @staticmethod
6
- def fetch_by_handle(account_handle):
7
- response = biolib.api.client.get(path=f'/account/{account_handle}')
8
- return response.json()
@@ -1,34 +0,0 @@
1
- import biolib.api
2
- from biolib.biolib_api_client.lfs_types import LargeFileSystemVersion, LargeFileSystem
3
-
4
-
5
- class BiolibLargeFileSystemApi:
6
-
7
- @staticmethod
8
- def create(account_uuid: str, name: str) -> LargeFileSystem:
9
- response = biolib.api.client.post(
10
- path='/lfs/',
11
- data={'account_uuid': account_uuid, 'name': name},
12
- )
13
-
14
- lfs: LargeFileSystem = response.json()
15
- return lfs
16
-
17
- @staticmethod
18
- def fetch_version(lfs_version_uuid: str) -> LargeFileSystemVersion:
19
- response = biolib.api.client.get(
20
- path=f'/lfs/versions/{lfs_version_uuid}/',
21
- )
22
-
23
- lfs_version: LargeFileSystemVersion = response.json()
24
- return lfs_version
25
-
26
- @staticmethod
27
- def create_version(resource_uuid: str) -> LargeFileSystemVersion:
28
- response = biolib.api.client.post(
29
- path='/lfs/versions/',
30
- data={'resource_uuid': resource_uuid},
31
- )
32
-
33
- lfs_version: LargeFileSystemVersion = response.json()
34
- return lfs_version
@@ -1,9 +0,0 @@
1
- from biolib.typing_utils import TypedDict
2
-
3
-
4
- class ExperimentDict(TypedDict):
5
- created_at: str
6
- job_count: int
7
- job_running_count: int
8
- name: str
9
- uuid: str
biolib/lfs/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- from .cache import prune_lfs_cache
2
- from .utils import \
3
- describe_large_file_system, \
4
- get_file_data_from_large_file_system, \
5
- push_large_file_system, \
6
- create_large_file_system
biolib/lfs/utils.py DELETED
@@ -1,237 +0,0 @@
1
- import io
2
- import json
3
- import os
4
- import zipfile as zf
5
- from collections import namedtuple
6
- from pathlib import Path
7
- from struct import Struct
8
-
9
- from biolib import utils
10
- from biolib._internal.http_client import HttpClient
11
- from biolib.app import BioLibApp
12
- from biolib.biolib_api_client.biolib_account_api import BiolibAccountApi
13
- from biolib.biolib_api_client.biolib_large_file_system_api import BiolibLargeFileSystemApi
14
- from biolib.biolib_api_client import BiolibApiClient
15
- from biolib.biolib_logging import logger
16
- from biolib.biolib_errors import BioLibError
17
- from biolib.typing_utils import List, Tuple, Iterator, Optional
18
- from biolib.utils.zip.remote_zip import RemoteZip # type:ignore
19
-
20
-
21
- def _get_lfs_info_from_uri(lfs_uri):
22
- lfs_uri_parts = lfs_uri.split('/')
23
- lfs_uri_parts = [uri_part for uri_part in lfs_uri_parts if '@' not in uri_part] # Remove hostname
24
- team_account_handle = lfs_uri_parts[0]
25
- lfs_name = lfs_uri_parts[1]
26
- account = BiolibAccountApi.fetch_by_handle(team_account_handle)
27
- return account, lfs_name
28
-
29
-
30
- def get_files_and_size_of_directory(directory: str) -> Tuple[List[str], int]:
31
- data_size = 0
32
- file_list: List[str] = []
33
-
34
- for path, _, files in os.walk(directory):
35
- for file in files:
36
- file_path = os.path.join(path, file)
37
- if os.path.islink(file_path):
38
- continue # skip symlinks
39
-
40
- relative_file_path = file_path[len(directory) + 1:] # +1 to remove starting slash
41
- file_list.append(relative_file_path)
42
- data_size += os.path.getsize(file_path)
43
-
44
- return file_list, data_size
45
-
46
-
47
- def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes]:
48
- class ChunkedIOBuffer(io.RawIOBase):
49
- def __init__(self, chunk_size: int):
50
- super().__init__()
51
- self.chunk_size = chunk_size
52
- self.tmp_data = bytearray()
53
-
54
- def get_buffer_size(self):
55
- return len(self.tmp_data)
56
-
57
- def read_chunk(self):
58
- chunk = bytes(self.tmp_data[:self.chunk_size])
59
- self.tmp_data = self.tmp_data[self.chunk_size:]
60
- return chunk
61
-
62
- def write(self, data):
63
- data_length = len(data)
64
- self.tmp_data += data
65
- return data_length
66
-
67
- # create chunked buffer to hold data temporarily
68
- io_buffer = ChunkedIOBuffer(chunk_size)
69
-
70
- # create zip writer that will write to the io buffer
71
- zip_writer = zf.ZipFile(io_buffer, mode='w') # type: ignore
72
-
73
- for file_path in files:
74
- # generate zip info and prepare zip pointer for writing
75
- z_info = zf.ZipInfo.from_file(file_path)
76
- zip_pointer = zip_writer.open(z_info, mode='w')
77
- if Path(file_path).is_file():
78
- # read file chunk by chunk
79
- with open(file_path, 'br') as file_pointer:
80
- while True:
81
- chunk = file_pointer.read(chunk_size)
82
- if len(chunk) == 0:
83
- break
84
- # write the chunk to the zip
85
- zip_pointer.write(chunk)
86
- # if writing the chunk caused us to go over chunk_size, flush it
87
- if io_buffer.get_buffer_size() > chunk_size:
88
- yield io_buffer.read_chunk()
89
-
90
- zip_pointer.close()
91
-
92
- # flush any remaining data in the stream (e.g. zip file meta data)
93
- zip_writer.close()
94
- while True:
95
- chunk = io_buffer.read_chunk()
96
- if len(chunk) == 0:
97
- break
98
- yield chunk
99
-
100
-
101
- def create_large_file_system(lfs_uri: str):
102
- BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Large File System')
103
- lfs_account, lfs_name = _get_lfs_info_from_uri(lfs_uri)
104
- lfs_resource = BiolibLargeFileSystemApi.create(account_uuid=lfs_account['public_id'], name=lfs_name)
105
- logger.info(f"Successfully created new Large File System '{lfs_resource['uri']}'")
106
-
107
-
108
- def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> None:
109
- BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Large File System')
110
-
111
- if not os.path.isdir(input_dir):
112
- raise BioLibError(f'Could not find folder at {input_dir}')
113
-
114
- if os.path.realpath(input_dir) == '/':
115
- raise BioLibError('Pushing your root directory is not possible')
116
-
117
- lfs_resource = BioLibApp(lfs_uri)
118
-
119
- original_working_dir = os.getcwd()
120
- os.chdir(input_dir)
121
- files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
122
-
123
- if data_size_in_bytes > 4_500_000_000_000:
124
- raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
125
-
126
- min_chunk_size_bytes = 10_000_000
127
- chunk_size_in_bytes: int
128
- if chunk_size_in_mb:
129
- chunk_size_in_bytes = chunk_size_in_mb * 1_000_000 # Convert megabytes to bytes
130
- if chunk_size_in_bytes < min_chunk_size_bytes:
131
- logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
132
- chunk_size_in_bytes = min_chunk_size_bytes
133
- else:
134
- # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
135
- chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
136
-
137
- data_size_in_mb = round(data_size_in_bytes / 10 ** 6)
138
- print(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
139
-
140
- lfs_resource_version = BiolibLargeFileSystemApi.create_version(resource_uuid=lfs_resource.uuid)
141
- lfs_resource_version_uuid = lfs_resource_version['uuid']
142
-
143
- iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
144
-
145
- multipart_uploader = utils.MultiPartUploader(
146
- use_process_pool=True,
147
- get_presigned_upload_url_request=dict(
148
- headers=None,
149
- requires_biolib_auth=True,
150
- path=f'/lfs/versions/{lfs_resource_version_uuid}/presigned_upload_url/',
151
- ),
152
- complete_upload_request=dict(
153
- headers=None,
154
- requires_biolib_auth=True,
155
- path=f'/lfs/versions/{lfs_resource_version_uuid}/complete_upload/',
156
- ),
157
- )
158
-
159
- multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
160
- logger.info(f"Successfully pushed a new LFS version '{lfs_resource_version['uri']}'")
161
- os.chdir(original_working_dir)
162
-
163
-
164
- def describe_large_file_system(lfs_uri: str, output_as_json: bool = False) -> None:
165
- BiolibApiClient.assert_is_signed_in(authenticated_action_description='describe a Large File System')
166
- lfs_resource = BioLibApp(lfs_uri)
167
- lfs_version = BiolibLargeFileSystemApi.fetch_version(lfs_version_uuid=lfs_resource.version['public_id'])
168
-
169
- files = []
170
- total_size = 0
171
- with RemoteZip(url=lfs_version['presigned_download_url']) as remote_zip:
172
- central_directory = remote_zip.get_central_directory()
173
- for file in central_directory.values():
174
- files.append(dict(path=file['filename'], size_bytes=file['file_size']))
175
- total_size += file['file_size']
176
-
177
- lfs_version_metadata = dict(files=files, **lfs_version)
178
- lfs_version_metadata['size_bytes'] = total_size
179
-
180
- if output_as_json:
181
- print(json.dumps(lfs_version_metadata, indent=4))
182
- else:
183
- print(f"Large File System {lfs_version_metadata['uri']}\ntotal {lfs_version_metadata['size_bytes']} bytes\n")
184
- print('size bytes path')
185
- for file in files:
186
- size_string = str(file['size_bytes'])
187
- leading_space_string = ' ' * (10 - len(size_string))
188
- print(f"{leading_space_string}{size_string} {file['path']}")
189
-
190
-
191
- def get_file_data_from_large_file_system(lfs_uri: str, file_path: str) -> bytes:
192
- BiolibApiClient.assert_is_signed_in(authenticated_action_description='get file from a Large File System')
193
- lfs_resource = BioLibApp(lfs_uri)
194
- lfs_version = BiolibLargeFileSystemApi.fetch_version(lfs_version_uuid=lfs_resource.version['public_id'])
195
- lfs_url = lfs_version['presigned_download_url']
196
-
197
- with RemoteZip(lfs_url) as remote_zip:
198
- central_directory = remote_zip.get_central_directory()
199
- if file_path not in central_directory:
200
- raise Exception('File not found in Large File System')
201
-
202
- file_info = central_directory[file_path]
203
-
204
- local_file_header_signature_bytes = b'\x50\x4b\x03\x04'
205
- local_file_header_struct = Struct('<H2sHHHIIIHH')
206
- LocalFileHeader = namedtuple('LocalFileHeader', (
207
- 'version',
208
- 'flags',
209
- 'compression_raw',
210
- 'mod_time',
211
- 'mod_date',
212
- 'crc_32_expected',
213
- 'compressed_size_raw',
214
- 'uncompressed_size_raw',
215
- 'file_name_len',
216
- 'extra_field_len',
217
- ))
218
-
219
- local_file_header_start = file_info['header_offset'] + len(local_file_header_signature_bytes)
220
- local_file_header_end = local_file_header_start + local_file_header_struct.size
221
-
222
- local_file_header_response = HttpClient.request(
223
- url=lfs_url,
224
- headers={'range': f'bytes={local_file_header_start}-{local_file_header_end - 1}'},
225
- timeout_in_seconds=300,
226
- )
227
- local_file_header = LocalFileHeader._make(local_file_header_struct.unpack(local_file_header_response.content))
228
-
229
- file_start = local_file_header_end + local_file_header.file_name_len + local_file_header.extra_field_len
230
- file_end = file_start + file_info['file_size']
231
-
232
- response = HttpClient.request(
233
- url=lfs_url,
234
- headers={'range': f'bytes={file_start}-{file_end - 1}'},
235
- timeout_in_seconds=300, # timeout after 5 min
236
- )
237
- return response.content
biolib/runtime/results.py DELETED
@@ -1,20 +0,0 @@
1
- import json
2
-
3
- from biolib import api
4
-
5
-
6
- def set_main_result_prefix(result_prefix: str) -> None:
7
- try:
8
- with open('/biolib/secrets/biolib_system_secret', mode='r') as system_secrets_file:
9
- system_secrets = json.loads(system_secrets_file.read())
10
- except Exception: # pylint: disable=broad-except
11
- raise Exception('Unable to load the BioLib runtime system secret') from None
12
-
13
- if not system_secrets['version'].startswith('1.'):
14
- raise Exception(f"Unexpected system secret version {system_secrets['version']} expected 1.x.x")
15
-
16
- api.client.patch(
17
- data={'result_name_prefix': result_prefix},
18
- headers={'Job-Auth-Token': system_secrets['job_auth_token']},
19
- path=f"/jobs/{system_secrets['job_uuid']}/main_result/",
20
- )