pybiolib 1.1.2193__py3-none-any.whl → 1.2.105.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
biolib/__init__.py CHANGED
@@ -41,8 +41,8 @@ def search(
41
41
  return apps
42
42
 
43
43
 
44
- def get_job(job_id: str) -> _Job:
45
- return _Job.create_from_uuid(uuid=job_id)
44
+ def get_job(job_id: str, job_token: Optional[str] = None) -> _Job:
45
+ return _Job.create_from_uuid(uuid=job_id, auth_token=job_token)
46
46
 
47
47
 
48
48
  def get_data_record(uri: str) -> _DataRecord:
@@ -6,19 +6,21 @@ from pathlib import Path
6
6
  from struct import Struct
7
7
  from typing import Callable, Dict, List, Optional, Union, cast
8
8
 
9
- from biolib import api, utils
9
+ from biolib import api
10
10
  from biolib._internal import types
11
11
  from biolib._internal.data_record import get_data_record_state_from_uri
12
12
  from biolib._internal.data_record.data_record import validate_sqlite_v1
13
+ from biolib._internal.data_record.push_data import (
14
+ push_data_path,
15
+ validate_data_path_and_get_files_and_size_of_directory,
16
+ )
13
17
  from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
14
- from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
15
18
  from biolib._internal.http_client import HttpClient
16
19
  from biolib.api import client as api_client
17
20
  from biolib.biolib_api_client import BiolibApiClient
18
21
  from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersion, DataRecordVersionInfo
19
22
  from biolib.biolib_binary_format import LazyLoadedFile
20
23
  from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
21
- from biolib.biolib_errors import BioLibError
22
24
  from biolib.biolib_logging import logger
23
25
  from biolib.utils.app_uri import parse_app_uri
24
26
  from biolib.utils.zip.remote_zip import RemoteZip
@@ -85,18 +87,8 @@ class DataRecord:
85
87
  self.download_files(output_dir=output_dir, path_filter=path_filter)
86
88
 
87
89
  def update(self, data_path: str, chunk_size_in_mb: Optional[int] = None) -> None:
88
- assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
89
90
  BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Data Record')
90
-
91
- if os.path.realpath(data_path) == '/':
92
- raise BioLibError('Pushing your root directory is not possible')
93
-
94
- original_working_dir = os.getcwd()
95
- os.chdir(data_path)
96
- files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
97
-
98
- if data_size_in_bytes > 4_500_000_000_000:
99
- raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
91
+ files_to_zip, data_size_in_bytes = validate_data_path_and_get_files_and_size_of_directory(data_path)
100
92
 
101
93
  # validate data record
102
94
  detailed_dict: types.DataRecordDetailedDict = self._get_detailed_dict()
@@ -114,40 +106,23 @@ class DataRecord:
114
106
  else:
115
107
  raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")
116
108
 
117
- min_chunk_size_bytes = 10_000_000
118
- chunk_size_in_bytes: int
119
- if chunk_size_in_mb:
120
- chunk_size_in_bytes = chunk_size_in_mb * 1_000_000 # Convert megabytes to bytes
121
- if chunk_size_in_bytes < min_chunk_size_bytes:
122
- logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
123
- chunk_size_in_bytes = min_chunk_size_bytes
124
- else:
125
- # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
126
- chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
127
-
128
- data_size_in_mb = round(data_size_in_bytes / 10**6)
129
- logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
130
-
131
109
  response = api.client.post(path='/lfs/versions/', data={'resource_uuid': self._state['resource_uuid']})
132
110
  data_record_version: DataRecordVersion = response.json()
133
- iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
134
-
135
- multipart_uploader = utils.MultiPartUploader(
136
- use_process_pool=True,
137
- get_presigned_upload_url_request=dict(
138
- headers=None,
139
- requires_biolib_auth=True,
140
- path=f"/lfs/versions/{data_record_version['uuid']}/presigned_upload_url/",
141
- ),
142
- complete_upload_request=dict(
143
- headers=None,
144
- requires_biolib_auth=True,
145
- path=f"/lfs/versions/{data_record_version['uuid']}/complete_upload/",
146
- ),
111
+ resource_version_uuid = data_record_version['uuid']
112
+
113
+ push_data_path(
114
+ data_path=data_path,
115
+ data_size_in_bytes=data_size_in_bytes,
116
+ files_to_zip=files_to_zip,
117
+ resource_version_uuid=resource_version_uuid,
118
+ chunk_size_in_mb=chunk_size_in_mb,
119
+ )
120
+
121
+ api.client.patch(
122
+ path=f'/resources/versions/{resource_version_uuid}/',
123
+ data={'state': 'published', 'set_as_active': True},
147
124
  )
148
125
 
149
- multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
150
- os.chdir(original_working_dir)
151
126
  logger.info(f"Successfully pushed a new Data Record version '{data_record_version['uri']}'")
152
127
  self._state = get_data_record_state_from_uri(data_record_version['uri'])
153
128
 
@@ -177,12 +152,11 @@ class DataRecord:
177
152
  data_record_info: DataRecordInfo = response.json()
178
153
  logger.info(f"Successfully created new Data Record '{data_record_info['uri']}'")
179
154
 
155
+ data_record = DataRecord.get_by_uri(uri=data_record_info['uri'])
180
156
  if data_path is not None:
181
- data_record = DataRecord.get_by_uri(uri=data_record_info['uri'])
182
157
  data_record.update(data_path=data_path)
183
- return data_record
184
- else:
185
- return DataRecord.get_by_uri(uri=data_record_info['uri'])
158
+
159
+ return data_record
186
160
 
187
161
  @staticmethod
188
162
  def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
@@ -83,6 +83,8 @@ def verify_schema(specification: SqliteV1DatabaseSchema, actual_schema: SqliteV1
83
83
 
84
84
  def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
85
85
  app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': uri}).json()
86
+ if app_response['app']['type'] != 'data-record':
87
+ raise Exception(f'Resource "{uri}" is not a Data Record')
86
88
  return DataRecordVersionInfo(
87
89
  resource_uri=app_response['app_version']['app_uri'],
88
90
  resource_uuid=app_response['app']['public_id'],
@@ -0,0 +1,67 @@
1
+ import os
2
+
3
+ from biolib import utils
4
+ from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
5
+ from biolib._internal.types.typing import List, Optional, Tuple
6
+ from biolib.biolib_errors import BioLibError
7
+ from biolib.biolib_logging import logger
8
+
9
+
10
def validate_data_path_and_get_files_and_size_of_directory(data_path: str) -> Tuple[List[str], int]:
    """Validate a directory that is about to be pushed and list its contents.

    Args:
        data_path: Path of the directory whose contents should be pushed.

    Returns:
        The ``(files_to_zip, data_size_in_bytes)`` pair returned by
        ``get_files_and_size_of_directory`` for the directory.

    Raises:
        AssertionError: If ``data_path`` is not a directory.
        BioLibError: If ``data_path`` resolves to ``/`` or the reported size
            exceeds the 4.5 TB limit.
    """
    # NOTE(review): asserts are stripped under ``python -O``; kept as an assert so the
    # exception type observed by existing callers does not change.
    assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'

    if os.path.realpath(data_path) == '/':
        raise BioLibError('Pushing your root directory is not possible')

    original_working_dir = os.getcwd()
    os.chdir(data_path)
    try:
        files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
    finally:
        # Restore the caller's working directory even when listing the directory fails.
        os.chdir(original_working_dir)

    if data_size_in_bytes > 4_500_000_000_000:
        raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')

    return files_to_zip, data_size_in_bytes
25
+
26
+
27
def push_data_path(
    data_path: str,
    data_size_in_bytes: int,
    files_to_zip: List[str],
    resource_version_uuid: str,
    chunk_size_in_mb: Optional[int] = None,
) -> None:
    """Zip the given files and upload them to a resource version as a multipart upload.

    The process working directory is switched to ``data_path`` while zipping and
    uploading, and is restored afterwards, also when the upload fails.

    Args:
        data_path: Directory to change into for the duration of the upload.
        data_size_in_bytes: Total payload size, used for logging, for deriving the
            default chunk size and as the payload size passed to the uploader.
        files_to_zip: Files handed to ``get_iterable_zip_stream``.
            # presumably relative to ``data_path`` -- verify against caller
        resource_version_uuid: UUID used to build the ``/lfs/versions/<uuid>/...``
            upload endpoints.
        chunk_size_in_mb: Optional chunk size in megabytes. Values below 10 MB are
            raised to 10 MB with a warning. When omitted, the chunk size is derived
            from the payload size.
    """
    original_working_dir = os.getcwd()
    os.chdir(data_path)
    try:
        min_chunk_size_bytes = 10_000_000
        chunk_size_in_bytes: int
        if chunk_size_in_mb:
            chunk_size_in_bytes = chunk_size_in_mb * 1_000_000  # Convert megabytes to bytes
            if chunk_size_in_bytes < min_chunk_size_bytes:
                logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
                chunk_size_in_bytes = min_chunk_size_bytes
        else:
            # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
            chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))

        data_size_in_mb = round(data_size_in_bytes / 10**6)
        logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')

        iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
        multipart_uploader = utils.MultiPartUploader(
            use_process_pool=True,
            get_presigned_upload_url_request=dict(
                headers=None,
                requires_biolib_auth=True,
                path=f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
            ),
            complete_upload_request=dict(
                headers=None,
                requires_biolib_auth=True,
                path=f'/lfs/versions/{resource_version_uuid}/complete_upload/',
            ),
        )

        multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
    finally:
        # Never leave the process inside data_path, even if zipping or uploading raised.
        os.chdir(original_working_dir)
@@ -125,12 +125,14 @@ class HttpClient:
125
125
 
126
126
  except urllib.error.URLError as error:
127
127
  if isinstance(error.reason, socket.timeout):
128
- logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
128
+ if retry_count > 0:
129
+ logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
129
130
  last_error = error
130
131
  else:
131
132
  raise error
132
133
  except socket.timeout as error:
133
- logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
134
+ if retry_count > 0:
135
+ logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
134
136
  last_error = error
135
137
 
136
138
  raise last_error or Exception(f'HTTP {method} request failed after {retries} retries for "{url}"')
@@ -6,6 +6,10 @@ import rich.progress
6
6
  import yaml
7
7
 
8
8
  from biolib import api, utils
9
+ from biolib._internal.data_record.push_data import (
10
+ push_data_path,
11
+ validate_data_path_and_get_files_and_size_of_directory,
12
+ )
9
13
  from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
10
14
  from biolib.biolib_api_client import BiolibApiClient
11
15
  from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
@@ -94,10 +98,25 @@ def push_application(
94
98
  zip_filters.add('.biolib/config.yml')
95
99
 
96
100
  input_files_maps_to_root = False
101
+ app_data_path: Optional[Path] = None
97
102
  try:
98
103
  with open(config_yml_path) as config_yml_file:
99
104
  config = yaml.safe_load(config_yml_file.read())
100
105
 
106
+ app_data = config.get('app_data')
107
+ if app_data:
108
+ if not isinstance(app_data, str):
109
+ raise BioLibError(
110
+ f'In .biolib/config.yml the value of "app_data" must be a string but got {type(app_data)}'
111
+ )
112
+
113
+ app_data_path = app_path_absolute.joinpath(app_data).resolve()
114
+ if not app_data_path.is_dir():
115
+ raise BioLibError(
116
+ 'In .biolib/config.yml the value of "app_data" must be a path to a directory '
117
+ 'in the application directory'
118
+ )
119
+
101
120
  license_file_relative_path = config.get('license_file', 'LICENSE')
102
121
  if app_path_absolute.joinpath(license_file_relative_path).is_file():
103
122
  zip_filters.add(license_file_relative_path)
@@ -118,11 +137,14 @@ def push_application(
118
137
  if module.get('source_files'):
119
138
  zip_filters.add('*')
120
139
 
121
- for mapping in module['input_files']:
140
+ for mapping in module.get('input_files', []):
122
141
  mapping_parts = mapping.split(' ')
123
142
  if len(mapping_parts) == 3 and mapping_parts[2] == '/':
124
143
  input_files_maps_to_root = True
125
144
 
145
+ except BioLibError as error:
146
+ raise error from None
147
+
126
148
  except Exception as error:
127
149
  raise BioLibError('Failed to parse the .biolib/config.yml file') from error
128
150
 
@@ -175,6 +197,17 @@ def push_application(
175
197
  else None,
176
198
  )
177
199
 
200
+ if app_data_path:
201
+ app_data_files_to_zip, app_data_size_in_bytes = validate_data_path_and_get_files_and_size_of_directory(
202
+ data_path=str(app_data_path),
203
+ )
204
+ push_data_path(
205
+ resource_version_uuid=new_app_version_json['public_id'],
206
+ data_path=str(app_data_path),
207
+ data_size_in_bytes=app_data_size_in_bytes,
208
+ files_to_zip=app_data_files_to_zip,
209
+ )
210
+
178
211
  # Don't push docker images if copying from another app version
179
212
  docker_tags = new_app_version_json.get('docker_tags', {})
180
213
  if not app_version_to_copy_images_from and docker_tags:
@@ -211,6 +244,11 @@ def push_application(
211
244
  try:
212
245
  logger.info(f'Trying to push image {docker_image_name} defined on module {module_name}.')
213
246
  image = docker_client.images.get(docker_image_name)
247
+ architecture = image.attrs.get('Architecture')
248
+ if architecture != 'amd64':
249
+ print(f"Error: '{docker_image_name}' is compiled for {architecture}, expected x86 (amd64).")
250
+ print('If you are on an ARM processor, try passing --platform linux/amd64 to docker build.')
251
+ exit(1)
214
252
  absolute_repo_uri = f'{utils.BIOLIB_SITE_HOSTNAME}/{repo}'
215
253
  image.tag(absolute_repo_uri, tag)
216
254
 
@@ -7,6 +7,7 @@ class RuntimeJobDataDict(TypedDict):
7
7
  job_uuid: str
8
8
  job_auth_token: str
9
9
  app_uri: str
10
+ is_environment_biolib_cloud: bool
10
11
 
11
12
 
12
13
  class BioLibRuntimeError(Exception):
@@ -2,3 +2,4 @@ from .app import * # noqa: F403
2
2
  from .data_record import * # noqa: F403
3
3
  from .experiment import * # noqa: F403
4
4
  from .resource import * # noqa: F403
5
+ from .resource_version import * # noqa: F403
@@ -7,7 +7,7 @@ class SqliteV1ForeignKey(TypedDict):
7
7
 
8
8
 
9
9
  class SqliteV1Column(TypedDict):
10
- type: Literal['INTEGER', 'REAL', 'TEXT', 'JSON'] # noqa:F821
10
+ type: Literal['INTEGER', 'REAL', 'TEXT', 'JSON']
11
11
  nullable: Optional[bool]
12
12
  foreign_key: Optional[SqliteV1ForeignKey]
13
13
  json_schema: Optional[Dict]
@@ -0,0 +1,7 @@
1
+ from .typing import Literal, TypedDict
2
+
3
+
4
class ResourceVersionDetailedDict(TypedDict):
    """Typed dictionary describing a single resource version."""

    # Semantic version string of the resource version.
    semantic_version: str
    # Publication state; only 'published' and 'unpublished' are allowed.
    state: Literal['published', 'unpublished']
    # UUID of the resource version.
    uuid: str
@@ -0,0 +1,267 @@
1
+ import glob
2
+ import os
3
+ import re
4
+ import shutil
5
+ import subprocess
6
+ import tempfile
7
+
8
+ import biolib
9
+ from biolib.utils import SeqUtil
10
+
11
+
12
def natsorted(lst):
    """Return a new list with the strings of ``lst`` in natural order.

    Runs of digits are compared as integers and all other text is compared
    case-insensitively, so ``'job_2'`` sorts before ``'job_10'``.
    """

    def _to_sortable(chunk):
        # Digit runs compare numerically, everything else as lower-cased text.
        if chunk.isdigit():
            return int(chunk)
        return chunk.lower()

    def _natural_key(value):
        # The capturing group keeps the digit runs in the split result.
        return list(map(_to_sortable, re.split('([0-9]+)', value)))

    return sorted(lst, key=_natural_key)
20
+
21
+
22
def fasta_above_threshold(fasta_file, work_threshold, work_per_residue=1, verbose=False):
    """Return True if the total residue work of a FASTA file reaches ``work_threshold``.

    Work is accumulated record by record as ``len(record.sequence) * work_per_residue``
    and the scan stops at the first record where the running total is greater than
    or equal to ``work_threshold``.

    Args:
        fasta_file: FASTA input passed to ``SeqUtil.parse_fasta``.
        work_threshold: Work total at which the file counts as above threshold.
        work_per_residue: Work units charged per residue.
        verbose: When truthy, print a short summary of the decision.

    Returns:
        True if the threshold was reached, otherwise False.
    """
    records = SeqUtil.parse_fasta(fasta_file)

    # Calculate work units, stopping as soon as the threshold is reached
    total_work_units = 0
    for record_number, record in enumerate(records, start=1):
        total_work_units += len(record.sequence) * work_per_residue

        if total_work_units >= work_threshold:
            if verbose:
                print(f'FASTA above threshold (stopped at {total_work_units}) >= {work_threshold}')
                print(f'From {record_number}/{len(records)} sequences in {fasta_file}')
            return True

    if verbose:
        print(f'FASTA below threshold ({total_work_units}) < {work_threshold}')
        print(f'From {len(records)} sequences in {fasta_file}')

    return False
44
+
45
+
46
def run_locally(command_list, args):
    """Run the script locally as a subprocess (no multi-node processing).

    Every attribute of ``args`` whose name does not start with ``multinode`` is
    converted to ``--key value`` arguments and appended to ``command_list``.
    The subprocess' stdout is printed on success, otherwise its stderr is printed
    prefixed with ``Error:``.

    Args:
        command_list: Base command, e.g. ``["python3", "predict.py"]``.
        args: Namespace-like object holding the parsed arguments; must provide a
            ``verbose`` attribute. It is not modified.
    """
    # vars() returns the namespace's live __dict__, so build a filtered copy rather than
    # deleting the multinode-specific keys from the caller's namespace.
    new_args = {key: value for key, value in vars(args).items() if not str(key).startswith('multinode')}

    # Convert to list format
    new_args_list = _args_dict_to_args_list(new_args)

    # Prepare command, e.g. ["python3", "predict.py"] + new_args_list
    command = command_list + new_args_list

    if args.verbose >= 1:
        print(f'Running {command}')

    # Run command; check=False because failures are reported by printing stderr below
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode == 0:
        print(f'{result.stdout}')
    else:
        print(f'Error: {result.stderr}')
72
+
73
+
74
def fasta_batch_records(fasta_file, work_per_batch_min, work_per_residue=1, verbose=False):
    """Group the records of a FASTA file into batches of at least ``work_per_batch_min`` work.

    Records are appended to the current batch in file order. A batch is closed as
    soon as its accumulated work (``len(record.sequence) * work_per_residue`` per
    record) reaches ``work_per_batch_min``. Remaining records form a final,
    possibly smaller, batch.

    Args:
        fasta_file: FASTA input passed to ``SeqUtil.parse_fasta``.
        work_per_batch_min: Work total at which a batch is closed.
        work_per_residue: Work units charged per residue.
        verbose: When truthy, print one summary line per batch.

    Returns:
        A list of batches, each a list of records.
    """
    records = SeqUtil.parse_fasta(fasta_file)

    batches = []
    batch = []
    current_work_units = 0
    for record in records:
        batch.append(record)
        current_work_units += len(record.sequence) * work_per_residue

        # If above limit, start a new batch
        if current_work_units >= work_per_batch_min:
            batches.append(batch)
            batch = []
            current_work_units = 0

    # Append last batch if present
    if batch:
        batches.append(batch)

    if verbose:
        for batch_number, finished_batch in enumerate(batches, start=1):
            n_seqs = len(finished_batch)
            n_res = sum(len(record.sequence) for record in finished_batch)
            print(f'Batch {batch_number}: {n_res} residues from {n_seqs} sequences')

    return batches
119
+
120
+
121
def fasta_send_batches_biolib(
    app_url, batches, args, args_fasta='fasta', machine='cpu.large', stream_all_jobs=True, verbose=1
):
    """Send each batch as a separate job through the pybiolib interface and save the results.

    For every batch a temporary FASTA file is written and a non-blocking job is
    started on the app loaded from ``app_url``. Afterwards each job is checked for
    a zero exit code and its files are saved to ``job_output/job_<n>``.

    Args:
        app_url: Identifier passed to ``biolib.load``.
        batches: List of record batches, e.g. as returned by ``fasta_batch_records``.
        args: Namespace-like object with the parsed arguments; must provide a
            ``verbose`` attribute. It is not modified.
        args_fasta: Name of the argument that receives the batch FASTA path.
        machine: Machine type forwarded to the job.
        stream_all_jobs: Stream the logs of every job instead of only the first.
        verbose: When truthy, stream job logs and print where results are saved.

    Raises:
        AssertionError: If a job finishes with a non-zero exit code.
    """
    # NOTE(review): this message and the per-job stats are gated on ``args.verbose`` while
    # streaming/saving output is gated on the ``verbose`` parameter -- confirm that is intended.
    if args.verbose >= 1:
        print(f'Sending {len(batches)} batches to Biolib')

    # Login to biolib, prepare app
    current_app = biolib.load(app_url)  # Nb: uses "_" not "-"

    # Start one job per batch
    job_list = []
    for i, batch_records in enumerate(batches):
        # Write FASTA, send to server
        with tempfile.TemporaryDirectory() as tempdir:
            # Work on a copy: vars() returns the live namespace dict, and writing into it would
            # leave the caller's args pointing at a FASTA file that is deleted with tempdir.
            new_args = dict(vars(args))

            # Write batched FASTA to send
            fasta_path = f'{tempdir}/input.fasta'
            SeqUtil.write_records_to_fasta(fasta_path, batch_records)
            new_args[args_fasta] = fasta_path
            new_args['multinode_only_local'] = True

            # Convert to list
            new_args_list = _args_dict_to_args_list(new_args)

            # Send job
            job = current_app.cli(args=new_args_list, blocking=False, machine=machine)
            job_list.append(job)

            # Job stats
            if args.verbose:
                batch_dict = _get_batch_stats(batch_records)
                n_seqs, n_res = batch_dict['records'], batch_dict['residues']
                print(f'Sending job {i+1}: {n_res} residues from {n_seqs} sequences -> arg_list = {new_args_list}')

    # Stream job outputs one job at a time
    print('Streaming job outputs ...')
    for i, job in enumerate(job_list):
        # Only stream when verbose: always for the first job, otherwise only if stream_all_jobs is set
        if verbose and (i == 0 or stream_all_jobs):
            job.stream_logs()

        # Check if job succeeded
        exit_code = job.get_exit_code()
        assert exit_code == 0, f'Job failed with exit code {exit_code}'

        # Write to disk
        output_dir = f'job_output/job_{i+1}'
        job.save_files(output_dir=output_dir)

        if verbose:
            print(f'Saving to {output_dir}')
178
+
179
+
180
def merge_folder(folder_name, job_out_dir='job_output', out_dir='output', verbose=1):
    """Merge the ``folder_name`` folder of every job output directory into ``out_dir``.

    The folder of the first job directory (in natural sort order) is moved to
    ``<out_dir>/<folder_name>``; the entries of the same folder in every remaining
    job directory are then moved into it.
    """
    os.makedirs(out_dir, exist_ok=True)

    ordered_job_dirs = natsorted(glob.glob(f'{job_out_dir}/job_*'))

    # The first job's folder becomes the merge target
    destination = f'{out_dir}/{folder_name}'
    shutil.move(f'{ordered_job_dirs[0]}/{folder_name}', destination)

    if verbose:
        print(f'Merging {folder_name} from {len(ordered_job_dirs)} directories to {destination}')

    # Move the entries of every remaining job's folder into the target
    for later_job_dir in ordered_job_dirs[1:]:
        source_folder = f'{later_job_dir}/{folder_name}'
        for entry_name in os.listdir(source_folder):
            shutil.move(f'{source_folder}/{entry_name}', destination)
206
+
207
+
208
def merge_file(
    file_name,
    header_lines_int=1,
    job_out_dir='job_output',
    out_dir='output',
    verbose=1,
):
    """Merge the ``file_name`` file of every job output directory into one file.

    The file of the first job directory (in natural sort order) is moved to
    ``<out_dir>/<file_name>``. The same file from every remaining job directory is
    appended to it after skipping its first ``header_lines_int`` lines.

    Args:
        file_name: Name of the file inside each job directory.
        header_lines_int: Number of leading header lines to skip in every file
            except the first.
        job_out_dir: Directory containing the ``job_*`` output directories.
        out_dir: Directory receiving the merged file; created if missing.
        verbose: When truthy, print a short summary.
    """
    os.makedirs(out_dir, exist_ok=True)

    job_dirs = natsorted(glob.glob(f'{job_out_dir}/job_*'))

    # Move first file, prepare to merge
    first_file = f'{job_dirs[0]}/{file_name}'
    merged_file = f'{out_dir}/{file_name}'
    shutil.move(first_file, merged_file)

    if verbose:
        print(f'Merging {file_name} from {len(job_dirs)} directories to {merged_file}')

    # If more than one file, append to first
    if len(job_dirs) >= 2:
        with open(merged_file, 'a') as merged_file_handle:
            for job_dir in job_dirs[1:]:
                extra_file = f'{job_dir}/{file_name}'
                with open(extra_file) as extra_file_handle:
                    # Skip first n header lines; the default keeps a file with fewer lines
                    # than the header (e.g. an empty result) from raising StopIteration.
                    for _ in range(header_lines_int):
                        next(extra_file_handle, None)

                    # Append remaining content to first file
                    merged_file_handle.write(extra_file_handle.read())
246
+
247
+
248
def _get_batch_stats(batch):
    """Return the record count and total residue count of a batch of records."""
    residue_total = 0
    for record in batch:
        residue_total += len(record.sequence)

    return {'records': len(batch), 'residues': residue_total}
255
+
256
+
257
def _args_dict_to_args_list(new_args):
    """Flatten an arguments dict into a ``['--key', 'value', ...]`` list for Biolib.

    Keys are prefixed with ``--`` and values are converted to strings, keeping the
    dict's iteration order.
    """
    flat_args = []
    for name, value in new_args.items():
        flat_args.extend((f'--{name}', f'{value}'))

    return flat_args
@@ -14,6 +14,10 @@ class Runtime:
14
14
  def check_is_environment_biolib_app() -> bool:
15
15
  return bool(Runtime._try_to_get_job_data())
16
16
 
17
+ @staticmethod
18
+ def check_is_environment_biolib_cloud() -> bool:
19
+ return Runtime._get_job_data().get('is_environment_biolib_cloud', False)
20
+
17
21
  @staticmethod
18
22
  def get_job_id() -> str:
19
23
  return Runtime._get_job_data()['job_uuid']
@@ -32,6 +32,7 @@ class App(TypedDict):
32
32
  public_id: str
33
33
  state: str
34
34
  resource_uri: str
35
+ type: str
35
36
 
36
37
 
37
38
  class AppGetResponse(TypedDict):
biolib/cli/data_record.py CHANGED
@@ -6,6 +6,7 @@ from typing import Dict, List
6
6
  import click
7
7
 
8
8
  from biolib._data_record.data_record import DataRecord
9
+ from biolib.biolib_api_client import BiolibApiClient
9
10
  from biolib.biolib_logging import logger, logger_no_user_data
10
11
  from biolib.typing_utils import Optional
11
12
 
@@ -57,6 +58,7 @@ def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
57
58
  @click.argument('uri', required=True)
58
59
  @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
59
60
  def describe(uri: str, output_as_json: bool) -> None:
61
+ BiolibApiClient.assert_is_signed_in(authenticated_action_description='get Data Record description')
60
62
  record = DataRecord.get_by_uri(uri)
61
63
  files_info: List[Dict] = []
62
64
  total_size_in_bytes = 0
@@ -286,6 +286,7 @@ class DockerExecutor:
286
286
  job_uuid=self._options['job']['public_id'],
287
287
  job_auth_token=self._options['job']['auth_token'],
288
288
  app_uri=self._options['job']['app_uri'],
289
+ is_environment_biolib_cloud=bool(utils.IS_RUNNING_IN_CLOUD),
289
290
  )
290
291
  secrets: Dict[str, str] = dict(
291
292
  **module.get('secrets', {}),
@@ -47,8 +47,21 @@ class JobStorage:
47
47
  module_output_path = os.path.join(job_temporary_dir, JobStorage.module_output_file_name)
48
48
  module_output_size = os.path.getsize(module_output_path)
49
49
 
50
+ # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
51
+ max_chunk_count = 9_000
52
+ min_chunk_size_bytes = 50_000_000
53
+ chunk_size_in_bytes = max(min_chunk_size_bytes, module_output_size // max_chunk_count)
54
+
55
+ logger_no_user_data.debug(
56
+ f'Job "{job_uuid}" uploading result of size {module_output_size} bytes '
57
+ f'with chunk size of {chunk_size_in_bytes} bytes...'
58
+ )
59
+
50
60
  with open(module_output_path, mode='rb') as module_output_file:
51
- module_output_iterator = get_chunk_iterator_from_file_object(module_output_file)
61
+ module_output_iterator = get_chunk_iterator_from_file_object(
62
+ file_object=module_output_file,
63
+ chunk_size_in_bytes=chunk_size_in_bytes,
64
+ )
52
65
  multipart_uploader = JobStorage._get_module_output_uploader(job_uuid)
53
66
  multipart_uploader.upload(
54
67
  payload_iterator=module_output_iterator,
@@ -1,3 +1,4 @@
1
+ import base64
1
2
  import io
2
3
  import subprocess
3
4
  import tarfile
@@ -160,6 +161,9 @@ class RemoteHostProxy:
160
161
  access_token = BiolibApiClient.get().access_token
161
162
  bearer_token = f'Bearer {access_token}' if access_token else ''
162
163
 
164
+ biolib_index_basic_auth = f'compute_node|admin:{compute_node_auth_token},{self._job_uuid}'
165
+ biolib_index_basic_auth_base64 = base64.b64encode(biolib_index_basic_auth.encode('utf-8')).decode('utf-8')
166
+
163
167
  nginx_config = f"""
164
168
  events {{
165
169
  worker_connections 1024;
@@ -270,7 +274,7 @@ http {{
270
274
  }}
271
275
 
272
276
  location /api/lfs/ {{
273
- proxy_pass https://$upstream_hostname/api/lfs/;
277
+ proxy_pass https://$upstream_hostname$request_uri;
274
278
  proxy_set_header authorization "";
275
279
  proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
276
280
  proxy_set_header job-uuid "{self._job_uuid}";
@@ -279,7 +283,16 @@ http {{
279
283
  }}
280
284
 
281
285
  location /api/app/ {{
282
- proxy_pass https://$upstream_hostname/api/app/;
286
+ proxy_pass https://$upstream_hostname$request_uri;
287
+ proxy_set_header authorization "";
288
+ proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
289
+ proxy_set_header job-uuid "{self._job_uuid}";
290
+ proxy_set_header cookie "";
291
+ proxy_ssl_server_name on;
292
+ }}
293
+
294
+ location /api/resources/data-records/ {{
295
+ proxy_pass https://$upstream_hostname$request_uri;
283
296
  proxy_set_header authorization "";
284
297
  proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
285
298
  proxy_set_header job-uuid "{self._job_uuid}";
@@ -288,33 +301,40 @@ http {{
288
301
  }}
289
302
 
290
303
  location /api/ {{
291
- proxy_pass https://$upstream_hostname/api/;
304
+ proxy_pass https://$upstream_hostname$request_uri;
292
305
  proxy_set_header authorization "";
293
306
  proxy_set_header cookie "";
294
307
  proxy_ssl_server_name on;
295
308
  }}
296
309
 
297
310
  location /proxy/storage/job-storage/ {{
298
- proxy_pass https://$upstream_hostname/proxy/storage/job-storage/;
311
+ proxy_pass https://$upstream_hostname$request_uri;
299
312
  proxy_set_header authorization "";
300
313
  proxy_set_header cookie "";
301
314
  proxy_ssl_server_name on;
302
315
  }}
303
316
 
304
317
  location /proxy/storage/lfs/versions/ {{
305
- proxy_pass https://$upstream_hostname/proxy/storage/lfs/versions/;
318
+ proxy_pass https://$upstream_hostname$request_uri;
306
319
  proxy_set_header authorization "";
307
320
  proxy_set_header cookie "";
308
321
  proxy_ssl_server_name on;
309
322
  }}
310
323
 
311
324
  location /proxy/cloud/ {{
312
- proxy_pass https://$upstream_hostname/proxy/cloud/;
325
+ proxy_pass https://$upstream_hostname$request_uri;
313
326
  proxy_set_header authorization "";
314
327
  proxy_set_header cookie "";
315
328
  proxy_ssl_server_name on;
316
329
  }}
317
330
 
331
+ location /proxy/index/ {{
332
+ proxy_pass https://$upstream_hostname$request_uri;
333
+ proxy_set_header authorization "Basic {biolib_index_basic_auth_base64}";
334
+ proxy_set_header cookie "";
335
+ proxy_ssl_server_name on;
336
+ }}
337
+
318
338
  location / {{
319
339
  return 404 "Not found";
320
340
  }}
@@ -2,23 +2,23 @@ import base64
2
2
  import os
3
3
  import random
4
4
  import shutil
5
+ import socket
5
6
  import sys
6
- import time
7
7
  import threading
8
- import socket
8
+ import time
9
9
  from queue import Queue
10
10
 
11
- from biolib import utils
11
+ from biolib import api, utils
12
+ from biolib.biolib_binary_format import ModuleOutputV2, SystemException, SystemStatusUpdate
12
13
  from biolib.biolib_binary_format.utils import LocalFileIndexableBuffer
14
+ from biolib.biolib_logging import logger, logger_no_user_data
13
15
  from biolib.compute_node.cloud_utils import CloudUtils
14
16
  from biolib.compute_node.job_worker import JobWorkerProcess
15
17
  from biolib.compute_node.job_worker.job_storage import JobStorage
16
18
  from biolib.compute_node.socker_listener_thread import SocketListenerThread
17
19
  from biolib.compute_node.socket_sender_thread import SocketSenderThread
20
+ from biolib.compute_node.utils import SystemExceptionCodes, WorkerThreadException, get_package_type
18
21
  from biolib.compute_node.webserver import webserver_utils
19
- from biolib.biolib_binary_format import SystemStatusUpdate, SystemException, ModuleOutputV2
20
- from biolib.compute_node.utils import get_package_type, WorkerThreadException, SystemExceptionCodes
21
- from biolib.biolib_logging import logger, logger_no_user_data
22
22
 
23
23
  SOCKET_HOST = '127.0.0.1'
24
24
 
@@ -37,7 +37,7 @@ class WorkerThread(threading.Thread):
37
37
  self._sender_thread = None
38
38
  self._start_and_connect_to_compute_process()
39
39
 
40
- logger.debug(f"WorkerThread connected to port {self._socket_port}")
40
+ logger.debug(f'WorkerThread connected to port {self._socket_port}')
41
41
 
42
42
  except Exception as exception:
43
43
  logger_no_user_data.error(exception)
@@ -79,20 +79,16 @@ class WorkerThread(threading.Thread):
79
79
  if progress == 94:
80
80
  # Get Job exit code
81
81
  try:
82
- module_output_path = os.path.join(self._job_temporary_dir,
83
- JobStorage.module_output_file_name)
84
- module_output = ModuleOutputV2(
85
- buffer=LocalFileIndexableBuffer(
86
- filename=module_output_path
87
- )
82
+ module_output_path = os.path.join(
83
+ self._job_temporary_dir,
84
+ JobStorage.module_output_file_name,
88
85
  )
86
+ module_output = ModuleOutputV2(buffer=LocalFileIndexableBuffer(filename=module_output_path))
89
87
  self.compute_state['exit_code'] = module_output.get_exit_code()
90
88
  logger_no_user_data.debug(f"Got exit code: {self.compute_state['exit_code']}")
91
89
 
92
90
  except Exception as error: # pylint: disable=broad-except
93
- logger_no_user_data.error(
94
- f'Could not get exit_code from module output due to: {error}'
95
- )
91
+ logger_no_user_data.error(f'Could not get exit_code from module output due to: {error}')
96
92
 
97
93
  if utils.IS_RUNNING_IN_CLOUD:
98
94
  JobStorage.upload_module_output(
@@ -107,7 +103,7 @@ class WorkerThread(threading.Thread):
107
103
  elif package_type == 'SystemException':
108
104
  error_code = SystemException(package).deserialize()
109
105
  self.compute_state['status']['error_code'] = error_code
110
- logger.debug("Hit error. Terminating Worker Thread and Compute Process")
106
+ logger.debug('Hit error. Terminating Worker Thread and Compute Process')
111
107
  self.compute_state['progress'] = 95
112
108
  self.terminate()
113
109
 
@@ -153,10 +149,10 @@ class WorkerThread(threading.Thread):
153
149
 
154
150
  # Starting a thread for accepting connections before starting the process that should connect to the socket
155
151
  logger_no_user_data.debug('Starting connection thread')
156
- self._connection_thread = threading.Thread(target=self._accept_new_socket_connection, args=[
157
- received_messages_queue,
158
- messages_to_send_queue
159
- ])
152
+ self._connection_thread = threading.Thread(
153
+ target=self._accept_new_socket_connection,
154
+ args=[received_messages_queue, messages_to_send_queue],
155
+ )
160
156
  self._connection_thread.start()
161
157
  logger_no_user_data.debug('Started connection thread')
162
158
  logger_no_user_data.debug('Starting compute process')
@@ -177,6 +173,16 @@ class WorkerThread(threading.Thread):
177
173
  self._sender_thread.start()
178
174
 
179
175
  def terminate(self) -> None:
176
+ cloud_job_uuid = self.compute_state['cloud_job_id']
177
+ exit_code = self.compute_state.get('exit_code')
178
+ system_exception_code = self.compute_state['status'].get('error_code')
179
+ if utils.IS_RUNNING_IN_CLOUD:
180
+ CloudUtils.finish_cloud_job(
181
+ cloud_job_id=cloud_job_uuid,
182
+ system_exception_code=system_exception_code,
183
+ exit_code=exit_code,
184
+ )
185
+
180
186
  deregistered_due_to_error = False
181
187
  if self._job_worker_process:
182
188
  logger_no_user_data.debug(
@@ -184,7 +190,8 @@ class WorkerThread(threading.Thread):
184
190
  )
185
191
  self._job_worker_process.terminate()
186
192
 
187
- for _ in range(10):
193
+ clean_up_timeout_in_seconds = 600
194
+ for _ in range(clean_up_timeout_in_seconds):
188
195
  if self._job_worker_process.exitcode is not None:
189
196
  logger_no_user_data.debug(
190
197
  f'Job "{self._job_uuid}" worker process exitcode {self._job_worker_process.exitcode}'
@@ -196,28 +203,18 @@ class WorkerThread(threading.Thread):
196
203
 
197
204
  if self._job_worker_process.exitcode is None:
198
205
  # TODO: Figure out if more error handling is necessary here
199
- logger_no_user_data.error(f'Job {self._job_uuid} worker process did not exit within 10 seconds')
206
+ logger_no_user_data.error(
207
+ f'Job {self._job_uuid} worker process did not exit within {clean_up_timeout_in_seconds} seconds'
208
+ )
200
209
  if utils.IS_RUNNING_IN_CLOUD:
201
210
  logger_no_user_data.error('Deregistering compute node...')
202
211
  CloudUtils.deregister(error='job_cleanup_timed_out')
203
212
  deregistered_due_to_error = True
204
213
 
205
214
  # Delete result as error occurred
206
- system_exception_code = self.compute_state['status'].get('error_code')
207
215
  if system_exception_code and os.path.exists(self._job_temporary_dir):
208
216
  shutil.rmtree(self._job_temporary_dir)
209
217
 
210
- exit_code = self.compute_state.get('exit_code')
211
-
212
- if utils.IS_RUNNING_IN_CLOUD:
213
- # Get and send compute node exception code and job exit code if present
214
- logger_no_user_data.debug(f"Sending exit code {exit_code}")
215
- CloudUtils.finish_cloud_job(
216
- cloud_job_id=self.compute_state['cloud_job_id'],
217
- system_exception_code=system_exception_code,
218
- exit_code=exit_code
219
- )
220
-
221
218
  if self._socket:
222
219
  self._socket.close()
223
220
 
@@ -225,7 +222,7 @@ class WorkerThread(threading.Thread):
225
222
  self._connection.close()
226
223
 
227
224
  if self.compute_state['progress'] == 95:
228
- seconds_to_sleep = 60 # 1 minute
225
+ seconds_to_sleep = 5
229
226
  logger_no_user_data.debug(
230
227
  f'Job "{self._job_uuid}" worker thread sleeping for {seconds_to_sleep} seconds before cleaning up'
231
228
  )
@@ -234,7 +231,7 @@ class WorkerThread(threading.Thread):
234
231
 
235
232
  compute_state_dict = webserver_utils.JOB_ID_TO_COMPUTE_STATE_DICT
236
233
  if self._job_uuid in compute_state_dict:
237
- # Delete result as user has not started download within 60 seconds
234
+ # Delete result as user has not started download
238
235
  if compute_state_dict[self._job_uuid]['progress'] == 95 and os.path.exists(self._job_temporary_dir):
239
236
  shutil.rmtree(self._job_temporary_dir)
240
237
 
@@ -245,12 +242,18 @@ class WorkerThread(threading.Thread):
245
242
  f'Job "{self._job_uuid}" could not be found, maybe it has already been cleaned up'
246
243
  )
247
244
 
248
- logger_no_user_data.debug(f'Job "{self._job_uuid}" worker thread terminated')
249
-
250
245
  if utils.IS_RUNNING_IN_CLOUD:
246
+ config = CloudUtils.get_webserver_config()
247
+ logger_no_user_data.debug(f'Job "{self._job_uuid}" reporting CloudJob "{cloud_job_uuid}" as cleaned up...')
248
+ api.client.post(
249
+ path=f'/internal/compute-nodes/cloud-jobs/{cloud_job_uuid}/cleaned-up/',
250
+ headers={'Compute-Node-Auth-Token': config['compute_node_info']['auth_token']},
251
+ )
252
+
251
253
  if deregistered_due_to_error:
252
254
  CloudUtils.shutdown() # shutdown now
253
255
  else:
254
256
  webserver_utils.update_auto_shutdown_time()
255
257
 
258
+ logger_no_user_data.debug(f'Job "{self._job_uuid}" worker thread exiting...')
256
259
  sys.exit()
biolib/utils/seq_util.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import re
2
- from io import BufferedIOBase
2
+ from io import BufferedIOBase, TextIOBase
3
3
 
4
- from biolib.typing_utils import Dict, List, Optional, Union
4
+ from biolib.typing_utils import Dict, List, Optional, Union, Iterator
5
5
 
6
6
 
7
7
  class SeqUtilRecord:
@@ -37,67 +37,77 @@ class SeqUtil:
37
37
  allow_any_sequence_characters: bool = False,
38
38
  allow_empty_sequence: bool = True,
39
39
  file_name: Optional[str] = None,
40
- ) -> List[SeqUtilRecord]:
40
+ ) -> Iterator[SeqUtilRecord]:
41
+ def process_and_yield_record(header: str, sequence_lines: List[str]):
42
+ sequence = ''.join(sequence_lines)
43
+ sequence_id = header.split()[0]
44
+ if not allow_any_sequence_characters:
45
+ invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
46
+ if invalid_sequence_characters:
47
+ raise Exception(
48
+ f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
49
+ )
50
+ if not allow_empty_sequence and not sequence:
51
+ raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
52
+ yield SeqUtilRecord(
53
+ sequence=sequence,
54
+ sequence_id=sequence_id,
55
+ description=header[len(sequence_id):].strip()
56
+ )
57
+
58
+ def line_generator_from_buffered_io_base(file_handle: BufferedIOBase) -> Iterator[str]:
59
+ for line in file_handle:
60
+ yield line.decode('utf-8')
61
+
62
+ def line_generator_from_text_io_base(file_handle: TextIOBase) -> Iterator[str]:
63
+ for line in file_handle:
64
+ yield line
65
+
41
66
  if input_file is None:
42
67
  if file_name:
43
68
  input_file = file_name
44
69
  else:
45
70
  raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
71
+
72
+ file_handle = None
46
73
  if isinstance(input_file, str):
47
- with open(input_file) as file_handle:
48
- data = file_handle.read().strip()
74
+ file_handle = open(input_file, "rb")
75
+ line_iterator = line_generator_from_buffered_io_base(file_handle)
49
76
  elif isinstance(input_file, BufferedIOBase):
50
- data = input_file.read().decode('utf-8')
77
+ line_iterator = line_generator_from_buffered_io_base(input_file)
78
+ elif isinstance(input_file, TextIOBase):
79
+ line_iterator = line_generator_from_text_io_base(input_file)
51
80
  else:
52
81
  raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
53
- if not data:
54
- return []
55
-
56
- if '>' not in data:
57
- if default_header:
58
- lines_with_header = []
59
- for index, line in enumerate(data.split('\n')):
60
- index_string = str(index + 1) if index > 0 else ''
61
- lines_with_header.append(f'>{default_header}{index_string}\n{line}')
62
-
63
- data = '\n'.join(lines_with_header)
64
- else:
65
- raise Exception(f'No header line found in FASTA file "{file_name}"')
66
-
67
- splitted = []
68
- tmp_data = ''
69
- for line in data.splitlines():
70
- if line.startswith('>'):
71
- if tmp_data:
72
- splitted.append(tmp_data)
73
- tmp_data = line[1:].strip() + '\n'
74
- else:
75
- if line.strip():
76
- tmp_data += line.strip() + '\n'
77
-
78
- if tmp_data:
79
- splitted.append(tmp_data)
80
-
81
- parsed_sequences = []
82
- for sequence_data in splitted:
83
- sequence_data_splitted = sequence_data.strip().split('\n')
84
- header_line = sequence_data_splitted[0].split()
85
- sequence_id = header_line[0]
86
- description = sequence_data_splitted[0][len(sequence_id) :].strip()
87
- sequence = ''.join([seq.strip() for seq in sequence_data_splitted[1:]])
88
-
89
- if not allow_any_sequence_characters:
90
- invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
91
- if len(invalid_sequence_characters) > 0:
92
- raise Exception(
93
- f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
94
- )
95
- if not allow_empty_sequence and len(sequence) == 0:
96
- raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
97
-
98
- parsed_sequences.append(SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description))
99
82
 
100
- return parsed_sequences
83
+ header = None
84
+ sequence_lines: List[str] = []
85
+
86
+ try:
87
+ for line_number, line in enumerate(line_iterator):
88
+ line = line.strip()
89
+ if not line:
90
+ continue # skip empty lines
91
+ if line.startswith('>'):
92
+ if header is not None:
93
+ yield from process_and_yield_record(header, sequence_lines)
94
+
95
+ header = line[1:].strip()
96
+ sequence_lines = []
97
+ else:
98
+ if header is None:
99
+ if default_header:
100
+ yield from process_and_yield_record(f"{default_header}{line_number}", [line])
101
+ else:
102
+ raise Exception(f'No header line found in FASTA file "{file_name}"')
103
+ else:
104
+ sequence_lines.append(line)
105
+
106
+ if header is not None:
107
+ yield from process_and_yield_record(header, sequence_lines)
108
+ finally:
109
+ if file_handle:
110
+ file_handle.close()
101
111
 
102
112
  @staticmethod
103
113
  def write_records_to_fasta(file_name: str, records: List[SeqUtilRecord]) -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.1.2193
3
+ Version: 1.2.105.dev1
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -1,29 +1,32 @@
1
1
  LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
2
- README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
3
- biolib/__init__.py,sha256=_tThyzISH81yS9KXP_X3qEiKXmsIp5XOBcJIODfLVnc,4338
4
- biolib/_data_record/data_record.py,sha256=CoyYRse5VdUBhQzzPfR9BkytgOsM-IZxkfMX1kyRnPk,12589
2
+ PYPI_README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
3
+ biolib/__init__.py,sha256=q_YhAYw51Vq16IKtSk8_MJclDAa4CfCmPhvWDYmrSIg,4393
4
+ biolib/_data_record/data_record.py,sha256=zVAhFU1RLI1-ptoQ_l639RNwrMANXV9j75yXHvB7dtA,10950
5
5
  biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  biolib/_internal/data_record/__init__.py,sha256=fGdME6JGRU_2VxpJbYpGXYndjN-feUkmKY4fuMyq3cg,76
7
- biolib/_internal/data_record/data_record.py,sha256=YmaAABR57goDCE8-rKb2j0FPMSbDtRPCm_HhT3mM074,4299
7
+ biolib/_internal/data_record/data_record.py,sha256=g_-jdy5-Zem3dthwxJj2OuQqkDGTyc-iGqN1rtYYD1A,4418
8
+ biolib/_internal/data_record/push_data.py,sha256=-L3a_7zZzDCXabBu3O4lWPMAMeBbeRPTrBlEM-_5SCI,2693
8
9
  biolib/_internal/data_record/remote_storage_endpoint.py,sha256=eCptuZ4DMAPnaNCVDvpWXwXGI6Jac9U1N5dqU8Cj95Q,1732
9
10
  biolib/_internal/file_utils.py,sha256=4jT6j7bB21c0JNn5BfnyWQib_zt0CVtJ_TiOFOStRcE,2604
10
11
  biolib/_internal/fuse_mount/__init__.py,sha256=B_tM6RM2dBw-vbpoHJC4X3tOAaN1H2RDvqYJOw3xFwg,55
11
12
  biolib/_internal/fuse_mount/experiment_fuse_mount.py,sha256=08aUdEq_bvqLBft_gSLjOClKDy5sBnMts1RfJf7AP_U,7012
12
- biolib/_internal/http_client.py,sha256=DdooXei93JKGYGV4aQmzue_oFzvHkozg2UCxgk9dfDM,5081
13
+ biolib/_internal/http_client.py,sha256=Q7z7_DMI1EaVJSyevrra63piuee-NfqcAHRtQt76ZdA,5165
13
14
  biolib/_internal/lfs/__init__.py,sha256=gSWo_xg61UniYgD7yNYxeT4I9uaXBCBSi3_nmZjnPpE,35
14
15
  biolib/_internal/lfs/cache.py,sha256=pQS2np21rdJ6I3DpoOutnzPHpLOZgUIS8TMltUJk_k4,2226
15
16
  biolib/_internal/libs/__init__.py,sha256=Jdf4tNPqe_oIIf6zYml6TiqhL_02Vyqwge6IELrAFhw,98
16
17
  biolib/_internal/libs/fusepy/__init__.py,sha256=AWDzNFS-XV_5yKb0Qx7kggIhPzq1nj_BZS5y2Nso08k,41944
17
- biolib/_internal/push_application.py,sha256=8P7eXvySn7CRp5XBDkO3xjTGixS8g7-jD-_iwzM_XDI,10020
18
- biolib/_internal/runtime.py,sha256=9pZ3s3L7LGxdqOgnHh1KK3Jjyn_9MjhQmKHI-6hMT3U,448
19
- biolib/_internal/types/__init__.py,sha256=11ZucS8jKeLGAAswXyKI7FH2KLHd6T9Sh8ZK2Ar3jlk,152
18
+ biolib/_internal/push_application.py,sha256=mKs3kIKW-ZYfz3Cy6LIyFBwsWkbcGZ9zgMk-xn5NDyg,11660
19
+ biolib/_internal/runtime.py,sha256=BiHl4klUHr36MCpqKaUso4idHeBZfPAahLYRQrabFqA,486
20
+ biolib/_internal/types/__init__.py,sha256=xLgOQJFh3GRtiqIJq7MaqHReZx4pp34_zcaFQ_JjuJ4,198
20
21
  biolib/_internal/types/app.py,sha256=Mz2QGD_jESX-K9JYnLWPo4YA__Q_1FQQTk9pvidCohU,118
21
- biolib/_internal/types/data_record.py,sha256=AHoIiwVqeHj0HozQxFRAyxk-d3XJgLWno4ic1z9eTrQ,865
22
+ biolib/_internal/types/data_record.py,sha256=9r_vdhVs60YTnzU4XQFXfDrfS2P2MqD3BH2xa7lk6ck,852
22
23
  biolib/_internal/types/experiment.py,sha256=D94iBdn2nS92lRW-TOs1a2WKXJD5ZtmzL4ypggKX2ys,176
23
24
  biolib/_internal/types/resource.py,sha256=G-vPkZoe4Um6FPxsQZtRzAlbSW5sDW4NFkbjn21I3V4,372
25
+ biolib/_internal/types/resource_version.py,sha256=sLxViYXloDDUhTDFgjegiQCj097OM1Ih1-uqlC_4ULA,174
24
26
  biolib/_internal/types/typing.py,sha256=D4EKKEe7kDx0K6lJi-H_XLtk-8w6nu2fdqn9bvzI-Xo,288
25
27
  biolib/_internal/utils/__init__.py,sha256=p5vsIFyu-zYqBgdSMfwW9NC_jk7rXvvCbV4Bzd3As7c,630
26
- biolib/_runtime/runtime.py,sha256=daYxzIpRoW4k-HJFu2BMXeylYSlCXn3-SqdSriCFnKw,2770
28
+ biolib/_internal/utils/multinode.py,sha256=zWrQhcVK5u_xdWX2oIM-D_2fINqNPlqF_h71fu4K8LY,8279
29
+ biolib/_runtime/runtime.py,sha256=oVgTnDDJv9L4BUP1_sd0oAj4LLyyiPSQdhp7ixWARvw,2923
27
30
  biolib/api/__init__.py,sha256=mQ4u8FijqyLzjYMezMUUbbBGNB3iFmkNdjXnWPZ7Jlw,138
28
31
  biolib/api/client.py,sha256=FRpdH5aI187b_I_4HUNi680v4iOP65z5f2RcUo8D8MA,3559
29
32
  biolib/app/__init__.py,sha256=cdPtcfb_U-bxb9iSL4fCEq2rpD9OjkyY4W-Zw60B0LI,37
@@ -31,7 +34,7 @@ biolib/app/app.py,sha256=P2RwaDAskUHzlciuTJUroqUocRwoyOLT6YbgMyCRRDI,8484
31
34
  biolib/app/search_apps.py,sha256=K4a41f5XIWth2BWI7OffASgIsD0ko8elCax8YL2igaY,1470
32
35
  biolib/biolib_api_client/__init__.py,sha256=E5EMa19wJoblwSdQPYrxc_BtIeRsAuO0L_jQweWw-Yk,182
33
36
  biolib/biolib_api_client/api_client.py,sha256=ciNx4ybpyKG5LEf4KQdGEz13r0jTxImyQat4_HDecD0,7373
34
- biolib/biolib_api_client/app_types.py,sha256=FxSr4UqfnMhLe34p8bm02wsC3g1Jz8iaing5tRKDOQI,2442
37
+ biolib/biolib_api_client/app_types.py,sha256=1sXz9XnLRKNALMglNdTbew7AL6OkcUan0MPdj4xQLis,2456
35
38
  biolib/biolib_api_client/auth.py,sha256=kjm0ZHnH3I8so3su2sZbBxNHYp-ZUdrZ5lwQ0K36RSw,949
36
39
  biolib/biolib_api_client/biolib_app_api.py,sha256=DndlVxrNTes6DOaWyMINLGZQCRMWVvR7gwt5HVlyf5Y,4240
37
40
  biolib/biolib_api_client/biolib_job_api.py,sha256=7bKfav3-12ewXkEUoLdCmbWdebW8148kxfGJW9SsXZI,7125
@@ -57,7 +60,7 @@ biolib/biolib_errors.py,sha256=5m4lK2l39DafpoXBImEBD4EPH3ayXBX0JgtPzmGClow,689
57
60
  biolib/biolib_logging.py,sha256=J3E5H_LL5k6ZUim2C8gqN7E6lCBZMTpO4tnMpOPwG9U,2854
58
61
  biolib/cli/__init__.py,sha256=0v3c_J-U0k46c5ZWeQjLG_kTaKDJm81LBxQpDO2B_aI,1286
59
62
  biolib/cli/auth.py,sha256=rpWGmXs6Fz6CGrO9K8ibPRszOdXG78Vig_boKaVCD9A,2082
60
- biolib/cli/data_record.py,sha256=08JbZkFWKMo0PrnhhG0jQEKnNW7pPLti9cOw8s1TWfI,3344
63
+ biolib/cli/data_record.py,sha256=t8DfJK2EZ_SNZ9drDA_N5Jqy8DNwf9f5SlFrIaOvtv0,3501
61
64
  biolib/cli/download_container.py,sha256=HIZVHOPmslGE5M2Dsp9r2cCkAEJx__vcsDz5Wt5LRos,483
62
65
  biolib/cli/init.py,sha256=wQOfii_au-d30Hp7DdH-WVw-WVraKvA_zY4za1w7DE8,821
63
66
  biolib/cli/lfs.py,sha256=z2qHUwink85mv9yDgifbVKkVwuyknGhMDTfly_gLKJM,4151
@@ -74,19 +77,19 @@ biolib/compute_node/job_worker/cache_state.py,sha256=MwjSRzcJJ_4jybqvBL4xdgnDYSI
74
77
  biolib/compute_node/job_worker/cache_types.py,sha256=ajpLy8i09QeQS9dEqTn3T6NVNMY_YsHQkSD5nvIHccQ,818
75
78
  biolib/compute_node/job_worker/docker_image_cache.py,sha256=ansHIkJIq_EMW1nZNlW-RRLVVeKWTbzNICYaOHpKiRE,7460
76
79
  biolib/compute_node/job_worker/executors/__init__.py,sha256=bW6t1qi3PZTlHM4quaTLa8EI4ALTCk83cqcVJfJfJfE,145
77
- biolib/compute_node/job_worker/executors/docker_executor.py,sha256=2H7GooL0oAifPcbie0unatB4fRoHyqbsr6S91uagc_g,27952
80
+ biolib/compute_node/job_worker/executors/docker_executor.py,sha256=LcYc4x4-vkmRM1FN1SNj7xRj4qZUNVjQWtgbYMFg3JU,28029
78
81
  biolib/compute_node/job_worker/executors/docker_types.py,sha256=VhsU1DKtJjx_BbCkVmiPZPH4ROiL1ygW1Y_s1Kbpa2o,216
79
82
  biolib/compute_node/job_worker/executors/tars/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
83
  biolib/compute_node/job_worker/executors/types.py,sha256=yP5gG39hr-DLnw9bOE--VHi-1arDbIYiGuV1rlTbbHI,1466
81
84
  biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py,sha256=_cvEiZbOwfkv6fYmfrvdi_FVviIEYr_dSClQcOQaUWM,1198
82
85
  biolib/compute_node/job_worker/job_max_runtime_timer_thread.py,sha256=K_xgz7IhiIjpLlXRk8sqaMyLoApcidJkgu29sJX0gb8,1174
83
- biolib/compute_node/job_worker/job_storage.py,sha256=LNkklckDLbYgCHsK5FGrEK75Kw-H4f4JcTCAtuE9His,4035
86
+ biolib/compute_node/job_worker/job_storage.py,sha256=lScHI3ubcHKagSEW243tgbIWXUfbWDHDjEOPMvXxJE8,4603
84
87
  biolib/compute_node/job_worker/job_worker.py,sha256=fuWoYJo9HOqLmWl8yeCXh0mhT4ebbkrWac-BVb58khs,28842
85
88
  biolib/compute_node/job_worker/large_file_system.py,sha256=XXqRlVtYhs-Ji9zQGIk5KQPXFO_Q5jJH0nnlw4GkeMY,10461
86
89
  biolib/compute_node/job_worker/mappings.py,sha256=Z48Kg4nbcOvsT2-9o3RRikBkqflgO4XeaWxTGz-CNvI,2499
87
90
  biolib/compute_node/job_worker/utilization_reporter_thread.py,sha256=7tm5Yk9coqJ9VbEdnO86tSXI0iM0omwIyKENxdxiVXk,8575
88
91
  biolib/compute_node/job_worker/utils.py,sha256=wgxcIA8yAhUPdCwyvuuJ0JmreyWmmUoBO33vWtG60xg,1282
89
- biolib/compute_node/remote_host_proxy.py,sha256=CNWJLXXYm8DGujxEJIsg1wUKFoSgU0nhwdmjMn5gelE,14690
92
+ biolib/compute_node/remote_host_proxy.py,sha256=eTT7x7ht_cxXMQ-0yXCvhKZW6mKeYM4KrfBf75KTbc8,15651
90
93
  biolib/compute_node/socker_listener_thread.py,sha256=T5_UikA3MB9bD5W_dckYLPTgixh72vKUlgbBvj9dbM0,1601
91
94
  biolib/compute_node/socket_sender_thread.py,sha256=YgamPHeUm2GjMFGx8qk-99WlZhEs-kAb3q_2O6qByig,971
92
95
  biolib/compute_node/utils.py,sha256=M7i_WTyxbFM3Lri9RWZ_8FeQNYrQIWpKGLfp2I55oeY,4677
@@ -95,7 +98,7 @@ biolib/compute_node/webserver/gunicorn_flask_application.py,sha256=jPfR_YvNBekLU
95
98
  biolib/compute_node/webserver/webserver.py,sha256=15PkRyhtdtSgFDxa0z78aPO4ciZURsFqJYi-HtUmZF8,6494
96
99
  biolib/compute_node/webserver/webserver_types.py,sha256=2t8EaFKESnves3BA_NBdnS2yAdo1qwamCFHiSt888nE,380
97
100
  biolib/compute_node/webserver/webserver_utils.py,sha256=XWvwYPbWNR3qS0FYbLLp-MDDfVk0QdaAmg3xPrT0H2s,4234
98
- biolib/compute_node/webserver/worker_thread.py,sha256=26tG73TADnOcXsAr7Iyf6smrLlCqB4x-vvmpUb8WqnA,11569
101
+ biolib/compute_node/webserver/worker_thread.py,sha256=GRRBUqXdMKvbjyLQhYlqGIbFKeU2iiEXIe5IXi9wgdg,11806
99
102
  biolib/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
103
  biolib/experiments/experiment.py,sha256=jIRixmQm3Gq9YdJ3I0-rE1vFukXqq6U4zXehFOJ1yZk,7614
101
104
  biolib/jobs/__init__.py,sha256=aIb2H2DHjQbM2Bs-dysFijhwFcL58Blp0Co0gimED3w,32
@@ -114,10 +117,10 @@ biolib/utils/__init__.py,sha256=fwjciJyJicvYyZcVTzfDBgD0SKY13DeXqvTeG4qZIy8,5548
114
117
  biolib/utils/app_uri.py,sha256=Yq_-_VGugQhMMo6mM5f0G9yNlLkr0WK4j0Nrf3FE4xQ,2171
115
118
  biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3100
116
119
  biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
117
- biolib/utils/seq_util.py,sha256=ZQFcaE37B2dtucN2zDjOmdya_X0ITc1zBFZJNQY13XA,5183
120
+ biolib/utils/seq_util.py,sha256=WJnU9vZdwY8RHXvzATyV80OXzyJ7w9EkG33Tna9Nr6A,5698
118
121
  biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
119
- pybiolib-1.1.2193.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
120
- pybiolib-1.1.2193.dist-info/METADATA,sha256=NCFsHsCnbnOhw1O2L-polZfFa2pNdtA89TfBW7f_o7E,1508
121
- pybiolib-1.1.2193.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
122
- pybiolib-1.1.2193.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
123
- pybiolib-1.1.2193.dist-info/RECORD,,
122
+ pybiolib-1.2.105.dev1.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
123
+ pybiolib-1.2.105.dev1.dist-info/METADATA,sha256=8gXSVU8uvqONlUb6KfFDPrrnjV4z9abxHxZ1FL4EtTo,1512
124
+ pybiolib-1.2.105.dev1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
125
+ pybiolib-1.2.105.dev1.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
126
+ pybiolib-1.2.105.dev1.dist-info/RECORD,,
File without changes