pybiolib 1.1.2189__tar.gz → 1.2.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/PKG-INFO +1 -1
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/data_record/data_record.py +2 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/http_client.py +4 -2
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/push_application.py +5 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/runtime.py +1 -0
- pybiolib-1.2.12/biolib/_internal/utils/multinode.py +264 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_runtime/runtime.py +4 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/app_types.py +1 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/data_record.py +2 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/executors/docker_executor.py +1 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/job_storage.py +14 -1
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/remote_host_proxy.py +32 -21
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/webserver/worker_thread.py +42 -39
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/utils/seq_util.py +1 -1
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/pyproject.toml +3 -3
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/LICENSE +0 -0
- /pybiolib-1.1.2189/README.md → /pybiolib-1.2.12/PYPI_README.md +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_data_record/data_record.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/data_record/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/data_record/remote_storage_endpoint.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/file_utils.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/fuse_mount/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/fuse_mount/experiment_fuse_mount.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/lfs/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/lfs/cache.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/libs/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/libs/fusepy/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/types/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/types/app.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/types/data_record.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/types/experiment.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/types/resource.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/types/typing.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/utils/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/api/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/api/client.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/app/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/app/app.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/app/search_apps.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/api_client.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/auth.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/biolib_app_api.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/biolib_job_api.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/common_types.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/job_types.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/lfs_types.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_api_client/user_state.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/base_bbf_package.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/file_in_container.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/module_input.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/module_output_v2.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/remote_endpoints.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/remote_stream_seeker.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/saved_job.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/stdout_and_stderr.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/system_exception.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/system_status_update.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_binary_format/utils.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_docker_client/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_download_container.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_errors.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/biolib_logging.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/auth.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/download_container.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/init.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/lfs.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/push.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/run.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/runtime.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/start.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/.gitignore +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/cloud_utils/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/cloud_utils/cloud_utils.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/cache_state.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/cache_types.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/docker_image_cache.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/executors/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/executors/docker_types.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/executors/tars/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/executors/types.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/job_worker.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/large_file_system.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/mappings.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/utilization_reporter_thread.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/utils.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/socker_listener_thread.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/socket_sender_thread.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/utils.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/webserver/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/webserver/gunicorn_flask_application.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/webserver/webserver.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/webserver/webserver_types.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/webserver/webserver_utils.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/experiments/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/experiments/experiment.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/jobs/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/jobs/job.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/jobs/job_result.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/jobs/types.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/runtime/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/sdk/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/tables.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/templates/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/templates/example_app.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/typing_utils.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/user/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/user/sign_in.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/utils/__init__.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/utils/app_uri.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/utils/cache_state.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/utils/multipart_uploader.py +0 -0
- {pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/utils/zip/remote_zip.py +0 -0
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/data_record/data_record.py
@@ -83,6 +83,8 @@ def verify_schema(specification: SqliteV1DatabaseSchema, actual_schema: SqliteV1
 
 def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
     app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': uri}).json()
+    if app_response['app']['type'] != 'data-record':
+        raise Exception(f'Resource "{uri}" is not a Data Record')
     return DataRecordVersionInfo(
         resource_uri=app_response['app_version']['app_uri'],
         resource_uuid=app_response['app']['public_id'],
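The new guard makes URI resolution fail fast when the target is not a data record. A minimal sketch of the failure mode (the URI is hypothetical, and an authenticated API client is assumed):

    from biolib._internal.data_record.data_record import get_data_record_state_from_uri

    # With the change above, a URI pointing at a regular app now raises:
    #   Exception: Resource "author/some-app" is not a Data Record
    get_data_record_state_from_uri('author/some-app')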
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/http_client.py
@@ -125,12 +125,14 @@ class HttpClient:
 
             except urllib.error.URLError as error:
                 if isinstance(error.reason, socket.timeout):
-                    logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
+                    if retry_count > 0:
+                        logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
                     last_error = error
                 else:
                     raise error
             except socket.timeout as error:
-                logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
+                if retry_count > 0:
+                    logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
                 last_error = error
 
         raise last_error or Exception(f'HTTP {method} request failed after {retries} retries for "{url}"')
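The effect of the new `retry_count > 0` guard: a read timeout on the very first attempt is retried silently, and the warning only fires on subsequent attempts. A standalone sketch of that pattern (not the client's actual code):

    import socket

    def request_with_retries(send_once, retries=3):
        last_error = None
        for retry_count in range(retries + 1):
            try:
                return send_once()
            except socket.timeout as error:
                if retry_count > 0:  # stay quiet on the first attempt
                    print('request failed with read timeout, retrying...')
                last_error = error
        raise last_error or Exception(f'request failed after {retries} retries')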
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_internal/push_application.py
@@ -211,6 +211,11 @@ def push_application(
     try:
         logger.info(f'Trying to push image {docker_image_name} defined on module {module_name}.')
         image = docker_client.images.get(docker_image_name)
+        architecture = image.attrs.get('Architecture')
+        if architecture != 'amd64':
+            print(f"Error: '{docker_image_name}' is compiled for {architecture}, expected x86 (amd64).")
+            print('If you are on an ARM processor, try passing --platform linux/amd64 to docker build.')
+            exit(1)
         absolute_repo_uri = f'{utils.BIOLIB_SITE_HOSTNAME}/{repo}'
         image.tag(absolute_repo_uri, tag)
 
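The check reads the `Architecture` attribute of the local image manifest through the Docker SDK, so an ARM-built image is rejected before any upload traffic starts. A roughly equivalent standalone check (the image name is hypothetical):

    import docker

    client = docker.from_env()
    image = client.images.get('my-app:latest')
    if image.attrs.get('Architecture') != 'amd64':
        # Rebuild for x86, e.g.: docker build --platform linux/amd64 .
        raise SystemExit(f"image is {image.attrs.get('Architecture')}, expected amd64")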
pybiolib-1.2.12/biolib/_internal/utils/multinode.py (new file)
@@ -0,0 +1,264 @@
+import glob
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+
+import biolib
+from biolib.utils import SeqUtil
+
+
+def natsorted(lst):
+    """Sort the list using the natural sort key."""
+
+    def _natural_sort_key(s):
+        """A key function for natural sorting."""
+        return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', s)]
+
+    return sorted(lst, key=_natural_sort_key)
+
+
+def fasta_above_threshold(fasta_file, work_threshold, work_per_residue=1, verbose=False):
+    """True if the total FASTA residue work is above work_threshold"""
+
+    records = SeqUtil.parse_fasta(fasta_file)
+
+    # Calculate work units
+    total_work_units = 0
+    for i, record in enumerate(records):
+        sequence_work_units = len(record.sequence) * work_per_residue
+        total_work_units += sequence_work_units
+
+        if total_work_units >= work_threshold:
+            if verbose:
+                print(f'FASTA above threshold (stopped at {total_work_units}) >= {work_threshold}')
+                print(f'From {i+1}/{len(records)} sequences in {fasta_file}')
+            return True
+
+    if verbose:
+        print(f'FASTA below threshold ({total_work_units}) < {work_threshold}')
+        print(f'From {len(records)} sequences in {fasta_file}')
+
+    return False
+
+
+def run_locally(command_list, args):
+    """Run script locally (no multi-node processing)"""
+
+    # Prepare command
+    new_args = vars(args)
+
+    # Delete multinode-specific input arguments
+    for k in list(new_args.keys()):
+        if str(k).startswith('multinode'):
+            del new_args[k]
+
+    # Convert to list format
+    new_args_list = _args_dict_to_args_list(new_args)
+
+    # Prepare command, e.g. ["python3", "predict.py"] + new_args_list
+    command = command_list + new_args_list
+
+    if args.verbose >= 1:
+        print(f'Running {command}')
+
+    # Run command
+    result = subprocess.run(command, capture_output=True, text=True, check=False)
+    if result.returncode == 0:
+        print(f'{result.stdout}')
+    else:
+        print(f'Error: {result.stderr}')
+
+
+def fasta_batch_records(fasta_file, work_per_batch_min, work_per_residue=1, verbose=False):
+    """Converts FASTA records to batches of records, based on thresholds"""
+
+    def log_batches(batches):
+        for i, batch in enumerate(batches):
+            batch_dict = {
+                'records': len(batch),
+                'residues': sum(len(record.sequence) for record in batch),
+            }
+
+            n_seqs, n_res = batch_dict['records'], batch_dict['residues']
+            print(f'Batch {i+1}: {n_res} residues from {n_seqs} sequences')
+
+    records = SeqUtil.parse_fasta(fasta_file)
+
+    batches = []
+    batch = []
+    current_work_units = 0
+    total_work_units = 0
+    for record in records:
+        # Add to batch
+        batch.append(record)
+
+        # Calculate work units
+        seq = record.sequence
+        sequence_work_units = len(seq) * work_per_residue
+
+        # Increase counters
+        current_work_units += sequence_work_units
+        total_work_units += sequence_work_units
+
+        # If above limit, start a new batch
+        if current_work_units >= work_per_batch_min:
+            batches.append(batch)
+            batch = []
+            current_work_units = 0
+
+    # Append last batch if present
+    if batch:
+        batches.append(batch)
+
+    if verbose:
+        log_batches(batches)
+
+    return batches
+
+
+def fasta_send_batches_biolib(app_url, batches, args, args_fasta='fasta', verbose=1):
+    """
+    Send jobs through pybiolib interface
+    """
+
+    if args.verbose >= 1:
+        print(f'Sending {len(batches)} batches to Biolib')
+
+    # Login to biolib, prepare app
+    # current_app = biolib.load(Runtime.get_app_uri())
+    biolib.login()
+    current_app = biolib.load(app_url)  # Nb: uses "_" not "-"
+
+    # Compute results
+    job_list = []
+    for i, batch_records in enumerate(batches):
+        # Write FASTA, send to server
+        with tempfile.TemporaryDirectory() as tempdir:
+            # New arguments
+            new_args = vars(args)
+
+            # Write batched FASTA to send
+            fasta_path = f'{tempdir}/input.fasta'
+            SeqUtil.write_records_to_fasta(fasta_path, batch_records)
+            new_args[args_fasta] = fasta_path
+            new_args['multinode_only_local'] = True
+
+            # Convert to list
+            new_args_list = _args_dict_to_args_list(new_args)
+
+            # Send job
+            job = current_app.cli(args=new_args_list, blocking=False)
+            job_list.append(job)
+
+            # Job stats
+            if args.verbose:
+                batch_dict = _get_batch_stats(batch_records)
+                n_seqs, n_res = batch_dict['records'], batch_dict['residues']
+                print(f'Sending job {i+1}: {n_res} residues from {n_seqs} sequences -> arg_list = {new_args_list}')
+
+    # Stream job outputs one at a time
+    print('Streaming job outputs ...')
+    for i, job in enumerate(job_list):
+        job.stream_logs()
+
+        # Check if job succeeded
+        assert job.get_exit_code() == 0, f'Job failed with exit code {job.get_exit_code()}'
+
+        # Write to disk
+        output_dir = f'job_output/job_{i+1}'
+        job.save_files(output_dir=output_dir)
+
+        if verbose:
+            print(f'Saving to {output_dir}')
+
+
+def merge_folder(folder_name, job_out_dir='job_output', out_dir='output', verbose=1):
+    """Helper function for merging folders"""
+
+    os.makedirs(out_dir, exist_ok=True)
+
+    job_dirs = glob.glob(f'{job_out_dir}/job_*')
+    job_dirs = natsorted(job_dirs)
+
+    # Move first folder, prepare to merge
+    first_folder = f'{job_dirs[0]}/{folder_name}'
+    merged_folder = f'{out_dir}/{folder_name}'
+    shutil.move(first_folder, merged_folder)
+
+    if verbose:
+        print(f'Merging {folder_name} from {len(job_dirs)} directories to {merged_folder}')
+
+    # If more than one folder, merge to first
+    if len(job_dirs) >= 2:
+        # Find each job output folder
+        for job_dir in job_dirs[1:]:
+            # Move over extra files
+            extra_folder = f'{job_dir}/{folder_name}'
+            extra_files = os.listdir(extra_folder)
+            for file_name in extra_files:
+                file_path = f'{extra_folder}/{file_name}'
+                shutil.move(file_path, merged_folder)
+
+
+def merge_file(
+    file_name,
+    header_lines_int=1,
+    job_out_dir='job_output',
+    out_dir='output',
+    verbose=1,
+):
+    """Helper function for merging files with headers"""
+
+    os.makedirs(out_dir, exist_ok=True)
+
+    job_dirs = glob.glob(f'{job_out_dir}/job_*')
+    job_dirs = natsorted(job_dirs)
+
+    # Move first file, prepare to merge
+    first_file = f'{job_dirs[0]}/{file_name}'
+    merged_file = f'{out_dir}/{file_name}'
+    shutil.move(first_file, merged_file)
+
+    if verbose:
+        print(f'Merging {file_name} from {len(job_dirs)} directories to {merged_file}')
+
+    # If more than one file, append to first
+    if len(job_dirs) >= 2:
+        # Open first file
+        with open(merged_file, 'a') as merged_file_handle:
+            # Find each job output file
+            for job_dir in job_dirs[1:]:
+                # Open extra file
+                extra_file = f'{job_dir}/{file_name}'
+                with open(extra_file) as extra_file_handle:
+                    # Skip first n header lines
+                    for _ in range(header_lines_int):
+                        next(extra_file_handle)
+
+                    # Append content to first file
+                    contents = extra_file_handle.read()
+                    merged_file_handle.write(contents)
+
+
+def _get_batch_stats(batch):
+    stats_dict = {
+        'records': len(batch),
+        'residues': sum(len(R.sequence) for R in batch),
+    }
+
+    return stats_dict
+
+
+def _args_dict_to_args_list(new_args):
+    """Converts args dict to list of arguments for Biolib"""
+
+    nested_list = [[f'--{key}', f'{value}'] for key, value in new_args.items()]
+
+    arg_list = []
+    for lst in nested_list:
+        for item in lst:
+            arg_list.append(item)
+
+    return arg_list
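Taken together, these helpers implement a scatter/gather flow: decide whether the input FASTA is large enough to fan out, split it into residue-balanced batches, run each batch as a child job, then merge the outputs. A hypothetical driver sketch (the app URI, thresholds, file names, and argparse flags are all made up for illustration):

    import argparse

    from biolib._internal.utils.multinode import (
        fasta_above_threshold,
        fasta_batch_records,
        fasta_send_batches_biolib,
        merge_file,
        run_locally,
    )

    parser = argparse.ArgumentParser()
    parser.add_argument('--fasta', default='input.fasta')
    parser.add_argument('--verbose', type=int, default=1)
    # Passed as '--multinode_only_local True' to child jobs; any non-empty string is truthy
    parser.add_argument('--multinode_only_local', default=False)
    args = parser.parse_args()

    if not args.multinode_only_local and fasta_above_threshold(args.fasta, work_threshold=100_000):
        batches = fasta_batch_records(args.fasta, work_per_batch_min=50_000)
        fasta_send_batches_biolib('author/My_App', batches, args)
        merge_file('predictions.csv', header_lines_int=1)  # concatenate per-job CSVs, keeping one header
    else:
        run_locally(['python3', 'predict.py'], args)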
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/_runtime/runtime.py
@@ -14,6 +14,10 @@ class Runtime:
     def check_is_environment_biolib_app() -> bool:
         return bool(Runtime._try_to_get_job_data())
 
+    @staticmethod
+    def check_is_environment_biolib_cloud() -> bool:
+        return Runtime._get_job_data().get('is_environment_biolib_cloud', False)
+
     @staticmethod
     def get_job_id() -> str:
         return Runtime._get_job_data()['job_uuid']
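The new accessor lets app code branch on whether it is executing on BioLib's cloud rather than a local compute node. A small usage sketch (import path per this package's layout; the field is populated by the docker_executor change further down):

    from biolib._runtime.runtime import Runtime

    if Runtime.check_is_environment_biolib_app() and Runtime.check_is_environment_biolib_cloud():
        print('running as a BioLib app on cloud infrastructure')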
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/cli/data_record.py
@@ -6,6 +6,7 @@ from typing import Dict, List
 import click
 
 from biolib._data_record.data_record import DataRecord
+from biolib.biolib_api_client import BiolibApiClient
 from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.typing_utils import Optional
 
@@ -57,6 +58,7 @@ def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
 @click.argument('uri', required=True)
 @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
 def describe(uri: str, output_as_json: bool) -> None:
+    BiolibApiClient.assert_is_signed_in(authenticated_action_description='get Data Record description')
     record = DataRecord.get_by_uri(uri)
     files_info: List[Dict] = []
     total_size_in_bytes = 0
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/executors/docker_executor.py
@@ -286,6 +286,7 @@ class DockerExecutor:
             job_uuid=self._options['job']['public_id'],
             job_auth_token=self._options['job']['auth_token'],
             app_uri=self._options['job']['app_uri'],
+            is_environment_biolib_cloud=bool(utils.IS_RUNNING_IN_CLOUD),
         )
         secrets: Dict[str, str] = dict(
             **module.get('secrets', {}),
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/job_worker/job_storage.py
@@ -47,8 +47,21 @@ class JobStorage:
         module_output_path = os.path.join(job_temporary_dir, JobStorage.module_output_file_name)
         module_output_size = os.path.getsize(module_output_path)
 
+        # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
+        max_chunk_count = 9_000
+        min_chunk_size_bytes = 50_000_000
+        chunk_size_in_bytes = max(min_chunk_size_bytes, module_output_size // max_chunk_count)
+
+        logger_no_user_data.debug(
+            f'Job "{job_uuid}" uploading result of size {module_output_size} bytes '
+            f'with chunk size of {chunk_size_in_bytes} bytes...'
+        )
+
         with open(module_output_path, mode='rb') as module_output_file:
-            module_output_iterator = get_chunk_iterator_from_file_object(
+            module_output_iterator = get_chunk_iterator_from_file_object(
+                file_object=module_output_file,
+                chunk_size_in_bytes=chunk_size_in_bytes,
+            )
             multipart_uploader = JobStorage._get_module_output_uploader(job_uuid)
             multipart_uploader.upload(
                 payload_iterator=module_output_iterator,
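Worked through: multipart uploads commonly cap at 10,000 parts, so the 9,000-part budget leaves headroom. Anything up to 9,000 × 50 MB = 450 GB uploads at the 50 MB floor, while a hypothetical 1 TB result gets ~111 MB chunks instead:

    module_output_size = 1_000_000_000_000  # hypothetical 1 TB result
    chunk_size_in_bytes = max(50_000_000, module_output_size // 9_000)
    print(chunk_size_in_bytes)  # 111111111 (~111 MB)
    # Part count stays safely below the 10_000-part cap:
    assert module_output_size / chunk_size_in_bytes < 10_000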
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/remote_host_proxy.py
@@ -1,7 +1,9 @@
+import base64
 import io
 import subprocess
 import tarfile
 import time
+from urllib.parse import urlparse
 
 from docker.errors import ImageNotFound  # type: ignore
 from docker.models.containers import Container  # type: ignore
@@ -145,14 +147,12 @@ class RemoteHostProxy:
             raise Exception('RemoteHostProxy container not defined when attempting to write NGINX config')
 
         docker = BiolibDockerClient.get_docker_client()
-
+        upstream_hostname = urlparse(BiolibApiClient.get().base_url).hostname
         if self.is_app_caller_proxy:
-            if not utils.IS_RUNNING_IN_CLOUD
+            if not utils.IS_RUNNING_IN_CLOUD:
                 raise BioLibError('Calling apps inside apps is not supported in local compute environment')
 
             logger_no_user_data.debug(f'Job "{self._job_uuid}" writing config for and starting App Caller Proxy...')
-            cloud_base_url = utils.BIOLIB_CLOUD_BASE_URL
-
             config = CloudUtils.get_webserver_config()
             compute_node_uuid = config['compute_node_info']['public_id']
             compute_node_auth_token = config['compute_node_info']['auth_token']
@@ -161,6 +161,9 @@ class RemoteHostProxy:
             access_token = BiolibApiClient.get().access_token
             bearer_token = f'Bearer {access_token}' if access_token else ''
 
+            biolib_index_basic_auth = f'compute_node|admin:{compute_node_auth_token},{self._job_uuid}'
+            biolib_index_basic_auth_base64 = base64.b64encode(biolib_index_basic_auth.encode('utf-8')).decode('utf-8')
+
             nginx_config = f"""
 events {{
   worker_connections 1024;
@@ -186,10 +189,11 @@ http {{
 
     server {{
         listen 80;
-        resolver 127.0.0.11 valid=30s;
+        resolver 127.0.0.11 ipv6=off valid=30s;
+        set $upstream_hostname {upstream_hostname};
 
         location ~* "^/api/jobs/cloud/(?<job_id>[a-z0-9-]{{36}})/status/$" {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/cloud/$job_id/status/;
             proxy_set_header authorization $bearer_token_on_get;
             proxy_set_header cookie "";
             proxy_ssl_server_name on;
@@ -197,35 +201,35 @@ http {{
 
         location ~* "^/api/jobs/cloud/$" {{
             # Note: Using $1 here as URI part from regex must be used for proxy_pass
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/cloud/$1;
             proxy_set_header authorization $bearer_token_on_post;
             proxy_set_header cookie "";
             proxy_ssl_server_name on;
         }}
 
         location ~* "^/api/jobs/(?<job_id>[a-z0-9-]{{36}})/storage/input/start_upload/$" {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/$job_id/storage/input/start_upload/;
             proxy_set_header authorization "";
             proxy_set_header cookie "";
             proxy_ssl_server_name on;
         }}
 
         location ~* "^/api/jobs/(?<job_id>[a-z0-9-]{{36}})/storage/input/presigned_upload_url/$" {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/$job_id/storage/input/presigned_upload_url/$is_args$args;
             proxy_set_header authorization "";
             proxy_set_header cookie "";
             proxy_ssl_server_name on;
         }}
 
         location ~* "^/api/jobs/(?<job_id>[a-z0-9-]{{36}})/storage/input/complete_upload/$" {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/$job_id/storage/input/complete_upload/;
             proxy_set_header authorization "";
             proxy_set_header cookie "";
             proxy_ssl_server_name on;
         }}
 
         location ~* "^/api/jobs/(?<job_id>[a-z0-9-]{{36}})/main_result/$" {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/$job_id/main_result/;
             proxy_set_header authorization "";
             proxy_set_header cookie "";
             proxy_pass_request_headers on;
@@ -233,7 +237,7 @@ http {{
         }}
 
         location ~* "^/api/jobs/(?<job_id>[a-z0-9-]{{36}})/$" {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/$job_id/;
             proxy_set_header authorization $bearer_token_on_patch_and_get;
             proxy_set_header caller-job-uuid "{self._job_uuid}";
             proxy_set_header cookie "";
@@ -242,7 +246,7 @@ http {{
 
         location ~* "^/api/jobs/create_job_with_data/$" {{
             # Note: Using $1 here as URI part from regex must be used for proxy_pass
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/create_job_with_data/$1;
             proxy_set_header authorization $bearer_token_on_post;
             proxy_set_header caller-job-uuid "{self._job_uuid}";
             proxy_set_header cookie "";
@@ -251,7 +255,7 @@ http {{
 
         location ~* "^/api/jobs/$" {{
             # Note: Using $1 here as URI part from regex must be used for proxy_pass
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/$1;
             proxy_set_header authorization $bearer_token_on_post;
             proxy_set_header caller-job-uuid "{self._job_uuid}";
             proxy_set_header cookie "";
@@ -260,7 +264,7 @@ http {{
 
         location ~ "^/api/jobs/{self._job_uuid}/notes/$" {{
             # Note: Using $1 here as URI part from regex must be used for proxy_pass
-            proxy_pass
+            proxy_pass https://$upstream_hostname/api/jobs/{self._job_uuid}/notes/$1;
             proxy_set_header authorization "";
             proxy_set_header job-auth-token "";
             proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
@@ -270,7 +274,7 @@ http {{
         }}
 
         location /api/lfs/ {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname$request_uri;
             proxy_set_header authorization "";
             proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
             proxy_set_header job-uuid "{self._job_uuid}";
@@ -279,7 +283,7 @@ http {{
         }}
 
         location /api/app/ {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname$request_uri;
             proxy_set_header authorization "";
             proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
             proxy_set_header job-uuid "{self._job_uuid}";
@@ -288,33 +292,40 @@ http {{
         }}
 
         location /api/ {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname$request_uri;
             proxy_set_header authorization "";
             proxy_set_header cookie "";
             proxy_ssl_server_name on;
         }}
 
         location /proxy/storage/job-storage/ {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname$request_uri;
             proxy_set_header authorization "";
             proxy_set_header cookie "";
             proxy_ssl_server_name on;
         }}
 
         location /proxy/storage/lfs/versions/ {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname$request_uri;
             proxy_set_header authorization "";
             proxy_set_header cookie "";
             proxy_ssl_server_name on;
         }}
 
         location /proxy/cloud/ {{
-            proxy_pass
+            proxy_pass https://$upstream_hostname$request_uri;
             proxy_set_header authorization "";
             proxy_set_header cookie "";
             proxy_ssl_server_name on;
         }}
 
+        location /proxy/index/ {{
+            proxy_pass https://$upstream_hostname$request_uri;
+            proxy_set_header authorization "Basic {biolib_index_basic_auth_base64}";
+            proxy_set_header cookie "";
+            proxy_ssl_server_name on;
+        }}
+
         location / {{
             return 404 "Not found";
         }}
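The new `/proxy/index/` location injects HTTP Basic credentials whose username is `compute_node|admin` and whose password combines the compute-node auth token with the job UUID. A sketch of how that header value is formed (placeholder credentials):

    import base64

    credentials = 'compute_node|admin:COMPUTE-NODE-TOKEN,JOB-UUID'  # placeholders
    header_value = 'Basic ' + base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
    print(header_value)  # the value nginx sends as the authorization header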
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/compute_node/webserver/worker_thread.py
@@ -2,23 +2,23 @@ import base64
 import os
 import random
 import shutil
+import socket
 import sys
-import time
 import threading
-import
+import time
 from queue import Queue
 
-from biolib import utils
+from biolib import api, utils
+from biolib.biolib_binary_format import ModuleOutputV2, SystemException, SystemStatusUpdate
 from biolib.biolib_binary_format.utils import LocalFileIndexableBuffer
+from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.compute_node.cloud_utils import CloudUtils
 from biolib.compute_node.job_worker import JobWorkerProcess
 from biolib.compute_node.job_worker.job_storage import JobStorage
 from biolib.compute_node.socker_listener_thread import SocketListenerThread
 from biolib.compute_node.socket_sender_thread import SocketSenderThread
+from biolib.compute_node.utils import SystemExceptionCodes, WorkerThreadException, get_package_type
 from biolib.compute_node.webserver import webserver_utils
-from biolib.biolib_binary_format import SystemStatusUpdate, SystemException, ModuleOutputV2
-from biolib.compute_node.utils import get_package_type, WorkerThreadException, SystemExceptionCodes
-from biolib.biolib_logging import logger, logger_no_user_data
 
 SOCKET_HOST = '127.0.0.1'
 
@@ -37,7 +37,7 @@ class WorkerThread(threading.Thread):
             self._sender_thread = None
             self._start_and_connect_to_compute_process()
 
-            logger.debug(f
+            logger.debug(f'WorkerThread connected to port {self._socket_port}')
 
         except Exception as exception:
             logger_no_user_data.error(exception)
@@ -79,20 +79,16 @@ class WorkerThread(threading.Thread):
                 if progress == 94:
                     # Get Job exit code
                     try:
-                        module_output_path = os.path.join(
-
-
-                            buffer=LocalFileIndexableBuffer(
-                                filename=module_output_path
-                            )
+                        module_output_path = os.path.join(
+                            self._job_temporary_dir,
+                            JobStorage.module_output_file_name,
                         )
+                        module_output = ModuleOutputV2(buffer=LocalFileIndexableBuffer(filename=module_output_path))
                         self.compute_state['exit_code'] = module_output.get_exit_code()
                         logger_no_user_data.debug(f"Got exit code: {self.compute_state['exit_code']}")
 
                     except Exception as error:  # pylint: disable=broad-except
-                        logger_no_user_data.error(
-                            f'Could not get exit_code from module output due to: {error}'
-                        )
+                        logger_no_user_data.error(f'Could not get exit_code from module output due to: {error}')
 
                     if utils.IS_RUNNING_IN_CLOUD:
                         JobStorage.upload_module_output(
@@ -107,7 +103,7 @@ class WorkerThread(threading.Thread):
                 elif package_type == 'SystemException':
                     error_code = SystemException(package).deserialize()
                     self.compute_state['status']['error_code'] = error_code
-                    logger.debug(
+                    logger.debug('Hit error. Terminating Worker Thread and Compute Process')
                     self.compute_state['progress'] = 95
                     self.terminate()
 
@@ -153,10 +149,10 @@ class WorkerThread(threading.Thread):
 
        # Starting a thread for accepting connections before starting the process that should connect to the socket
        logger_no_user_data.debug('Starting connection thread')
-        self._connection_thread = threading.Thread(
-
-            messages_to_send_queue
-
+        self._connection_thread = threading.Thread(
+            target=self._accept_new_socket_connection,
+            args=[received_messages_queue, messages_to_send_queue],
+        )
        self._connection_thread.start()
        logger_no_user_data.debug('Started connection thread')
        logger_no_user_data.debug('Starting compute process')
@@ -177,6 +173,16 @@ class WorkerThread(threading.Thread):
         self._sender_thread.start()
 
     def terminate(self) -> None:
+        cloud_job_uuid = self.compute_state['cloud_job_id']
+        exit_code = self.compute_state.get('exit_code')
+        system_exception_code = self.compute_state['status'].get('error_code')
+        if utils.IS_RUNNING_IN_CLOUD:
+            CloudUtils.finish_cloud_job(
+                cloud_job_id=cloud_job_uuid,
+                system_exception_code=system_exception_code,
+                exit_code=exit_code,
+            )
+
         deregistered_due_to_error = False
         if self._job_worker_process:
             logger_no_user_data.debug(
@@ -184,7 +190,8 @@ class WorkerThread(threading.Thread):
             )
             self._job_worker_process.terminate()
 
-
+            clean_up_timeout_in_seconds = 600
+            for _ in range(clean_up_timeout_in_seconds):
                 if self._job_worker_process.exitcode is not None:
                     logger_no_user_data.debug(
                         f'Job "{self._job_uuid}" worker process exitcode {self._job_worker_process.exitcode}'
@@ -196,28 +203,18 @@ class WorkerThread(threading.Thread):
 
            if self._job_worker_process.exitcode is None:
                # TODO: Figure out if more error handling is necessary here
-                logger_no_user_data.error(
+                logger_no_user_data.error(
+                    f'Job {self._job_uuid} worker process did not exit within {clean_up_timeout_in_seconds} seconds'
+                )
                if utils.IS_RUNNING_IN_CLOUD:
                    logger_no_user_data.error('Deregistering compute node...')
                    CloudUtils.deregister(error='job_cleanup_timed_out')
                    deregistered_due_to_error = True
 
        # Delete result as error occurred
-        system_exception_code = self.compute_state['status'].get('error_code')
        if system_exception_code and os.path.exists(self._job_temporary_dir):
            shutil.rmtree(self._job_temporary_dir)
 
-        exit_code = self.compute_state.get('exit_code')
-
-        if utils.IS_RUNNING_IN_CLOUD:
-            # Get and send compute node exception code and job exit code if present
-            logger_no_user_data.debug(f"Sending exit code {exit_code}")
-            CloudUtils.finish_cloud_job(
-                cloud_job_id=self.compute_state['cloud_job_id'],
-                system_exception_code=system_exception_code,
-                exit_code=exit_code
-            )
-
        if self._socket:
            self._socket.close()
 
@@ -225,7 +222,7 @@ class WorkerThread(threading.Thread):
            self._connection.close()
 
        if self.compute_state['progress'] == 95:
-            seconds_to_sleep =
+            seconds_to_sleep = 5
            logger_no_user_data.debug(
                f'Job "{self._job_uuid}" worker thread sleeping for {seconds_to_sleep} seconds before cleaning up'
            )
@@ -234,7 +231,7 @@ class WorkerThread(threading.Thread):
 
        compute_state_dict = webserver_utils.JOB_ID_TO_COMPUTE_STATE_DICT
        if self._job_uuid in compute_state_dict:
-            # Delete result as user has not started download
+            # Delete result as user has not started download
            if compute_state_dict[self._job_uuid]['progress'] == 95 and os.path.exists(self._job_temporary_dir):
                shutil.rmtree(self._job_temporary_dir)
 
@@ -245,12 +242,18 @@ class WorkerThread(threading.Thread):
                f'Job "{self._job_uuid}" could not be found, maybe it has already been cleaned up'
            )
 
-        logger_no_user_data.debug(f'Job "{self._job_uuid}" worker thread terminated')
-
        if utils.IS_RUNNING_IN_CLOUD:
+            config = CloudUtils.get_webserver_config()
+            logger_no_user_data.debug(f'Job "{self._job_uuid}" reporting CloudJob "{cloud_job_uuid}" as cleaned up...')
+            api.client.post(
+                path=f'/internal/compute-nodes/cloud-jobs/{cloud_job_uuid}/cleaned-up/',
+                headers={'Compute-Node-Auth-Token': config['compute_node_info']['auth_token']},
+            )
+
            if deregistered_due_to_error:
                CloudUtils.shutdown()  # shutdown now
            else:
                webserver_utils.update_auto_shutdown_time()
 
+        logger_no_user_data.debug(f'Job "{self._job_uuid}" worker thread exiting...')
        sys.exit()
{pybiolib-1.1.2189 → pybiolib-1.2.12}/biolib/utils/seq_util.py
@@ -35,7 +35,7 @@ class SeqUtil:
         input_file: Union[str, BufferedIOBase, None] = None,
         default_header: Optional[str] = None,
         allow_any_sequence_characters: bool = False,
-        allow_empty_sequence: bool = False,
+        allow_empty_sequence: bool = True,
         file_name: Optional[str] = None,
     ) -> List[SeqUtilRecord]:
         if input_file is None:
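With `allow_empty_sequence` now defaulting to True, FASTA records with no residues parse without an explicit opt-in. A sketch, assuming an in-memory buffer is acceptable for `input_file` per its `Union[str, BufferedIOBase, None]` annotation:

    from io import BytesIO
    from biolib.utils import SeqUtil

    fasta = BytesIO(b'>empty_record\n>real_record\nMKV\n')
    records = SeqUtil.parse_fasta(input_file=fasta)  # previously required allow_empty_sequence=True
    print(len(records))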
{pybiolib-1.1.2189 → pybiolib-1.2.12}/pyproject.toml
@@ -1,8 +1,8 @@
 [tool.poetry]
 name = "pybiolib"
-version = "1.1.2189"
+version = "1.2.12"
 description = "BioLib Python Client"
-readme = "README.md"
+readme = "PYPI_README.md"
 license = "MIT"
 homepage = "https://github.com/biolib"
 keywords = ["biolib"]
@@ -17,7 +17,7 @@ packages = [
     { include = "biolib" },
 ]
 include = [
-    "README.md",
+    "PYPI_README.md",
     "LICENSE",
 ]