pybiolib 1.1.1881__py3-none-any.whl → 1.1.2193__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- biolib/__init__.py +11 -4
- biolib/_data_record/data_record.py +278 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +95 -151
- biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
- biolib/_internal/file_utils.py +77 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +29 -9
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +1 -1
- biolib/_internal/runtime.py +2 -56
- biolib/_internal/types/__init__.py +4 -0
- biolib/_internal/types/app.py +9 -0
- biolib/_internal/types/data_record.py +40 -0
- biolib/_internal/types/experiment.py +10 -0
- biolib/_internal/types/resource.py +14 -0
- biolib/_internal/types/typing.py +7 -0
- biolib/_runtime/runtime.py +80 -0
- biolib/api/__init__.py +1 -0
- biolib/api/client.py +39 -17
- biolib/app/app.py +34 -71
- biolib/biolib_api_client/api_client.py +9 -2
- biolib/biolib_api_client/app_types.py +2 -2
- biolib/biolib_api_client/biolib_job_api.py +6 -0
- biolib/biolib_api_client/job_types.py +4 -4
- biolib/biolib_api_client/lfs_types.py +8 -2
- biolib/biolib_binary_format/remote_endpoints.py +12 -10
- biolib/biolib_binary_format/utils.py +23 -3
- biolib/cli/auth.py +1 -1
- biolib/cli/data_record.py +43 -6
- biolib/cli/lfs.py +10 -6
- biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
- biolib/compute_node/job_worker/executors/docker_executor.py +126 -108
- biolib/compute_node/job_worker/job_storage.py +3 -4
- biolib/compute_node/job_worker/job_worker.py +25 -15
- biolib/compute_node/remote_host_proxy.py +61 -84
- biolib/compute_node/webserver/webserver_types.py +0 -1
- biolib/experiments/experiment.py +75 -44
- biolib/jobs/job.py +98 -19
- biolib/jobs/job_result.py +46 -21
- biolib/jobs/types.py +1 -1
- biolib/runtime/__init__.py +2 -1
- biolib/sdk/__init__.py +18 -7
- biolib/typing_utils.py +2 -7
- biolib/user/sign_in.py +2 -2
- biolib/utils/seq_util.py +38 -35
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/RECORD +55 -44
- biolib/experiments/types.py +0 -9
- biolib/lfs/__init__.py +0 -4
- biolib/lfs/utils.py +0 -153
- /biolib/{lfs → _internal/lfs}/cache.py +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0
biolib/jobs/job.py
CHANGED
```diff
@@ -1,26 +1,30 @@
 import base64
-from datetime import datetime, timedelta
 import sys
 import time
-from pathlib import Path
 from collections import OrderedDict
+from datetime import datetime, timedelta
+from pathlib import Path
 from urllib.parse import urlparse
 
 from biolib import api, utils
 from biolib._internal.http_client import HttpClient
 from biolib._internal.utils import open_browser_window_from_notebook
-from biolib.biolib_api_client import BiolibApiClient
+from biolib.biolib_api_client import BiolibApiClient, CreatedJobDict
+from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
 from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
-from biolib.biolib_binary_format import LazyLoadedFile,
+from biolib.biolib_binary_format import LazyLoadedFile, ModuleInput, ModuleInputDict, ModuleOutputV2
+from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
 from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
 from biolib.biolib_errors import BioLibError, CloudJobFinishedError
 from biolib.biolib_logging import logger, logger_no_user_data
+from biolib.compute_node.job_worker.job_storage import JobStorage
 from biolib.compute_node.utils import SystemExceptionCodeMap, SystemExceptionCodes
 from biolib.jobs.job_result import JobResult
-from biolib.jobs.types import
+from biolib.jobs.types import CloudJobDict, CloudJobStartedDict, JobDict
 from biolib.tables import BioLibTable
-from biolib.typing_utils import
+from biolib.typing_utils import Dict, List, Optional, cast
 from biolib.utils import IS_RUNNING_IN_NOTEBOOK
+from biolib.utils.app_uri import parse_app_uri
 
 
 class Job:
@@ -56,26 +60,23 @@ class Job:
     @property
     def result(self) -> JobResult:
         if not self._result:
-
-            self._result = JobResult(job_uuid=self._uuid, job_auth_token=self._auth_token)
-            else:
-                raise BioLibError(f"Result is not available for {self._uuid}: status is {self._job_dict['state']}.")
+            self._result = JobResult(job_uuid=self._uuid, job_auth_token=self._auth_token)
 
         return self._result
 
     @property
     def stdout(self) -> bytes:
-        logger.warning(
+        logger.warning('The property .stdout is deprecated, please use .get_stdout()')
         return self.result.get_stdout()
 
     @property
     def stderr(self) -> bytes:
-        logger.warning(
+        logger.warning('The property .stderr is deprecated, please use .get_stderr()')
         return self.result.get_stderr()
 
     @property
     def exitcode(self) -> int:
-        logger.warning(
+        logger.warning('The property .exitcode is deprecated, please use .get_exit_code()')
         return self.result.get_exit_code()
 
     def is_finished(self) -> bool:
@@ -109,8 +110,8 @@ class Job:
     def load_file_as_numpy(self, *args, **kwargs):
         try:
             import numpy  # type: ignore # pylint: disable=import-outside-toplevel,import-error
-        except:  # pylint: disable=raise-missing-from
-            raise Exception(
+        except ImportError:  # pylint: disable=raise-missing-from
+            raise Exception('Failed to import numpy, please make sure it is installed.') from None
         file_handle = self.result.get_output_file(*args, **kwargs).get_file_handle()
         return numpy.load(file_handle, allow_pickle=False)  # type: ignore
 
@@ -187,6 +188,39 @@ class Job:
         print('Please copy and paste the following link into your browser:')
         print(results_url_to_open)
 
+    def cancel(self) -> None:
+        try:
+            api.client.patch(
+                path=f'/jobs/{self._uuid}/',
+                headers={'Job-Auth-Token': self._auth_token} if self._auth_token else None,
+                data={'state': 'cancelled'},
+            )
+            logger.info(f'Job {self._uuid} canceled')
+        except Exception as error:
+            logger.error(f'Failed to cancel job {self._uuid} due to: {error}')
+
+    def recompute(self, app_uri: Optional[str] = None, machine: Optional[str] = None, blocking: bool = True) -> 'Job':
+        app_response = BiolibAppApi.get_by_uri(uri=app_uri or self._job_dict['app_uri'])
+
+        job_storage_input = RemoteJobStorageEndpoint(
+            job_auth_token=self._auth_token,
+            job_uuid=self._uuid,
+            storage_type='input',
+        )
+        http_response = HttpClient.request(url=job_storage_input.get_remote_url())
+        module_input_serialized = http_response.content
+
+        job = self._start_job_in_cloud(
+            app_uri=app_response['app_uri'],
+            app_version_uuid=app_response['app_version']['public_id'],
+            module_input_serialized=module_input_serialized,
+            machine=machine,
+        )
+        if blocking:
+            job.stream_logs()
+
+        return job
+
     def _get_cloud_job(self) -> CloudJobDict:
         self._refetch_job_dict(force_refetch=True)
         if self._job_dict['cloud_job'] is None:
@@ -278,7 +312,7 @@ class Job:
             status_json = self._get_job_status_from_compute_node(compute_node_url)
             if not status_json:
                 # this can happen if the job is finished but already removed from the compute node
-                logger.warning(
+                logger.warning('WARN: We were unable to retrieve the full log of the job, please try again')
                 break
             job_is_completed = status_json['is_completed']
             for status_update in status_json['status_updates']:
@@ -320,7 +354,10 @@ class Job:
             self.print_logs_packages(response_json['streamed_logs_packages_b64'])
 
     def _get_cloud_job_awaiting_started(self) -> CloudJobStartedDict:
+        retry_count = 0
         while True:
+            retry_count += 1
+            time.sleep(min(10, retry_count))
             cloud_job = self._get_cloud_job()
 
             if cloud_job['finished_at']:
@@ -333,7 +370,6 @@ class Job:
                 return cast(CloudJobStartedDict, cloud_job)
 
             logger.info('Cloud: The job has been queued. Please wait...')
-            time.sleep(10)
 
     def _get_job_status_from_compute_node(self, compute_node_url):
         for _ in range(15):
@@ -341,9 +377,9 @@ class Job:
                 return HttpClient.request(url=f'{compute_node_url}/v1/job/{self._uuid}/status/').json()
             except Exception:  # pylint: disable=broad-except
                 cloud_job = self._get_cloud_job()
-                logger.debug(
+                logger.debug('Failed to get status from compute node, retrying...')
                 if cloud_job['finished_at']:
-                    logger.debug(
+                    logger.debug('Job no longer exists on compute node, checking for error...')
                     if cloud_job['error_code'] != SystemExceptionCodes.COMPLETED_SUCCESSFULLY.value:
                         error_message = SystemExceptionCodeMap.get(
                             cloud_job['error_code'], f'Unknown error code {cloud_job["error_code"]}'
@@ -366,3 +402,46 @@ class Job:
 
         self._job_dict = self._get_job_dict(self._uuid, self._auth_token)
         self._job_dict_last_fetched_at = datetime.utcnow()
+
+    @staticmethod
+    def _start_job_in_cloud(
+        app_uri: str,
+        app_version_uuid: str,
+        module_input_serialized: bytes,
+        override_command: bool = False,
+        machine: Optional[str] = None,
+        experiment_id: Optional[str] = None,
+        result_prefix: Optional[str] = None,
+        timeout: Optional[int] = None,
+        notify: bool = False,
+        requested_machine_count: Optional[int] = None,
+    ) -> 'Job':
+        if len(module_input_serialized) < 500_000:
+            _job_dict = BiolibJobApi.create_job_with_data(
+                app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
+                app_version_uuid=app_version_uuid,
+                arguments_override_command=override_command,
+                experiment_uuid=experiment_id,
+                module_input_serialized=module_input_serialized,
+                notify=notify,
+                requested_machine=machine,
+                requested_timeout_seconds=timeout,
+                result_name_prefix=result_prefix,
+                requested_machine_count=requested_machine_count,
+            )
+            return Job(cast(JobDict, _job_dict))
+
+        job_dict: CreatedJobDict = BiolibJobApi.create(
+            app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
+            app_version_id=app_version_uuid,
+            experiment_uuid=experiment_id,
+            machine=machine,
+            notify=notify,
+            override_command=override_command,
+            timeout=timeout,
+            requested_machine_count=requested_machine_count,
+        )
+        JobStorage.upload_module_input(job=job_dict, module_input_serialized=module_input_serialized)
+        cloud_job = BiolibJobApi.create_cloud_job(job_id=job_dict['public_id'], result_name_prefix=result_prefix)
+        logger.debug(f"Cloud: Job created with id {cloud_job['public_id']}")
+        return Job(cast(JobDict, job_dict))
```
biolib/jobs/job_result.py
CHANGED
```diff
@@ -1,25 +1,24 @@
-from pathlib import Path
-from fnmatch import fnmatch
 import time
+from fnmatch import fnmatch
+from pathlib import Path
 
 from biolib.biolib_binary_format import ModuleOutputV2
+from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
 from biolib.biolib_binary_format.remote_stream_seeker import StreamSeeker
-from biolib.biolib_binary_format.utils import
-from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageResultEndpoint
+from biolib.biolib_binary_format.utils import LazyLoadedFile, RemoteIndexableBuffer
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger
-from biolib.typing_utils import
+from biolib.typing_utils import Callable, List, Optional, Union, cast
 
 PathFilter = Union[str, Callable[[str], bool]]
 
 
 class JobResult:
-
     def __init__(
-
-
-
-
+        self,
+        job_uuid: str,
+        job_auth_token: str,
+        module_output: Optional[ModuleOutputV2] = None,
     ):
         self._job_uuid: str = job_uuid
         self._job_auth_token: str = job_auth_token
@@ -35,7 +34,12 @@ class JobResult:
     def get_exit_code(self) -> int:
         return self._get_module_output().get_exit_code()
 
-    def save_files(
+    def save_files(
+        self,
+        output_dir: str,
+        path_filter: Optional[PathFilter] = None,
+        skip_file_if_exists: Optional[bool] = None,
+    ) -> None:
         module_output = self._get_module_output()
         output_files = module_output.get_files()
         filtered_output_files = self._get_filtered_files(output_files, path_filter) if path_filter else output_files
@@ -61,24 +65,44 @@ class JobResult:
             # Remove leading slash of file_path
             destination_file_path = Path(output_dir) / Path(file.path.lstrip('/'))
             if destination_file_path.exists():
-
+                if skip_file_if_exists:
+                    print(f'Skipping {destination_file_path} as a file with that name already exists locally.')
+                    continue
+                else:
+                    destination_file_path.rename(
+                        f'{destination_file_path}.biolib-renamed.{time.strftime("%Y%m%d%H%M%S")}'
+                    )
 
             dir_path = destination_file_path.parent
             if dir_path:
                 dir_path.mkdir(parents=True, exist_ok=True)
 
-
-
-
+            # write content to temporary (partial) file
+            partial_path = destination_file_path.with_suffix(
+                destination_file_path.suffix + f'.{self._job_uuid}.partial_biolib_download'
+            )
+            file_start = file.start
+            data_to_download = file.length
+            if partial_path.exists():
+                data_already_downloaded = partial_path.stat().st_size
+                file_start += data_already_downloaded
+                data_to_download -= data_already_downloaded
+
+            with open(partial_path, mode='ab') as partial_file:
+                for chunk in stream_seeker.seek_and_read(file_start=file_start, file_length=data_to_download):
+                    partial_file.write(chunk)
+
+            # rename partial file to actual file name
+            partial_path.rename(destination_file_path)
 
     def get_output_file(self, filename) -> LazyLoadedFile:
         files = self._get_module_output().get_files()
         filtered_files = self._get_filtered_files(files, path_filter=filename)
         if not filtered_files:
-            raise BioLibError(f
+            raise BioLibError(f'File {filename} not found in results.')
 
         if len(filtered_files) != 1:
-            raise BioLibError(f
+            raise BioLibError(f'Found multiple results for filename {filename}.')
 
         return filtered_files[0]
 
@@ -100,8 +124,8 @@ class JobResult:
             glob_filter = cast(str, path_filter)
 
             # since all file paths start with /, make sure filter does too
-            if not glob_filter.startswith(
-                glob_filter =
+            if not glob_filter.startswith('/'):
+                glob_filter = '/' + glob_filter
 
             def _filter_function(file: LazyLoadedFile) -> bool:
                 return fnmatch(file.path, glob_filter)
@@ -110,9 +134,10 @@ class JobResult:
 
     def _get_module_output(self) -> ModuleOutputV2:
         if self._module_output is None:
-            remote_job_storage_endpoint =
-                job_id=self._job_uuid,
+            remote_job_storage_endpoint = RemoteJobStorageEndpoint(
                 job_auth_token=self._job_auth_token,
+                job_uuid=self._job_uuid,
+                storage_type='output',
             )
             buffer = RemoteIndexableBuffer(endpoint=remote_job_storage_endpoint)
            self._module_output = ModuleOutputV2(buffer)
```
biolib/jobs/types.py
CHANGED
biolib/runtime/__init__.py
CHANGED
biolib/sdk/__init__.py
CHANGED
```diff
@@ -1,12 +1,14 @@
+from typing import Optional
+
 # Imports to hide and use as private internal utils
-from biolib.
+from biolib._data_record.data_record import DataRecord as _DataRecord
 from biolib._internal.push_application import push_application as _push_application
 from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
+from biolib._runtime.runtime import Runtime as _Runtime
 from biolib.app import BioLibApp as _BioLibApp
-from biolib.typing_utils import Optional as _Optional
 
-#
-
+# Classes to expose as public API
+Runtime = _Runtime
 
 
 def push_app_version(uri: str, path: str) -> _BioLibApp:
@@ -31,7 +33,7 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
     except BaseException:
         raise Exception('Failed to import pytest; please make sure it is installed') from None
 
-    class AppVersionFixturePlugin
+    class AppVersionFixturePlugin:
        def __init__(self, app_version_ref):
            self.app_version_ref = app_version_ref
 
@@ -42,5 +44,14 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
     return AppVersionFixturePlugin(app_version)
 
 
-def create_data_record(
-
+def create_data_record(
+    destination: str,
+    data_path: str,
+    name: Optional[str] = None,
+    record_type: Optional[str] = None,
+) -> _DataRecord:
+    return _DataRecord.create(
+        destination=f'{destination}/{name}' if name else destination,
+        data_path=data_path,
+        record_type=record_type,
+    )
```
biolib/typing_utils.py
CHANGED
```diff
@@ -1,7 +1,2 @@
-
-
-# import and expose everything from the typing module
-from typing import *  # pylint: disable=wildcard-import, unused-wildcard-import
-
-if sys.version_info < (3, 8):
-    from typing_extensions import TypedDict, Literal  # pylint: disable=unused-import
+# TODO: Deprecate and later remove this file
+from biolib._internal.types.typing import *  # pylint: disable=wildcard-import, unused-wildcard-import
```
biolib/user/sign_in.py
CHANGED
```diff
@@ -14,11 +14,11 @@ def sign_out() -> None:
 
 
 def sign_in(open_in_default_browser: bool = False) -> None:
-
-    if api_client.is_signed_in:
+    if not BiolibApiClient.is_reauthentication_needed():
         logger_no_user_data.info('Already signed in')
         return
 
+    api_client = BiolibApiClient.get()
     auth_challenge = BiolibAuthChallengeApi.create_auth_challenge()
     auth_challenge_token = auth_challenge['token']
 
```
biolib/utils/seq_util.py
CHANGED
```diff
@@ -1,32 +1,26 @@
 import re
 from io import BufferedIOBase
-from biolib.typing_utils import List, Optional, Dict, Union
 
-
-
-
-def find_invalid_sequence_characters(sequence):
-    invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
-    return invalid_chars
+from biolib.typing_utils import Dict, List, Optional, Union
 
 
 class SeqUtilRecord:
     def __init__(
-
-
-
-
-
+        self,
+        sequence: str,
+        sequence_id: str,
+        description: Optional['str'],
+        properties: Optional[Dict[str, str]] = None,
     ):
         self.sequence = sequence
         self.id = sequence_id  # pylint: disable=invalid-name
         self.description = description
 
         if properties:
-            disallowed_pattern = re.compile(r
+            disallowed_pattern = re.compile(r'[=\[\]\n]')
             for key, value in properties.items():
-                assert not bool(disallowed_pattern.search(key)),
-                assert not bool(disallowed_pattern.search(value)),
+                assert not bool(disallowed_pattern.search(key)), 'Key cannot contain characters =[] and newline'
+                assert not bool(disallowed_pattern.search(value)), 'Value cannot contain characters =[] and newline'
             self.properties = properties
         else:
             self.properties = {}
@@ -38,24 +32,24 @@ class SeqUtilRecord:
 class SeqUtil:
     @staticmethod
     def parse_fasta(
-
-
-
-
-
+        input_file: Union[str, BufferedIOBase, None] = None,
+        default_header: Optional[str] = None,
+        allow_any_sequence_characters: bool = False,
+        allow_empty_sequence: bool = True,
+        file_name: Optional[str] = None,
     ) -> List[SeqUtilRecord]:
         if input_file is None:
             if file_name:
                 input_file = file_name
             else:
-                raise ValueError(
+                raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
         if isinstance(input_file, str):
-            with open(input_file
+            with open(input_file) as file_handle:
                 data = file_handle.read().strip()
         elif isinstance(input_file, BufferedIOBase):
             data = input_file.read().decode('utf-8')
         else:
-            raise ValueError(
+            raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
         if not data:
             return []
 
@@ -71,9 +65,9 @@ class SeqUtil:
             raise Exception(f'No header line found in FASTA file "{file_name}"')
 
         splitted = []
-        tmp_data =
+        tmp_data = ''
         for line in data.splitlines():
-            if line.startswith(
+            if line.startswith('>'):
                 if tmp_data:
                     splitted.append(tmp_data)
                 tmp_data = line[1:].strip() + '\n'
@@ -89,23 +83,20 @@ class SeqUtil:
             sequence_data_splitted = sequence_data.strip().split('\n')
             header_line = sequence_data_splitted[0].split()
             sequence_id = header_line[0]
-            description = sequence_data_splitted[0][len(sequence_id):].strip()
-            sequence =
+            description = sequence_data_splitted[0][len(sequence_id) :].strip()
+            sequence = ''.join([seq.strip() for seq in sequence_data_splitted[1:]])
 
             if not allow_any_sequence_characters:
-                invalid_sequence_characters =
+                invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
                 if len(invalid_sequence_characters) > 0:
                     raise Exception(
                         f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
                     )
             if not allow_empty_sequence and len(sequence) == 0:
-                raise Exception(
-
-
+                raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
+
+            parsed_sequences.append(SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description))
 
-            parsed_sequences.append(
-                SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description)
-            )
         return parsed_sequences
 
     @staticmethod
@@ -116,5 +107,17 @@ class SeqUtil:
             if record.properties:
                 for key, value in record.properties.items():
                     optional_description += f' [{key}={value}]'
-            sequence = '\n'.join(record.sequence[i:i + 80] for i in range(0, len(record.sequence), 80))
+            sequence = '\n'.join(record.sequence[i : i + 80] for i in range(0, len(record.sequence), 80))
             file_handle.write(f'>{record.id}{optional_description}\n{sequence}\n')
+
+    @staticmethod
+    def _find_invalid_sequence_characters(sequence: str) -> List[str]:
+        allowed_sequence_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.')
+        invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
+        return invalid_chars
+
+    @staticmethod
+    def _find_invalid_sequence_id_characters(sequence: str) -> List[str]:
+        allowed_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.:*#')
+        invalid_chars = [char for char in sequence if char not in allowed_chars]
+        return invalid_chars
```