opencloning 0.4.8__py3-none-any.whl → 0.5.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. opencloning/app_settings.py +7 -0
  2. opencloning/batch_cloning/pombe/__init__.py +2 -2
  3. opencloning/batch_cloning/pombe/pombe_clone.py +31 -112
  4. opencloning/batch_cloning/pombe/pombe_summary.py +20 -8
  5. opencloning/batch_cloning/ziqiang_et_al2024/__init__.py +8 -8
  6. opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json +2 -9
  7. opencloning/bug_fixing/backend_v0_3.py +13 -5
  8. opencloning/catalogs/__init__.py +36 -0
  9. opencloning/catalogs/igem2024.yaml +2172 -0
  10. opencloning/catalogs/openDNA_collections.yaml +1161 -0
  11. opencloning/catalogs/readme.txt +1 -0
  12. opencloning/catalogs/seva.tsv +231 -0
  13. opencloning/catalogs/snapgene.yaml +2837 -0
  14. opencloning/dna_functions.py +155 -158
  15. opencloning/dna_utils.py +45 -62
  16. opencloning/ebic/primer_design.py +1 -1
  17. opencloning/endpoints/annotation.py +9 -13
  18. opencloning/endpoints/assembly.py +157 -378
  19. opencloning/endpoints/endpoint_utils.py +52 -0
  20. opencloning/endpoints/external_import.py +169 -124
  21. opencloning/endpoints/no_assembly.py +23 -39
  22. opencloning/endpoints/no_input.py +32 -47
  23. opencloning/endpoints/other.py +1 -1
  24. opencloning/endpoints/primer_design.py +2 -1
  25. opencloning/http_client.py +2 -2
  26. opencloning/ncbi_requests.py +113 -47
  27. opencloning/primer_design.py +1 -1
  28. opencloning/pydantic_models.py +10 -510
  29. opencloning/request_examples.py +10 -22
  30. opencloning/temp_functions.py +50 -0
  31. {opencloning-0.4.8.dist-info → opencloning-0.5.0.1.dist-info}/METADATA +18 -8
  32. opencloning-0.5.0.1.dist-info/RECORD +51 -0
  33. {opencloning-0.4.8.dist-info → opencloning-0.5.0.1.dist-info}/WHEEL +1 -1
  34. opencloning/cre_lox.py +0 -116
  35. opencloning/gateway.py +0 -154
  36. opencloning-0.4.8.dist-info/RECORD +0 -45
  37. {opencloning-0.4.8.dist-info → opencloning-0.5.0.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,24 +1,38 @@
1
- from functools import cmp_to_key
2
- from urllib.error import HTTPError
1
+ from fastapi import HTTPException
2
+ from urllib.parse import quote
3
+ import math
3
4
  from Bio.Restriction.Restriction import RestrictionBatch
4
5
  from Bio.Seq import reverse_complement
5
6
  from pydna.dseqrecord import Dseqrecord
6
7
  from pydna.dseq import Dseq
7
- from .pydantic_models import TextFileSequence, AddgeneIdSource, SequenceFileFormat, WekWikGeneIdSource, SEVASource
8
- from opencloning_linkml.datamodel import PlannotateAnnotationReport
9
- from pydna.parsers import parse as pydna_parse
8
+ from opencloning_linkml.datamodel import (
9
+ PlannotateAnnotationReport,
10
+ TextFileSequence,
11
+ SequenceFileFormat,
12
+ )
13
+ from pydna.opencloning_models import (
14
+ AddgeneIdSource,
15
+ OpenDNACollectionsSource,
16
+ SEVASource,
17
+ SnapGenePlasmidSource,
18
+ WekWikGeneIdSource,
19
+ BenchlingUrlSource,
20
+ IGEMSource,
21
+ EuroscarfSource,
22
+ )
23
+
10
24
  from bs4 import BeautifulSoup
11
- import regex
12
- from Bio.SeqFeature import SimpleLocation, Location
13
- from pydna.utils import shift_location
14
25
  from pydna.common_sub_strings import common_sub_strings
15
26
  from Bio.SeqIO import parse as seqio_parse
16
27
  import io
17
28
  import warnings
18
29
  from Bio.SeqIO.InsdcIO import GenBankScanner, GenBankIterator
19
30
  import re
31
+
32
+ from opencloning.catalogs import iGEM2024_catalog, openDNA_collections_catalog, seva_catalog, snapgene_catalog
20
33
  from .http_client import get_http_client, ConnectError, TimeoutException
21
34
  from .ncbi_requests import get_genbank_sequence
35
+ from typing import Callable
22
36
 
23
37
 
24
38
  def format_sequence_genbank(seq: Dseqrecord, seq_name: str = None) -> TextFileSequence:
@@ -33,12 +47,18 @@ def format_sequence_genbank(seq: Dseqrecord, seq_name: str = None) -> TextFileSe
33
47
  file_content=seq.format('genbank'),
34
48
  sequence_file_format=SequenceFileFormat('genbank'),
35
49
  overhang_crick_3prime=seq.seq.ovhg,
36
- overhang_watson_3prime=seq.seq.watson_ovhg(),
50
+ overhang_watson_3prime=seq.seq.watson_ovhg,
37
51
  )
38
52
 
39
53
 
40
54
  def read_dsrecord_from_json(seq: TextFileSequence) -> Dseqrecord:
41
- initial_dseqrecord: Dseqrecord = pydna_parse(seq.file_content)[0]
55
+ with io.StringIO(seq.file_content) as handle:
56
+ try:
57
+ initial_dseqrecord: Dseqrecord = custom_file_parser(handle, 'genbank')[0]
58
+ except ValueError as e:
59
+ raise HTTPException(
60
+ 422, f'The file for sequence with id {seq.id} is not in a valid genbank format: {e}'
61
+ ) from e
42
62
  if seq.overhang_watson_3prime == 0 and seq.overhang_crick_3prime == 0:
43
63
  out_dseq_record = initial_dseqrecord
44
64
  else:
@@ -68,117 +88,118 @@ def get_invalid_enzyme_names(enzyme_names_list: list[str | None]) -> list[str]:
68
88
 
69
89
 
70
90
  async def get_sequences_from_file_url(
71
- url: str, format: SequenceFileFormat = SequenceFileFormat('genbank')
91
+ url: str,
92
+ format: SequenceFileFormat = SequenceFileFormat('genbank'),
93
+ params: dict | None = None,
94
+ headers: dict | None = None,
95
+ get_function: None | Callable = None,
72
96
  ) -> list[Dseqrecord]:
73
- # TODO once pydna parse is fixed it should handle urls that point to non-gb files
74
- async with get_http_client() as client:
75
- resp = await client.get(url)
76
97
 
77
- if resp.status_code != 200:
78
- raise HTTPError(url, 404, 'file requested from url not found', 'file requested from url not found', None)
79
- if format == SequenceFileFormat('snapgene'):
80
- return custom_file_parser(io.BytesIO(resp.content), format)
98
+ if get_function is None:
99
+ async with get_http_client() as client:
100
+ resp = await client.get(url, params=params, headers=headers)
81
101
  else:
82
- return custom_file_parser(io.StringIO(resp.text), format)
102
+ resp = await get_function(url, params=params, headers=headers)
103
+
104
+ if math.floor(resp.status_code / 100) == 5:
105
+ raise HTTPException(503, 'the external server (not OpenCloning) returned an error')
106
+ elif math.floor(resp.status_code / 100) != 2:
107
+ raise HTTPException(404, 'file requested from url not found')
108
+ try:
109
+ if format == SequenceFileFormat('snapgene'):
110
+ return custom_file_parser(io.BytesIO(resp.content), format)
111
+ else:
112
+ return custom_file_parser(io.StringIO(resp.text), format)
113
+ except ValueError as e:
114
+ raise HTTPException(400, f'{e}') from e
83
115
 
84
116
 
85
- async def get_sequence_from_snapgene_url(url: str) -> Dseqrecord:
86
- async with get_http_client() as client:
87
- resp = await client.get(url)
88
- # Check that resp.content is not empty
89
- if len(resp.content) == 0:
90
- raise HTTPError(url, 404, 'invalid snapgene id', 'invalid snapgene id', None)
91
- parsed_seq = next(seqio_parse(io.BytesIO(resp.content), 'snapgene'))
92
- circularize = 'topology' in parsed_seq.annotations.keys() and parsed_seq.annotations['topology'] == 'circular'
93
- return Dseqrecord(parsed_seq, circular=circularize)
117
+ async def request_from_snapgene(plasmid_set: dict, plasmid_name: str) -> Dseqrecord:
118
+ if plasmid_set not in snapgene_catalog:
119
+ raise HTTPException(404, 'invalid plasmid set')
120
+ if plasmid_name not in snapgene_catalog[plasmid_set]:
121
+ raise HTTPException(404, f'{plasmid_name} is not part of {plasmid_set}')
122
+ url = f'https://www.snapgene.com/local/fetch.php?set={plasmid_set}&plasmid={plasmid_name}'
123
+ seqs = await get_sequences_from_file_url(url, SequenceFileFormat('snapgene'))
124
+ seq = seqs[0]
125
+ seq.name = plasmid_name
126
+ seq.source = SnapGenePlasmidSource(repository_id=f'{plasmid_set}/{plasmid_name}')
127
+ return seq
94
128
 
95
129
 
96
- async def request_from_addgene(source: AddgeneIdSource) -> tuple[Dseqrecord, AddgeneIdSource]:
130
+ async def request_from_addgene(repository_id: str) -> Dseqrecord:
97
131
 
98
- url = f'https://www.addgene.org/{source.repository_id}/sequences/'
132
+ url = f'https://www.addgene.org/{repository_id}/sequences/'
99
133
  async with get_http_client() as client:
100
134
  resp = await client.get(url)
101
135
  if resp.status_code == 404:
102
- raise HTTPError(url, 404, 'wrong addgene id', 'wrong addgene id', None)
136
+ raise HTTPException(404, 'wrong addgene id')
103
137
  soup = BeautifulSoup(resp.content, 'html.parser')
104
138
 
105
139
  # Get a span.material-name from the soup, see https://github.com/manulera/OpenCloning_backend/issues/182
106
140
  plasmid_name = soup.find('span', class_='material-name').text.replace(' ', '_')
107
141
 
108
- if source.sequence_file_url:
109
- dseqr = (await get_sequences_from_file_url(source.sequence_file_url))[0]
110
- dseqr.name = plasmid_name
111
- return dseqr, source
112
-
113
- sequence_file_url_dict = dict()
114
- for _type in ['depositor-full', 'depositor-partial', 'addgene-full', 'addgene-partial']:
115
- sequence_file_url_dict[_type] = []
116
- if soup.find(id=_type) is not None:
117
- sequence_file_url_dict[_type] = [
118
- a.get('href') for a in soup.find(id=_type).findAll(class_='genbank-file-download')
119
- ]
120
-
121
- # TODO provide addgene sequencing data supporting the sequence
122
- # We prefer to return addgene full if both available
123
- products = list()
124
- sources = list()
125
- for _type in ['addgene-full', 'depositor-full']:
126
- if len(sequence_file_url_dict[_type]) > 0:
127
- for seq_url in sequence_file_url_dict[_type]:
128
- new_source = source.model_copy()
129
- new_source.sequence_file_url = seq_url
130
- new_source.addgene_sequence_type = _type
131
- sources.append(new_source)
132
- # There should be only one sequence
133
- products.append((await get_sequences_from_file_url(seq_url))[0])
134
-
135
- if len(products) == 0:
136
- # They may have only partial sequences
137
- raise HTTPError(
138
- url,
142
+ # Find the link to either the addgene-full (preferred) or depositor-full (secondary)
143
+ for addgene_sequence_type in ['depositor-full', 'addgene-full']:
144
+ if soup.find(id=addgene_sequence_type) is not None:
145
+ sequence_file_url = next(
146
+ a.get('href') for a in soup.find(id=addgene_sequence_type).findAll(class_='genbank-file-download')
147
+ )
148
+ break
149
+ else:
150
+ raise HTTPException(
139
151
  404,
140
- f'The requested plasmid does not have full sequences, see https://www.addgene.org/{source.repository_id}/sequences/',
141
- f'The requested plasmid does not have full sequences, see https://www.addgene.org/{source.repository_id}/sequences/',
142
- None,
152
+ f'The requested plasmid does not have full sequences, see https://www.addgene.org/{repository_id}/sequences/',
143
153
  )
144
-
145
- # Rename the plasmid
146
- for p in products:
147
- p.name = plasmid_name
148
- return products[0], sources[0]
154
+ dseqr = (await get_sequences_from_file_url(sequence_file_url))[0]
155
+ dseqr.name = plasmid_name
156
+ dseqr.source = AddgeneIdSource(
157
+ repository_id=repository_id,
158
+ sequence_file_url=sequence_file_url,
159
+ addgene_sequence_type=addgene_sequence_type,
160
+ )
161
+ return dseqr
149
162
 
150
163
 
151
- async def request_from_wekwikgene(source: WekWikGeneIdSource) -> tuple[Dseqrecord, WekWikGeneIdSource]:
152
- url = f'https://wekwikgene.wllsb.edu.cn/plasmids/{source.repository_id}'
164
+ async def request_from_wekwikgene(repository_id: str) -> Dseqrecord:
165
+ url = f'https://wekwikgene.wllsb.edu.cn/plasmids/{repository_id}'
153
166
  async with get_http_client() as client:
154
167
  resp = await client.get(url)
155
168
  if resp.status_code == 404:
156
- raise HTTPError(url, 404, 'invalid wekwikgene id', 'invalid wekwikgene id', None)
169
+ raise HTTPException(404, 'invalid wekwikgene id')
157
170
  soup = BeautifulSoup(resp.content, 'html.parser')
158
171
  # Get the sequence file URL from the page
159
172
  sequence_file_url = soup.find('a', text=lambda x: x and 'Download Sequence' in x).get('href')
160
173
  sequence_name = soup.find('h1', class_='plasmid__info__name').text.replace(' ', '_')
161
174
  seq = (await get_sequences_from_file_url(sequence_file_url, 'snapgene'))[0]
162
175
  seq.name = sequence_name
163
- source.sequence_file_url = sequence_file_url
164
- return seq, source
165
-
166
-
167
- async def get_seva_plasmid(source: SEVASource) -> tuple[Dseqrecord, SEVASource]:
168
- if 'ncbi.nlm.nih.gov/nuccore' in source.sequence_file_url:
169
- genbank_id = source.sequence_file_url.split('/')[-1]
170
- seq = await get_genbank_sequence(genbank_id)
171
- seq.name = source.repository_id
172
- elif source.sequence_file_url.startswith('https://seva-plasmids.com'):
173
- seq_list = await get_sequences_from_file_url(source.sequence_file_url)
174
- if len(seq_list) == 0:
175
- raise ValueError('No sequences found in SEVA file')
176
- seq = seq_list[0]
176
+ seq.source = WekWikGeneIdSource(repository_id=repository_id, sequence_file_url=sequence_file_url)
177
+ return seq
178
+
179
+
180
+ async def get_seva_plasmid(repository_id: str) -> Dseqrecord:
181
+ if repository_id not in seva_catalog:
182
+ raise HTTPException(404, 'invalid SEVA id')
183
+ link = seva_catalog[repository_id]
184
+ if 'http' not in link:
185
+ seq = await get_genbank_sequence(link)
177
186
  else:
178
- raise HTTPError(source.sequence_file_url, 404, 'invalid SEVA url', 'invalid SEVA url', None)
187
+ seqs = await get_sequences_from_file_url(link)
188
+ seq = seqs[0]
189
+
179
190
  if not seq.circular:
180
191
  seq = seq.looped()
181
- return seq, source
192
+ seq.name = repository_id
193
+ sequence_file_url = link if 'http' in link else f'https://www.ncbi.nlm.nih.gov/nuccore/{link}'
194
+ seq.source = SEVASource(repository_id=repository_id, sequence_file_url=sequence_file_url)
195
+ return seq
196
+
197
+
198
+ async def get_sequence_from_benchling_url(url: str) -> Dseqrecord:
199
+ dseqs = await get_sequences_from_file_url(url)
200
+ dseq = dseqs[0]
201
+ dseq.source = BenchlingUrlSource(repository_id=url)
202
+ return dseq
182
203
 
183
204
 
184
205
  def correct_name(dseq: Dseqrecord):
@@ -187,57 +208,6 @@ def correct_name(dseq: Dseqrecord):
187
208
  dseq.name = dseq.annotations['keywords'][0].replace(' ', '_')
188
209
 
189
210
 
190
- def location_sorter(x, y) -> int:
191
- """
192
- Sort by start, then length, then strand.
193
- """
194
- if x.parts[0].start != y.parts[0].start:
195
- return x.parts[0].start - y.parts[0].start
196
- elif x.parts[-1].end != y.parts[-1].end:
197
- return x.parts[-1].end - y.parts[-1].end
198
- return x.strand - y.strand
199
-
200
-
201
- def get_all_regex_feature_edges(pattern: str, seq: str, is_circular: bool) -> list[tuple[int, int]]:
202
-
203
- subject = 2 * seq if is_circular else seq
204
-
205
- compiled_pattern = regex.compile(pattern, regex.IGNORECASE)
206
- compiled_pattern_rev = regex.compile('(?r)' + pattern, regex.IGNORECASE)
207
-
208
- matches = list(regex.finditer(compiled_pattern, subject, overlapped=True))
209
- matches += list(regex.finditer(compiled_pattern_rev, subject, overlapped=True))
210
-
211
- # In circular objects we remove the matches that span the sequence more than once: m.end() - m.start() <= len(seq)
212
- return list(set([(m.start(), m.end()) for m in matches if (m.end() - m.start() <= len(seq))]))
213
-
214
-
215
- def find_sequence_regex(pattern: str, seq: str, is_circular: bool) -> list[Location]:
216
-
217
- feature_locations = list()
218
-
219
- # Strand 1
220
- feature_edges = get_all_regex_feature_edges(pattern, seq, is_circular)
221
- # We use shift_location to format origin-spanning features in circular DNA
222
- feature_locations += [shift_location(SimpleLocation(start, end, 1), 0, len(seq)) for start, end in feature_edges]
223
-
224
- # Strand -1
225
- feature_edges = get_all_regex_feature_edges(pattern, reverse_complement(seq), is_circular)
226
- feature_locations += [
227
- shift_location(SimpleLocation(start, end, 1)._flip(len(seq)), 0, len(seq)) for start, end in feature_edges
228
- ]
229
-
230
- # We return a unique list, cannot use a set because Location is not hashable
231
- return sorted(
232
- [x for i, x in enumerate(feature_locations) if x not in feature_locations[:i]], key=cmp_to_key(location_sorter)
233
- )
234
-
235
-
236
- # Could be useful at some point
237
- # def seq_overlap_length(dseq: Dseq) -> int:
238
- # return len(dseq) - abs(dseq.ovhg) - abs(dseq.watson_ovhg())
239
-
240
-
241
211
  def oligonucleotide_hybridization_overhangs(
242
212
  fwd_oligo_seq: str, rvs_oligo_seq: str, minimal_annealing: int
243
213
  ) -> list[int]:
@@ -327,38 +297,38 @@ def custom_file_parser(
327
297
  )
328
298
  out.append(Dseqrecord(parsed_seq, circular=circularize))
329
299
 
300
+ if len(out) == 0:
301
+ raise ValueError('No sequences found in file')
330
302
  return out
331
303
 
332
304
 
333
305
  async def get_sequence_from_euroscarf_url(plasmid_id: str) -> Dseqrecord:
334
306
  url = f'http://www.euroscarf.de/plasmid_details.php?accno={plasmid_id}'
335
307
  async with get_http_client() as client:
336
- try:
337
- resp = await client.get(url)
338
- except ConnectError as e:
339
- raise HTTPError(url, 504, 'could not connect to euroscarf', 'could not connect to euroscarf', None) from e
340
- # I don't think this ever happens
341
- if resp.status_code != 200:
342
- raise HTTPError(
343
- url, resp.status_code, 'could not connect to euroscarf', 'could not connect to euroscarf', None
344
- )
308
+ resp = await client.get(url)
309
+
345
310
  # Use beautifulsoup to parse the html
346
311
  soup = BeautifulSoup(resp.text, 'html.parser')
347
312
  # Identify if it's an error (seems to be a php error log without a body tag)
348
313
  body_tag = soup.find('body')
349
314
  if body_tag is None:
350
315
  if 'Call to a member function getName()' in resp.text:
351
- raise HTTPError(url, 404, 'invalid euroscarf id', 'invalid euroscarf id', None)
316
+ raise HTTPException(404, 'invalid euroscarf id')
352
317
  else:
353
318
  msg = f'Could not retrieve plasmid details, double-check the euroscarf site: {url}'
354
- raise HTTPError(url, 503, msg, msg, None)
319
+ raise HTTPException(503, msg)
355
320
  # Get the download link
356
321
  subpath = soup.find('a', href=lambda x: x and x.startswith('files/dna'))
357
322
  if subpath is None:
358
323
  msg = f'Could not retrieve plasmid details, double-check the euroscarf site: {url}'
359
- raise HTTPError(url, 503, msg, msg, None)
324
+ raise HTTPException(503, msg)
360
325
  genbank_url = f'http://www.euroscarf.de/{subpath.get("href")}'
361
- return (await get_sequences_from_file_url(genbank_url))[0]
326
+ seq = (await get_sequences_from_file_url(genbank_url))[0]
327
+ # Sometimes the files do not contain correct topology information, so we loop them
328
+ if not seq.circular:
329
+ seq = seq.looped()
330
+ seq.source = EuroscarfSource(repository_id=plasmid_id)
331
+ return seq
362
332
 
363
333
 
364
334
  async def annotate_with_plannotate(
@@ -373,14 +343,41 @@ async def annotate_with_plannotate(
373
343
  )
374
344
  if response.status_code != 200:
375
345
  detail = response.json().get('detail', 'plannotate server error')
376
- raise HTTPError(url, response.status_code, detail, detail, None)
346
+ raise HTTPException(response.status_code, detail)
377
347
  data = response.json()
378
348
  dseqr = custom_file_parser(io.StringIO(data['gb_file']), 'genbank')[0]
379
349
  report = [PlannotateAnnotationReport.model_validate(r) for r in data['report']]
380
350
  return dseqr, report, data['version']
381
351
  except TimeoutException as e:
382
- raise HTTPError(url, 504, 'plannotate server timeout', 'plannotate server timeout', None) from e
352
+ raise HTTPException(504, 'plannotate server timeout') from e
383
353
  except ConnectError as e:
384
- raise HTTPError(
385
- url, 500, 'cannot connect to plannotate server', 'cannot connect to plannotate server', None
386
- ) from e
354
+ raise HTTPException(500, 'cannot connect to plannotate server') from e
355
+
356
+
357
+ async def get_sequence_from_openDNA_collections(collection_name: str, plasmid_id: str) -> Dseqrecord:
358
+ if collection_name not in openDNA_collections_catalog:
359
+ raise HTTPException(404, 'invalid openDNA collections collection')
360
+ plasmid = next((item for item in openDNA_collections_catalog[collection_name] if item['id'] == plasmid_id), None)
361
+ if plasmid is None:
362
+ raise HTTPException(404, f'plasmid {plasmid_id} not found in {collection_name}')
363
+
364
+ path = quote(plasmid['path'])
365
+ url = f'https://assets.opencloning.org/open-dna-collections/{path}'
366
+ seqs = await get_sequences_from_file_url(url)
367
+ seq = seqs[0]
368
+ seq.name = plasmid['name'] if plasmid['name'] is not None else plasmid_id
369
+ seq.source = OpenDNACollectionsSource(repository_id=f'{collection_name}/{plasmid_id}', sequence_file_url=url)
370
+ return seq
371
+
372
+
373
+ async def get_sequence_from_iGEM2024(part: str, backbone: str) -> Dseqrecord:
374
+ all_plasmids = [item for collection in iGEM2024_catalog.values() for item in collection]
375
+ plasmid = next((item for item in all_plasmids if item['part'] == part and item['backbone'] == backbone), None)
376
+ if plasmid is None:
377
+ raise HTTPException(404, f'plasmid {part}-{backbone} not found in iGEM 2024')
378
+ url = f'https://assets.opencloning.org/annotated-igem-distribution/results/plasmids/{plasmid["id"]}.gb'
379
+ seqs = await get_sequences_from_file_url(url)
380
+ seq = seqs[0]
381
+ seq.name = f'{part}-{backbone}'
382
+ seq.source = IGEMSource(repository_id=f'{part}-{backbone}', sequence_file_url=url)
383
+ return seq
opencloning/dna_utils.py CHANGED
@@ -4,18 +4,16 @@ Utility functions moved here to avoid circular imports.
4
4
 
5
5
  from Bio.Seq import reverse_complement
6
6
  from pydna.dseqrecord import Dseqrecord
7
- from pydna.dseq import Dseq
8
7
  import tempfile
9
8
  import subprocess
10
9
  import os
11
10
  import shutil
12
11
  from pydna.parsers import parse
13
- from Bio.Align import PairwiseAligner
12
+ from Bio.Align import PairwiseAligner, Alignment
14
13
  from Bio.Data.IUPACData import ambiguous_dna_values as _ambiguous_dna_values
15
- import re
16
- from Bio.SeqFeature import Location, SimpleLocation
17
- from pydna.utils import shift_location
18
14
  from pairwise_alignments_to_msa.alignment import aligned_tuples_to_MSA
15
+ from copy import deepcopy
16
+ import numpy as np
19
17
 
20
18
  aligner = PairwiseAligner(scoring='blastn')
21
19
 
@@ -24,6 +22,39 @@ for normal_base in 'ACGT':
24
22
  del ambiguous_only_dna_values[normal_base]
25
23
 
26
24
 
25
+ def get_sequence_shift(sequence: str, reference: str) -> int:
26
+ """Given two identical but shifted sequences, return the shift."""
27
+ if sequence == reference:
28
+ return 0
29
+ else:
30
+ result = (sequence.upper() * 2).find(reference.upper())
31
+ if result == -1:
32
+ raise ValueError('Sequence not found in reference')
33
+ return result % len(sequence)
34
+
35
+
36
+ def remove_padding(alignment: Alignment, reference: str) -> (str, str):
37
+ """Remove the padding from the permutated sequence."""
38
+ new_alignment = deepcopy(alignment)
39
+ permutated_sequence = new_alignment.sequences[1]
40
+ sequence_shift = get_sequence_shift(permutated_sequence, reference)
41
+ padding = len(permutated_sequence) - len(reference)
42
+ if padding == 0:
43
+ return tuple(new_alignment)
44
+ unshifted = permutated_sequence[sequence_shift:] + permutated_sequence[:sequence_shift]
45
+ replaced = unshifted[:-padding] + '-' * padding
46
+ new_alignment.sequences[1] = replaced[-sequence_shift:] + replaced[:-sequence_shift]
47
+
48
+ # Remove positions in the alignment where both positions contain a dash
49
+ # this happens because of - matching Ns in the permutated sequence.
50
+ # It's not the best way to do this, but it works for now.
51
+ out_seqs = tuple(new_alignment)
52
+ seqs_array = np.array([list(s) for s in out_seqs])
53
+ # Drop positions where both sequences are dashes
54
+ seqs_array = seqs_array[:, ~np.all(seqs_array == '-', axis=0)]
55
+ return tuple(''.join(s) for s in seqs_array)
56
+
57
+
27
58
  def sum_is_sticky(three_prime_end: tuple[str, str], five_prime_end: tuple[str, str], partial: bool = False) -> int:
28
59
  """Return the overlap length if the 3' end of seq1 and 5' end of seq2 ends are sticky and compatible for ligation.
29
60
  Return 0 if they are not compatible."""
@@ -52,31 +83,6 @@ def sum_is_sticky(three_prime_end: tuple[str, str], five_prime_end: tuple[str, s
52
83
  return 0
53
84
 
54
85
 
55
- def get_alignment_shift(alignment: Dseq, shift: int) -> int:
56
- """Shift the alignment by the given number of positions, ignoring gap characters (-).
57
-
58
- Parameters
59
- ----------
60
- alignment : Dseq
61
- The alignment sequence that may contain gap characters (-)
62
- shift : int
63
- Number of positions to shift the sequence by
64
-
65
- """
66
-
67
- nucleotides_shifted = 0
68
- positions_shifted = 0
69
- corrected_shift = shift if shift >= 0 else len(alignment) + shift
70
- alignment_str = str(alignment)
71
-
72
- while nucleotides_shifted != corrected_shift:
73
- if alignment_str[positions_shifted] != '-':
74
- nucleotides_shifted += 1
75
- positions_shifted += 1
76
-
77
- return positions_shifted
78
-
79
-
80
86
  def align_with_mafft(inputs: list[str], orientation_known: bool) -> list[str]:
81
87
  """Align a sanger track to a dseqr sequence"""
82
88
 
@@ -140,12 +146,13 @@ def align_sanger_traces(dseqr: Dseqrecord, sanger_traces: list[str]) -> list[str
140
146
  aligned_pairs = []
141
147
  for trace in sanger_traces:
142
148
  # If the sequence is circular, permutate both fwd and reverse complement
149
+ rc_trace = reverse_complement(trace)
143
150
  if dseqr.circular:
144
151
  fwd = permutate_trace(query_str, trace)
145
- rvs = permutate_trace(query_str, reverse_complement(trace))
152
+ rvs = permutate_trace(query_str, rc_trace)
146
153
  else:
147
154
  fwd = trace
148
- rvs = reverse_complement(trace)
155
+ rvs = rc_trace
149
156
 
150
157
  # Pairwise-align and keep the best alignment
151
158
  fwd_alignment = next(aligner.align(query_str, fwd))
@@ -153,35 +160,11 @@ def align_sanger_traces(dseqr: Dseqrecord, sanger_traces: list[str]) -> list[str
153
160
 
154
161
  best_alignment = fwd_alignment if fwd_alignment.score > rvs_alignment.score else rvs_alignment
155
162
 
156
- formatted_alignment = best_alignment.format('fasta').split()[1::2]
157
- aligned_pairs.append(tuple(formatted_alignment))
163
+ if dseqr.circular:
164
+ trace4padding = trace if best_alignment is fwd_alignment else rc_trace
165
+ formatted_alignment = remove_padding(best_alignment, trace4padding)
166
+ else:
167
+ formatted_alignment = tuple(best_alignment)
168
+ aligned_pairs.append(formatted_alignment)
158
169
 
159
170
  return aligned_tuples_to_MSA(aligned_pairs)
160
-
161
-
162
- def compute_regex_site(site: str) -> str:
163
- upper_site = site.upper()
164
- for k, v in ambiguous_only_dna_values.items():
165
- if len(v) > 1:
166
- upper_site = upper_site.replace(k, f"[{''.join(v)}]")
167
-
168
- # Make case insensitive
169
- upper_site = f'(?i){upper_site}'
170
- return upper_site
171
-
172
-
173
- def dseqrecord_finditer(pattern: str, seq: Dseqrecord) -> list[re.Match]:
174
- query = str(seq.seq) if not seq.circular else str(seq.seq) * 2
175
- matches = re.finditer(pattern, query)
176
- return (m for m in matches if m.start() <= len(seq))
177
-
178
-
179
- def create_location(start: int, end: int, lim: int) -> Location:
180
- while start < 0:
181
- start += lim
182
- while end < 0:
183
- end += lim
184
- if end > start:
185
- return SimpleLocation(start, end)
186
- else:
187
- return shift_location(SimpleLocation(start, end + lim), 0, lim)
@@ -2,7 +2,7 @@ from pydna.dseqrecord import Dseqrecord
2
2
  from Bio.SeqFeature import SimpleLocation
3
3
  from ..primer3_functions import PrimerDesignSettings, primer3_design_primers
4
4
 
5
- from ..pydantic_models import PrimerModel
5
+ from opencloning_linkml.datamodel import Primer as PrimerModel
6
6
  from .primer_design_settings import amanda_settings
7
7
 
8
8
  adapter_left_fwd = 'ataGGTCTCtGGAG'
@@ -1,15 +1,14 @@
1
- from fastapi import Query, HTTPException
1
+ from fastapi import Query
2
2
  from pydantic import create_model
3
- from urllib.error import HTTPError
4
3
 
5
4
  from ..get_router import get_router
6
- from ..pydantic_models import TextFileSequence, AnnotationSource
5
+ from opencloning_linkml.datamodel import TextFileSequence, AnnotationSource
7
6
  from ..dna_functions import (
8
7
  read_dsrecord_from_json,
9
8
  annotate_with_plannotate as _annotate_with_plannotate,
10
9
  format_sequence_genbank,
11
10
  )
12
- from ..gateway import find_gateway_sites
11
+ from pydna.gateway import find_gateway_sites
13
12
  from ..app_settings import settings
14
13
 
15
14
  router = get_router()
@@ -46,15 +45,12 @@ if settings.PLANNOTATE_URL is not None:
46
45
  ):
47
46
  input_seqr = read_dsrecord_from_json(sequence)
48
47
  # Make a request submitting sequence as a file:
49
- try:
50
- seqr, annotations, version = await _annotate_with_plannotate(
51
- sequence.file_content,
52
- f'{sequence.id}.gb',
53
- settings.PLANNOTATE_URL + 'annotate',
54
- settings.PLANNOTATE_TIMEOUT,
55
- )
56
- except HTTPError as e:
57
- raise HTTPException(e.code, e.msg) from e
48
+ seqr, annotations, version = await _annotate_with_plannotate(
49
+ sequence.file_content,
50
+ f'{sequence.id}.gb',
51
+ settings.PLANNOTATE_URL + 'annotate',
52
+ settings.PLANNOTATE_TIMEOUT,
53
+ )
58
54
 
59
55
  source.annotation_report = annotations
60
56
  source.annotation_tool = 'plannotate'