PyPI - opencloning - Versions diffs - 0.4.8__py3-none-any.whl → 0.5__py3-none-any.whl - Mend

opencloning 0.4.8__py3-none-any.whl → 0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

opencloning/app_settings.py +7 -0
opencloning/batch_cloning/pombe/__init__.py +2 -2
opencloning/batch_cloning/pombe/pombe_clone.py +31 -112
opencloning/batch_cloning/pombe/pombe_summary.py +20 -8
opencloning/batch_cloning/ziqiang_et_al2024/__init__.py +8 -8
opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json +2 -9
opencloning/bug_fixing/backend_v0_3.py +13 -5
opencloning/catalogs/__init__.py +36 -0
opencloning/catalogs/igem2024.yaml +2172 -0
opencloning/catalogs/openDNA_collections.yaml +1161 -0
opencloning/catalogs/readme.txt +1 -0
opencloning/catalogs/seva.tsv +231 -0
opencloning/catalogs/snapgene.yaml +2837 -0
opencloning/dna_functions.py +155 -158
opencloning/dna_utils.py +45 -62
opencloning/ebic/primer_design.py +1 -1
opencloning/endpoints/annotation.py +9 -13
opencloning/endpoints/assembly.py +157 -378
opencloning/endpoints/endpoint_utils.py +52 -0
opencloning/endpoints/external_import.py +169 -124
opencloning/endpoints/no_assembly.py +23 -39
opencloning/endpoints/no_input.py +32 -47
opencloning/endpoints/other.py +1 -1
opencloning/endpoints/primer_design.py +2 -1
opencloning/http_client.py +2 -2
opencloning/ncbi_requests.py +113 -47
opencloning/primer_design.py +1 -1
opencloning/pydantic_models.py +10 -510
opencloning/request_examples.py +10 -22
opencloning/temp_functions.py +50 -0
{opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/METADATA +18 -8
opencloning-0.5.dist-info/RECORD +51 -0
{opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/WHEEL +1 -1
opencloning/cre_lox.py +0 -116
opencloning/gateway.py +0 -154
opencloning-0.4.8.dist-info/RECORD +0 -45
{opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/licenses/LICENSE +0 -0

opencloning/endpoints/no_input.py CHANGED Viewed

@@ -1,19 +1,22 @@
 from fastapi import Query, HTTPException
 from pydna.dseqrecord import Dseqrecord
 from pydna.dseq import Dseq
+from pydna.primer import Primer as PydnaPrimer
+from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization as _oligonucleotide_hybridization
 from pydantic import create_model, Field
 from typing import Annotated
+from opencloning.endpoints.endpoint_utils import format_products
 from ..dna_functions import (
     format_sequence_genbank,
-    oligonucleotide_hybridization_overhangs,
 )
-from ..pydantic_models import (
-    PrimerModel,
+from opencloning_linkml.datamodel import (
+    Primer as PrimerModel,
     TextFileSequence,
     ManuallyTypedSource,
     OligoHybridizationSource,
-    SourceInput,
+    ManuallyTypedSequence,
 )
 from .. import request_examples
@@ -28,14 +31,14 @@ router = get_router()
         'ManuallyTypedResponse', sources=(list[ManuallyTypedSource], ...), sequences=(list[TextFileSequence], ...)
     ),
 )
-async def manually_typed(source: ManuallyTypedSource):
+async def manually_typed(source: ManuallyTypedSource, sequence: ManuallyTypedSequence):
     """Return the sequence from a manually typed sequence"""
-    if source.circular:
-        seq = Dseqrecord(source.user_input, circular=source.circular)
+    if sequence.circular:
+        seq = Dseqrecord(sequence.sequence, circular=sequence.circular)
     else:
         seq = Dseqrecord(
             Dseq.from_full_sequence_and_overhangs(
-                source.user_input, source.overhang_crick_3prime, source.overhang_watson_3prime
+                sequence.sequence, sequence.overhang_crick_3prime, sequence.overhang_watson_3prime
             )
         )
     return {'sequences': [format_sequence_genbank(seq, source.output_name)], 'sources': [source]}
@@ -59,57 +62,39 @@ async def oligonucleotide_hybridization(
     primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
     minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
 ):
     if len(source.input):
-        watson_seq = next((p.sequence for p in primers if p.id == source.input[0].sequence), None)
-        crick_seq = next((p.sequence for p in primers if p.id == source.input[1].sequence), None)
+        fwd_primer = next((p for p in primers if p.id == source.input[0].sequence), None)
+        rvs_primer = next((p for p in primers if p.id == source.input[1].sequence), None)
     else:
-        watson_seq = primers[0].sequence
-        crick_seq = primers[1].sequence if len(primers) > 1 else watson_seq
-        source.input = [SourceInput(sequence=primers[0].id), SourceInput(sequence=primers[1].id)]
+        fwd_primer = primers[0]
+        rvs_primer = primers[1] if len(primers) > 1 else fwd_primer
-    if watson_seq is None or crick_seq is None:
+    if fwd_primer is None or rvs_primer is None:
         raise HTTPException(404, 'Invalid oligo id.')
-    # The overhang is provided
+    fwd_primer = PydnaPrimer(fwd_primer.sequence, id=str(fwd_primer.id), name=fwd_primer.name)
+    rvs_primer = PydnaPrimer(rvs_primer.sequence, id=str(rvs_primer.id), name=rvs_primer.name)
+    # If the overhang is provided, the minimal annealing is set from that
     if source.overhang_crick_3prime is not None:
-        ovhg_watson = len(watson_seq) - len(crick_seq) + source.overhang_crick_3prime
-        minimal_annealing = len(watson_seq)
+        ovhg_watson = len(fwd_primer.seq) - len(rvs_primer.seq) + source.overhang_crick_3prime
+        minimal_annealing = len(fwd_primer.seq)
         if source.overhang_crick_3prime < 0:
             minimal_annealing += source.overhang_crick_3prime
         if ovhg_watson > 0:
             minimal_annealing -= ovhg_watson
     try:
-        possible_overhangs = oligonucleotide_hybridization_overhangs(watson_seq, crick_seq, minimal_annealing)
+        dseqs = _oligonucleotide_hybridization(fwd_primer, rvs_primer, minimal_annealing)
     except ValueError as e:
         raise HTTPException(400, *e.args)
-    if len(possible_overhangs) == 0:
-        raise HTTPException(400, 'No pair of annealing oligos was found. Try changing the annealing settings.')
-    if source.overhang_crick_3prime is not None:
-        if source.overhang_crick_3prime not in possible_overhangs:
-            raise HTTPException(400, 'The provided overhang is not compatible with the primers.')
-        return {
-            'sources': [source],
-            'sequences': [
-                format_sequence_genbank(
-                    Dseqrecord(Dseq(watson_seq, crick_seq, source.overhang_crick_3prime)), source.output_name
-                )
-            ],
-        }
-    out_sources = list()
-    out_sequences = list()
-    for overhang in possible_overhangs:
-        new_source = source.model_copy()
-        new_source.overhang_crick_3prime = overhang
-        out_sources.append(new_source)
-        out_sequences.append(
-            format_sequence_genbank(
-                Dseqrecord(Dseq(watson_seq, crick_seq, new_source.overhang_crick_3prime)), source.output_name
-            )
-        )
-    return {'sources': out_sources, 'sequences': out_sequences}
+    return format_products(
+        source.id,
+        dseqs,
+        source if source.overhang_crick_3prime is not None else None,
+        source.output_name,
+        no_products_error_message='No pair of annealing oligos was found. Try changing the annealing settings.',
+        wrong_completed_source_error_message='The provided source is not valid.',
+    )

opencloning/endpoints/other.py CHANGED Viewed

@@ -13,9 +13,9 @@ from ..dna_functions import (
 )
 from ..dna_utils import align_sanger_traces
 from ..pydantic_models import (
-    TextFileSequence,
     BaseCloningStrategy,
 )
+from opencloning_linkml.datamodel import TextFileSequence
 from ..get_router import get_router
 from .._version import __version__ as backend_version

opencloning/endpoints/primer_design.py CHANGED Viewed

@@ -5,7 +5,8 @@ from Bio.Restriction import RestrictionBatch
 from Bio.SeqUtils import gc_fraction
 from ..dna_functions import get_invalid_enzyme_names
-from ..pydantic_models import PrimerModel, PrimerDesignQuery
+from opencloning_linkml.datamodel import Primer as PrimerModel
+from opencloning.pydantic_models import PrimerDesignQuery
 from ..dna_functions import read_dsrecord_from_json
 from ..primer_design import (
     homologous_recombination_primers,

opencloning/http_client.py CHANGED Viewed

@@ -7,7 +7,7 @@ from httpx import (  # noqa: F401
     AsyncHTTPTransport,
     Request,
 )
-from urllib.error import HTTPError
+from fastapi import HTTPException
 import ssl
 import certifi
 from .app_settings import settings
@@ -22,7 +22,7 @@ class AllowedExternalUrlsTransport(AsyncHTTPTransport):
     async def handle_async_request(self, request: Request) -> Response:
         if any(str(request.url).startswith(url) for url in allowed_external_urls):
             return await super().handle_async_request(request)
-        raise HTTPError(request.url, 403, f'Request to {request.url} is not allowed', None, None)
+        raise HTTPException(403, f'Request to {request.url} is not allowed')
 proxy = None

opencloning/ncbi_requests.py CHANGED Viewed

@@ -1,6 +1,9 @@
 from fastapi import HTTPException
-from pydna.parsers import parse as pydna_parse
+import math
 from pydna.dseqrecord import Dseqrecord
+from pydna.opencloning_models import GenomeCoordinatesSource, NCBISequenceSource
+from Bio.SeqFeature import Location
 from .app_settings import settings
 from .http_client import get_http_client, Response
@@ -9,7 +12,14 @@ headers = None if settings.NCBI_API_KEY is None else {'api_key': settings.NCBI_A
 async def async_get(url, headers, params=None) -> Response:
     async with get_http_client() as client:
-        return await client.get(url, headers=headers, params=params, timeout=20.0)
+        resp = await client.get(url, headers=headers, params=params, timeout=20.0)
+        if resp.status_code == 500:
+            raise HTTPException(503, 'NCBI is down, try again later')
+        elif resp.status_code == 503:
+            raise HTTPException(503, 'NCBI returned an internal server error')
+        elif resp.status_code != 200 and not math.floor(resp.status_code / 100) == 4:
+            raise HTTPException(503, 'NCBI returned an unexpected error')
+        return resp
 # TODO: this does not return old assembly accessions, see https://github.com/ncbi/datasets/issues/380#issuecomment-2231142816
@@ -43,23 +53,11 @@ async def get_sequence_accessions_from_assembly_accession(assembly_accession: st
 async def get_annotation_from_locus_tag(locus_tag: str, assembly_accession: str) -> dict:
-    url = f'https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/{assembly_accession}/annotation_report?search_text={locus_tag}'
-    resp = await async_get(url, headers=headers)
-    if resp.status_code == 404:
-        raise HTTPException(404, 'wrong accession number')
-    data = resp.json()
-    if 'reports' not in data:
-        raise HTTPException(404, 'wrong locus_tag')
-    matching_annotations = list(a['annotation'] for a in data['reports'] if a['annotation']['locus_tag'] == locus_tag)
-    if len(matching_annotations) == 0:
-        raise HTTPException(404, 'wrong locus_tag')
-    elif len(matching_annotations) > 1:
-        # Not sure if this can ever happen, but just in case
+    annotations = await get_annotations_from_query(locus_tag, assembly_accession)
+    locus_tag_annotations = [a for a in annotations if locus_tag.upper() in a['locus_tag'].upper()]
+    if len(locus_tag_annotations) != 1:
         raise HTTPException(400, 'multiple matches for locus_tag')
-    return matching_annotations[0]
+    return locus_tag_annotations[0]
 async def get_annotations_from_query(query: str, assembly_accession: str) -> list[dict]:
@@ -72,9 +70,6 @@ async def get_annotations_from_query(query: str, assembly_accession: str) -> lis
     if 'reports' not in data:
         raise HTTPException(404, f'query "{query}" gave no results')
-    if len(data['reports']) > 1:
-        raise HTTPException(400, 'multiple matches for query')
     return [r['annotation'] for r in data['reports']]
@@ -94,6 +89,12 @@ async def get_sequence_length_from_sequence_accession(sequence_accession: str) -
 async def get_genbank_sequence(sequence_accession, start=None, end=None, strand=None) -> Dseqrecord:
+    from opencloning.dna_functions import get_sequences_from_file_url
+    # Ensure that start, end, and strand are either all None or none are None
+    if (start is None or end is None or strand is None) and not (start is None and end is None and strand is None):
+        raise ValueError('start, end, and strand must either all be None or none be None')
     gb_strand = 1 if strand == 1 or strand is None else 2
     url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
     params = {
@@ -108,30 +109,95 @@ async def get_genbank_sequence(sequence_accession, start=None, end=None, strand=
     if headers is not None:
         params['api_key'] = headers['api_key']
-    resp = await async_get(url, headers=headers, params=params)
-    if resp.status_code == 200:
-        try:
-            return pydna_parse(resp.text)[0]
-        except Exception:
-            # Now the ncbi returns something like this:
-            # Example: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=blah&rettype=gbwithparts&retmode=text
-            # 'Error: F a i l e d  t o  u n d e r s t a n d  i d :  b l a h '
-            raise HTTPException(404, 'wrong sequence accession')
-    elif resp.status_code == 400:
-        raise HTTPException(404, 'wrong sequence accession')
-    elif resp.status_code == 503:
-        raise HTTPException(503, 'NCBI returned an error')
+    try:
+        seq = (await get_sequences_from_file_url(url, params=params, headers=headers, get_function=async_get))[0]
+    except HTTPException as e:
+        # Now the ncbi returns something like this:
+        # Example: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=blah&rettype=gbwithparts&retmode=text
+        # 'Error: F a i l e d  t o  u n d e r s t a n d  i d :  b l a h '
+        if 'No sequences found in file' in e.detail:
+            raise HTTPException(404, 'invalid sequence accession') from e
+        raise e
+    except Exception as e:
+        raise e
+    if start is not None:
+        if strand == -1:
+            location = Location.fromstring(f'complement({start}..{end})')
+        else:
+            location = Location.fromstring(f'{start}..{end}')
     else:
-        raise HTTPException(500, 'NCBI returned an unexpected error')
-def validate_coordinates_pre_request(start, end, strand):
-    # TODO: move this to the class
-    if strand not in [1, -1]:
-        raise HTTPException(422, 'strand must be 1 or -1')
-    if start >= end:
-        raise HTTPException(422, 'start must be less than end')
-    if start < 1:
-        raise HTTPException(422, 'start must be greater than 0')
-    if end - start > 100000:
-        raise HTTPException(400, 'sequence is too long (max 100000 bp)')
+        location = None
+    seq.source = NCBISequenceSource(repository_id=sequence_accession, coordinates=location)
+    return seq
+def get_info_from_annotation(annotation: dict) -> dict:
+    start = int(annotation['genomic_regions'][0]['gene_range']['range'][0]['begin'])
+    end = int(annotation['genomic_regions'][0]['gene_range']['range'][0]['end'])
+    strand = 1 if annotation['genomic_regions'][0]['gene_range']['range'][0]['orientation'] == 'plus' else -1
+    sequence_accession = annotation['genomic_regions'][0]['gene_range']['accession_version']
+    locus_tag = annotation['locus_tag'] if 'locus_tag' in annotation else None
+    gene_id = int(annotation['gene_id']) if 'gene_id' in annotation else None
+    try:
+        assembly_accession = annotation['annotations'][0]['assembly_accession']
+    except KeyError:
+        assembly_accession = None
+    except IndexError:
+        assembly_accession = None
+    return start, end, strand, gene_id, sequence_accession, locus_tag, assembly_accession
+async def validate_locus_tag(
+    locus_tag: str, assembly_accession: str, gene_id: int | None, start: int, end: int, strand: int
+) -> int:
+    """
+    Validate that the locus tag exists in the assembly and that the gene falls within the requested coordinates.
+    Returns gene_id for convenience.
+    """
+    annotation = await get_annotation_from_locus_tag(locus_tag, assembly_accession)
+    gene_start, gene_end, gene_strand, gene_id_annotation, *_ = get_info_from_annotation(annotation)
+    # This field will not be present in all cases, but should be there in reference genomes
+    if gene_id is not None:
+        if 'gene_id' not in annotation:
+            raise HTTPException(400, 'gene_id is set, but not found in the annotation')
+        if gene_id != gene_id_annotation:
+            raise HTTPException(400, 'gene_id does not match the locus_tag')
+    elif 'gene_id' in annotation:
+        gene_id = gene_id_annotation
+    # The gene should fall within the range (range might be bigger if bases were requested upstream or downstream)
+    if gene_start < start or gene_end > end or gene_strand != strand:
+        raise HTTPException(
+            400,
+            f'wrong coordinates, the gene should fall within the requested coordinates, {start}, {end} on strand: {strand}',
+        )
+    return gene_id
+async def get_genome_region_from_annotation(
+    annotation: dict, padding_left: int = 0, padding_right: int = 0
+) -> Dseqrecord:
+    start, end, strand, gene_id, sequence_accession, locus_tag, assembly_accession = get_info_from_annotation(
+        annotation
+    )
+    start = start - padding_left
+    end = end + padding_right
+    seq = await get_genbank_sequence(sequence_accession, start, end, strand)
+    location_str = f'{start}..{end}' if strand != -1 else f'complement({start}..{end})'
+    coordinates = Location.fromstring(location_str)
+    source = GenomeCoordinatesSource(
+        assembly_accession=assembly_accession,
+        repository_id=sequence_accession,
+        coordinates=coordinates,
+        locus_tag=locus_tag,
+        gene_id=gene_id,
+    )
+    seq.name = locus_tag
+    seq.source = source
+    return seq

opencloning/primer_design.py CHANGED Viewed

@@ -3,12 +3,12 @@ from pydna.design import primer_design, assembly_fragments
 from Bio.SeqFeature import SimpleLocation
 from pydna.utils import locations_overlap, shift_location, location_boundaries
 from pydna.amplicon import Amplicon
-from .pydantic_models import PrimerModel
 from Bio.Seq import reverse_complement
 from Bio.Restriction.Restriction import RestrictionType
 from Bio.Data.IUPACData import ambiguous_dna_values as _ambiguous_dna_values
 from typing import Callable
 from .primer3_functions import primer3_calc_tm, PrimerDesignSettings
+from opencloning_linkml.datamodel import Primer as PrimerModel
 ambiguous_dna_values = _ambiguous_dna_values.copy()
 # Remove acgt

opencloning 0.4.8__py3-none-any.whl → 0.5__py3-none-any.whl

opencloning 0.4.8__py3-none-any.whl → 0.5__py3-none-any.whl