PyPI - opencloning - Versions diffs - 0.2.8__py3-none-any.whl → 0.2.8.2__py3-none-any.whl - Mend

opencloning 0.2.8__py3-none-any.whl → 0.2.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

opencloning/cre_lox.py CHANGED Viewed

@@ -1,29 +1,58 @@
 from itertools import product
 from pydna.dseqrecord import Dseqrecord
+from Bio.Data.IUPACData import ambiguous_dna_values
+from Bio.Seq import reverse_complement
 from .dna_utils import compute_regex_site, dseqrecord_finditer
+# We create a dictionary to map ambiguous bases to their consensus base
+# For example, ambigous_base_dict['ACGT'] -> 'N'
+ambiguous_base_dict = {}
+for ambiguous, bases in ambiguous_dna_values.items():
+    ambiguous_base_dict[''.join(sorted(bases))] = ambiguous
+# To handle N values
+ambiguous_base_dict['N'] = 'N'
 # This is the original loxP sequence, here for reference
 LOXP_SEQUENCE = 'ATAACTTCGTATAGCATACATTATACGAAGTTAT'
-# This is a consensus sequence, from this Addgene blog post: https://blog.addgene.org/plasmids-101-cre-lox
-# IMPORTANT: Because it is palyndromic, we only look for it in the forward direction, if this was changed
-# to a non-palindromic sequence, you would need to look for matches reversing it, like in Gateway cloning
-LOXP_CONSENSUS = 'ATAACTTCGTATANNNTANNNTATACGAAGTTAT'
+loxP_sequences = [
+    # https://blog.addgene.org/plasmids-101-cre-lox
+    # loxP
+    'ATAACTTCGTATANNNTANNNTATACGAAGTTAT',
+    # PMID:12202778
+    # lox66
+    'ATAACTTCGTATANNNTANNNTATACGAACGGTA',
+    # lox71
+    'TACCGTTCGTATANNNTANNNTATACGAAGTTAT',
+]
+loxP_consensus = ''
+for pos in range(len(LOXP_SEQUENCE)):
+    all_letters = set(seq[pos] for seq in loxP_sequences)
+    key = ''.join(sorted(all_letters))
+    loxP_consensus += ambiguous_base_dict[key]
-loxP_regex = compute_regex_site(LOXP_CONSENSUS)
+# We compute the regex for the forward and reverse loxP sequences
+loxP_regex = (compute_regex_site(loxP_consensus), compute_regex_site(reverse_complement(loxP_consensus)))
 def cre_loxP_overlap(x: Dseqrecord, y: Dseqrecord, _l: None = None) -> list[tuple[int, int, int]]:
     """Find matching loxP sites between two sequences."""
     out = list()
-    matches_x = dseqrecord_finditer(loxP_regex, x)
-    matches_y = dseqrecord_finditer(loxP_regex, y)
-    for match_x, match_y in product(matches_x, matches_y):
-        value_x = match_x.group()
-        value_y = match_y.group()
-        if value_x == value_y:
-            out.append((match_x.start(), match_y.start(), len(value_x)))
-    return out
+    for pattern in loxP_regex:
+        matches_x = dseqrecord_finditer(pattern, x)
+        matches_y = dseqrecord_finditer(pattern, y)
+        for match_x, match_y in product(matches_x, matches_y):
+            value_x = match_x.group()
+            value_y = match_y.group()
+            if value_x[13:21] == value_y[13:21]:
+                out.append((match_x.start() + 13, match_y.start() + 13, 8))
+    # Unique values (keeping the order)
+    unique_out = []
+    for item in out:
+        if item not in unique_out:
+            unique_out.append(item)
+    return unique_out

opencloning/dna_functions.py CHANGED Viewed

@@ -241,16 +241,23 @@ def find_sequence_regex(pattern: str, seq: str, is_circular: bool) -> list[Locat
 def oligonucleotide_hybridization_overhangs(
     fwd_oligo_seq: str, rvs_oligo_seq: str, minimal_annealing: int
 ) -> list[int]:
+    """
+    Returns possible overhangs between two oligos, and returns an error if mismatches are found.
+    see https://github.com/manulera/OpenCloning_backend/issues/302 for notation
+    """
     matches = common_sub_strings(fwd_oligo_seq.lower(), reverse_complement(rvs_oligo_seq.lower()), minimal_annealing)
-    for m in matches:
-        if not (
-            (m[0] == 0 and m[1] + m[2] == len(fwd_oligo_seq)) or (m[1] == 0 and m[0] + m[2] == len(rvs_oligo_seq))
+    for pos_fwd, pos_rvs, length in matches:
+        if (pos_fwd != 0 and pos_rvs != 0) or (
+            pos_fwd + length < len(fwd_oligo_seq) and pos_rvs + length < len(rvs_oligo_seq)
         ):
             raise ValueError('The oligonucleotides can anneal with mismatches')
     # Return possible overhangs
-    return [start_on_rvs - start_on_fwd for start_on_fwd, start_on_rvs, length in matches]
+    return [pos_rvs - pos_fwd for pos_fwd, pos_rvs, length in matches]
 class MyGenBankScanner(GenBankScanner):

opencloning/endpoints/external_import.py CHANGED Viewed

@@ -8,6 +8,8 @@ from starlette.responses import RedirectResponse
 from Bio import BiopythonParserWarning
 from typing import Annotated
 from urllib.error import HTTPError
+from pydna.utils import location_boundaries
 from ..get_router import get_router
 from ..pydantic_models import (
     TextFileSequence,
@@ -22,6 +24,7 @@ from ..pydantic_models import (
     GenomeCoordinatesSource,
     SequenceFileFormat,
     SEVASource,
+    SimpleSequenceLocation,
 )
 from ..dna_functions import (
     format_sequence_genbank,
@@ -51,13 +54,13 @@ router = get_router()
             'description': 'The sequence was successfully parsed',
             'headers': {
                 'x-warning': {
-                    'description': 'A warning returned if the file can be read but is not in the expected format',
+                    'description': 'A warning returned if the file can be read but is not in the expected format or if some sequences were not extracted because they are incompatible with the provided coordinates',
                     'schema': {'type': 'string'},
                 },
             },
         },
         422: {
-            'description': 'Biopython cannot process this file.',
+            'description': 'Biopython cannot process this file or provided coordinates are invalid.',
         },
         404: {
             'description': 'The index_in_file is out of range.',
@@ -83,6 +86,12 @@ async def read_from_file(
         None,
         description='Name of the output sequence',
     ),
+    start: int | None = Query(None, description='Start position of the sequence to read (0-based)', ge=0),
+    end: int | None = Query(
+        None,
+        description='End position of the sequence to read (0-based)',
+        ge=0,
+    ),
 ):
     """Return a json sequence from a sequence file"""
@@ -107,6 +116,7 @@ async def read_from_file(
         sequence_file_format = SequenceFileFormat(extension_dict[extension])
     dseqs = list()
+    warning_messages = list()
     file_content = await file.read()
     if sequence_file_format == 'snapgene':
@@ -124,7 +134,6 @@ async def read_from_file(
         if warnings_captured:
             warning_messages = [str(w.message) for w in warnings_captured]
-            response.headers['x-warning'] = '; '.join(warning_messages)
     except ValueError as e:
         raise HTTPException(422, f'Biopython cannot process this file: {e}.')
@@ -134,25 +143,62 @@ async def read_from_file(
     if len(dseqs) == 0:
         raise HTTPException(422, 'Biopython cannot process this file.')
+    if index_in_file is not None:
+        if index_in_file >= len(dseqs):
+            raise HTTPException(404, 'The index_in_file is out of range.')
+        dseqs = [dseqs[index_in_file]]
+    seq_feature = None
+    if start is not None and end is not None:
+        seq_feature = SimpleSequenceLocation(start=start, end=end)
+        extracted_sequences = list()
+        for dseq in dseqs:
+            try:
+                # TODO: We could use extract when this is addressed: https://github.com/biopython/biopython/issues/4989
+                location = seq_feature.to_biopython_location(circular=dseq.circular, seq_len=len(dseq))
+                i, j = location_boundaries(location)
+                extracted_sequence = dseq[i:j]
+                # Only add the sequence if the interval is not out of bounds
+                if len(extracted_sequence) == len(location):
+                    extracted_sequences.append(extracted_sequence)
+                else:
+                    extracted_sequences.append(None)
+            except Exception:
+                extracted_sequences.append(None)
+        dseqs = extracted_sequences
     # The common part
-    # TODO: using id=0 is not great
     parent_source = UploadedFileSource(
-        id=0, sequence_file_format=sequence_file_format, file_name=file.filename, circularize=circularize
+        id=0,
+        sequence_file_format=sequence_file_format,
+        file_name=file.filename,
+        circularize=circularize,
+        coordinates=seq_feature,
     )
+    # If coordinates are provided, we only keep the sequences compatible with those coordinates
     out_sources = list()
+    out_sequences = list()
     for i in range(len(dseqs)):
+        if dseqs[i] is None:
+            continue
         new_source = parent_source.model_copy()
-        new_source.index_in_file = i
+        new_source.index_in_file = index_in_file if index_in_file is not None else i
         out_sources.append(new_source)
+        out_sequences.append(format_sequence_genbank(dseqs[i], output_name))
-    out_sequences = [format_sequence_genbank(s, output_name) for s in dseqs]
+    if len(out_sequences) == 0:
+        raise HTTPException(422, 'Provided coordinates are incompatible with sequences in the file.')
-    if index_in_file is not None:
-        if index_in_file >= len(out_sources):
-            raise HTTPException(404, 'The index_in_file is out of range.')
-        return {'sequences': [out_sequences[index_in_file]], 'sources': [out_sources[index_in_file]]}
-    else:
-        return {'sequences': out_sequences, 'sources': out_sources}
+    if len(out_sequences) < len(dseqs):
+        warning_messages.append(
+            'Some sequences were not extracted because they are incompatible with the provided coordinates.'
+        )
+    if len(warning_messages) > 0:
+        response.headers['x-warning'] = '; '.join(warning_messages)
+    return {'sequences': out_sequences, 'sources': out_sources}
 # TODO: a bit inconsistent that here you don't put {source: {...}} in the request, but

{opencloning-0.2.8.dist-info → opencloning-0.2.8.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: opencloning
-Version: 0.2.8
+Version: 0.2.8.2
 Summary: Backend of OpenCloning, a web application to generate molecular cloning strategies in json format, and share them with others.
 License: MIT
 Author: Manuel Lera-Ramirez
@@ -15,7 +15,7 @@ Requires-Dist: beautifulsoup4 (>=4.11.1,<5.0.0)
 Requires-Dist: biopython (==1.84)
 Requires-Dist: fastapi
 Requires-Dist: httpx (>=0.25.0,<0.26.0)
-Requires-Dist: opencloning-linkml (==0.2.6a0)
+Requires-Dist: opencloning-linkml (==0.2.6.1a0)
 Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
 Requires-Dist: pandas (>=2.2.3,<3.0.0)
 Requires-Dist: primer3-py (>=2.0.3,<3.0.0)

{opencloning-0.2.8.dist-info → opencloning-0.2.8.2.dist-info}/RECORD RENAMED Viewed

@@ -19,15 +19,15 @@ opencloning/batch_cloning/pombe/pombe_summary.py,sha256=W9DLpnCuwK7w2DhHLu60N7L6
 opencloning/batch_cloning/ziqiang_et_al2024/__init__.py,sha256=zZUbj3uMzd9rKMXi5s9LQ1yUg7sccdS0f_4kpw7SQlk,7584
 opencloning/batch_cloning/ziqiang_et_al2024/index.html,sha256=EDncANDhhQkhi5FjnnAP6liHkG5srf4_Y46IrnMUG5g,4607
 opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json,sha256=mB81j2qWam7uRc-980YFjfqq2CiWTXJYfKFAoKuGtRw,157148
-opencloning/cre_lox.py,sha256=ocPx3EVkecoZjHx_ENhk5pEteRXRtiN5z5URmrIcCPw,1194
-opencloning/dna_functions.py,sha256=W-SxEfvYpN1JVZbTeCNitpQXkazEHvFyqZBUndd-jpY,16329
+opencloning/cre_lox.py,sha256=mb2ZddjrPIrUBT3xxMub5-c97WkKZ4Z-HkGFVzuR8pQ,2031
+opencloning/dna_functions.py,sha256=ivepJM2wRTIW0ArSiQ5s-XuqBd69giEQijaWXXGT64E,16536
 opencloning/dna_utils.py,sha256=uv97aO04dbk3NnqbN6GlnwOu0MOpK88rl2np2QcEQ4Y,6301
 opencloning/ebic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 opencloning/ebic/primer_design.py,sha256=gPZTF9w5SV7WGgnefp_HBM831y0z73M1Kb0QUPnbfIM,2270
 opencloning/ebic/primer_design_settings.py,sha256=OnFsuh0QCvplUEPXLZouzRo9R7rm4nLbcd2LkDCiIDM,1896
 opencloning/endpoints/annotation.py,sha256=3rlIXeNQzoqPD9lJUEBGLGxvlhUCTcfkqno814A8P0U,2283
 opencloning/endpoints/assembly.py,sha256=H1b7CRx1JZ5pcUGd3uyJG2syYugkXiIo8HRCA11TQfE,20704
-opencloning/endpoints/external_import.py,sha256=dDG7DiNb8WYE46nLGnkyRbGVVNUDXp3h0_1ixsJAh5o,16242
+opencloning/endpoints/external_import.py,sha256=DG8WSvyvr-9xy-odEwLHHA4FWiIh8sw4DvTblw5NCYc,18179
 opencloning/endpoints/no_assembly.py,sha256=NY6rhEDCNoZVn6Xk81cen2n-FkMr7ierfxM8G0npbQs,4722
 opencloning/endpoints/no_input.py,sha256=DuqKD3Ph3a44ZxPMEzZv1nwD5xlxYsN7YyxXcfjSUFc,3844
 opencloning/endpoints/other.py,sha256=TzfCJLDmZFWeKYxKhEfXOvlQrWWyBIGJ5FR0yA7tuvI,1673
@@ -40,7 +40,7 @@ opencloning/primer_design.py,sha256=nqCmYIZ7UvU4CQwVGJwX7T5LTHwt3-51_ZcTZZAgT_Y,
 opencloning/pydantic_models.py,sha256=gsipVXhjQOXVz2NL-MiNpLuOZYDVo2Pli9F--bp6tjs,15345
 opencloning/request_examples.py,sha256=QAsJxVaq5tHwlPB404IiJ9WC6SA7iNY7XnJm63BWT_E,2944
 opencloning/utils.py,sha256=wsdTJYliap-t3oa7yQE3pWDa1CR19mr5lUQfocp4hoM,1875
-opencloning-0.2.8.dist-info/LICENSE,sha256=VSdVE1f8axjIh6gvo9ZZygJdTVkRFMcwCW_hvjOHC_w,1058
-opencloning-0.2.8.dist-info/METADATA,sha256=0kyQ2RhJcsCrkjRR6usNPg4LswxSYq71A61MY0ro0Yw,8425
-opencloning-0.2.8.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
-opencloning-0.2.8.dist-info/RECORD,,
+opencloning-0.2.8.2.dist-info/LICENSE,sha256=VSdVE1f8axjIh6gvo9ZZygJdTVkRFMcwCW_hvjOHC_w,1058
+opencloning-0.2.8.2.dist-info/METADATA,sha256=OrcZ2VMjjkWI31tB4B1tEa6GgIxBFlOiGWxmA_8eK6A,8429
+opencloning-0.2.8.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+opencloning-0.2.8.2.dist-info/RECORD,,

{opencloning-0.2.8.dist-info → opencloning-0.2.8.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.1.2
+Generator: poetry-core 2.1.3
 Root-Is-Purelib: true
 Tag: py3-none-any

{opencloning-0.2.8.dist-info → opencloning-0.2.8.2.dist-info}/LICENSE RENAMED Viewed

File without changes

opencloning 0.2.8__py3-none-any.whl → 0.2.8.2__py3-none-any.whl

opencloning 0.2.8__py3-none-any.whl → 0.2.8.2__py3-none-any.whl