PyPI - opencloning - Versions diffs - 0.2.8__tar.gz → 0.2.8.1__tar.gz - Mend

opencloning 0.2.8tar.gz → 0.2.8.1tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{opencloning-0.2.8 → opencloning-0.2.8.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: opencloning
-Version: 0.2.8
+Version: 0.2.8.1
 Summary: Backend of OpenCloning, a web application to generate molecular cloning strategies in json format, and share them with others.
 License: MIT
 Author: Manuel Lera-Ramirez
@@ -15,7 +15,7 @@ Requires-Dist: beautifulsoup4 (>=4.11.1,<5.0.0)
 Requires-Dist: biopython (==1.84)
 Requires-Dist: fastapi
 Requires-Dist: httpx (>=0.25.0,<0.26.0)
-Requires-Dist: opencloning-linkml (==0.2.6a0)
+Requires-Dist: opencloning-linkml (==0.2.6.1a0)
 Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
 Requires-Dist: pandas (>=2.2.3,<3.0.0)
 Requires-Dist: primer3-py (>=2.0.3,<3.0.0)

{opencloning-0.2.8 → opencloning-0.2.8.1}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ authors = ["Manuel Lera-Ramirez <manulera14@gmail.com>"]
 description = "Backend of OpenCloning, a web application to generate molecular cloning strategies in json format, and share them with others."
 license = "MIT"
 name = "opencloning"
-version = "v0.2.8"
+version = "v0.2.8.1"
 package-mode = true
 readme = "README.md"
 repository = "https://github.com/manulera/OpenCloning_backend"
@@ -22,7 +22,7 @@ pydantic = "^2.7.1"
 pandas = "^2.2.3"
 openpyxl = "^3.1.5"
 pyyaml = "^6.0.2"
-opencloning-linkml = "0.2.6a0"
+opencloning-linkml = "0.2.6.1a0"
 primer3-py = "^2.0.3"
 biopython = "1.84"

opencloning-0.2.8.1/src/opencloning/cre_lox.py ADDED Viewed

@@ -0,0 +1,58 @@
+from itertools import product
+from pydna.dseqrecord import Dseqrecord
+from Bio.Data.IUPACData import ambiguous_dna_values
+from Bio.Seq import reverse_complement
+from .dna_utils import compute_regex_site, dseqrecord_finditer
+# We create a dictionary to map each sorted set of bases to its IUPAC ambiguity code
+# For example, ambiguous_base_dict['ACGT'] -> 'N'
+ambiguous_base_dict = {}
+for ambiguous, bases in ambiguous_dna_values.items():
+    ambiguous_base_dict[''.join(sorted(bases))] = ambiguous
+# To handle N values
+ambiguous_base_dict['N'] = 'N'
+# This is the original loxP sequence, here for reference
+LOXP_SEQUENCE = 'ATAACTTCGTATAGCATACATTATACGAAGTTAT'
+loxP_sequences = [
+    # https://blog.addgene.org/plasmids-101-cre-lox
+    # loxP
+    'ATAACTTCGTATANNNTANNNTATACGAAGTTAT',
+    # PMID:12202778
+    # lox66
+    'ATAACTTCGTATANNNTANNNTATACGAACGGTA',
+    # lox71
+    'TACCGTTCGTATANNNTANNNTATACGAAGTTAT',
+]
+loxP_consensus = ''
+for pos in range(len(LOXP_SEQUENCE)):
+    all_letters = set(seq[pos] for seq in loxP_sequences)
+    key = ''.join(sorted(all_letters))
+    loxP_consensus += ambiguous_base_dict[key]
+# We compute the regex for the forward and reverse loxP sequences
+loxP_regex = (compute_regex_site(loxP_consensus), compute_regex_site(reverse_complement(loxP_consensus)))
+def cre_loxP_overlap(x: Dseqrecord, y: Dseqrecord, _l: None = None) -> list[tuple[int, int, int]]:
+    """Find matching loxP sites between two sequences."""
+    out = list()
+    for pattern in loxP_regex:
+        matches_x = dseqrecord_finditer(pattern, x)
+        matches_y = dseqrecord_finditer(pattern, y)
+        for match_x, match_y in product(matches_x, matches_y):
+            value_x = match_x.group()
+            value_y = match_y.group()
+            if value_x[13:21] == value_y[13:21]:
+                out.append((match_x.start() + 13, match_y.start() + 13, 8))
+    # Unique values (keeping the order)
+    unique_out = []
+    for item in out:
+        if item not in unique_out:
+            unique_out.append(item)
+    return unique_out

{opencloning-0.2.8 → opencloning-0.2.8.1}/src/opencloning/endpoints/external_import.py RENAMED Viewed

@@ -8,6 +8,8 @@ from starlette.responses import RedirectResponse
 from Bio import BiopythonParserWarning
 from typing import Annotated
 from urllib.error import HTTPError
+from pydna.utils import location_boundaries
 from ..get_router import get_router
 from ..pydantic_models import (
     TextFileSequence,
@@ -22,6 +24,7 @@ from ..pydantic_models import (
     GenomeCoordinatesSource,
     SequenceFileFormat,
     SEVASource,
+    SimpleSequenceLocation,
 )
 from ..dna_functions import (
     format_sequence_genbank,
@@ -51,13 +54,13 @@ router = get_router()
             'description': 'The sequence was successfully parsed',
             'headers': {
                 'x-warning': {
-                    'description': 'A warning returned if the file can be read but is not in the expected format',
+                    'description': 'A warning returned if the file can be read but is not in the expected format or if some sequences were not extracted because they are incompatible with the provided coordinates',
                     'schema': {'type': 'string'},
                 },
             },
         },
         422: {
-            'description': 'Biopython cannot process this file.',
+            'description': 'Biopython cannot process this file or provided coordinates are invalid.',
         },
         404: {
             'description': 'The index_in_file is out of range.',
@@ -83,6 +86,12 @@ async def read_from_file(
         None,
         description='Name of the output sequence',
     ),
+    start: int | None = Query(None, description='Start position of the sequence to read (0-based)', ge=0),
+    end: int | None = Query(
+        None,
+        description='End position of the sequence to read (0-based)',
+        ge=0,
+    ),
 ):
     """Return a json sequence from a sequence file"""
@@ -107,6 +116,7 @@ async def read_from_file(
         sequence_file_format = SequenceFileFormat(extension_dict[extension])
     dseqs = list()
+    warning_messages = list()
     file_content = await file.read()
     if sequence_file_format == 'snapgene':
@@ -124,7 +134,6 @@ async def read_from_file(
         if warnings_captured:
             warning_messages = [str(w.message) for w in warnings_captured]
-            response.headers['x-warning'] = '; '.join(warning_messages)
     except ValueError as e:
         raise HTTPException(422, f'Biopython cannot process this file: {e}.')
@@ -134,25 +143,62 @@ async def read_from_file(
     if len(dseqs) == 0:
         raise HTTPException(422, 'Biopython cannot process this file.')
+    if index_in_file is not None:
+        if index_in_file >= len(dseqs):
+            raise HTTPException(404, 'The index_in_file is out of range.')
+        dseqs = [dseqs[index_in_file]]
+    seq_feature = None
+    if start is not None and end is not None:
+        seq_feature = SimpleSequenceLocation(start=start, end=end)
+        extracted_sequences = list()
+        for dseq in dseqs:
+            try:
+                # TODO: We could use extract when this is addressed: https://github.com/biopython/biopython/issues/4989
+                location = seq_feature.to_biopython_location(circular=dseq.circular, seq_len=len(dseq))
+                i, j = location_boundaries(location)
+                extracted_sequence = dseq[i:j]
+                # Only add the sequence if the interval is not out of bounds
+                if len(extracted_sequence) == len(location):
+                    extracted_sequences.append(extracted_sequence)
+                else:
+                    extracted_sequences.append(None)
+            except Exception:
+                extracted_sequences.append(None)
+        dseqs = extracted_sequences
     # The common part
-    # TODO: using id=0 is not great
     parent_source = UploadedFileSource(
-        id=0, sequence_file_format=sequence_file_format, file_name=file.filename, circularize=circularize
+        id=0,
+        sequence_file_format=sequence_file_format,
+        file_name=file.filename,
+        circularize=circularize,
+        coordinates=seq_feature,
     )
+    # If coordinates are provided, we only keep the sequences compatible with those coordinates
     out_sources = list()
+    out_sequences = list()
     for i in range(len(dseqs)):
+        if dseqs[i] is None:
+            continue
         new_source = parent_source.model_copy()
-        new_source.index_in_file = i
+        new_source.index_in_file = index_in_file if index_in_file is not None else i
         out_sources.append(new_source)
+        out_sequences.append(format_sequence_genbank(dseqs[i], output_name))
-    out_sequences = [format_sequence_genbank(s, output_name) for s in dseqs]
+    if len(out_sequences) == 0:
+        raise HTTPException(422, 'Provided coordinates are incompatible with sequences in the file.')
-    if index_in_file is not None:
-        if index_in_file >= len(out_sources):
-            raise HTTPException(404, 'The index_in_file is out of range.')
-        return {'sequences': [out_sequences[index_in_file]], 'sources': [out_sources[index_in_file]]}
-    else:
-        return {'sequences': out_sequences, 'sources': out_sources}
+    if len(out_sequences) < len(dseqs):
+        warning_messages.append(
+            'Some sequences were not extracted because they are incompatible with the provided coordinates.'
+        )
+    if len(warning_messages) > 0:
+        response.headers['x-warning'] = '; '.join(warning_messages)
+    return {'sequences': out_sequences, 'sources': out_sources}
 # TODO: a bit inconsistent that here you don't put {source: {...}} in the request, but

opencloning-0.2.8/src/opencloning/cre_lox.py DELETED Viewed

@@ -1,29 +0,0 @@
-from itertools import product
-from pydna.dseqrecord import Dseqrecord
-from .dna_utils import compute_regex_site, dseqrecord_finditer
-# This is the original loxP sequence, here for reference
-LOXP_SEQUENCE = 'ATAACTTCGTATAGCATACATTATACGAAGTTAT'
-# This is a consensus sequence, from this Addgene blog post: https://blog.addgene.org/plasmids-101-cre-lox
-# IMPORTANT: Because it is palindromic, we only look for it in the forward direction. If this were changed
-# to a non-palindromic sequence, you would need to look for matches reversing it, like in Gateway cloning
-LOXP_CONSENSUS = 'ATAACTTCGTATANNNTANNNTATACGAAGTTAT'
-loxP_regex = compute_regex_site(LOXP_CONSENSUS)
-def cre_loxP_overlap(x: Dseqrecord, y: Dseqrecord, _l: None = None) -> list[tuple[int, int, int]]:
-    """Find matching loxP sites between two sequences."""
-    out = list()
-    matches_x = dseqrecord_finditer(loxP_regex, x)
-    matches_y = dseqrecord_finditer(loxP_regex, y)
-    for match_x, match_y in product(matches_x, matches_y):
-        value_x = match_x.group()
-        value_y = match_y.group()
-        if value_x == value_y:
-            out.append((match_x.start(), match_y.start(), len(value_x)))
-    return out