PyPI - pydna - Versions diffs - 5.5.4__py3-none-any.whl → 5.5.6__py3-none-any.whl - Mend

pydna 5.5.4py3-none-any.whl → 5.5.6py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

pydna/__init__.py +30 -195
pydna/_pretty.py +8 -8
pydna/_thermodynamic_data.py +3 -3
pydna/all.py +1 -12
pydna/alphabet.py +995 -0
pydna/amplicon.py +19 -24
pydna/amplify.py +75 -95
pydna/assembly.py +64 -81
pydna/assembly2.py +375 -310
pydna/codon.py +4 -4
pydna/common_sub_strings.py +6 -8
pydna/contig.py +203 -10
pydna/design.py +176 -60
pydna/dseq.py +1788 -718
pydna/dseqrecord.py +197 -179
pydna/gateway.py +6 -6
pydna/gel.py +5 -5
pydna/genbank.py +43 -46
pydna/genbankfixer.py +89 -92
pydna/ladders.py +11 -12
pydna/oligonucleotide_hybridization.py +124 -0
pydna/opencloning_models.py +187 -60
pydna/parsers.py +45 -32
pydna/primer.py +4 -4
pydna/primer_screen.py +833 -0
pydna/readers.py +14 -9
pydna/seq.py +137 -47
pydna/seqrecord.py +54 -62
pydna/sequence_picker.py +2 -5
pydna/sequence_regex.py +6 -6
pydna/tm.py +17 -17
pydna/types.py +19 -19
pydna/utils.py +97 -75
{pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/METADATA +8 -8
pydna-5.5.6.dist-info/RECORD +42 -0
{pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/WHEEL +1 -1
pydna/conftest.py +0 -42
pydna/download.py +0 -32
pydna/genbankfile.py +0 -42
pydna/genbankrecord.py +0 -168
pydna/goldengate.py +0 -45
pydna/ligate.py +0 -62
pydna/user_cloning.py +0 -29
pydna-5.5.4.dist-info/RECORD +0 -46
{pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/licenses/LICENSE.txt +0 -0

pydna/oligonucleotide_hybridization.py ADDED Viewed

@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+"""
+This module contains the functions for oligonucleotide hybridization.
+"""
+from pydna.common_sub_strings import common_sub_strings
+from Bio.Seq import reverse_complement
+from pydna.primer import Primer
+from pydna.dseqrecord import Dseqrecord
+from pydna.dseq import Dseq
+from pydna.opencloning_models import OligoHybridizationSource, SourceInput
+def oligonucleotide_hybridization_overhangs(
+    fwd_oligo_seq: str, rvs_oligo_seq: str, minimal_annealing: int
+) -> list[int]:
+    """
+    Returns possible overhangs between two oligos given a minimal annealing length, and
+    returns an error if mismatches are found.
+    see https://github.com/manulera/OpenCloning_backend/issues/302 for notation
+    >>> from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization_overhangs
+    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCAT", 3)
+    [0]
+    >>> oligonucleotide_hybridization_overhangs("aATGGC", "GCCAT", 5)
+    [-1]
+    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATa", 5)
+    [1]
+    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATaaGCCAT", 5)
+    [0, 7]
+    If the minimal annealing length is longer than the length of the shortest oligo, it returns an empty list.
+    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATaaGCCAT", 100)
+    []
+    If it's possible to anneal for ``minimal_annealing`` length, but with mismatches, it raises an error.
+    >>> oligonucleotide_hybridization_overhangs("cATGGC", "GCCATa", 5)
+    Traceback (most recent call last):
+        ...
+    ValueError: The oligonucleotides can anneal with mismatches
+    """
+    matches = common_sub_strings(
+        fwd_oligo_seq.lower(),
+        reverse_complement(rvs_oligo_seq.lower()),
+        minimal_annealing,
+    )
+    for pos_fwd, pos_rvs, length in matches:
+        if (pos_fwd != 0 and pos_rvs != 0) or (
+            pos_fwd + length < len(fwd_oligo_seq)
+            and pos_rvs + length < len(rvs_oligo_seq)
+        ):
+            raise ValueError("The oligonucleotides can anneal with mismatches")
+    # Return possible overhangs
+    return [pos_rvs - pos_fwd for pos_fwd, pos_rvs, length in matches]
+def oligonucleotide_hybridization(
+    fwd_primer: Primer, rvs_primer: Primer, minimal_annealing: int
+) -> list[Dseqrecord]:
+    """
+    Returns a list of Dseqrecord objects representing the hybridization of two primers.
+    >>> from pydna.primer import Primer
+    >>> from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization
+    >>> fwd_primer = Primer("ATGGC")
+    >>> rvs_primer = Primer("GCCA")
+    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer, 3)[0].seq
+    Dseq(-5)
+    ATGGC
+     ACCG
+    Multiple values can be returned:
+    >>> rvs_primer2 = Primer("GCCATaaGCCAT")
+    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer2, 3)[0].seq
+    Dseq(-12)
+    ATGGC
+    TACCGaaTACCG
+    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer2, 3)[1].seq
+    Dseq(-12)
+           ATGGC
+    TACCGaaTACCG
+    If no possible overhangs are found, it returns an empty list.
+    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer, 100)
+    []
+    If there are mismatches given the minimal annealing length, it raises an error.
+    >>> fwd_primer3 = Primer("cATGGC")
+    >>> rvs_primer3 = Primer("GCCATa")
+    >>> oligonucleotide_hybridization(fwd_primer3, rvs_primer3, 5)
+    Traceback (most recent call last):
+        ...
+    ValueError: The oligonucleotides can anneal with mismatches
+    """
+    possible_overhangs = oligonucleotide_hybridization_overhangs(
+        str(fwd_primer.seq), str(rvs_primer.seq), minimal_annealing
+    )
+    sources = [
+        OligoHybridizationSource(
+            overhang_crick_3prime=pos,
+            input=[SourceInput(sequence=fwd_primer), SourceInput(sequence=rvs_primer)],
+        )
+        for pos in possible_overhangs
+    ]
+    return [
+        Dseqrecord(
+            Dseq(
+                str(fwd_primer.seq),
+                str(rvs_primer.seq),
+                ovhg=source.overhang_crick_3prime,
+            ),
+            source=source,
+        )
+        for source in sources
+    ]

pydna/opencloning_models.py CHANGED Viewed

@@ -16,6 +16,17 @@ sequence. You can also use the ``CloningStrategy`` class to create a JSON repres
 the cloning strategy. That ``CloningStrategy`` can be loaded in the OpenCloning web interface
 to see a representation of the cloning strategy.
+Contributing
+============
+Not all fields can be serialized by pydantic directly into regular types. For
+instance, the ``coordinates`` field of the ``GenomeCoordinatesSource`` class is a
+``SimpleLocation`` object, and the ``input`` field of ``Source`` is a list of ``SourceInput``
+objects (which can wrap ``Dseqrecord`` or ``Primer`` objects) or ``AssemblyFragment`` objects.
+For these types of fields, you have to define a ``field_serializer`` method to serialize them
+to the correct type.
 """
 from __future__ import annotations
@@ -24,10 +35,11 @@ from pydantic_core import core_schema
 from contextlib import contextmanager
 from threading import local
-from pydantic import BaseModel, ConfigDict, Field, field_validator
+from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator
 from opencloning_linkml.datamodel import (
     CloningStrategy as _BaseCloningStrategy,
+    DatabaseSource as _DatabaseSource,
     Primer as _PrimerModel,
     Source as _Source,
     TextFileSequence as _TextFileSequence,
@@ -47,12 +59,32 @@ from opencloning_linkml.datamodel import (
     LigationSource as _LigationSource,
     GatewaySource as _GatewaySource,
     GatewayReactionType,
+    AnnotationTool,
     HomologousRecombinationSource as _HomologousRecombinationSource,
     CreLoxRecombinationSource as _CreLoxRecombinationSource,
     PCRSource as _PCRSource,
     CRISPRSource as _CRISPRSource,
+    RepositoryIdSource as _RepositoryIdSource,
+    UploadedFileSource as _UploadedFileSource,
+    AddgeneIdSource as _AddgeneIdSource,
+    AddgeneSequenceType,
+    BenchlingUrlSource as _BenchlingUrlSource,
+    SnapGenePlasmidSource as _SnapGenePlasmidSource,
+    EuroscarfSource as _EuroscarfSource,
+    WekWikGeneIdSource as _WekWikGeneIdSource,
+    SEVASource as _SEVASource,
+    IGEMSource as _IGEMSource,
+    OpenDNACollectionsSource as _OpenDNACollectionsSource,
+    GenomeCoordinatesSource as _GenomeCoordinatesSource,
+    OligoHybridizationSource as _OligoHybridizationSource,
+    PolymeraseExtensionSource as _PolymeraseExtensionSource,
+    AnnotationSource as _AnnotationSource,
+    AnnotationReport as _AnnotationReport,
+    PlannotateAnnotationReport as _PlannotateAnnotationReport,
+    ReverseComplementSource as _ReverseComplementSource,
+    NCBISequenceSource as _NCBISequenceSource,
 )
-from Bio.SeqFeature import Location, LocationParserError
+from Bio.SeqFeature import Location, LocationParserError, SimpleLocation
 from Bio.Restriction.Restriction import AbstractCut
 import networkx as nx
 from typing import List
@@ -78,8 +110,9 @@ def id_mode(use_python_internal_id: bool = True):
     mapping them to the OpenCloning data model. If ``use_python_internal_id`` is True,
     the built-in python ``id()`` function is used to assign ids to objects. That function
     produces a unique integer for each object in python, so it's guaranteed to be unique.
-    If ``use_python_internal_id`` is False, the object's ``.id`` attribute (must be a string integer)
-    is used to assign ids to objects. This is useful when the objects already have meaningful ids,
+    If ``use_python_internal_id`` is False, the object's ``.id`` attribute
+    (must be a string integer) is used to assign ids to objects. This is useful
+    when the objects already have meaningful ids,
     and you want to keep references to them in ``SourceInput`` objects (which sequences and
     primers are used in a particular source).
@@ -136,7 +169,6 @@ def get_id(obj: "Primer" | "Dseqrecord") -> int:
 class SequenceLocationStr(str):
     """A string representation of a sequence location, genbank-like."""
-    # TODO: this should handle origin-spanning simple locations (splitted)
     @classmethod
     def from_biopython_location(cls, location: Location):
         return cls(format_feature_location(location, None))
@@ -178,6 +210,14 @@ class SequenceLocationStr(str):
     ):
         return cls.from_biopython_location(create_location(start, end, seq_len, strand))
+    def get_ncbi_format_coordinates(self) -> str:
+        """Return start, end, strand in the same format as the NCBI eutils API (1-based, inclusive)"""
+        return (
+            self.to_biopython_location().start + 1,
+            self.to_biopython_location().end,
+            self.to_biopython_location().strand,
+        )
 class ConfiguredBaseModel(BaseModel):
     model_config = ConfigDict(
@@ -199,7 +239,7 @@ class TextFileSequence(_TextFileSequence):
             id=get_id(dseqr),
             sequence_file_format="genbank",
             overhang_crick_3prime=dseqr.seq.ovhg,
-            overhang_watson_3prime=dseqr.seq.watson_ovhg(),
+            overhang_watson_3prime=dseqr.seq.watson_ovhg,
             file_content=dseqr.format("genbank"),
         )
@@ -261,18 +301,23 @@ class Source(ConfiguredBaseModel):
     input: list[Union[SourceInput, AssemblyFragment]] = Field(default_factory=list)
     TARGET_MODEL: ClassVar[Type[_Source]] = _Source
-    def input_models(self):
-        return [fragment.to_pydantic_model() for fragment in self.input]
-    def _kwargs(self, seq_id: int) -> dict:
-        return {
-            "id": seq_id,
-            "input": self.input_models(),
-        }
+    @field_serializer("input")
+    def serialize_input(
+        self, input: list[Union[SourceInput, AssemblyFragment]]
+    ) -> list[_SourceInput | _AssemblyFragment]:
+        return [fragment.to_pydantic_model() for fragment in input]
     def to_pydantic_model(self, seq_id: int):
-        kwargs = self._kwargs(seq_id)
-        return self.TARGET_MODEL(**kwargs)
+        model_dict = self.model_dump()
+        model_dict["id"] = seq_id
+        return self.TARGET_MODEL(**model_dict)
+    def to_unserialized_dict(self):
+        """
+        Converts into a dictionary without serializing the fields.
+        This is used to be able to recast.
+        """
+        return {field: getattr(self, field) for field in self.__pydantic_fields__}
     def add_to_history_graph(self, history_graph: nx.DiGraph, seq: "Dseqrecord"):
         """
@@ -315,15 +360,6 @@ class AssemblySource(Source):
     TARGET_MODEL: ClassVar[Type[_AssemblySource]] = _AssemblySource
-    def _kwargs(self, seq_id: int) -> dict:
-        return {
-            **super()._kwargs(seq_id),
-            "circular": self.circular,
-        }
-    def to_pydantic_model(self, seq_id: int):
-        return self.TARGET_MODEL(**self._kwargs(seq_id))
     @classmethod
     def from_subfragment_representation(
         cls,
@@ -346,6 +382,90 @@ class AssemblySource(Source):
         return AssemblySource(input=input_list, circular=is_circular)
+class DatabaseSource(Source):
+    TARGET_MODEL: ClassVar[Type[_DatabaseSource]] = _DatabaseSource
+    database_id: int
+class UploadedFileSource(Source):
+    TARGET_MODEL: ClassVar[Type[_UploadedFileSource]] = _UploadedFileSource
+    file_name: str
+    index_in_file: int
+    sequence_file_format: str
+class RepositoryIdSource(Source):
+    TARGET_MODEL: ClassVar[Type[_RepositoryIdSource]] = _RepositoryIdSource
+    repository_id: str
+    # location: Location
+class RepositoryIdSourceWithSequenceFileUrl(RepositoryIdSource):
+    """
+    Auxiliary class to avoid code duplication in the sources that have
+    a sequence file url.
+    """
+    sequence_file_url: Optional[str] = None
+class AddgeneIdSource(RepositoryIdSourceWithSequenceFileUrl):
+    TARGET_MODEL: ClassVar[Type[_AddgeneIdSource]] = _AddgeneIdSource
+    addgene_sequence_type: Optional[AddgeneSequenceType] = None
+class BenchlingUrlSource(RepositoryIdSource):
+    TARGET_MODEL: ClassVar[Type[_BenchlingUrlSource]] = _BenchlingUrlSource
+class SnapGenePlasmidSource(RepositoryIdSource):
+    TARGET_MODEL: ClassVar[Type[_SnapGenePlasmidSource]] = _SnapGenePlasmidSource
+class EuroscarfSource(RepositoryIdSource):
+    TARGET_MODEL: ClassVar[Type[_EuroscarfSource]] = _EuroscarfSource
+class WekWikGeneIdSource(RepositoryIdSourceWithSequenceFileUrl):
+    TARGET_MODEL: ClassVar[Type[_WekWikGeneIdSource]] = _WekWikGeneIdSource
+class SEVASource(RepositoryIdSourceWithSequenceFileUrl):
+    TARGET_MODEL: ClassVar[Type[_SEVASource]] = _SEVASource
+class IGEMSource(RepositoryIdSourceWithSequenceFileUrl):
+    TARGET_MODEL: ClassVar[Type[_IGEMSource]] = _IGEMSource
+class OpenDNACollectionsSource(RepositoryIdSourceWithSequenceFileUrl):
+    TARGET_MODEL: ClassVar[Type[_OpenDNACollectionsSource]] = _OpenDNACollectionsSource
+class NCBISequenceSource(RepositoryIdSource):
+    TARGET_MODEL: ClassVar[Type[_NCBISequenceSource]] = _NCBISequenceSource
+    coordinates: SimpleLocation | None = None
+class GenomeCoordinatesSource(NCBISequenceSource):
+    TARGET_MODEL: ClassVar[Type[_GenomeCoordinatesSource]] = _GenomeCoordinatesSource
+    assembly_accession: Optional[str] = None
+    locus_tag: Optional[str] = None
+    gene_id: Optional[int] = None
+    coordinates: SimpleLocation
+    @field_serializer("coordinates")
+    def serialize_coordinates(self, coordinates: SimpleLocation) -> str:
+        return SequenceLocationStr.from_biopython_location(coordinates)
 class RestrictionAndLigationSource(AssemblySource):
     restriction_enzymes: list[AbstractCut]
@@ -353,11 +473,11 @@ class RestrictionAndLigationSource(AssemblySource):
         _RestrictionAndLigationSource
     )
-    def _kwargs(self, seq_id: int) -> dict:
-        return {
-            **super()._kwargs(seq_id),
-            "restriction_enzymes": [str(enzyme) for enzyme in self.restriction_enzymes],
-        }
+    @field_serializer("restriction_enzymes")
+    def serialize_restriction_enzymes(
+        self, restriction_enzymes: list[AbstractCut]
+    ) -> list[str]:
+        return [str(enzyme) for enzyme in restriction_enzymes]
 class GibsonAssemblySource(AssemblySource):
@@ -387,13 +507,6 @@ class GatewaySource(AssemblySource):
     reaction_type: GatewayReactionType
     greedy: bool = Field(default=False)
-    def _kwargs(self, seq_id: int) -> dict:
-        return {
-            **super()._kwargs(seq_id),
-            "reaction_type": self.reaction_type,
-            "greedy": self.greedy,
-        }
 class HomologousRecombinationSource(AssemblySource):
     TARGET_MODEL: ClassVar[Type[_HomologousRecombinationSource]] = (
@@ -415,21 +528,24 @@ class PCRSource(AssemblySource):
     TARGET_MODEL: ClassVar[Type[_PCRSource]] = _PCRSource
     add_primer_features: bool = Field(default=False)
-    def _kwargs(self, seq_id: int) -> dict:
-        return {
-            **super()._kwargs(seq_id),
-            "add_primer_features": self.add_primer_features,
-        }
 class SequenceCutSource(Source):
     left_edge: CutSiteType | None
     right_edge: CutSiteType | None
-    BASE_MODEL: ClassVar[Type[_SequenceCutSource]] = _SequenceCutSource
-    ENZYME_MODEL: ClassVar[Type[_RestrictionEnzymeDigestionSource]] = (
-        _RestrictionEnzymeDigestionSource
-    )
+    @property
+    def TARGET_MODEL(self):
+        return (
+            _RestrictionEnzymeDigestionSource
+            if self._has_enzyme()
+            else _SequenceCutSource
+        )
+    @field_serializer("left_edge", "right_edge")
+    def serialize_cut_site(
+        self, cut_site: CutSiteType | None
+    ) -> _RestrictionSequenceCut | _SequenceCut | None:
+        return self._cutsite_to_model(cut_site)
     @staticmethod
     def _cutsite_to_model(cut_site: CutSiteType | None):
@@ -461,18 +577,31 @@ class SequenceCutSource(Source):
         return has_enzyme(self.left_edge) or has_enzyme(self.right_edge)
-    def _target_model(self):
-        return self.ENZYME_MODEL if self._has_enzyme() else self.BASE_MODEL
-    def _kwargs(self, seq_id: int) -> dict:
-        return {
-            **super()._kwargs(seq_id),
-            "left_edge": self._cutsite_to_model(self.left_edge),
-            "right_edge": self._cutsite_to_model(self.right_edge),
-        }
+class OligoHybridizationSource(Source):
+    TARGET_MODEL: ClassVar[Type[_OligoHybridizationSource]] = _OligoHybridizationSource
+    overhang_crick_3prime: Optional[int] = None
-    def to_pydantic_model(self, seq_id: int):
-        return self._target_model()(**self._kwargs(seq_id))
+class PolymeraseExtensionSource(Source):
+    TARGET_MODEL: ClassVar[Type[_PolymeraseExtensionSource]] = (
+        _PolymeraseExtensionSource
+    )
+class AnnotationSource(Source):
+    TARGET_MODEL: ClassVar[Type[_AnnotationSource]] = _AnnotationSource
+    annotation_tool: AnnotationTool
+    annotation_tool_version: Optional[str] = None
+    annotation_report: Optional[
+        list[_AnnotationReport | _PlannotateAnnotationReport]
+    ] = None
+class ReverseComplementSource(Source):
+    TARGET_MODEL: ClassVar[Type[_ReverseComplementSource]] = _ReverseComplementSource
 class CloningStrategy(_BaseCloningStrategy):
@@ -510,9 +639,7 @@ class CloningStrategy(_BaseCloningStrategy):
                 else:
                     self.add_primer(source_input.sequence)
         else:
-            self.sources.append(
-                _ManuallyTypedSource(id=get_id(dseqr), input=[], user_input="A")
-            )
+            self.sources.append(_ManuallyTypedSource(id=get_id(dseqr), input=[]))
     def reassign_ids(self):
         all_ids = (

pydna/parsers.py CHANGED Viewed

@@ -7,26 +7,23 @@
 """Provides two functions, parse and parse_primers"""
-# import os as _os
-import re as _re
-import io as _io
-import textwrap as _textwrap
+import re
+import io
+import textwrap
-from Bio import SeqIO as _SeqIO
-from pydna.genbankfile import GenbankFile as _GenbankFile
-from pydna.dseqrecord import Dseqrecord as _Dseqrecord
-from pydna.primer import Primer as _Primer
+from Bio import SeqIO
+from pydna.dseqrecord import Dseqrecord
+from Bio.SeqRecord import SeqRecord
+from pydna.opencloning_models import UploadedFileSource
+from pydna.primer import Primer
-# from pydna.amplify import pcr as _pcr
-# from copy import deepcopy as _deepcopy
-# from Bio.SeqFeature import SeqFeature as _SeqFeature
-# import xml.etree.ElementTree as _et
 try:
-    from itertools import pairwise as _pairwise
+    from itertools import pairwise
 except ImportError:
-    def _pairwise(iterable):
+    def pairwise(iterable):
         # pairwise('ABCDEFG') → AB BC CD DE EF FG
         iterator = iter(iterable)
         a = next(iterator, None)
@@ -51,8 +48,8 @@ gb_fasta_embl_regex = (
 def extract_from_text(text):
     """docstring."""
-    data = _textwrap.dedent(str(text))
-    mos = list(_re.finditer(gb_fasta_embl_regex, data + "\n\n", flags=_re.MULTILINE))
+    data = textwrap.dedent(str(text))
+    mos = list(re.finditer(gb_fasta_embl_regex, data + "\n\n", flags=re.MULTILINE))
     class Fakemo(object):
         def start(self):
@@ -65,7 +62,7 @@ def extract_from_text(text):
     gaps = []
-    for mo1, mo2 in _pairwise([mofirst] + mos + [molast]):
+    for mo1, mo2 in pairwise([mofirst] + mos + [molast]):
         gaps.append(data[mo1.end() : mo2.start()])
     return tuple(mo.group(0) for mo in mos), tuple(gaps)
@@ -85,19 +82,22 @@ def embl_gb_fasta(text):
     # topology = "linear"
     for chunk in chunks:
-        handle = _io.StringIO(chunk)
+        handle = io.StringIO(chunk)
         # circular = False
         first_line = chunk.splitlines()[0].lower().split()
         try:
-            parsed = _SeqIO.read(handle, "embl")
+            parsed = SeqIO.read(handle, "embl")
+            parsed.annotations["pydna_parse_sequence_file_format"] = "embl"
         except ValueError:
             handle.seek(0)
             try:
-                parsed = _SeqIO.read(handle, "genbank")
+                parsed = SeqIO.read(handle, "genbank")
+                parsed.annotations["pydna_parse_sequence_file_format"] = "genbank"
             except ValueError:
                 handle.seek(0)
                 try:
-                    parsed = _SeqIO.read(handle, "fasta-blast")
+                    parsed = SeqIO.read(handle, "fasta-blast")
+                    parsed.annotations["pydna_parse_sequence_file_format"] = "fasta"
                 except ValueError:
                     handle.close()
                     continue
@@ -126,7 +126,7 @@ def embl_gb_fasta(text):
     return tuple(result_list)
-def parse(data, ds=True):
+def parse(data, ds=True) -> list[Dseqrecord | SeqRecord]:
     """Return *all* DNA sequences found in data.
     If no sequences are found, an empty list is returned. This is a greedy
@@ -191,15 +191,22 @@ def parse(data, ds=True):
             path = item
         finally:
             newsequences = embl_gb_fasta(raw)
-            # nfs = [_SeqFeature() for f in parsed.features]
-            # for f, nf in zip(parsed.features, nfs):
-            #     nf.__dict__ = _deepcopy(f.__dict__)
-            # parsed.features = nfs
             for s in newsequences:
                 if ds and path:
-                    sequences.append(_GenbankFile.from_SeqRecord(s, path=path))
+                    from pydna.opencloning_models import UploadedFileSource
+                    result = Dseqrecord.from_SeqRecord(s)
+                    result.source = UploadedFileSource(
+                        file_name=str(path),  # we use str to handle PosixPath
+                        sequence_file_format=s.annotations[
+                            "pydna_parse_sequence_file_format"
+                        ],
+                        index_in_file=0,
+                    )
+                    sequences.append(result)
+                    # sequences.append(_GenbankFile.from_SeqRecord(s, path=path))
                 elif ds:
-                    sequences.append(_Dseqrecord.from_SeqRecord(s))
+                    sequences.append(Dseqrecord.from_SeqRecord(s))
                 else:
                     sequences.append(s)
     return sequences
@@ -207,10 +214,10 @@ def parse(data, ds=True):
 def parse_primers(data):
     """docstring."""
-    return [_Primer(x) for x in parse(data, ds=False)]
+    return [Primer(x) for x in parse(data, ds=False)]
-def parse_snapgene(file_path: str) -> list[_Dseqrecord]:
+def parse_snapgene(file_path: str) -> list[Dseqrecord]:
     """Parse a SnapGene file and return a Dseqrecord object.
     Parameters
@@ -225,9 +232,15 @@ def parse_snapgene(file_path: str) -> list[_Dseqrecord]:
     """
     with open(file_path, "rb") as f:
-        parsed_seq = next(_SeqIO.parse(f, "snapgene"))
+        parsed_seq = next(SeqIO.parse(f, "snapgene"))
         circular = (
             "topology" in parsed_seq.annotations.keys()
             and parsed_seq.annotations["topology"] == "circular"
         )
-        return [_Dseqrecord(parsed_seq, circular=circular)]
+        source = UploadedFileSource(
+            file_name=str(file_path),
+            sequence_file_format="snapgene",
+            index_in_file=0,
+        )
+        return [Dseqrecord(parsed_seq, circular=circular, source=source)]

pydna/primer.py CHANGED Viewed

@@ -7,11 +7,11 @@
 """This module provide the Primer class that is a subclass of the biopython SeqRecord."""
-from pydna.seq import Seq as _Seq
-from pydna.seqrecord import SeqRecord as _SeqRecord
+from pydna.seq import Seq
+from pydna.seqrecord import SeqRecord
-class Primer(_SeqRecord):
+class Primer(SeqRecord):
     """Primer and its position on a template, footprint and tail."""
     def __init__(
@@ -23,7 +23,7 @@ class Primer(_SeqRecord):
         elif hasattr(record, "transcribe"):  # Seq
             super().__init__(record, *args, **kwargs)
         else:  # string?
-            super().__init__(_Seq(record), *args, **kwargs)
+            super().__init__(Seq(record), *args, **kwargs)
         self.amplicon = amplicon
         self.position = position
         self._fp = footprint or len(record)

pydna 5.5.4__py3-none-any.whl → 5.5.6__py3-none-any.whl

pydna 5.5.4py3-none-any.whl → 5.5.6py3-none-any.whl