pydna 5.5.3__py3-none-any.whl → 5.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +1 -1
- pydna/assembly2.py +415 -159
- pydna/dseqrecord.py +50 -2
- pydna/opencloning_models.py +553 -0
- pydna/types.py +5 -2
- {pydna-5.5.3.dist-info → pydna-5.5.4.dist-info}/METADATA +8 -40
- {pydna-5.5.3.dist-info → pydna-5.5.4.dist-info}/RECORD +9 -8
- {pydna-5.5.3.dist-info → pydna-5.5.4.dist-info}/WHEEL +1 -1
- {pydna-5.5.3.dist-info → pydna-5.5.4.dist-info/licenses}/LICENSE.txt +0 -0
pydna/dseqrecord.py
CHANGED
|
@@ -35,6 +35,11 @@ import os as _os
|
|
|
35
35
|
import re as _re
|
|
36
36
|
import time as _time
|
|
37
37
|
import datetime as _datetime
|
|
38
|
+
from typing import Union, TYPE_CHECKING
|
|
39
|
+
from pydna.opencloning_models import SequenceCutSource
|
|
40
|
+
|
|
41
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
42
|
+
from pydna.opencloning_models import Source
|
|
38
43
|
|
|
39
44
|
|
|
40
45
|
# import logging as _logging
|
|
@@ -128,6 +133,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
128
133
|
"""
|
|
129
134
|
|
|
130
135
|
seq: _Dseq
|
|
136
|
+
source: Union["Source", None] = None
|
|
131
137
|
|
|
132
138
|
def __init__(
|
|
133
139
|
self,
|
|
@@ -135,6 +141,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
135
141
|
*args,
|
|
136
142
|
circular=None,
|
|
137
143
|
n=5e-14, # mol ( = 0.05 pmol)
|
|
144
|
+
source=None,
|
|
138
145
|
**kwargs,
|
|
139
146
|
):
|
|
140
147
|
# _module_logger.info("### Dseqrecord initialized ###")
|
|
@@ -202,6 +209,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
202
209
|
self.map_target = None
|
|
203
210
|
self.n = n # amount in mol; the default 5E-14 mol is 0.05 pmol
|
|
204
211
|
self.annotations.update({"molecule_type": "DNA"})
|
|
212
|
+
self.source = source
|
|
205
213
|
|
|
206
214
|
@classmethod
|
|
207
215
|
def from_string(
|
|
@@ -256,6 +264,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
256
264
|
obj.features = record.features
|
|
257
265
|
obj.map_target = None
|
|
258
266
|
obj.n = n
|
|
267
|
+
obj.source = None
|
|
259
268
|
if circular is None:
|
|
260
269
|
circular = record.annotations.get("topology") == "circular"
|
|
261
270
|
obj.seq = _Dseq.quick(
|
|
@@ -875,7 +884,11 @@ class Dseqrecord(_SeqRecord):
|
|
|
875
884
|
def __eq__(self, other):
|
|
876
885
|
"""docstring."""
|
|
877
886
|
try:
|
|
878
|
-
|
|
887
|
+
this_dict = self.__dict__.copy()
|
|
888
|
+
other_dict = other.__dict__.copy()
|
|
889
|
+
del this_dict["source"]
|
|
890
|
+
del other_dict["source"]
|
|
891
|
+
if self.seq == other.seq and str(this_dict) == str(other_dict):
|
|
879
892
|
return True
|
|
880
893
|
except AttributeError:
|
|
881
894
|
pass
|
|
@@ -1419,4 +1432,39 @@ class Dseqrecord(_SeqRecord):
|
|
|
1419
1432
|
right_edge = right_watson if right_ovhg > 0 else right_crick
|
|
1420
1433
|
features = self[left_edge:right_edge].features
|
|
1421
1434
|
|
|
1422
|
-
|
|
1435
|
+
# This will need to be generalised to all types of cuts
|
|
1436
|
+
source = SequenceCutSource.from_parent(self, left_cut, right_cut)
|
|
1437
|
+
return Dseqrecord(dseq, features=features, source=source)
|
|
1438
|
+
|
|
1439
|
+
def history(self):
    """
    Return the cloning history of this sequence as a text tree.

    The text is produced by the ``history_string`` method of ``self.source``.
    When ``self.source`` is ``None`` there is nothing to report and an empty
    string is returned.

    Check the documentation notebooks for extensive examples.

    Returns
    -------
    str: A string representation of the cloning history of the sequence.

    Examples
    --------
    >>> from pydna.dseqrecord import Dseqrecord
    >>> from pydna.assembly2 import gibson_assembly
    >>> fragments = [
    ...     Dseqrecord("TTTTacgatAAtgctccCCCC", circular=False, name="fragment1"),
    ...     Dseqrecord("CCCCtcatGGGG", circular=False, name="fragment2"),
    ...     Dseqrecord("GGGGatataTTTT", circular=False, name="fragment3"),
    ... ]
    >>> product, *_ = gibson_assembly(fragments, limit=4)
    >>> product.name = "product_name"
    >>> print(product.history())
    ╙── product_name (Dseqrecord(o34))
        └─╼ GibsonAssemblySource
            ├─╼ fragment1 (Dseqrecord(-21))
            ├─╼ fragment2 (Dseqrecord(-12))
            └─╼ fragment3 (Dseqrecord(-13))
    """
    provenance = self.source
    return "" if provenance is None else provenance.history_string(self)
|
|
@@ -0,0 +1,553 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
This module provides classes that roughly map to the `OpenCloning <https://opencloning.org>`_
|
|
4
|
+
data model, which is defined using `LinkML <https://linkml.io>`_, and available as a Python
|
|
5
|
+
package `opencloning-linkml <https://pypi.org/project/opencloning-linkml/>`_. These classes
|
|
6
|
+
are documented there, and the ones in this module essentially replace the fields pointing to
|
|
7
|
+
sequences and primers (which use ids in the data model) to ``Dseqrecord`` and ``Primer``
|
|
8
|
+
objects, respectively. Similarly, it uses Location from ``Biopython`` instead of a string,
|
|
9
|
+
which is what the data model uses.
|
|
10
|
+
|
|
11
|
+
When using pydna to plan cloning, it stores the provenance of ``Dseqrecord`` objects in
|
|
12
|
+
their ``source`` attribute. Not all methods generate sources so far, so refer to the
|
|
13
|
+
documentation notebooks for examples on how to use this feature. The ``history`` method of
|
|
14
|
+
``Dseqrecord`` objects can be used to get a string representation of the provenance of the
|
|
15
|
+
sequence. You can also use the ``CloningStrategy`` class to create a JSON representation of
|
|
16
|
+
the cloning strategy. That ``CloningStrategy`` can be loaded in the OpenCloning web interface
|
|
17
|
+
to see a representation of the cloning strategy.
|
|
18
|
+
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from typing import Optional, Union, Any, ClassVar, Type
|
|
23
|
+
from pydantic_core import core_schema
|
|
24
|
+
from contextlib import contextmanager
|
|
25
|
+
from threading import local
|
|
26
|
+
|
|
27
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
28
|
+
|
|
29
|
+
from opencloning_linkml.datamodel import (
|
|
30
|
+
CloningStrategy as _BaseCloningStrategy,
|
|
31
|
+
Primer as _PrimerModel,
|
|
32
|
+
Source as _Source,
|
|
33
|
+
TextFileSequence as _TextFileSequence,
|
|
34
|
+
AssemblySource as _AssemblySource,
|
|
35
|
+
SourceInput as _SourceInput,
|
|
36
|
+
AssemblyFragment as _AssemblyFragment,
|
|
37
|
+
ManuallyTypedSource as _ManuallyTypedSource,
|
|
38
|
+
RestrictionAndLigationSource as _RestrictionAndLigationSource,
|
|
39
|
+
GibsonAssemblySource as _GibsonAssemblySource,
|
|
40
|
+
RestrictionEnzymeDigestionSource as _RestrictionEnzymeDigestionSource,
|
|
41
|
+
SequenceCutSource as _SequenceCutSource,
|
|
42
|
+
RestrictionSequenceCut as _RestrictionSequenceCut,
|
|
43
|
+
SequenceCut as _SequenceCut,
|
|
44
|
+
InFusionSource as _InFusionSource,
|
|
45
|
+
OverlapExtensionPCRLigationSource as _OverlapExtensionPCRLigationSource,
|
|
46
|
+
InVivoAssemblySource as _InVivoAssemblySource,
|
|
47
|
+
LigationSource as _LigationSource,
|
|
48
|
+
GatewaySource as _GatewaySource,
|
|
49
|
+
GatewayReactionType,
|
|
50
|
+
HomologousRecombinationSource as _HomologousRecombinationSource,
|
|
51
|
+
CreLoxRecombinationSource as _CreLoxRecombinationSource,
|
|
52
|
+
PCRSource as _PCRSource,
|
|
53
|
+
CRISPRSource as _CRISPRSource,
|
|
54
|
+
)
|
|
55
|
+
from Bio.SeqFeature import Location, LocationParserError
|
|
56
|
+
from Bio.Restriction.Restriction import AbstractCut
|
|
57
|
+
import networkx as nx
|
|
58
|
+
from typing import List
|
|
59
|
+
|
|
60
|
+
from Bio.SeqIO.InsdcIO import _insdc_location_string as format_feature_location
|
|
61
|
+
|
|
62
|
+
from pydna.types import CutSiteType, SubFragmentRepresentationAssembly
|
|
63
|
+
from pydna.utils import create_location
|
|
64
|
+
from typing import TYPE_CHECKING
|
|
65
|
+
|
|
66
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
67
|
+
from pydna.dseqrecord import Dseqrecord
|
|
68
|
+
from pydna.primer import Primer
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Thread-local storage for ID strategy
|
|
72
|
+
_thread_local = local()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@contextmanager
def id_mode(use_python_internal_id: bool = True):
    """Temporarily select how ``get_id`` assigns ids to objects.

    The choice is stored on the module's thread-local object for the duration
    of the ``with`` block, and the value found on entry (``True`` when none was
    set) is written back on exit, even if the block raises.

    With ``use_python_internal_id=True`` ids come from the built-in ``id()``
    function. With ``use_python_internal_id=False`` the object's ``.id``
    attribute (must be a string integer) is used instead, which is useful when
    the objects already carry meaningful ids that should be kept in the
    ``SourceInput`` references (which sequences and primers are used in a
    particular source).

    Parameters
    ----------
    use_python_internal_id: bool
        If True, use Python's built-in id() function.
        If False, use the object's .id attribute (must be a string integer).

    Examples
    --------
    >>> from pydna.dseqrecord import Dseqrecord
    >>> from pydna.opencloning_models import get_id, id_mode
    >>> dseqr = Dseqrecord("ATGC")
    >>> dseqr.name = "my_sequence"
    >>> dseqr.id = "123"
    >>> get_id(dseqr) == id(dseqr)
    True
    >>> with id_mode(use_python_internal_id=False):
    ...     get_id(dseqr)
    123
    """
    flag_name = "use_python_internal_id"
    previous = getattr(_thread_local, flag_name, True)
    setattr(_thread_local, flag_name, use_python_internal_id)
    try:
        yield
    finally:
        # Restore the outer setting so nested uses unwind correctly.
        setattr(_thread_local, flag_name, previous)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def get_id(obj: "Primer | Dseqrecord") -> int:
    """Get ID using the current strategy from thread-local storage (see id_mode)

    Parameters
    ----------
    obj: Primer | Dseqrecord
        The object to get the id of

    Returns
    -------
    int: ``id(obj)`` when the python-internal strategy is active (the default),
        otherwise ``int(obj.id)``.

    Raises
    ------
    ValueError
        If the ``.id`` strategy is active and ``obj.id`` is not a string made
        only of decimal digits.

    """
    if getattr(_thread_local, "use_python_internal_id", True):
        return id(obj)

    identifier = obj.id
    # str.isdecimal() is used instead of str.isdigit(): isdigit() also accepts
    # characters such as "²", which int() cannot parse, so those ids would
    # escape this check and fail later with an unrelated error message.
    if not isinstance(identifier, str) or not identifier.isdecimal():
        raise ValueError(
            "If use_python_internal_id is False, id must be a string representing an integer, "
            f"but object {obj} has an invalid id: {identifier}"
        )
    return int(identifier)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class SequenceLocationStr(str):
    """A string representation of a sequence location, genbank-like."""

    # TODO: this should handle origin-spanning simple locations (split)
    @classmethod
    def from_biopython_location(cls, location: Location):
        """Format a Biopython ``Location`` as text and wrap it in this class."""
        location_text = format_feature_location(location, None)
        return cls(location_text)

    def to_biopython_location(self) -> Location:
        """Parse this string back into a Biopython ``Location``."""
        return Location.fromstring(self)

    @classmethod
    def field_validator(cls, v):
        """Return ``v`` as an instance of this class after checking it parses.

        Raises ``ValueError`` when ``v`` is not a string, or when parsing it
        raises ``LocationParserError``.
        """
        if not isinstance(v, str):
            raise ValueError(f"Location must be a string or a {cls.__name__}")

        candidate = cls(v)
        try:
            # Parsed only to check validity; the parsed result is discarded.
            candidate.to_biopython_location()
        except LocationParserError as err:
            raise ValueError(f"Location {v!r} is not a valid location") from err
        return candidate

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        source_type,
        handler,
    ) -> core_schema.CoreSchema:
        """Generate Pydantic core schema for SequenceLocationStr."""
        # Plain string validation first, then ``_validate`` on the result.
        string_schema = core_schema.str_schema()
        return core_schema.with_info_after_validator_function(
            cls._validate,
            string_schema,
        )

    @classmethod
    def _validate(cls, value: str, info):
        """Validate and create SequenceLocationStr instance."""
        return cls.field_validator(value)

    @classmethod
    def from_start_and_end(
        cls, start: int, end: int, seq_len: int | None = None, strand: int | None = 1
    ):
        """Build the location with ``create_location`` and convert it to text."""
        location = create_location(start, end, seq_len, strand)
        return cls.from_biopython_location(location)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class ConfiguredBaseModel(BaseModel):
    # Shared pydantic configuration for the model classes in this module.
    model_config = ConfigDict(
        # Re-validate fields when they are assigned after construction.
        validate_assignment=True,
        # Validate default values too.
        validate_default=True,
        # Reject unknown fields.
        extra="forbid",
        # Allow non-pydantic field types (this module uses Biopython classes).
        arbitrary_types_allowed=True,
        # Store enum fields as their values.
        use_enum_values=True,
        strict=False,
    )
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class TextFileSequence(_TextFileSequence):
    # Data-model ``TextFileSequence`` with a constructor that takes a Dseqrecord.

    @classmethod
    def from_dseqrecord(cls, dseqr: "Dseqrecord"):
        """Create the model from ``dseqr``.

        The id comes from ``get_id``, the overhangs from ``dseqr.seq`` and the
        file content is ``dseqr`` formatted as genbank.
        """
        dseq = dseqr.seq
        fields = {
            "id": get_id(dseqr),
            "sequence_file_format": "genbank",
            "overhang_crick_3prime": dseq.ovhg,
            "overhang_watson_3prime": dseq.watson_ovhg(),
            "file_content": dseqr.format("genbank"),
        }
        return cls(**fields)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class PrimerModel(_PrimerModel):
    # Data-model ``Primer`` with a constructor that takes a pydna Primer.

    @classmethod
    def from_primer(cls, primer: "Primer"):
        """Create the model from ``primer`` (id via ``get_id``, name, sequence text)."""
        fields = {
            "id": get_id(primer),
            "name": primer.name,
            "sequence": str(primer.seq),
        }
        return cls(**fields)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class SourceInput(ConfiguredBaseModel):
    # One input of a source. Unlike the data model, which stores an id, this
    # holds the actual Dseqrecord or Primer object.
    sequence: object

    @field_validator("sequence")
    @classmethod
    def _validate_sequence_field(cls, value: Any):
        """Accept only ``Dseqrecord`` or ``Primer`` objects.

        The check lives in a validator with local imports, rather than in the
        field annotation, to avoid circular imports.
        """
        from pydna.dseqrecord import Dseqrecord
        from pydna.primer import Primer

        if not isinstance(value, (Dseqrecord, Primer)):
            value_type = type(value)
            module = value_type.__module__
            name = value_type.__name__
            raise TypeError(
                f"sequence must be Dseqrecord or Primer; got {module}.{name}"
            )
        return value

    def to_pydantic_model(self) -> _SourceInput:
        """Return the data-model ``SourceInput`` referencing the sequence by id."""
        sequence_id = get_id(self.sequence)
        return _SourceInput(sequence=sequence_id)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class AssemblyFragment(SourceInput):
    # A source input plus the two locations and orientation used in an
    # assembly. Locations are Biopython objects here and become strings on export.

    left_location: Optional[Location] = Field(default=None)
    right_location: Optional[Location] = Field(default=None)
    reverse_complemented: bool

    @staticmethod
    def from_biopython_location(location: Location | None):
        """Convert ``location`` to a ``SequenceLocationStr``, passing ``None`` through."""
        return (
            None
            if location is None
            else SequenceLocationStr.from_biopython_location(location)
        )

    def to_pydantic_model(self) -> _AssemblyFragment:
        """Return the data-model ``AssemblyFragment`` for this fragment."""
        as_text = self.from_biopython_location
        return _AssemblyFragment(
            sequence=get_id(self.sequence),
            left_location=as_text(self.left_location),
            right_location=as_text(self.right_location),
            reverse_complemented=self.reverse_complemented,
        )
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
class Source(ConfiguredBaseModel):
    # Provenance of a sequence: the inputs it was made from, plus the
    # data-model class it is exported as.
    input: list[Union[SourceInput, AssemblyFragment]] = Field(default_factory=list)
    # Data-model class instantiated on export; subclasses override it.
    TARGET_MODEL: ClassVar[Type[_Source]] = _Source

    def _target_model(self):
        """Return the data-model class this source is exported as.

        Defaults to ``TARGET_MODEL``. Subclasses that pick the class per
        instance override this method, so the export and the history label
        always agree.
        """
        return self.TARGET_MODEL

    def input_models(self):
        """Return the data-model version of every input, in order."""
        return [fragment.to_pydantic_model() for fragment in self.input]

    def _kwargs(self, seq_id: int) -> dict:
        """Return the keyword arguments used to build the target model."""
        return {
            "id": seq_id,
            "input": self.input_models(),
        }

    def to_pydantic_model(self, seq_id: int):
        """Build the target model, using ``seq_id`` as its id."""
        return self._target_model()(**self._kwargs(seq_id))

    def add_to_history_graph(self, history_graph: nx.DiGraph, seq: "Dseqrecord"):
        """
        Add the source to the history graph.

        Adds a node for ``seq``, a node for this source, an edge between them,
        and then one node per input, recursing into inputs that are
        ``Dseqrecord`` objects with a source of their own.

        It does not use the get_id function, because it just uses it to have unique identifiers
        for graph nodes, not to store them anywhere.
        """
        from pydna.dseqrecord import Dseqrecord

        seq_node = id(seq)
        source_node = id(self)
        history_graph.add_node(seq_node, label=f"{seq.name} ({repr(seq)})")
        # Label with the class actually used on export, so a source that picks
        # its model per instance is not shown as the generic "Source".
        history_graph.add_node(source_node, label=self._target_model().__name__)
        history_graph.add_edge(seq_node, source_node)

        for fragment in self.input:
            parent = fragment.sequence
            # This could be a Primer as well, which doesn't have a source
            if isinstance(parent, Dseqrecord) and parent.source is not None:
                parent.source.add_to_history_graph(history_graph, parent)
            else:
                history_graph.add_node(
                    id(parent),
                    label=f"{parent.name} ({repr(parent)})",
                )
            history_graph.add_edge(source_node, id(parent))

    def history_string(self, seq: "Dseqrecord"):
        """
        Returns a string representation of the cloning history of the sequence.
        See dseqrecord.history() for examples.
        """
        history_graph = nx.DiGraph()
        self.add_to_history_graph(history_graph, seq)
        lines = nx.generate_network_text(
            history_graph, with_labels=True, sources=[id(seq)]
        )
        return "\n".join(lines)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class AssemblySource(Source):
    # Source for a product assembled from several fragments.
    circular: bool

    TARGET_MODEL: ClassVar[Type[_AssemblySource]] = _AssemblySource

    def _kwargs(self, seq_id: int) -> dict:
        """Extend the base keyword arguments with ``circular``."""
        base = super()._kwargs(seq_id)
        return dict(base, circular=self.circular)

    def to_pydantic_model(self, seq_id: int):
        """Build the target model, using ``seq_id`` as its id."""
        kwargs = self._kwargs(seq_id)
        return self.TARGET_MODEL(**kwargs)

    @classmethod
    def from_subfragment_representation(
        cls,
        assembly: SubFragmentRepresentationAssembly,
        fragments: list["Dseqrecord"],
        is_circular: bool,
    ):
        """Build an ``AssemblySource`` from a sub-fragment representation.

        Each entry of ``assembly`` is ``(f_index, loc1, loc2)``. ``f_index`` is
        1-based into ``fragments``, and a negative value marks the fragment as
        reverse complemented.
        """
        input_list = [
            AssemblyFragment(
                sequence=fragments[abs(f_index) - 1],
                left_location=loc1,
                right_location=loc2,
                reverse_complemented=f_index < 0,
            )
            for f_index, loc1, loc2 in assembly
        ]

        # NOTE(review): this always builds the base AssemblySource, not ``cls``,
        # even when called on a subclass. Confirm whether that is intended.
        return AssemblySource(input=input_list, circular=is_circular)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
class RestrictionAndLigationSource(AssemblySource):
    # Assembly source that also records the restriction enzymes used.
    restriction_enzymes: list[AbstractCut]

    TARGET_MODEL: ClassVar[Type[_RestrictionAndLigationSource]] = (
        _RestrictionAndLigationSource
    )

    def _kwargs(self, seq_id: int) -> dict:
        """Extend the base keyword arguments with the enzymes as strings."""
        base = super()._kwargs(seq_id)
        enzyme_names = list(map(str, self.restriction_enzymes))
        return dict(base, restriction_enzymes=enzyme_names)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
class GibsonAssemblySource(AssemblySource):
    # Assembly source exported as the data model's ``GibsonAssemblySource``.
    TARGET_MODEL: ClassVar[Type[_GibsonAssemblySource]] = _GibsonAssemblySource
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
class InFusionSource(AssemblySource):
    # Assembly source exported as the data model's ``InFusionSource``.
    TARGET_MODEL: ClassVar[Type[_InFusionSource]] = _InFusionSource
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
class OverlapExtensionPCRLigationSource(AssemblySource):
    # Assembly source exported as the data model's
    # ``OverlapExtensionPCRLigationSource``.
    TARGET_MODEL: ClassVar[Type[_OverlapExtensionPCRLigationSource]] = (
        _OverlapExtensionPCRLigationSource
    )
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
class InVivoAssemblySource(AssemblySource):
    # Assembly source exported as the data model's ``InVivoAssemblySource``.
    TARGET_MODEL: ClassVar[Type[_InVivoAssemblySource]] = _InVivoAssemblySource
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
class LigationSource(AssemblySource):
    # Assembly source exported as the data model's ``LigationSource``.
    TARGET_MODEL: ClassVar[Type[_LigationSource]] = _LigationSource
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
class GatewaySource(AssemblySource):
    # Assembly source exported as the data model's ``GatewaySource``; adds the
    # reaction type and the ``greedy`` flag.
    TARGET_MODEL: ClassVar[Type[_GatewaySource]] = _GatewaySource
    reaction_type: GatewayReactionType
    greedy: bool = Field(default=False)

    def _kwargs(self, seq_id: int) -> dict:
        """Extend the base keyword arguments with ``reaction_type`` and ``greedy``."""
        base = super()._kwargs(seq_id)
        return dict(base, reaction_type=self.reaction_type, greedy=self.greedy)
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
class HomologousRecombinationSource(AssemblySource):
    # Assembly source exported as the data model's
    # ``HomologousRecombinationSource``.
    TARGET_MODEL: ClassVar[Type[_HomologousRecombinationSource]] = (
        _HomologousRecombinationSource
    )
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
class CRISPRSource(HomologousRecombinationSource):
    # Homologous-recombination source exported as the data model's ``CRISPRSource``.
    TARGET_MODEL: ClassVar[Type[_CRISPRSource]] = _CRISPRSource
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
class CreLoxRecombinationSource(AssemblySource):
    # Assembly source exported as the data model's ``CreLoxRecombinationSource``.
    TARGET_MODEL: ClassVar[Type[_CreLoxRecombinationSource]] = (
        _CreLoxRecombinationSource
    )
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
class PCRSource(AssemblySource):
    # Assembly source exported as the data model's ``PCRSource``; adds the
    # ``add_primer_features`` flag.
    TARGET_MODEL: ClassVar[Type[_PCRSource]] = _PCRSource
    add_primer_features: bool = Field(default=False)

    def _kwargs(self, seq_id: int) -> dict:
        """Extend the base keyword arguments with ``add_primer_features``."""
        base = super()._kwargs(seq_id)
        return dict(base, add_primer_features=self.add_primer_features)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
class SequenceCutSource(Source):
    # Source for a fragment obtained by cutting a parent sequence. Each edge is
    # a cut site ``((watson, overhang), enzyme_or_none)`` or ``None``.
    left_edge: CutSiteType | None
    right_edge: CutSiteType | None

    # Exported as ENZYME_MODEL when either edge carries a restriction enzyme,
    # otherwise as BASE_MODEL (see ``_target_model``).
    BASE_MODEL: ClassVar[Type[_SequenceCutSource]] = _SequenceCutSource
    ENZYME_MODEL: ClassVar[Type[_RestrictionEnzymeDigestionSource]] = (
        _RestrictionEnzymeDigestionSource
    )

    @staticmethod
    def _cutsite_to_model(cut_site: CutSiteType | None):
        """Convert a cut site to its data-model object, passing ``None`` through."""
        if cut_site is None:
            return None

        watson, overhang = cut_site[0]
        cutter = cut_site[1]
        if not isinstance(cutter, AbstractCut):
            return _SequenceCut(cut_watson=watson, overhang=overhang)
        return _RestrictionSequenceCut(
            cut_watson=watson,
            overhang=overhang,
            restriction_enzyme=str(cutter),
        )

    @classmethod
    def from_parent(
        cls, parent: "Dseqrecord", left_edge: CutSiteType, right_edge: CutSiteType
    ):
        """Create the source for a fragment cut out of ``parent``."""
        parent_input = SourceInput(sequence=parent)
        return cls(
            input=[parent_input],
            left_edge=left_edge,
            right_edge=right_edge,
        )

    def _has_enzyme(self) -> bool:
        """Return True when either edge was made by a restriction enzyme."""
        return any(
            edge is not None and isinstance(edge[1], AbstractCut)
            for edge in (self.left_edge, self.right_edge)
        )

    def _target_model(self):
        """Return the data-model class to export as (see the class comment)."""
        if self._has_enzyme():
            return self.ENZYME_MODEL
        return self.BASE_MODEL

    def _kwargs(self, seq_id: int) -> dict:
        """Extend the base keyword arguments with both converted edges."""
        base = super()._kwargs(seq_id)
        left = self._cutsite_to_model(self.left_edge)
        right = self._cutsite_to_model(self.right_edge)
        return dict(base, left_edge=left, right_edge=right)

    def to_pydantic_model(self, seq_id: int):
        """Build the selected data-model class, using ``seq_id`` as its id."""
        model_cls = self._target_model()
        return model_cls(**self._kwargs(seq_id))
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
class CloningStrategy(_BaseCloningStrategy):
    # Data-model ``CloningStrategy`` extended with helpers that fill it from
    # pydna objects.
    #
    # For now, we don't add anything, but the classes will not have the new
    # methods if this is used
    # It will be used for validation for now
    primers: Optional[List[PrimerModel]] = Field(
        default_factory=list,
        description="""The primers that are used in the cloning strategy""",
        json_schema_extra={
            "linkml_meta": {"alias": "primers", "domain_of": ["CloningStrategy"]}
        },
    )

    def add_primer(self, primer: "Primer"):
        """Add ``primer`` unless a primer with the same id is already present."""
        if self.primers is None:
            # The field is Optional, so None is a legal value.
            self.primers = []
        primer_id = get_id(primer)
        if any(existing.id == primer_id for existing in self.primers):
            return
        self.primers.append(PrimerModel.from_primer(primer))

    def add_dseqrecord(self, dseqr: "Dseqrecord"):
        """Add ``dseqr``, its source and, recursively, everything it was made from.

        Does nothing when a sequence with the same id is already present. A
        sequence without a source gets a ``ManuallyTypedSource`` entry.
        """
        from pydna.dseqrecord import Dseqrecord

        seq_id = get_id(dseqr)
        if any(existing.id == seq_id for existing in self.sequences):
            return
        self.sequences.append(TextFileSequence.from_dseqrecord(dseqr))

        this_source: Source = dseqr.source
        if this_source is None:
            # NOTE(review): user_input="A" looks like a placeholder value.
            self.sources.append(
                _ManuallyTypedSource(id=seq_id, input=[], user_input="A")
            )
            return

        self.sources.append(this_source.to_pydantic_model(seq_id))
        for source_input in this_source.input:
            parent = source_input.sequence
            if isinstance(parent, Dseqrecord):
                self.add_dseqrecord(parent)
            else:
                self.add_primer(parent)

    def reassign_ids(self):
        """Renumber every id to 1..N in place, in ascending order of the old ids.

        Sequences, sources and primers share one mapping, so the references
        stored in each source's inputs are rewritten consistently.
        """
        primers = self.primers or []
        all_ids = (
            {seq.id for seq in self.sequences}
            | {source.id for source in self.sources}
            | {primer.id for primer in primers}
        )
        id_mappings = {
            old_id: new_id for new_id, old_id in enumerate(sorted(all_ids), start=1)
        }
        for seq in self.sequences:
            seq.id = id_mappings[seq.id]
        for primer in primers:
            primer.id = id_mappings[primer.id]
        for source in self.sources:
            source.id = id_mappings[source.id]
            for assembly_fragment in source.input:
                assembly_fragment.sequence = id_mappings[assembly_fragment.sequence]

    @classmethod
    def from_dseqrecords(cls, dseqrs: list["Dseqrecord"], description: str = ""):
        """Build a strategy containing ``dseqrs`` and their full provenance."""
        cloning_strategy = cls(sources=[], sequences=[], description=description)
        for dseqr in dseqrs:
            cloning_strategy.add_dseqrecord(dseqr)
        return cloning_strategy

    def _export_view(self):
        """Return the object to serialise.

        When python-internal ids are in use, this is a deep copy with ids
        renumbered from 1, so ``self`` is left untouched. Otherwise it is
        ``self``.
        """
        if getattr(_thread_local, "use_python_internal_id", True):
            renumbered = self.model_copy(deep=True)
            renumbered.reassign_ids()
            return renumbered
        return self

    def model_dump_json(self, *args, **kwargs):
        """Serialise to JSON, renumbering ids first when they are python-internal."""
        # Explicit two-argument super: the target may be a copy, not ``self``.
        return super(CloningStrategy, self._export_view()).model_dump_json(
            *args, **kwargs
        )

    def model_dump(self, *args, **kwargs):
        """Dump to a dict, renumbering ids first when they are python-internal."""
        return super(CloningStrategy, self._export_view()).model_dump(*args, **kwargs)
|
pydna/types.py
CHANGED
|
@@ -12,8 +12,11 @@ from typing import (
|
|
|
12
12
|
Callable as _Callable,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
|
+
# Import AbstractCut at runtime for CutSiteType
|
|
16
|
+
from Bio.Restriction.Restriction import AbstractCut as _AbstractCut
|
|
17
|
+
from pydna.crispr import _cas as __cas
|
|
18
|
+
|
|
15
19
|
if TYPE_CHECKING:
|
|
16
|
-
from Bio.Restriction import AbstractCut as _AbstractCut
|
|
17
20
|
from Bio.Restriction import RestrictionBatch as _RestrictionBatch
|
|
18
21
|
from pydna.dseq import Dseq
|
|
19
22
|
from Bio.SeqFeature import Location as _Location
|
|
@@ -25,7 +28,7 @@ DseqType = _TypeVar("DseqType", bound="Dseq")
|
|
|
25
28
|
EnzymesType = _TypeVar(
|
|
26
29
|
"EnzymesType", "_RestrictionBatch", _Iterable["_AbstractCut"], "_AbstractCut"
|
|
27
30
|
)
|
|
28
|
-
CutSiteType = _Tuple[_Tuple[int, int], _Union[
|
|
31
|
+
CutSiteType = _Tuple[_Tuple[int, int], _Union[_AbstractCut, None, __cas]]
|
|
29
32
|
AssemblyEdgeType = _Tuple[int, int, "_Location | None", "_Location | None"]
|
|
30
33
|
AssemblySubFragmentType = _Tuple[int, "_Location | None", "_Location | None"]
|
|
31
34
|
EdgeRepresentationAssembly = list[AssemblyEdgeType]
|