pydna 5.5.3__py3-none-any.whl → 5.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/dseqrecord.py CHANGED
@@ -35,6 +35,11 @@ import os as _os
35
35
  import re as _re
36
36
  import time as _time
37
37
  import datetime as _datetime
38
+ from typing import Union, TYPE_CHECKING
39
+ from pydna.opencloning_models import SequenceCutSource
40
+
41
+ if TYPE_CHECKING: # pragma: no cover
42
+ from pydna.opencloning_models import Source
38
43
 
39
44
 
40
45
  # import logging as _logging
@@ -128,6 +133,7 @@ class Dseqrecord(_SeqRecord):
128
133
  """
129
134
 
130
135
  seq: _Dseq
136
+ source: Union["Source", None] = None
131
137
 
132
138
  def __init__(
133
139
  self,
@@ -135,6 +141,7 @@ class Dseqrecord(_SeqRecord):
135
141
  *args,
136
142
  circular=None,
137
143
  n=5e-14, # mol ( = 0.05 pmol)
144
+ source=None,
138
145
  **kwargs,
139
146
  ):
140
147
  # _module_logger.info("### Dseqrecord initialized ###")
@@ -202,6 +209,7 @@ class Dseqrecord(_SeqRecord):
202
209
  self.map_target = None
203
210
  self.n = n # amount, set to 5E-14 which is 5 pmols
204
211
  self.annotations.update({"molecule_type": "DNA"})
212
+ self.source = source
205
213
 
206
214
  @classmethod
207
215
  def from_string(
@@ -256,6 +264,7 @@ class Dseqrecord(_SeqRecord):
256
264
  obj.features = record.features
257
265
  obj.map_target = None
258
266
  obj.n = n
267
+ obj.source = None
259
268
  if circular is None:
260
269
  circular = record.annotations.get("topology") == "circular"
261
270
  obj.seq = _Dseq.quick(
@@ -875,7 +884,11 @@ class Dseqrecord(_SeqRecord):
875
884
  def __eq__(self, other):
876
885
  """docstring."""
877
886
  try:
878
- if self.seq == other.seq and str(self.__dict__) == str(other.__dict__):
887
+ this_dict = self.__dict__.copy()
888
+ other_dict = other.__dict__.copy()
889
+ del this_dict["source"]
890
+ del other_dict["source"]
891
+ if self.seq == other.seq and str(this_dict) == str(other_dict):
879
892
  return True
880
893
  except AttributeError:
881
894
  pass
@@ -1419,4 +1432,39 @@ class Dseqrecord(_SeqRecord):
1419
1432
  right_edge = right_watson if right_ovhg > 0 else right_crick
1420
1433
  features = self[left_edge:right_edge].features
1421
1434
 
1422
- return Dseqrecord(dseq, features=features)
1435
+ # This will need to be generalised to all types of cuts
1436
+ source = SequenceCutSource.from_parent(self, left_cut, right_cut)
1437
+ return Dseqrecord(dseq, features=features, source=source)
1438
+
1439
def history(self):
    """
    Describe how this sequence was produced, as a text tree.

    The tree is built by the ``source`` attached to the record. A record
    without a source has no recorded provenance and yields an empty string.

    Check the documentation notebooks for extensive examples.

    Returns
    -------
    str: A string representation of the cloning history of the sequence.

    Examples
    --------
    >>> from pydna.dseqrecord import Dseqrecord
    >>> from pydna.assembly2 import gibson_assembly
    >>> fragments = [
    ...    Dseqrecord("TTTTacgatAAtgctccCCCC", circular=False, name="fragment1"),
    ...    Dseqrecord("CCCCtcatGGGG", circular=False, name="fragment2"),
    ...    Dseqrecord("GGGGatataTTTT", circular=False, name="fragment3"),
    ... ]
    >>> product, *_ = gibson_assembly(fragments, limit=4)
    >>> product.name = "product_name"
    >>> print(product.history())
    ╙── product_name (Dseqrecord(o34))
        └─╼ GibsonAssemblySource
            ├─╼ fragment1 (Dseqrecord(-21))
            ├─╼ fragment2 (Dseqrecord(-12))
            └─╼ fragment3 (Dseqrecord(-13))
    """
    source = self.source
    return "" if source is None else source.history_string(self)
@@ -0,0 +1,553 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ This module provides classes that roughly map to the `OpenCloning <https://opencloning.org>`_
4
+ data model, which is defined using `LinkML <https://linkml.io>`_, and available as a python
5
+ package `opencloning-linkml <https://pypi.org/project/opencloning-linkml/>`_. These classes
6
+ are documented there, and the ones in this module essentially replace the fields pointing to
7
+ sequences and primers (which use ids in the data model) with ``Dseqrecord`` and ``Primer``
8
+ objects, respectively. Similarly, it uses Location from ``Biopython`` instead of a string,
9
+ which is what the data model uses.
10
+
11
+ When using pydna to plan cloning, it stores the provenance of ``Dseqrecord`` objects in
12
+ their ``source`` attribute. Not all methods generate sources so far, so refer to the
13
+ documentation notebooks for examples on how to use this feature. The ``history`` method of
14
+ ``Dseqrecord`` objects can be used to get a string representation of the provenance of the
15
+ sequence. You can also use the ``CloningStrategy`` class to create a JSON representation of
16
+ the cloning strategy. That ``CloningStrategy`` can be loaded in the OpenCloning web interface
17
+ to see a representation of the cloning strategy.
18
+
19
+ """
20
+ from __future__ import annotations
21
+
22
+ from typing import Optional, Union, Any, ClassVar, Type
23
+ from pydantic_core import core_schema
24
+ from contextlib import contextmanager
25
+ from threading import local
26
+
27
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
28
+
29
+ from opencloning_linkml.datamodel import (
30
+ CloningStrategy as _BaseCloningStrategy,
31
+ Primer as _PrimerModel,
32
+ Source as _Source,
33
+ TextFileSequence as _TextFileSequence,
34
+ AssemblySource as _AssemblySource,
35
+ SourceInput as _SourceInput,
36
+ AssemblyFragment as _AssemblyFragment,
37
+ ManuallyTypedSource as _ManuallyTypedSource,
38
+ RestrictionAndLigationSource as _RestrictionAndLigationSource,
39
+ GibsonAssemblySource as _GibsonAssemblySource,
40
+ RestrictionEnzymeDigestionSource as _RestrictionEnzymeDigestionSource,
41
+ SequenceCutSource as _SequenceCutSource,
42
+ RestrictionSequenceCut as _RestrictionSequenceCut,
43
+ SequenceCut as _SequenceCut,
44
+ InFusionSource as _InFusionSource,
45
+ OverlapExtensionPCRLigationSource as _OverlapExtensionPCRLigationSource,
46
+ InVivoAssemblySource as _InVivoAssemblySource,
47
+ LigationSource as _LigationSource,
48
+ GatewaySource as _GatewaySource,
49
+ GatewayReactionType,
50
+ HomologousRecombinationSource as _HomologousRecombinationSource,
51
+ CreLoxRecombinationSource as _CreLoxRecombinationSource,
52
+ PCRSource as _PCRSource,
53
+ CRISPRSource as _CRISPRSource,
54
+ )
55
+ from Bio.SeqFeature import Location, LocationParserError
56
+ from Bio.Restriction.Restriction import AbstractCut
57
+ import networkx as nx
58
+ from typing import List
59
+
60
+ from Bio.SeqIO.InsdcIO import _insdc_location_string as format_feature_location
61
+
62
+ from pydna.types import CutSiteType, SubFragmentRepresentationAssembly
63
+ from pydna.utils import create_location
64
+ from typing import TYPE_CHECKING
65
+
66
+ if TYPE_CHECKING: # pragma: no cover
67
+ from pydna.dseqrecord import Dseqrecord
68
+ from pydna.primer import Primer
69
+
70
+
71
# Per-thread holder for the active ID strategy; read by get_id and set by id_mode.
_thread_local = local()


@contextmanager
def id_mode(use_python_internal_id: bool = True):
    """Temporarily choose how objects get their ids in the OpenCloning data model.

    Inside the ``with`` block the chosen strategy is stored in thread-local
    state; on exit (normal or through an exception) the strategy that was
    active before is restored, so these blocks can be nested.

    With ``use_python_internal_id=True`` ids come from the built-in ``id()``
    function, which is distinct for every object alive at the same time.
    With ``use_python_internal_id=False`` ids come from the object's ``.id``
    attribute (must be a string integer). This is useful when the objects
    already have meaningful ids, and you want to keep references to them in
    ``SourceInput`` objects (which sequences and primers are used in a
    particular source).

    Parameters
    ----------
    use_python_internal_id: bool
        If True, use Python's built-in id() function.
        If False, use the object's .id attribute (must be a string integer).

    Examples
    --------
    >>> from pydna.dseqrecord import Dseqrecord
    >>> from pydna.opencloning_models import get_id, id_mode
    >>> dseqr = Dseqrecord("ATGC")
    >>> dseqr.name = "my_sequence"
    >>> dseqr.id = "123"
    >>> get_id(dseqr) == id(dseqr)
    True
    >>> with id_mode(use_python_internal_id=False):
    ...     get_id(dseqr)
    123
    """
    # An unset attribute means the default strategy (python-internal ids).
    previous_mode = getattr(_thread_local, "use_python_internal_id", True)
    _thread_local.use_python_internal_id = use_python_internal_id
    try:
        yield
    finally:
        # Restore even if the body raised.
        _thread_local.use_python_internal_id = previous_mode
111
+
112
+
113
def get_id(obj: Union["Primer", "Dseqrecord"]) -> int:
    """Get ID using the current strategy from thread-local storage (see id_mode)

    Parameters
    ----------
    obj: Primer | Dseqrecord
        The object to get the id of

    Returns
    -------
    int: ``id(obj)`` while the python-internal strategy is active (the default),
        otherwise ``int(obj.id)``.

    Raises
    ------
    ValueError
        If the python-internal strategy is disabled and ``obj.id`` is not a
        string made only of decimal digits.

    """
    # An unset attribute means the default strategy (python-internal ids).
    if getattr(_thread_local, "use_python_internal_id", True):
        return id(obj)
    # ``isdecimal`` matches what ``int()`` can parse. ``isdigit`` would also let
    # characters such as "²" through, and ``int()`` would then fail with a
    # generic message instead of the explanatory one below.
    if not isinstance(obj.id, str) or not obj.id.isdecimal():
        raise ValueError(
            f"If use_python_internal_id is False, id must be a string representing an integer, "
            f"but object {obj} has an invalid id: {obj.id}"
        )
    return int(obj.id)
134
+
135
+
136
class SequenceLocationStr(str):
    """Genbank-like textual form of a sequence location.

    Behaves as a plain ``str`` and adds conversions to and from Biopython
    ``Location`` objects, plus the hook pydantic calls to validate it.
    """

    # TODO: this should handle origin-spanning simple locations (split)
    @classmethod
    def from_biopython_location(cls, location: Location):
        """Build an instance from a Biopython ``Location``."""
        location_text = format_feature_location(location, None)
        return cls(location_text)

    def to_biopython_location(self) -> Location:
        """Parse this string into a Biopython ``Location``."""
        return Location.fromstring(self)

    @classmethod
    def field_validator(cls, v):
        """Return ``v`` as an instance of this class after checking it parses.

        Raises ``ValueError`` when ``v`` is not a string or when Biopython
        cannot parse it as a location.
        """
        if not isinstance(v, str):
            raise ValueError(f"Location must be a string or a {cls.__name__}")
        candidate = cls(v)
        try:
            # Parsed only to check validity; the parsed object is discarded.
            candidate.to_biopython_location()
        except LocationParserError as err:
            raise ValueError(f"Location {v!r} is not a valid location") from err
        return candidate

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        source_type,
        handler,
    ) -> core_schema.CoreSchema:
        """Generate Pydantic core schema for SequenceLocationStr."""
        # Validate as a plain string first, then run ``_validate`` on the result.
        string_schema = core_schema.str_schema()
        return core_schema.with_info_after_validator_function(
            cls._validate,
            string_schema,
        )

    @classmethod
    def _validate(cls, value: str, info):
        """Validate and create SequenceLocationStr instance."""
        # ``info`` is part of the validator signature but is not used here.
        return cls.field_validator(value)

    @classmethod
    def from_start_and_end(
        cls, start: int, end: int, seq_len: int | None = None, strand: int | None = 1
    ):
        """Build an instance from coordinates, delegating to ``create_location``."""
        location = create_location(start, end, seq_len, strand)
        return cls.from_biopython_location(location)
180
+
181
+
182
class ConfiguredBaseModel(BaseModel):
    # Shared pydantic configuration for the wrapper models in this module.
    model_config = ConfigDict(
        # Re-run validation when an attribute is assigned after construction.
        validate_assignment=True,
        # Default values are validated as well.
        validate_default=True,
        # Unknown keyword arguments are rejected instead of being ignored.
        extra="forbid",
        # Fields may hold non-pydantic objects (e.g. Biopython locations).
        arbitrary_types_allowed=True,
        use_enum_values=True,
        strict=False,
    )
192
+
193
+
194
class TextFileSequence(_TextFileSequence):
    # OpenCloning ``TextFileSequence`` with a constructor that takes a Dseqrecord.

    @classmethod
    def from_dseqrecord(cls, dseqr: "Dseqrecord"):
        """Build the model from ``dseqr``: its id, overhangs and genbank text."""
        dseq = dseqr.seq
        fields = {
            "id": get_id(dseqr),
            "sequence_file_format": "genbank",
            "overhang_crick_3prime": dseq.ovhg,
            "overhang_watson_3prime": dseq.watson_ovhg(),
            "file_content": dseqr.format("genbank"),
        }
        return cls(**fields)
205
+
206
+
207
class PrimerModel(_PrimerModel):
    # OpenCloning ``Primer`` model with a constructor that takes a pydna Primer.

    @classmethod
    def from_primer(cls, primer: "Primer"):
        """Build the model from ``primer``: its id, name and sequence string."""
        fields = {
            "id": get_id(primer),
            "name": primer.name,
            "sequence": str(primer.seq),
        }
        return cls(**fields)
216
+
217
+
218
class SourceInput(ConfiguredBaseModel):
    # One input of a source. It holds the Dseqrecord or Primer object itself;
    # the object is reduced to an id only in ``to_pydantic_model``.
    sequence: object

    @field_validator("sequence")
    @classmethod
    def _validate_sequence_field(cls, value: Any):
        """Separate validation to avoid circular imports."""

        # Imported here, not at module level, to avoid a circular import.
        from pydna.dseqrecord import Dseqrecord
        from pydna.primer import Primer

        if not isinstance(value, (Dseqrecord, Primer)):
            module = type(value).__module__
            name = type(value).__name__
            raise TypeError(
                f"sequence must be Dseqrecord or Primer; got {module}.{name}"
            )
        return value

    def to_pydantic_model(self) -> _SourceInput:
        """Return the OpenCloning ``SourceInput`` that refers to the sequence by id."""
        sequence_id = get_id(self.sequence)
        return _SourceInput(sequence=sequence_id)
237
+
238
+
239
class AssemblyFragment(SourceInput):
    # A source input that also records the left/right locations used on the
    # fragment and whether the fragment was reverse-complemented.

    left_location: Optional[Location] = Field(default=None)
    right_location: Optional[Location] = Field(default=None)
    reverse_complemented: bool

    @staticmethod
    def from_biopython_location(location: Location | None):
        """Convert a ``Location`` to ``SequenceLocationStr``; ``None`` stays ``None``."""
        if location is not None:
            return SequenceLocationStr.from_biopython_location(location)
        return None

    def to_pydantic_model(self) -> _AssemblyFragment:
        """Return the OpenCloning ``AssemblyFragment`` with id and string locations."""
        as_text = self.from_biopython_location
        fields = {
            "sequence": get_id(self.sequence),
            "left_location": as_text(self.left_location),
            "right_location": as_text(self.right_location),
            "reverse_complemented": self.reverse_complemented,
        }
        return _AssemblyFragment(**fields)
258
+
259
+
260
class Source(ConfiguredBaseModel):
    # Base provenance record attached to a ``Dseqrecord`` through its ``source``
    # attribute. ``input`` holds the parent sequences/primers as objects, and
    # ``TARGET_MODEL`` is the OpenCloning model this record is serialised to.
    input: list[Union[SourceInput, AssemblyFragment]] = Field(default_factory=list)
    TARGET_MODEL: ClassVar[Type[_Source]] = _Source

    def input_models(self):
        """Return the OpenCloning model of every entry in ``input``."""
        return [fragment.to_pydantic_model() for fragment in self.input]

    def _kwargs(self, seq_id: int) -> dict:
        """Return the keyword arguments used to build the target model.

        Subclasses extend this dict with their own fields.
        """
        return {
            "id": seq_id,
            "input": self.input_models(),
        }

    def _target_model(self):
        """Return the OpenCloning model class that represents this source.

        Defaults to ``TARGET_MODEL``. Subclasses whose model depends on
        instance data override this hook, so that both serialisation and the
        history label use the same class.
        """
        return self.TARGET_MODEL

    def to_pydantic_model(self, seq_id: int):
        """Return the OpenCloning model of this source for sequence ``seq_id``."""
        kwargs = self._kwargs(seq_id)
        return self._target_model()(**kwargs)

    def add_to_history_graph(self, history_graph: nx.DiGraph, seq: "Dseqrecord"):
        """
        Add the source to the history graph.

        Adds a node for ``seq``, a node for this source, and one node per
        input, recursing into inputs that have a source of their own.

        It does not use the get_id function, because it just uses it to have unique identifiers
        for graph nodes, not to store them anywhere.
        """
        # Imported here, not at module level, to avoid a circular import.
        from pydna.dseqrecord import Dseqrecord

        history_graph.add_node(id(seq), label=f"{seq.name} ({repr(seq)})")
        # Label with the model actually used for this instance. Reading
        # ``TARGET_MODEL`` directly would label subclasses that select their
        # model per instance with the generic base model name.
        history_graph.add_node(id(self), label=self._target_model().__name__)
        history_graph.add_edge(id(seq), id(self))
        for fragment in self.input:
            fragment_seq = fragment.sequence
            # This could be a Primer as well, which doesn't have a source
            if isinstance(fragment_seq, Dseqrecord) and fragment_seq.source is not None:
                fragment_seq.source.add_to_history_graph(history_graph, fragment_seq)
            else:
                # No recorded provenance: the input is a leaf of the tree.
                history_graph.add_node(
                    id(fragment_seq),
                    label=f"{fragment_seq.name} ({repr(fragment_seq)})",
                )
            history_graph.add_edge(id(self), id(fragment_seq))

    def history_string(self, seq: "Dseqrecord"):
        """
        Returns a string representation of the cloning history of the sequence.
        See dseqrecord.history() for examples.
        """
        history_graph = nx.DiGraph()
        self.add_to_history_graph(history_graph, seq)
        # Render the tree rooted at ``seq``, one line per node.
        return "\n".join(
            nx.generate_network_text(history_graph, with_labels=True, sources=[id(seq)])
        )
311
+
312
+
313
class AssemblySource(Source):
    # Source for products made by joining fragments; adds whether the product
    # is circular.
    circular: bool

    TARGET_MODEL: ClassVar[Type[_AssemblySource]] = _AssemblySource

    def _kwargs(self, seq_id: int) -> dict:
        """Return the base keyword arguments plus ``circular``."""
        kwargs = dict(super()._kwargs(seq_id))
        kwargs["circular"] = self.circular
        return kwargs

    def to_pydantic_model(self, seq_id: int):
        """Return the OpenCloning model of this source for sequence ``seq_id``."""
        model_kwargs = self._kwargs(seq_id)
        return self.TARGET_MODEL(**model_kwargs)

    @classmethod
    def from_subfragment_representation(
        cls,
        assembly: SubFragmentRepresentationAssembly,
        fragments: list["Dseqrecord"],
        is_circular: bool,
    ):
        """Build a source from an assembly given as ``(index, left, right)`` triples.

        Indexes are 1-based into ``fragments``; a negative index marks the
        fragment as reverse-complemented. The result is always a plain
        ``AssemblySource``, even when called on a subclass.
        """
        input_list = [
            AssemblyFragment(
                sequence=fragments[abs(f_index) - 1],
                left_location=loc1,
                right_location=loc2,
                reverse_complemented=f_index < 0,
            )
            for f_index, loc1, loc2 in assembly
        ]
        return AssemblySource(input=input_list, circular=is_circular)
347
+
348
+
349
class RestrictionAndLigationSource(AssemblySource):
    # Assembly source that also records the restriction enzymes; they are
    # written to the target model by their string names.
    restriction_enzymes: list[AbstractCut]

    TARGET_MODEL: ClassVar[Type[_RestrictionAndLigationSource]] = (
        _RestrictionAndLigationSource
    )

    def _kwargs(self, seq_id: int) -> dict:
        """Return the assembly keyword arguments plus the enzyme names."""
        kwargs = dict(super()._kwargs(seq_id))
        kwargs["restriction_enzymes"] = list(map(str, self.restriction_enzymes))
        return kwargs
361
+
362
+
363
class GibsonAssemblySource(AssemblySource):
    # Assembly source serialised as the OpenCloning ``GibsonAssemblySource`` model.
    TARGET_MODEL: ClassVar[Type[_GibsonAssemblySource]] = _GibsonAssemblySource
365
+
366
+
367
class InFusionSource(AssemblySource):
    # Assembly source serialised as the OpenCloning ``InFusionSource`` model.
    TARGET_MODEL: ClassVar[Type[_InFusionSource]] = _InFusionSource
369
+
370
+
371
class OverlapExtensionPCRLigationSource(AssemblySource):
    # Assembly source serialised as the OpenCloning
    # ``OverlapExtensionPCRLigationSource`` model.
    TARGET_MODEL: ClassVar[Type[_OverlapExtensionPCRLigationSource]] = (
        _OverlapExtensionPCRLigationSource
    )
375
+
376
+
377
class InVivoAssemblySource(AssemblySource):
    # Assembly source serialised as the OpenCloning ``InVivoAssemblySource`` model.
    TARGET_MODEL: ClassVar[Type[_InVivoAssemblySource]] = _InVivoAssemblySource
379
+
380
+
381
class LigationSource(AssemblySource):
    # Assembly source serialised as the OpenCloning ``LigationSource`` model.
    TARGET_MODEL: ClassVar[Type[_LigationSource]] = _LigationSource
383
+
384
+
385
class GatewaySource(AssemblySource):
    # Assembly source for Gateway reactions; adds the reaction type and the
    # ``greedy`` flag to the serialised model.
    TARGET_MODEL: ClassVar[Type[_GatewaySource]] = _GatewaySource
    reaction_type: GatewayReactionType
    greedy: bool = Field(default=False)

    def _kwargs(self, seq_id: int) -> dict:
        """Return the assembly keyword arguments plus ``reaction_type`` and ``greedy``."""
        kwargs = dict(super()._kwargs(seq_id))
        kwargs["reaction_type"] = self.reaction_type
        kwargs["greedy"] = self.greedy
        return kwargs
396
+
397
+
398
class HomologousRecombinationSource(AssemblySource):
    # Assembly source serialised as the OpenCloning
    # ``HomologousRecombinationSource`` model.
    TARGET_MODEL: ClassVar[Type[_HomologousRecombinationSource]] = (
        _HomologousRecombinationSource
    )
402
+
403
+
404
class CRISPRSource(HomologousRecombinationSource):
    # Homologous-recombination source serialised as the OpenCloning
    # ``CRISPRSource`` model.
    TARGET_MODEL: ClassVar[Type[_CRISPRSource]] = _CRISPRSource
406
+
407
+
408
class CreLoxRecombinationSource(AssemblySource):
    # Assembly source serialised as the OpenCloning
    # ``CreLoxRecombinationSource`` model.
    TARGET_MODEL: ClassVar[Type[_CreLoxRecombinationSource]] = (
        _CreLoxRecombinationSource
    )
412
+
413
+
414
class PCRSource(AssemblySource):
    # Assembly source for PCR products; adds the ``add_primer_features`` flag
    # to the serialised model.
    TARGET_MODEL: ClassVar[Type[_PCRSource]] = _PCRSource
    add_primer_features: bool = Field(default=False)

    def _kwargs(self, seq_id: int) -> dict:
        """Return the assembly keyword arguments plus ``add_primer_features``."""
        kwargs = dict(super()._kwargs(seq_id))
        kwargs["add_primer_features"] = self.add_primer_features
        return kwargs
423
+
424
+
425
class SequenceCutSource(Source):
    # Source for a fragment obtained by cutting a parent sequence. Each edge is
    # a cut site ``((watson, overhang), enzyme_or_None)``, or ``None``.
    left_edge: CutSiteType | None
    right_edge: CutSiteType | None

    # The serialised model depends on whether an enzyme made the cut.
    BASE_MODEL: ClassVar[Type[_SequenceCutSource]] = _SequenceCutSource
    ENZYME_MODEL: ClassVar[Type[_RestrictionEnzymeDigestionSource]] = (
        _RestrictionEnzymeDigestionSource
    )

    @staticmethod
    def _cutsite_to_model(cut_site: CutSiteType | None):
        """Convert a cut site to its OpenCloning model; ``None`` stays ``None``."""
        if cut_site is None:
            return None
        watson, overhang = cut_site[0]
        cutter = cut_site[1]
        if not isinstance(cutter, AbstractCut):
            return _SequenceCut(cut_watson=watson, overhang=overhang)
        return _RestrictionSequenceCut(
            cut_watson=watson,
            overhang=overhang,
            restriction_enzyme=str(cutter),
        )

    @classmethod
    def from_parent(
        cls, parent: "Dseqrecord", left_edge: CutSiteType, right_edge: CutSiteType
    ):
        """Build the source of a fragment cut out of ``parent`` at the two edges."""
        parent_input = SourceInput(sequence=parent)
        return cls(
            input=[parent_input],
            left_edge=left_edge,
            right_edge=right_edge,
        )

    def _has_enzyme(self) -> bool:
        """Return True when at least one edge was produced by a restriction enzyme."""
        return any(
            edge is not None and isinstance(edge[1], AbstractCut)
            for edge in (self.left_edge, self.right_edge)
        )

    def _target_model(self):
        """Return the enzyme model if any edge has an enzyme, else the base model."""
        if self._has_enzyme():
            return self.ENZYME_MODEL
        return self.BASE_MODEL

    def _kwargs(self, seq_id: int) -> dict:
        """Return the base keyword arguments plus both edges as models."""
        kwargs = dict(super()._kwargs(seq_id))
        kwargs["left_edge"] = self._cutsite_to_model(self.left_edge)
        kwargs["right_edge"] = self._cutsite_to_model(self.right_edge)
        return kwargs

    def to_pydantic_model(self, seq_id: int):
        """Return the OpenCloning model of this cut for sequence ``seq_id``."""
        model_cls = self._target_model()
        return model_cls(**self._kwargs(seq_id))
476
+
477
+
478
class CloningStrategy(_BaseCloningStrategy):

    # Nothing is added to the data model itself for now. The ``primers`` field
    # is declared with ``PrimerModel`` entries and is used for validation;
    # objects built through the base class will not have the extra methods.
    primers: Optional[List[PrimerModel]] = Field(
        default_factory=list,
        description="""The primers that are used in the cloning strategy""",
        json_schema_extra={
            "linkml_meta": {"alias": "primers", "domain_of": ["CloningStrategy"]}
        },
    )

    def add_primer(self, primer: "Primer"):
        """Add ``primer`` to the strategy unless its id is already present."""
        known_ids = {model.id for model in self.primers}
        if get_id(primer) not in known_ids:
            self.primers.append(PrimerModel.from_primer(primer))

    def add_dseqrecord(self, dseqr: "Dseqrecord"):
        """Add ``dseqr``, its source and, recursively, everything it was made from.

        A sequence whose id is already present is skipped. A sequence without
        a source gets a ``ManuallyTypedSource`` placeholder.
        """
        # Imported here, not at module level, to avoid a circular import.
        from pydna.dseqrecord import Dseqrecord

        known_ids = {model.id for model in self.sequences}
        if get_id(dseqr) in known_ids:
            return
        self.sequences.append(TextFileSequence.from_dseqrecord(dseqr))

        if dseqr.source is None:
            self.sources.append(
                _ManuallyTypedSource(id=get_id(dseqr), input=[], user_input="A")
            )
            return

        self.sources.append(dseqr.source.to_pydantic_model(get_id(dseqr)))
        this_source: Source = dseqr.source
        for source_input in this_source.input:
            parent = source_input.sequence
            if isinstance(parent, Dseqrecord):
                self.add_dseqrecord(parent)
            else:
                # Inputs that are not Dseqrecords are handled as primers.
                self.add_primer(parent)

    def reassign_ids(self):
        """Renumber every id in place to 1..N, keeping the original ordering.

        Sequence, source and primer ids share one mapping, and the references
        held in each source's inputs are rewritten with it.
        """
        used_ids = set()
        for collection in (self.sequences, self.sources, self.primers):
            used_ids.update(item.id for item in collection)
        new_id_of = {
            old_id: new_id for new_id, old_id in enumerate(sorted(used_ids), start=1)
        }

        for seq in self.sequences:
            seq.id = new_id_of[seq.id]
        for primer in self.primers:
            primer.id = new_id_of[primer.id]
        for source in self.sources:
            source.id = new_id_of[source.id]
            for assembly_fragment in source.input:
                assembly_fragment.sequence = new_id_of[assembly_fragment.sequence]

    @classmethod
    def from_dseqrecords(cls, dseqrs: list["Dseqrecord"], description: str = ""):
        """Build a strategy containing ``dseqrs`` and their full provenance."""
        strategy = cls(sources=[], sequences=[], description=description)
        for record in dseqrs:
            strategy.add_dseqrecord(record)
        return strategy

    def _renumbered_copy(self):
        """Return a deep copy of this strategy whose ids have been reassigned."""
        clone = self.__deepcopy__()
        clone.reassign_ids()
        return clone

    def model_dump_json(self, *args, **kwargs):
        """Dump to JSON; python-internal ids are renumbered on a copy first."""
        if not getattr(_thread_local, "use_python_internal_id", True):
            return super().model_dump_json(*args, **kwargs)
        # Call the parent implementation on the copy: calling this override
        # again would copy and renumber without end.
        cs = self._renumbered_copy()
        return super(CloningStrategy, cs).model_dump_json(*args, **kwargs)

    def model_dump(self, *args, **kwargs):
        """Dump to a dict; python-internal ids are renumbered on a copy first."""
        if not getattr(_thread_local, "use_python_internal_id", True):
            return super().model_dump(*args, **kwargs)
        cs = self._renumbered_copy()
        return super(CloningStrategy, cs).model_dump(*args, **kwargs)
pydna/types.py CHANGED
@@ -12,8 +12,11 @@ from typing import (
12
12
  Callable as _Callable,
13
13
  )
14
14
 
15
+ # Import AbstractCut at runtime for CutSiteType
16
+ from Bio.Restriction.Restriction import AbstractCut as _AbstractCut
17
+ from pydna.crispr import _cas as __cas
18
+
15
19
  if TYPE_CHECKING:
16
- from Bio.Restriction import AbstractCut as _AbstractCut
17
20
  from Bio.Restriction import RestrictionBatch as _RestrictionBatch
18
21
  from pydna.dseq import Dseq
19
22
  from Bio.SeqFeature import Location as _Location
@@ -25,7 +28,7 @@ DseqType = _TypeVar("DseqType", bound="Dseq")
25
28
  EnzymesType = _TypeVar(
26
29
  "EnzymesType", "_RestrictionBatch", _Iterable["_AbstractCut"], "_AbstractCut"
27
30
  )
28
- CutSiteType = _Tuple[_Tuple[int, int], _Union["_AbstractCut", None]]
31
+ CutSiteType = _Tuple[_Tuple[int, int], _Union[_AbstractCut, None, __cas]]
29
32
  AssemblyEdgeType = _Tuple[int, int, "_Location | None", "_Location | None"]
30
33
  AssemblySubFragmentType = _Tuple[int, "_Location | None", "_Location | None"]
31
34
  EdgeRepresentationAssembly = list[AssemblyEdgeType]