opencloning 0.4.7__py3-none-any.whl → 0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencloning/app_settings.py +7 -0
- opencloning/batch_cloning/pombe/__init__.py +2 -2
- opencloning/batch_cloning/pombe/pombe_clone.py +31 -112
- opencloning/batch_cloning/pombe/pombe_summary.py +20 -8
- opencloning/batch_cloning/ziqiang_et_al2024/__init__.py +8 -8
- opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json +2 -9
- opencloning/bug_fixing/backend_v0_3.py +13 -5
- opencloning/catalogs/__init__.py +36 -0
- opencloning/catalogs/igem2024.yaml +2172 -0
- opencloning/catalogs/openDNA_collections.yaml +1161 -0
- opencloning/catalogs/readme.txt +1 -0
- opencloning/catalogs/seva.tsv +231 -0
- opencloning/catalogs/snapgene.yaml +2837 -0
- opencloning/dna_functions.py +155 -158
- opencloning/dna_utils.py +45 -62
- opencloning/ebic/primer_design.py +24 -14
- opencloning/endpoints/annotation.py +9 -13
- opencloning/endpoints/assembly.py +157 -378
- opencloning/endpoints/endpoint_utils.py +52 -0
- opencloning/endpoints/external_import.py +169 -124
- opencloning/endpoints/no_assembly.py +23 -39
- opencloning/endpoints/no_input.py +32 -47
- opencloning/endpoints/other.py +1 -1
- opencloning/endpoints/primer_design.py +23 -17
- opencloning/http_client.py +2 -2
- opencloning/ncbi_requests.py +113 -47
- opencloning/primer3_functions.py +3 -3
- opencloning/primer_design.py +1 -1
- opencloning/pydantic_models.py +10 -510
- opencloning/request_examples.py +10 -22
- opencloning/temp_functions.py +50 -0
- {opencloning-0.4.7.dist-info → opencloning-0.5.dist-info}/METADATA +18 -8
- opencloning-0.5.dist-info/RECORD +51 -0
- {opencloning-0.4.7.dist-info → opencloning-0.5.dist-info}/WHEEL +1 -1
- opencloning/cre_lox.py +0 -116
- opencloning/gateway.py +0 -154
- opencloning-0.4.7.dist-info/RECORD +0 -45
- {opencloning-0.4.7.dist-info → opencloning-0.5.dist-info}/licenses/LICENSE +0 -0
opencloning/pydantic_models.py
CHANGED
|
@@ -1,516 +1,16 @@
|
|
|
1
|
-
from pydantic import BaseModel, Field,
|
|
2
|
-
from typing import Optional, List
|
|
3
|
-
|
|
1
|
+
from pydantic import BaseModel, Field, field_validator
|
|
2
|
+
from typing import Optional, List
|
|
3
|
+
|
|
4
4
|
from ._version import __version__
|
|
5
5
|
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
Location,
|
|
9
|
-
SimpleLocation,
|
|
10
|
-
FeatureLocation as BioFeatureLocation,
|
|
11
|
-
LocationParserError,
|
|
12
|
-
)
|
|
13
|
-
from Bio.SeqIO.InsdcIO import _insdc_location_string as format_feature_location
|
|
14
|
-
from Bio.Restriction.Restriction import RestrictionType, RestrictionBatch
|
|
15
|
-
from Bio.SeqRecord import SeqRecord as _SeqRecord
|
|
16
|
-
from pydna.primer import Primer as _PydnaPrimer
|
|
6
|
+
from pydna.opencloning_models import SequenceLocationStr
|
|
7
|
+
|
|
17
8
|
from opencloning_linkml.datamodel import (
|
|
18
|
-
OligoHybridizationSource as _OligoHybridizationSource,
|
|
19
|
-
PolymeraseExtensionSource as _PolymeraseExtensionSource,
|
|
20
|
-
GenomeCoordinatesSource as _GenomeCoordinatesSource,
|
|
21
|
-
RepositoryIdSource as _RepositoryIdSource,
|
|
22
|
-
ManuallyTypedSource as _ManuallyTypedSource,
|
|
23
|
-
UploadedFileSource as _UploadedFileSource,
|
|
24
|
-
SequenceFileFormat as _SequenceFileFormat,
|
|
25
|
-
RestrictionEnzymeDigestionSource as _RestrictionEnzymeDigestionSource,
|
|
26
|
-
RestrictionSequenceCut as _RestrictionSequenceCut,
|
|
27
|
-
TextFileSequence as _TextFileSequence,
|
|
28
|
-
AssemblySource as _AssemblySource,
|
|
29
|
-
PCRSource as _PCRSource,
|
|
30
|
-
HomologousRecombinationSource as _HomologousRecombinationSource,
|
|
31
|
-
GibsonAssemblySource as _GibsonAssemblySource,
|
|
32
|
-
RestrictionAndLigationSource as _RestrictionAndLigationSource,
|
|
33
|
-
LigationSource as _LigationSource,
|
|
34
|
-
CRISPRSource as _CRISPRSource,
|
|
35
|
-
Primer as _Primer,
|
|
36
|
-
AssemblyFragment as _AssemblyFragment,
|
|
37
|
-
AddgeneIdSource as _AddgeneIdSource,
|
|
38
|
-
WekWikGeneIdSource as _WekWikGeneIdSource,
|
|
39
|
-
BenchlingUrlSource as _BenchlingUrlSource,
|
|
40
9
|
CloningStrategy as _CloningStrategy,
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
GatewaySource as _GatewaySource,
|
|
45
|
-
InFusionSource as _InFusionSource,
|
|
46
|
-
AnnotationSource as _AnnotationSource,
|
|
47
|
-
IGEMSource as _IGEMSource,
|
|
48
|
-
ReverseComplementSource as _ReverseComplementSource,
|
|
49
|
-
SEVASource as _SEVASource,
|
|
50
|
-
CreLoxRecombinationSource as _CreLoxRecombinationSource,
|
|
51
|
-
InVivoAssemblySource as _InVivoAssemblySource,
|
|
52
|
-
SourceInput as _SourceInput,
|
|
53
|
-
OpenDNACollectionsSource as _OpenDNACollectionsSource,
|
|
54
|
-
)
|
|
55
|
-
from pydna.assembly2 import (
|
|
56
|
-
edge_representation2subfragment_representation,
|
|
57
|
-
subfragment_representation2edge_representation,
|
|
10
|
+
Primer as PrimerModel,
|
|
11
|
+
TextFileSequence as _TextFileSequence,
|
|
12
|
+
Source as _Source,
|
|
58
13
|
)
|
|
59
|
-
from pydna.utils import location_boundaries, shift_location
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
SequenceFileFormat = _SequenceFileFormat
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class TextFileSequence(_TextFileSequence):
|
|
66
|
-
pass
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
class SourceInput(_SourceInput):
|
|
70
|
-
pass
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
class PrimerModel(_Primer):
|
|
74
|
-
"""Called PrimerModel not to be confused with the class from pydna."""
|
|
75
|
-
|
|
76
|
-
def to_pydna_primer(self) -> _PydnaPrimer:
|
|
77
|
-
"""
|
|
78
|
-
Convert the PrimerModel to a pydna Primer object.
|
|
79
|
-
|
|
80
|
-
Returns:
|
|
81
|
-
_PydnaPrimer: A pydna Primer object with the same sequence and name as the PrimerModel.
|
|
82
|
-
"""
|
|
83
|
-
return _PydnaPrimer(self.sequence, name=self.name, id=str(self.id))
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
class SeqFeatureModel(BaseModel):
|
|
87
|
-
type: str
|
|
88
|
-
qualifiers: dict[str, list[str]] = {}
|
|
89
|
-
location: str
|
|
90
|
-
|
|
91
|
-
def convert_to_seq_feature(self) -> SeqFeature:
|
|
92
|
-
return SeqFeature(location=Location.fromstring(self.location), type=self.type, qualifiers=self.qualifiers)
|
|
93
|
-
|
|
94
|
-
def read_from_seq_feature(sf: SeqFeature) -> 'SeqFeatureModel':
|
|
95
|
-
return SeqFeatureModel(
|
|
96
|
-
type=sf.type, qualifiers=sf.qualifiers, location=format_feature_location(sf.location, None)
|
|
97
|
-
)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
# Sources =========================================
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def input_discriminator(v) -> str | None:
|
|
104
|
-
"""
|
|
105
|
-
Discriminator that yields SourceInput by default
|
|
106
|
-
"""
|
|
107
|
-
if isinstance(v, dict):
|
|
108
|
-
input_type = v.get('type', None)
|
|
109
|
-
if input_type is None:
|
|
110
|
-
return 'SourceInput'
|
|
111
|
-
else:
|
|
112
|
-
return input_type
|
|
113
|
-
elif isinstance(v, SourceInput):
|
|
114
|
-
return v.type
|
|
115
|
-
return None
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
class SourceCommonClass(BaseModel):
|
|
119
|
-
input: Optional[List[SourceInput]] = Field(
|
|
120
|
-
default_factory=list,
|
|
121
|
-
description="""The sequences that are an input to this source. If the source represents external import of a sequence, it's empty.""",
|
|
122
|
-
json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source']}},
|
|
123
|
-
)
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
class ManuallyTypedSource(SourceCommonClass, _ManuallyTypedSource):
|
|
127
|
-
"""Describes a sequence that is typed manually by the user"""
|
|
128
|
-
|
|
129
|
-
@model_validator(mode='after')
|
|
130
|
-
def validate_circularity(self):
|
|
131
|
-
# Do the validation instead of printing
|
|
132
|
-
if self.circular:
|
|
133
|
-
assert self.overhang_crick_3prime == 0, 'Circular sequences cannot have overhangs.'
|
|
134
|
-
assert self.overhang_watson_3prime == 0, 'Circular sequences cannot have overhangs.'
|
|
135
|
-
return self
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
class UploadedFileSource(SourceCommonClass, _UploadedFileSource):
|
|
139
|
-
coordinates: Optional['SequenceLocationStr'] = Field(
|
|
140
|
-
default=None,
|
|
141
|
-
description="""If provided, coordinates within the sequence of the file to extract a subsequence""",
|
|
142
|
-
json_schema_extra={'linkml_meta': {'alias': 'coordinates', 'domain_of': ['UploadedFileSource']}},
|
|
143
|
-
)
|
|
144
|
-
|
|
145
|
-
@field_validator('coordinates', mode='before')
|
|
146
|
-
def parse_coordinates(cls, v):
|
|
147
|
-
if v is None:
|
|
148
|
-
return None
|
|
149
|
-
return SequenceLocationStr.field_validator(v)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
class RepositoryIdSource(SourceCommonClass, _RepositoryIdSource):
|
|
153
|
-
pass
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
class AddgeneIdSource(SourceCommonClass, _AddgeneIdSource):
|
|
157
|
-
# TODO: add this to LinkML
|
|
158
|
-
# repository_name: RepositoryName = RepositoryName('addgene')
|
|
159
|
-
pass
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
class WekWikGeneIdSource(SourceCommonClass, _WekWikGeneIdSource):
|
|
163
|
-
pass
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
class BenchlingUrlSource(SourceCommonClass, _BenchlingUrlSource):
|
|
167
|
-
pass
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
class SnapGenePlasmidSource(SourceCommonClass, _SnapGenePlasmidSource):
|
|
171
|
-
pass
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
class EuroscarfSource(SourceCommonClass, _EuroscarfSource):
|
|
175
|
-
pass
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
class IGEMSource(SourceCommonClass, _IGEMSource):
|
|
179
|
-
|
|
180
|
-
@model_validator(mode='after')
|
|
181
|
-
def validate_repository_id(self):
|
|
182
|
-
file_name = self.sequence_file_url.split('/')[-1]
|
|
183
|
-
assert file_name.endswith('.gb'), 'The sequence file must be a GenBank file'
|
|
184
|
-
return self
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
class OpenDNACollectionsSource(SourceCommonClass, _OpenDNACollectionsSource):
|
|
188
|
-
pass
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
class SEVASource(SourceCommonClass, _SEVASource):
|
|
192
|
-
pass
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
class GenomeCoordinatesSource(SourceCommonClass, _GenomeCoordinatesSource):
|
|
196
|
-
pass
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
class AnnotationSource(SourceCommonClass, _AnnotationSource):
|
|
200
|
-
pass
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
class ReverseComplementSource(SourceCommonClass, _ReverseComplementSource):
|
|
204
|
-
pass
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
class RestrictionSequenceCut(_RestrictionSequenceCut):
|
|
208
|
-
|
|
209
|
-
@classmethod
|
|
210
|
-
def from_cutsite_tuple(cls, cutsite_tuple: tuple[tuple[int, int], RestrictionType]):
|
|
211
|
-
cut_watson, ovhg = cutsite_tuple[0]
|
|
212
|
-
enzyme = str(cutsite_tuple[1])
|
|
213
|
-
|
|
214
|
-
return cls(
|
|
215
|
-
cut_watson=cut_watson,
|
|
216
|
-
overhang=ovhg,
|
|
217
|
-
restriction_enzyme=enzyme,
|
|
218
|
-
)
|
|
219
|
-
|
|
220
|
-
def to_cutsite_tuple(self) -> tuple[tuple[int, int], RestrictionType]:
|
|
221
|
-
restriction_enzyme = RestrictionBatch(first=[self.restriction_enzyme]).pop()
|
|
222
|
-
return ((self.cut_watson, self.overhang), restriction_enzyme)
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
class RestrictionEnzymeDigestionSource(SourceCommonClass, _RestrictionEnzymeDigestionSource):
|
|
226
|
-
"""Documents a restriction enzyme digestion, and the selection of one of the fragments."""
|
|
227
|
-
|
|
228
|
-
# TODO: maybe a better way? They have to be redefined here because
|
|
229
|
-
# we have overriden the original class
|
|
230
|
-
|
|
231
|
-
left_edge: Optional[RestrictionSequenceCut] = Field(None)
|
|
232
|
-
right_edge: Optional[RestrictionSequenceCut] = Field(None)
|
|
233
|
-
|
|
234
|
-
@classmethod
|
|
235
|
-
def from_cutsites(
|
|
236
|
-
cls,
|
|
237
|
-
left: tuple[tuple[int, int], RestrictionType],
|
|
238
|
-
right: tuple[tuple[int, int], RestrictionType],
|
|
239
|
-
input: list[int],
|
|
240
|
-
id: int,
|
|
241
|
-
):
|
|
242
|
-
return cls(
|
|
243
|
-
id=id,
|
|
244
|
-
left_edge=None if left is None else RestrictionSequenceCut.from_cutsite_tuple(left),
|
|
245
|
-
right_edge=None if right is None else RestrictionSequenceCut.from_cutsite_tuple(right),
|
|
246
|
-
input=input,
|
|
247
|
-
)
|
|
248
|
-
|
|
249
|
-
# TODO could be made into a computed field?
|
|
250
|
-
def get_enzymes(self) -> list[str]:
|
|
251
|
-
"""Returns the enzymes used in the digestion"""
|
|
252
|
-
out = list()
|
|
253
|
-
if self.left_edge is not None:
|
|
254
|
-
out.append(self.left_edge.restriction_enzyme)
|
|
255
|
-
if self.right_edge is not None:
|
|
256
|
-
out.append(self.right_edge.restriction_enzyme)
|
|
257
|
-
# Unique values, sorted the same way
|
|
258
|
-
return sorted(list(set(out)), key=out.index)
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
class SequenceLocationStr(str):
|
|
262
|
-
"""A string representation of a sequence location, genbank-like."""
|
|
263
|
-
|
|
264
|
-
# TODO: this should handle origin-spanning simple locations (splitted)
|
|
265
|
-
@classmethod
|
|
266
|
-
def from_biopython_location(cls, location: Location):
|
|
267
|
-
return cls(format_feature_location(location, None))
|
|
268
|
-
|
|
269
|
-
@classmethod
|
|
270
|
-
def from_start_and_end(cls, start: int, end: int, seq_len: int | None = None, strand: int | None = 1):
|
|
271
|
-
if end >= start:
|
|
272
|
-
return cls.from_biopython_location(SimpleLocation(start, end, strand=strand))
|
|
273
|
-
else:
|
|
274
|
-
if seq_len is None:
|
|
275
|
-
raise ValueError('Sequence length is required to handle origin-spanning simple locations')
|
|
276
|
-
unwrapped_location = SimpleLocation(start, end + seq_len, strand=strand)
|
|
277
|
-
wrapped_location = shift_location(unwrapped_location, 0, seq_len)
|
|
278
|
-
return cls.from_biopython_location(wrapped_location)
|
|
279
|
-
|
|
280
|
-
def to_biopython_location(self) -> BioFeatureLocation:
|
|
281
|
-
return Location.fromstring(self)
|
|
282
|
-
|
|
283
|
-
@classmethod
|
|
284
|
-
def field_validator(cls, v):
|
|
285
|
-
if isinstance(v, str):
|
|
286
|
-
value = cls(v)
|
|
287
|
-
try:
|
|
288
|
-
value.to_biopython_location()
|
|
289
|
-
except LocationParserError:
|
|
290
|
-
raise ValueError(f'Location "{v}" is not a valid location')
|
|
291
|
-
return value
|
|
292
|
-
raise ValueError(f'Location must be a string or a {cls.__name__}')
|
|
293
|
-
|
|
294
|
-
@property
|
|
295
|
-
def start(self) -> int:
|
|
296
|
-
return location_boundaries(self.to_biopython_location())[0]
|
|
297
|
-
|
|
298
|
-
@property
|
|
299
|
-
def end(self) -> int:
|
|
300
|
-
return location_boundaries(self.to_biopython_location())[1]
|
|
301
|
-
|
|
302
|
-
@classmethod
|
|
303
|
-
def __get_pydantic_core_schema__(
|
|
304
|
-
cls,
|
|
305
|
-
source_type,
|
|
306
|
-
handler,
|
|
307
|
-
) -> core_schema.CoreSchema:
|
|
308
|
-
"""Generate Pydantic core schema for SequenceLocationStr."""
|
|
309
|
-
return core_schema.with_info_after_validator_function(
|
|
310
|
-
cls._validate,
|
|
311
|
-
core_schema.str_schema(),
|
|
312
|
-
)
|
|
313
|
-
|
|
314
|
-
@classmethod
|
|
315
|
-
def _validate(cls, value: str, info):
|
|
316
|
-
"""Validate and create SequenceLocationStr instance."""
|
|
317
|
-
return cls.field_validator(value)
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
class AssemblyFragment(_AssemblyFragment, SourceInput):
|
|
321
|
-
left_location: Optional[SequenceLocationStr] = None
|
|
322
|
-
right_location: Optional[SequenceLocationStr] = None
|
|
323
|
-
|
|
324
|
-
def to_fragment_tuple(self, fragments) -> tuple[int, Location, Location]:
|
|
325
|
-
fragment_ids = [int(f.id) for f in fragments]
|
|
326
|
-
# By convention, these have no strand
|
|
327
|
-
left_loc = None if self.left_location is None else self.left_location.to_biopython_location()
|
|
328
|
-
right_loc = None if self.right_location is None else self.right_location.to_biopython_location()
|
|
329
|
-
if left_loc is not None:
|
|
330
|
-
left_loc.strand = None
|
|
331
|
-
if right_loc is not None:
|
|
332
|
-
right_loc.strand = None
|
|
333
|
-
|
|
334
|
-
return (
|
|
335
|
-
(fragment_ids.index(self.sequence) + 1) * (-1 if self.reverse_complemented else 1),
|
|
336
|
-
left_loc,
|
|
337
|
-
right_loc,
|
|
338
|
-
)
|
|
339
|
-
|
|
340
|
-
@field_validator('left_location', 'right_location', mode='before')
|
|
341
|
-
def parse_location(cls, v):
|
|
342
|
-
if v is None:
|
|
343
|
-
return None
|
|
344
|
-
return SequenceLocationStr.field_validator(v)
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
class AssemblySourceCommonClass(SourceCommonClass):
|
|
348
|
-
# TODO: This is different in the LinkML model, because there it is not required,
|
|
349
|
-
# and here we make it default to list.
|
|
350
|
-
input: Optional[
|
|
351
|
-
List[
|
|
352
|
-
Annotated[
|
|
353
|
-
Union[
|
|
354
|
-
Annotated[SourceInput, Tag('SourceInput')],
|
|
355
|
-
Annotated['AssemblyFragment', Tag('AssemblyFragment')],
|
|
356
|
-
],
|
|
357
|
-
Discriminator(input_discriminator),
|
|
358
|
-
]
|
|
359
|
-
]
|
|
360
|
-
] = Field(
|
|
361
|
-
default_factory=list,
|
|
362
|
-
description="""The inputs to this source. If the source represents external import of a sequence, it's empty.""",
|
|
363
|
-
json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source'], 'slot_uri': 'schema:object'}},
|
|
364
|
-
)
|
|
365
|
-
|
|
366
|
-
def minimal_overlap(self):
|
|
367
|
-
"""Returns the minimal overlap between the fragments in the assembly"""
|
|
368
|
-
all_overlaps = list()
|
|
369
|
-
for f in self.input:
|
|
370
|
-
if f.left_location is not None:
|
|
371
|
-
all_overlaps.append(f.left_location.end - f.left_location.start)
|
|
372
|
-
if f.right_location is not None:
|
|
373
|
-
all_overlaps.append(f.right_location.end - f.right_location.start)
|
|
374
|
-
return min(all_overlaps)
|
|
375
|
-
|
|
376
|
-
def get_assembly_plan(self, fragments: list[_SeqRecord]) -> tuple:
|
|
377
|
-
"""Returns the assembly plan"""
|
|
378
|
-
subf = [f.to_fragment_tuple(fragments) for f in self.input if f.type == 'AssemblyFragment']
|
|
379
|
-
return subfragment_representation2edge_representation(subf, self.circular)
|
|
380
|
-
|
|
381
|
-
def is_assembly_complete(self) -> bool:
|
|
382
|
-
"""Returns True if the assembly is complete"""
|
|
383
|
-
return any(f.type == 'AssemblyFragment' for f in self.input)
|
|
384
|
-
|
|
385
|
-
@classmethod
|
|
386
|
-
def from_assembly(
|
|
387
|
-
cls,
|
|
388
|
-
assembly: list[tuple[int, int, Location, Location]],
|
|
389
|
-
id: int,
|
|
390
|
-
circular: bool,
|
|
391
|
-
fragments: list[_SeqRecord],
|
|
392
|
-
**kwargs,
|
|
393
|
-
):
|
|
394
|
-
|
|
395
|
-
# Replace the positions with the actual ids
|
|
396
|
-
fragment_ids = [int(f.id) for f in fragments]
|
|
397
|
-
|
|
398
|
-
# Here the ids are still the positions in the fragments list
|
|
399
|
-
fragment_assembly_positions = edge_representation2subfragment_representation(assembly, circular)
|
|
400
|
-
assembly_fragments = [
|
|
401
|
-
AssemblyFragment(
|
|
402
|
-
sequence=fragment_ids[abs(pos) - 1],
|
|
403
|
-
left_location=None if left_loc is None else SequenceLocationStr.from_biopython_location(left_loc),
|
|
404
|
-
right_location=None if right_loc is None else SequenceLocationStr.from_biopython_location(right_loc),
|
|
405
|
-
reverse_complemented=pos < 0,
|
|
406
|
-
)
|
|
407
|
-
for pos, left_loc, right_loc in fragment_assembly_positions
|
|
408
|
-
]
|
|
409
|
-
return cls(
|
|
410
|
-
id=id,
|
|
411
|
-
input=assembly_fragments,
|
|
412
|
-
circular=circular,
|
|
413
|
-
**kwargs,
|
|
414
|
-
)
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
class AssemblySource(AssemblySourceCommonClass, _AssemblySource):
|
|
418
|
-
pass
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
class PCRSource(AssemblySourceCommonClass, _PCRSource):
|
|
422
|
-
pass
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
class LigationSource(AssemblySourceCommonClass, _LigationSource):
|
|
426
|
-
pass
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
class HomologousRecombinationSource(AssemblySourceCommonClass, _HomologousRecombinationSource):
|
|
430
|
-
|
|
431
|
-
# TODO: add this to LinkML
|
|
432
|
-
# This can only take two inputs, the first one is the template, the second one is the insert
|
|
433
|
-
# input: conlist(int, min_length=2, max_length=2)
|
|
434
|
-
pass
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
class GibsonAssemblySource(AssemblySourceCommonClass, _GibsonAssemblySource):
|
|
438
|
-
|
|
439
|
-
# TODO: add this to LinkML
|
|
440
|
-
# input: conlist(int, min_length=1)
|
|
441
|
-
pass
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
class OverlapExtensionPCRLigationSource(AssemblySourceCommonClass, _OverlapExtensionPCRLigationSource):
|
|
445
|
-
pass
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
class InFusionSource(AssemblySourceCommonClass, _InFusionSource):
|
|
449
|
-
pass
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
class InVivoAssemblySource(AssemblySourceCommonClass, _InVivoAssemblySource):
|
|
453
|
-
pass
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
class CRISPRSource(AssemblySourceCommonClass, _CRISPRSource):
|
|
457
|
-
|
|
458
|
-
# TODO
|
|
459
|
-
# input: conlist(int, min_length=2, max_length=2)
|
|
460
|
-
# circular: bool = False
|
|
461
|
-
|
|
462
|
-
@classmethod
|
|
463
|
-
def from_assembly(
|
|
464
|
-
cls,
|
|
465
|
-
assembly: list[tuple[int, int, Location, Location]],
|
|
466
|
-
id: int,
|
|
467
|
-
fragments: list[_SeqRecord],
|
|
468
|
-
guides: list[int],
|
|
469
|
-
):
|
|
470
|
-
source = super().from_assembly(assembly, id, False, fragments)
|
|
471
|
-
source.input += [SourceInput(sequence=guide) for guide in guides]
|
|
472
|
-
return source
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
class RestrictionAndLigationSource(AssemblySourceCommonClass, _RestrictionAndLigationSource):
|
|
476
|
-
# TODO: add this to LinkML
|
|
477
|
-
# input: conlist(int, min_length=1)
|
|
478
|
-
|
|
479
|
-
@classmethod
|
|
480
|
-
def from_assembly(
|
|
481
|
-
cls,
|
|
482
|
-
assembly: list[tuple[int, int, Location, Location]],
|
|
483
|
-
circular: bool,
|
|
484
|
-
id: int,
|
|
485
|
-
fragments: list[_SeqRecord],
|
|
486
|
-
restriction_enzymes=list['str'],
|
|
487
|
-
):
|
|
488
|
-
return super().from_assembly(assembly, id, circular, fragments, restriction_enzymes=restriction_enzymes)
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
class GatewaySource(AssemblySourceCommonClass, _GatewaySource):
|
|
492
|
-
@classmethod
|
|
493
|
-
def from_assembly(
|
|
494
|
-
cls,
|
|
495
|
-
assembly: list[tuple[int, int, Location, Location]],
|
|
496
|
-
circular: bool,
|
|
497
|
-
id: int,
|
|
498
|
-
fragments: list[_SeqRecord],
|
|
499
|
-
reaction_type: str,
|
|
500
|
-
):
|
|
501
|
-
return super().from_assembly(assembly, id, circular, fragments, reaction_type=reaction_type)
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
class CreLoxRecombinationSource(AssemblySourceCommonClass, _CreLoxRecombinationSource):
|
|
505
|
-
pass
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
class OligoHybridizationSource(SourceCommonClass, _OligoHybridizationSource):
|
|
509
|
-
pass
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
class PolymeraseExtensionSource(SourceCommonClass, _PolymeraseExtensionSource):
|
|
513
|
-
pass
|
|
514
14
|
|
|
515
15
|
|
|
516
16
|
class BaseCloningStrategy(_CloningStrategy):
|
|
@@ -536,7 +36,7 @@ class BaseCloningStrategy(_CloningStrategy):
|
|
|
536
36
|
def next_id(self):
|
|
537
37
|
return max([s.id for s in self.sources + self.sequences + self.primers], default=0) + 1
|
|
538
38
|
|
|
539
|
-
def add_source_and_sequence(self, source:
|
|
39
|
+
def add_source_and_sequence(self, source: _Source, sequence: _TextFileSequence):
|
|
540
40
|
if source in self.sources:
|
|
541
41
|
if sequence not in self.sequences:
|
|
542
42
|
raise ValueError(
|
|
@@ -566,7 +66,7 @@ class BaseCloningStrategy(_CloningStrategy):
|
|
|
566
66
|
|
|
567
67
|
class PrimerDesignQuery(BaseModel):
|
|
568
68
|
model_config = {'arbitrary_types_allowed': True}
|
|
569
|
-
sequence:
|
|
69
|
+
sequence: _TextFileSequence
|
|
570
70
|
location: SequenceLocationStr
|
|
571
71
|
forward_orientation: bool = True
|
|
572
72
|
|
opencloning/request_examples.py
CHANGED
|
@@ -3,58 +3,48 @@ genome_region_examples = {
|
|
|
3
3
|
'summary': 'All parameters provided',
|
|
4
4
|
'value': {
|
|
5
5
|
'id': 1,
|
|
6
|
-
'
|
|
6
|
+
'repository_id': 'NC_003424.3',
|
|
7
7
|
'assembly_accession': 'GCF_000002945.2',
|
|
8
8
|
'locus_tag': 'SPOM_SPAPB1A10.09',
|
|
9
9
|
'gene_id': 2543372,
|
|
10
|
-
'
|
|
11
|
-
'end': 1881726,
|
|
12
|
-
'strand': 1,
|
|
10
|
+
'coordinates': '1877009..1881726',
|
|
13
11
|
},
|
|
14
12
|
},
|
|
15
13
|
'full_with_genbank_accession': {
|
|
16
14
|
'summary': 'All parameters provided, but sequence accession is GenBank',
|
|
17
15
|
'value': {
|
|
18
16
|
'id': 1,
|
|
19
|
-
'
|
|
17
|
+
'repository_id': 'CU329670.1',
|
|
20
18
|
'assembly_accession': 'GCF_000002945.2',
|
|
21
19
|
'locus_tag': 'SPOM_SPAPB1A10.09',
|
|
22
20
|
'gene_id': 2543372,
|
|
23
|
-
'
|
|
24
|
-
'end': 1881726,
|
|
25
|
-
'strand': 1,
|
|
21
|
+
'coordinates': '1877009..1881726',
|
|
26
22
|
},
|
|
27
23
|
},
|
|
28
24
|
'id_omitted': {
|
|
29
25
|
'summary': 'Gene ID omitted (filled in response)',
|
|
30
26
|
'value': {
|
|
31
27
|
'id': 1,
|
|
32
|
-
'
|
|
28
|
+
'repository_id': 'NC_003424.3',
|
|
33
29
|
'assembly_accession': 'GCF_000002945.2',
|
|
34
30
|
'locus_tag': 'SPOM_SPAPB1A10.09',
|
|
35
|
-
'
|
|
36
|
-
'end': 1881726,
|
|
37
|
-
'strand': 1,
|
|
31
|
+
'coordinates': '1877009..1881726',
|
|
38
32
|
},
|
|
39
33
|
},
|
|
40
34
|
'assembly_accession_omitted': {
|
|
41
35
|
'summary': 'Sequence accession only',
|
|
42
36
|
'value': {
|
|
43
37
|
'id': 1,
|
|
44
|
-
'
|
|
45
|
-
'
|
|
46
|
-
'end': 1881726,
|
|
47
|
-
'strand': 1,
|
|
38
|
+
'repository_id': 'NC_003424.3',
|
|
39
|
+
'coordinates': '1877009..1881726',
|
|
48
40
|
},
|
|
49
41
|
},
|
|
50
42
|
'viral_sequence': {
|
|
51
43
|
'summary': 'Viral sequence not associated with assembly',
|
|
52
44
|
'value': {
|
|
53
45
|
'id': 1,
|
|
54
|
-
'
|
|
55
|
-
'
|
|
56
|
-
'end': 2050,
|
|
57
|
-
'strand': -1,
|
|
46
|
+
'repository_id': 'DQ208311.2',
|
|
47
|
+
'coordinates': 'complement(20..2050)',
|
|
58
48
|
},
|
|
59
49
|
},
|
|
60
50
|
}
|
|
@@ -84,7 +74,6 @@ benchling_url_examples = {
|
|
|
84
74
|
'summary': 'Typical example',
|
|
85
75
|
'value': {
|
|
86
76
|
'id': 0,
|
|
87
|
-
'repository_name': 'benchling',
|
|
88
77
|
'repository_id': 'https://benchling.com/siverson/f/lib_B94YxDHhQh-cidar-moclo-library/seq_kryGidaz-c0062_cd.gb',
|
|
89
78
|
},
|
|
90
79
|
},
|
|
@@ -95,7 +84,6 @@ snapgene_plasmid_examples = {
|
|
|
95
84
|
'summary': 'Typical example',
|
|
96
85
|
'value': {
|
|
97
86
|
'id': 0,
|
|
98
|
-
'repository_name': 'snapgene',
|
|
99
87
|
'repository_id': 'basic_cloning_vectors/pEASY-T1_(linearized)',
|
|
100
88
|
},
|
|
101
89
|
},
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Functions to be moved to pydna at some point.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from opencloning_linkml.datamodel import AssemblySource
|
|
6
|
+
from Bio.SeqFeature import Location
|
|
7
|
+
from opencloning_linkml.datamodel import RestrictionEnzymeDigestionSource
|
|
8
|
+
from opencloning_linkml.datamodel import RestrictionSequenceCut
|
|
9
|
+
from opencloning_linkml.datamodel import Primer as PrimerModel
|
|
10
|
+
from Bio.Restriction.Restriction import RestrictionType, RestrictionBatch
|
|
11
|
+
from pydna.primer import Primer as PydnaPrimer
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def is_assembly_complete(source: AssemblySource) -> bool:
|
|
15
|
+
return any(f.type == 'AssemblyFragment' for f in source.input)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def minimal_assembly_overlap(source: AssemblySource) -> int:
|
|
19
|
+
all_overlaps = list()
|
|
20
|
+
for f in source.input:
|
|
21
|
+
if f.type != 'AssemblyFragment':
|
|
22
|
+
continue
|
|
23
|
+
if f.left_location is not None:
|
|
24
|
+
all_overlaps.append(len(Location.fromstring(f.left_location)))
|
|
25
|
+
if f.right_location is not None:
|
|
26
|
+
all_overlaps.append(len(Location.fromstring(f.right_location)))
|
|
27
|
+
if len(all_overlaps) == 0:
|
|
28
|
+
raise ValueError('Assembly is not complete')
|
|
29
|
+
return min(all_overlaps)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_enzymes_from_source(source: RestrictionEnzymeDigestionSource) -> list[str]:
|
|
33
|
+
out = list()
|
|
34
|
+
if source.left_edge is not None:
|
|
35
|
+
out.append(source.left_edge.restriction_enzyme)
|
|
36
|
+
if source.right_edge is not None:
|
|
37
|
+
out.append(source.right_edge.restriction_enzyme)
|
|
38
|
+
# Unique values, sorted the same way
|
|
39
|
+
return sorted(list(set(out)), key=out.index)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def restriction_sequence_cut_to_cutsite_tuple(
|
|
43
|
+
restriction_sequence_cut: RestrictionSequenceCut,
|
|
44
|
+
) -> tuple[tuple[int, int], RestrictionType]:
|
|
45
|
+
restriction_enzyme = RestrictionBatch(first=[restriction_sequence_cut.restriction_enzyme]).pop()
|
|
46
|
+
return ((restriction_sequence_cut.cut_watson, restriction_sequence_cut.overhang), restriction_enzyme)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def primer_model_to_pydna_primer(primer_model: PrimerModel) -> PydnaPrimer:
|
|
50
|
+
return PydnaPrimer(primer_model.sequence, id=str(primer_model.id), name=primer_model.name)
|