opencloning 0.4.8__py3-none-any.whl → 0.5.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. opencloning/app_settings.py +7 -0
  2. opencloning/batch_cloning/pombe/__init__.py +2 -2
  3. opencloning/batch_cloning/pombe/pombe_clone.py +31 -112
  4. opencloning/batch_cloning/pombe/pombe_summary.py +20 -8
  5. opencloning/batch_cloning/ziqiang_et_al2024/__init__.py +8 -8
  6. opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json +2 -9
  7. opencloning/bug_fixing/backend_v0_3.py +13 -5
  8. opencloning/catalogs/__init__.py +36 -0
  9. opencloning/catalogs/igem2024.yaml +2172 -0
  10. opencloning/catalogs/openDNA_collections.yaml +1161 -0
  11. opencloning/catalogs/readme.txt +1 -0
  12. opencloning/catalogs/seva.tsv +231 -0
  13. opencloning/catalogs/snapgene.yaml +2837 -0
  14. opencloning/dna_functions.py +155 -158
  15. opencloning/dna_utils.py +45 -62
  16. opencloning/ebic/primer_design.py +1 -1
  17. opencloning/endpoints/annotation.py +9 -13
  18. opencloning/endpoints/assembly.py +157 -378
  19. opencloning/endpoints/endpoint_utils.py +52 -0
  20. opencloning/endpoints/external_import.py +169 -124
  21. opencloning/endpoints/no_assembly.py +23 -39
  22. opencloning/endpoints/no_input.py +32 -47
  23. opencloning/endpoints/other.py +1 -1
  24. opencloning/endpoints/primer_design.py +2 -1
  25. opencloning/http_client.py +2 -2
  26. opencloning/ncbi_requests.py +113 -47
  27. opencloning/primer_design.py +1 -1
  28. opencloning/pydantic_models.py +10 -510
  29. opencloning/request_examples.py +10 -22
  30. opencloning/temp_functions.py +50 -0
  31. {opencloning-0.4.8.dist-info → opencloning-0.5.0.1.dist-info}/METADATA +18 -8
  32. opencloning-0.5.0.1.dist-info/RECORD +51 -0
  33. {opencloning-0.4.8.dist-info → opencloning-0.5.0.1.dist-info}/WHEEL +1 -1
  34. opencloning/cre_lox.py +0 -116
  35. opencloning/gateway.py +0 -154
  36. opencloning-0.4.8.dist-info/RECORD +0 -45
  37. {opencloning-0.4.8.dist-info → opencloning-0.5.0.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,516 +1,16 @@
1
- from pydantic import BaseModel, Field, model_validator, field_validator, Discriminator, Tag
2
- from typing import Optional, List, Union, Annotated
3
- from pydantic_core import core_schema
1
+ from pydantic import BaseModel, Field, field_validator
2
+ from typing import Optional, List
3
+
4
4
  from ._version import __version__
5
5
 
6
- from Bio.SeqFeature import (
7
- SeqFeature,
8
- Location,
9
- SimpleLocation,
10
- FeatureLocation as BioFeatureLocation,
11
- LocationParserError,
12
- )
13
- from Bio.SeqIO.InsdcIO import _insdc_location_string as format_feature_location
14
- from Bio.Restriction.Restriction import RestrictionType, RestrictionBatch
15
- from Bio.SeqRecord import SeqRecord as _SeqRecord
16
- from pydna.primer import Primer as _PydnaPrimer
6
+ from pydna.opencloning_models import SequenceLocationStr
7
+
17
8
  from opencloning_linkml.datamodel import (
18
- OligoHybridizationSource as _OligoHybridizationSource,
19
- PolymeraseExtensionSource as _PolymeraseExtensionSource,
20
- GenomeCoordinatesSource as _GenomeCoordinatesSource,
21
- RepositoryIdSource as _RepositoryIdSource,
22
- ManuallyTypedSource as _ManuallyTypedSource,
23
- UploadedFileSource as _UploadedFileSource,
24
- SequenceFileFormat as _SequenceFileFormat,
25
- RestrictionEnzymeDigestionSource as _RestrictionEnzymeDigestionSource,
26
- RestrictionSequenceCut as _RestrictionSequenceCut,
27
- TextFileSequence as _TextFileSequence,
28
- AssemblySource as _AssemblySource,
29
- PCRSource as _PCRSource,
30
- HomologousRecombinationSource as _HomologousRecombinationSource,
31
- GibsonAssemblySource as _GibsonAssemblySource,
32
- RestrictionAndLigationSource as _RestrictionAndLigationSource,
33
- LigationSource as _LigationSource,
34
- CRISPRSource as _CRISPRSource,
35
- Primer as _Primer,
36
- AssemblyFragment as _AssemblyFragment,
37
- AddgeneIdSource as _AddgeneIdSource,
38
- WekWikGeneIdSource as _WekWikGeneIdSource,
39
- BenchlingUrlSource as _BenchlingUrlSource,
40
9
  CloningStrategy as _CloningStrategy,
41
- OverlapExtensionPCRLigationSource as _OverlapExtensionPCRLigationSource,
42
- SnapGenePlasmidSource as _SnapGenePlasmidSource,
43
- EuroscarfSource as _EuroscarfSource,
44
- GatewaySource as _GatewaySource,
45
- InFusionSource as _InFusionSource,
46
- AnnotationSource as _AnnotationSource,
47
- IGEMSource as _IGEMSource,
48
- ReverseComplementSource as _ReverseComplementSource,
49
- SEVASource as _SEVASource,
50
- CreLoxRecombinationSource as _CreLoxRecombinationSource,
51
- InVivoAssemblySource as _InVivoAssemblySource,
52
- SourceInput as _SourceInput,
53
- OpenDNACollectionsSource as _OpenDNACollectionsSource,
54
- )
55
- from pydna.assembly2 import (
56
- edge_representation2subfragment_representation,
57
- subfragment_representation2edge_representation,
10
+ Primer as PrimerModel,
11
+ TextFileSequence as _TextFileSequence,
12
+ Source as _Source,
58
13
  )
59
- from pydna.utils import location_boundaries, shift_location
60
-
61
-
62
- SequenceFileFormat = _SequenceFileFormat
63
-
64
-
65
- class TextFileSequence(_TextFileSequence):
66
- pass
67
-
68
-
69
- class SourceInput(_SourceInput):
70
- pass
71
-
72
-
73
- class PrimerModel(_Primer):
74
- """Called PrimerModel not to be confused with the class from pydna."""
75
-
76
- def to_pydna_primer(self) -> _PydnaPrimer:
77
- """
78
- Convert the PrimerModel to a pydna Primer object.
79
-
80
- Returns:
81
- _PydnaPrimer: A pydna Primer object with the same sequence and name as the PrimerModel.
82
- """
83
- return _PydnaPrimer(self.sequence, name=self.name, id=str(self.id))
84
-
85
-
86
- class SeqFeatureModel(BaseModel):
87
- type: str
88
- qualifiers: dict[str, list[str]] = {}
89
- location: str
90
-
91
- def convert_to_seq_feature(self) -> SeqFeature:
92
- return SeqFeature(location=Location.fromstring(self.location), type=self.type, qualifiers=self.qualifiers)
93
-
94
- def read_from_seq_feature(sf: SeqFeature) -> 'SeqFeatureModel':
95
- return SeqFeatureModel(
96
- type=sf.type, qualifiers=sf.qualifiers, location=format_feature_location(sf.location, None)
97
- )
98
-
99
-
100
- # Sources =========================================
101
-
102
-
103
- def input_discriminator(v) -> str | None:
104
- """
105
- Discriminator that yields SourceInput by default
106
- """
107
- if isinstance(v, dict):
108
- input_type = v.get('type', None)
109
- if input_type is None:
110
- return 'SourceInput'
111
- else:
112
- return input_type
113
- elif isinstance(v, SourceInput):
114
- return v.type
115
- return None
116
-
117
-
118
- class SourceCommonClass(BaseModel):
119
- input: Optional[List[SourceInput]] = Field(
120
- default_factory=list,
121
- description="""The sequences that are an input to this source. If the source represents external import of a sequence, it's empty.""",
122
- json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source']}},
123
- )
124
-
125
-
126
- class ManuallyTypedSource(SourceCommonClass, _ManuallyTypedSource):
127
- """Describes a sequence that is typed manually by the user"""
128
-
129
- @model_validator(mode='after')
130
- def validate_circularity(self):
131
- # Do the validation instead of printing
132
- if self.circular:
133
- assert self.overhang_crick_3prime == 0, 'Circular sequences cannot have overhangs.'
134
- assert self.overhang_watson_3prime == 0, 'Circular sequences cannot have overhangs.'
135
- return self
136
-
137
-
138
- class UploadedFileSource(SourceCommonClass, _UploadedFileSource):
139
- coordinates: Optional['SequenceLocationStr'] = Field(
140
- default=None,
141
- description="""If provided, coordinates within the sequence of the file to extract a subsequence""",
142
- json_schema_extra={'linkml_meta': {'alias': 'coordinates', 'domain_of': ['UploadedFileSource']}},
143
- )
144
-
145
- @field_validator('coordinates', mode='before')
146
- def parse_coordinates(cls, v):
147
- if v is None:
148
- return None
149
- return SequenceLocationStr.field_validator(v)
150
-
151
-
152
- class RepositoryIdSource(SourceCommonClass, _RepositoryIdSource):
153
- pass
154
-
155
-
156
- class AddgeneIdSource(SourceCommonClass, _AddgeneIdSource):
157
- # TODO: add this to LinkML
158
- # repository_name: RepositoryName = RepositoryName('addgene')
159
- pass
160
-
161
-
162
- class WekWikGeneIdSource(SourceCommonClass, _WekWikGeneIdSource):
163
- pass
164
-
165
-
166
- class BenchlingUrlSource(SourceCommonClass, _BenchlingUrlSource):
167
- pass
168
-
169
-
170
- class SnapGenePlasmidSource(SourceCommonClass, _SnapGenePlasmidSource):
171
- pass
172
-
173
-
174
- class EuroscarfSource(SourceCommonClass, _EuroscarfSource):
175
- pass
176
-
177
-
178
- class IGEMSource(SourceCommonClass, _IGEMSource):
179
-
180
- @model_validator(mode='after')
181
- def validate_repository_id(self):
182
- file_name = self.sequence_file_url.split('/')[-1]
183
- assert file_name.endswith('.gb'), 'The sequence file must be a GenBank file'
184
- return self
185
-
186
-
187
- class OpenDNACollectionsSource(SourceCommonClass, _OpenDNACollectionsSource):
188
- pass
189
-
190
-
191
- class SEVASource(SourceCommonClass, _SEVASource):
192
- pass
193
-
194
-
195
- class GenomeCoordinatesSource(SourceCommonClass, _GenomeCoordinatesSource):
196
- pass
197
-
198
-
199
- class AnnotationSource(SourceCommonClass, _AnnotationSource):
200
- pass
201
-
202
-
203
- class ReverseComplementSource(SourceCommonClass, _ReverseComplementSource):
204
- pass
205
-
206
-
207
- class RestrictionSequenceCut(_RestrictionSequenceCut):
208
-
209
- @classmethod
210
- def from_cutsite_tuple(cls, cutsite_tuple: tuple[tuple[int, int], RestrictionType]):
211
- cut_watson, ovhg = cutsite_tuple[0]
212
- enzyme = str(cutsite_tuple[1])
213
-
214
- return cls(
215
- cut_watson=cut_watson,
216
- overhang=ovhg,
217
- restriction_enzyme=enzyme,
218
- )
219
-
220
- def to_cutsite_tuple(self) -> tuple[tuple[int, int], RestrictionType]:
221
- restriction_enzyme = RestrictionBatch(first=[self.restriction_enzyme]).pop()
222
- return ((self.cut_watson, self.overhang), restriction_enzyme)
223
-
224
-
225
- class RestrictionEnzymeDigestionSource(SourceCommonClass, _RestrictionEnzymeDigestionSource):
226
- """Documents a restriction enzyme digestion, and the selection of one of the fragments."""
227
-
228
- # TODO: maybe a better way? They have to be redefined here because
229
- # we have overriden the original class
230
-
231
- left_edge: Optional[RestrictionSequenceCut] = Field(None)
232
- right_edge: Optional[RestrictionSequenceCut] = Field(None)
233
-
234
- @classmethod
235
- def from_cutsites(
236
- cls,
237
- left: tuple[tuple[int, int], RestrictionType],
238
- right: tuple[tuple[int, int], RestrictionType],
239
- input: list[int],
240
- id: int,
241
- ):
242
- return cls(
243
- id=id,
244
- left_edge=None if left is None else RestrictionSequenceCut.from_cutsite_tuple(left),
245
- right_edge=None if right is None else RestrictionSequenceCut.from_cutsite_tuple(right),
246
- input=input,
247
- )
248
-
249
- # TODO could be made into a computed field?
250
- def get_enzymes(self) -> list[str]:
251
- """Returns the enzymes used in the digestion"""
252
- out = list()
253
- if self.left_edge is not None:
254
- out.append(self.left_edge.restriction_enzyme)
255
- if self.right_edge is not None:
256
- out.append(self.right_edge.restriction_enzyme)
257
- # Unique values, sorted the same way
258
- return sorted(list(set(out)), key=out.index)
259
-
260
-
261
- class SequenceLocationStr(str):
262
- """A string representation of a sequence location, genbank-like."""
263
-
264
- # TODO: this should handle origin-spanning simple locations (splitted)
265
- @classmethod
266
- def from_biopython_location(cls, location: Location):
267
- return cls(format_feature_location(location, None))
268
-
269
- @classmethod
270
- def from_start_and_end(cls, start: int, end: int, seq_len: int | None = None, strand: int | None = 1):
271
- if end >= start:
272
- return cls.from_biopython_location(SimpleLocation(start, end, strand=strand))
273
- else:
274
- if seq_len is None:
275
- raise ValueError('Sequence length is required to handle origin-spanning simple locations')
276
- unwrapped_location = SimpleLocation(start, end + seq_len, strand=strand)
277
- wrapped_location = shift_location(unwrapped_location, 0, seq_len)
278
- return cls.from_biopython_location(wrapped_location)
279
-
280
- def to_biopython_location(self) -> BioFeatureLocation:
281
- return Location.fromstring(self)
282
-
283
- @classmethod
284
- def field_validator(cls, v):
285
- if isinstance(v, str):
286
- value = cls(v)
287
- try:
288
- value.to_biopython_location()
289
- except LocationParserError:
290
- raise ValueError(f'Location "{v}" is not a valid location')
291
- return value
292
- raise ValueError(f'Location must be a string or a {cls.__name__}')
293
-
294
- @property
295
- def start(self) -> int:
296
- return location_boundaries(self.to_biopython_location())[0]
297
-
298
- @property
299
- def end(self) -> int:
300
- return location_boundaries(self.to_biopython_location())[1]
301
-
302
- @classmethod
303
- def __get_pydantic_core_schema__(
304
- cls,
305
- source_type,
306
- handler,
307
- ) -> core_schema.CoreSchema:
308
- """Generate Pydantic core schema for SequenceLocationStr."""
309
- return core_schema.with_info_after_validator_function(
310
- cls._validate,
311
- core_schema.str_schema(),
312
- )
313
-
314
- @classmethod
315
- def _validate(cls, value: str, info):
316
- """Validate and create SequenceLocationStr instance."""
317
- return cls.field_validator(value)
318
-
319
-
320
- class AssemblyFragment(_AssemblyFragment, SourceInput):
321
- left_location: Optional[SequenceLocationStr] = None
322
- right_location: Optional[SequenceLocationStr] = None
323
-
324
- def to_fragment_tuple(self, fragments) -> tuple[int, Location, Location]:
325
- fragment_ids = [int(f.id) for f in fragments]
326
- # By convention, these have no strand
327
- left_loc = None if self.left_location is None else self.left_location.to_biopython_location()
328
- right_loc = None if self.right_location is None else self.right_location.to_biopython_location()
329
- if left_loc is not None:
330
- left_loc.strand = None
331
- if right_loc is not None:
332
- right_loc.strand = None
333
-
334
- return (
335
- (fragment_ids.index(self.sequence) + 1) * (-1 if self.reverse_complemented else 1),
336
- left_loc,
337
- right_loc,
338
- )
339
-
340
- @field_validator('left_location', 'right_location', mode='before')
341
- def parse_location(cls, v):
342
- if v is None:
343
- return None
344
- return SequenceLocationStr.field_validator(v)
345
-
346
-
347
- class AssemblySourceCommonClass(SourceCommonClass):
348
- # TODO: This is different in the LinkML model, because there it is not required,
349
- # and here we make it default to list.
350
- input: Optional[
351
- List[
352
- Annotated[
353
- Union[
354
- Annotated[SourceInput, Tag('SourceInput')],
355
- Annotated['AssemblyFragment', Tag('AssemblyFragment')],
356
- ],
357
- Discriminator(input_discriminator),
358
- ]
359
- ]
360
- ] = Field(
361
- default_factory=list,
362
- description="""The inputs to this source. If the source represents external import of a sequence, it's empty.""",
363
- json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source'], 'slot_uri': 'schema:object'}},
364
- )
365
-
366
- def minimal_overlap(self):
367
- """Returns the minimal overlap between the fragments in the assembly"""
368
- all_overlaps = list()
369
- for f in self.input:
370
- if f.left_location is not None:
371
- all_overlaps.append(f.left_location.end - f.left_location.start)
372
- if f.right_location is not None:
373
- all_overlaps.append(f.right_location.end - f.right_location.start)
374
- return min(all_overlaps)
375
-
376
- def get_assembly_plan(self, fragments: list[_SeqRecord]) -> tuple:
377
- """Returns the assembly plan"""
378
- subf = [f.to_fragment_tuple(fragments) for f in self.input if f.type == 'AssemblyFragment']
379
- return subfragment_representation2edge_representation(subf, self.circular)
380
-
381
- def is_assembly_complete(self) -> bool:
382
- """Returns True if the assembly is complete"""
383
- return any(f.type == 'AssemblyFragment' for f in self.input)
384
-
385
- @classmethod
386
- def from_assembly(
387
- cls,
388
- assembly: list[tuple[int, int, Location, Location]],
389
- id: int,
390
- circular: bool,
391
- fragments: list[_SeqRecord],
392
- **kwargs,
393
- ):
394
-
395
- # Replace the positions with the actual ids
396
- fragment_ids = [int(f.id) for f in fragments]
397
-
398
- # Here the ids are still the positions in the fragments list
399
- fragment_assembly_positions = edge_representation2subfragment_representation(assembly, circular)
400
- assembly_fragments = [
401
- AssemblyFragment(
402
- sequence=fragment_ids[abs(pos) - 1],
403
- left_location=None if left_loc is None else SequenceLocationStr.from_biopython_location(left_loc),
404
- right_location=None if right_loc is None else SequenceLocationStr.from_biopython_location(right_loc),
405
- reverse_complemented=pos < 0,
406
- )
407
- for pos, left_loc, right_loc in fragment_assembly_positions
408
- ]
409
- return cls(
410
- id=id,
411
- input=assembly_fragments,
412
- circular=circular,
413
- **kwargs,
414
- )
415
-
416
-
417
- class AssemblySource(AssemblySourceCommonClass, _AssemblySource):
418
- pass
419
-
420
-
421
- class PCRSource(AssemblySourceCommonClass, _PCRSource):
422
- pass
423
-
424
-
425
- class LigationSource(AssemblySourceCommonClass, _LigationSource):
426
- pass
427
-
428
-
429
- class HomologousRecombinationSource(AssemblySourceCommonClass, _HomologousRecombinationSource):
430
-
431
- # TODO: add this to LinkML
432
- # This can only take two inputs, the first one is the template, the second one is the insert
433
- # input: conlist(int, min_length=2, max_length=2)
434
- pass
435
-
436
-
437
- class GibsonAssemblySource(AssemblySourceCommonClass, _GibsonAssemblySource):
438
-
439
- # TODO: add this to LinkML
440
- # input: conlist(int, min_length=1)
441
- pass
442
-
443
-
444
- class OverlapExtensionPCRLigationSource(AssemblySourceCommonClass, _OverlapExtensionPCRLigationSource):
445
- pass
446
-
447
-
448
- class InFusionSource(AssemblySourceCommonClass, _InFusionSource):
449
- pass
450
-
451
-
452
- class InVivoAssemblySource(AssemblySourceCommonClass, _InVivoAssemblySource):
453
- pass
454
-
455
-
456
- class CRISPRSource(AssemblySourceCommonClass, _CRISPRSource):
457
-
458
- # TODO
459
- # input: conlist(int, min_length=2, max_length=2)
460
- # circular: bool = False
461
-
462
- @classmethod
463
- def from_assembly(
464
- cls,
465
- assembly: list[tuple[int, int, Location, Location]],
466
- id: int,
467
- fragments: list[_SeqRecord],
468
- guides: list[int],
469
- ):
470
- source = super().from_assembly(assembly, id, False, fragments)
471
- source.input += [SourceInput(sequence=guide) for guide in guides]
472
- return source
473
-
474
-
475
- class RestrictionAndLigationSource(AssemblySourceCommonClass, _RestrictionAndLigationSource):
476
- # TODO: add this to LinkML
477
- # input: conlist(int, min_length=1)
478
-
479
- @classmethod
480
- def from_assembly(
481
- cls,
482
- assembly: list[tuple[int, int, Location, Location]],
483
- circular: bool,
484
- id: int,
485
- fragments: list[_SeqRecord],
486
- restriction_enzymes=list['str'],
487
- ):
488
- return super().from_assembly(assembly, id, circular, fragments, restriction_enzymes=restriction_enzymes)
489
-
490
-
491
- class GatewaySource(AssemblySourceCommonClass, _GatewaySource):
492
- @classmethod
493
- def from_assembly(
494
- cls,
495
- assembly: list[tuple[int, int, Location, Location]],
496
- circular: bool,
497
- id: int,
498
- fragments: list[_SeqRecord],
499
- reaction_type: str,
500
- ):
501
- return super().from_assembly(assembly, id, circular, fragments, reaction_type=reaction_type)
502
-
503
-
504
- class CreLoxRecombinationSource(AssemblySourceCommonClass, _CreLoxRecombinationSource):
505
- pass
506
-
507
-
508
- class OligoHybridizationSource(SourceCommonClass, _OligoHybridizationSource):
509
- pass
510
-
511
-
512
- class PolymeraseExtensionSource(SourceCommonClass, _PolymeraseExtensionSource):
513
- pass
514
14
 
515
15
 
516
16
  class BaseCloningStrategy(_CloningStrategy):
@@ -536,7 +36,7 @@ class BaseCloningStrategy(_CloningStrategy):
536
36
  def next_id(self):
537
37
  return max([s.id for s in self.sources + self.sequences + self.primers], default=0) + 1
538
38
 
539
- def add_source_and_sequence(self, source: SourceCommonClass, sequence: TextFileSequence):
39
+ def add_source_and_sequence(self, source: _Source, sequence: _TextFileSequence):
540
40
  if source in self.sources:
541
41
  if sequence not in self.sequences:
542
42
  raise ValueError(
@@ -566,7 +66,7 @@ class BaseCloningStrategy(_CloningStrategy):
566
66
 
567
67
  class PrimerDesignQuery(BaseModel):
568
68
  model_config = {'arbitrary_types_allowed': True}
569
- sequence: TextFileSequence
69
+ sequence: _TextFileSequence
570
70
  location: SequenceLocationStr
571
71
  forward_orientation: bool = True
572
72
 
@@ -3,58 +3,48 @@ genome_region_examples = {
3
3
  'summary': 'All parameters provided',
4
4
  'value': {
5
5
  'id': 1,
6
- 'sequence_accession': 'NC_003424.3',
6
+ 'repository_id': 'NC_003424.3',
7
7
  'assembly_accession': 'GCF_000002945.2',
8
8
  'locus_tag': 'SPOM_SPAPB1A10.09',
9
9
  'gene_id': 2543372,
10
- 'start': 1877009,
11
- 'end': 1881726,
12
- 'strand': 1,
10
+ 'coordinates': '1877009..1881726',
13
11
  },
14
12
  },
15
13
  'full_with_genbank_accession': {
16
14
  'summary': 'All parameters provided, but sequence accession is GenBank',
17
15
  'value': {
18
16
  'id': 1,
19
- 'sequence_accession': 'CU329670.1',
17
+ 'repository_id': 'CU329670.1',
20
18
  'assembly_accession': 'GCF_000002945.2',
21
19
  'locus_tag': 'SPOM_SPAPB1A10.09',
22
20
  'gene_id': 2543372,
23
- 'start': 1877009,
24
- 'end': 1881726,
25
- 'strand': 1,
21
+ 'coordinates': '1877009..1881726',
26
22
  },
27
23
  },
28
24
  'id_omitted': {
29
25
  'summary': 'Gene ID omitted (filled in response)',
30
26
  'value': {
31
27
  'id': 1,
32
- 'sequence_accession': 'NC_003424.3',
28
+ 'repository_id': 'NC_003424.3',
33
29
  'assembly_accession': 'GCF_000002945.2',
34
30
  'locus_tag': 'SPOM_SPAPB1A10.09',
35
- 'start': 1877009,
36
- 'end': 1881726,
37
- 'strand': 1,
31
+ 'coordinates': '1877009..1881726',
38
32
  },
39
33
  },
40
34
  'assembly_accession_omitted': {
41
35
  'summary': 'Sequence accession only',
42
36
  'value': {
43
37
  'id': 1,
44
- 'sequence_accession': 'NC_003424.3',
45
- 'start': 1877009,
46
- 'end': 1881726,
47
- 'strand': 1,
38
+ 'repository_id': 'NC_003424.3',
39
+ 'coordinates': '1877009..1881726',
48
40
  },
49
41
  },
50
42
  'viral_sequence': {
51
43
  'summary': 'Viral sequence not associated with assembly',
52
44
  'value': {
53
45
  'id': 1,
54
- 'sequence_accession': 'DQ208311.2',
55
- 'start': 20,
56
- 'end': 2050,
57
- 'strand': -1,
46
+ 'repository_id': 'DQ208311.2',
47
+ 'coordinates': 'complement(20..2050)',
58
48
  },
59
49
  },
60
50
  }
@@ -84,7 +74,6 @@ benchling_url_examples = {
84
74
  'summary': 'Typical example',
85
75
  'value': {
86
76
  'id': 0,
87
- 'repository_name': 'benchling',
88
77
  'repository_id': 'https://benchling.com/siverson/f/lib_B94YxDHhQh-cidar-moclo-library/seq_kryGidaz-c0062_cd.gb',
89
78
  },
90
79
  },
@@ -95,7 +84,6 @@ snapgene_plasmid_examples = {
95
84
  'summary': 'Typical example',
96
85
  'value': {
97
86
  'id': 0,
98
- 'repository_name': 'snapgene',
99
87
  'repository_id': 'basic_cloning_vectors/pEASY-T1_(linearized)',
100
88
  },
101
89
  },
@@ -0,0 +1,50 @@
1
+ """
2
+ Functions to be moved to pydna at some point.
3
+ """
4
+
5
+ from opencloning_linkml.datamodel import AssemblySource
6
+ from Bio.SeqFeature import Location
7
+ from opencloning_linkml.datamodel import RestrictionEnzymeDigestionSource
8
+ from opencloning_linkml.datamodel import RestrictionSequenceCut
9
+ from opencloning_linkml.datamodel import Primer as PrimerModel
10
+ from Bio.Restriction.Restriction import RestrictionType, RestrictionBatch
11
+ from pydna.primer import Primer as PydnaPrimer
12
+
13
+
14
+ def is_assembly_complete(source: AssemblySource) -> bool:
15
+ return any(f.type == 'AssemblyFragment' for f in source.input)
16
+
17
+
18
+ def minimal_assembly_overlap(source: AssemblySource) -> int:
19
+ all_overlaps = list()
20
+ for f in source.input:
21
+ if f.type != 'AssemblyFragment':
22
+ continue
23
+ if f.left_location is not None:
24
+ all_overlaps.append(len(Location.fromstring(f.left_location)))
25
+ if f.right_location is not None:
26
+ all_overlaps.append(len(Location.fromstring(f.right_location)))
27
+ if len(all_overlaps) == 0:
28
+ raise ValueError('Assembly is not complete')
29
+ return min(all_overlaps)
30
+
31
+
32
+ def get_enzymes_from_source(source: RestrictionEnzymeDigestionSource) -> list[str]:
33
+ out = list()
34
+ if source.left_edge is not None:
35
+ out.append(source.left_edge.restriction_enzyme)
36
+ if source.right_edge is not None:
37
+ out.append(source.right_edge.restriction_enzyme)
38
+ # Unique values, sorted the same way
39
+ return sorted(list(set(out)), key=out.index)
40
+
41
+
42
+ def restriction_sequence_cut_to_cutsite_tuple(
43
+ restriction_sequence_cut: RestrictionSequenceCut,
44
+ ) -> tuple[tuple[int, int], RestrictionType]:
45
+ restriction_enzyme = RestrictionBatch(first=[restriction_sequence_cut.restriction_enzyme]).pop()
46
+ return ((restriction_sequence_cut.cut_watson, restriction_sequence_cut.overhang), restriction_enzyme)
47
+
48
+
49
+ def primer_model_to_pydna_primer(primer_model: PrimerModel) -> PydnaPrimer:
50
+ return PydnaPrimer(primer_model.sequence, id=str(primer_model.id), name=primer_model.name)