opencloning 0.3.8__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencloning/app_settings.py +1 -0
- opencloning/batch_cloning/EBIC/example.py +1 -3
- opencloning/batch_cloning/pombe/pombe_clone.py +29 -37
- opencloning/batch_cloning/pombe/pombe_summary.py +11 -7
- opencloning/batch_cloning/ziqiang_et_al2024/__init__.py +28 -56
- opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json +47 -56
- opencloning/bug_fixing/README.md +5 -2
- opencloning/bug_fixing/backend_v0_3.py +12 -15
- opencloning/dna_functions.py +5 -6
- opencloning/dna_utils.py +26 -21
- opencloning/endpoints/assembly.py +27 -23
- opencloning/endpoints/no_assembly.py +8 -5
- opencloning/endpoints/no_input.py +11 -4
- opencloning/pydantic_models.py +57 -24
- opencloning/request_examples.py +4 -4
- {opencloning-0.3.8.dist-info → opencloning-0.4.2.dist-info}/METADATA +6 -5
- {opencloning-0.3.8.dist-info → opencloning-0.4.2.dist-info}/RECORD +19 -21
- opencloning/assembly2.py +0 -1467
- opencloning/batch_cloning/pombe/pombe_all.sh +0 -9
- {opencloning-0.3.8.dist-info → opencloning-0.4.2.dist-info}/LICENSE +0 -0
- {opencloning-0.3.8.dist-info → opencloning-0.4.2.dist-info}/WHEEL +0 -0
opencloning/bug_fixing/README.md
CHANGED
|
@@ -121,7 +121,7 @@ If you want to fix several bugs from the command line, you can use the `backend_
|
|
|
121
121
|
Before running this script, you need to migrate the data to version `0.3.0` of the schema. See the [full documentation](https://github.com/OpenCloning/OpenCloning_LinkML?tab=readme-ov-file#migration-from-previous-versions-of-the-schema) for details, but in short:
|
|
122
122
|
|
|
123
123
|
```bash
|
|
124
|
-
python -m opencloning_linkl.migrations.migrate file1.json file2.json ...
|
|
124
|
+
python -m opencloning_linkml.migrations.migrate --target-version='0.3.0' file1.json file2.json ...
|
|
125
125
|
```
|
|
126
126
|
|
|
127
127
|
Then, you can run the script:
|
|
@@ -131,7 +131,10 @@ python -m opencloning.bug_fixing.backend_v0_3 file1.json file2.json ...
|
|
|
131
131
|
```
|
|
132
132
|
|
|
133
133
|
For each file:
|
|
134
|
-
* If the file does not need fixing, it will be skipped.
|
|
134
|
+
* If the file does not need fixing, it will be skipped. You can then migrate it to the latest version of the schema by re-running the migration command without the `--target-version` flag:
|
|
135
|
+
```bash
|
|
136
|
+
python -m opencloning_linkml.migrations.migrate file1.json file2.json ...
|
|
137
|
+
```
|
|
135
138
|
* If the file needs fixing, the script will create a new file `file_1_needs_fixing.json` in the same location as the original file, with the problematic sources replaced by templates.
|
|
136
139
|
* You can then load these files into the web application and run the correct steps manually.
|
|
137
140
|
|
|
@@ -27,31 +27,26 @@ def fix_backend_v0_3(input_data: dict) -> CloningStrategy | None:
|
|
|
27
27
|
for source in data['sources']:
|
|
28
28
|
if source['type'] == 'GatewaySource':
|
|
29
29
|
# Take the first assembly value and check that the length of features is 7
|
|
30
|
-
|
|
31
|
-
if len(
|
|
30
|
+
input = source['input']
|
|
31
|
+
if len(input):
|
|
32
32
|
feat2check = (
|
|
33
|
-
|
|
34
|
-
if assembly[0]['left_location'] is not None
|
|
35
|
-
else assembly[0]['right_location']
|
|
33
|
+
input[0]['left_location'] if input[0]['left_location'] is not None else input[0]['right_location']
|
|
36
34
|
)
|
|
37
35
|
if len(SequenceLocationStr(feat2check).to_biopython_location()) != 7:
|
|
38
36
|
problematic_source_ids.add(source['id'])
|
|
39
37
|
|
|
40
|
-
elif '
|
|
38
|
+
elif any(('type' in i and i['type'] == 'AssemblyFragment') for i in source['input']):
|
|
41
39
|
assembly_source = AssemblySource(
|
|
42
40
|
id=source['id'],
|
|
43
41
|
input=source['input'],
|
|
44
|
-
output=source['output'],
|
|
45
42
|
circular=source['circular'],
|
|
46
|
-
assembly=source['assembly'],
|
|
47
43
|
)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
]
|
|
44
|
+
input_ids = [i.sequence for i in assembly_source.input]
|
|
45
|
+
input_seqs = [TextFileSequence.model_validate(s) for s in data['sequences'] if s['id'] in input_ids]
|
|
51
46
|
# Sort input_seqs as in input
|
|
52
|
-
input_seqs.sort(key=lambda x:
|
|
47
|
+
input_seqs.sort(key=lambda x: input_ids.index(x.id))
|
|
53
48
|
if source['type'] == 'PCRSource':
|
|
54
|
-
primer_ids = [assembly_source.
|
|
49
|
+
primer_ids = [assembly_source.input[0].sequence, assembly_source.input[2].sequence]
|
|
55
50
|
primers = [PrimerModel.model_validate(p) for p in data['primers'] if p['id'] in primer_ids]
|
|
56
51
|
input_seqs = [primers[0], input_seqs[0], primers[1]]
|
|
57
52
|
|
|
@@ -68,9 +63,11 @@ def fix_backend_v0_3(input_data: dict) -> CloningStrategy | None:
|
|
|
68
63
|
problematic_source_ids.update(sum([cs.all_children_source_ids(s) for s in problematic_source_ids], []))
|
|
69
64
|
for source_id in problematic_source_ids:
|
|
70
65
|
source = next(s for s in data['sources'] if s['id'] == source_id)
|
|
71
|
-
output_seq = next(s for s in data['sequences'] if s['id'] ==
|
|
72
|
-
|
|
66
|
+
output_seq = next(s for s in data['sequences'] if s['id'] == source_id)
|
|
67
|
+
# Remove assembly info
|
|
68
|
+
remove_keys = ['circular']
|
|
73
69
|
source_keep = {key: value for key, value in source.items() if key not in remove_keys}
|
|
70
|
+
source_keep['input'] = [{'sequence': f['sequence']} for f in source_keep['input']]
|
|
74
71
|
source.clear()
|
|
75
72
|
source.update(source_keep)
|
|
76
73
|
|
opencloning/dna_functions.py
CHANGED
|
@@ -15,7 +15,7 @@ from pydna.common_sub_strings import common_sub_strings
|
|
|
15
15
|
from Bio.SeqIO import parse as seqio_parse
|
|
16
16
|
import io
|
|
17
17
|
import warnings
|
|
18
|
-
from Bio.SeqIO.InsdcIO import
|
|
18
|
+
from Bio.SeqIO.InsdcIO import GenBankScanner, GenBankIterator
|
|
19
19
|
import re
|
|
20
20
|
from .http_client import get_http_client, ConnectError, TimeoutException
|
|
21
21
|
from .ncbi_requests import get_genbank_sequence
|
|
@@ -29,7 +29,7 @@ def format_sequence_genbank(seq: Dseqrecord, seq_name: str = None) -> TextFileSe
|
|
|
29
29
|
correct_name(seq)
|
|
30
30
|
|
|
31
31
|
return TextFileSequence(
|
|
32
|
-
id=0,
|
|
32
|
+
id=int(seq.id) if seq.id is not None and str(seq.id).isdigit() else 0,
|
|
33
33
|
file_content=seq.format('genbank'),
|
|
34
34
|
sequence_file_format=SequenceFileFormat('genbank'),
|
|
35
35
|
overhang_crick_3prime=seq.seq.ovhg,
|
|
@@ -280,10 +280,9 @@ class MyGenBankScanner(GenBankScanner):
|
|
|
280
280
|
|
|
281
281
|
class MyGenBankIterator(GenBankIterator):
|
|
282
282
|
|
|
283
|
-
def
|
|
284
|
-
|
|
285
|
-
records = MyGenBankScanner(debug=0).parse_records(
|
|
286
|
-
return records
|
|
283
|
+
def __init__(self, source):
|
|
284
|
+
super(GenBankIterator, self).__init__(source, fmt='GenBank')
|
|
285
|
+
self.records = MyGenBankScanner(debug=0).parse_records(self.stream)
|
|
287
286
|
|
|
288
287
|
|
|
289
288
|
def custom_file_parser(
|
opencloning/dna_utils.py
CHANGED
|
@@ -15,6 +15,7 @@ from Bio.Data.IUPACData import ambiguous_dna_values as _ambiguous_dna_values
|
|
|
15
15
|
import re
|
|
16
16
|
from Bio.SeqFeature import Location, SimpleLocation
|
|
17
17
|
from pydna.utils import shift_location
|
|
18
|
+
from pairwise_alignments_to_msa.alignment import aligned_tuples_to_MSA
|
|
18
19
|
|
|
19
20
|
aligner = PairwiseAligner(scoring='blastn')
|
|
20
21
|
|
|
@@ -125,33 +126,37 @@ def permutate_trace(reference: str, sanger_trace: str) -> str:
|
|
|
125
126
|
|
|
126
127
|
def align_sanger_traces(dseqr: Dseqrecord, sanger_traces: list[str]) -> list[str]:
|
|
127
128
|
"""Align a sanger track to a dseqr sequence"""
|
|
128
|
-
|
|
129
|
+
|
|
130
|
+
# Ensure sequences are in upper case
|
|
131
|
+
query_str = str(dseqr.seq).upper()
|
|
132
|
+
sanger_traces = [trace.upper() for trace in sanger_traces]
|
|
133
|
+
|
|
129
134
|
# Check that required executables exist in PATH
|
|
130
135
|
if not shutil.which('mars'):
|
|
131
136
|
raise RuntimeError("'mars' executable not found in PATH")
|
|
132
137
|
if not shutil.which('mafft'):
|
|
133
138
|
raise RuntimeError("'mafft' executable not found in PATH")
|
|
134
139
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
return
|
|
140
|
+
aligned_pairs = []
|
|
141
|
+
for trace in sanger_traces:
|
|
142
|
+
# If the sequence is circular, permutate both fwd and reverse complement
|
|
143
|
+
if dseqr.circular:
|
|
144
|
+
fwd = permutate_trace(query_str, trace)
|
|
145
|
+
rvs = permutate_trace(query_str, reverse_complement(trace))
|
|
146
|
+
else:
|
|
147
|
+
fwd = trace
|
|
148
|
+
rvs = reverse_complement(trace)
|
|
149
|
+
|
|
150
|
+
# Pairwise-align and keep the best alignment
|
|
151
|
+
fwd_alignment = next(aligner.align(query_str, fwd))
|
|
152
|
+
rvs_alignment = next(aligner.align(query_str, rvs))
|
|
153
|
+
|
|
154
|
+
best_alignment = fwd_alignment if fwd_alignment.score > rvs_alignment.score else rvs_alignment
|
|
155
|
+
|
|
156
|
+
formatted_alignment = best_alignment.format('fasta').split()[1::2]
|
|
157
|
+
aligned_pairs.append(tuple(formatted_alignment))
|
|
158
|
+
|
|
159
|
+
return aligned_tuples_to_MSA(aligned_pairs)
|
|
155
160
|
|
|
156
161
|
|
|
157
162
|
def compute_regex_site(site: str) -> str:
|
|
@@ -3,7 +3,8 @@ from typing import Union, Literal, Callable
|
|
|
3
3
|
from pydna.dseqrecord import Dseqrecord
|
|
4
4
|
from pydna.primer import Primer as PydnaPrimer
|
|
5
5
|
from pydna.crispr import cas9
|
|
6
|
-
from pydantic import
|
|
6
|
+
from pydantic import create_model, Field
|
|
7
|
+
from typing import Annotated
|
|
7
8
|
from Bio.Restriction.Restriction import RestrictionBatch
|
|
8
9
|
from opencloning.cre_lox import cre_loxP_overlap, annotate_loxP_sites
|
|
9
10
|
from ..dna_functions import (
|
|
@@ -27,7 +28,7 @@ from ..pydantic_models import (
|
|
|
27
28
|
CreLoxRecombinationSource,
|
|
28
29
|
InVivoAssemblySource,
|
|
29
30
|
)
|
|
30
|
-
from
|
|
31
|
+
from pydna.assembly2 import (
|
|
31
32
|
Assembly,
|
|
32
33
|
assemble,
|
|
33
34
|
sticky_end_sub_strings,
|
|
@@ -80,8 +81,8 @@ def format_known_assembly_response(
|
|
|
80
81
|
)
|
|
81
82
|
async def crispr(
|
|
82
83
|
source: CRISPRSource,
|
|
83
|
-
guides: list[PrimerModel],
|
|
84
|
-
sequences:
|
|
84
|
+
guides: Annotated[list[PrimerModel], Field(min_length=1)],
|
|
85
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=2, max_length=2)],
|
|
85
86
|
minimal_homology: int = Query(40, description='The minimum homology between the template and the insert.'),
|
|
86
87
|
):
|
|
87
88
|
"""Return the sequence after performing CRISPR editing by Homology directed repair
|
|
@@ -106,6 +107,7 @@ async def crispr(
|
|
|
106
107
|
400, f'Could not find Cas9 cutsite in the target sequence using the guide: {guide.name}'
|
|
107
108
|
)
|
|
108
109
|
guide_cuts.append(possible_cuts)
|
|
110
|
+
sorted_guide_ids = list(sorted([guide.id for guide in guides]))
|
|
109
111
|
|
|
110
112
|
# Check if homologous recombination is possible
|
|
111
113
|
fragments = [template, insert]
|
|
@@ -144,12 +146,12 @@ async def crispr(
|
|
|
144
146
|
# meant for linear DNA
|
|
145
147
|
|
|
146
148
|
out_sources = [
|
|
147
|
-
CRISPRSource.from_assembly(id=source.id, assembly=a, guides=
|
|
149
|
+
CRISPRSource.from_assembly(id=source.id, assembly=a, guides=sorted_guide_ids, fragments=fragments)
|
|
148
150
|
for a in valid_assemblies
|
|
149
151
|
]
|
|
150
152
|
|
|
151
153
|
# If a specific assembly is requested
|
|
152
|
-
if
|
|
154
|
+
if source.is_assembly_complete():
|
|
153
155
|
return format_known_assembly_response(source, out_sources, [template, insert])
|
|
154
156
|
|
|
155
157
|
out_sequences = [
|
|
@@ -204,7 +206,7 @@ def generate_assemblies(
|
|
|
204
206
|
raise HTTPException(400, *e.args)
|
|
205
207
|
|
|
206
208
|
# If a specific assembly is requested
|
|
207
|
-
if
|
|
209
|
+
if source.is_assembly_complete():
|
|
208
210
|
return format_known_assembly_response(source, out_sources, fragments, product_callback)
|
|
209
211
|
|
|
210
212
|
out_sequences = [
|
|
@@ -225,7 +227,7 @@ def generate_assemblies(
|
|
|
225
227
|
)
|
|
226
228
|
async def ligation(
|
|
227
229
|
source: LigationSource,
|
|
228
|
-
sequences:
|
|
230
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
|
|
229
231
|
blunt: bool = Query(False, description='Use blunt ligation as well as sticky ends.'),
|
|
230
232
|
allow_partial_overlap: bool = Query(False, description='Allow for partially overlapping sticky ends.'),
|
|
231
233
|
circular_only: bool = Query(False, description='Only return circular assemblies.'),
|
|
@@ -239,7 +241,7 @@ async def ligation(
|
|
|
239
241
|
|
|
240
242
|
# If the assembly is known, the blunt parameter is ignored, and we set the algorithm type from the assembly
|
|
241
243
|
# (blunt ligations have features without length)
|
|
242
|
-
if
|
|
244
|
+
if source.is_assembly_complete():
|
|
243
245
|
asm = source.get_assembly_plan(fragments)
|
|
244
246
|
blunt = len(asm[0][2]) == 0
|
|
245
247
|
|
|
@@ -261,8 +263,8 @@ async def ligation(
|
|
|
261
263
|
)
|
|
262
264
|
async def pcr(
|
|
263
265
|
source: PCRSource,
|
|
264
|
-
sequences:
|
|
265
|
-
primers:
|
|
266
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
|
|
267
|
+
primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
|
|
266
268
|
minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
|
|
267
269
|
allowed_mismatches: int = Query(0, description='The number of mismatches allowed'),
|
|
268
270
|
):
|
|
@@ -277,7 +279,7 @@ async def pcr(
|
|
|
277
279
|
# What happens if annealing is zero? That would mean
|
|
278
280
|
# mismatch in the 3' of the primer, which maybe should
|
|
279
281
|
# not be allowed.
|
|
280
|
-
if
|
|
282
|
+
if source.is_assembly_complete():
|
|
281
283
|
minimal_annealing = source.minimal_overlap()
|
|
282
284
|
# Only the ones that match are included in the output assembly
|
|
283
285
|
# location, so the submitted assembly should be returned without
|
|
@@ -315,11 +317,11 @@ async def pcr(
|
|
|
315
317
|
]
|
|
316
318
|
|
|
317
319
|
# If a specific assembly is requested
|
|
318
|
-
if
|
|
320
|
+
if source.is_assembly_complete():
|
|
319
321
|
|
|
320
322
|
def callback(x):
|
|
321
323
|
if source.add_primer_features:
|
|
322
|
-
return annotate_primer_binding_sites(x, fragments
|
|
324
|
+
return annotate_primer_binding_sites(x, fragments)
|
|
323
325
|
else:
|
|
324
326
|
return x
|
|
325
327
|
|
|
@@ -331,7 +333,7 @@ async def pcr(
|
|
|
331
333
|
def callback(fragments, a):
|
|
332
334
|
out_seq = assemble(fragments, a)
|
|
333
335
|
if source.add_primer_features:
|
|
334
|
-
return annotate_primer_binding_sites(out_seq, fragments
|
|
336
|
+
return annotate_primer_binding_sites(out_seq, fragments)
|
|
335
337
|
else:
|
|
336
338
|
return out_seq
|
|
337
339
|
|
|
@@ -353,14 +355,14 @@ async def pcr(
|
|
|
353
355
|
)
|
|
354
356
|
async def homologous_recombination(
|
|
355
357
|
source: HomologousRecombinationSource,
|
|
356
|
-
sequences:
|
|
358
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=2, max_length=2)],
|
|
357
359
|
minimal_homology: int = Query(40, description='The minimum homology between the template and the insert.'),
|
|
358
360
|
):
|
|
359
361
|
|
|
360
362
|
template, insert = [read_dsrecord_from_json(seq) for seq in sequences]
|
|
361
363
|
|
|
362
364
|
# If an assembly is provided, we ignore minimal_homology
|
|
363
|
-
if
|
|
365
|
+
if source.is_assembly_complete():
|
|
364
366
|
minimal_homology = source.minimal_overlap()
|
|
365
367
|
|
|
366
368
|
asm = Assembly((template, insert), limit=minimal_homology, use_all_fragments=True)
|
|
@@ -386,7 +388,7 @@ async def homologous_recombination(
|
|
|
386
388
|
]
|
|
387
389
|
|
|
388
390
|
# If a specific assembly is requested
|
|
389
|
-
if
|
|
391
|
+
if source.is_assembly_complete():
|
|
390
392
|
return format_known_assembly_response(source, out_sources, [template, insert])
|
|
391
393
|
|
|
392
394
|
out_sequences = [
|
|
@@ -411,7 +413,7 @@ async def homologous_recombination(
|
|
|
411
413
|
),
|
|
412
414
|
)
|
|
413
415
|
async def gibson_assembly(
|
|
414
|
-
sequences:
|
|
416
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
|
|
415
417
|
source: Union[GibsonAssemblySource, OverlapExtensionPCRLigationSource, InFusionSource, InVivoAssemblySource],
|
|
416
418
|
minimal_homology: int = Query(
|
|
417
419
|
40, description='The minimum homology between consecutive fragments in the assembly.'
|
|
@@ -450,7 +452,7 @@ async def gibson_assembly(
|
|
|
450
452
|
)
|
|
451
453
|
async def restriction_and_ligation(
|
|
452
454
|
source: RestrictionAndLigationSource,
|
|
453
|
-
sequences:
|
|
455
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
|
|
454
456
|
allow_partial_overlap: bool = Query(False, description='Allow for partially overlapping sticky ends.'),
|
|
455
457
|
circular_only: bool = Query(False, description='Only return circular assemblies.'),
|
|
456
458
|
):
|
|
@@ -492,7 +494,7 @@ async def restriction_and_ligation(
|
|
|
492
494
|
)
|
|
493
495
|
async def gateway(
|
|
494
496
|
source: GatewaySource,
|
|
495
|
-
sequences:
|
|
497
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
|
|
496
498
|
circular_only: bool = Query(False, description='Only return circular assemblies.'),
|
|
497
499
|
only_multi_site: bool = Query(
|
|
498
500
|
False, description='Only return assemblies where more than one site per sequence recombined.'
|
|
@@ -537,7 +539,7 @@ async def gateway(
|
|
|
537
539
|
multi_site_sources = [
|
|
538
540
|
i
|
|
539
541
|
for i, s in enumerate(resp['sources'])
|
|
540
|
-
if all(join.left_location != join.right_location for join in s.
|
|
542
|
+
if all(join.left_location != join.right_location for join in s.input)
|
|
541
543
|
]
|
|
542
544
|
sources = [resp['sources'][i] for i in multi_site_sources]
|
|
543
545
|
sequences = [resp['sequences'][i] for i in multi_site_sources]
|
|
@@ -554,7 +556,9 @@ async def gateway(
|
|
|
554
556
|
sequences=(list[TextFileSequence], ...),
|
|
555
557
|
),
|
|
556
558
|
)
|
|
557
|
-
async def cre_lox_recombination(
|
|
559
|
+
async def cre_lox_recombination(
|
|
560
|
+
source: CreLoxRecombinationSource, sequences: Annotated[list[TextFileSequence], Field(min_length=1)]
|
|
561
|
+
):
|
|
558
562
|
fragments = [read_dsrecord_from_json(seq) for seq in sequences]
|
|
559
563
|
|
|
560
564
|
# Lambda function for code clarity
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from fastapi import Query, HTTPException
|
|
2
2
|
from pydna.dseqrecord import Dseqrecord
|
|
3
|
-
from pydantic import
|
|
3
|
+
from pydantic import create_model, Field
|
|
4
4
|
from typing import Annotated
|
|
5
5
|
from Bio.Restriction import RestrictionBatch
|
|
6
6
|
|
|
@@ -30,7 +30,7 @@ router = get_router()
|
|
|
30
30
|
)
|
|
31
31
|
async def restriction(
|
|
32
32
|
source: RestrictionEnzymeDigestionSource,
|
|
33
|
-
sequences:
|
|
33
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
|
|
34
34
|
restriction_enzymes: Annotated[list[str], Query(default_factory=list)],
|
|
35
35
|
):
|
|
36
36
|
# There should be 1 or 2 enzymes in the request if the source does not have cuts
|
|
@@ -53,7 +53,10 @@ async def restriction(
|
|
|
53
53
|
|
|
54
54
|
cutsites = seqr.seq.get_cutsites(*enzymes)
|
|
55
55
|
cutsite_pairs = seqr.seq.get_cutsite_pairs(cutsites)
|
|
56
|
-
sources = [
|
|
56
|
+
sources = [
|
|
57
|
+
RestrictionEnzymeDigestionSource.from_cutsites(*p, [{'sequence': sequences[0].id}], source.id)
|
|
58
|
+
for p in cutsite_pairs
|
|
59
|
+
]
|
|
57
60
|
|
|
58
61
|
all_enzymes = set(enzyme for s in sources for enzyme in s.get_enzymes())
|
|
59
62
|
enzymes_not_cutting = set(restriction_enzymes) - set(all_enzymes)
|
|
@@ -90,7 +93,7 @@ async def restriction(
|
|
|
90
93
|
)
|
|
91
94
|
async def polymerase_extension(
|
|
92
95
|
source: PolymeraseExtensionSource,
|
|
93
|
-
sequences:
|
|
96
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
|
|
94
97
|
):
|
|
95
98
|
"""Return the sequence from a polymerase extension reaction"""
|
|
96
99
|
|
|
@@ -117,7 +120,7 @@ async def polymerase_extension(
|
|
|
117
120
|
)
|
|
118
121
|
async def reverse_complement(
|
|
119
122
|
source: ReverseComplementSource,
|
|
120
|
-
sequences:
|
|
123
|
+
sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
|
|
121
124
|
):
|
|
122
125
|
dseq = read_dsrecord_from_json(sequences[0])
|
|
123
126
|
out_sequence = dseq.reverse_complement()
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from fastapi import Query, HTTPException
|
|
2
2
|
from pydna.dseqrecord import Dseqrecord
|
|
3
3
|
from pydna.dseq import Dseq
|
|
4
|
-
from pydantic import
|
|
4
|
+
from pydantic import create_model, Field
|
|
5
|
+
from typing import Annotated
|
|
5
6
|
|
|
6
7
|
from ..dna_functions import (
|
|
7
8
|
format_sequence_genbank,
|
|
@@ -12,6 +13,7 @@ from ..pydantic_models import (
|
|
|
12
13
|
TextFileSequence,
|
|
13
14
|
ManuallyTypedSource,
|
|
14
15
|
OligoHybridizationSource,
|
|
16
|
+
SourceInput,
|
|
15
17
|
)
|
|
16
18
|
|
|
17
19
|
from .. import request_examples
|
|
@@ -54,11 +56,16 @@ async def manually_typed(source: ManuallyTypedSource):
|
|
|
54
56
|
)
|
|
55
57
|
async def oligonucleotide_hybridization(
|
|
56
58
|
source: OligoHybridizationSource,
|
|
57
|
-
primers:
|
|
59
|
+
primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
|
|
58
60
|
minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
|
|
59
61
|
):
|
|
60
|
-
|
|
61
|
-
|
|
62
|
+
if len(source.input):
|
|
63
|
+
watson_seq = next((p.sequence for p in primers if p.id == source.input[0].sequence), None)
|
|
64
|
+
crick_seq = next((p.sequence for p in primers if p.id == source.input[1].sequence), None)
|
|
65
|
+
else:
|
|
66
|
+
watson_seq = primers[0].sequence
|
|
67
|
+
crick_seq = primers[1].sequence if len(primers) > 1 else watson_seq
|
|
68
|
+
source.input = [SourceInput(sequence=primers[0].id), SourceInput(sequence=primers[1].id)]
|
|
62
69
|
|
|
63
70
|
if watson_seq is None or crick_seq is None:
|
|
64
71
|
raise HTTPException(404, 'Invalid oligo id.')
|
opencloning/pydantic_models.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from pydantic import BaseModel, Field, model_validator, field_validator
|
|
2
|
-
from typing import Optional, List
|
|
1
|
+
from pydantic import BaseModel, Field, model_validator, field_validator, Discriminator, Tag
|
|
2
|
+
from typing import Optional, List, Union, Annotated
|
|
3
3
|
from pydantic_core import core_schema
|
|
4
4
|
from ._version import __version__
|
|
5
5
|
|
|
@@ -49,8 +49,9 @@ from opencloning_linkml.datamodel import (
|
|
|
49
49
|
SEVASource as _SEVASource,
|
|
50
50
|
CreLoxRecombinationSource as _CreLoxRecombinationSource,
|
|
51
51
|
InVivoAssemblySource as _InVivoAssemblySource,
|
|
52
|
+
SourceInput as _SourceInput,
|
|
52
53
|
)
|
|
53
|
-
from .assembly2 import (
|
|
54
|
+
from pydna.assembly2 import (
|
|
54
55
|
edge_representation2subfragment_representation,
|
|
55
56
|
subfragment_representation2edge_representation,
|
|
56
57
|
)
|
|
@@ -64,6 +65,10 @@ class TextFileSequence(_TextFileSequence):
|
|
|
64
65
|
pass
|
|
65
66
|
|
|
66
67
|
|
|
68
|
+
class SourceInput(_SourceInput):
|
|
69
|
+
pass
|
|
70
|
+
|
|
71
|
+
|
|
67
72
|
class PrimerModel(_Primer):
|
|
68
73
|
"""Called PrimerModel not to be confused with the class from pydna."""
|
|
69
74
|
|
|
@@ -94,8 +99,23 @@ class SeqFeatureModel(BaseModel):
|
|
|
94
99
|
# Sources =========================================
|
|
95
100
|
|
|
96
101
|
|
|
97
|
-
|
|
98
|
-
|
|
102
|
+
def input_discriminator(v) -> str | None:
|
|
103
|
+
"""
|
|
104
|
+
Discriminator that yields SourceInput by default
|
|
105
|
+
"""
|
|
106
|
+
if isinstance(v, dict):
|
|
107
|
+
input_type = v.get('type', None)
|
|
108
|
+
if input_type is None:
|
|
109
|
+
return 'SourceInput'
|
|
110
|
+
else:
|
|
111
|
+
return input_type
|
|
112
|
+
elif isinstance(v, SourceInput):
|
|
113
|
+
return v.type
|
|
114
|
+
return None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class SourceCommonClass(BaseModel):
|
|
118
|
+
input: Optional[List[SourceInput]] = Field(
|
|
99
119
|
default_factory=list,
|
|
100
120
|
description="""The sequences that are an input to this source. If the source represents external import of a sequence, it's empty.""",
|
|
101
121
|
json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source']}},
|
|
@@ -292,7 +312,7 @@ class SequenceLocationStr(str):
|
|
|
292
312
|
return cls.field_validator(value)
|
|
293
313
|
|
|
294
314
|
|
|
295
|
-
class AssemblyFragment(_AssemblyFragment):
|
|
315
|
+
class AssemblyFragment(_AssemblyFragment, SourceInput):
|
|
296
316
|
left_location: Optional[SequenceLocationStr] = None
|
|
297
317
|
right_location: Optional[SequenceLocationStr] = None
|
|
298
318
|
|
|
@@ -322,14 +342,26 @@ class AssemblyFragment(_AssemblyFragment):
|
|
|
322
342
|
class AssemblySourceCommonClass(SourceCommonClass):
|
|
323
343
|
# TODO: This is different in the LinkML model, because there it is not required,
|
|
324
344
|
# and here we make it default to list.
|
|
325
|
-
|
|
326
|
-
|
|
345
|
+
input: Optional[
|
|
346
|
+
List[
|
|
347
|
+
Annotated[
|
|
348
|
+
Union[
|
|
349
|
+
Annotated[SourceInput, Tag('SourceInput')],
|
|
350
|
+
Annotated['AssemblyFragment', Tag('AssemblyFragment')],
|
|
351
|
+
],
|
|
352
|
+
Discriminator(input_discriminator),
|
|
353
|
+
]
|
|
354
|
+
]
|
|
355
|
+
] = Field(
|
|
356
|
+
default_factory=list,
|
|
357
|
+
description="""The inputs to this source. If the source represents external import of a sequence, it's empty.""",
|
|
358
|
+
json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source'], 'slot_uri': 'schema:object'}},
|
|
327
359
|
)
|
|
328
360
|
|
|
329
361
|
def minimal_overlap(self):
|
|
330
362
|
"""Returns the minimal overlap between the fragments in the assembly"""
|
|
331
363
|
all_overlaps = list()
|
|
332
|
-
for f in self.
|
|
364
|
+
for f in self.input:
|
|
333
365
|
if f.left_location is not None:
|
|
334
366
|
all_overlaps.append(f.left_location.end - f.left_location.start)
|
|
335
367
|
if f.right_location is not None:
|
|
@@ -338,9 +370,13 @@ class AssemblySourceCommonClass(SourceCommonClass):
|
|
|
338
370
|
|
|
339
371
|
def get_assembly_plan(self, fragments: list[_SeqRecord]) -> tuple:
|
|
340
372
|
"""Returns the assembly plan"""
|
|
341
|
-
subf = [f.to_fragment_tuple(fragments) for f in self.
|
|
373
|
+
subf = [f.to_fragment_tuple(fragments) for f in self.input if f.type == 'AssemblyFragment']
|
|
342
374
|
return subfragment_representation2edge_representation(subf, self.circular)
|
|
343
375
|
|
|
376
|
+
def is_assembly_complete(self) -> bool:
|
|
377
|
+
"""Returns True if the assembly is complete"""
|
|
378
|
+
return any(f.type == 'AssemblyFragment' for f in self.input)
|
|
379
|
+
|
|
344
380
|
@classmethod
|
|
345
381
|
def from_assembly(
|
|
346
382
|
cls,
|
|
@@ -353,7 +389,6 @@ class AssemblySourceCommonClass(SourceCommonClass):
|
|
|
353
389
|
|
|
354
390
|
# Replace the positions with the actual ids
|
|
355
391
|
fragment_ids = [int(f.id) for f in fragments]
|
|
356
|
-
input_ids = [int(f.id) for f in fragments if not isinstance(f, _PydnaPrimer)]
|
|
357
392
|
|
|
358
393
|
# Here the ids are still the positions in the fragments list
|
|
359
394
|
fragment_assembly_positions = edge_representation2subfragment_representation(assembly, circular)
|
|
@@ -368,8 +403,7 @@ class AssemblySourceCommonClass(SourceCommonClass):
|
|
|
368
403
|
]
|
|
369
404
|
return cls(
|
|
370
405
|
id=id,
|
|
371
|
-
input=
|
|
372
|
-
assembly=assembly_fragments,
|
|
406
|
+
input=assembly_fragments,
|
|
373
407
|
circular=circular,
|
|
374
408
|
**kwargs,
|
|
375
409
|
)
|
|
@@ -428,7 +462,9 @@ class CRISPRSource(AssemblySourceCommonClass, _CRISPRSource):
|
|
|
428
462
|
fragments: list[_SeqRecord],
|
|
429
463
|
guides: list[int],
|
|
430
464
|
):
|
|
431
|
-
|
|
465
|
+
source = super().from_assembly(assembly, id, False, fragments)
|
|
466
|
+
source.input += [SourceInput(sequence=guide) for guide in guides]
|
|
467
|
+
return source
|
|
432
468
|
|
|
433
469
|
|
|
434
470
|
class RestrictionAndLigationSource(AssemblySourceCommonClass, _RestrictionAndLigationSource):
|
|
@@ -486,17 +522,14 @@ class BaseCloningStrategy(_CloningStrategy):
|
|
|
486
522
|
json_schema_extra={'linkml_meta': {'alias': 'backend_version', 'domain_of': ['CloningStrategy']}},
|
|
487
523
|
)
|
|
488
524
|
|
|
489
|
-
def next_primer_id(self):
|
|
490
|
-
return max([p.id for p in self.primers], default=0) + 1
|
|
491
|
-
|
|
492
525
|
def add_primer(self, primer: PrimerModel):
|
|
493
526
|
if primer in self.primers:
|
|
494
527
|
return
|
|
495
|
-
primer.id = self.
|
|
528
|
+
primer.id = self.next_id()
|
|
496
529
|
self.primers.append(primer)
|
|
497
530
|
|
|
498
|
-
def
|
|
499
|
-
return max([s.id for s in self.sources + self.sequences], default=0) + 1
|
|
531
|
+
def next_id(self):
|
|
532
|
+
return max([s.id for s in self.sources + self.sequences + self.primers], default=0) + 1
|
|
500
533
|
|
|
501
534
|
def add_source_and_sequence(self, source: SourceCommonClass, sequence: TextFileSequence):
|
|
502
535
|
if source in self.sources:
|
|
@@ -505,11 +538,11 @@ class BaseCloningStrategy(_CloningStrategy):
|
|
|
505
538
|
f"Source {source.id} already exists in the cloning strategy, but sequence {sequence.id} it's not its output."
|
|
506
539
|
)
|
|
507
540
|
return
|
|
508
|
-
|
|
541
|
+
new_id = self.next_id()
|
|
542
|
+
source.id = new_id
|
|
509
543
|
self.sources.append(source)
|
|
510
|
-
sequence.id =
|
|
544
|
+
sequence.id = new_id
|
|
511
545
|
self.sequences.append(sequence)
|
|
512
|
-
source.output = sequence.id
|
|
513
546
|
|
|
514
547
|
def all_children_source_ids(self, source_id: int, source_children: list | None = None) -> list[int]:
|
|
515
548
|
"""Returns the ids of all source children ids of a source"""
|
|
@@ -517,7 +550,7 @@ class BaseCloningStrategy(_CloningStrategy):
|
|
|
517
550
|
if source_children is None:
|
|
518
551
|
source_children = []
|
|
519
552
|
|
|
520
|
-
sources_that_take_output_as_input = [s for s in self.sources if source.
|
|
553
|
+
sources_that_take_output_as_input = [s for s in self.sources if source.id in [inp.sequence for inp in s.input]]
|
|
521
554
|
new_source_ids = [s.id for s in sources_that_take_output_as_input]
|
|
522
555
|
|
|
523
556
|
source_children.extend(new_source_ids)
|
opencloning/request_examples.py
CHANGED
|
@@ -66,10 +66,10 @@ oligonucleotide_hybridization_examples = {
|
|
|
66
66
|
'value': {
|
|
67
67
|
'source': {
|
|
68
68
|
'id': 1,
|
|
69
|
-
'input': [
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
69
|
+
'input': [
|
|
70
|
+
{'sequence': 2},
|
|
71
|
+
{'sequence': 3},
|
|
72
|
+
],
|
|
73
73
|
},
|
|
74
74
|
'primers': [
|
|
75
75
|
{'id': 2, 'name': 'primer1', 'sequence': 'aaGCGGCCGCgtagaactttatgtgcttccttacattggt'},
|