opencloning 0.4.8__py3-none-any.whl → 0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencloning/app_settings.py +7 -0
- opencloning/batch_cloning/pombe/__init__.py +2 -2
- opencloning/batch_cloning/pombe/pombe_clone.py +31 -112
- opencloning/batch_cloning/pombe/pombe_summary.py +20 -8
- opencloning/batch_cloning/ziqiang_et_al2024/__init__.py +8 -8
- opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json +2 -9
- opencloning/bug_fixing/backend_v0_3.py +13 -5
- opencloning/catalogs/__init__.py +36 -0
- opencloning/catalogs/igem2024.yaml +2172 -0
- opencloning/catalogs/openDNA_collections.yaml +1161 -0
- opencloning/catalogs/readme.txt +1 -0
- opencloning/catalogs/seva.tsv +231 -0
- opencloning/catalogs/snapgene.yaml +2837 -0
- opencloning/dna_functions.py +155 -158
- opencloning/dna_utils.py +45 -62
- opencloning/ebic/primer_design.py +1 -1
- opencloning/endpoints/annotation.py +9 -13
- opencloning/endpoints/assembly.py +157 -378
- opencloning/endpoints/endpoint_utils.py +52 -0
- opencloning/endpoints/external_import.py +169 -124
- opencloning/endpoints/no_assembly.py +23 -39
- opencloning/endpoints/no_input.py +32 -47
- opencloning/endpoints/other.py +1 -1
- opencloning/endpoints/primer_design.py +2 -1
- opencloning/http_client.py +2 -2
- opencloning/ncbi_requests.py +113 -47
- opencloning/primer_design.py +1 -1
- opencloning/pydantic_models.py +10 -510
- opencloning/request_examples.py +10 -22
- opencloning/temp_functions.py +50 -0
- {opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/METADATA +18 -8
- opencloning-0.5.dist-info/RECORD +51 -0
- {opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/WHEEL +1 -1
- opencloning/cre_lox.py +0 -116
- opencloning/gateway.py +0 -154
- opencloning-0.4.8.dist-info/RECORD +0 -45
- {opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from fastapi import HTTPException
|
|
2
|
+
from pydna.dseqrecord import Dseqrecord
|
|
3
|
+
from opencloning_linkml.datamodel import Source, TextFileSequence
|
|
4
|
+
from typing import Literal
|
|
5
|
+
from opencloning.dna_functions import format_sequence_genbank
|
|
6
|
+
from pydna.opencloning_models import id_mode
|
|
7
|
+
from opencloning.dna_functions import get_invalid_enzyme_names
|
|
8
|
+
from Bio.Restriction.Restriction import RestrictionBatch
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def format_products(
    source_id: int,
    products: list[Dseqrecord],
    completed_source: Source | None,
    output_name: str,
    no_products_error_message: str = 'No products were found.',
    wrong_completed_source_error_message: str = 'The provided assembly is not valid.',
) -> dict[Literal['sources', 'sequences'], list[Source] | list[TextFileSequence]]:
    """
    Build the ``{'sources': ..., 'sequences': ...}`` payload for a list of products.

    Each product is rendered with ``format_sequence_genbank`` under ``output_name``
    and given ``source_id`` as its id. Each product's ``source`` is serialized with
    ``to_pydantic_model(source_id).model_dump()`` (so the entries under ``'sources'``
    are plain dicts) and its ``output_name`` key is set to ``output_name``.

    If ``completed_source`` is provided, only the first product whose serialized
    source equals ``completed_source.model_dump()`` is returned; if none matches,
    an ``HTTPException`` (400) carrying ``wrong_completed_source_error_message``
    is raised. If ``completed_source`` is ``None``, all products are returned, or
    an ``HTTPException`` (400) carrying ``no_products_error_message`` is raised
    when ``products`` is empty.
    """
    # Render every product first, stamping the requested id on each result.
    sequences = []
    for product in products:
        rendered = format_sequence_genbank(product, output_name)
        rendered.id = source_id
        sequences.append(rendered)

    # Serialize the sources with python-internal ids switched off, overriding
    # (or adding) the output name on each dumped dict.
    with id_mode(use_python_internal_id=False):
        sources = [
            {**product.source.to_pydantic_model(source_id).model_dump(), 'output_name': output_name}
            for product in products
        ]

    if completed_source is None:
        # No specific product requested: return everything, but refuse an empty result.
        if len(products) == 0:
            raise HTTPException(400, no_products_error_message)
        return {
            'sources': sources,
            'sequences': sequences,
        }

    # A specific source was requested: return the first product that reproduces it exactly.
    expected = completed_source.model_dump()
    for sequence, candidate in zip(sequences, sources):
        if candidate == expected:
            return {
                'sources': [candidate],
                'sequences': [sequence],
            }
    raise HTTPException(400, wrong_completed_source_error_message)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def parse_restriction_enzymes(enzymes: list[str]) -> RestrictionBatch:
    """
    Turn a list of enzyme names into a ``RestrictionBatch``.

    The names are first checked with ``get_invalid_enzyme_names``; if any are
    reported as invalid, an ``HTTPException`` (404) listing them is raised.
    ``None`` entries are dropped before the batch is built.
    """
    unknown_names = get_invalid_enzyme_names(enzymes)
    if len(unknown_names) > 0:
        detail = 'These enzymes do not exist: ' + ', '.join(unknown_names)
        raise HTTPException(404, detail)

    valid_names = [name for name in enzymes if name is not None]
    return RestrictionBatch(first=valid_names)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from fastapi import Body, Query, HTTPException, Response, UploadFile, File
|
|
2
|
+
from opencloning.app_settings import settings
|
|
2
3
|
from pydantic import create_model
|
|
3
4
|
import io
|
|
4
5
|
import warnings
|
|
@@ -6,11 +7,12 @@ import asyncio
|
|
|
6
7
|
from starlette.responses import RedirectResponse
|
|
7
8
|
from Bio import BiopythonParserWarning
|
|
8
9
|
from typing import Annotated
|
|
9
|
-
from urllib.error import HTTPError
|
|
10
10
|
from pydna.utils import location_boundaries
|
|
11
11
|
|
|
12
|
+
from opencloning.endpoints.endpoint_utils import format_products
|
|
13
|
+
|
|
12
14
|
from ..get_router import get_router
|
|
13
|
-
from
|
|
15
|
+
from opencloning_linkml.datamodel import (
|
|
14
16
|
TextFileSequence,
|
|
15
17
|
UploadedFileSource,
|
|
16
18
|
RepositoryIdSource,
|
|
@@ -23,18 +25,22 @@ from ..pydantic_models import (
|
|
|
23
25
|
GenomeCoordinatesSource,
|
|
24
26
|
SequenceFileFormat,
|
|
25
27
|
SEVASource,
|
|
26
|
-
SequenceLocationStr,
|
|
27
28
|
OpenDNACollectionsSource,
|
|
29
|
+
NCBISequenceSource,
|
|
28
30
|
)
|
|
31
|
+
from pydna.opencloning_models import SequenceLocationStr
|
|
29
32
|
from ..dna_functions import (
|
|
30
33
|
format_sequence_genbank,
|
|
34
|
+
get_sequence_from_benchling_url,
|
|
35
|
+
get_sequence_from_iGEM2024,
|
|
36
|
+
get_sequence_from_openDNA_collections,
|
|
31
37
|
request_from_addgene,
|
|
38
|
+
request_from_snapgene,
|
|
32
39
|
request_from_wekwikgene,
|
|
33
|
-
get_sequences_from_file_url,
|
|
34
|
-
get_sequence_from_snapgene_url,
|
|
35
40
|
custom_file_parser,
|
|
36
41
|
get_sequence_from_euroscarf_url,
|
|
37
42
|
get_seva_plasmid,
|
|
43
|
+
read_dsrecord_from_json,
|
|
38
44
|
)
|
|
39
45
|
from .. import request_examples
|
|
40
46
|
from .. import ncbi_requests
|
|
@@ -137,12 +143,7 @@ async def read_from_file(
|
|
|
137
143
|
warning_messages = [str(w.message) for w in warnings_captured]
|
|
138
144
|
|
|
139
145
|
except ValueError as e:
|
|
140
|
-
raise HTTPException(422, f'Biopython cannot process this file: {e}.')
|
|
141
|
-
|
|
142
|
-
# This happens when textfiles are empty or contain something else, or when reading a text file as snapgene file,
|
|
143
|
-
# since StringIO does not raise an error when "Unexpected end of packet" is found
|
|
144
|
-
if len(dseqs) == 0:
|
|
145
|
-
raise HTTPException(422, 'Biopython cannot process this file.')
|
|
146
|
+
raise HTTPException(422, f'Biopython cannot process this file: {e}.') from e
|
|
146
147
|
|
|
147
148
|
if index_in_file is not None:
|
|
148
149
|
if index_in_file >= len(dseqs):
|
|
@@ -199,6 +200,10 @@ async def read_from_file(
|
|
|
199
200
|
if len(warning_messages) > 0:
|
|
200
201
|
response.headers['x-warning'] = '; '.join(warning_messages)
|
|
201
202
|
|
|
203
|
+
# Validate that the sequences are in a valid genbank format
|
|
204
|
+
for seq in out_sequences:
|
|
205
|
+
read_dsrecord_from_json(seq)
|
|
206
|
+
|
|
202
207
|
return {'sequences': out_sequences, 'sources': out_sources}
|
|
203
208
|
|
|
204
209
|
|
|
@@ -206,22 +211,20 @@ async def read_from_file(
|
|
|
206
211
|
# directly the object.
|
|
207
212
|
|
|
208
213
|
|
|
209
|
-
def
|
|
214
|
+
def handle_repository_errors(exception: Exception, repository_name: str) -> None:
|
|
215
|
+
"""
|
|
216
|
+
Centralized error handler for repository requests.
|
|
217
|
+
Re-raises HTTPException as-is, converts ConnectError to HTTPException with 504 status.
|
|
218
|
+
"""
|
|
219
|
+
if isinstance(exception, HTTPException):
|
|
220
|
+
raise
|
|
221
|
+
elif isinstance(exception, ConnectError):
|
|
222
|
+
raise HTTPException(504, f'Unable to connect to {repository_name}: {exception}')
|
|
223
|
+
else: # pragma: no cover
|
|
224
|
+
import traceback
|
|
210
225
|
|
|
211
|
-
|
|
212
|
-
raise HTTPException(
|
|
213
|
-
503, f'{source.repository_name} returned: {exception} - {source.repository_name} might be down'
|
|
214
|
-
)
|
|
215
|
-
elif exception.code == 400 or exception.code == 404:
|
|
216
|
-
raise HTTPException(
|
|
217
|
-
404,
|
|
218
|
-
f'{source.repository_name} returned: {exception} - Likely you inserted a wrong {source.repository_name} id',
|
|
219
|
-
)
|
|
220
|
-
elif exception.code == 403:
|
|
221
|
-
raise HTTPException(
|
|
222
|
-
403,
|
|
223
|
-
f'Request to {source.repository_name} is not allowed. Please check that the URL is whitelisted.',
|
|
224
|
-
)
|
|
226
|
+
traceback.print_exc()
|
|
227
|
+
raise HTTPException(500, f'Unexpected error: {exception}')
|
|
225
228
|
|
|
226
229
|
|
|
227
230
|
# Redirect to the right repository
|
|
@@ -244,36 +247,46 @@ def repository_id_http_error_handler(exception: HTTPError, source: RepositoryIdS
|
|
|
244
247
|
)
|
|
245
248
|
async def get_from_repository_id(
|
|
246
249
|
source: (
|
|
247
|
-
|
|
248
|
-
| AddgeneIdSource
|
|
250
|
+
AddgeneIdSource
|
|
249
251
|
| BenchlingUrlSource
|
|
250
252
|
| SnapGenePlasmidSource
|
|
251
253
|
| EuroscarfSource
|
|
252
254
|
| WekWikGeneIdSource
|
|
253
255
|
| SEVASource
|
|
254
256
|
| OpenDNACollectionsSource
|
|
257
|
+
| NCBISequenceSource
|
|
255
258
|
),
|
|
256
259
|
):
|
|
257
|
-
|
|
260
|
+
mapping_dict = {
|
|
261
|
+
'AddgeneIdSource': 'addgene',
|
|
262
|
+
'BenchlingUrlSource': 'benchling',
|
|
263
|
+
'SnapGenePlasmidSource': 'snapgene',
|
|
264
|
+
'EuroscarfSource': 'euroscarf',
|
|
265
|
+
'WekWikGeneIdSource': 'wekwikgene',
|
|
266
|
+
'SEVASource': 'seva',
|
|
267
|
+
'OpenDNACollectionsSource': 'open_dna_collections',
|
|
268
|
+
'NCBISequenceSource': 'genbank',
|
|
269
|
+
}
|
|
270
|
+
return RedirectResponse(f'/repository_id/{mapping_dict[source.type]}', status_code=307)
|
|
258
271
|
|
|
259
272
|
|
|
260
273
|
@router.post(
|
|
261
274
|
'/repository_id/genbank',
|
|
262
275
|
response_model=create_model(
|
|
263
|
-
'RepositoryIdResponse', sources=(list[
|
|
276
|
+
'RepositoryIdResponse', sources=(list[NCBISequenceSource], ...), sequences=(list[TextFileSequence], ...)
|
|
264
277
|
),
|
|
265
278
|
)
|
|
266
|
-
async def get_from_repository_id_genbank(source:
|
|
279
|
+
async def get_from_repository_id_genbank(source: NCBISequenceSource):
|
|
267
280
|
try:
|
|
268
281
|
# This request already fails if the sequence does not exist
|
|
269
282
|
seq_length = await ncbi_requests.get_sequence_length_from_sequence_accession(source.repository_id)
|
|
270
|
-
if seq_length >
|
|
271
|
-
raise HTTPException(400, 'sequence is too long (max
|
|
283
|
+
if seq_length > settings.NCBI_MAX_SEQUENCE_LENGTH:
|
|
284
|
+
raise HTTPException(400, f'sequence is too long (max {settings.NCBI_MAX_SEQUENCE_LENGTH} bp)')
|
|
272
285
|
seq = await ncbi_requests.get_genbank_sequence(source.repository_id)
|
|
273
|
-
except
|
|
274
|
-
|
|
286
|
+
except Exception as exception:
|
|
287
|
+
handle_repository_errors(exception, 'NCBI')
|
|
275
288
|
|
|
276
|
-
return
|
|
289
|
+
return format_products(source.id, [seq], None, source.output_name)
|
|
277
290
|
|
|
278
291
|
|
|
279
292
|
@router.post(
|
|
@@ -284,13 +297,23 @@ async def get_from_repository_id_genbank(source: RepositoryIdSource):
|
|
|
284
297
|
)
|
|
285
298
|
async def get_from_repository_id_addgene(source: AddgeneIdSource):
|
|
286
299
|
try:
|
|
287
|
-
dseq
|
|
288
|
-
except
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
300
|
+
dseq = await request_from_addgene(source.repository_id)
|
|
301
|
+
except Exception as exception:
|
|
302
|
+
handle_repository_errors(exception, 'Addgene')
|
|
303
|
+
|
|
304
|
+
return format_products(
|
|
305
|
+
source.id,
|
|
306
|
+
[dseq],
|
|
307
|
+
source if source.sequence_file_url is not None else None,
|
|
308
|
+
source.output_name,
|
|
309
|
+
wrong_completed_source_error_message=f'''
|
|
310
|
+
The provided source is not valid.
|
|
311
|
+
We found the following:
|
|
312
|
+
- repository_id: {dseq.source.repository_id}
|
|
313
|
+
- sequence_file_url: {dseq.source.sequence_file_url}
|
|
314
|
+
- addgene_sequence_type: {dseq.source.addgene_sequence_type}
|
|
315
|
+
''',
|
|
316
|
+
)
|
|
294
317
|
|
|
295
318
|
|
|
296
319
|
@router.post(
|
|
@@ -301,12 +324,21 @@ async def get_from_repository_id_addgene(source: AddgeneIdSource):
|
|
|
301
324
|
)
|
|
302
325
|
async def get_from_repository_id_wekwikgene(source: WekWikGeneIdSource):
|
|
303
326
|
try:
|
|
304
|
-
dseq
|
|
305
|
-
except
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
327
|
+
dseq = await request_from_wekwikgene(source.repository_id)
|
|
328
|
+
except Exception as exception:
|
|
329
|
+
handle_repository_errors(exception, 'WeKwikGene')
|
|
330
|
+
return format_products(
|
|
331
|
+
source.id,
|
|
332
|
+
[dseq],
|
|
333
|
+
source if source.sequence_file_url is not None else None,
|
|
334
|
+
source.output_name,
|
|
335
|
+
wrong_completed_source_error_message=f'''
|
|
336
|
+
The provided source is not valid.
|
|
337
|
+
We found the following:
|
|
338
|
+
- repository_id: {dseq.source.repository_id}
|
|
339
|
+
- sequence_file_url: {dseq.source.sequence_file_url}
|
|
340
|
+
''',
|
|
341
|
+
)
|
|
310
342
|
|
|
311
343
|
|
|
312
344
|
@router.post(
|
|
@@ -319,13 +351,10 @@ async def get_from_benchling_url(
|
|
|
319
351
|
source: Annotated[BenchlingUrlSource, Body(openapi_examples=request_examples.benchling_url_examples)]
|
|
320
352
|
):
|
|
321
353
|
try:
|
|
322
|
-
|
|
323
|
-
return
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
}
|
|
327
|
-
except HTTPError as exception:
|
|
328
|
-
repository_id_http_error_handler(exception, source)
|
|
354
|
+
dseq = await get_sequence_from_benchling_url(source.repository_id)
|
|
355
|
+
return format_products(source.id, [dseq], None, source.output_name)
|
|
356
|
+
except Exception as exception:
|
|
357
|
+
handle_repository_errors(exception, 'Benchling')
|
|
329
358
|
|
|
330
359
|
|
|
331
360
|
@router.post(
|
|
@@ -339,17 +368,10 @@ async def get_from_repository_id_snapgene(
|
|
|
339
368
|
):
|
|
340
369
|
try:
|
|
341
370
|
plasmid_set, plasmid_name = source.repository_id.split('/')
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
source.output_name = plasmid_name
|
|
347
|
-
return {
|
|
348
|
-
'sequences': [format_sequence_genbank(dseq, source.output_name)],
|
|
349
|
-
'sources': [source],
|
|
350
|
-
}
|
|
351
|
-
except HTTPError as exception:
|
|
352
|
-
repository_id_http_error_handler(exception, source)
|
|
371
|
+
seq = await request_from_snapgene(plasmid_set, plasmid_name)
|
|
372
|
+
return format_products(source.id, [seq], None, source.output_name)
|
|
373
|
+
except Exception as exception:
|
|
374
|
+
handle_repository_errors(exception, 'Snapgene')
|
|
353
375
|
|
|
354
376
|
|
|
355
377
|
@router.post(
|
|
@@ -365,12 +387,9 @@ async def get_from_repository_id_euroscarf(source: EuroscarfSource):
|
|
|
365
387
|
"""
|
|
366
388
|
try:
|
|
367
389
|
dseq = await get_sequence_from_euroscarf_url(source.repository_id)
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
return {'sequences': [format_sequence_genbank(dseq, source.output_name)], 'sources': [source]}
|
|
372
|
-
except HTTPError as exception:
|
|
373
|
-
repository_id_http_error_handler(exception, source)
|
|
390
|
+
return format_products(source.id, [dseq], None, source.output_name)
|
|
391
|
+
except Exception as exception:
|
|
392
|
+
handle_repository_errors(exception, 'Euroscarf')
|
|
374
393
|
|
|
375
394
|
|
|
376
395
|
@router.post(
|
|
@@ -381,10 +400,21 @@ async def get_from_repository_id_euroscarf(source: EuroscarfSource):
|
|
|
381
400
|
)
|
|
382
401
|
async def get_from_repository_id_igem(source: IGEMSource):
|
|
383
402
|
try:
|
|
384
|
-
dseq =
|
|
385
|
-
return
|
|
386
|
-
|
|
387
|
-
|
|
403
|
+
dseq = await get_sequence_from_iGEM2024(*source.repository_id.split('-'))
|
|
404
|
+
return format_products(
|
|
405
|
+
source.id,
|
|
406
|
+
[dseq],
|
|
407
|
+
source if source.sequence_file_url is not None else None,
|
|
408
|
+
source.output_name,
|
|
409
|
+
wrong_completed_source_error_message=f'''
|
|
410
|
+
The provided source is not valid.
|
|
411
|
+
We found the following:
|
|
412
|
+
- repository_id: {source.repository_id}
|
|
413
|
+
- sequence_file_url: {dseq.source.sequence_file_url}
|
|
414
|
+
''',
|
|
415
|
+
)
|
|
416
|
+
except Exception as exception:
|
|
417
|
+
handle_repository_errors(exception, 'iGEM')
|
|
388
418
|
|
|
389
419
|
|
|
390
420
|
@router.post(
|
|
@@ -397,10 +427,23 @@ async def get_from_repository_id_igem(source: IGEMSource):
|
|
|
397
427
|
)
|
|
398
428
|
async def get_from_repository_id_open_dna_collections(source: OpenDNACollectionsSource):
|
|
399
429
|
try:
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
430
|
+
collection_name, plasmid_id = source.repository_id.split('/')
|
|
431
|
+
dseq = await get_sequence_from_openDNA_collections(collection_name, plasmid_id)
|
|
432
|
+
return format_products(
|
|
433
|
+
source.id,
|
|
434
|
+
[dseq],
|
|
435
|
+
source if source.sequence_file_url is not None else None,
|
|
436
|
+
source.output_name,
|
|
437
|
+
wrong_completed_source_error_message=f'''
|
|
438
|
+
The provided source is not valid.
|
|
439
|
+
We found the following:
|
|
440
|
+
- collection_name: {collection_name}
|
|
441
|
+
- plasmid_id: {plasmid_id}
|
|
442
|
+
- sequence_file_url: {dseq.source.sequence_file_url}
|
|
443
|
+
''',
|
|
444
|
+
)
|
|
445
|
+
except Exception as exception:
|
|
446
|
+
handle_repository_errors(exception, 'OpenDNA Collections')
|
|
404
447
|
|
|
405
448
|
|
|
406
449
|
@router.post(
|
|
@@ -414,39 +457,30 @@ async def genome_coordinates(
|
|
|
414
457
|
):
|
|
415
458
|
|
|
416
459
|
# Validate that coordinates make sense
|
|
417
|
-
|
|
460
|
+
try:
|
|
461
|
+
location_str = SequenceLocationStr(source.coordinates)
|
|
462
|
+
location = location_str.to_biopython_location()
|
|
463
|
+
start, end, strand = location_str.get_ncbi_format_coordinates()
|
|
464
|
+
except Exception as e:
|
|
465
|
+
raise HTTPException(422, f'Invalid coordinates: {e}') from e
|
|
418
466
|
|
|
419
|
-
|
|
467
|
+
if len(location) > settings.NCBI_MAX_SEQUENCE_LENGTH:
|
|
468
|
+
raise HTTPException(400, f'sequence is too long (max {settings.NCBI_MAX_SEQUENCE_LENGTH} bp)')
|
|
469
|
+
|
|
470
|
+
if source.locus_tag is not None and source.assembly_accession is None:
|
|
471
|
+
raise HTTPException(422, 'assembly_accession is required if locus_tag is set')
|
|
420
472
|
|
|
473
|
+
# Source includes a locus tag in annotated assembly
|
|
421
474
|
async def validate_locus_task():
|
|
422
475
|
if source.locus_tag is not None:
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
# This field will not be present in all cases, but should be there in reference genomes
|
|
432
|
-
if source.gene_id is not None:
|
|
433
|
-
if 'gene_id' not in annotation:
|
|
434
|
-
raise HTTPException(400, 'gene_id is set, but not found in the annotation')
|
|
435
|
-
if source.gene_id != int(annotation['gene_id']):
|
|
436
|
-
raise HTTPException(400, 'gene_id does not match the locus_tag')
|
|
437
|
-
elif 'gene_id' in annotation:
|
|
438
|
-
source.gene_id = int(annotation['gene_id'])
|
|
439
|
-
|
|
440
|
-
# The gene should fall within the range (range might be bigger if bases were requested upstream or downstream)
|
|
441
|
-
if (
|
|
442
|
-
int(gene_range['begin']) < source.start
|
|
443
|
-
or int(gene_range['end']) > source.end
|
|
444
|
-
or gene_strand != source.strand
|
|
445
|
-
):
|
|
446
|
-
raise HTTPException(
|
|
447
|
-
400,
|
|
448
|
-
f'wrong coordinates, expected to fall within {source.start}, {source.end} on strand: {source.strand}',
|
|
449
|
-
)
|
|
476
|
+
return await ncbi_requests.validate_locus_tag(
|
|
477
|
+
source.locus_tag,
|
|
478
|
+
source.assembly_accession,
|
|
479
|
+
source.gene_id,
|
|
480
|
+
start,
|
|
481
|
+
end,
|
|
482
|
+
strand,
|
|
483
|
+
)
|
|
450
484
|
|
|
451
485
|
async def validate_assembly_task():
|
|
452
486
|
if source.assembly_accession is not None:
|
|
@@ -454,23 +488,26 @@ async def genome_coordinates(
|
|
|
454
488
|
sequence_accessions = await ncbi_requests.get_sequence_accessions_from_assembly_accession(
|
|
455
489
|
source.assembly_accession
|
|
456
490
|
)
|
|
457
|
-
if source.
|
|
491
|
+
if source.repository_id not in sequence_accessions:
|
|
458
492
|
raise HTTPException(
|
|
459
493
|
400,
|
|
460
|
-
f'Sequence accession {source.
|
|
494
|
+
f'Sequence accession {source.repository_id} not contained in assembly accession {source.assembly_accession}, which contains accessions: {", ".join(sequence_accessions)}',
|
|
461
495
|
)
|
|
462
496
|
|
|
463
497
|
async def get_sequence_task():
|
|
464
|
-
return await ncbi_requests.get_genbank_sequence(
|
|
465
|
-
source.sequence_accession, source.start, source.end, source.strand
|
|
466
|
-
)
|
|
498
|
+
return await ncbi_requests.get_genbank_sequence(source.repository_id, start, end, strand)
|
|
467
499
|
|
|
468
500
|
tasks = [validate_locus_task(), validate_assembly_task(), get_sequence_task()]
|
|
469
501
|
|
|
470
|
-
|
|
502
|
+
try:
|
|
503
|
+
gene_id, _, seq = await asyncio.gather(*tasks)
|
|
504
|
+
except Exception as exception:
|
|
505
|
+
handle_repository_errors(exception, 'NCBI')
|
|
506
|
+
|
|
507
|
+
source.gene_id = gene_id
|
|
471
508
|
|
|
472
509
|
# NCBI does not complain for coordinates that fall out of the sequence, so we have to check here
|
|
473
|
-
if len(seq) !=
|
|
510
|
+
if len(seq) != len(location):
|
|
474
511
|
raise HTTPException(400, 'coordinates fall outside the sequence')
|
|
475
512
|
|
|
476
513
|
return {'sequences': [format_sequence_genbank(seq, source.output_name)], 'sources': [source.model_copy()]}
|
|
@@ -487,11 +524,19 @@ async def get_from_repository_id_seva(source: SEVASource):
|
|
|
487
524
|
Return the sequence from a plasmid in SEVA.
|
|
488
525
|
"""
|
|
489
526
|
try:
|
|
490
|
-
dseq
|
|
491
|
-
return {'sequences': [format_sequence_genbank(dseq, source.output_name)], 'sources': [source]}
|
|
492
|
-
except HTTPError as exception:
|
|
493
|
-
repository_id_http_error_handler(exception, source)
|
|
494
|
-
except ConnectError:
|
|
495
|
-
raise HTTPException(504, 'unable to connect to SEVA')
|
|
527
|
+
dseq = await get_seva_plasmid(source.repository_id)
|
|
496
528
|
except Exception as exception:
|
|
497
|
-
|
|
529
|
+
handle_repository_errors(exception, 'SEVA')
|
|
530
|
+
|
|
531
|
+
return format_products(
|
|
532
|
+
source.id,
|
|
533
|
+
[dseq],
|
|
534
|
+
source if source.sequence_file_url is not None else None,
|
|
535
|
+
source.output_name,
|
|
536
|
+
wrong_completed_source_error_message=f'''
|
|
537
|
+
The provided source is not valid.
|
|
538
|
+
We found the following:
|
|
539
|
+
- repository_id: {dseq.source.repository_id}
|
|
540
|
+
- sequence_file_url: {dseq.source.sequence_file_url}
|
|
541
|
+
''',
|
|
542
|
+
)
|
|
@@ -2,14 +2,15 @@ from fastapi import Query, HTTPException
|
|
|
2
2
|
from pydna.dseqrecord import Dseqrecord
|
|
3
3
|
from pydantic import create_model, Field
|
|
4
4
|
from typing import Annotated
|
|
5
|
-
|
|
5
|
+
|
|
6
|
+
from opencloning.endpoints.endpoint_utils import format_products, parse_restriction_enzymes
|
|
7
|
+
from opencloning.temp_functions import get_enzymes_from_source
|
|
6
8
|
|
|
7
9
|
from ..dna_functions import (
|
|
8
10
|
format_sequence_genbank,
|
|
9
11
|
read_dsrecord_from_json,
|
|
10
|
-
get_invalid_enzyme_names,
|
|
11
12
|
)
|
|
12
|
-
from
|
|
13
|
+
from opencloning_linkml.datamodel import (
|
|
13
14
|
RestrictionEnzymeDigestionSource,
|
|
14
15
|
TextFileSequence,
|
|
15
16
|
PolymeraseExtensionSource,
|
|
@@ -33,54 +34,37 @@ async def restriction(
|
|
|
33
34
|
sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
|
|
34
35
|
restriction_enzymes: Annotated[list[str], Query(default_factory=list)],
|
|
35
36
|
):
|
|
37
|
+
completed_source = source if (source.left_edge is not None or source.right_edge is not None) else None
|
|
36
38
|
# There should be 1 or 2 enzymes in the request if the source does not have cuts
|
|
37
|
-
if
|
|
38
|
-
|
|
39
|
+
if completed_source is None:
|
|
40
|
+
enzymes = parse_restriction_enzymes(restriction_enzymes)
|
|
41
|
+
if len(enzymes) not in [1, 2]:
|
|
39
42
|
raise HTTPException(422, 'There should be 1 or 2 restriction enzymes in the request.')
|
|
40
43
|
else:
|
|
41
44
|
if len(restriction_enzymes) != 0:
|
|
42
45
|
raise HTTPException(422, 'There should be no restriction enzymes in the request if source is populated.')
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
# TODO: this could be moved to the class
|
|
46
|
-
invalid_enzymes = get_invalid_enzyme_names(restriction_enzymes)
|
|
47
|
-
if len(invalid_enzymes):
|
|
48
|
-
raise HTTPException(404, 'These enzymes do not exist: ' + ', '.join(invalid_enzymes))
|
|
49
|
-
enzymes = RestrictionBatch(first=[e for e in restriction_enzymes if e is not None])
|
|
46
|
+
enzymes = parse_restriction_enzymes(get_enzymes_from_source(completed_source))
|
|
50
47
|
|
|
51
48
|
seqr = read_dsrecord_from_json(sequences[0])
|
|
52
|
-
# TODO: return error if the id of the sequence does not correspond
|
|
53
49
|
|
|
54
50
|
cutsites = seqr.seq.get_cutsites(*enzymes)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
RestrictionEnzymeDigestionSource.from_cutsites(*p, [{'sequence': sequences[0].id}], source.id)
|
|
58
|
-
for p in cutsite_pairs
|
|
59
|
-
]
|
|
60
|
-
|
|
61
|
-
all_enzymes = set(enzyme for s in sources for enzyme in s.get_enzymes())
|
|
62
|
-
enzymes_not_cutting = set(restriction_enzymes) - set(all_enzymes)
|
|
51
|
+
cutting_enzymes = set(e for _, e in cutsites if e is not None)
|
|
52
|
+
enzymes_not_cutting = set(enzymes) - set(cutting_enzymes)
|
|
63
53
|
if len(enzymes_not_cutting):
|
|
64
|
-
raise HTTPException(400, 'These enzymes do not cut: ' + ', '.join(enzymes_not_cutting))
|
|
54
|
+
raise HTTPException(400, 'These enzymes do not cut: ' + ', '.join(map(str, enzymes_not_cutting)))
|
|
65
55
|
|
|
66
56
|
try:
|
|
67
|
-
|
|
68
|
-
if source.left_edge is not None or source.right_edge is not None:
|
|
69
|
-
|
|
70
|
-
for i, s in enumerate(sources):
|
|
71
|
-
if s == source:
|
|
72
|
-
return {
|
|
73
|
-
'sequences': [format_sequence_genbank(seqr.apply_cut(*cutsite_pairs[i]), source.output_name)],
|
|
74
|
-
'sources': [s],
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
raise HTTPException(400, 'Invalid restriction enzyme pair.')
|
|
78
|
-
|
|
79
|
-
products = [format_sequence_genbank(seqr.apply_cut(*p), source.output_name) for p in cutsite_pairs]
|
|
80
|
-
|
|
81
|
-
return {'sequences': products, 'sources': sources}
|
|
57
|
+
products = seqr.cut(*enzymes)
|
|
82
58
|
except ValueError as e:
|
|
83
|
-
raise HTTPException(400,
|
|
59
|
+
raise HTTPException(400, *e.args)
|
|
60
|
+
|
|
61
|
+
return format_products(
|
|
62
|
+
source.id,
|
|
63
|
+
products,
|
|
64
|
+
completed_source,
|
|
65
|
+
source.output_name,
|
|
66
|
+
wrong_completed_source_error_message='Invalid restriction enzyme pair.',
|
|
67
|
+
)
|
|
84
68
|
|
|
85
69
|
|
|
86
70
|
@router.post(
|
|
@@ -102,7 +86,7 @@ async def polymerase_extension(
|
|
|
102
86
|
if dseq.circular:
|
|
103
87
|
raise HTTPException(400, 'The sequence must be linear.')
|
|
104
88
|
|
|
105
|
-
if dseq.seq.ovhg == dseq.seq.watson_ovhg
|
|
89
|
+
if dseq.seq.ovhg == dseq.seq.watson_ovhg == 0:
|
|
106
90
|
raise HTTPException(400, 'The sequence must have an overhang.')
|
|
107
91
|
|
|
108
92
|
out_sequence = Dseqrecord(dseq.seq.fill_in(), features=dseq.features)
|