opencloning 0.4.8__py3-none-any.whl → 0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. opencloning/app_settings.py +7 -0
  2. opencloning/batch_cloning/pombe/__init__.py +2 -2
  3. opencloning/batch_cloning/pombe/pombe_clone.py +31 -112
  4. opencloning/batch_cloning/pombe/pombe_summary.py +20 -8
  5. opencloning/batch_cloning/ziqiang_et_al2024/__init__.py +8 -8
  6. opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json +2 -9
  7. opencloning/bug_fixing/backend_v0_3.py +13 -5
  8. opencloning/catalogs/__init__.py +36 -0
  9. opencloning/catalogs/igem2024.yaml +2172 -0
  10. opencloning/catalogs/openDNA_collections.yaml +1161 -0
  11. opencloning/catalogs/readme.txt +1 -0
  12. opencloning/catalogs/seva.tsv +231 -0
  13. opencloning/catalogs/snapgene.yaml +2837 -0
  14. opencloning/dna_functions.py +155 -158
  15. opencloning/dna_utils.py +45 -62
  16. opencloning/ebic/primer_design.py +1 -1
  17. opencloning/endpoints/annotation.py +9 -13
  18. opencloning/endpoints/assembly.py +157 -378
  19. opencloning/endpoints/endpoint_utils.py +52 -0
  20. opencloning/endpoints/external_import.py +169 -124
  21. opencloning/endpoints/no_assembly.py +23 -39
  22. opencloning/endpoints/no_input.py +32 -47
  23. opencloning/endpoints/other.py +1 -1
  24. opencloning/endpoints/primer_design.py +2 -1
  25. opencloning/http_client.py +2 -2
  26. opencloning/ncbi_requests.py +113 -47
  27. opencloning/primer_design.py +1 -1
  28. opencloning/pydantic_models.py +10 -510
  29. opencloning/request_examples.py +10 -22
  30. opencloning/temp_functions.py +50 -0
  31. {opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/METADATA +18 -8
  32. opencloning-0.5.dist-info/RECORD +51 -0
  33. {opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/WHEEL +1 -1
  34. opencloning/cre_lox.py +0 -116
  35. opencloning/gateway.py +0 -154
  36. opencloning-0.4.8.dist-info/RECORD +0 -45
  37. {opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,52 @@
1
+ from fastapi import HTTPException
2
+ from pydna.dseqrecord import Dseqrecord
3
+ from opencloning_linkml.datamodel import Source, TextFileSequence
4
+ from typing import Literal
5
+ from opencloning.dna_functions import format_sequence_genbank
6
+ from pydna.opencloning_models import id_mode
7
+ from opencloning.dna_functions import get_invalid_enzyme_names
8
+ from Bio.Restriction.Restriction import RestrictionBatch
9
+
10
+
11
def format_products(
    source_id: int,
    products: list[Dseqrecord],
    completed_source: Source | None,
    output_name: str,
    no_products_error_message: str = 'No products were found.',
    wrong_completed_source_error_message: str = 'The provided assembly is not valid.',
) -> dict[Literal['sources', 'sequences'], list[Source] | list[TextFileSequence]]:
    """Build the ``{'sources': ..., 'sequences': ...}`` payload for an endpoint.

    Every product is serialised with ``format_sequence_genbank`` and tagged
    with ``source_id``; the source attached to each product is dumped to a
    plain dict (via ``to_pydantic_model(source_id).model_dump()``) and given
    the requested ``output_name``.

    Args:
        source_id: Id assigned to every returned sequence and passed to
            ``to_pydantic_model`` for every source.
        products: Candidate products; each one must expose ``.source``.
        completed_source: When not ``None``, only the product whose dumped
            source equals ``completed_source.model_dump()`` is returned.
        output_name: Name forwarded to ``format_sequence_genbank`` and stored
            under ``'output_name'`` in each dumped source.
        no_products_error_message: Detail of the 400 error raised when
            ``products`` is empty and no completed source was given.
        wrong_completed_source_error_message: Detail of the 400 error raised
            when no product matches ``completed_source``.

    Returns:
        A dict with the keys ``'sources'`` and ``'sequences'``: all products
        when ``completed_source`` is ``None``, otherwise exactly the first
        matching one.

    Raises:
        HTTPException: 400 when nothing matches ``completed_source``, or when
            there are no products at all.
    """
    sequences = []
    for product in products:
        sequence = format_sequence_genbank(product, output_name)
        sequence.id = source_id
        sequences.append(sequence)

    # The source models are produced with python-internal ids switched off.
    with id_mode(use_python_internal_id=False):
        source_dicts = [product.source.to_pydantic_model(source_id).model_dump() for product in products]
    for source_dict in source_dicts:
        source_dict['output_name'] = output_name

    if completed_source is None:
        # Nothing to match against: return everything, unless it is empty.
        if len(products) == 0:
            raise HTTPException(400, no_products_error_message)
        return {
            'sources': source_dicts,
            'sequences': sequences,
        }

    # A completed source was submitted: return only the product it describes.
    expected = completed_source.model_dump()
    for sequence, source_dict in zip(sequences, source_dicts):
        if source_dict == expected:
            return {
                'sources': [source_dict],
                'sequences': [sequence],
            }
    raise HTTPException(400, wrong_completed_source_error_message)
46
+
47
+
48
def parse_restriction_enzymes(enzymes: list[str]) -> RestrictionBatch:
    """Turn a list of enzyme names into a ``RestrictionBatch``.

    Args:
        enzymes: Enzyme names; ``None`` entries are dropped before the batch
            is built.

    Returns:
        A ``RestrictionBatch`` created from the non-``None`` names.

    Raises:
        HTTPException: 404 listing the names reported by
            ``get_invalid_enzyme_names``, if there are any.
    """
    unknown_names = get_invalid_enzyme_names(enzymes)
    if len(unknown_names) > 0:
        listed = ', '.join(unknown_names)
        raise HTTPException(404, 'These enzymes do not exist: ' + listed)

    usable_names = [name for name in enzymes if name is not None]
    return RestrictionBatch(first=usable_names)
@@ -1,4 +1,5 @@
1
1
  from fastapi import Body, Query, HTTPException, Response, UploadFile, File
2
+ from opencloning.app_settings import settings
2
3
  from pydantic import create_model
3
4
  import io
4
5
  import warnings
@@ -6,11 +7,12 @@ import asyncio
6
7
  from starlette.responses import RedirectResponse
7
8
  from Bio import BiopythonParserWarning
8
9
  from typing import Annotated
9
- from urllib.error import HTTPError
10
10
  from pydna.utils import location_boundaries
11
11
 
12
+ from opencloning.endpoints.endpoint_utils import format_products
13
+
12
14
  from ..get_router import get_router
13
- from ..pydantic_models import (
15
+ from opencloning_linkml.datamodel import (
14
16
  TextFileSequence,
15
17
  UploadedFileSource,
16
18
  RepositoryIdSource,
@@ -23,18 +25,22 @@ from ..pydantic_models import (
23
25
  GenomeCoordinatesSource,
24
26
  SequenceFileFormat,
25
27
  SEVASource,
26
- SequenceLocationStr,
27
28
  OpenDNACollectionsSource,
29
+ NCBISequenceSource,
28
30
  )
31
+ from pydna.opencloning_models import SequenceLocationStr
29
32
  from ..dna_functions import (
30
33
  format_sequence_genbank,
34
+ get_sequence_from_benchling_url,
35
+ get_sequence_from_iGEM2024,
36
+ get_sequence_from_openDNA_collections,
31
37
  request_from_addgene,
38
+ request_from_snapgene,
32
39
  request_from_wekwikgene,
33
- get_sequences_from_file_url,
34
- get_sequence_from_snapgene_url,
35
40
  custom_file_parser,
36
41
  get_sequence_from_euroscarf_url,
37
42
  get_seva_plasmid,
43
+ read_dsrecord_from_json,
38
44
  )
39
45
  from .. import request_examples
40
46
  from .. import ncbi_requests
@@ -137,12 +143,7 @@ async def read_from_file(
137
143
  warning_messages = [str(w.message) for w in warnings_captured]
138
144
 
139
145
  except ValueError as e:
140
- raise HTTPException(422, f'Biopython cannot process this file: {e}.')
141
-
142
- # This happens when textfiles are empty or contain something else, or when reading a text file as snapgene file,
143
- # since StringIO does not raise an error when "Unexpected end of packet" is found
144
- if len(dseqs) == 0:
145
- raise HTTPException(422, 'Biopython cannot process this file.')
146
+ raise HTTPException(422, f'Biopython cannot process this file: {e}.') from e
146
147
 
147
148
  if index_in_file is not None:
148
149
  if index_in_file >= len(dseqs):
@@ -199,6 +200,10 @@ async def read_from_file(
199
200
  if len(warning_messages) > 0:
200
201
  response.headers['x-warning'] = '; '.join(warning_messages)
201
202
 
203
+ # Validate that the sequences are in a valid genbank format
204
+ for seq in out_sequences:
205
+ read_dsrecord_from_json(seq)
206
+
202
207
  return {'sequences': out_sequences, 'sources': out_sources}
203
208
 
204
209
 
@@ -206,22 +211,20 @@ async def read_from_file(
206
211
  # directly the object.
207
212
 
208
213
 
209
- def repository_id_http_error_handler(exception: HTTPError, source: RepositoryIdSource):
214
+ def handle_repository_errors(exception: Exception, repository_name: str) -> None:
215
+ """
216
+ Centralized error handler for repository requests.
217
+ Re-raises HTTPException as-is, converts ConnectError to HTTPException with 504 status.
218
+ """
219
+ if isinstance(exception, HTTPException):
220
+ raise
221
+ elif isinstance(exception, ConnectError):
222
+ raise HTTPException(504, f'Unable to connect to {repository_name}: {exception}')
223
+ else: # pragma: no cover
224
+ import traceback
210
225
 
211
- if exception.code == 500: # pragma: no cover
212
- raise HTTPException(
213
- 503, f'{source.repository_name} returned: {exception} - {source.repository_name} might be down'
214
- )
215
- elif exception.code == 400 or exception.code == 404:
216
- raise HTTPException(
217
- 404,
218
- f'{source.repository_name} returned: {exception} - Likely you inserted a wrong {source.repository_name} id',
219
- )
220
- elif exception.code == 403:
221
- raise HTTPException(
222
- 403,
223
- f'Request to {source.repository_name} is not allowed. Please check that the URL is whitelisted.',
224
- )
226
+ traceback.print_exc()
227
+ raise HTTPException(500, f'Unexpected error: {exception}')
225
228
 
226
229
 
227
230
  # Redirect to the right repository
@@ -244,36 +247,46 @@ def repository_id_http_error_handler(exception: HTTPError, source: RepositoryIdS
244
247
  )
245
248
  async def get_from_repository_id(
246
249
  source: (
247
- RepositoryIdSource
248
- | AddgeneIdSource
250
+ AddgeneIdSource
249
251
  | BenchlingUrlSource
250
252
  | SnapGenePlasmidSource
251
253
  | EuroscarfSource
252
254
  | WekWikGeneIdSource
253
255
  | SEVASource
254
256
  | OpenDNACollectionsSource
257
+ | NCBISequenceSource
255
258
  ),
256
259
  ):
257
- return RedirectResponse(f'/repository_id/{source.repository_name}', status_code=307)
260
+ mapping_dict = {
261
+ 'AddgeneIdSource': 'addgene',
262
+ 'BenchlingUrlSource': 'benchling',
263
+ 'SnapGenePlasmidSource': 'snapgene',
264
+ 'EuroscarfSource': 'euroscarf',
265
+ 'WekWikGeneIdSource': 'wekwikgene',
266
+ 'SEVASource': 'seva',
267
+ 'OpenDNACollectionsSource': 'open_dna_collections',
268
+ 'NCBISequenceSource': 'genbank',
269
+ }
270
+ return RedirectResponse(f'/repository_id/{mapping_dict[source.type]}', status_code=307)
258
271
 
259
272
 
260
273
  @router.post(
261
274
  '/repository_id/genbank',
262
275
  response_model=create_model(
263
- 'RepositoryIdResponse', sources=(list[RepositoryIdSource], ...), sequences=(list[TextFileSequence], ...)
276
+ 'RepositoryIdResponse', sources=(list[NCBISequenceSource], ...), sequences=(list[TextFileSequence], ...)
264
277
  ),
265
278
  )
266
- async def get_from_repository_id_genbank(source: RepositoryIdSource):
279
+ async def get_from_repository_id_genbank(source: NCBISequenceSource):
267
280
  try:
268
281
  # This request already fails if the sequence does not exist
269
282
  seq_length = await ncbi_requests.get_sequence_length_from_sequence_accession(source.repository_id)
270
- if seq_length > 100000:
271
- raise HTTPException(400, 'sequence is too long (max 100000 bp)')
283
+ if seq_length > settings.NCBI_MAX_SEQUENCE_LENGTH:
284
+ raise HTTPException(400, f'sequence is too long (max {settings.NCBI_MAX_SEQUENCE_LENGTH} bp)')
272
285
  seq = await ncbi_requests.get_genbank_sequence(source.repository_id)
273
- except ConnectError as exception:
274
- raise HTTPException(504, f'Unable to connect to NCBI: {exception}')
286
+ except Exception as exception:
287
+ handle_repository_errors(exception, 'NCBI')
275
288
 
276
- return {'sequences': [format_sequence_genbank(seq, source.output_name)], 'sources': [source.model_copy()]}
289
+ return format_products(source.id, [seq], None, source.output_name)
277
290
 
278
291
 
279
292
  @router.post(
@@ -284,13 +297,23 @@ async def get_from_repository_id_genbank(source: RepositoryIdSource):
284
297
  )
285
298
  async def get_from_repository_id_addgene(source: AddgeneIdSource):
286
299
  try:
287
- dseq, out_source = await request_from_addgene(source)
288
- except HTTPError as exception:
289
- repository_id_http_error_handler(exception, source)
290
- except ConnectError:
291
- raise HTTPException(504, 'unable to connect to Addgene')
292
-
293
- return {'sequences': [format_sequence_genbank(dseq, source.output_name)], 'sources': [out_source]}
300
+ dseq = await request_from_addgene(source.repository_id)
301
+ except Exception as exception:
302
+ handle_repository_errors(exception, 'Addgene')
303
+
304
+ return format_products(
305
+ source.id,
306
+ [dseq],
307
+ source if source.sequence_file_url is not None else None,
308
+ source.output_name,
309
+ wrong_completed_source_error_message=f'''
310
+ The provided source is not valid.
311
+ We found the following:
312
+ - repository_id: {dseq.source.repository_id}
313
+ - sequence_file_url: {dseq.source.sequence_file_url}
314
+ - addgene_sequence_type: {dseq.source.addgene_sequence_type}
315
+ ''',
316
+ )
294
317
 
295
318
 
296
319
  @router.post(
@@ -301,12 +324,21 @@ async def get_from_repository_id_addgene(source: AddgeneIdSource):
301
324
  )
302
325
  async def get_from_repository_id_wekwikgene(source: WekWikGeneIdSource):
303
326
  try:
304
- dseq, out_source = await request_from_wekwikgene(source)
305
- except HTTPError as exception:
306
- repository_id_http_error_handler(exception, source)
307
- except ConnectError:
308
- raise HTTPException(504, 'unable to connect to WekWikGene')
309
- return {'sequences': [format_sequence_genbank(dseq, source.output_name)], 'sources': [out_source]}
327
+ dseq = await request_from_wekwikgene(source.repository_id)
328
+ except Exception as exception:
329
+ handle_repository_errors(exception, 'WeKwikGene')
330
+ return format_products(
331
+ source.id,
332
+ [dseq],
333
+ source if source.sequence_file_url is not None else None,
334
+ source.output_name,
335
+ wrong_completed_source_error_message=f'''
336
+ The provided source is not valid.
337
+ We found the following:
338
+ - repository_id: {dseq.source.repository_id}
339
+ - sequence_file_url: {dseq.source.sequence_file_url}
340
+ ''',
341
+ )
310
342
 
311
343
 
312
344
  @router.post(
@@ -319,13 +351,10 @@ async def get_from_benchling_url(
319
351
  source: Annotated[BenchlingUrlSource, Body(openapi_examples=request_examples.benchling_url_examples)]
320
352
  ):
321
353
  try:
322
- dseqs = await get_sequences_from_file_url(source.repository_id)
323
- return {
324
- 'sequences': [format_sequence_genbank(s, source.output_name) for s in dseqs],
325
- 'sources': [source for s in dseqs],
326
- }
327
- except HTTPError as exception:
328
- repository_id_http_error_handler(exception, source)
354
+ dseq = await get_sequence_from_benchling_url(source.repository_id)
355
+ return format_products(source.id, [dseq], None, source.output_name)
356
+ except Exception as exception:
357
+ handle_repository_errors(exception, 'Benchling')
329
358
 
330
359
 
331
360
  @router.post(
@@ -339,17 +368,10 @@ async def get_from_repository_id_snapgene(
339
368
  ):
340
369
  try:
341
370
  plasmid_set, plasmid_name = source.repository_id.split('/')
342
- url = f'https://www.snapgene.com/local/fetch.php?set={plasmid_set}&plasmid={plasmid_name}'
343
- dseq = await get_sequence_from_snapgene_url(url)
344
- # Unless a name is provided, we use the plasmid name from snapgene
345
- if source.output_name is None:
346
- source.output_name = plasmid_name
347
- return {
348
- 'sequences': [format_sequence_genbank(dseq, source.output_name)],
349
- 'sources': [source],
350
- }
351
- except HTTPError as exception:
352
- repository_id_http_error_handler(exception, source)
371
+ seq = await request_from_snapgene(plasmid_set, plasmid_name)
372
+ return format_products(source.id, [seq], None, source.output_name)
373
+ except Exception as exception:
374
+ handle_repository_errors(exception, 'Snapgene')
353
375
 
354
376
 
355
377
  @router.post(
@@ -365,12 +387,9 @@ async def get_from_repository_id_euroscarf(source: EuroscarfSource):
365
387
  """
366
388
  try:
367
389
  dseq = await get_sequence_from_euroscarf_url(source.repository_id)
368
- # Sometimes the files do not contain correct topology information, so we loop them
369
- if not dseq.circular:
370
- dseq = dseq.looped()
371
- return {'sequences': [format_sequence_genbank(dseq, source.output_name)], 'sources': [source]}
372
- except HTTPError as exception:
373
- repository_id_http_error_handler(exception, source)
390
+ return format_products(source.id, [dseq], None, source.output_name)
391
+ except Exception as exception:
392
+ handle_repository_errors(exception, 'Euroscarf')
374
393
 
375
394
 
376
395
  @router.post(
@@ -381,10 +400,21 @@ async def get_from_repository_id_euroscarf(source: EuroscarfSource):
381
400
  )
382
401
  async def get_from_repository_id_igem(source: IGEMSource):
383
402
  try:
384
- dseq = (await get_sequences_from_file_url(source.sequence_file_url))[0]
385
- return {'sequences': [format_sequence_genbank(dseq, source.output_name)], 'sources': [source]}
386
- except HTTPError as exception:
387
- repository_id_http_error_handler(exception, source)
403
+ dseq = await get_sequence_from_iGEM2024(*source.repository_id.split('-'))
404
+ return format_products(
405
+ source.id,
406
+ [dseq],
407
+ source if source.sequence_file_url is not None else None,
408
+ source.output_name,
409
+ wrong_completed_source_error_message=f'''
410
+ The provided source is not valid.
411
+ We found the following:
412
+ - repository_id: {source.repository_id}
413
+ - sequence_file_url: {dseq.source.sequence_file_url}
414
+ ''',
415
+ )
416
+ except Exception as exception:
417
+ handle_repository_errors(exception, 'iGEM')
388
418
 
389
419
 
390
420
  @router.post(
@@ -397,10 +427,23 @@ async def get_from_repository_id_igem(source: IGEMSource):
397
427
  )
398
428
  async def get_from_repository_id_open_dna_collections(source: OpenDNACollectionsSource):
399
429
  try:
400
- dseq = (await get_sequences_from_file_url(source.sequence_file_url))[0]
401
- return {'sequences': [format_sequence_genbank(dseq, source.output_name)], 'sources': [source]}
402
- except HTTPError as exception:
403
- repository_id_http_error_handler(exception, source)
430
+ collection_name, plasmid_id = source.repository_id.split('/')
431
+ dseq = await get_sequence_from_openDNA_collections(collection_name, plasmid_id)
432
+ return format_products(
433
+ source.id,
434
+ [dseq],
435
+ source if source.sequence_file_url is not None else None,
436
+ source.output_name,
437
+ wrong_completed_source_error_message=f'''
438
+ The provided source is not valid.
439
+ We found the following:
440
+ - collection_name: {collection_name}
441
+ - plasmid_id: {plasmid_id}
442
+ - sequence_file_url: {dseq.source.sequence_file_url}
443
+ ''',
444
+ )
445
+ except Exception as exception:
446
+ handle_repository_errors(exception, 'OpenDNA Collections')
404
447
 
405
448
 
406
449
  @router.post(
@@ -414,39 +457,30 @@ async def genome_coordinates(
414
457
  ):
415
458
 
416
459
  # Validate that coordinates make sense
417
- ncbi_requests.validate_coordinates_pre_request(source.start, source.end, source.strand)
460
+ try:
461
+ location_str = SequenceLocationStr(source.coordinates)
462
+ location = location_str.to_biopython_location()
463
+ start, end, strand = location_str.get_ncbi_format_coordinates()
464
+ except Exception as e:
465
+ raise HTTPException(422, f'Invalid coordinates: {e}') from e
418
466
 
419
- # Source includes a locus tag in annotated assembly
467
+ if len(location) > settings.NCBI_MAX_SEQUENCE_LENGTH:
468
+ raise HTTPException(400, f'sequence is too long (max {settings.NCBI_MAX_SEQUENCE_LENGTH} bp)')
469
+
470
+ if source.locus_tag is not None and source.assembly_accession is None:
471
+ raise HTTPException(422, 'assembly_accession is required if locus_tag is set')
420
472
 
473
+ # Source includes a locus tag in annotated assembly
421
474
  async def validate_locus_task():
422
475
  if source.locus_tag is not None:
423
-
424
- if source.assembly_accession is None:
425
- raise HTTPException(422, 'assembly_accession is required if locus_tag is set')
426
-
427
- annotation = await ncbi_requests.get_annotation_from_locus_tag(source.locus_tag, source.assembly_accession)
428
- gene_range = annotation['genomic_regions'][0]['gene_range']['range'][0]
429
- gene_strand = 1 if gene_range['orientation'] == 'plus' else -1
430
-
431
- # This field will not be present in all cases, but should be there in reference genomes
432
- if source.gene_id is not None:
433
- if 'gene_id' not in annotation:
434
- raise HTTPException(400, 'gene_id is set, but not found in the annotation')
435
- if source.gene_id != int(annotation['gene_id']):
436
- raise HTTPException(400, 'gene_id does not match the locus_tag')
437
- elif 'gene_id' in annotation:
438
- source.gene_id = int(annotation['gene_id'])
439
-
440
- # The gene should fall within the range (range might be bigger if bases were requested upstream or downstream)
441
- if (
442
- int(gene_range['begin']) < source.start
443
- or int(gene_range['end']) > source.end
444
- or gene_strand != source.strand
445
- ):
446
- raise HTTPException(
447
- 400,
448
- f'wrong coordinates, expected to fall within {source.start}, {source.end} on strand: {source.strand}',
449
- )
476
+ return await ncbi_requests.validate_locus_tag(
477
+ source.locus_tag,
478
+ source.assembly_accession,
479
+ source.gene_id,
480
+ start,
481
+ end,
482
+ strand,
483
+ )
450
484
 
451
485
  async def validate_assembly_task():
452
486
  if source.assembly_accession is not None:
@@ -454,23 +488,26 @@ async def genome_coordinates(
454
488
  sequence_accessions = await ncbi_requests.get_sequence_accessions_from_assembly_accession(
455
489
  source.assembly_accession
456
490
  )
457
- if source.sequence_accession not in sequence_accessions:
491
+ if source.repository_id not in sequence_accessions:
458
492
  raise HTTPException(
459
493
  400,
460
- f'Sequence accession {source.sequence_accession} not contained in assembly accession {source.assembly_accession}, which contains accessions: {", ".join(sequence_accessions)}',
494
+ f'Sequence accession {source.repository_id} not contained in assembly accession {source.assembly_accession}, which contains accessions: {", ".join(sequence_accessions)}',
461
495
  )
462
496
 
463
497
  async def get_sequence_task():
464
- return await ncbi_requests.get_genbank_sequence(
465
- source.sequence_accession, source.start, source.end, source.strand
466
- )
498
+ return await ncbi_requests.get_genbank_sequence(source.repository_id, start, end, strand)
467
499
 
468
500
  tasks = [validate_locus_task(), validate_assembly_task(), get_sequence_task()]
469
501
 
470
- _, _, seq = await asyncio.gather(*tasks)
502
+ try:
503
+ gene_id, _, seq = await asyncio.gather(*tasks)
504
+ except Exception as exception:
505
+ handle_repository_errors(exception, 'NCBI')
506
+
507
+ source.gene_id = gene_id
471
508
 
472
509
  # NCBI does not complain for coordinates that fall out of the sequence, so we have to check here
473
- if len(seq) != source.end - source.start + 1:
510
+ if len(seq) != len(location):
474
511
  raise HTTPException(400, 'coordinates fall outside the sequence')
475
512
 
476
513
  return {'sequences': [format_sequence_genbank(seq, source.output_name)], 'sources': [source.model_copy()]}
@@ -487,11 +524,19 @@ async def get_from_repository_id_seva(source: SEVASource):
487
524
  Return the sequence from a plasmid in SEVA.
488
525
  """
489
526
  try:
490
- dseq, source = await get_seva_plasmid(source)
491
- return {'sequences': [format_sequence_genbank(dseq, source.output_name)], 'sources': [source]}
492
- except HTTPError as exception:
493
- repository_id_http_error_handler(exception, source)
494
- except ConnectError:
495
- raise HTTPException(504, 'unable to connect to SEVA')
527
+ dseq = await get_seva_plasmid(source.repository_id)
496
528
  except Exception as exception:
497
- raise HTTPException(400, f'Error parsing file: {exception}')
529
+ handle_repository_errors(exception, 'SEVA')
530
+
531
+ return format_products(
532
+ source.id,
533
+ [dseq],
534
+ source if source.sequence_file_url is not None else None,
535
+ source.output_name,
536
+ wrong_completed_source_error_message=f'''
537
+ The provided source is not valid.
538
+ We found the following:
539
+ - repository_id: {dseq.source.repository_id}
540
+ - sequence_file_url: {dseq.source.sequence_file_url}
541
+ ''',
542
+ )
@@ -2,14 +2,15 @@ from fastapi import Query, HTTPException
2
2
  from pydna.dseqrecord import Dseqrecord
3
3
  from pydantic import create_model, Field
4
4
  from typing import Annotated
5
- from Bio.Restriction import RestrictionBatch
5
+
6
+ from opencloning.endpoints.endpoint_utils import format_products, parse_restriction_enzymes
7
+ from opencloning.temp_functions import get_enzymes_from_source
6
8
 
7
9
  from ..dna_functions import (
8
10
  format_sequence_genbank,
9
11
  read_dsrecord_from_json,
10
- get_invalid_enzyme_names,
11
12
  )
12
- from ..pydantic_models import (
13
+ from opencloning_linkml.datamodel import (
13
14
  RestrictionEnzymeDigestionSource,
14
15
  TextFileSequence,
15
16
  PolymeraseExtensionSource,
@@ -33,54 +34,37 @@ async def restriction(
33
34
  sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
34
35
  restriction_enzymes: Annotated[list[str], Query(default_factory=list)],
35
36
  ):
37
+ completed_source = source if (source.left_edge is not None or source.right_edge is not None) else None
36
38
  # There should be 1 or 2 enzymes in the request if the source does not have cuts
37
- if source.left_edge is None and source.right_edge is None:
38
- if len(restriction_enzymes) < 1 or len(restriction_enzymes) > 2:
39
+ if completed_source is None:
40
+ enzymes = parse_restriction_enzymes(restriction_enzymes)
41
+ if len(enzymes) not in [1, 2]:
39
42
  raise HTTPException(422, 'There should be 1 or 2 restriction enzymes in the request.')
40
43
  else:
41
44
  if len(restriction_enzymes) != 0:
42
45
  raise HTTPException(422, 'There should be no restriction enzymes in the request if source is populated.')
43
- restriction_enzymes = source.get_enzymes()
44
-
45
- # TODO: this could be moved to the class
46
- invalid_enzymes = get_invalid_enzyme_names(restriction_enzymes)
47
- if len(invalid_enzymes):
48
- raise HTTPException(404, 'These enzymes do not exist: ' + ', '.join(invalid_enzymes))
49
- enzymes = RestrictionBatch(first=[e for e in restriction_enzymes if e is not None])
46
+ enzymes = parse_restriction_enzymes(get_enzymes_from_source(completed_source))
50
47
 
51
48
  seqr = read_dsrecord_from_json(sequences[0])
52
- # TODO: return error if the id of the sequence does not correspond
53
49
 
54
50
  cutsites = seqr.seq.get_cutsites(*enzymes)
55
- cutsite_pairs = seqr.seq.get_cutsite_pairs(cutsites)
56
- sources = [
57
- RestrictionEnzymeDigestionSource.from_cutsites(*p, [{'sequence': sequences[0].id}], source.id)
58
- for p in cutsite_pairs
59
- ]
60
-
61
- all_enzymes = set(enzyme for s in sources for enzyme in s.get_enzymes())
62
- enzymes_not_cutting = set(restriction_enzymes) - set(all_enzymes)
51
+ cutting_enzymes = set(e for _, e in cutsites if e is not None)
52
+ enzymes_not_cutting = set(enzymes) - set(cutting_enzymes)
63
53
  if len(enzymes_not_cutting):
64
- raise HTTPException(400, 'These enzymes do not cut: ' + ', '.join(enzymes_not_cutting))
54
+ raise HTTPException(400, 'These enzymes do not cut: ' + ', '.join(map(str, enzymes_not_cutting)))
65
55
 
66
56
  try:
67
- # If the output is known
68
- if source.left_edge is not None or source.right_edge is not None:
69
-
70
- for i, s in enumerate(sources):
71
- if s == source:
72
- return {
73
- 'sequences': [format_sequence_genbank(seqr.apply_cut(*cutsite_pairs[i]), source.output_name)],
74
- 'sources': [s],
75
- }
76
-
77
- raise HTTPException(400, 'Invalid restriction enzyme pair.')
78
-
79
- products = [format_sequence_genbank(seqr.apply_cut(*p), source.output_name) for p in cutsite_pairs]
80
-
81
- return {'sequences': products, 'sources': sources}
57
+ products = seqr.cut(*enzymes)
82
58
  except ValueError as e:
83
- raise HTTPException(400, str(e))
59
+ raise HTTPException(400, *e.args)
60
+
61
+ return format_products(
62
+ source.id,
63
+ products,
64
+ completed_source,
65
+ source.output_name,
66
+ wrong_completed_source_error_message='Invalid restriction enzyme pair.',
67
+ )
84
68
 
85
69
 
86
70
  @router.post(
@@ -102,7 +86,7 @@ async def polymerase_extension(
102
86
  if dseq.circular:
103
87
  raise HTTPException(400, 'The sequence must be linear.')
104
88
 
105
- if dseq.seq.ovhg == dseq.seq.watson_ovhg() == 0:
89
+ if dseq.seq.ovhg == dseq.seq.watson_ovhg == 0:
106
90
  raise HTTPException(400, 'The sequence must have an overhang.')
107
91
 
108
92
  out_sequence = Dseqrecord(dseq.seq.fill_in(), features=dseq.features)