opencloning 0.3.7__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -121,7 +121,7 @@ If you want to fix several bugs from the command line, you can use the `backend_
121
121
  Before running this script, you need to migrate the data to the latest version of the schema. See the [full documentation](https://github.com/OpenCloning/OpenCloning_LinkML?tab=readme-ov-file#migration-from-previous-versions-of-the-schema) for details; in short:
122
122
 
123
123
  ```bash
124
- python -m opencloning_linkl.migrations.migrate file1.json file2.json ...
124
+ python -m opencloning_linkml.migrations.migrate --target-version='0.3.0' file1.json file2.json ...
125
125
  ```
126
126
 
127
127
  Then, you can run the script:
@@ -131,7 +131,10 @@ python -m opencloning.bug_fixing.backend_v0_3 file1.json file2.json ...
131
131
  ```
132
132
 
133
133
  For each file:
134
- * If the file does not need fixing, it will be skipped.
134
+ * If the file does not need fixing, it will be skipped. You can then migrate it to the latest version of the schema by re-running the migration command without the `--target-version` flag:
135
+ ```bash
136
+ python -m opencloning_linkml.migrations.migrate file1.json file2.json ...
137
+ ```
135
138
  * If the file needs fixing, the script will create a new file `file_1_needs_fixing.json` in the same directory as the original file, with the problematic sources replaced by templates.
136
139
  * You can then load these files into the web application and run the correct steps manually.
137
140
 
@@ -27,31 +27,26 @@ def fix_backend_v0_3(input_data: dict) -> CloningStrategy | None:
27
27
  for source in data['sources']:
28
28
  if source['type'] == 'GatewaySource':
29
29
  # Take the first assembly value and check that the length of features is 7
30
- assembly = source['assembly']
31
- if len(assembly):
30
+ input = source['input']
31
+ if len(input):
32
32
  feat2check = (
33
- assembly[0]['left_location']
34
- if assembly[0]['left_location'] is not None
35
- else assembly[0]['right_location']
33
+ input[0]['left_location'] if input[0]['left_location'] is not None else input[0]['right_location']
36
34
  )
37
35
  if len(SequenceLocationStr(feat2check).to_biopython_location()) != 7:
38
36
  problematic_source_ids.add(source['id'])
39
37
 
40
- elif 'assembly' in source:
38
+ elif any(('type' in i and i['type'] == 'AssemblyFragment') for i in source['input']):
41
39
  assembly_source = AssemblySource(
42
40
  id=source['id'],
43
41
  input=source['input'],
44
- output=source['output'],
45
42
  circular=source['circular'],
46
- assembly=source['assembly'],
47
43
  )
48
- input_seqs = [
49
- TextFileSequence.model_validate(s) for s in data['sequences'] if s['id'] in assembly_source.input
50
- ]
44
+ input_ids = [i.sequence for i in assembly_source.input]
45
+ input_seqs = [TextFileSequence.model_validate(s) for s in data['sequences'] if s['id'] in input_ids]
51
46
  # Sort input_seqs as in input
52
- input_seqs.sort(key=lambda x: assembly_source.input.index(x.id))
47
+ input_seqs.sort(key=lambda x: input_ids.index(x.id))
53
48
  if source['type'] == 'PCRSource':
54
- primer_ids = [assembly_source.assembly[0].sequence, assembly_source.assembly[2].sequence]
49
+ primer_ids = [assembly_source.input[0].sequence, assembly_source.input[2].sequence]
55
50
  primers = [PrimerModel.model_validate(p) for p in data['primers'] if p['id'] in primer_ids]
56
51
  input_seqs = [primers[0], input_seqs[0], primers[1]]
57
52
 
@@ -68,9 +63,11 @@ def fix_backend_v0_3(input_data: dict) -> CloningStrategy | None:
68
63
  problematic_source_ids.update(sum([cs.all_children_source_ids(s) for s in problematic_source_ids], []))
69
64
  for source_id in problematic_source_ids:
70
65
  source = next(s for s in data['sources'] if s['id'] == source_id)
71
- output_seq = next(s for s in data['sequences'] if s['id'] == source['output'])
72
- remove_keys = ['assembly', 'circular']
66
+ output_seq = next(s for s in data['sequences'] if s['id'] == source_id)
67
+ # Remove assembly info
68
+ remove_keys = ['circular']
73
69
  source_keep = {key: value for key, value in source.items() if key not in remove_keys}
70
+ source_keep['input'] = [{'sequence': f['sequence']} for f in source_keep['input']]
74
71
  source.clear()
75
72
  source.update(source_keep)
76
73
 
@@ -15,7 +15,7 @@ from pydna.common_sub_strings import common_sub_strings
15
15
  from Bio.SeqIO import parse as seqio_parse
16
16
  import io
17
17
  import warnings
18
- from Bio.SeqIO.InsdcIO import GenBankIterator, GenBankScanner
18
+ from Bio.SeqIO.InsdcIO import GenBankScanner, GenBankIterator
19
19
  import re
20
20
  from .http_client import get_http_client, ConnectError, TimeoutException
21
21
  from .ncbi_requests import get_genbank_sequence
@@ -29,7 +29,7 @@ def format_sequence_genbank(seq: Dseqrecord, seq_name: str = None) -> TextFileSe
29
29
  correct_name(seq)
30
30
 
31
31
  return TextFileSequence(
32
- id=0,
32
+ id=int(seq.id) if seq.id is not None and str(seq.id).isdigit() else 0,
33
33
  file_content=seq.format('genbank'),
34
34
  sequence_file_format=SequenceFileFormat('genbank'),
35
35
  overhang_crick_3prime=seq.seq.ovhg,
@@ -280,10 +280,9 @@ class MyGenBankScanner(GenBankScanner):
280
280
 
281
281
  class MyGenBankIterator(GenBankIterator):
282
282
 
283
- def parse(self, handle):
284
- """Start parsing the file, and return a SeqRecord generator."""
285
- records = MyGenBankScanner(debug=0).parse_records(handle)
286
- return records
283
+ def __init__(self, source):
284
+ super(GenBankIterator, self).__init__(source, fmt='GenBank')
285
+ self.records = MyGenBankScanner(debug=0).parse_records(self.stream)
287
286
 
288
287
 
289
288
  def custom_file_parser(
opencloning/dna_utils.py CHANGED
@@ -15,6 +15,7 @@ from Bio.Data.IUPACData import ambiguous_dna_values as _ambiguous_dna_values
15
15
  import re
16
16
  from Bio.SeqFeature import Location, SimpleLocation
17
17
  from pydna.utils import shift_location
18
+ from pairwise_alignments_to_msa.alignment import aligned_tuples_to_MSA
18
19
 
19
20
  aligner = PairwiseAligner(scoring='blastn')
20
21
 
@@ -125,33 +126,37 @@ def permutate_trace(reference: str, sanger_trace: str) -> str:
125
126
 
126
127
  def align_sanger_traces(dseqr: Dseqrecord, sanger_traces: list[str]) -> list[str]:
127
128
  """Align a sanger track to a dseqr sequence"""
128
- query_str = str(dseqr.seq)
129
+
130
+ # Ensure sequences are in upper case
131
+ query_str = str(dseqr.seq).upper()
132
+ sanger_traces = [trace.upper() for trace in sanger_traces]
133
+
129
134
  # Check that required executables exist in PATH
130
135
  if not shutil.which('mars'):
131
136
  raise RuntimeError("'mars' executable not found in PATH")
132
137
  if not shutil.which('mafft'):
133
138
  raise RuntimeError("'mafft' executable not found in PATH")
134
139
 
135
- # If the sequence is circular, use MARS to permutate the traces
136
- if dseqr.circular:
137
- permutated_traces = []
138
- for trace in sanger_traces:
139
- permutated_traces.append(permutate_trace(query_str, trace))
140
- permutated_traces.append(permutate_trace(query_str, reverse_complement(trace)))
141
-
142
- traces_oriented = []
143
- # Pairwise-align and keep the best alignment, to decide which orientation to keep
144
- for fwd, rvs in zip(permutated_traces[::2], permutated_traces[1::2]):
145
- fwd_alignment = next(aligner.align(query_str, fwd))
146
- rvs_alignment = next(aligner.align(query_str, rvs))
147
-
148
- if fwd_alignment.score > rvs_alignment.score:
149
- traces_oriented.append(fwd.replace('N', ''))
150
- else:
151
- traces_oriented.append(rvs.replace('N', ''))
152
- sanger_traces = traces_oriented
153
-
154
- return align_with_mafft([query_str, *sanger_traces], True)
140
+ aligned_pairs = []
141
+ for trace in sanger_traces:
142
+ # If the sequence is circular, permutate both fwd and reverse complement
143
+ if dseqr.circular:
144
+ fwd = permutate_trace(query_str, trace)
145
+ rvs = permutate_trace(query_str, reverse_complement(trace))
146
+ else:
147
+ fwd = trace
148
+ rvs = reverse_complement(trace)
149
+
150
+ # Pairwise-align and keep the best alignment
151
+ fwd_alignment = next(aligner.align(query_str, fwd))
152
+ rvs_alignment = next(aligner.align(query_str, rvs))
153
+
154
+ best_alignment = fwd_alignment if fwd_alignment.score > rvs_alignment.score else rvs_alignment
155
+
156
+ formatted_alignment = best_alignment.format('fasta').split()[1::2]
157
+ aligned_pairs.append(tuple(formatted_alignment))
158
+
159
+ return aligned_tuples_to_MSA(aligned_pairs)
155
160
 
156
161
 
157
162
  def compute_regex_site(site: str) -> str:
@@ -3,7 +3,8 @@ from typing import Union, Literal, Callable
3
3
  from pydna.dseqrecord import Dseqrecord
4
4
  from pydna.primer import Primer as PydnaPrimer
5
5
  from pydna.crispr import cas9
6
- from pydantic import conlist, create_model
6
+ from pydantic import create_model, Field
7
+ from typing import Annotated
7
8
  from Bio.Restriction.Restriction import RestrictionBatch
8
9
  from opencloning.cre_lox import cre_loxP_overlap, annotate_loxP_sites
9
10
  from ..dna_functions import (
@@ -27,7 +28,7 @@ from ..pydantic_models import (
27
28
  CreLoxRecombinationSource,
28
29
  InVivoAssemblySource,
29
30
  )
30
- from ..assembly2 import (
31
+ from pydna.assembly2 import (
31
32
  Assembly,
32
33
  assemble,
33
34
  sticky_end_sub_strings,
@@ -80,8 +81,8 @@ def format_known_assembly_response(
80
81
  )
81
82
  async def crispr(
82
83
  source: CRISPRSource,
83
- guides: list[PrimerModel],
84
- sequences: conlist(TextFileSequence, min_length=2, max_length=2),
84
+ guides: Annotated[list[PrimerModel], Field(min_length=1)],
85
+ sequences: Annotated[list[TextFileSequence], Field(min_length=2, max_length=2)],
85
86
  minimal_homology: int = Query(40, description='The minimum homology between the template and the insert.'),
86
87
  ):
87
88
  """Return the sequence after performing CRISPR editing by Homology directed repair
@@ -106,6 +107,7 @@ async def crispr(
106
107
  400, f'Could not find Cas9 cutsite in the target sequence using the guide: {guide.name}'
107
108
  )
108
109
  guide_cuts.append(possible_cuts)
110
+ sorted_guide_ids = list(sorted([guide.id for guide in guides]))
109
111
 
110
112
  # Check if homologous recombination is possible
111
113
  fragments = [template, insert]
@@ -144,12 +146,12 @@ async def crispr(
144
146
  # meant for linear DNA
145
147
 
146
148
  out_sources = [
147
- CRISPRSource.from_assembly(id=source.id, assembly=a, guides=source.guides, fragments=fragments)
149
+ CRISPRSource.from_assembly(id=source.id, assembly=a, guides=sorted_guide_ids, fragments=fragments)
148
150
  for a in valid_assemblies
149
151
  ]
150
152
 
151
153
  # If a specific assembly is requested
152
- if len(source.assembly):
154
+ if source.is_assembly_complete():
153
155
  return format_known_assembly_response(source, out_sources, [template, insert])
154
156
 
155
157
  out_sequences = [
@@ -204,7 +206,7 @@ def generate_assemblies(
204
206
  raise HTTPException(400, *e.args)
205
207
 
206
208
  # If a specific assembly is requested
207
- if len(source.assembly):
209
+ if source.is_assembly_complete():
208
210
  return format_known_assembly_response(source, out_sources, fragments, product_callback)
209
211
 
210
212
  out_sequences = [
@@ -225,7 +227,7 @@ def generate_assemblies(
225
227
  )
226
228
  async def ligation(
227
229
  source: LigationSource,
228
- sequences: conlist(TextFileSequence, min_length=1),
230
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
229
231
  blunt: bool = Query(False, description='Use blunt ligation as well as sticky ends.'),
230
232
  allow_partial_overlap: bool = Query(False, description='Allow for partially overlapping sticky ends.'),
231
233
  circular_only: bool = Query(False, description='Only return circular assemblies.'),
@@ -239,7 +241,7 @@ async def ligation(
239
241
 
240
242
  # If the assembly is known, the blunt parameter is ignored, and we set the algorithm type from the assembly
241
243
  # (blunt ligations have features without length)
242
- if len(source.assembly):
244
+ if source.is_assembly_complete():
243
245
  asm = source.get_assembly_plan(fragments)
244
246
  blunt = len(asm[0][2]) == 0
245
247
 
@@ -261,8 +263,8 @@ async def ligation(
261
263
  )
262
264
  async def pcr(
263
265
  source: PCRSource,
264
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
265
- primers: conlist(PrimerModel, min_length=1, max_length=2),
266
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
267
+ primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
266
268
  minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
267
269
  allowed_mismatches: int = Query(0, description='The number of mismatches allowed'),
268
270
  ):
@@ -277,7 +279,7 @@ async def pcr(
277
279
  # What happens if annealing is zero? That would mean
278
280
  # mismatch in the 3' of the primer, which maybe should
279
281
  # not be allowed.
280
- if len(source.assembly):
282
+ if source.is_assembly_complete():
281
283
  minimal_annealing = source.minimal_overlap()
282
284
  # Only the ones that match are included in the output assembly
283
285
  # location, so the submitted assembly should be returned without
@@ -315,11 +317,11 @@ async def pcr(
315
317
  ]
316
318
 
317
319
  # If a specific assembly is requested
318
- if len(source.assembly):
320
+ if source.is_assembly_complete():
319
321
 
320
322
  def callback(x):
321
323
  if source.add_primer_features:
322
- return annotate_primer_binding_sites(x, fragments, source.get_assembly_plan(fragments))
324
+ return annotate_primer_binding_sites(x, fragments)
323
325
  else:
324
326
  return x
325
327
 
@@ -331,7 +333,7 @@ async def pcr(
331
333
  def callback(fragments, a):
332
334
  out_seq = assemble(fragments, a)
333
335
  if source.add_primer_features:
334
- return annotate_primer_binding_sites(out_seq, fragments, possible_assemblies)
336
+ return annotate_primer_binding_sites(out_seq, fragments)
335
337
  else:
336
338
  return out_seq
337
339
 
@@ -353,14 +355,14 @@ async def pcr(
353
355
  )
354
356
  async def homologous_recombination(
355
357
  source: HomologousRecombinationSource,
356
- sequences: conlist(TextFileSequence, min_length=2, max_length=2),
358
+ sequences: Annotated[list[TextFileSequence], Field(min_length=2, max_length=2)],
357
359
  minimal_homology: int = Query(40, description='The minimum homology between the template and the insert.'),
358
360
  ):
359
361
 
360
362
  template, insert = [read_dsrecord_from_json(seq) for seq in sequences]
361
363
 
362
364
  # If an assembly is provided, we ignore minimal_homology
363
- if len(source.assembly):
365
+ if source.is_assembly_complete():
364
366
  minimal_homology = source.minimal_overlap()
365
367
 
366
368
  asm = Assembly((template, insert), limit=minimal_homology, use_all_fragments=True)
@@ -386,7 +388,7 @@ async def homologous_recombination(
386
388
  ]
387
389
 
388
390
  # If a specific assembly is requested
389
- if len(source.assembly):
391
+ if source.is_assembly_complete():
390
392
  return format_known_assembly_response(source, out_sources, [template, insert])
391
393
 
392
394
  out_sequences = [
@@ -411,7 +413,7 @@ async def homologous_recombination(
411
413
  ),
412
414
  )
413
415
  async def gibson_assembly(
414
- sequences: conlist(TextFileSequence, min_length=1),
416
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
415
417
  source: Union[GibsonAssemblySource, OverlapExtensionPCRLigationSource, InFusionSource, InVivoAssemblySource],
416
418
  minimal_homology: int = Query(
417
419
  40, description='The minimum homology between consecutive fragments in the assembly.'
@@ -450,7 +452,7 @@ async def gibson_assembly(
450
452
  )
451
453
  async def restriction_and_ligation(
452
454
  source: RestrictionAndLigationSource,
453
- sequences: conlist(TextFileSequence, min_length=1),
455
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
454
456
  allow_partial_overlap: bool = Query(False, description='Allow for partially overlapping sticky ends.'),
455
457
  circular_only: bool = Query(False, description='Only return circular assemblies.'),
456
458
  ):
@@ -492,7 +494,7 @@ async def restriction_and_ligation(
492
494
  )
493
495
  async def gateway(
494
496
  source: GatewaySource,
495
- sequences: conlist(TextFileSequence, min_length=1),
497
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
496
498
  circular_only: bool = Query(False, description='Only return circular assemblies.'),
497
499
  only_multi_site: bool = Query(
498
500
  False, description='Only return assemblies where more than one site per sequence recombined.'
@@ -537,7 +539,7 @@ async def gateway(
537
539
  multi_site_sources = [
538
540
  i
539
541
  for i, s in enumerate(resp['sources'])
540
- if all(join.left_location != join.right_location for join in s.assembly)
542
+ if all(join.left_location != join.right_location for join in s.input)
541
543
  ]
542
544
  sources = [resp['sources'][i] for i in multi_site_sources]
543
545
  sequences = [resp['sequences'][i] for i in multi_site_sources]
@@ -554,7 +556,9 @@ async def gateway(
554
556
  sequences=(list[TextFileSequence], ...),
555
557
  ),
556
558
  )
557
- async def cre_lox_recombination(source: CreLoxRecombinationSource, sequences: conlist(TextFileSequence, min_length=1)):
559
+ async def cre_lox_recombination(
560
+ source: CreLoxRecombinationSource, sequences: Annotated[list[TextFileSequence], Field(min_length=1)]
561
+ ):
558
562
  fragments = [read_dsrecord_from_json(seq) for seq in sequences]
559
563
 
560
564
  # Lambda function for code clarity
@@ -216,6 +216,11 @@ def repository_id_http_error_handler(exception: HTTPError, source: RepositoryIdS
216
216
  404,
217
217
  f'{source.repository_name} returned: {exception} - Likely you inserted a wrong {source.repository_name} id',
218
218
  )
219
+ elif exception.code == 403:
220
+ raise HTTPException(
221
+ 403,
222
+ f'Request to {source.repository_name} is not allowed. Please check that the URL is whitelisted.',
223
+ )
219
224
 
220
225
 
221
226
  # Redirect to the right repository
@@ -1,6 +1,6 @@
1
1
  from fastapi import Query, HTTPException
2
2
  from pydna.dseqrecord import Dseqrecord
3
- from pydantic import conlist, create_model
3
+ from pydantic import create_model, Field
4
4
  from typing import Annotated
5
5
  from Bio.Restriction import RestrictionBatch
6
6
 
@@ -30,7 +30,7 @@ router = get_router()
30
30
  )
31
31
  async def restriction(
32
32
  source: RestrictionEnzymeDigestionSource,
33
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
33
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
34
34
  restriction_enzymes: Annotated[list[str], Query(default_factory=list)],
35
35
  ):
36
36
  # There should be 1 or 2 enzymes in the request if the source does not have cuts
@@ -53,7 +53,10 @@ async def restriction(
53
53
 
54
54
  cutsites = seqr.seq.get_cutsites(*enzymes)
55
55
  cutsite_pairs = seqr.seq.get_cutsite_pairs(cutsites)
56
- sources = [RestrictionEnzymeDigestionSource.from_cutsites(*p, source.input, source.id) for p in cutsite_pairs]
56
+ sources = [
57
+ RestrictionEnzymeDigestionSource.from_cutsites(*p, [{'sequence': sequences[0].id}], source.id)
58
+ for p in cutsite_pairs
59
+ ]
57
60
 
58
61
  all_enzymes = set(enzyme for s in sources for enzyme in s.get_enzymes())
59
62
  enzymes_not_cutting = set(restriction_enzymes) - set(all_enzymes)
@@ -90,7 +93,7 @@ async def restriction(
90
93
  )
91
94
  async def polymerase_extension(
92
95
  source: PolymeraseExtensionSource,
93
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
96
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
94
97
  ):
95
98
  """Return the sequence from a polymerase extension reaction"""
96
99
 
@@ -117,7 +120,7 @@ async def polymerase_extension(
117
120
  )
118
121
  async def reverse_complement(
119
122
  source: ReverseComplementSource,
120
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
123
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
121
124
  ):
122
125
  dseq = read_dsrecord_from_json(sequences[0])
123
126
  out_sequence = dseq.reverse_complement()
@@ -1,7 +1,8 @@
1
1
  from fastapi import Query, HTTPException
2
2
  from pydna.dseqrecord import Dseqrecord
3
3
  from pydna.dseq import Dseq
4
- from pydantic import conlist, create_model
4
+ from pydantic import create_model, Field
5
+ from typing import Annotated
5
6
 
6
7
  from ..dna_functions import (
7
8
  format_sequence_genbank,
@@ -12,6 +13,7 @@ from ..pydantic_models import (
12
13
  TextFileSequence,
13
14
  ManuallyTypedSource,
14
15
  OligoHybridizationSource,
16
+ SourceInput,
15
17
  )
16
18
 
17
19
  from .. import request_examples
@@ -54,11 +56,16 @@ async def manually_typed(source: ManuallyTypedSource):
54
56
  )
55
57
  async def oligonucleotide_hybridization(
56
58
  source: OligoHybridizationSource,
57
- primers: conlist(PrimerModel, min_length=1, max_length=2),
59
+ primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
58
60
  minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
59
61
  ):
60
- watson_seq = next((p.sequence for p in primers if p.id == source.forward_oligo), None)
61
- crick_seq = next((p.sequence for p in primers if p.id == source.reverse_oligo), None)
62
+ if len(source.input):
63
+ watson_seq = next((p.sequence for p in primers if p.id == source.input[0].sequence), None)
64
+ crick_seq = next((p.sequence for p in primers if p.id == source.input[1].sequence), None)
65
+ else:
66
+ watson_seq = primers[0].sequence
67
+ crick_seq = primers[1].sequence if len(primers) > 1 else watson_seq
68
+ source.input = [SourceInput(sequence=primers[0].id), SourceInput(sequence=primers[1].id)]
62
69
 
63
70
  if watson_seq is None or crick_seq is None:
64
71
  raise HTTPException(404, 'Invalid oligo id.')
@@ -6,36 +6,24 @@ from httpx import ( # noqa: F401
6
6
  TimeoutException,
7
7
  AsyncHTTPTransport,
8
8
  Request,
9
- RequestError,
10
9
  )
10
+ from urllib.error import HTTPError
11
11
  import ssl
12
12
  import certifi
13
13
  from .app_settings import settings
14
- import re
15
-
16
- white_listed_urls = {
17
- r'^https://www.addgene.org/',
18
- r'^https://media.addgene.org/',
19
- r'^https://wekwikgene.wllsb.edu.cn',
20
- r'^https://seva-plasmids.com/',
21
- r'^https://api.ncbi.nlm.nih.gov/datasets/v2alpha/',
22
- r'^https://eutils.ncbi.nlm.nih.gov/entrez/eutils/',
23
- r'^https://www.snapgene.com/local/fetch.php',
24
- r'^https://benchling.com/',
25
- r'^https://assets.opencloning.org/annotated-igem-distribution',
26
- r'^http://www.euroscarf.de/',
27
- }
14
+
15
+ allowed_external_urls = settings.ALLOWED_EXTERNAL_URLS
28
16
 
29
17
  if settings.PLANNOTATE_URL:
30
- white_listed_urls.add(settings.PLANNOTATE_URL)
18
+ allowed_external_urls.append(settings.PLANNOTATE_URL)
31
19
 
32
20
 
33
- class WhiteListTransport(AsyncHTTPTransport):
21
+ class AllowedExternalUrlsTransport(AsyncHTTPTransport):
34
22
  async def handle_async_request(self, request: Request) -> Response:
35
- if any(re.match(url, str(request.url)) for url in white_listed_urls):
23
+ if any(str(request.url).startswith(url) for url in allowed_external_urls):
36
24
  return await super().handle_async_request(request)
37
25
 
38
- raise RequestError(f'Request to {request.url} is not whitelisted')
26
+ raise HTTPError(request.url, 403, f'Request to {request.url} is not allowed', None, None)
39
27
 
40
28
 
41
29
  proxy = None
@@ -44,7 +32,7 @@ if settings.PROXY_URL:
44
32
 
45
33
 
46
34
  def get_http_client():
47
- transport = WhiteListTransport()
35
+ transport = AllowedExternalUrlsTransport()
48
36
  client_ctx = None
49
37
  if proxy is not None:
50
38
  client_ctx = ssl.create_default_context(cafile=certifi.where())