opencloning 0.3.8__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,7 @@ default_allowed_urls = [
43
43
  'https://assets.opencloning.org/annotated-igem-distribution',
44
44
  'http://www.euroscarf.de/',
45
45
  'https://wekwikgene.wllsb.edu.cn',
46
+ 'http://bahlerweb.cs.ucl.ac.uk',
46
47
  ]
47
48
 
48
49
  if os.environ.get('ALLOWED_EXTERNAL_URLS') is not None:
@@ -4,7 +4,7 @@ from primer3 import bindings
4
4
  import json
5
5
  from fastapi import UploadFile, Response
6
6
 
7
- from ...pydantic_models import (
7
+ from opencloning.pydantic_models import (
8
8
  GenomeCoordinatesSource,
9
9
  TextFileSequence,
10
10
  PrimerModel,
@@ -13,10 +13,10 @@ from ...pydantic_models import (
13
13
  BaseCloningStrategy,
14
14
  HomologousRecombinationSource,
15
15
  )
16
- from .primer_design_settings import amanda_settings
17
- from ...endpoints.external_import import genome_coordinates, read_from_file
18
- from ...endpoints.assembly import pcr, restriction_and_ligation, homologous_recombination
19
- from ...dna_functions import read_dsrecord_from_json
16
+ from opencloning.batch_cloning.EBIC.primer_design_settings import amanda_settings
17
+ from opencloning.endpoints.external_import import genome_coordinates, read_from_file
18
+ from opencloning.endpoints.assembly import pcr, restriction_and_ligation, homologous_recombination
19
+ from opencloning.dna_functions import read_dsrecord_from_json
20
20
 
21
21
  # Settings for design
22
22
  padding = 1000
@@ -123,7 +123,9 @@ async def main():
123
123
 
124
124
  with open(os.path.join(os.path.dirname(__file__), 'barcode.gb'), 'rb') as f:
125
125
  dummy_resp = Response()
126
- resp = await read_from_file(dummy_resp, UploadFile(file=f, filename='barcode.gb'), None, None, True, 'barcode')
126
+ resp = await read_from_file(
127
+ dummy_resp, UploadFile(file=f, filename='barcode.gb'), None, None, True, 'barcode', None, None
128
+ )
127
129
 
128
130
  barcode_source = resp['sources'][0]
129
131
  barcode_seq: TextFileSequence = resp['sequences'][0]
@@ -132,7 +134,14 @@ async def main():
132
134
  with open(os.path.join(os.path.dirname(__file__), 'common_plasmid.gb'), 'rb') as f:
133
135
  dummy_resp = Response()
134
136
  resp = await read_from_file(
135
- dummy_resp, UploadFile(file=f, filename='common_plasmid.gb'), None, None, True, 'common_plasmid'
137
+ dummy_resp,
138
+ UploadFile(file=f, filename='common_plasmid.gb'),
139
+ None,
140
+ None,
141
+ True,
142
+ 'common_plasmid',
143
+ None,
144
+ None,
136
145
  )
137
146
 
138
147
  common_plasmid_source = resp['sources'][0]
@@ -151,9 +160,7 @@ async def main():
151
160
  resp = await homologous_recombination(homologous_recombination_source, [locus_seq, golgen_gate_product], 17)
152
161
 
153
162
  multi_site_sources = [
154
- i
155
- for i, s in enumerate(resp['sources'])
156
- if all(join.left_location != join.right_location for join in s.assembly)
163
+ i for i, s in enumerate(resp['sources']) if all(join.left_location != join.right_location for join in s.input)
157
164
  ]
158
165
  if len(multi_site_sources) > 1:
159
166
  raise ValueError('Multiple insertions possible')
@@ -1,7 +1,7 @@
1
1
  import os
2
- from ...endpoints.external_import import genome_coordinates, get_from_repository_id_addgene, read_from_file
3
- from ...endpoints.assembly import pcr, homologous_recombination
4
- from ...pydantic_models import (
2
+ from opencloning.endpoints.external_import import genome_coordinates, get_from_repository_id_addgene, read_from_file
3
+ from opencloning.endpoints.assembly import pcr, homologous_recombination
4
+ from opencloning.pydantic_models import (
5
5
  GenomeCoordinatesSource,
6
6
  TextFileSequence,
7
7
  AddgeneIdSource,
@@ -12,7 +12,7 @@ from ...pydantic_models import (
12
12
  UploadedFileSource,
13
13
  )
14
14
 
15
- from ...ncbi_requests import get_annotations_from_query
15
+ from opencloning.ncbi_requests import get_annotations_from_query
16
16
  import asyncio
17
17
  import json
18
18
  from Bio import SeqIO
@@ -28,8 +28,8 @@ async def main(
28
28
  checking_primers = list(SeqIO.parse(os.path.join(output_dir, 'checking_primers.fa'), 'fasta'))
29
29
  primer_records = primer_records[:3] + checking_primers[1:] + primer_records[3:] + checking_primers[:1]
30
30
  primers = []
31
- for i, primer in enumerate(primer_records):
32
- primers.append(PrimerModel(sequence=str(primer.seq), id=i + 1, name=primer.id))
31
+ for primer in primer_records:
32
+ primers.append(PrimerModel(sequence=str(primer.seq), id=0, name=primer.id))
33
33
 
34
34
  # Get genome region =====================================================================
35
35
  annotations = await get_annotations_from_query(gene, assembly_accession)
@@ -51,7 +51,7 @@ async def main(
51
51
  orientation = 1 if gene_range['orientation'] == 'plus' else -1
52
52
 
53
53
  source = GenomeCoordinatesSource(
54
- id=1,
54
+ id=0,
55
55
  start=start - padding,
56
56
  end=end + padding,
57
57
  strand=orientation,
@@ -63,89 +63,81 @@ async def main(
63
63
  )
64
64
  locus = await genome_coordinates(source)
65
65
 
66
+ cloning_strategy = BaseCloningStrategy(
67
+ sequences=[],
68
+ sources=[],
69
+ primers=[],
70
+ description=f'Cloning strategy for deleting the gene {gene} using PCR and homologous recombination',
71
+ )
72
+ for primer in primers:
73
+ cloning_strategy.add_primer(primer)
66
74
  locus_seq: TextFileSequence = TextFileSequence.model_validate(locus['sequences'][0])
67
- locus_seq.id = 2
68
75
  locus_source: GenomeCoordinatesSource = GenomeCoordinatesSource.model_validate(locus['sources'][0])
69
- locus_source.output = 2
76
+ cloning_strategy.add_source_and_sequence(locus_source, locus_seq)
70
77
 
71
78
  # Get plasmid sequence ==================================================================================
72
79
  if not isinstance(plasmid, str):
73
80
  if plasmid.filename.endswith('.fa') or plasmid.filename.endswith('.fasta'):
74
- resp = await read_from_file(plasmid, None, None, True, None)
81
+ resp = await read_from_file(plasmid, None, None, True, None, None, None)
75
82
  else:
76
- resp = await read_from_file(plasmid, None, None, None, None)
77
- resp['sources'][0].id = 3
83
+ resp = await read_from_file(plasmid, None, None, None, None, None, None)
78
84
  # Verify that plasmid is circular
79
85
  if not pydna_parse(resp['sequences'][0].file_content)[0].circular:
80
86
  raise ValueError('Plasmid is not circular')
81
87
  plasmid_source: UploadedFileSource = UploadedFileSource.model_validate(resp['sources'][0])
82
- plasmid_source.output = 4
83
88
  else:
84
89
  addgene_source = AddgeneIdSource(
85
- id=3,
90
+ id=0,
86
91
  repository_id=plasmid,
87
92
  repository_name='addgene',
88
93
  )
89
94
  resp = await get_from_repository_id_addgene(addgene_source)
90
95
  plasmid_source: AddgeneIdSource = AddgeneIdSource.model_validate(resp['sources'][0])
91
- plasmid_source.output = 4
92
96
 
93
97
  plasmid_seq: TextFileSequence = TextFileSequence.model_validate(resp['sequences'][0])
94
- plasmid_seq.id = 4
98
+ cloning_strategy.add_source_and_sequence(plasmid_source, plasmid_seq)
95
99
 
96
100
  # PCR ================================================================================================
97
- pcr_source = PCRSource(id=5, output_name='amplified_marker')
98
- resp = await pcr(pcr_source, [plasmid_seq], [primers[0], primers[1]], 20, 0)
101
+ pcr_source = PCRSource(id=0, output_name='amplified_marker')
102
+ resp = await pcr(pcr_source, [plasmid_seq], [primers[0], primers[1]], 15, 0)
99
103
 
100
104
  pcr_product: TextFileSequence = TextFileSequence.model_validate(resp['sequences'][0])
101
- pcr_product.id = 6
102
105
  pcr_source: PCRSource = PCRSource.model_validate(resp['sources'][0])
103
- pcr_source.output = 6
106
+ cloning_strategy.add_source_and_sequence(pcr_source, pcr_product)
104
107
 
105
108
  # Homologous recombination ========================================================================
106
- hrec_source = HomologousRecombinationSource(id=7, output_name='deletion_allele')
109
+ hrec_source = HomologousRecombinationSource(id=0, output_name='deletion_allele')
107
110
  resp = await homologous_recombination(hrec_source, [locus_seq, pcr_product], 50)
108
111
 
109
112
  hrec_product: TextFileSequence = TextFileSequence.model_validate(resp['sequences'][0])
110
- hrec_product.id = 8
111
113
  hrec_source: HomologousRecombinationSource = HomologousRecombinationSource.model_validate(resp['sources'][0])
112
- hrec_source.output = 8
114
+ cloning_strategy.add_source_and_sequence(hrec_source, hrec_product)
113
115
 
114
116
  # Checking pcr 1 ======================================================================================
115
- check_pcr_source_left = PCRSource(id=9, output_name='check_pcr_left')
116
- resp = await pcr(check_pcr_source_left, [hrec_product], [primers[2], primers[3]], 20, 0)
117
+ check_pcr_source_left = PCRSource(id=0, output_name='check_pcr_left')
118
+ resp = await pcr(check_pcr_source_left, [hrec_product], [primers[2], primers[3]], 15, 0)
117
119
 
118
120
  check_pcr_product_left: TextFileSequence = TextFileSequence.model_validate(resp['sequences'][0])
119
- check_pcr_product_left.id = 10
120
121
  check_pcr_source_left: PCRSource = PCRSource.model_validate(resp['sources'][0])
121
- check_pcr_source_left.output = 10
122
+ cloning_strategy.add_source_and_sequence(check_pcr_source_left, check_pcr_product_left)
122
123
 
123
124
  # Checking pcr 2 ======================================================================================
124
- check_pcr_source_right = PCRSource(id=11, output_name='check_pcr_right')
125
- resp = await pcr(check_pcr_source_right, [hrec_product], [primers[4], primers[5]], 20, 0)
125
+ check_pcr_source_right = PCRSource(id=0, output_name='check_pcr_right')
126
+ resp = await pcr(check_pcr_source_right, [hrec_product], [primers[4], primers[5]], 15, 0)
126
127
 
127
128
  check_pcr_product_right: TextFileSequence = TextFileSequence.model_validate(resp['sequences'][0])
128
- check_pcr_product_right.id = 12
129
129
  check_pcr_source_right: PCRSource = PCRSource.model_validate(resp['sources'][0])
130
- check_pcr_source_right.output = 12
131
-
132
- sources = [locus_source, plasmid_source, pcr_source, hrec_source, check_pcr_source_left, check_pcr_source_right]
133
- sequences = [locus_seq, plasmid_seq, pcr_product, hrec_product, check_pcr_product_left, check_pcr_product_right]
130
+ cloning_strategy.add_source_and_sequence(check_pcr_source_right, check_pcr_product_right)
134
131
 
135
- cloning_strategy = {
136
- 'sources': [s.model_dump() for s in sources],
137
- 'sequences': [s.model_dump() for s in sequences],
138
- 'primers': [p.model_dump() for p in primers],
139
- 'description': f'Cloning strategy for deleting the gene {gene} using PCR and homologous recombination',
140
- }
141
-
142
- BaseCloningStrategy.model_validate(cloning_strategy)
132
+ cloning_strategy.description = (
133
+ f'Cloning strategy for deleting the gene {gene} using PCR and homologous recombination'
134
+ )
143
135
 
144
136
  if not os.path.exists(os.path.join(output_dir, gene)):
145
137
  os.makedirs(os.path.join(output_dir, gene))
146
138
 
147
139
  with open(os.path.join(output_dir, gene, 'cloning_strategy.json'), 'w') as f:
148
- json.dump(cloning_strategy, f, indent=2)
140
+ json.dump(cloning_strategy.model_dump(), f, indent=2)
149
141
 
150
142
 
151
143
  if __name__ == '__main__':
@@ -1,4 +1,4 @@
1
- from ...pydantic_models import BaseCloningStrategy, PrimerModel, PCRSource
1
+ from ...pydantic_models import BaseCloningStrategy, PrimerModel, PCRSource, HomologousRecombinationSource
2
2
  from pydna.parsers import parse as pydna_parse
3
3
  import os
4
4
  import json
@@ -16,11 +16,11 @@ chromosomes = {
16
16
 
17
17
  def find_primer_aligned_sequence(pcr_sources: list[PCRSource], primer: PrimerModel) -> str:
18
18
  for source in pcr_sources:
19
- if source.assembly[0].sequence == primer.id:
20
- loc = source.assembly[0].right_location
19
+ if source.input[0].sequence == primer.id:
20
+ loc = source.input[0].right_location
21
21
  return str(primer.sequence[loc.start : loc.end])
22
- if source.assembly[-1].sequence == primer.id:
23
- loc = source.assembly[-1].left_location
22
+ if source.input[-1].sequence == primer.id:
23
+ loc = source.input[-1].left_location
24
24
  return str(reverse_complement(primer.sequence)[loc.start : loc.end])
25
25
  raise ValueError(f"Primer {primer.id} not found in any PCR source")
26
26
 
@@ -30,12 +30,16 @@ def process_folder(working_dir: str):
30
30
  strategy = BaseCloningStrategy.model_validate(json.load(f))
31
31
 
32
32
  pcr_sources = [s for s in strategy.sources if s.type == 'PCRSource']
33
+ # We do this to have access to .end and .start
34
+ pcr_sources = [PCRSource.model_validate(s.model_dump()) for s in pcr_sources]
33
35
  locus_source = next(s for s in strategy.sources if s.type == 'GenomeCoordinatesSource')
34
36
  hrec_source = next(s for s in strategy.sources if s.type == 'HomologousRecombinationSource')
37
+ # We do this to have access to .end and .start
38
+ hrec_source: HomologousRecombinationSource = HomologousRecombinationSource.model_validate(hrec_source.model_dump())
35
39
 
36
40
  chromosome = chromosomes[locus_source.sequence_accession]
37
- insertion_start = locus_source.start + hrec_source.assembly[0].right_location.end
38
- insertion_end = locus_source.start + hrec_source.assembly[-1].left_location.start
41
+ insertion_start = locus_source.start + hrec_source.input[0].right_location.end
42
+ insertion_end = locus_source.start + hrec_source.input[-1].left_location.start
39
43
 
40
44
  # Write out the sequences in genbank format and extract some relevant info
41
45
  sequences = [pydna_parse(sequence.file_content)[0] for sequence in strategy.sequences]
@@ -77,21 +77,17 @@ async def ziqiang_et_al2024_post(
77
77
  primers = design_primers(protospacers)
78
78
 
79
79
  with open(os.path.join(os.path.dirname(__file__), 'ziqiang_et_al2024.json'), 'r') as f:
80
- template = BaseCloningStrategy.model_validate(json.load(f))
81
-
82
- max_primer_id = max([primer.id for primer in template.primers], default=0)
80
+ cloning_strategy = BaseCloningStrategy.model_validate(json.load(f))
83
81
 
84
82
  for i, primer in enumerate(primers):
85
- max_primer_id += 1
86
83
  orientation = 'rvs' if i % 2 == 0 else 'fwd'
87
- template.primers.append(
88
- PrimerModel(id=max_primer_id, name=f"protospacer_{i // 2 + 1}_{orientation}", sequence=primer)
89
- )
84
+ cloning_strategy.add_primer(PrimerModel(id=0, name=f"protospacer_{i // 2 + 1}_{orientation}", sequence=primer))
90
85
 
91
- primer_ids_for_pcrs = [3, *[p.id for p in template.primers[-len(primers) :]], 12]
92
- next_node_id = max([s.id for s in template.sequences] + [s.id for s in template.sources]) + 1
86
+ fwd_primer3 = next(p for p in cloning_strategy.primers if p.name == 'Fw-Primer3')
87
+ rvs_primer12 = next(p for p in cloning_strategy.primers if p.name == 'Rev-Primer12')
88
+ primer_ids_for_pcrs = [fwd_primer3.id, *[p.id for p in cloning_strategy.primers[-len(primers) :]], rvs_primer12.id]
93
89
 
94
- template_sequence = next(s for s in template.sequences if s.id == 18)
90
+ template_sequence = next(s for s in cloning_strategy.sequences if s.id == 9)
95
91
  for i, (fwd_primer_id, rvs_primer_id) in enumerate(zip(primer_ids_for_pcrs[::2], primer_ids_for_pcrs[1::2])):
96
92
  if i == 0:
97
93
  name = 'start_ps1'
@@ -100,80 +96,56 @@ async def ziqiang_et_al2024_post(
100
96
  else:
101
97
  name = f'end_ps{i}_start_ps{i + 1}'
102
98
 
103
- pcr_source = PCRSource(id=next_node_id, output_name=name)
104
- fwd_primer = next(p for p in template.primers if p.id == fwd_primer_id)
105
- rvs_primer = next(p for p in template.primers if p.id == rvs_primer_id)
99
+ pcr_source = PCRSource(id=0, output_name=name)
100
+ fwd_primer = next(p for p in cloning_strategy.primers if p.id == fwd_primer_id)
101
+ rvs_primer = next(p for p in cloning_strategy.primers if p.id == rvs_primer_id)
106
102
 
107
- next_node_id += 1
108
- resp = await pcr(pcr_source, [template_sequence], [fwd_primer, rvs_primer], 14, 0)
103
+ resp = await pcr(pcr_source, [template_sequence], [fwd_primer, rvs_primer], 7, 0)
109
104
  pcr_product: TextFileSequence = TextFileSequence.model_validate(resp['sequences'][0])
110
- pcr_product.id = next_node_id
111
105
  pcr_source: PCRSource = PCRSource.model_validate(resp['sources'][0])
112
- pcr_source.output = next_node_id
113
-
114
- template.sequences.append(pcr_product)
115
- template.sources.append(pcr_source)
116
-
117
- next_node_id += 1
106
+ cloning_strategy.add_source_and_sequence(pcr_source, pcr_product)
118
107
 
119
108
  # Find all PCR products
120
109
  # (we use type instead of isinstance because the BaseCloningStrategy does not
121
110
  # have the newer source models with extra methods)
122
- pcr_product_ids = [s.output for s in template.sources if s.type == 'PCRSource']
111
+ pcr_product_ids = [s.id for s in cloning_strategy.sources if s.type == 'PCRSource']
123
112
 
124
113
  # Make all input of a Golden gate assembly
125
114
  golden_gate_source = RestrictionAndLigationSource(
126
- id=next_node_id, output_name='golden_gate_assembly', restriction_enzymes=['BsaI'], input=pcr_product_ids
115
+ id=0, output_name='golden_gate_assembly', restriction_enzymes=['BsaI']
127
116
  )
128
117
 
129
- next_node_id += 1
130
118
  # Make them
131
- input_sequences = [next(s for s in template.sequences if s.id == p) for p in pcr_product_ids]
119
+ input_sequences = [next(s for s in cloning_strategy.sequences if s.id == p) for p in pcr_product_ids]
132
120
  resp = await restriction_and_ligation(golden_gate_source, input_sequences, False, False)
133
121
  golden_gate_product: TextFileSequence = TextFileSequence.model_validate(resp['sequences'][0])
134
- golden_gate_product.id = next_node_id
135
122
  golden_gate_source: RestrictionAndLigationSource = RestrictionAndLigationSource.model_validate(resp['sources'][0])
136
- golden_gate_source.output = next_node_id
137
- next_node_id += 1
123
+ cloning_strategy.add_source_and_sequence(golden_gate_source, golden_gate_product)
138
124
 
139
- template.sequences.append(golden_gate_product)
140
- template.sources.append(golden_gate_source)
141
-
142
- bp_target = next(s for s in template.sequences if s.id == 12)
143
- gateway_source = GatewaySource(id=next_node_id, output_name='entry_clone', reaction_type='BP', greedy=False)
144
- next_node_id += 1
125
+ bp_target = next(s for s in cloning_strategy.sequences if s.id == 6)
126
+ gateway_source = GatewaySource(id=0, output_name='entry_clone', reaction_type='BP', greedy=False)
145
127
  resp = await gateway(gateway_source, [golden_gate_product, bp_target], circular_only=True, only_multi_site=True)
146
128
  gateway_product: TextFileSequence = TextFileSequence.model_validate(resp['sequences'][0])
147
- gateway_product.id = next_node_id
148
129
  gateway_source: GatewaySource = GatewaySource.model_validate(resp['sources'][0])
149
- gateway_source.output = next_node_id
150
- next_node_id += 1
151
-
152
- template.sequences.append(gateway_product)
153
- template.sources.append(gateway_source)
130
+ cloning_strategy.add_source_and_sequence(gateway_source, gateway_product)
154
131
 
155
132
  if until_bp:
156
133
  # Delete sources and sequences left
157
- ids2delete = list(range(5, 11))
158
- template.sources = [s for s in template.sources if s.id not in ids2delete]
159
- template.sequences = [s for s in template.sequences if s.id not in ids2delete]
160
- return template
134
+ ids2delete = list(range(3, 6))
135
+ cloning_strategy.sources = [s for s in cloning_strategy.sources if s.id not in ids2delete]
136
+ cloning_strategy.sequences = [s for s in cloning_strategy.sequences if s.id not in ids2delete]
137
+ return cloning_strategy
161
138
 
162
139
  # Now we want to do a Gateway with everything, so we need to find all sequences that are not input of anything
163
- all_input_ids = sum([s.input for s in template.sources], [])
164
- sequences_to_clone = [s for s in template.sequences if s.id not in all_input_ids]
140
+ all_inputs = sum([s.input for s in cloning_strategy.sources], [])
141
+ all_input_ids = [s.sequence for s in all_inputs]
142
+ sequences_to_clone = [s for s in cloning_strategy.sequences if s.id not in all_input_ids]
165
143
 
166
- gateway_source = GatewaySource(id=next_node_id, output_name='expression_clone', reaction_type='LR', greedy=False)
167
- next_node_id += 1
144
+ gateway_source = GatewaySource(id=0, output_name='expression_clone', reaction_type='LR', greedy=False)
168
145
  resp = await gateway(gateway_source, sequences_to_clone, circular_only=True, only_multi_site=True)
169
146
  index_of_product = next(i for i, s in enumerate(resp['sequences']) if '/label="Cas9"' in s.file_content)
170
147
  expression_clone: TextFileSequence = TextFileSequence.model_validate(resp['sequences'][index_of_product])
171
- expression_clone.id = next_node_id
172
148
  gateway_source: GatewaySource = GatewaySource.model_validate(resp['sources'][index_of_product])
173
- gateway_source.output = next_node_id
174
- next_node_id += 1
175
-
176
- template.sequences.append(expression_clone)
177
- template.sources.append(gateway_source)
149
+ cloning_strategy.add_source_and_sequence(gateway_source, expression_clone)
178
150
 
179
- return template
151
+ return cloning_strategy