opencloning-0.3.8-py3-none-any.whl → opencloning-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -121,7 +121,7 @@ If you want to fix several bugs from the command line, you can use the `backend_
  Before running this script, you need to migrate the data to the latest version of the schema. See [full documentation](https://github.com/OpenCloning/OpenCloning_LinkML?tab=readme-ov-file#migration-from-previous-versions-of-the-schema), but basically:

  ```bash
- python -m opencloning_linkl.migrations.migrate file1.json file2.json ...
+ python -m opencloning_linkl.migrations.migrate --target-version='0.3.0' file1.json file2.json ...
  ```

  Then, you can run the script:
@@ -131,7 +131,10 @@ python -m opencloning.bug_fixing.backend_v0_3 file1.json file2.json ...
  ```

  For each file:
- * If the file does not need fixing, it will be skipped.
+ * If the file does not need fixing, it will be skipped. Migrate it to the latest version of the schema by removing the `--target-version` flag.
+ ```bash
+ python -m opencloning_linkl.migrations.migrate file1.json file2.json ...
+ ```
  * If the file needs fixing, it will create a new file `file_1_needs_fixing.json` at the same location where the original file is, with the problematic sources replaced by templates.
  * You can then load these files into the web application and run the correct steps manually.

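Taken together, the migration-and-fix workflow described in the README hunks above amounts to the following (a sketch built only from the commands shown; `file1.json file2.json ...` are placeholders for your own files):

```bash
# 1. Migrate the files to schema version 0.3.0, the version the fixing script expects
python -m opencloning_linkl.migrations.migrate --target-version='0.3.0' file1.json file2.json ...

# 2. Run the bug-fixing script; files that need manual fixing get a *_needs_fixing.json copy next to the original
python -m opencloning.bug_fixing.backend_v0_3 file1.json file2.json ...

# 3. Migrate the files that did not need fixing to the latest schema version (no --target-version flag)
python -m opencloning_linkl.migrations.migrate file1.json file2.json ...
```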
@@ -27,31 +27,26 @@ def fix_backend_v0_3(input_data: dict) -> CloningStrategy | None:
  for source in data['sources']:
  if source['type'] == 'GatewaySource':
  # Take the first assembly value and check that the length of features is 7
- assembly = source['assembly']
- if len(assembly):
+ input = source['input']
+ if len(input):
  feat2check = (
- assembly[0]['left_location']
- if assembly[0]['left_location'] is not None
- else assembly[0]['right_location']
+ input[0]['left_location'] if input[0]['left_location'] is not None else input[0]['right_location']
  )
  if len(SequenceLocationStr(feat2check).to_biopython_location()) != 7:
  problematic_source_ids.add(source['id'])

- elif 'assembly' in source:
+ elif any(('type' in i and i['type'] == 'AssemblyFragment') for i in source['input']):
  assembly_source = AssemblySource(
  id=source['id'],
  input=source['input'],
- output=source['output'],
  circular=source['circular'],
- assembly=source['assembly'],
  )
- input_seqs = [
- TextFileSequence.model_validate(s) for s in data['sequences'] if s['id'] in assembly_source.input
- ]
+ input_ids = [i.sequence for i in assembly_source.input]
+ input_seqs = [TextFileSequence.model_validate(s) for s in data['sequences'] if s['id'] in input_ids]
  # Sort input_seqs as in input
- input_seqs.sort(key=lambda x: assembly_source.input.index(x.id))
+ input_seqs.sort(key=lambda x: input_ids.index(x.id))
  if source['type'] == 'PCRSource':
- primer_ids = [assembly_source.assembly[0].sequence, assembly_source.assembly[2].sequence]
+ primer_ids = [assembly_source.input[0].sequence, assembly_source.input[2].sequence]
  primers = [PrimerModel.model_validate(p) for p in data['primers'] if p['id'] in primer_ids]
  input_seqs = [primers[0], input_seqs[0], primers[1]]

@@ -68,9 +63,11 @@ def fix_backend_v0_3(input_data: dict) -> CloningStrategy | None:
  problematic_source_ids.update(sum([cs.all_children_source_ids(s) for s in problematic_source_ids], []))
  for source_id in problematic_source_ids:
  source = next(s for s in data['sources'] if s['id'] == source_id)
- output_seq = next(s for s in data['sequences'] if s['id'] == source['output'])
- remove_keys = ['assembly', 'circular']
+ output_seq = next(s for s in data['sequences'] if s['id'] == source_id)
+ # Remove assembly info
+ remove_keys = ['circular']
  source_keep = {key: value for key, value in source.items() if key not in remove_keys}
+ source_keep['input'] = [{'sequence': f['sequence']} for f in source_keep['input']]
  source.clear()
  source.update(source_keep)

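For orientation, this is the shape change the fixer above is coping with. The field values below are made up; only the structure is inferred from the code in this hunk (joins moving from a separate `assembly` list into typed `input` entries, the `output` field going away, and the produced sequence being looked up by the source's own id):

```python
# Old shape: separate 'assembly' joins, plain integer inputs, explicit 'output' (ids are arbitrary).
old_source = {
    'id': 5,
    'type': 'PCRSource',
    'input': [2],
    'output': 6,
    'circular': False,
    'assembly': [
        {'sequence': 3, 'left_location': None, 'right_location': '...'},
        {'sequence': 2, 'left_location': '...', 'right_location': '...'},
        {'sequence': 4, 'left_location': '...', 'right_location': None},
    ],
}

# New shape: joins folded into 'input' as AssemblyFragment entries; the produced sequence is the
# one whose id matches the source id (see the `s['id'] == source_id` lookup above).
new_source = {
    'id': 5,
    'type': 'PCRSource',
    'circular': False,
    'input': [
        {'type': 'AssemblyFragment', 'sequence': 3, 'left_location': None, 'right_location': '...'},
        {'type': 'AssemblyFragment', 'sequence': 2, 'left_location': '...', 'right_location': '...'},
        {'type': 'AssemblyFragment', 'sequence': 4, 'left_location': '...', 'right_location': None},
    ],
}
```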
@@ -15,7 +15,7 @@ from pydna.common_sub_strings import common_sub_strings
  from Bio.SeqIO import parse as seqio_parse
  import io
  import warnings
- from Bio.SeqIO.InsdcIO import GenBankIterator, GenBankScanner
+ from Bio.SeqIO.InsdcIO import GenBankScanner, GenBankIterator
  import re
  from .http_client import get_http_client, ConnectError, TimeoutException
  from .ncbi_requests import get_genbank_sequence
@@ -29,7 +29,7 @@ def format_sequence_genbank(seq: Dseqrecord, seq_name: str = None) -> TextFileSe
  correct_name(seq)

  return TextFileSequence(
- id=0,
+ id=int(seq.id) if seq.id is not None and str(seq.id).isdigit() else 0,
  file_content=seq.format('genbank'),
  sequence_file_format=SequenceFileFormat('genbank'),
  overhang_crick_3prime=seq.seq.ovhg,
@@ -280,10 +280,9 @@ class MyGenBankScanner(GenBankScanner):

  class MyGenBankIterator(GenBankIterator):

- def parse(self, handle):
- """Start parsing the file, and return a SeqRecord generator."""
- records = MyGenBankScanner(debug=0).parse_records(handle)
- return records
+ def __init__(self, source):
+ super(GenBankIterator, self).__init__(source, fmt='GenBank')
+ self.records = MyGenBankScanner(debug=0).parse_records(self.stream)


  def custom_file_parser(
opencloning/dna_utils.py CHANGED
@@ -15,6 +15,7 @@ from Bio.Data.IUPACData import ambiguous_dna_values as _ambiguous_dna_values
  import re
  from Bio.SeqFeature import Location, SimpleLocation
  from pydna.utils import shift_location
+ from pairwise_alignments_to_msa.alignment import aligned_tuples_to_MSA

  aligner = PairwiseAligner(scoring='blastn')

@@ -125,33 +126,37 @@ def permutate_trace(reference: str, sanger_trace: str) -> str:

  def align_sanger_traces(dseqr: Dseqrecord, sanger_traces: list[str]) -> list[str]:
  """Align a sanger track to a dseqr sequence"""
- query_str = str(dseqr.seq)
+
+ # Ensure sequences are in upper case
+ query_str = str(dseqr.seq).upper()
+ sanger_traces = [trace.upper() for trace in sanger_traces]
+
  # Check that required executables exist in PATH
  if not shutil.which('mars'):
  raise RuntimeError("'mars' executable not found in PATH")
  if not shutil.which('mafft'):
  raise RuntimeError("'mafft' executable not found in PATH")

- # If the sequence is circular, use MARS to permutate the traces
- if dseqr.circular:
- permutated_traces = []
- for trace in sanger_traces:
- permutated_traces.append(permutate_trace(query_str, trace))
- permutated_traces.append(permutate_trace(query_str, reverse_complement(trace)))
-
- traces_oriented = []
- # Pairwise-align and keep the best alignment, to decide which orientation to keep
- for fwd, rvs in zip(permutated_traces[::2], permutated_traces[1::2]):
- fwd_alignment = next(aligner.align(query_str, fwd))
- rvs_alignment = next(aligner.align(query_str, rvs))
-
- if fwd_alignment.score > rvs_alignment.score:
- traces_oriented.append(fwd.replace('N', ''))
- else:
- traces_oriented.append(rvs.replace('N', ''))
- sanger_traces = traces_oriented
-
- return align_with_mafft([query_str, *sanger_traces], True)
+ aligned_pairs = []
+ for trace in sanger_traces:
+ # If the sequence is circular, permutate both fwd and reverse complement
+ if dseqr.circular:
+ fwd = permutate_trace(query_str, trace)
+ rvs = permutate_trace(query_str, reverse_complement(trace))
+ else:
+ fwd = trace
+ rvs = reverse_complement(trace)
+
+ # Pairwise-align and keep the best alignment
+ fwd_alignment = next(aligner.align(query_str, fwd))
+ rvs_alignment = next(aligner.align(query_str, rvs))
+
+ best_alignment = fwd_alignment if fwd_alignment.score > rvs_alignment.score else rvs_alignment
+
+ formatted_alignment = best_alignment.format('fasta').split()[1::2]
+ aligned_pairs.append(tuple(formatted_alignment))
+
+ return aligned_tuples_to_MSA(aligned_pairs)


  def compute_regex_site(site: str) -> str:
@@ -3,7 +3,8 @@ from typing import Union, Literal, Callable
  from pydna.dseqrecord import Dseqrecord
  from pydna.primer import Primer as PydnaPrimer
  from pydna.crispr import cas9
- from pydantic import conlist, create_model
+ from pydantic import create_model, Field
+ from typing import Annotated
  from Bio.Restriction.Restriction import RestrictionBatch
  from opencloning.cre_lox import cre_loxP_overlap, annotate_loxP_sites
  from ..dna_functions import (
@@ -27,7 +28,7 @@ from ..pydantic_models import (
  CreLoxRecombinationSource,
  InVivoAssemblySource,
  )
- from ..assembly2 import (
+ from pydna.assembly2 import (
  Assembly,
  assemble,
  sticky_end_sub_strings,
@@ -80,8 +81,8 @@ def format_known_assembly_response(
  )
  async def crispr(
  source: CRISPRSource,
- guides: list[PrimerModel],
- sequences: conlist(TextFileSequence, min_length=2, max_length=2),
+ guides: Annotated[list[PrimerModel], Field(min_length=1)],
+ sequences: Annotated[list[TextFileSequence], Field(min_length=2, max_length=2)],
  minimal_homology: int = Query(40, description='The minimum homology between the template and the insert.'),
  ):
  """Return the sequence after performing CRISPR editing by Homology directed repair
@@ -106,6 +107,7 @@ async def crispr(
  400, f'Could not find Cas9 cutsite in the target sequence using the guide: {guide.name}'
  )
  guide_cuts.append(possible_cuts)
+ sorted_guide_ids = list(sorted([guide.id for guide in guides]))

  # Check if homologous recombination is possible
  fragments = [template, insert]
@@ -144,12 +146,12 @@ async def crispr(
  # meant for linear DNA

  out_sources = [
- CRISPRSource.from_assembly(id=source.id, assembly=a, guides=source.guides, fragments=fragments)
+ CRISPRSource.from_assembly(id=source.id, assembly=a, guides=sorted_guide_ids, fragments=fragments)
  for a in valid_assemblies
  ]

  # If a specific assembly is requested
- if len(source.assembly):
+ if source.is_assembly_complete():
  return format_known_assembly_response(source, out_sources, [template, insert])

  out_sequences = [
@@ -204,7 +206,7 @@ def generate_assemblies(
  raise HTTPException(400, *e.args)

  # If a specific assembly is requested
- if len(source.assembly):
+ if source.is_assembly_complete():
  return format_known_assembly_response(source, out_sources, fragments, product_callback)

  out_sequences = [
@@ -225,7 +227,7 @@ def generate_assemblies(
  )
  async def ligation(
  source: LigationSource,
- sequences: conlist(TextFileSequence, min_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
  blunt: bool = Query(False, description='Use blunt ligation as well as sticky ends.'),
  allow_partial_overlap: bool = Query(False, description='Allow for partially overlapping sticky ends.'),
  circular_only: bool = Query(False, description='Only return circular assemblies.'),
@@ -239,7 +241,7 @@ async def ligation(

  # If the assembly is known, the blunt parameter is ignored, and we set the algorithm type from the assembly
  # (blunt ligations have features without length)
- if len(source.assembly):
+ if source.is_assembly_complete():
  asm = source.get_assembly_plan(fragments)
  blunt = len(asm[0][2]) == 0

@@ -261,8 +263,8 @@ async def ligation(
  )
  async def pcr(
  source: PCRSource,
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
- primers: conlist(PrimerModel, min_length=1, max_length=2),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
+ primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
  minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
  allowed_mismatches: int = Query(0, description='The number of mismatches allowed'),
  ):
@@ -277,7 +279,7 @@ async def pcr(
  # What happens if annealing is zero? That would mean
  # mismatch in the 3' of the primer, which maybe should
  # not be allowed.
- if len(source.assembly):
+ if source.is_assembly_complete():
  minimal_annealing = source.minimal_overlap()
  # Only the ones that match are included in the output assembly
  # location, so the submitted assembly should be returned without
@@ -315,11 +317,11 @@ async def pcr(
  ]

  # If a specific assembly is requested
- if len(source.assembly):
+ if source.is_assembly_complete():

  def callback(x):
  if source.add_primer_features:
- return annotate_primer_binding_sites(x, fragments, source.get_assembly_plan(fragments))
+ return annotate_primer_binding_sites(x, fragments)
  else:
  return x

@@ -331,7 +333,7 @@ async def pcr(
  def callback(fragments, a):
  out_seq = assemble(fragments, a)
  if source.add_primer_features:
- return annotate_primer_binding_sites(out_seq, fragments, possible_assemblies)
+ return annotate_primer_binding_sites(out_seq, fragments)
  else:
  return out_seq

@@ -353,14 +355,14 @@ async def pcr(
  )
  async def homologous_recombination(
  source: HomologousRecombinationSource,
- sequences: conlist(TextFileSequence, min_length=2, max_length=2),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=2, max_length=2)],
  minimal_homology: int = Query(40, description='The minimum homology between the template and the insert.'),
  ):

  template, insert = [read_dsrecord_from_json(seq) for seq in sequences]

  # If an assembly is provided, we ignore minimal_homology
- if len(source.assembly):
+ if source.is_assembly_complete():
  minimal_homology = source.minimal_overlap()

  asm = Assembly((template, insert), limit=minimal_homology, use_all_fragments=True)
@@ -386,7 +388,7 @@ async def homologous_recombination(
  ]

  # If a specific assembly is requested
- if len(source.assembly):
+ if source.is_assembly_complete():
  return format_known_assembly_response(source, out_sources, [template, insert])

  out_sequences = [
@@ -411,7 +413,7 @@ async def homologous_recombination(
  ),
  )
  async def gibson_assembly(
- sequences: conlist(TextFileSequence, min_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
  source: Union[GibsonAssemblySource, OverlapExtensionPCRLigationSource, InFusionSource, InVivoAssemblySource],
  minimal_homology: int = Query(
  40, description='The minimum homology between consecutive fragments in the assembly.'
@@ -450,7 +452,7 @@ async def gibson_assembly(
  )
  async def restriction_and_ligation(
  source: RestrictionAndLigationSource,
- sequences: conlist(TextFileSequence, min_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
  allow_partial_overlap: bool = Query(False, description='Allow for partially overlapping sticky ends.'),
  circular_only: bool = Query(False, description='Only return circular assemblies.'),
  ):
@@ -492,7 +494,7 @@ async def restriction_and_ligation(
  )
  async def gateway(
  source: GatewaySource,
- sequences: conlist(TextFileSequence, min_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
  circular_only: bool = Query(False, description='Only return circular assemblies.'),
  only_multi_site: bool = Query(
  False, description='Only return assemblies where more than one site per sequence recombined.'
@@ -537,7 +539,7 @@ async def gateway(
  multi_site_sources = [
  i
  for i, s in enumerate(resp['sources'])
- if all(join.left_location != join.right_location for join in s.assembly)
+ if all(join.left_location != join.right_location for join in s.input)
  ]
  sources = [resp['sources'][i] for i in multi_site_sources]
  sequences = [resp['sequences'][i] for i in multi_site_sources]
@@ -554,7 +556,9 @@ async def gateway(
  sequences=(list[TextFileSequence], ...),
  ),
  )
- async def cre_lox_recombination(source: CreLoxRecombinationSource, sequences: conlist(TextFileSequence, min_length=1)):
+ async def cre_lox_recombination(
+ source: CreLoxRecombinationSource, sequences: Annotated[list[TextFileSequence], Field(min_length=1)]
+ ):
  fragments = [read_dsrecord_from_json(seq) for seq in sequences]

  # Lambda function for code clarity
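A recurring change across the endpoint signatures above is the replacement of pydantic's `conlist(...)` with the v2-style `Annotated[list[...], Field(...)]` constraint. A minimal, self-contained sketch of that idiom (the model and field names here are made up, not taken from the package):

```python
from typing import Annotated
from pydantic import BaseModel, Field, ValidationError

class Payload(BaseModel):
    # Same constraint that conlist(int, min_length=1, max_length=2) used to express
    items: Annotated[list[int], Field(min_length=1, max_length=2)]

Payload(items=[1])       # valid
Payload(items=[1, 2])    # valid
try:
    Payload(items=[1, 2, 3])  # one item too many
except ValidationError as exc:
    print(exc.error_count())  # 1
```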
@@ -1,6 +1,6 @@
  from fastapi import Query, HTTPException
  from pydna.dseqrecord import Dseqrecord
- from pydantic import conlist, create_model
+ from pydantic import create_model, Field
  from typing import Annotated
  from Bio.Restriction import RestrictionBatch

@@ -30,7 +30,7 @@ router = get_router()
  )
  async def restriction(
  source: RestrictionEnzymeDigestionSource,
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
  restriction_enzymes: Annotated[list[str], Query(default_factory=list)],
  ):
  # There should be 1 or 2 enzymes in the request if the source does not have cuts
@@ -53,7 +53,10 @@ async def restriction(

  cutsites = seqr.seq.get_cutsites(*enzymes)
  cutsite_pairs = seqr.seq.get_cutsite_pairs(cutsites)
- sources = [RestrictionEnzymeDigestionSource.from_cutsites(*p, source.input, source.id) for p in cutsite_pairs]
+ sources = [
+ RestrictionEnzymeDigestionSource.from_cutsites(*p, [{'sequence': sequences[0].id}], source.id)
+ for p in cutsite_pairs
+ ]

  all_enzymes = set(enzyme for s in sources for enzyme in s.get_enzymes())
  enzymes_not_cutting = set(restriction_enzymes) - set(all_enzymes)
@@ -90,7 +93,7 @@ async def restriction(
  )
  async def polymerase_extension(
  source: PolymeraseExtensionSource,
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
  ):
  """Return the sequence from a polymerase extension reaction"""

@@ -117,7 +120,7 @@ async def polymerase_extension(
  )
  async def reverse_complement(
  source: ReverseComplementSource,
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
  ):
  dseq = read_dsrecord_from_json(sequences[0])
  out_sequence = dseq.reverse_complement()
@@ -1,7 +1,8 @@
  from fastapi import Query, HTTPException
  from pydna.dseqrecord import Dseqrecord
  from pydna.dseq import Dseq
- from pydantic import conlist, create_model
+ from pydantic import create_model, Field
+ from typing import Annotated

  from ..dna_functions import (
  format_sequence_genbank,
@@ -12,6 +13,7 @@ from ..pydantic_models import (
  TextFileSequence,
  ManuallyTypedSource,
  OligoHybridizationSource,
+ SourceInput,
  )

  from .. import request_examples
@@ -54,11 +56,16 @@ async def manually_typed(source: ManuallyTypedSource):
  )
  async def oligonucleotide_hybridization(
  source: OligoHybridizationSource,
- primers: conlist(PrimerModel, min_length=1, max_length=2),
+ primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
  minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
  ):
- watson_seq = next((p.sequence for p in primers if p.id == source.forward_oligo), None)
- crick_seq = next((p.sequence for p in primers if p.id == source.reverse_oligo), None)
+ if len(source.input):
+ watson_seq = next((p.sequence for p in primers if p.id == source.input[0].sequence), None)
+ crick_seq = next((p.sequence for p in primers if p.id == source.input[1].sequence), None)
+ else:
+ watson_seq = primers[0].sequence
+ crick_seq = primers[1].sequence if len(primers) > 1 else watson_seq
+ source.input = [SourceInput(sequence=primers[0].id), SourceInput(sequence=primers[1].id)]

  if watson_seq is None or crick_seq is None:
  raise HTTPException(404, 'Invalid oligo id.')
@@ -1,5 +1,5 @@
- from pydantic import BaseModel, Field, model_validator, field_validator
- from typing import Optional, List
+ from pydantic import BaseModel, Field, model_validator, field_validator, Discriminator, Tag
+ from typing import Optional, List, Union, Annotated
  from pydantic_core import core_schema
  from ._version import __version__

@@ -49,8 +49,9 @@ from opencloning_linkml.datamodel import (
  SEVASource as _SEVASource,
  CreLoxRecombinationSource as _CreLoxRecombinationSource,
  InVivoAssemblySource as _InVivoAssemblySource,
+ SourceInput as _SourceInput,
  )
- from .assembly2 import (
+ from pydna.assembly2 import (
  edge_representation2subfragment_representation,
  subfragment_representation2edge_representation,
  )
@@ -64,6 +65,10 @@ class TextFileSequence(_TextFileSequence):
  pass


+ class SourceInput(_SourceInput):
+ pass
+
+
  class PrimerModel(_Primer):
  """Called PrimerModel not to be confused with the class from pydna."""

@@ -94,8 +99,23 @@ class SeqFeatureModel(BaseModel):
  # Sources =========================================


- class SourceCommonClass:
- input: Optional[List[int]] = Field(
+ def input_discriminator(v) -> str | None:
+ """
+ Discriminator that yields SourceInput by default
+ """
+ if isinstance(v, dict):
+ input_type = v.get('type', None)
+ if input_type is None:
+ return 'SourceInput'
+ else:
+ return input_type
+ elif isinstance(v, SourceInput):
+ return v.type
+ return None
+
+
+ class SourceCommonClass(BaseModel):
+ input: Optional[List[SourceInput]] = Field(
  default_factory=list,
  description="""The sequences that are an input to this source. If the source represents external import of a sequence, it's empty.""",
  json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source']}},
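The `input_discriminator` added above is used as a Pydantic v2 callable discriminator (hence the new `Discriminator`/`Tag` imports at the top of this file). A stripped-down sketch of the pattern with hypothetical stand-in models, not the package's own classes:

```python
from typing import Annotated, Optional, Union
from pydantic import BaseModel, Discriminator, Tag, TypeAdapter

class PlainInput(BaseModel):
    type: Optional[str] = 'SourceInput'
    sequence: int

class FragmentInput(PlainInput):
    type: Optional[str] = 'AssemblyFragment'
    left_location: Optional[str] = None

def pick_variant(v):
    # Like input_discriminator above: default to the plain variant when 'type' is absent.
    if isinstance(v, dict):
        return v.get('type') or 'SourceInput'
    return getattr(v, 'type', None)

InputItem = Annotated[
    Union[
        Annotated[PlainInput, Tag('SourceInput')],
        Annotated[FragmentInput, Tag('AssemblyFragment')],
    ],
    Discriminator(pick_variant),
]

items = TypeAdapter(list[InputItem]).validate_python(
    [{'sequence': 2}, {'sequence': 3, 'type': 'AssemblyFragment'}]
)
print([type(i).__name__ for i in items])  # ['PlainInput', 'FragmentInput']
```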
@@ -292,7 +312,7 @@ class SequenceLocationStr(str):
  return cls.field_validator(value)


- class AssemblyFragment(_AssemblyFragment):
+ class AssemblyFragment(_AssemblyFragment, SourceInput):
  left_location: Optional[SequenceLocationStr] = None
  right_location: Optional[SequenceLocationStr] = None

@@ -322,14 +342,26 @@ class AssemblyFragment(_AssemblyFragment):
  class AssemblySourceCommonClass(SourceCommonClass):
  # TODO: This is different in the LinkML model, because there it is not required,
  # and here we make it default to list.
- assembly: List[AssemblyFragment] = Field(
- default_factory=list, description="""The joins between the fragments in the assembly"""
+ input: Optional[
+ List[
+ Annotated[
+ Union[
+ Annotated[SourceInput, Tag('SourceInput')],
+ Annotated['AssemblyFragment', Tag('AssemblyFragment')],
+ ],
+ Discriminator(input_discriminator),
+ ]
+ ]
+ ] = Field(
+ default_factory=list,
+ description="""The inputs to this source. If the source represents external import of a sequence, it's empty.""",
+ json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source'], 'slot_uri': 'schema:object'}},
  )

  def minimal_overlap(self):
  """Returns the minimal overlap between the fragments in the assembly"""
  all_overlaps = list()
- for f in self.assembly:
+ for f in self.input:
  if f.left_location is not None:
  all_overlaps.append(f.left_location.end - f.left_location.start)
  if f.right_location is not None:
@@ -338,9 +370,13 @@ class AssemblySourceCommonClass(SourceCommonClass):

  def get_assembly_plan(self, fragments: list[_SeqRecord]) -> tuple:
  """Returns the assembly plan"""
- subf = [f.to_fragment_tuple(fragments) for f in self.assembly]
+ subf = [f.to_fragment_tuple(fragments) for f in self.input if f.type == 'AssemblyFragment']
  return subfragment_representation2edge_representation(subf, self.circular)

+ def is_assembly_complete(self) -> bool:
+ """Returns True if the assembly is complete"""
+ return any(f.type == 'AssemblyFragment' for f in self.input)
+
  @classmethod
  def from_assembly(
  cls,
@@ -353,7 +389,6 @@ class AssemblySourceCommonClass(SourceCommonClass):

  # Replace the positions with the actual ids
  fragment_ids = [int(f.id) for f in fragments]
- input_ids = [int(f.id) for f in fragments if not isinstance(f, _PydnaPrimer)]

  # Here the ids are still the positions in the fragments list
  fragment_assembly_positions = edge_representation2subfragment_representation(assembly, circular)
@@ -368,8 +403,7 @@ class AssemblySourceCommonClass(SourceCommonClass):
  ]
  return cls(
  id=id,
- input=input_ids,
- assembly=assembly_fragments,
+ input=assembly_fragments,
  circular=circular,
  **kwargs,
  )
@@ -428,7 +462,9 @@ class CRISPRSource(AssemblySourceCommonClass, _CRISPRSource):
  fragments: list[_SeqRecord],
  guides: list[int],
  ):
- return super().from_assembly(assembly, id, False, fragments, guides=guides)
+ source = super().from_assembly(assembly, id, False, fragments)
+ source.input += [SourceInput(sequence=guide) for guide in guides]
+ return source


  class RestrictionAndLigationSource(AssemblySourceCommonClass, _RestrictionAndLigationSource):
@@ -486,17 +522,14 @@ class BaseCloningStrategy(_CloningStrategy):
  json_schema_extra={'linkml_meta': {'alias': 'backend_version', 'domain_of': ['CloningStrategy']}},
  )

- def next_primer_id(self):
- return max([p.id for p in self.primers], default=0) + 1
-
  def add_primer(self, primer: PrimerModel):
  if primer in self.primers:
  return
- primer.id = self.next_primer_id()
+ primer.id = self.next_id()
  self.primers.append(primer)

- def next_node_id(self):
- return max([s.id for s in self.sources + self.sequences], default=0) + 1
+ def next_id(self):
+ return max([s.id for s in self.sources + self.sequences + self.primers], default=0) + 1

  def add_source_and_sequence(self, source: SourceCommonClass, sequence: TextFileSequence):
  if source in self.sources:
@@ -505,11 +538,11 @@ class BaseCloningStrategy(_CloningStrategy):
  f"Source {source.id} already exists in the cloning strategy, but sequence {sequence.id} it's not its output."
  )
  return
- source.id = self.next_node_id()
+ new_id = self.next_id()
+ source.id = new_id
  self.sources.append(source)
- sequence.id = self.next_node_id()
+ sequence.id = new_id
  self.sequences.append(sequence)
- source.output = sequence.id


  def all_children_source_ids(self, source_id: int, source_children: list | None = None) -> list[int]:
@@ -517,7 +550,7 @@ class BaseCloningStrategy(_CloningStrategy):
  if source_children is None:
  source_children = []

- sources_that_take_output_as_input = [s for s in self.sources if source.output in s.input]
+ sources_that_take_output_as_input = [s for s in self.sources if source.id in [inp.sequence for inp in s.input]]
  new_source_ids = [s.id for s in sources_that_take_output_as_input]

  source_children.extend(new_source_ids)
@@ -66,10 +66,10 @@ oligonucleotide_hybridization_examples = {
  'value': {
  'source': {
  'id': 1,
- 'input': [],
- 'output': 0,
- 'forward_oligo': 2,
- 'reverse_oligo': 3,
+ 'input': [
+ {'sequence': 2},
+ {'sequence': 3},
+ ],
  },
  'primers': [
  {'id': 2, 'name': 'primer1', 'sequence': 'aaGCGGCCGCgtagaactttatgtgcttccttacattggt'},