opencloning-0.3.8-py3-none-any.whl → opencloning-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -121,7 +121,7 @@ If you want to fix several bugs from the command line, you can use the `backend_
  Before running this script, you need to migrate the data to the latest version of the schema. See [full documentation](https://github.com/OpenCloning/OpenCloning_LinkML?tab=readme-ov-file#migration-from-previous-versions-of-the-schema), but basically:

  ```bash
- python -m opencloning_linkl.migrations.migrate file1.json file2.json ...
+ python -m opencloning_linkl.migrations.migrate --target-version='0.3.0' file1.json file2.json ...
  ```

  Then, you can run the script:
@@ -131,7 +131,10 @@ python -m opencloning.bug_fixing.backend_v0_3 file1.json file2.json ...
  ```

  For each file:
- * If the file does not need fixing, it will be skipped.
+ * If the file does not need fixing, it will be skipped. Migrate it to the latest version of the schema by removing the `--target-version` flag.
+ ```bash
+ python -m opencloning_linkl.migrations.migrate file1.json file2.json ...
+ ```
  * If the file needs fixing, it will create a new file `file_1_needs_fixing.json` at the same location where the original file is, with the problematic sources replaced by templates.
  * You can then load these files into the web application and run the correct steps manually.

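Taken together, the migration-and-fix workflow described in the README hunks above amounts to the following (a sketch built only from the commands shown; `file1.json file2.json ...` are placeholders for your own files):

```bash
# 1. Migrate the files to schema version 0.3.0, the version the fixing script expects
python -m opencloning_linkl.migrations.migrate --target-version='0.3.0' file1.json file2.json ...

# 2. Run the bug-fixing script; files that need manual fixing get a *_needs_fixing.json copy next to the original
python -m opencloning.bug_fixing.backend_v0_3 file1.json file2.json ...

# 3. Migrate the files that did not need fixing to the latest schema version (no --target-version flag)
python -m opencloning_linkl.migrations.migrate file1.json file2.json ...
```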
@@ -27,31 +27,26 @@ def fix_backend_v0_3(input_data: dict) -> CloningStrategy | None:
  for source in data['sources']:
  if source['type'] == 'GatewaySource':
  # Take the first assembly value and check that the length of features is 7
- assembly = source['assembly']
- if len(assembly):
+ input = source['input']
+ if len(input):
  feat2check = (
- assembly[0]['left_location']
- if assembly[0]['left_location'] is not None
- else assembly[0]['right_location']
+ input[0]['left_location'] if input[0]['left_location'] is not None else input[0]['right_location']
  )
  if len(SequenceLocationStr(feat2check).to_biopython_location()) != 7:
  problematic_source_ids.add(source['id'])

- elif 'assembly' in source:
+ elif any(('type' in i and i['type'] == 'AssemblyFragment') for i in source['input']):
  assembly_source = AssemblySource(
  id=source['id'],
  input=source['input'],
- output=source['output'],
  circular=source['circular'],
- assembly=source['assembly'],
  )
- input_seqs = [
- TextFileSequence.model_validate(s) for s in data['sequences'] if s['id'] in assembly_source.input
- ]
+ input_ids = [i.sequence for i in assembly_source.input]
+ input_seqs = [TextFileSequence.model_validate(s) for s in data['sequences'] if s['id'] in input_ids]
  # Sort input_seqs as in input
- input_seqs.sort(key=lambda x: assembly_source.input.index(x.id))
+ input_seqs.sort(key=lambda x: input_ids.index(x.id))
  if source['type'] == 'PCRSource':
- primer_ids = [assembly_source.assembly[0].sequence, assembly_source.assembly[2].sequence]
+ primer_ids = [assembly_source.input[0].sequence, assembly_source.input[2].sequence]
  primers = [PrimerModel.model_validate(p) for p in data['primers'] if p['id'] in primer_ids]
  input_seqs = [primers[0], input_seqs[0], primers[1]]

@@ -68,9 +63,11 @@ def fix_backend_v0_3(input_data: dict) -> CloningStrategy | None:
  problematic_source_ids.update(sum([cs.all_children_source_ids(s) for s in problematic_source_ids], []))
  for source_id in problematic_source_ids:
  source = next(s for s in data['sources'] if s['id'] == source_id)
- output_seq = next(s for s in data['sequences'] if s['id'] == source['output'])
- remove_keys = ['assembly', 'circular']
+ output_seq = next(s for s in data['sequences'] if s['id'] == source_id)
+ # Remove assembly info
+ remove_keys = ['circular']
  source_keep = {key: value for key, value in source.items() if key not in remove_keys}
+ source_keep['input'] = [{'sequence': f['sequence']} for f in source_keep['input']]
  source.clear()
  source.update(source_keep)

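For orientation, this is the shape change the fixer above is coping with. The field values below are made up; only the structure is inferred from the code in this hunk (joins moving from a separate `assembly` list into typed `input` entries, the `output` field going away, and the produced sequence being looked up by the source's own id):

```python
# Old shape: separate 'assembly' joins, plain integer inputs, explicit 'output' (ids are arbitrary).
old_source = {
    'id': 5,
    'type': 'PCRSource',
    'input': [2],
    'output': 6,
    'circular': False,
    'assembly': [
        {'sequence': 3, 'left_location': None, 'right_location': '...'},
        {'sequence': 2, 'left_location': '...', 'right_location': '...'},
        {'sequence': 4, 'left_location': '...', 'right_location': None},
    ],
}

# New shape: joins folded into 'input' as AssemblyFragment entries; the produced sequence is the
# one whose id matches the source id (see the `s['id'] == source_id` lookup above).
new_source = {
    'id': 5,
    'type': 'PCRSource',
    'circular': False,
    'input': [
        {'type': 'AssemblyFragment', 'sequence': 3, 'left_location': None, 'right_location': '...'},
        {'type': 'AssemblyFragment', 'sequence': 2, 'left_location': '...', 'right_location': '...'},
        {'type': 'AssemblyFragment', 'sequence': 4, 'left_location': '...', 'right_location': None},
    ],
}
```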
@@ -15,7 +15,7 @@ from pydna.common_sub_strings import common_sub_strings
  from Bio.SeqIO import parse as seqio_parse
  import io
  import warnings
- from Bio.SeqIO.InsdcIO import GenBankIterator, GenBankScanner
+ from Bio.SeqIO.InsdcIO import GenBankScanner, GenBankIterator
  import re
  from .http_client import get_http_client, ConnectError, TimeoutException
  from .ncbi_requests import get_genbank_sequence
@@ -29,7 +29,7 @@ def format_sequence_genbank(seq: Dseqrecord, seq_name: str = None) -> TextFileSe
  correct_name(seq)

  return TextFileSequence(
- id=0,
+ id=int(seq.id) if seq.id is not None and str(seq.id).isdigit() else 0,
  file_content=seq.format('genbank'),
  sequence_file_format=SequenceFileFormat('genbank'),
  overhang_crick_3prime=seq.seq.ovhg,
@@ -280,10 +280,9 @@ class MyGenBankScanner(GenBankScanner):

  class MyGenBankIterator(GenBankIterator):

- def parse(self, handle):
- """Start parsing the file, and return a SeqRecord generator."""
- records = MyGenBankScanner(debug=0).parse_records(handle)
- return records
+ def __init__(self, source):
+ super(GenBankIterator, self).__init__(source, fmt='GenBank')
+ self.records = MyGenBankScanner(debug=0).parse_records(self.stream)


  def custom_file_parser(
opencloning/dna_utils.py CHANGED
@@ -15,6 +15,7 @@ from Bio.Data.IUPACData import ambiguous_dna_values as _ambiguous_dna_values
  import re
  from Bio.SeqFeature import Location, SimpleLocation
  from pydna.utils import shift_location
+ from pairwise_alignments_to_msa.alignment import aligned_tuples_to_MSA

  aligner = PairwiseAligner(scoring='blastn')

@@ -125,33 +126,37 @@ def permutate_trace(reference: str, sanger_trace: str) -> str:

  def align_sanger_traces(dseqr: Dseqrecord, sanger_traces: list[str]) -> list[str]:
  """Align a sanger track to a dseqr sequence"""
- query_str = str(dseqr.seq)
+
+ # Ensure sequences are in upper case
+ query_str = str(dseqr.seq).upper()
+ sanger_traces = [trace.upper() for trace in sanger_traces]
+
  # Check that required executables exist in PATH
  if not shutil.which('mars'):
  raise RuntimeError("'mars' executable not found in PATH")
  if not shutil.which('mafft'):
  raise RuntimeError("'mafft' executable not found in PATH")

- # If the sequence is circular, use MARS to permutate the traces
- if dseqr.circular:
- permutated_traces = []
- for trace in sanger_traces:
- permutated_traces.append(permutate_trace(query_str, trace))
- permutated_traces.append(permutate_trace(query_str, reverse_complement(trace)))
-
- traces_oriented = []
- # Pairwise-align and keep the best alignment, to decide which orientation to keep
- for fwd, rvs in zip(permutated_traces[::2], permutated_traces[1::2]):
- fwd_alignment = next(aligner.align(query_str, fwd))
- rvs_alignment = next(aligner.align(query_str, rvs))
-
- if fwd_alignment.score > rvs_alignment.score:
- traces_oriented.append(fwd.replace('N', ''))
- else:
- traces_oriented.append(rvs.replace('N', ''))
- sanger_traces = traces_oriented
-
- return align_with_mafft([query_str, *sanger_traces], True)
+ aligned_pairs = []
+ for trace in sanger_traces:
+ # If the sequence is circular, permutate both fwd and reverse complement
+ if dseqr.circular:
+ fwd = permutate_trace(query_str, trace)
+ rvs = permutate_trace(query_str, reverse_complement(trace))
+ else:
+ fwd = trace
+ rvs = reverse_complement(trace)
+
+ # Pairwise-align and keep the best alignment
+ fwd_alignment = next(aligner.align(query_str, fwd))
+ rvs_alignment = next(aligner.align(query_str, rvs))
+
+ best_alignment = fwd_alignment if fwd_alignment.score > rvs_alignment.score else rvs_alignment
+
+ formatted_alignment = best_alignment.format('fasta').split()[1::2]
+ aligned_pairs.append(tuple(formatted_alignment))
+
+ return aligned_tuples_to_MSA(aligned_pairs)


  def compute_regex_site(site: str) -> str:
@@ -3,7 +3,8 @@ from typing import Union, Literal, Callable
  from pydna.dseqrecord import Dseqrecord
  from pydna.primer import Primer as PydnaPrimer
  from pydna.crispr import cas9
- from pydantic import conlist, create_model
+ from pydantic import create_model, Field
+ from typing import Annotated
  from Bio.Restriction.Restriction import RestrictionBatch
  from opencloning.cre_lox import cre_loxP_overlap, annotate_loxP_sites
  from ..dna_functions import (
@@ -27,7 +28,7 @@ from ..pydantic_models import (
  CreLoxRecombinationSource,
  InVivoAssemblySource,
  )
- from ..assembly2 import (
+ from pydna.assembly2 import (
  Assembly,
  assemble,
  sticky_end_sub_strings,
@@ -80,8 +81,8 @@ def format_known_assembly_response(
  )
  async def crispr(
  source: CRISPRSource,
- guides: list[PrimerModel],
- sequences: conlist(TextFileSequence, min_length=2, max_length=2),
+ guides: Annotated[list[PrimerModel], Field(min_length=1)],
+ sequences: Annotated[list[TextFileSequence], Field(min_length=2, max_length=2)],
  minimal_homology: int = Query(40, description='The minimum homology between the template and the insert.'),
  ):
  """Return the sequence after performing CRISPR editing by Homology directed repair
@@ -106,6 +107,7 @@ async def crispr(
  400, f'Could not find Cas9 cutsite in the target sequence using the guide: {guide.name}'
  )
  guide_cuts.append(possible_cuts)
+ sorted_guide_ids = list(sorted([guide.id for guide in guides]))

  # Check if homologous recombination is possible
  fragments = [template, insert]
@@ -144,12 +146,12 @@ async def crispr(
  # meant for linear DNA

  out_sources = [
- CRISPRSource.from_assembly(id=source.id, assembly=a, guides=source.guides, fragments=fragments)
+ CRISPRSource.from_assembly(id=source.id, assembly=a, guides=sorted_guide_ids, fragments=fragments)
  for a in valid_assemblies
  ]

  # If a specific assembly is requested
- if len(source.assembly):
+ if source.is_assembly_complete():
  return format_known_assembly_response(source, out_sources, [template, insert])

  out_sequences = [
@@ -204,7 +206,7 @@ def generate_assemblies(
  raise HTTPException(400, *e.args)

  # If a specific assembly is requested
- if len(source.assembly):
+ if source.is_assembly_complete():
  return format_known_assembly_response(source, out_sources, fragments, product_callback)

  out_sequences = [
@@ -225,7 +227,7 @@ def generate_assemblies(
  )
  async def ligation(
  source: LigationSource,
- sequences: conlist(TextFileSequence, min_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
  blunt: bool = Query(False, description='Use blunt ligation as well as sticky ends.'),
  allow_partial_overlap: bool = Query(False, description='Allow for partially overlapping sticky ends.'),
  circular_only: bool = Query(False, description='Only return circular assemblies.'),
@@ -239,7 +241,7 @@ async def ligation(

  # If the assembly is known, the blunt parameter is ignored, and we set the algorithm type from the assembly
  # (blunt ligations have features without length)
- if len(source.assembly):
+ if source.is_assembly_complete():
  asm = source.get_assembly_plan(fragments)
  blunt = len(asm[0][2]) == 0

@@ -261,8 +263,8 @@ async def ligation(
  )
  async def pcr(
  source: PCRSource,
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
- primers: conlist(PrimerModel, min_length=1, max_length=2),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
+ primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
  minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
  allowed_mismatches: int = Query(0, description='The number of mismatches allowed'),
  ):
@@ -277,7 +279,7 @@ async def pcr(
  # What happens if annealing is zero? That would mean
  # mismatch in the 3' of the primer, which maybe should
  # not be allowed.
- if len(source.assembly):
+ if source.is_assembly_complete():
  minimal_annealing = source.minimal_overlap()
  # Only the ones that match are included in the output assembly
  # location, so the submitted assembly should be returned without
@@ -315,11 +317,11 @@ async def pcr(
  ]

  # If a specific assembly is requested
- if len(source.assembly):
+ if source.is_assembly_complete():

  def callback(x):
  if source.add_primer_features:
- return annotate_primer_binding_sites(x, fragments, source.get_assembly_plan(fragments))
+ return annotate_primer_binding_sites(x, fragments)
  else:
  return x

@@ -331,7 +333,7 @@ async def pcr(
  def callback(fragments, a):
  out_seq = assemble(fragments, a)
  if source.add_primer_features:
- return annotate_primer_binding_sites(out_seq, fragments, possible_assemblies)
+ return annotate_primer_binding_sites(out_seq, fragments)
  else:
  return out_seq

@@ -353,14 +355,14 @@ async def pcr(
  )
  async def homologous_recombination(
  source: HomologousRecombinationSource,
- sequences: conlist(TextFileSequence, min_length=2, max_length=2),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=2, max_length=2)],
  minimal_homology: int = Query(40, description='The minimum homology between the template and the insert.'),
  ):

  template, insert = [read_dsrecord_from_json(seq) for seq in sequences]

  # If an assembly is provided, we ignore minimal_homology
- if len(source.assembly):
+ if source.is_assembly_complete():
  minimal_homology = source.minimal_overlap()

  asm = Assembly((template, insert), limit=minimal_homology, use_all_fragments=True)
@@ -386,7 +388,7 @@ async def homologous_recombination(
  ]

  # If a specific assembly is requested
- if len(source.assembly):
+ if source.is_assembly_complete():
  return format_known_assembly_response(source, out_sources, [template, insert])

  out_sequences = [
@@ -411,7 +413,7 @@ async def homologous_recombination(
  ),
  )
  async def gibson_assembly(
- sequences: conlist(TextFileSequence, min_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
  source: Union[GibsonAssemblySource, OverlapExtensionPCRLigationSource, InFusionSource, InVivoAssemblySource],
  minimal_homology: int = Query(
  40, description='The minimum homology between consecutive fragments in the assembly.'
@@ -450,7 +452,7 @@ async def gibson_assembly(
  )
  async def restriction_and_ligation(
  source: RestrictionAndLigationSource,
- sequences: conlist(TextFileSequence, min_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
  allow_partial_overlap: bool = Query(False, description='Allow for partially overlapping sticky ends.'),
  circular_only: bool = Query(False, description='Only return circular assemblies.'),
  ):
@@ -492,7 +494,7 @@ async def restriction_and_ligation(
  )
  async def gateway(
  source: GatewaySource,
- sequences: conlist(TextFileSequence, min_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1)],
  circular_only: bool = Query(False, description='Only return circular assemblies.'),
  only_multi_site: bool = Query(
  False, description='Only return assemblies where more than one site per sequence recombined.'
@@ -537,7 +539,7 @@ async def gateway(
  multi_site_sources = [
  i
  for i, s in enumerate(resp['sources'])
- if all(join.left_location != join.right_location for join in s.assembly)
+ if all(join.left_location != join.right_location for join in s.input)
  ]
  sources = [resp['sources'][i] for i in multi_site_sources]
  sequences = [resp['sequences'][i] for i in multi_site_sources]
@@ -554,7 +556,9 @@ async def gateway(
  sequences=(list[TextFileSequence], ...),
  ),
  )
- async def cre_lox_recombination(source: CreLoxRecombinationSource, sequences: conlist(TextFileSequence, min_length=1)):
+ async def cre_lox_recombination(
+ source: CreLoxRecombinationSource, sequences: Annotated[list[TextFileSequence], Field(min_length=1)]
+ ):
  fragments = [read_dsrecord_from_json(seq) for seq in sequences]

  # Lambda function for code clarity
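A recurring change across the endpoint signatures above is the replacement of pydantic's `conlist(...)` with the v2-style `Annotated[list[...], Field(...)]` constraint. A minimal, self-contained sketch of that idiom (the model and field names here are made up, not taken from the package):

```python
from typing import Annotated
from pydantic import BaseModel, Field, ValidationError

class Payload(BaseModel):
    # Same constraint that conlist(int, min_length=1, max_length=2) used to express
    items: Annotated[list[int], Field(min_length=1, max_length=2)]

Payload(items=[1])       # valid
Payload(items=[1, 2])    # valid
try:
    Payload(items=[1, 2, 3])  # one item too many
except ValidationError as exc:
    print(exc.error_count())  # 1
```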
@@ -1,6 +1,6 @@
  from fastapi import Query, HTTPException
  from pydna.dseqrecord import Dseqrecord
- from pydantic import conlist, create_model
+ from pydantic import create_model, Field
  from typing import Annotated
  from Bio.Restriction import RestrictionBatch

@@ -30,7 +30,7 @@ router = get_router()
  )
  async def restriction(
  source: RestrictionEnzymeDigestionSource,
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
  restriction_enzymes: Annotated[list[str], Query(default_factory=list)],
  ):
  # There should be 1 or 2 enzymes in the request if the source does not have cuts
@@ -53,7 +53,10 @@ async def restriction(

  cutsites = seqr.seq.get_cutsites(*enzymes)
  cutsite_pairs = seqr.seq.get_cutsite_pairs(cutsites)
- sources = [RestrictionEnzymeDigestionSource.from_cutsites(*p, source.input, source.id) for p in cutsite_pairs]
+ sources = [
+ RestrictionEnzymeDigestionSource.from_cutsites(*p, [{'sequence': sequences[0].id}], source.id)
+ for p in cutsite_pairs
+ ]

  all_enzymes = set(enzyme for s in sources for enzyme in s.get_enzymes())
  enzymes_not_cutting = set(restriction_enzymes) - set(all_enzymes)
@@ -90,7 +93,7 @@ async def restriction(
  )
  async def polymerase_extension(
  source: PolymeraseExtensionSource,
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
  ):
  """Return the sequence from a polymerase extension reaction"""

@@ -117,7 +120,7 @@ async def polymerase_extension(
  )
  async def reverse_complement(
  source: ReverseComplementSource,
- sequences: conlist(TextFileSequence, min_length=1, max_length=1),
+ sequences: Annotated[list[TextFileSequence], Field(min_length=1, max_length=1)],
  ):
  dseq = read_dsrecord_from_json(sequences[0])
  out_sequence = dseq.reverse_complement()
@@ -1,7 +1,8 @@
  from fastapi import Query, HTTPException
  from pydna.dseqrecord import Dseqrecord
  from pydna.dseq import Dseq
- from pydantic import conlist, create_model
+ from pydantic import create_model, Field
+ from typing import Annotated

  from ..dna_functions import (
  format_sequence_genbank,
@@ -12,6 +13,7 @@ from ..pydantic_models import (
  TextFileSequence,
  ManuallyTypedSource,
  OligoHybridizationSource,
+ SourceInput,
  )

  from .. import request_examples
@@ -54,11 +56,16 @@ async def manually_typed(source: ManuallyTypedSource):
  )
  async def oligonucleotide_hybridization(
  source: OligoHybridizationSource,
- primers: conlist(PrimerModel, min_length=1, max_length=2),
+ primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
  minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
  ):
- watson_seq = next((p.sequence for p in primers if p.id == source.forward_oligo), None)
- crick_seq = next((p.sequence for p in primers if p.id == source.reverse_oligo), None)
+ if len(source.input):
+ watson_seq = next((p.sequence for p in primers if p.id == source.input[0].sequence), None)
+ crick_seq = next((p.sequence for p in primers if p.id == source.input[1].sequence), None)
+ else:
+ watson_seq = primers[0].sequence
+ crick_seq = primers[1].sequence if len(primers) > 1 else watson_seq
+ source.input = [SourceInput(sequence=primers[0].id), SourceInput(sequence=primers[1].id)]

  if watson_seq is None or crick_seq is None:
  raise HTTPException(404, 'Invalid oligo id.')
@@ -1,5 +1,5 @@
- from pydantic import BaseModel, Field, model_validator, field_validator
- from typing import Optional, List
+ from pydantic import BaseModel, Field, model_validator, field_validator, Discriminator, Tag
+ from typing import Optional, List, Union, Annotated
  from pydantic_core import core_schema
  from ._version import __version__

@@ -49,8 +49,9 @@ from opencloning_linkml.datamodel import (
  SEVASource as _SEVASource,
  CreLoxRecombinationSource as _CreLoxRecombinationSource,
  InVivoAssemblySource as _InVivoAssemblySource,
+ SourceInput as _SourceInput,
  )
- from .assembly2 import (
+ from pydna.assembly2 import (
  edge_representation2subfragment_representation,
  subfragment_representation2edge_representation,
  )
@@ -64,6 +65,10 @@ class TextFileSequence(_TextFileSequence):
  pass


+ class SourceInput(_SourceInput):
+ pass
+
+
  class PrimerModel(_Primer):
  """Called PrimerModel not to be confused with the class from pydna."""

@@ -94,8 +99,23 @@ class SeqFeatureModel(BaseModel):
  # Sources =========================================


- class SourceCommonClass:
- input: Optional[List[int]] = Field(
+ def input_discriminator(v) -> str | None:
+ """
+ Discriminator that yields SourceInput by default
+ """
+ if isinstance(v, dict):
+ input_type = v.get('type', None)
+ if input_type is None:
+ return 'SourceInput'
+ else:
+ return input_type
+ elif isinstance(v, SourceInput):
+ return v.type
+ return None
+
+
+ class SourceCommonClass(BaseModel):
+ input: Optional[List[SourceInput]] = Field(
  default_factory=list,
  description="""The sequences that are an input to this source. If the source represents external import of a sequence, it's empty.""",
  json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source']}},
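The `input_discriminator` added above is used as a Pydantic v2 callable discriminator (hence the new `Discriminator`/`Tag` imports at the top of this file). A stripped-down sketch of the pattern with hypothetical stand-in models, not the package's own classes:

```python
from typing import Annotated, Optional, Union
from pydantic import BaseModel, Discriminator, Tag, TypeAdapter

class PlainInput(BaseModel):
    type: Optional[str] = 'SourceInput'
    sequence: int

class FragmentInput(PlainInput):
    type: Optional[str] = 'AssemblyFragment'
    left_location: Optional[str] = None

def pick_variant(v):
    # Like input_discriminator above: default to the plain variant when 'type' is absent.
    if isinstance(v, dict):
        return v.get('type') or 'SourceInput'
    return getattr(v, 'type', None)

InputItem = Annotated[
    Union[
        Annotated[PlainInput, Tag('SourceInput')],
        Annotated[FragmentInput, Tag('AssemblyFragment')],
    ],
    Discriminator(pick_variant),
]

items = TypeAdapter(list[InputItem]).validate_python(
    [{'sequence': 2}, {'sequence': 3, 'type': 'AssemblyFragment'}]
)
print([type(i).__name__ for i in items])  # ['PlainInput', 'FragmentInput']
```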
@@ -292,7 +312,7 @@ class SequenceLocationStr(str):
  return cls.field_validator(value)


- class AssemblyFragment(_AssemblyFragment):
+ class AssemblyFragment(_AssemblyFragment, SourceInput):
  left_location: Optional[SequenceLocationStr] = None
  right_location: Optional[SequenceLocationStr] = None

@@ -322,14 +342,26 @@ class AssemblyFragment(_AssemblyFragment):
  class AssemblySourceCommonClass(SourceCommonClass):
  # TODO: This is different in the LinkML model, because there it is not required,
  # and here we make it default to list.
- assembly: List[AssemblyFragment] = Field(
- default_factory=list, description="""The joins between the fragments in the assembly"""
+ input: Optional[
+ List[
+ Annotated[
+ Union[
+ Annotated[SourceInput, Tag('SourceInput')],
+ Annotated['AssemblyFragment', Tag('AssemblyFragment')],
+ ],
+ Discriminator(input_discriminator),
+ ]
+ ]
+ ] = Field(
+ default_factory=list,
+ description="""The inputs to this source. If the source represents external import of a sequence, it's empty.""",
+ json_schema_extra={'linkml_meta': {'alias': 'input', 'domain_of': ['Source'], 'slot_uri': 'schema:object'}},
  )

  def minimal_overlap(self):
  """Returns the minimal overlap between the fragments in the assembly"""
  all_overlaps = list()
- for f in self.assembly:
+ for f in self.input:
  if f.left_location is not None:
  all_overlaps.append(f.left_location.end - f.left_location.start)
  if f.right_location is not None:
@@ -338,9 +370,13 @@ class AssemblySourceCommonClass(SourceCommonClass):

  def get_assembly_plan(self, fragments: list[_SeqRecord]) -> tuple:
  """Returns the assembly plan"""
- subf = [f.to_fragment_tuple(fragments) for f in self.assembly]
+ subf = [f.to_fragment_tuple(fragments) for f in self.input if f.type == 'AssemblyFragment']
  return subfragment_representation2edge_representation(subf, self.circular)

+ def is_assembly_complete(self) -> bool:
+ """Returns True if the assembly is complete"""
+ return any(f.type == 'AssemblyFragment' for f in self.input)
+
  @classmethod
  def from_assembly(
  cls,
@@ -353,7 +389,6 @@ class AssemblySourceCommonClass(SourceCommonClass):

  # Replace the positions with the actual ids
  fragment_ids = [int(f.id) for f in fragments]
- input_ids = [int(f.id) for f in fragments if not isinstance(f, _PydnaPrimer)]

  # Here the ids are still the positions in the fragments list
  fragment_assembly_positions = edge_representation2subfragment_representation(assembly, circular)
@@ -368,8 +403,7 @@ class AssemblySourceCommonClass(SourceCommonClass):
  ]
  return cls(
  id=id,
- input=input_ids,
- assembly=assembly_fragments,
+ input=assembly_fragments,
  circular=circular,
  **kwargs,
  )
@@ -428,7 +462,9 @@ class CRISPRSource(AssemblySourceCommonClass, _CRISPRSource):
  fragments: list[_SeqRecord],
  guides: list[int],
  ):
- return super().from_assembly(assembly, id, False, fragments, guides=guides)
+ source = super().from_assembly(assembly, id, False, fragments)
+ source.input += [SourceInput(sequence=guide) for guide in guides]
+ return source


  class RestrictionAndLigationSource(AssemblySourceCommonClass, _RestrictionAndLigationSource):
@@ -486,17 +522,14 @@ class BaseCloningStrategy(_CloningStrategy):
  json_schema_extra={'linkml_meta': {'alias': 'backend_version', 'domain_of': ['CloningStrategy']}},
  )

- def next_primer_id(self):
- return max([p.id for p in self.primers], default=0) + 1
-
  def add_primer(self, primer: PrimerModel):
  if primer in self.primers:
  return
- primer.id = self.next_primer_id()
+ primer.id = self.next_id()
  self.primers.append(primer)

- def next_node_id(self):
- return max([s.id for s in self.sources + self.sequences], default=0) + 1
+ def next_id(self):
+ return max([s.id for s in self.sources + self.sequences + self.primers], default=0) + 1

  def add_source_and_sequence(self, source: SourceCommonClass, sequence: TextFileSequence):
  if source in self.sources:
@@ -505,11 +538,11 @@ class BaseCloningStrategy(_CloningStrategy):
  f"Source {source.id} already exists in the cloning strategy, but sequence {sequence.id} it's not its output."
  )
  return
- source.id = self.next_node_id()
+ new_id = self.next_id()
+ source.id = new_id
  self.sources.append(source)
- sequence.id = self.next_node_id()
+ sequence.id = new_id
  self.sequences.append(sequence)
- source.output = sequence.id


  def all_children_source_ids(self, source_id: int, source_children: list | None = None) -> list[int]:
@@ -517,7 +550,7 @@ class BaseCloningStrategy(_CloningStrategy):
  if source_children is None:
  source_children = []

- sources_that_take_output_as_input = [s for s in self.sources if source.output in s.input]
+ sources_that_take_output_as_input = [s for s in self.sources if source.id in [inp.sequence for inp in s.input]]
  new_source_ids = [s.id for s in sources_that_take_output_as_input]

  source_children.extend(new_source_ids)
@@ -66,10 +66,10 @@ oligonucleotide_hybridization_examples = {
  'value': {
  'source': {
  'id': 1,
- 'input': [],
- 'output': 0,
- 'forward_oligo': 2,
- 'reverse_oligo': 3,
+ 'input': [
+ {'sequence': 2},
+ {'sequence': 3},
+ ],
  },
  'primers': [
  {'id': 2, 'name': 'primer1', 'sequence': 'aaGCGGCCGCgtagaactttatgtgcttccttacattggt'},