opencloning 0.4.8__py3-none-any.whl → 0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencloning/app_settings.py +7 -0
- opencloning/batch_cloning/pombe/__init__.py +2 -2
- opencloning/batch_cloning/pombe/pombe_clone.py +31 -112
- opencloning/batch_cloning/pombe/pombe_summary.py +20 -8
- opencloning/batch_cloning/ziqiang_et_al2024/__init__.py +8 -8
- opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json +2 -9
- opencloning/bug_fixing/backend_v0_3.py +13 -5
- opencloning/catalogs/__init__.py +36 -0
- opencloning/catalogs/igem2024.yaml +2172 -0
- opencloning/catalogs/openDNA_collections.yaml +1161 -0
- opencloning/catalogs/readme.txt +1 -0
- opencloning/catalogs/seva.tsv +231 -0
- opencloning/catalogs/snapgene.yaml +2837 -0
- opencloning/dna_functions.py +155 -158
- opencloning/dna_utils.py +45 -62
- opencloning/ebic/primer_design.py +1 -1
- opencloning/endpoints/annotation.py +9 -13
- opencloning/endpoints/assembly.py +157 -378
- opencloning/endpoints/endpoint_utils.py +52 -0
- opencloning/endpoints/external_import.py +169 -124
- opencloning/endpoints/no_assembly.py +23 -39
- opencloning/endpoints/no_input.py +32 -47
- opencloning/endpoints/other.py +1 -1
- opencloning/endpoints/primer_design.py +2 -1
- opencloning/http_client.py +2 -2
- opencloning/ncbi_requests.py +113 -47
- opencloning/primer_design.py +1 -1
- opencloning/pydantic_models.py +10 -510
- opencloning/request_examples.py +10 -22
- opencloning/temp_functions.py +50 -0
- {opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/METADATA +18 -8
- opencloning-0.5.dist-info/RECORD +51 -0
- {opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/WHEEL +1 -1
- opencloning/cre_lox.py +0 -116
- opencloning/gateway.py +0 -154
- opencloning-0.4.8.dist-info/RECORD +0 -45
- {opencloning-0.4.8.dist-info → opencloning-0.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,19 +1,22 @@
|
|
|
1
1
|
from fastapi import Query, HTTPException
|
|
2
2
|
from pydna.dseqrecord import Dseqrecord
|
|
3
3
|
from pydna.dseq import Dseq
|
|
4
|
+
from pydna.primer import Primer as PydnaPrimer
|
|
5
|
+
from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization as _oligonucleotide_hybridization
|
|
4
6
|
from pydantic import create_model, Field
|
|
5
7
|
from typing import Annotated
|
|
6
8
|
|
|
9
|
+
from opencloning.endpoints.endpoint_utils import format_products
|
|
10
|
+
|
|
7
11
|
from ..dna_functions import (
|
|
8
12
|
format_sequence_genbank,
|
|
9
|
-
oligonucleotide_hybridization_overhangs,
|
|
10
13
|
)
|
|
11
|
-
from
|
|
12
|
-
PrimerModel,
|
|
14
|
+
from opencloning_linkml.datamodel import (
|
|
15
|
+
Primer as PrimerModel,
|
|
13
16
|
TextFileSequence,
|
|
14
17
|
ManuallyTypedSource,
|
|
15
18
|
OligoHybridizationSource,
|
|
16
|
-
|
|
19
|
+
ManuallyTypedSequence,
|
|
17
20
|
)
|
|
18
21
|
|
|
19
22
|
from .. import request_examples
|
|
@@ -28,14 +31,14 @@ router = get_router()
|
|
|
28
31
|
'ManuallyTypedResponse', sources=(list[ManuallyTypedSource], ...), sequences=(list[TextFileSequence], ...)
|
|
29
32
|
),
|
|
30
33
|
)
|
|
31
|
-
async def manually_typed(source: ManuallyTypedSource):
|
|
34
|
+
async def manually_typed(source: ManuallyTypedSource, sequence: ManuallyTypedSequence):
    """Build the sequence record described by a manually typed sequence.

    A circular sequence is created directly from ``sequence.sequence``. Otherwise the
    record is built from the full sequence together with its crick and watson
    3' overhangs.

    Returns a dictionary with the keys 'sequences' (the record formatted with
    ``format_sequence_genbank`` under ``source.output_name``) and 'sources'.
    """
    if not sequence.circular:
        # Linear molecule: the overhangs define the shape of both ends
        linear_dseq = Dseq.from_full_sequence_and_overhangs(
            sequence.sequence, sequence.overhang_crick_3prime, sequence.overhang_watson_3prime
        )
        record = Dseqrecord(linear_dseq)
    else:
        record = Dseqrecord(sequence.sequence, circular=sequence.circular)

    formatted = format_sequence_genbank(record, source.output_name)
    return {'sequences': [formatted], 'sources': [source]}
|
|
@@ -59,57 +62,39 @@ async def oligonucleotide_hybridization(
|
|
|
59
62
|
primers: Annotated[list[PrimerModel], Field(min_length=1, max_length=2)],
|
|
60
63
|
minimal_annealing: int = Query(20, description='The minimal annealing length for each primer.'),
|
|
61
64
|
):
|
|
65
|
+
|
|
62
66
|
if len(source.input):
|
|
63
|
-
|
|
64
|
-
|
|
67
|
+
fwd_primer = next((p for p in primers if p.id == source.input[0].sequence), None)
|
|
68
|
+
rvs_primer = next((p for p in primers if p.id == source.input[1].sequence), None)
|
|
65
69
|
else:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
source.input = [SourceInput(sequence=primers[0].id), SourceInput(sequence=primers[1].id)]
|
|
70
|
+
fwd_primer = primers[0]
|
|
71
|
+
rvs_primer = primers[1] if len(primers) > 1 else fwd_primer
|
|
69
72
|
|
|
70
|
-
if
|
|
73
|
+
if fwd_primer is None or rvs_primer is None:
|
|
71
74
|
raise HTTPException(404, 'Invalid oligo id.')
|
|
72
75
|
|
|
73
|
-
|
|
76
|
+
fwd_primer = PydnaPrimer(fwd_primer.sequence, id=str(fwd_primer.id), name=fwd_primer.name)
|
|
77
|
+
rvs_primer = PydnaPrimer(rvs_primer.sequence, id=str(rvs_primer.id), name=rvs_primer.name)
|
|
78
|
+
|
|
79
|
+
# If the overhang is provided, the minimal annealing is set from that
|
|
74
80
|
if source.overhang_crick_3prime is not None:
|
|
75
|
-
ovhg_watson = len(
|
|
76
|
-
minimal_annealing = len(
|
|
81
|
+
ovhg_watson = len(fwd_primer.seq) - len(rvs_primer.seq) + source.overhang_crick_3prime
|
|
82
|
+
minimal_annealing = len(fwd_primer.seq)
|
|
77
83
|
if source.overhang_crick_3prime < 0:
|
|
78
84
|
minimal_annealing += source.overhang_crick_3prime
|
|
79
85
|
if ovhg_watson > 0:
|
|
80
86
|
minimal_annealing -= ovhg_watson
|
|
81
87
|
|
|
82
88
|
try:
|
|
83
|
-
|
|
89
|
+
dseqs = _oligonucleotide_hybridization(fwd_primer, rvs_primer, minimal_annealing)
|
|
84
90
|
except ValueError as e:
|
|
85
91
|
raise HTTPException(400, *e.args)
|
|
86
92
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
'sources': [source],
|
|
96
|
-
'sequences': [
|
|
97
|
-
format_sequence_genbank(
|
|
98
|
-
Dseqrecord(Dseq(watson_seq, crick_seq, source.overhang_crick_3prime)), source.output_name
|
|
99
|
-
)
|
|
100
|
-
],
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
out_sources = list()
|
|
104
|
-
out_sequences = list()
|
|
105
|
-
for overhang in possible_overhangs:
|
|
106
|
-
new_source = source.model_copy()
|
|
107
|
-
new_source.overhang_crick_3prime = overhang
|
|
108
|
-
out_sources.append(new_source)
|
|
109
|
-
out_sequences.append(
|
|
110
|
-
format_sequence_genbank(
|
|
111
|
-
Dseqrecord(Dseq(watson_seq, crick_seq, new_source.overhang_crick_3prime)), source.output_name
|
|
112
|
-
)
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
return {'sources': out_sources, 'sequences': out_sequences}
|
|
93
|
+
return format_products(
|
|
94
|
+
source.id,
|
|
95
|
+
dseqs,
|
|
96
|
+
source if source.overhang_crick_3prime is not None else None,
|
|
97
|
+
source.output_name,
|
|
98
|
+
no_products_error_message='No pair of annealing oligos was found. Try changing the annealing settings.',
|
|
99
|
+
wrong_completed_source_error_message='The provided source is not valid.',
|
|
100
|
+
)
|
opencloning/endpoints/other.py
CHANGED
|
@@ -13,9 +13,9 @@ from ..dna_functions import (
|
|
|
13
13
|
)
|
|
14
14
|
from ..dna_utils import align_sanger_traces
|
|
15
15
|
from ..pydantic_models import (
|
|
16
|
-
TextFileSequence,
|
|
17
16
|
BaseCloningStrategy,
|
|
18
17
|
)
|
|
18
|
+
from opencloning_linkml.datamodel import TextFileSequence
|
|
19
19
|
from ..get_router import get_router
|
|
20
20
|
from .._version import __version__ as backend_version
|
|
21
21
|
|
|
@@ -5,7 +5,8 @@ from Bio.Restriction import RestrictionBatch
|
|
|
5
5
|
from Bio.SeqUtils import gc_fraction
|
|
6
6
|
|
|
7
7
|
from ..dna_functions import get_invalid_enzyme_names
|
|
8
|
-
from
|
|
8
|
+
from opencloning_linkml.datamodel import Primer as PrimerModel
|
|
9
|
+
from opencloning.pydantic_models import PrimerDesignQuery
|
|
9
10
|
from ..dna_functions import read_dsrecord_from_json
|
|
10
11
|
from ..primer_design import (
|
|
11
12
|
homologous_recombination_primers,
|
opencloning/http_client.py
CHANGED
|
@@ -7,7 +7,7 @@ from httpx import ( # noqa: F401
|
|
|
7
7
|
AsyncHTTPTransport,
|
|
8
8
|
Request,
|
|
9
9
|
)
|
|
10
|
-
from
|
|
10
|
+
from fastapi import HTTPException
|
|
11
11
|
import ssl
|
|
12
12
|
import certifi
|
|
13
13
|
from .app_settings import settings
|
|
@@ -22,7 +22,7 @@ class AllowedExternalUrlsTransport(AsyncHTTPTransport):
|
|
|
22
22
|
async def handle_async_request(self, request: Request) -> Response:
    """Forward the request only if its URL starts with one of the allowed external URLs.

    Raises:
        HTTPException: 403 when the URL does not start with any entry of
            ``allowed_external_urls``.
    """
    requested_url = str(request.url)
    for allowed_prefix in allowed_external_urls:
        if requested_url.startswith(allowed_prefix):
            # First matching prefix is enough, delegate to the parent transport
            return await super().handle_async_request(request)
    raise HTTPException(403, f'Request to {request.url} is not allowed')
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
proxy = None
|
opencloning/ncbi_requests.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
from fastapi import HTTPException
|
|
2
|
-
|
|
2
|
+
import math
|
|
3
3
|
from pydna.dseqrecord import Dseqrecord
|
|
4
|
+
from pydna.opencloning_models import GenomeCoordinatesSource, NCBISequenceSource
|
|
5
|
+
from Bio.SeqFeature import Location
|
|
6
|
+
|
|
4
7
|
from .app_settings import settings
|
|
5
8
|
from .http_client import get_http_client, Response
|
|
6
9
|
|
|
@@ -9,7 +12,14 @@ headers = None if settings.NCBI_API_KEY is None else {'api_key': settings.NCBI_A
|
|
|
9
12
|
|
|
10
13
|
async def async_get(url, headers, params=None) -> Response:
    """Send a GET request with the shared http client (20 second timeout).

    Responses with status 200 or any 4xx status are returned to the caller, which
    is expected to handle them. Any other status is turned into an HTTPException
    with status 503.

    Raises:
        HTTPException: 503 if the response status is neither 200 nor 4xx.
    """
    async with get_http_client() as client:
        response = await client.get(url, headers=headers, params=params, timeout=20.0)
        status = response.status_code

        if status == 200 or status // 100 == 4:
            return response

        # NOTE(review): the messages for 500 and 503 look swapped with respect to the
        # usual meaning of those status codes - confirm before changing them.
        if status == 500:
            raise HTTPException(503, 'NCBI is down, try again later')
        if status == 503:
            raise HTTPException(503, 'NCBI returned an internal server error')
        raise HTTPException(503, 'NCBI returned an unexpected error')
|
|
13
23
|
|
|
14
24
|
|
|
15
25
|
# TODO: this does not return old assembly accessions, see https://github.com/ncbi/datasets/issues/380#issuecomment-2231142816
|
|
@@ -43,23 +53,11 @@ async def get_sequence_accessions_from_assembly_accession(assembly_accession: st
|
|
|
43
53
|
|
|
44
54
|
|
|
45
55
|
async def get_annotation_from_locus_tag(locus_tag: str, assembly_accession: str) -> dict:
    """
    Return the single annotation of the assembly that matches the locus tag.

    The candidates come from ``get_annotations_from_query``. A candidate is kept when
    ``locus_tag`` is contained (case-insensitively) in its 'locus_tag' value.

    Raises:
        HTTPException: 404 if no annotation matches the locus tag, 400 if more than
            one annotation matches.
    """
    annotations = await get_annotations_from_query(locus_tag, assembly_accession)
    wanted = locus_tag.upper()
    # Annotations without a locus_tag can never match, skip them instead of failing with KeyError
    matches = [a for a in annotations if 'locus_tag' in a and wanted in a['locus_tag'].upper()]

    # No match and several matches are different problems, report them separately
    if not matches:
        raise HTTPException(404, 'wrong locus_tag')
    if len(matches) > 1:
        raise HTTPException(400, 'multiple matches for locus_tag')
    return matches[0]
|
|
63
61
|
|
|
64
62
|
|
|
65
63
|
async def get_annotations_from_query(query: str, assembly_accession: str) -> list[dict]:
|
|
@@ -72,9 +70,6 @@ async def get_annotations_from_query(query: str, assembly_accession: str) -> lis
|
|
|
72
70
|
if 'reports' not in data:
|
|
73
71
|
raise HTTPException(404, f'query "{query}" gave no results')
|
|
74
72
|
|
|
75
|
-
if len(data['reports']) > 1:
|
|
76
|
-
raise HTTPException(400, 'multiple matches for query')
|
|
77
|
-
|
|
78
73
|
return [r['annotation'] for r in data['reports']]
|
|
79
74
|
|
|
80
75
|
|
|
@@ -94,6 +89,12 @@ async def get_sequence_length_from_sequence_accession(sequence_accession: str) -
|
|
|
94
89
|
|
|
95
90
|
|
|
96
91
|
async def get_genbank_sequence(sequence_accession, start=None, end=None, strand=None) -> Dseqrecord:
|
|
92
|
+
from opencloning.dna_functions import get_sequences_from_file_url
|
|
93
|
+
|
|
94
|
+
# Ensure that start, end, and strand are either all None or none are None
|
|
95
|
+
if (start is None or end is None or strand is None) and not (start is None and end is None and strand is None):
|
|
96
|
+
raise ValueError('start, end, and strand must either all be None or none be None')
|
|
97
|
+
|
|
97
98
|
gb_strand = 1 if strand == 1 or strand is None else 2
|
|
98
99
|
url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
|
|
99
100
|
params = {
|
|
@@ -108,30 +109,95 @@ async def get_genbank_sequence(sequence_accession, start=None, end=None, strand=
|
|
|
108
109
|
if headers is not None:
|
|
109
110
|
params['api_key'] = headers['api_key']
|
|
110
111
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
raise
|
|
122
|
-
|
|
123
|
-
|
|
112
|
+
try:
|
|
113
|
+
seq = (await get_sequences_from_file_url(url, params=params, headers=headers, get_function=async_get))[0]
|
|
114
|
+
except HTTPException as e:
|
|
115
|
+
# Now the ncbi returns something like this:
|
|
116
|
+
# Example: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=blah&rettype=gbwithparts&retmode=text
|
|
117
|
+
# 'Error: F a i l e d t o u n d e r s t a n d i d : b l a h '
|
|
118
|
+
if 'No sequences found in file' in e.detail:
|
|
119
|
+
raise HTTPException(404, 'invalid sequence accession') from e
|
|
120
|
+
raise e
|
|
121
|
+
except Exception as e:
|
|
122
|
+
raise e
|
|
123
|
+
|
|
124
|
+
if start is not None:
|
|
125
|
+
if strand == -1:
|
|
126
|
+
location = Location.fromstring(f'complement({start}..{end})')
|
|
127
|
+
else:
|
|
128
|
+
location = Location.fromstring(f'{start}..{end}')
|
|
124
129
|
else:
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
if
|
|
135
|
-
|
|
136
|
-
if
|
|
137
|
-
|
|
130
|
+
location = None
|
|
131
|
+
|
|
132
|
+
seq.source = NCBISequenceSource(repository_id=sequence_accession, coordinates=location)
|
|
133
|
+
return seq
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def get_info_from_annotation(annotation: dict) -> tuple:
    """
    Extract the gene coordinates and identifiers from an annotation dictionary.

    Only the first genomic region, and the first range of its gene range, are read.

    Returns:
        A tuple ``(start, end, strand, gene_id, sequence_accession, locus_tag, assembly_accession)``:
        ``start`` and ``end`` are converted to int, ``strand`` is 1 when the orientation
        is 'plus' and -1 otherwise, ``gene_id`` (int) and ``locus_tag`` are None when the
        key is absent, and ``assembly_accession`` is None when it cannot be read from the
        first entry of 'annotations'.

    Raises:
        KeyError, IndexError: if the genomic region information is missing.
    """
    gene_range = annotation['genomic_regions'][0]['gene_range']
    first_range = gene_range['range'][0]

    start = int(first_range['begin'])
    end = int(first_range['end'])
    strand = 1 if first_range['orientation'] == 'plus' else -1
    sequence_accession = gene_range['accession_version']

    # These fields are optional in the annotation
    locus_tag = annotation.get('locus_tag')
    gene_id = int(annotation['gene_id']) if 'gene_id' in annotation else None
    try:
        assembly_accession = annotation['annotations'][0]['assembly_accession']
    except (KeyError, IndexError):
        assembly_accession = None

    return start, end, strand, gene_id, sequence_accession, locus_tag, assembly_accession
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
async def validate_locus_tag(
    locus_tag: str, assembly_accession: str, gene_id: int | None, start: int, end: int, strand: int
) -> int | None:
    """
    Validate that the locus tag exists in the assembly and that the gene falls within the requested coordinates.

    Returns the gene_id for convenience: the one passed in when it is set, otherwise the
    one found in the annotation, or None when the annotation has no gene_id either.

    Raises:
        HTTPException: 400 if ``gene_id`` is set but missing from the annotation or
            different from it, or if the gene is not contained in ``start..end`` on
            the requested strand. Errors raised while fetching the annotation are
            propagated unchanged.
    """

    annotation = await get_annotation_from_locus_tag(locus_tag, assembly_accession)
    gene_start, gene_end, gene_strand, gene_id_annotation, *_ = get_info_from_annotation(annotation)

    # This field will not be present in all cases, but should be there in reference genomes
    annotation_has_gene_id = 'gene_id' in annotation
    if gene_id is None:
        if annotation_has_gene_id:
            gene_id = gene_id_annotation
    elif not annotation_has_gene_id:
        raise HTTPException(400, 'gene_id is set, but not found in the annotation')
    elif gene_id != gene_id_annotation:
        raise HTTPException(400, 'gene_id does not match the locus_tag')

    # The gene should fall within the range (range might be bigger if bases were requested upstream or downstream)
    gene_within_range = gene_start >= start and gene_end <= end and gene_strand == strand
    if not gene_within_range:
        raise HTTPException(
            400,
            f'wrong coordinates, the gene should fall within the requested coordinates, {start}, {end} on strand: {strand}',
        )

    return gene_id
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
async def get_genome_region_from_annotation(
    annotation: dict, padding_left: int = 0, padding_right: int = 0
) -> Dseqrecord:
    """
    Fetch the genome region described by an annotation, optionally extended on both sides.

    ``padding_left`` is subtracted from the annotated start and ``padding_right`` is added
    to the annotated end, whatever the strand. The returned record is named after the
    locus tag and carries a GenomeCoordinatesSource describing where it comes from.
    """
    info = get_info_from_annotation(annotation)
    gene_start, gene_end, strand, gene_id, sequence_accession, locus_tag, assembly_accession = info

    region_start = gene_start - padding_left
    region_end = gene_end + padding_right
    record = await get_genbank_sequence(sequence_accession, region_start, region_end, strand)

    if strand == -1:
        location_text = f'complement({region_start}..{region_end})'
    else:
        location_text = f'{region_start}..{region_end}'

    record.name = locus_tag
    record.source = GenomeCoordinatesSource(
        assembly_accession=assembly_accession,
        repository_id=sequence_accession,
        coordinates=Location.fromstring(location_text),
        locus_tag=locus_tag,
        gene_id=gene_id,
    )
    return record
|
opencloning/primer_design.py
CHANGED
|
@@ -3,12 +3,12 @@ from pydna.design import primer_design, assembly_fragments
|
|
|
3
3
|
from Bio.SeqFeature import SimpleLocation
|
|
4
4
|
from pydna.utils import locations_overlap, shift_location, location_boundaries
|
|
5
5
|
from pydna.amplicon import Amplicon
|
|
6
|
-
from .pydantic_models import PrimerModel
|
|
7
6
|
from Bio.Seq import reverse_complement
|
|
8
7
|
from Bio.Restriction.Restriction import RestrictionType
|
|
9
8
|
from Bio.Data.IUPACData import ambiguous_dna_values as _ambiguous_dna_values
|
|
10
9
|
from typing import Callable
|
|
11
10
|
from .primer3_functions import primer3_calc_tm, PrimerDesignSettings
|
|
11
|
+
from opencloning_linkml.datamodel import Primer as PrimerModel
|
|
12
12
|
|
|
13
13
|
ambiguous_dna_values = _ambiguous_dna_values.copy()
|
|
14
14
|
# Remove acgt
|