opencloning 0.2.8__py3-none-any.whl → 0.2.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
opencloning/cre_lox.py CHANGED
@@ -1,29 +1,58 @@
1
1
  from itertools import product
2
2
  from pydna.dseqrecord import Dseqrecord
3
-
3
+ from Bio.Data.IUPACData import ambiguous_dna_values
4
+ from Bio.Seq import reverse_complement
4
5
  from .dna_utils import compute_regex_site, dseqrecord_finditer
5
6
 
7
+ # We create a dictionary to map each set of bases (as a sorted string) to its IUPAC ambiguity code
8
+ # For example, ambiguous_base_dict['ACGT'] -> 'N'
9
+ ambiguous_base_dict = {}
10
+ for ambiguous, bases in ambiguous_dna_values.items():
11
+ ambiguous_base_dict[''.join(sorted(bases))] = ambiguous
12
+
13
+ # To handle N values
14
+ ambiguous_base_dict['N'] = 'N'
15
+
6
16
  # This is the original loxP sequence, here for reference
7
17
  LOXP_SEQUENCE = 'ATAACTTCGTATAGCATACATTATACGAAGTTAT'
8
18
 
9
- # This is a consensus sequence, from this Addgene blog post: https://blog.addgene.org/plasmids-101-cre-lox
10
- # IMPORTANT: Because it is palyndromic, we only look for it in the forward direction, if this was changed
11
- # to a non-palindromic sequence, you would need to look for matches reversing it, like in Gateway cloning
12
- LOXP_CONSENSUS = 'ATAACTTCGTATANNNTANNNTATACGAAGTTAT'
19
+ loxP_sequences = [
20
+ # https://blog.addgene.org/plasmids-101-cre-lox
21
+ # loxP
22
+ 'ATAACTTCGTATANNNTANNNTATACGAAGTTAT',
23
+ # PMID:12202778
24
+ # lox66
25
+ 'ATAACTTCGTATANNNTANNNTATACGAACGGTA',
26
+ # lox71
27
+ 'TACCGTTCGTATANNNTANNNTATACGAAGTTAT',
28
+ ]
29
+
30
+ loxP_consensus = ''
13
31
 
32
+ for pos in range(len(LOXP_SEQUENCE)):
33
+ all_letters = set(seq[pos] for seq in loxP_sequences)
34
+ key = ''.join(sorted(all_letters))
35
+ loxP_consensus += ambiguous_base_dict[key]
14
36
 
15
- loxP_regex = compute_regex_site(LOXP_CONSENSUS)
37
+ # We compute the regex for the forward and reverse loxP sequences
38
+ loxP_regex = (compute_regex_site(loxP_consensus), compute_regex_site(reverse_complement(loxP_consensus)))
16
39
 
17
40
 
18
41
  def cre_loxP_overlap(x: Dseqrecord, y: Dseqrecord, _l: None = None) -> list[tuple[int, int, int]]:
19
42
  """Find matching loxP sites between two sequences."""
20
43
  out = list()
21
- matches_x = dseqrecord_finditer(loxP_regex, x)
22
- matches_y = dseqrecord_finditer(loxP_regex, y)
23
-
24
- for match_x, match_y in product(matches_x, matches_y):
25
- value_x = match_x.group()
26
- value_y = match_y.group()
27
- if value_x == value_y:
28
- out.append((match_x.start(), match_y.start(), len(value_x)))
29
- return out
44
+ for pattern in loxP_regex:
45
+ matches_x = dseqrecord_finditer(pattern, x)
46
+ matches_y = dseqrecord_finditer(pattern, y)
47
+
48
+ for match_x, match_y in product(matches_x, matches_y):
49
+ value_x = match_x.group()
50
+ value_y = match_y.group()
51
+ if value_x[13:21] == value_y[13:21]:
52
+ out.append((match_x.start() + 13, match_y.start() + 13, 8))
53
+ # Unique values (keeping the order)
54
+ unique_out = []
55
+ for item in out:
56
+ if item not in unique_out:
57
+ unique_out.append(item)
58
+ return unique_out
@@ -241,16 +241,23 @@ def find_sequence_regex(pattern: str, seq: str, is_circular: bool) -> list[Locat
241
241
  def oligonucleotide_hybridization_overhangs(
242
242
  fwd_oligo_seq: str, rvs_oligo_seq: str, minimal_annealing: int
243
243
  ) -> list[int]:
244
+ """
245
+ Returns possible overhangs between two oligos, and raises a ValueError if the oligos can anneal with mismatches.
246
+
247
+ see https://github.com/manulera/OpenCloning_backend/issues/302 for notation
248
+
249
+ """
244
250
  matches = common_sub_strings(fwd_oligo_seq.lower(), reverse_complement(rvs_oligo_seq.lower()), minimal_annealing)
245
251
 
246
- for m in matches:
247
- if not (
248
- (m[0] == 0 and m[1] + m[2] == len(fwd_oligo_seq)) or (m[1] == 0 and m[0] + m[2] == len(rvs_oligo_seq))
252
+ for pos_fwd, pos_rvs, length in matches:
253
+
254
+ if (pos_fwd != 0 and pos_rvs != 0) or (
255
+ pos_fwd + length < len(fwd_oligo_seq) and pos_rvs + length < len(rvs_oligo_seq)
249
256
  ):
250
257
  raise ValueError('The oligonucleotides can anneal with mismatches')
251
258
 
252
259
  # Return possible overhangs
253
- return [start_on_rvs - start_on_fwd for start_on_fwd, start_on_rvs, length in matches]
260
+ return [pos_rvs - pos_fwd for pos_fwd, pos_rvs, length in matches]
254
261
 
255
262
 
256
263
  class MyGenBankScanner(GenBankScanner):
@@ -8,6 +8,8 @@ from starlette.responses import RedirectResponse
8
8
  from Bio import BiopythonParserWarning
9
9
  from typing import Annotated
10
10
  from urllib.error import HTTPError
11
+ from pydna.utils import location_boundaries
12
+
11
13
  from ..get_router import get_router
12
14
  from ..pydantic_models import (
13
15
  TextFileSequence,
@@ -22,6 +24,7 @@ from ..pydantic_models import (
22
24
  GenomeCoordinatesSource,
23
25
  SequenceFileFormat,
24
26
  SEVASource,
27
+ SimpleSequenceLocation,
25
28
  )
26
29
  from ..dna_functions import (
27
30
  format_sequence_genbank,
@@ -51,13 +54,13 @@ router = get_router()
51
54
  'description': 'The sequence was successfully parsed',
52
55
  'headers': {
53
56
  'x-warning': {
54
- 'description': 'A warning returned if the file can be read but is not in the expected format',
57
+ 'description': 'A warning returned if the file can be read but is not in the expected format or if some sequences were not extracted because they are incompatible with the provided coordinates',
55
58
  'schema': {'type': 'string'},
56
59
  },
57
60
  },
58
61
  },
59
62
  422: {
60
- 'description': 'Biopython cannot process this file.',
63
+ 'description': 'Biopython cannot process this file or provided coordinates are invalid.',
61
64
  },
62
65
  404: {
63
66
  'description': 'The index_in_file is out of range.',
@@ -83,6 +86,12 @@ async def read_from_file(
83
86
  None,
84
87
  description='Name of the output sequence',
85
88
  ),
89
+ start: int | None = Query(None, description='Start position of the sequence to read (0-based)', ge=0),
90
+ end: int | None = Query(
91
+ None,
92
+ description='End position of the sequence to read (0-based)',
93
+ ge=0,
94
+ ),
86
95
  ):
87
96
  """Return a json sequence from a sequence file"""
88
97
 
@@ -107,6 +116,7 @@ async def read_from_file(
107
116
  sequence_file_format = SequenceFileFormat(extension_dict[extension])
108
117
 
109
118
  dseqs = list()
119
+ warning_messages = list()
110
120
 
111
121
  file_content = await file.read()
112
122
  if sequence_file_format == 'snapgene':
@@ -124,7 +134,6 @@ async def read_from_file(
124
134
 
125
135
  if warnings_captured:
126
136
  warning_messages = [str(w.message) for w in warnings_captured]
127
- response.headers['x-warning'] = '; '.join(warning_messages)
128
137
 
129
138
  except ValueError as e:
130
139
  raise HTTPException(422, f'Biopython cannot process this file: {e}.')
@@ -134,25 +143,62 @@ async def read_from_file(
134
143
  if len(dseqs) == 0:
135
144
  raise HTTPException(422, 'Biopython cannot process this file.')
136
145
 
146
+ if index_in_file is not None:
147
+ if index_in_file >= len(dseqs):
148
+ raise HTTPException(404, 'The index_in_file is out of range.')
149
+ dseqs = [dseqs[index_in_file]]
150
+
151
+ seq_feature = None
152
+ if start is not None and end is not None:
153
+ seq_feature = SimpleSequenceLocation(start=start, end=end)
154
+ extracted_sequences = list()
155
+ for dseq in dseqs:
156
+ try:
157
+ # TODO: We could use extract when this is addressed: https://github.com/biopython/biopython/issues/4989
158
+ location = seq_feature.to_biopython_location(circular=dseq.circular, seq_len=len(dseq))
159
+ i, j = location_boundaries(location)
160
+ extracted_sequence = dseq[i:j]
161
+ # Only add the sequence if the interval is not out of bounds
162
+ if len(extracted_sequence) == len(location):
163
+ extracted_sequences.append(extracted_sequence)
164
+ else:
165
+ extracted_sequences.append(None)
166
+ except Exception:
167
+ extracted_sequences.append(None)
168
+ dseqs = extracted_sequences
169
+
137
170
  # The common part
138
- # TODO: using id=0 is not great
139
171
  parent_source = UploadedFileSource(
140
- id=0, sequence_file_format=sequence_file_format, file_name=file.filename, circularize=circularize
172
+ id=0,
173
+ sequence_file_format=sequence_file_format,
174
+ file_name=file.filename,
175
+ circularize=circularize,
176
+ coordinates=seq_feature,
141
177
  )
178
+
179
+ # If coordinates are provided, we only keep the sequences compatible with those coordinates
142
180
  out_sources = list()
181
+ out_sequences = list()
143
182
  for i in range(len(dseqs)):
183
+ if dseqs[i] is None:
184
+ continue
144
185
  new_source = parent_source.model_copy()
145
- new_source.index_in_file = i
186
+ new_source.index_in_file = index_in_file if index_in_file is not None else i
146
187
  out_sources.append(new_source)
188
+ out_sequences.append(format_sequence_genbank(dseqs[i], output_name))
147
189
 
148
- out_sequences = [format_sequence_genbank(s, output_name) for s in dseqs]
190
+ if len(out_sequences) == 0:
191
+ raise HTTPException(422, 'Provided coordinates are incompatible with sequences in the file.')
149
192
 
150
- if index_in_file is not None:
151
- if index_in_file >= len(out_sources):
152
- raise HTTPException(404, 'The index_in_file is out of range.')
153
- return {'sequences': [out_sequences[index_in_file]], 'sources': [out_sources[index_in_file]]}
154
- else:
155
- return {'sequences': out_sequences, 'sources': out_sources}
193
+ if len(out_sequences) < len(dseqs):
194
+ warning_messages.append(
195
+ 'Some sequences were not extracted because they are incompatible with the provided coordinates.'
196
+ )
197
+
198
+ if len(warning_messages) > 0:
199
+ response.headers['x-warning'] = '; '.join(warning_messages)
200
+
201
+ return {'sequences': out_sequences, 'sources': out_sources}
156
202
 
157
203
 
158
204
  # TODO: a bit inconsistent that here you don't put {source: {...}} in the request, but
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: opencloning
3
- Version: 0.2.8
3
+ Version: 0.2.8.2
4
4
  Summary: Backend of OpenCloning, a web application to generate molecular cloning strategies in json format, and share them with others.
5
5
  License: MIT
6
6
  Author: Manuel Lera-Ramirez
@@ -15,7 +15,7 @@ Requires-Dist: beautifulsoup4 (>=4.11.1,<5.0.0)
15
15
  Requires-Dist: biopython (==1.84)
16
16
  Requires-Dist: fastapi
17
17
  Requires-Dist: httpx (>=0.25.0,<0.26.0)
18
- Requires-Dist: opencloning-linkml (==0.2.6a0)
18
+ Requires-Dist: opencloning-linkml (==0.2.6.1a0)
19
19
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
20
20
  Requires-Dist: pandas (>=2.2.3,<3.0.0)
21
21
  Requires-Dist: primer3-py (>=2.0.3,<3.0.0)
@@ -19,15 +19,15 @@ opencloning/batch_cloning/pombe/pombe_summary.py,sha256=W9DLpnCuwK7w2DhHLu60N7L6
19
19
  opencloning/batch_cloning/ziqiang_et_al2024/__init__.py,sha256=zZUbj3uMzd9rKMXi5s9LQ1yUg7sccdS0f_4kpw7SQlk,7584
20
20
  opencloning/batch_cloning/ziqiang_et_al2024/index.html,sha256=EDncANDhhQkhi5FjnnAP6liHkG5srf4_Y46IrnMUG5g,4607
21
21
  opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json,sha256=mB81j2qWam7uRc-980YFjfqq2CiWTXJYfKFAoKuGtRw,157148
22
- opencloning/cre_lox.py,sha256=ocPx3EVkecoZjHx_ENhk5pEteRXRtiN5z5URmrIcCPw,1194
23
- opencloning/dna_functions.py,sha256=W-SxEfvYpN1JVZbTeCNitpQXkazEHvFyqZBUndd-jpY,16329
22
+ opencloning/cre_lox.py,sha256=mb2ZddjrPIrUBT3xxMub5-c97WkKZ4Z-HkGFVzuR8pQ,2031
23
+ opencloning/dna_functions.py,sha256=ivepJM2wRTIW0ArSiQ5s-XuqBd69giEQijaWXXGT64E,16536
24
24
  opencloning/dna_utils.py,sha256=uv97aO04dbk3NnqbN6GlnwOu0MOpK88rl2np2QcEQ4Y,6301
25
25
  opencloning/ebic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  opencloning/ebic/primer_design.py,sha256=gPZTF9w5SV7WGgnefp_HBM831y0z73M1Kb0QUPnbfIM,2270
27
27
  opencloning/ebic/primer_design_settings.py,sha256=OnFsuh0QCvplUEPXLZouzRo9R7rm4nLbcd2LkDCiIDM,1896
28
28
  opencloning/endpoints/annotation.py,sha256=3rlIXeNQzoqPD9lJUEBGLGxvlhUCTcfkqno814A8P0U,2283
29
29
  opencloning/endpoints/assembly.py,sha256=H1b7CRx1JZ5pcUGd3uyJG2syYugkXiIo8HRCA11TQfE,20704
30
- opencloning/endpoints/external_import.py,sha256=dDG7DiNb8WYE46nLGnkyRbGVVNUDXp3h0_1ixsJAh5o,16242
30
+ opencloning/endpoints/external_import.py,sha256=DG8WSvyvr-9xy-odEwLHHA4FWiIh8sw4DvTblw5NCYc,18179
31
31
  opencloning/endpoints/no_assembly.py,sha256=NY6rhEDCNoZVn6Xk81cen2n-FkMr7ierfxM8G0npbQs,4722
32
32
  opencloning/endpoints/no_input.py,sha256=DuqKD3Ph3a44ZxPMEzZv1nwD5xlxYsN7YyxXcfjSUFc,3844
33
33
  opencloning/endpoints/other.py,sha256=TzfCJLDmZFWeKYxKhEfXOvlQrWWyBIGJ5FR0yA7tuvI,1673
@@ -40,7 +40,7 @@ opencloning/primer_design.py,sha256=nqCmYIZ7UvU4CQwVGJwX7T5LTHwt3-51_ZcTZZAgT_Y,
40
40
  opencloning/pydantic_models.py,sha256=gsipVXhjQOXVz2NL-MiNpLuOZYDVo2Pli9F--bp6tjs,15345
41
41
  opencloning/request_examples.py,sha256=QAsJxVaq5tHwlPB404IiJ9WC6SA7iNY7XnJm63BWT_E,2944
42
42
  opencloning/utils.py,sha256=wsdTJYliap-t3oa7yQE3pWDa1CR19mr5lUQfocp4hoM,1875
43
- opencloning-0.2.8.dist-info/LICENSE,sha256=VSdVE1f8axjIh6gvo9ZZygJdTVkRFMcwCW_hvjOHC_w,1058
44
- opencloning-0.2.8.dist-info/METADATA,sha256=0kyQ2RhJcsCrkjRR6usNPg4LswxSYq71A61MY0ro0Yw,8425
45
- opencloning-0.2.8.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
46
- opencloning-0.2.8.dist-info/RECORD,,
43
+ opencloning-0.2.8.2.dist-info/LICENSE,sha256=VSdVE1f8axjIh6gvo9ZZygJdTVkRFMcwCW_hvjOHC_w,1058
44
+ opencloning-0.2.8.2.dist-info/METADATA,sha256=OrcZ2VMjjkWI31tB4B1tEa6GgIxBFlOiGWxmA_8eK6A,8429
45
+ opencloning-0.2.8.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
46
+ opencloning-0.2.8.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.2
2
+ Generator: poetry-core 2.1.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any