opencloning 0.2.8__py3-none-any.whl → 0.2.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
opencloning/cre_lox.py CHANGED
@@ -1,29 +1,58 @@
1
1
  from itertools import product
2
2
  from pydna.dseqrecord import Dseqrecord
3
-
3
+ from Bio.Data.IUPACData import ambiguous_dna_values
4
+ from Bio.Seq import reverse_complement
4
5
  from .dna_utils import compute_regex_site, dseqrecord_finditer
5
6
 
7
+ # We create a dictionary to map each sorted set of bases to its IUPAC ambiguity code
8
+ # For example, ambiguous_base_dict['ACGT'] -> 'N'
9
+ ambiguous_base_dict = {}
10
+ for ambiguous, bases in ambiguous_dna_values.items():
11
+ ambiguous_base_dict[''.join(sorted(bases))] = ambiguous
12
+
13
+ # To handle N values
14
+ ambiguous_base_dict['N'] = 'N'
15
+
6
16
  # This is the original loxP sequence, here for reference
7
17
  LOXP_SEQUENCE = 'ATAACTTCGTATAGCATACATTATACGAAGTTAT'
8
18
 
9
- # This is a consensus sequence, from this Addgene blog post: https://blog.addgene.org/plasmids-101-cre-lox
10
- # IMPORTANT: Because it is palyndromic, we only look for it in the forward direction, if this was changed
11
- # to a non-palindromic sequence, you would need to look for matches reversing it, like in Gateway cloning
12
- LOXP_CONSENSUS = 'ATAACTTCGTATANNNTANNNTATACGAAGTTAT'
19
+ loxP_sequences = [
20
+ # https://blog.addgene.org/plasmids-101-cre-lox
21
+ # loxP
22
+ 'ATAACTTCGTATANNNTANNNTATACGAAGTTAT',
23
+ # PMID:12202778
24
+ # lox66
25
+ 'ATAACTTCGTATANNNTANNNTATACGAACGGTA',
26
+ # lox71
27
+ 'TACCGTTCGTATANNNTANNNTATACGAAGTTAT',
28
+ ]
29
+
30
+ loxP_consensus = ''
13
31
 
32
+ for pos in range(len(LOXP_SEQUENCE)):
33
+ all_letters = set(seq[pos] for seq in loxP_sequences)
34
+ key = ''.join(sorted(all_letters))
35
+ loxP_consensus += ambiguous_base_dict[key]
14
36
 
15
- loxP_regex = compute_regex_site(LOXP_CONSENSUS)
37
+ # We compute the regex for the loxP consensus and for its reverse complement
38
+ loxP_regex = (compute_regex_site(loxP_consensus), compute_regex_site(reverse_complement(loxP_consensus)))
16
39
 
17
40
 
18
41
  def cre_loxP_overlap(x: Dseqrecord, y: Dseqrecord, _l: None = None) -> list[tuple[int, int, int]]:
19
42
  """Find matching loxP sites between two sequences."""
20
43
  out = list()
21
- matches_x = dseqrecord_finditer(loxP_regex, x)
22
- matches_y = dseqrecord_finditer(loxP_regex, y)
23
-
24
- for match_x, match_y in product(matches_x, matches_y):
25
- value_x = match_x.group()
26
- value_y = match_y.group()
27
- if value_x == value_y:
28
- out.append((match_x.start(), match_y.start(), len(value_x)))
29
- return out
44
+ for pattern in loxP_regex:
45
+ matches_x = dseqrecord_finditer(pattern, x)
46
+ matches_y = dseqrecord_finditer(pattern, y)
47
+
48
+ for match_x, match_y in product(matches_x, matches_y):
49
+ value_x = match_x.group()
50
+ value_y = match_y.group()
51
+ if value_x[13:21] == value_y[13:21]:
52
+ out.append((match_x.start() + 13, match_y.start() + 13, 8))
53
+ # Unique values (keeping the order)
54
+ unique_out = []
55
+ for item in out:
56
+ if item not in unique_out:
57
+ unique_out.append(item)
58
+ return unique_out
@@ -8,6 +8,8 @@ from starlette.responses import RedirectResponse
8
8
  from Bio import BiopythonParserWarning
9
9
  from typing import Annotated
10
10
  from urllib.error import HTTPError
11
+ from pydna.utils import location_boundaries
12
+
11
13
  from ..get_router import get_router
12
14
  from ..pydantic_models import (
13
15
  TextFileSequence,
@@ -22,6 +24,7 @@ from ..pydantic_models import (
22
24
  GenomeCoordinatesSource,
23
25
  SequenceFileFormat,
24
26
  SEVASource,
27
+ SimpleSequenceLocation,
25
28
  )
26
29
  from ..dna_functions import (
27
30
  format_sequence_genbank,
@@ -51,13 +54,13 @@ router = get_router()
51
54
  'description': 'The sequence was successfully parsed',
52
55
  'headers': {
53
56
  'x-warning': {
54
- 'description': 'A warning returned if the file can be read but is not in the expected format',
57
+ 'description': 'A warning returned if the file can be read but is not in the expected format or if some sequences were not extracted because they are incompatible with the provided coordinates',
55
58
  'schema': {'type': 'string'},
56
59
  },
57
60
  },
58
61
  },
59
62
  422: {
60
- 'description': 'Biopython cannot process this file.',
63
+ 'description': 'Biopython cannot process this file or provided coordinates are invalid.',
61
64
  },
62
65
  404: {
63
66
  'description': 'The index_in_file is out of range.',
@@ -83,6 +86,12 @@ async def read_from_file(
83
86
  None,
84
87
  description='Name of the output sequence',
85
88
  ),
89
+ start: int | None = Query(None, description='Start position of the sequence to read (0-based)', ge=0),
90
+ end: int | None = Query(
91
+ None,
92
+ description='End position of the sequence to read (0-based)',
93
+ ge=0,
94
+ ),
86
95
  ):
87
96
  """Return a json sequence from a sequence file"""
88
97
 
@@ -107,6 +116,7 @@ async def read_from_file(
107
116
  sequence_file_format = SequenceFileFormat(extension_dict[extension])
108
117
 
109
118
  dseqs = list()
119
+ warning_messages = list()
110
120
 
111
121
  file_content = await file.read()
112
122
  if sequence_file_format == 'snapgene':
@@ -124,7 +134,6 @@ async def read_from_file(
124
134
 
125
135
  if warnings_captured:
126
136
  warning_messages = [str(w.message) for w in warnings_captured]
127
- response.headers['x-warning'] = '; '.join(warning_messages)
128
137
 
129
138
  except ValueError as e:
130
139
  raise HTTPException(422, f'Biopython cannot process this file: {e}.')
@@ -134,25 +143,62 @@ async def read_from_file(
134
143
  if len(dseqs) == 0:
135
144
  raise HTTPException(422, 'Biopython cannot process this file.')
136
145
 
146
+ if index_in_file is not None:
147
+ if index_in_file >= len(dseqs):
148
+ raise HTTPException(404, 'The index_in_file is out of range.')
149
+ dseqs = [dseqs[index_in_file]]
150
+
151
+ seq_feature = None
152
+ if start is not None and end is not None:
153
+ seq_feature = SimpleSequenceLocation(start=start, end=end)
154
+ extracted_sequences = list()
155
+ for dseq in dseqs:
156
+ try:
157
+ # TODO: We could use extract when this is addressed: https://github.com/biopython/biopython/issues/4989
158
+ location = seq_feature.to_biopython_location(circular=dseq.circular, seq_len=len(dseq))
159
+ i, j = location_boundaries(location)
160
+ extracted_sequence = dseq[i:j]
161
+ # Only add the sequence if the interval is not out of bounds
162
+ if len(extracted_sequence) == len(location):
163
+ extracted_sequences.append(extracted_sequence)
164
+ else:
165
+ extracted_sequences.append(None)
166
+ except Exception:
167
+ extracted_sequences.append(None)
168
+ dseqs = extracted_sequences
169
+
137
170
  # The common part
138
- # TODO: using id=0 is not great
139
171
  parent_source = UploadedFileSource(
140
- id=0, sequence_file_format=sequence_file_format, file_name=file.filename, circularize=circularize
172
+ id=0,
173
+ sequence_file_format=sequence_file_format,
174
+ file_name=file.filename,
175
+ circularize=circularize,
176
+ coordinates=seq_feature,
141
177
  )
178
+
179
+ # If coordinates are provided, we only keep the sequences compatible with those coordinates
142
180
  out_sources = list()
181
+ out_sequences = list()
143
182
  for i in range(len(dseqs)):
183
+ if dseqs[i] is None:
184
+ continue
144
185
  new_source = parent_source.model_copy()
145
- new_source.index_in_file = i
186
+ new_source.index_in_file = index_in_file if index_in_file is not None else i
146
187
  out_sources.append(new_source)
188
+ out_sequences.append(format_sequence_genbank(dseqs[i], output_name))
147
189
 
148
- out_sequences = [format_sequence_genbank(s, output_name) for s in dseqs]
190
+ if len(out_sequences) == 0:
191
+ raise HTTPException(422, 'Provided coordinates are incompatible with sequences in the file.')
149
192
 
150
- if index_in_file is not None:
151
- if index_in_file >= len(out_sources):
152
- raise HTTPException(404, 'The index_in_file is out of range.')
153
- return {'sequences': [out_sequences[index_in_file]], 'sources': [out_sources[index_in_file]]}
154
- else:
155
- return {'sequences': out_sequences, 'sources': out_sources}
193
+ if len(out_sequences) < len(dseqs):
194
+ warning_messages.append(
195
+ 'Some sequences were not extracted because they are incompatible with the provided coordinates.'
196
+ )
197
+
198
+ if len(warning_messages) > 0:
199
+ response.headers['x-warning'] = '; '.join(warning_messages)
200
+
201
+ return {'sequences': out_sequences, 'sources': out_sources}
156
202
 
157
203
 
158
204
  # TODO: a bit inconsistent that here you don't put {source: {...}} in the request, but
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: opencloning
3
- Version: 0.2.8
3
+ Version: 0.2.8.1
4
4
  Summary: Backend of OpenCloning, a web application to generate molecular cloning strategies in json format, and share them with others.
5
5
  License: MIT
6
6
  Author: Manuel Lera-Ramirez
@@ -15,7 +15,7 @@ Requires-Dist: beautifulsoup4 (>=4.11.1,<5.0.0)
15
15
  Requires-Dist: biopython (==1.84)
16
16
  Requires-Dist: fastapi
17
17
  Requires-Dist: httpx (>=0.25.0,<0.26.0)
18
- Requires-Dist: opencloning-linkml (==0.2.6a0)
18
+ Requires-Dist: opencloning-linkml (==0.2.6.1a0)
19
19
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
20
20
  Requires-Dist: pandas (>=2.2.3,<3.0.0)
21
21
  Requires-Dist: primer3-py (>=2.0.3,<3.0.0)
@@ -19,7 +19,7 @@ opencloning/batch_cloning/pombe/pombe_summary.py,sha256=W9DLpnCuwK7w2DhHLu60N7L6
19
19
  opencloning/batch_cloning/ziqiang_et_al2024/__init__.py,sha256=zZUbj3uMzd9rKMXi5s9LQ1yUg7sccdS0f_4kpw7SQlk,7584
20
20
  opencloning/batch_cloning/ziqiang_et_al2024/index.html,sha256=EDncANDhhQkhi5FjnnAP6liHkG5srf4_Y46IrnMUG5g,4607
21
21
  opencloning/batch_cloning/ziqiang_et_al2024/ziqiang_et_al2024.json,sha256=mB81j2qWam7uRc-980YFjfqq2CiWTXJYfKFAoKuGtRw,157148
22
- opencloning/cre_lox.py,sha256=ocPx3EVkecoZjHx_ENhk5pEteRXRtiN5z5URmrIcCPw,1194
22
+ opencloning/cre_lox.py,sha256=mb2ZddjrPIrUBT3xxMub5-c97WkKZ4Z-HkGFVzuR8pQ,2031
23
23
  opencloning/dna_functions.py,sha256=W-SxEfvYpN1JVZbTeCNitpQXkazEHvFyqZBUndd-jpY,16329
24
24
  opencloning/dna_utils.py,sha256=uv97aO04dbk3NnqbN6GlnwOu0MOpK88rl2np2QcEQ4Y,6301
25
25
  opencloning/ebic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -27,7 +27,7 @@ opencloning/ebic/primer_design.py,sha256=gPZTF9w5SV7WGgnefp_HBM831y0z73M1Kb0QUPn
27
27
  opencloning/ebic/primer_design_settings.py,sha256=OnFsuh0QCvplUEPXLZouzRo9R7rm4nLbcd2LkDCiIDM,1896
28
28
  opencloning/endpoints/annotation.py,sha256=3rlIXeNQzoqPD9lJUEBGLGxvlhUCTcfkqno814A8P0U,2283
29
29
  opencloning/endpoints/assembly.py,sha256=H1b7CRx1JZ5pcUGd3uyJG2syYugkXiIo8HRCA11TQfE,20704
30
- opencloning/endpoints/external_import.py,sha256=dDG7DiNb8WYE46nLGnkyRbGVVNUDXp3h0_1ixsJAh5o,16242
30
+ opencloning/endpoints/external_import.py,sha256=DG8WSvyvr-9xy-odEwLHHA4FWiIh8sw4DvTblw5NCYc,18179
31
31
  opencloning/endpoints/no_assembly.py,sha256=NY6rhEDCNoZVn6Xk81cen2n-FkMr7ierfxM8G0npbQs,4722
32
32
  opencloning/endpoints/no_input.py,sha256=DuqKD3Ph3a44ZxPMEzZv1nwD5xlxYsN7YyxXcfjSUFc,3844
33
33
  opencloning/endpoints/other.py,sha256=TzfCJLDmZFWeKYxKhEfXOvlQrWWyBIGJ5FR0yA7tuvI,1673
@@ -40,7 +40,7 @@ opencloning/primer_design.py,sha256=nqCmYIZ7UvU4CQwVGJwX7T5LTHwt3-51_ZcTZZAgT_Y,
40
40
  opencloning/pydantic_models.py,sha256=gsipVXhjQOXVz2NL-MiNpLuOZYDVo2Pli9F--bp6tjs,15345
41
41
  opencloning/request_examples.py,sha256=QAsJxVaq5tHwlPB404IiJ9WC6SA7iNY7XnJm63BWT_E,2944
42
42
  opencloning/utils.py,sha256=wsdTJYliap-t3oa7yQE3pWDa1CR19mr5lUQfocp4hoM,1875
43
- opencloning-0.2.8.dist-info/LICENSE,sha256=VSdVE1f8axjIh6gvo9ZZygJdTVkRFMcwCW_hvjOHC_w,1058
44
- opencloning-0.2.8.dist-info/METADATA,sha256=0kyQ2RhJcsCrkjRR6usNPg4LswxSYq71A61MY0ro0Yw,8425
45
- opencloning-0.2.8.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
46
- opencloning-0.2.8.dist-info/RECORD,,
43
+ opencloning-0.2.8.1.dist-info/LICENSE,sha256=VSdVE1f8axjIh6gvo9ZZygJdTVkRFMcwCW_hvjOHC_w,1058
44
+ opencloning-0.2.8.1.dist-info/METADATA,sha256=rp3mHAG3x49YfumIjM5teZL6iAtRbhQG1tl64bOjPfI,8429
45
+ opencloning-0.2.8.1.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
46
+ opencloning-0.2.8.1.dist-info/RECORD,,