primer3plus-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """primer3plus_core — headless Primer3Plus logic, no Flask dependency."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,281 @@
1
+ """Boulder-IO text processing: normalisation, tag injection, parsing, counting."""
2
+
3
+ import re as _re
4
+
5
+ from .config import (
6
+ RE_THERMO_PATH,
7
+ RE_LIB_PRIMER, RE_LIB_INTERNAL,
8
+ P3LIBFIX, P3LIBIFIX,
9
+ RE_LEFT_COUNT, RE_INTERNAL_COUNT, RE_RIGHT_COUNT,
10
+ )
11
+
12
+
13
def _clean_numeric(val: str, label: str) -> str:
    """Return *val* reduced to a non-negative decimal number string.

    Every character other than the digits ``0-9`` and ``.`` is removed
    (surrounding blanks, units, ...), then the remainder is checked to be a
    parseable number so callers can pass it straight to ``float()``.

    Args:
        val: Raw value taken from a Boulder-IO ``KEY=value`` line.
        label: Human-readable field name; used as the prefix of error messages.

    Returns:
        The cleaned string (not a float).

    Raises:
        ValueError: If *val* contains a ``-`` (these fields are
            concentrations/temperatures that must be >= 0), or if nothing
            numeric remains after cleaning (empty value, ``"abc"``, ``"1.2.3"``).
    """
    if "-" in val:
        raise ValueError(label + " must not be negative.")
    cleaned = _re.sub(r"[^0-9\.]", "", val)
    try:
        # Validation only: the string form is what gets returned.
        float(cleaned)
    except ValueError:
        raise ValueError(label + " must be a number.") from None
    return cleaned
22
+
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Input normalisation (step 2 of migration)
26
+ # ---------------------------------------------------------------------------
27
+
28
def normalize_newlines(text: str) -> str:
    """Return *text* with every line ending expressed as a single \\n.

    \\r\\n pairs are converted first so that each yields one newline rather
    than two; any \\r still left over is converted afterwards.
    """
    without_crlf = text.replace("\r\n", "\n")
    return "\n".join(without_crlf.split("\r"))
31
+
32
+
33
def strip_thermo_path(text: str) -> str:
    """Return *text* with everything matched by RE_THERMO_PATH deleted.

    RE_THERMO_PATH targets the PRIMER_THERMODYNAMIC_PARAMETERS_PATH tag line.
    """
    cleaned, _removed = RE_THERMO_PATH.subn("", text)
    return cleaned
36
+
37
+
38
def inject_mispriming_libs(text: str) -> str:
    """Point the mispriming-library tags at the packaged library directory.

    Each ``PRIMER_MISPRIMING_LIBRARY=`` / ``PRIMER_INTERNAL_MISHYB_LIBRARY=``
    occurrence is replaced by P3LIBFIX / P3LIBIFIX (the same tag followed by
    the mispriming_lib directory), so the file name that follows the ``=``
    ends up prefixed with that directory.

    NOTE(review): the file name after ``=`` is kept exactly as supplied; any
    ``../`` components are not filtered here -- confirm it is validated upstream.

    Args:
        text: Boulder-IO input text.

    Returns:
        The text with both tags rewritten.
    """
    # A callable replacement is inserted verbatim. A plain replacement string
    # would have its backslashes interpreted as escapes by re.sub, which
    # raises re.error for Windows-style directory paths.
    text = RE_LIB_PRIMER.sub(lambda _match: P3LIBFIX, text)
    text = RE_LIB_INTERNAL.sub(lambda _match: P3LIBIFIX, text)
    return text
44
+
45
+
46
def prepare_input(indata: str) -> str:
    """Run the input-preparation steps in order and return the result.

    Order matters: newlines are normalised first, then the thermodynamic
    path line is stripped, and finally the mispriming-library tags are
    rewritten.
    """
    return inject_mispriming_libs(strip_thermo_path(normalize_newlines(indata)))
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Output post-processing
56
+ # ---------------------------------------------------------------------------
57
+
58
def append_run_metadata(output: str, uuidstr: str, error: str = "") -> str:
    """Return *output* followed by a P3P_UUID line and, if set, a P3P_ERROR line.

    A newline is always inserted before ``P3P_UUID=``; the ``P3P_ERROR=``
    line is only added when *error* is non-empty.
    """
    pieces = [output, "\nP3P_UUID=", uuidstr, "\n"]
    if error:
        pieces.extend(("P3P_ERROR=", error, "\n"))
    return "".join(pieces)
64
+
65
+
66
def count_primers(output: str) -> int:
    """Sum the left, internal and right *_NUM_RETURNED values found in *output*.

    Only the first occurrence of each tag is used; a tag that is absent
    contributes nothing to the total.
    """
    patterns = (RE_LEFT_COUNT, RE_INTERNAL_COUNT, RE_RIGHT_COUNT)
    hits = (pattern.search(output) for pattern in patterns)
    return sum(int(hit.group(1)) for hit in hits if hit)
74
+
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # Prefold input parsing (UNAFold)
78
+ # ---------------------------------------------------------------------------
79
+
80
def parse_prefold_input(indata: str):
    """Extract and validate the parameters needed for UNAFold prefolding.

    Only lines of the form ``KEY=value`` containing exactly one ``=`` are
    considered; everything else is ignored.

    Args:
        indata: Boulder-IO input text with ``\\n`` line separators.

    Returns:
        A dict with the keys:

        * ``temp``, ``mv``, ``dv`` -- cleaned numeric *strings* when the tag was
          present, otherwise the float default ``0.0``.
        * ``start`` -- PRIMER_FIRST_BASE_INDEX as int.
        * ``seq_id`` -- SEQUENCE_ID restricted to ``[0-9A-Za-z _,.]``.
        * ``seq`` -- SEQUENCE_TEMPLATE restricted to ``ACGTN`` (either case).
        * ``use_seq`` -- ``seq``, or the slice selected by
          SEQUENCE_INCLUDED_REGION when that region is usable.
        * ``incl_start``, ``incl_len``, ``incl_found`` -- parsed included region.
        * ``data`` -- the sanitised tags echoed back as Boulder-IO lines, in
          input order (SEQUENCE_INCLUDED_REGION is not echoed).

    Raises:
        ValueError: On a malformed integer/region/number, a sequence outside
            20..2000 bp, or salt/temperature values outside their ranges.
    """
    dat = {
        "temp": 0.0, "mv": 0.0, "dv": 0.0, "start": 0,
        "seq_id": "", "seq": "", "use_seq": "",
        "incl_start": 0, "incl_len": 0, "incl_found": False,
        "data": "",
    }
    # Tags that go through _clean_numeric: tag -> (result key, error label).
    numeric_tags = {
        "PRIMER_ANNEALING_TEMP": ("temp", "Annealing temperature"),
        "PRIMER_SALT_DIVALENT": ("dv", "Divalent salt concentration"),
        "PRIMER_SALT_MONOVALENT": ("mv", "Monovalent salt concentration"),
    }
    echoed = []
    for line in indata.split("\n"):
        curr = line.split("=")
        if len(curr) != 2:
            continue
        key, val = curr
        if key in numeric_tags:
            field, label = numeric_tags[key]
            dat[field] = _clean_numeric(val, label)
            echoed.append(key + "=" + dat[field] + "\n")
        elif key == "PRIMER_FIRST_BASE_INDEX":
            # NOTE(review): the sign is stripped together with every other
            # non-digit, so "-1" is read as 1.
            digits = _re.sub(r"[^0-9]", "", val)
            if not digits:
                raise ValueError("PRIMER_FIRST_BASE_INDEX must be an integer.")
            dat["start"] = int(digits)
            echoed.append("PRIMER_FIRST_BASE_INDEX=" + str(dat["start"]) + "\n")
        elif key == "SEQUENCE_ID":
            dat["seq_id"] = _re.sub(r"[^0-9A-Za-z _,\.]", "", val)
            echoed.append("SEQUENCE_ID=" + dat["seq_id"] + "\n")
        elif key == "SEQUENCE_INCLUDED_REGION":
            parts = _re.sub(r"[^0-9,]", "", val).split(",")
            # Values that do not split into exactly two fields are ignored.
            if len(parts) == 2:
                if not (parts[0] and parts[1]):
                    raise ValueError(
                        "SEQUENCE_INCLUDED_REGION must be <start>,<length>.")
                dat["incl_start"] = int(parts[0])
                dat["incl_len"] = int(parts[1])
                dat["incl_found"] = True
        elif key == "SEQUENCE_TEMPLATE":
            dat["seq"] = _re.sub(r"[^ACGTNacgtn]", "", val)
            dat["use_seq"] = dat["seq"]
            echoed.append("SEQUENCE_TEMPLATE=" + dat["seq"] + "\n")
    dat["data"] = "".join(echoed)

    # Derive use_seq from the included region; an unusable region (starts
    # before the first base, shorter than 20, or beyond the template) leaves
    # use_seq as the whole template.
    if dat["incl_found"]:
        adj_start = dat["incl_start"] - dat["start"]
        if adj_start >= 0 and dat["incl_len"] >= 20 and len(dat["seq"]) > adj_start:
            dat["use_seq"] = dat["seq"][adj_start: adj_start + dat["incl_len"]]

    # Validation
    if len(dat["use_seq"]) > 2000:
        raise ValueError("Sequence to long. Limit with SEQUENCE_INCLUDED_REGION to < 2000 bp.")
    if len(dat["use_seq"]) < 20:
        raise ValueError("Sequence to short < 20 bp.")
    range_checks = (
        ("mv", 1.0, 1000.0, "Monovalent ions must be 1.0 - 1000.0."),
        ("dv", 0.0, 1000.0, "Divalent ions must be 0.0 - 1000.0."),
        ("temp", 1.0, 99.0, "Annealing Temp. must be 1.0 - 99.0."),
    )
    for field, low, high, message in range_checks:
        try:
            number = float(dat[field])
        except ValueError:
            # An unparsable value is reported like an out-of-range one
            # instead of leaking the raw float() conversion error.
            raise ValueError(message) from None
        if number < low or number > high:
            raise ValueError(message)

    return dat
149
+
150
+
151
+ def parse_prefold_output(ct_text: str, dat: dict) -> dict:
152
+ """Parse UNAFold .ct output and produce SEQUENCE_EXCLUDED_REGION.
153
+
+ *ct_text* is split on newlines. If the first line has more than one
+ tab-separated cell, the second cell (with "dG = " removed) becomes
+ ``delta_g``. When the text has more than 20 lines, every row with more
+ than 6 tab-separated cells is scanned: cell 0 is the row position and a
+ non-zero cell 4 marks the row as part of a run (presumably the
+ pairing-partner column of the .ct format -- confirm). Each run is written
+ to ``excluded_region`` as "<start>,<length>", entries separated by single
+ spaces, with <start> offset by ``dat["incl_start"] + dat["start"]``.
+ If ``20 < incl_start + incl_len < len(seq)``, one more entry covering the
+ template after the included region is appended. Trailing spaces are
+ removed from the final string.
+
+ Reads ``dat["incl_start"]``, ``dat["start"]``, ``dat["incl_len"]`` and
+ ``dat["seq"]``.
+
+ NOTE(review): runs closed inside the loop use ``inc_end - inc_start`` as
+ length, while a run still open after the loop uses
+ ``inc_last - inc_start``, which looks one base shorter -- confirm.
+ NOTE(review): the exact condition under which ``state`` becomes
+ "found_sec_struct" should be confirmed against the original file.
+
154
+ Returns dict with delta_g (str or None), excluded_region (str) and
+ state ("found_sec_struct" or "no_sec_struct").
155
+ """
156
+ result = {"delta_g": None, "excluded_region": "", "state": "no_sec_struct"}
157
+
158
+ line_data = ct_text.split("\n")
159
+ deltG = line_data[0].split("\t")
160
+ if len(deltG) > 1:
161
+ result["delta_g"] = deltG[1].replace("dG = ", "")
162
+
163
+ excl_reg = ""
164
+ in_reg = False
165
+ dat_incl_start = dat["incl_start"]
166
+ dat_start = dat["start"]
167
+ dat_incl_len = dat["incl_len"]
168
+ dat_seq = dat["seq"]
169
+ inc_start = 0
170
+ inc_end = 0
171
+ inc_last = 0
172
+
173
+ if len(line_data) > 20:
174
+ for line in line_data:
175
+ cells = line.split("\t")
176
+ if len(cells) > 6:
177
+ if int(cells[4]) == 0 and in_reg:
178
+ inc_end = int(cells[0]) - 1
179
+ excl_reg += str(dat_incl_start + dat_start + inc_start)
180
+ excl_reg += "," + str(inc_end - inc_start) + " "
181
+ in_reg = False
182
+ if int(cells[4]) != 0 and not in_reg:
183
+ inc_start = int(cells[0]) - 1
184
+ in_reg = True
185
+ if int(cells[4]) != 0:
186
+ inc_last = int(cells[0]) - 1
187
+ if in_reg:
188
+ excl_reg += str(dat_incl_start + dat_start + inc_start)
189
+ excl_reg += "," + str(inc_last - inc_start) + " "
190
+ result["state"] = "found_sec_struct"
191
+ else:
192
+ result["state"] = "no_sec_struct"
193
+
194
+ if 20 < dat_incl_start + dat_incl_len < len(dat_seq):
195
+ excl_reg += str(dat_incl_start + dat_incl_len + dat_start) + ","
196
+ excl_reg += str(len(dat_seq) - (dat_incl_start + dat_incl_len)) + " "
197
+
198
+ result["excluded_region"] = _re_sub_trailing_spaces(excl_reg)
199
+ return result
200
+
201
+
202
def _re_sub_trailing_spaces(s: str) -> str:
    """Return *s* with the run of spaces at its end removed.

    Only the space character is affected; because ``$`` also matches just
    before a final newline, spaces directly in front of it are removed too.
    """
    import re as _re

    trailing_blanks = _re.compile(r" +$")
    return trailing_blanks.sub("", s)
205
+
206
+
207
+ # ---------------------------------------------------------------------------
208
+ # Amplicon3 input parsing
209
+ # ---------------------------------------------------------------------------
210
+
211
def parse_amplicon3_input(indata: str):
    """Extract and validate the parameters needed to run amplicon3.

    Only lines of the form ``KEY=value`` containing exactly one ``=`` are
    considered; everything else is ignored.

    Args:
        indata: Boulder-IO input text with ``\\n`` line separators.

    Returns:
        A dict with the keys ``seq`` (template restricted to ``ACGTN``, either
        case), ``mv``, ``dv``, ``dntp``, ``dmso``, ``fact``, ``form``, ``temp``
        (cleaned numeric strings when the tag was present, float defaults
        otherwise) and ``tp``, ``sal``, ``mf`` (digit strings when present,
        int default ``1`` otherwise). The input text itself is not included.
        ``temp`` keeps its default ``-10.0`` when SEQUENCE_MELTINGTEMP is
        absent, in which case its range is not checked.

    Raises:
        ValueError: If the sequence is shorter than 36 bp or any parameter is
            unparsable or outside its allowed range.
    """
    dat = {
        "seq": "", "mv": 0.0, "dv": 0.0, "dntp": 0.0,
        "dmso": 0.0, "fact": 0.0, "form": 0.0,
        "tp": 1, "sal": 1, "mf": 1, "temp": -10.0,
    }
    # Tags that go through _clean_numeric: tag -> (result key, error label).
    numeric_tags = {
        "PRIMER_SALT_MONOVALENT": ("mv", "Monovalent salt concentration"),
        "PRIMER_SALT_DIVALENT": ("dv", "Divalent salt concentration"),
        "PRIMER_DNTP_CONC": ("dntp", "DNTP concentration"),
        "PRIMER_DMSO_CONC": ("dmso", "DMSO concentration"),
        "PRIMER_DMSO_FACTOR": ("fact", "DMSO factor"),
        "PRIMER_FORMAMIDE_CONC": ("form", "Formamide concentration"),
        "SEQUENCE_MELTINGTEMP": ("temp", "Melting temperature"),
    }
    # Tags holding a small integer selector: tag -> result key.
    selector_tags = {
        "PRIMER_TM_FORMULA": "tp",
        "PRIMER_SALT_CORRECTIONS": "sal",
        "PRIMER_AMPLICON_FORMULA": "mf",
    }
    for line in indata.split("\n"):
        curr = line.split("=")
        if len(curr) != 2:
            continue
        key, val = curr
        if key == "SEQUENCE_TEMPLATE":
            dat["seq"] = _re.sub(r"[^ACGTNacgtn]", "", val)
        elif key in numeric_tags:
            field, label = numeric_tags[key]
            dat[field] = _clean_numeric(val, label)
        elif key in selector_tags:
            dat[selector_tags[key]] = _re.sub(r"[^0-9]", "", val)

    # Validation -- the order of the checks decides which message is reported.
    if len(dat["seq"]) < 36:
        raise ValueError("Sequence to short < 36 bp.")

    float_ranges = (
        ("mv", 1.0, 1000.0, "Monovalent ions conc. must be 1.0 - 1000.0."),
        ("dv", 0.0, 1000.0, "Divalent ions conc. must be 0.0 - 1000.0."),
        ("dntp", 0.0, 1000.0, "DNTPs conc. must be 0.0 - 1000.0."),
        ("dmso", 0.0, 100.0, "DMSO conc. must be 0.0 - 100.0."),
        ("fact", 0.0, 10.0, "DMSO factor must be 0.0 - 10.0."),
        ("form", 0.0, 1000.0, "Formamide conc. must be 0.0 - 1000.0."),
    )
    for field, low, high, message in float_ranges:
        try:
            number = float(dat[field])
        except ValueError:
            # Unparsable values get the field's own message rather than the
            # raw float() conversion error.
            raise ValueError(message) from None
        if number < low or number > high:
            raise ValueError(message)

    selector_ranges = (
        ("tp", 1, "Table of thermodyn. parameters must be 0 or 1"),
        ("sal", 2, "Salt correction formula must be 0, 1 or 2"),
        ("mf", 1, "Tm calculation algorithm must be 0 or 1"),
    )
    for field, highest, message in selector_ranges:
        try:
            choice = int(dat[field])
        except ValueError:
            # An empty digit string (e.g. "PRIMER_TM_FORMULA=x") lands here.
            raise ValueError(message) from None
        if choice < 0 or choice > highest:
            raise ValueError(message)

    temp_message = "Measured melting Temp. must be 1.0 - 99.0."
    try:
        temp_f = float(dat["temp"])
    except ValueError:
        raise ValueError(temp_message) from None
    # -10.0 is the "not supplied" default and is exempt from the range check.
    if temp_f != -10.0 and (temp_f < 1.0 or temp_f > 99.0):
        raise ValueError(temp_message)

    return dat
@@ -0,0 +1,44 @@
1
+ """Shared constants, paths, and compiled regexes."""
2
+
3
+ import os
4
+ import re
5
+
6
# ---------------------------------------------------------------------------
# Paths — resolved relative to this file so the package works when
# installed via pip (data files ship inside primer3plus_core/).
# ---------------------------------------------------------------------------
CORE_DIR = os.path.dirname(os.path.abspath(__file__))

MISPRIMING_LIB_DIR = os.path.join(CORE_DIR, "mispriming_lib")
SETTINGS_FILES_DIR = os.path.join(CORE_DIR, "settings_files")

# ---------------------------------------------------------------------------
# Tunables
# ---------------------------------------------------------------------------
KILLTIME = 60        # seconds before primer3 / UNAFold / amplicon3 are killed
LOGP3RUNS = True     # log primer3 runs to disk
LOGIPANONYM = True   # anonymise IP addresses in log files

# ---------------------------------------------------------------------------
# Allowed upload extensions
# ---------------------------------------------------------------------------
ALLOWED_EXTENSIONS = {"json", "fa", "bed"}

# ---------------------------------------------------------------------------
# Regexes — Boulder-IO tag manipulation
# ---------------------------------------------------------------------------

# Strip PRIMER_THERMODYNAMIC_PARAMETERS_PATH line. The line may be terminated
# by a newline OR by the end of the text, so a path supplied on an
# unterminated last line is removed as well.
RE_THERMO_PATH = re.compile(
    r"PRIMER_THERMODYNAMIC_PARAMETERS_PATH=[^\n]*(?:\n|\Z)"
)

# Mispriming library replacement: the tag followed by the library directory
# (os.path.join(..., "") leaves a trailing path separator).
P3LIBFIX = "PRIMER_MISPRIMING_LIBRARY=" + os.path.join(MISPRIMING_LIB_DIR, "")
RE_LIB_PRIMER = re.compile(r"PRIMER_MISPRIMING_LIBRARY=")

P3LIBIFIX = "PRIMER_INTERNAL_MISHYB_LIBRARY=" + os.path.join(MISPRIMING_LIB_DIR, "")
RE_LIB_INTERNAL = re.compile(r"PRIMER_INTERNAL_MISHYB_LIBRARY=")

# Primer count extraction from output; group 1 is the value up to the newline.
RE_LEFT_COUNT = re.compile(r"PRIMER_LEFT_NUM_RETURNED=([^\n]*)\n")
RE_INTERNAL_COUNT = re.compile(r"PRIMER_INTERNAL_NUM_RETURNED=([^\n]*)\n")
RE_RIGHT_COUNT = re.compile(r"PRIMER_RIGHT_NUM_RETURNED=([^\n]*)\n")
@@ -0,0 +1,74 @@
1
+ """UUID validation, work-directory management, and file-path helpers."""
2
+
3
+ import os
4
+ import re
5
+ import uuid
6
+
7
+ from .config import ALLOWED_EXTENSIONS
8
+
9
+ # ---------------------------------------------------------------------------
10
+ # UUID
11
+ # ---------------------------------------------------------------------------
12
# Lower-case hyphenated UUID, optionally followed by "-", one of "a"/"p" and
# one of "c"/"j". Anchored with \Z rather than $: $ would also match just
# before a trailing newline and let "<uuid>\n" through.
_UUID_RE = re.compile(
    r"(^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"
    r"-{0,1}([ap]{0,1})([cj]{0,1})\Z"
)


def is_valid_uuid(s: str) -> bool:
    """Return True if *s* is, in full, a run identifier accepted by _UUID_RE.

    Accepted: a lower-case 8-4-4-4-12 hexadecimal UUID, optionally followed by
    ``-``, one of ``a``/``p`` and one of ``c``/``j``. Anything else --
    including upper-case hex, extra leading/trailing characters or a trailing
    newline -- is rejected.
    """
    return _UUID_RE.match(s) is not None
20
+
21
+
22
def new_uuid() -> str:
    """Return a freshly generated random (version 4) UUID as a string."""
    fresh = uuid.uuid4()
    return str(fresh)
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # File helpers
28
+ # ---------------------------------------------------------------------------
29
+
30
def allowed_file(filename: str) -> bool:
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last ``.``, compared case-insensitively;
    a name without any ``.`` is never allowed.
    """
    _stem, dot, extension = filename.rpartition(".")
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
32
+
33
+
34
def work_dir(upload_folder: str, uuidstr: str) -> str:
    """Return (and create if missing) the sub-folder used for *uuidstr*.

    The sub-folder is named after the first two characters of *uuidstr* and
    lives directly under *upload_folder*.
    """
    shard = uuidstr[:2]
    target = os.path.join(upload_folder, shard)
    os.makedirs(target, exist_ok=True)
    return target
39
+
40
+
41
def ensure_log_dir(log_folder: str) -> None:
    """Create *log_folder* (including parents); do nothing if it already exists."""
    os.makedirs(name=log_folder, exist_ok=True)
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Canonical file-name builders
47
+ # ---------------------------------------------------------------------------
48
+
49
def _run_file(sf: str, uuidstr: str, prefix: str, tail: str) -> str:
    """Join *sf* with the file name ``<prefix>_<uuidstr><tail>``."""
    return os.path.join(sf, "{}_{}{}".format(prefix, uuidstr, tail))


def input_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Path of ``<prefix>_<uuidstr>_input.txt`` inside *sf*."""
    return _run_file(sf, uuidstr, prefix, "_input.txt")


def output_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Path of ``<prefix>_<uuidstr>_output.txt`` inside *sf*."""
    return _run_file(sf, uuidstr, prefix, "_output.txt")


def error_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Path of ``<prefix>_<uuidstr>_error.txt`` inside *sf*."""
    return _run_file(sf, uuidstr, prefix, "_error.txt")


def log_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Path of ``<prefix>_<uuidstr>.log`` inside *sf*."""
    return _run_file(sf, uuidstr, prefix, ".log")


def stderr_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Path of ``<prefix>_<uuidstr>.err`` inside *sf*."""
    return _run_file(sf, uuidstr, prefix, ".err")


def upload_path(sf: str, uuidstr: str) -> str:
    """Path of ``p3p_<uuidstr>_upload.txt`` inside *sf*."""
    return _run_file(sf, uuidstr, "p3p", "_upload.txt")


def bed_path(sf: str, uuidstr: str) -> str:
    """Path of ``p3p_<uuidstr>.bed`` inside *sf*."""
    return _run_file(sf, uuidstr, "p3p", ".bed")
@@ -0,0 +1,69 @@
1
+ """Run-logging helper (no Flask dependency).
2
+
3
+ The Flask layer passes *client_ip* and *user_agent* so this module never
4
+ imports ``request``.
5
+ """
6
+
7
+ import datetime
8
+ import os
9
+ from ipaddress import ip_address
10
+ from typing import Optional
11
+
12
+ from .config import LOGP3RUNS, LOGIPANONYM
13
+
14
+
15
def anonymise_ip(raw_ip: str) -> str:
    """Return *raw_ip* with its trailing bytes zeroed.

    IPv4 addresses lose their last byte; IPv6 addresses lose their last
    10 bytes (only the first 6 bytes are kept). ``ip_address`` raises
    ``ValueError`` for text that is not a valid address.
    """
    packed = bytearray(ip_address(raw_ip).packed)
    size = len(packed)
    if size == 4:
        keep = 3
    elif size == 16:
        keep = 6
    else:
        keep = size
    # Overwrite everything after the kept prefix with zero bytes.
    packed[keep:] = bytes(size - keep)
    return str(ip_address(bytes(packed)))
25
+
26
+
27
def log_data(log_folder: str,
             prog: str, key: str, value: str, run_uuid: str,
             client_ip: Optional[str] = None,
             user_agent: Optional[str] = None,
             anonymise: bool = LOGIPANONYM) -> None:
    """Append one TSV line to the monthly log file.

    The line holds, tab-separated: UTC timestamp, *prog*, *key*, *value*,
    *run_uuid*, the client IP (possibly empty), one empty column, and the
    user agent (possibly empty). It is appended to
    ``p3_runs_<YYYY>_<MM>.log`` inside *log_folder*. Nothing is written when
    LOGP3RUNS is false.

    Parameters
    ----------
    log_folder : str
        Directory where log files are stored.
    prog, key, value, run_uuid : str
        The four data columns.
    client_ip : str or None
        Client IP as seen by the reverse proxy (X-Real-IP header).
    user_agent : str or None
        User-Agent header value; tabs are replaced by spaces.
    anonymise : bool
        Whether to zero-out trailing IP octets (via ``anonymise_ip``).
    """
    if not LOGP3RUNS:
        return

    # datetime.timezone.utc instead of datetime.UTC: the alias only exists on
    # Python >= 3.11, the timezone object works on every Python 3.
    now = datetime.datetime.now(datetime.timezone.utc)
    line = now.strftime("%Y-%m-%dT%H:%M:%S")
    line += "\t" + prog + "\t" + key + "\t" + value + "\t" + run_uuid + "\t"

    if client_ip:
        if anonymise:
            line += anonymise_ip(client_ip)
        else:
            line += client_ip

    line += "\t\t"

    if user_agent:
        line += user_agent.replace("\t", " ")

    line += "\n"

    stat_file = os.path.join(log_folder, "p3_runs_" + now.strftime("%Y_%m") + ".log")
    # Explicit encoding so arbitrary user-agent text never depends on the
    # platform's default locale encoding.
    with open(stat_file, "a", encoding="utf-8") as f:
        f.write(line)
File without changes