primer3plus-core 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- primer3plus_core/__init__.py +3 -0
- primer3plus_core/boulder.py +281 -0
- primer3plus_core/config.py +44 -0
- primer3plus_core/files.py +74 -0
- primer3plus_core/logging.py +69 -0
- primer3plus_core/mispriming_lib/__init__.py +0 -0
- primer3plus_core/mispriming_lib/drosophila_w_transposons.txt +14613 -0
- primer3plus_core/mispriming_lib/humrep_and_simple.txt +494 -0
- primer3plus_core/mispriming_lib/rodent_ref.txt +132 -0
- primer3plus_core/mispriming_lib/rodrep_and_simple.txt +401 -0
- primer3plus_core/models.py +40 -0
- primer3plus_core/runner.py +241 -0
- primer3plus_core/settings.py +29 -0
- primer3plus_core/settings_files/__init__.py +0 -0
- primer3plus_core/settings_files/annealing_temp.txt +26 -0
- primer3plus_core/settings_files/cloning_primers.txt +9 -0
- primer3plus_core/settings_files/default_settings.json +296 -0
- primer3plus_core/settings_files/primer3_v1_1_4_default_settings.txt +151 -0
- primer3plus_core/settings_files/primer3plus_2_4_2_default_settings.txt +155 -0
- primer3plus_core/settings_files/primer3web_v0_4_0_default_settings.txt +151 -0
- primer3plus_core/settings_files/probe.txt +34 -0
- primer3plus_core/settings_files/qPCR.txt +17 -0
- primer3plus_core/settings_files/secondary_structures.txt +19 -0
- primer3plus_core-0.1.0.dist-info/METADATA +116 -0
- primer3plus_core-0.1.0.dist-info/RECORD +28 -0
- primer3plus_core-0.1.0.dist-info/WHEEL +5 -0
- primer3plus_core-0.1.0.dist-info/licenses/LICENSE +674 -0
- primer3plus_core-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""Boulder-IO text processing: normalisation, tag injection, parsing, counting."""
|
|
2
|
+
|
|
3
|
+
import re as _re
|
|
4
|
+
|
|
5
|
+
from .config import (
|
|
6
|
+
RE_THERMO_PATH,
|
|
7
|
+
RE_LIB_PRIMER, RE_LIB_INTERNAL,
|
|
8
|
+
P3LIBFIX, P3LIBIFIX,
|
|
9
|
+
RE_LEFT_COUNT, RE_INTERNAL_COUNT, RE_RIGHT_COUNT,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _clean_numeric(val: str, label: str) -> str:
    """Reduce *val* to its ASCII digits and decimal points.

    Any ``-`` in *val* is treated as an attempt to pass a negative number
    and is refused with ``ValueError`` whose text is *label* followed by
    " must not be negative.".  Every other character that is not ``0``-``9``
    or ``.`` is silently dropped; the result is returned as a string and
    is not checked for being a well-formed number.
    """
    if val.find("-") != -1:
        raise ValueError(label + " must not be negative.")
    kept = "0123456789."
    return "".join(ch for ch in val if ch in kept)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# Input normalisation (step 2 of migration)
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
def normalize_newlines(text: str) -> str:
    """Return *text* with every \\r\\n pair and every lone \\r turned into \\n."""
    # Pairs go first so that a \r\n does not end up as two line feeds.
    without_pairs = text.replace("\r\n", "\n")
    return "\n".join(without_pairs.split("\r"))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def strip_thermo_path(text: str) -> str:
    """Return *text* with every ``RE_THERMO_PATH`` match deleted.

    The pattern targets the PRIMER_THERMODYNAMIC_PARAMETERS_PATH tag, so
    a path supplied in the input is removed before further processing.
    """
    without_path = RE_THERMO_PATH.sub("", text)
    return without_path
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def inject_mispriming_libs(text: str) -> str:
    """Point the mispriming-library tags at the packaged library directory.

    Every ``PRIMER_MISPRIMING_LIBRARY=`` is replaced by ``P3LIBFIX`` and
    every ``PRIMER_INTERNAL_MISHYB_LIBRARY=`` by ``P3LIBIFIX``.  Only the
    tag and its ``=`` are matched, so whatever followed the ``=`` in the
    input stays in place after the inserted text.

    Returns the rewritten text.
    """
    # The replacements are filesystem paths.  Passed as plain strings they
    # would be parsed as re replacement templates, where a backslash (as in
    # Windows paths) is an escape or group reference and either raises
    # re.error or corrupts the path.  A callable is inserted verbatim.
    # NOTE(review): the value following the tag is not validated here;
    # confirm callers restrict it to a bare library file name.
    text = RE_LIB_PRIMER.sub(lambda _match: P3LIBFIX, text)
    text = RE_LIB_INTERNAL.sub(lambda _match: P3LIBIFIX, text)
    return text
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def prepare_input(indata: str) -> str:
    """Run the input-preparation steps in order and return the result.

    The steps are: newline normalisation, removal of the thermodynamic
    parameters path, then mispriming-library path injection.
    """
    return inject_mispriming_libs(strip_thermo_path(normalize_newlines(indata)))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
# Output post-processing
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
def append_run_metadata(output: str, uuidstr: str, error: str = "") -> str:
    """Return *output* followed by a P3P_UUID line and, if set, a P3P_ERROR line.

    A newline is always inserted before ``P3P_UUID=``.  The ``P3P_ERROR=``
    line is added only when *error* is non-empty.
    """
    pieces = [output, "\nP3P_UUID=", uuidstr, "\n"]
    if error:
        pieces.extend(("P3P_ERROR=", error, "\n"))
    return "".join(pieces)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def count_primers(output: str) -> int:
    """Return the sum of the left, internal and right NUM_RETURNED values.

    Each of the three count patterns is searched once in *output*; a
    pattern that does not match contributes nothing to the sum.
    """
    patterns = (RE_LEFT_COUNT, RE_INTERNAL_COUNT, RE_RIGHT_COUNT)
    hits = (pattern.search(output) for pattern in patterns)
    return sum(int(hit.group(1)) for hit in hits if hit)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
# Prefold input parsing (UNAFold)
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
def parse_prefold_input(indata: str):
    """Extract parameters from Boulder-IO input for UNAFold prefolding.

    Lines that do not contain exactly one ``=`` are skipped, and tags
    other than the ones handled below are ignored.

    Returns a dict with keys: temp, mv, dv, start, seq_id, seq, use_seq,
    incl_start, incl_len, incl_found, data (accumulated output string).
    ``temp``, ``mv`` and ``dv`` hold the cleaned text of the tag when it
    was supplied and the float ``0.0`` otherwise.

    Raises ValueError on validation failures, including a tag whose value
    holds no usable number once non-numeric characters are removed.
    """

    def _as_float(raw, message):
        # A cleaned value such as "" or "1.2.3" is not a number; report it
        # with the field's own message instead of float()'s generic text.
        try:
            return float(raw)
        except ValueError:
            raise ValueError(message) from None

    dat = {
        "temp": 0.0, "mv": 0.0, "dv": 0.0, "start": 0,
        "seq_id": "", "seq": "", "use_seq": "",
        "incl_start": 0, "incl_len": 0, "incl_found": False,
        "data": "",
    }
    for line in indata.split("\n"):
        curr = line.split("=")
        if len(curr) != 2:
            continue
        key, val = curr
        if key == "PRIMER_ANNEALING_TEMP":
            dat["temp"] = _clean_numeric(val, "Annealing temperature")
            dat["data"] += "PRIMER_ANNEALING_TEMP=" + dat["temp"] + "\n"
        elif key == "PRIMER_SALT_DIVALENT":
            dat["dv"] = _clean_numeric(val, "Divalent salt concentration")
            dat["data"] += "PRIMER_SALT_DIVALENT=" + dat["dv"] + "\n"
        elif key == "PRIMER_SALT_MONOVALENT":
            dat["mv"] = _clean_numeric(val, "Monovalent salt concentration")
            dat["data"] += "PRIMER_SALT_MONOVALENT=" + dat["mv"] + "\n"
        elif key == "PRIMER_FIRST_BASE_INDEX":
            digits = _re.sub(r"[^0-9]", "", val)
            if not digits:
                # int("") would fail with an unhelpful conversion error.
                raise ValueError("PRIMER_FIRST_BASE_INDEX must be an integer.")
            dat["start"] = int(digits)
            dat["data"] += "PRIMER_FIRST_BASE_INDEX=" + str(dat["start"]) + "\n"
        elif key == "SEQUENCE_ID":
            dat["seq_id"] = _re.sub(r"[^0-9A-Za-z _,\.]", "", val)
            dat["data"] += "SEQUENCE_ID=" + dat["seq_id"] + "\n"
        elif key == "SEQUENCE_INCLUDED_REGION":
            incl_spl = _re.sub(r"[^0-9,]", "", val).split(",")
            # Values without exactly one comma are ignored.
            if len(incl_spl) == 2:
                if not (incl_spl[0] and incl_spl[1]):
                    # e.g. "," or "5," -- a side with no digits.
                    raise ValueError(
                        "SEQUENCE_INCLUDED_REGION must be <start>,<length>."
                    )
                dat["incl_start"] = int(incl_spl[0])
                dat["incl_len"] = int(incl_spl[1])
                dat["incl_found"] = True
        elif key == "SEQUENCE_TEMPLATE":
            dat["seq"] = _re.sub(r"[^ACGTNacgtn]", "", val)
            dat["use_seq"] = dat["seq"]
            dat["data"] += "SEQUENCE_TEMPLATE=" + dat["seq"] + "\n"

    # Derive use_seq from included region.  The region is applied only when
    # it starts inside the template and is at least 20 long; otherwise the
    # whole template remains in use_seq.
    if dat["incl_found"]:
        adj_start = dat["incl_start"] - dat["start"]
        if adj_start >= 0 and dat["incl_len"] >= 20 and len(dat["seq"]) > adj_start:
            dat["use_seq"] = dat["seq"][adj_start: adj_start + dat["incl_len"]]

    # Validation
    if len(dat["use_seq"]) > 2000:
        raise ValueError("Sequence to long. Limit with SEQUENCE_INCLUDED_REGION to < 2000 bp.")
    if len(dat["use_seq"]) < 20:
        raise ValueError("Sequence to short < 20 bp.")
    mv_f = _as_float(dat["mv"], "Monovalent ions must be 1.0 - 1000.0.")
    if mv_f < 1.0 or mv_f > 1000.0:
        raise ValueError("Monovalent ions must be 1.0 - 1000.0.")
    dv_f = _as_float(dat["dv"], "Divalent ions must be 0.0 - 1000.0.")
    if dv_f < 0.0 or dv_f > 1000.0:
        raise ValueError("Divalent ions must be 0.0 - 1000.0.")
    temp_f = _as_float(dat["temp"], "Annealing Temp. must be 1.0 - 99.0.")
    if temp_f < 1.0 or temp_f > 99.0:
        raise ValueError("Annealing Temp. must be 1.0 - 99.0.")

    return dat
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def parse_prefold_output(ct_text: str, dat: dict) -> dict:
    """Parse UNAFold .ct output and produce SEQUENCE_EXCLUDED_REGION.

    *ct_text* is split into lines and each line into tab-separated cells.
    The second cell of the first line, with ``"dG = "`` removed, becomes
    ``delta_g``.  Rows with more than six cells are treated as base rows:
    cell 0 is the 1-based row index and a non-zero cell 4 marks the base
    as paired.  Each run of consecutive paired rows becomes one
    ``start,length`` entry.

    *dat* must provide ``incl_start``, ``start``, ``incl_len`` and ``seq``
    (as returned by ``parse_prefold_input``).

    Returns dict with delta_g, excluded_region, state.  ``state`` is
    ``"found_sec_struct"`` whenever *ct_text* has more than 20 lines and
    ``"no_sec_struct"`` otherwise.
    """
    result = {"delta_g": None, "excluded_region": "", "state": "no_sec_struct"}

    line_data = ct_text.split("\n")
    header_cells = line_data[0].split("\t")
    if len(header_cells) > 1:
        result["delta_g"] = header_cells[1].replace("dG = ", "")

    incl_start = dat["incl_start"]
    first_index = dat["start"]
    incl_len = dat["incl_len"]
    template_len = len(dat["seq"])

    # NOTE(review): run starts are shifted by incl_start + start; whether
    # incl_start already includes the first-base index is not visible
    # here -- confirm against the caller before changing.
    offset = incl_start + first_index
    regions = []

    if len(line_data) > 20:
        in_run = False
        run_start = 0
        last_paired = 0
        for line in line_data:
            cells = line.split("\t")
            if len(cells) <= 6:
                continue
            if int(cells[4]) != 0:
                position = int(cells[0]) - 1
                if not in_run:
                    run_start = position
                    in_run = True
                last_paired = position
            elif in_run:
                # First unpaired row after a run: its 0-based index is one
                # past the last paired base, so the difference is the count.
                run_end = int(cells[0]) - 1
                regions.append(str(offset + run_start) + "," + str(run_end - run_start))
                in_run = False
        if in_run:
            # The run reaches the final row.  last_paired is the index of
            # the last paired base itself, so add one to get the same
            # count the in-loop case produces.
            regions.append(
                str(offset + run_start) + "," + str(last_paired - run_start + 1)
            )
        result["state"] = "found_sec_struct"
    else:
        result["state"] = "no_sec_struct"

    # Also exclude whatever part of the template follows the included region.
    incl_end = incl_start + incl_len
    if 20 < incl_end < template_len:
        regions.append(str(incl_end + first_index) + "," + str(template_len - incl_end))

    # Entries are space separated with no trailing space.
    result["excluded_region"] = " ".join(regions)
    return result
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _re_sub_trailing_spaces(s: str) -> str:
    """Return *s* with the run of spaces matched by `` +$`` removed."""
    import re as _re

    trailing_spaces = _re.compile(r" +$")
    return trailing_spaces.sub("", s)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# ---------------------------------------------------------------------------
|
|
208
|
+
# Amplicon3 input parsing
|
|
209
|
+
# ---------------------------------------------------------------------------
|
|
210
|
+
|
|
211
|
+
def parse_amplicon3_input(indata: str):
    """Extract parameters from Boulder-IO input for amplicon3.

    Lines that do not contain exactly one ``=`` are skipped, and tags
    other than the ones handled below are ignored.

    Returns a dict with keys seq, mv, dv, dntp, dmso, fact, form, tp, sal,
    mf and temp.  A field whose tag was supplied holds the cleaned text of
    the value; otherwise it keeps its numeric default.  ``temp`` defaults
    to ``-10.0``, which marks "no melting temperature given" and is exempt
    from the range check.

    Raises ValueError on validation failures, including a tag whose value
    holds no usable number once non-numeric characters are removed.
    """

    def _as_number(convert, raw, message):
        # Cleaned values such as "" or "1.2.3" are not numbers; report
        # them with the field's own message rather than the generic
        # conversion error raised by int()/float().
        try:
            return convert(raw)
        except ValueError:
            raise ValueError(message) from None

    dat = {
        "seq": "", "mv": 0.0, "dv": 0.0, "dntp": 0.0,
        "dmso": 0.0, "fact": 0.0, "form": 0.0,
        "tp": 1, "sal": 1, "mf": 1, "temp": -10.0,
    }
    # Tags cleaned by _clean_numeric: tag -> (result key, label for errors).
    decimal_tags = {
        "PRIMER_SALT_MONOVALENT": ("mv", "Monovalent salt concentration"),
        "PRIMER_SALT_DIVALENT": ("dv", "Divalent salt concentration"),
        "PRIMER_DNTP_CONC": ("dntp", "DNTP concentration"),
        "PRIMER_DMSO_CONC": ("dmso", "DMSO concentration"),
        "PRIMER_DMSO_FACTOR": ("fact", "DMSO factor"),
        "PRIMER_FORMAMIDE_CONC": ("form", "Formamide concentration"),
        "SEQUENCE_MELTINGTEMP": ("temp", "Melting temperature"),
    }
    # Tags reduced to their digits only: tag -> result key.
    digit_tags = {
        "PRIMER_TM_FORMULA": "tp",
        "PRIMER_SALT_CORRECTIONS": "sal",
        "PRIMER_AMPLICON_FORMULA": "mf",
    }
    for line in indata.split("\n"):
        curr = line.split("=")
        if len(curr) != 2:
            continue
        key, val = curr
        if key == "SEQUENCE_TEMPLATE":
            dat["seq"] = _re.sub(r"[^ACGTNacgtn]", "", val)
        elif key in decimal_tags:
            field, label = decimal_tags[key]
            dat[field] = _clean_numeric(val, label)
        elif key in digit_tags:
            dat[digit_tags[key]] = _re.sub(r"[^0-9]", "", val)

    # Validation -- checks run in this fixed order; the first failure wins.
    if len(dat["seq"]) < 36:
        raise ValueError("Sequence to short < 36 bp.")
    float_limits = (
        ("mv", 1.0, 1000.0, "Monovalent ions conc. must be 1.0 - 1000.0."),
        ("dv", 0.0, 1000.0, "Divalent ions conc. must be 0.0 - 1000.0."),
        ("dntp", 0.0, 1000.0, "DNTPs conc. must be 0.0 - 1000.0."),
        ("dmso", 0.0, 100.0, "DMSO conc. must be 0.0 - 100.0."),
        ("fact", 0.0, 10.0, "DMSO factor must be 0.0 - 10.0."),
        ("form", 0.0, 1000.0, "Formamide conc. must be 0.0 - 1000.0."),
    )
    for field, low, high, message in float_limits:
        number = _as_number(float, dat[field], message)
        if number < low or number > high:
            raise ValueError(message)
    int_limits = (
        ("tp", 1, "Table of thermodyn. parameters must be 0 or 1"),
        ("sal", 2, "Salt correction formula must be 0, 1 or 2"),
        ("mf", 1, "Tm calculation algorithm must be 0 or 1"),
    )
    for field, high, message in int_limits:
        number = _as_number(int, dat[field], message)
        if number < 0 or number > high:
            raise ValueError(message)
    temp_message = "Measured melting Temp. must be 1.0 - 99.0."
    temp_f = _as_number(float, dat["temp"], temp_message)
    if temp_f != -10.0:
        if temp_f < 1.0 or temp_f > 99.0:
            raise ValueError(temp_message)

    return dat
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Shared constants, paths, and compiled regexes."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
# ---------------------------------------------------------------------------
|
|
7
|
+
# Paths — resolved relative to this file so the package works when
|
|
8
|
+
# installed via pip (data files ship inside primer3plus_core/).
|
|
9
|
+
# ---------------------------------------------------------------------------
|
|
10
|
+
CORE_DIR = os.path.dirname(os.path.abspath(__file__))

MISPRIMING_LIB_DIR = os.path.join(CORE_DIR, "mispriming_lib")
SETTINGS_FILES_DIR = os.path.join(CORE_DIR, "settings_files")

# ---------------------------------------------------------------------------
# Tunables
# ---------------------------------------------------------------------------
KILLTIME = 60          # seconds before primer3 / UNAFold / amplicon3 are killed
LOGP3RUNS = True       # log primer3 runs to disk
LOGIPANONYM = True     # anonymise IP addresses in log files

# ---------------------------------------------------------------------------
# Allowed upload extensions
# ---------------------------------------------------------------------------
ALLOWED_EXTENSIONS = {"json", "fa", "bed"}

# ---------------------------------------------------------------------------
# Regexes — Boulder-IO tag manipulation
# ---------------------------------------------------------------------------

# Strip PRIMER_THERMODYNAMIC_PARAMETERS_PATH line.  The line feed is
# optional so the tag is also removed when it sits on an unterminated
# final line of the input.
RE_THERMO_PATH = re.compile(r"PRIMER_THERMODYNAMIC_PARAMETERS_PATH=[^\n]*\n?")

# Mispriming library replacement.  os.path.join(..., "") leaves a trailing
# path separator, so the file name that follows the tag in the input ends
# up inside MISPRIMING_LIB_DIR.
P3LIBFIX = "PRIMER_MISPRIMING_LIBRARY=" + os.path.join(MISPRIMING_LIB_DIR, "")
RE_LIB_PRIMER = re.compile(r"PRIMER_MISPRIMING_LIBRARY=")

P3LIBIFIX = "PRIMER_INTERNAL_MISHYB_LIBRARY=" + os.path.join(MISPRIMING_LIB_DIR, "")
RE_LIB_INTERNAL = re.compile(r"PRIMER_INTERNAL_MISHYB_LIBRARY=")

# Primer count extraction from output (group 1 is the rest of the line)
RE_LEFT_COUNT = re.compile(r"PRIMER_LEFT_NUM_RETURNED=([^\n]*)\n")
RE_INTERNAL_COUNT = re.compile(r"PRIMER_INTERNAL_NUM_RETURNED=([^\n]*)\n")
RE_RIGHT_COUNT = re.compile(r"PRIMER_RIGHT_NUM_RETURNED=([^\n]*)\n")
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""UUID validation, work-directory management, and file-path helpers."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
7
|
+
from .config import ALLOWED_EXTENSIONS
|
|
8
|
+
|
|
9
|
+
# ---------------------------------------------------------------------------
|
|
10
|
+
# UUID
|
|
11
|
+
# ---------------------------------------------------------------------------
|
|
12
|
+
# Group 1 is the lower-case hexadecimal 8-4-4-4-12 UUID.  It may be followed
# by an optional "-", an optional "a" or "p" (group 2) and an optional "c"
# or "j" (group 3).  The pattern ends in \Z rather than $ because $ also
# matches just before a trailing line feed, which would let "<uuid>\n"
# through.
_UUID_RE = re.compile(
    r"(^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"
    r"-{0,1}([ap]{0,1})([cj]{0,1})\Z"
)


def is_valid_uuid(s: str) -> bool:
    """Return True when the whole of *s* matches ``_UUID_RE``.

    Upper-case hex digits, surrounding whitespace and a trailing newline
    all make the identifier invalid.
    """
    return _UUID_RE.match(s) is not None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def new_uuid() -> str:
    """Return a newly generated ``uuid.uuid4()`` value in its string form."""
    fresh = uuid.uuid4()
    return str(fresh)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# File helpers
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
def allowed_file(filename: str) -> bool:
    """Return True when *filename* has an extension in ``ALLOWED_EXTENSIONS``.

    The extension is the text after the last ``.``, compared in lower
    case.  A name without any ``.`` is never allowed.
    """
    _stem, dot, extension = filename.rpartition(".")
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def work_dir(upload_folder: str, uuidstr: str) -> str:
    """Return (and create) the working sub-folder for *uuidstr*.

    The sub-folder is named after the first two characters of *uuidstr*
    and lives directly under *upload_folder*, so every identifier sharing
    that prefix shares the folder.  Missing directories are created.

    Raises ValueError when those two characters are not both
    alphanumeric.  The prefix is used as a path component, so values such
    as ``".."`` or ``"/x"`` would otherwise create or return a directory
    outside *upload_folder*.
    """
    prefix = uuidstr[0:2]
    if len(prefix) != 2 or not prefix.isalnum():
        raise ValueError("Invalid run identifier.")
    sf = os.path.join(upload_folder, prefix)
    os.makedirs(sf, exist_ok=True)
    return sf
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def ensure_log_dir(log_folder: str) -> None:
    """Create *log_folder*, with any missing parents, unless it already exists."""
    os.makedirs(log_folder, mode=0o777, exist_ok=True)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
# Canonical file-name builders
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
def input_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Return ``<sf>/<prefix>_<uuidstr>_input.txt``."""
    filename = "{}_{}_input.txt".format(prefix, uuidstr)
    return os.path.join(sf, filename)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def output_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Return ``<sf>/<prefix>_<uuidstr>_output.txt``."""
    filename = "{}_{}_output.txt".format(prefix, uuidstr)
    return os.path.join(sf, filename)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def error_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Return ``<sf>/<prefix>_<uuidstr>_error.txt``."""
    filename = "{}_{}_error.txt".format(prefix, uuidstr)
    return os.path.join(sf, filename)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def log_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Return ``<sf>/<prefix>_<uuidstr>.log``."""
    filename = "{}_{}.log".format(prefix, uuidstr)
    return os.path.join(sf, filename)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def stderr_path(sf: str, uuidstr: str, prefix: str = "p3p") -> str:
    """Return ``<sf>/<prefix>_<uuidstr>.err``."""
    filename = "{}_{}.err".format(prefix, uuidstr)
    return os.path.join(sf, filename)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def upload_path(sf: str, uuidstr: str) -> str:
    """Return ``<sf>/p3p_<uuidstr>_upload.txt``."""
    filename = "p3p_{}_upload.txt".format(uuidstr)
    return os.path.join(sf, filename)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def bed_path(sf: str, uuidstr: str) -> str:
    """Return ``<sf>/p3p_<uuidstr>.bed``."""
    filename = "p3p_{}.bed".format(uuidstr)
    return os.path.join(sf, filename)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Run-logging helper (no Flask dependency).
|
|
2
|
+
|
|
3
|
+
The Flask layer passes *client_ip* and *user_agent* so this module never
|
|
4
|
+
imports ``request``.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import datetime
|
|
8
|
+
import os
|
|
9
|
+
from ipaddress import ip_address
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from .config import LOGP3RUNS, LOGIPANONYM
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def anonymise_ip(raw_ip: str) -> str:
    """Return *raw_ip* with its trailing bytes set to zero.

    For a 4-byte (IPv4) address the last byte is cleared; for a 16-byte
    (IPv6) address the last 10 bytes are cleared, keeping the first 6.
    *raw_ip* is parsed with ``ipaddress.ip_address``, and the result is
    the string form of the masked address.
    """
    packed = ip_address(raw_ip).packed
    # Number of leading bytes kept, by address length in bytes.
    kept = {4: 3, 16: 6}.get(len(packed), len(packed))
    masked = packed[:kept] + bytes(len(packed) - kept)
    return str(ip_address(masked))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def log_data(log_folder: str,
             prog: str, key: str, value: str, run_uuid: str,
             client_ip: Optional[str] = None,
             user_agent: Optional[str] = None,
             anonymise: bool = LOGIPANONYM) -> None:
    """Append one TSV line to the monthly log file.

    The line holds, tab separated: UTC timestamp, *prog*, *key*, *value*,
    *run_uuid*, client IP, an empty column and the user agent.  It is
    appended to ``p3_runs_<YYYY>_<MM>.log`` inside *log_folder*.  Nothing
    is written when ``LOGP3RUNS`` is false.

    Parameters
    ----------
    log_folder : str
        Directory where log files are stored.
    prog, key, value, run_uuid : str
        The four data columns.
    client_ip : str or None
        Client IP as seen by the reverse proxy (X-Real-IP header).
    user_agent : str or None
        User-Agent header value.
    anonymise : bool
        Whether to zero-out trailing IP octets.
    """
    if not LOGP3RUNS:
        return

    # timezone.utc is the same object as datetime.UTC but also exists on
    # interpreters older than 3.11.
    now = datetime.datetime.now(datetime.timezone.utc)

    ip_column = ""
    if client_ip:
        if anonymise:
            try:
                ip_column = anonymise_ip(client_ip)
            except ValueError:
                # Malformed header value: leave the column empty rather
                # than fail the caller or store the raw, un-anonymised text.
                ip_column = ""
        else:
            ip_column = client_ip

    agent_column = ""
    if user_agent:
        # Tabs would shift the columns and CR/LF would split the record.
        agent_column = (
            user_agent.replace("\t", " ").replace("\r", " ").replace("\n", " ")
        )

    columns = [
        now.strftime("%Y-%m-%dT%H:%M:%S"),
        prog, key, value, run_uuid,
        ip_column,
        "",  # column left empty between the IP and the user agent
        agent_column,
    ]
    line = "\t".join(columns) + "\n"

    stat_file = os.path.join(log_folder, "p3_runs_" + now.strftime("%Y_%m") + ".log")
    # Explicit encoding so non-ASCII user agents do not depend on the locale.
    with open(stat_file, "a", encoding="utf-8") as f:
        f.write(line)
|
|
File without changes
|