pycompound 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app.py +231 -54
- pycompound/spec_lib_matching.py +41 -52
- {pycompound-0.1.5.dist-info → pycompound-0.1.6.dist-info}/METADATA +1 -1
- {pycompound-0.1.5.dist-info → pycompound-0.1.6.dist-info}/RECORD +7 -8
- {pycompound-0.1.5.dist-info → pycompound-0.1.6.dist-info}/top_level.txt +0 -1
- app2.py +0 -101
- {pycompound-0.1.5.dist-info → pycompound-0.1.6.dist-info}/WHEEL +0 -0
- {pycompound-0.1.5.dist-info → pycompound-0.1.6.dist-info}/licenses/LICENSE +0 -0
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
|
|
2
2
|
from shiny import App, ui, reactive, render, req
|
|
3
|
+
from shiny.types import SilentException
|
|
3
4
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
4
5
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
5
6
|
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
|
|
@@ -11,6 +12,7 @@ from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
|
11
12
|
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
12
13
|
from pathlib import Path
|
|
13
14
|
from contextlib import redirect_stdout, redirect_stderr
|
|
15
|
+
import contextlib
|
|
14
16
|
import subprocess
|
|
15
17
|
import traceback
|
|
16
18
|
import asyncio
|
|
@@ -24,10 +26,34 @@ import netCDF4 as nc
|
|
|
24
26
|
from pyteomics import mgf, mzml
|
|
25
27
|
import ast
|
|
26
28
|
from numbers import Real
|
|
29
|
+
import logging
|
|
30
|
+
from scipy.optimize import differential_evolution
|
|
27
31
|
|
|
28
32
|
|
|
29
33
|
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
|
|
30
34
|
|
|
35
|
+
class _UIWriter:
|
|
36
|
+
def __init__(self, loop, q: asyncio.Queue[str]):
|
|
37
|
+
self._loop = loop
|
|
38
|
+
self._q = q
|
|
39
|
+
def write(self, s: str):
|
|
40
|
+
if s:
|
|
41
|
+
self._loop.call_soon_threadsafe(self._q.put_nowait, s)
|
|
42
|
+
return len(s)
|
|
43
|
+
def flush(self):
|
|
44
|
+
pass
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def attach_logging_to_writer(writer):
|
|
48
|
+
handler = logging.StreamHandler(writer)
|
|
49
|
+
handler.setLevel(logging.INFO)
|
|
50
|
+
root = logging.getLogger()
|
|
51
|
+
root.addHandler(handler)
|
|
52
|
+
root.setLevel(logging.INFO)
|
|
53
|
+
return handler, root
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
|
|
31
57
|
def _run_with_redirects(fn, writer, *args, **kwargs):
|
|
32
58
|
with redirect_stdout(writer), redirect_stderr(writer):
|
|
33
59
|
return fn(*args, **kwargs)
|
|
@@ -492,18 +518,37 @@ PARAMS_NRMS = {
|
|
|
492
518
|
"entropy_dimension": (1.0, 3.0)
|
|
493
519
|
}
|
|
494
520
|
|
|
521
|
+
|
|
495
522
|
def run_parameter_tuning_DE_ui(platform: str):
|
|
496
|
-
|
|
497
|
-
|
|
523
|
+
# Pick param set per platform
|
|
524
|
+
if platform == "HRMS":
|
|
525
|
+
PARAMS = PARAMS_HRMS
|
|
498
526
|
else:
|
|
499
|
-
PARAMS=PARAMS_NRMS
|
|
527
|
+
PARAMS = PARAMS_NRMS
|
|
500
528
|
|
|
501
529
|
base_inputs = [
|
|
502
530
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
503
531
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
504
|
-
ui.input_select(
|
|
505
|
-
|
|
506
|
-
|
|
532
|
+
ui.input_select(
|
|
533
|
+
"similarity_measure",
|
|
534
|
+
"Select similarity measure:",
|
|
535
|
+
[
|
|
536
|
+
"cosine","shannon","renyi","tsallis","mixture","jaccard","dice",
|
|
537
|
+
"3w_jaccard","sokal_sneath","binary_cosine","mountford",
|
|
538
|
+
"mcconnaughey","driver_kroeber","simpson","braun_banquet",
|
|
539
|
+
"fager_mcgowan","kulczynski","intersection","hamming","hellinger",
|
|
540
|
+
],
|
|
541
|
+
),
|
|
542
|
+
ui.input_text(
|
|
543
|
+
"weights",
|
|
544
|
+
"Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):",
|
|
545
|
+
"0.25, 0.25, 0.25, 0.25",
|
|
546
|
+
),
|
|
547
|
+
ui.input_select(
|
|
548
|
+
"high_quality_reference_library",
|
|
549
|
+
"Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
|
|
550
|
+
[False, True],
|
|
551
|
+
),
|
|
507
552
|
]
|
|
508
553
|
|
|
509
554
|
if platform == "HRMS":
|
|
@@ -527,47 +572,56 @@ def run_parameter_tuning_DE_ui(platform: str):
|
|
|
527
572
|
|
|
528
573
|
numeric_inputs = [
|
|
529
574
|
ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
|
|
530
|
-
ui.input_numeric("mz_max", "Maximum m/z for filtering:",
|
|
575
|
+
ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99_999_999),
|
|
531
576
|
ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
|
|
532
|
-
ui.input_numeric("int_max", "Maximum intensity for filtering:",
|
|
577
|
+
ui.input_numeric("int_max", "Maximum intensity for filtering:", 999_999_999),
|
|
533
578
|
ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
|
|
534
579
|
ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
|
|
535
580
|
ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
|
|
536
581
|
ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
|
|
537
582
|
ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
|
|
583
|
+
ui.input_numeric("max_iterations", "Maximum number of iterations:", 5),
|
|
538
584
|
]
|
|
539
585
|
|
|
586
|
+
run_button_parameter_tuning_DE = ui.input_action_button(
|
|
587
|
+
"run_btn_parameter_tuning_DE",
|
|
588
|
+
"Tune parameters (differential evolution optimization)",
|
|
589
|
+
style="font-size:16px; padding:15px 30px; width:300px; height:100px",
|
|
590
|
+
)
|
|
591
|
+
back_button = ui.input_action_button(
|
|
592
|
+
"back",
|
|
593
|
+
"Back to main menu",
|
|
594
|
+
style="font-size:16px; padding:15px 30px; width:300px; height:100px",
|
|
595
|
+
)
|
|
540
596
|
|
|
541
|
-
#
|
|
542
|
-
run_button_parameter_tuning_DE = ui.input_action_button("run_btn_parameter_tuning_DE", "Tune parameters (differential evolution optimization)", style="font-size:16px; padding:15px 30px; width:300px; height:100px")
|
|
543
|
-
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:300px; height:100px")
|
|
544
|
-
|
|
597
|
+
# Build the 4-column inputs panel (fixed slices corrected, unpack lists properly)
|
|
545
598
|
if platform == "HRMS":
|
|
546
599
|
inputs_columns = ui.layout_columns(
|
|
547
|
-
ui.div(base_inputs
|
|
548
|
-
ui.div(
|
|
549
|
-
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
550
|
-
ui.div(
|
|
551
|
-
col_widths=(3,3,3,3),
|
|
600
|
+
ui.div(*base_inputs, style="display:flex; flex-direction:column; gap:10px;"),
|
|
601
|
+
ui.div(*extra_inputs, style="display:flex; flex-direction:column; gap:10px;"),
|
|
602
|
+
ui.div(*numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
603
|
+
ui.div(*numeric_inputs[5:11], style="display:flex; flex-direction:column; gap:10px;"),
|
|
604
|
+
col_widths=(3, 3, 3, 3),
|
|
552
605
|
)
|
|
553
|
-
|
|
606
|
+
else: # NRMS
|
|
554
607
|
inputs_columns = ui.layout_columns(
|
|
555
|
-
ui.div(base_inputs
|
|
556
|
-
ui.div(
|
|
557
|
-
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
558
|
-
ui.div(
|
|
559
|
-
col_widths=(3,3,3,3),
|
|
608
|
+
ui.div(*base_inputs, style="display:flex; flex-direction:column; gap:10px;"),
|
|
609
|
+
ui.div(*extra_inputs, style="display:flex; flex-direction:column; gap:10px;"),
|
|
610
|
+
ui.div(*numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
611
|
+
ui.div(*numeric_inputs[5:11], style="display:flex; flex-direction:column; gap:10px;"),
|
|
612
|
+
col_widths=(3, 3, 3, 3),
|
|
560
613
|
)
|
|
561
614
|
|
|
615
|
+
# Main page: sidebar (param selection + bounds) and body (inputs + buttons + live log)
|
|
562
616
|
return ui.page_fillable(
|
|
563
617
|
ui.layout_sidebar(
|
|
564
618
|
ui.sidebar(
|
|
565
|
-
ui.h3("Select parameters"),
|
|
619
|
+
ui.h3("Select continuous parameters to optimize"),
|
|
566
620
|
ui.input_checkbox_group(
|
|
567
621
|
"params",
|
|
568
622
|
None,
|
|
569
623
|
choices=list(PARAMS.keys()),
|
|
570
|
-
selected=["noise_threshold","LET_threshold"],
|
|
624
|
+
selected=["noise_threshold", "LET_threshold"],
|
|
571
625
|
),
|
|
572
626
|
ui.hr(),
|
|
573
627
|
ui.h4("Bounds for selected parameters"),
|
|
@@ -576,9 +630,15 @@ def run_parameter_tuning_DE_ui(platform: str):
|
|
|
576
630
|
),
|
|
577
631
|
ui.div(
|
|
578
632
|
ui.h2("Tune parameters (differential evolution optimization)"),
|
|
579
|
-
|
|
633
|
+
inputs_columns,
|
|
580
634
|
run_button_parameter_tuning_DE,
|
|
581
635
|
back_button,
|
|
636
|
+
ui.br(),
|
|
637
|
+
ui.card(
|
|
638
|
+
ui.card_header("Live log"),
|
|
639
|
+
ui.output_text_verbatim("run_log"), # <-- make sure server defines this
|
|
640
|
+
),
|
|
641
|
+
style="display:flex; flex-direction:column; gap:16px;",
|
|
582
642
|
),
|
|
583
643
|
)
|
|
584
644
|
)
|
|
@@ -586,6 +646,7 @@ def run_parameter_tuning_DE_ui(platform: str):
|
|
|
586
646
|
|
|
587
647
|
|
|
588
648
|
|
|
649
|
+
|
|
589
650
|
app_ui = ui.page_fluid(
|
|
590
651
|
ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
|
|
591
652
|
ui.output_ui("main_ui"),
|
|
@@ -1280,40 +1341,151 @@ def server(input, output, session):
|
|
|
1280
1341
|
yield df_out.to_csv(index=False).encode("utf-8", sep='\t')
|
|
1281
1342
|
|
|
1282
1343
|
|
|
1344
|
+
|
|
1283
1345
|
@reactive.effect
|
|
1284
1346
|
@reactive.event(input.run_btn_parameter_tuning_DE)
|
|
1285
|
-
def
|
|
1347
|
+
async def run_btn_parameter_tuning_DE():
|
|
1348
|
+
match_log_rv.set("Tuning specified continuous parameters using differential evolution...\n")
|
|
1286
1349
|
is_any_job_running.set(True)
|
|
1287
1350
|
is_tuning_DE_running.set(True)
|
|
1288
|
-
|
|
1351
|
+
await reactive.flush()
|
|
1289
1352
|
|
|
1290
|
-
#
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1353
|
+
# --- helpers ---
|
|
1354
|
+
def _safe_float(v, default):
|
|
1355
|
+
try:
|
|
1356
|
+
if v is None:
|
|
1357
|
+
return default
|
|
1358
|
+
return float(v)
|
|
1359
|
+
except Exception:
|
|
1360
|
+
return default
|
|
1361
|
+
|
|
1362
|
+
def _iget(id, default=None):
|
|
1363
|
+
# Safe getter for Shiny inputs (avoids SilentException)
|
|
1364
|
+
if id in input:
|
|
1365
|
+
try:
|
|
1366
|
+
return input[id]()
|
|
1367
|
+
except SilentException:
|
|
1368
|
+
return default
|
|
1369
|
+
return default
|
|
1370
|
+
|
|
1371
|
+
# ---- log plumbing (stdout/stderr -> UI) ----
|
|
1372
|
+
loop = asyncio.get_running_loop()
|
|
1373
|
+
q: asyncio.Queue[str | None] = asyncio.Queue()
|
|
1374
|
+
|
|
1375
|
+
class UIWriter(io.TextIOBase):
|
|
1376
|
+
def write(self, s: str):
|
|
1377
|
+
if s:
|
|
1378
|
+
loop.call_soon_threadsafe(q.put_nowait, s)
|
|
1379
|
+
return len(s)
|
|
1380
|
+
def flush(self): pass
|
|
1316
1381
|
|
|
1382
|
+
async def _drain():
|
|
1383
|
+
while True:
|
|
1384
|
+
msg = await q.get()
|
|
1385
|
+
if msg is None:
|
|
1386
|
+
break
|
|
1387
|
+
match_log_rv.set(match_log_rv.get() + msg)
|
|
1388
|
+
await reactive.flush()
|
|
1389
|
+
|
|
1390
|
+
drain_task = asyncio.create_task(_drain())
|
|
1391
|
+
writer = UIWriter()
|
|
1392
|
+
|
|
1393
|
+
# ---------- SNAPSHOT INPUTS SAFELY ----------
|
|
1394
|
+
try:
|
|
1395
|
+
qfile = _iget("query_data")[0]["datapath"]
|
|
1396
|
+
rfile = _iget("reference_data")[0]["datapath"]
|
|
1397
|
+
|
|
1398
|
+
platform = _iget("chromatography_platform", "HRMS")
|
|
1399
|
+
sim = _iget("similarity_measure", "cosine")
|
|
1400
|
+
spro = _iget("spectrum_preprocessing_order", "FCNMWL")
|
|
1401
|
+
|
|
1402
|
+
hq_raw = _iget("high_quality_reference_library", False)
|
|
1403
|
+
if isinstance(hq_raw, str):
|
|
1404
|
+
hq = hq_raw.lower() == "true"
|
|
1405
|
+
else:
|
|
1406
|
+
hq = bool(hq_raw)
|
|
1407
|
+
|
|
1408
|
+
mz_min = _safe_float(_iget("mz_min", 0.0), 0.0)
|
|
1409
|
+
mz_max = _safe_float(_iget("mz_max", 99_999_999.0), 99_999_999.0)
|
|
1410
|
+
int_min = _safe_float(_iget("int_min", 0.0), 0.0)
|
|
1411
|
+
int_max = _safe_float(_iget("int_max", 999_999_999.0), 999_999_999.0)
|
|
1412
|
+
|
|
1413
|
+
# weights "a,b,c,d"
|
|
1414
|
+
w_text = _iget("weights", "") or ""
|
|
1415
|
+
w_list = [float(w.strip()) for w in w_text.split(",") if w.strip()]
|
|
1416
|
+
w_list = (w_list + [0.0, 0.0, 0.0, 0.0])[:4]
|
|
1417
|
+
weights = {"Cosine": w_list[0], "Shannon": w_list[1], "Renyi": w_list[2], "Tsallis": w_list[3]}
|
|
1418
|
+
|
|
1419
|
+
# selected params + bounds
|
|
1420
|
+
opt_params = tuple(_iget("params", ()) or ())
|
|
1421
|
+
bounds_dict = {}
|
|
1422
|
+
# populate bounds using the min_/max_ inputs if present, otherwise fall back
|
|
1423
|
+
# to your default PARAMS dicts already defined in your file
|
|
1424
|
+
param_defaults = PARAMS_HRMS if platform == "HRMS" else PARAMS_NRMS
|
|
1425
|
+
for p in opt_params:
|
|
1426
|
+
lo = _safe_float(_iget(f"min_{p}", param_defaults.get(p, (0.0, 1.0))[0]),
|
|
1427
|
+
param_defaults.get(p, (0.0, 1.0))[0])
|
|
1428
|
+
hi = _safe_float(_iget(f"max_{p}", param_defaults.get(p, (0.0, 1.0))[1]),
|
|
1429
|
+
param_defaults.get(p, (0.0, 1.0))[1])
|
|
1430
|
+
if lo > hi:
|
|
1431
|
+
lo, hi = hi, lo
|
|
1432
|
+
bounds_dict[p] = (lo, hi)
|
|
1433
|
+
|
|
1434
|
+
# defaults (guarded!)
|
|
1435
|
+
defaults = {
|
|
1436
|
+
"window_size_centroiding": _safe_float(_iget("window_size_centroiding", 0.5), 0.5),
|
|
1437
|
+
"window_size_matching": _safe_float(_iget("window_size_matching", 0.5), 0.5),
|
|
1438
|
+
"noise_threshold": _safe_float(_iget("noise_threshold", 0.0), 0.0),
|
|
1439
|
+
"wf_mz": _safe_float(_iget("wf_mz", 0.0), 0.0),
|
|
1440
|
+
"wf_int": _safe_float(_iget("wf_int", 1.0), 1.0),
|
|
1441
|
+
"LET_threshold": _safe_float(_iget("LET_threshold", 0.0), 0.0),
|
|
1442
|
+
"entropy_dimension": _safe_float(_iget("entropy_dimension", 1.1), 1.1),
|
|
1443
|
+
}
|
|
1444
|
+
if platform == "NRMS":
|
|
1445
|
+
defaults.pop("window_size_centroiding", None)
|
|
1446
|
+
defaults.pop("window_size_matching", None)
|
|
1447
|
+
|
|
1448
|
+
except Exception as e:
|
|
1449
|
+
import traceback
|
|
1450
|
+
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
|
|
1451
|
+
match_log_rv.set(match_log_rv.get() + f"\n❌ Input snapshot failed:\n{tb}\n")
|
|
1452
|
+
is_tuning_DE_running.set(False); is_any_job_running.set(False)
|
|
1453
|
+
await q.put(None); await drain_task; await reactive.flush()
|
|
1454
|
+
return
|
|
1455
|
+
|
|
1456
|
+
def _run():
|
|
1457
|
+
from contextlib import redirect_stdout, redirect_stderr
|
|
1458
|
+
with redirect_stdout(writer), redirect_stderr(writer):
|
|
1459
|
+
return tune_params_DE(
|
|
1460
|
+
query_data=qfile,
|
|
1461
|
+
reference_data=rfile,
|
|
1462
|
+
chromatography_platform=input.chromatography_platform(),
|
|
1463
|
+
similarity_measure=sim,
|
|
1464
|
+
weights=weights,
|
|
1465
|
+
spectrum_preprocessing_order=spro,
|
|
1466
|
+
mz_min=mz_min, mz_max=mz_max,
|
|
1467
|
+
int_min=int_min, int_max=int_max,
|
|
1468
|
+
high_quality_reference_library=hq,
|
|
1469
|
+
optimize_params=list(opt_params),
|
|
1470
|
+
param_bounds=bounds_dict,
|
|
1471
|
+
default_params=defaults,
|
|
1472
|
+
de_workers=1,
|
|
1473
|
+
maxiters=input.max_iterations()
|
|
1474
|
+
)
|
|
1475
|
+
|
|
1476
|
+
try:
|
|
1477
|
+
_ = await asyncio.to_thread(_run)
|
|
1478
|
+
match_log_rv.set(match_log_rv.get() + "\n✅ Differential evolution finished.\n")
|
|
1479
|
+
except Exception as e:
|
|
1480
|
+
import traceback
|
|
1481
|
+
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
|
|
1482
|
+
match_log_rv.set(match_log_rv.get() + f"\n❌ {type(e).__name__}: {e}\n{tb}\n")
|
|
1483
|
+
finally:
|
|
1484
|
+
await q.put(None)
|
|
1485
|
+
await drain_task
|
|
1486
|
+
is_tuning_DE_running.set(False)
|
|
1487
|
+
is_any_job_running.set(False)
|
|
1488
|
+
await reactive.flush()
|
|
1317
1489
|
|
|
1318
1490
|
|
|
1319
1491
|
@reactive.effect
|
|
@@ -1335,6 +1507,11 @@ def server(input, output, session):
|
|
|
1335
1507
|
return run_status_parameter_tuning_grid.get()
|
|
1336
1508
|
return run_status_parameter_tuning_DE.get()
|
|
1337
1509
|
|
|
1510
|
+
@output
|
|
1511
|
+
@render.text
|
|
1512
|
+
def run_log():
|
|
1513
|
+
return match_log_rv.get()
|
|
1514
|
+
|
|
1338
1515
|
|
|
1339
1516
|
app = App(app_ui, server)
|
|
1340
1517
|
|
pycompound/spec_lib_matching.py
CHANGED
|
@@ -21,46 +21,46 @@ def _vector_to_full_params(X, default_params, optimize_params):
|
|
|
21
21
|
|
|
22
22
|
def objective_function_HRMS(X, ctx):
|
|
23
23
|
p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
)
|
|
36
|
-
else:
|
|
37
|
-
acc = get_acc_NRMS(
|
|
38
|
-
ctx["df_query"], ctx["df_reference"],
|
|
39
|
-
ctx["unique_query_ids"], ctx["unique_reference_ids"],
|
|
40
|
-
ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
|
|
41
|
-
ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
|
|
42
|
-
p["noise_threshold"],
|
|
43
|
-
p["wf_mz"], p["wf_int"], p["LET_threshold"],
|
|
44
|
-
p["entropy_dimension"],
|
|
45
|
-
ctx["high_quality_reference_library"],
|
|
46
|
-
verbose=False
|
|
47
|
-
)
|
|
24
|
+
acc = get_acc_HRMS(
|
|
25
|
+
ctx["df_query"], ctx["df_reference"],
|
|
26
|
+
ctx["unique_query_ids"], ctx["unique_reference_ids"],
|
|
27
|
+
ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
|
|
28
|
+
ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
|
|
29
|
+
p["window_size_centroiding"], p["window_size_matching"], p["noise_threshold"],
|
|
30
|
+
p["wf_mz"], p["wf_int"], p["LET_threshold"],
|
|
31
|
+
p["entropy_dimension"],
|
|
32
|
+
ctx["high_quality_reference_library"],
|
|
33
|
+
verbose=False
|
|
34
|
+
)
|
|
48
35
|
print(f"\nparams({ctx['optimize_params']}) = {np.array(X)}\naccuracy: {acc*100}%")
|
|
49
36
|
return 1.0 - acc
|
|
50
37
|
|
|
38
|
+
def objective_function_NRMS(X, ctx):
|
|
39
|
+
p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
|
|
40
|
+
acc = get_acc_NRMS(
|
|
41
|
+
ctx["df_query"], ctx["df_reference"],
|
|
42
|
+
ctx["unique_query_ids"], ctx["unique_reference_ids"],
|
|
43
|
+
ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
|
|
44
|
+
ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
|
|
45
|
+
p["noise_threshold"], p["wf_mz"], p["wf_int"], p["LET_threshold"], p["entropy_dimension"],
|
|
46
|
+
ctx["high_quality_reference_library"],
|
|
47
|
+
verbose=False
|
|
48
|
+
)
|
|
49
|
+
print(f"\nparams({ctx['optimize_params']}) = {np.array(X)}\naccuracy: {acc*100}%")
|
|
50
|
+
return 1.0 - acc
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
def tune_params_DE(query_data=None, reference_data=None, similarity_measure='cosine', weights=None, spectrum_preprocessing_order='CNMWL', mz_min=0, mz_max=999999999, int_min=0, int_max=999999999, high_quality_reference_library=False, optimize_params=["window_size_centroiding","window_size_matching","noise_threshold","wf_mz","wf_int","LET_threshold","entropy_dimension"], param_bounds={"window_size_centroiding":(0.0,0.5),"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0.0,5.0),"entropy_dimension":(1.0,3.0)}, default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1}):
|
|
54
|
+
def tune_params_DE(query_data=None, reference_data=None, chromatography_platform='HRMS', similarity_measure='cosine', weights=None, spectrum_preprocessing_order='CNMWL', mz_min=0, mz_max=999999999, int_min=0, int_max=999999999, high_quality_reference_library=False, optimize_params=["window_size_centroiding","window_size_matching","noise_threshold","wf_mz","wf_int","LET_threshold","entropy_dimension"], param_bounds={"window_size_centroiding":(0.0,0.5),"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0.0,5.0),"entropy_dimension":(1.0,3.0)}, default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1}, maxiters=3, de_workers=1, de_updating='immediate', log_hook=None):
|
|
55
55
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
56
|
+
def _log(msg):
|
|
57
|
+
if log_hook:
|
|
58
|
+
try: log_hook(msg if msg.endswith("\n") else msg + "\n")
|
|
59
|
+
except: pass
|
|
60
|
+
|
|
61
|
+
def callback(xk, conv):
|
|
62
|
+
_log(f"iter callback: conv={conv:.4g}, x={xk}")
|
|
63
|
+
return False
|
|
64
64
|
|
|
65
65
|
if query_data is None:
|
|
66
66
|
print('\nError: No argument passed to the mandatory query_data. Please pass the path to the TXT file of the query data.')
|
|
@@ -111,22 +111,13 @@ def tune_params_DE(query_data=None, reference_data=None, similarity_measure='cos
|
|
|
111
111
|
|
|
112
112
|
bounds = [param_bounds[p] for p in optimize_params]
|
|
113
113
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
result = differential_evolution(
|
|
122
|
-
objective_function_HRMS,
|
|
123
|
-
bounds=bounds,
|
|
124
|
-
args=(ctx,),
|
|
125
|
-
maxiter=3,
|
|
126
|
-
tol=0.0,
|
|
127
|
-
workers=-1,
|
|
128
|
-
seed=1,
|
|
129
|
-
)
|
|
114
|
+
print('here!!!!!!!!!!!!!!!')
|
|
115
|
+
print(de_workers)
|
|
116
|
+
print('here!!!!!!!!!!!!!!!')
|
|
117
|
+
if chromatography_platform == 'HRMS':
|
|
118
|
+
result = differential_evolution(objective_function_HRMS, bounds=bounds, args=(ctx,), maxiter=maxiters, tol=0.0, workers=de_workers, seed=1)
|
|
119
|
+
else:
|
|
120
|
+
result = differential_evolution(objective_function_NRMS, bounds=bounds, args=(ctx,), maxiter=maxiters, tol=0.0, workers=de_workers, seed=1)
|
|
130
121
|
|
|
131
122
|
best_full_params = _vector_to_full_params(result.x, default_params, optimize_params)
|
|
132
123
|
best_acc = 100.0 - (result.fun * 100.0)
|
|
@@ -140,8 +131,7 @@ def tune_params_DE(query_data=None, reference_data=None, similarity_measure='cos
|
|
|
140
131
|
for k, v in best_full_params.items():
|
|
141
132
|
print(f" {k}: {v}")
|
|
142
133
|
print(f"\nBest accuracy: {best_acc:.3f}%")
|
|
143
|
-
|
|
144
|
-
|
|
134
|
+
_log(f"best = {result.x}, acc={100*(1-result.fun):.3f}%")
|
|
145
135
|
|
|
146
136
|
|
|
147
137
|
default_HRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
|
|
@@ -577,7 +567,6 @@ def tune_params_on_NRMS_data_grid_shiny(query_data=None, reference_data=None, gr
|
|
|
577
567
|
|
|
578
568
|
def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
|
|
579
569
|
|
|
580
|
-
#print('\n\n\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n\n\n')
|
|
581
570
|
n_top_matches_to_save = 1
|
|
582
571
|
|
|
583
572
|
all_similarity_scores = []
|
|
@@ -1,16 +1,15 @@
|
|
|
1
|
-
app.py,sha256=
|
|
2
|
-
app2.py,sha256=ZTmShHRlv27_HhSzCj8JOVAPv5LLsjtgEkxt1c-7r6I,2950
|
|
1
|
+
app.py,sha256=lc54MkUqNTpGDBaF-3sDkSKDBSem0lDzZXo875d4W0c,67545
|
|
3
2
|
pycompound/build_library.py,sha256=sXG5MTJnPE7Gr8YMlLWjfMS7JQrW32lCeCGDw-DFe38,4826
|
|
4
3
|
pycompound/plot_spectra.py,sha256=_yeHooNoJHYlTajaZ9hgUudisdWVlw1Zw1wJfV3tpqc,40632
|
|
5
4
|
pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
|
|
6
5
|
pycompound/processing.py,sha256=q629rcDaMQMgef-4SbeV9cJnuiLXg97VT2F5AIsyqgI,10654
|
|
7
6
|
pycompound/similarity_measures.py,sha256=NbeVIy9DE_KWlDMXXylekjKuYVrtzbeEXbTutKFxmfU,10460
|
|
8
|
-
pycompound/spec_lib_matching.py,sha256
|
|
7
|
+
pycompound/spec_lib_matching.py,sha256=Dqz8yU1W7aqqPLLiZefLVXU3V0ojEnXLbKoyZJFTbAA,73386
|
|
9
8
|
pycompound/spec_lib_matching_CLI.py,sha256=qiekC52FP6ET_3NYvxUDN7km7y1OLUsd9FB4SHfne_Y,9690
|
|
10
9
|
pycompound/tuning_CLI_DE.py,sha256=PXy95LD_jmVeWdgiMlMwEZU_KqPGqDao1skwe5U4Sfc,9147
|
|
11
10
|
pycompound/tuning_CLI_grid.py,sha256=0XU-4ShZiZ2MQy5d0zydH0hphqXvqGtf4etl-ePNarU,8560
|
|
12
|
-
pycompound-0.1.
|
|
13
|
-
pycompound-0.1.
|
|
14
|
-
pycompound-0.1.
|
|
15
|
-
pycompound-0.1.
|
|
16
|
-
pycompound-0.1.
|
|
11
|
+
pycompound-0.1.6.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
|
|
12
|
+
pycompound-0.1.6.dist-info/METADATA,sha256=Rw4Z889RSMLStLvxDy8mVA-b_AKNzryMtSQHH_X0m2Q,1732
|
|
13
|
+
pycompound-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
14
|
+
pycompound-0.1.6.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
|
|
15
|
+
pycompound-0.1.6.dist-info/RECORD,,
|
app2.py
DELETED
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
# app.py
|
|
4
|
-
from shiny import App, ui, render, reactive
|
|
5
|
-
import pandas as pd
|
|
6
|
-
|
|
7
|
-
# Parameters to choose from + suggested default ranges
|
|
8
|
-
PARAMS = {
|
|
9
|
-
"window_size_centroiding": (0.0, 0.5),
|
|
10
|
-
"window_size_matching": (0.0, 0.5),
|
|
11
|
-
"noise_threshold": (0.0, 0.25),
|
|
12
|
-
"wf_mz": (0.0, 5.0),
|
|
13
|
-
"wf_int": (0.0, 5.0),
|
|
14
|
-
"LET_threshold": (0.0, 5.0),
|
|
15
|
-
"entropy_dimension": (1.0, 3.0),
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
app_ui = ui.page_fillable(
|
|
19
|
-
ui.layout_sidebar(
|
|
20
|
-
ui.sidebar(
|
|
21
|
-
ui.h3("Select parameters"),
|
|
22
|
-
ui.input_checkbox_group(
|
|
23
|
-
id="params",
|
|
24
|
-
label=None,
|
|
25
|
-
choices=list(PARAMS.keys()),
|
|
26
|
-
selected=["window_size_centroiding", "noise_threshold"],
|
|
27
|
-
),
|
|
28
|
-
ui.hr(),
|
|
29
|
-
ui.h4("Bounds for selected parameters"),
|
|
30
|
-
ui.output_ui("bounds_inputs"),
|
|
31
|
-
width=360,
|
|
32
|
-
),
|
|
33
|
-
)
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
def server(input, output, session):
|
|
37
|
-
@output
|
|
38
|
-
@render.ui
|
|
39
|
-
def bounds_inputs():
|
|
40
|
-
selected = input.params()
|
|
41
|
-
if not selected:
|
|
42
|
-
return ui.div(ui.em("Select one or more parameters above."))
|
|
43
|
-
|
|
44
|
-
blocks = []
|
|
45
|
-
for name in selected:
|
|
46
|
-
lo, hi = PARAMS.get(name, (0.0, 1.0))
|
|
47
|
-
blocks.append(
|
|
48
|
-
ui.card(
|
|
49
|
-
ui.card_header(name),
|
|
50
|
-
ui.layout_columns(
|
|
51
|
-
ui.input_numeric(f"min_{name}", "Lower", lo, step=0.001),
|
|
52
|
-
ui.input_numeric(f"max_{name}", "Upper", hi, step=0.001),
|
|
53
|
-
)
|
|
54
|
-
)
|
|
55
|
-
)
|
|
56
|
-
return ui.div(*blocks)
|
|
57
|
-
|
|
58
|
-
def _read_bounds_dict():
|
|
59
|
-
selected = input.params()
|
|
60
|
-
out = {}
|
|
61
|
-
for name in selected:
|
|
62
|
-
lo_default, hi_default = PARAMS.get(name, (0.0, 1.0))
|
|
63
|
-
lo_id = f"min_{name}"
|
|
64
|
-
hi_id = f"max_{name}"
|
|
65
|
-
|
|
66
|
-
# Use input[...]() and guard with "in input"
|
|
67
|
-
lo_val = input[lo_id]() if lo_id in input else lo_default
|
|
68
|
-
hi_val = input[hi_id]() if hi_id in input else hi_default
|
|
69
|
-
|
|
70
|
-
out[name] = (float(lo_val), float(hi_val))
|
|
71
|
-
return out
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
# Table of current bounds
|
|
76
|
-
@output
|
|
77
|
-
@render.data_frame
|
|
78
|
-
def bounds_table():
|
|
79
|
-
b = _read_bounds_dict()
|
|
80
|
-
if not b:
|
|
81
|
-
return pd.DataFrame(columns=["parameter", "lower", "upper"])
|
|
82
|
-
rows = [{"parameter": k, "lower": v[0], "upper": v[1]} for k, v in b.items()]
|
|
83
|
-
return pd.DataFrame(rows)
|
|
84
|
-
|
|
85
|
-
# JSON-ish view (string) you can parse/use elsewhere
|
|
86
|
-
@output
|
|
87
|
-
@render.text
|
|
88
|
-
def bounds_json():
|
|
89
|
-
b = _read_bounds_dict()
|
|
90
|
-
if not b:
|
|
91
|
-
return "{}"
|
|
92
|
-
# Pretty-print as Python dict literal for quick copy/paste
|
|
93
|
-
lines = ["{"]
|
|
94
|
-
for k, (lo, hi) in b.items():
|
|
95
|
-
lines.append(f" '{k}': ({lo}, {hi}),")
|
|
96
|
-
lines.append("}")
|
|
97
|
-
return "\n".join(lines)
|
|
98
|
-
|
|
99
|
-
app = App(app_ui, server)
|
|
100
|
-
|
|
101
|
-
|
|
File without changes
|
|
File without changes
|