pycompound 0.1.3__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pycompound-0.1.3/src/pycompound.egg-info → pycompound-0.1.4}/PKG-INFO +1 -1
- {pycompound-0.1.3 → pycompound-0.1.4}/pyproject.toml +1 -1
- {pycompound-0.1.3 → pycompound-0.1.4}/src/app.py +31 -2
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound/spec_lib_matching.py +14 -10
- pycompound-0.1.4/src/pycompound/tuning_CLI_DE.py +233 -0
- pycompound-0.1.3/src/pycompound/tuning_CLI.py → pycompound-0.1.4/src/pycompound/tuning_CLI_grid.py +4 -4
- {pycompound-0.1.3 → pycompound-0.1.4/src/pycompound.egg-info}/PKG-INFO +1 -1
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound.egg-info/SOURCES.txt +2 -1
- {pycompound-0.1.3 → pycompound-0.1.4}/tests/test_spec_lib_matching.py +2 -0
- pycompound-0.1.4/tests/test_tuning.py +52 -0
- pycompound-0.1.3/tests/test_tuning.py +0 -21
- {pycompound-0.1.3 → pycompound-0.1.4}/LICENSE +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/README.md +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/setup.cfg +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound/build_library.py +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound/plot_spectra.py +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound/plot_spectra_CLI.py +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound/processing.py +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound/similarity_measures.py +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound/spec_lib_matching_CLI.py +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound.egg-info/dependency_links.txt +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound.egg-info/requires.txt +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/src/pycompound.egg-info/top_level.txt +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/tests/test_build_library.py +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/tests/test_plot_spectra.py +0 -0
- {pycompound-0.1.3 → pycompound-0.1.4}/tests/test_similarity_measures.py +0 -0
|
@@ -25,7 +25,6 @@ import ast
|
|
|
25
25
|
from numbers import Real
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
|
|
29
28
|
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
|
|
30
29
|
|
|
31
30
|
def _run_with_redirects(fn, writer, *args, **kwargs):
|
|
@@ -474,8 +473,15 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
474
473
|
|
|
475
474
|
|
|
476
475
|
|
|
476
|
+
'''
|
|
477
|
+
app_ui = ui.page_fluid(
|
|
478
|
+
ui.output_ui("main_ui"),
|
|
479
|
+
ui.output_text("status_output")
|
|
480
|
+
)
|
|
481
|
+
'''
|
|
477
482
|
|
|
478
483
|
app_ui = ui.page_fluid(
|
|
484
|
+
ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
|
|
479
485
|
ui.output_ui("main_ui"),
|
|
480
486
|
ui.output_text("status_output")
|
|
481
487
|
)
|
|
@@ -688,7 +694,6 @@ def server(input, output, session):
|
|
|
688
694
|
img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
|
|
689
695
|
return img
|
|
690
696
|
|
|
691
|
-
|
|
692
697
|
@output
|
|
693
698
|
@render.ui
|
|
694
699
|
def main_ui():
|
|
@@ -697,6 +702,7 @@ def server(input, output, session):
|
|
|
697
702
|
ui.h2("Main Menu"),
|
|
698
703
|
ui.div(
|
|
699
704
|
ui.output_image("image"),
|
|
705
|
+
#ui.img(src="emblem.png", width="320px", height="250px"),
|
|
700
706
|
style=(
|
|
701
707
|
"position:fixed; top:0; left:50%; transform:translateX(-50%); "
|
|
702
708
|
"z-index:1000; text-align:center; padding:10px; background-color:white;"
|
|
@@ -1122,3 +1128,26 @@ def server(input, output, session):
|
|
|
1122
1128
|
app = App(app_ui, server)
|
|
1123
1129
|
|
|
1124
1130
|
|
|
1131
|
+
|
|
1132
|
+
'''
|
|
1133
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
1134
|
+
from starlette.requests import Request
|
|
1135
|
+
|
|
1136
|
+
class _InjectBaseURLMiddleware(BaseHTTPMiddleware):
|
|
1137
|
+
async def dispatch(self, request: Request, call_next):
|
|
1138
|
+
hdrs = dict(request.scope.get("headers", []))
|
|
1139
|
+
if b"rstudio-connect-app-base-url" not in hdrs:
|
|
1140
|
+
host = request.headers.get("x-forwarded-host") or request.headers.get("host") or ""
|
|
1141
|
+
proto = request.headers.get("x-forwarded-proto") or "https"
|
|
1142
|
+
root_path = (request.scope.get("root_path") or "").rstrip("/")
|
|
1143
|
+
base = f"{proto}://{host}{root_path}"
|
|
1144
|
+
new_headers = list(request.scope.get("headers", [])) + [
|
|
1145
|
+
(b"rstudio-connect-app-base-url", base.encode("utf-8"))
|
|
1146
|
+
]
|
|
1147
|
+
request.scope["headers"] = new_headers
|
|
1148
|
+
return await call_next(request)
|
|
1149
|
+
|
|
1150
|
+
app.starlette_app.add_middleware(_InjectBaseURLMiddleware)
|
|
1151
|
+
'''
|
|
1152
|
+
|
|
1153
|
+
|
|
@@ -37,6 +37,7 @@ def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_id
|
|
|
37
37
|
LET_threshold=LET_threshold_tmp,
|
|
38
38
|
entropy_dimension=entropy_dimension_tmp,
|
|
39
39
|
high_quality_reference_library=high_quality_reference_library_tmp,
|
|
40
|
+
verbose=True
|
|
40
41
|
)
|
|
41
42
|
|
|
42
43
|
return (
|
|
@@ -441,21 +442,25 @@ def tune_params_on_NRMS_data_shiny(query_data=None, reference_data=None, grid=No
|
|
|
441
442
|
|
|
442
443
|
|
|
443
444
|
|
|
444
|
-
def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
|
|
445
|
+
def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
|
|
445
446
|
|
|
446
447
|
n_top_matches_to_save = 1
|
|
447
448
|
|
|
448
449
|
all_similarity_scores = []
|
|
449
450
|
for query_idx in range(0,len(unique_query_ids)):
|
|
450
|
-
|
|
451
|
+
if verbose is True:
|
|
452
|
+
print(f'query spectrum #{query_idx} is being identified')
|
|
451
453
|
q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
|
|
452
454
|
q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
455
|
+
#q_spec_tmp = q_spec_tmp.astype(float)
|
|
453
456
|
|
|
454
457
|
similarity_scores = []
|
|
455
458
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
456
459
|
q_spec = q_spec_tmp
|
|
457
460
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
458
461
|
r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
462
|
+
#print(r_spec)
|
|
463
|
+
#r_spec = r_spec.astype(float)
|
|
459
464
|
|
|
460
465
|
is_matched = False
|
|
461
466
|
for transformation in spectrum_preprocessing_order:
|
|
@@ -529,7 +534,7 @@ def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
529
534
|
|
|
530
535
|
|
|
531
536
|
|
|
532
|
-
def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
|
|
537
|
+
def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
|
|
533
538
|
|
|
534
539
|
n_top_matches_to_save = 1
|
|
535
540
|
|
|
@@ -546,7 +551,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
546
551
|
similarity_scores = []
|
|
547
552
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
548
553
|
q_spec = q_spec_tmp
|
|
549
|
-
if ref_idx % 1000 == 0:
|
|
554
|
+
if verbose is True and ref_idx % 1000 == 0:
|
|
550
555
|
print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
|
|
551
556
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
552
557
|
r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
@@ -615,7 +620,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
615
620
|
|
|
616
621
|
|
|
617
622
|
|
|
618
|
-
def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
|
|
623
|
+
def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False, verbose=True):
|
|
619
624
|
'''
|
|
620
625
|
runs spectral library matching on high-resolution mass spectrometry (HRMS) data
|
|
621
626
|
|
|
@@ -762,14 +767,13 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
|
|
|
762
767
|
|
|
763
768
|
all_similarity_scores = []
|
|
764
769
|
for query_idx in range(0,len(unique_query_ids)):
|
|
765
|
-
|
|
770
|
+
if verbose is True:
|
|
771
|
+
print(f'query spectrum #{query_idx} is being identified')
|
|
766
772
|
q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
|
|
767
773
|
q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
768
774
|
|
|
769
775
|
similarity_scores = []
|
|
770
776
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
771
|
-
#if ref_idx % 100 == 0:
|
|
772
|
-
# print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
|
|
773
777
|
q_spec = q_spec_tmp
|
|
774
778
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
775
779
|
r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
@@ -1008,9 +1012,9 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
|
|
|
1008
1012
|
|
|
1009
1013
|
similarity_scores = []
|
|
1010
1014
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
1011
|
-
|
|
1012
|
-
if ref_idx % 1000 == 0:
|
|
1015
|
+
if verbose is True and ref_idx % 1000 == 0:
|
|
1013
1016
|
print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
|
|
1017
|
+
q_spec = q_spec_tmp
|
|
1014
1018
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
1015
1019
|
r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
1016
1020
|
r_spec = convert_spec(r_spec_tmp,mzs)
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
|
|
2
|
+
#!/usr/bin/env python3
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List, Tuple
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from scipy.optimize import differential_evolution
|
|
11
|
+
from pycompound.spec_lib_matching import get_acc_HRMS, get_acc_NRMS
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Single table describing every tunable parameter so that the three public
# constants below cannot drift apart.
# Columns: name, suggested (lower, upper) search bounds, default value.
_PARAM_TABLE = (
    ("window_size_centroiding", (0.0, 0.5), 0.5),
    ("window_size_matching", (0.0, 0.5), 0.5),
    ("noise_threshold", (0.0, 0.25), 0.10),
    ("wf_mz", (0.0, 5.0), 0.0),
    ("wf_int", (0.0, 5.0), 1.0),
    ("LET_threshold", (0.0, 5.0), 0.0),
    ("entropy_dimension", (1.0, 3.0), 1.1),
)

# Names of all parameters that may be passed to --opt, in reporting order.
ALL_PARAMS = [name for name, _bounds, _default in _PARAM_TABLE]

# Search interval used for a parameter unless a --bound override is given.
SUGGESTED_BOUNDS = {name: bounds for name, bounds, _default in _PARAM_TABLE}

# Value used for a parameter that is not optimized, unless overridden by --default.
DEFAULT_PARAMS = {name: default for name, _bounds, default in _PARAM_TABLE}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ---------- Utilities ----------
|
|
46
|
+
def parse_bound(s: str) -> Tuple[str, Tuple[float, float]]:
    """Parse one ``--bound`` value of the form ``name=min:max``.

    Intended for use as an argparse ``type=`` callable.

    Args:
        s: Raw command-line text, e.g. ``"wf_mz=0:5"``.

    Returns:
        ``(name, (lower, upper))`` with surrounding whitespace stripped from
        ``name`` and both limits converted to ``float``.

    Raises:
        argparse.ArgumentTypeError: if the text is not ``name=min:max``, a
            limit is not a finite number, or ``min`` exceeds ``max``.
    """
    # Split positionally: the ':' must come *after* the '=' to be the range
    # separator.  Merely checking that both characters occur somewhere lets
    # input such as "a:1=2" through to an unpacking error.
    name, eq, rng = s.partition("=")
    lo, colon, hi = rng.partition(":")
    if not eq or not colon:
        raise argparse.ArgumentTypeError(f"Bad --bound format '{s}'. Use name=min:max")
    try:
        lo_f, hi_f = float(lo), float(hi)
    except ValueError as e:
        raise argparse.ArgumentTypeError(f"Non-numeric bound in '{s}': {e}") from e
    # float() accepts "nan"/"inf", but the optimizer needs finite limits, and
    # NaN would also slip past the ordering check below (nan > x is False).
    if not (np.isfinite(lo_f) and np.isfinite(hi_f)):
        raise argparse.ArgumentTypeError(f"Non-finite bound in '{s}'")
    if lo_f > hi_f:
        raise argparse.ArgumentTypeError(f"Lower bound > upper bound in '{s}'")
    return name.strip(), (lo_f, hi_f)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse_default(s: str) -> Tuple[str, float]:
    """Parse one ``--default`` value of the form ``name=value``.

    Intended for use as an argparse ``type=`` callable.

    Args:
        s: Raw command-line text, e.g. ``"noise_threshold=0.05"``.

    Returns:
        ``(name, value)`` with surrounding whitespace stripped from ``name``
        and ``value`` converted to ``float``.

    Raises:
        argparse.ArgumentTypeError: if there is no ``=`` or the value is not
            a finite number.
    """
    name, eq, val = s.partition("=")
    if not eq:
        raise argparse.ArgumentTypeError(f"Bad --default format '{s}'. Use name=value")
    try:
        v = float(val)
    except ValueError as e:
        raise argparse.ArgumentTypeError(f"Non-numeric default in '{s}': {e}") from e
    # float() happily parses "nan" and "inf"; such a default would be fed to
    # every accuracy evaluation, so reject it at the command line instead.
    if not np.isfinite(v):
        raise argparse.ArgumentTypeError(f"Non-finite default in '{s}'")
    return name.strip(), v
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _vector_to_full_params(X: np.ndarray, default_params: Dict[str, float], optimize_params: List[str]) -> Dict[str, float]:
|
|
74
|
+
params = dict(default_params)
|
|
75
|
+
for name, val in zip(optimize_params, X):
|
|
76
|
+
params[name] = float(val)
|
|
77
|
+
return params
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ---------- Objective wrappers (top-level, pickle-friendly) ----------
|
|
81
|
+
def objective_HRMS(X: np.ndarray, ctx: dict) -> float:
    """Objective for the optimizer on HRMS data: ``1 - accuracy``.

    Args:
        X: Candidate values for the parameters named in
            ``ctx["optimize_params"]``.
        ctx: Shared settings built by ``main`` (data frames, id lists,
            fixed matching options, defaults and the list of optimized names).

    Returns:
        ``1.0 - acc`` where ``acc`` is whatever ``get_acc_HRMS`` returns
        (presumably an accuracy fraction in [0, 1] -- TODO confirm), so that
        minimizing this value maximizes accuracy.
    """
    tuned = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])

    # Positional arguments in the order get_acc_HRMS declares them:
    # data and fixed options first, then the seven tunable values.
    fixed_args = (
        ctx["df_query"],
        ctx["df_reference"],
        ctx["uq"],
        ctx["ur"],
        ctx["similarity_measure"],
        ctx["weights"],
        ctx["spectrum_preprocessing_order"],
        ctx["mz_min"],
        ctx["mz_max"],
        ctx["int_min"],
        ctx["int_max"],
    )
    tuned_args = (
        tuned["window_size_centroiding"],
        tuned["window_size_matching"],
        tuned["noise_threshold"],
        tuned["wf_mz"],
        tuned["wf_int"],
        tuned["LET_threshold"],
        tuned["entropy_dimension"],
    )
    accuracy = get_acc_HRMS(
        *fixed_args,
        *tuned_args,
        ctx["high_quality_reference_library"],
        verbose=False,  # suppress per-query progress output during tuning
    )

    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {accuracy*100}%")
    return 1.0 - accuracy
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def objective_NRMS(X: np.ndarray, ctx: dict) -> float:
    """Objective for the optimizer on NRMS data: ``1 - accuracy``.

    Args:
        X: Candidate values for the parameters named in
            ``ctx["optimize_params"]``.
        ctx: Shared settings built by ``main`` (data frames, id lists,
            fixed matching options, defaults and the list of optimized names).

    Returns:
        ``1.0 - acc`` where ``acc`` is whatever ``get_acc_NRMS`` returns
        (presumably an accuracy fraction in [0, 1] -- TODO confirm), so that
        minimizing this value maximizes accuracy.
    """
    tuned = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])

    # Positional arguments in the order get_acc_NRMS declares them.  Note that
    # the two window-size parameters are not part of the NRMS call.
    fixed_args = (
        ctx["df_query"],
        ctx["df_reference"],
        ctx["uq"],
        ctx["ur"],
        ctx["similarity_measure"],
        ctx["weights"],
        ctx["spectrum_preprocessing_order"],
        ctx["mz_min"],
        ctx["mz_max"],
        ctx["int_min"],
        ctx["int_max"],
    )
    tuned_args = (
        tuned["noise_threshold"],
        tuned["wf_mz"],
        tuned["wf_int"],
        tuned["LET_threshold"],
        tuned["entropy_dimension"],
    )
    accuracy = get_acc_NRMS(
        *fixed_args,
        *tuned_args,
        ctx["high_quality_reference_library"],
        verbose=False,  # suppress per-query progress output during tuning
    )

    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {accuracy*100}%")
    return 1.0 - accuracy
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# ---------- Main CLI ----------
|
|
114
|
+
def main():
    """Command-line entry point: tune matching parameters with Differential Evolution.

    Steps:
      1. Parse the command line and validate ``--opt``, ``--bound`` and
         ``--default`` against the known parameter names.
      2. Load the query CSV and one or more reference CSVs (each must have
         an ``id`` column) and collect the unique ids.
      3. Minimize ``1 - accuracy`` with ``scipy.optimize.differential_evolution``
         over the selected parameters, holding the others at their defaults.
      4. Print the best parameter values and the accuracy they achieve.

    Exits via ``sys.exit`` with a message on any invalid input.
    """
    parser = argparse.ArgumentParser(
        description="Parameter tuning via Differential Evolution for HRMS/NRMS using pycompound."
    )
    parser.add_argument("--chromatography_platform", choices=["HRMS", "NRMS"], default="HRMS", help="Chromatography Platform.")
    parser.add_argument("--query_data", required=True, help="Path to query CSV (must contain 'id' column).")
    parser.add_argument("--reference_data", required=True, nargs="+", help="Path(s) to reference CSV(s) (must contain 'id').")
    # "shannon" is accepted by the same matching code elsewhere in the package
    # (the tuning tests use it), so it must be selectable here as well.
    parser.add_argument("--similarity_measure", default="cosine", choices=["cosine", "shannon", "renyi", "tsallis"], help="Similarity measure.")
    parser.add_argument("--weights", default="", help="Weights spec; empty means None.")
    parser.add_argument("--spectrum-order", default="CNMWL", help="Spectrum preprocessing order string.")
    parser.add_argument("--mz-min", type=float, default=0.0)
    parser.add_argument("--mz-max", type=float, default=999_999_999.0)
    parser.add_argument("--int-min", type=float, default=0.0)
    parser.add_argument("--int-max", type=float, default=999_999_999.0)
    parser.add_argument("--hq-ref-lib", action="store_true", help="Use high-quality reference library flag.")
    parser.add_argument("--opt", nargs="+", default=["window_size_centroiding", "noise_threshold", "wf_mz", "wf_int"],
                        help=f"Parameters to optimize (subset of {ALL_PARAMS}).")
    parser.add_argument("--bound", action="append", default=[], type=parse_bound,
                        help="Bound spec 'name=min:max'. Repeatable.")
    parser.add_argument("--default", dest="defaults", action="append", default=[], type=parse_default,
                        help="Override a default 'name=value' for non-optimized params or initial values.")
    parser.add_argument("--maxiter", type=int, default=15)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--workers", type=int, default=-1, help="Use -1 for all cores; 1 to disable parallelism.")
    args = parser.parse_args()

    # ---- validate the list of parameters to optimize ----
    unknown = [x for x in args.opt if x not in ALL_PARAMS]
    if unknown:
        sys.exit(f"Error: unknown --opt params: {unknown}")

    # A repeated name would add a redundant search dimension whose value is
    # overwritten by the later occurrence when the vector is expanded.
    duplicated = sorted({x for x in args.opt if args.opt.count(x) > 1})
    if duplicated:
        sys.exit(f"Error: duplicate --opt params: {duplicated}")

    # objective_NRMS never passes the window sizes on, so optimizing them for
    # NRMS only wastes evaluations.  Warn rather than fail, because the
    # default --opt list contains window_size_centroiding.
    if args.chromatography_platform == "NRMS":
        unused = [x for x in args.opt if x in ("window_size_centroiding", "window_size_matching")]
        if unused:
            print(f"Warning: {unused} have no effect on NRMS accuracy but are being optimized.", file=sys.stderr)

    # ---- load data ----
    qpath = Path(args.query_data)
    if not qpath.exists():
        sys.exit(f"Query CSV not found: {qpath}")

    df_query = pd.read_csv(qpath)
    if "id" not in df_query.columns:
        sys.exit("Query CSV must contain an 'id' column.")

    ref_paths = [Path(pth) for pth in args.reference_data]
    for r in ref_paths:
        if not r.exists():
            sys.exit(f"Reference CSV not found: {r}")
    df_reference = pd.concat([pd.read_csv(r) for r in ref_paths], axis=0, ignore_index=True)
    if "id" not in df_reference.columns:
        sys.exit("Reference CSV must contain an 'id' column.")

    uq = df_query["id"].unique().tolist()
    ur = df_reference["id"].unique().tolist()

    # ---- defaults and bounds, with command-line overrides ----
    default_params = dict(DEFAULT_PARAMS)
    for name, val in args.defaults:
        if name not in DEFAULT_PARAMS:
            sys.exit(f"--default refers to unknown parameter '{name}'. Allowed: {list(DEFAULT_PARAMS)}")
        default_params[name] = val

    param_bounds: Dict[str, Tuple[float, float]] = dict(SUGGESTED_BOUNDS)
    for name, (lo, hi) in args.bound:
        if name not in SUGGESTED_BOUNDS:
            sys.exit(f"--bound refers to unknown parameter '{name}'. Allowed: {list(SUGGESTED_BOUNDS)}")
        param_bounds[name] = (lo, hi)

    bounds = [param_bounds[name] for name in args.opt]

    # Everything the objective needs besides the candidate vector.  Kept as a
    # plain dict so it can be sent to worker processes.
    ctx = dict(
        df_query=df_query,
        df_reference=df_reference,
        uq=uq,
        ur=ur,
        similarity_measure=args.similarity_measure,
        # NOTE(review): the weights text is forwarded unparsed -- confirm the
        # form get_acc_HRMS/get_acc_NRMS expect for a non-empty value.
        weights=(None if args.weights.strip() == "" else args.weights),
        spectrum_preprocessing_order=args.spectrum_order,
        mz_min=args.mz_min,
        mz_max=args.mz_max,
        int_min=args.int_min,
        int_max=args.int_max,
        high_quality_reference_library=args.hq_ref_lib,
        default_params=default_params,
        optimize_params=args.opt,
    )

    objective = objective_HRMS if args.chromatography_platform == "HRMS" else objective_NRMS

    # No per-generation callback: re-scoring the incumbent there would cost a
    # full extra accuracy evaluation per generation, and every evaluation
    # already prints its own accuracy.
    result = differential_evolution(
        objective,
        bounds=bounds,
        args=(ctx,),
        maxiter=args.maxiter,
        tol=0.0,
        seed=args.seed,
        workers=args.workers,
        # Parallel workers require deferred updating; requesting it explicitly
        # avoids SciPy's "overridden updating" warning.
        updating="immediate" if args.workers == 1 else "deferred",
    )

    best_params = _vector_to_full_params(result.x, default_params, args.opt)
    best_acc_pct = (1.0 - result.fun) * 100.0

    print("\n=== Differential Evolution Result ===")
    print(f"Mode: {args.chromatography_platform}")
    print(f"Optimized over: {args.opt}")
    print("Best values (selected params):")
    for name in args.opt:
        print(f"  {name}: {best_params[name]}")
    print("\nFull parameter set used in final evaluation:")
    for k in ALL_PARAMS:
        print(f"  {k}: {best_params[k]}")
    print(f"\nBest accuracy: {best_acc_pct:.3f}%")
    print(f"DE raw: success={result.success}, nfev={result.nfev}, nit={result.nit}, message='{result.message}'")


if __name__ == "__main__":
    main()
|
|
233
|
+
|
pycompound-0.1.3/src/pycompound/tuning_CLI.py → pycompound-0.1.4/src/pycompound/tuning_CLI_grid.py
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
|
-
from pycompound.spec_lib_matching import
|
|
3
|
-
from pycompound.spec_lib_matching import
|
|
2
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
|
|
3
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
|
|
4
4
|
import argparse
|
|
5
5
|
import json
|
|
6
6
|
from pathlib import Path
|
|
@@ -61,9 +61,9 @@ grid['entropy_dimension'] = [float(x) for x in grid['entropy_dimension']]
|
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
if args.chromatography_platform == 'HRMS':
|
|
64
|
-
|
|
64
|
+
tune_params_on_HRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
|
|
65
65
|
|
|
66
66
|
if args.chromatography_platform == 'NRMS':
|
|
67
|
-
|
|
67
|
+
tune_params_on_NRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
|
|
68
68
|
|
|
69
69
|
|
|
@@ -9,7 +9,8 @@ src/pycompound/processing.py
|
|
|
9
9
|
src/pycompound/similarity_measures.py
|
|
10
10
|
src/pycompound/spec_lib_matching.py
|
|
11
11
|
src/pycompound/spec_lib_matching_CLI.py
|
|
12
|
-
src/pycompound/
|
|
12
|
+
src/pycompound/tuning_CLI_DE.py
|
|
13
|
+
src/pycompound/tuning_CLI_grid.py
|
|
13
14
|
src/pycompound.egg-info/PKG-INFO
|
|
14
15
|
src/pycompound.egg-info/SOURCES.txt
|
|
15
16
|
src/pycompound.egg-info/dependency_links.txt
|
|
@@ -5,6 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
import os
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
'''
|
|
8
9
|
print('\n\ntest #1:')
|
|
9
10
|
run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', similarity_measure='hello')
|
|
10
11
|
|
|
@@ -34,6 +35,7 @@ run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_lib
|
|
|
34
35
|
|
|
35
36
|
print('\n\ntest #10:')
|
|
36
37
|
run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', normalization_method='tanh')
|
|
38
|
+
'''
|
|
37
39
|
|
|
38
40
|
print('\n\ntest #11:')
|
|
39
41
|
run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', similarity_measure='tsallis', wf_mz=2, wf_intensity=0.5, entropy_dimension=2, n_top_matches_to_save=3, print_id_results=True)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
|
|
2
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
|
|
3
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
|
|
4
|
+
from pycompound.spec_lib_matching import tune_params_DE
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _in_cwd(relative):
    # Build the path exactly as the inline f-strings did, re-reading the
    # working directory on every use.
    return f'{Path.cwd()}/{relative}'


def _banner(number):
    # Separator printed before each numbered test.
    print(f'\n\ntest #{number}:')


LCMS_QUERY = 'data/tuning/lcms_query_library.csv'
LCMS_REFERENCE = 'data/lcms_reference_library.csv'
GCMS_QUERY = 'data/tuning/gcms_query_library.csv'
GCMS_REFERENCE = 'data/gcms_reference_library.csv'
FILTERED_DIR = 'data/tuning/tuning_data/filtered'


# Grid search on HRMS data with the library's default grid.
_banner(1)
tune_params_on_HRMS_data_grid(
    query_data=_in_cwd(LCMS_QUERY),
    reference_data=_in_cwd(LCMS_REFERENCE),
    output_path=_in_cwd('tuning_param_output_test1.txt'),
)

# Grid search on HRMS data varying only window_size_matching.
_banner(2)
hrms_grid_matching_window = {
    'similarity_measure': ['cosine'],
    'spectrum_preprocessing_order': ['FCNMWL'],
    'mz_min': [0],
    'mz_max': [9999999],
    'int_min': [0],
    'int_max': [99999999],
    'window_size_centroiding': [0.5],
    'window_size_matching': [0.1, 0.5],
    'noise_threshold': [0.0],
    'wf_mz': [0.0],
    'wf_int': [1.0],
    'LET_threshold': [0.0],
    'entropy_dimension': [1.1],
    'high_quality_reference_library': [False],
}
tune_params_on_HRMS_data_grid(
    query_data=_in_cwd(LCMS_QUERY),
    reference_data=_in_cwd(LCMS_REFERENCE),
    grid=hrms_grid_matching_window,
    output_path=_in_cwd('tuning_param_output_test2.txt'),
)

# Grid search on NRMS data with the library's default grid.
_banner(3)
tune_params_on_NRMS_data_grid(
    query_data=_in_cwd(GCMS_QUERY),
    reference_data=_in_cwd(GCMS_REFERENCE),
    output_path=_in_cwd('tuning_param_output_test3.txt'),
)

# Grid search on NRMS data over two measures, noise thresholds and LET thresholds.
_banner(4)
nrms_grid = {
    'similarity_measure': ['cosine', 'shannon'],
    'spectrum_preprocessing_order': ['FNLW'],
    'mz_min': [0],
    'mz_max': [9999999],
    'int_min': [0],
    'int_max': [99999999],
    'noise_threshold': [0.0, 0.1],
    'wf_mz': [0.0],
    'wf_int': [1.0],
    'LET_threshold': [0.0, 3.0],
    'entropy_dimension': [1.1],
    'high_quality_reference_library': [False],
}
tune_params_on_NRMS_data_grid(
    query_data=_in_cwd(GCMS_QUERY),
    reference_data=_in_cwd(GCMS_REFERENCE),
    grid=nrms_grid,
    output_path=_in_cwd('tuning_param_output_test4.txt'),
)

# Grid search on HRMS data including weight dictionaries and both library-quality flags.
_banner(5)
hrms_grid_weights = {
    'similarity_measure': ['cosine'],
    'weight': [
        {'Cosine': 0.2, 'Shannon': 0.2, 'Renyi': 0.3, 'Tsallis': 0.3},
        {'Cosine': 0.25, 'Shannon': 0.25, 'Renyi': 0.25, 'Tsallis': 0.25},
    ],
    'spectrum_preprocessing_order': ['FCNMWL'],
    'mz_min': [0],
    'mz_max': [9999999],
    'int_min': [0],
    'int_max': [99999999],
    'window_size_centroiding': [0.5],
    'window_size_matching': [0.5],
    'noise_threshold': [0.0],
    'wf_mz': [0.0],
    'wf_int': [1.0],
    'LET_threshold': [0.0, 3],
    'entropy_dimension': [1.1],
    'high_quality_reference_library': [False, True],
}
tune_params_on_HRMS_data_grid(
    query_data=_in_cwd(LCMS_QUERY),
    reference_data=_in_cwd(LCMS_REFERENCE),
    grid=hrms_grid_weights,
    output_path=_in_cwd('tuning_param_output_test5.txt'),
)

# Differential evolution on the filtered LC-MS tuning data.
_banner(6)
tune_params_DE(
    query_data=_in_cwd(f'{FILTERED_DIR}/lcms_query_data.csv'),
    reference_data=_in_cwd(f'{FILTERED_DIR}/lcms_reference_data.csv'),
    similarity_measure='shannon',
    optimize_params=["window_size_matching", "noise_threshold", "wf_mz", "wf_int"],
    param_bounds={
        "window_size_matching": (0.0, 0.5),
        "noise_threshold": (0.0, 0.25),
        "wf_mz": (0.0, 5.0),
        "wf_int": (0.0, 5.0),
    },
    default_params={
        "window_size_centroiding": 0.5,
        "window_size_matching": 0.5,
        "noise_threshold": 0.10,
        "wf_mz": 0.0,
        "wf_int": 1.0,
        "LET_threshold": 0.0,
        "entropy_dimension": 1.1,
    },
)

# Differential evolution on the filtered GC-MS tuning data.
_banner(7)
tune_params_DE(
    query_data=_in_cwd(f'{FILTERED_DIR}/gcms_query_data.csv'),
    reference_data=_in_cwd(f'{FILTERED_DIR}/gcms_reference_data.csv'),
    similarity_measure='renyi',
    optimize_params=["wf_mz", "wf_int", "LET_threshold", "entropy_dimension"],
    param_bounds={
        "wf_mz": (0.0, 5.0),
        "wf_int": (0.0, 5.0),
        "LET_threshold": (0, 5),
        "entropy_dimension": (1.01, 3),
    },
    default_params={
        "noise_threshold": 0.10,
        "wf_mz": 0.0,
        "wf_int": 1.0,
        "LET_threshold": 0.0,
        "entropy_dimension": 1.1,
    },
)
|
|
52
|
+
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
from pycompound.spec_lib_matching import tune_params_on_HRMS_data
|
|
3
|
-
from pycompound.spec_lib_matching import tune_params_on_NRMS_data
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
import os
|
|
6
|
-
|
|
7
|
-
print('\n\ntest #1:')
|
|
8
|
-
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test1.txt')
|
|
9
|
-
|
|
10
|
-
print('\n\ntest #2:')
|
|
11
|
-
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.1,0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test2.txt')
|
|
12
|
-
|
|
13
|
-
print('\n\ntest #3:')
|
|
14
|
-
tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test3.txt')
|
|
15
|
-
|
|
16
|
-
print('\n\ntest #4:')
|
|
17
|
-
tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', grid={'similarity_measure':['cosine','shannon'], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0,0.1], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test4.txt')
|
|
18
|
-
|
|
19
|
-
print('\n\ntest #5:')
|
|
20
|
-
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.2, 'Shannon':0.2, 'Renyi':0.3, 'Tsallis':0.3},{'Cosine':0.25, 'Shannon':0.25, 'Renyi':0.25, 'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False,True]}, output_path=f'{Path.cwd()}/tuning_param_output_test5.txt')
|
|
21
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|