pycompound 0.1.2__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pycompound-0.1.2/src/pycompound.egg-info → pycompound-0.1.4}/PKG-INFO +1 -1
- {pycompound-0.1.2 → pycompound-0.1.4}/pyproject.toml +1 -1
- {pycompound-0.1.2 → pycompound-0.1.4}/src/app.py +40 -4
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound/plot_spectra.py +15 -10
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound/spec_lib_matching.py +14 -10
- pycompound-0.1.4/src/pycompound/tuning_CLI_DE.py +233 -0
- pycompound-0.1.2/src/pycompound/tuning_CLI.py → pycompound-0.1.4/src/pycompound/tuning_CLI_grid.py +4 -4
- {pycompound-0.1.2 → pycompound-0.1.4/src/pycompound.egg-info}/PKG-INFO +1 -1
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound.egg-info/SOURCES.txt +2 -1
- {pycompound-0.1.2 → pycompound-0.1.4}/tests/test_spec_lib_matching.py +2 -0
- pycompound-0.1.4/tests/test_tuning.py +52 -0
- pycompound-0.1.2/tests/test_tuning.py +0 -21
- {pycompound-0.1.2 → pycompound-0.1.4}/LICENSE +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/README.md +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/setup.cfg +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound/build_library.py +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound/plot_spectra_CLI.py +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound/processing.py +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound/similarity_measures.py +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound/spec_lib_matching_CLI.py +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound.egg-info/dependency_links.txt +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound.egg-info/requires.txt +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound.egg-info/top_level.txt +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/tests/test_build_library.py +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/tests/test_plot_spectra.py +0 -0
- {pycompound-0.1.2 → pycompound-0.1.4}/tests/test_similarity_measures.py +0 -0
|
@@ -25,7 +25,6 @@ import ast
|
|
|
25
25
|
from numbers import Real
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
|
|
29
28
|
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
|
|
30
29
|
|
|
31
30
|
def _run_with_redirects(fn, writer, *args, **kwargs):
|
|
@@ -474,8 +473,15 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
474
473
|
|
|
475
474
|
|
|
476
475
|
|
|
476
|
+
'''
|
|
477
|
+
app_ui = ui.page_fluid(
|
|
478
|
+
ui.output_ui("main_ui"),
|
|
479
|
+
ui.output_text("status_output")
|
|
480
|
+
)
|
|
481
|
+
'''
|
|
477
482
|
|
|
478
483
|
app_ui = ui.page_fluid(
|
|
484
|
+
ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
|
|
479
485
|
ui.output_ui("main_ui"),
|
|
480
486
|
ui.output_text("status_output")
|
|
481
487
|
)
|
|
@@ -688,7 +694,6 @@ def server(input, output, session):
|
|
|
688
694
|
img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
|
|
689
695
|
return img
|
|
690
696
|
|
|
691
|
-
|
|
692
697
|
@output
|
|
693
698
|
@render.ui
|
|
694
699
|
def main_ui():
|
|
@@ -697,6 +702,7 @@ def server(input, output, session):
|
|
|
697
702
|
ui.h2("Main Menu"),
|
|
698
703
|
ui.div(
|
|
699
704
|
ui.output_image("image"),
|
|
705
|
+
#ui.img(src="emblem.png", width="320px", height="250px"),
|
|
700
706
|
style=(
|
|
701
707
|
"position:fixed; top:0; left:50%; transform:translateX(-50%); "
|
|
702
708
|
"z-index:1000; text-align:center; padding:10px; background-color:white;"
|
|
@@ -884,11 +890,18 @@ def server(input, output, session):
|
|
|
884
890
|
weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
|
|
885
891
|
weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
|
|
886
892
|
|
|
893
|
+
high_quality_reference_library_tmp2 = False
|
|
894
|
+
if input.high_quality_reference_library() != 'False':
|
|
895
|
+
high_quality_reference_library_tmp2 = True
|
|
896
|
+
|
|
897
|
+
print(input.high_quality_reference_library())
|
|
898
|
+
print(high_quality_reference_library_tmp2)
|
|
899
|
+
|
|
887
900
|
if input.chromatography_platform() == "HRMS":
|
|
888
|
-
fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=
|
|
901
|
+
fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=high_quality_reference_library_tmp2, mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
|
|
889
902
|
plt.show()
|
|
890
903
|
elif input.chromatography_platform() == "NRMS":
|
|
891
|
-
fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=
|
|
904
|
+
fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=high_quality_reference_library_tmp2, mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
|
|
892
905
|
plt.show()
|
|
893
906
|
with io.BytesIO() as buf:
|
|
894
907
|
fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
|
|
@@ -1115,3 +1128,26 @@ def server(input, output, session):
|
|
|
1115
1128
|
app = App(app_ui, server)
|
|
1116
1129
|
|
|
1117
1130
|
|
|
1131
|
+
|
|
1132
|
+
'''
|
|
1133
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
1134
|
+
from starlette.requests import Request
|
|
1135
|
+
|
|
1136
|
+
class _InjectBaseURLMiddleware(BaseHTTPMiddleware):
|
|
1137
|
+
async def dispatch(self, request: Request, call_next):
|
|
1138
|
+
hdrs = dict(request.scope.get("headers", []))
|
|
1139
|
+
if b"rstudio-connect-app-base-url" not in hdrs:
|
|
1140
|
+
host = request.headers.get("x-forwarded-host") or request.headers.get("host") or ""
|
|
1141
|
+
proto = request.headers.get("x-forwarded-proto") or "https"
|
|
1142
|
+
root_path = (request.scope.get("root_path") or "").rstrip("/")
|
|
1143
|
+
base = f"{proto}://{host}{root_path}"
|
|
1144
|
+
new_headers = list(request.scope.get("headers", [])) + [
|
|
1145
|
+
(b"rstudio-connect-app-base-url", base.encode("utf-8"))
|
|
1146
|
+
]
|
|
1147
|
+
request.scope["headers"] = new_headers
|
|
1148
|
+
return await call_next(request)
|
|
1149
|
+
|
|
1150
|
+
app.starlette_app.add_middleware(_InjectBaseURLMiddleware)
|
|
1151
|
+
'''
|
|
1152
|
+
|
|
1153
|
+
|
|
@@ -92,8 +92,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
92
92
|
print(f'Error: spectrum_preprocessing_order must contain only \'C\', \'F\', \'M\', \'N\', \'L\', \'W\'.')
|
|
93
93
|
sys.exit()
|
|
94
94
|
|
|
95
|
-
if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','
|
|
96
|
-
print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski,
|
|
95
|
+
if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
|
|
96
|
+
print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
|
|
97
97
|
sys.exit()
|
|
98
98
|
|
|
99
99
|
if isinstance(int_min,int) is True:
|
|
@@ -243,10 +243,12 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
243
243
|
r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method=normalization_method)
|
|
244
244
|
if transformation == 'N' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
245
245
|
q_spec = remove_noise(q_spec, nr = noise_threshold)
|
|
246
|
-
|
|
246
|
+
if high_quality_reference_library == False or high_quality_reference_library == 'False':
|
|
247
|
+
r_spec = remove_noise(r_spec, nr = noise_threshold)
|
|
247
248
|
if transformation == 'F' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
248
249
|
q_spec = filter_spec_lcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
|
|
249
|
-
|
|
250
|
+
if high_quality_reference_library == False or high_quality_reference_library == 'False':
|
|
251
|
+
r_spec = filter_spec_lcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
|
|
250
252
|
|
|
251
253
|
q_ints = q_spec[:,1]
|
|
252
254
|
r_ints = r_spec[:,1]
|
|
@@ -291,12 +293,15 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
291
293
|
plt.yticks([])
|
|
292
294
|
|
|
293
295
|
|
|
296
|
+
print('\n\n\n')
|
|
297
|
+
print(high_quality_reference_library)
|
|
298
|
+
print('\n\n\n')
|
|
294
299
|
plt.subplots_adjust(top=0.8, hspace=0.92, bottom=0.3)
|
|
295
300
|
plt.figlegend(loc = 'upper center')
|
|
296
301
|
fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
|
|
297
302
|
fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
|
|
298
303
|
fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
|
|
299
|
-
fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
|
|
304
|
+
fig.text(0.05, 0.09, f'High Quality Reference Library: {str(high_quality_reference_library)}', fontsize=7)
|
|
300
305
|
fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
|
|
301
306
|
fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
|
|
302
307
|
fig.text(0.45, 0.18, f'Raw-Scale M/Z Range: [{mz_min_tmp},{mz_max_tmp}]', fontsize=7)
|
|
@@ -387,8 +392,8 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
387
392
|
print(f'Error: spectrum_preprocessing_order must contain only \'F\', \'N\', \'W\', \'L\'.')
|
|
388
393
|
sys.exit()
|
|
389
394
|
|
|
390
|
-
if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','
|
|
391
|
-
print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski,
|
|
395
|
+
if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
|
|
396
|
+
print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
|
|
392
397
|
sys.exit()
|
|
393
398
|
|
|
394
399
|
if isinstance(int_min,int) is True:
|
|
@@ -528,11 +533,11 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
528
533
|
r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method)
|
|
529
534
|
if transformation == 'N':
|
|
530
535
|
q_spec = remove_noise(q_spec, nr = noise_threshold)
|
|
531
|
-
if high_quality_reference_library == False:
|
|
536
|
+
if high_quality_reference_library == False or high_quality_reference_library == 'False':
|
|
532
537
|
r_spec = remove_noise(r_spec, nr = noise_threshold)
|
|
533
538
|
if transformation == 'F':
|
|
534
539
|
q_spec = filter_spec_gcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
|
|
535
|
-
if high_quality_reference_library == False:
|
|
540
|
+
if high_quality_reference_library == False or high_quality_reference_library == 'False':
|
|
536
541
|
r_spec = filter_spec_gcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
|
|
537
542
|
|
|
538
543
|
if q_spec.shape[0] > 1:
|
|
@@ -580,7 +585,7 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
580
585
|
fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
|
|
581
586
|
fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
|
|
582
587
|
fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
|
|
583
|
-
fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
|
|
588
|
+
fig.text(0.05, 0.06, f'High Quality Reference Library: {str(high_quality_reference_library)}', fontsize=7)
|
|
584
589
|
fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
|
|
585
590
|
fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
|
|
586
591
|
fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
|
|
@@ -37,6 +37,7 @@ def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_id
|
|
|
37
37
|
LET_threshold=LET_threshold_tmp,
|
|
38
38
|
entropy_dimension=entropy_dimension_tmp,
|
|
39
39
|
high_quality_reference_library=high_quality_reference_library_tmp,
|
|
40
|
+
verbose=True
|
|
40
41
|
)
|
|
41
42
|
|
|
42
43
|
return (
|
|
@@ -441,21 +442,25 @@ def tune_params_on_NRMS_data_shiny(query_data=None, reference_data=None, grid=No
|
|
|
441
442
|
|
|
442
443
|
|
|
443
444
|
|
|
444
|
-
def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
|
|
445
|
+
def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
|
|
445
446
|
|
|
446
447
|
n_top_matches_to_save = 1
|
|
447
448
|
|
|
448
449
|
all_similarity_scores = []
|
|
449
450
|
for query_idx in range(0,len(unique_query_ids)):
|
|
450
|
-
|
|
451
|
+
if verbose is True:
|
|
452
|
+
print(f'query spectrum #{query_idx} is being identified')
|
|
451
453
|
q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
|
|
452
454
|
q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
455
|
+
#q_spec_tmp = q_spec_tmp.astype(float)
|
|
453
456
|
|
|
454
457
|
similarity_scores = []
|
|
455
458
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
456
459
|
q_spec = q_spec_tmp
|
|
457
460
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
458
461
|
r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
462
|
+
#print(r_spec)
|
|
463
|
+
#r_spec = r_spec.astype(float)
|
|
459
464
|
|
|
460
465
|
is_matched = False
|
|
461
466
|
for transformation in spectrum_preprocessing_order:
|
|
@@ -529,7 +534,7 @@ def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
529
534
|
|
|
530
535
|
|
|
531
536
|
|
|
532
|
-
def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
|
|
537
|
+
def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
|
|
533
538
|
|
|
534
539
|
n_top_matches_to_save = 1
|
|
535
540
|
|
|
@@ -546,7 +551,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
546
551
|
similarity_scores = []
|
|
547
552
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
548
553
|
q_spec = q_spec_tmp
|
|
549
|
-
if ref_idx % 1000 == 0:
|
|
554
|
+
if verbose is True and ref_idx % 1000 == 0:
|
|
550
555
|
print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
|
|
551
556
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
552
557
|
r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
@@ -615,7 +620,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
615
620
|
|
|
616
621
|
|
|
617
622
|
|
|
618
|
-
def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
|
|
623
|
+
def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False, verbose=True):
|
|
619
624
|
'''
|
|
620
625
|
runs spectral library matching on high-resolution mass spectrometry (HRMS) data
|
|
621
626
|
|
|
@@ -762,14 +767,13 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
|
|
|
762
767
|
|
|
763
768
|
all_similarity_scores = []
|
|
764
769
|
for query_idx in range(0,len(unique_query_ids)):
|
|
765
|
-
|
|
770
|
+
if verbose is True:
|
|
771
|
+
print(f'query spectrum #{query_idx} is being identified')
|
|
766
772
|
q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
|
|
767
773
|
q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
768
774
|
|
|
769
775
|
similarity_scores = []
|
|
770
776
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
771
|
-
#if ref_idx % 100 == 0:
|
|
772
|
-
# print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
|
|
773
777
|
q_spec = q_spec_tmp
|
|
774
778
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
775
779
|
r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
@@ -1008,9 +1012,9 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
|
|
|
1008
1012
|
|
|
1009
1013
|
similarity_scores = []
|
|
1010
1014
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
1011
|
-
|
|
1012
|
-
if ref_idx % 1000 == 0:
|
|
1015
|
+
if verbose is True and ref_idx % 1000 == 0:
|
|
1013
1016
|
print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
|
|
1017
|
+
q_spec = q_spec_tmp
|
|
1014
1018
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
1015
1019
|
r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
1016
1020
|
r_spec = convert_spec(r_spec_tmp,mzs)
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
|
|
2
|
+
#!/usr/bin/env python3
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List, Tuple
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from scipy.optimize import differential_evolution
|
|
11
|
+
from pycompound.spec_lib_matching import get_acc_HRMS, get_acc_NRMS
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
ALL_PARAMS = [
|
|
15
|
+
"window_size_centroiding",
|
|
16
|
+
"window_size_matching",
|
|
17
|
+
"noise_threshold",
|
|
18
|
+
"wf_mz",
|
|
19
|
+
"wf_int",
|
|
20
|
+
"LET_threshold",
|
|
21
|
+
"entropy_dimension"
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
SUGGESTED_BOUNDS = {
|
|
25
|
+
"window_size_centroiding": (0.0, 0.5),
|
|
26
|
+
"window_size_matching": (0.0, 0.5),
|
|
27
|
+
"noise_threshold": (0.0, 0.25),
|
|
28
|
+
"wf_mz": (0.0, 5.0),
|
|
29
|
+
"wf_int": (0.0, 5.0),
|
|
30
|
+
"LET_threshold": (0.0, 5.0),
|
|
31
|
+
"entropy_dimension": (1.0, 3.0)
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
DEFAULT_PARAMS = {
|
|
35
|
+
"window_size_centroiding": 0.5,
|
|
36
|
+
"window_size_matching": 0.5,
|
|
37
|
+
"noise_threshold": 0.10,
|
|
38
|
+
"wf_mz": 0.0,
|
|
39
|
+
"wf_int": 1.0,
|
|
40
|
+
"LET_threshold": 0.0,
|
|
41
|
+
"entropy_dimension": 1.1
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ---------- Utilities ----------
|
|
46
|
+
def parse_bound(s: str) -> Tuple[str, Tuple[float, float]]:
|
|
47
|
+
# "name=min:max" → (name, (min, max))
|
|
48
|
+
if "=" not in s or ":" not in s:
|
|
49
|
+
raise argparse.ArgumentTypeError(f"Bad --bound format '{s}'. Use name=min:max")
|
|
50
|
+
name, rng = s.split("=", 1)
|
|
51
|
+
lo, hi = rng.split(":", 1)
|
|
52
|
+
try:
|
|
53
|
+
lo_f, hi_f = float(lo), float(hi)
|
|
54
|
+
except ValueError as e:
|
|
55
|
+
raise argparse.ArgumentTypeError(f"Non-numeric bound in '{s}': {e}")
|
|
56
|
+
if lo_f > hi_f:
|
|
57
|
+
raise argparse.ArgumentTypeError(f"Lower bound > upper bound in '{s}'")
|
|
58
|
+
return name.strip(), (lo_f, hi_f)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse_default(s: str) -> Tuple[str, float]:
|
|
62
|
+
# "name=value" → (name, value)
|
|
63
|
+
if "=" not in s:
|
|
64
|
+
raise argparse.ArgumentTypeError(f"Bad --default format '{s}'. Use name=value")
|
|
65
|
+
name, val = s.split("=", 1)
|
|
66
|
+
try:
|
|
67
|
+
v = float(val)
|
|
68
|
+
except ValueError as e:
|
|
69
|
+
raise argparse.ArgumentTypeError(f"Non-numeric default in '{s}': {e}")
|
|
70
|
+
return name.strip(), v
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _vector_to_full_params(X: np.ndarray, default_params: Dict[str, float], optimize_params: List[str]) -> Dict[str, float]:
|
|
74
|
+
params = dict(default_params)
|
|
75
|
+
for name, val in zip(optimize_params, X):
|
|
76
|
+
params[name] = float(val)
|
|
77
|
+
return params
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ---------- Objective wrappers (top-level, pickle-friendly) ----------
|
|
81
|
+
def objective_HRMS(X: np.ndarray, ctx: dict) -> float:
|
|
82
|
+
p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
|
|
83
|
+
acc = get_acc_HRMS(
|
|
84
|
+
ctx["df_query"], ctx["df_reference"],
|
|
85
|
+
ctx["uq"], ctx["ur"],
|
|
86
|
+
ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
|
|
87
|
+
ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
|
|
88
|
+
p["window_size_centroiding"], p["window_size_matching"], p["noise_threshold"],
|
|
89
|
+
p["wf_mz"], p["wf_int"], p["LET_threshold"],
|
|
90
|
+
p["entropy_dimension"],
|
|
91
|
+
ctx["high_quality_reference_library"],
|
|
92
|
+
verbose=False
|
|
93
|
+
)
|
|
94
|
+
print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {acc*100}%")
|
|
95
|
+
return 1.0 - acc
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def objective_NRMS(X: np.ndarray, ctx: dict) -> float:
|
|
99
|
+
p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
|
|
100
|
+
acc = get_acc_NRMS(
|
|
101
|
+
ctx["df_query"], ctx["df_reference"],
|
|
102
|
+
ctx["uq"], ctx["ur"],
|
|
103
|
+
ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
|
|
104
|
+
ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
|
|
105
|
+
p["noise_threshold"], p["wf_mz"], p["wf_int"], p["LET_threshold"], p["entropy_dimension"],
|
|
106
|
+
ctx["high_quality_reference_library"],
|
|
107
|
+
verbose=False
|
|
108
|
+
)
|
|
109
|
+
print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {acc*100}%")
|
|
110
|
+
return 1.0 - acc
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# ---------- Main CLI ----------
|
|
114
|
+
def main():
|
|
115
|
+
p = argparse.ArgumentParser(
|
|
116
|
+
description="Parameter tuning via Differential Evolution for HRMS/NRMS using pycompound."
|
|
117
|
+
)
|
|
118
|
+
p.add_argument("--chromatography_platform", choices=["HRMS", "NRMS"], default="HRMS", help="Chromatography Platform.")
|
|
119
|
+
p.add_argument("--query_data", required=True, help="Path to query CSV (must contain 'id' column).")
|
|
120
|
+
p.add_argument("--reference_data", required=True, nargs="+", help="Path(s) to reference CSV(s) (must contain 'id').")
|
|
121
|
+
p.add_argument("--similarity_measure", default="cosine", choices=["cosine", "renyi", "tsallis"], help="Similarity measure.")
|
|
122
|
+
p.add_argument("--weights", default="", help="Weights spec; empty means None.")
|
|
123
|
+
p.add_argument("--spectrum-order", default="CNMWL", help="Spectrum preprocessing order string.")
|
|
124
|
+
p.add_argument("--mz-min", type=float, default=0.0)
|
|
125
|
+
p.add_argument("--mz-max", type=float, default=999_999_999.0)
|
|
126
|
+
p.add_argument("--int-min", type=float, default=0.0)
|
|
127
|
+
p.add_argument("--int-max", type=float, default=999_999_999.0)
|
|
128
|
+
p.add_argument("--hq-ref-lib", action="store_true", help="Use high-quality reference library flag.")
|
|
129
|
+
p.add_argument("--opt", nargs="+", default=["window_size_centroiding", "noise_threshold", "wf_mz", "wf_int"],
|
|
130
|
+
help=f"Parameters to optimize (subset of {ALL_PARAMS}).")
|
|
131
|
+
p.add_argument("--bound", action="append", default=[], type=parse_bound,
|
|
132
|
+
help="Bound spec 'name=min:max'. Repeatable.")
|
|
133
|
+
p.add_argument("--default", dest="defaults", action="append", default=[], type=parse_default,
|
|
134
|
+
help="Override a default 'name=value' for non-optimized params or initial values.")
|
|
135
|
+
p.add_argument("--maxiter", type=int, default=15)
|
|
136
|
+
p.add_argument("--seed", type=int, default=1)
|
|
137
|
+
p.add_argument("--workers", type=int, default=-1, help="Use -1 for all cores; 1 to disable parallelism.")
|
|
138
|
+
args = p.parse_args()
|
|
139
|
+
|
|
140
|
+
unknown = [x for x in args.opt if x not in ALL_PARAMS]
|
|
141
|
+
if unknown:
|
|
142
|
+
sys.exit(f"Error: unknown --opt params: {unknown}")
|
|
143
|
+
|
|
144
|
+
qpath = Path(args.query_data)
|
|
145
|
+
if not qpath.exists():
|
|
146
|
+
sys.exit(f"Query CSV not found: {qpath}")
|
|
147
|
+
|
|
148
|
+
df_query = pd.read_csv(qpath)
|
|
149
|
+
if "id" not in df_query.columns:
|
|
150
|
+
sys.exit("Query CSV must contain an 'id' column.")
|
|
151
|
+
|
|
152
|
+
ref_paths = [Path(pth) for pth in args.reference_data]
|
|
153
|
+
for r in ref_paths:
|
|
154
|
+
if not r.exists():
|
|
155
|
+
sys.exit(f"Reference CSV not found: {r}")
|
|
156
|
+
df_reference = pd.concat([pd.read_csv(r) for r in ref_paths], axis=0, ignore_index=True)
|
|
157
|
+
if "id" not in df_reference.columns:
|
|
158
|
+
sys.exit("Reference CSV must contain an 'id' column.")
|
|
159
|
+
|
|
160
|
+
uq = df_query["id"].unique().tolist()
|
|
161
|
+
ur = df_reference["id"].unique().tolist()
|
|
162
|
+
|
|
163
|
+
default_params = dict(DEFAULT_PARAMS)
|
|
164
|
+
for name, val in args.defaults:
|
|
165
|
+
if name not in DEFAULT_PARAMS:
|
|
166
|
+
sys.exit(f"--default refers to unknown parameter '{name}'. Allowed: {list(DEFAULT_PARAMS)}")
|
|
167
|
+
default_params[name] = val
|
|
168
|
+
|
|
169
|
+
param_bounds: Dict[str, Tuple[float, float]] = dict(SUGGESTED_BOUNDS)
|
|
170
|
+
for name, (lo, hi) in args.bound:
|
|
171
|
+
if name not in SUGGESTED_BOUNDS:
|
|
172
|
+
sys.exit(f"--bound refers to unknown parameter '{name}'. Allowed: {list(SUGGESTED_BOUNDS)}")
|
|
173
|
+
param_bounds[name] = (lo, hi)
|
|
174
|
+
|
|
175
|
+
bounds = [param_bounds[p] for p in args.opt]
|
|
176
|
+
|
|
177
|
+
ctx = dict(
|
|
178
|
+
df_query=df_query,
|
|
179
|
+
df_reference=df_reference,
|
|
180
|
+
uq=uq,
|
|
181
|
+
ur=ur,
|
|
182
|
+
similarity_measure=args.similarity_measure,
|
|
183
|
+
weights=(None if args.weights.strip() == "" else args.weights),
|
|
184
|
+
spectrum_preprocessing_order=args.spectrum_order,
|
|
185
|
+
mz_min=float(args.mz_min),
|
|
186
|
+
mz_max=float(args.mz_max),
|
|
187
|
+
int_min=float(args.int_min),
|
|
188
|
+
int_max=float(args.int_max),
|
|
189
|
+
high_quality_reference_library=bool(args.hq_ref_lib),
|
|
190
|
+
default_params=default_params,
|
|
191
|
+
optimize_params=args.opt,
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
history_acc: List[float] = []
|
|
195
|
+
|
|
196
|
+
def _cb(xk, convergence):
|
|
197
|
+
if args.chromatography_platform == "HRMS":
|
|
198
|
+
acc_pct = (1.0 - objective_HRMS(xk, ctx)) * 100.0
|
|
199
|
+
else:
|
|
200
|
+
acc_pct = (1.0 - objective_NRMS(xk, ctx)) * 100.0
|
|
201
|
+
history_acc.append(acc_pct)
|
|
202
|
+
|
|
203
|
+
objective = objective_HRMS if args.chromatography_platform == "HRMS" else objective_NRMS
|
|
204
|
+
|
|
205
|
+
result = differential_evolution(
|
|
206
|
+
objective,
|
|
207
|
+
bounds=bounds,
|
|
208
|
+
args=(ctx,),
|
|
209
|
+
maxiter=int(args.maxiter),
|
|
210
|
+
tol=0.0,
|
|
211
|
+
seed=int(args.seed),
|
|
212
|
+
workers=int(args.workers),
|
|
213
|
+
callback=_cb,
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
best_params = _vector_to_full_params(result.x, default_params, args.opt)
|
|
217
|
+
best_acc_pct = (1.0 - result.fun) * 100.0
|
|
218
|
+
|
|
219
|
+
print("\n=== Differential Evolution Result ===")
|
|
220
|
+
print(f"Mode: {args.chromatography_platform}")
|
|
221
|
+
print(f"Optimized over: {args.opt}")
|
|
222
|
+
print("Best values (selected params):")
|
|
223
|
+
for name in args.opt:
|
|
224
|
+
print(f" {name}: {best_params[name]}")
|
|
225
|
+
print("\nFull parameter set used in final evaluation:")
|
|
226
|
+
for k in ALL_PARAMS:
|
|
227
|
+
print(f" {k}: {best_params[k]}")
|
|
228
|
+
print(f"\nBest accuracy: {best_acc_pct:.3f}%")
|
|
229
|
+
print(f"DE raw: success={result.success}, nfev={result.nfev}, nit={result.nit}, message='{result.message}'")
|
|
230
|
+
|
|
231
|
+
if __name__ == "__main__":
|
|
232
|
+
main()
|
|
233
|
+
|
pycompound-0.1.2/src/pycompound/tuning_CLI.py → pycompound-0.1.4/src/pycompound/tuning_CLI_grid.py
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
|
-
from pycompound.spec_lib_matching import tune_params_on_HRMS_data
|
|
3
|
-
from pycompound.spec_lib_matching import tune_params_on_NRMS_data
|
|
2
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
|
|
3
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
|
|
4
4
|
import argparse
|
|
5
5
|
import json
|
|
6
6
|
from pathlib import Path
|
|
@@ -61,9 +61,9 @@ grid['entropy_dimension'] = [float(x) for x in grid['entropy_dimension']]
|
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
if args.chromatography_platform == 'HRMS':
|
|
64
|
-
|
|
64
|
+
tune_params_on_HRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
|
|
65
65
|
|
|
66
66
|
if args.chromatography_platform == 'NRMS':
|
|
67
|
-
|
|
67
|
+
tune_params_on_NRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
|
|
68
68
|
|
|
69
69
|
|
|
@@ -9,7 +9,8 @@ src/pycompound/processing.py
|
|
|
9
9
|
src/pycompound/similarity_measures.py
|
|
10
10
|
src/pycompound/spec_lib_matching.py
|
|
11
11
|
src/pycompound/spec_lib_matching_CLI.py
|
|
12
|
-
src/pycompound/tuning_CLI.py
|
|
12
|
+
src/pycompound/tuning_CLI_DE.py
|
|
13
|
+
src/pycompound/tuning_CLI_grid.py
|
|
13
14
|
src/pycompound.egg-info/PKG-INFO
|
|
14
15
|
src/pycompound.egg-info/SOURCES.txt
|
|
15
16
|
src/pycompound.egg-info/dependency_links.txt
|
|
@@ -5,6 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
import os
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
'''
|
|
8
9
|
print('\n\ntest #1:')
|
|
9
10
|
run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', similarity_measure='hello')
|
|
10
11
|
|
|
@@ -34,6 +35,7 @@ run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_lib
|
|
|
34
35
|
|
|
35
36
|
print('\n\ntest #10:')
|
|
36
37
|
run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', normalization_method='tanh')
|
|
38
|
+
'''
|
|
37
39
|
|
|
38
40
|
print('\n\ntest #11:')
|
|
39
41
|
run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', similarity_measure='tsallis', wf_mz=2, wf_intensity=0.5, entropy_dimension=2, n_top_matches_to_save=3, print_id_results=True)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
|
|
2
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
|
|
3
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
|
|
4
|
+
from pycompound.spec_lib_matching import tune_params_DE
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
print('\n\ntest #1:')
|
|
10
|
+
tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
|
|
11
|
+
reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
|
|
12
|
+
output_path=f'{Path.cwd()}/tuning_param_output_test1.txt')
|
|
13
|
+
|
|
14
|
+
print('\n\ntest #2:')
|
|
15
|
+
tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
|
|
16
|
+
reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
|
|
17
|
+
grid={'similarity_measure':['cosine'], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.1,0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]},
|
|
18
|
+
output_path=f'{Path.cwd()}/tuning_param_output_test2.txt')
|
|
19
|
+
|
|
20
|
+
print('\n\ntest #3:')
|
|
21
|
+
tune_params_on_NRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv',
|
|
22
|
+
reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv',
|
|
23
|
+
output_path=f'{Path.cwd()}/tuning_param_output_test3.txt')
|
|
24
|
+
|
|
25
|
+
print('\n\ntest #4:')
|
|
26
|
+
tune_params_on_NRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv',
|
|
27
|
+
reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv',
|
|
28
|
+
grid={'similarity_measure':['cosine','shannon'], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0,0.1], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]},
|
|
29
|
+
output_path=f'{Path.cwd()}/tuning_param_output_test4.txt')
|
|
30
|
+
|
|
31
|
+
print('\n\ntest #5:')
|
|
32
|
+
tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
|
|
33
|
+
reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
|
|
34
|
+
grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.2, 'Shannon':0.2, 'Renyi':0.3, 'Tsallis':0.3},{'Cosine':0.25, 'Shannon':0.25, 'Renyi':0.25, 'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False,True]},
|
|
35
|
+
output_path=f'{Path.cwd()}/tuning_param_output_test5.txt')
|
|
36
|
+
|
|
37
|
+
print('\n\ntest #6:')
|
|
38
|
+
tune_params_DE(query_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_query_data.csv',
|
|
39
|
+
reference_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_reference_data.csv',
|
|
40
|
+
similarity_measure='shannon',
|
|
41
|
+
optimize_params=["window_size_matching","noise_threshold","wf_mz","wf_int"],
|
|
42
|
+
param_bounds={"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0)},
|
|
43
|
+
default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1})
|
|
44
|
+
|
|
45
|
+
print('\n\ntest #7:')
|
|
46
|
+
tune_params_DE(query_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/gcms_query_data.csv',
|
|
47
|
+
reference_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/gcms_reference_data.csv',
|
|
48
|
+
similarity_measure='renyi',
|
|
49
|
+
optimize_params=["wf_mz","wf_int","LET_threshold","entropy_dimension"],
|
|
50
|
+
param_bounds={"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0,5),"entropy_dimension":(1.01,3)},
|
|
51
|
+
default_params={"noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1})
|
|
52
|
+
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
from pycompound.spec_lib_matching import tune_params_on_HRMS_data
|
|
3
|
-
from pycompound.spec_lib_matching import tune_params_on_NRMS_data
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
import os
|
|
6
|
-
|
|
7
|
-
print('\n\ntest #1:')
|
|
8
|
-
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test1.txt')
|
|
9
|
-
|
|
10
|
-
print('\n\ntest #2:')
|
|
11
|
-
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.1,0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test2.txt')
|
|
12
|
-
|
|
13
|
-
print('\n\ntest #3:')
|
|
14
|
-
tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test3.txt')
|
|
15
|
-
|
|
16
|
-
print('\n\ntest #4:')
|
|
17
|
-
tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', grid={'similarity_measure':['cosine','shannon'], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0,0.1], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test4.txt')
|
|
18
|
-
|
|
19
|
-
print('\n\ntest #5:')
|
|
20
|
-
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.2, 'Shannon':0.2, 'Renyi':0.3, 'Tsallis':0.3},{'Cosine':0.25, 'Shannon':0.25, 'Renyi':0.25, 'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False,True]}, output_path=f'{Path.cwd()}/tuning_param_output_test5.txt')
|
|
21
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|