pycompound 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app.py CHANGED
@@ -25,7 +25,6 @@ import ast
25
25
  from numbers import Real
26
26
 
27
27
 
28
-
29
28
  _LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
30
29
 
31
30
  def _run_with_redirects(fn, writer, *args, **kwargs):
@@ -474,8 +473,15 @@ def run_parameter_tuning_ui(platform: str):
474
473
 
475
474
 
476
475
 
476
+ '''
477
+ app_ui = ui.page_fluid(
478
+ ui.output_ui("main_ui"),
479
+ ui.output_text("status_output")
480
+ )
481
+ '''
477
482
 
478
483
  app_ui = ui.page_fluid(
484
+ ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
479
485
  ui.output_ui("main_ui"),
480
486
  ui.output_text("status_output")
481
487
  )
@@ -688,7 +694,6 @@ def server(input, output, session):
688
694
  img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
689
695
  return img
690
696
 
691
-
692
697
  @output
693
698
  @render.ui
694
699
  def main_ui():
@@ -697,6 +702,7 @@ def server(input, output, session):
697
702
  ui.h2("Main Menu"),
698
703
  ui.div(
699
704
  ui.output_image("image"),
705
+ #ui.img(src="emblem.png", width="320px", height="250px"),
700
706
  style=(
701
707
  "position:fixed; top:0; left:50%; transform:translateX(-50%); "
702
708
  "z-index:1000; text-align:center; padding:10px; background-color:white;"
@@ -884,11 +890,18 @@ def server(input, output, session):
884
890
  weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
885
891
  weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
886
892
 
893
+ high_quality_reference_library_tmp2 = False
894
+ if input.high_quality_reference_library() != 'False':
895
+ high_quality_reference_library_tmp2 = True
896
+
897
+ print(input.high_quality_reference_library())
898
+ print(high_quality_reference_library_tmp2)
899
+
887
900
  if input.chromatography_platform() == "HRMS":
888
- fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
901
+ fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=high_quality_reference_library_tmp2, mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
889
902
  plt.show()
890
903
  elif input.chromatography_platform() == "NRMS":
891
- fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
904
+ fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=high_quality_reference_library_tmp2, mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
892
905
  plt.show()
893
906
  with io.BytesIO() as buf:
894
907
  fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
@@ -1115,3 +1128,26 @@ def server(input, output, session):
1115
1128
  app = App(app_ui, server)
1116
1129
 
1117
1130
 
1131
+
1132
+ '''
1133
+ from starlette.middleware.base import BaseHTTPMiddleware
1134
+ from starlette.requests import Request
1135
+
1136
+ class _InjectBaseURLMiddleware(BaseHTTPMiddleware):
1137
+ async def dispatch(self, request: Request, call_next):
1138
+ hdrs = dict(request.scope.get("headers", []))
1139
+ if b"rstudio-connect-app-base-url" not in hdrs:
1140
+ host = request.headers.get("x-forwarded-host") or request.headers.get("host") or ""
1141
+ proto = request.headers.get("x-forwarded-proto") or "https"
1142
+ root_path = (request.scope.get("root_path") or "").rstrip("/")
1143
+ base = f"{proto}://{host}{root_path}"
1144
+ new_headers = list(request.scope.get("headers", [])) + [
1145
+ (b"rstudio-connect-app-base-url", base.encode("utf-8"))
1146
+ ]
1147
+ request.scope["headers"] = new_headers
1148
+ return await call_next(request)
1149
+
1150
+ app.starlette_app.add_middleware(_InjectBaseURLMiddleware)
1151
+ '''
1152
+
1153
+
@@ -92,8 +92,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
92
92
  print(f'Error: spectrum_preprocessing_order must contain only \'C\', \'F\', \'M\', \'N\', \'L\', \'W\'.')
93
93
  sys.exit()
94
94
 
95
- if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
96
- print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
95
+ if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
96
+ print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
97
97
  sys.exit()
98
98
 
99
99
  if isinstance(int_min,int) is True:
@@ -243,10 +243,12 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
243
243
  r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method=normalization_method)
244
244
  if transformation == 'N' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
245
245
  q_spec = remove_noise(q_spec, nr = noise_threshold)
246
- r_spec = remove_noise(r_spec, nr = noise_threshold)
246
+ if high_quality_reference_library == False or high_quality_reference_library == 'False':
247
+ r_spec = remove_noise(r_spec, nr = noise_threshold)
247
248
  if transformation == 'F' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
248
249
  q_spec = filter_spec_lcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
249
- r_spec = filter_spec_lcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
250
+ if high_quality_reference_library == False or high_quality_reference_library == 'False':
251
+ r_spec = filter_spec_lcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
250
252
 
251
253
  q_ints = q_spec[:,1]
252
254
  r_ints = r_spec[:,1]
@@ -291,12 +293,15 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
291
293
  plt.yticks([])
292
294
 
293
295
 
296
+ print('\n\n\n')
297
+ print(high_quality_reference_library)
298
+ print('\n\n\n')
294
299
  plt.subplots_adjust(top=0.8, hspace=0.92, bottom=0.3)
295
300
  plt.figlegend(loc = 'upper center')
296
301
  fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
297
302
  fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
298
303
  fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
299
- fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
304
+ fig.text(0.05, 0.09, f'High Quality Reference Library: {str(high_quality_reference_library)}', fontsize=7)
300
305
  fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
301
306
  fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
302
307
  fig.text(0.45, 0.18, f'Raw-Scale M/Z Range: [{mz_min_tmp},{mz_max_tmp}]', fontsize=7)
@@ -387,8 +392,8 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
387
392
  print(f'Error: spectrum_preprocessing_order must contain only \'F\', \'N\', \'W\', \'L\'.')
388
393
  sys.exit()
389
394
 
390
- if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
391
- print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
395
+ if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
396
+ print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
392
397
  sys.exit()
393
398
 
394
399
  if isinstance(int_min,int) is True:
@@ -528,11 +533,11 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
528
533
  r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method)
529
534
  if transformation == 'N':
530
535
  q_spec = remove_noise(q_spec, nr = noise_threshold)
531
- if high_quality_reference_library == False:
536
+ if high_quality_reference_library == False or high_quality_reference_library == 'False':
532
537
  r_spec = remove_noise(r_spec, nr = noise_threshold)
533
538
  if transformation == 'F':
534
539
  q_spec = filter_spec_gcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
535
- if high_quality_reference_library == False:
540
+ if high_quality_reference_library == False or high_quality_reference_library == 'False':
536
541
  r_spec = filter_spec_gcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
537
542
 
538
543
  if q_spec.shape[0] > 1:
@@ -580,7 +585,7 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
580
585
  fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
581
586
  fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
582
587
  fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
583
- fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
588
+ fig.text(0.05, 0.06, f'High Quality Reference Library: {str(high_quality_reference_library)}', fontsize=7)
584
589
  fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
585
590
  fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
586
591
  fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
@@ -37,6 +37,7 @@ def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_id
37
37
  LET_threshold=LET_threshold_tmp,
38
38
  entropy_dimension=entropy_dimension_tmp,
39
39
  high_quality_reference_library=high_quality_reference_library_tmp,
40
+ verbose=True
40
41
  )
41
42
 
42
43
  return (
@@ -441,21 +442,25 @@ def tune_params_on_NRMS_data_shiny(query_data=None, reference_data=None, grid=No
441
442
 
442
443
 
443
444
 
444
- def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
445
+ def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
445
446
 
446
447
  n_top_matches_to_save = 1
447
448
 
448
449
  all_similarity_scores = []
449
450
  for query_idx in range(0,len(unique_query_ids)):
450
- print(f'query spectrum #{query_idx} is being identified')
451
+ if verbose is True:
452
+ print(f'query spectrum #{query_idx} is being identified')
451
453
  q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
452
454
  q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
455
+ #q_spec_tmp = q_spec_tmp.astype(float)
453
456
 
454
457
  similarity_scores = []
455
458
  for ref_idx in range(0,len(unique_reference_ids)):
456
459
  q_spec = q_spec_tmp
457
460
  r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
458
461
  r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
462
+ #print(r_spec)
463
+ #r_spec = r_spec.astype(float)
459
464
 
460
465
  is_matched = False
461
466
  for transformation in spectrum_preprocessing_order:
@@ -529,7 +534,7 @@ def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
529
534
 
530
535
 
531
536
 
532
- def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
537
+ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
533
538
 
534
539
  n_top_matches_to_save = 1
535
540
 
@@ -546,7 +551,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
546
551
  similarity_scores = []
547
552
  for ref_idx in range(0,len(unique_reference_ids)):
548
553
  q_spec = q_spec_tmp
549
- if ref_idx % 1000 == 0:
554
+ if verbose is True and ref_idx % 1000 == 0:
550
555
  print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
551
556
  r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
552
557
  r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
@@ -615,7 +620,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
615
620
 
616
621
 
617
622
 
618
- def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
623
+ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False, verbose=True):
619
624
  '''
620
625
  runs spectral library matching on high-resolution mass spectrometry (HRMS) data
621
626
 
@@ -762,14 +767,13 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
762
767
 
763
768
  all_similarity_scores = []
764
769
  for query_idx in range(0,len(unique_query_ids)):
765
- print(f'query spectrum #{query_idx} is being identified')
770
+ if verbose is True:
771
+ print(f'query spectrum #{query_idx} is being identified')
766
772
  q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
767
773
  q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
768
774
 
769
775
  similarity_scores = []
770
776
  for ref_idx in range(0,len(unique_reference_ids)):
771
- #if ref_idx % 100 == 0:
772
- # print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
773
777
  q_spec = q_spec_tmp
774
778
  r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
775
779
  r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
@@ -1008,9 +1012,9 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
1008
1012
 
1009
1013
  similarity_scores = []
1010
1014
  for ref_idx in range(0,len(unique_reference_ids)):
1011
- q_spec = q_spec_tmp
1012
- if ref_idx % 1000 == 0:
1015
+ if verbose is True and ref_idx % 1000 == 0:
1013
1016
  print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
1017
+ q_spec = q_spec_tmp
1014
1018
  r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
1015
1019
  r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
1016
1020
  r_spec = convert_spec(r_spec_tmp,mzs)
@@ -0,0 +1,233 @@
1
+
2
+ #!/usr/bin/env python3
3
+ import argparse
4
+ import sys
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Dict, List, Tuple
8
+ import numpy as np
9
+ import pandas as pd
10
+ from scipy.optimize import differential_evolution
11
+ from pycompound.spec_lib_matching import get_acc_HRMS, get_acc_NRMS
12
+
13
+
14
# Every parameter the tuner knows about; --opt values are validated against
# this list and the final report prints them in this order.
ALL_PARAMS = [
    "window_size_centroiding",
    "window_size_matching",
    "noise_threshold",
    "wf_mz",
    "wf_int",
    "LET_threshold",
    "entropy_dimension",
]

# Search interval (lower, upper) used for a parameter unless --bound overrides it.
SUGGESTED_BOUNDS = dict(
    window_size_centroiding=(0.0, 0.5),
    window_size_matching=(0.0, 0.5),
    noise_threshold=(0.0, 0.25),
    wf_mz=(0.0, 5.0),
    wf_int=(0.0, 5.0),
    LET_threshold=(0.0, 5.0),
    entropy_dimension=(1.0, 3.0),
)

# Value a parameter keeps when it is not being optimized (--default overrides it).
DEFAULT_PARAMS = dict(
    window_size_centroiding=0.5,
    window_size_matching=0.5,
    noise_threshold=0.10,
    wf_mz=0.0,
    wf_int=1.0,
    LET_threshold=0.0,
    entropy_dimension=1.1,
)
43
+
44
+
45
+ # ---------- Utilities ----------
46
def parse_bound(s: str) -> Tuple[str, Tuple[float, float]]:
    """Parse one ``--bound`` value of the form ``name=min:max``.

    Intended as an argparse ``type=`` converter: every rejection is raised as
    ``argparse.ArgumentTypeError`` with a message that quotes the input.

    Args:
        s: Raw command-line text, e.g. ``"wf_mz=0:5"``.

    Returns:
        ``(name, (min, max))`` where ``name`` has surrounding whitespace
        removed and both limits are ``float``.

    Raises:
        argparse.ArgumentTypeError: If the text is not ``name=min:max``, the
            name is empty, a limit is not a finite number, or ``min > max``.
    """
    # Split on the first "=" and then on the first ":" *after* it, so input
    # such as "a:b=1" is reported as malformed instead of failing to unpack.
    name, eq, rng = s.partition("=")
    lo, colon, hi = rng.partition(":")
    name = name.strip()
    if not eq or not colon or not name:
        raise argparse.ArgumentTypeError(f"Bad --bound format '{s}'. Use name=min:max")
    try:
        lo_f, hi_f = float(lo), float(hi)
    except ValueError as e:
        raise argparse.ArgumentTypeError(f"Non-numeric bound in '{s}': {e}") from e
    # float() accepts "nan" and "inf"; the optimizer needs finite limits, and
    # NaN would also slip past the ordering check below.
    if not (np.isfinite(lo_f) and np.isfinite(hi_f)):
        raise argparse.ArgumentTypeError(f"Non-finite bound in '{s}'")
    if lo_f > hi_f:
        raise argparse.ArgumentTypeError(f"Lower bound > upper bound in '{s}'")
    return name, (lo_f, hi_f)
59
+
60
+
61
def parse_default(s: str) -> Tuple[str, float]:
    """Parse one ``--default`` value of the form ``name=value``.

    Intended as an argparse ``type=`` converter: every rejection is raised as
    ``argparse.ArgumentTypeError`` with a message that quotes the input.

    Args:
        s: Raw command-line text, e.g. ``"noise_threshold=0.05"``.

    Returns:
        ``(name, value)`` where ``name`` has surrounding whitespace removed
        and ``value`` is a ``float``.

    Raises:
        argparse.ArgumentTypeError: If there is no ``=``, the name is empty,
            or the value is not a number (including NaN).
    """
    name, eq, val = s.partition("=")
    name = name.strip()
    if not eq or not name:
        raise argparse.ArgumentTypeError(f"Bad --default format '{s}'. Use name=value")
    try:
        v = float(val)
    except ValueError as e:
        raise argparse.ArgumentTypeError(f"Non-numeric default in '{s}': {e}") from e
    # float("nan") parses successfully but can never be a usable setting.
    if v != v:
        raise argparse.ArgumentTypeError(f"Non-numeric default in '{s}': NaN is not allowed")
    return name, v
71
+
72
+
73
+ def _vector_to_full_params(X: np.ndarray, default_params: Dict[str, float], optimize_params: List[str]) -> Dict[str, float]:
74
+ params = dict(default_params)
75
+ for name, val in zip(optimize_params, X):
76
+ params[name] = float(val)
77
+ return params
78
+
79
+
80
+ # ---------- Objective wrappers (top-level, pickle-friendly) ----------
81
def objective_HRMS(X: np.ndarray, ctx: dict) -> float:
    """Objective for the optimizer on HRMS data: ``1 - get_acc_HRMS(...)``.

    Args:
        X: Candidate values for the parameters named in
            ``ctx["optimize_params"]``, in that order.
        ctx: Shared settings. Keys read here: ``default_params``,
            ``optimize_params``, ``df_query``, ``df_reference``, ``uq``,
            ``ur``, ``similarity_measure``, ``weights``,
            ``spectrum_preprocessing_order``, ``mz_min``, ``mz_max``,
            ``int_min``, ``int_max``, ``high_quality_reference_library``.

    Returns:
        One minus the value returned by ``get_acc_HRMS``, so that a lower
        result corresponds to a higher reported accuracy.
    """
    # Fill in every parameter: optimized ones from X, the rest from defaults.
    settings = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
    accuracy = get_acc_HRMS(
        df_query=ctx["df_query"],
        df_reference=ctx["df_reference"],
        unique_query_ids=ctx["uq"],
        unique_reference_ids=ctx["ur"],
        similarity_measure=ctx["similarity_measure"],
        weights=ctx["weights"],
        spectrum_preprocessing_order=ctx["spectrum_preprocessing_order"],
        mz_min=ctx["mz_min"],
        mz_max=ctx["mz_max"],
        int_min=ctx["int_min"],
        int_max=ctx["int_max"],
        window_size_centroiding=settings["window_size_centroiding"],
        window_size_matching=settings["window_size_matching"],
        noise_threshold=settings["noise_threshold"],
        wf_mz=settings["wf_mz"],
        wf_int=settings["wf_int"],
        LET_threshold=settings["LET_threshold"],
        entropy_dimension=settings["entropy_dimension"],
        high_quality_reference_library=ctx["high_quality_reference_library"],
        verbose=False,  # keep per-spectrum progress lines out of the tuning log
    )
    # One progress line per evaluated candidate.
    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {accuracy*100}%")
    return 1.0 - accuracy
96
+
97
+
98
def objective_NRMS(X: np.ndarray, ctx: dict) -> float:
    """Objective for the optimizer on NRMS data: ``1 - get_acc_NRMS(...)``.

    Args:
        X: Candidate values for the parameters named in
            ``ctx["optimize_params"]``, in that order.
        ctx: Shared settings. Keys read here: ``default_params``,
            ``optimize_params``, ``df_query``, ``df_reference``, ``uq``,
            ``ur``, ``similarity_measure``, ``weights``,
            ``spectrum_preprocessing_order``, ``mz_min``, ``mz_max``,
            ``int_min``, ``int_max``, ``high_quality_reference_library``.

    Returns:
        One minus the value returned by ``get_acc_NRMS``, so that a lower
        result corresponds to a higher reported accuracy.
    """
    # Fill in every parameter: optimized ones from X, the rest from defaults.
    # The two window-size entries are not forwarded to get_acc_NRMS.
    settings = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
    accuracy = get_acc_NRMS(
        df_query=ctx["df_query"],
        df_reference=ctx["df_reference"],
        unique_query_ids=ctx["uq"],
        unique_reference_ids=ctx["ur"],
        similarity_measure=ctx["similarity_measure"],
        weights=ctx["weights"],
        spectrum_preprocessing_order=ctx["spectrum_preprocessing_order"],
        mz_min=ctx["mz_min"],
        mz_max=ctx["mz_max"],
        int_min=ctx["int_min"],
        int_max=ctx["int_max"],
        noise_threshold=settings["noise_threshold"],
        wf_mz=settings["wf_mz"],
        wf_int=settings["wf_int"],
        LET_threshold=settings["LET_threshold"],
        entropy_dimension=settings["entropy_dimension"],
        high_quality_reference_library=ctx["high_quality_reference_library"],
        verbose=False,  # keep per-spectrum progress lines out of the tuning log
    )
    # One progress line per evaluated candidate.
    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {accuracy*100}%")
    return 1.0 - accuracy
111
+
112
+
113
+ # ---------- Main CLI ----------
114
def main():
    """Command-line entry point: tune matching parameters with Differential Evolution.

    Steps:
      1. Parse the command line and validate ``--opt``, ``--default`` and
         ``--bound`` against the known parameter names.
      2. Load the query CSV and one or more reference CSVs (concatenated
         row-wise); each must contain an ``id`` column.
      3. Minimize ``objective_HRMS`` or ``objective_NRMS`` (chosen by
         ``--chromatography_platform``) over the selected parameters.
      4. Print the best parameter values and the corresponding accuracy.

    Exits via ``sys.exit`` with a message on any validation failure.
    """
    parser = argparse.ArgumentParser(
        description="Parameter tuning via Differential Evolution for HRMS/NRMS using pycompound."
    )
    parser.add_argument("--chromatography_platform", choices=["HRMS", "NRMS"], default="HRMS", help="Chromatography Platform.")
    parser.add_argument("--query_data", required=True, help="Path to query CSV (must contain 'id' column).")
    parser.add_argument("--reference_data", required=True, nargs="+", help="Path(s) to reference CSV(s) (must contain 'id').")
    parser.add_argument("--similarity_measure", default="cosine", choices=["cosine", "renyi", "tsallis"], help="Similarity measure.")
    parser.add_argument("--weights", default="", help="Weights spec; empty means None.")
    parser.add_argument("--spectrum-order", default="CNMWL", help="Spectrum preprocessing order string.")
    parser.add_argument("--mz-min", type=float, default=0.0)
    parser.add_argument("--mz-max", type=float, default=999_999_999.0)
    parser.add_argument("--int-min", type=float, default=0.0)
    parser.add_argument("--int-max", type=float, default=999_999_999.0)
    parser.add_argument("--hq-ref-lib", action="store_true", help="Use high-quality reference library flag.")
    parser.add_argument("--opt", nargs="+", default=["window_size_centroiding", "noise_threshold", "wf_mz", "wf_int"],
                        help=f"Parameters to optimize (subset of {ALL_PARAMS}).")
    parser.add_argument("--bound", action="append", default=[], type=parse_bound,
                        help="Bound spec 'name=min:max'. Repeatable.")
    parser.add_argument("--default", dest="defaults", action="append", default=[], type=parse_default,
                        help="Override a default 'name=value' for non-optimized params or initial values.")
    parser.add_argument("--maxiter", type=int, default=15)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--workers", type=int, default=-1, help="Use -1 for all cores; 1 to disable parallelism.")
    args = parser.parse_args()

    unknown = [x for x in args.opt if x not in ALL_PARAMS]
    if unknown:
        sys.exit(f"Error: unknown --opt params: {unknown}")

    is_hrms = args.chromatography_platform == "HRMS"

    # Drop repeated names (keeping first occurrence) so each parameter maps to
    # exactly one search dimension.
    opt_params = list(dict.fromkeys(args.opt))

    if not is_hrms:
        # objective_NRMS never forwards the window sizes, so searching over
        # them only adds dead dimensions to the optimization.
        hrms_only = ("window_size_centroiding", "window_size_matching")
        ignored = [name for name in opt_params if name in hrms_only]
        if ignored:
            print(f"Warning: {ignored} have no effect on NRMS data and will not be optimized.", file=sys.stderr)
            opt_params = [name for name in opt_params if name not in hrms_only]
        if not opt_params:
            sys.exit("Error: no --opt params left to optimize for NRMS data.")

    qpath = Path(args.query_data)
    if not qpath.exists():
        sys.exit(f"Query CSV not found: {qpath}")

    df_query = pd.read_csv(qpath)
    if "id" not in df_query.columns:
        sys.exit("Query CSV must contain an 'id' column.")

    ref_paths = [Path(pth) for pth in args.reference_data]
    for r in ref_paths:
        if not r.exists():
            sys.exit(f"Reference CSV not found: {r}")
    df_reference = pd.concat([pd.read_csv(r) for r in ref_paths], axis=0, ignore_index=True)
    if "id" not in df_reference.columns:
        sys.exit("Reference CSV must contain an 'id' column.")

    uq = df_query["id"].unique().tolist()
    ur = df_reference["id"].unique().tolist()

    default_params = dict(DEFAULT_PARAMS)
    for name, val in args.defaults:
        if name not in DEFAULT_PARAMS:
            sys.exit(f"--default refers to unknown parameter '{name}'. Allowed: {list(DEFAULT_PARAMS)}")
        default_params[name] = val

    param_bounds: Dict[str, Tuple[float, float]] = dict(SUGGESTED_BOUNDS)
    for name, (lo, hi) in args.bound:
        if name not in SUGGESTED_BOUNDS:
            sys.exit(f"--bound refers to unknown parameter '{name}'. Allowed: {list(SUGGESTED_BOUNDS)}")
        param_bounds[name] = (lo, hi)

    bounds = [param_bounds[name] for name in opt_params]

    # Everything the objective needs besides the candidate vector; passed via
    # `args=` so the top-level objective functions can be used by worker processes.
    ctx = dict(
        df_query=df_query,
        df_reference=df_reference,
        uq=uq,
        ur=ur,
        similarity_measure=args.similarity_measure,
        # NOTE(review): a non-empty --weights is forwarded as the raw string;
        # confirm that is the form get_acc_HRMS/get_acc_NRMS expect.
        weights=(None if args.weights.strip() == "" else args.weights),
        spectrum_preprocessing_order=args.spectrum_order,
        mz_min=float(args.mz_min),
        mz_max=float(args.mz_max),
        int_min=float(args.int_min),
        int_max=float(args.int_max),
        high_quality_reference_library=bool(args.hq_ref_lib),
        default_params=default_params,
        optimize_params=opt_params,
    )

    objective = objective_HRMS if is_hrms else objective_NRMS

    # No per-generation callback: the objective already prints every evaluated
    # candidate, and re-evaluating the best vector each generation would cost
    # one extra full matching run per iteration.
    result = differential_evolution(
        objective,
        bounds=bounds,
        args=(ctx,),
        maxiter=int(args.maxiter),
        tol=0.0,
        seed=int(args.seed),
        workers=int(args.workers),
    )

    best_params = _vector_to_full_params(result.x, default_params, opt_params)
    best_acc_pct = (1.0 - result.fun) * 100.0

    print("\n=== Differential Evolution Result ===")
    print(f"Mode: {args.chromatography_platform}")
    print(f"Optimized over: {opt_params}")
    print("Best values (selected params):")
    for name in opt_params:
        print(f"  {name}: {best_params[name]}")
    print("\nFull parameter set used in final evaluation:")
    for k in ALL_PARAMS:
        print(f"  {k}: {best_params[k]}")
    print(f"\nBest accuracy: {best_acc_pct:.3f}%")
    print(f"DE raw: success={result.success}, nfev={result.nfev}, nit={result.nit}, message='{result.message}'")


if __name__ == "__main__":
    main()
233
+
@@ -1,6 +1,6 @@
1
1
 
2
- from pycompound.spec_lib_matching import tune_params_on_HRMS_data
3
- from pycompound.spec_lib_matching import tune_params_on_NRMS_data
2
+ from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
3
+ from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
4
4
  import argparse
5
5
  import json
6
6
  from pathlib import Path
@@ -61,9 +61,9 @@ grid['entropy_dimension'] = [float(x) for x in grid['entropy_dimension']]
61
61
 
62
62
 
63
63
  if args.chromatography_platform == 'HRMS':
64
- tune_params_on_HRMS_data(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
64
+ tune_params_on_HRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
65
65
 
66
66
  if args.chromatography_platform == 'NRMS':
67
- tune_params_on_NRMS_data(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
67
+ tune_params_on_NRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
68
68
 
69
69
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycompound
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
5
5
  Author-email: Hunter Dlugas <fy7392@wayne.edu>
6
6
  License-Expression: MIT
@@ -0,0 +1,15 @@
1
+ app.py,sha256=e_QULBLubrZLk_huIyDvL64lFzgDRo_vaYV0MIx9VJQ,51950
2
+ pycompound/build_library.py,sha256=sXG5MTJnPE7Gr8YMlLWjfMS7JQrW32lCeCGDw-DFe38,4826
3
+ pycompound/plot_spectra.py,sha256=_yeHooNoJHYlTajaZ9hgUudisdWVlw1Zw1wJfV3tpqc,40632
4
+ pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
5
+ pycompound/processing.py,sha256=q629rcDaMQMgef-4SbeV9cJnuiLXg97VT2F5AIsyqgI,10654
6
+ pycompound/similarity_measures.py,sha256=NbeVIy9DE_KWlDMXXylekjKuYVrtzbeEXbTutKFxmfU,10460
7
+ pycompound/spec_lib_matching.py,sha256=TBH7cZLeIWNk5XkXcN7kiBfCSe6eRkseXIPMMLB54iI,67261
8
+ pycompound/spec_lib_matching_CLI.py,sha256=qiekC52FP6ET_3NYvxUDN7km7y1OLUsd9FB4SHfne_Y,9690
9
+ pycompound/tuning_CLI_DE.py,sha256=PXy95LD_jmVeWdgiMlMwEZU_KqPGqDao1skwe5U4Sfc,9147
10
+ pycompound/tuning_CLI_grid.py,sha256=0XU-4ShZiZ2MQy5d0zydH0hphqXvqGtf4etl-ePNarU,8560
11
+ pycompound-0.1.4.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
12
+ pycompound-0.1.4.dist-info/METADATA,sha256=qlCzW19-6MpmuGoHbmQVL9ljKRCO4JXZ55-GTyKc3_I,1732
13
+ pycompound-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ pycompound-0.1.4.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
15
+ pycompound-0.1.4.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- app.py,sha256=aN6HFOY3rWZmQYCKYLtCQ6PT-E-rdE8B1iv8G2SL7PQ,50511
2
- pycompound/build_library.py,sha256=sXG5MTJnPE7Gr8YMlLWjfMS7JQrW32lCeCGDw-DFe38,4826
3
- pycompound/plot_spectra.py,sha256=cPuHDPTyMGdkw-uL1a5x2pBjHuUPku7dwFLrA43PEnU,40236
4
- pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
5
- pycompound/processing.py,sha256=q629rcDaMQMgef-4SbeV9cJnuiLXg97VT2F5AIsyqgI,10654
6
- pycompound/similarity_measures.py,sha256=NbeVIy9DE_KWlDMXXylekjKuYVrtzbeEXbTutKFxmfU,10460
7
- pycompound/spec_lib_matching.py,sha256=hEMUD5rAuEfvX1PnDEwilXIWVubnvgDBp-EVwVHu9ro,67141
8
- pycompound/spec_lib_matching_CLI.py,sha256=qiekC52FP6ET_3NYvxUDN7km7y1OLUsd9FB4SHfne_Y,9690
9
- pycompound/tuning_CLI.py,sha256=8gdT4EhIpvLHG3PcYtQBmUiPE9fJybwr3LCCilX-EfE,8540
10
- pycompound-0.1.2.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
11
- pycompound-0.1.2.dist-info/METADATA,sha256=Vlts9C3VGnnDt3vxoiObyWVJo5gYJ61ar6eeNePzwgw,1732
12
- pycompound-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
- pycompound-0.1.2.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
14
- pycompound-0.1.2.dist-info/RECORD,,