pycompound 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app.py CHANGED
@@ -18,8 +18,25 @@ import matplotlib.pyplot as plt
18
18
  import pandas as pd
19
19
  import numpy as np
20
20
  import netCDF4 as nc
21
- from pyteomics import mgf
22
- from pyteomics import mzml
21
+ from pyteomics import mgf, mzml
22
+ import ast
23
+
24
+
25
+ _LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
26
+
27
+
28
def strip_text(s):
    """Parse a bracketed, comma-separated string into a list of tokens.

    Surrounding whitespace and square brackets are removed, the remainder is
    split on commas, each token is trimmed, and empty tokens are dropped.

    Example: ``" [FCNMWL, CWM] "`` -> ``["FCNMWL", "CWM"]``.

    Args:
        s: Text such as ``"[FCNMWL,CWM]"``.

    Returns:
        A list of non-empty, whitespace-trimmed strings (possibly empty).
    """
    # Strip whitespace and brackets in one pass: with strip('[]') alone, input
    # padded by spaces/newlines kept its brackets and leaked them into tokens.
    inner = s.strip(' \t\r\n[]')
    return [token.strip() for token in inner.split(',') if token.strip()]
30
+
31
+
32
def strip_numeric(s):
    """Parse a bracketed, comma-separated string into a list of floats.

    Surrounding whitespace and square brackets are removed, the remainder is
    split on commas, and every non-empty token is converted with ``float``.

    Example: ``"[0.1, 0.5]"`` -> ``[0.1, 0.5]``.

    Args:
        s: Text such as ``"[0.1,0.5]"``.

    Returns:
        A list of floats (possibly empty).

    Raises:
        ValueError: If a token is not a valid float literal.
    """
    # Strip whitespace and brackets in one pass: with strip('[]') alone, input
    # padded by spaces/newlines kept its brackets and float("[0.1") raised.
    inner = s.strip(' \t\r\n[]')
    return [float(token.strip()) for token in inner.split(',') if token.strip()]
34
+
35
+
36
def strip_weights(s):
    """Parse a textual collection of weight tuples into a list of dicts.

    The text is evaluated with ``ast.literal_eval`` (literals only, no code
    execution).  Each 4-element tuple/list is mapped, in order, onto the keys
    ``'Cosine'``, ``'Shannon'``, ``'Renyi'`` and ``'Tsallis'``.

    Both of these forms are accepted:

    * several weight sets: ``"((0.25, 0.25, 0.25, 0.25), (0.2, 0.3, 0.4, 0.1))"``
    * one flat weight set: ``"(0.25, 0.25, 0.25, 0.25)"`` or ``"0.25, 0.25, 0.25, 0.25"``

    Args:
        s: The text to parse.

    Returns:
        A list with one ``{'Cosine': ..., 'Shannon': ..., 'Renyi': ...,
        'Tsallis': ...}`` dict per weight set.

    Raises:
        ValueError: If the text is not a sequence of weight sets or a weight
            set does not contain exactly four values.  ``ast.literal_eval``
            may also raise ``ValueError``/``SyntaxError`` for malformed text.
    """
    keys = ['Cosine', 'Shannon', 'Renyi', 'Tsallis']
    parsed = ast.literal_eval(s.strip())

    if not isinstance(parsed, (list, tuple)):
        raise ValueError("Weights must be a tuple of four numbers or a tuple of such tuples.")

    # A single weight set entered without the outer parentheses parses to a
    # flat sequence of numbers; wrap it so it is handled like the nested form.
    if parsed and all(isinstance(w, (int, float)) for w in parsed):
        parsed = [parsed]

    weight_sets = []
    for t in parsed:
        # zip() would silently truncate a short/long tuple and produce a dict
        # with missing keys, so reject anything that is not exactly four long.
        if not isinstance(t, (list, tuple)) or len(t) != len(keys):
            raise ValueError(
                f"Each weight set must contain exactly {len(keys)} values "
                f"(cosine, shannon, renyi, tsallis); got {t!r}."
            )
        weight_sets.append(dict(zip(keys, t)))
    return weight_sets
23
40
 
24
41
 
25
42
  def build_library(input_path=None, output_path=None):
@@ -152,30 +169,37 @@ def extract_first_column_ids(file_path: str, max_ids: int = 20000):
152
169
  return []
153
170
 
154
171
 
172
+ def _open_plot_window(session, png_bytes: bytes, title: str = "plot.png"):
173
+ """Send PNG bytes to browser and open in a new window as a data URL."""
174
+ b64 = base64.b64encode(png_bytes).decode("ascii")
175
+ data_url = f"data:image/png;base64,{b64}"
176
+ session.send_custom_message("open-plot-window", {"png": data_url, "title": title})
177
+
178
+
155
179
  def plot_spectra_ui(platform: str):
156
180
  # Base inputs common to all platforms
157
181
  base_inputs = [
158
182
  ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
159
183
  ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
160
- ui.input_selectize(
161
- "spectrum_ID1",
162
- "Select spectrum ID 1:",
163
- choices=[],
164
- multiple=False,
165
- options={"placeholder": "Upload a query file to load IDs..."},
166
- ),
167
- ui.input_selectize(
168
- "spectrum_ID2",
169
- "Select spectrum ID 2 (optional):",
170
- choices=[],
171
- multiple=False,
172
- options={"placeholder": "Upload a reference file to load IDs..."},
173
- ),
184
+ ui.input_selectize(
185
+ "spectrum_ID1",
186
+ "Select spectrum ID 1 (default is the first spectrum in the library):",
187
+ choices=[],
188
+ multiple=False,
189
+ options={"placeholder": "Upload a library..."},
190
+ ),
191
+ ui.input_selectize(
192
+ "spectrum_ID2",
193
+ "Select spectrum ID 2 (default is the first spectrum in the library):",
194
+ choices=[],
195
+ multiple=False,
196
+ options={"placeholder": "Upload a library..."},
197
+ ),
174
198
  ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
199
+ ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
175
200
  ui.input_select(
176
201
  "high_quality_reference_library",
177
- "Indicate whether the reference library is considered high quality. "
178
- "If True, filtering and noise removal are only applied to the query spectra.",
202
+ "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
179
203
  [False, True],
180
204
  ),
181
205
  ]
@@ -185,7 +209,7 @@ def plot_spectra_ui(platform: str):
185
209
  extra_inputs = [
186
210
  ui.input_text(
187
211
  "spectrum_preprocessing_order",
188
- "Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
212
+ "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
189
213
  "FCNMWL",
190
214
  ),
191
215
  ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
@@ -195,7 +219,7 @@ def plot_spectra_ui(platform: str):
195
219
  extra_inputs = [
196
220
  ui.input_text(
197
221
  "spectrum_preprocessing_order",
198
- "Sequence of characters for preprocessing order (F, N, L, W).",
222
+ "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
199
223
  "FNLW",
200
224
  )
201
225
  ]
@@ -227,19 +251,19 @@ def plot_spectra_ui(platform: str):
227
251
  # Layout base_inputs and extra_inputs in columns
228
252
  if platform == "HRMS":
229
253
  inputs_columns = ui.layout_columns(
230
- ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
231
- ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
254
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
255
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
232
256
  ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
233
257
  ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
234
- col_widths=(3, 3, 3, 3),
258
+ col_widths=(3,3,3,3),
235
259
  )
236
260
  elif platform == "NRMS":
237
261
  inputs_columns = ui.layout_columns(
238
- ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
239
- ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
262
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
263
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
240
264
  ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
241
265
  ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
242
- col_widths=(3, 3, 3, 3),
266
+ col_widths=(3,3,3,3),
243
267
  )
244
268
 
245
269
  # Combine everything
@@ -249,7 +273,8 @@ def plot_spectra_ui(platform: str):
249
273
  inputs_columns,
250
274
  run_button_plot_spectra,
251
275
  back_button,
252
- ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px")
276
+ ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px"),
277
+ ui.div(ui.output_text("plot_reference_status"), style="margin-top:8px; font-size:14px")
253
278
  ),
254
279
  )
255
280
 
@@ -261,12 +286,26 @@ def run_spec_lib_matching_ui(platform: str):
261
286
  ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
262
287
  ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
263
288
  ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
289
+ ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
290
+ ui.input_selectize(
291
+ "spectrum_ID1",
292
+ "Select spectrum ID 1 (only applicable for plotting; default is the first spectrum in the query library):",
293
+ choices=[],
294
+ multiple=False,
295
+ options={"placeholder": "Upload a library..."},
296
+ ),
297
+ ui.input_selectize(
298
+ "spectrum_ID2",
299
+ "Select spectrum ID 2 (only applicable for plotting; default is the first spectrum in the reference library):",
300
+ choices=[],
301
+ multiple=False,
302
+ options={"placeholder": "Upload a library..."},
303
+ ),
264
304
  ui.input_select(
265
305
  "high_quality_reference_library",
266
- "Indicate whether the reference library is considered high quality. "
267
- "If True, filtering and noise removal are only applied to the query spectra.",
306
+ "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
268
307
  [False, True],
269
- ),
308
+ )
270
309
  ]
271
310
 
272
311
  # Extra inputs depending on platform
@@ -274,7 +313,7 @@ def run_spec_lib_matching_ui(platform: str):
274
313
  extra_inputs = [
275
314
  ui.input_text(
276
315
  "spectrum_preprocessing_order",
277
- "Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
316
+ "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
278
317
  "FCNMWL",
279
318
  ),
280
319
  ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
@@ -284,7 +323,7 @@ def run_spec_lib_matching_ui(platform: str):
284
323
  extra_inputs = [
285
324
  ui.input_text(
286
325
  "spectrum_preprocessing_order",
287
- "Sequence of characters for preprocessing order (F, N, L, W).",
326
+ "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
288
327
  "FNLW",
289
328
  )
290
329
  ]
@@ -300,30 +339,31 @@ def run_spec_lib_matching_ui(platform: str):
300
339
  ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
301
340
  ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
302
341
  ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
303
- ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 1),
342
+ ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 3),
304
343
  ]
305
344
 
306
345
 
307
346
  # Run and Back buttons
308
- run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
347
+ run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run Spectral Library Matching", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
348
+ run_button_plot_spectra_within_spec_lib_matching = ui.download_button("run_btn_plot_spectra_within_spec_lib_matching", "Plot Spectra", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
309
349
  back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
310
350
 
311
351
  # Layout base_inputs and extra_inputs in columns
312
352
  if platform == "HRMS":
313
353
  inputs_columns = ui.layout_columns(
314
- ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
315
- ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
354
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
355
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
316
356
  ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
317
357
  ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
318
- col_widths=(3, 3, 3, 3),
358
+ col_widths=(3,3,3,3)
319
359
  )
320
360
  elif platform == "NRMS":
321
361
  inputs_columns = ui.layout_columns(
322
- ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
323
- ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
362
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
363
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
324
364
  ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
325
365
  ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
326
- col_widths=(3, 3, 3, 3),
366
+ col_widths=(3,3,3,3)
327
367
  )
328
368
 
329
369
  log_panel = ui.card(
@@ -338,13 +378,100 @@ def run_spec_lib_matching_ui(platform: str):
338
378
  ui.h2("Run Spectral Library Matching"),
339
379
  inputs_columns,
340
380
  run_button_spec_lib_matching,
381
+ run_button_plot_spectra_within_spec_lib_matching,
341
382
  back_button,
342
- log_panel,
383
+ log_panel
343
384
  ),
344
385
  )
345
386
 
346
387
 
347
388
 
389
def run_parameter_tuning_ui(platform: str):
    """Build the "Tune parameters" page.

    Every tunable parameter is a free-text input holding a bracketed list of
    candidate values (e.g. ``"[0.1,0.5]"``).

    Args:
        platform: ``"HRMS"`` adds the centroiding/matching window inputs and
            the HRMS preprocessing-order default; any other value gets the
            NRMS-style preprocessing-order input only.

    Returns:
        A ``ui.div`` wrapping the heading, the four-column input layout, the
        run and back buttons, and the log panel.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"

    # Base inputs common to all platforms
    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_selectize("similarity_measure", "Select similarity measure(s):", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"], multiple=True, selected='cosine'),
        ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25), (0.2, 0.3, 0.4, 0.1))'),
        ui.input_text("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", '[True]'),
    ]

    # Extra inputs depending on platform
    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "[FCNMWL,CWM]",
            ),
            ui.input_text("window_size_centroiding", "Centroiding window-size:", "[0.5]"),
            ui.input_text("window_size_matching", "Matching window-size:", "[0.1,0.5]"),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "[FNLW,WNL]",
            )
        ]

    # Numeric parameters, entered as text so several candidate values can be listed
    numeric_inputs = [
        ui.input_text("mz_min", "Minimum m/z for filtering:", '[0]'),
        ui.input_text("mz_max", "Maximum m/z for filtering:", '[99999999]'),
        ui.input_text("int_min", "Minimum intensity for filtering:", '[0]'),
        ui.input_text("int_max", "Maximum intensity for filtering:", '[999999999]'),
        ui.input_text("noise_threshold", "Noise removal threshold:", '[0.0]'),
        ui.input_text("wf_mz", "Mass/charge weight factor:", '[0.0]'),
        ui.input_text("wf_int", "Intensity weight factor:", '[1.0]'),
        ui.input_text("LET_threshold", "Low-entropy threshold:", '[0.0]'),
        ui.input_text("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", '[1.1]'),
    ]

    # Run and Back buttons
    run_button_parameter_tuning = ui.download_button("run_btn_parameter_tuning", "Tune parameters", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # Layout: all base inputs but the last go in column 1; the last base input
    # leads column 2, followed by the platform-specific inputs.  Slicing
    # relative to the end keeps this split correct for this page's five base
    # inputs (fixed [0:6]/[6:7] indices left column 2 without a base input).
    # The layout is the same for every platform, so it is built
    # unconditionally and can never be left undefined.
    inputs_columns = ui.layout_columns(
        ui.div(base_inputs[:-1], style=column_style),
        ui.div([base_inputs[-1:], *extra_inputs], style=column_style),
        ui.div(numeric_inputs[0:5], style=column_style),
        ui.div(numeric_inputs[5:9], style=column_style),
        col_widths=(3, 3, 3, 3),
    )

    log_panel = ui.card(
        ui.card_header("Identification log"),
        ui.output_text_verbatim("match_log"),
        style="max-height:300px; overflow:auto"
    )

    # Combine everything
    return ui.div(
        ui.TagList(
            ui.h2("Tune parameters"),
            inputs_columns,
            run_button_parameter_tuning,
            back_button,
            log_panel
        ),
    )
471
+
472
+
473
+
474
+
348
475
  app_ui = ui.page_fluid(
349
476
  ui.output_ui("main_ui"),
350
477
  ui.output_text("status_output")
@@ -361,8 +488,12 @@ def server(input, output, session):
361
488
 
362
489
  run_status_plot_spectra = reactive.Value("")
363
490
  run_status_spec_lib_matching = reactive.Value("")
491
+ run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
492
+ run_status_parameter_tuning = reactive.Value("")
493
+ is_tuning_running = reactive.Value(False)
364
494
  match_log_rv = reactive.Value("")
365
495
  is_matching_rv = reactive.Value(False)
496
+ is_any_job_running = reactive.Value(False)
366
497
 
367
498
  query_ids_rv = reactive.Value([])
368
499
  query_file_path_rv = reactive.Value(None)
@@ -377,6 +508,29 @@ def server(input, output, session):
377
508
  converted_reference_path_rv = reactive.Value(None)
378
509
 
379
510
 
511
+ #def _drain_queue_nowait(q: asyncio.Queue[str]) -> list[str]:
512
+ def _drain_queue_nowait(q: asyncio.Queue) -> list[str]:
513
+ out = []
514
+ try:
515
+ while True:
516
+ out.append(q.get_nowait())
517
+ except asyncio.QueueEmpty:
518
+ pass
519
+ return out
520
+
521
+
522
@reactive.effect
async def _pump_logs():
    """Append queued log messages to ``match_log_rv`` while a job is running.

    Does nothing unless ``is_any_job_running`` is set.  Otherwise it schedules
    its own re-run in 0.1 s, drains ``_LOG_QUEUE``, and, when messages were
    waiting, appends them to the log value and flushes the reactive graph.
    """
    if is_any_job_running.get():
        # Re-arm first so polling continues while the job is running.
        reactive.invalidate_later(0.1)
        pending = _drain_queue_nowait(_LOG_QUEUE)
        if pending:
            match_log_rv.set(match_log_rv.get() + "".join(pending))
            await reactive.flush()
532
+
533
+
380
534
  def process_database(file_path: str):
381
535
  suffix = Path(file_path).suffix.lower()
382
536
  return {"path": file_path, "suffix": suffix}
@@ -385,13 +539,14 @@ def server(input, output, session):
385
539
  def plot_query_status():
386
540
  return query_status_rv.get() or ""
387
541
 
542
@render.text
def plot_reference_status():
    """Return the current reference-status text, or "" when it is empty/unset."""
    status = reference_status_rv.get()
    return status if status else ""
545
+
388
546
 
389
547
  @reactive.effect
390
548
  @reactive.event(input.query_data)
391
549
  async def _on_query_upload():
392
- if current_page() != "plot_spectra":
393
- return
394
-
395
550
  files = input.query_data()
396
551
  req(files and len(files) > 0)
397
552
 
@@ -414,9 +569,6 @@ def server(input, output, session):
414
569
  @reactive.effect
415
570
  @reactive.event(input.reference_data)
416
571
  async def _on_reference_upload():
417
- if current_page() != "plot_spectra":
418
- return
419
-
420
572
  files = input.reference_data()
421
573
  req(files and len(files) > 0)
422
574
 
@@ -467,6 +619,9 @@ def server(input, output, session):
467
619
  elif input.run_spec_lib_matching() > match_clicks.get():
468
620
  current_page.set("run_spec_lib_matching")
469
621
  match_clicks.set(input.run_spec_lib_matching())
622
+ elif input.run_parameter_tuning() > match_clicks.get():
623
+ current_page.set("run_parameter_tuning")
624
+ match_clicks.set(input.run_parameter_tuning())
470
625
  elif hasattr(input, "back") and input.back() > back_clicks.get():
471
626
  current_page.set("main_menu")
472
627
  back_clicks.set(input.back())
@@ -512,6 +667,7 @@ def server(input, output, session):
512
667
  ),
513
668
  ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
514
669
  ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
670
+ ui.input_action_button("run_parameter_tuning", "Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
515
671
  ui.div(
516
672
  "References:",
517
673
  style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
@@ -562,14 +718,16 @@ def server(input, output, session):
562
718
  return plot_spectra_ui(input.chromatography_platform())
563
719
  elif current_page() == "run_spec_lib_matching":
564
720
  return run_spec_lib_matching_ui(input.chromatography_platform())
721
+ elif current_page() == "run_parameter_tuning":
722
+ return run_parameter_tuning_ui(input.chromatography_platform())
565
723
 
566
724
 
567
725
 
568
726
  @reactive.effect
569
727
  @reactive.event(input.query_data)
570
728
  async def _populate_ids_from_query_upload():
571
- if current_page() != "plot_spectra":
572
- return
729
+ #if current_page() != "plot_spectra":
730
+ # return
573
731
 
574
732
  files = input.query_data()
575
733
  if not files:
@@ -628,8 +786,8 @@ def server(input, output, session):
628
786
  @reactive.effect
629
787
  @reactive.event(input.reference_data)
630
788
  async def _populate_ids_from_reference_upload():
631
- if current_page() != "plot_spectra":
632
- return
789
+ #if current_page() != "plot_spectra":
790
+ # return
633
791
 
634
792
  files = input.reference_data()
635
793
  if not files:
@@ -685,19 +843,23 @@ def server(input, output, session):
685
843
  raise
686
844
 
687
845
 
688
-
689
846
  @render.download(filename=lambda: f"plot.png")
690
847
  def run_btn_plot_spectra():
691
848
  spectrum_ID1 = input.spectrum_ID1() or None
692
849
  spectrum_ID2 = input.spectrum_ID2() or None
693
850
 
851
+ weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
852
+ weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
853
+
694
854
  if input.chromatography_platform() == "HRMS":
695
- fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
696
- #run_status_plot_spectra.set("✅ Plotting has finished.")
855
+ fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
856
+ plt.show()
697
857
  elif input.chromatography_platform() == "NRMS":
698
858
  fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
859
+ plt.show()
699
860
  with io.BytesIO() as buf:
700
861
  fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
862
+ plt.close()
701
863
  yield buf.getvalue()
702
864
 
703
865
 
@@ -705,6 +867,7 @@ def server(input, output, session):
705
867
  def status_output():
706
868
  return run_status_plot_spectra.get()
707
869
  return run_status_spec_lib_matching.get()
870
+ return run_status_parameter_tuning.get()
708
871
 
709
872
 
710
873
  class ReactiveWriter(io.TextIOBase):
@@ -718,7 +881,6 @@ def server(input, output, session):
718
881
  def _apply():
719
882
  self.rv.set(self.rv.get() + s)
720
883
  self.loop.create_task(reactive.flush())
721
-
722
884
  self.loop.call_soon_threadsafe(_apply)
723
885
  return len(s)
724
886
 
@@ -728,22 +890,24 @@ def server(input, output, session):
728
890
 
729
891
  @render.download(filename="identification_output.csv")
730
892
  async def run_btn_spec_lib_matching():
731
- # 1) quick first paint
732
893
  match_log_rv.set("Starting identification...\n")
733
894
  await reactive.flush()
734
895
 
735
- # 2) normalize inputs (same as before)
736
896
  hq = input.high_quality_reference_library()
737
897
  if isinstance(hq, str):
738
898
  hq = hq.lower() == "true"
739
899
  elif isinstance(hq, (int, float)):
740
900
  hq = bool(hq)
741
901
 
902
+ weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
903
+ weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
904
+
742
905
  common_kwargs = dict(
743
906
  query_data=input.query_data()[0]["datapath"],
744
907
  reference_data=input.reference_data()[0]["datapath"],
745
908
  likely_reference_ids=None,
746
909
  similarity_measure=input.similarity_measure(),
910
+ weights=weights,
747
911
  spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
748
912
  high_quality_reference_library=hq,
749
913
  mz_min=input.mz_min(), mz_max=input.mz_max(),
@@ -752,7 +916,7 @@ def server(input, output, session):
752
916
  wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
753
917
  LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
754
918
  n_top_matches_to_save=input.n_top_matches_to_save(),
755
- print_id_results=True, # ensure the library actually prints progress
919
+ print_id_results=True, # ensure the library actually prints
756
920
  output_identification=str(Path.cwd() / "identification_output.csv"),
757
921
  output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
758
922
  return_ID_output=True,
@@ -761,7 +925,6 @@ def server(input, output, session):
761
925
  loop = asyncio.get_running_loop()
762
926
  rw = ReactiveWriter(match_log_rv, loop)
763
927
 
764
- # 3) run the heavy function in a thread so the event loop can repaint
765
928
  try:
766
929
  with redirect_stdout(rw), redirect_stderr(rw):
767
930
  if input.chromatography_platform() == "HRMS":
@@ -772,9 +935,7 @@ def server(input, output, session):
772
935
  **common_kwargs
773
936
  )
774
937
  else:
775
- df_out = await asyncio.to_thread(
776
- run_spec_lib_matching_on_NRMS_data, **common_kwargs
777
- )
938
+ df_out = await asyncio.to_thread(run_spec_lib_matching_on_NRMS_data, **common_kwargs)
778
939
  match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
779
940
  await reactive.flush()
780
941
  except Exception as e:
@@ -782,8 +943,212 @@ def server(input, output, session):
782
943
  await reactive.flush()
783
944
  raise
784
945
 
785
- # 4) stream CSV back to the browser
946
+ yield df_out.to_csv(index=True)
947
+
948
+
949
+
950
+ @render.download(filename="plot.png")
951
+ def run_btn_plot_spectra_within_spec_lib_matching():
952
+ req(input.query_data(), input.reference_data())
953
+
954
+ spectrum_ID1 = input.spectrum_ID1() or None
955
+ spectrum_ID2 = input.spectrum_ID2() or None
956
+
957
+ hq = input.high_quality_reference_library()
958
+ if isinstance(hq, str):
959
+ hq = hq.lower() == "true"
960
+ elif isinstance(hq, (int, float)):
961
+ hq = bool(hq)
962
+
963
+ weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
964
+ weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
965
+
966
+ common = dict(
967
+ query_data=input.query_data()[0]['datapath'],
968
+ reference_data=input.reference_data()[0]['datapath'],
969
+ spectrum_ID1=spectrum_ID1,
970
+ spectrum_ID2=spectrum_ID2,
971
+ similarity_measure=input.similarity_measure(),
972
+ weights=weights,
973
+ spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
974
+ high_quality_reference_library=hq,
975
+ mz_min=input.mz_min(), mz_max=input.mz_max(),
976
+ int_min=input.int_min(), int_max=input.int_max(),
977
+ noise_threshold=input.noise_threshold(),
978
+ wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
979
+ LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
980
+ y_axis_transformation="normalized",
981
+ return_plot=True
982
+ )
983
+
984
+ if input.chromatography_platform() == "HRMS":
985
+ fig = generate_plots_on_HRMS_data(
986
+ window_size_centroiding=input.window_size_centroiding(),
987
+ window_size_matching=input.window_size_matching(),
988
+ **common
989
+ )
990
+ plt.show()
991
+ else:
992
+ fig = generate_plots_on_NRMS_data(**common)
993
+ plt.show()
994
+
995
+ with io.BytesIO() as buf:
996
+ fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
997
+ plt.close()
998
+ yield buf.getvalue()
999
+
1000
+
1001
+ '''
1002
+ @render.download(filename="parameter_tuning_output.csv")
1003
+ async def run_btn_parameter_tuning():
1004
+ match_log_rv.set("Running grid search of all parameters specified...\n")
1005
+
1006
+ similarity_measure_tmp = list(input.similarity_measure())
1007
+ high_quality_reference_library_tmp = [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()]
1008
+ spectrum_preprocessing_order_tmp = strip_text(input.spectrum_preprocessing_order())
1009
+ mz_min_tmp = strip_numeric(input.mz_min())
1010
+ mz_max_tmp = strip_numeric(input.mz_max())
1011
+ int_min_tmp = strip_numeric(input.int_min())
1012
+ int_max_tmp = strip_numeric(input.int_max())
1013
+ noise_threshold_tmp = strip_numeric(input.noise_threshold())
1014
+ wf_mz_tmp = strip_numeric(input.wf_mz())
1015
+ wf_int_tmp = strip_numeric(input.wf_int())
1016
+ LET_threshold_tmp = strip_numeric(input.LET_threshold())
1017
+ entropy_dimension_tmp = strip_numeric(input.entropy_dimension())
1018
+ weights_tmp = strip_weights(input.weights())
1019
+
1020
+ common_kwargs = dict(
1021
+ query_data=input.query_data()[0]["datapath"],
1022
+ reference_data=input.reference_data()[0]["datapath"],
1023
+ output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
1024
+ return_output=True
1025
+ )
1026
+
1027
+ loop = asyncio.get_running_loop()
1028
+ rw = ReactiveWriter(match_log_rv, loop)
1029
+
1030
+ try:
1031
+ with redirect_stdout(rw), redirect_stderr(rw):
1032
+ if input.chromatography_platform() == "HRMS":
1033
+ window_size_centroiding_tmp = strip_numeric(input.window_size_centroiding())
1034
+ window_size_matching_tmp = strip_numeric(input.window_size_matching())
1035
+ grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp, 'window_size_centroiding':window_size_centroiding_tmp, 'window_size_matching':window_size_matching_tmp}
1036
+ df_out = await asyncio.to_thread(tune_params_on_HRMS_data, **common_kwargs, grid=grid)
1037
+ else:
1038
+ grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp}
1039
+ df_out = await asyncio.to_thread(tune_params_on_NRMS_data, **common_kwargs, grid=grid)
1040
+ match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
1041
+ #await reactive.flush()
1042
+ except Exception as e:
1043
+ match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
1044
+ #await reactive.flush()
1045
+ raise
1046
+
786
1047
  yield df_out.to_csv(index=False)
1048
+ '''
1049
+
1050
+
1051
+ @render.download(filename="parameter_tuning_output.csv")
1052
+ async def run_btn_parameter_tuning():
1053
+ is_any_job_running.set(True)
1054
+ is_tuning_running.set(True)
1055
+ match_log_rv.set("Running grid search of all parameters specified...\n")
1056
+ _drain_queue_nowait(_LOG_QUEUE)
1057
+
1058
+ similarity_measure_tmp = list(input.similarity_measure())
1059
+ high_quality_reference_library_tmp = [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()]
1060
+ spectrum_preprocessing_order_tmp = strip_text(input.spectrum_preprocessing_order())
1061
+ mz_min_tmp = strip_numeric(input.mz_min())
1062
+ mz_max_tmp = strip_numeric(input.mz_max())
1063
+ int_min_tmp = strip_numeric(input.int_min())
1064
+ int_max_tmp = strip_numeric(input.int_max())
1065
+ noise_threshold_tmp = strip_numeric(input.noise_threshold())
1066
+ wf_mz_tmp = strip_numeric(input.wf_mz())
1067
+ wf_int_tmp = strip_numeric(input.wf_int())
1068
+ LET_threshold_tmp = strip_numeric(input.LET_threshold())
1069
+ entropy_dimension_tmp = strip_numeric(input.entropy_dimension())
1070
+ weights_tmp = strip_weights(input.weights())
1071
+
1072
+ common_kwargs = dict(
1073
+ query_data=input.query_data()[0]["datapath"],
1074
+ reference_data=input.reference_data()[0]["datapath"],
1075
+ output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
1076
+ return_output=True
1077
+ )
1078
+
1079
+ loop = asyncio.get_running_loop()
1080
+ rw = ReactiveWriter(match_log_rv,loop)
1081
+
1082
+ try:
1083
+ with redirect_stdout(ReactiveWriter(match_log_rv, asyncio.get_running_loop())), redirect_stderr(ReactiveWriter(match_log_rv, asyncio.get_running_loop())):
1084
+ if input.chromatography_platform() == "HRMS":
1085
+ window_size_centroiding_tmp = strip_numeric(input.window_size_centroiding())
1086
+ window_size_matching_tmp = strip_numeric(input.window_size_matching())
1087
+ grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp, 'window_size_centroiding':window_size_centroiding_tmp, 'window_size_matching':window_size_matching_tmp}
1088
+ df_out = await asyncio.to_thread(tune_params_on_HRMS_data, **common_kwargs, grid=grid)
1089
+ else:
1090
+ grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp}
1091
+ df_out = await asyncio.to_thread(tune_params_on_NRMS_data, **common_kwargs, grid=grid)
1092
+
1093
+ match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
1094
+ except Exception as e:
1095
+ match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
1096
+ raise
1097
+ finally:
1098
+ is_tuning_running.set(False)
1099
+ is_any_job_running.set(False)
1100
+ trailing = _drain_queue_nowait(_LOG_QUEUE)
1101
+ if trailing:
1102
+ match_log_rv.set(match_log_rv.get() + "".join(trailing))
1103
+ await reactive.flush()
1104
+
1105
+ #yield df_out.to_csv(index=False)
1106
+ csv_bytes = df_out.to_csv(index=False).encode('utf-8')
1107
+ yield csv_bytes
1108
+
1109
+
1110
+
1111
+ @render.text
1112
+ def status_output():
1113
+ return run_status_plot_spectra.get()
1114
+ return run_status_spec_lib_matching.get()
1115
+ return run_status_parameter_tuning.get()
1116
+
1117
+
1118
+ class ReactiveWriter(io.TextIOBase):
1119
+ def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
1120
+ self._rv = rv
1121
+ self._loop = loop
1122
+
1123
+ def write(self, s: str):
1124
+ if not s:
1125
+ return 0
1126
+ self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
1127
+ return len(s)
1128
+
1129
+ def flush(self):
1130
+ pass
1131
+
1132
+
1133
+ @reactive.effect
1134
+ async def _pump_reactive_writer_logs():
1135
+ if not is_tuning_running.get():
1136
+ return
1137
+
1138
+ reactive.invalidate_later(0.1)
1139
+ msgs = _drain_queue_nowait(_LOG_QUEUE)
1140
+ if msgs:
1141
+ match_log_rv.set(match_log_rv.get() + "".join(msgs))
1142
+ await reactive.flush()
1143
+
1144
+
1145
+
1146
+ @render.text
1147
+ def status_output():
1148
+ return run_status_plot_spectra.get()
1149
+ return run_status_spec_lib_matching.get()
1150
+ return run_status_parameter_tuning.get()
1151
+
787
1152
 
788
1153
 
789
1154
  app = App(app_ui, server)
@@ -177,8 +177,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
177
177
  spec_tmp = spectrum_ID1
178
178
  spectrum_ID1 = spectrum_ID2
179
179
  spectrum_ID2 = spec_tmp
180
- print(unique_query_ids)
181
- print(spectrum_ID1)
182
180
  query_idx = unique_query_ids.index(spectrum_ID1)
183
181
  reference_idx = unique_reference_ids.index(spectrum_ID2)
184
182
  q_idxs_tmp = np.where(df_query.iloc[:,0].astype(str) == unique_query_ids[query_idx])[0]
@@ -311,7 +309,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
311
309
  plt.figlegend(loc = 'upper center')
312
310
  fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
313
311
  fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
314
- fig.text(0.05, 0.12, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
312
+ fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
315
313
  fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
316
314
  fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
317
315
  fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
@@ -320,6 +318,9 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
320
318
  fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
321
319
  fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
322
320
  fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
321
+ if similarity_measure == 'mixture':
322
+ fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
323
+
323
324
  plt.savefig(output_path, format='pdf')
324
325
 
325
326
  if return_plot == True:
@@ -606,13 +607,15 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
606
607
  plt.figlegend(loc = 'upper center')
607
608
  fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
608
609
  fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
609
- fig.text(0.05, 0.09, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
610
+ fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
610
611
  fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
611
612
  fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
612
613
  fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
613
614
  fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
614
615
  fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
615
616
  fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
617
+ if similarity_measure=='mixture':
618
+ fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
616
619
  plt.savefig(output_path, format='pdf')
617
620
 
618
621
  if return_plot == True:
@@ -9,6 +9,12 @@ from pathlib import Path
9
9
  import json
10
10
  from itertools import product
11
11
  from joblib import Parallel, delayed
12
+ import csv
13
+
14
+
15
+ default_HRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
16
+ default_NRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
17
+
12
18
 
13
19
  def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
14
20
  similarity_measure_tmp, weight,
@@ -71,7 +77,8 @@ def _eval_one_NRMS(df_query, df_reference, unique_query_ids, unique_reference_id
71
77
  )
72
78
 
73
79
 
74
- def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=None):
80
+
81
+ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
75
82
  """
76
83
  runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
77
84
 
@@ -81,6 +88,7 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
81
88
  --output_path: accuracy from each choice of parameter set is saved to a CSV file here.
82
89
  """
83
90
 
91
+ grid = {**default_HRMS_grid, **(grid or {})}
84
92
  for key, value in grid.items():
85
93
  globals()[key] = value
86
94
 
@@ -118,24 +126,35 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
118
126
  print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
119
127
 
120
128
  if output_path is None:
121
- output_path = f'{Path.cwd()}/tuning_param_output.csv'
129
+ output_path = f'{Path.cwd()}/tuning_param_output.txt'
122
130
  print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
123
131
 
124
- # build parameter grid out of the lists you already set
125
132
  param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold,
126
133
  window_size_centroiding, window_size_matching, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
127
- # run in parallel on all CPUs
128
134
  results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_HRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
129
135
 
130
136
  df_out = pd.DataFrame(results, columns=[
131
137
  'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX','NOISE.THRESHOLD',
132
138
  'WINDOW.SIZE.CENTROIDING','WINDOW.SIZE.MATCHING', 'WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
133
139
  ])
134
- df_out = df_out.drop(columns=['WEIGHT'])
135
- df_out.to_csv(output_path, index=False)
140
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
141
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
142
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
143
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
144
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
145
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
146
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
147
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
148
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
149
+ df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
150
+
151
+ if return_output is False:
152
+ df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
153
+ else:
154
+ return df_out
136
155
 
137
156
 
138
- def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=None):
157
+ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
139
158
  """
140
159
  runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
141
160
 
@@ -145,10 +164,10 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
145
164
  --output_path: accuracy from each choice of parameter set is saved to a CSV file here
146
165
  """
147
166
 
167
+ grid = {**default_NRMS_grid, **(grid or {})}
148
168
  for key, value in grid.items():
149
169
  globals()[key] = value
150
170
 
151
- # load query and reference libraries
152
171
  if query_data is None:
153
172
  print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
154
173
  sys.exit()
@@ -182,21 +201,30 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
182
201
  print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
183
202
 
184
203
  if output_path is None:
185
- output_path = f'{Path.cwd()}/tuning_param_output.csv'
204
+ output_path = f'{Path.cwd()}/tuning_param_output.txt'
186
205
  print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
187
206
 
188
- # build parameter grid out of the lists you already set
189
207
  param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max,
190
208
  noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
191
- # run in parallel on all CPUs
192
209
  results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_NRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
193
210
 
194
211
  df_out = pd.DataFrame(results, columns=[
195
212
  'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX',
196
213
  'NOISE.THRESHOLD','WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
197
214
  ])
198
- df_out = df_out.drop(columns=['WEIGHT'])
199
- df_out.to_csv(output_path, index=False)
215
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
216
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
217
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
218
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
219
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
220
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
221
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
222
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
223
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
224
+ if return_output is False:
225
+ df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
226
+ else:
227
+ return df_out
200
228
 
201
229
 
202
230
 
@@ -413,8 +441,8 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
413
441
  --entropy_dimension: Entropy dimension parameter. Must have positive value other than 1. When the entropy dimension is 1, then Renyi and Tsallis entropy are equivalent to Shannon entropy. Therefore, this parameter only applies to the renyi and tsallis similarity measures. This parameter will be ignored if similarity measure cosine or shannon is chosen. Default: 1.1
414
442
  --n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
415
443
  --print_id_results: Flag that prints identification results if True. Default: False
416
- --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.csv\'.
417
- --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.csv.')
444
+ --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
445
+ --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.txt.')
418
446
  '''
419
447
 
420
448
  # load query and reference libraries
@@ -528,11 +556,11 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
528
556
  sys.exit()
529
557
 
530
558
  if output_identification is None:
531
- output_identification = f'{Path.cwd()}/output_identification.csv'
559
+ output_identification = f'{Path.cwd()}/output_identification.txt'
532
560
  print(f'Warning: writing identification output to {output_identification}')
533
561
 
534
562
  if output_similarity_scores is None:
535
- output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.csv'
563
+ output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
536
564
  print(f'Warning: writing similarity scores to {output_similarity_scores}')
537
565
 
538
566
 
@@ -644,10 +672,10 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
644
672
 
645
673
  if return_ID_output is False:
646
674
  # write spectral library matching results to disk
647
- df_top_ref_specs.to_csv(output_identification)
675
+ df_top_ref_specs.to_csv(output_identification, sep='\t')
648
676
 
649
677
  # write all similarity scores to disk
650
- df_scores.to_csv(output_similarity_scores)
678
+ df_scores.to_csv(output_similarity_scores, sep='\t')
651
679
  else:
652
680
  return df_top_ref_specs
653
681
 
@@ -678,8 +706,8 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
678
706
  --normalization_method: Method used to normalize the intensities of each spectrum so that the intensities sum to 1. Since the objects entropy quantifies the uncertainy of must be probability distributions, the intensities of a given spectrum must sum to 1 prior to computing the entropy of the given spectrum intensities. Options: \'standard\' and \'softmax\'. Default: standard.
679
707
  --n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
680
708
  --print_id_results: Flag that prints identification results if True. Default: False
681
- --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.csv\'.
682
- --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.csv.')
709
+ --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
710
+ --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.txt.')
683
711
  '''
684
712
 
685
713
  # load query and reference libraries
@@ -778,11 +806,11 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
778
806
  sys.exit()
779
807
 
780
808
  if output_identification is None:
781
- output_identification = f'{Path.cwd()}/output_identification.csv'
809
+ output_identification = f'{Path.cwd()}/output_identification.txt'
782
810
  print(f'Warning: writing identification output to {output_identification}')
783
811
 
784
812
  if output_similarity_scores is None:
785
- output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.csv'
813
+ output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
786
814
  print(f'Warning: writing similarity scores to {output_similarity_scores}')
787
815
 
788
816
 
@@ -894,11 +922,11 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
894
922
 
895
923
  if return_ID_output is False:
896
924
  # write spectral library matching results to disk
897
- df_top_ref_specs.to_csv(output_identification)
925
+ df_top_ref_specs.to_csv(output_identification, sep='\t')
898
926
 
899
927
  # write all similarity scores to disk
900
928
  df_scores.columns = ['Reference Spectrum ID: ' + col for col in list(map(str,df_scores.columns.tolist()))]
901
- df_scores.to_csv(output_similarity_scores)
929
+ df_scores.to_csv(output_similarity_scores, sep='\t')
902
930
  else:
903
931
  return df_top_ref_specs
904
932
 
pycompound/tuning_CLI.py CHANGED
@@ -40,8 +40,7 @@ else:
40
40
  sys.exit()
41
41
 
42
42
 
43
- grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':args.weights, 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
44
-
43
+ grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':[args.weights], 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
45
44
 
46
45
  if args.chromatography_platform == 'HRMS':
47
46
  grid['mz_min'] = [float(x) for x in grid['mz_min']]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycompound
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
5
5
  Author-email: Hunter Dlugas <fy7392@wayne.edu>
6
6
  License-Expression: MIT
@@ -0,0 +1,14 @@
1
+ app.py,sha256=k5mPyctA1eWkGjtnKrJb7STuweh_aH4HmPUH07jO92Y,53841
2
+ pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
3
+ pycompound/plot_spectra.py,sha256=Q7nDSW3Y5pR_Ql4JeEmyd6KRRyzvxk9j0yaUR0hfjJc,42275
4
+ pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
5
+ pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
6
+ pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
7
+ pycompound/spec_lib_matching.py,sha256=AAMxWqi6LXWo-tJ-uqJ4QxfHSg8bX3G_DJVt2bLLMcM,61860
8
+ pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
9
+ pycompound/tuning_CLI.py,sha256=dSFLwMiI0_6G4YDZR5ubqn9-75ixOvDPZMOoGS-_B6w,8540
10
+ pycompound-0.1.1.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
11
+ pycompound-0.1.1.dist-info/METADATA,sha256=XZtkvSau_Z723iCgy_LTR1CkYryDxXBdIFtb_D_E9u0,1732
12
+ pycompound-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ pycompound-0.1.1.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
14
+ pycompound-0.1.1.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- app.py,sha256=ab1hII23lVwAmMh4bfzdni50vz-bK-ODbJT_b1VjGMA,34678
2
- pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
3
- pycompound/plot_spectra.py,sha256=_5r9YR3AA2IfTbcyfyTnPxxxA92T4hQ9olOgaw7FE6A,42082
4
- pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
5
- pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
6
- pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
7
- pycompound/spec_lib_matching.py,sha256=p8gj-72fjkf0p7XrqEl9hnYUGNSbyr7BXugvRT7Y5OA,60311
8
- pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
9
- pycompound/tuning_CLI.py,sha256=lkFBRZ5VxCBteIh_KTkQFdUBVZA0dL-BLiyMZce1vzE,8539
10
- pycompound-0.1.0.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
11
- pycompound-0.1.0.dist-info/METADATA,sha256=qfM4rP0BeGThYpxvGa7vOseRsUgtJ4aH8hgUtio0Ugw,1732
12
- pycompound-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
- pycompound-0.1.0.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
14
- pycompound-0.1.0.dist-info/RECORD,,