pycompound 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app.py CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
- from shiny import App, ui, reactive, render
2
+ from shiny import App, ui, reactive, render, req
3
3
  from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
4
4
  from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
5
5
  from pycompound.spec_lib_matching import tune_params_on_HRMS_data
@@ -7,14 +7,149 @@ from pycompound.spec_lib_matching import tune_params_on_NRMS_data
7
7
  from pycompound.plot_spectra import generate_plots_on_HRMS_data
8
8
  from pycompound.plot_spectra import generate_plots_on_NRMS_data
9
9
  from pathlib import Path
10
+ from contextlib import redirect_stdout, redirect_stderr
10
11
  import subprocess
11
12
  import traceback
12
13
  import asyncio
13
14
  import io
14
- #import matplotlib
15
- #matplotlib.use('agg')
15
+ import os
16
+ import sys
16
17
  import matplotlib.pyplot as plt
17
- #from matplotlib.figure import Figure
18
+ import pandas as pd
19
+ import numpy as np
20
+ import netCDF4 as nc
21
+ from pyteomics import mgf
22
+ from pyteomics import mzml
23
+
24
+
25
+ def build_library(input_path=None, output_path=None):
26
+ last_three_chars = input_path[(len(input_path)-3):len(input_path)]
27
+ last_four_chars = input_path[(len(input_path)-4):len(input_path)]
28
+ if last_three_chars == 'csv' or last_three_chars == 'CSV':
29
+ return pd.read_csv(input_path)
30
+ else:
31
+ if last_three_chars == 'mgf' or last_three_chars == 'MGF':
32
+ input_file_type = 'mgf'
33
+ elif last_four_chars == 'mzML' or last_four_chars == 'mzml' or last_four_chars == 'MZML':
34
+ input_file_type = 'mzML'
35
+ elif last_three_chars == 'cdf' or last_three_chars == 'CDF':
36
+ input_file_type = 'cdf'
37
+ elif last_three_chars == 'msp' or last_three_chars == 'MSP':
38
+ input_file_type = 'msp'
39
+ else:
40
+ print('ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path')
41
+ sys.exit()
42
+
43
+ spectra = []
44
+ if input_file_type == 'mgf':
45
+ with mgf.read(input_path, index_by_scans = True) as reader:
46
+ for spec in reader:
47
+ spectra.append(spec)
48
+ if input_file_type == 'mzML':
49
+ with mzml.read(input_path) as reader:
50
+ for spec in reader:
51
+ spectra.append(spec)
52
+
53
+ if input_file_type == 'mgf' or input_file_type == 'mzML':
54
+ ids = []
55
+ mzs = []
56
+ ints = []
57
+ for i in range(0,len(spectra)):
58
+ for j in range(0,len(spectra[i]['m/z array'])):
59
+ if input_file_type == 'mzML':
60
+ ids.append(f'ID_{i+1}')
61
+ else:
62
+ ids.append(spectra[i]['params']['name'])
63
+ mzs.append(spectra[i]['m/z array'][j])
64
+ ints.append(spectra[i]['intensity array'][j])
65
+
66
+ if input_file_type == 'cdf':
67
+ dataset = nc.Dataset(input_path, 'r')
68
+ all_mzs = dataset.variables['mass_values'][:]
69
+ all_ints = dataset.variables['intensity_values'][:]
70
+ scan_idxs = dataset.variables['scan_index'][:]
71
+ dataset.close()
72
+
73
+ ids = []
74
+ mzs = []
75
+ ints = []
76
+ for i in range(0,(len(scan_idxs)-1)):
77
+ if i % 1000 == 0:
78
+ print(f'analyzed {i} out of {len(scan_idxs)} scans')
79
+ s_idx = scan_idxs[i]
80
+ e_idx = scan_idxs[i+1]
81
+
82
+ mzs_tmp = all_mzs[s_idx:e_idx]
83
+ ints_tmp = all_ints[s_idx:e_idx]
84
+
85
+ for j in range(0,len(mzs_tmp)):
86
+ ids.append(f'ID_{i+1}')
87
+ mzs.append(mzs_tmp[j])
88
+ ints.append(ints_tmp[j])
89
+
90
+ if input_file_type == 'msp':
91
+ ids = []
92
+ mzs = []
93
+ ints = []
94
+ with open(input_path, 'r') as f:
95
+ i = 0
96
+ for line in f:
97
+ line = line.strip()
98
+ if line.startswith('Name:'):
99
+ i += 1
100
+ spectrum_id = line.replace('Name: ','')
101
+ elif line and line[0].isdigit():
102
+ try:
103
+ mz, intensity = map(float, line.split()[:2])
104
+ ids.append(spectrum_id)
105
+ mzs.append(mz)
106
+ ints.append(intensity)
107
+ except ValueError:
108
+ continue
109
+
110
+ df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
111
+ return df
112
+
113
+
114
+
115
+ def extract_first_column_ids(file_path: str, max_ids: int = 20000):
116
+ suffix = Path(file_path).suffix.lower()
117
+
118
+ if suffix == ".csv":
119
+ df = pd.read_csv(file_path, usecols=[0])
120
+ ids = df.iloc[:, 0].astype(str).dropna()
121
+ ids = [x for x in ids if x.strip() != ""]
122
+ seen = set()
123
+ uniq = []
124
+ for x in ids:
125
+ if x not in seen:
126
+ uniq.append(x)
127
+ seen.add(x)
128
+ return uniq[:max_ids]
129
+
130
+ ids = []
131
+ try:
132
+ with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
133
+ for line in f:
134
+ ls = line.strip()
135
+ if ls.startswith("TITLE="):
136
+ ids.append(ls.split("=", 1)[1].strip())
137
+ elif ls.lower().startswith("name:"):
138
+ ids.append(ls.split(":", 1)[1].strip())
139
+ if len(ids) >= max_ids:
140
+ break
141
+ except Exception:
142
+ pass
143
+
144
+ if ids:
145
+ seen = set()
146
+ uniq = []
147
+ for x in ids:
148
+ if x not in seen:
149
+ uniq.append(x)
150
+ seen.add(x)
151
+ return uniq
152
+ return []
18
153
 
19
154
 
20
155
  def plot_spectra_ui(platform: str):
@@ -22,8 +157,20 @@ def plot_spectra_ui(platform: str):
22
157
  base_inputs = [
23
158
  ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
24
159
  ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
25
- ui.input_text("spectrum_ID1", "Input ID of one spectrum to be plotted:", None),
26
- ui.input_text("spectrum_ID2", "Input ID of another spectrum to be plotted:", None),
160
+ ui.input_selectize(
161
+ "spectrum_ID1",
162
+ "Select spectrum ID 1:",
163
+ choices=[],
164
+ multiple=False,
165
+ options={"placeholder": "Upload a query file to load IDs..."},
166
+ ),
167
+ ui.input_selectize(
168
+ "spectrum_ID2",
169
+ "Select spectrum ID 2 (optional):",
170
+ choices=[],
171
+ multiple=False,
172
+ options={"placeholder": "Upload a reference file to load IDs..."},
173
+ ),
27
174
  ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
28
175
  ui.input_select(
29
176
  "high_quality_reference_library",
@@ -77,7 +224,6 @@ def plot_spectra_ui(platform: str):
77
224
  run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
78
225
  back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
79
226
 
80
- #print(len(extra_inputs))
81
227
  # Layout base_inputs and extra_inputs in columns
82
228
  if platform == "HRMS":
83
229
  inputs_columns = ui.layout_columns(
@@ -102,7 +248,8 @@ def plot_spectra_ui(platform: str):
102
248
  ui.h2("Plot Spectra"),
103
249
  inputs_columns,
104
250
  run_button_plot_spectra,
105
- back_button
251
+ back_button,
252
+ ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px")
106
253
  ),
107
254
  )
108
255
 
@@ -179,13 +326,20 @@ def run_spec_lib_matching_ui(platform: str):
179
326
  col_widths=(3, 3, 3, 3),
180
327
  )
181
328
 
329
+ log_panel = ui.card(
330
+ ui.card_header("Identification log"),
331
+ ui.output_text_verbatim("match_log"),
332
+ style="max-height:300px; overflow:auto"
333
+ )
334
+
182
335
  # Combine everything
183
336
  return ui.div(
184
337
  ui.TagList(
185
338
  ui.h2("Run Spectral Library Matching"),
186
339
  inputs_columns,
187
340
  run_button_spec_lib_matching,
188
- back_button
341
+ back_button,
342
+ log_panel,
189
343
  ),
190
344
  )
191
345
 
@@ -207,11 +361,106 @@ def server(input, output, session):
207
361
 
208
362
  run_status_plot_spectra = reactive.Value("")
209
363
  run_status_spec_lib_matching = reactive.Value("")
364
+ match_log_rv = reactive.Value("")
365
+ is_matching_rv = reactive.Value(False)
366
+
367
+ query_ids_rv = reactive.Value([])
368
+ query_file_path_rv = reactive.Value(None)
369
+ query_result_rv = reactive.Value(None)
370
+ query_status_rv = reactive.Value("")
371
+ reference_ids_rv = reactive.Value([])
372
+ reference_file_path_rv = reactive.Value(None)
373
+ reference_result_rv = reactive.Value(None)
374
+ reference_status_rv = reactive.Value("")
375
+
376
+ converted_query_path_rv = reactive.Value(None)
377
+ converted_reference_path_rv = reactive.Value(None)
378
+
379
+
380
+ def process_database(file_path: str):
381
+ suffix = Path(file_path).suffix.lower()
382
+ return {"path": file_path, "suffix": suffix}
383
+
384
+ @render.text
385
+ def plot_query_status():
386
+ return query_status_rv.get() or ""
387
+
388
+
389
+ @reactive.effect
390
+ @reactive.event(input.query_data)
391
+ async def _on_query_upload():
392
+ if current_page() != "plot_spectra":
393
+ return
394
+
395
+ files = input.query_data()
396
+ req(files and len(files) > 0)
397
+
398
+ file_path = files[0]["datapath"]
399
+ query_file_path_rv.set(file_path)
400
+
401
+ query_status_rv.set(f"Processing query database: {Path(file_path).name} …")
402
+ await reactive.flush()
403
+
404
+ try:
405
+ result = await asyncio.to_thread(process_database, file_path)
406
+ query_result_rv.set(result)
407
+ query_status_rv.set("✅ Query database processed.")
408
+ await reactive.flush()
409
+ except Exception as e:
410
+ query_status_rv.set(f"❌ Failed to process query database: {e}")
411
+ await reactive.flush()
412
+
413
+
414
+ @reactive.effect
415
+ @reactive.event(input.reference_data)
416
+ async def _on_reference_upload():
417
+ if current_page() != "plot_spectra":
418
+ return
419
+
420
+ files = input.reference_data()
421
+ req(files and len(files) > 0)
422
+
423
+ file_path = files[0]["datapath"]
424
+ reference_file_path_rv.set(file_path)
425
+
426
+ reference_status_rv.set(f"Processing reference database: {Path(file_path).name} …")
427
+ await reactive.flush()
428
+
429
+ try:
430
+ result = await asyncio.to_thread(process_database, file_path)
431
+ reference_result_rv.set(result)
432
+ reference_status_rv.set("✅ Reference database processed.")
433
+ await reactive.flush()
434
+ except Exception as e:
435
+ reference_status_rv.set(f"❌ Failed to process reference database: {e}")
436
+ await reactive.flush()
437
+
438
+
439
+ @render.text
440
+ def match_log():
441
+ return match_log_rv.get()
442
+
443
+
444
+ class ReactiveWriter(io.TextIOBase):
445
+ def __init__(self, rv):
446
+ self.rv = rv
447
+ def write(self, s: str):
448
+ if not s:
449
+ return 0
450
+ self.rv.set(self.rv.get() + s)
451
+ try:
452
+ loop = asyncio.get_running_loop()
453
+ loop.create_task(reactive.flush())
454
+ except RuntimeError:
455
+ pass
456
+ return len(s)
457
+ def flush(self):
458
+ pass
459
+
210
460
 
211
461
 
212
462
  @reactive.Effect
213
463
  def _():
214
- # Main menu buttons
215
464
  if input.plot_spectra() > plot_clicks.get():
216
465
  current_page.set("plot_spectra")
217
466
  plot_clicks.set(input.plot_spectra())
@@ -315,56 +564,126 @@ def server(input, output, session):
315
564
  return run_spec_lib_matching_ui(input.chromatography_platform())
316
565
 
317
566
 
318
- '''
567
+
319
568
  @reactive.effect
320
- @reactive.event(input.run_btn_plot_spectra)
321
- def _():
322
- if current_page() == "plot_spectra":
323
- if len(input.spectrum_ID1())==0:
324
- spectrum_ID1 = None
325
- else:
326
- spectrum_ID1 = input.spectrum_ID1()
327
- if len(input.spectrum_ID2())==0:
328
- spectrum_ID2 = None
569
+ @reactive.event(input.query_data)
570
+ async def _populate_ids_from_query_upload():
571
+ if current_page() != "plot_spectra":
572
+ return
573
+
574
+ files = input.query_data()
575
+ if not files:
576
+ return
577
+
578
+ in_path = Path(files[0]["datapath"])
579
+ suffix = in_path.suffix.lower()
580
+
581
+ # Decide what CSV to read IDs from
582
+ try:
583
+ if suffix == ".csv":
584
+ csv_path = in_path
585
+ converted_query_path_rv.set(str(csv_path))
329
586
  else:
330
- spectrum_ID2 = input.spectrum_ID2()
331
-
332
- if input.chromatography_platform() == "HRMS":
333
- try:
334
- fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
335
- #plt.show()
336
- with io.BytesIO() as buf:
337
- plt.savefig(buf, format="png", dpi=150, bbox_inches="tight")
338
- yield buf.getvalue()
339
- run_status_plot_spectra.set(f"✅ Plotting has finished.")
340
- except Exception as e:
341
- run_status_plot_spectra.set(f"❌ Error: {traceback.format_exc()}")
342
- elif input.chromatography_platform() == "NRMS":
343
- try:
344
- generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
345
- #plt.show()
346
- run_status_plot_spectra.set(f"✅ Plotting has finished.")
347
- except Exception as e:
348
- run_status_plot_spectra.set(f"❌ Error: {traceback.format_exc()}")
587
+ query_status_rv.set(f"Converting {in_path.name} → CSV …")
588
+ await reactive.flush()
589
+
590
+ # Choose an output temp path next to the upload
591
+ tmp_csv_path = in_path.with_suffix(".converted.csv")
592
+
593
+ out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
594
+
595
+ # out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
596
+ if isinstance(out_obj, (str, os.PathLike, Path)):
597
+ csv_path = Path(out_obj)
598
+ elif isinstance(out_obj, pd.DataFrame):
599
+ # Write the DF to our chosen path
600
+ out_obj.to_csv(tmp_csv_path, index=False)
601
+ csv_path = tmp_csv_path
602
+ else:
603
+ raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
604
+
605
+ converted_query_path_rv.set(str(csv_path))
606
+
607
+ query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
608
+ await reactive.flush()
609
+
610
+ # Extract IDs from the CSV’s first column
611
+ ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
612
+ query_ids_rv.set(ids)
613
+
614
+ # Update dropdowns
615
+ ui.update_selectize("spectrum_ID1", choices=ids, selected=(ids[0] if ids else None))
616
+
617
+ query_status_rv.set(
618
+ f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}"
619
+ )
620
+ await reactive.flush()
621
+
622
+ except Exception as e:
623
+ query_status_rv.set(f"❌ Failed: {e}")
624
+ await reactive.flush()
625
+ raise
349
626
 
350
627
 
351
628
  @reactive.effect
352
- @reactive.event(input.run_btn_run_spec_lib_matching)
353
- def _():
354
- if current_page() == 'run_spec_lib_matching':
355
- if input.chromatography_platform() == 'HRMS':
356
- try:
357
- run_spec_lib_matching_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/')
358
- run_status_spec_lib_matching.set(f"✅ Spectral library matching has finished.")
359
- except Exception as e:
360
- run_status_spec_lib_matching.set(f"❌ Error: {traceback.format_exc()}")
361
- elif input.chromatography_platform() == 'NRMS':
362
- try:
363
- run_spec_lib_matching_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/output_similarity_scores.csv')
364
- run_status_spec_lib_matching.set(f"✅ Spectral library matching has finished.")
365
- except Exception as e:
366
- run_status_spec_lib_matching.set(f"❌ Error: {traceback.format_exc()}")
367
- '''
629
+ @reactive.event(input.reference_data)
630
+ async def _populate_ids_from_reference_upload():
631
+ if current_page() != "plot_spectra":
632
+ return
633
+
634
+ files = input.reference_data()
635
+ if not files:
636
+ return
637
+
638
+ in_path = Path(files[0]["datapath"])
639
+ suffix = in_path.suffix.lower()
640
+
641
+ # Decide what CSV to read IDs from
642
+ try:
643
+ if suffix == ".csv":
644
+ csv_path = in_path
645
+ converted_reference_path_rv.set(str(csv_path))
646
+ else:
647
+ reference_status_rv.set(f"Converting {in_path.name} → CSV …")
648
+ await reactive.flush()
649
+
650
+ # Choose an output temp path next to the upload
651
+ tmp_csv_path = in_path.with_suffix(".converted.csv")
652
+
653
+ out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
654
+
655
+ # out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
656
+ if isinstance(out_obj, (str, os.PathLike, Path)):
657
+ csv_path = Path(out_obj)
658
+ elif isinstance(out_obj, pd.DataFrame):
659
+ # Write the DF to our chosen path
660
+ out_obj.to_csv(tmp_csv_path, index=False)
661
+ csv_path = tmp_csv_path
662
+ else:
663
+ raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
664
+
665
+ converted_reference_path_rv.set(str(csv_path))
666
+
667
+ reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
668
+ await reactive.flush()
669
+
670
+ # Extract IDs from the CSV’s first column
671
+ ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
672
+ reference_ids_rv.set(ids)
673
+
674
+ # Update dropdowns
675
+ ui.update_selectize("spectrum_ID2", choices=ids, selected=(ids[0] if ids else None))
676
+
677
+ reference_status_rv.set(
678
+ f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}"
679
+ )
680
+ await reactive.flush()
681
+
682
+ except Exception as e:
683
+ reference_status_rv.set(f"❌ Failed: {e}")
684
+ await reactive.flush()
685
+ raise
686
+
368
687
 
369
688
 
370
689
  @render.download(filename=lambda: f"plot.png")
@@ -382,23 +701,90 @@ def server(input, output, session):
382
701
  yield buf.getvalue()
383
702
 
384
703
 
385
- @render.download(filename=lambda: f"plot.png")
386
- def run_btn_spec_lib_matching():
387
- if input.chromatography_platform() == "HRMS":
388
- df_out = run_spec_lib_matching_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/', return_ID_output=True)
389
- elif input.chromatography_platform() == "NRMS":
390
- df_out = run_spec_lib_matching_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/output_similarity_scores.csv', return_ID_output=True)
391
-
392
- df_out.to_csv(io.StringIO(), index=False)
393
- return buf.getvalue().encode('utf-8')
394
-
395
-
396
704
  @render.text
397
705
  def status_output():
398
706
  return run_status_plot_spectra.get()
399
707
  return run_status_spec_lib_matching.get()
400
708
 
401
709
 
710
+ class ReactiveWriter(io.TextIOBase):
711
+ def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
712
+ self.rv = rv
713
+ self.loop = loop
714
+
715
+ def write(self, s: str):
716
+ if not s:
717
+ return 0
718
+ def _apply():
719
+ self.rv.set(self.rv.get() + s)
720
+ self.loop.create_task(reactive.flush())
721
+
722
+ self.loop.call_soon_threadsafe(_apply)
723
+ return len(s)
724
+
725
+ def flush(self):
726
+ pass
727
+
728
+
729
+ @render.download(filename="identification_output.csv")
730
+ async def run_btn_spec_lib_matching():
731
+ # 1) quick first paint
732
+ match_log_rv.set("Starting identification...\n")
733
+ await reactive.flush()
734
+
735
+ # 2) normalize inputs (same as before)
736
+ hq = input.high_quality_reference_library()
737
+ if isinstance(hq, str):
738
+ hq = hq.lower() == "true"
739
+ elif isinstance(hq, (int, float)):
740
+ hq = bool(hq)
741
+
742
+ common_kwargs = dict(
743
+ query_data=input.query_data()[0]["datapath"],
744
+ reference_data=input.reference_data()[0]["datapath"],
745
+ likely_reference_ids=None,
746
+ similarity_measure=input.similarity_measure(),
747
+ spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
748
+ high_quality_reference_library=hq,
749
+ mz_min=input.mz_min(), mz_max=input.mz_max(),
750
+ int_min=input.int_min(), int_max=input.int_max(),
751
+ noise_threshold=input.noise_threshold(),
752
+ wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
753
+ LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
754
+ n_top_matches_to_save=input.n_top_matches_to_save(),
755
+ print_id_results=True, # ensure the library actually prints progress
756
+ output_identification=str(Path.cwd() / "identification_output.csv"),
757
+ output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
758
+ return_ID_output=True,
759
+ )
760
+
761
+ loop = asyncio.get_running_loop()
762
+ rw = ReactiveWriter(match_log_rv, loop)
763
+
764
+ # 3) run the heavy function in a thread so the event loop can repaint
765
+ try:
766
+ with redirect_stdout(rw), redirect_stderr(rw):
767
+ if input.chromatography_platform() == "HRMS":
768
+ df_out = await asyncio.to_thread(
769
+ run_spec_lib_matching_on_HRMS_data,
770
+ window_size_centroiding=input.window_size_centroiding(),
771
+ window_size_matching=input.window_size_matching(),
772
+ **common_kwargs
773
+ )
774
+ else:
775
+ df_out = await asyncio.to_thread(
776
+ run_spec_lib_matching_on_NRMS_data, **common_kwargs
777
+ )
778
+ match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
779
+ await reactive.flush()
780
+ except Exception as e:
781
+ match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
782
+ await reactive.flush()
783
+ raise
784
+
785
+ # 4) stream CSV back to the browser
786
+ yield df_out.to_csv(index=False)
787
+
402
788
 
403
789
  app = App(app_ui, server)
404
790
 
@@ -45,7 +45,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
45
45
  extension = extension[(len(extension)-1)]
46
46
  if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
47
47
  output_path_tmp = query_data[:-3] + 'csv'
48
- build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
48
+ build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=True)
49
49
  df_query = pd.read_csv(output_path_tmp)
50
50
  if extension == 'csv' or extension == 'CSV':
51
51
  df_query = pd.read_csv(query_data)
@@ -177,6 +177,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
177
177
  spec_tmp = spectrum_ID1
178
178
  spectrum_ID1 = spectrum_ID2
179
179
  spectrum_ID2 = spec_tmp
180
+ print(unique_query_ids)
181
+ print(spectrum_ID1)
180
182
  query_idx = unique_query_ids.index(spectrum_ID1)
181
183
  reference_idx = unique_reference_ids.index(spectrum_ID2)
182
184
  q_idxs_tmp = np.where(df_query.iloc[:,0].astype(str) == unique_query_ids[query_idx])[0]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycompound
3
- Version: 0.0.9
3
+ Version: 0.0.10
4
4
  Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
5
5
  Author-email: Hunter Dlugas <fy7392@wayne.edu>
6
6
  License-Expression: MIT
@@ -1,14 +1,14 @@
1
- app.py,sha256=eJmAAdCDWS7-3jjCkp8gueBtBkefh8oIbyr45Snh8C0,26187
1
+ app.py,sha256=ab1hII23lVwAmMh4bfzdni50vz-bK-ODbJT_b1VjGMA,34678
2
2
  pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
3
- pycompound/plot_spectra.py,sha256=wOnf2oOAfifj7FYkTZAcIeD7dHW1aRHzmsspPpySDcY,42023
3
+ pycompound/plot_spectra.py,sha256=_5r9YR3AA2IfTbcyfyTnPxxxA92T4hQ9olOgaw7FE6A,42082
4
4
  pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
5
5
  pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
6
6
  pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
7
7
  pycompound/spec_lib_matching.py,sha256=p8gj-72fjkf0p7XrqEl9hnYUGNSbyr7BXugvRT7Y5OA,60311
8
8
  pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
9
9
  pycompound/tuning_CLI.py,sha256=lkFBRZ5VxCBteIh_KTkQFdUBVZA0dL-BLiyMZce1vzE,8539
10
- pycompound-0.0.9.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
11
- pycompound-0.0.9.dist-info/METADATA,sha256=--hu6G380jnsb6J7XvGFiwXJ_lZ6of0stydeLWYjp6U,1732
12
- pycompound-0.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
- pycompound-0.0.9.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
14
- pycompound-0.0.9.dist-info/RECORD,,
10
+ pycompound-0.0.10.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
11
+ pycompound-0.0.10.dist-info/METADATA,sha256=Gb0d0ZbClc4AFRcDjMnNWcb4TCuq84CJl-AKCNjY2wU,1733
12
+ pycompound-0.0.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ pycompound-0.0.10.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
14
+ pycompound-0.0.10.dist-info/RECORD,,