pycompound 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app.py +452 -66
- pycompound/plot_spectra.py +3 -1
- {pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/METADATA +1 -1
- {pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/RECORD +7 -7
- {pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/WHEEL +0 -0
- {pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/licenses/LICENSE +0 -0
- {pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/top_level.txt +0 -0
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
from shiny import App, ui, reactive, render
|
|
2
|
+
from shiny import App, ui, reactive, render, req
|
|
3
3
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
4
4
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
5
5
|
from pycompound.spec_lib_matching import tune_params_on_HRMS_data
|
|
@@ -7,14 +7,149 @@ from pycompound.spec_lib_matching import tune_params_on_NRMS_data
|
|
|
7
7
|
from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
8
8
|
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
9
9
|
from pathlib import Path
|
|
10
|
+
from contextlib import redirect_stdout, redirect_stderr
|
|
10
11
|
import subprocess
|
|
11
12
|
import traceback
|
|
12
13
|
import asyncio
|
|
13
14
|
import io
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
import os
|
|
16
|
+
import sys
|
|
16
17
|
import matplotlib.pyplot as plt
|
|
17
|
-
|
|
18
|
+
import pandas as pd
|
|
19
|
+
import numpy as np
|
|
20
|
+
import netCDF4 as nc
|
|
21
|
+
from pyteomics import mgf
|
|
22
|
+
from pyteomics import mzml
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _peaks_from_pyteomics(input_path, input_file_type):
    """Collect parallel (ids, mzs, ints) peak lists from an mgf or mzML file."""
    if input_file_type == 'mgf':
        source = mgf.read(input_path, index_by_scans=True)
    else:
        source = mzml.read(input_path)

    ids, mzs, ints = [], [], []
    with source as reader:
        for i, spec in enumerate(reader, start=1):
            mz_array = spec['m/z array']
            n_peaks = len(mz_array)
            if n_peaks == 0:
                # Nothing to emit; also avoids requiring a name on empty spectra.
                continue
            if input_file_type == 'mzML':
                spectrum_id = f'ID_{i}'
            else:
                spectrum_id = spec['params']['name']
            ids.extend([spectrum_id] * n_peaks)
            mzs.extend(mz_array)
            ints.extend(spec['intensity array'][:n_peaks])
    return ids, mzs, ints


def _peaks_from_cdf(input_path):
    """Collect parallel (ids, mzs, ints) peak lists from a netCDF file."""
    dataset = nc.Dataset(input_path, 'r')
    try:
        all_mzs = dataset.variables['mass_values'][:]
        all_ints = dataset.variables['intensity_values'][:]
        scan_idxs = dataset.variables['scan_index'][:]
    finally:
        # Close the handle even when one of the variables is missing.
        dataset.close()

    ids, mzs, ints = [], [], []
    n_scans = len(scan_idxs)
    # NOTE(review): consecutive scan_index entries are used as [start, end)
    # bounds, so the final scan (from scan_idxs[-1] to the end of the arrays)
    # is never emitted. Left unchanged so the IDs stay in step with any other
    # converter that uses the same rule -- confirm whether this is intended.
    for i in range(n_scans - 1):
        if i % 1000 == 0:
            print(f'analyzed {i} out of {n_scans} scans')
        s_idx = scan_idxs[i]
        e_idx = scan_idxs[i + 1]
        mzs_tmp = all_mzs[s_idx:e_idx]
        n_peaks = len(mzs_tmp)
        ids.extend([f'ID_{i+1}'] * n_peaks)
        mzs.extend(mzs_tmp)
        ints.extend(all_ints[s_idx:e_idx][:n_peaks])
    return ids, mzs, ints


def _peaks_from_msp(input_path):
    """Collect parallel (ids, mzs, ints) peak lists from an MSP text file."""
    ids, mzs, ints = [], [], []
    spectrum_id = None
    with open(input_path, 'r') as f:
        for line in f:
            line = line.strip()
            if line.startswith('Name:'):
                # Tolerate 'Name:X' as well as 'Name: X'.
                spectrum_id = line[len('Name:'):].strip()
            elif line and line[0].isdigit():
                if spectrum_id is None:
                    # Peak line before any 'Name:' record: it cannot be
                    # attributed to a spectrum, so skip it.
                    continue
                try:
                    mz, intensity = map(float, line.split()[:2])
                except ValueError:
                    # Not a plain "mz intensity" pair; ignore the line.
                    continue
                ids.append(spectrum_id)
                mzs.append(mz)
                ints.append(intensity)
    return ids, mzs, ints


def build_library(input_path=None, output_path=None):
    """Load a spectrum file into a DataFrame.

    The file type is chosen from the (case-insensitive) ending of
    ``input_path``: ``csv`` files are returned as read by ``pd.read_csv``;
    ``mgf``, ``mzML``, ``cdf`` and ``msp`` files are converted to a long-format
    frame with one row per peak and the columns ``id``, ``mz_ratio`` and
    ``intensity``.

    Args:
        input_path: Path of the file to read (``str`` or path-like).
        output_path: Accepted for interface compatibility; this function does
            not use it and writes nothing to disk.

    Returns:
        pandas.DataFrame: the CSV contents, or the converted peak table.

    Raises:
        ValueError: If the file ending is not one of the supported types.
            (An exception is raised rather than calling ``sys.exit()`` so that
            callers running this in a worker thread can report the failure.)
    """
    lowered = os.fspath(input_path).lower()

    if lowered.endswith('csv'):
        return pd.read_csv(input_path)

    if lowered.endswith('mgf'):
        ids, mzs, ints = _peaks_from_pyteomics(input_path, 'mgf')
    elif lowered.endswith('mzml'):
        ids, mzs, ints = _peaks_from_pyteomics(input_path, 'mzML')
    elif lowered.endswith('cdf'):
        ids, mzs, ints = _peaks_from_cdf(input_path)
    elif lowered.endswith('msp'):
        ids, mzs, ints = _peaks_from_msp(input_path)
    else:
        raise ValueError(
            'ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path'
        )

    return pd.DataFrame({'id': ids, 'mz_ratio': mzs, 'intensity': ints})
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def extract_first_column_ids(file_path: str, max_ids: int = 20000):
    """Return the unique spectrum IDs found in a file, in first-seen order.

    For ``.csv`` files the IDs are the non-missing, non-blank values of the
    first column. For any other file the text is scanned line by line and an
    ID is taken from lines starting with ``TITLE=`` or (case-insensitively)
    ``name:``.

    Args:
        file_path: Path of the file to inspect.
        max_ids: Maximum number of unique IDs to return.

    Returns:
        list[str]: At most ``max_ids`` unique IDs; empty when none are found
        or when a non-CSV file cannot be opened.
    """
    suffix = Path(file_path).suffix.lower()

    if suffix == ".csv":
        first_col = pd.read_csv(file_path, usecols=[0]).iloc[:, 0]
        # Drop missing cells BEFORE casting: astype(str) turns NaN into the
        # literal string 'nan', which would otherwise be offered as an ID.
        values = first_col.dropna().astype(str)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        unique_ids = dict.fromkeys(x for x in values if x.strip() != "")
        return list(unique_ids)[:max_ids]

    unique_ids = {}
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                # Cap on UNIQUE ids so duplicates do not eat into the limit.
                if len(unique_ids) >= max_ids:
                    break
                ls = line.strip()
                if ls.startswith("TITLE="):
                    candidate = ls.split("=", 1)[1].strip()
                elif ls.lower().startswith("name:"):
                    candidate = ls.split(":", 1)[1].strip()
                else:
                    continue
                if candidate:
                    unique_ids.setdefault(candidate)
    except OSError:
        # Best effort: an unreadable file yields whatever was collected so far
        # (normally nothing) instead of failing the caller.
        pass

    return list(unique_ids)
|
|
18
153
|
|
|
19
154
|
|
|
20
155
|
def plot_spectra_ui(platform: str):
|
|
@@ -22,8 +157,20 @@ def plot_spectra_ui(platform: str):
|
|
|
22
157
|
base_inputs = [
|
|
23
158
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
24
159
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
25
|
-
|
|
26
|
-
|
|
160
|
+
ui.input_selectize(
|
|
161
|
+
"spectrum_ID1",
|
|
162
|
+
"Select spectrum ID 1:",
|
|
163
|
+
choices=[],
|
|
164
|
+
multiple=False,
|
|
165
|
+
options={"placeholder": "Upload a query file to load IDs..."},
|
|
166
|
+
),
|
|
167
|
+
ui.input_selectize(
|
|
168
|
+
"spectrum_ID2",
|
|
169
|
+
"Select spectrum ID 2 (optional):",
|
|
170
|
+
choices=[],
|
|
171
|
+
multiple=False,
|
|
172
|
+
options={"placeholder": "Upload a reference file to load IDs..."},
|
|
173
|
+
),
|
|
27
174
|
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
28
175
|
ui.input_select(
|
|
29
176
|
"high_quality_reference_library",
|
|
@@ -77,7 +224,6 @@ def plot_spectra_ui(platform: str):
|
|
|
77
224
|
run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
78
225
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
79
226
|
|
|
80
|
-
#print(len(extra_inputs))
|
|
81
227
|
# Layout base_inputs and extra_inputs in columns
|
|
82
228
|
if platform == "HRMS":
|
|
83
229
|
inputs_columns = ui.layout_columns(
|
|
@@ -102,7 +248,8 @@ def plot_spectra_ui(platform: str):
|
|
|
102
248
|
ui.h2("Plot Spectra"),
|
|
103
249
|
inputs_columns,
|
|
104
250
|
run_button_plot_spectra,
|
|
105
|
-
back_button
|
|
251
|
+
back_button,
|
|
252
|
+
ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px")
|
|
106
253
|
),
|
|
107
254
|
)
|
|
108
255
|
|
|
@@ -179,13 +326,20 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
179
326
|
col_widths=(3, 3, 3, 3),
|
|
180
327
|
)
|
|
181
328
|
|
|
329
|
+
log_panel = ui.card(
|
|
330
|
+
ui.card_header("Identification log"),
|
|
331
|
+
ui.output_text_verbatim("match_log"),
|
|
332
|
+
style="max-height:300px; overflow:auto"
|
|
333
|
+
)
|
|
334
|
+
|
|
182
335
|
# Combine everything
|
|
183
336
|
return ui.div(
|
|
184
337
|
ui.TagList(
|
|
185
338
|
ui.h2("Run Spectral Library Matching"),
|
|
186
339
|
inputs_columns,
|
|
187
340
|
run_button_spec_lib_matching,
|
|
188
|
-
back_button
|
|
341
|
+
back_button,
|
|
342
|
+
log_panel,
|
|
189
343
|
),
|
|
190
344
|
)
|
|
191
345
|
|
|
@@ -207,11 +361,106 @@ def server(input, output, session):
|
|
|
207
361
|
|
|
208
362
|
run_status_plot_spectra = reactive.Value("")
|
|
209
363
|
run_status_spec_lib_matching = reactive.Value("")
|
|
364
|
+
match_log_rv = reactive.Value("")
|
|
365
|
+
is_matching_rv = reactive.Value(False)
|
|
366
|
+
|
|
367
|
+
query_ids_rv = reactive.Value([])
|
|
368
|
+
query_file_path_rv = reactive.Value(None)
|
|
369
|
+
query_result_rv = reactive.Value(None)
|
|
370
|
+
query_status_rv = reactive.Value("")
|
|
371
|
+
reference_ids_rv = reactive.Value([])
|
|
372
|
+
reference_file_path_rv = reactive.Value(None)
|
|
373
|
+
reference_result_rv = reactive.Value(None)
|
|
374
|
+
reference_status_rv = reactive.Value("")
|
|
375
|
+
|
|
376
|
+
converted_query_path_rv = reactive.Value(None)
|
|
377
|
+
converted_reference_path_rv = reactive.Value(None)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def process_database(file_path: str):
    """Describe an uploaded database file.

    Returns a dict holding the given path under ``"path"`` and its
    lower-cased file extension (including the dot) under ``"suffix"``.
    """
    extension = Path(file_path).suffix.lower()
    return dict(path=file_path, suffix=extension)
|
|
383
|
+
|
|
384
|
+
@render.text
def plot_query_status():
    """Render the current query-upload status text ("" when nothing is set)."""
    status = query_status_rv.get()
    return status if status else ""
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
@reactive.effect
@reactive.event(input.query_data)
async def _on_query_upload():
    """React to a new query-file upload while the Plot Spectra page is shown.

    Records the uploaded file's path, runs ``process_database`` on it in a
    worker thread, stores the result, and reports progress through
    ``query_status_rv``.
    """
    if current_page() != "plot_spectra":
        return

    uploads = input.query_data()
    # Halt here when the input holds no uploaded file.
    req(uploads and len(uploads) > 0)

    upload_path = uploads[0]["datapath"]
    query_file_path_rv.set(upload_path)

    display_name = Path(upload_path).name
    query_status_rv.set(f"Processing query database: {display_name} …")
    await reactive.flush()

    try:
        info = await asyncio.to_thread(process_database, upload_path)
        query_result_rv.set(info)
        query_status_rv.set("✅ Query database processed.")
        await reactive.flush()
    except Exception as e:
        # Surface the failure in the status text rather than propagating it.
        query_status_rv.set(f"❌ Failed to process query database: {e}")
        await reactive.flush()
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
@reactive.effect
@reactive.event(input.reference_data)
async def _on_reference_upload():
    """React to a new reference-file upload while the Plot Spectra page is shown.

    Records the uploaded file's path, runs ``process_database`` on it in a
    worker thread, stores the result, and reports progress through
    ``reference_status_rv``.
    """
    if current_page() != "plot_spectra":
        return

    uploads = input.reference_data()
    # Halt here when the input holds no uploaded file.
    req(uploads and len(uploads) > 0)

    upload_path = uploads[0]["datapath"]
    reference_file_path_rv.set(upload_path)

    display_name = Path(upload_path).name
    reference_status_rv.set(f"Processing reference database: {display_name} …")
    await reactive.flush()

    try:
        info = await asyncio.to_thread(process_database, upload_path)
        reference_result_rv.set(info)
        reference_status_rv.set("✅ Reference database processed.")
        await reactive.flush()
    except Exception as e:
        # Surface the failure in the status text rather than propagating it.
        reference_status_rv.set(f"❌ Failed to process reference database: {e}")
        await reactive.flush()
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
@render.text
def match_log():
    """Render the accumulated identification log text held in ``match_log_rv``."""
    log_text = match_log_rv.get()
    return log_text
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
class ReactiveWriter(io.TextIOBase):
    """Text stream that appends everything written to it to a reactive value.

    NOTE(review): a second class with this same name, taking an explicit
    event loop, is defined later in this file.
    """

    def __init__(self, rv):
        # rv: object exposing get()/set(); it holds the accumulated text.
        self.rv = rv

    def write(self, s: str):
        """Append ``s`` to the reactive value; return the number of characters taken."""
        if s:
            self.rv.set(self.rv.get() + s)
            try:
                # Ask for a reactive flush when an event loop is running here.
                asyncio.get_running_loop().create_task(reactive.flush())
            except RuntimeError:
                # No running loop in this thread (or scheduling failed): skip.
                pass
            return len(s)
        return 0

    def flush(self):
        """No-op: there is no buffer to flush."""
        pass
|
|
459
|
+
|
|
210
460
|
|
|
211
461
|
|
|
212
462
|
@reactive.Effect
|
|
213
463
|
def _():
|
|
214
|
-
# Main menu buttons
|
|
215
464
|
if input.plot_spectra() > plot_clicks.get():
|
|
216
465
|
current_page.set("plot_spectra")
|
|
217
466
|
plot_clicks.set(input.plot_spectra())
|
|
@@ -315,56 +564,126 @@ def server(input, output, session):
|
|
|
315
564
|
return run_spec_lib_matching_ui(input.chromatography_platform())
|
|
316
565
|
|
|
317
566
|
|
|
318
|
-
|
|
567
|
+
|
|
319
568
|
@reactive.effect
@reactive.event(input.query_data)
async def _populate_ids_from_query_upload():
    """Fill the ``spectrum_ID1`` dropdown from a newly uploaded query file.

    CSV uploads are read directly. Any other upload is first passed to
    ``build_library`` in a worker thread and the result is normalised to a
    CSV path. The chosen CSV path is stored in ``converted_query_path_rv``,
    the IDs from its first column in ``query_ids_rv``, and progress or
    failure text in ``query_status_rv``. Failures are re-raised after being
    reported.
    """
    if current_page() != "plot_spectra":
        return

    uploads = input.query_data()
    if not uploads:
        return

    source = Path(uploads[0]["datapath"])
    needs_conversion = source.suffix.lower() != ".csv"

    try:
        if needs_conversion:
            query_status_rv.set(f"Converting {source.name} → CSV …")
            await reactive.flush()

            # The converted file is placed alongside the uploaded one.
            target = source.with_suffix(".converted.csv")
            produced = await asyncio.to_thread(build_library, str(source), str(target))

            # build_library may hand back a DataFrame or a path; end up with a path.
            if isinstance(produced, pd.DataFrame):
                produced.to_csv(target, index=False)
                csv_path = target
            elif isinstance(produced, (str, os.PathLike, Path)):
                csv_path = Path(produced)
            else:
                raise TypeError(f"build_library returned unsupported type: {type(produced)}")
        else:
            csv_path = source

        converted_query_path_rv.set(str(csv_path))

        query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
        await reactive.flush()

        # IDs come from the first column of the CSV.
        ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
        query_ids_rv.set(ids)

        # Refresh the dropdown, pre-selecting the first ID when there is one.
        first_id = ids[0] if ids else None
        ui.update_selectize("spectrum_ID1", choices=ids, selected=first_id)

        if ids:
            summary = f"✅ Loaded {len(ids)} IDs from {csv_path.name}"
        else:
            summary = f"⚠️ No IDs found in {csv_path.name}"
        query_status_rv.set(summary)
        await reactive.flush()

    except Exception as e:
        query_status_rv.set(f"❌ Failed: {e}")
        await reactive.flush()
        raise
|
|
349
626
|
|
|
350
627
|
|
|
351
628
|
@reactive.effect
@reactive.event(input.reference_data)
async def _populate_ids_from_reference_upload():
    """Fill the ``spectrum_ID2`` dropdown from a newly uploaded reference file.

    CSV uploads are read directly. Any other upload is first passed to
    ``build_library`` in a worker thread and the result is normalised to a
    CSV path. The chosen CSV path is stored in
    ``converted_reference_path_rv``, the IDs from its first column in
    ``reference_ids_rv``, and progress or failure text in
    ``reference_status_rv``. Failures are re-raised after being reported.
    """
    if current_page() != "plot_spectra":
        return

    uploads = input.reference_data()
    if not uploads:
        return

    source = Path(uploads[0]["datapath"])
    needs_conversion = source.suffix.lower() != ".csv"

    try:
        if needs_conversion:
            reference_status_rv.set(f"Converting {source.name} → CSV …")
            await reactive.flush()

            # The converted file is placed alongside the uploaded one.
            target = source.with_suffix(".converted.csv")
            produced = await asyncio.to_thread(build_library, str(source), str(target))

            # build_library may hand back a DataFrame or a path; end up with a path.
            if isinstance(produced, pd.DataFrame):
                produced.to_csv(target, index=False)
                csv_path = target
            elif isinstance(produced, (str, os.PathLike, Path)):
                csv_path = Path(produced)
            else:
                raise TypeError(f"build_library returned unsupported type: {type(produced)}")
        else:
            csv_path = source

        converted_reference_path_rv.set(str(csv_path))

        reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
        await reactive.flush()

        # IDs come from the first column of the CSV.
        ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
        reference_ids_rv.set(ids)

        # Refresh the dropdown, pre-selecting the first ID when there is one.
        first_id = ids[0] if ids else None
        ui.update_selectize("spectrum_ID2", choices=ids, selected=first_id)

        if ids:
            summary = f"✅ Loaded {len(ids)} IDs from {csv_path.name}"
        else:
            summary = f"⚠️ No IDs found in {csv_path.name}"
        reference_status_rv.set(summary)
        await reactive.flush()

    except Exception as e:
        reference_status_rv.set(f"❌ Failed: {e}")
        await reactive.flush()
        raise
|
|
686
|
+
|
|
368
687
|
|
|
369
688
|
|
|
370
689
|
@render.download(filename=lambda: f"plot.png")
|
|
@@ -382,23 +701,90 @@ def server(input, output, session):
|
|
|
382
701
|
yield buf.getvalue()
|
|
383
702
|
|
|
384
703
|
|
|
385
|
-
@render.download(filename=lambda: f"plot.png")
|
|
386
|
-
def run_btn_spec_lib_matching():
|
|
387
|
-
if input.chromatography_platform() == "HRMS":
|
|
388
|
-
df_out = run_spec_lib_matching_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/', return_ID_output=True)
|
|
389
|
-
elif input.chromatography_platform() == "NRMS":
|
|
390
|
-
df_out = run_spec_lib_matching_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/output_similarity_scores.csv', return_ID_output=True)
|
|
391
|
-
|
|
392
|
-
df_out.to_csv(io.StringIO(), index=False)
|
|
393
|
-
return buf.getvalue().encode('utf-8')
|
|
394
|
-
|
|
395
|
-
|
|
396
704
|
@render.text
def status_output():
    """Render the run status text held in ``run_status_plot_spectra``."""
    return run_status_plot_spectra.get()
    # NOTE(review): the statement below is unreachable because of the return
    # above, so ``run_status_spec_lib_matching`` is never shown by this
    # output. Confirm which status (or which combination) was intended.
    return run_status_spec_lib_matching.get()
|
|
400
708
|
|
|
401
709
|
|
|
710
|
+
class ReactiveWriter(io.TextIOBase):
    """Text stream that forwards written text to a reactive value via an event loop.

    Each write is handed to ``loop.call_soon_threadsafe`` so the reactive
    value is updated, and a reactive flush scheduled, on that loop.
    """

    def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
        self.rv = rv      # reactive value accumulating the text
        self.loop = loop  # loop on which updates are applied

    def _append_and_flush(self, text):
        """Append ``text`` to the reactive value and schedule a reactive flush."""
        self.rv.set(self.rv.get() + text)
        self.loop.create_task(reactive.flush())

    def write(self, s: str):
        """Queue ``s`` for appending; return the number of characters taken."""
        if s:
            self.loop.call_soon_threadsafe(self._append_and_flush, s)
            return len(s)
        return 0

    def flush(self):
        """No-op: writes are forwarded immediately, nothing is buffered."""
        pass
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
@render.download(filename="identification_output.csv")
async def run_btn_spec_lib_matching():
    """Run spectral library matching and stream the identification table as CSV.

    Reads the matching parameters from the UI inputs, runs the HRMS or NRMS
    matching function in a worker thread with stdout/stderr redirected into
    ``match_log_rv``, appends a success or error line to that log, and yields
    the resulting table as CSV text. Errors are re-raised after being logged.
    """
    # Show something in the log right away.
    match_log_rv.set("Starting identification...\n")
    await reactive.flush()

    # The select input may deliver the flag as text or as a number; make it a bool.
    use_hq_library = input.high_quality_reference_library()
    if isinstance(use_hq_library, str):
        use_hq_library = use_hq_library.lower() == "true"
    elif isinstance(use_hq_library, (int, float)):
        use_hq_library = bool(use_hq_library)

    # Arguments shared by the HRMS and NRMS matching functions.
    common_kwargs = {
        "query_data": input.query_data()[0]["datapath"],
        "reference_data": input.reference_data()[0]["datapath"],
        "likely_reference_ids": None,
        "similarity_measure": input.similarity_measure(),
        "spectrum_preprocessing_order": input.spectrum_preprocessing_order(),
        "high_quality_reference_library": use_hq_library,
        "mz_min": input.mz_min(),
        "mz_max": input.mz_max(),
        "int_min": input.int_min(),
        "int_max": input.int_max(),
        "noise_threshold": input.noise_threshold(),
        "wf_mz": input.wf_mz(),
        "wf_intensity": input.wf_int(),
        "LET_threshold": input.LET_threshold(),
        "entropy_dimension": input.entropy_dimension(),
        "n_top_matches_to_save": input.n_top_matches_to_save(),
        # Printing is enabled so the redirected log receives the output.
        "print_id_results": True,
        "output_identification": str(Path.cwd() / "identification_output.csv"),
        "output_similarity_scores": str(Path.cwd() / "similarity_scores.csv"),
        "return_ID_output": True,
    }

    log_writer = ReactiveWriter(match_log_rv, asyncio.get_running_loop())

    # The matching itself runs in a thread so the event loop stays responsive.
    try:
        with redirect_stdout(log_writer), redirect_stderr(log_writer):
            if input.chromatography_platform() == "HRMS":
                hrms_kwargs = {
                    "window_size_centroiding": input.window_size_centroiding(),
                    "window_size_matching": input.window_size_matching(),
                    **common_kwargs,
                }
                df_out = await asyncio.to_thread(
                    run_spec_lib_matching_on_HRMS_data, **hrms_kwargs
                )
            else:
                df_out = await asyncio.to_thread(
                    run_spec_lib_matching_on_NRMS_data, **common_kwargs
                )
        match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
        await reactive.flush()
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        await reactive.flush()
        raise

    # Send the identification table to the browser as CSV text.
    yield df_out.to_csv(index=False)
|
|
787
|
+
|
|
402
788
|
|
|
403
789
|
app = App(app_ui, server)
|
|
404
790
|
|
pycompound/plot_spectra.py
CHANGED
|
@@ -45,7 +45,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
45
45
|
extension = extension[(len(extension)-1)]
|
|
46
46
|
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
|
|
47
47
|
output_path_tmp = query_data[:-3] + 'csv'
|
|
48
|
-
build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=
|
|
48
|
+
build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=True)
|
|
49
49
|
df_query = pd.read_csv(output_path_tmp)
|
|
50
50
|
if extension == 'csv' or extension == 'CSV':
|
|
51
51
|
df_query = pd.read_csv(query_data)
|
|
@@ -177,6 +177,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
177
177
|
spec_tmp = spectrum_ID1
|
|
178
178
|
spectrum_ID1 = spectrum_ID2
|
|
179
179
|
spectrum_ID2 = spec_tmp
|
|
180
|
+
print(unique_query_ids)
|
|
181
|
+
print(spectrum_ID1)
|
|
180
182
|
query_idx = unique_query_ids.index(spectrum_ID1)
|
|
181
183
|
reference_idx = unique_reference_ids.index(spectrum_ID2)
|
|
182
184
|
q_idxs_tmp = np.where(df_query.iloc[:,0].astype(str) == unique_query_ids[query_idx])[0]
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
app.py,sha256=
|
|
1
|
+
app.py,sha256=ab1hII23lVwAmMh4bfzdni50vz-bK-ODbJT_b1VjGMA,34678
|
|
2
2
|
pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
|
|
3
|
-
pycompound/plot_spectra.py,sha256=
|
|
3
|
+
pycompound/plot_spectra.py,sha256=_5r9YR3AA2IfTbcyfyTnPxxxA92T4hQ9olOgaw7FE6A,42082
|
|
4
4
|
pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
|
|
5
5
|
pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
|
|
6
6
|
pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
|
|
7
7
|
pycompound/spec_lib_matching.py,sha256=p8gj-72fjkf0p7XrqEl9hnYUGNSbyr7BXugvRT7Y5OA,60311
|
|
8
8
|
pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
|
|
9
9
|
pycompound/tuning_CLI.py,sha256=lkFBRZ5VxCBteIh_KTkQFdUBVZA0dL-BLiyMZce1vzE,8539
|
|
10
|
-
pycompound-0.0.
|
|
11
|
-
pycompound-0.0.
|
|
12
|
-
pycompound-0.0.
|
|
13
|
-
pycompound-0.0.
|
|
14
|
-
pycompound-0.0.
|
|
10
|
+
pycompound-0.0.10.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
|
|
11
|
+
pycompound-0.0.10.dist-info/METADATA,sha256=Gb0d0ZbClc4AFRcDjMnNWcb4TCuq84CJl-AKCNjY2wU,1733
|
|
12
|
+
pycompound-0.0.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
+
pycompound-0.0.10.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
|
|
14
|
+
pycompound-0.0.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|