pycompound 0.1.9__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pycompound-0.1.9/src/pycompound.egg-info → pycompound-0.1.10}/PKG-INFO +1 -1
- {pycompound-0.1.9 → pycompound-0.1.10}/README.md +2 -2
- {pycompound-0.1.9 → pycompound-0.1.10}/pyproject.toml +1 -1
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound/plot_spectra.py +6 -6
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound/spec_lib_matching.py +2 -2
- {pycompound-0.1.9 → pycompound-0.1.10/src/pycompound.egg-info}/PKG-INFO +1 -1
- {pycompound-0.1.9 → pycompound-0.1.10}/tests/test_plot_spectra.py +33 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/tests/test_spec_lib_matching.py +10 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/tests/test_tuning.py +26 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/LICENSE +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/README_PyPI.md +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/setup.cfg +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound/build_library.py +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound/plot_spectra_CLI.py +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound/processing.py +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound/similarity_measures.py +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound/spec_lib_matching_CLI.py +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound/tuning_CLI_DE.py +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound/tuning_CLI_grid.py +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound.egg-info/SOURCES.txt +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound.egg-info/dependency_links.txt +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound.egg-info/requires.txt +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/src/pycompound.egg-info/top_level.txt +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/tests/test_build_library.py +0 -0
- {pycompound-0.1.9 → pycompound-0.1.10}/tests/test_similarity_measures.py +0 -0
|
@@ -19,9 +19,9 @@ A Python-based tool for spectral library matching, PyCompound is available as a
|
|
|
19
19
|
## 1. Install dependencies
|
|
20
20
|
PyCompound requires the Python dependencies Matplotlib, NumPy, Pandas, SciPy, Pyteomics, and netCDF4. Specifically, this software was validated with python=3.12.4, matplotlib=3.8.4, numpy=1.26.4, pandas=2.2.2, scipy=1.13.1, pyteomics=4.7.2, netCDF4=1.6.5, lxml=5.1.0, joblib=1.5.2, and shiny=1.4.0, although it may work with other versions of these tools. A user may consider creating a conda environment (see [https://docs.conda.io/projects/conda/en/latest/user-guide/getting-started.html](https://docs.conda.io/projects/conda/en/latest/user-guide/getting-started.html) for guidance on getting started with conda if you are unfamiliar). For a system with conda installed, one can create the environment pycompound_env, activate it, and install the necessary dependencies with:
|
|
21
21
|
```
|
|
22
|
-
conda create -n pycompound_env python=3.12
|
|
22
|
+
conda create -n pycompound_env python=3.12 -y
|
|
23
23
|
conda activate pycompound_env
|
|
24
|
-
pip install pycompound==0.1.9
|
|
24
|
+
pip install pycompound==0.1.10
|
|
25
25
|
```
|
|
26
26
|
|
|
27
27
|
<a name="functionality"></a>
|
|
@@ -14,7 +14,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
14
14
|
else:
|
|
15
15
|
extension = query_data.rsplit('.',1)
|
|
16
16
|
extension = extension[(len(extension)-1)]
|
|
17
|
-
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
|
|
17
|
+
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF' or extension == 'msp' or extension == 'MSP' or extension == 'json' or extension == 'JSON':
|
|
18
18
|
output_path_tmp = query_data[:-3] + 'txt'
|
|
19
19
|
build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=True)
|
|
20
20
|
df_query = pd.read_csv(output_path_tmp, sep='\t')
|
|
@@ -29,7 +29,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
29
29
|
else:
|
|
30
30
|
extension = reference_data.rsplit('.',1)
|
|
31
31
|
extension = extension[(len(extension)-1)]
|
|
32
|
-
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
|
|
32
|
+
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF' or extension == 'msp' or extension == 'MSP' or extension == 'json' or extension == 'JSON':
|
|
33
33
|
output_path_tmp = reference_data[:-3] + 'txt'
|
|
34
34
|
build_library_from_raw_data(input_path=reference_data, output_path=output_path_tmp, is_reference=True)
|
|
35
35
|
df_reference = pd.read_csv(output_path_tmp, sep='\t')
|
|
@@ -298,7 +298,7 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
298
298
|
else:
|
|
299
299
|
extension = query_data.rsplit('.',1)
|
|
300
300
|
extension = extension[(len(extension)-1)]
|
|
301
|
-
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
|
|
301
|
+
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF' or extension == 'msp' or extension == 'MSP' or extension == 'json' or extension == 'JSON':
|
|
302
302
|
output_path_tmp = query_data[:-3] + 'txt'
|
|
303
303
|
build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
|
|
304
304
|
df_query = pd.read_csv(output_path_tmp, sep='\t')
|
|
@@ -312,7 +312,7 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
312
312
|
else:
|
|
313
313
|
extension = reference_data.rsplit('.',1)
|
|
314
314
|
extension = extension[(len(extension)-1)]
|
|
315
|
-
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
|
|
315
|
+
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF' or extension == 'msp' or extension == 'MSP' or extension == 'json' or extension == 'JSON':
|
|
316
316
|
output_path_tmp = reference_data[:-3] + 'txt'
|
|
317
317
|
build_library_from_raw_data(input_path=reference_data, output_path=output_path_tmp, is_reference=True)
|
|
318
318
|
df_reference = pd.read_csv(output_path_tmp, sep='\t')
|
|
@@ -395,8 +395,8 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
395
395
|
print(f'Warning: plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.')
|
|
396
396
|
output_path = f'{Path.cwd()}/spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}.pdf'
|
|
397
397
|
|
|
398
|
-
min_mz = np.min([np.min(df_query['mz_ratio'].tolist()), np.min(df_reference['mz_ratio'].tolist())])
|
|
399
|
-
max_mz = np.max([np.max(df_query['mz_ratio'].tolist()), np.max(df_reference['mz_ratio'].tolist())])
|
|
398
|
+
min_mz = int(np.min([np.min(df_query['mz_ratio'].tolist()), np.min(df_reference['mz_ratio'].tolist())]))
|
|
399
|
+
max_mz = int(np.max([np.max(df_query['mz_ratio'].tolist()), np.max(df_reference['mz_ratio'].tolist())]))
|
|
400
400
|
mzs = np.linspace(min_mz,max_mz,(max_mz-min_mz+1))
|
|
401
401
|
|
|
402
402
|
unique_query_ids = df_query['id'].unique().tolist()
|
|
@@ -65,7 +65,7 @@ def tune_params_DE(query_data=None, reference_data=None, chromatography_platform
|
|
|
65
65
|
extension = extension[(len(extension)-1)]
|
|
66
66
|
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF' or extension == 'msp' or extension == 'MSP' or extension == 'json' or extension == 'JSON':
|
|
67
67
|
output_path_tmp = query_data[:-3] + 'txt'
|
|
68
|
-
build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
|
|
68
|
+
build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=True)
|
|
69
69
|
df_query = pd.read_csv(output_path_tmp, sep='\t')
|
|
70
70
|
if extension == 'txt' or extension == 'TXT':
|
|
71
71
|
df_query = pd.read_csv(query_data, sep='\t')
|
|
@@ -805,7 +805,7 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
|
|
|
805
805
|
else:
|
|
806
806
|
extension = query_data.rsplit('.',1)
|
|
807
807
|
extension = extension[(len(extension)-1)]
|
|
808
|
-
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
|
|
808
|
+
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF' or extension == 'msp' or extension == 'MSP' or extension == 'json' or extension == 'JSON':
|
|
809
809
|
output_path_tmp = query_data[:-3] + 'txt'
|
|
810
810
|
build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
|
|
811
811
|
df_query = pd.read_csv(output_path_tmp, sep='\t')
|
|
@@ -248,3 +248,36 @@ generate_plots_on_HRMS_data(
|
|
|
248
248
|
weights={'Cosine':0.1, 'Shannon':0.2, 'Renyi':0.3, 'Tsallis':0.4},
|
|
249
249
|
output_path=f'{Path.cwd()}/plots/test32.pdf')
|
|
250
250
|
|
|
251
|
+
print('\n\ntest #33:')
|
|
252
|
+
generate_plots_on_HRMS_data(
|
|
253
|
+
query_data=f'{Path.cwd()}/data/lcms_query.msp',
|
|
254
|
+
reference_data=f'{Path.cwd()}/data/trimmed_GNPS_reference_library.txt',
|
|
255
|
+
high_quality_reference_library=True,
|
|
256
|
+
noise_threshold=0.1,
|
|
257
|
+
mz_min=100,
|
|
258
|
+
output_path=f'{Path.cwd()}/plots/test33.pdf')
|
|
259
|
+
|
|
260
|
+
print('\n\ntest #34:')
|
|
261
|
+
generate_plots_on_HRMS_data(
|
|
262
|
+
query_data=f'{Path.cwd()}/data/lcms_query_tuning.msp',
|
|
263
|
+
reference_data=f'{Path.cwd()}/data/trimmed_GNPS_reference_library.txt',
|
|
264
|
+
high_quality_reference_library=True,
|
|
265
|
+
noise_threshold=0.1,
|
|
266
|
+
mz_min=100,
|
|
267
|
+
output_path=f'{Path.cwd()}/plots/test34.pdf')
|
|
268
|
+
|
|
269
|
+
print('\n\ntest #35:')
|
|
270
|
+
generate_plots_on_NRMS_data(
|
|
271
|
+
query_data=f'{Path.cwd()}/data/gcms_query.msp',
|
|
272
|
+
reference_data=f'{Path.cwd()}/data/trimmed_gcms_reference_library.txt',
|
|
273
|
+
similarity_measure='shannon',
|
|
274
|
+
weights={'Cosine':0.5, 'Shannon':0.3, 'Renyi':0.1, 'Tsallis':0.1},
|
|
275
|
+
output_path=f'{Path.cwd()}/plots/test35.pdf')
|
|
276
|
+
|
|
277
|
+
print('\n\ntest #36:')
|
|
278
|
+
generate_plots_on_NRMS_data(
|
|
279
|
+
query_data=f'{Path.cwd()}/data/gcms_query.msp',
|
|
280
|
+
reference_data=f'{Path.cwd()}/data/trimmed_gcms_reference_library.txt',
|
|
281
|
+
similarity_measure='cosine',
|
|
282
|
+
output_path=f'{Path.cwd()}/plots/test36.pdf')
|
|
283
|
+
|
|
@@ -316,5 +316,15 @@ run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/MoNA-export-Hu
|
|
|
316
316
|
adduct='H',
|
|
317
317
|
precursor_ion_mz_tolerance=0.5,
|
|
318
318
|
print_id_results=True)
|
|
319
|
+
|
|
320
|
+
print('\n\ntest #53:')
|
|
321
|
+
run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query.msp',
|
|
322
|
+
reference_data=f'{Path.cwd()}/data/trimmed_GNPS_reference_library.txt',
|
|
323
|
+
similarity_measure='cosine')
|
|
324
|
+
print('\n\ntest #54:')
|
|
325
|
+
run_spec_lib_matching_on_NRMS_data(query_data=f'{Path.cwd()}/data/gcms_query.msp',
|
|
326
|
+
reference_data=f'{Path.cwd()}/data/trimmed_gcms_reference_library.txt',
|
|
327
|
+
similarity_measure='cosine')
|
|
328
|
+
|
|
319
329
|
"""
|
|
320
330
|
|
|
@@ -5,7 +5,18 @@ from pycompound.spec_lib_matching import tune_params_DE
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
import os
|
|
7
7
|
|
|
8
|
+
print('\n\ntest #9:')
|
|
9
|
+
tune_params_DE(query_data=f'{Path.cwd()}/data/gcms_query_tuning.msp',
|
|
10
|
+
reference_data=f'{Path.cwd()}/data/trimmed_gcms_reference_library.txt',
|
|
11
|
+
chromatography_platform='NRMS',
|
|
12
|
+
similarity_measure='tsallis',
|
|
13
|
+
optimize_params=["wf_mz","wf_int","LET_threshold","entropy_dimension"],
|
|
14
|
+
param_bounds={"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0,5),"entropy_dimension":(1.01,3)},
|
|
15
|
+
default_params={"noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1},
|
|
16
|
+
maxiters=10,
|
|
17
|
+
de_workers=5)
|
|
8
18
|
|
|
19
|
+
"""
|
|
9
20
|
print('\n\ntest #1:')
|
|
10
21
|
tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/lcms_query_tuning.txt',
|
|
11
22
|
reference_data=f'{Path.cwd()}/data/trimmed_GNPS_reference_library.txt',
|
|
@@ -69,3 +80,18 @@ tune_params_DE(query_data=f'{Path.cwd()}/data/gcms_query_tuning.txt',
|
|
|
69
80
|
maxiters=10,
|
|
70
81
|
de_workers=5)
|
|
71
82
|
|
|
83
|
+
print('\n\ntest #8:')
|
|
84
|
+
tune_params_DE(query_data=f'{Path.cwd()}/data/lcms_query_tuning.msp',
|
|
85
|
+
reference_data=f'{Path.cwd()}/data/trimmed_GNPS_reference_library.txt',
|
|
86
|
+
precursor_ion_mz_tolerance=0.1,
|
|
87
|
+
ionization_mode='Positive',
|
|
88
|
+
adduct='H',
|
|
89
|
+
chromatography_platform='HRMS',
|
|
90
|
+
similarity_measure='shannon',
|
|
91
|
+
optimize_params=["wf_mz","wf_int"],
|
|
92
|
+
param_bounds={"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0)},
|
|
93
|
+
default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1},
|
|
94
|
+
maxiters=10,
|
|
95
|
+
de_workers=6)
|
|
96
|
+
"""
|
|
97
|
+
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|