pycompound 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app.py +470 -144
- pycompound/build_library.py +2 -9
- pycompound/plot_spectra.py +17 -42
- pycompound/processing.py +0 -9
- pycompound/similarity_measures.py +0 -3
- pycompound/spec_lib_matching.py +295 -102
- pycompound/spec_lib_matching_CLI.py +2 -7
- pycompound/tuning_CLI.py +2 -3
- {pycompound-0.1.0.dist-info → pycompound-0.1.2.dist-info}/METADATA +1 -1
- pycompound-0.1.2.dist-info/RECORD +14 -0
- pycompound-0.1.0.dist-info/RECORD +0 -14
- {pycompound-0.1.0.dist-info → pycompound-0.1.2.dist-info}/WHEEL +0 -0
- {pycompound-0.1.0.dist-info → pycompound-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {pycompound-0.1.0.dist-info → pycompound-0.1.2.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,4 @@
|
|
|
1
1
|
|
|
2
|
-
# this script performs spectral library matching to identify unknown query compound(s) from GC-MS data
|
|
3
|
-
|
|
4
|
-
# load libraries
|
|
5
2
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
6
3
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
7
4
|
from pathlib import Path
|
|
@@ -11,10 +8,8 @@ import sys
|
|
|
11
8
|
import json
|
|
12
9
|
|
|
13
10
|
|
|
14
|
-
# create argparse object so command-line input can be imported
|
|
15
11
|
parser = argparse.ArgumentParser()
|
|
16
12
|
|
|
17
|
-
# import optional command-line arguments
|
|
18
13
|
parser.add_argument('--query_data', type=str, metavar='\b', help='CSV file of query mass spectrum/spectra to be identified. Each row should correspond to a mass spectrum, the left-most column should contain an identifier, and each of the other columns should correspond to a single mass/charge ratio. Mandatory argument.')
|
|
19
14
|
# Path to the reference-library CSV (one spectrum per row, identifier in the left-most column).
parser.add_argument('--reference_data', type=str, metavar='\b', help='CSV file of the reference mass spectra. Each row should correspond to a mass spectrum, the left-most column should contain an identifier (i.e. the CAS registry number or the compound name), and each of the remaining columns should correspond to a single mass/charge ratio. Mandatory argument.')
|
|
20
15
|
parser.add_argument('--likely_reference_ids', type=str, metavar='\b', help='CSV file with one column containing the IDs of a subset of all compounds in the reference_data to be used in spectral library matching. Each ID in this file must be an ID in the reference library. Default: none (i.e. default is to use entire reference library)')
|
|
@@ -37,8 +32,8 @@ parser.add_argument('--entropy_dimension', type=float, default=1.1, metavar='\b'
|
|
|
37
32
|
# Normalization applied to spectrum intensities before entropy is computed; defaults to 'standard'.
parser.add_argument('--normalization_method', type=str, default='standard', metavar='\b', help='Method used to normalize the intensities of each spectrum so that the intensities sum to 1. Since entropy quantifies the uncertainty of a probability distribution, the intensities of a given spectrum must sum to 1 prior to computing the entropy of the given spectrum intensities. Options: \'standard\' and \'softmax\'. Default: standard.')
|
|
38
33
|
parser.add_argument('--n_top_matches_to_save', type=int, default=1, metavar='\b', help='The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1.')
|
|
39
34
|
parser.add_argument('--print_id_results', type=str, default=False, metavar='\b', help='Flag that prints identification results if True. Default: False')
|
|
40
|
-
parser.add_argument('--output_identification', type=str, default=f'{Path.cwd()}/output_identification.
|
|
41
|
-
parser.add_argument('--output_similarity_scores', type=str, default=f'{Path.cwd()}/output_all_similarity_scores.
|
|
35
|
+
# Default output path is built from Path.cwd(), i.e. the directory the script is launched from
# (not necessarily the directory that contains this script).
parser.add_argument('--output_identification', type=str, default=f'{Path.cwd()}/output_identification.txt', metavar='\b', help='Output TXT file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in the current working directory (i.e. the directory from which this script is run) with filename \'output_identification.txt\'.')
|
|
36
|
+
# Destination for the full query-by-reference similarity score table; defaults to a file in Path.cwd().
parser.add_argument('--output_similarity_scores', type=str, default=f'{Path.cwd()}/output_all_similarity_scores.txt', metavar='\b', help='Output TXT file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining columns contain the similarity scores with respect to all reference library spectra. If no argument passed, then this TXT file is written to the current working directory with filename \'output_all_similarity_scores.txt\'.')
|
|
42
37
|
|
|
43
38
|
args = parser.parse_args()
|
|
44
39
|
|
pycompound/tuning_CLI.py
CHANGED
|
@@ -27,7 +27,7 @@ parser.add_argument('--wf_mz', type=str, default='0', metavar='\b', help='Mass/c
|
|
|
27
27
|
parser.add_argument('--wf_intensity', type=str, default='1', metavar='\b', help='Intensity weight factor parameter. Default: 1.')
|
|
28
28
|
parser.add_argument('--LET_threshold', type=str, default='0', metavar='\b', help='Low-entropy transformation threshold parameter. Spectra with Shannon entropy less than LET_threshold are transformed according to intensitiesNew=intensitiesOriginal^{(1+S)/(1+LET_threshold)}. Default: 0.')
|
|
29
29
|
parser.add_argument('--entropy_dimension', type=str, default='1.1', metavar='\b', help='Entropy dimension parameter. Must have positive value other than 1. When the entropy dimension is 1, then Renyi and Tsallis entropy are equivalent to Shannon entropy. Therefore, this parameter only applies to the renyi and tsallis similarity measures. This parameter will be ignored if similarity measure cosine or shannon is chosen. Default: 1.1.')
|
|
30
|
-
parser.add_argument('--output_path', type=str, default=f'{Path.cwd()}/output_tuning.
|
|
30
|
+
# Destination for the tuning results; the help text must name the same file as the default below.
parser.add_argument('--output_path', type=str, default=f'{Path.cwd()}/output_tuning.txt', metavar='\b', help='Output TXT file containing one row for each parameter set used along with its corresponding accuracy. If no argument passed, then this TXT file is written to the current working directory with filename \'output_tuning.txt\'.')
|
|
31
31
|
|
|
32
32
|
args = parser.parse_args()
|
|
33
33
|
|
|
@@ -40,8 +40,7 @@ else:
|
|
|
40
40
|
sys.exit()
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':args.weights, 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
|
|
44
|
-
|
|
43
|
+
grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':[args.weights], 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
|
|
45
44
|
|
|
46
45
|
if args.chromatography_platform == 'HRMS':
|
|
47
46
|
grid['mz_min'] = [float(x) for x in grid['mz_min']]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
app.py,sha256=aN6HFOY3rWZmQYCKYLtCQ6PT-E-rdE8B1iv8G2SL7PQ,50511
|
|
2
|
+
pycompound/build_library.py,sha256=sXG5MTJnPE7Gr8YMlLWjfMS7JQrW32lCeCGDw-DFe38,4826
|
|
3
|
+
pycompound/plot_spectra.py,sha256=cPuHDPTyMGdkw-uL1a5x2pBjHuUPku7dwFLrA43PEnU,40236
|
|
4
|
+
pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
|
|
5
|
+
pycompound/processing.py,sha256=q629rcDaMQMgef-4SbeV9cJnuiLXg97VT2F5AIsyqgI,10654
|
|
6
|
+
pycompound/similarity_measures.py,sha256=NbeVIy9DE_KWlDMXXylekjKuYVrtzbeEXbTutKFxmfU,10460
|
|
7
|
+
pycompound/spec_lib_matching.py,sha256=hEMUD5rAuEfvX1PnDEwilXIWVubnvgDBp-EVwVHu9ro,67141
|
|
8
|
+
pycompound/spec_lib_matching_CLI.py,sha256=qiekC52FP6ET_3NYvxUDN7km7y1OLUsd9FB4SHfne_Y,9690
|
|
9
|
+
pycompound/tuning_CLI.py,sha256=8gdT4EhIpvLHG3PcYtQBmUiPE9fJybwr3LCCilX-EfE,8540
|
|
10
|
+
pycompound-0.1.2.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
|
|
11
|
+
pycompound-0.1.2.dist-info/METADATA,sha256=Vlts9C3VGnnDt3vxoiObyWVJo5gYJ61ar6eeNePzwgw,1732
|
|
12
|
+
pycompound-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
+
pycompound-0.1.2.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
|
|
14
|
+
pycompound-0.1.2.dist-info/RECORD,,
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
app.py,sha256=ab1hII23lVwAmMh4bfzdni50vz-bK-ODbJT_b1VjGMA,34678
|
|
2
|
-
pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
|
|
3
|
-
pycompound/plot_spectra.py,sha256=_5r9YR3AA2IfTbcyfyTnPxxxA92T4hQ9olOgaw7FE6A,42082
|
|
4
|
-
pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
|
|
5
|
-
pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
|
|
6
|
-
pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
|
|
7
|
-
pycompound/spec_lib_matching.py,sha256=p8gj-72fjkf0p7XrqEl9hnYUGNSbyr7BXugvRT7Y5OA,60311
|
|
8
|
-
pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
|
|
9
|
-
pycompound/tuning_CLI.py,sha256=lkFBRZ5VxCBteIh_KTkQFdUBVZA0dL-BLiyMZce1vzE,8539
|
|
10
|
-
pycompound-0.1.0.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
|
|
11
|
-
pycompound-0.1.0.dist-info/METADATA,sha256=qfM4rP0BeGThYpxvGa7vOseRsUgtJ4aH8hgUtio0Ugw,1732
|
|
12
|
-
pycompound-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
pycompound-0.1.0.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
|
|
14
|
-
pycompound-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|