PyPI - pycompound - Versions diffs - 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl - Mend

pycompound 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

app.py +2589 -237
pycompound/build_library.py +77 -20
pycompound/plot_spectra.py +1 -1
pycompound/processing.py +5 -5
pycompound/spec_lib_matching.py +245 -471
pycompound/spec_lib_matching_CLI.py +48 -2
pycompound/tuning_CLI_DE.py +22 -22
pycompound/tuning_CLI_grid.py +22 -6
{pycompound-0.1.6.dist-info → pycompound-0.1.7.dist-info}/METADATA +1 -1
pycompound-0.1.7.dist-info/RECORD +15 -0
pycompound-0.1.6.dist-info/RECORD +0 -15
{pycompound-0.1.6.dist-info → pycompound-0.1.7.dist-info}/WHEEL +0 -0
{pycompound-0.1.6.dist-info → pycompound-0.1.7.dist-info}/licenses/LICENSE +0 -0
{pycompound-0.1.6.dist-info → pycompound-0.1.7.dist-info}/top_level.txt +0 -0

pycompound/build_library.py CHANGED Viewed

@@ -6,18 +6,19 @@ from pathlib import Path
 from pyteomics import mgf
 from pyteomics import mzml
 import sys
+import json
 def build_library_from_raw_data(input_path=None, output_path=None, is_reference=False):
     '''
-    Converts mgf, mzML, cdf, or msp file to the necessary format for spectral library matching.
+    Converts mgf, mzML, cdf, json, or msp file to the necessary format for spectral library matching.
-    --input_path: Path to input file (must be mgf, mzML, cdf, or msp file). Mandatory argument.
+    --input_path: Path to input file (must be mgf, mzML, cdf, json, or msp file). Mandatory argument.
     --output_path: Path to output TXT file. Default: current working directory.
     --is_reference: Boolean flag indicating whether IDs of spectra should be written to output. Only pass true if building a reference library with known compound IDs. Only applicable to mgf and msp files. Options: \'True\', \'False\'. Optional argument. Default: False.
     '''
     if input_path is None:
-        print('Error: please specify input_path (i.e. the path to the input mgf, mzML, cdf, or msp file). Mandatory argument.')
+        print('Error: please specify input_path (i.e. the path to the input mgf, mzML, cdf, json, or msp file). Mandatory argument.')
         sys.exit()
     if output_path is None:
@@ -37,18 +38,21 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
         input_file_type = 'mgf'
     elif last_four_chars == 'mzML' or last_four_chars == 'mzml' or last_four_chars == 'MZML':
         input_file_type = 'mzML'
+    elif last_four_chars == 'json' or last_four_chars == 'JSON':
+        input_file_type = 'json'
     elif last_three_chars == 'cdf' or last_three_chars == 'CDF':
         input_file_type = 'cdf'
     elif last_three_chars == 'msp' or last_three_chars == 'MSP':
         input_file_type = 'msp'
     else:
-        print('ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path')
+        print('ERROR: either an \'mgf\', \'mzML\', \'cdf\', \'json\', or \'msp\' file must be passed to --input_path')
         sys.exit()
     spectra = []
     if input_file_type == 'mgf':
-        with mgf.read(input_path, index_by_scans = True) as reader:
+        #with mgf.read(input_path, index_by_scans = True) as reader:
+        with mgf.read(input_path, use_index=False) as reader:
             for spec in reader:
                 spectra.append(spec)
     if input_file_type == 'mzML':
@@ -61,18 +65,24 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
         ids = []
         mzs = []
         ints = []
+        precursor_ion_mzs = []
         for i in range(0,len(spectra)):
             for j in range(0,len(spectra[i]['m/z array'])):
                 if input_file_type == 'mzML':
-                    ids.append(f'ID_{i+1}')
-                else:
                     if is_reference == False:
                         ids.append(f'ID_{i+1}')
-                    elif is_reference == True:
+                    else:
+                        ids.append(spectra[i]['id'])
+                elif input_file_type == 'mgf':
+                    precursor_ion_mzs.append(spectra[i]['params']['pepmass'][0])
+                    if is_reference == False:
+                        ids.append(f'ID_{i+1}')
+                    else:
                         ids.append(spectra[i]['params']['name'])
                 mzs.append(spectra[i]['m/z array'][j])
                 ints.append(spectra[i]['intensity array'][j])
     if input_file_type == 'cdf':
         dataset = nc.Dataset(input_path, 'r')
         all_mzs = dataset.variables['mass_values'][:]
@@ -98,31 +108,78 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
                 ints.append(ints_tmp[j])
-    if input_file_type == 'msp':
+    if input_file_type == "msp":
         ids = []
         mzs = []
         ints = []
-        with open(input_path, 'r') as f:
+        precursor_ion_mzs = []
+        spectrum_id = None
+        precursor_ion_mz = None
+        with open(input_path, "r", encoding="utf-8", errors="ignore") as f:
             i = 0
             for line in f:
                 line = line.strip()
-                if line.startswith('Name:'):
+                if not line:
+                    continue
+                if line.startswith("Name:"):
                     i += 1
-                    if is_reference == False:
-                        spectrum_id = f'ID_{i+1}'
-                    elif is_reference == True:
-                        spectrum_id = line.replace('Name: ','')
-                elif line and line[0].isdigit():
+                    if not is_reference:
+                        spectrum_id = f"ID_{i}"
+                    else:
+                        spectrum_id = line.replace("Name:", "", 1).strip()
+                elif line.startswith("PrecursorMZ:"):
+                    try:
+                        precursor_ion_mz = float(line.replace("PrecursorMZ:", "", 1).strip())
+                    except ValueError:
+                        precursor_ion_mz = None
+                elif line[0].isdigit():
                     try:
                         mz, intensity = map(float, line.split()[:2])
-                        ids.append(spectrum_id)
-                        mzs.append(mz)
-                        ints.append(intensity)
                     except ValueError:
                         continue
+                    if spectrum_id is None:
+                        continue
+                    ids.append(spectrum_id)
+                    mzs.append(mz)
+                    ints.append(intensity)
+                    precursor_ion_mzs.append(precursor_ion_mz)
+    if input_file_type == 'json':
+        data = json.load(open(input_path))
+        ids = []
+        mzs = []
+        ints = []
+        precursor_ion_mzs = []
+        for i in range(0,len(data)):
+            spec_ID_tmp = data[i]['spectrum_id']
+            tmp = data[i]['peaks_json']
+            tmp = tmp[1:-1].split(",")
+            tmp = [a.replace("[","") for a in tmp]
+            tmp = [a.replace("]","") for a in tmp]
+            mzs_tmp = tmp[0::2]
+            ints_tmp = tmp[1::2]
+            if is_reference == False:
+                ids.extend([f'ID_{i+1}'] * len(mzs_tmp))
+            elif is_reference == True:
+                ids.extend([spec_ID_tmp] * len(mzs_tmp))
+            mzs.extend(mzs_tmp)
+            ints.extend(ints_tmp)
+            precursor_ion_mzs.extend([data[i]['Precursor_MZ']] * len(mzs_tmp))
+    if len(precursor_ion_mzs) > 0:
+        df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints, 'precursor_ion_mz':precursor_ion_mzs})
+    else:
+        df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
-    df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
     df.to_csv(output_path, index=False, sep='\t')

pycompound/plot_spectra.py CHANGED Viewed

@@ -315,7 +315,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
     plt.savefig(output_path, format='pdf')
     if return_plot == True:
-        return plt
+        return fig

pycompound/processing.py CHANGED Viewed

@@ -295,13 +295,13 @@ def get_reference_df(reference_data, likely_reference_IDs=None):
     extension = reference_data.rsplit('.',1)
     extension = extension[(len(extension)-1)]
     if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
-        output_path_tmp = reference_data[:-3] + 'csv'
+        output_path_tmp = reference_data[:-3] + 'txt'
         build_library_from_raw_data(input_path=reference_data, output_path=output_path_tmp, is_reference=True)
-        df_reference = pd.read_csv(output_path_tmp)
-    if extension == 'csv' or extension == 'CSV':
-        df_reference = pd.read_csv(reference_data)
+        df_reference = pd.read_csv(output_path_tmp, sep='\t')
+    if extension == 'txt' or extension == 'TXT':
+        df_reference = pd.read_csv(reference_data, sep='\t')
     if likely_reference_IDs is not None:
-        likely_reference_IDs = pd.read_csv(likely_reference_IDs, header=None)
+        likely_reference_IDs = pd.read_csv(likely_reference_IDs, header=None, sep='\t')
         df_reference = df_reference.loc[df_reference.iloc[:,0].isin(likely_reference_IDs.iloc[:,0].tolist())]
     return df_reference

pycompound 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

pycompound 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl