PyPI - chromaquant - Versions diffs - 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

chromaquant 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

chromaquant/Handle/__init__.py +2 -1
chromaquant/Handle/fileChecks.py +172 -0
chromaquant/Handle/handleDirectories.py +1 -1
chromaquant/Hydro/__init__.py +12 -0
chromaquant/Hydro/hydroMain.py +496 -0
chromaquant/Match/AutoFpmMatch.py +48 -48
chromaquant/Match/MatchSub/__init__.py +13 -0
chromaquant/Match/MatchSub/matchTools.py +282 -0
chromaquant/Match/MatchSub/peakTools.py +259 -0
chromaquant/Match/__init__.py +2 -1
chromaquant/Match/matchMain.py +233 -0
chromaquant/Quant/QuantSub/__init__.py +15 -0
chromaquant/Quant/QuantSub/gasFID.py +241 -0
chromaquant/Quant/QuantSub/gasTCD.py +425 -0
chromaquant/Quant/QuantSub/liquidFID.py +310 -0
chromaquant/Quant/QuantSub/parseTools.py +162 -0
chromaquant/Quant/__init__.py +1 -1
chromaquant/Quant/quantMain.py +417 -0
chromaquant/UAPP/__init__.py +12 -0
chromaquant/UAPP/uappMain.py +427 -0
chromaquant/__main__.py +426 -393
chromaquant/oldui.py +492 -0
chromaquant/properties.json +1 -1
{chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/METADATA +3 -3
chromaquant-0.4.0.dist-info/RECORD +38 -0
{chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/WHEEL +1 -1
chromaquant-0.3.1.dist-info/RECORD +0 -22
{chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/entry_points.txt +0 -0
{chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
{chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/licenses/LICENSES_bundled.txt +0 -0

chromaquant/Quant/QuantSub/liquidFID.py ADDED Viewed

@@ -0,0 +1,310 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+COPYRIGHT STATEMENT:
+ChromaQuant – A quantification software for complex gas chromatographic data
+Copyright (c) 2024, by Julia Hancock
+              Affiliation: Dr. Julie Elaine Rorrer
+	      URL: https://www.rorrerlab.com/
+License: BSD 3-Clause License
+---
+SUBPACKAGE FOR PERFORMING LIQUID QUANTIFICATION STEPS
+Julia Hancock
+Started 12-29-2024
+"""
+""" PACKAGES """
+import pandas as pd
+import math
+import numpy as np
+from chemformula import ChemFormula
+""" FUNCTION """
+#Function for quantifying liquid FID data
#Name of the response factor column written to the breakdown dataframe
_RF_COLUMN = 'Response Factor ((A_i/A_T)/(m_i/m_T))'
#Compound types reported in the breakdown tables, in output order
_COMPOUND_TYPES = ['Aromatics','Linear Alkanes','Branched Alkanes',
                   'Cycloalkanes','Alkenes/Alkynes','Other']


def _assignCTCN(BreakdownDF,CT_dict):
    """
    Assign a compound type and a carbon number to every row that allows it.

    The compound type is looked up from the row's 'Compound Type Abbreviation'
    and the carbon number is the carbon count of the row's 'Formula'. A row
    whose abbreviation is unknown is left untouched; a row without a usable
    formula keeps whatever 'Carbon Number' it already had.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe of matched peaks, modified in place.
    CT_dict : Dict
        (compound type abbreviation):(full compound type name)

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe with 'Compound Type' and 'Carbon Number' filled in.
    """
    for i, row in BreakdownDF.iterrows():
        try:
            #Expand the abbreviation first; an unknown abbreviation skips the whole row
            BreakdownDF.at[i,'Compound Type'] = CT_dict[row['Compound Type Abbreviation']]
            formula = row['Formula']
            #A missing formula cannot be parsed, so there is nothing more to assign
            if pd.isna(formula):
                continue
            #Carbon number is the count of 'C' in the parsed formula
            BreakdownDF.at[i,'Carbon Number'] = ChemFormula(formula).element['C']
        #Rows that cannot be labelled are skipped on purpose; Exception (rather than a
        #bare except) keeps KeyboardInterrupt and SystemExit propagating
        except Exception:
            continue
    return BreakdownDF


def _assignRF(BreakdownDF,DBRF):
    """
    Assign a response factor and a note on its origin to every labelled row.

    Preference order: the empirical response factor listed for the exact
    compound, then the carbon number linear fit of the compound type, then 1.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe of matched peaks, modified in place.
    DBRF : Dict of DataFrame
        Response factor sheets keyed by compound type abbreviation. Each sheet
        is read for the columns 'Compound Name', 'Response Factor',
        'Linear fit m' and 'Linear fit b' (fit parameters from the row labelled 0).

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe with the response factor and 'RF Source' columns set.

    Raises
    ------
    KeyError
        If a row's compound type abbreviation has no sheet in DBRF.
    """
    for i, row in BreakdownDF.iterrows():
        cmp_name = row['Compound Name']
        cmp_carbon = row['Carbon Number']
        cmp_type = row['Compound Type Abbreviation']
        #Rows missing any identifying information get no response factor
        if pd.isna(cmp_name) or pd.isna(cmp_carbon) or pd.isna(cmp_type):
            BreakdownDF.at[i,'RF Source'] = 'No RF assigned, at least one of the following were missing: compound name, formula, or type abbreviation'
            continue
        #Rows of type Other, "O", get no response factor either
        if cmp_type == "O":
            BreakdownDF.at[i,'RF Source'] = 'No RF assigned, compound type is listed as "Other"'
            continue
        sheet = DBRF[cmp_type]
        #Response factors listed for this exact compound, if any
        listed = sheet.loc[sheet['Compound Name'] == cmp_name,'Response Factor']
        if not listed.empty:
            RF = listed.iloc[0]
            if pd.isna(RF):
                RF = 1
                source = 'Assumed 1, compound found in RF sheet without RF'
            else:
                source = 'Assigned empirical RF, exact compound found in response factors sheet'
        else:
            fit_m = sheet.loc[0,'Linear fit m']
            fit_b = sheet.loc[0,'Linear fit b']
            #A fit needs both parameters; with only one of them the estimate would be
            #NaN and the compound's mass would silently drop out of every total
            if pd.isna(fit_m) or pd.isna(fit_b):
                RF = 1
                source = 'Assumed 1, compound type does not have a carbon number fit'
            else:
                RF = fit_m*cmp_carbon+fit_b
                #Estimates outside of [0, 5] are rejected
                if RF < 0 or RF > 5:
                    RF = 1
                    source = 'Assumed 1, could estimate a response factor exists but is out of range (negative or over 5)'
                else:
                    source = 'Assigned using carbon number linear fit for compound type {0} and carbon number {1}'.format(cmp_type,int(cmp_carbon))
        BreakdownDF.at[i,_RF_COLUMN] = RF
        BreakdownDF.at[i,'RF Source'] = source
    return BreakdownDF


def _quantMain(BreakdownDF,sinfo):
    """
    Convert FID areas into masses using the internal standard.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe of matched peaks with response factors, modified in place.
    sinfo : Dict
        Sample information; read for 'Internal Standard Mass (mg)' and
        'Internal Standard Name'.

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe with 'A_i/A_T' and 'm_i' set for every row that is
        neither the internal standard nor an unmatched peak.

    Raises
    ------
    ValueError
        If no row is named after the internal standard.
    """
    IS_m = sinfo['Internal Standard Mass (mg)']
    IS_name = sinfo['Internal Standard Name']
    #FID areas of every peak labelled as the internal standard
    IS_areas = BreakdownDF.loc[BreakdownDF['Compound Name'] == IS_name,'FID Area']
    if IS_areas.empty:
        raise ValueError("Internal standard '{0}' was not found in the 'Compound Name' column".format(IS_name))
    #If the internal standard is listed more than once, use its largest peak
    IS_Area = IS_areas.max()
    #Rows with these names are not quantified
    skipped = (IS_name,'No match','No Match')
    for i, row in BreakdownDF.iterrows():
        if row['Compound Name'] in skipped:
            continue
        Aratio = row['FID Area']/IS_Area
        BreakdownDF.at[i,'A_i/A_T'] = Aratio
        BreakdownDF.at[i,'m_i'] = Aratio*IS_m/row[_RF_COLUMN]
    return BreakdownDF


def _moreBreakdown(BreakdownDF):
    """
    Summarise quantified masses by compound type, by carbon number and by both.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe of matched peaks; read for 'm_i', 'Compound Type' and
        'Carbon Number'.

    Returns
    -------
    CT_DF : DataFrame
        Mass and mass fraction of each compound type.
    CN_DF : DataFrame
        Mass of each carbon number from 1 up to the largest one present.
    CTCN_DF : DataFrame
        Mass of each carbon number (rows, row k holds carbon number k+1) and
        compound type (columns).
    mass_DF : DataFrame
        Total mass according to the breakdown dataframe and each summary.
    """
    masses = BreakdownDF['m_i']
    cmp_types = BreakdownDF['Compound Type']
    carbons = BreakdownDF['Carbon Number']
    #Total mass of product, treating nan as zero
    m_total = np.nansum(masses)
    #Largest carbon number present; zero when no row has one, which yields empty tables
    CN_top = carbons.max()
    CN_max = 0 if pd.isna(CN_top) else int(CN_top)
    carbon_numbers = range(1,CN_max+1)

    #Sum of the masses of the selected rows, treating nan as zero
    def massWhere(selection):
        return np.nansum(masses[selection])

    CT_masses = np.array([massWhere(cmp_types == ct) for ct in _COMPOUND_TYPES],dtype=float)
    CT_DF = pd.DataFrame({'Compound Type':list(_COMPOUND_TYPES),
                          'Mass (mg)':CT_masses,
                          'Mass fraction':CT_masses/m_total})
    CN_DF = pd.DataFrame({'Carbon Number':carbon_numbers,
                          'Mass (mg)':np.array([massWhere(carbons == cn) for cn in carbon_numbers],dtype=float)})
    CTCN_DF = pd.DataFrame({ct:np.array([massWhere((carbons == cn) & (cmp_types == ct)) for cn in carbon_numbers],dtype=float)
                            for ct in _COMPOUND_TYPES},
                           index=range(CN_max))
    #Total masses of each summary, for comparison against the overall total
    CT_mass = np.nansum(CT_DF['Mass (mg)'])
    CN_mass = np.nansum(CN_DF['Mass (mg)'])
    CTCN_mass = np.nansum(CTCN_DF.to_numpy())
    mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})
    return CT_DF, CN_DF, CTCN_DF, mass_DF


def liquidFID(BreakdownDF,DBRF,Label_info,sinfo):
    """
    Quantify liquid FID data.

    Labels every peak with a compound type and carbon number, assigns response
    factors, converts FID areas into masses relative to the internal standard
    and summarises the masses by compound type and carbon number.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe of matched FID and MS peaks, read for the columns
        'Compound Name', 'Formula', 'Compound Type Abbreviation' and 'FID Area'.
        It is modified in place.
    DBRF : Dict of DataFrame
        Response factor sheets keyed by compound type abbreviation.
    Label_info : List
        Two items, the chemical lump dictionary followed by the compound type
        dictionary. Only the compound type dictionary is used here.
    sinfo : Dict
        Sample information, read for 'Internal Standard Mass (mg)' and
        'Internal Standard Name'.

    Returns
    -------
    List
        [BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF]

    Raises
    ------
    ValueError
        If the internal standard is not among the compound names.
    KeyError
        If a quantifiable row's compound type abbreviation has no sheet in DBRF.
    """
    #Unpack chemical lump and compound type dictionaries from list
    _, CT_Dict = Label_info
    #Assign compound type and carbon number
    BreakdownDF = _assignCTCN(BreakdownDF,CT_Dict)
    #Assign response factors, preferring empirical RF's to estimated ones and assigning 1 when no other RF can be applied
    BreakdownDF = _assignRF(BreakdownDF,DBRF)
    #Add quantitative data
    BreakdownDF = _quantMain(BreakdownDF,sinfo)
    #Prepare compound type and carbon number breakdowns
    CT_DF, CN_DF, CTCN_DF, mass_DF = _moreBreakdown(BreakdownDF)
    return [BreakdownDF,CT_DF,CN_DF,CTCN_DF,mass_DF]

chromaquant/Quant/QuantSub/parseTools.py ADDED Viewed

@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+COPYRIGHT STATEMENT:
+ChromaQuant – A quantification software for complex gas chromatographic data
+Copyright (c) 2024, by Julia Hancock
+              Affiliation: Dr. Julie Elaine Rorrer
+	      URL: https://www.rorrerlab.com/
+License: BSD 3-Clause License
+---
+SUBPACKAGE FOR PARSING QUANTIFICATION INFORMATION
+Julia Hancock
+Started 12-29-2024
+"""
+""" PACKAGES """
+import os
+import datetime
+""" FUNCTIONS """
+#Function that evaluates runtime parameters
#Recognised phase arguments and their (Liquid Bool, Gas Bool) flags
_PHASE_FLAGS = {'L':(True,False),
                'G':(False,True),
                'LG':(True,True)}


def evalRunParam(quantphases):
    """
    Translate the phase argument into flags for liquid and gas analysis.

    Parameters
    ----------
    quantphases : Str
        'L' for liquid only, 'G' for gas only or 'LG' for both. The match is
        exact (case sensitive, no surrounding whitespace).

    Returns
    -------
    lgTF : List or None
        [Liquid Bool, Gas Bool], or None when the argument is not recognised.
    """
    #Anything that is not a string cannot be a recognised argument (and may not be hashable)
    if not isinstance(quantphases,str):
        return None
    flags = _PHASE_FLAGS.get(quantphases)
    #Hand out a new list every time so that callers cannot alter the lookup table
    return None if flags is None else list(flags)
+#Function that finds most recent response factor file
def findRecentFile(prefix,suffix,path):
    """
    Find the most recently dated file with a given prefix and suffix.

    Files are expected to be named prefix_mm-dd-yy followed by suffix, for
    example LRF_07-24-24.xlsx. The date is read from the file name, not from
    the file system.

    Parameters
    ----------
    prefix : Str
        Start of the file name, without the separator preceding the date.
    suffix : Str
        End of the file name, e.g. '.xlsx'.
    path : Str
        Directory to search.

    Returns
    -------
    Str or None
        Full path to the matching file with the latest date, or None when no
        file has both the prefix and the suffix. A matching file whose name
        holds no valid date is only returned when no dated file matches.
    """
    #Date format used in the file names
    date_format = '%m-%d-%y'

    #Date written in a file name, or the earliest possible date if there is none
    def fileDate(name):
        #Skip the prefix and the one-character separator, stop before the suffix
        date_string = name[len(prefix)+1:len(name)-len(suffix)]
        try:
            return datetime.datetime.strptime(date_string,date_format)
        #Undated files rank below every dated one instead of aborting the search
        except ValueError:
            return datetime.datetime.min

    #Keep the files that have both the passed prefix and the passed suffix
    candidates = [name for name in os.listdir(path)
                  if name.startswith(prefix) and name.endswith(suffix)]
    #If nothing matches, return None
    if not candidates:
        return None
    #Otherwise, return the full path to the file with the latest date
    return os.path.join(path,max(candidates,key=fileDate))
+#Define function that inserts a column to a CTCN Dataframe labeling the carbon number
def insertCN(CTCN_DF):
    """
    Insert a leading 'Carbon Number' column into a CTCN dataframe.

    The number of rows is taken to be the maximum carbon number, so the rows
    are labelled 1, 2, ... in their existing order.

    Parameters
    ----------
    CTCN_DF : DataFrame
        Dataframe with one row per carbon number. It is modified in place.

    Returns
    -------
    CTCN_DF : DataFrame
        The same dataframe, with 'Carbon Number' as its first column.
    """
    #The length of the dataframe is taken to be the maximum carbon number
    CN_max = len(CTCN_DF)
    #Label the rows 1 through CN_max in a new first column
    CTCN_DF.insert(loc=0, column='Carbon Number', value=list(range(1,CN_max+1)))
    return CTCN_DF
+#findRecentFile('LRF','.xlsx','/Users/connards/Documents/ChromaQuant/response-factors')

chromaquant/Quant/__init__.py CHANGED Viewed

@@ -9,4 +9,4 @@ Created 10-19-2024
 """
-from .AutoQuantification import main_AutoQuantification
+from .quantMain import mainQuant

chromaquant 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

chromaquant 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl