chromaquant 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,233 @@
1
+ """
2
+
3
+ COPYRIGHT STATEMENT:
4
+
5
+ ChromaQuant – A quantification software for complex gas chromatographic data
6
+
7
+ Copyright (c) 2024, by Julia Hancock
8
+ Affiliation: Dr. Julie Elaine Rorrer
9
+ URL: https://www.rorrerlab.com/
10
+
11
+ License: BSD 3-Clause License
12
+
13
+ ---
14
+
15
+ SCRIPT THAT MATCHES FID AND MS PEAKS
16
+
17
+ Julia Hancock
18
+ Started 12/10/2023
19
+
20
+ """
21
+ """ PACKAGES """
22
+ import sys
23
+ import pandas as pd
24
+ import os
25
+ from molmass import Formula
26
+ import math
27
+ import numpy as np
28
+ from chemformula import ChemFormula
29
+ import json
30
+ from datetime import datetime
31
+ import logging
32
+ import scipy
33
+ import importlib.util
34
+
35
+ """ DIRECTORIES (MANUAL) """
36
+ """
37
+ testPath = "/Users/connards/Desktop/University/Rorrer Lab/Scripts/chromaquant/src/chromaquant/"
38
+ #Define file directory
39
+ D_files = "/Users/connards/Documents/ChromaQuant"
40
+
41
+ #Define app directory
42
+ D_app = "/Users/connards/Desktop/University/Rorrer Lab/Scripts/chromaquant/src/chromaquant"
43
+
44
+ #Define resources directory
45
+ D_rsc = os.path.join(D_files,'resources')
46
+
47
+ #Define theme directory
48
+ D_theme = os.path.join(D_rsc,'forest','forest-light.tcl')
49
+
50
+ #Define response factors directory
51
+ D_rf = os.path.join(D_files,'response-factors')
52
+
53
+ #Define data directory
54
+ D_data = os.path.join(D_files,'data')
55
+
56
+ #Define images directory
57
+ D_img = os.path.join(D_files,'images')
58
+
59
+ directories = {'files':D_files,'resources':D_rsc,'theme':D_theme,'rf':D_rf,'data':D_data,'images':D_img}
60
+ """
61
+
62
+ """ VARIABLES FOR TESTING"""
63
+
64
+ #sname = 'example2'
65
+ #sphase = 'L'
66
+
67
+ """ LOCAL PACKAGES """
68
+
69
#Package directory: two directory levels above this file
app_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

#Absolute paths to the __init__.py of every local subpackage, keyed by subpackage name
subpack_dir = {
    name: os.path.join(app_dir,*parts,'__init__.py')
    for name, parts in (('Handle',('Handle',)),
                        ('Manual',('Manual',)),
                        ('MatchSub',('Match','MatchSub')))
}
76
+
77
+ #Define function to import from path
78
def import_from_path(module_name,path):
    """
    Import a Python source file from an explicit path and register it in sys.modules

    Parameters
    ----------
    module_name : String
        Name under which the module is registered in sys.modules
    path : String
        Path to the source file to execute (e.g. a package's __init__.py)

    Returns
    -------
    module : Module
        The fully executed module object

    Raises
    ------
    ImportError
        If no import spec/loader can be built for the given path
        (e.g. the file extension is not an importable one)
    Exception
        Anything raised while executing the module body is re-raised unchanged
    """
    #Define spec
    spec = importlib.util.spec_from_file_location(module_name,path)

    #spec_from_file_location returns None when no loader handles the path;
    #fail with a clear ImportError instead of an AttributeError further down
    if spec is None or spec.loader is None:
        raise ImportError("Cannot create an import spec for module '{0}' from path '{1}'".format(module_name,path),
                          name=module_name,path=path)

    #Define module
    module = importlib.util.module_from_spec(spec)

    #Register the module before executing it so the module body can refer to itself by name
    sys.modules[module_name] = module

    #Load module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        #Do not leave a half-initialised module behind in sys.modules
        sys.modules.pop(module_name,None)
        raise

    return module
88
+
89
#Load each local subpackage from its __init__.py and bind it to a short alias
#hd - Handle subpackage
hd = import_from_path(module_name="hd",path=subpack_dir['Handle'])
#mn - Manual subpackage
mn = import_from_path(module_name="mn",path=subpack_dir['Manual'])
#mtsb - Match/MatchSub subpackage
mtsb = import_from_path(module_name="mtsb",path=subpack_dir['MatchSub'])
93
+
94
+ """ FIT DEFINITION FUNCTION """
95
+
96
+ #Function that defines fit lambda based on parameters in analysis-config
97
def getFitLambda(fit_param):
    """
    Build the polynomial fit function described by the analysis-config fit parameters

    Parameters
    ----------
    fit_param : Dict
        Dictionary with a 'fit-type' entry and the polynomial coefficients:
        "First Order" requires keys 'a' and 'b'        -> a*x + b
        "Third Order" requires keys 'a','b','c','d'    -> a*x**3 + b*x**2 + c*x + d

    Returns
    -------
    fit : Function or None
        Function taking an FID retention time and returning the polynomial value,
        or None when 'fit-type' is neither "First Order" nor "Third Order"

    Raises
    ------
    KeyError
        If 'fit-type' or a coefficient required by the selected fit type is missing.
        Coefficients are read once, here, so a bad configuration fails immediately
        instead of on the first call of the returned function, and later changes to
        fit_param do not alter an already-built fit.
    """
    fit_type = fit_param['fit-type']

    if fit_type == "First Order":

        #Read coefficients up front
        a, b = fit_param['a'], fit_param['b']

        def fit(FID_RT):
            return a*FID_RT + b

    elif fit_type == "Third Order":

        #Read coefficients up front
        a, b, c, d = fit_param['a'], fit_param['b'], fit_param['c'], fit_param['d']

        def fit(FID_RT):
            return a*FID_RT**3 + b*FID_RT**2 + c*FID_RT + d

    else:

        #Unsupported fit type
        fit = None

    return fit
113
+
114
+ """ MATCH FUNCTION """
115
def mainMatch(sname,sphase,model):
    """
    Match FID peaks to MS peaks for one sample and save the result to the sample's FIDpMS file

    Reads analysis-config.json from the resources directory, builds the data file paths with
    hd.fileNamer, loads (or creates) the FIDpMS dataframe with hd.checkFile, matches peaks with
    the selected model, assigns compound types, handles duplicates and writes the dataframe
    back to the FIDpMS path as CSV. Progress is printed to stdout.

    Parameters
    ----------
    sname : String
        Sample name; used as the sample folder name under the data directory and passed to hd.fileNamer
    sphase : String
        Sample phase passed to hd.fileNamer (presumably 'L' or 'G' - TODO confirm against hd.fileNamer)
    model : String
        'P' matches peaks using the polynomial fit from analysis-config;
        'R' matches by retention time and then runs the polynomial match.
        Any other value skips peak matching; the remaining steps (including saving) still run.

    Returns
    -------
    None
    """

    print("[matchMain] Beginning match...")

    #Get current time (used at the end to report the elapsed time)
    print("[matchMain] Getting current time...")
    now = datetime.now()

    """ DIRECTORIES """
    print("[matchMain] Getting directories...")
    #Get directories from handling script
    directories = hd.handle(app_dir)

    #Data file log directory
    directories['log'] = os.path.join(directories['data'],sname,'log')

    #Data file breakdowns directory
    directories['break'] = os.path.join(directories['data'],sname,'breakdowns')

    #Raw data file directory
    directories['raw'] = os.path.join(directories['data'],sname,'raw data')

    """ ANALYSIS CONFIGURATION """
    print("[matchMain] Interpreting analysis configuration...")
    #Read analysis configuration file
    with open(os.path.join(directories['resources'],'analysis-config.json')) as f:
        analysis_config = json.load(f)

    #Extract analysis configuration info
    #This dictionary contains lists of substrings to be checked against compound name strings to
    #assign a compound type

    #Six compound types exist: linear alkanes (L), branched alkanes (B), aromatics (A), cycloalkanes (C),
    #alkenes/alkynes (E), and other (O)

    #Each compound type abbreviation will have an entry in the dictionary corresponding to a list of
    #substrings to be checked against a compound name string

    contains = analysis_config["CT-assign-contains"]

    #Tuple of contains keys in order of priority
    keyloop = analysis_config["CT-assign-keyloop"]

    #Tuple of elements to be excluded and automatically labelled as 'O'
    element_exclude = analysis_config["CT-assign-element-exclude"]

    #File suffixes to add to form data filenames
    file_suffix = analysis_config["file-suffix"]

    #Fit parameters for matching FID and MS according to polynomial fit
    match_fit_parameters = analysis_config["match-fit-parameters"]

    #Acceptable peak errors for matching
    peak_errors = analysis_config["peak-errors"]

    """ RUN FUNCTIONS """
    print("[matchMain] Running match functions...")

    #Run the file naming function - this function will create paths to all relevant files for matching FID and MS peaks according to sample name and phase
    print("[matchMain] Getting data pths...")
    paths = hd.fileNamer(sname,sphase,file_suffix,directories['raw'])

    # Run the file checking function - this function will search for an existing FIDpMS file, creating one if it does not exist.
    # It will then read the file as a pandas DataFrame. The tf Boolean describes whether or not there exist manually-matched peaks.
    # paths[2] is the FIDpMS file (it is also the save target below); paths[0] is presumably the FID data file - TODO confirm against hd.fileNamer
    # NOTE(review): tf is not used anywhere below
    print("[matchMain] Checking for FIDpMS file...")
    fpmDF, tf = hd.checkFile(paths[2],paths[0])

    # Import MS UPP data
    print("[matchMain] Importing mass spectrometry data...")
    mDF = pd.read_csv(paths[1])

    # Get only relevant columns of MS UPP data
    print("[matchMain] Cleaning mass spectrometry data...")
    mDF = mDF.loc[:,['Component RT','Compound Name','Formula','Match Factor']]

    # Third order function for testing
    #fit = lambda FID_RT: 0.0252*FID_RT**3 - 0.5274*FID_RT**2 + 4.8067*FID_RT - 3.0243

    # Get fit function using analysis-config
    # NOTE(review): getFitLambda returns None for an unsupported 'fit-type'; both branches below pass fit to mtsb.matchPeaks
    print("[matchMain] Extracting match fit parameters...")
    fit = getFitLambda(match_fit_parameters)

    #If model is polynomial...
    if model == 'P':
        # Run the matching function - this function takes a passed function describing an estimated relationship between MS RT's and FID RT's and matches peaks.
        # Function can be of any form as long as it returns a floating point value for the estimated MS RT
        print("[matchMain] Matching peaks according to polynomial fit...")
        fpmDF = mtsb.matchPeaks(fpmDF,mDF,fit,peak_errors['peak-error-third'])

    #If model is retention time...
    if model == 'R':
        # Match by retention time first, then run the polynomial match on the result
        # NOTE(review): confirm the polynomial match is intended to run in the retention time model as well
        print("[matchMain] Matching peaks by retention time...")
        fpmDF = mtsb.matchRT(fpmDF,mDF,peakError=peak_errors['peak-error-RT'])
        fpmDF = mtsb.matchPeaks(fpmDF,mDF,fit,peak_errors['peak-error-third'])

    # Run the compound type abbreviation assignment function - this function takes a passed matched FID and MS list and assigns
    # compound type abbreviations to each matched entry
    print("[matchMain] Assigning compound types...")
    fpmDF = mtsb.ctaAssign(fpmDF, contains, keyloop, element_exclude)

    print("[matchMain] Handling duplicates...")
    #Run the duplicate handling function
    fpmDF = mtsb.duplicateHandle(fpmDF)

    # NOTE(review): this message uses the "[AutoFpmMatch]" tag while every other message uses "[matchMain]"
    print("[AutoFpmMatch] Saving results...")
    #Save the FIDpMS data, overwriting the FIDpMS file
    fpmDF.to_csv(paths[2],index=False)

    #Print computation time (difference of the two timestamps, converted to milliseconds)
    compTime = datetime.now().timestamp()*1000 - now.timestamp()*1000
    print("[matchMain] Time taken: {:.3f} ms".format(compTime))

    #Close main function by returning
    return None
231
+
232
+ #For testing
233
+ #mainMatch('example2','L','R')
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ ChromaQuant.Match.MatchSub package initialization
6
+
7
+ Julia Hancock
8
+ Created 10-19-2024
9
+
10
+ """
11
+
12
+ from .parseTools import *
13
+ from .liquidFID import *
14
+ from .gasFID import *
15
+ from .gasTCD import *
@@ -0,0 +1,241 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ COPYRIGHT STATEMENT:
5
+
6
+ ChromaQuant – A quantification software for complex gas chromatographic data
7
+
8
+ Copyright (c) 2024, by Julia Hancock
9
+ Affiliation: Dr. Julie Elaine Rorrer
10
+ URL: https://www.rorrerlab.com/
11
+
12
+ License: BSD 3-Clause License
13
+
14
+ ---
15
+
16
+ SUBPACKAGE FOR PERFORMING GAS FID QUANTIFICATION STEPS
17
+
18
+ Julia Hancock
19
+ Started 12-29-2024
20
+
21
+ """
22
+ """ PACKAGES """
23
+ import pandas as pd
24
+ import numpy as np
25
+ from chemformula import ChemFormula
26
+
27
+ #Function for quantifying gas FID data w/ external standard
28
def gasFID_ES(BreakdownDF,DBRF,Label_info,gasBag_cond,total_volume,cutoff=4):
    """
    Function quantifies gas FID data using an external standard and returns breakdown dataframes

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe of matched FID and MS peak data. The columns 'Formula', 'Compound Name',
        'FID Area' and 'Compound Type Abbreviation' are read. The passed dataframe itself
        is not modified; a filtered, re-indexed copy is returned.
    DBRF : DataFrame
        Response factor table. The columns 'Compound Name', 'Carbon Number' and 'RF' are read.
    Label_info : List
        Two-item list; the second item is the dictionary mapping compound type abbreviations
        to the compound type names written to the 'Compound Type' column. The first item
        (chemical lump dictionary) is not used by this function.
    gasBag_cond : List
        List containing gas bag temperature [0], C, and gas bag pressure [1], psi
    total_volume : Float
        Float describing the total amount of gas estimated by the external standard volume percent, mL
    cutoff : Integer, optional
        Rows with a carbon number at or below this value are removed before quantification,
        i.e. only compounds with more than `cutoff` carbons are quantified. The default is 4.

    Returns
    -------
    BreakdownDF : DataFrame
        Quantified rows with the added columns 'Carbon Number', 'RF (Area/vol.%)', 'RF Source',
        'MW (g/mol)', 'Vol.%', 'Moles', 'Mass (mg)' and 'Compound Type'
    CT_DF : DataFrame
        Mass and mass fraction by compound type
    CN_DF : DataFrame
        Mass by carbon number, from 1 to the largest carbon number quantified
    CTCN_DF : DataFrame
        Mass by carbon number (row index i holds carbon number i+1) and compound type (columns)
    mass_DF : DataFrame
        Total mass computed from the overall breakdown and from each of the three summaries

    """
    #Compound type names reported in the compound type summaries
    compound_types = ['Aromatics','Linear Alkanes','Branched Alkanes',
                      'Cycloalkanes','Alkenes/Alkynes','Other']

    #Function for adding empty columns that do not exist yet, so that later steps
    #can rely on them even when no row gets a value assigned
    def addMissingColumns(DF,columns,dtype=float):
        for col in columns:
            if col not in DF.columns:
                DF[col] = pd.Series(np.nan,index=DF.index,dtype=dtype)
        return DF

    #Function for assigning response factors to compounds
    def assignRF(BreakdownDF,DBRF):

        BreakdownDF = addMissingColumns(BreakdownDF,['Carbon Number','RF (Area/vol.%)'])
        BreakdownDF = addMissingColumns(BreakdownDF,['RF Source'],dtype=object)

        #Get a dictionary of average response factors keyed by carbon number
        avgRF = DBRF.groupby('Carbon Number')['RF'].mean().to_dict()
        #Compound names that have an explicit response factor
        knownNames = DBRF['Compound Name'].values

        #Loop through every row in the FIDpMS dataframe
        for i, row in BreakdownDF.iterrows():
            #Rows without a formula cannot be assigned a carbon number or response factor
            if pd.isna(row['Formula']):
                continue

            #Obtain a dictionary containing key:value pairs as element:count using the formula string for the ith row
            chemFormDict = ChemFormula(row['Formula']).element
            #Use the carbon entry from the above dictionary to assign a carbon number to the ith row
            carbon = chemFormDict['C']
            BreakdownDF.at[i,'Carbon Number'] = carbon

            #If the row's compound name exists in the RF list explicitly, assign the row to the appropriate RF
            if row['Compound Name'] in knownNames:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = DBRF.loc[DBRF['Compound Name']==row['Compound Name'],'RF'].iloc[0]
                #Assign response factor source
                BreakdownDF.at[i,'RF Source'] = 'Direct RF assignment based on compound name'
            #Otherwise, assign response factor based on average carbon number RF
            else:
                #A carbon number without any DBRF entry gives nan instead of raising a KeyError
                rf = avgRF.get(int(carbon),np.nan)
                BreakdownDF.at[i,'RF (Area/vol.%)'] = rf
                #Assign response factor source
                if pd.isna(rf):
                    BreakdownDF.at[i,'RF Source'] = 'No response factor available for this compound name or carbon number'
                else:
                    BreakdownDF.at[i,'RF Source'] = 'RF assignment based on average response factor for DBRF carbon number entries'

        return BreakdownDF

    #Function for quantifying compounds using ideal gas law
    def gasQuant(BreakdownDF,DBRF,total_volume,cutoff):

        #Remove rows in BreakdownDF with a carbon number at or below cutoff
        #(rows without a carbon number are removed as well, since nan > cutoff is False)
        BreakdownDF = BreakdownDF.loc[BreakdownDF['Carbon Number'] > cutoff].copy()
        BreakdownDF = addMissingColumns(BreakdownDF,['MW (g/mol)','Vol.%','Moles','Mass (mg)'])

        #Convert gas bag conditions to SI units
        temp = gasBag_cond[0] + 273.15              #gas bag temperature, C -> K
        pressure = gasBag_cond[1] / 14.504*100000   #gas bag pressure, psi -> Pa
        volume = total_volume / 10**6               #gas bag volume, mL -> m^3
        #Define ideal gas constant, m^3*Pa/K*mol
        R = 8.314

        #Loop through every row in BreakdownDF
        for i, row in BreakdownDF.iterrows():

            #Add molecular weight using ChemFormula
            BreakdownDF.at[i,'MW (g/mol)'] = ChemFormula(row['Formula']).formula_weight

            #Get volume percent using response factor
            BreakdownDF.at[i,'Vol.%'] = row['FID Area']/row['RF (Area/vol.%)']

            #Get moles using ideal gas law (PV=nRT)
            BreakdownDF.at[i,'Moles'] = BreakdownDF.at[i,'Vol.%']/100*volume*pressure/(temp*R)

            #Get mass (mg) using moles and molar mass
            BreakdownDF.at[i,'Mass (mg)'] = BreakdownDF.at[i,'Moles'] * BreakdownDF.at[i,'MW (g/mol)'] * 1000

        return BreakdownDF

    #Function for further breaking down product distribution
    def moreBreakdown(BreakdownDF,CT_dict):
        """
        This function prepares further breakdown dataframes for use in exporting to Excel

        Parameters
        ----------
        BreakdownDF : DataFrame
            Quantified dataframe containing 'Mass (mg)', 'Carbon Number' and
            'Compound Type Abbreviation' columns.
        CT_dict : Dict
            Dictionary of all compound type abbreviations in use and their associated expansions

        Returns
        -------
        BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF : DataFrames
            The breakdown dataframe with a 'Compound Type' column plus the summary dataframes

        """

        #Get the total mass of product from the breakdown dataframe
        m_total = np.nansum(BreakdownDF['Mass (mg)'])

        #Set the compound type of every row according to its abbreviation; rows whose abbreviation
        #is not in CT_dict keep their current value (nan if the column is new).
        #Carbon numbers are already assigned for every row that reaches this point.
        BreakdownDF = addMissingColumns(BreakdownDF,['Compound Type'],dtype=object)
        if 'Compound Type Abbreviation' in BreakdownDF.columns:
            for i, abbreviation in BreakdownDF['Compound Type Abbreviation'].items():
                if abbreviation in CT_dict:
                    BreakdownDF.at[i,'Compound Type'] = CT_dict[abbreviation]

        #Get maximum carbon number in breakdown dataframe (0 when there are no rows)
        largest = BreakdownDF['Carbon Number'].max()
        CN_max = 0 if pd.isna(largest) else int(largest)
        carbon_numbers = range(1,CN_max+1)

        #Sum the masses by compound type, treating nan as zero
        CT_masses = np.array([np.nansum(BreakdownDF.loc[BreakdownDF['Compound Type'] == ct,'Mass (mg)'])
                              for ct in compound_types],dtype=float)
        #Mass fractions use the total mass from earlier; a zero total gives nan/inf as plain division would
        with np.errstate(divide='ignore',invalid='ignore'):
            CT_fractions = CT_masses/m_total

        #Create a dataframe for saving quantitative results organized by compound type
        CT_DF = pd.DataFrame({'Compound Type':list(compound_types),
                              'Mass (mg)':CT_masses,
                              'Mass fraction':CT_fractions})

        #Create a dataframe for saving quantitative results organized by carbon number
        CN_masses = np.array([np.nansum(BreakdownDF.loc[BreakdownDF['Carbon Number'] == cn,'Mass (mg)'])
                              for cn in carbon_numbers],dtype=float)
        CN_DF = pd.DataFrame({'Carbon Number':carbon_numbers,
                              'Mass (mg)':CN_masses})

        #Create a dataframe for saving quantitative results organized by both compound type and carbon number
        CTCN_DF = pd.DataFrame(np.zeros((CN_max,len(compound_types))),
                               index=range(CN_max),columns=compound_types)
        for cn in carbon_numbers:
            has_cn = BreakdownDF['Carbon Number'] == cn
            for ct in compound_types:
                #Row index cn-1 holds carbon number cn
                CTCN_DF.loc[cn-1,ct] = np.nansum(BreakdownDF.loc[has_cn & (BreakdownDF['Compound Type'] == ct),'Mass (mg)'])

        #Get total masses from CT, CN, and CTCN dataframes
        CT_mass = np.nansum(CT_DF['Mass (mg)'])
        CN_mass = np.nansum(CN_DF['Mass (mg)'])
        CTCN_mass = np.nansum(CTCN_DF.to_numpy())

        #Create total mass dataframe
        mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

        return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF

    #Unpack chemical lump and compound type dictionaries from list (only the latter is needed)
    _, CT_Dict = Label_info

    #Filter dataframe to remove compounds that do not contain carbon. The pattern requires a 'C' that is
    #not followed by a lowercase letter, so symbols such as Cl, Ca or Cu do not count as carbon.
    #Rows without a formula are kept here and are removed later by the carbon number cutoff.
    hasCarbon = BreakdownDF['Formula'].str.contains(r'C(?:[^a-z]|$)',regex=True,na=True)
    #Keep the matching rows in a new dataframe and reset its index
    BreakdownDF = BreakdownDF.loc[hasCarbon].reset_index(drop=True)

    #Run response factor assignment function
    BreakdownDF = assignRF(BreakdownDF, DBRF)
    #Run gas quantification function
    BreakdownDF = gasQuant(BreakdownDF,DBRF,total_volume,cutoff)
    #Run further breakdown function
    BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF, CT_Dict)

    return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF