chromaquant 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,310 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ COPYRIGHT STATEMENT:
5
+
6
+ ChromaQuant – A quantification software for complex gas chromatographic data
7
+
8
+ Copyright (c) 2024, by Julia Hancock
9
+ Affiliation: Dr. Julie Elaine Rorrer
10
+ URL: https://www.rorrerlab.com/
11
+
12
+ License: BSD 3-Clause License
13
+
14
+ ---
15
+
16
+ SUBPACKAGE FOR PERFORMING LIQUID QUANTIFICATION STEPS
17
+
18
+ Julia Hancock
19
+ Started 12-29-2024
20
+
21
+ """
22
+ """ PACKAGES """
23
+ import pandas as pd
24
+ import math
25
+ import numpy as np
26
+ from chemformula import ChemFormula
27
+
28
+ """ FUNCTION """
29
+ #Function for quantifying liquid FID data
30
def liquidFID(BreakdownDF,DBRF,Label_info,sinfo):
    """
    Quantify liquid FID data: assign a compound type, carbon number and response
    factor to every peak, convert peak areas into masses using the internal
    standard, and summarize the masses by compound type and carbon number.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Matched FID and MS peak data. Must contain the columns 'Compound Name',
        'Compound Type Abbreviation' and 'FID Area'; 'Formula' is used when
        present. The dataframe is modified in place (result columns are added).
    DBRF : Mapping
        Response factor tables indexed by compound type abbreviation. Each table
        must expose the columns 'Compound Name', 'Response Factor',
        'Linear fit m' and 'Linear fit b', with the fit parameters stored in the
        row labelled 0. Presumably a dict of dataframes read from a multi-sheet
        Excel file -- confirm against the caller.
    Label_info : Sequence
        Two items, unpacked as (CL_Dict, CT_Dict). CT_Dict maps compound type
        abbreviations to full compound type names.
    sinfo : Dict
        Sample information. Must contain 'Internal Standard Mass (mg)' and
        'Internal Standard Name'.

    Returns
    -------
    List
        [BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF]: the annotated breakdown
        followed by the masses per compound type, per carbon number, per
        (carbon number, compound type) pair, and the total mass of each summary.

    Raises
    ------
    ValueError
        If the internal standard named in sinfo has no peak with a valid FID
        area in BreakdownDF.

    """
    #Column label used for response factors throughout this function
    RF_COL = 'Response Factor ((A_i/A_T)/(m_i/m_T))'

    #Compound types reported in the summary dataframes, in output order
    COMPOUND_TYPES = ('Aromatics','Linear Alkanes','Branched Alkanes',
                      'Cycloalkanes','Alkenes/Alkynes','Other')

    #Unpack compound type and carbon number dictionaries from list
    CL_Dict, CT_Dict = Label_info

    """ FUNCTIONS """
    #Function to create an all-NaN column when it is missing, so later reads cannot raise KeyError
    def ensureColumn(df,column,dtype):
        if column not in df.columns:
            df[column] = pd.Series(np.nan,index=df.index,dtype=dtype)

    #Function to assign compound type and carbon number to compound using formula
    def assignCTCN(BreakdownDF,CT_dict):
        """
        Fill the 'Compound Type' and 'Carbon Number' columns of the breakdown.

        Rows whose abbreviation is missing or not a key of CT_dict are left
        untouched. Rows whose formula is missing or cannot be parsed get a
        compound type but keep whatever carbon number they already had.

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        CT_dict : Dict
            Dictionary of compound type abbreviations and their associated expansions.

        Returns
        -------
        BreakdownDF : DataFrame
            The same dataframe, modified in place.

        """
        ensureColumn(BreakdownDF,'Compound Type',object)
        ensureColumn(BreakdownDF,'Carbon Number',float)

        for i, row in BreakdownDF.iterrows():
            abbreviation = row['Compound Type Abbreviation']

            #Without a known abbreviation neither column can be assigned
            if pd.isna(abbreviation) or abbreviation not in CT_dict:
                continue

            BreakdownDF.at[i,'Compound Type'] = CT_dict[abbreviation]

            #A row without a formula has no carbon number to derive
            formula = row.get('Formula')
            if pd.isna(formula):
                continue

            try:
                #ChemFormula(...).element maps element symbols to counts; take the carbon count
                carbon_count = ChemFormula(formula).element['C']
            except Exception:
                #Best effort: an unparsable or carbon-free formula leaves the carbon number unset.
                #NOTE(review): the exception types raised by ChemFormula are not visible here, hence the broad catch
                continue

            BreakdownDF.at[i,'Carbon Number'] = carbon_count

        return BreakdownDF

    #Function to pick a response factor for one compound from its compound type table
    def lookupRF(sheet,cmp_name,cmp_carbon,cmp_type):
        """
        Return (response factor, source description) for one compound, preferring
        an empirical value, then the carbon number linear fit, then 1.
        """
        #Empirical response factors listed for this exact compound name
        listed = sheet.loc[sheet['Compound Name'] == cmp_name,'Response Factor']

        if len(listed) > 0:
            #If the compound is listed more than once, the first entry is used
            RF = listed.iloc[0]
            if pd.isna(RF):
                return 1, 'Assumed 1, compound found in RF sheet without RF'
            return RF, 'Assigned empirical RF, exact compound found in response factors sheet'

        #Get the m and b parameters listed in the RF linear fit for that compound type
        fit_m = sheet.loc[0,'Linear fit m']
        fit_b = sheet.loc[0,'Linear fit b']

        #A fit missing either parameter cannot be evaluated: it would produce a NaN response factor
        if pd.isna(fit_m) or pd.isna(fit_b):
            return 1, 'Assumed 1, compound type does not have a carbon number fit'

        #Get response factor using fit and carbon number
        RF = fit_m*cmp_carbon+fit_b

        #Estimates that are negative or larger than 5 are rejected in favour of 1
        if RF < 0 or RF > 5:
            return 1, 'Assumed 1, could estimate a response factor exists but is out of range (negative or over 5)'

        return RF, 'Assigned using carbon number linear fit for compound type {0} and carbon number {1}'.format(cmp_type,int(cmp_carbon))

    #Function to assign response factor by carbon number and compound type
    def assignRF(BreakdownDF,DBRF,CL_Dict):
        """
        Assign a response factor and a description of its origin to every peak.

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        DBRF : Mapping
            Response factor tables indexed by compound type abbreviation.
        CL_Dict : Dict
            Dictionary containing key:value pairs defined as
            (chemical lump abbreviation):(full chemical lump name). Not used here.

        Returns
        -------
        BreakdownDF : DataFrame
            The same dataframe, with the response factor and 'RF Source' columns
            filled in. Rows that are skipped receive an 'RF Source' explanation
            but no response factor.

        """
        ensureColumn(BreakdownDF,RF_COL,float)
        ensureColumn(BreakdownDF,'RF Source',object)

        #Loop through every labelled peak in the breakdown DataFrame
        for i, row in BreakdownDF.iterrows():
            cmp_name = row['Compound Name']
            cmp_carbon = row['Carbon Number']
            cmp_type = row['Compound Type Abbreviation']

            #If any of these pieces of information is NaN, skip the row and set the RF Source accordingly
            if pd.isna(cmp_name) or pd.isna(cmp_carbon) or pd.isna(cmp_type):
                BreakdownDF.at[i,'RF Source'] = 'No RF assigned, at least one of the following were missing: compound name, formula, or type abbreviation'
                continue

            #If the compound type is Other, "O", skip the row and set the RF source accordingly
            if cmp_type == "O":
                BreakdownDF.at[i,'RF Source'] = 'No RF assigned, compound type is listed as "Other"'
                continue

            RF, source = lookupRF(DBRF[cmp_type],cmp_name,cmp_carbon,cmp_type)
            BreakdownDF.at[i,RF_COL] = RF
            BreakdownDF.at[i,'RF Source'] = source

        return BreakdownDF

    def quantMain(BreakdownDF,sinfo):
        """
        Function that takes in matched FID and MS data with assigned response factors
        and returns quantitative data

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        sinfo : Dict
            Sample information providing 'Internal Standard Mass (mg)' and
            'Internal Standard Name'.

        Returns
        -------
        BreakdownDF : DataFrame
            The same dataframe, with the 'A_i/A_T' (area ratio to the internal
            standard) and 'm_i' (mass) columns filled in.

        Raises
        ------
        ValueError
            If no peak with a valid FID area carries the internal standard name.

        """
        #Get IS_m and IS_name from sinfo
        IS_m = sinfo['Internal Standard Mass (mg)']
        IS_name = sinfo['Internal Standard Name']

        #FID areas of every peak labelled as the internal standard
        IS_areas = BreakdownDF.loc[BreakdownDF['Compound Name'] == IS_name,'FID Area']

        if IS_areas.empty or IS_areas.isna().all():
            raise ValueError('Internal standard "{0}" was not found with a valid FID area in the breakdown data'.format(IS_name))

        #If the internal standard is listed more than once, take the largest area peak
        IS_Area = IS_areas.max()

        ensureColumn(BreakdownDF,'A_i/A_T',float)
        ensureColumn(BreakdownDF,'m_i',float)

        #Loop through breakdown dataframe, calculating an area ratio and mass for each row
        for i, row in BreakdownDF.iterrows():
            name = row['Compound Name']

            #The internal standard itself and either form of no match are not quantified
            if name == IS_name or name == 'No match' or name == 'No Match':
                continue

            #Calculate area ratio
            Aratio = row['FID Area']/IS_Area
            #Calculate mass using response factor column (NaN when no response factor was assigned)
            m_i = Aratio*IS_m/row[RF_COL]

            BreakdownDF.at[i,'A_i/A_T'] = Aratio
            BreakdownDF.at[i,'m_i'] = m_i

        return BreakdownDF

    def moreBreakdown(BreakdownDF,CT_dict,sinfo):
        """
        This function prepares further breakdown dataframes for use in exporting to Excel

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        CT_dict : Dict
            Dictionary of all compound type abbreviations in use and their associated expansions. Not used here.
        sinfo : Dict
            Dictionary containing sample information. Not used here.

        Returns
        -------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        CT_DF : DataFrame
            Mass and mass fraction of each compound type.
        CN_DF : DataFrame
            Mass of each carbon number from 1 to the largest one present.
        CTCN_DF : DataFrame
            Mass of each (carbon number, compound type) pair; row k holds carbon number k+1.
        mass_DF : DataFrame
            Total mass according to the breakdown and to each of the three summaries.

        """
        #Function to sum the masses of the selected rows, treating nan as zero
        def massWhere(mask):
            return np.nansum(BreakdownDF.loc[mask,'m_i'])

        #Get the total mass of product from the breakdown dataframe
        m_total = np.nansum(BreakdownDF['m_i'])

        #Get maximum carbon number in breakdown dataframe, or zero when no carbon number was assigned at all
        largest_carbon = BreakdownDF['Carbon Number'].max()
        CN_max = 0 if pd.isna(largest_carbon) else int(largest_carbon)
        carbon_numbers = range(1,CN_max+1)

        #Row selections reused by the three summaries
        type_masks = {ctype: BreakdownDF['Compound Type'] == ctype for ctype in COMPOUND_TYPES}
        carbon_masks = {cn: BreakdownDF['Carbon Number'] == cn for cn in carbon_numbers}

        #Quantitative results organized by compound type
        CT_DF = pd.DataFrame({'Compound Type':list(COMPOUND_TYPES),
                              'Mass (mg)':np.array([massWhere(type_masks[ctype]) for ctype in COMPOUND_TYPES],dtype=float)})
        CT_DF['Mass fraction'] = CT_DF['Mass (mg)']/m_total

        #Quantitative results organized by carbon number
        CN_DF = pd.DataFrame({'Carbon Number':np.arange(1,CN_max+1),
                              'Mass (mg)':np.array([massWhere(carbon_masks[cn]) for cn in carbon_numbers],dtype=float)})

        #Quantitative results organized by both compound type (columns) and carbon number (rows)
        CTCN_DF = pd.DataFrame({ctype: [massWhere(type_masks[ctype] & carbon_masks[cn]) for cn in carbon_numbers]
                                for ctype in COMPOUND_TYPES},
                               index=range(CN_max),dtype=float)

        #Get total masses from CT, CN, and CTCN dataframes
        CT_mass = np.nansum(CT_DF['Mass (mg)'])
        CN_mass = np.nansum(CN_DF['Mass (mg)'])
        CTCN_mass = np.nansum(CTCN_DF.to_numpy())

        #Create total mass dataframe
        mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],
                                'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

        return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF

    """ BREAKDOWN FORMATION """

    #Use the assignCTCN function to assign compound type and carbon number
    BreakdownDF = assignCTCN(BreakdownDF,CT_Dict)

    #Use the assignRF function to assign response factors, preferring empirical RF's to estimated ones and assigning 1 when no other RF can be applied
    BreakdownDF = assignRF(BreakdownDF,DBRF,CL_Dict)

    #Use the quantMain function to add quantitative data to BreakdownDF
    BreakdownDF = quantMain(BreakdownDF,sinfo)

    #Use the moreBreakdown function to prepare compound type and carbon number breakdowns
    BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF,CT_Dict,sinfo)

    return [BreakdownDF,CT_DF,CN_DF,CTCN_DF,mass_DF,]
@@ -0,0 +1,162 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ COPYRIGHT STATEMENT:
5
+
6
+ ChromaQuant – A quantification software for complex gas chromatographic data
7
+
8
+ Copyright (c) 2024, by Julia Hancock
9
+ Affiliation: Dr. Julie Elaine Rorrer
10
+ URL: https://www.rorrerlab.com/
11
+
12
+ License: BSD 3-Clause License
13
+
14
+ ---
15
+
16
+ SUBPACKAGE FOR PARSING QUANTIFICATION INFORMATION
17
+
18
+ Julia Hancock
19
+ Started 12-29-2024
20
+
21
+ """
22
+
23
+ """ PACKAGES """
24
+ import os
25
+ import datetime
26
+
27
+ """ FUNCTIONS """
28
+ #Function that evaluates runtime parameters
29
def evalRunParam(quantphases):
    """
    Translate a phase selector into run flags for liquid and gas analysis.

    Parameters
    ----------
    quantphases : Str
        'L' for liquid only, 'G' for gas only, 'LG' for both.

    Returns
    -------
    List or None
        [Liquid Bool, Gas Bool] for a recognized selector, otherwise None.

    """
    #Each entry is (selector, run liquid analysis, run gas analysis)
    phase_table = (('L',True,False),
                   ('G',False,True),
                   ('LG',True,True))

    for selector, run_liquid, run_gas in phase_table:
        if quantphases == selector:
            #Build a new list on every call so callers never share one
            return [run_liquid,run_gas]

    #Unrecognized selector
    return None
46
+
47
+ #Function that finds most recent response factor file
48
def findRecentFile(prefix,suffix,path):
    """
    Find the most recently dated file in a directory among files named
    prefix_mm-dd-yy followed by suffix (for example LRF_07-24-24.xlsx).

    Parameters
    ----------
    prefix : Str
        Leading part of the file name, before the separator and the date.
    suffix : Str
        Trailing part of the file name, including the dot (e.g. '.xlsx').
    path : Str
        Directory to search.

    Returns
    -------
    Str or None
        Full path to the selected file, or None when no file qualifies.
        When exactly one file has the prefix and suffix it is returned without
        checking its date. When several do, files whose date cannot be read are
        ignored and the latest remaining date wins.

    Raises
    ------
    OSError
        If the directory cannot be listed (raised by os.listdir).

    """
    #Date format embedded in the file names
    date_format = '%m-%d-%y'

    #Keep only directory entries carrying both the requested prefix and suffix
    candidates = [name for name in os.listdir(path)
                  if name.startswith(prefix) and name.endswith(suffix)]

    #Nothing matches
    if not candidates:
        return None

    #A lone candidate needs no date comparison
    if len(candidates) == 1:
        return os.path.join(path,candidates[0])

    #Read the date of every candidate; one character after the prefix is taken to be the separator
    file_dates = {}
    for name in candidates:
        date_string = name[len(prefix)+1:len(name)-len(suffix)]
        try:
            file_dates[name] = datetime.datetime.strptime(date_string,date_format)
        except ValueError:
            #A stray file such as prefix_notes.suffix must not abort the search
            continue

    #None of the candidates carries a readable date
    if not file_dates:
        return None

    #The latest date is the most recent file
    recent_file = max(file_dates,key=file_dates.get)

    return os.path.join(path,recent_file)
147
+
148
+ #Define function that inserts a column to a CTCN Dataframe labeling the carbon number
149
def insertCN(CTCN_DF):
    """
    Label each row of a compound type / carbon number dataframe with its carbon number.

    Parameters
    ----------
    CTCN_DF : DataFrame
        Dataframe whose k-th row (counting from one) is taken to describe carbon number k.

    Returns
    -------
    CTCN_DF : DataFrame
        The same dataframe, modified in place, with a leading 'Carbon Number'
        column counting from 1 to the number of rows.

    """
    #The number of rows is taken to be the maximum carbon number
    carbon_numbers = list(range(1,len(CTCN_DF)+1))

    #Place the labels in front of every existing column
    CTCN_DF.insert(0,'Carbon Number',carbon_numbers)

    return CTCN_DF
161
+
162
+ #findRecentFile('LRF','.xlsx','/Users/connards/Documents/ChromaQuant/response-factors')
@@ -9,4 +9,4 @@ Created 10-19-2024
9
9
 
10
10
  """
11
11
 
12
- from .AutoQuantification import main_AutoQuantification
12
+ from .quantMain import mainQuant