chromaquant 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chromaquant/Handle/__init__.py +12 -0
- chromaquant/Handle/handleDirectories.py +89 -0
- chromaquant/Manual/HydroUI.py +418 -0
- chromaquant/Manual/QuantUPP.py +373 -0
- chromaquant/Manual/Quantification.py +1305 -0
- chromaquant/Manual/__init__.py +10 -0
- chromaquant/Manual/duplicateMatch.py +211 -0
- chromaquant/Manual/fpm_match.py +798 -0
- chromaquant/Manual/label-type.py +179 -0
- chromaquant/Match/AutoFpmMatch.py +1133 -0
- chromaquant/Match/__init__.py +12 -0
- chromaquant/Quant/AutoQuantification.py +1329 -0
- chromaquant/Quant/__init__.py +12 -0
- chromaquant/__init__.py +10 -0
- chromaquant/__main__.py +493 -0
- chromaquant/properties.json +4 -0
- chromaquant-0.3.1.dist-info/METADATA +189 -0
- chromaquant-0.3.1.dist-info/RECORD +22 -0
- chromaquant-0.3.1.dist-info/WHEEL +4 -0
- chromaquant-0.3.1.dist-info/entry_points.txt +2 -0
- chromaquant-0.3.1.dist-info/licenses/LICENSE.txt +18 -0
- chromaquant-0.3.1.dist-info/licenses/LICENSES_bundled.txt +1035 -0
|
@@ -0,0 +1,1329 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
COPYRIGHT STATEMENT:
|
|
6
|
+
|
|
7
|
+
ChromaQuant – A quantification software for complex gas chromatographic data
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2024, by Julia Hancock
|
|
10
|
+
Affiliation: Dr. Julie Elaine Rorrer
|
|
11
|
+
URL: https://www.rorrerlab.com/
|
|
12
|
+
|
|
13
|
+
License: BSD 3-Clause License
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
SCRIPT TO QUANTIFY COMPOUNDS IN SAMPLE USING DEFINED RESPONSE FACTORS
|
|
18
|
+
|
|
19
|
+
Julia Hancock
|
|
20
|
+
Started 12/14/2023
|
|
21
|
+
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
""" PACKAGES """
|
|
25
|
+
import sys
|
|
26
|
+
import pandas as pd
|
|
27
|
+
import os
|
|
28
|
+
from molmass import Formula
|
|
29
|
+
import math
|
|
30
|
+
import numpy as np
|
|
31
|
+
from chemformula import ChemFormula
|
|
32
|
+
import json
|
|
33
|
+
from datetime import datetime
|
|
34
|
+
import logging
|
|
35
|
+
import openpyxl
|
|
36
|
+
|
|
37
|
+
""" QUANTIFICATION MAIN FUNCTION"""
|
|
38
|
+
def main_AutoQuantification(sname,quantphases,directories):
|
|
39
|
+
|
|
40
|
+
print("[AutoQuantification] Evaluating run parameters...")
|
|
41
|
+
#Write whether or not to run liquid and gas analysis based on system argument
|
|
42
|
+
if quantphases == "Liquid":
|
|
43
|
+
#Format is [Liquid Bool, Gas Bool]
|
|
44
|
+
lgTF = [True,False]
|
|
45
|
+
elif quantphases == "Gas":
|
|
46
|
+
lgTF = [False,True]
|
|
47
|
+
elif quantphases == "Liquid and Gas":
|
|
48
|
+
lgTF = [True,True]
|
|
49
|
+
else:
|
|
50
|
+
print("No phases selected, terminating script")
|
|
51
|
+
#Terminate script
|
|
52
|
+
sys.exit()
|
|
53
|
+
|
|
54
|
+
print("[AutoQuantification] Defining hard-coded analysis conditions...")
|
|
55
|
+
#Define retention time error within which TCD peaks may be assigned
|
|
56
|
+
peak_error = 0.5
|
|
57
|
+
|
|
58
|
+
#Define boolean describing whether or not an external standard was used for gas analysis
|
|
59
|
+
ES_bool = True
|
|
60
|
+
|
|
61
|
+
#Define temperature and pressure of gas bag used in sample injection
|
|
62
|
+
gasBag_temp = 18 #C
|
|
63
|
+
gasBag_pressure = 14.7 #psi
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
""" RESPONSE FACTOR INFO """
|
|
67
|
+
print("[AutoQuantification] Searching for response factors...")
|
|
68
|
+
#Liquid response factor file name
|
|
69
|
+
LRF_file = "LRF_7-24-24.xlsx"
|
|
70
|
+
#FID gas response factor file name
|
|
71
|
+
GRF_file = "FIDRF_7-24-24.csv"
|
|
72
|
+
#TCD gas response factor file name
|
|
73
|
+
GRFT_file = "TCDRF_7-24-24.csv"
|
|
74
|
+
|
|
75
|
+
""" DIRECTORIES """
|
|
76
|
+
print("[AutoQuantification] Finding directories...")
|
|
77
|
+
|
|
78
|
+
#Unpack directories from passed variable
|
|
79
|
+
#Primary files directory
|
|
80
|
+
files = directories['files']
|
|
81
|
+
#Resources directory
|
|
82
|
+
RE_Dir = directories['resources']
|
|
83
|
+
#Theme directory
|
|
84
|
+
theme_Dir = directories['theme']
|
|
85
|
+
#Response factor directory
|
|
86
|
+
RF_Dir = directories['rf']
|
|
87
|
+
#Data directory
|
|
88
|
+
DF_Dir = directories['data']
|
|
89
|
+
#Images directory
|
|
90
|
+
img_Dir = directories['images']
|
|
91
|
+
#Data file log directory
|
|
92
|
+
DFlog_Dir = os.path.join(DF_Dir,sname,"log")
|
|
93
|
+
#Data file breakdowns directory
|
|
94
|
+
DFbreak_Dir = os.path.join(DF_Dir,sname,"breakdowns")
|
|
95
|
+
#Raw data file directory
|
|
96
|
+
DFR_Dir = os.path.join(DF_Dir,sname,'raw data')
|
|
97
|
+
|
|
98
|
+
""" LOGGING """
|
|
99
|
+
print("[AutoQuantification] Initializing logging [WIP]...")
|
|
100
|
+
#Get current datetime
|
|
101
|
+
now = datetime.now()
|
|
102
|
+
#Get current datetime string
|
|
103
|
+
nows = now.strftime('%Y%m%d')
|
|
104
|
+
|
|
105
|
+
#If log directory does not exist within sample folder, create it
|
|
106
|
+
if not os.path.exists(DFlog_Dir):
|
|
107
|
+
os.makedirs(DFlog_Dir)
|
|
108
|
+
|
|
109
|
+
#Instantiate a logger
|
|
110
|
+
logger = logging.getLogger(__name__)
|
|
111
|
+
#Initialize logging file using current datetime
|
|
112
|
+
fh = logging.FileHandler(os.path.join(DFlog_Dir,'quantlog_'+nows+'.log'))
|
|
113
|
+
logger.addHandler(fh)
|
|
114
|
+
logger.propagate = False
|
|
115
|
+
#Set logging level
|
|
116
|
+
logger.setLevel(logging.INFO)
|
|
117
|
+
#Create a formatter and assign to logger
|
|
118
|
+
formatter = logging.Formatter('[%(filename)s] %(asctime)s - [%(levelname)s]: %(message)s')
|
|
119
|
+
fh.setFormatter(formatter)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
""" LABELS """
|
|
123
|
+
print("[AutoQuantification] Defining chemical lumps and compound types...")
|
|
124
|
+
#Dictionary of all chemical lump abbreviations in use and their associated expansions
|
|
125
|
+
#OLD DICTIONARY
|
|
126
|
+
#CL_Dict = {'MBE':'Methyl benzenes', 'ABE':'Alkyl benzenes', 'NAP':'Napthalenes', 'MAL':'Methl alkanes',
|
|
127
|
+
# 'DAL':'Dimethyl alkanes','TAL':'Trimethyl alkanes','MCA':'Methyl cycloalkanes','ACA':'Alkyl cycloalkanes',
|
|
128
|
+
# 'AAL':'Alkyl alkanes','MAE':'Methyl alkenes','DAE':'Dimethyl alkenes','AAE':'Alkyl alkenes',
|
|
129
|
+
# 'LAL':'Linear alkanes','CAE':'Cycloalkenes','IND':'Indenes','PAH':'Polycyclic aromatic hydrocarbons',
|
|
130
|
+
# 'AKY':'Alkynes'}
|
|
131
|
+
|
|
132
|
+
#7-24-24: Could have removed the CL_Dict infrastructure, but nice to have in place in case we want to
|
|
133
|
+
#add more complexity to response factor assignment later
|
|
134
|
+
|
|
135
|
+
#Dictionary of all compound type abbreviations in use and their associated expansions
|
|
136
|
+
CL_Dict = {'A':'Aromatics','L':'Linear Alkanes','B':'Branched Alkanes',
|
|
137
|
+
'C':'Cycloalkanes','E':'Alkenes/Alkynes'}
|
|
138
|
+
|
|
139
|
+
#Alphabetize lump abbreviation dictionary
|
|
140
|
+
CL_Dict = dict(sorted(CL_Dict.items()))
|
|
141
|
+
|
|
142
|
+
#Dictionary of all compound type abbreviations in use and their associated expansions
|
|
143
|
+
CT_Dict = {'A':'Aromatics','L':'Linear Alkanes','B':'Branched Alkanes',
|
|
144
|
+
'C':'Cycloalkanes','E':'Alkenes/Alkynes','O':'Other'}
|
|
145
|
+
|
|
146
|
+
#Alphabetize compound type abbreviation dictionary
|
|
147
|
+
CT_Dict = dict(sorted(CT_Dict.items()))
|
|
148
|
+
|
|
149
|
+
""" FUNCTIONS """
|
|
150
|
+
print("[AutoQuantification] Defining functions...")
|
|
151
|
+
#Function for checking if file exists and adding number if so
|
|
152
|
+
def fileCheck(path):
    """
    Return a path that does not collide with anything already on disk.

    If nothing exists at the given path it is returned unchanged. Otherwise
    " (1)", " (2)", ... is inserted between the file name and its extension
    until an unused path is found.

    Parameters
    ----------
    path : Str
        Desired file path.

    Returns
    -------
    candidate : Str
        The original path, or the first numbered variant that does not exist.

    """
    #Inspired by https://stackoverflow.com/questions/13852700/create-file-but-if-name-exists-add-number
    #Split once so the counter is always inserted ahead of the original extension
    stem, suffix = os.path.splitext(path)
    candidate = path
    attempt = 0

    #Keep numbering until the candidate no longer exists
    while os.path.exists(candidate):
        attempt += 1
        candidate = stem + " (" + str(attempt) + ")" + suffix

    return candidate
|
|
162
|
+
|
|
163
|
+
#Function for quantifying liquid FID data
|
|
164
|
+
def liquidFID(BreakdownDF,DBRF,Label_info,sinfo):
|
|
165
|
+
|
|
166
|
+
#Unpack compound type and carbon number dictionaries from list
|
|
167
|
+
CL_Dict, CT_Dict = Label_info
|
|
168
|
+
|
|
169
|
+
""" FUNCTIONS """
|
|
170
|
+
#Function to assign compound type and carbon number to compound using formula
|
|
171
|
+
def assignCTCN(BreakdownDF,CT_dict):
    """
    Function that adds a compound type and a carbon number to every row of the
    breakdown dataframe for which both can be determined.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe read for its 'Compound Type Abbreviation' and 'Formula' columns.
        It is modified in place.
    CT_dict : Dict
        Dictionary containing key:value pairs defined as
        (compound type abbreviation):(full compound type name)

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe, with 'Compound Type' and 'Carbon Number' filled in
        for the rows that could be processed.

    """
    #Iterate through every species in the breakdown dataframe and add entries in two new columns: Compound Type and Carbon Number
    for i, row in BreakdownDF.iterrows():
        try:
            #Set breakdown compound type according to the abbreviation already in the breakdown dataframe
            BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
            #Obtain a dictionary containing key:value pairs as element:count using the formula string for the ith row
            chemFormDict = ChemFormula(row['Formula']).element
            #Use the carbon entry from the above dictionary to assign a carbon number to the ith row
            BreakdownDF.at[i,'Carbon Number'] = chemFormDict['C']
        #Rows with an unknown abbreviation, an unparsable formula or no carbon are deliberately left as-is.
        #Exception is caught instead of using a bare except so KeyboardInterrupt and SystemExit still propagate
        except Exception:
            continue

    return BreakdownDF
|
|
187
|
+
|
|
188
|
+
#Function to assign response factor by carbon number and compound type
|
|
189
|
+
def assignRF(BreakdownDF,DBRF,CL_Dict):
    """
    Function takes a dataframe containing matched FID and MS peak information and
    compares it against a provided response factor database to assign response
    factors to the matched peak dataframe.

    For every row, in order of preference:
    1) the response factor listed for the exact compound name in the sheet of
       the row's compound type abbreviation,
    2) a response factor estimated as (Linear fit m)*(carbon number)+(Linear fit b)
       using the fit parameters in the first row of that sheet,
    3) a response factor of 1 when the listed RF is missing, when either fit
       parameter is missing, or when the estimate is negative or larger than 5.
    Rows missing a compound name, carbon number or type abbreviation, and rows
    of type "O", receive no response factor. The reason is always written to
    the 'RF Source' column.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing the columns 'Compound Name', 'Carbon Number' and
        'Compound Type Abbreviation'. It is modified in place.

    DBRF : Dict of DataFrames
        Response factor sheets indexed by compound type abbreviation. Each sheet
        is read for the columns 'Compound Name', 'Response Factor',
        'Linear fit m' and 'Linear fit b' (fit parameters taken at index label 0).

    CL_Dict : Dict
        Dictionary containing key:value pairs defined as
        (chemical lump abbreviation):(full chemical lump name).
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe with 'Response Factor ((A_i/A_T)/(m_i/m_T))' and
        'RF Source' filled in.

    Raises
    ------
    KeyError
        If a row's compound type abbreviation has no sheet in DBRF.

    """
    #Names of the columns written by this function
    rf_col = 'Response Factor ((A_i/A_T)/(m_i/m_T))'
    src_col = 'RF Source'

    #Loop through every labelled peak in the breakdown DataFrame
    for i, row in BreakdownDF.iterrows():
        #Find the compound name, carbon number, and compound type abbreviation
        cmp_name = row['Compound Name']
        cmp_carbon = row['Carbon Number']
        cmp_type = row['Compound Type Abbreviation']

        #If any of these pieces of information is NAN, skip the row and set the RF Source accordingly
        if pd.isna(cmp_name) or pd.isna(cmp_carbon) or pd.isna(cmp_type):
            BreakdownDF.at[i,src_col] = 'No RF assigned, at least one of the following were missing: compound name, formula, or type abbreviation'
            continue

        #Or, if the compound type is Other, "O", skip the row and set the RF source accordingly
        if cmp_type == "O":
            BreakdownDF.at[i,src_col] = 'No RF assigned, compound type is listed as "Other"'
            continue

        #Get the sheet for this compound type and find where the compound is listed by name
        sheet = DBRF[cmp_type]
        name_match = sheet['Compound Name'] == cmp_name

        #If the compound name is in the sheet corresponding to the compound type abbreviation..
        if name_match.any():
            #Take the first listed response factor for that compound
            RF = sheet.loc[name_match,'Response Factor'].iloc[0]

            #pd.isna is used instead of math.isnan so an empty (None) cell is also treated as missing
            if pd.isna(RF):
                RF = 1
                source = 'Assumed 1, compound found in RF sheet without RF'
            else:
                source = 'Assigned empirical RF, exact compound found in response factors sheet'

        #Otherwise, if the compound name is not in the sheet...
        else:
            #Get the m and b parameters listed in the RF linear fit for that compound type
            fit_m = sheet.loc[0,'Linear fit m']
            fit_b = sheet.loc[0,'Linear fit b']

            #If either parameter is missing the fit cannot be evaluated, so assign a response factor of 1.
            #Requiring both to be missing would let a half-defined fit produce a NaN response factor
            if pd.isna(fit_m) or pd.isna(fit_b):
                RF = 1
                source = 'Assumed 1, compound type does not have a carbon number fit'

            #Otherwise, assign a response factor by carbon number
            else:
                RF = fit_m*cmp_carbon+fit_b

                #If the estimated response factor is negative or larger than 5, set RF to 1
                if RF < 0 or RF > 5:
                    RF = 1
                    source = 'Assumed 1, could estimate a response factor exists but is out of range (negative or over 5)'
                else:
                    source = 'Assigned using carbon number linear fit for compound type {0} and carbon number {1}'.format(cmp_type,int(cmp_carbon))

        #Set the response factor and its source in the breakdown dataframe
        BreakdownDF.at[i,rf_col] = RF
        BreakdownDF.at[i,src_col] = source

    return BreakdownDF
|
|
300
|
+
|
|
301
|
+
def quantMain(BreakdownDF,sinfo):
    """
    Function that takes in matched FID and MS data with assigned response factors
    and returns quantitative data

    Every row other than the internal standard and unmatched peaks receives
        A_i/A_T = (FID Area)/(internal standard FID Area)
        m_i     = (A_i/A_T)*(internal standard mass)/(response factor)

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing the columns 'Compound Name', 'FID Area' and
        'Response Factor ((A_i/A_T)/(m_i/m_T))'. It is modified in place.
    sinfo : Dict
        Sample information, read for 'Internal Standard Mass (mg)' and
        'Internal Standard Name'.

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe with the 'A_i/A_T' and 'm_i' columns filled in.

    Raises
    ------
    ValueError
        If no row's compound name equals the internal standard name.

    """
    #Get IS_m and IS_name from sinfo
    IS_m = sinfo['Internal Standard Mass (mg)']
    IS_name = sinfo['Internal Standard Name']

    #Collect the FID areas of every peak labelled as the internal standard
    IS_areas = BreakdownDF.loc[BreakdownDF['Compound Name'] == IS_name,'FID Area']

    #Without an internal standard peak nothing can be quantified, so fail with a clear message
    if IS_areas.empty:
        raise ValueError('Internal standard "{0}" was not found in the Compound Name column'.format(IS_name))

    #If the internal standard is listed more than once, take the largest area peak
    IS_Area = BreakdownDF.at[IS_areas.idxmax(),'FID Area']

    #Rows naming the internal standard or either form of no match are not quantified
    quantify = ~BreakdownDF['Compound Name'].isin([IS_name,'No match','No Match'])

    #Calculate area ratio and mass for all rows at once
    Aratio = BreakdownDF['FID Area']/IS_Area
    m_i = Aratio*IS_m/BreakdownDF['Response Factor ((A_i/A_T)/(m_i/m_T))']

    #Assign area ratio and mass to their respective columns, only for the rows being quantified
    BreakdownDF.loc[quantify,'A_i/A_T'] = Aratio[quantify]
    BreakdownDF.loc[quantify,'m_i'] = m_i[quantify]

    return BreakdownDF
|
|
345
|
+
|
|
346
|
+
def moreBreakdown(BreakdownDF,CT_dict,sinfo):
    """
    This function prepares further breakdown dataframes for use in exporting to Excel

    Masses in the 'm_i' column are summed (treating nan as zero) by compound
    type, by carbon number, and by every compound type/carbon number pair.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing the columns 'm_i', 'Compound Type' and 'Carbon Number'.
    CT_dict : Dict
        Dictionary of all compound type abbreviations in use and their associated expansions.
        Accepted for interface compatibility; not used by this function.
    sinfo : Dict
        Dictionary containing sample information.
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    BreakdownDF : DataFrame
        The input dataframe, unchanged.
    CT_DF : DataFrame
        Mass (mg) and mass fraction for each of the six compound types.
    CN_DF : DataFrame
        Mass (mg) for every carbon number from 1 to the largest one present.
        Empty when no row has a carbon number.
    CTCN_DF : DataFrame
        Mass for each compound type (columns) and carbon number (row k holds
        carbon number k+1). Empty when no row has a carbon number.
    mass_DF : DataFrame
        Total mass according to the overall dataframe and to each of the three breakdowns.

    """
    #Full compound type names, in the order they are reported
    type_names = ['Aromatics','Linear Alkanes','Branched Alkanes',
                  'Cycloalkanes','Alkenes/Alkynes','Other']

    masses = BreakdownDF['m_i']
    types = BreakdownDF['Compound Type']
    carbons = BreakdownDF['Carbon Number']

    #Get the total mass of product from the breakdown dataframe
    m_total = np.nansum(masses)

    #Get maximum carbon number in breakdown dataframe.
    #When no row has a carbon number the maximum is nan, which int() cannot convert, so fall back to zero
    highest_CN = carbons.max()
    CN_max = 0 if pd.isna(highest_CN) else int(highest_CN)
    carbon_numbers = range(1,CN_max+1,1)

    #Quantitative results organized by compound type, built directly from the sums so no cell is ever left uninitialized
    CT_DF = pd.DataFrame({'Compound Type':type_names,
                          'Mass (mg)':np.array([np.nansum(masses[types == name]) for name in type_names],dtype=float)})
    #Mass fraction of each compound type relative to the overall total (nan when the total is zero)
    CT_DF['Mass fraction'] = CT_DF['Mass (mg)']/m_total

    #Quantitative results organized by carbon number
    CN_DF = pd.DataFrame({'Carbon Number':carbon_numbers,
                          'Mass (mg)':np.array([np.nansum(masses[carbons == n]) for n in carbon_numbers],dtype=float)})

    #Quantitative results organized by both compound type and carbon number
    CTCN_DF = pd.DataFrame({name:[np.nansum(masses[(carbons == n) & (types == name)]) for n in carbon_numbers]
                            for name in type_names},
                           index=range(CN_max),dtype=float)

    #Get total masses from CT, CN, and CTCN dataframes
    CT_mass = np.nansum(CT_DF['Mass (mg)'])
    CN_mass = np.nansum(CN_DF['Mass (mg)'])
    CTCN_mass = np.nansum(CTCN_DF.to_numpy())

    #Create total mass dataframe
    mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

    return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
|
|
429
|
+
|
|
430
|
+
""" BREAKDOWN FORMATION """
|
|
431
|
+
|
|
432
|
+
#Use the assignCTCN function to assign compound type and carbon number
|
|
433
|
+
BreakdownDF = assignCTCN(BreakdownDF,CT_Dict)
|
|
434
|
+
|
|
435
|
+
#Use the assignRF function to assign response factors, preferring empirical RF's to estimated ones and assigning 1 when no other RF can be applied
|
|
436
|
+
BreakdownDF = assignRF(BreakdownDF,DBRF,CL_Dict)
|
|
437
|
+
|
|
438
|
+
#Use the quantMain function to add quantitative data to BreakdownDF
|
|
439
|
+
BreakdownDF = quantMain(BreakdownDF,sinfo)
|
|
440
|
+
|
|
441
|
+
#Use the moreBreakdown function to prepare compound type and carbon number breakdowns
|
|
442
|
+
BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF,CT_Dict,sinfo)
|
|
443
|
+
|
|
444
|
+
return [BreakdownDF,CT_DF,CN_DF,CTCN_DF,mass_DF,]
|
|
445
|
+
|
|
446
|
+
#Function for quantifying gas TCD data w/o external standard
|
|
447
|
+
def gasTCD(BreakdownDF,DBRF,sinfo,peak_error):
    """
    Function that quantifies gas TCD data without an external standard

    Peaks are assigned to response factor entries by retention time, converted
    to a volume percent using the entry's response factor, and then to moles
    and mass with the ideal gas law at the sample's volume, pressure and temperature.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe of TCD peaks, read for its 'RT' and 'Area' columns. Modified in place.
    DBRF : DataFrame
        TCD response factors, read for 'Compound Name', 'Formula', 'RT (min)'
        and 'RF'. Modified in place: 'RT Max' and 'RT Min' columns are added.
    sinfo : Dict
        Sample information, read for 'Reactor Volume (mL)',
        'Quench Pressure (psi)' and 'Quench Temperature (C)'.
    peak_error : Float
        Retention time tolerance on either side of an entry's 'RT (min)'.

    Returns
    -------
    BreakdownDF : DataFrame
        The peak dataframe with compound, response factor and quantity columns
        filled in for matched peaks.
    DBRF : DataFrame
        The response factor dataframe including the retention time windows.
    conditions : List
        [volume (m^3), pressure (Pa), temperature (K)] used in the calculation.

    """
    #Attach the allowed retention time window to every response factor entry
    for rf_idx in DBRF.index:
        DBRF.at[rf_idx,'RT Max'] = DBRF.at[rf_idx,'RT (min)'] + peak_error
        DBRF.at[rf_idx,'RT Min'] = DBRF.at[rf_idx,'RT (min)'] - peak_error

    #Read the sample conditions
    vol_mL = sinfo['Reactor Volume (mL)']
    pressure_psi = sinfo['Quench Pressure (psi)']
    temp_C = sinfo['Quench Temperature (C)']

    #Convert the sample conditions to SI units
    vol = vol_mL / 10**6                       #reactor volume, m^3
    pressure = pressure_psi / 14.504*100000    #reactor pressure, Pa
    temp = temp_C + 273.15                     #reactor temperature, K

    #Define ideal gas constant, m^3*Pa/K*mol
    R = 8.314

    #The response factor entries do not change below, so collect them once
    rf_entries = [entry for _, entry in DBRF.iterrows()]

    #Compare every peak against every response factor entry
    for peak_idx, peak in BreakdownDF.iterrows():

        for entry in rf_entries:

            #Skip entries whose retention time window does not contain the peak
            if not (entry['RT Min'] <= peak['RT'] <= entry['RT Max']):
                continue

            #Record which compound the peak was assigned to, along with its properties
            BreakdownDF.at[peak_idx,'Compound Name'] = entry['Compound Name']
            BreakdownDF.at[peak_idx,'Formula'] = entry['Formula']
            BreakdownDF.at[peak_idx,'RF (Area/vol.%)'] = entry['RF']
            BreakdownDF.at[peak_idx,'MW (g/mol)'] = ChemFormula(entry['Formula']).formula_weight

            #Volume percent from the peak area and response factor
            BreakdownDF.at[peak_idx,'Vol.%'] = peak['Area']/entry['RF']

            #Moles from the ideal gas law (PV=nRT)
            stored_volpercent = BreakdownDF.at[peak_idx,'Vol.%']
            BreakdownDF.at[peak_idx,'Moles'] = stored_volpercent/100*vol*pressure/(temp*R)

            #Mass (mg) from moles and molar mass
            stored_moles = BreakdownDF.at[peak_idx,'Moles']
            stored_MW = BreakdownDF.at[peak_idx,'MW (g/mol)']
            BreakdownDF.at[peak_idx,'Mass (mg)'] = stored_moles * stored_MW * 1000

    return BreakdownDF, DBRF, [vol, pressure, temp]
|
|
498
|
+
|
|
499
|
+
#Function for quantifying gas TCD data w/ external standard
|
|
500
|
+
def gasTCD_ES(BreakdownDF,DBRF,sinfo,gasBag_cond,peak_error):
    """
    Function that quantifies gas TCD data using injected CO2 as an external standard

    The known injected CO2 volume and the CO2 peak's volume percent give the
    total gas volume. Every other matched peak's volume is then its volume
    percent of that total, converted to moles and mass with the ideal gas law
    at the gas bag conditions. If no CO2 peak is found, no peak is quantified.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe of TCD peaks, read for its 'RT' and 'Area' columns. Modified
        in place; its 'Compound Name' column is reset to 'None' first.
    DBRF : DataFrame
        TCD response factors, read for 'Compound Name', 'Formula', 'RT (min)'
        and 'RF'; must contain a "Carbon Dioxide" entry. Modified in place:
        'RT Max' and 'RT Min' columns are added.
    sinfo : Dict
        Sample information, read for 'Injected CO2 (mL)'.
    gasBag_cond : List
        [temperature of gas bag (C), pressure in gas bag (psi)].
    peak_error : Float
        Retention time tolerance on either side of an entry's 'RT (min)'.

    Returns
    -------
    BreakdownDF : DataFrame
        The peak dataframe with compound, response factor and quantity columns
        filled in for matched peaks.
    DBRF : DataFrame
        The response factor dataframe including the retention time windows.
    volume : Float
        Estimated total gas volume in m^3, or 0 if no CO2 peak was found.
    TCD_cond : List
        [CO2 volume (m^3), pressure (Pa), temperature (K), ideal gas constant].

    """
    #Unpack gas bag conditions: temperature in C, pressure in psi
    temp, pressure = gasBag_cond[0], gasBag_cond[1]

    #Start with every peak unassigned
    BreakdownDF['Compound Name'] = 'None'

    #Function to find the CO2 peak and, if present, quantify it and estimate the total volume
    def getCO2(BreakdownDF,DBRF,TCD_cond,peak_error):

        #Unpack TCD conditions
        co2, pressure, temp, R = TCD_cond[0], TCD_cond[1], TCD_cond[2], TCD_cond[3]

        #Find the CO2 entry in the response factor dataframe
        is_co2 = DBRF['Compound Name'] == "Carbon Dioxide"
        CO2_row = DBRF.loc[is_co2].iloc[0]

        #Retention time window in which a peak counts as CO2
        window_low = CO2_row['RT (min)'] - peak_error
        window_high = CO2_row['RT (min)'] + peak_error

        #Search the peaks in order; only the first peak inside the window is used
        for peak_idx, peak in BreakdownDF.iterrows():

            if not (window_low <= peak['RT'] <= window_high):
                continue

            #Label the peak and record the CO2 properties
            BreakdownDF.at[peak_idx,'Compound Name'] = 'Carbon Dioxide'
            BreakdownDF.at[peak_idx,'Formula'] = 'CO2'
            BreakdownDF.at[peak_idx,'RF (Area/vol.%)'] = CO2_row['RF']
            BreakdownDF.at[peak_idx,'MW (g/mol)'] = ChemFormula('CO2').formula_weight

            #Volume percent from the peak area and response factor
            co2_percent = peak['Area']/CO2_row['RF']
            BreakdownDF.at[peak_idx,'Vol.%'] = co2_percent

            #Total volume (m^3) follows from the known CO2 volume and its volume percent
            total_volume = co2 * 100 / co2_percent

            #The CO2 volume itself is the injected amount
            BreakdownDF.at[peak_idx,'Volume (m^3)'] = co2

            #Moles from the ideal gas law (PV=nRT)
            BreakdownDF.at[peak_idx,'Moles (mol)'] = co2*pressure/(temp*R)

            #Mass (mg) from moles and molar mass
            stored_moles = BreakdownDF.at[peak_idx,'Moles (mol)']
            stored_MW = BreakdownDF.at[peak_idx,'MW (g/mol)']
            BreakdownDF.at[peak_idx,'Mass (mg)'] = stored_moles * stored_MW * 1000

            return True, total_volume, BreakdownDF

        #No peak fell inside the CO2 window
        return False, 0, BreakdownDF

    #Attach the allowed retention time window to every response factor entry
    for rf_idx in DBRF.index:
        DBRF.at[rf_idx,'RT Max'] = DBRF.at[rf_idx,'RT (min)'] + peak_error
        DBRF.at[rf_idx,'RT Min'] = DBRF.at[rf_idx,'RT (min)'] - peak_error

    #Injected CO2 volume, converted from mL to m^3
    co2 = sinfo['Injected CO2 (mL)']
    co2 = co2 / 10**6

    #Gas bag conditions converted to SI units
    temp = temp + 273.15                  #temperature, K
    pressure = pressure / 14.504*100000   #pressure, Pa

    #Define ideal gas constant, m^3*Pa/K*mol
    R = 8.314

    #Define list of conditions
    TCD_cond = [co2,pressure,temp,R]

    #Check if there is a peak in the BreakdownDF that can be assigned to CO2
    CO2_bool, volume, BreakdownDF = getCO2(BreakdownDF,DBRF,TCD_cond,peak_error)

    #Without a CO2 peak there is no total volume, so nothing else can be quantified
    if not CO2_bool:
        return BreakdownDF, DBRF, volume, TCD_cond

    #The response factor entries do not change below, so collect them once
    rf_entries = [entry for _, entry in DBRF.iterrows()]

    #Compare every peak against every response factor entry
    for peak_idx, peak in BreakdownDF.iterrows():

        for entry in rf_entries:

            #Skip entries whose retention time window does not contain the peak
            if not (entry['RT Min'] <= peak['RT'] <= entry['RT Max']):
                continue

            #Record which compound the peak was assigned to, along with its properties
            BreakdownDF.at[peak_idx,'Compound Name'] = entry['Compound Name']
            BreakdownDF.at[peak_idx,'Formula'] = entry['Formula']
            BreakdownDF.at[peak_idx,'RF (Area/vol.%)'] = entry['RF']
            BreakdownDF.at[peak_idx,'MW (g/mol)'] = ChemFormula(entry['Formula']).formula_weight

            #Volume percent from the peak area and response factor
            peak_percent = peak['Area']/entry['RF']
            BreakdownDF.at[peak_idx,'Vol.%'] = peak_percent

            #Volume of this compound as its share of the total volume
            peak_volume = volume*peak_percent/100
            BreakdownDF.at[peak_idx,'Volume (m^3)'] = peak_volume

            #Moles from the ideal gas law (PV=nRT)
            BreakdownDF.at[peak_idx,'Moles (mol)'] = peak_volume*pressure/(temp*R)

            #Mass (mg) from moles and molar mass
            stored_moles = BreakdownDF.at[peak_idx,'Moles (mol)']
            stored_MW = BreakdownDF.at[peak_idx,'MW (g/mol)']
            BreakdownDF.at[peak_idx,'Mass (mg)'] = stored_moles * stored_MW * 1000

    return BreakdownDF, DBRF, volume, TCD_cond
|
|
639
|
+
|
|
640
|
+
#Function for quantifying gas FID data w/o external standard
|
|
641
|
+
def gasFID(BreakdownDF,DBRF,Label_info,sinfo,cutoff=4):
    """
    Quantify gas FID peaks without an external standard and build breakdown tables.

    Rows whose formula does not contain 'C' are dropped, every remaining compound
    is assigned a response factor from DBRF, compounds with a carbon number above
    cutoff are converted to moles with the ideal gas law (reactor volume and quench
    conditions from sinfo) and the resulting masses are totalled by compound type,
    by carbon number and by both.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Matched FID and MS peak data. The columns 'Formula', 'Compound Name',
        'FID Area' and 'Compound Type Abbreviation' are read.
    DBRF : DataFrame
        Gas FID response factors. The columns 'Compound Name', 'Carbon Number'
        and 'RF' are read.
    Label_info : List
        Two-item list [chemical lump dictionary, compound type dictionary]. Only
        the compound type dictionary (abbreviation -> compound type) is used.
    sinfo : Dict
        Sample information. The keys 'Reactor Volume (mL)', 'Quench Pressure (psi)'
        and 'Quench Temperature (C)' are read.
    cutoff : Integer, optional
        Only compounds with a carbon number strictly greater than this value are
        quantified; all other rows are removed. The default is 4.

    Returns
    -------
    BreakdownDF : DataFrame
        Quantified rows with the added columns 'Carbon Number', 'RF (Area/vol.%)',
        'RF Source', 'MW (g/mol)', 'Vol.%', 'Moles', 'Mass (mg)' and 'Compound Type'.
    CT_DF : DataFrame
        Mass (mg) and mass fraction for each compound type.
    CN_DF : DataFrame
        Mass (mg) for each carbon number from 1 to the largest one present.
    CTCN_DF : DataFrame
        Mass (mg) per carbon number (rows, row 0 is C1) and compound type (columns).
    mass_DF : DataFrame
        Total mass according to each of the breakdowns above.

    """
    #Compound types reported in the breakdown tables, in output order
    CT_names = ['Aromatics','Linear Alkanes','Branched Alkanes',
                'Cycloalkanes','Alkenes/Alkynes','Other']

    #Function for assigning response factors to compounds
    def assignRF(BreakdownDF,DBRF):

        #Get a dictionary of average response factors by carbon number
        avgRF = {}
        #Cast the maximum to int so that a float-typed column still works with range,
        #and skip the loop entirely when DBRF holds no carbon numbers
        DBRF_CN_max = DBRF['Carbon Number'].max()
        if not pd.isna(DBRF_CN_max):
            for cn in range(1,int(DBRF_CN_max)+1):
                #The mean is NaN when DBRF has no entry for this carbon number
                avgRF[cn] = DBRF.loc[DBRF['Carbon Number']==cn,'RF'].mean()

        #Loop through every row in the FIDpMS dataframe
        for i, row in BreakdownDF.iterrows():
            #Rows without a formula cannot be assigned a carbon number, skip them
            if pd.isna(row['Formula']):
                continue

            #Use the carbon count of the formula as the row's carbon number
            carbon = ChemFormula(row['Formula']).element['C']
            BreakdownDF.at[i,'Carbon Number'] = carbon

            #If the row's compound name exists in the RF list explicitly, use that RF
            directRF = DBRF.loc[DBRF['Compound Name']==row['Compound Name'],'RF']
            if not directRF.empty:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = directRF.iloc[0]
                BreakdownDF.at[i,'RF Source'] = 'Direct RF assignment based on compound name'
            #Otherwise, use the average RF for the carbon number (NaN if none is available,
            #including carbon numbers above the largest one in DBRF)
            else:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = avgRF.get(int(carbon),np.nan)
                BreakdownDF.at[i,'RF Source'] = 'RF assignment based on average response factor for DBRF carbon number entries'

        #Guarantee the column exists even when no row had a usable formula
        if 'Carbon Number' not in BreakdownDF.columns:
            BreakdownDF['Carbon Number'] = np.nan

        return BreakdownDF

    #Function for quantifying compounds using ideal gas law
    def gasQuant(BreakdownDF,DBRF,sinfo,cutoff):

        #Remove rows in BreakdownDF with a carbon number at or below cutoff
        BreakdownDF = BreakdownDF.loc[BreakdownDF['Carbon Number'] > cutoff].copy()

        #Unpack sinfo and convert to SI units
        vol = sinfo['Reactor Volume (mL)'] / 10**6                 #reactor volume, m^3
        pressure = sinfo['Quench Pressure (psi)'] / 14.504*100000  #sample pressure, Pa
        temp = sinfo['Quench Temperature (C)'] + 273.15            #sample temperature, K

        #Define ideal gas constant, m^3*Pa/K*mol
        R = 8.314

        #Loop through every row in BreakdownDF
        for i, row in BreakdownDF.iterrows():

            #Add molecular weight using ChemFormula
            MW = ChemFormula(row['Formula']).formula_weight
            BreakdownDF.at[i,'MW (g/mol)'] = MW

            #Get volume percent using response factor
            volpercent = row['FID Area']/row['RF (Area/vol.%)']
            BreakdownDF.at[i,'Vol.%'] = volpercent

            #Get moles using ideal gas law (PV=nRT)
            moles = volpercent/100*vol*pressure/(temp*R)
            BreakdownDF.at[i,'Moles'] = moles

            #Get mass (mg) using moles and molar mass
            BreakdownDF.at[i,'Mass (mg)'] = moles * MW * 1000

        #Guarantee the result columns exist even when no row passed the cutoff
        for column in ('MW (g/mol)','Vol.%','Moles','Mass (mg)'):
            if column not in BreakdownDF.columns:
                BreakdownDF[column] = pd.Series(np.nan,index=BreakdownDF.index,dtype=float)

        return BreakdownDF

    #Function for further breaking down product distribution
    def moreBreakdown(BreakdownDF,CT_dict,sinfo):
        """
        This function prepares further breakdown dataframes for use in exporting to Excel

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing quantified FID peaks, including a 'Mass (mg)' column.
        CT_dict : Dict
            Dictionary of all compound type abbreviations in use and their associated expansions
        sinfo : Dict
            Dictionary containing sample information (not used here).

        Returns
        -------
        BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF : DataFrame
            The labelled breakdown and the compound type, carbon number, combined
            and total mass tables.

        """

        #Get the total mass of product from the breakdown dataframe
        m_total = np.nansum(BreakdownDF['Mass (mg)'])

        #Label every species with its compound type and carbon number
        for i, row in BreakdownDF.iterrows():
            try:
                BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
                BreakdownDF.at[i,'Carbon Number'] = ChemFormula(row['Formula']).element['C']
            #Labelling is best effort: a row that cannot be labelled is left as it is.
            #Exception (not a bare except) so that interrupts are not swallowed
            except Exception:
                continue

        #Guarantee the compound type column exists even when no row could be labelled
        if 'Compound Type' not in BreakdownDF.columns:
            BreakdownDF['Compound Type'] = pd.Series(index=BreakdownDF.index,dtype=object)

        #Get maximum carbon number in breakdown dataframe (zero when there are no rows)
        CN_found = BreakdownDF['Carbon Number'].max()
        CN_max = 0 if pd.isna(CN_found) else int(CN_found)

        #Helper summing the masses of the selected rows, treating nan as zero
        def massWhere(mask):
            return np.nansum(BreakdownDF.loc[mask,'Mass (mg)'])

        #Masses and mass fractions organized by compound type
        CT_DF = pd.DataFrame({'Compound Type':CT_names,
                              'Mass (mg)':np.array([massWhere(BreakdownDF['Compound Type'] == ct) for ct in CT_names],dtype=float)})
        CT_DF['Mass fraction'] = CT_DF['Mass (mg)']/m_total

        #Masses organized by carbon number
        CN_DF = pd.DataFrame({'Carbon Number':range(1,CN_max+1,1),
                              'Mass (mg)':np.array([massWhere(BreakdownDF['Carbon Number'] == cn) for cn in range(1,CN_max+1)],dtype=float)})

        #Masses organized by both compound type and carbon number; row k holds carbon number k+1.
        #Start from zeros rather than uninitialised memory
        CTCN_DF = pd.DataFrame(0.0,index=range(CN_max),columns=CT_names)
        for cn in range(1,CN_max+1):
            for ct in CT_names:
                CTCN_DF.at[cn-1,ct] = massWhere((BreakdownDF['Carbon Number'] == cn) & (BreakdownDF['Compound Type'] == ct))

        #Get total masses from CT, CN, and CTCN dataframes
        CT_mass = np.nansum(CT_DF['Mass (mg)'])
        CN_mass = np.nansum(CN_DF['Mass (mg)'])
        CTCN_mass = np.nansum(CTCN_DF.to_numpy())

        #Create total mass dataframe
        mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

        return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF

    #Unpack compound type and carbon number dictionaries from list
    CL_Dict, CT_Dict = Label_info

    #Filter dataframe to remove compounds that do not contain carbon; rows with a
    #missing formula are kept here and removed later by the cutoff filter
    hasCarbon = BreakdownDF['Formula'].str.contains('C',na=True).astype(bool)
    #Reset the dataframe index (the result has to be assigned to take effect)
    BreakdownDF = BreakdownDF.loc[hasCarbon].reset_index(drop=True)

    #Run response factor assignment function
    BreakdownDF = assignRF(BreakdownDF, DBRF)
    #Run gas quantification function
    BreakdownDF = gasQuant(BreakdownDF,DBRF,sinfo,cutoff)
    #Run further breakdown function
    BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF, CT_Dict, sinfo)

    return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
|
|
857
|
+
|
|
858
|
+
#Function for quantifying gas FID data w/ external standard
|
|
859
|
+
def gasFID_ES(BreakdownDF,DBRF,Label_info,sinfo,gasBag_cond,total_volume,cutoff=4):
    """
    Quantify gas FID peaks using an external standard and build breakdown tables.

    Rows whose formula does not contain 'C' are dropped, every remaining compound
    is assigned a response factor from DBRF, compounds with a carbon number above
    cutoff are converted to moles with the ideal gas law (total_volume and the gas
    bag conditions) and the resulting masses are totalled by compound type, by
    carbon number and by both.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Matched FID and MS peak data. The columns 'Formula', 'Compound Name',
        'FID Area' and 'Compound Type Abbreviation' are read.
    DBRF : DataFrame
        Gas FID response factors. The columns 'Compound Name', 'Carbon Number'
        and 'RF' are read.
    Label_info : List
        Two-item list [chemical lump dictionary, compound type dictionary]. Only
        the compound type dictionary (abbreviation -> compound type) is used.
    sinfo : Dict
        Dictionary containing key sample information (passed through to the
        helpers, no key is read in this function).
    gasBag_cond : List
        Gas bag conditions as [temperature in C, pressure in psi].
    total_volume : Float
        Total amount of gas estimated by the external standard volume percent.
        It is multiplied by a pressure in Pa and divided by R = 8.314, so it is
        presumably expressed in m^3 - confirm against the caller.
    cutoff : Integer, optional
        Only compounds with a carbon number strictly greater than this value are
        quantified; all other rows are removed. The default is 4.

    Returns
    -------
    BreakdownDF : DataFrame
        Quantified rows with the added columns 'Carbon Number', 'RF (Area/vol.%)',
        'RF Source', 'MW (g/mol)', 'Vol.%', 'Moles', 'Mass (mg)' and 'Compound Type'.
    CT_DF : DataFrame
        Mass (mg) and mass fraction for each compound type.
    CN_DF : DataFrame
        Mass (mg) for each carbon number from 1 to the largest one present.
    CTCN_DF : DataFrame
        Mass (mg) per carbon number (rows, row 0 is C1) and compound type (columns).
    mass_DF : DataFrame
        Total mass according to each of the breakdowns above.

    """
    #Compound types reported in the breakdown tables, in output order
    CT_names = ['Aromatics','Linear Alkanes','Branched Alkanes',
                'Cycloalkanes','Alkenes/Alkynes','Other']

    #Function for assigning response factors to compounds
    def assignRF(BreakdownDF,DBRF):

        #Get a dictionary of average response factors by carbon number
        avgRF = {}
        #Cast the maximum to int so that a float-typed column still works with range,
        #and skip the loop entirely when DBRF holds no carbon numbers
        DBRF_CN_max = DBRF['Carbon Number'].max()
        if not pd.isna(DBRF_CN_max):
            for cn in range(1,int(DBRF_CN_max)+1):
                #The mean is NaN when DBRF has no entry for this carbon number
                avgRF[cn] = DBRF.loc[DBRF['Carbon Number']==cn,'RF'].mean()

        #Loop through every row in the FIDpMS dataframe
        for i, row in BreakdownDF.iterrows():
            #Rows without a formula cannot be assigned a carbon number, skip them
            if pd.isna(row['Formula']):
                continue

            #Use the carbon count of the formula as the row's carbon number
            carbon = ChemFormula(row['Formula']).element['C']
            BreakdownDF.at[i,'Carbon Number'] = carbon

            #If the row's compound name exists in the RF list explicitly, use that RF
            directRF = DBRF.loc[DBRF['Compound Name']==row['Compound Name'],'RF']
            if not directRF.empty:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = directRF.iloc[0]
                BreakdownDF.at[i,'RF Source'] = 'Direct RF assignment based on compound name'
            #Otherwise, use the average RF for the carbon number (NaN if none is available,
            #including carbon numbers above the largest one in DBRF)
            else:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = avgRF.get(int(carbon),np.nan)
                BreakdownDF.at[i,'RF Source'] = 'RF assignment based on average response factor for DBRF carbon number entries'

        #Guarantee the column exists even when no row had a usable formula
        if 'Carbon Number' not in BreakdownDF.columns:
            BreakdownDF['Carbon Number'] = np.nan

        return BreakdownDF

    #Function for quantifying compounds using ideal gas law
    def gasQuant(BreakdownDF,DBRF,sinfo,total_volume,cutoff):

        #Remove rows in BreakdownDF with a carbon number at or below cutoff
        BreakdownDF = BreakdownDF.loc[BreakdownDF['Carbon Number'] > cutoff].copy()

        #Get gas bag conditions (from the enclosing function) and convert to SI units
        temp = gasBag_cond[0] + 273.15              #gas bag temperature, K
        pressure = gasBag_cond[1] / 14.504*100000   #gas bag pressure, Pa

        #Define ideal gas constant, m^3*Pa/K*mol
        R = 8.314

        #Loop through every row in BreakdownDF
        for i, row in BreakdownDF.iterrows():

            #Add molecular weight using ChemFormula
            MW = ChemFormula(row['Formula']).formula_weight
            BreakdownDF.at[i,'MW (g/mol)'] = MW

            #Get volume percent using response factor
            volpercent = row['FID Area']/row['RF (Area/vol.%)']
            BreakdownDF.at[i,'Vol.%'] = volpercent

            #Get moles using ideal gas law (PV=nRT)
            moles = volpercent/100*total_volume*pressure/(temp*R)
            BreakdownDF.at[i,'Moles'] = moles

            #Get mass (mg) using moles and molar mass
            BreakdownDF.at[i,'Mass (mg)'] = moles * MW * 1000

        #Guarantee the result columns exist even when no row passed the cutoff
        for column in ('MW (g/mol)','Vol.%','Moles','Mass (mg)'):
            if column not in BreakdownDF.columns:
                BreakdownDF[column] = pd.Series(np.nan,index=BreakdownDF.index,dtype=float)

        return BreakdownDF

    #Function for further breaking down product distribution
    def moreBreakdown(BreakdownDF,CT_dict,sinfo):
        """
        This function prepares further breakdown dataframes for use in exporting to Excel

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing quantified FID peaks, including a 'Mass (mg)' column.
        CT_dict : Dict
            Dictionary of all compound type abbreviations in use and their associated expansions
        sinfo : Dict
            Dictionary containing sample information (not used here).

        Returns
        -------
        BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF : DataFrame
            The labelled breakdown and the compound type, carbon number, combined
            and total mass tables.

        """

        #Get the total mass of product from the breakdown dataframe
        m_total = np.nansum(BreakdownDF['Mass (mg)'])

        #Label every species with its compound type and carbon number
        for i, row in BreakdownDF.iterrows():
            try:
                BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
                BreakdownDF.at[i,'Carbon Number'] = ChemFormula(row['Formula']).element['C']
            #Labelling is best effort: a row that cannot be labelled is left as it is.
            #Exception (not a bare except) so that interrupts are not swallowed
            except Exception:
                continue

        #Guarantee the compound type column exists even when no row could be labelled
        if 'Compound Type' not in BreakdownDF.columns:
            BreakdownDF['Compound Type'] = pd.Series(index=BreakdownDF.index,dtype=object)

        #Get maximum carbon number in breakdown dataframe (zero when there are no rows)
        CN_found = BreakdownDF['Carbon Number'].max()
        CN_max = 0 if pd.isna(CN_found) else int(CN_found)

        #Helper summing the masses of the selected rows, treating nan as zero
        def massWhere(mask):
            return np.nansum(BreakdownDF.loc[mask,'Mass (mg)'])

        #Masses and mass fractions organized by compound type
        CT_DF = pd.DataFrame({'Compound Type':CT_names,
                              'Mass (mg)':np.array([massWhere(BreakdownDF['Compound Type'] == ct) for ct in CT_names],dtype=float)})
        CT_DF['Mass fraction'] = CT_DF['Mass (mg)']/m_total

        #Masses organized by carbon number
        CN_DF = pd.DataFrame({'Carbon Number':range(1,CN_max+1,1),
                              'Mass (mg)':np.array([massWhere(BreakdownDF['Carbon Number'] == cn) for cn in range(1,CN_max+1)],dtype=float)})

        #Masses organized by both compound type and carbon number; row k holds carbon number k+1.
        #Start from zeros rather than uninitialised memory
        CTCN_DF = pd.DataFrame(0.0,index=range(CN_max),columns=CT_names)
        for cn in range(1,CN_max+1):
            for ct in CT_names:
                CTCN_DF.at[cn-1,ct] = massWhere((BreakdownDF['Carbon Number'] == cn) & (BreakdownDF['Compound Type'] == ct))

        #Get total masses from CT, CN, and CTCN dataframes
        CT_mass = np.nansum(CT_DF['Mass (mg)'])
        CN_mass = np.nansum(CN_DF['Mass (mg)'])
        CTCN_mass = np.nansum(CTCN_DF.to_numpy())

        #Create total mass dataframe
        mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

        return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF

    #Unpack compound type and carbon number dictionaries from list
    CL_Dict, CT_Dict = Label_info

    #Filter dataframe to remove compounds that do not contain carbon; rows with a
    #missing formula are kept here and removed later by the cutoff filter
    hasCarbon = BreakdownDF['Formula'].str.contains('C',na=True).astype(bool)
    #Reset the dataframe index (the result has to be assigned to take effect)
    BreakdownDF = BreakdownDF.loc[hasCarbon].reset_index(drop=True)

    #Run response factor assignment function
    BreakdownDF = assignRF(BreakdownDF, DBRF)
    #Run gas quantification function
    BreakdownDF = gasQuant(BreakdownDF,DBRF,sinfo,total_volume,cutoff)
    #Run further breakdown function
    BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF, CT_Dict, sinfo)

    return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
|
|
1075
|
+
|
|
1076
|
+
#Define function that inserts a column to a CTCN Dataframe labeling the carbon number
|
|
1077
|
+
def insertCN(CTCN_DF):
    """
    Prepend a 'Carbon Number' column (1, 2, ..., number of rows) to a CTCN dataframe.

    The dataframe is modified in place and also returned. The row count is taken
    to be the maximum carbon number, i.e. row k is labelled carbon number k+1.
    """
    #One carbon number per row, counting up from one
    carbon_numbers = list(range(1, len(CTCN_DF) + 1))

    #Place the labels in the first column position
    CTCN_DF.insert(0, 'Carbon Number', carbon_numbers)

    return CTCN_DF
|
|
1089
|
+
|
|
1090
|
+
""" DATA IMPORTS """
|
|
1091
|
+
print("[AutoQuantification] Importing data...")
|
|
1092
|
+
#Load the sample information json stored in the sample's data folder
sinfo_path = os.path.join(DF_Dir,sname,sname+'_INFO.json')
with open(sinfo_path) as sinfo_f:
    sinfo = json.load(sinfo_f)

#Turn the ISO date-time strings into datetime objects
sinfo['Start Time'] = datetime.fromisoformat(sinfo['Start Time'])
sinfo['End Time'] = datetime.fromisoformat(sinfo['End Time'])

#Reaction time: hours elapsed between start and end, minus the heat time
elapsed_hours = abs(sinfo['End Time']-sinfo['Start Time']).total_seconds()/3600
sinfo['Reaction Time'] = elapsed_hours - sinfo['Heat Time']

#Substrings appended to the sample name to create file names
#NOTE(review): the 'Liquid Labelled MS Peaks' suffix has no '.csv' extension unlike
#the matching gas entry - confirm whether that is intentional
suffix_Dict = {'Gas TCD+FID':'_GS2_TCD_CSO.csv',
               'Gas Labelled MS Peaks':'_GS1_UA_Comp_UPP.csv',
               'Gas FID+MS':'_GS2_FIDpMS.csv',
               'Liquid FID':'_LQ1_FID_CSO.csv',
               'Liquid Labelled MS Peaks':'_LQ1_UA_Comp_UPP',
               'Liquid FID+MS':'_LQ1_FIDpMS.csv'}

#Each sub_Dict entry is [substring, full path of the file inside the raw data directory]
sub_Dict = {}
for key, suffix in suffix_Dict.items():
    sub_Dict[key] = [suffix,os.path.join(DFR_Dir,sname+suffix)]


#Liquid analysis inputs, only loaded when the liquid Boolean is True
if lgTF[0]:
    #Path of the liquid matched MS and FID peaks
    DIR_LQ1_FIDpMS = sub_Dict['Liquid FID+MS'][1]
    #Path of the liquid response factors workbook
    DIR_LQRF = os.path.join(RF_Dir,LRF_file)

    #Matched peak data between liquid FID and MS
    LQ1_FIDpMS = pd.read_csv(DIR_LQ1_FIDpMS)

    #Keep only the rows that have a compound name
    named_rows = LQ1_FIDpMS['Compound Name'].notnull()
    LQ1_FIDpMS_Filtered = LQ1_FIDpMS[named_rows].reset_index(drop=True)

    #Working copy that is later saved as the liquid breakdown
    LQ_FID_BreakdownDF = LQ1_FIDpMS_Filtered.copy()

    #One response factor sheet per chemical lump key
    LQRF = {}
    for i in CL_Dict.keys():
        LQRF[i] = pd.read_excel(DIR_LQRF,sheet_name=i)

#Gas analysis inputs, only loaded when the gas Boolean is True
if lgTF[1]:
    #Paths of the gas TCD peaks and the gas matched FID and MS peaks
    DIR_GS2_TCD = sub_Dict['Gas TCD+FID'][1]
    DIR_GS2_FIDpMS = sub_Dict['Gas FID+MS'][1]
    #Paths of the gas TCD and gas FID response factors
    DIR_TCDRF = os.path.join(RF_Dir,GRFT_file)
    DIR_FIDRF = os.path.join(RF_Dir,GRF_file)

    #Gas peak data from the TCD/FID file
    GS2_TCD = pd.read_csv(DIR_GS2_TCD)

    #Working copy for the TCD breakdown, restricted to rows whose signal is 'TCD2B'
    from_TCD = GS2_TCD['Signal Name'] == 'TCD2B'
    GS_TCD_BreakdownDF = GS2_TCD.loc[from_TCD].copy()

    #Matched peak data between gas FID and MS
    GS2_FIDpMS = pd.read_csv(DIR_GS2_FIDpMS)

    #Working copy that is later saved as the gas FID breakdown
    GS_FID_BreakdownDF = GS2_FIDpMS.copy()

    #Gas TCD and gas FID response factors
    TCDRF = pd.read_csv(DIR_TCDRF)
    GSRF = pd.read_csv(DIR_FIDRF)
|
|
1170
|
+
|
|
1171
|
+
""" MAIN SCRIPT """
|
|
1172
|
+
|
|
1173
|
+
#If the run liquid analysis Boolean is True..
if lgTF[0]:
    print("[AutoQuantification] Analyzing liquids...")
    #Get liquid FID breakdown and miscellaneous dataframes
    LQ_FID_BreakdownDF, LQCT_DF, LQCN_DF, LQCTCN_DF, LQmass_DF = liquidFID(LQ_FID_BreakdownDF, LQRF, [CL_Dict, CT_Dict], sinfo)

    #Insert the carbon number column to LQCTCN_DF
    LQCTCN_DF = insertCN(LQCTCN_DF)

#If the run gas analysis Boolean is True..
if lgTF[1]:
    print("[AutoQuantification] Analyzing gases...")
    #If the external standard Boolean is True..
    if ES_bool:
        #Get gas TCD breakdown and miscellaneous dataframes
        GS_TCD_BreakdownDF, TCDRF, total_volume, TCD_cond = gasTCD_ES(GS_TCD_BreakdownDF,TCDRF,sinfo,[gasBag_temp,gasBag_pressure],peak_error)

        #Get gas FID breakdown and miscellaneous dataframes
        GS_FID_BreakdownDF, GSCT_DF, GSCN_DF, GSCTCN_DF, GSmass_DF = gasFID_ES(GS_FID_BreakdownDF,GSRF,[CL_Dict, CT_Dict], sinfo,[gasBag_temp,gasBag_pressure],total_volume)
    #Otherwise..
    else:
        #Get gas TCD breakdown and miscellaneous dataframes
        GS_TCD_BreakdownDF, TCDRF, TCD_cond = gasTCD(GS_TCD_BreakdownDF,TCDRF,sinfo,peak_error)

        #Get gas FID breakdown and miscellaneous dataframes
        GS_FID_BreakdownDF, GSCT_DF, GSCN_DF, GSCTCN_DF, GSmass_DF = gasFID(GS_FID_BreakdownDF,GSRF,[CL_Dict, CT_Dict], sinfo)

    #Insert the carbon number column to GSCTCN_DF
    GSCTCN_DF = insertCN(GSCTCN_DF)

#If both the gas and liquid analysis Booleans are True..
if lgTF[0] and lgTF[1]:
    print("[AutoQuantification] Totaling contributions from liquid and gas phases...")
    #Get maximum carbon number between breakdown dataframes, ignoring a breakdown
    #that holds no carbon numbers (its maximum is NaN and cannot be cast to int)
    CN_found = [GS_FID_BreakdownDF['Carbon Number'].max(),LQ_FID_BreakdownDF['Carbon Number'].max()]
    CN_max = max([int(cn) for cn in CN_found if not pd.isna(cn)],default=0)

    #Compound type columns shared by the liquid, gas and total CTCN dataframes
    CT_columns = ['Aromatics','Linear Alkanes','Branched Alkanes',
                  'Cycloalkanes','Alkenes/Alkynes','Other']

    #Initiate the total CTCN dataframe with zeros; row k holds carbon number k+1
    total_CTCN_DF = pd.DataFrame(0.0,index=range(CN_max),columns=CT_columns)

    #Sum the liquid and gas CTCN dataframes row by row; a phase only contributes
    #to the carbon numbers it actually covers
    for phase_CTCN_DF in (LQCTCN_DF,GSCTCN_DF):
        rows = min(len(phase_CTCN_DF.index),CN_max)
        total_CTCN_DF.iloc[:rows] = total_CTCN_DF.iloc[:rows].to_numpy() + phase_CTCN_DF[CT_columns].iloc[:rows].to_numpy()

    #Add the TCD data afterwards
    #Filter the TCD breakdown dataframe to only include entries with non-nan formulas
    GS_TCD_BreakdownDF_filter = GS_TCD_BreakdownDF[GS_TCD_BreakdownDF['Formula'].notnull()]
    #Filter the TCD breakdown dataframe to only include formulas containing both 'C' and 'H'
    GS_TCD_BreakdownDF_filter = GS_TCD_BreakdownDF_filter[(GS_TCD_BreakdownDF_filter['Formula'].str.contains('C')) & (GS_TCD_BreakdownDF_filter['Formula'].str.contains('H'))]

    #For every row in this filtered TCD dataframe
    for i, row in GS_TCD_BreakdownDF_filter.iterrows():
        #A TCD peak without a quantified mass contributes nothing
        if pd.isna(row['Mass (mg)']):
            continue

        #Get a chemical formula dictionary for the row's formula
        chemFormDict = ChemFormula(row['Formula']).element

        #If the carbon number is less than four, the mass goes to the linear entry
        #for the given carbon number
        if chemFormDict['C'] < 4:
            target_row, target_col = chemFormDict['C']-1, 'Linear Alkanes'
        #Otherwise, if the compound is isobutane, it goes to the branched C4 entry
        elif row['Compound Name'] == 'Isobutane':
            target_row, target_col = 3, 'Branched Alkanes'
        #Otherwise, if the compound is butane, it goes to the linear C4 entry
        elif row['Compound Name'] == 'n-Butane':
            target_row, target_col = 3, 'Linear Alkanes'
        #Otherwise, the compound is not added to the total
        else:
            continue

        #Add (rather than overwrite) so that several TCD compounds sharing an entry
        #and any FID mass already stored there are all kept
        if target_row in total_CTCN_DF.index:
            current_mass = total_CTCN_DF.at[target_row,target_col]
        else:
            current_mass = 0.0
        total_CTCN_DF.at[target_row,target_col] = current_mass + row['Mass (mg)']

    #Insert the carbon number column to total_CTCN_DF
    total_CTCN_DF = insertCN(total_CTCN_DF)
|
|
1268
|
+
|
|
1269
|
+
""" BREAKDOWN SAVING """
|
|
1270
|
+
print("[AutoQuantification] Formatting and saving breakdown file...")
|
|
1271
|
+
#If breakdown directory does not exist within sample folder, create it
|
|
1272
|
+
if not os.path.exists(DFbreak_Dir):
|
|
1273
|
+
os.makedirs(DFbreak_Dir)
|
|
1274
|
+
|
|
1275
|
+
#Define breakdown file name
|
|
1276
|
+
bfn = sname+"_Breakdown_"+nows+".xlsx"
|
|
1277
|
+
|
|
1278
|
+
    #Create the pandas Excel writer for the breakdown file
|
|
1279
|
+
writer = pd.ExcelWriter(fileCheck(os.path.join(DFbreak_Dir,bfn)), engine="xlsxwriter")
|
|
1280
|
+
|
|
1281
|
+
#Get dataframe for sample info
|
|
1282
|
+
sinfo_DF = pd.DataFrame(sinfo,index=[0])
|
|
1283
|
+
|
|
1284
|
+
    #If the run liquid analysis Boolean is True...
|
|
1285
|
+
if lgTF[0]:
|
|
1286
|
+
#Position the liquid FID dataframes in the worksheet.
|
|
1287
|
+
sinfo_DF.to_excel(writer, sheet_name="Liquid FID",startcol=1, startrow=1, index=False)
|
|
1288
|
+
LQ_FID_BreakdownDF.to_excel(writer, sheet_name="Liquid FID",startcol=1, startrow=4, index=False)
|
|
1289
|
+
LQCT_DF.to_excel(writer, sheet_name="Liquid FID",startcol=16, startrow=7, index=False)
|
|
1290
|
+
LQCN_DF.to_excel(writer, sheet_name="Liquid FID", startcol=16, startrow=15, index=False)
|
|
1291
|
+
LQmass_DF.to_excel(writer, sheet_name="Liquid FID",startcol=22, startrow=1,index=False)
|
|
1292
|
+
LQCTCN_DF.to_excel(writer, sheet_name="Liquid FID", startcol=20, startrow=7, index=False)
|
|
1293
|
+
else:
|
|
1294
|
+
pass
|
|
1295
|
+
|
|
1296
|
+
    #If the run gas analysis Boolean is True...
|
|
1297
|
+
if lgTF[1]:
|
|
1298
|
+
#Position the gas FID dataframes in the worksheet.
|
|
1299
|
+
sinfo_DF.to_excel(writer, sheet_name="Gas FID",startcol=1, startrow=1, index=False)
|
|
1300
|
+
GS_FID_BreakdownDF.to_excel(writer, sheet_name="Gas FID",startcol=1, startrow=4, index=False)
|
|
1301
|
+
GSCT_DF.to_excel(writer, sheet_name="Gas FID",startcol=18, startrow=7, index=False)
|
|
1302
|
+
GSCN_DF.to_excel(writer, sheet_name="Gas FID", startcol=18, startrow=15, index=False)
|
|
1303
|
+
GSmass_DF.to_excel(writer, sheet_name="Gas FID",startcol=22, startrow=1,index=False)
|
|
1304
|
+
GSCTCN_DF.to_excel(writer, sheet_name="Gas FID",startcol=22, startrow=7,index=False)
|
|
1305
|
+
|
|
1306
|
+
#Expand sample info dataframe to include total TCD mass and gas bag volume
|
|
1307
|
+
sinfo_DF.at[0,'Total product (mg)'] = GS_TCD_BreakdownDF['Mass (mg)'].sum()
|
|
1308
|
+
sinfo_DF.at[0,'Gas bag volume (m^3)'] = total_volume
|
|
1309
|
+
|
|
1310
|
+
#Position the gas TCD dataframes in the worksheet
|
|
1311
|
+
GS_TCD_BreakdownDF.to_excel(writer, sheet_name="Gas TCD",startcol=1,startrow=4, index=False)
|
|
1312
|
+
sinfo_DF.to_excel(writer, sheet_name="Gas TCD",startcol=1, startrow=1, index=False)
|
|
1313
|
+
else:
|
|
1314
|
+
pass
|
|
1315
|
+
|
|
1316
|
+
    #If both the gas and liquid analysis Booleans are True...
|
|
1317
|
+
if lgTF[0] and lgTF[1]:
|
|
1318
|
+
#Position the total product dataframe in the worksheet
|
|
1319
|
+
total_CTCN_DF.to_excel(writer, sheet_name = "Total",startcol=1, startrow=1,index=False)
|
|
1320
|
+
|
|
1321
|
+
#Close the Excel writer
|
|
1322
|
+
writer.close()
|
|
1323
|
+
|
|
1324
|
+
#Log that a new Excel breakdown has been saved
|
|
1325
|
+
logger.info("New breakdown created: " + bfn)
|
|
1326
|
+
|
|
1327
|
+
print("[AutoQuantification] Matching complete.")
|
|
1328
|
+
#Close main function by returning
|
|
1329
|
+
return None
|