chromaquant 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1329 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+
5
+ COPYRIGHT STATEMENT:
6
+
7
+ ChromaQuant – A quantification software for complex gas chromatographic data
8
+
9
+ Copyright (c) 2024, by Julia Hancock
10
+ Affiliation: Dr. Julie Elaine Rorrer
11
+ URL: https://www.rorrerlab.com/
12
+
13
+ License: BSD 3-Clause License
14
+
15
+ ---
16
+
17
+ SCRIPT TO QUANTIFY COMPOUNDS IN SAMPLE USING DEFINED RESPONSE FACTORS
18
+
19
+ Julia Hancock
20
+ Started 12/14/2023
21
+
22
+ """
23
+
24
+ """ PACKAGES """
25
+ import sys
26
+ import pandas as pd
27
+ import os
28
+ from molmass import Formula
29
+ import math
30
+ import numpy as np
31
+ from chemformula import ChemFormula
32
+ import json
33
+ from datetime import datetime
34
+ import logging
35
+ import openpyxl
36
+
37
+ """ QUANTIFICATION MAIN FUNCTION"""
38
+ def main_AutoQuantification(sname,quantphases,directories):
39
+
40
+ print("[AutoQuantification] Evaluating run parameters...")
41
+ #Write whether or not to run liquid and gas analysis based on system argument
42
+ if quantphases == "Liquid":
43
+ #Format is [Liquid Bool, Gas Bool]
44
+ lgTF = [True,False]
45
+ elif quantphases == "Gas":
46
+ lgTF = [False,True]
47
+ elif quantphases == "Liquid and Gas":
48
+ lgTF = [True,True]
49
+ else:
50
+ print("No phases selected, terminating script")
51
+ #Terminate script
52
+ sys.exit()
53
+
54
+ print("[AutoQuantification] Defining hard-coded analysis conditions...")
55
+ #Define retention time error within which TCD peaks may be assigned
56
+ peak_error = 0.5
57
+
58
+ #Define boolean describing whether or not an external standard was used for gas analysis
59
+ ES_bool = True
60
+
61
+ #Define temperature and pressure of gas bag used in sample injection
62
+ gasBag_temp = 18 #C
63
+ gasBag_pressure = 14.7 #psi
64
+
65
+
66
+ """ RESPONSE FACTOR INFO """
67
+ print("[AutoQuantification] Searching for response factors...")
68
+ #Liquid response factor file name
69
+ LRF_file = "LRF_7-24-24.xlsx"
70
+ #FID gas response factor file name
71
+ GRF_file = "FIDRF_7-24-24.csv"
72
+ #TCD gas response factor file name
73
+ GRFT_file = "TCDRF_7-24-24.csv"
74
+
75
+ """ DIRECTORIES """
76
+ print("[AutoQuantification] Finding directories...")
77
+
78
+ #Unpack directories from passed variable
79
+ #Primary files directory
80
+ files = directories['files']
81
+ #Resources directory
82
+ RE_Dir = directories['resources']
83
+ #Theme directory
84
+ theme_Dir = directories['theme']
85
+ #Response factor directory
86
+ RF_Dir = directories['rf']
87
+ #Data directory
88
+ DF_Dir = directories['data']
89
+ #Images directory
90
+ img_Dir = directories['images']
91
+ #Data file log directory
92
+ DFlog_Dir = os.path.join(DF_Dir,sname,"log")
93
+ #Data file breakdowns directory
94
+ DFbreak_Dir = os.path.join(DF_Dir,sname,"breakdowns")
95
+ #Raw data file directory
96
+ DFR_Dir = os.path.join(DF_Dir,sname,'raw data')
97
+
98
+ """ LOGGING """
99
+ print("[AutoQuantification] Initializing logging [WIP]...")
100
+ #Get current datetime
101
+ now = datetime.now()
102
+ #Get current datetime string
103
+ nows = now.strftime('%Y%m%d')
104
+
105
+ #If log directory does not exist within sample folder, create it
106
+ if not os.path.exists(DFlog_Dir):
107
+ os.makedirs(DFlog_Dir)
108
+
109
+ #Instantiate a logger
110
+ logger = logging.getLogger(__name__)
111
+ #Initialize logging file using current datetime
112
+ fh = logging.FileHandler(os.path.join(DFlog_Dir,'quantlog_'+nows+'.log'))
113
+ logger.addHandler(fh)
114
+ logger.propagate = False
115
+ #Set logging level
116
+ logger.setLevel(logging.INFO)
117
+ #Create a formatter and assign to logger
118
+ formatter = logging.Formatter('[%(filename)s] %(asctime)s - [%(levelname)s]: %(message)s')
119
+ fh.setFormatter(formatter)
120
+
121
+
122
+ """ LABELS """
123
+ print("[AutoQuantification] Defining chemical lumps and compound types...")
124
+ #Dictionary of all chemical lump abbreviations in use and their associated expansions
125
+ #OLD DICTIONARY
126
+ #CL_Dict = {'MBE':'Methyl benzenes', 'ABE':'Alkyl benzenes', 'NAP':'Naphthalenes', 'MAL':'Methyl alkanes',
127
+ # 'DAL':'Dimethyl alkanes','TAL':'Trimethyl alkanes','MCA':'Methyl cycloalkanes','ACA':'Alkyl cycloalkanes',
128
+ # 'AAL':'Alkyl alkanes','MAE':'Methyl alkenes','DAE':'Dimethyl alkenes','AAE':'Alkyl alkenes',
129
+ # 'LAL':'Linear alkanes','CAE':'Cycloalkenes','IND':'Indenes','PAH':'Polycyclic aromatic hydrocarbons',
130
+ # 'AKY':'Alkynes'}
131
+
132
+ #7-24-24: Could have removed the CL_Dict infrastructure, but nice to have in place in case we want to
133
+ #add more complexity to response factor assignment later
134
+
135
+ #Dictionary of all compound type abbreviations in use and their associated expansions
136
+ CL_Dict = {'A':'Aromatics','L':'Linear Alkanes','B':'Branched Alkanes',
137
+ 'C':'Cycloalkanes','E':'Alkenes/Alkynes'}
138
+
139
+ #Alphabetize lump abbreviation dictionary
140
+ CL_Dict = dict(sorted(CL_Dict.items()))
141
+
142
+ #Dictionary of all compound type abbreviations in use and their associated expansions
143
+ CT_Dict = {'A':'Aromatics','L':'Linear Alkanes','B':'Branched Alkanes',
144
+ 'C':'Cycloalkanes','E':'Alkenes/Alkynes','O':'Other'}
145
+
146
+ #Alphabetize compound type abbreviation dictionary
147
+ CT_Dict = dict(sorted(CT_Dict.items()))
148
+
149
+ """ FUNCTIONS """
150
+ print("[AutoQuantification] Defining functions...")
151
+ #Function for checking if file exists and adding number if so
152
def fileCheck(path):
    """
    Return a path that does not collide with anything already on disk.

    Parameters
    ----------
    path : Str
        Desired file path.

    Returns
    -------
    candidate : Str
        `path` itself when nothing exists there, otherwise the same path with
        " (1)", " (2)", ... inserted before the extension, using the first
        number for which no file or directory exists.

    """
    #Inspired by https://stackoverflow.com/questions/13852700/create-file-but-if-name-exists-add-number
    #Split once so every numbered candidate is built from the original stem and extension
    stem, suffix = os.path.splitext(path)
    candidate = path
    attempt = 0

    #Keep bumping the counter until the candidate name is unused
    while os.path.exists(candidate):
        attempt += 1
        candidate = "{0} ({1}){2}".format(stem, attempt, suffix)

    return candidate
162
+
163
+ #Function for quantifying liquid FID data
164
+ def liquidFID(BreakdownDF,DBRF,Label_info,sinfo):
165
+
166
+ #Unpack compound type and carbon number dictionaries from list
167
+ CL_Dict, CT_Dict = Label_info
168
+
169
+ """ FUNCTIONS """
170
+ #Function to assign compound type and carbon number to compound using formula
171
def assignCTCN(BreakdownDF,CT_dict):
    """
    Add 'Compound Type' and 'Carbon Number' entries to the breakdown dataframe.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe with 'Compound Type Abbreviation' and 'Formula' columns.
        It is modified in place.
    CT_dict : Dict
        Dictionary containing key:value pairs defined as
        (compound type abbreviation):(full compound type name)

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe object. Rows whose abbreviation is not in CT_dict
        are left untouched; rows whose formula cannot be parsed or contains
        no carbon keep their compound type but get no carbon number.

    """
    #Iterate through every species in the breakdown dataframe
    for i, row in BreakdownDF.iterrows():
        try:
            #Expand the abbreviation already present in the row into the full compound type name
            BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
            #Obtain a dictionary containing key:value pairs as element:count using the formula string for the ith row
            chemFormDict = ChemFormula(row['Formula']).element
            #Use the carbon entry from the above dictionary to assign a carbon number to the ith row
            BreakdownDF.at[i,'Carbon Number'] = chemFormDict['C']
        #Best-effort: skip rows that cannot be labelled. Catching Exception rather than using a
        #bare except lets KeyboardInterrupt and SystemExit propagate instead of being swallowed
        except Exception:
            continue

    return BreakdownDF
187
+
188
+ #Function to assign response factor by carbon number and compound type
189
def assignRF(BreakdownDF,DBRF,CL_Dict):
    """
    Function takes a dataframe containing matched FID and MS peak information and
    compares it against a provided response factor database to assign response
    factors to the matched peak dataframe.

    For each row the response factor is chosen in this order of preference:
    the empirical value listed for the exact compound name, then the value
    estimated from the compound type's linear fit (fit m * carbon number + fit b),
    then a fallback of 1. The reason for the choice is written to 'RF Source'.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing 'Compound Name', 'Carbon Number' and
        'Compound Type Abbreviation' columns. It is modified in place.

    DBRF : Dict-like of DataFrames
        Response factor sheets indexed by compound type abbreviation. Each sheet
        is read for 'Compound Name', 'Response Factor', and the 'Linear fit m' /
        'Linear fit b' values stored at index label 0.

    CL_Dict : Dict
        Dictionary containing key:value pairs defined as
        (chemical lump abbreviation):(full chemical lump name).
        Currently unused; kept so the call signature stays stable.

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe with 'Response Factor ((A_i/A_T)/(m_i/m_T))' and
        'RF Source' filled in. Rows missing a name, carbon number or type, and
        rows of type "O", only receive an 'RF Source' entry.

    Raises
    ------
    KeyError
        If a row's compound type abbreviation has no sheet in DBRF.

    """
    #Name of the column that receives the response factor
    rf_column = 'Response Factor ((A_i/A_T)/(m_i/m_T))'

    #Loop through every labelled peak in the breakdown DataFrame
    for i, row in BreakdownDF.iterrows():
        #Find the compound name, carbon number, and compound type abbreviation
        cmp_name = row['Compound Name']
        cmp_carbon = row['Carbon Number']
        cmp_type = row['Compound Type Abbreviation']

        #If any of these pieces of information is NAN, note that in the RF source and move on
        if pd.isna(cmp_name) or pd.isna(cmp_carbon) or pd.isna(cmp_type):
            BreakdownDF.at[i,'RF Source'] = 'No RF assigned, at least one of the following were missing: compound name, formula, or type abbreviation'
            continue

        #Compounds of type Other, "O", are never assigned a response factor
        if cmp_type == "O":
            BreakdownDF.at[i,'RF Source'] = 'No RF assigned, compound type is listed as "Other"'
            continue

        #Get the sheet for this compound type and the rows in it that list this exact compound
        type_sheet = DBRF[cmp_type]
        name_matches = type_sheet['Compound Name'] == cmp_name

        #If the compound name is in the sheet, prefer its empirical response factor
        if name_matches.any():
            RF = type_sheet.loc[name_matches,'Response Factor'].iloc[0]

            #A listed compound without a value falls back to 1
            if pd.isna(RF):
                RF = 1
                source = 'Assumed 1, compound found in RF sheet without RF'
            else:
                source = 'Assigned empirical RF, exact compound found in response factors sheet'

        #Otherwise, estimate from the carbon number fit for the compound type
        else:
            fit_m = type_sheet.loc[0,'Linear fit m']
            fit_b = type_sheet.loc[0,'Linear fit b']

            #The fit is unusable when EITHER parameter is missing. Requiring both to be
            #missing let a half-defined fit produce a NaN response factor, which slipped
            #past the range check below and was recorded as a valid fit
            if pd.isna(fit_m) or pd.isna(fit_b):
                RF = 1
                source = 'Assumed 1, compound type does not have a carbon number fit'
            else:
                #Get response factor using fit and carbon number
                RF = fit_m*cmp_carbon+fit_b

                #If the estimated response factor is negative or larger than 5, set RF to 1
                if RF < 0 or RF > 5:
                    RF = 1
                    source = 'Assumed 1, could estimate a response factor exists but is out of range (negative or over 5)'
                else:
                    source = 'Assigned using carbon number linear fit for compound type {0} and carbon number {1}'.format(cmp_type,int(cmp_carbon))

        #Record the response factor and where it came from
        BreakdownDF.at[i,rf_column] = RF
        BreakdownDF.at[i,'RF Source'] = source

    return BreakdownDF
300
+
301
def quantMain(BreakdownDF,sinfo):
    """
    Function that takes in matched FID and MS data with assigned response factors
    and returns quantitative data

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing 'Compound Name', 'FID Area' and
        'Response Factor ((A_i/A_T)/(m_i/m_T))' columns. It is modified in place.
    sinfo : Dict
        Sample information. 'Internal Standard Mass (mg)' and
        'Internal Standard Name' are read from it.

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe with 'A_i/A_T' (area relative to the internal standard)
        and 'm_i' (area ratio * internal standard mass / response factor) filled in
        for every row that is not the internal standard or an unmatched peak.

    Raises
    ------
    ValueError
        If no row is labelled with the internal standard name.

    """
    #Get the internal standard mass and name from sinfo
    IS_m = sinfo['Internal Standard Mass (mg)']
    IS_name = sinfo['Internal Standard Name']

    #Collect the FID areas of every peak labelled as the internal standard
    IS_areas = BreakdownDF.loc[BreakdownDF['Compound Name'] == IS_name,'FID Area']

    #Fail with an explicit message rather than letting idxmax complain about an empty sequence
    if IS_areas.empty:
        raise ValueError("Internal standard '{0}' was not found in the 'Compound Name' column".format(IS_name))

    #If the internal standard is listed more than once, take the largest area peak
    IS_index = IS_areas.idxmax()
    IS_Area = BreakdownDF.at[IS_index,'FID Area']

    #Names that are never quantified: the internal standard itself and either form of no match
    skipped_names = (IS_name,'No match','No Match')

    #Loop through breakdown dataframe, calculating an area ratio and mass for each remaining row
    for i, row in BreakdownDF.iterrows():
        if any(row['Compound Name'] == name for name in skipped_names):
            continue

        #Calculate area ratio
        Aratio = row['FID Area']/IS_Area
        #Calculate mass using response factor column
        m_i = Aratio*IS_m/row['Response Factor ((A_i/A_T)/(m_i/m_T))']
        #Assign area ratio and mass to their respective columns in the breakdown dataframe
        BreakdownDF.at[i,'A_i/A_T'] = Aratio
        BreakdownDF.at[i,'m_i'] = m_i

    return BreakdownDF
345
+
346
def moreBreakdown(BreakdownDF,CT_dict,sinfo):
    """
    This function prepares further breakdown dataframes for use in exporting to Excel

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing 'Compound Type', 'Carbon Number' and 'm_i' columns.
    CT_dict : Dict
        Dictionary of all compound type abbreviations in use and their associated
        expansions. Not read by this function.
    sinfo : Dict
        Dictionary containing sample information. Not read by this function.

    Returns
    -------
    BreakdownDF : DataFrame
        The input dataframe, unchanged.
    CT_DF : DataFrame
        Mass (mg) and mass fraction for each of the six compound types.
    CN_DF : DataFrame
        Mass (mg) for each carbon number from 1 to the largest one present.
    CTCN_DF : DataFrame
        Mass (mg) per compound type (columns) and carbon number (rows); the row
        with index k holds carbon number k+1.
    mass_DF : DataFrame
        Total mass computed from the overall breakdown and from each of the three
        tables above.

    """
    #Compound types reported, in the order they appear in the output tables
    type_names = ['Aromatics','Linear Alkanes','Branched Alkanes',
                  'Cycloalkanes','Alkenes/Alkynes','Other']

    #Sum the m_i column over the selected rows, treating nan as zero
    def summed_mass(selection):
        return np.nansum(BreakdownDF.loc[selection,'m_i'])

    #Get the total mass of product from the breakdown dataframe
    m_total = np.nansum(BreakdownDF['m_i'])

    #Get maximum carbon number in breakdown dataframe and the carbon numbers to report
    CN_max = int(BreakdownDF['Carbon Number'].max())
    carbon_numbers = range(1,CN_max+1,1)

    #Quantitative results organized by compound type, with mass fractions relative to the total mass
    type_masses = [summed_mass(BreakdownDF['Compound Type'] == name) for name in type_names]
    CT_DF = pd.DataFrame({'Compound Type':type_names,
                          'Mass (mg)':np.array(type_masses,dtype=float),
                          'Mass fraction':np.array([mass/m_total for mass in type_masses],dtype=float)})

    #Quantitative results organized by carbon number
    carbon_masses = [summed_mass(BreakdownDF['Carbon Number'] == number) for number in carbon_numbers]
    CN_DF = pd.DataFrame({'Carbon Number':carbon_numbers,
                          'Mass (mg)':np.array(carbon_masses,dtype=float)})

    #Quantitative results organized by both compound type and carbon number
    pair_columns = {}
    for name in type_names:
        pair_masses = [summed_mass((BreakdownDF['Carbon Number'] == number) & (BreakdownDF['Compound Type'] == name))
                       for number in carbon_numbers]
        pair_columns[name] = pd.Series(np.array(pair_masses,dtype=float),index=range(CN_max))
    CTCN_DF = pd.DataFrame(pair_columns)

    #Get total masses from CT, CN, and CTCN dataframes
    CT_mass = np.nansum(CT_DF['Mass (mg)'])
    CN_mass = np.nansum(CN_DF['Mass (mg)'])
    CTCN_mass = np.nansum(CTCN_DF)

    #Create total mass dataframe
    mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],
                            'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

    return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
429
+
430
+ """ BREAKDOWN FORMATION """
431
+
432
+ #Use the assignCTCN function to assign compound type and carbon number
433
+ BreakdownDF = assignCTCN(BreakdownDF,CT_Dict)
434
+
435
+ #Use the assignRF function to assign response factors, preferring empirical RF's to estimated ones and assigning 1 when no other RF can be applied
436
+ BreakdownDF = assignRF(BreakdownDF,DBRF,CL_Dict)
437
+
438
+ #Use the quantMain function to add quantitative data to BreakdownDF
439
+ BreakdownDF = quantMain(BreakdownDF,sinfo)
440
+
441
+ #Use the moreBreakdown function to prepare compound type and carbon number breakdowns
442
+ BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF,CT_Dict,sinfo)
443
+
444
+ return [BreakdownDF,CT_DF,CN_DF,CTCN_DF,mass_DF,]
445
+
446
+ #Function for quantifying gas TCD data w/o external standard
447
def gasTCD(BreakdownDF,DBRF,sinfo,peak_error):
    """
    Quantify gas TCD peaks without an external standard.

    Parameters
    ----------
    BreakdownDF : DataFrame
        TCD peak table with 'RT' and 'Area' columns. It is modified in place.
    DBRF : DataFrame
        TCD response factor table with 'Compound Name', 'Formula', 'RT (min)'
        and 'RF' columns. 'RT Max' and 'RT Min' columns are added in place.
    sinfo : Dict
        Sample information. 'Reactor Volume (mL)', 'Quench Pressure (psi)' and
        'Quench Temperature (C)' are read from it.
    peak_error : Float
        Half-width of the retention time window around each database entry.

    Returns
    -------
    BreakdownDF : DataFrame
        Peak table with name, formula, response factor, molecular weight,
        volume percent, moles and mass filled in for every matched peak. When
        a peak falls inside several windows, the last matching database row wins.
    DBRF : DataFrame
        Response factor table including the retention time windows.
    conditions : List
        [volume (m^3), pressure (Pa), temperature (K)] after unit conversion.

    """
    #Widen each database retention time into an acceptance window
    for db_index in DBRF.index:
        DBRF.at[db_index,'RT Max'] = DBRF.at[db_index,'RT (min)'] + peak_error
        DBRF.at[db_index,'RT Min'] = DBRF.at[db_index,'RT (min)'] - peak_error

    #Read the sample conditions and convert them: mL -> m^3, psi -> Pa, C -> K
    vol = sinfo['Reactor Volume (mL)'] / 10**6
    pressure = sinfo['Quench Pressure (psi)'] / 14.504*100000
    temp = sinfo['Quench Temperature (C)'] + 273.15

    #Define ideal gas constant, m^3*Pa/K*mol
    R = 8.314

    #Compare every peak against every database entry
    for peak_index, peak in BreakdownDF.iterrows():
        for _, reference in DBRF.iterrows():

            #Skip database entries whose retention time window does not contain the peak
            if not (reference['RT Min'] <= peak['RT'] <= reference['RT Max']):
                continue

            #Copy the identity of the matched compound into the breakdown dataframe
            BreakdownDF.at[peak_index,'Compound Name'] = reference['Compound Name']
            BreakdownDF.at[peak_index,'Formula'] = reference['Formula']
            BreakdownDF.at[peak_index,'RF (Area/vol.%)'] = reference['RF']
            mol_weight = ChemFormula(reference['Formula']).formula_weight
            BreakdownDF.at[peak_index,'MW (g/mol)'] = mol_weight

            #Get volume percent using response factor
            vol_percent = peak['Area']/reference['RF']
            BreakdownDF.at[peak_index,'Vol.%'] = vol_percent

            #Get moles using ideal gas law (PV=nRT)
            moles = vol_percent/100*vol*pressure/(temp*R)
            BreakdownDF.at[peak_index,'Moles'] = moles

            #Get mass (mg) using moles and molar mass
            BreakdownDF.at[peak_index,'Mass (mg)'] = moles * mol_weight * 1000

    return BreakdownDF, DBRF, [vol, pressure, temp]
498
+
499
+ #Function for quantifying gas TCD data w/ external standard
500
def gasTCD_ES(BreakdownDF,DBRF,sinfo,gasBag_cond,peak_error):
    """
    Quantify gas TCD peaks using an injected CO2 external standard.

    The CO2 peak is located first; the known injected CO2 volume and its measured
    volume percent give the total gas volume, which then converts every other
    matched peak's volume percent into a volume, moles and mass. If no peak falls
    inside the CO2 retention time window, nothing is quantified.

    Parameters
    ----------
    BreakdownDF : DataFrame
        TCD peak table with 'RT' and 'Area' columns. It is modified in place; its
        'Compound Name' column is reset to 'None' for every row.
    DBRF : DataFrame
        TCD response factor table with 'Compound Name', 'Formula', 'RT (min)'
        and 'RF' columns, including a "Carbon Dioxide" row. 'RT Max' and 'RT Min'
        columns are added in place.
    sinfo : Dict
        Sample information. 'Injected CO2 (mL)' is read from it.
    gasBag_cond : List
        [temperature of gas bag (C), pressure in gas bag (psi)].
    peak_error : Float
        Half-width of the retention time window around each database entry.

    Returns
    -------
    BreakdownDF : DataFrame
        Peak table with quantitative columns filled in for matched peaks.
    DBRF : DataFrame
        Response factor table including the retention time windows.
    volume : Float
        Total gas volume (m^3) estimated from the CO2 peak, or 0 if none was found.
    TCD_cond : List
        [CO2 volume (m^3), pressure (Pa), temperature (K), gas constant].

    """
    #Unpack gas bag conditions: temperature in C, pressure in psi
    temp, pressure = gasBag_cond[0], gasBag_cond[1]

    #Initialize compound name column in BreakdownDF
    BreakdownDF['Compound Name'] = 'None'

    #Function to find the CO2 peak, quantify it, and estimate the total gas volume
    def getCO2(BreakdownDF,DBRF,TCD_cond,peak_error):

        #Unpack TCD conditions
        co2, pressure, temp, R = TCD_cond[0], TCD_cond[1], TCD_cond[2], TCD_cond[3]

        #Find the CO2 row in DBRF and build its retention time window
        CO2_row = DBRF.loc[DBRF['Compound Name'] == "Carbon Dioxide"].iloc[0]
        CO2_RT = CO2_row['RT (min)']
        window_low = CO2_RT - peak_error
        window_high = CO2_RT + peak_error

        #Look for the first peak inside the CO2 window
        for peak_index, peak in BreakdownDF.iterrows():

            if not (window_low <= peak['RT'] <= window_high):
                continue

            #Label the peak as CO2 and copy the database information
            BreakdownDF.at[peak_index,'Compound Name'] = 'Carbon Dioxide'
            BreakdownDF.at[peak_index,'Formula'] = 'CO2'
            BreakdownDF.at[peak_index,'RF (Area/vol.%)'] = CO2_row['RF']
            mol_weight = ChemFormula('CO2').formula_weight
            BreakdownDF.at[peak_index,'MW (g/mol)'] = mol_weight

            #Get volume percent using response factor
            volpercent = peak['Area']/CO2_row['RF']
            BreakdownDF.at[peak_index,'Vol.%'] = volpercent

            #The injected CO2 volume and its volume percent fix the total volume, m^3
            total_volume = co2 * 100 / volpercent

            #Assign CO2 volume
            BreakdownDF.at[peak_index,'Volume (m^3)'] = co2

            #Get moles using ideal gas law (PV=nRT)
            moles = co2*pressure/(temp*R)
            BreakdownDF.at[peak_index,'Moles (mol)'] = moles

            #Get mass (mg) using moles and molar mass
            BreakdownDF.at[peak_index,'Mass (mg)'] = moles * mol_weight * 1000

            #Only the first matching peak is used
            return True, total_volume, BreakdownDF

        #No peak matched the CO2 window
        return False, 0, BreakdownDF

    #Widen each database retention time into an acceptance window
    for db_index in DBRF.index:
        DBRF.at[db_index,'RT Max'] = DBRF.at[db_index,'RT (min)'] + peak_error
        DBRF.at[db_index,'RT Min'] = DBRF.at[db_index,'RT (min)'] - peak_error

    #Read the injected CO2 volume and convert units: mL -> m^3, C -> K, psi -> Pa
    co2 = sinfo['Injected CO2 (mL)'] / 10**6
    temp = temp + 273.15
    pressure = pressure / 14.504*100000

    #Define ideal gas constant, m^3*Pa/K*mol
    R = 8.314

    #Define list of conditions
    TCD_cond = [co2,pressure,temp,R]

    #Check if there is a peak in the BreakdownDF that can be assigned to CO2
    CO2_bool, volume, BreakdownDF = getCO2(BreakdownDF,DBRF,TCD_cond,peak_error)

    #Without a CO2 peak there is no total volume, so nothing else can be quantified
    if not CO2_bool:
        return BreakdownDF, DBRF, volume, TCD_cond

    #Compare every peak against every database entry
    for peak_index, peak in BreakdownDF.iterrows():
        for _, reference in DBRF.iterrows():

            #Skip database entries whose retention time window does not contain the peak
            if not (reference['RT Min'] <= peak['RT'] <= reference['RT Max']):
                continue

            #Copy the identity of the matched compound into the breakdown dataframe
            BreakdownDF.at[peak_index,'Compound Name'] = reference['Compound Name']
            BreakdownDF.at[peak_index,'Formula'] = reference['Formula']
            BreakdownDF.at[peak_index,'RF (Area/vol.%)'] = reference['RF']
            mol_weight = ChemFormula(reference['Formula']).formula_weight
            BreakdownDF.at[peak_index,'MW (g/mol)'] = mol_weight

            #Get volume percent using response factor
            volpercent = peak['Area']/reference['RF']
            BreakdownDF.at[peak_index,'Vol.%'] = volpercent

            #Get volume using volume percent
            vol = volume*volpercent/100
            BreakdownDF.at[peak_index,'Volume (m^3)'] = vol

            #Get moles using ideal gas law (PV=nRT)
            moles = vol*pressure/(temp*R)
            BreakdownDF.at[peak_index,'Moles (mol)'] = moles

            #Get mass (mg) using moles and molar mass
            BreakdownDF.at[peak_index,'Mass (mg)'] = moles * mol_weight * 1000

    return BreakdownDF, DBRF, volume, TCD_cond
639
+
640
+ #Function for quantifying gas FID data w/o external standard
641
+ def gasFID(BreakdownDF,DBRF,Label_info,sinfo,cutoff=4):
642
+ """
643
+ Function quantifies gas FID data and returns a breakdown dataframe
644
+
645
+ Parameters
646
+ ----------
647
+ BreakdownDF : DataFrame
648
+ Dataframe containing columns associated with matched FID and MS peak data
649
+ DBRF : Dataframe
650
+ Dataframe containing nested dataframes with associated chemical lumps,
651
+ likely imported from an excel sheet where each sheet is specific to
652
+ a given chemical lump. The top-level keys must be associated with the
653
+ predefined chemical lumps given in 'LABELS' section above
654
+ Label_info : List
655
+ List of dictionaries containing chemical lump and compound type abbreviations
656
+ sinfo : Dict
657
+ Dictionary containing key sample information
658
+ cutoff : Integer, optional
659
+ Integer representing the maximum cutoff carbon number that can be
660
+ quantified using FID.The default is 4.
661
+
662
+ Returns
663
+ -------
664
+ BreakdownDF : DataFrame
665
+ Dataframe containing columns associated with matched FID and MS peak data
666
+
667
+ """
668
+ #Function for assigning response factors to compounds
669
def assignRF(BreakdownDF,DBRF):
    """
    Assign a carbon number and a gas FID response factor to every row with a formula.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing 'Compound Name' and 'Formula' columns. It is
        modified in place.
    DBRF : DataFrame
        Gas FID response factor table with 'Compound Name', 'Carbon Number'
        and 'RF' columns.

    Returns
    -------
    BreakdownDF : DataFrame
        The same dataframe with 'Carbon Number', 'RF (Area/vol.%)' and 'RF Source'
        filled in. A compound listed by name in DBRF gets its own response factor;
        any other compound gets the average response factor of the DBRF entries
        sharing its carbon number. Rows without a formula are left untouched.

    Raises
    ------
    KeyError
        If a formula contains no carbon, or if an unlisted compound's carbon
        number lies outside 1..(largest carbon number in DBRF).

    """
    #Get a dictionary of average response factors by carbon number
    avgRF = {}
    max_carbon = DBRF['Carbon Number'].max()

    #The maximum is nan for an empty table and a float when the column holds floats;
    #cast it explicitly because range() only accepts integers
    if not pd.isna(max_carbon):
        for carbon in range(1,int(max_carbon)+1):
            #Average the response factors of all DBRF rows with this carbon number
            avgRF['{0}'.format(carbon)] = DBRF.loc[DBRF['Carbon Number']==carbon,'RF'].mean()

    #Compound names with an explicit response factor; DBRF is not modified below
    listed_names = DBRF['Compound Name'].values

    #Loop through every row in the breakdown dataframe
    for i, row in BreakdownDF.iterrows():
        #Rows without a formula cannot be assigned a carbon number, so skip them
        if pd.isna(row['Formula']):
            continue

        #Obtain a dictionary containing key:value pairs as element:count using the formula string for the ith row
        chemFormDict = ChemFormula(row['Formula']).element
        #Use the carbon entry from the above dictionary to assign a carbon number to the ith row
        BreakdownDF.at[i,'Carbon Number'] = chemFormDict['C']

        #If the row's compound name exists in the RF list explicitly, assign the row to the appropriate RF
        if row['Compound Name'] in listed_names:
            BreakdownDF.at[i,'RF (Area/vol.%)'] = DBRF.loc[DBRF['Compound Name']==row['Compound Name'],'RF'].iloc[0]
            BreakdownDF.at[i,'RF Source'] = 'Direct RF assignment based on compound name'
        #Otherwise, assign response factor based on average carbon number RF
        else:
            BreakdownDF.at[i,'RF (Area/vol.%)'] = avgRF['{0}'.format(int(BreakdownDF.at[i,'Carbon Number']))]
            BreakdownDF.at[i,'RF Source'] = 'RF assignment based on average response factor for DBRF carbon number entries'

    return BreakdownDF
705
+
706
+ #Function for quantifying compounds using ideal gas law
707
def gasQuant(BreakdownDF,DBRF,sinfo,cutoff):
    """
    Quantify gas FID peaks using their response factors and the ideal gas law.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing 'Carbon Number', 'Formula', 'FID Area' and
        'RF (Area/vol.%)' columns. The input is not modified; a filtered copy
        is quantified and returned.
    DBRF : DataFrame
        Gas FID response factor table. Not read by this function.
    sinfo : Dict
        Sample information. 'Reactor Volume (mL)', 'Quench Pressure (psi)' and
        'Quench Temperature (C)' are read from it.
    cutoff : Integer
        Only rows whose carbon number is strictly greater than this are kept.

    Returns
    -------
    BreakdownDF : DataFrame
        Copy of the kept rows with 'MW (g/mol)', 'Vol.%', 'Moles' and 'Mass (mg)'
        filled in.

    """
    #Keep only the rows with a carbon number above the cutoff, working on a copy
    above_cutoff = BreakdownDF['Carbon Number'] > cutoff
    BreakdownDF = BreakdownDF.loc[above_cutoff].copy()

    #Read the sample conditions and convert them: mL -> m^3, psi -> Pa, C -> K
    vol = sinfo['Reactor Volume (mL)'] / 10**6
    pressure = sinfo['Quench Pressure (psi)'] / 14.504*100000
    temp = sinfo['Quench Temperature (C)'] + 273.15

    #Define ideal gas constant, m^3*Pa/K*mol
    R = 8.314

    #Loop through every remaining row
    for peak_index, peak in BreakdownDF.iterrows():

        #Add molecular weight using ChemFormula
        mol_weight = ChemFormula(peak['Formula']).formula_weight
        BreakdownDF.at[peak_index,'MW (g/mol)'] = mol_weight

        #Get volume percent using response factor
        vol_percent = peak['FID Area']/peak['RF (Area/vol.%)']
        BreakdownDF.at[peak_index,'Vol.%'] = vol_percent

        #Get moles using ideal gas law (PV=nRT)
        moles = vol_percent/100*vol*pressure/(temp*R)
        BreakdownDF.at[peak_index,'Moles'] = moles

        #Get mass (mg) using moles and molar mass
        BreakdownDF.at[peak_index,'Mass (mg)'] = moles * mol_weight * 1000

    return BreakdownDF
741
+
742
+ #Function for further breaking down product distribution
743
def moreBreakdown(BreakdownDF,CT_dict,sinfo):
    """
    Label every species with a compound type and carbon number, then total the
    masses by compound type, by carbon number, and by both at once.

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing columns associated with matched FID and MS peak data.
        The 'Compound Type Abbreviation', 'Formula' and 'Mass (mg)' columns are read;
        the 'Compound Type' and 'Carbon Number' columns are written in place.
    CT_dict : Dict
        Dictionary of all compound type abbreviations in use and their associated expansions
    sinfo : Dict
        Dictionary containing sample information. Not used by this function; kept so
        that existing callers do not need to change.

    Returns
    -------
    BreakdownDF : DataFrame
        The input dataframe with 'Compound Type' and 'Carbon Number' filled in.
    CT_DF : DataFrame
        Mass (mg) and mass fraction for each compound type.
    CN_DF : DataFrame
        Mass (mg) for each carbon number from 1 to the largest one present.
    CTCN_DF : DataFrame
        Mass (mg) per compound type (columns) and carbon number (row k holds carbon number k+1).
    mass_DF : DataFrame
        Total mass as computed from each of the four tables above, for cross-checking.

    """

    #Compound types reported in the summaries, in output order
    compound_types = ['Aromatics','Linear Alkanes','Branched Alkanes',
                      'Cycloalkanes','Alkenes/Alkynes','Other']

    #Get the total mass of product from the breakdown dataframe, treating nan as zero
    m_total = np.nansum(BreakdownDF['Mass (mg)'])

    #Iterate through every species and fill in two columns: Compound Type and Carbon Number
    for i, row in BreakdownDF.iterrows():
        try:
            #Expand the compound type abbreviation already in the breakdown dataframe
            BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
            #Use the carbon count from the formula's element:count mapping as the carbon number
            BreakdownDF.at[i,'Carbon Number'] = ChemFormula(row['Formula']).element['C']
        #Labelling is best-effort, so a row that cannot be labelled is left as it is.
        #Exception (rather than a bare except) lets KeyboardInterrupt and SystemExit through
        except Exception:
            continue

    #Make sure both label columns exist even when no row could be labelled
    if 'Compound Type' not in BreakdownDF.columns:
        BreakdownDF['Compound Type'] = None
    if 'Carbon Number' not in BreakdownDF.columns:
        BreakdownDF['Carbon Number'] = np.nan

    masses = BreakdownDF['Mass (mg)']
    type_labels = BreakdownDF['Compound Type']
    carbon_labels = BreakdownDF['Carbon Number']

    #Sum the masses of all rows selected by a boolean mask, treating nan as zero
    def massWhere(mask):
        return np.nansum(masses[mask])

    #Get maximum carbon number in breakdown dataframe (zero when no row has one)
    largest_CN = carbon_labels.max()
    CN_max = 0 if pd.isna(largest_CN) else int(largest_CN)
    carbon_numbers = range(1,CN_max+1)

    #Create a dataframe of quantitative results organized by compound type
    CT_DF = pd.DataFrame({'Compound Type':compound_types,
                          'Mass (mg)':np.array([massWhere(type_labels == ct) for ct in compound_types],dtype=float)})
    #Mass fraction is undefined when there is no product mass at all
    CT_DF['Mass fraction'] = CT_DF['Mass (mg)']/m_total if m_total != 0 else np.nan

    #Create a dataframe of quantitative results organized by carbon number
    CN_DF = pd.DataFrame({'Carbon Number':carbon_numbers,
                          'Mass (mg)':np.array([massWhere(carbon_labels == n) for n in carbon_numbers],dtype=float)})

    #Create a dataframe of quantitative results organized by both compound type and carbon number
    CTCN_DF = pd.DataFrame({ct:np.array([massWhere((carbon_labels == n) & (type_labels == ct)) for n in carbon_numbers],dtype=float)
                            for ct in compound_types},
                           index=range(CN_max))

    #Get total masses from CT, CN, and CTCN dataframes
    CT_mass = np.nansum(CT_DF['Mass (mg)'])
    CN_mass = np.nansum(CN_DF['Mass (mg)'])
    CTCN_mass = np.nansum(CTCN_DF.to_numpy())

    #Create total mass dataframe
    mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],
                            'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

    return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
840
+
841
#Unpack chemical lump and compound type dictionaries from list (only the latter is needed here)
_, CT_Dict = Label_info

#Filter dataframe to remove compounds that do not contain carbon.
#'C' must not be followed by a lowercase letter, so Cl, Ca, Cu, etc. do not count as carbon.
#Rows without a formula are kept (na=True), exactly as before
contains_carbon = BreakdownDF['Formula'].str.contains(r'C(?![a-z])', na=True)
#Reset the dataframe index (the result has to be assigned, reset_index is not in-place)
BreakdownDF = BreakdownDF.loc[contains_carbon].reset_index(drop=True)

#Run response factor assignment function
BreakdownDF = assignRF(BreakdownDF, DBRF)
#Run gas quantification function
BreakdownDF = gasQuant(BreakdownDF,DBRF,sinfo,cutoff)
#Run further breakdown function
BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF, CT_Dict, sinfo)
855
+
856
+ return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
857
+
858
+ #Function for quantifying gas FID data w/ external standard
859
def gasFID_ES(BreakdownDF,DBRF,Label_info,sinfo,gasBag_cond,total_volume,cutoff=4):
    """
    Function quantifies gas FID data using the gas volume found with the
    external standard and returns breakdown dataframes

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing columns associated with matched FID and MS peak data
    DBRF : Dataframe
        Dataframe of gas FID response factors. The 'Compound Name',
        'Carbon Number' and 'RF' columns are read.
    Label_info : List
        List of dictionaries containing chemical lump and compound type abbreviations
    sinfo : Dict
        Dictionary containing key sample information
    gasBag_cond : List
        Gas bag conditions as [temperature (C), pressure (psi)]
    total_volume : Float
        Float describing the total amount of gas estimated by the external standard volume percent
    cutoff : Integer, optional
        Rows with a carbon number at or below this value are removed before
        quantification. The default is 4.

    Returns
    -------
    BreakdownDF : DataFrame
        Dataframe containing columns associated with matched FID and MS peak data,
        restricted to carbon numbers above cutoff and extended with quantification columns
    CT_DF : DataFrame
        Mass (mg) and mass fraction for each compound type
    CN_DF : DataFrame
        Mass (mg) for each carbon number
    CTCN_DF : DataFrame
        Mass (mg) per compound type (columns) and carbon number (rows)
    mass_DF : DataFrame
        Total mass as computed from each of the tables above

    """
    #Compound types reported in the summaries, in output order
    compound_types = ['Aromatics','Linear Alkanes','Branched Alkanes',
                      'Cycloalkanes','Alkenes/Alkynes','Other']

    #Function for assigning response factors to compounds
    def assignRF(BreakdownDF,DBRF):

        #Get a dictionary of average response factors by carbon number,
        #covering every carbon number up to the max in DBRF
        avgRF = {}
        largest_DBRF_CN = DBRF['Carbon Number'].max()
        if not pd.isna(largest_DBRF_CN):
            for carbon_count in range(1,int(largest_DBRF_CN)+1):
                avgRF[carbon_count] = DBRF.loc[DBRF['Carbon Number']==carbon_count,'RF'].mean()

        #Get a dictionary of response factors by compound name, first entry wins for repeated names
        named = DBRF.dropna(subset=['Compound Name']).drop_duplicates(subset='Compound Name',keep='first')
        direct_rf = dict(zip(named['Compound Name'],named['RF']))

        #Loop through every row in the FIDpMS dataframe
        for i, row in BreakdownDF.iterrows():
            #Rows without a formula cannot be given a carbon number, skip them
            if pd.isna(row['Formula']):
                continue

            #Use the carbon count from the formula's element:count mapping as the carbon number
            carbon_count = ChemFormula(row['Formula']).element['C']
            BreakdownDF.at[i,'Carbon Number'] = carbon_count

            #If the row's compound name exists in the RF list explicitly, assign the row to the appropriate RF
            if row['Compound Name'] in direct_rf:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = direct_rf[row['Compound Name']]
                BreakdownDF.at[i,'RF Source'] = 'Direct RF assignment based on compound name'
            #Otherwise, assign response factor based on average carbon number RF
            elif int(carbon_count) in avgRF:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = avgRF[int(carbon_count)]
                BreakdownDF.at[i,'RF Source'] = 'RF assignment based on average response factor for DBRF carbon number entries'
            #Otherwise the carbon number is beyond every DBRF entry: record that instead of raising KeyError
            else:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = np.nan
                BreakdownDF.at[i,'RF Source'] = 'No response factor available for this carbon number in DBRF'

        return BreakdownDF

    #Function for quantifying compounds using ideal gas law
    def gasQuant(BreakdownDF,DBRF,sinfo,total_volume,cutoff):

        #Remove rows in BreakdownDF with a carbon number at or below cutoff
        BreakdownDF = BreakdownDF.loc[BreakdownDF['Carbon Number'] > cutoff].copy()

        #Convert gas bag conditions to SI units
        temp = gasBag_cond[0] + 273.15               #gas bag temperature, C -> K
        pressure = gasBag_cond[1] / 14.504*100000    #gas bag pressure, psi -> Pa

        #Define ideal gas constant, m^3*Pa/K*mol
        R = 8.314

        #Add molecular weight using ChemFormula
        BreakdownDF['MW (g/mol)'] = pd.Series([ChemFormula(formula).formula_weight for formula in BreakdownDF['Formula']],
                                              index=BreakdownDF.index,dtype=float)
        #Get volume percent using response factor
        BreakdownDF['Vol.%'] = BreakdownDF['FID Area']/BreakdownDF['RF (Area/vol.%)']
        #Get moles using ideal gas law (PV=nRT)
        BreakdownDF['Moles'] = BreakdownDF['Vol.%']/100*total_volume*pressure/(temp*R)
        #Get mass (mg) using moles and molar mass
        BreakdownDF['Mass (mg)'] = BreakdownDF['Moles'] * BreakdownDF['MW (g/mol)'] * 1000

        return BreakdownDF

    #Function for further breaking down product distribution
    def moreBreakdown(BreakdownDF,CT_dict,sinfo):
        """
        This function prepares further breakdown dataframes for use in exporting to Excel

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        CT_dict : Dict
            Dictionary of all compound type abbreviations in use and their associated expansions
        sinfo : Dict
            Dictionary containing sample information. Not used by this function.

        Returns
        -------
        BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF : DataFrames
            The labelled breakdown dataframe followed by the masses per compound type,
            per carbon number, per compound type and carbon number, and the total masses.

        """

        #Get the total mass of product from the breakdown dataframe, treating nan as zero
        m_total = np.nansum(BreakdownDF['Mass (mg)'])

        #Iterate through every species and fill in two columns: Compound Type and Carbon Number
        for i, row in BreakdownDF.iterrows():
            try:
                #Expand the compound type abbreviation already in the breakdown dataframe
                BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
                #Use the carbon count from the formula's element:count mapping as the carbon number
                BreakdownDF.at[i,'Carbon Number'] = ChemFormula(row['Formula']).element['C']
            #Labelling is best-effort, so a row that cannot be labelled is left as it is.
            #Exception (rather than a bare except) lets KeyboardInterrupt and SystemExit through
            except Exception:
                continue

        #Make sure both label columns exist even when no row could be labelled
        if 'Compound Type' not in BreakdownDF.columns:
            BreakdownDF['Compound Type'] = None
        if 'Carbon Number' not in BreakdownDF.columns:
            BreakdownDF['Carbon Number'] = np.nan

        masses = BreakdownDF['Mass (mg)']
        type_labels = BreakdownDF['Compound Type']
        carbon_labels = BreakdownDF['Carbon Number']

        #Sum the masses of all rows selected by a boolean mask, treating nan as zero
        def massWhere(mask):
            return np.nansum(masses[mask])

        #Get maximum carbon number in breakdown dataframe (zero when no row has one)
        largest_CN = carbon_labels.max()
        CN_max = 0 if pd.isna(largest_CN) else int(largest_CN)
        carbon_numbers = range(1,CN_max+1)

        #Create a dataframe of quantitative results organized by compound type
        CT_DF = pd.DataFrame({'Compound Type':compound_types,
                              'Mass (mg)':np.array([massWhere(type_labels == ct) for ct in compound_types],dtype=float)})
        #Mass fraction is undefined when there is no product mass at all
        CT_DF['Mass fraction'] = CT_DF['Mass (mg)']/m_total if m_total != 0 else np.nan

        #Create a dataframe of quantitative results organized by carbon number
        CN_DF = pd.DataFrame({'Carbon Number':carbon_numbers,
                              'Mass (mg)':np.array([massWhere(carbon_labels == n) for n in carbon_numbers],dtype=float)})

        #Create a dataframe of quantitative results organized by both compound type and carbon number
        CTCN_DF = pd.DataFrame({ct:np.array([massWhere((carbon_labels == n) & (type_labels == ct)) for n in carbon_numbers],dtype=float)
                                for ct in compound_types},
                               index=range(CN_max))

        #Get total masses from CT, CN, and CTCN dataframes
        CT_mass = np.nansum(CT_DF['Mass (mg)'])
        CN_mass = np.nansum(CN_DF['Mass (mg)'])
        CTCN_mass = np.nansum(CTCN_DF.to_numpy())

        #Create total mass dataframe
        mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],
                                'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

        return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF

    #Unpack chemical lump and compound type dictionaries from list (only the latter is needed here)
    _, CT_Dict = Label_info

    #Filter dataframe to remove compounds that do not contain carbon.
    #'C' must not be followed by a lowercase letter, so Cl, Ca, Cu, etc. do not count as carbon.
    #Rows without a formula are kept (na=True), exactly as before
    contains_carbon = BreakdownDF['Formula'].str.contains(r'C(?![a-z])', na=True)
    #Reset the dataframe index (the result has to be assigned, reset_index is not in-place)
    BreakdownDF = BreakdownDF.loc[contains_carbon].reset_index(drop=True)

    #Run response factor assignment function
    BreakdownDF = assignRF(BreakdownDF, DBRF)
    #Run gas quantification function
    BreakdownDF = gasQuant(BreakdownDF,DBRF,sinfo,total_volume,cutoff)
    #Run further breakdown function
    BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF, CT_Dict, sinfo)

    return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
1075
+
1076
+ #Define function that inserts a column to a CTCN Dataframe labeling the carbon number
1077
def insertCN(CTCN_DF):
    """
    Prepend a 'Carbon Number' column that numbers the rows 1, 2, ..., len(CTCN_DF).

    Parameters
    ----------
    CTCN_DF : DataFrame
        Dataframe with one row per carbon number. It is modified in place.

    Returns
    -------
    CTCN_DF : DataFrame
        The same dataframe object, now with 'Carbon Number' as its first column.

    """
    #The number of rows is taken to be the maximum carbon number
    carbon_numbers = list(range(1,len(CTCN_DF)+1))

    #Place the labels in front of every existing column
    CTCN_DF.insert(0,'Carbon Number',carbon_numbers)

    return CTCN_DF
1089
+
1090
+ """ DATA IMPORTS """
1091
print("[AutoQuantification] Importing data...")
#Import sample information from json file
with open(os.path.join(DF_Dir,sname,sname+'_INFO.json')) as sinfo_f:
    sinfo = json.load(sinfo_f)

#Change ISO date-time strings into datetime objects
for time_key in ('Start Time','End Time'):
    sinfo[time_key] = datetime.fromisoformat(sinfo[time_key])

#Calculate a reaction time (hours) using the start, end, and heat time values and add to sinfo
sinfo['Reaction Time'] = abs(sinfo['End Time']-sinfo['Start Time']).total_seconds()/3600 - sinfo['Heat Time']

#Dictionary of substrings to add to sample name to create file names
#(the liquid labelled MS peaks entry now carries the .csv extension like its gas counterpart)
file_suffixes = {'Gas TCD+FID':'_GS2_TCD_CSO.csv',
                 'Gas Labelled MS Peaks':'_GS1_UA_Comp_UPP.csv',
                 'Gas FID+MS':'_GS2_FIDpMS.csv',
                 'Liquid FID':'_LQ1_FID_CSO.csv',
                 'Liquid Labelled MS Peaks':'_LQ1_UA_Comp_UPP.csv',
                 'Liquid FID+MS':'_LQ1_FIDpMS.csv'}

#Use sample name to form file names: every entry is [substring, full pathname]
sub_Dict = {key:[suffix,os.path.join(DFR_Dir,sname+suffix)] for key, suffix in file_suffixes.items()}

#If the run liquid analysis Boolean is True..
if lgTF[0]:
    #DEFINE FILE PATHS FOR LIQUID FID QUANTIFICATION
    #Path of liquid matched MS and FID peaks
    DIR_LQ1_FIDpMS = sub_Dict['Liquid FID+MS'][1]
    #Path of liquid response factors
    DIR_LQRF = os.path.join(RF_Dir,LRF_file)

    #Read matched peak data between liquid FID and MS
    LQ1_FIDpMS = pd.read_csv(DIR_LQ1_FIDpMS)

    #Filter FIDpMS to only include rows with non-NaN compounds
    LQ1_FIDpMS_Filtered = LQ1_FIDpMS[LQ1_FIDpMS['Compound Name'].notnull()].reset_index(drop=True)

    #Create a duplicate of the FIDpMS dataframe for future saving as a breakdown
    LQ_FID_BreakdownDF = LQ1_FIDpMS_Filtered.copy()

    #Read liquid response factors data, one sheet per chemical lump.
    #Passing the list of sheet names opens the workbook once and returns {sheet name: dataframe}
    LQRF = pd.read_excel(DIR_LQRF,sheet_name=list(CL_Dict.keys()))

#If the run gas analysis Boolean is True..
if lgTF[1]:
    #DEFINE FILE PATHS FOR GAS TCD AND FID QUANTIFICATION
    #Path of gas TCD peaks
    DIR_GS2_TCD = sub_Dict['Gas TCD+FID'][1]
    #Path of gas FID peaks
    DIR_GS2_FIDpMS = sub_Dict['Gas FID+MS'][1]
    #Path of gas TCD response factors
    DIR_TCDRF = os.path.join(RF_Dir,GRFT_file)
    #Path of gas FID response factors
    DIR_FIDRF = os.path.join(RF_Dir,GRF_file)

    #Read gas FID and TCD Peak data
    GS2_TCD = pd.read_csv(DIR_GS2_TCD)

    #Create a duplicate of the gas TCD/FID dataframe for future saving as a breakdown
    #Also filter breakdown dataframe to only include rows sourced from TCD
    GS_TCD_BreakdownDF = GS2_TCD.loc[GS2_TCD['Signal Name'] == 'TCD2B'].copy()

    #Read matched peak data between gas FID and MS
    GS2_FIDpMS = pd.read_csv(DIR_GS2_FIDpMS)

    #Create a duplicate of the FIDpMS dataframe for future saving as a breakdown
    GS_FID_BreakdownDF = GS2_FIDpMS.copy()

    #Read gas TCD response factors data
    TCDRF = pd.read_csv(DIR_TCDRF)
    #Read gas FID response factors data
    GSRF = pd.read_csv(DIR_FIDRF)
1170
+
1171
+ """ MAIN SCRIPT """
1172
+
1173
#If the run liquid analysis Boolean is True..
if lgTF[0]:
    print("[AutoQuantification] Analyzing liquids...")
    #Get liquid FID breakdown and miscellaneous dataframes
    LQ_FID_BreakdownDF, LQCT_DF, LQCN_DF, LQCTCN_DF, LQmass_DF = liquidFID(LQ_FID_BreakdownDF, LQRF, [CL_Dict, CT_Dict], sinfo)

    #Insert the carbon number column to LQCTCN_DF
    LQCTCN_DF = insertCN(LQCTCN_DF)

#If the run gas analysis Boolean is True..
if lgTF[1]:
    print("[AutoQuantification] Analyzing gases...")
    #If the external standard Boolean is True..
    if ES_bool:
        #Get gas TCD breakdown and miscellaneous dataframes
        GS_TCD_BreakdownDF, TCDRF, total_volume, TCD_cond = gasTCD_ES(GS_TCD_BreakdownDF,TCDRF,sinfo,[gasBag_temp,gasBag_pressure],peak_error)

        #Get gas FID breakdown and miscellaneous dataframes
        GS_FID_BreakdownDF, GSCT_DF, GSCN_DF, GSCTCN_DF, GSmass_DF = gasFID_ES(GS_FID_BreakdownDF,GSRF,[CL_Dict, CT_Dict], sinfo,[gasBag_temp,gasBag_pressure],total_volume)
    #Otherwise..
    else:
        #Get gas TCD breakdown and miscellaneous dataframes
        GS_TCD_BreakdownDF, TCDRF, TCD_cond = gasTCD(GS_TCD_BreakdownDF,TCDRF,sinfo,peak_error)

        #Get gas FID breakdown and miscellaneous dataframes
        GS_FID_BreakdownDF, GSCT_DF, GSCN_DF, GSCTCN_DF, GSmass_DF = gasFID(GS_FID_BreakdownDF,GSRF,[CL_Dict, CT_Dict], sinfo)

    #Insert the carbon number column to GSCTCN_DF
    GSCTCN_DF = insertCN(GSCTCN_DF)

#If both the gas and liquid analysis Booleans are True..
if lgTF[0] and lgTF[1]:
    print("[AutoQuantification] Totaling contributions from liquid and gas phases...")
    #Compound types that make up the columns of the total dataframe
    compound_types = ['Aromatics','Linear Alkanes','Branched Alkanes',
                      'Cycloalkanes','Alkenes/Alkynes','Other']

    #Get maximum carbon number between breakdown dataframes
    CN_max = max(int(GS_FID_BreakdownDF['Carbon Number'].max()),int(LQ_FID_BreakdownDF['Carbon Number'].max()))

    #Sum the liquid and gas breakdown carbon number and compound type dataframes.
    #Both are stretched to CN_max rows first; a carbon number missing from one phase counts as zero
    row_labels = range(CN_max)
    liquid_part = LQCTCN_DF[compound_types].reindex(row_labels).fillna(0.0)
    gas_part = GSCTCN_DF[compound_types].reindex(row_labels).fillna(0.0)
    total_CTCN_DF = liquid_part + gas_part

    #Add the TCD data afterwards
    #Filter the TCD breakdown dataframe to only include entries with non-nan formulas
    GS_TCD_BreakdownDF_filter = GS_TCD_BreakdownDF[GS_TCD_BreakdownDF['Formula'].notnull()]
    #Filter the TCD breakdown dataframe to only include formulas with both carbon and hydrogen.
    #The element symbol must not be followed by a lowercase letter, so Cl, Ca, He, Hg, etc. do not match
    TCD_formulas = GS_TCD_BreakdownDF_filter['Formula']
    GS_TCD_BreakdownDF_filter = GS_TCD_BreakdownDF_filter[TCD_formulas.str.contains(r'C(?![a-z])') & TCD_formulas.str.contains(r'H(?![a-z])')]

    #For every row in this filtered TCD dataframe
    for _, row in GS_TCD_BreakdownDF_filter.iterrows():
        #A peak without a quantified mass has nothing to contribute
        if pd.isna(row['Mass (mg)']):
            continue

        #Get the carbon number from the row's formula
        carbon_count = ChemFormula(row['Formula']).element['C']

        #If the carbon number is less than four, use the linear entry for the given carbon number
        if carbon_count < 4:
            row_label, column = carbon_count-1, 'Linear Alkanes'
        #Otherwise, if the compound is isobutane, use the branched entry for carbon number 4
        elif row['Compound Name'] == 'Isobutane':
            row_label, column = 3, 'Branched Alkanes'
        #Otherwise, if the compound is butane, use the linear entry for carbon number 4
        elif row['Compound Name'] == 'n-Butane':
            row_label, column = 3, 'Linear Alkanes'
        #Otherwise, the compound is not added to the total
        else:
            continue

        #Add (rather than assign) the mass, so that several TCD species sharing a carbon number
        #and any liquid/FID mass already in the entry are all kept
        total_CTCN_DF.at[row_label,column] += row['Mass (mg)']

    #Insert the carbon number column to total_CTCN_DF
    total_CTCN_DF = insertCN(total_CTCN_DF)
1268
+
1269
+ """ BREAKDOWN SAVING """
1270
print("[AutoQuantification] Formatting and saving breakdown file...")
#If breakdown directory does not exist within sample folder, create it
os.makedirs(DFbreak_Dir,exist_ok=True)

#Define breakdown file name
bfn = sname+"_Breakdown_"+nows+".xlsx"

#Get dataframe for sample info
sinfo_DF = pd.DataFrame(sinfo,index=[0])

#Create pandas Excel writer; the with block closes (and saves) it even if a sheet fails to write
with pd.ExcelWriter(fileCheck(os.path.join(DFbreak_Dir,bfn)), engine="xlsxwriter") as writer:

    #If the run liquid analysis Boolean is True..
    if lgTF[0]:
        #Position the liquid FID dataframes in the worksheet.
        sinfo_DF.to_excel(writer, sheet_name="Liquid FID",startcol=1, startrow=1, index=False)
        LQ_FID_BreakdownDF.to_excel(writer, sheet_name="Liquid FID",startcol=1, startrow=4, index=False)
        LQCT_DF.to_excel(writer, sheet_name="Liquid FID",startcol=16, startrow=7, index=False)
        LQCN_DF.to_excel(writer, sheet_name="Liquid FID", startcol=16, startrow=15, index=False)
        LQmass_DF.to_excel(writer, sheet_name="Liquid FID",startcol=22, startrow=1,index=False)
        LQCTCN_DF.to_excel(writer, sheet_name="Liquid FID", startcol=20, startrow=7, index=False)

    #If the run gas analysis Boolean is True..
    if lgTF[1]:
        #Position the gas FID dataframes in the worksheet.
        sinfo_DF.to_excel(writer, sheet_name="Gas FID",startcol=1, startrow=1, index=False)
        GS_FID_BreakdownDF.to_excel(writer, sheet_name="Gas FID",startcol=1, startrow=4, index=False)
        GSCT_DF.to_excel(writer, sheet_name="Gas FID",startcol=18, startrow=7, index=False)
        GSCN_DF.to_excel(writer, sheet_name="Gas FID", startcol=18, startrow=15, index=False)
        GSmass_DF.to_excel(writer, sheet_name="Gas FID",startcol=22, startrow=1,index=False)
        GSCTCN_DF.to_excel(writer, sheet_name="Gas FID",startcol=22, startrow=7,index=False)

        #Expand sample info dataframe to include total TCD mass and gas bag volume
        sinfo_DF.at[0,'Total product (mg)'] = GS_TCD_BreakdownDF['Mass (mg)'].sum()
        #The gas bag volume is only estimated when an external standard was used
        if ES_bool:
            sinfo_DF.at[0,'Gas bag volume (m^3)'] = total_volume

        #Position the gas TCD dataframes in the worksheet
        GS_TCD_BreakdownDF.to_excel(writer, sheet_name="Gas TCD",startcol=1,startrow=4, index=False)
        sinfo_DF.to_excel(writer, sheet_name="Gas TCD",startcol=1, startrow=1, index=False)

    #If both the gas and liquid analysis Booleans are True..
    if lgTF[0] and lgTF[1]:
        #Position the total product dataframe in the worksheet
        total_CTCN_DF.to_excel(writer, sheet_name = "Total",startcol=1, startrow=1,index=False)

#Log that a new Excel breakdown has been saved
logger.info("New breakdown created: " + bfn)

print("[AutoQuantification] Matching complete.")
1328
+ #Close main function by returning
1329
+ return None