chromaquant 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1305 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+
5
+ COPYRIGHT STATEMENT:
6
+
7
+ ChromaQuant – A quantification software for complex gas chromatographic data
8
+
9
+ Copyright (c) 2024, by Julia Hancock
10
+ Affiliation: Dr. Julie Elaine Rorrer
11
+ URL: https://www.rorrerlab.com/
12
+
13
+ License: BSD 3-Clause License
14
+
15
+ ---
16
+
17
+ SCRIPT TO QUANTIFY COMPOUNDS IN SAMPLE USING DEFINED RESPONSE FACTORS
18
+
19
+ Julia Hancock
20
+ Started 12/14/2023
21
+ Updated 7/24/2024
22
+
23
+ """
24
+
25
+ """ PACKAGES """
26
+ import sys
27
+ import pandas as pd
28
+ import os
29
+ from molmass import Formula
30
+ import math
31
+ import numpy as np
32
+ from chemformula import ChemFormula
33
+ import json
34
+ from datetime import datetime
35
+ import logging
36
+
37
+ """ SAMPLE INFO """
38
#Sample name; the sample's data folder is located under this name (see DF_Dir)
sname = "example2"

#Switches stating whether or not to run liquid and gas analysis
lgTF = [True, True]

#Retention time error within which TCD peaks may be assigned
peak_error = 0.5

#Whether or not an external standard was used for gas analysis
ES_bool = True

#Temperature and pressure of the gas bag used in sample injection
gasBag_temp = 18        #C
gasBag_pressure = 14.7  #psi


""" RESPONSE FACTOR INFO """
#File name of the liquid response factors
LRF_file = "LRF_7-24-24.xlsx"
#File name of the FID gas response factors
GRF_file = "FIDRF_7-24-24.csv"
#File name of the TCD gas response factors
GRFT_file = "TCDRF_7-24-24.csv"

""" DIRECTORIES """

#Main directory (every other directory below is derived from it)
cwd = "/Users/connards/Desktop/University/Rorrer Lab/Scripts/AutoQuant/"
#Data files directory of the current sample
DF_Dir = f"{cwd}data/{sname}/"
#Raw data files directory
DFR_Dir = f"{DF_Dir}raw data/"
#Response factors directory
RF_Dir = f"{cwd}response-factors/"
#Resources directory
RE_Dir = f"{cwd}resources/"
#Data file log directory
DFlog_Dir = f"{DF_Dir}log/"
#Data file breakdowns directory
DFbreak_Dir = f"{DF_Dir}breakdowns/"
79
+
80
+ """ LOGGING """
81
#Current datetime and its YYYYMMDD string, used to name the log file
now = datetime.now()
nows = f"{now:%Y%m%d}"

#Create the log directory inside the sample folder if it does not exist yet
if not os.path.exists(DFlog_Dir):
    os.makedirs(DFlog_Dir)

#Build the log file handler for today's log file and attach its formatter
formatter = logging.Formatter('[%(filename)s] %(asctime)s - [%(levelname)s]: %(message)s')
fh = logging.FileHandler(f"{DFlog_Dir}quantlog_{nows}.log")
fh.setFormatter(formatter)

#Instantiate the module logger: INFO level, writing to the file handler
#and not propagating records to ancestor loggers
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.propagate = False
logger.addHandler(fh)
101
+
102
+
103
+ """ LABELS """
104
+
105
#Dictionary of all chemical lump abbreviations in use and their associated expansions
#OLD DICTIONARY (kept for reference only)
#CL_Dict = {'MBE':'Methyl benzenes', 'ABE':'Alkyl benzenes', 'NAP':'Naphthalenes', 'MAL':'Methyl alkanes',
#           'DAL':'Dimethyl alkanes','TAL':'Trimethyl alkanes','MCA':'Methyl cycloalkanes','ACA':'Alkyl cycloalkanes',
#           'AAL':'Alkyl alkanes','MAE':'Methyl alkenes','DAE':'Dimethyl alkenes','AAE':'Alkyl alkenes',
#           'LAL':'Linear alkanes','CAE':'Cycloalkenes','IND':'Indenes','PAH':'Polycyclic aromatic hydrocarbons',
#           'AKY':'Alkynes'}

#7-24-24: The CL_Dict infrastructure could have been removed, but it is kept in place in case
#more complexity is added to response factor assignment later

#Chemical lump abbreviations and their expansions, alphabetized by abbreviation
CL_Dict = dict(sorted({'A':'Aromatics',
                       'L':'Linear Alkanes',
                       'B':'Branched Alkanes',
                       'C':'Cycloalkanes',
                       'E':'Alkenes/Alkynes'}.items()))

#Compound type abbreviations and their expansions, alphabetized by abbreviation:
#the same entries as the lumps plus 'O' for Other
CT_Dict = dict(sorted({**CL_Dict,'O':'Other'}.items()))
129
+
130
+ """ FUNCTIONS """
131
+
132
+ #Function for checking if file exists and adding number if so
133
def fileCheck(path):
    """
    Function returns a path that does not exist yet, derived from the given path

    Parameters
    ----------
    path : Str
        Desired file path.

    Returns
    -------
    candidate : Str
        The given path if nothing exists there; otherwise the path with
        " (1)", " (2)", ... inserted before the extension, using the first
        number for which no file exists.

    """
    #Inspired by https://stackoverflow.com/questions/13852700/create-file-but-if-name-exists-add-number
    stem, suffix = os.path.splitext(path)
    candidate = path
    counter = 0

    #Keep numbering until an unused name is found
    while os.path.exists(candidate):
        counter += 1
        candidate = f"{stem} ({counter}){suffix}"

    return candidate
143
+
144
+ #Function for quantifying liquid FID data
145
def liquidFID(BreakdownDF,DBRF,Label_info,sinfo):
    """
    Function quantifies liquid FID data against an internal standard and
    returns the peak breakdown together with summary breakdowns

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing columns associated with matched FID and MS peak data.
        The columns 'Compound Name', 'Formula', 'Compound Type Abbreviation' and
        'FID Area' are read; result columns are written into this same dataframe.
    DBRF : Dict
        Response factor sheets (DataFrames) indexed as DBRF[compound type abbreviation].
        Each sheet is read for 'Compound Name', 'Response Factor' and the
        'Linear fit m' / 'Linear fit b' entries at row label 0.
    Label_info : List
        Two dictionaries, [CL_Dict, CT_Dict], mapping chemical lump and compound
        type abbreviations to their full names.
    sinfo : Dict
        Sample information; 'Internal Standard Mass (mg)' and
        'Internal Standard Name' are read.

    Returns
    -------
    List
        [BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF]: the breakdown with
        response factors, area ratios and masses, followed by the mass
        breakdowns by compound type, by carbon number, by both, and a table
        of the total mass obtained from each breakdown.

    Raises
    ------
    ValueError
        If the internal standard has no usable FID area in BreakdownDF, or if
        no row of BreakdownDF has a carbon number.

    """
    #Unpack chemical lump and compound type dictionaries from list
    CL_Dict, CT_Dict = Label_info

    #Name of the response factor column, written by assignRF and read by quantMain
    RF_col = 'Response Factor ((A_i/A_T)/(m_i/m_T))'

    #Function to assign compound type and carbon number to compound using formula
    def assignCTCN(BreakdownDF,CT_dict):
        """
        Function adds 'Compound Type' and 'Carbon Number' entries to every row
        whose type abbreviation and formula can be resolved

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        CT_dict : Dict
            Dictionary of all compound type abbreviations in use and their associated expansions

        Returns
        -------
        BreakdownDF : DataFrame
            The same dataframe, modified in place.

        """
        for i, row in BreakdownDF.iterrows():
            try:
                #Set compound type according to the abbreviation already in the breakdown dataframe
                BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
                #Use the carbon count of the formula's element dictionary as carbon number
                BreakdownDF.at[i,'Carbon Number'] = ChemFormula(row['Formula']).element['C']
            except Exception:
                #Deliberate best effort: a row whose abbreviation or formula cannot be resolved
                #keeps its current values. The two assignments stay in one try block on purpose,
                #so a row with an unknown abbreviation gets no carbon number either and is
                #reported by assignRF as missing information instead of being looked up in DBRF.
                #Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
                continue

        return BreakdownDF

    #Function to assign response factor by carbon number and compound type
    def assignRF(BreakdownDF,DBRF,CL_Dict):
        """
        Function takes a dataframe containing matched FID and MS peak information and
        compares it against a provided response factor database to assign response
        factors to the matched peak dataframe.

        An exact compound name match in the sheet of the compound's type is preferred,
        then the carbon number linear fit of that sheet, then an assumed value of 1.

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data
        DBRF : Dict
            Response factor sheets indexed by compound type abbreviation
        CL_Dict : Dict
            Dictionary containing key:value pairs defined as
            (chemical lump abbreviation):(full chemical lump name). Not read here.

        Returns
        -------
        BreakdownDF : DataFrame
            The same dataframe with the response factor and 'RF Source' columns filled in

        """
        #Helper writing a response factor and the reason it was chosen to row i
        def setRF(i,RF,source):
            BreakdownDF.at[i,RF_col] = RF
            BreakdownDF.at[i,'RF Source'] = source

        #Loop through every labelled peak in the breakdown DataFrame
        for i, row in BreakdownDF.iterrows():
            cmp_name = row['Compound Name']
            cmp_carbon = row['Carbon Number']
            cmp_type = row['Compound Type Abbreviation']

            #If any of these pieces of information is NAN, skip the row and set the RF Source accordingly
            if pd.isna(cmp_name) or pd.isna(cmp_carbon) or pd.isna(cmp_type):
                BreakdownDF.at[i,'RF Source'] = 'No RF assigned, at least one of the following were missing: compound name, formula, or type abbreviation'
                continue

            #If the compound type is Other, "O", skip the row and set the RF source accordingly
            if cmp_type == "O":
                BreakdownDF.at[i,'RF Source'] = 'No RF assigned, compound type is listed as "Other"'
                continue

            #Response factors listed for this exact compound in the sheet of its compound type
            sheet = DBRF[cmp_type]
            listed_RF = sheet.loc[sheet['Compound Name'] == cmp_name,'Response Factor']

            #If the compound name is in the sheet, use its first listed response factor
            if not listed_RF.empty:
                RF = listed_RF.iloc[0]
                if pd.isna(RF):
                    setRF(i,1,'Assumed 1, compound found in RF sheet without RF')
                else:
                    setRF(i,RF,'Assigned empirical RF, exact compound found in response factors sheet')
                continue

            #Otherwise, get the m and b parameters listed in the RF linear fit for that compound type
            fit_m = sheet.loc[0,'Linear fit m']
            fit_b = sheet.loc[0,'Linear fit b']

            #A fit needs both parameters: if either one is nan the estimate would be nan and
            #would pass the range check below unnoticed, so assume 1 in that case as well
            if pd.isna(fit_m) or pd.isna(fit_b):
                setRF(i,1,'Assumed 1, compound type does not have a carbon number fit')
                continue

            #Get response factor using fit and carbon number
            RF = fit_m*cmp_carbon+fit_b

            #If the estimated response factor is negative or larger than 5, set RF to 1
            if RF < 0 or RF > 5:
                setRF(i,1,'Assumed 1, could estimate a response factor exists but is out of range (negative or over 5)')
            else:
                setRF(i,RF,'Assigned using carbon number linear fit for compound type {0} and carbon number {1}'.format(cmp_type,int(cmp_carbon)))

        return BreakdownDF

    def quantMain(BreakdownDF,sinfo):
        """
        Function that takes in matched FID and MS data with assigned response factors
        and returns quantitative data

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        sinfo : Dict
            Sample information providing 'Internal Standard Mass (mg)' and
            'Internal Standard Name'.

        Returns
        -------
        BreakdownDF : DataFrame
            The same dataframe with 'A_i/A_T' and 'm_i' filled in for every row that
            is neither the internal standard nor labelled 'No match'/'No Match'.

        Raises
        ------
        ValueError
            If no row named as the internal standard has an FID area.

        """
        #Get internal standard mass and name from sinfo
        IS_m = sinfo['Internal Standard Mass (mg)']
        IS_name = sinfo['Internal Standard Name']

        #Collect the FID areas of every peak labelled as the internal standard
        IS_areas = BreakdownDF.loc[BreakdownDF['Compound Name'] == IS_name,'FID Area']
        if IS_areas.dropna().empty:
            raise ValueError('Internal standard "{0}" has no FID area in the breakdown dataframe'.format(IS_name))

        #If the internal standard is listed more than once, take the largest area peak
        IS_Area = IS_areas.max()

        #Names of rows that are not quantified
        skipped_names = (IS_name,'No match','No Match')

        #Loop through breakdown dataframe, calculating an area ratio and mass for each row
        for i, row in BreakdownDF.iterrows():
            if row['Compound Name'] in skipped_names:
                continue

            #Calculate area ratio
            Aratio = row['FID Area']/IS_Area
            #Calculate mass using response factor column
            m_i = Aratio*IS_m/row[RF_col]
            #Assign area ratio and mass to their respective columns in the breakdown dataframe
            BreakdownDF.at[i,'A_i/A_T'] = Aratio
            BreakdownDF.at[i,'m_i'] = m_i

        return BreakdownDF

    def moreBreakdown(BreakdownDF,CT_dict,sinfo):
        """
        This function prepares further breakdown dataframes for use in exporting to Excel

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        CT_dict : Dict
            Dictionary of all compound type abbreviations in use and their associated expansions. Not read here.
        sinfo : Dict
            Dictionary containing sample information. Not read here.

        Returns
        -------
        BreakdownDF : DataFrame
            Unchanged breakdown dataframe.
        CT_DF : DataFrame
            Mass and mass fraction per compound type.
        CN_DF : DataFrame
            Mass per carbon number, from 1 to the largest carbon number present.
        CTCN_DF : DataFrame
            Mass per compound type (columns) and carbon number (row i holds carbon number i+1).
        mass_DF : DataFrame
            Total mass as obtained from the breakdown and from each of the tables above.

        Raises
        ------
        ValueError
            If no row of BreakdownDF has a carbon number.

        """
        masses = BreakdownDF['m_i']
        types = BreakdownDF['Compound Type']
        carbons = BreakdownDF['Carbon Number']

        #Get the total mass of product from the breakdown dataframe, treating nan as zero
        m_total = np.nansum(masses)

        #Get maximum carbon number in breakdown dataframe
        largest_carbon = carbons.max()
        if pd.isna(largest_carbon):
            raise ValueError('No carbon numbers available in the breakdown dataframe')
        CN_max = int(largest_carbon)

        type_names = ['Aromatics','Linear Alkanes','Branched Alkanes',
                      'Cycloalkanes','Alkenes/Alkynes','Other']
        carbon_numbers = range(1,CN_max+1,1)

        #Row selections are computed once and reused by all three tables
        type_masks = {name: types == name for name in type_names}
        carbon_masks = [carbons == number for number in carbon_numbers]

        #Quantitative results organized by compound type
        CT_DF = pd.DataFrame({'Compound Type':type_names,
                              'Mass (mg)':np.array([np.nansum(masses[type_masks[name]]) for name in type_names],dtype=float)})
        CT_DF['Mass fraction'] = CT_DF['Mass (mg)']/m_total

        #Quantitative results organized by carbon number
        CN_DF = pd.DataFrame({'Carbon Number':carbon_numbers,
                              'Mass (mg)':np.array([np.nansum(masses[mask]) for mask in carbon_masks],dtype=float)})

        #Quantitative results organized by both compound type and carbon number
        CTCN_DF = pd.DataFrame({name: np.array([np.nansum(masses[mask & type_masks[name]]) for mask in carbon_masks],dtype=float)
                                for name in type_names},
                               index=range(CN_max))

        #Get total masses from CT, CN, and CTCN dataframes
        CT_mass = np.nansum(CT_DF['Mass (mg)'])
        CN_mass = np.nansum(CN_DF['Mass (mg)'])
        CTCN_mass = np.nansum(CTCN_DF.to_numpy())

        #Create total mass dataframe
        mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],
                                'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

        return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF

    #Use the assignCTCN function to assign compound type and carbon number
    BreakdownDF = assignCTCN(BreakdownDF,CT_Dict)

    #Use the assignRF function to assign response factors, preferring empirical RF's to estimated ones and assigning 1 when no other RF can be applied
    BreakdownDF = assignRF(BreakdownDF,DBRF,CL_Dict)

    #Use the quantMain function to add quantitative data to BreakdownDF
    BreakdownDF = quantMain(BreakdownDF,sinfo)

    #Use the moreBreakdown function to prepare compound type and carbon number breakdowns
    BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF,CT_Dict,sinfo)

    return [BreakdownDF,CT_DF,CN_DF,CTCN_DF,mass_DF,]
426
+
427
+ #Function for quantifying gas TCD data w/o external standard
428
def gasTCD(BreakdownDF,DBRF,sinfo,peak_error):
    """
    Function assigns TCD peaks to compounds by retention time and quantifies
    them with the ideal gas law at the reactor quench conditions

    Parameters
    ----------
    BreakdownDF : DataFrame
        TCD peak data; the 'RT' and 'Area' columns are read and the assignment
        and quantification columns are written into this same dataframe.
    DBRF : DataFrame
        TCD response factors with 'Compound Name', 'Formula', 'RT (min)' and
        'RF' columns; 'RT Max' and 'RT Min' columns are added to it.
    sinfo : Dict
        Sample information; 'Reactor Volume (mL)', 'Quench Pressure (psi)' and
        'Quench Temperature (C)' are read.
    peak_error : Float
        Retention time error within which a peak may be assigned to an entry.

    Returns
    -------
    BreakdownDF : DataFrame
        Breakdown with compound name, formula, response factor, molecular
        weight, vol.%, moles and mass (mg) for every assigned peak.
    DBRF : DataFrame
        Response factors including the retention time windows.
    List
        [volume (m^3), pressure (Pa), temperature (K)] used in the calculation.

    """
    #Attach the retention time window to every response factor entry
    for entry in DBRF.index:
        DBRF.at[entry,'RT Max'] = DBRF.at[entry,'RT (min)'] + peak_error
        DBRF.at[entry,'RT Min'] = DBRF.at[entry,'RT (min)'] - peak_error

    #Reactor volume (mL), sample pressure (psi) and sample temperature (C) from sinfo
    vol, pressure, temp = (sinfo[key] for key in ('Reactor Volume (mL)',
                                                  'Quench Pressure (psi)',
                                                  'Quench Temperature (C)'))

    #Convert to m^3, Pa and K
    vol = vol / 10**6
    pressure = pressure / 14.504*100000
    temp = temp + 273.15

    #Ideal gas constant, m^3*Pa/K*mol
    R = 8.314

    #Compare every peak against every response factor entry
    for peak, peak_row in BreakdownDF.iterrows():
        for _, rf_row in DBRF.iterrows():

            #Skip entries whose retention time window does not contain the peak
            if not (rf_row['RT Min'] <= peak_row['RT'] <= rf_row['RT Max']):
                continue

            #Label the peak; when several entries match, the last one listed in DBRF prevails
            BreakdownDF.at[peak,'Compound Name'] = rf_row['Compound Name']
            BreakdownDF.at[peak,'Formula'] = rf_row['Formula']
            BreakdownDF.at[peak,'RF (Area/vol.%)'] = rf_row['RF']
            BreakdownDF.at[peak,'MW (g/mol)'] = ChemFormula(rf_row['Formula']).formula_weight

            #Volume percent from the peak area and the response factor
            BreakdownDF.at[peak,'Vol.%'] = peak_row['Area']/rf_row['RF']

            #Moles from the ideal gas law (PV=nRT)
            BreakdownDF.at[peak,'Moles'] = BreakdownDF.at[peak,'Vol.%']/100*vol*pressure/(temp*R)

            #Mass (mg) from moles and molar mass
            BreakdownDF.at[peak,'Mass (mg)'] = BreakdownDF.at[peak,'Moles'] * BreakdownDF.at[peak,'MW (g/mol)'] * 1000

    return BreakdownDF, DBRF, [vol, pressure, temp]
479
+
480
+ #Function for quantifying gas TCD data w/ external standard
481
def gasTCD_ES(BreakdownDF,DBRF,sinfo,gasBag_cond,peak_error):
    """
    Function quantifies gas TCD data using an injected volume of CO2 as
    external standard

    The first peak inside the retention time window of "Carbon Dioxide" fixes
    the total gas volume (injected CO2 volume divided by the CO2 volume
    fraction); every assignable peak is then converted to volume, moles and
    mass at the gas bag conditions. Without a CO2 peak nothing is quantified.

    Parameters
    ----------
    BreakdownDF : DataFrame
        TCD peak data; the 'RT' and 'Area' columns are read and the assignment
        and quantification columns are written into this same dataframe.
    DBRF : DataFrame
        TCD response factors with 'Compound Name', 'Formula', 'RT (min)' and
        'RF' columns, including a "Carbon Dioxide" entry; 'RT Max' and
        'RT Min' columns are added to it.
    sinfo : Dict
        Sample information; 'Injected CO2 (mL)' is read.
    gasBag_cond : List
        [temperature of gas bag (C), pressure in gas bag (psi)].
    peak_error : Float
        Retention time error within which a peak may be assigned to an entry.

    Returns
    -------
    BreakdownDF : DataFrame
        Breakdown with the quantification columns for every assigned peak.
    DBRF : DataFrame
        Response factors including the retention time windows.
    volume : Float
        Total gas volume (m^3), or 0 if no CO2 peak was found.
    TCD_cond : List
        [injected CO2 volume (m^3), pressure (Pa), temperature (K), gas constant].

    """
    #Gas bag temperature (C) and pressure (psi)
    temp, pressure = gasBag_cond[0], gasBag_cond[1]

    #Function to find the CO2 peak and derive the total gas volume from it
    def getCO2(BreakdownDF,DBRF,TCD_cond,peak_error):

        #Unpack TCD conditions
        co2, pressure, temp, R = TCD_cond

        #Response factor entry of CO2 and its retention time window
        CO2_row = DBRF.loc[DBRF['Compound Name'] == "Carbon Dioxide"].iloc[0]
        RT_low = CO2_row['RT (min)'] - peak_error
        RT_high = CO2_row['RT (min)'] + peak_error

        for i, row in BreakdownDF.iterrows():

            #Skip peaks outside of the CO2 retention time window
            if not (RT_low <= row['RT'] <= RT_high):
                continue

            #Label the peak as CO2
            BreakdownDF.at[i,'Compound Name'] = 'Carbon Dioxide'
            BreakdownDF.at[i,'Formula'] = 'CO2'
            BreakdownDF.at[i,'RF (Area/vol.%)'] = CO2_row['RF']
            BreakdownDF.at[i,'MW (g/mol)'] = ChemFormula('CO2').formula_weight

            #Volume percent from the peak area and the response factor
            volpercent = row['Area']/CO2_row['RF']
            BreakdownDF.at[i,'Vol.%'] = volpercent

            #Total volume (m^3) such that the injected CO2 makes up this volume percent
            volume = co2 * 100 / volpercent

            #The CO2 volume itself is the injected volume
            BreakdownDF.at[i,'Volume (m^3)'] = co2

            #Moles from the ideal gas law (PV=nRT)
            BreakdownDF.at[i,'Moles (mol)'] = co2*pressure/(temp*R)

            #Mass (mg) from moles and molar mass
            BreakdownDF.at[i,'Mass (mg)'] = BreakdownDF.at[i,'Moles (mol)'] * BreakdownDF.at[i,'MW (g/mol)'] * 1000

            #Only the first matching peak is used
            return True, volume, BreakdownDF

        #No CO2 peak found: no volume estimate
        return False, 0, BreakdownDF

    #Attach the retention time window to every response factor entry
    for entry in DBRF.index:
        DBRF.at[entry,'RT Max'] = DBRF.at[entry,'RT (min)'] + peak_error
        DBRF.at[entry,'RT Min'] = DBRF.at[entry,'RT (min)'] - peak_error

    #Injected CO2 volume, converted from mL to m^3
    co2 = sinfo['Injected CO2 (mL)']
    co2 = co2 / 10**6

    #Gas bag temperature in K and pressure in Pa
    temp = temp + 273.15
    pressure = pressure / 14.504*100000

    #Ideal gas constant, m^3*Pa/K*mol
    R = 8.314

    #List of conditions handed to getCO2 and returned to the caller
    TCD_cond = [co2,pressure,temp,R]

    #Check if there is a peak in the BreakdownDF that can be assigned to CO2
    CO2_bool, volume, BreakdownDF = getCO2(BreakdownDF,DBRF,TCD_cond,peak_error)

    #Without a CO2 peak there is no total volume, so no peak is quantified
    if not CO2_bool:
        return BreakdownDF, DBRF, volume, TCD_cond

    #Compare every peak against every response factor entry
    for i, row in BreakdownDF.iterrows():
        for _, rf_row in DBRF.iterrows():

            #Skip entries whose retention time window does not contain the peak
            if not (rf_row['RT Min'] <= row['RT'] <= rf_row['RT Max']):
                continue

            #Label the peak; when several entries match, the last one listed in DBRF prevails
            BreakdownDF.at[i,'Compound Name'] = rf_row['Compound Name']
            BreakdownDF.at[i,'Formula'] = rf_row['Formula']
            BreakdownDF.at[i,'RF (Area/vol.%)'] = rf_row['RF']
            BreakdownDF.at[i,'MW (g/mol)'] = ChemFormula(rf_row['Formula']).formula_weight

            #Volume percent from the peak area and the response factor
            volpercent = row['Area']/rf_row['RF']
            BreakdownDF.at[i,'Vol.%'] = volpercent

            #Volume (m^3) of the compound as its share of the total volume
            vol = volume*volpercent/100
            BreakdownDF.at[i,'Volume (m^3)'] = vol

            #Moles from the ideal gas law (PV=nRT)
            BreakdownDF.at[i,'Moles (mol)'] = vol*pressure/(temp*R)

            #Mass (mg) from moles and molar mass
            BreakdownDF.at[i,'Mass (mg)'] = BreakdownDF.at[i,'Moles (mol)'] * BreakdownDF.at[i,'MW (g/mol)'] * 1000

    return BreakdownDF, DBRF, volume, TCD_cond
617
+
618
+ #Function for quantifying gas FID data w/o external standard
619
+ def gasFID(BreakdownDF,DBRF,Label_info,sinfo,cutoff=4):
620
+ """
621
+ Function quantifies gas FID data and returns a breakdown dataframe
622
+
623
+ Parameters
624
+ ----------
625
+ BreakdownDF : DataFrame
626
+ Dataframe containing columns associated with matched FID and MS peak data
627
+ DBRF : Dataframe
628
+ Dataframe containing nested dataframes with associated chemical lumps,
629
+ likely imported from an excel sheet where each sheet is specific to
630
+ a given chemical lump. The top-level keys must be associated with the
631
+ predefined chemical lumps given in 'LABELS' section above
632
+ Label_info : List
633
+ List of dictionaries containing chemical lump and compound type abbreviations
634
+ sinfo : Dict
635
+ Dictionary containing key sample information
636
+ cutoff : Integer, optional
637
+ Integer representing the maximum cutoff carbon number that can be
638
+ quantified using FID.The default is 4.
639
+
640
+ Returns
641
+ -------
642
+ BreakdownDF : DataFrame
643
+ Dataframe containing columns associated with matched FID and MS peak data
644
+
645
+ """
646
+ #Function for assigning response factors to compounds
647
+ def assignRF(BreakdownDF,DBRF):
648
+
649
+ #Get a dictionary of average response factors by carbon number
650
+ avgRF = {}
651
+ #Loop through every carbon number up to the max in DBRF
652
+ for i in range(1,DBRF['Carbon Number'].max()+1):
653
+ #Get a slice of all rows in DBRF with a given carbon number
654
+ slicer = DBRF.loc[DBRF['Carbon Number']==i]
655
+ #Average the response factor entries in this slice, appending the result to the average RF dictionary
656
+ avgRF['{0}'.format(i)] = slicer['RF'].mean()
657
+
658
+ #Loop through every row in the FIDpMS dataframe
659
+ for i, row in BreakdownDF.iterrows():
660
+ #Check that the formula is not nan
661
+ if not pd.isna(row['Formula']):
662
+ #Obtain a dictionary containing key:value pairs as element:count using the formula string for the ith row
663
+ chemFormDict = ChemFormula(row['Formula']).element
664
+ #Use the carbon entry from the above dictionary to assign a carbon number to the ith row
665
+ BreakdownDF.at[i,'Carbon Number'] = chemFormDict['C']
666
+
667
+ #If the row's compound name exists in the RF list explicitly, assign the row to the appropriate RF
668
+ if row['Compound Name'] in DBRF['Compound Name'].values:
669
+ BreakdownDF.at[i,'RF (Area/vol.%)'] = DBRF.loc[DBRF['Compound Name']==row['Compound Name'],'RF'].iloc[0]
670
+ #Assign response factor source
671
+ BreakdownDF.at[i,'RF Source'] = 'Direct RF assignment based on compound name'
672
+ #Otherwise, assign response factor based on average carbon number RF
673
+ else:
674
+ BreakdownDF.at[i,'RF (Area/vol.%)'] = avgRF['{0}'.format(int(BreakdownDF.at[i,'Carbon Number']))]
675
+ #Assign response factor source
676
+ BreakdownDF.at[i,'RF Source'] = 'RF assignment based on average response factor for DBRF carbon number entries'
677
+ #Otherwise if the row's formula is nan, pass
678
+ else:
679
+ pass
680
+
681
+
682
+ return BreakdownDF
683
+
684
+ #Function for quantifying compounds using ideal gas law
685
+ def gasQuant(BreakdownDF,DBRF,sinfo,cutoff):
686
+
687
+ #Remove columns in BreakdownDF with a carbon number at or below cutoff
688
+ BreakdownDF = BreakdownDF.loc[BreakdownDF['Carbon Number'] > cutoff].copy()
689
+
690
+ #Unpack sinfo to get local variables
691
+ vol = sinfo['Reactor Volume (mL)'] #reactor volume, mL
692
+ pressure = sinfo['Quench Pressure (psi)'] #sample pressure, psi
693
+ temp = sinfo['Quench Temperature (C)'] #sample temperature, C
694
+
695
+ #Convert sinfo variables to new units
696
+ vol = vol / 10**6 #reactor volume, m^3
697
+ pressure = pressure / 14.504*100000 #reactor pressure, Pa
698
+ temp = temp + 273.15 #reactor temperature, K
699
+
700
+ #Define ideal gas constant, m^3*Pa/K*mol
701
+ R = 8.314
702
+
703
+ #Loop through every row in BreakdownDF
704
+ for i, row in BreakdownDF.iterrows():
705
+
706
+ #Add molecular weight using ChemFormula
707
+ BreakdownDF.at[i,'MW (g/mol)'] = ChemFormula(row['Formula']).formula_weight
708
+
709
+ #Get volume percent using response factor
710
+ BreakdownDF.at[i,'Vol.%'] = row['FID Area']/row['RF (Area/vol.%)']
711
+
712
+ #Get moles using ideal gas law (PV=nRT)
713
+ BreakdownDF.at[i,'Moles'] = BreakdownDF.at[i,'Vol.%']/100*vol*pressure/(temp*R)
714
+
715
+ #Get mass (mg) using moles and molar mass
716
+ BreakdownDF.at[i,'Mass (mg)'] = BreakdownDF.at[i,'Moles'] * BreakdownDF.at[i,'MW (g/mol)'] * 1000
717
+
718
+ return BreakdownDF
719
+
720
+ #Function for further breaking down product distribution
721
+ def moreBreakdown(BreakdownDF,CT_dict,sinfo):
722
+ """
723
+ This function prepares further breakdown dictionaries for use in exporting to Excel
724
+
725
+ Parameters
726
+ ----------
727
+ BreakdownDF : DataFrame
728
+ Dataframe containing columns associated with matched FID and MS peak data.
729
+ CT_dict : Dict
730
+ Dictionary of all compound type abbreviations in use and their associated expansions
731
+ sinfo : Dict
732
+ Dictionary containing sample information.
733
+
734
+ Returns
735
+ -------
736
+ BreakdownDF : DataFrame
737
+ Dataframe containing columns associated with matched FID and MS peak data.
738
+
739
+ """
740
+
741
+ #Get the total mass of product from the breakdown dataframe
742
+ m_total = np.nansum(BreakdownDF['Mass (mg)'])
743
+
744
+ #Iterate through every species in the breakdown dataframe and add entries in two new columns: Compound Type and Carbon Number
745
+ for i, row in BreakdownDF.iterrows():
746
+ #If there exists a formula..
747
+ try:
748
+ #Set breakdown compound type according to the abbreviation already in the breakdown dataframe
749
+ BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
750
+ #Obtain a dictionary containing key:value pairs as element:count using the formula string for the ith row
751
+ chemFormDict = ChemFormula(row['Formula']).element
752
+ #Use the carbon entry from the above dictionary to assign a carbon number to the ith row
753
+ BreakdownDF.at[i,'Carbon Number'] = chemFormDict['C']
754
+ #Otherwise, pass
755
+ except:
756
+ pass
757
+
758
+ #Get maximum carbon number in breakdown dataframe
759
+ CN_max = int(BreakdownDF['Carbon Number'].max())
760
+
761
+ #Create a dataframe for saving quantitative results organized by compound type
762
+ CT_DF = pd.DataFrame({'Compound Type':['Aromatics','Linear Alkanes','Branched Alkanes',
763
+ 'Cycloalkanes','Alkenes/Alkynes','Other'],
764
+ 'Mass (mg)':np.empty(6),
765
+ 'Mass fraction':np.empty(6)})
766
+
767
+ #Create a dataframe for saving quantitative results organized by carbon number
768
+ CN_DF = pd.DataFrame({'Carbon Number':range(1,CN_max+1,1),
769
+ 'Mass (mg)':np.empty(CN_max)})
770
+
771
+ #Create a dataframe for saving quantitative results organized by both compound type and carbon number
772
+ CTCN_DF = pd.DataFrame({'Aromatics': pd.Series(np.empty(CN_max),index=range(CN_max)),
773
+ 'Linear Alkanes': pd.Series(np.empty(CN_max),index=range(CN_max)),
774
+ 'Branched Alkanes':pd.Series(np.empty(CN_max),index=range(CN_max)),
775
+ 'Cycloalkanes':pd.Series(np.empty(CN_max),index=range(CN_max)),
776
+ 'Alkenes/Alkynes':pd.Series(np.empty(CN_max),index=range(CN_max)),
777
+ 'Other':pd.Series(np.empty(CN_max),index=range(CN_max))})
778
+
779
+ #Iterate through every compound type in the compound type dataframe, summing the total respective masses from the breakdown dataframe
780
+ for i, row in CT_DF.iterrows():
781
+
782
+ #Define a temporary dataframe which contains all rows matching the ith compound type
783
+ tempDF = BreakdownDF.loc[BreakdownDF['Compound Type'] == row['Compound Type']]
784
+ #Assign the ith compound type's mass as the sum of the temporary dataframe's m_i column, treating nan as zero
785
+ CT_DF.at[i,'Mass (mg)'] = np.nansum(tempDF['Mass (mg)'])
786
+ #Calculate and assign the ith compound type's mass fraction usingthe total mass from earlier
787
+ CT_DF.at[i,'Mass fraction'] = CT_DF.at[i,'Mass (mg)']/m_total
788
+
789
+ #Iterate through every carbon number in the carbon number dataframe, summing the total respective masses from the breakdown dataframe
790
+ for i, row in CN_DF.iterrows():
791
+
792
+ #Define a temporary dataframe which contains all rows matching the ith carbon number
793
+ tempDF = BreakdownDF.loc[BreakdownDF['Carbon Number'] == row['Carbon Number']]
794
+ #Assign the ith carbon number's mass as the sum of the temporary dataframe's m_i column, treating nan as zero
795
+ CN_DF.at[i,'Mass (mg)'] = np.nansum(tempDF['Mass (mg)'])
796
+
797
+ #Iterate through the entire dataframe, getting masses for every compound type - carbon number pair
798
+ for i, row in CTCN_DF.iterrows():
799
+
800
+ #For every entry in row
801
+ for j in row.index:
802
+
803
+ #Define a temporary dataframe which contains all rows matching the ith carbon number and compound type
804
+ tempDF = BreakdownDF.loc[(BreakdownDF['Carbon Number'] == i+1) & (BreakdownDF['Compound Type'] == j)]
805
+ #Assign the ith carbon number/jth compound type's mass as the sum of the temporary dataframe's m_i column, treating nan as zero
806
+ CTCN_DF.loc[i,j] = np.nansum(tempDF['Mass (mg)'])
807
+
808
+
809
+ #Get total masses from CT, CN, and CTCN dataframes
810
+ CT_mass = np.nansum(CT_DF['Mass (mg)'])
811
+ CN_mass = np.nansum(CN_DF['Mass (mg)'])
812
+ CTCN_mass = np.nansum(CTCN_DF)
813
+
814
+ #Create total mass dataframe
815
+ mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})
816
+
817
+ return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
818
+
819
+ #Unpack compound type and carbon number dictionaries from list
820
+ CL_Dict, CT_Dict = Label_info
821
+
822
+ #Filter dataframe to remove compounds that do not contain carbon
823
+ BreakdownDF = BreakdownDF.drop(BreakdownDF[[not i for i in BreakdownDF['Formula'].str.contains('C')]].index)
824
+ #Reset the dataframe index
825
+ BreakdownDF.reset_index()
826
+
827
+ #Run response factor assignment function
828
+ BreakdownDF = assignRF(BreakdownDF, DBRF)
829
+ #Run gas quantification function
830
+ BreakdownDF = gasQuant(BreakdownDF,DBRF,sinfo,cutoff)
831
+ #Run further breakdown function
832
+ BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF, CT_Dict, sinfo)
833
+
834
+ return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
835
+
836
+ #Function for quantifying gas FID data w/ external standard
837
def gasFID_ES(BreakdownDF,DBRF,Label_info,sinfo,gasBag_cond,total_volume,cutoff=4):
    """
    Function quantifies gas FID data using an external standard and returns
    the breakdown dataframe along with its summary tables

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing columns associated with matched FID and MS peak data.
        The columns read here are 'Formula', 'Compound Name', 'FID Area' and
        'Compound Type Abbreviation'. The caller's dataframe is not modified.
    DBRF : DataFrame
        Dataframe of gas FID response factors with the columns 'Compound Name',
        'Carbon Number' and 'RF'
    Label_info : List
        Two-item list of dictionaries: chemical lump abbreviations, then compound
        type abbreviations. Only the compound type dictionary is used here
    sinfo : Dict
        Dictionary containing key sample information (not read by this function)
    gasBag_cond : List
        Two-item list holding the gas bag temperature in C and pressure in psi
    total_volume : Float
        Float describing the total amount of gas (m^3) estimated by the external
        standard volume percent
    cutoff : Integer, optional
        Compounds with a carbon number at or below this value are removed
        before quantification. The default is 4.

    Returns
    -------
    BreakdownDF : DataFrame
        Quantified rows (carbon number above cutoff) with response factor, volume
        percent, moles and mass columns added
    CT_DF : DataFrame
        Mass and mass fraction by compound type
    CN_DF : DataFrame
        Mass by carbon number
    CTCN_DF : DataFrame
        Mass by carbon number (rows, row 0 is C1) and compound type (columns)
    mass_DF : DataFrame
        Total mass according to each of the tables above

    """
    #Compound types reported in the compound type breakdowns
    CT_names = ['Aromatics','Linear Alkanes','Branched Alkanes',
                'Cycloalkanes','Alkenes/Alkynes','Other']

    #Function for assigning response factors to compounds
    def assignRF(BreakdownDF,DBRF):

        #Get a dictionary of average response factors by carbon number
        avgRF = {}
        #Loop through every carbon number up to the max in DBRF
        for i in range(1,DBRF['Carbon Number'].max()+1):
            #Get a slice of all rows in DBRF with a given carbon number
            slicer = DBRF.loc[DBRF['Carbon Number']==i]
            #Average the response factor entries in this slice (NaN when the slice is empty)
            avgRF['{0}'.format(i)] = slicer['RF'].mean()

        #Names with an explicit response factor, looked up once instead of once per row
        knownNames = DBRF['Compound Name'].values

        #Loop through every row in the FIDpMS dataframe
        for i, row in BreakdownDF.iterrows():
            #Rows without a formula cannot be assigned a carbon number or response factor
            if pd.isna(row['Formula']):
                continue

            #Obtain a dictionary containing key:value pairs as element:count using the formula string for the ith row
            chemFormDict = ChemFormula(row['Formula']).element
            #Use the carbon entry from the above dictionary to assign a carbon number to the ith row
            BreakdownDF.at[i,'Carbon Number'] = chemFormDict['C']

            #If the row's compound name exists in the RF list explicitly, assign the row to the appropriate RF
            if row['Compound Name'] in knownNames:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = DBRF.loc[DBRF['Compound Name']==row['Compound Name'],'RF'].iloc[0]
                #Assign response factor source
                BreakdownDF.at[i,'RF Source'] = 'Direct RF assignment based on compound name'
            #Otherwise, assign response factor based on average carbon number RF
            else:
                #A carbon number beyond the DBRF range gets NaN, exactly like a carbon
                #number inside the range that has no DBRF entries, instead of a KeyError
                avgKey = '{0}'.format(int(BreakdownDF.at[i,'Carbon Number']))
                BreakdownDF.at[i,'RF (Area/vol.%)'] = avgRF.get(avgKey, np.nan)
                #Assign response factor source
                BreakdownDF.at[i,'RF Source'] = 'RF assignment based on average response factor for DBRF carbon number entries'

        return BreakdownDF

    #Function for quantifying compounds using ideal gas law
    def gasQuant(BreakdownDF,DBRF,sinfo,total_volume,cutoff):

        #If no row had a formula, the carbon number column was never created
        if 'Carbon Number' not in BreakdownDF.columns:
            BreakdownDF['Carbon Number'] = pd.Series(np.nan, index=BreakdownDF.index, dtype=float)

        #Remove rows in BreakdownDF with a carbon number at or below cutoff
        BreakdownDF = BreakdownDF.loc[BreakdownDF['Carbon Number'] > cutoff].copy()

        #Get gas bag conditions in new units
        temp = gasBag_cond[0] + 273.15 #gas bag temperature, C -> K
        pressure = gasBag_cond[1] / 14.504*100000 #gas bag pressure, psi -> bar -> Pa

        #Define ideal gas constant, m^3*Pa/K*mol
        R = 8.314

        #Loop through every row in BreakdownDF
        for i, row in BreakdownDF.iterrows():

            #Add molecular weight using ChemFormula
            BreakdownDF.at[i,'MW (g/mol)'] = ChemFormula(row['Formula']).formula_weight

            #Get volume percent using response factor
            BreakdownDF.at[i,'Vol.%'] = row['FID Area']/row['RF (Area/vol.%)']

            #Get moles using ideal gas law (PV=nRT)
            BreakdownDF.at[i,'Moles'] = BreakdownDF.at[i,'Vol.%']/100*total_volume*pressure/(temp*R)

            #Get mass (mg) using moles and molar mass
            BreakdownDF.at[i,'Mass (mg)'] = BreakdownDF.at[i,'Moles'] * BreakdownDF.at[i,'MW (g/mol)'] * 1000

        #Guarantee the result columns exist even when no row was left to quantify
        for column in ('MW (g/mol)','Vol.%','Moles','Mass (mg)'):
            if column not in BreakdownDF.columns:
                BreakdownDF[column] = pd.Series(np.nan, index=BreakdownDF.index, dtype=float)

        return BreakdownDF

    #Function for further breaking down product distribution
    def moreBreakdown(BreakdownDF,CT_dict,sinfo):
        """
        This function prepares further breakdown dataframes for use in exporting to Excel

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        CT_dict : Dict
            Dictionary of all compound type abbreviations in use and their associated expansions
        sinfo : Dict
            Dictionary containing sample information (not read here).

        Returns
        -------
        BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF : DataFrames
            The labelled breakdown plus the compound type, carbon number, combined
            and total mass tables.

        """

        #Get the total mass of product from the breakdown dataframe
        m_total = np.nansum(BreakdownDF['Mass (mg)'])

        #Iterate through every species in the breakdown dataframe and add entries in two columns: Compound Type and Carbon Number
        for i, row in BreakdownDF.iterrows():
            try:
                #Set breakdown compound type according to the abbreviation already in the breakdown dataframe
                BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
                #Obtain a dictionary containing key:value pairs as element:count using the formula string for the ith row
                chemFormDict = ChemFormula(row['Formula']).element
                #Use the carbon entry from the above dictionary to assign a carbon number to the ith row
                BreakdownDF.at[i,'Carbon Number'] = chemFormDict['C']
            #Labelling is best effort: a row with an unknown abbreviation or unusable
            #formula is left as it is. Exception (not a bare except) keeps
            #KeyboardInterrupt and SystemExit propagating
            except Exception:
                continue

        #If no row could be labelled, the compound type column was never created
        if 'Compound Type' not in BreakdownDF.columns:
            BreakdownDF['Compound Type'] = pd.Series(np.nan, index=BreakdownDF.index, dtype=object)

        #Get maximum carbon number in breakdown dataframe (zero when there are no rows)
        CN_top = BreakdownDF['Carbon Number'].max()
        CN_max = 0 if pd.isna(CN_top) else int(CN_top)

        #Create a dataframe for saving quantitative results organized by compound type
        CT_DF = pd.DataFrame({'Compound Type':CT_names,
                              'Mass (mg)':np.zeros(len(CT_names)),
                              'Mass fraction':np.zeros(len(CT_names))})

        #Create a dataframe for saving quantitative results organized by carbon number
        CN_DF = pd.DataFrame({'Carbon Number':range(1,CN_max+1,1),
                              'Mass (mg)':np.zeros(CN_max)})

        #Create a dataframe for saving quantitative results organized by both compound type and carbon number
        CTCN_DF = pd.DataFrame({name:np.zeros(CN_max) for name in CT_names},
                               index=range(CN_max))

        #Iterate through every compound type in the compound type dataframe, summing the total respective masses from the breakdown dataframe
        for i, row in CT_DF.iterrows():
            #Define a temporary dataframe which contains all rows matching the ith compound type
            tempDF = BreakdownDF.loc[BreakdownDF['Compound Type'] == row['Compound Type']]
            #Assign the ith compound type's mass as the sum of the temporary dataframe's mass column, treating nan as zero
            CT_DF.at[i,'Mass (mg)'] = np.nansum(tempDF['Mass (mg)'])
            #Calculate and assign the ith compound type's mass fraction using the total mass from earlier
            #(undefined, hence NaN, when there is no mass at all)
            CT_DF.at[i,'Mass fraction'] = CT_DF.at[i,'Mass (mg)']/m_total if m_total else np.nan

        #Iterate through every carbon number in the carbon number dataframe, summing the total respective masses from the breakdown dataframe
        for i, row in CN_DF.iterrows():
            #Define a temporary dataframe which contains all rows matching the ith carbon number
            tempDF = BreakdownDF.loc[BreakdownDF['Carbon Number'] == row['Carbon Number']]
            #Assign the ith carbon number's mass as the sum of the temporary dataframe's mass column, treating nan as zero
            CN_DF.at[i,'Mass (mg)'] = np.nansum(tempDF['Mass (mg)'])

        #Iterate through the entire dataframe, getting masses for every compound type - carbon number pair
        for i in CTCN_DF.index:
            for j in CTCN_DF.columns:
                #Row i holds carbon number i+1
                tempDF = BreakdownDF.loc[(BreakdownDF['Carbon Number'] == i+1) & (BreakdownDF['Compound Type'] == j)]
                #Assign the pair's mass as the sum of the temporary dataframe's mass column, treating nan as zero
                CTCN_DF.at[i,j] = np.nansum(tempDF['Mass (mg)'])

        #Get total masses from CT, CN, and CTCN dataframes
        CT_mass = np.nansum(CT_DF['Mass (mg)'])
        CN_mass = np.nansum(CN_DF['Mass (mg)'])
        CTCN_mass = np.nansum(CTCN_DF.to_numpy())

        #Create total mass dataframe
        mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

        return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF

    #Unpack compound type and carbon number dictionaries from list
    CL_Dict, CT_Dict = Label_info

    #Filter dataframe to remove compounds that do not contain carbon. The pattern
    #requires a 'C' that is not followed by a lowercase letter so that Cl, Ca, Co, ...
    #are not mistaken for carbon. Rows without a formula are kept here (na=True),
    #as before, and are removed later by the carbon number cutoff
    hasCarbon = BreakdownDF['Formula'].str.contains(r'C(?![a-z])', na=True).astype(bool)
    #Reset the dataframe index; the result has to be kept since reset_index is not in place
    BreakdownDF = BreakdownDF.loc[hasCarbon].reset_index(drop=True)

    #Run response factor assignment function
    BreakdownDF = assignRF(BreakdownDF, DBRF)
    #Run gas quantification function
    BreakdownDF = gasQuant(BreakdownDF,DBRF,sinfo,total_volume,cutoff)
    #Run further breakdown function
    BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF, CT_Dict, sinfo)

    return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
1053
+
1054
+ """ DATA IMPORTS """
1055
#Import sample information from json file
#The file is read as UTF-8 explicitly so that non-ASCII sample details do not
#depend on the platform's default encoding
with open(DF_Dir+sname+'_INFO.json', encoding='utf-8') as sinfo_f:
    sinfo = json.load(sinfo_f)

#Change ISO date-time strings into datetime objects
sinfo['Start Time'] = datetime.fromisoformat(sinfo['Start Time'])
sinfo['End Time'] = datetime.fromisoformat(sinfo['End Time'])

#Calculate a reaction time (hours) using the start, end, and heat time values and add to sinfo
sinfo['Reaction Time'] = abs(sinfo['End Time']-sinfo['Start Time']).total_seconds()/3600 - sinfo['Heat Time']

#Dictionary of substrings to add to sample name to create file names
#NOTE(review): 'Liquid Labelled MS Peaks' is the only suffix without '.csv' - confirm whether that is intended
sub_Dict = {'Gas TCD+FID':['_GS2_TCD_CSO.csv'],
            'Gas Labelled MS Peaks':['_GS1_UA_Comp_UPP.csv'],
            'Gas FID+MS':['_GS2_FIDpMS.csv'],
            'Liquid FID':['_LQ1_FID_CSO.csv'],
            'Liquid Labelled MS Peaks':['_LQ1_UA_Comp_UPP'],
            'Liquid FID+MS':['_LQ1_FIDpMS.csv']}

#Use sample name to form file names using sub_Dict and append full pathnames for all entries
#Each entry becomes [suffix, full path]
for key, entry in sub_Dict.items():
    sub_Dict[key] = [entry[0],DFR_Dir+sname+entry[0]]


#If the run liquid analysis Boolean is True..
if lgTF[0]:
    #DEFINE DIRECTORIES FOR LIQUID FID QUANTIFICATION
    #Define directory for liquid matched MS and FID peaks
    DIR_LQ1_FIDpMS = sub_Dict['Liquid FID+MS'][1]
    #Define directory for liquid response factors
    DIR_LQRF = RF_Dir+LRF_file

    #Read matched peak data between liquid FID and MS
    LQ1_FIDpMS = pd.read_csv(DIR_LQ1_FIDpMS)

    #Filter FIDpMS to only include rows with non-NaN compounds
    LQ1_FIDpMS_Filtered = LQ1_FIDpMS[LQ1_FIDpMS['Compound Name'].notnull()].reset_index(drop=True)

    #Create a duplicate of the FIDpMS dataframe for future saving as a breakdown
    LQ_FID_BreakdownDF = LQ1_FIDpMS_Filtered.copy()

    #Read liquid response factors data, one sheet per chemical lump
    #Passing the list of sheet names returns the same {sheet name: dataframe}
    #dictionary while opening the workbook only once
    LQRF = pd.read_excel(DIR_LQRF,sheet_name=list(CL_Dict))

#If the run gas analysis Boolean is True..
if lgTF[1]:
    #DEFINE DIRECTORIES FOR GAS TCD AND FID QUANTIFICATION
    #Define directory for gas TCD peaks
    DIR_GS2_TCD = sub_Dict['Gas TCD+FID'][1]
    #Define directory for gas FID peaks
    DIR_GS2_FIDpMS = sub_Dict['Gas FID+MS'][1]
    #Define directory for gas TCD response factors
    DIR_TCDRF = RF_Dir+GRFT_file
    #Define directory for gas FID response factors
    DIR_FIDRF = RF_Dir+GRF_file

    #Read gas FID and TCD Peak data
    GS2_TCD = pd.read_csv(DIR_GS2_TCD)

    #Create a duplicate of the gas TCD/FID dataframe for future saving as a breakdown
    #Also filter breakdown dataframe to only include rows sourced from TCD
    GS_TCD_BreakdownDF = GS2_TCD.loc[GS2_TCD['Signal Name'] == 'TCD2B'].copy()

    #Read matched peak data between gas FID and MS
    GS2_FIDpMS = pd.read_csv(DIR_GS2_FIDpMS)

    #Create a duplicate of the FIDpMS dataframe for future saving as a breakdown
    GS_FID_BreakdownDF = GS2_FIDpMS.copy()

    #Read gas TCD response factors data
    TCDRF = pd.read_csv(DIR_TCDRF)
    #Read gas FID response factors data
    GSRF = pd.read_csv(DIR_FIDRF)
1133
+
1134
+ """ MAIN SCRIPT """
1135
+
1136
#If the run liquid analysis Boolean is True..
if lgTF[0]:
    #Get liquid FID breakdown and miscellaneous dataframes
    LQ_FID_BreakdownDF, LQCT_DF, LQCN_DF, LQCTCN_DF, LQmass_DF = liquidFID(LQ_FID_BreakdownDF, LQRF, [CL_Dict, CT_Dict], sinfo)

#If the run gas analysis Boolean is True..
if lgTF[1]:
    #If the external standard Boolean is True..
    if ES_bool:
        #Get gas TCD breakdown and miscellaneous dataframes
        GS_TCD_BreakdownDF, TCDRF, total_volume, TCD_cond = gasTCD_ES(GS_TCD_BreakdownDF,TCDRF,sinfo,[gasBag_temp,gasBag_pressure],peak_error)

        #Get gas FID breakdown and miscellaneous dataframes
        GS_FID_BreakdownDF, GSCT_DF, GSCN_DF, GSCTCN_DF, GSmass_DF = gasFID_ES(GS_FID_BreakdownDF,GSRF,[CL_Dict, CT_Dict], sinfo,[gasBag_temp,gasBag_pressure],total_volume)
    #Otherwise..
    else:
        #Get gas TCD breakdown and miscellaneous dataframes
        GS_TCD_BreakdownDF, TCDRF, TCD_cond = gasTCD(GS_TCD_BreakdownDF,TCDRF,sinfo,peak_error)

        #Get gas FID breakdown and miscellaneous dataframes
        GS_FID_BreakdownDF, GSCT_DF, GSCN_DF, GSCTCN_DF, GSmass_DF = gasFID(GS_FID_BreakdownDF,GSRF,[CL_Dict, CT_Dict], sinfo)

#Get dataframe for sample info
sinfo_DF = pd.DataFrame(sinfo,index=[0])

#Collect the results of the analyses that actually ran. Referring to the liquid
#or gas dataframes unconditionally raises a NameError when that analysis is off
ran_BreakdownDFs = []
ran_CTCN_DFs = []
if lgTF[0]:
    ran_BreakdownDFs.append(LQ_FID_BreakdownDF)
    ran_CTCN_DFs.append(LQCTCN_DF)
if lgTF[1]:
    ran_BreakdownDFs.append(GS_FID_BreakdownDF)
    ran_CTCN_DFs.append(GSCTCN_DF)

#Get maximum carbon number in the available breakdown dataframes
CN_found = [ranDF['Carbon Number'].max() for ranDF in ran_BreakdownDFs]
CN_max = max([int(CN) for CN in CN_found if not pd.isna(CN)], default=0)
#The TCD data added below fills the C1-C4 rows, so make sure they exist
if lgTF[1]:
    CN_max = max(CN_max, 4)

#Sum the liquid and gas breakdown carbon number and compound type dataframes
#Initiate a zero-filled CTCN dataframe (row i holds carbon number i+1)
total_CTCN_DF = pd.DataFrame({CT_name:np.zeros(CN_max) for CT_name in
                              ['Aromatics','Linear Alkanes','Branched Alkanes',
                               'Cycloalkanes','Alkenes/Alkynes','Other']},
                             index=range(CN_max))

#For every row in this sum dataframe...
for i in total_CTCN_DF.index:
    #For every entry in this row...
    for j in total_CTCN_DF.columns:
        #Add up the entry from every available dataframe that reaches this carbon number
        total_CTCN_DF.at[i,j] = sum(CTCN_DF.at[i,j] for CTCN_DF in ran_CTCN_DFs if i <= len(CTCN_DF.index)-1)

#Add the TCD data afterwards (only available when the gas analysis ran)
if lgTF[1]:
    #Filter the TCD breakdown dataframe to only include entries with non-nan formulas
    GS_TCD_BreakdownDF_filter = GS_TCD_BreakdownDF[GS_TCD_BreakdownDF['Formula'].notnull()]
    #Filter the TCD breakdown dataframe to only include formulas with carbon and hydrogen in them
    GS_TCD_BreakdownDF_filter = GS_TCD_BreakdownDF_filter[(GS_TCD_BreakdownDF_filter['Formula'].str.contains('C')) & (GS_TCD_BreakdownDF_filter['Formula'].str.contains('H'))]

    #For every row in this filtered TCD dataframe
    for i, row in GS_TCD_BreakdownDF_filter.iterrows():
        #Peaks without a quantified mass contribute nothing (nan is treated as zero)
        if pd.isna(row['Mass (mg)']):
            continue
        #Get a chemical formula dictionary for the row's formula
        chemFormDict = ChemFormula(row['Formula']).element
        #If the carbon number is less than four, use the linear entry for the given carbon number
        if chemFormDict['C'] < 4:
            target = (chemFormDict['C']-1,'Linear Alkanes')
        #Otherwise, if the compound is isobutane, use the branched entry for carbon number 4
        elif row['Compound Name'] == 'Isobutane':
            target = (3,'Branched Alkanes')
        #Otherwise, if the compound is butane, use the linear entry for carbon number 4
        elif row['Compound Name'] == 'n-Butane':
            target = (3,'Linear Alkanes')
        #Otherwise, skip the row
        else:
            continue
        #Add (rather than assign) the mass: several TCD compounds can share a carbon
        #number, and the entry may already hold mass from the FID breakdowns
        total_CTCN_DF.at[target[0],target[1]] += row['Mass (mg)']
1219
+
1220
+ """ BREAKDOWN SAVING """
1221
+
1222
#If breakdown directory does not exist within sample folder, create it
os.makedirs(DFbreak_Dir, exist_ok=True)

#Define breakdown file name
bfn = sname+"_Breakdown_"+nows+".xlsx"

#Create pandas Excel writer
#The context manager closes (and thereby saves) the workbook even if an export fails
with pd.ExcelWriter(fileCheck(DFbreak_Dir+bfn), engine="xlsxwriter") as writer:

    #If the run liquid analysis Boolean is True..
    if lgTF[0]:
        #Position the liquid FID dataframes in the worksheet.
        sinfo_DF.to_excel(writer, sheet_name="Liquid FID",startcol=1, startrow=1, index=False)
        LQ_FID_BreakdownDF.to_excel(writer, sheet_name="Liquid FID",startcol=1, startrow=4, index=False)
        LQCT_DF.to_excel(writer, sheet_name="Liquid FID",startcol=16, startrow=7, index=False)
        LQCN_DF.to_excel(writer, sheet_name="Liquid FID", startcol=16, startrow=15, index=False)
        LQmass_DF.to_excel(writer, sheet_name="Liquid FID",startcol=22, startrow=1,index=False)
        LQCTCN_DF.to_excel(writer, sheet_name="Liquid FID", startcol=20, startrow=7, index=False)

    #If the run gas analysis Boolean is True..
    if lgTF[1]:
        #Position the gas FID dataframes in the worksheet.
        sinfo_DF.to_excel(writer, sheet_name="Gas FID",startcol=1, startrow=1, index=False)
        GS_FID_BreakdownDF.to_excel(writer, sheet_name="Gas FID",startcol=1, startrow=4, index=False)
        GSCT_DF.to_excel(writer, sheet_name="Gas FID",startcol=18, startrow=7, index=False)
        GSCN_DF.to_excel(writer, sheet_name="Gas FID", startcol=18, startrow=15, index=False)
        GSmass_DF.to_excel(writer, sheet_name="Gas FID",startcol=22, startrow=1,index=False)
        GSCTCN_DF.to_excel(writer, sheet_name="Gas FID",startcol=22, startrow=7,index=False)

        #Expand sample info dataframe to include total TCD mass and gas bag volume
        sinfo_DF.at[0,'Total product (mg)'] = GS_TCD_BreakdownDF['Mass (mg)'].sum()
        #The gas bag volume is only calculated by the external standard analysis;
        #without it total_volume is never defined
        if ES_bool:
            sinfo_DF.at[0,'Gas bag volume (m^3)'] = total_volume

        #Position the gas TCD dataframes in the worksheet
        GS_TCD_BreakdownDF.to_excel(writer, sheet_name="Gas TCD",startcol=1,startrow=4, index=False)
        sinfo_DF.to_excel(writer, sheet_name="Gas TCD",startcol=1, startrow=1, index=False)

#Log that a new Excel breakdown has been saved
logger.info("New breakdown created: " + bfn)


""" SAMPLE INFO SAVING"""

#The string below is a disabled template for writing a sample's _INFO.json file;
#it is never executed
"""
x = {'Sample Name':sname,
     'Reactor Name':'MB01',
     'Catalyst Type':'Ru/C+BEA',
     'Catalyst Amount (mg)':59.9,
     'Plastic Type':'PE4k Sigma-Aldrich',
     'Plastic Amount (mg)':299.7,
     'Reaction Temperature (C)':256,
     'Quench Temperature (C)':26,
     'Reaction Pressure (psi)':269,
     'Initial Pressure (psi)':122,
     'Quench Pressure (psi)':27,
     'Start Time':'2023-12-01 13:29:00.000',
     'End Time':'2023-12-04 12:43:00.000',
     'Heat Time':1+17/60,
     'Internal Standard Name':'TTBB',
     'Internal Standard Mass (mg)':13.5}

#Write to JSON
with open(cwd+sname+'_INFO.json','w',encoding='utf-8') as f:
    json.dump(x,f,ensure_ascii=False, indent=4)
"""
1295
+
1296
+
1297
+
1298
+
1299
+
1300
+
1301
+
1302
+
1303
+
1304
+
1305
+