chromaquant 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chromaquant/Handle/__init__.py +2 -1
- chromaquant/Handle/fileChecks.py +172 -0
- chromaquant/Handle/handleDirectories.py +1 -1
- chromaquant/Hydro/__init__.py +12 -0
- chromaquant/Hydro/hydroMain.py +496 -0
- chromaquant/Match/AutoFpmMatch.py +48 -48
- chromaquant/Match/MatchSub/__init__.py +13 -0
- chromaquant/Match/MatchSub/matchTools.py +282 -0
- chromaquant/Match/MatchSub/peakTools.py +259 -0
- chromaquant/Match/__init__.py +2 -1
- chromaquant/Match/matchMain.py +233 -0
- chromaquant/Quant/QuantSub/__init__.py +15 -0
- chromaquant/Quant/QuantSub/gasFID.py +241 -0
- chromaquant/Quant/QuantSub/gasTCD.py +425 -0
- chromaquant/Quant/QuantSub/liquidFID.py +310 -0
- chromaquant/Quant/QuantSub/parseTools.py +162 -0
- chromaquant/Quant/__init__.py +1 -1
- chromaquant/Quant/quantMain.py +417 -0
- chromaquant/UAPP/__init__.py +12 -0
- chromaquant/UAPP/uappMain.py +427 -0
- chromaquant/__main__.py +426 -393
- chromaquant/oldui.py +492 -0
- chromaquant/properties.json +1 -1
- {chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/METADATA +3 -3
- chromaquant-0.4.0.dist-info/RECORD +38 -0
- {chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/WHEEL +1 -1
- chromaquant-0.3.1.dist-info/RECORD +0 -22
- {chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/entry_points.txt +0 -0
- {chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
- {chromaquant-0.3.1.dist-info → chromaquant-0.4.0.dist-info}/licenses/LICENSES_bundled.txt +0 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
COPYRIGHT STATEMENT:
|
|
4
|
+
|
|
5
|
+
ChromaQuant – A quantification software for complex gas chromatographic data
|
|
6
|
+
|
|
7
|
+
Copyright (c) 2024, by Julia Hancock
|
|
8
|
+
Affiliation: Dr. Julie Elaine Rorrer
|
|
9
|
+
URL: https://www.rorrerlab.com/
|
|
10
|
+
|
|
11
|
+
License: BSD 3-Clause License
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
SCRIPT THAT MATCHES FID AND MS PEAKS
|
|
16
|
+
|
|
17
|
+
Julia Hancock
|
|
18
|
+
Started 12/10/2023
|
|
19
|
+
|
|
20
|
+
"""
|
|
21
|
+
""" PACKAGES """
|
|
22
|
+
import sys
|
|
23
|
+
import pandas as pd
|
|
24
|
+
import os
|
|
25
|
+
from molmass import Formula
|
|
26
|
+
import math
|
|
27
|
+
import numpy as np
|
|
28
|
+
from chemformula import ChemFormula
|
|
29
|
+
import json
|
|
30
|
+
from datetime import datetime
|
|
31
|
+
import logging
|
|
32
|
+
import scipy
|
|
33
|
+
import importlib.util
|
|
34
|
+
|
|
35
|
+
""" DIRECTORIES (MANUAL) """
|
|
36
|
+
"""
|
|
37
|
+
testPath = "/Users/connards/Desktop/University/Rorrer Lab/Scripts/chromaquant/src/chromaquant/"
|
|
38
|
+
#Define file directory
|
|
39
|
+
D_files = "/Users/connards/Documents/ChromaQuant"
|
|
40
|
+
|
|
41
|
+
#Define app directory
|
|
42
|
+
D_app = "/Users/connards/Desktop/University/Rorrer Lab/Scripts/chromaquant/src/chromaquant"
|
|
43
|
+
|
|
44
|
+
#Define resources directory
|
|
45
|
+
D_rsc = os.path.join(D_files,'resources')
|
|
46
|
+
|
|
47
|
+
#Define theme directory
|
|
48
|
+
D_theme = os.path.join(D_rsc,'forest','forest-light.tcl')
|
|
49
|
+
|
|
50
|
+
#Define response factors directory
|
|
51
|
+
D_rf = os.path.join(D_files,'response-factors')
|
|
52
|
+
|
|
53
|
+
#Define data directory
|
|
54
|
+
D_data = os.path.join(D_files,'data')
|
|
55
|
+
|
|
56
|
+
#Define images directory
|
|
57
|
+
D_img = os.path.join(D_files,'images')
|
|
58
|
+
|
|
59
|
+
directories = {'files':D_files,'resources':D_rsc,'theme':D_theme,'rf':D_rf,'data':D_data,'images':D_img}
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
""" VARIABLES FOR TESTING"""
|
|
63
|
+
|
|
64
|
+
#sname = 'example2'
|
|
65
|
+
#sphase = 'L'
|
|
66
|
+
|
|
67
|
+
""" LOCAL PACKAGES """
|
|
68
|
+
|
|
69
|
+
#Package directory: the folder one level above the folder that holds this file
_module_dir = os.path.dirname(os.path.abspath(__file__))
app_dir = os.path.dirname(_module_dir)

#Absolute paths to the __init__.py of every local subpackage, keyed by subpackage name
subpack_dir = {
    label: os.path.join(app_dir, *parts, '__init__.py')
    for label, parts in (
        ('Handle', ('Handle',)),
        ('Manual', ('Manual',)),
        ('MatchSub', ('Match', 'MatchSub')),
    )
}
|
|
76
|
+
|
|
77
|
+
#Define function to import from path
|
|
78
|
+
def import_from_path(module_name,path):
    """
    Import a module from an explicit file path and register it in sys.modules.

    Parameters
    ----------
    module_name : str
        Name under which the module is registered in sys.modules.
    path : str
        Path to the Python source file to execute (e.g. a package __init__.py).

    Returns
    -------
    module : module
        The fully executed module object.

    Raises
    ------
    ImportError
        If no import spec/loader can be built for the given path
        (e.g. the file suffix is not recognized as importable).
    Exception
        Any exception raised while executing the module is re-raised unchanged.
    """
    #Define spec
    spec = importlib.util.spec_from_file_location(module_name,path)
    #spec_from_file_location returns None when it cannot find a loader for the path
    if spec is None or spec.loader is None:
        raise ImportError("Cannot create an import spec for module '{0}' from path '{1}'".format(module_name,path),name=module_name)

    #Define module
    module = importlib.util.module_from_spec(spec)

    #Remember whatever was registered under this name so a failed import can be rolled back
    previous = sys.modules.get(module_name)
    #Expand sys.modules dict before execution so the module can be found while it is loading
    sys.modules[module_name] = module

    #Load module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        #Do not leave a half-initialized module behind in sys.modules
        if previous is None:
            sys.modules.pop(module_name,None)
        else:
            sys.modules[module_name] = previous
        raise

    return module
|
|
88
|
+
|
|
89
|
+
#Import all local packages
#Each call executes the subpackage's __init__.py found in subpack_dir and also
#registers the resulting module in sys.modules under the short alias passed first
#Handle subpackage
hd = import_from_path("hd",subpack_dir['Handle'])
#Manual subpackage
mn = import_from_path("mn",subpack_dir['Manual'])
#Match/MatchSub subpackage
mtsb = import_from_path("mtsb",subpack_dir['MatchSub'])
|
|
93
|
+
|
|
94
|
+
""" FIT DEFINITION FUNCTION """
|
|
95
|
+
|
|
96
|
+
#Function that defines fit lambda based on parameters in analysis-config
|
|
97
|
+
def getFitLambda(fit_param):
    """
    Build the polynomial function described by the match fit parameters in analysis-config.

    Parameters
    ----------
    fit_param : dict
        Must contain the key 'fit-type'. For "First Order" the keys 'a' and 'b'
        are required (a*x + b); for "Third Order" the keys 'a', 'b', 'c' and 'd'
        are required (a*x**3 + b*x**2 + c*x + d).

    Returns
    -------
    fit : callable or None
        Function of one argument (FID_RT) evaluating the polynomial, or None when
        'fit-type' is neither "First Order" nor "Third Order".

    Raises
    ------
    KeyError
        If 'fit-type' or a coefficient required by the selected fit type is missing.
    """

    fit_type = fit_param['fit-type']

    if fit_type == "First Order":

        #Read the coefficients once, so a missing key fails here rather than on the
        #first evaluation, and later edits to fit_param cannot change the fit
        a, b = fit_param['a'], fit_param['b']
        fit = lambda FID_RT: a*FID_RT + b

    elif fit_type == "Third Order":

        a, b, c, d = fit_param['a'], fit_param['b'], fit_param['c'], fit_param['d']
        fit = lambda FID_RT: a*FID_RT**3 + b*FID_RT**2 + c*FID_RT + d

    else:

        #Unknown fit type: callers receive None
        fit = None

    return fit
|
|
113
|
+
|
|
114
|
+
""" MATCH FUNCTION """
|
|
115
|
+
def mainMatch(sname,sphase,model):
    """
    Match FID peaks to MS peaks for one sample and save the result to the sample's FIDpMS file.

    Parameters
    ----------
    sname : str
        Sample name; used as the sample folder name inside the data directory
        and passed to hd.fileNamer to build the data file paths.
    sphase : str
        Sample phase, passed to hd.fileNamer (the commented-out test call uses 'L').
    model : str
        Matching model. 'P' matches peaks using the polynomial fit defined in
        analysis-config; 'R' matches by retention time and then with the
        polynomial fit. Any other value skips peak matching and prints a warning.

    Returns
    -------
    None
    """

    print("[matchMain] Beginning match...")

    #Get current time
    print("[matchMain] Getting current time...")
    now = datetime.now()

    #DIRECTORIES
    print("[matchMain] Getting directories...")
    #Get directories from handling script
    directories = hd.handle(app_dir)

    #Data file log directory
    directories['log'] = os.path.join(directories['data'],sname,'log')

    #Data file breakdowns directory
    directories['break'] = os.path.join(directories['data'],sname,'breakdowns')

    #Raw data file directory
    directories['raw'] = os.path.join(directories['data'],sname,'raw data')

    #ANALYSIS CONFIGURATION
    print("[matchMain] Interpreting analysis configuration...")
    #Read analysis configuration file (JSON text is read as UTF-8 regardless of platform locale)
    with open(os.path.join(directories['resources'],'analysis-config.json'),encoding='utf-8') as f:
        analysis_config = json.load(f)

    #Extract analysis configuration info
    #This dictionary contains lists of substrings to be checked against compound name strings to
    #assign a compound type

    #Six compound types exist: linear alkanes (L), branched alkanes (B), aromatics (A), cycloalkanes (C),
    #alkenes/alkynes (E), and other (O)

    #Each compound type abbreviation will have an entry in the dictionary corresponding to a list of
    #substrings to be checked against a compound name string

    contains = analysis_config["CT-assign-contains"]

    #Tuple of contains keys in order of priority
    keyloop = analysis_config["CT-assign-keyloop"]

    #Tuple of elements to be excluded and automatically labelled as 'O'
    element_exclude = analysis_config["CT-assign-element-exclude"]

    #File suffixes to add to form data filenames
    file_suffix = analysis_config["file-suffix"]

    #Fit parameters for matching FID and MS according to polynomial fit
    match_fit_parameters = analysis_config["match-fit-parameters"]

    #Acceptable peak errors for matching
    peak_errors = analysis_config["peak-errors"]

    #RUN FUNCTIONS
    print("[matchMain] Running match functions...")

    #Run the file naming function – this function will create paths to all relevant files for matching FID and MS peaks according to sample name and phase
    print("[matchMain] Getting data paths...")
    paths = hd.fileNamer(sname,sphase,file_suffix,directories['raw'])

    # Run the file checking function – this function will search for an existing FIDpMS file, creating one if it does not exist.
    # It will then read the file as a pandas DataFrame. The tf Boolean describes whether or not there exist manually-matched peaks.
    # (tf is not used further in this function)
    print("[matchMain] Checking for FIDpMS file...")
    fpmDF, tf = hd.checkFile(paths[2],paths[0])

    # Import MS UPP data
    print("[matchMain] Importing mass spectrometry data...")
    mDF = pd.read_csv(paths[1])

    # Get only relevant columns of MS UPP data
    print("[matchMain] Cleaning mass spectrometry data...")
    mDF = mDF.loc[:,['Component RT','Compound Name','Formula','Match Factor']]

    # Get fit function using analysis-config
    print("[matchMain] Extracting match fit parameters...")
    fit = getFitLambda(match_fit_parameters)

    #getFitLambda returns None for a fit type it does not know; make that visible here
    if fit is None:
        print("[matchMain] Warning: unrecognized fit type in match-fit-parameters, no fit function available")

    #If model is polynomial...
    if model == 'P':
        # Run the matching function – this function takes a passed function describing an estimated relationship between MS RT's and FID RT's and matches peaks.
        # Function can be of any form as long as it returns a floating point value for the estimated MS RT
        print("[matchMain] Matching peaks according to polynomial fit...")
        fpmDF = mtsb.matchPeaks(fpmDF,mDF,fit,peak_errors['peak-error-third'])

    #If model is retention time...
    elif model == 'R':
        # Match by retention time first, then run the polynomial matching function
        # NOTE(review): indentation was not recoverable from the source this was reviewed from;
        # both calls are assumed to belong to the 'R' branch – confirm against the repository
        print("[matchMain] Matching peaks by retention time...")
        fpmDF = mtsb.matchRT(fpmDF,mDF,peakError=peak_errors['peak-error-RT'])
        fpmDF = mtsb.matchPeaks(fpmDF,mDF,fit,peak_errors['peak-error-third'])

    #Otherwise no matching is performed – say so instead of skipping silently
    else:
        print("[matchMain] Warning: unrecognized model '{0}', no peak matching performed".format(model))

    # Run the compound type abbreviation assignment function – this function takes a passed matched FID and MS list and assigns
    # compound type abbreviations to each matched entry
    print("[matchMain] Assigning compound types...")
    fpmDF = mtsb.ctaAssign(fpmDF, contains, keyloop, element_exclude)

    print("[matchMain] Handling duplicates...")
    #Run the duplicate handling function
    fpmDF = mtsb.duplicateHandle(fpmDF)

    print("[matchMain] Saving results...")
    #Save the FIDpMS data
    fpmDF.to_csv(paths[2],index=False)

    #Print computation time in milliseconds
    compTime = (datetime.now() - now).total_seconds()*1000
    print("[matchMain] Time taken: {:.3f} ms".format(compTime))

    #Close main function by returning
    return None
|
|
231
|
+
|
|
232
|
+
#For testing
|
|
233
|
+
#mainMatch('example2','L','R')
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
ChromaQuant.Quant.QuantSub package initialization
|
|
6
|
+
|
|
7
|
+
Julia Hancock
|
|
8
|
+
Created 10-19-2024
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .parseTools import *
|
|
13
|
+
from .liquidFID import *
|
|
14
|
+
from .gasFID import *
|
|
15
|
+
from .gasTCD import *
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
COPYRIGHT STATEMENT:
|
|
5
|
+
|
|
6
|
+
ChromaQuant – A quantification software for complex gas chromatographic data
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024, by Julia Hancock
|
|
9
|
+
Affiliation: Dr. Julie Elaine Rorrer
|
|
10
|
+
URL: https://www.rorrerlab.com/
|
|
11
|
+
|
|
12
|
+
License: BSD 3-Clause License
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
SUBPACKAGE FOR PERFORMING GAS FID QUANTIFICATION STEPS
|
|
17
|
+
|
|
18
|
+
Julia Hancock
|
|
19
|
+
Started 12-29-2024
|
|
20
|
+
|
|
21
|
+
"""
|
|
22
|
+
""" PACKAGES """
|
|
23
|
+
import pandas as pd
|
|
24
|
+
import numpy as np
|
|
25
|
+
from chemformula import ChemFormula
|
|
26
|
+
|
|
27
|
+
#Function for quantifying gas FID data w/ external standard
|
|
28
|
+
def gasFID_ES(BreakdownDF,DBRF,Label_info,gasBag_cond,total_volume,cutoff=4):
    """
    Function quantifies gas FID data with an external standard and returns a
    breakdown dataframe together with summary dataframes

    Parameters
    ----------
    BreakdownDF : DataFrame
        Dataframe containing columns associated with matched FID and MS peak data.
        The columns 'Formula', 'Compound Name', 'FID Area' and
        'Compound Type Abbreviation' are read. The passed dataframe is not modified.
    DBRF : DataFrame
        Dataframe of response factors. The columns 'Compound Name',
        'Carbon Number' and 'RF' are read.
    Label_info : List
        Two-item list; the second item is the dictionary of compound type
        abbreviations and their expansions (the first item is not used here)
    gasBag_cond : List
        List containing gas bag temperature in C [0] and gas bag pressure in psi [1]
    total_volume : Float
        Float describing the total amount of gas estimated by the external standard volume percent, mL
    cutoff : Integer, optional
        Carbon number cutoff. Only rows with a carbon number strictly greater
        than cutoff are quantified. The default is 4.
        NOTE(review): the earlier description ("maximum cutoff carbon number that
        can be quantified using FID") reads as the opposite of this filter –
        confirm which is intended.

    Returns
    -------
    BreakdownDF : DataFrame
        Quantified rows with added response factor, molecular weight, vol.%,
        moles, mass and compound type columns
    CT_DF : DataFrame
        Mass (mg) and mass fraction by compound type
    CN_DF : DataFrame
        Mass (mg) by carbon number, from 1 to the largest carbon number present
    CTCN_DF : DataFrame
        Mass (mg) by carbon number (row position i holds carbon number i+1) and compound type (columns)
    mass_DF : DataFrame
        Total mass (mg) obtained from each of the four breakdowns above

    Raises
    ------
    KeyError
        If a compound has no direct response factor and DBRF holds no entry range
        covering its carbon number
    """

    #Compound type names used by all summary dataframes, in output order
    compound_types = ['Aromatics','Linear Alkanes','Branched Alkanes',
                      'Cycloalkanes','Alkenes/Alkynes','Other']

    #Function for assigning response factors to compounds
    def assignRF(BreakdownDF,DBRF):

        #Make sure the numeric columns read later exist even when no row has a formula
        for column in ('Carbon Number','RF (Area/vol.%)'):
            if column not in BreakdownDF.columns:
                BreakdownDF[column] = np.nan

        #Get a dictionary of average response factors by carbon number,
        #covering every carbon number from 1 up to the max in DBRF
        avgRF = {}
        max_CN = DBRF['Carbon Number'].max()
        if not pd.isna(max_CN):
            for carbon_number in range(1,int(max_CN)+1):
                #Average the response factors of all DBRF rows with this carbon number (nan if there are none)
                avgRF[carbon_number] = DBRF.loc[DBRF['Carbon Number']==carbon_number,'RF'].mean()

        #Compound names with an explicit response factor
        known_names = DBRF['Compound Name'].values

        #Loop through every row in the FIDpMS dataframe
        for i, row in BreakdownDF.iterrows():

            #Rows without a formula cannot be assigned a carbon number or response factor
            if pd.isna(row['Formula']):
                continue

            #Use the element:count dictionary of the formula to assign a carbon number to the ith row
            carbon_count = ChemFormula(row['Formula']).element['C']
            BreakdownDF.at[i,'Carbon Number'] = carbon_count

            #If the row's compound name exists in the RF list explicitly, assign the row to the appropriate RF
            if row['Compound Name'] in known_names:
                BreakdownDF.at[i,'RF (Area/vol.%)'] = DBRF.loc[DBRF['Compound Name']==row['Compound Name'],'RF'].iloc[0]
                #Assign response factor source
                BreakdownDF.at[i,'RF Source'] = 'Direct RF assignment based on compound name'

            #Otherwise, assign response factor based on average carbon number RF
            else:
                carbon_number = int(carbon_count)
                if carbon_number not in avgRF:
                    raise KeyError("No response factor available in DBRF for carbon number {0} (compound '{1}')".format(carbon_number,row['Compound Name']))
                BreakdownDF.at[i,'RF (Area/vol.%)'] = avgRF[carbon_number]
                #Assign response factor source
                BreakdownDF.at[i,'RF Source'] = 'RF assignment based on average response factor for DBRF carbon number entries'

        return BreakdownDF

    #Function for quantifying compounds using ideal gas law
    def gasQuant(BreakdownDF,DBRF,total_volume,cutoff):

        #Keep only rows with a carbon number above cutoff (rows without a carbon number are dropped too)
        BreakdownDF = BreakdownDF.loc[BreakdownDF['Carbon Number'] > cutoff].copy()

        #Convert gas bag conditions to SI units
        temp_K = gasBag_cond[0] + 273.15 #gas bag temperature, C -> K
        pressure_Pa = gasBag_cond[1] / 14.504*100000 #gas bag pressure, psi -> bar -> Pa
        volume_m3 = total_volume / 10**6 #gas bag volume, mL -> m^3
        #Define ideal gas constant, m^3*Pa/K*mol
        R = 8.314

        #Add molecular weight using ChemFormula
        BreakdownDF['MW (g/mol)'] = pd.Series([ChemFormula(formula).formula_weight for formula in BreakdownDF['Formula']],
                                              index=BreakdownDF.index,dtype=float)

        #Get volume percent using response factor
        BreakdownDF['Vol.%'] = BreakdownDF['FID Area']/BreakdownDF['RF (Area/vol.%)']

        #Get moles using ideal gas law (PV=nRT)
        BreakdownDF['Moles'] = BreakdownDF['Vol.%']/100*volume_m3*pressure_Pa/(temp_K*R)

        #Get mass (mg) using moles and molar mass
        BreakdownDF['Mass (mg)'] = BreakdownDF['Moles'] * BreakdownDF['MW (g/mol)'] * 1000

        return BreakdownDF

    #Function for further breaking down product distribution
    def moreBreakdown(BreakdownDF,CT_dict):
        """
        This function prepares further breakdown dataframes for use in exporting to Excel

        Parameters
        ----------
        BreakdownDF : DataFrame
            Dataframe containing columns associated with matched FID and MS peak data.
        CT_dict : Dict
            Dictionary of all compound type abbreviations in use and their associated expansions

        Returns
        -------
        BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF : DataFrame
            The breakdown dataframe with 'Compound Type' added, followed by the
            compound type, carbon number, compound type + carbon number and
            total mass dataframes.

        """

        #Get the total mass of product from the breakdown dataframe
        m_total = np.nansum(BreakdownDF['Mass (mg)'])

        #Iterate through every species in the breakdown dataframe and fill two columns: Compound Type and Carbon Number
        for i, row in BreakdownDF.iterrows():
            #Rows whose abbreviation or formula cannot be interpreted are deliberately left as they are
            try:
                #Set breakdown compound type according to the abbreviation already in the breakdown dataframe
                BreakdownDF.at[i,'Compound Type'] = CT_dict[BreakdownDF.at[i,'Compound Type Abbreviation']]
                #Use the element:count dictionary of the formula to assign a carbon number to the ith row
                BreakdownDF.at[i,'Carbon Number'] = ChemFormula(row['Formula']).element['C']
            except Exception:
                continue

        #The column is only created by the loop above when at least one row was labelled
        if 'Compound Type' not in BreakdownDF.columns:
            BreakdownDF['Compound Type'] = np.nan

        #Get maximum carbon number in breakdown dataframe (zero when nothing was quantified)
        CN_top = BreakdownDF['Carbon Number'].max()
        CN_max = 0 if pd.isna(CN_top) else int(CN_top)

        #Create a dataframe for saving quantitative results organized by compound type
        CT_DF = pd.DataFrame({'Compound Type':compound_types,
                              'Mass (mg)':np.zeros(len(compound_types)),
                              'Mass fraction':np.zeros(len(compound_types))})

        #Create a dataframe for saving quantitative results organized by carbon number
        CN_DF = pd.DataFrame({'Carbon Number':range(1,CN_max+1,1),
                              'Mass (mg)':np.zeros(CN_max)})

        #Create a dataframe for saving quantitative results organized by both compound type and carbon number
        CTCN_DF = pd.DataFrame({compound_type:pd.Series(np.zeros(CN_max),index=range(CN_max))
                                for compound_type in compound_types})

        #Sum the mass of every compound type; a zero total mass gives nan/inf fractions without a warning
        with np.errstate(divide='ignore',invalid='ignore'):
            for i, row in CT_DF.iterrows():
                #All breakdown rows matching the ith compound type
                type_rows = BreakdownDF.loc[BreakdownDF['Compound Type'] == row['Compound Type']]
                #Mass of the ith compound type, treating nan as zero
                type_mass = np.nansum(type_rows['Mass (mg)'])
                CT_DF.at[i,'Mass (mg)'] = type_mass
                #Mass fraction of the ith compound type using the total mass from earlier
                CT_DF.at[i,'Mass fraction'] = type_mass/m_total

        #Sum the mass of every carbon number
        for i, row in CN_DF.iterrows():
            #All breakdown rows matching the ith carbon number
            number_rows = BreakdownDF.loc[BreakdownDF['Carbon Number'] == row['Carbon Number']]
            #Mass of the ith carbon number, treating nan as zero
            CN_DF.at[i,'Mass (mg)'] = np.nansum(number_rows['Mass (mg)'])

        #Sum the mass of every compound type - carbon number pair
        for i in CTCN_DF.index:
            for compound_type in CTCN_DF.columns:
                #Row position i corresponds to carbon number i+1
                pair_rows = BreakdownDF.loc[(BreakdownDF['Carbon Number'] == i+1) & (BreakdownDF['Compound Type'] == compound_type)]
                CTCN_DF.at[i,compound_type] = np.nansum(pair_rows['Mass (mg)'])

        #Get total masses from CT, CN, and CTCN dataframes
        CT_mass = np.nansum(CT_DF['Mass (mg)'])
        CN_mass = np.nansum(CN_DF['Mass (mg)'])
        CTCN_mass = np.nansum(CTCN_DF.to_numpy())

        #Create total mass dataframe
        mass_DF = pd.DataFrame({'Total mass source':['Overall breakdown','Compound Type Breakdown','Carbon Number Breakdown','Compound Type + Carbon Number Breakdown'],'Mass (mg)':[m_total,CT_mass,CN_mass,CTCN_mass]})

        return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF

    #Unpack chemical lump and compound type dictionaries from list (only the compound type dictionary is used)
    CL_Dict, CT_Dict = Label_info

    #Filter dataframe to remove compounds that do not contain carbon
    #A 'C' followed by a lowercase letter is another element (Cl, Ca, Cu, ...), not carbon
    #Rows without a formula pass this filter and are dropped later by the carbon number cutoff
    has_carbon = BreakdownDF['Formula'].str.contains(r'C(?![a-z])',na=True).astype(bool)
    #Reset the dataframe index so row labels are unique and consecutive
    BreakdownDF = BreakdownDF.loc[has_carbon].reset_index(drop=True)

    #Run response factor assignment function
    BreakdownDF = assignRF(BreakdownDF, DBRF)
    #Run gas quantification function
    BreakdownDF = gasQuant(BreakdownDF,DBRF,total_volume,cutoff)
    #Run further breakdown function
    BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF = moreBreakdown(BreakdownDF, CT_Dict)

    return BreakdownDF, CT_DF, CN_DF, CTCN_DF, mass_DF
|