chromaquant 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,417 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ COPYRIGHT STATEMENT:
5
+
6
+ ChromaQuant – A quantification software for complex gas chromatographic data
7
+
8
+ Copyright (c) 2024, by Julia Hancock
9
+ Affiliation: Dr. Julie Elaine Rorrer
10
+ URL: https://www.rorrerlab.com/
11
+
12
+ License: BSD 3-Clause License
13
+
14
+ ---
15
+
16
+ SCRIPT FOR PERFORMING QUANTIFICATION STEPS
17
+
18
+ Julia Hancock
19
+ Started 12-29-2024
20
+
21
+ """
22
+
23
+ """ PACKAGES """
24
+ import sys
25
+ import pandas as pd
26
+ import os
27
+ from molmass import Formula
28
+ import math
29
+ import numpy as np
30
+ from chemformula import ChemFormula
31
+ import json
32
+ from datetime import datetime
33
+ import logging
34
+ import importlib.util
35
+
36
+ """ LOCAL PACKAGES """
37
+
38
#Package root: two directory levels above the location of this file
app_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

#Absolute path of each local subpackage's __init__.py, keyed by subpackage name
subpack_dir = {
    name: os.path.join(app_dir, *parts, '__init__.py')
    for name, parts in (
        ('Handle', ('Handle',)),
        ('Manual', ('Manual',)),
        ('QuantSub', ('Quant', 'QuantSub')),
    )
}
45
+
46
+ #Define function to import from path
47
def import_from_path(module_name,path):
    """
    Import the Python source file at ``path`` as a module named ``module_name``.

    The module is registered in ``sys.modules`` under ``module_name`` before it
    is executed, so code in the file can look itself up while it loads.

    Parameters
    ----------
    module_name : str
        Name to give the module and to use as its key in ``sys.modules``.
    path : str or path-like
        Location of the source file to execute.

    Returns
    -------
    module
        The fully executed module object.

    Raises
    ------
    ImportError
        If no import spec (or no loader) can be built for ``path``, for
        example because its file extension is not an importable one.
    Exception
        Whatever executing the file raises is propagated unchanged, after the
        partially initialised module has been removed from ``sys.modules``.
    """
    #Define spec
    spec = importlib.util.spec_from_file_location(module_name,path)
    #spec_from_file_location returns None when no loader handles the path
    if spec is None or spec.loader is None:
        raise ImportError("Cannot build an import spec for module {!r} from {!r}".format(module_name,path),
                          name=module_name)
    #Define module
    module = importlib.util.module_from_spec(spec)
    #Expand sys.modules dict
    sys.modules[module_name] = module
    #Load module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        #Do not leave a half-initialised module behind for later lookups
        sys.modules.pop(module_name,None)
        raise
    return module
57
+
58
#Load each local subpackage directly from its __init__.py file
hd = import_from_path("hd", subpack_dir['Handle'])
mn = import_from_path("mn", subpack_dir['Manual'])
#NOTE: QuantSub is registered in sys.modules under "qt" but bound here as qtsb
qtsb = import_from_path("qt", subpack_dir['QuantSub'])

""" VARIABLES FOR TESTING """

#Default sample name and phase selection used when testing this script
sname, quantphases = 'example2', 'LG'
67
+
68
+ """ DIRECTORIES """
69
+
70
def mainQuant(sname,quantphases,quantmodel):
    """
    Quantify one sample's liquid and/or gas data and save an Excel breakdown.

    The function reads the analysis configuration, the sample's INFO json, the
    most recent response factor files and the sample's raw peak tables, passes
    them to the QuantSub routines, and writes the resulting dataframes to
    ``<data>/<sname>/breakdowns/<sname>_Breakdown_<YYYYMMDD>.xlsx``.

    Parameters
    ----------
    sname : str
        Sample name. Used as the sample's folder name inside the data
        directory and as the prefix of its data file names.
    quantphases
        Phase selection passed to ``qtsb.evalRunParam``. The value returned by
        that call is indexed as ``[run liquid analysis, run gas analysis]``.
    quantmodel : str
        Gas TCD model, only consulted when gas analysis runs:
        'C' (volume estimation), 'S' (scale factor) or 'I' (internal standard).

    Returns
    -------
    None

    Raises
    ------
    SystemExit
        If ``qtsb.evalRunParam`` returns None (no phases selected).
    ValueError
        If gas analysis is requested with a ``quantmodel`` other than
        'C', 'S' or 'I'.
    """

    print("[quantMain] Beginning quantification...")

    #Get current time, used at the end to report the computation time
    print("[quantMain] Getting current time...")
    now = datetime.now()

    print("[quantMain] Getting directories...")
    #Get directories from handling script
    directories = hd.handle(app_dir)

    #Sample-specific directories: log, breakdowns and raw data
    sample_dir = os.path.join(directories['data'],sname)
    directories['log'] = os.path.join(sample_dir,'log')
    directories['break'] = os.path.join(sample_dir,'breakdowns')
    directories['raw'] = os.path.join(sample_dir,'raw data')

    #ANALYSIS CONFIGURATION

    print("[quantMain] Interpreting analysis configuration...")
    #Read analysis configuration file
    with open(os.path.join(directories['resources'],'analysis-config.json')) as f:
        analysis_config = json.load(f)

    #File suffixes to add to form data filenames
    file_suffix = analysis_config['file-suffix']

    #Acceptable peak errors for matching
    peak_errors = analysis_config['peak-errors']

    #Dictionary of compound lumps
    CL_Dict = analysis_config['CL_Dict']

    #Dictionary of compound types
    CT_Dict = analysis_config['CT_Dict']

    #Atmospheric pressure
    P_0 = analysis_config['atmospheric-conditions']['P_0']

    #Response factor file names
    RF_file_names = analysis_config['RF-file-names']

    #EVALUATING PARAMETERS

    print("[quantMain] Evaluating run parameters...")

    #Define liquid-gas Boolean for running analysis
    lgTF = qtsb.evalRunParam(quantphases)

    #If liquid-gas Boolean is None, terminate quantification
    if lgTF is None:
        print("[quantMain] No phases selected, terminating script")
        #Terminate script
        sys.exit()

    run_liquid, run_gas = lgTF[0], lgTF[1]

    #Define peak error using analysis-config
    peak_error = peak_errors['peak-error-third']

    #Define temperature and pressure of gas bag used in sample injection
    injection_cond = analysis_config['sample-injection-conditions']
    gasBag_temp = injection_cond['gas-bag-temp-C'] #C
    gasBag_pressure = injection_cond['gas-bag-pressure-psia'] #psi

    #RESPONSE FACTOR INFO

    print("[quantMain] Searching for response factors...")
    #Liquid response factor file path
    LRF_path = qtsb.findRecentFile(RF_file_names['Liquid FID'],'.xlsx',directories['rf'])
    #FID gas response factor file path
    FIDRF_path = qtsb.findRecentFile(RF_file_names['Gas FID'],'.csv',directories['rf'])
    #TCD gas response factor file path
    TCDRF_path = qtsb.findRecentFile(RF_file_names['TCD'],'.csv',directories['rf'])
    #TCD gas internal standard response factor file path
    TCDRF_IS_path = qtsb.findRecentFile(RF_file_names['TCD IS'],'.csv',directories['rf'])

    #DATA IMPORTS

    print("[quantMain] Importing data...")
    #Import sample information from json file
    with open(os.path.join(sample_dir,sname+'_INFO.json')) as sinfo_f:
        sinfo = json.load(sinfo_f)

    #Change ISO date-time strings into datetime objects
    sinfo['Start Time'] = datetime.fromisoformat(sinfo['Start Time'])
    sinfo['End Time'] = datetime.fromisoformat(sinfo['End Time'])

    #Calculate a reaction time using the start, end, and heat time values and add to sinfo
    sinfo['Reaction Time'] = abs(sinfo['End Time']-sinfo['Start Time']).total_seconds()/3600 - sinfo['Heat Time']

    #Get the reactor conditions
    P_f = sinfo['Quench Pressure (psi)']
    V_R = sinfo['Reactor Volume (mL)']

    #Use sample name to form file names using file_suffix and append full pathnames for all entries
    for key in file_suffix:
        suffix = file_suffix[key][0]
        file_suffix[key] = [suffix,os.path.join(directories['raw'],sname+suffix)]

    #If the run liquid analysis Boolean is True..
    if run_liquid:
        #Read matched peak data between liquid FID and MS
        LQ1_FIDpMS = pd.read_csv(file_suffix['Liquid FID+MS'][1])

        #Filter FIDpMS to only include rows with non-NaN compounds
        LQ1_FIDpMS_Filtered = LQ1_FIDpMS[LQ1_FIDpMS['Compound Name'].notnull()].reset_index(drop=True)

        #Create a duplicate of the FIDpMS dataframe for future saving as a breakdown
        LQ_FID_BreakdownDF = LQ1_FIDpMS_Filtered.copy()

        #Read liquid response factors data, one sheet per compound lump key
        LQRF = {i:pd.read_excel(LRF_path,sheet_name=i) for i in CL_Dict}

        print("[quantMain] Analyzing liquids...")
        #Get liquid FID breakdown and miscellaneous dataframes
        LQ_FID_BreakdownDF, LQCT_DF, LQCN_DF, LQCTCN_DF, LQmass_DF = qtsb.liquidFID(LQ_FID_BreakdownDF, LQRF, [CL_Dict, CT_Dict], sinfo)

        #Insert the carbon number column to LQCTCN_DF
        LQCTCN_DF = qtsb.insertCN(LQCTCN_DF)

    #Scale factor is only produced by the Scale Factor gas model
    SF = None

    #If the run gas analysis Boolean is True..
    if run_gas:

        #Reject unknown models up front instead of failing later on an unbound V_TC
        if quantmodel not in ('C','S','I'):
            raise ValueError("Unrecognized quantmodel {!r}: expected 'C', 'S' or 'I'".format(quantmodel))

        #Read gas FID and TCD Peak data
        GS2_TCD = pd.read_csv(file_suffix['Gas TCD+FID'][1])

        #Create a duplicate of the gas TCD/FID dataframe for future saving as a breakdown
        #Also filter breakdown dataframe to only include rows sourced from TCD
        GS_TCD_BreakdownDF = GS2_TCD.loc[GS2_TCD['Signal Name'] == 'TCD2B'].copy()

        #Read matched peak data between gas FID and MS
        GS2_FIDpMS = pd.read_csv(file_suffix['Gas FID+MS'][1])

        #Create a duplicate of the FIDpMS dataframe for future saving as a breakdown
        GS_FID_BreakdownDF = GS2_FIDpMS.copy()

        #Read gas TCD response factors data
        TCDRF = pd.read_csv(TCDRF_path)
        #Read gas TCD IS response factors data
        TCDRF_IS = pd.read_csv(TCDRF_IS_path)
        #Read gas FID response factors data
        GSRF = pd.read_csv(FIDRF_path)

        print("[quantMain] Analyzing gases...")

        #Gas bag conditions and reactor conditions shared by the TCD models
        gasBag_cond = [gasBag_temp,gasBag_pressure,sinfo['Injected CO2 (mL)']]
        reactor_cond = [P_f, V_R, P_0]

        #If the model to be used is Volume Estimation...
        if quantmodel == 'C':
            GS_TCD_BreakdownDF, V_TC = qtsb.gasTCD_VE(GS_TCD_BreakdownDF,TCDRF,gasBag_cond,peak_error)

        #Otherwise if the model to be used is Scale Factor...
        elif quantmodel == 'S':
            GS_TCD_BreakdownDF, V_TC, SF = qtsb.gasTCD_SF(GS_TCD_BreakdownDF,TCDRF,gasBag_cond,
                                                          reactor_cond,peak_error)

        #Otherwise the model to be used is Internal Standard
        else:
            GS_TCD_BreakdownDF, V_TC = qtsb.gasTCD_IS(GS_TCD_BreakdownDF,TCDRF_IS,gasBag_cond,
                                                      reactor_cond,peak_error)

        #Get gas FID breakdown and miscellaneous dataframes
        GS_FID_BreakdownDF, GSCT_DF, GSCN_DF, GSCTCN_DF, GSmass_DF = qtsb.gasFID_ES(GS_FID_BreakdownDF,GSRF,
                                                                                    [CL_Dict, CT_Dict],
                                                                                    [gasBag_temp,gasBag_pressure],
                                                                                    V_TC)

        #Insert the carbon number column to GSCTCN_DF
        GSCTCN_DF = qtsb.insertCN(GSCTCN_DF)

    #If both the gas and liquid analysis Booleans are True..
    if run_liquid and run_gas:
        print("[quantMain] Totaling contributions from liquid and gas phases...")
        #Get maximum carbon number between breakdown dataframes
        CN_max = max(int(GS_FID_BreakdownDF['Carbon Number'].max()),int(LQ_FID_BreakdownDF['Carbon Number'].max()))

        #Initiate a zero-filled CTCN dataframe with one row per carbon number.
        #Zeros (not np.empty) so rows beyond both the liquid and gas tables hold 0, not uninitialized memory
        CT_columns = ['Aromatics','Linear Alkanes','Branched Alkanes','Cycloalkanes','Alkenes/Alkynes','Other']
        total_CTCN_DF = pd.DataFrame(np.zeros((CN_max,len(CT_columns))),columns=CT_columns)

        #Number of rows available in the liquid and gas CTCN dataframes
        n_liquid = len(LQCTCN_DF.index)
        n_gas = len(GSCTCN_DF.index)

        #For every row in this sum dataframe...
        for i in total_CTCN_DF.index:
            in_liquid = i < n_liquid
            in_gas = i < n_gas
            #For every compound type column in this row...
            for j in CT_columns:
                #If the current index exists in both the gas and liquid dataframes, sum them
                if in_liquid and in_gas:
                    total_CTCN_DF.at[i,j] = LQCTCN_DF.at[i,j] + GSCTCN_DF.at[i,j]
                #Otherwise, if it exists in only the liquid dataframe...
                elif in_liquid:
                    total_CTCN_DF.at[i,j] = LQCTCN_DF.at[i,j]
                #Otherwise, if it exists in only the gas dataframe...
                elif in_gas:
                    total_CTCN_DF.at[i,j] = GSCTCN_DF.at[i,j]

        #Add the TCD data afterwards
        #Filter the TCD breakdown dataframe to only include entries with non-nan formulas
        GS_TCD_BreakdownDF_filter = GS_TCD_BreakdownDF[GS_TCD_BreakdownDF['Formula'].notnull()]
        #Filter the TCD breakdown dataframe to only include formulas containing both 'C' and 'H'
        has_C = GS_TCD_BreakdownDF_filter['Formula'].str.contains('C')
        has_H = GS_TCD_BreakdownDF_filter['Formula'].str.contains('H')
        GS_TCD_BreakdownDF_filter = GS_TCD_BreakdownDF_filter[has_C & has_H]

        #NOTE(review): the assignments below overwrite whatever FID-derived value is already
        #in the cell rather than adding to it -- confirm this is the intended behaviour
        #For every row in this filtered TCD dataframe
        for _, row in GS_TCD_BreakdownDF_filter.iterrows():
            #Get a chemical formula dictionary for the row's formula
            chemFormDict = ChemFormula(row['Formula']).element
            #If the carbon number is less than four...
            if chemFormDict['C'] < 4:
                #Assign the mass value to the linear entry for the given carbon number in the total dataframe
                total_CTCN_DF.at[chemFormDict['C']-1,'Linear Alkanes'] = row['Mass (mg)']
            #Otherwise, if the compound is isobutane...
            elif row['Compound Name'] == 'Isobutane':
                #Assign the mass value to the branched entry for carbon number 4 in the total dataframe
                total_CTCN_DF.at[3,'Branched Alkanes'] = row['Mass (mg)']
            #Otherwise, if the compound is butane...
            elif row['Compound Name'] == 'n-Butane':
                #Assign the mass value to the linear entry for carbon number 4 in the total dataframe
                total_CTCN_DF.at[3,'Linear Alkanes'] = row['Mass (mg)']

        #Insert the carbon number column to total_CTCN_DF
        total_CTCN_DF = qtsb.insertCN(total_CTCN_DF)

    #BREAKDOWN SAVING
    print("[quantMain] Formatting and saving breakdown file...")
    #If breakdown directory does not exist within sample folder, create it
    os.makedirs(directories['break'],exist_ok=True)

    #Get current date string
    nows = datetime.now().strftime('%Y%m%d')

    #Define breakdown file name
    bfn = sname+"_Breakdown_"+nows+".xlsx"

    #Get dataframe for sample info
    sinfo_DF = pd.DataFrame(sinfo,index=[0])

    #Create pandas Excel writer; the with-block closes it even if a write fails
    with pd.ExcelWriter(hd.fileCheck(os.path.join(directories['break'],bfn)), engine="xlsxwriter") as writer:

        #If the run liquid analysis Boolean is True..
        if run_liquid:
            #Position the liquid FID dataframes in the worksheet.
            sinfo_DF.to_excel(writer, sheet_name="Liquid FID",startcol=1, startrow=1, index=False)
            LQ_FID_BreakdownDF.to_excel(writer, sheet_name="Liquid FID",startcol=1, startrow=4, index=False)
            LQCT_DF.to_excel(writer, sheet_name="Liquid FID",startcol=16, startrow=7, index=False)
            LQCN_DF.to_excel(writer, sheet_name="Liquid FID", startcol=16, startrow=15, index=False)
            LQmass_DF.to_excel(writer, sheet_name="Liquid FID",startcol=22, startrow=4,index=False)
            LQCTCN_DF.to_excel(writer, sheet_name="Liquid FID", startcol=20, startrow=10, index=False)

        #If the run gas analysis Boolean is True..
        if run_gas:

            #Expand sample info dataframe to include total TCD mass and gas bag volume
            sinfo_DF.at[0,'Total product (mg)'] = GS_TCD_BreakdownDF['Mass (mg)'].sum()
            sinfo_DF.at[0,'Gas bag volume with CO2 (mL)'] = V_TC

            #If the Scale Factor method was used...
            if quantmodel == 'S':
                #Expand sample info dataframe to include scale factor
                sinfo_DF.at[0,'Scale Factor'] = SF

            #Position the gas FID dataframes in the worksheet.
            sinfo_DF.to_excel(writer, sheet_name="Gas FID",startcol=1, startrow=1, index=False)
            GS_FID_BreakdownDF.to_excel(writer, sheet_name="Gas FID",startcol=1, startrow=4, index=False)
            GSCT_DF.to_excel(writer, sheet_name="Gas FID",startcol=18, startrow=7, index=False)
            GSCN_DF.to_excel(writer, sheet_name="Gas FID", startcol=18, startrow=15, index=False)
            GSmass_DF.to_excel(writer, sheet_name="Gas FID",startcol=22, startrow=4,index=False)
            GSCTCN_DF.to_excel(writer, sheet_name="Gas FID",startcol=22, startrow=10,index=False)

            #Position the gas TCD dataframes in the worksheet
            GS_TCD_BreakdownDF.to_excel(writer, sheet_name="Gas TCD",startcol=1,startrow=4, index=False)
            sinfo_DF.to_excel(writer, sheet_name="Gas TCD",startcol=1, startrow=1, index=False)

        #If both the gas and liquid analysis Booleans are True..
        if run_liquid and run_gas:
            #Position the total product dataframe in the worksheet
            total_CTCN_DF.to_excel(writer, sheet_name = "Total",startcol=1, startrow=1,index=False)

    print("[quantMain] Quantification complete.")

    #Print computation time
    compTime = datetime.now().timestamp()*1000 - now.timestamp()*1000
    print("[quantMain] Time taken: {:.3f} ms".format(compTime))

    #Close main function by returning
    return None
416
+
417
+ #mainQuant(sname,quantphases)
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ ChromaQuant package initialization
6
+
7
+ Julia Hancock
8
+ Created 12-05-2024
9
+
10
+ """
11
+
12
+ from .uappMain import mainUAPP