chromaquant 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. chromaquant/__init__.py +9 -2
  2. chromaquant/data/__init__.py +14 -0
  3. chromaquant/data/breakdown.py +430 -0
  4. chromaquant/data/dataset.py +195 -0
  5. chromaquant/data/table.py +412 -0
  6. chromaquant/data/value.py +215 -0
  7. chromaquant/formula/__init__.py +13 -0
  8. chromaquant/formula/base_formulas.py +168 -0
  9. chromaquant/formula/formula.py +507 -0
  10. chromaquant/import_local_packages.py +55 -0
  11. chromaquant/logging_and_handling.py +76 -0
  12. chromaquant/match/__init__.py +13 -0
  13. chromaquant/match/match.py +184 -0
  14. chromaquant/match/match_config.py +296 -0
  15. chromaquant/match/match_tools.py +154 -0
  16. chromaquant/{Quant → results}/__init__.py +2 -2
  17. chromaquant/results/reporting_tools.py +190 -0
  18. chromaquant/results/results.py +250 -0
  19. chromaquant/utils/__init__.py +14 -0
  20. chromaquant/utils/categories.py +127 -0
  21. chromaquant/utils/chemical_formulas.py +104 -0
  22. chromaquant/utils/dataframe_processing.py +222 -0
  23. chromaquant/utils/file_tools.py +100 -0
  24. chromaquant/utils/formula_tools.py +119 -0
  25. chromaquant-0.5.0.dist-info/METADATA +61 -0
  26. chromaquant-0.5.0.dist-info/RECORD +29 -0
  27. {chromaquant-0.4.0.dist-info → chromaquant-0.5.0.dist-info}/WHEEL +1 -1
  28. {chromaquant-0.4.0.dist-info → chromaquant-0.5.0.dist-info}/licenses/LICENSE.txt +1 -1
  29. chromaquant-0.5.0.dist-info/licenses/LICENSES_bundled.txt +251 -0
  30. chromaquant/Handle/__init__.py +0 -13
  31. chromaquant/Handle/fileChecks.py +0 -172
  32. chromaquant/Handle/handleDirectories.py +0 -89
  33. chromaquant/Hydro/__init__.py +0 -12
  34. chromaquant/Hydro/hydroMain.py +0 -496
  35. chromaquant/Manual/HydroUI.py +0 -418
  36. chromaquant/Manual/QuantUPP.py +0 -373
  37. chromaquant/Manual/Quantification.py +0 -1305
  38. chromaquant/Manual/__init__.py +0 -10
  39. chromaquant/Manual/duplicateMatch.py +0 -211
  40. chromaquant/Manual/fpm_match.py +0 -798
  41. chromaquant/Manual/label-type.py +0 -179
  42. chromaquant/Match/AutoFpmMatch.py +0 -1133
  43. chromaquant/Match/MatchSub/__init__.py +0 -13
  44. chromaquant/Match/MatchSub/matchTools.py +0 -282
  45. chromaquant/Match/MatchSub/peakTools.py +0 -259
  46. chromaquant/Match/__init__.py +0 -13
  47. chromaquant/Match/matchMain.py +0 -233
  48. chromaquant/Quant/AutoQuantification.py +0 -1329
  49. chromaquant/Quant/QuantSub/__init__.py +0 -15
  50. chromaquant/Quant/QuantSub/gasFID.py +0 -241
  51. chromaquant/Quant/QuantSub/gasTCD.py +0 -425
  52. chromaquant/Quant/QuantSub/liquidFID.py +0 -310
  53. chromaquant/Quant/QuantSub/parseTools.py +0 -162
  54. chromaquant/Quant/quantMain.py +0 -417
  55. chromaquant/UAPP/__init__.py +0 -12
  56. chromaquant/UAPP/uappMain.py +0 -427
  57. chromaquant/__main__.py +0 -526
  58. chromaquant/oldui.py +0 -492
  59. chromaquant/properties.json +0 -4
  60. chromaquant-0.4.0.dist-info/METADATA +0 -189
  61. chromaquant-0.4.0.dist-info/RECORD +0 -38
  62. chromaquant-0.4.0.dist-info/entry_points.txt +0 -2
  63. chromaquant-0.4.0.dist-info/licenses/LICENSES_bundled.txt +0 -1035
@@ -1,1133 +0,0 @@
1
- """
2
-
3
- COPYRIGHT STATEMENT:
4
-
5
- ChromaQuant – A quantification software for complex gas chromatographic data
6
-
7
- Copyright (c) 2024, by Julia Hancock
8
- Affiliation: Dr. Julie Elaine Rorrer
9
- URL: https://www.rorrerlab.com/
10
-
11
- License: BSD 3-Clause License
12
-
13
- ---
14
-
15
- SCRIPT WHICH MATCHES FID AND MS PEAKS
16
-
17
- Julia Hancock
18
- Started 12/29/2023
19
-
20
- """
21
- """ PACKAGES """
22
- import sys
23
- import pandas as pd
24
- import os
25
- from molmass import Formula
26
- import math
27
- import numpy as np
28
- from chemformula import ChemFormula
29
- import json
30
- from datetime import datetime
31
- import logging
32
- import scipy
33
-
34
- """ FID AND MS MATCHING MAIN FUNCTION"""
35
- def main_AutoFpmMatch(sname,sphase,splab_TF,model,directories):
36
-
37
- """ DIRECTORIES """
38
- print("[AutoFpmMatch] Finding directories...")
39
-
40
- #Unpack directories from passed variable
41
- #Primary files directory
42
- files = directories['files']
43
- #Resources directory
44
- RE_Dir = directories['resources']
45
- #Theme directory
46
- theme_Dir = directories['theme']
47
- #Response factor directory
48
- RF_Dir = directories['rf']
49
- #Data directory
50
- DF_Dir = directories['data']
51
- #Images directory
52
- img_Dir = directories['images']
53
- #Data file log directory
54
- DFlog_Dir = os.path.join(DF_Dir,sname,'log')
55
- #Data file breakdowns directory
56
- DFbreak_Dir = os.path.join(DF_Dir,sname,'breakdowns')
57
- #Raw data file directory
58
- Raw_Dir = os.path.join(DF_Dir,sname,'raw data')
59
-
60
- #Dictionary of substrings to add to sample name to create file names
61
- sub_Dict = {'Gas TCD+FID':['_GS2_TCD_CSO.csv'],
62
- 'Gas Labelled MS Peaks':['_GS1_UA_Comp_UPP.csv'],
63
- 'Gas FID+MS':['_GS2_FIDpMS.csv'],
64
- 'Liquid FID':['_LQ1_FID_CSO.csv'],
65
- 'Liquid Labelled MS Peaks':['_LQ1_UA_Comp_UPP.csv'],
66
- 'Liquid FID+MS':['_LQ1_FIDpMS.csv'],
67
- 'Info':['_INFO.json']}
68
-
69
- """ PARAMETERS """
70
- print("[AutoFpmMatch] Defining parameters...")
71
- #Default third order fit arguments for gas FID and MS peak matching
72
- #a (x^3)
73
- a_tof = 0.0252
74
- #b (x^2)
75
- b_tof = -0.5274
76
- #c (x)
77
- c_tof = 4.8067
78
- #d
79
- d_tof = -3.0243
80
- #Combine into a list
81
- fit_const = [a_tof,b_tof,c_tof,d_tof]
82
-
83
- """ PROCESSING SYSTEM ARGUMENTS """
84
- print("[AutoFpmMatch] Processing system arguments...")
85
- #Specify the allowable error for linear, third order, and speculative peak matching
86
- peakError = 0.06
87
-
88
- #Specify the allowable error for direct FID-MS RT matching
89
- peakErrorRT = 0.05
90
-
91
- #Specify the restrictions and preferences to be implemented in speculative labeling
92
- #The first list contains properties which must match in order for something to be labelled
93
- #The second dictionary contains properties which are preferred in deciding between multiple matches
94
- #The dictionary should have key:value pairs of the form "kc_rsc":"allowable error between speculative entry and sample value"
95
- #The preferences listed are applied in order such that the first preference is more valued than the last
96
- restrictList = [['Gas'],{'Reaction Temperature (C)':5}]
97
-
98
- """ COMPOUND TYPE ASSIGNMENT VARIABLES """
99
- print("[AutoFpmMatch] Defining compound type variables...")
100
- #This dictionary contain lists of substrings to be checked against compound name strings to
101
- #assign a compound type
102
-
103
- #Six compound types exist: linear alkanes (L), branched alkanes (B), aromatics (A), cycloalkanes (C),
104
- #alkenes/alkynes (E), and other (O)
105
-
106
- #Each compound type abbreviation will have an entry in the dictionary corresponding to a list of
107
- #substrings to be checked against a compound name string
108
-
109
- contains = {
110
- "L":["methane","ethane","propane","butane","pentane","hexane","heptane","octane","nonane",
111
- "decane","undecane","hendecane","dodecane","tridecane","tetradecane","pentadecane","hexadecane","heptadecane","octadecane","nonadecane",
112
- "icosane","eicosane","heneicosane","henicosane","docosane","tricosane","tetracosane","pentacosane","hexacosane","cerane","heptacosane","octacosane","nonacosane",
113
- "triacontane","hentriacontane","untriacontane","dotriacontane","dicetyl","tritriacontane","tetratriacontane","pentatriacontane","hexatriacontane","heptatriacontane","octatriacontane","nonatriacontane",
114
- "tetracontane","hentetracontane","dotetracontane","tritetracontane","tetratetracontane","pentatetracontane","hexatetracontane","heptatetracontane","octatetracontane","nonatetracontane","pentacontane"],
115
-
116
- "B":["iso","neo","methyl","ethyl","propyl","butyl","pentyl","hexyl","heptyl","octyl","nonyl",
117
- "decyl","undecyl","dodecyl","tridecyl","tetradecyl","pentadecyl","hexadecyl","heptadecyl","octadecyl","nonadecyl",
118
- "icosyl","eicosyl","heneicosyl","henicosyl","docosyl","tricosyl","tetracosyl","pentacosyl","hexacosyl","heptacosyl","octacosyl","nonacosyl",
119
- "triacontyl","hentriacontyl","untriacontyl","dotriacontyl","tritriacontyl","tetratriacontyl","pentatriacontyl","hexatriacontyl","heptatriacontyl","octatriacontyl","nonatriacontyl",
120
- "tetracontyl","hentetracontyl","dotetracontyl","tritetracontyl","tetratetracontyl","pentatetracontyl","hexatetracontyl","heptatetracontyl","octatetracontyl","nonatetracontyl","pentacontyl"],
121
-
122
- "A":["benzyl","benzo","phenyl","benzene","toluene","xylene","mesitylene","durene","naphthalene","fluorene","anthracene","phenanthrene","phenalene",
123
- "tetracene","chrysene","triphenylene","pyrene","pentacene","perylene","corannulene","coronene","ovalene","indan","indene","tetralin","decahydronaphthalene","decalin"],
124
-
125
- "C":["cyclo","menthane"],
126
-
127
- "E":["ene","yne"],
128
-
129
- "O":[]}
130
-
131
- #Tuple of contains keys in order of priority
132
- keyLoop = ('A','C','E','B','L')
133
-
134
- #Tuple of elements to be excluded and automatically labelled as 'O'
135
- elementExclude = ('He','Li','Be','B','N','O','F','Ne','Na','Mg','Al','Si','P',\
136
- 'S','Cl','Ar','K','Ca','Sc','Ti','V','Cr','Mn','Fe','Co',\
137
- 'Ni','Cu','Zn')
138
-
139
- """ LOGGING """
140
- print("[AutoFpmMatch] Initializing logging [WIP]...")
141
- #Get current datetime
142
- now = datetime.now()
143
- #Get current datetime string
144
- nows = now.strftime('%Y%m%d')
145
-
146
- #If log directory does not exist within sample folder, create it
147
- if not os.path.exists(DFlog_Dir):
148
- os.makedirs(DFlog_Dir)
149
-
150
- #Instantiate a logger
151
- logger = logging.getLogger(__name__)
152
- #Initialize logging file using current datetime
153
- fh = logging.FileHandler(os.path.join(DFlog_Dir,'quantlog_'+nows+'.log'))
154
- logger.addHandler(fh)
155
- #Set logging level
156
- logger.setLevel(logging.INFO)
157
- #Create a formatter and assign to logger
158
- formatter = logging.Formatter('[%(filename)s] %(asctime)s - [%(levelname)s]: %(message)s')
159
- fh.setFormatter(formatter)
160
-
161
- """ FUNCTIONS """
162
- print("[AutoFpmMatch] Defining functions...")
163
- #Function for selecting FID peak, MS peak, and FIDpMS pathnames according to sample name and phase
164
- def fileNamer(sname,sphase,sub_Dict,pathData):
165
- """
166
- Parameters
167
- ----------
168
- sname : STR
169
- The name of the sample.
170
- sphase : STR
171
- A string that describes whether sample is gas ("G") or liquid ("L").
172
- sub_Dict : Dict
173
- A dictionary of substrings to add to sample name to create file names
174
- pathData : STR
175
- A string containing the pathname to the datafiles directory
176
-
177
- Returns
178
- -------
179
- paths : List
180
- A list of pathnames to return.
181
-
182
- """
183
- #If sample phase is liquid, set pathnames accordingly
184
- if sphase == "L":
185
- pathFID = os.path.join(pathData,sname+sub_Dict['Liquid FID'][0])
186
- pathMS = os.path.join(pathData,sname+sub_Dict['Liquid Labelled MS Peaks'][0])
187
- pathFIDpMS = os.path.join(pathData,sname+sub_Dict['Liquid FID+MS'][0])
188
-
189
- #Else if sample phase is gas, set pathnames accordingly
190
- elif sphase == "G":
191
- pathFID = os.path.join(pathData,sname+sub_Dict['Gas TCD+FID'][0])
192
- pathMS = os.path.join(pathData,sname+sub_Dict['Gas Labelled MS Peaks'][0])
193
- pathFIDpMS = os.path.join(pathData,sname+sub_Dict['Gas FID+MS'][0])
194
-
195
- #Otherwise, set all paths to None
196
- else:
197
- pathFID = None
198
- pathMS = None
199
- pathFIDpMS = None
200
-
201
- paths = [pathFID,pathMS,pathFIDpMS]
202
-
203
- return paths
204
-
205
- #Function for checking if FIDpMS file exists – creates it if necessary and imports/returns the data
206
- def checkFile(fpmDir,fDir):
207
- """
208
- Parameters
209
- ----------
210
- fpmDir : STR
211
- A string containing the pathname of the FIDpMS file in question.
212
- fDir : STR
213
- A string containing the pathname of the FID file of the same sample/phase as the FIDpMS file.
214
-
215
- Returns
216
- -------
217
- fpmDF : DataFrame
218
- A DataFrame containing the contents of the FIDpMS file.
219
- exists : BOOL
220
- A boolean describing whether or not the relevant file exists and has manually added peaks.
221
-
222
- """
223
- #If FIDpMS file does not exist in data file directory, create it and return False
224
- if not os.path.exists(fpmDir):
225
- #Log that file wasn't found and a new one is being created
226
- logger.info('FIDpMS file not found for sample and phase, creating new...')
227
- #Read FID dataframe
228
- fDF = pd.read_csv(fDir)
229
- #Filter FID dataframe to only include FID rows, as gas samples may have TCD rows, and set to fpmDF
230
- fpmDF = fDF.loc[fDF['Signal Name'] == 'FID1A'].copy()
231
- #Rename FID RT and FID Area columns, as well as rename the Height column to MS RT
232
- fpmDF = fpmDF.rename(columns={'RT':'FID RT','Area':'FID Area','Height':'MS RT'})
233
- #Clear the contents of the MS RT column
234
- fpmDF['MS RT'] = np.nan
235
- #Create list of new columns to create
236
- lnc = ['Formula','Match Factor','Compound Source','Compound Type Abbreviation']
237
-
238
- #Loop through lnc, adding nan columns for each entry
239
- for i in lnc:
240
- fpmDF[i] = np.nan
241
-
242
- #Remove the Injection Data File Name and Signal Name columns
243
- fpmDF = fpmDF.drop(['Injection Data File Name','Signal Name'],axis=1).copy()
244
- #Save fpmDF to provided pathname
245
- fpmDF.to_csv(fpmDir, index=False)
246
-
247
- return fpmDF, False
248
-
249
- #Otherwise..
250
- else:
251
- fpmDF = pd.read_csv(fpmDir)
252
- #If the FIDpMS exists and there exist any peaks..
253
- if fpmDF['Compound Name'].any():
254
- #Define a new dataframe which includes all rows with labelled peaks
255
- fpmDF_labelled = fpmDF.loc[~fpmDF['Compound Name'].isna()]['Compound Source']
256
- #If those peaks are manually assigned or have a blank source, return the dataframe and True
257
- if 'Manual' in fpmDF_labelled.values.tolist() or pd.isna(fpmDF_labelled.values).any():
258
- #Create a log entry
259
- logger.info('FIDpMS file exists and contains manual and/or blank sourced entries')
260
- return fpmDF, True
261
- #Otherwise, if there exist no manually assigned peaks or labelled peaks with a blank source, return False
262
- else:
263
- #Create a log entry
264
- logger.info('FIDpMS file exists but does not contains manual or blank sourced entries')
265
- return fpmDF, False
266
-
267
- #If the FIDpMS file exists but has no peaks, return False
268
- else:
269
- #Create a log entry
270
- logger.info('FIDpMS file exists but contains no labelled peaks')
271
- return fpmDF, False
272
-
273
- #Function describing a third order fit for gas analysis
274
- def defaultGas(FIDRT,fpmDF,fit_const=[a_tof,b_tof,c_tof,d_tof]):
275
- """
276
- A function used to describe the default fit for gas analysis peak matching
277
-
278
- Parameters
279
- ----------
280
- FIDRT : Float
281
- A float describing the FID retention time requiring a corresponding MS retention time.
282
- fpmDF : DataFrame
283
- A dataframe containing FID and MS peak info.
284
- fit_const : List, optional
285
- A list of floats describing a third order fit. The default is [a_tof,b_tof,c_tof,d_tof].
286
-
287
- Returns
288
- -------
289
- MSRT : Float
290
- A float describing the calculated MS RT using the third order fit and the FID RT
291
- """
292
-
293
- MSRT = fit_const[0]*FIDRT**3+fit_const[1]*FIDRT**2+fit_const[2]*FIDRT+fit_const[3]
294
-
295
- return MSRT
296
-
297
- #TODO: Function for creating a third order fit using manually matched peaks
298
-
299
- #Function for creating a linear fit using manually matched peaks
300
- def RTlinfit(fpmDF):
301
-
302
- #Get a new dataframe containing only rows with labelled peaks
303
- fpmDF_lab = fpmDF.loc[~fpmDF['Compound Name'].isna()]
304
-
305
- #Get a new dataframe containing only rows with manual/blank peaks
306
- fpmDF_mb = fpmDF_lab.loc[(fpmDF_lab['Compound Source']=='Manual') | (fpmDF_lab['Compound Source'].isna())]
307
-
308
- #If dataframe contains any rows with 'Manual' as a source..
309
- if 'Manual' in fpmDF_lab['Compound Source'].tolist():
310
- #If dataframe also contains rows with 'nan' as a source..
311
- if pd.isna(fpmDF_lab['Compound Source'].values).any():
312
-
313
- #Manual and blank counts are appropriately assigned
314
- manual_count = fpmDF_lab['Compound Source'].value_counts()['Manual']
315
- blank_count = fpmDF_lab['Compound Source'].isna().sum()
316
- #All count will include both manual and blank entries
317
- all_count = manual_count + blank_count
318
-
319
- #Otherwise, all count will only include manual entries
320
- else:
321
- manual_count = fpmDF_lab['Compound Source'].value_counts()['Manual']
322
- blank_count = 0
323
- all_count = manual_count
324
-
325
- #Else if dataframe contains anyrows with 'nan' as a source..
326
- elif pd.isna(fpmDF_lab['Compound Source'].values).any():
327
- #All count will include only blank entries
328
- manual_count = 0
329
- blank_count = fpmDF_lab['Compound Source'].isna().sum()
330
- all_count = fpmDF_lab['Compound Source'].isna().sum()
331
-
332
- #Otherwise, log that the provided dataframe has no manual or blank entries and return None or 0 for all returns
333
- else:
334
- logger.error('Linear fit function provided a dataframe without manual or blank entries')
335
- return None, [0,0,0]
336
-
337
- #If the blank count is larger than zero, log a warning stating that one or more entries contain a blank source
338
- if blank_count > 0:
339
- logger.warning("One or more labelled peaks in the FIDpMS file have no entry for Compound Source")
340
- #Otherwise, pass
341
- else:
342
- pass
343
-
344
- #Predefine variables for use in linear fitting
345
- peakDrift = 0 #Peak drift, the linear slope describing drift between FID and MS RT's
346
- peakOffset = 0 #Peak offset, the initial offset between FID and MS RT's
347
- peakDiff = 0 #Peak difference, the difference between a given FID and MS RT
348
- r2 = 0 #Coefficient of determination, the r^2 value of a linear fit
349
-
350
- #If all_count is equal to 1..
351
- if all_count == 1:
352
-
353
- #Set the peak offset to the peak difference for the single labelled peak
354
- peakDiff = fpmDF_mb['FID RT'].iloc[0] - fpmDF_mb['MS RT'].iloc[0]
355
- peakOffset = peakDiff
356
-
357
- else:
358
-
359
- #Loop through every labelled peak, calculating the peak difference
360
- for i, row in fpmDF_mb.iterrows():
361
- peakDiff = row['FID RT'] - row['MS RT']
362
- #Add this peak difference to a new column in the dataframe
363
- fpmDF_mb.at[i,'peakDiff'] = peakDiff
364
- #Get a linear fit for peak drift and peak offset using peak differences as y-values and FID RT's as x-values
365
- peakDrift, peakOffset, r_value, p_value, std_err = scipy.stats.linregress(fpmDF_mb['FID RT'],fpmDF_mb['peakDiff'])
366
- #Get a coefficient of determination
367
- r2 = r_value**2
368
- #Get a list of all peak counts
369
- counts = [all_count,manual_count,blank_count]
370
-
371
- return fpmDF_mb, [peakDrift,peakOffset,r2], counts
372
-
373
- #Function that estimates unknown MS RT's and matches FID and MS peaks using a provided linear fit
374
- def matchPeaksLinear(fpmDF,mDF,linfits,peakError=0.06):
375
- """
376
- Parameters
377
- ----------
378
- fpmDF : DataFrame
379
- Dataframe containing FID and MS peak info
380
- mDF : DataFrame
381
- Dataframe containing MS info about identified compounds (UA_UPP)
382
- linfits : List
383
- List containing info about a linear fit for estimated MS RT's in the form [m,b,r2]
384
- peakError : Float, optional
385
- Allowable error between estimated MS RT's and actual MS RT's. The default is 0.01.
386
-
387
- Returns
388
- -------
389
- fpmDF : DataFrame
390
- Dataframe containing FID and MS peak info
391
- """
392
-
393
- def matchOne(fpmDF,fpmiter,linfits,peakError):
394
- """
395
- Parameters
396
- ----------
397
- fpmDF : DataFrame
398
- Dataframe containing FID and MS peak info
399
- fpmiter : List
400
- List containing current index and row in fpmDF of interest in form [i,row]
401
- linfits : List
402
- List containing info about a linear fit for estimated MS RT's in the form [m,b,r2]
403
- peakError : float
404
- Allowable error between estimated MS RT's and actual MS RT's
405
-
406
- Returns
407
- -------
408
- fpmDF : DataFrame
409
- Dataframe containing FID and MS peak info
410
- """
411
-
412
- #Unpack fpmDF iterating info
413
- fpmi = int(fpmiter[0])
414
- fpmrow = fpmiter[1]
415
-
416
- #Estimate an MS RT for the row's FID RT using the linear fit
417
- est_MSRT = fpmrow['FID RT'] - (peakDrift*fpmrow['FID RT'] + peakOffset)
418
- #Compare the estimated MS RT to all real MS RT's, seeing if there is a match within error
419
- mDF_match = mDF.loc[(mDF['Component RT'] >= est_MSRT-peakError) & (mDF['Component RT'] <= est_MSRT+peakError)].copy()
420
- #If there is more than one match, select the entry with the smallest error
421
- if len(mDF_match) > 1:
422
- #Add an RT error to all mDF_match entries
423
- for i, row in mDF_match.iterrows():
424
- mDF_match.at[i,'RT Error'] = abs(fpmrow['FID RT']-est_MSRT)
425
-
426
- #Set mDF_match to the row with minimum RT Error
427
- mDF_match = mDF_match.nsmallest(1,'RT Error')
428
-
429
- #Reset the mDF_match index
430
- mDF_match = mDF_match.reset_index().copy()
431
-
432
- #If the length of mDF_match is greater than zero..
433
- if len(mDF_match) > 0:
434
-
435
- #Add the MS info to the FIDpMS dataframe
436
- fpmDF.at[fpmi,'MS RT'] = mDF_match.at[0,'Component RT']
437
- fpmDF.at[fpmi,'Compound Name'] = mDF_match.at[0,'Compound Name']
438
- fpmDF.at[fpmi,'Formula'] = mDF_match.at[0,'Formula']
439
- fpmDF.at[fpmi,'Match Factor'] = mDF_match.at[0,'Match Factor']
440
- fpmDF.at[fpmi,'Compound Source'] = 'Automatically assigned using a linear fit of manual peak assignments'
441
-
442
- #Otherwise, pass
443
- else:
444
- pass
445
-
446
- return fpmDF
447
-
448
- #Get peak drift and peak offset parameters from linfits, as well as coefficient of determination
449
- peakDrift = linfits[0]
450
- peakOffset = linfits[1]
451
- r2 = linfits[2]
452
-
453
- #Loop through every row in the dataframe
454
- for i, row in fpmDF.iterrows():
455
- #If the row's compound name is not blank
456
- if not pd.isna(row['Compound Name']):
457
- #If the row's compound source is either manual or blank, skip it
458
- if row['Compound Source'] == 'Manual' or pd.isna(row['Compound Source']):
459
- pass
460
- #Otherwise..
461
- else:
462
- #Match one FID peak
463
- fpmDF = matchOne(fpmDF, [i,row], linfits, peakError)
464
- #Otherwise, if the row's compound name is blank..
465
- else:
466
- #Match one FID peak
467
- fpmDF = matchOne(fpmDF, [i,row], linfits, peakError)
468
-
469
- return fpmDF
470
-
471
- #Function that estimates unknown MS RT's and matches FID and MS peaks using a provided third order fit
472
- def matchPeaksThird(fpmDF,mDF,fit_const,peakError=0.06):
473
- """
474
- Parameters
475
- ----------
476
- fpmDF : DataFrame
477
- Dataframe containing FID and MS peak info
478
- mDF : DataFrame
479
- Dataframe containing MS info about identified compounds (UA_UPP)
480
- fit_const : List
481
- A list of floats describing a third order fit.
482
- peakError : Float, optional
483
- Allowable error between estimated MS RT's and actual MS RT's. The default is 0.01.
484
-
485
- Returns
486
- -------
487
- fpmDF : DataFrame
488
- Dataframe containing FID and MS peak info
489
- """
490
-
491
- def matchOne(fpmDF,fpmiter,fit_const,peakError):
492
- """
493
- Parameters
494
- ----------
495
- fpmDF : DataFrame
496
- Dataframe containing FID and MS peak info
497
- fpmiter : List
498
- List containing current index and row in fpmDF of interest in form [i,row]
499
- fit_const : List
500
- A list of floats describing a third order fit.
501
- peakError : float
502
- Allowable error between estimated MS RT's and actual MS RT's
503
-
504
- Returns
505
- -------
506
- fpmDF : DataFrame
507
- Dataframe containing FID and MS peak info
508
- """
509
-
510
- #Unpack fpmDF iterating info
511
- fpmi = int(fpmiter[0])
512
- fpmrow = fpmiter[1]
513
-
514
- #Define x as fpmrow['FID RT] for convenience
515
- x = fpmrow['FID RT']
516
- #Estimate an MS RT for the row's FID RT using the third order fit
517
- est_MSRT = fit_const[0]*x**3 + fit_const[1]*x**2 + fit_const[2]*x + fit_const[3]
518
- #Compare the estimated MS RT to all real MS RT's, seeing if there is a match within error
519
- mDF_match = mDF.loc[(mDF['Component RT'] >= est_MSRT-peakError) & (mDF['Component RT'] <= est_MSRT+peakError)].copy()
520
- #If there is more than one match, select the entry with the smallest error
521
- if len(mDF_match) > 1:
522
- #Add an RT error to all mDF_match entries
523
- for i, row in mDF_match.iterrows():
524
- mDF_match.at[i,'RT Error'] = abs(mDF_match.at[i,'Component RT']-est_MSRT)
525
-
526
- #Set mDF_match to the row with minimum RT Error
527
- mDF_match = mDF_match.nsmallest(1,'RT Error')
528
-
529
- #Reset the mDF_match index
530
- mDF_match = mDF_match.reset_index().copy()
531
-
532
- #If the length of mDF_match is greater than zero..
533
- if len(mDF_match) > 0:
534
-
535
- #Add the MS info to the FIDpMS dataframe
536
- fpmDF.at[fpmi,'MS RT'] = mDF_match.at[0,'Component RT']
537
- fpmDF.at[fpmi,'Compound Name'] = mDF_match.at[0,'Compound Name']
538
- fpmDF.at[fpmi,'Formula'] = mDF_match.at[0,'Formula']
539
- fpmDF.at[fpmi,'Match Factor'] = mDF_match.at[0,'Match Factor']
540
- fpmDF.at[fpmi,'Compound Source'] = 'Automatically assigned using a predetermined third-order fit'
541
-
542
- #Otherwise, pass
543
- else:
544
- pass
545
-
546
- return fpmDF
547
-
548
- #Loop through every row in the dataframe
549
- for i, row in fpmDF.iterrows():
550
- #If the row's compound name is not blank
551
- if not pd.isna(row['Compound Name']):
552
- #If the row's compound source is either manual or a gasPeaks known peak match or blank, skip it
553
- if row['Compound Source'] == 'Manual' or row['Compound Source'] == 'Automatically assigned using gas pairs provided in resources' or pd.isna(row['Compound Source']):
554
- pass
555
- #Otherwise..
556
- else:
557
- #Match one FID peak
558
- fpmDF = matchOne(fpmDF, [i,row], fit_const, peakError)
559
- #Otherwise, if the row's compound name is blank..
560
- else:
561
- #Match one FID peak
562
- fpmDF = matchOne(fpmDF, [i,row], fit_const, peakError)
563
-
564
- return fpmDF
565
-
566
- #Function that performs a subset of speculative labeling, using known peaks hard-coded in a file gasPairs_FIDpMS.csv
567
- def matchKnownPeaks(fpmDF,mDF,gp_rsc):
568
- def matchOne(fpmDF,fpmiter,gp_rsc):
569
- """
570
- Parameters
571
- ----------
572
- fpmDF : DataFrame
573
- Dataframe containing FID and MS peak info
574
- fpmiter : List
575
- List containing current index and row in fpmDF of interest in form [i,row]
576
- gp_rsc : DataFrame
577
- Dataframe containing opened gasPairs resource.
578
- peakError : float
579
- Allowable error between estimated MS RT's and actual MS RT's
580
-
581
- Returns
582
- -------
583
- fpmDF : DataFrame
584
- Dataframe containing FID and MS peak info
585
- """
586
-
587
- #Unpack fpmDF iterating info
588
- fpmi = int(fpmiter[0])
589
- fpmrow = fpmiter[1]
590
-
591
- #Search the gasPairs resource to see if any known peaks/RT's match the FID peak list
592
- for i, row in gp_rsc.iterrows():
593
- #Set gp_match to empty string
594
- gp_match = pd.Series()
595
- #Define error as two times the standard deviation for the FID RT in the gasPeaks resource
596
- gp_error = row['Stdev FID RT']*2
597
- #Extract the FID RT from the resource
598
- gp_FIDRT = row['Average FID RT']
599
- #If the current fpmrow FID RT is within the error bounds of an entry in the resource, match it
600
- #NOTE: prefers the first match, even if the next match is closer. Most resourceRT's are more than
601
- #2*error away from each other
602
- if (fpmrow['FID RT'] >= gp_FIDRT - gp_error) and (fpmrow['FID RT'] <= gp_FIDRT + gp_error):
603
- gp_match = row
604
- break
605
- #Otherwise, pass
606
- else:
607
- pass
608
-
609
- #If gp_match is empty, pass
610
- if gp_match.empty:
611
- pass
612
- #Otherwise, add the match info
613
- else:
614
- #Add the resource match info to the FIDpMS dataframe
615
- fpmDF.at[fpmi,'Compound Name'] = gp_match['Species']
616
- fpmDF.at[fpmi,'Formula'] = gp_match['Formula']
617
- fpmDF.at[fpmi,'Compound Source'] = 'Automatically assigned using gas pairs provided in resources'
618
-
619
- return fpmDF
620
-
621
- #Loop through every row in the dataframe
622
- for i, row in fpmDF.iterrows():
623
- #If the row's compound name is not blank
624
- if not pd.isna(row['Compound Name']):
625
- #If the row's compound source is either manual or blank, skip it
626
- if row['Compound Source'] == 'Manual' or pd.isna(row['Compound Source']):
627
- pass
628
- #Otherwise..
629
- else:
630
- #Match one FID peak
631
- fpmDF = matchOne(fpmDF, [i,row], gp_rsc)
632
- #Otherwise, if the row's compound name is blank..
633
- else:
634
- #Match one FID peak
635
- fpmDF = matchOne(fpmDF, [i,row], gp_rsc)
636
-
637
- return fpmDF
638
-
639
- #Function that performs speculative labeling to label FID peaks which do not have a match
640
- def specLab(fpmDF,kc_rsc,sinfo,counts,peakError,restrictList):
641
-
642
- #Unpack restrictList
643
- trueRestrict, prefer = restrictList
644
- #Log that speculative labeling is being performed
645
- logger.info('Performing speculative labeling on {0} with {1} peaks, {2} of which are labelled: {3} sourced manually and {4} with an unknown source'.format(sinfo['Sample Name'],len(fpmDF),counts[0],counts[1],counts[2]))
646
-
647
- #Loop through every entry in fpmDF
648
- for i, row in fpmDF.iterrows():
649
- #Define a Boolean for use in determining whether to run the next if statement or not
650
- Bool_kc_check = True
651
-
652
- #If the compound name is blank or either form of "No Match"..
653
- if pd.isna(row['Compound Name']) or row['Compound Name'] == 'No Match' or row['Compound Name'] == 'No Match':
654
-
655
- #Get a copy of kc_rsc
656
- kc_check = kc_rsc.copy()
657
- #Find rows where the FID peak RT is within provided error
658
- kc_check = kc_check.loc[(kc_check['FID RT']>=row['FID RT']-peakError) & (kc_check['FID RT']<=row['FID RT']+peakError)]
659
- #Filter out rows that label the peak as No Match or No match
660
- kc_check = kc_check.loc[(kc_check['Compound Name']!='No Match') & (kc_check['Compound Name']!='No match')]
661
-
662
- #For every entry in trueRestrict, filter out rows where the entry property does not match
663
- for entry in trueRestrict:
664
- kc_check = kc_check.loc[kc_check[entry]==sinfo[entry]]
665
-
666
- #If kc_check has more than one row...
667
- if len(kc_check)>1:
668
-
669
- #Make a copy of kc_check
670
- kc_check_2 = kc_check.copy()
671
-
672
- #Loop through every entry in prefer
673
- for key in prefer:
674
-
675
- #Select rows in which the given entry property in prefer has a value within the provided range
676
- kc_check_2 = kc_check.loc[(kc_check[key]>=sinfo[key]-prefer[key])&(kc_check[key]<=sinfo[key]+prefer[key])]
677
- #If this results in a DataFrame with more than one entry, filter the original kc_check
678
- if len(kc_check_2)>1:
679
- kc_check = kc_check_2.copy()
680
- pass
681
- #If this results in a DataFrame with one entry, break the loop
682
- elif len(kc_check_2)==1:
683
- kc_check = kc_check_2.iloc[0].copy()
684
- #Define a Boolean for use in determining whether to run the next if statement or not
685
- Bool_kc_check = False
686
- break
687
- #If this results in a DataFrame with fewer than one entry (the only other possible option)..
688
- else:
689
- #Pass and do not apply this preference
690
- pass
691
-
692
- #If kc_check still has more than one row..
693
- if len(kc_check)>1 and Bool_kc_check:
694
- #Get the row with the highest match factor
695
- kc_check = kc_check.loc[kc_check['Match Factor'].idxmax()]
696
-
697
- #Otherwise, pass
698
- else:
699
- pass
700
-
701
- #Else if kc_check has only one row..
702
- elif len(kc_check)==1:
703
- #Convert the DataFrame into a Series
704
- kc_check = kc_check.iloc[0]
705
- #Otherwise, pass
706
- else:
707
- pass
708
-
709
- #If kc_check is not 0..
710
- if len(kc_check) > 0:
711
- #Add the new kc_check entry to fpmDF for the given row
712
- fpmDF.at[i,'Compound Name'] = kc_check['Compound Name']
713
- fpmDF.at[i,'Formula'] = kc_check['Formula']
714
- fpmDF.at[i,'Compound Source'] = 'Speculated based on {0}, which used {1} at {2}C and {3}psi'.format(kc_check['Sample Name'],kc_check['Catalyst'],kc_check['Reaction Temperature (C)'],kc_check['Reaction pressure (psi)'])
715
- #Otherwise, pass
716
- else:
717
- pass
718
-
719
- #Otherwise, pass
720
- else:
721
- pass
722
-
723
- return fpmDF
724
-
725
#Function that matches FID and MS peaks by their retention time
def matchRT(fpmDF, mDF, peakError=0.06):
    """
    Match FID peaks to MS-identified compounds by retention time proximity.

    Parameters
    ----------
    fpmDF : DataFrame
        Dataframe containing FID and MS peak info
    mDF : DataFrame
        Dataframe containing MS info about identified compounds (UA_UPP)
    peakError : float, optional
        Allowable error between FID RT's and MS RT's. The default is 0.06.

    Returns
    -------
    fpmDF : DataFrame
        Dataframe containing FID and MS peak info, with matched rows filled
        in with MS RT, compound name, formula, match factor, and source.
    """

    def matchOne(fpmDF, fpmiter, peakError):
        """
        Assign the closest-RT MS compound (within peakError) to one FID row.

        Parameters
        ----------
        fpmDF : DataFrame
            Dataframe containing FID and MS peak info
        fpmiter : list
            [index, row] pair identifying the fpmDF row of interest
        peakError : float
            Allowable error between FID RT's and MS RT's

        Returns
        -------
        fpmDF : DataFrame
            Dataframe with the given row updated if a match was found
        """
        #Unpack fpmDF iterating info
        fpmi = int(fpmiter[0])
        fpmrow = fpmiter[1]

        #Collect all MS entries whose RT lies within peakError of the FID RT
        mDF_match = mDF.loc[(mDF['Component RT'] >= fpmrow['FID RT'] - peakError) &
                            (mDF['Component RT'] <= fpmrow['FID RT'] + peakError)].copy()

        #If several MS peaks qualify, keep only the one closest to the FID RT
        if len(mDF_match) > 1:
            #Vectorized RT error (replaces the original per-row loop)
            mDF_match['RT Error'] = (mDF_match['Component RT'] - fpmrow['FID RT']).abs()
            mDF_match = mDF_match.nsmallest(1, 'RT Error')

        #Reset the index so the surviving row is always at label 0
        mDF_match = mDF_match.reset_index().copy()

        #If a match was found, copy its MS info into the FIDpMS dataframe
        if len(mDF_match) > 0:
            fpmDF.at[fpmi, 'MS RT'] = mDF_match.at[0, 'Component RT']
            fpmDF.at[fpmi, 'Compound Name'] = mDF_match.at[0, 'Compound Name']
            fpmDF.at[fpmi, 'Formula'] = mDF_match.at[0, 'Formula']
            fpmDF.at[fpmi, 'Match Factor'] = mDF_match.at[0, 'Match Factor']
            fpmDF.at[fpmi, 'Compound Source'] = 'Automatically assigned by comparing FID and MS retention times'

        return fpmDF

    #Loop through every row in the dataframe
    for i, row in fpmDF.iterrows():
        #If the row already has a compound name...
        if not pd.isna(row['Compound Name']):
            #Skip rows that were matched manually or have no recorded source
            if row['Compound Source'] == 'Manual' or pd.isna(row['Compound Source']):
                pass
            else:
                #Re-match automatically assigned rows
                fpmDF = matchOne(fpmDF, [i, row], peakError)
        else:
            #Blank compound name: attempt a match
            fpmDF = matchOne(fpmDF, [i, row], peakError)

    return fpmDF
811
-
812
#Function that performs compound type abbreviation assignment
def ctaAssign(importDF, contains, keyLoop, elementExclude):
    """
    Assign a compound type abbreviation (CTA) to every row of importDF.

    Parameters
    ----------
    importDF : DataFrame
        Dataframe with 'Compound Name' and 'Formula' columns
    contains : dict
        Maps each CTA key to a list of name substrings indicating that type
    keyLoop : list
        CTA keys of `contains` in priority order (first match wins)
    elementExclude : list
        Element symbols whose presence in a formula forces the CTA to 'O'

    Returns
    -------
    importDF : DataFrame
        Dataframe with the 'Compound Type Abbreviation' column filled in
    """

    #Function that returns a compound type abbreviation corresponding to a compound
    def assignType(compoundName, contains, keyLoop):
        #Walk the keys in priority order; the first key whose substring list
        #hits the compound name wins. Default is 'O' (other).
        for key in keyLoop:
            if any(sub in compoundName for sub in contains[key]):
                return key
        return 'O'

    #Function that checks if a formula string contains any of a list of elements
    def checkElements(compoundFormula, elementList):
        return any(element in compoundFormula for element in elementList)

    #For every entry, assign a compound type abbreviation
    for i, row in importDF.iterrows():
        #Retrieve compound name and formula from the row entry
        compoundName = row['Compound Name']
        compoundFormula = row['Formula']

        #Rows without a formula string (e.g. NaN) are left untouched
        if isinstance(compoundFormula, str):
            if checkElements(compoundFormula, elementExclude):
                #Excluded elements force the 'O' (other) category
                importDF.at[i, 'Compound Type Abbreviation'] = 'O'
            elif isinstance(compoundName, str):
                #Compare names case-insensitively against the substring lists
                importDF.at[i, 'Compound Type Abbreviation'] = assignType(compoundName.lower(), contains, keyLoop)

    return importDF
905
-
906
#Define function that loops through every row in a DataFrame and modifies rows with duplicate compounds
def duplicateHandle(DF):
    """
    Rename duplicate compound entries as isomers of the largest-area peak.

    For each compound name appearing more than once, the row with the
    largest 'FID Area' keeps its name; the remaining rows are renamed
    'Isomer of <name>' and, if the main peak is a linear alkane ('L'),
    reclassified as branched alkanes ('B').

    Parameters
    ----------
    DF : DataFrame
        Dataframe with 'Compound Name', 'FID Area', and
        'Compound Type Abbreviation' columns

    Returns
    -------
    DataFrame
        Copy of DF with duplicate rows relabeled
    """

    #Define function that searches for rows in a DataFrame with duplicate compound names
    def duplicateSearch(DF, cmp_name):
        #Filter the dataframe down to rows with the provided compound name
        DF_out = DF.copy()
        DF_out = DF_out[DF_out['Compound Name'] == cmp_name]

        #More than one remaining row means the compound is duplicated
        if len(DF_out) > 1:
            return True, DF_out.copy()

        #BUGFIX: original returned the pd.DataFrame class itself
        #(missing parentheses), not an empty DataFrame instance
        return False, pd.DataFrame()

    #Define function that relabels a DataFrame of duplicates
    def duplicateLogic(DF_search):
        DF_logic = DF_search.copy()

        #Get the row in the DataFrame with the largest FID area
        maxSeries = DF_logic.loc[DF_logic['FID Area'].idxmax()]
        max_name = maxSeries['Compound Name']
        max_type = maxSeries['Compound Type Abbreviation']

        #Drop the largest-area row; only the remaining duplicates are renamed
        DF_logic = DF_logic.drop([maxSeries.name], axis=0)

        #Rename each remaining duplicate to 'Isomer of <main compound>'
        for i, row in DF_logic.iterrows():
            DF_logic.at[i, 'Compound Name'] = 'Isomer of ' + max_name

            #Isomers of a linear alkane are classified as branched alkanes
            if max_type == 'L':
                DF_logic.at[i, 'Compound Type Abbreviation'] = 'B'

        return DF_logic

    #Define a function that replaces rows in the primary DataFrame with matches in the secondary, assuming the indices match
    def duplicateReplace(pDF, sDF):
        for i, row in sDF.iterrows():
            #row.name is the preserved numeric index into the primary DataFrame
            for col in row.index:
                pDF.at[row.name, col] = row[col]
        return pDF

    #List of compound names already handled, so each group is processed once
    cmp_nameList = []

    #Work on a copy so intermediate edits accumulate in one place
    DF_in = DF.copy()

    #BUGFIX: original left DF_done unbound when the DataFrame contained no
    #duplicates, so `return DF_done` raised NameError; default to DF_in
    #(duplicateReplace mutates DF_in in place and returns it)
    DF_done = DF_in

    #For every row in the provided DataFrame
    for i, row in DF_in.iterrows():
        #Get the compound name in that row
        cmp_name = row['Compound Name']

        #Skip names already handled
        if cmp_name in cmp_nameList:
            continue

        #Skip unmatched or blank entries.
        #BUGFIX: the original compared against 'No Match' twice; the case
        #variant 'No match' (filtered elsewhere in this module) was never skipped
        if not (cmp_name == 'No Match' or cmp_name == 'No match' or pd.isna(cmp_name)):
            #Run the duplicate search function for that compound name
            duplicate_TF, DF_search = duplicateSearch(DF_in, cmp_name)

            #If duplicates were found, relabel them and write them back
            if duplicate_TF:
                DF_logic = duplicateLogic(DF_search)
                DF_done = duplicateReplace(DF_in, DF_logic)

        #Record the compound name as handled
        cmp_nameList.append(cmp_name)

    return DF_done
1040
-
1041
- """ DATA IMPORTS """
1042
- print("[AutoFpmMatch] Importing data...")
1043
- #Import sample information from json file
1044
- with open(os.path.join(DF_Dir,sname,sname+sub_Dict['Info'][0])) as sinfo_f:
1045
- sinfo = json.load(sinfo_f)
1046
-
1047
- #Change ISO date-time strings into datetime objects
1048
- sinfo['Start Time'] = datetime.fromisoformat(sinfo['Start Time'])
1049
- sinfo['End Time'] = datetime.fromisoformat(sinfo['End Time'])
1050
-
1051
- #Calculate a reaction time using the start, end, and heat time values and add to sinfo
1052
- sinfo['Reaction Time (hr)'] = abs(sinfo['End Time']-sinfo['Start Time']).total_seconds()/3600 - sinfo['Heat Time']
1053
-
1054
- #Run the file naming function
1055
- paths = fileNamer(sname,sphase,sub_Dict,Raw_Dir)
1056
-
1057
- #Import MS UPP data
1058
- mDF = pd.read_csv(paths[1])
1059
-
1060
- #Get only relevant columns of MS UPP data
1061
- mDF = mDF.loc[:,['Component RT','Compound Name','Formula','Match Factor']]
1062
-
1063
- #Import known compounds resource
1064
- kc_rsc = pd.read_csv(os.path.join(RE_Dir,'known_compounds.csv'))
1065
- #Filter known compounds to only include rows with the same catalyst
1066
- #AND compounds which were not identified by the current sample
1067
- kc_rsc = kc_rsc.loc[(kc_rsc['Catalyst']==sinfo['Catalyst Type'])&(kc_rsc['Sample Name']!=sinfo['Sample Name'])]
1068
-
1069
- #Import gasPairs_FIDpMS.csv resource
1070
- gp_rsc = pd.read_csv(os.path.join(RE_Dir,'gasPairs_FIDpMS.csv'))
1071
-
1072
- """ CODE """
1073
- print("[AutoFpmMatch] Checking files...")
1074
- #Run the file checking function
1075
- fpmDF, tf = checkFile(paths[2],paths[0])
1076
-
1077
- #If the specified model is linear...
1078
- if model == "L":
1079
- #If the file contains manually matched peaks..
1080
- if tf:
1081
- print("[AutoFpmMatch] Matching by linear fit...")
1082
- #Run the linear fit function
1083
- fpmDF_mb, linfits, counts = RTlinfit(fpmDF)
1084
- #Run the peak matching function
1085
- fpmDF = matchPeaksLinear(fpmDF,mDF,linfits,peakError)
1086
-
1087
- else:
1088
- pass
1089
-
1090
- #Otherwise, if the specified model is third order...
1091
- elif model == "T":
1092
- print("[AutoFpmMatch] Matching by third order fit...")
1093
- #Run the gasPeaks_FIDpMS resource matching function
1094
- fpmDF = matchKnownPeaks(fpmDF,mDF,gp_rsc)
1095
- #Run the third order peak matching function
1096
- fpmDF = matchPeaksThird(fpmDF,mDF,fit_const,peakError)
1097
-
1098
- #Otherwise, if the specified model is retention time match...
1099
- elif model == "R":
1100
- print("[AutoFpmMatch] Matching by retention time...")
1101
- #Run the liquid retention time matching function
1102
- fpmDF = matchRT(fpmDF,mDF,peakErrorRT)
1103
-
1104
- #Otherwise, pass
1105
- else:
1106
- pass
1107
-
1108
- #Run the speculative labeling function
1109
- if splab_TF == "True":
1110
- print("[AutoFpmMatch] Running speculative labeling...")
1111
- fpmDF = specLab(fpmDF, kc_rsc, sinfo, counts, peakError, restrictList)
1112
- else:
1113
- pass
1114
-
1115
- print("[AutoFpmMatch] Matching complete.")
1116
-
1117
- print("[AutoFpmMatch] Assigning compound type abbreviations...")
1118
- #Run the compound type abbreviation assignment function
1119
- fpmDF = ctaAssign(fpmDF, contains, keyLoop, elementExclude)
1120
- print("[AutoFpmMatch] Handling duplicates...")
1121
- #Run the duplicate handling function
1122
- fpmDF = duplicateHandle(fpmDF)
1123
-
1124
- print("[AutoFpmMatch] Saving results...")
1125
- #Save the FIDpMS data
1126
- fpmDF.to_csv(paths[2],index=False)
1127
-
1128
- print("[AutoFpmMatch] Matching complete.")
1129
- #Close main function by returning
1130
- return None
1131
-
1132
-
1133
-