chromaquant 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. chromaquant/__init__.py +9 -2
  2. chromaquant/data/__init__.py +14 -0
  3. chromaquant/data/breakdown.py +430 -0
  4. chromaquant/data/dataset.py +195 -0
  5. chromaquant/data/table.py +412 -0
  6. chromaquant/data/value.py +215 -0
  7. chromaquant/formula/__init__.py +13 -0
  8. chromaquant/formula/base_formulas.py +168 -0
  9. chromaquant/formula/formula.py +507 -0
  10. chromaquant/import_local_packages.py +55 -0
  11. chromaquant/logging_and_handling.py +76 -0
  12. chromaquant/match/__init__.py +13 -0
  13. chromaquant/match/match.py +184 -0
  14. chromaquant/match/match_config.py +296 -0
  15. chromaquant/match/match_tools.py +154 -0
  16. chromaquant/{Quant → results}/__init__.py +2 -2
  17. chromaquant/results/reporting_tools.py +190 -0
  18. chromaquant/results/results.py +250 -0
  19. chromaquant/utils/__init__.py +14 -0
  20. chromaquant/utils/categories.py +127 -0
  21. chromaquant/utils/chemical_formulas.py +104 -0
  22. chromaquant/utils/dataframe_processing.py +222 -0
  23. chromaquant/utils/file_tools.py +100 -0
  24. chromaquant/utils/formula_tools.py +119 -0
  25. chromaquant-0.5.0.dist-info/METADATA +61 -0
  26. chromaquant-0.5.0.dist-info/RECORD +29 -0
  27. {chromaquant-0.4.0.dist-info → chromaquant-0.5.0.dist-info}/WHEEL +1 -1
  28. {chromaquant-0.4.0.dist-info → chromaquant-0.5.0.dist-info}/licenses/LICENSE.txt +1 -1
  29. chromaquant-0.5.0.dist-info/licenses/LICENSES_bundled.txt +251 -0
  30. chromaquant/Handle/__init__.py +0 -13
  31. chromaquant/Handle/fileChecks.py +0 -172
  32. chromaquant/Handle/handleDirectories.py +0 -89
  33. chromaquant/Hydro/__init__.py +0 -12
  34. chromaquant/Hydro/hydroMain.py +0 -496
  35. chromaquant/Manual/HydroUI.py +0 -418
  36. chromaquant/Manual/QuantUPP.py +0 -373
  37. chromaquant/Manual/Quantification.py +0 -1305
  38. chromaquant/Manual/__init__.py +0 -10
  39. chromaquant/Manual/duplicateMatch.py +0 -211
  40. chromaquant/Manual/fpm_match.py +0 -798
  41. chromaquant/Manual/label-type.py +0 -179
  42. chromaquant/Match/AutoFpmMatch.py +0 -1133
  43. chromaquant/Match/MatchSub/__init__.py +0 -13
  44. chromaquant/Match/MatchSub/matchTools.py +0 -282
  45. chromaquant/Match/MatchSub/peakTools.py +0 -259
  46. chromaquant/Match/__init__.py +0 -13
  47. chromaquant/Match/matchMain.py +0 -233
  48. chromaquant/Quant/AutoQuantification.py +0 -1329
  49. chromaquant/Quant/QuantSub/__init__.py +0 -15
  50. chromaquant/Quant/QuantSub/gasFID.py +0 -241
  51. chromaquant/Quant/QuantSub/gasTCD.py +0 -425
  52. chromaquant/Quant/QuantSub/liquidFID.py +0 -310
  53. chromaquant/Quant/QuantSub/parseTools.py +0 -162
  54. chromaquant/Quant/quantMain.py +0 -417
  55. chromaquant/UAPP/__init__.py +0 -12
  56. chromaquant/UAPP/uappMain.py +0 -427
  57. chromaquant/__main__.py +0 -526
  58. chromaquant/oldui.py +0 -492
  59. chromaquant/properties.json +0 -4
  60. chromaquant-0.4.0.dist-info/METADATA +0 -189
  61. chromaquant-0.4.0.dist-info/RECORD +0 -38
  62. chromaquant-0.4.0.dist-info/entry_points.txt +0 -2
  63. chromaquant-0.4.0.dist-info/licenses/LICENSES_bundled.txt +0 -1035
@@ -1,427 +0,0 @@
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
-
- COPYRIGHT STATEMENT:
-
- ChromaQuant – A quantification software for complex gas chromatographic data
-
- Copyright (c) 2024, by Julia Hancock
- Affiliation: Dr. Julie Elaine Rorrer
- URL: https://www.rorrerlab.com/
-
- License: BSD 3-Clause License
-
- ---
-
- UNKNOWNS ANALYSIS POST PROCESSING
- Intended to sort through raw UA output to find best hits considering
- compound constraints.
-
- Julia Hancock
- 01/05/2025
-
- First version (v1) completion: 01/05/2025
-
- Improvement notes: -Add places to throw error and redirect user through console when user-inputted data goes wrong
-                    -Separate functions into packages, redesign nested function trees
-                    -Check if saving data will cause an overwrite - if it does, add an additional suffix
- """
-
- """ PACKAGES """
-
- import pandas as pd
- import numpy as np
- import os
- from pathlib import Path
- import re
- from datetime import datetime
- import importlib.util
- import sys
- import math
-
- """ LOCAL PACKAGES """
-
- #Get package directory
- app_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-
- #Get absolute directories for subpackages
- subpack_dir = {'Handle':os.path.join(app_dir,'Handle','__init__.py'),
-                'Manual':os.path.join(app_dir,'Manual','__init__.py'),
-                'MatchSub':os.path.join(app_dir,'Match','MatchSub','__init__.py')}
-
- #Define function to import from path
- def import_from_path(module_name,path):
-     #Define spec
-     spec = importlib.util.spec_from_file_location(module_name,path)
-     #Define module
-     module = importlib.util.module_from_spec(spec)
-     #Expand sys.modules dict
-     sys.modules[module_name] = module
-     #Load module
-     spec.loader.exec_module(module)
-     return module
-
- #Import all local packages
- hd = import_from_path("hd",subpack_dir['Handle'])
- mn = import_from_path("mn",subpack_dir['Manual'])
- mtsb = import_from_path("mtsb",subpack_dir['MatchSub'])
-
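The removed module loaded its sibling packages by file path rather than through regular imports. For context, a minimal self-contained sketch of the same `importlib` pattern (the throwaway module file is hypothetical, written only so the snippet runs):

```python
import importlib.util
import os
import sys
import tempfile

# Write a throwaway module so the example is self-contained
path = os.path.join(tempfile.gettempdir(), "mymod.py")
with open(path, "w") as f:
    f.write("GREETING = 'hello'\n")

# Same three steps as import_from_path above
spec = importlib.util.spec_from_file_location("mymod", path)
module = importlib.util.module_from_spec(spec)
sys.modules["mymod"] = module   # register before executing the module body
spec.loader.exec_module(module)

print(module.GREETING)  # hello
```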
- """ PARAMETERS """
-
- def mainUAPP(sname):
-
-     print("[uappMain] Beginning match...")
-
-     #Get current time
-     print("[uappMain] Getting current time...")
-     now = datetime.now()
-
-     """ DIRECTORIES """
-     print("[uappMain] Getting directories...")
-     #Get directories from handling script
-     directories = hd.handle(app_dir)
-
-     #Data file log directory
-     directories['log'] = os.path.join(directories['data'],sname,'log')
-
-     #Data file breakdowns directory
-     directories['break'] = os.path.join(directories['data'],sname,'breakdowns')
-
-     #Raw data file directory
-     directories['raw'] = os.path.join(directories['data'],sname,'raw data')
-
-     #PARAMETERS
-     #Limit of identical peak RT
-     PeakRTLim = 0.005
-
-     """ DIRECTORIES """
-
-     #Define final files and file location lists
-     files = []
-     fileLoc = []
-     #Unpack variables from walking through raw data directory
-     for path, subdirs, files_original in os.walk(directories['raw']):
-         for i in files_original:
-             if i[-11:] == "UA_Comp.csv":
-                 files.append(i)
-                 fileLoc.append(os.path.join(path, i))
-             else:
-                 pass
-
-     """ COMPOUND CONSTRAINTS """
-     #Establish lists for two levels of element restrictions:
-     #1st list (softBar) - elements that will be allowed in a compound match only if there are no compound matches
-     #                     with only allowed elements. When the time comes, the list will be searched for matches in
-     #                     order of priority
-     #2nd list (noBar) - elements that will always be allowed
-
-     #Class for elements to enable compound constraints
-     class element:
-         def __init__(self, Symbol, Name, Priority=float("nan")):
-             #Element must always have symbol and name
-             self.Symbol = Symbol
-             self.Name = Name
-             #Element does not necessarily need priority - this is an integer allowing for more precise
-             #control over choosing compound matches
-             if Priority == float("nan"):
-                 pass
-             else:
-                 if isinstance(Priority, int) and Priority > 0:
-                     self.Priority = Priority
-                 else:
-                     pass
-
-     #softBar list of semi-allowed elements
-     #softBar = [element("O","Oxygen",1),element("N","Nitrogen",2),element("Si","Silicon",4)]
-     softBar = [element("O","Oxygen",1),element("Si","Silicon",4)]
-     #noBar list of allowed elements
-     noBar = [element("H","Hydrogen"),element("C","Carbon")]
-
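One caution on the `element` class above: `Priority == float("nan")` is always False, because NaN never compares equal to anything, itself included. That branch is unreachable, so elements created without a priority simply never receive a `Priority` attribute. A minimal sketch of a NaN-safe version of the check (a hypothetical rewrite using `math.isnan`, which the module already imports):

```python
import math

class ElementSketch:
    """Hypothetical rewrite of the element class with a NaN-safe default check."""
    def __init__(self, symbol, name, priority=float("nan")):
        self.symbol = symbol
        self.name = name
        # NaN != NaN, so an equality test can never detect the default; isnan() can
        if isinstance(priority, int) and priority > 0:
            self.priority = priority

print(float("nan") == float("nan"))  # False
print(math.isnan(float("nan")))      # True
```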
-     """ FUNCTIONS """
-     #Function to unpack .csv file
-     def unpackUA(filepath):
-         Df = pd.read_csv(filepath)
-         return Df
-
-     #Function to add match data to dataframe
-     def concatDF(dataSlice, DFin):
-         #Assumes a dataframe provided with these columns: ['Component RT','Compound Name','Formula','Match Factor']
-         #Also assumes dataSlice will contain at least these same columns
-
-         #Define columns
-         col = ['Component RT','Compound Name','Formula','Match Factor','Previous Best Compound Name',\
-                'Previous Best Formula','Previous Best Match Factor','Previous Worst Compound Name',\
-                'Previous Worst Formula','Previous Worst Match Factor']
-
-         #Define list containing slices to be exported
-         listOut = [dataSlice[col[i]] for i in range(len(col))]
-
-         #If DFin is not empty...
-         if not DFin.empty:
-             #Form DFout using concat
-             DFout = pd.concat([pd.DataFrame([listOut], columns=DFin.columns), DFin], ignore_index=True)
-
-         #Otherwise...
-         else:
-             #Define DFout as the listOut slice
-             DFout = pd.DataFrame([listOut], columns=DFin.columns)
-
-         return DFout
-
-     #Function to add series of matches with best and worst match factor to a selected match series
-     def concatSeries(dataSlice, bestSlice, worstSlice):
-         #Assumes all Series have these columns: ['Component RT','Compound Name','Formula','Match Factor']
-
-         #Define dictionaries of new index names for bestSlice and worstSlice
-         bindex = {'Component RT':'Previous Best Component RT','Compound Name':'Previous Best Compound Name',
-                   'Formula':'Previous Best Formula','Match Factor':'Previous Best Match Factor'}
-         windex = {'Component RT':'Previous Worst Component RT','Compound Name':'Previous Worst Compound Name',
-                   'Formula':'Previous Worst Formula','Match Factor':'Previous Worst Match Factor'}
-
-         #Rename bestSlice and worstSlice indices
-         bestSlice = bestSlice.copy()
-         worstSlice = worstSlice.copy()
-         bestSlice.rename(index=bindex, inplace=True)
-         worstSlice.rename(index=windex, inplace=True)
-
-         #Lists of indices from best/worst slices we want to add to dataSlice
-         bindexList = ['Previous Best Compound Name','Previous Best Formula','Previous Best Match Factor']
-         windexList = ['Previous Worst Compound Name','Previous Worst Formula','Previous Worst Match Factor']
-
-         #Define returnSlice
-         returnSlice = pd.concat([dataSlice,bestSlice.loc[bindexList],worstSlice.loc[windexList]], axis=0)
-         return returnSlice
-
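The index-renaming step in `concatSeries` is plain pandas `Series.rename` with a mapper dict; only the labels change, not the values. A small illustration with made-up numbers:

```python
import pandas as pd

# A made-up match in the ['Component RT', 'Compound Name', ...] layout
best = pd.Series({'Component RT': 3.1415, 'Compound Name': 'Hexane',
                  'Formula': 'C6H14', 'Match Factor': 92.0})

bindex = {'Compound Name': 'Previous Best Compound Name',
          'Formula': 'Previous Best Formula',
          'Match Factor': 'Previous Best Match Factor'}

relabeled = best.rename(index=bindex)
print(relabeled.index.tolist())
# ['Component RT', 'Previous Best Compound Name',
#  'Previous Best Formula', 'Previous Best Match Factor']
```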
-     #Function to group retention times, taking median to be value of grouped peaks
-     def groupRT(rawDF):
-
-         #Redefine for clarity
-         filterDF = rawDF.copy()
-
-         #Set up an empty list for output RT's (RT_permF), a list of temporary (original) RT's
-         #(RT_temp) seeded with only the first original RT, and the median of that list
-         RT_permF = []
-         RT_temp = [rawDF['Component RT'][0]]
-         RT_temp_median = RT_temp[0]
-
-         #For all raw retention times, group times within the PeakRTLim of each other.
-         for i in range(1,len(rawDF['Component RT'])):
-             #Current retention time
-             RT_current = rawDF['Component RT'][i]
-
-             #If current retention time is within the median plus or minus the peak limit, redefine median
-             if RT_current < RT_temp_median+PeakRTLim and RT_current > RT_temp_median-PeakRTLim:
-                 #Append to list of like retention times
-                 RT_temp.append(RT_current)
-                 #Recalculate median, rounding to 4 decimal places
-                 RT_temp_median = round(np.median(RT_temp),4)
-                 #If it's reached the end of the dataframe, append what's left
-                 if i == len(rawDF['Component RT']) - 1:
-                     RT_permF.extend(np.full(len(RT_temp),RT_temp_median))
-                     RT_temp_median = RT_current
-                     RT_temp = [RT_current]
-
-             #Otherwise, save the RT_temp_median to all RT_temp positions, redefine RT_temp and RT_temp_median
-             else:
-                 #Set old retention times to median
-                 filterDF.loc[i-len(RT_temp):i, ('Component RT')] = RT_temp_median
-                 RT_permF.extend(np.full(len(RT_temp),RT_temp_median))
-                 RT_temp_median = RT_current
-                 RT_temp = [RT_current]
-
-         #Delete/return variables
-         del RT_permF, RT_temp, RT_temp_median, RT_current
-         return filterDF
-
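In practice, `groupRT` collapses any run of retention times that stays within `PeakRTLim` of a running median down to that median. A toy sketch of the same grouping rule on a plain list (the 0.005 default mirrors the script's `PeakRTLim`):

```python
import numpy as np

def group_rts(rts, lim=0.005):
    """Collapse runs of retention times within lim of a running median."""
    out, group = [], [rts[0]]
    median = rts[0]
    for rt in rts[1:]:
        if median - lim < rt < median + lim:
            group.append(rt)
            median = round(float(np.median(group)), 4)
        else:
            out.extend([median] * len(group))
            group, median = [rt], rt
    out.extend([median] * len(group))  # flush the final group
    return out

print(group_rts([1.001, 1.002, 1.003, 2.500, 2.501]))
# [1.002, 1.002, 1.002, 2.5005, 2.5005]
```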
-     #Function to return True if formula only contains noBar restrictions
-     def donoBar(formula, noBar):
-
-         #Find all elements present in formula
-         elements = re.findall('[A-Z][a-z]?',formula)
-         #Get list of allowed elements from noBar list
-         allowed_elements = [noBar[i].Symbol for i in range(len(noBar))]
-
-         #..If the set difference between the lists is not empty (there are formula elements besides allowed ones), return False
-         if set(elements).difference(set(allowed_elements)):
-             tf = False
-         #..Otherwise, return True
-         else:
-             tf = True
-
-         return tf
-
-     #Function to return True if formula only contains softBar restrictions of given priority
-     def dosoftBar(formula,noBar,softBar,priority):
-
-         elements = re.findall('[A-Z][a-z]?',formula)
-         #Get dataframe of elements and priority from softBar
-         ePDF = pd.DataFrame.from_dict({"Symbol":[obj.Symbol for obj in softBar], "Priority":[obj.Priority for obj in softBar]})
-         #Get list of symbols with provided priority or lower, add elements from noBar
-         allowed_elements = ePDF.loc[ePDF['Priority']<=priority, 'Symbol'].to_list()
-         allowed_elements.extend([noBar[i].Symbol for i in range(len(noBar))])
-         #Delete elements dataframe
-         del ePDF
-
-         #..If the set difference between the lists is not empty (there are formula elements besides allowed ones), return False
-         trial = set(elements).difference(set(allowed_elements))
-         if set(elements).difference(set(allowed_elements)):
-             tf = False
-         #..Otherwise, return True
-         else:
-             tf = True
-
-         return tf
-
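Both predicates reduce to the same two steps: extract element symbols with a one-or-two-letter regex, then take the set difference against an allow-list. A quick sketch:

```python
import re

def symbols(formula):
    # A capital letter optionally followed by one lowercase letter,
    # e.g. 'C6H12O' -> ['C', 'H', 'O'], 'SiO2' -> ['Si', 'O']
    return re.findall('[A-Z][a-z]?', formula)

allowed = {'C', 'H'}                               # the noBar elements
print(set(symbols('C6H14')) - allowed)             # set()  -> passes noBar
print(set(symbols('C6H12O')) - allowed)            # {'O'}  -> fails noBar
print(set(symbols('C6H12O')) - (allowed | {'O'}))  # set()  -> passes softBar at priority 1
```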
-     #Function to choose best matches according to compound constraints
-     def constrain(filterDF, constList):
-         """
-         This function loops through the dataframe, selecting the best match out of duplicate retention time matches.
-
-         INPUTS: filterDF - the dataframe to be filtered
-                 constList - a list containing constraints in the form [noBar, softBar]
-
-         OUTPUTS: constDF - a dataframe containing the best matches for each retention time
-
-         APPROACH: 1) Get a list of all retention times in the dataframe;
-                   2) Loop through each retention time, getting a slice of each dataframe;
-                   3) Loop through compound constraints to pick the best match in the slice;
-                   4) Append result to new, constrained dataframe
-
-         SELECTING BEST MATCH: 1) If the first formula of the sorted slice contains only noBar elements, add it to the constrained dataframe
-                               2) Otherwise, test the next formula
-                               3) If all other formulas have elements besides noBar, go back to the first value and
-                                  allow its formula if it contains only highest priority elements
-                               4) If it contains lower priority/blocklist elements, repeat down the slice
-                               5) If all formulas contain lower priority elements, allow the next priority and repeat the search
-                               6) If all formulas contain elements not listed in noBar or softBar, add a "No Match" row
-         """
-
-         #Unpack constList into noBar and softBar
-         noBar, softBar = constList
-         #Get list of written priorities from softBar and sort them in ascending order
-         priorList = sorted(list(set([x.Priority for x in softBar])))
-         #Get list of all retention times
-         arrayRF = filterDF['Component RT'].unique()
-         #Create DataFrame for outputted data
-         constDF = pd.DataFrame(columns=['Component RT','Compound Name','Formula','Match Factor','Previous Best Compound Name',\
-                                         'Previous Best Formula','Previous Best Match Factor','Previous Worst Compound Name',\
-                                         'Previous Worst Formula','Previous Worst Match Factor'])
-
-         #For every listed retention time, select best match
-         for RTi in arrayRF:
-
-             #Get a slice containing all possible compounds at given RT
-             compound_slice = filterDF.loc[(filterDF["Component RT"] == RTi)]
-             #Remove Unknowns from slice; if slice is empty then skip one loop
-             compound_slice = compound_slice.loc[~compound_slice["Compound Name"].str.contains("Unknown")]
-             #Sort slice by match factor, reset indices
-             test_slice = compound_slice.sort_values(by=['Match Factor'], ascending=True).reset_index(drop=True)
-
-             #Find rows with best and worst match factors
-             try:
-                 best_match = test_slice.iloc[0,:]
-                 worst_match = test_slice.iloc[len(test_slice)-1,:]
-             except:
-                 best_match = pd.Series(dtype='float64',index=['Component RT','Compound Name','Formula','Match Factor'])
-                 worst_match = pd.Series(dtype='float64',index=['Component RT','Compound Name','Formula','Match Factor'])
-
-             #Set search True/False Boolean to True
-             search_tf = True
-             #Set counted_loops to 0
-             counted_loops = 0
-             #While loop to continue search function until match is either found or not
-             while search_tf == True and counted_loops < 100:
-
-                 #For every row in the slice sorted by match factor..
-                 for index, row in test_slice.iterrows():
-
-                     #..If the loop number is greater than the number of listed priorities OR the row has a NaN formula,
-                     #  add row with "No Match" and formula NaN
-                     if counted_loops > len(priorList) or not isinstance(row['Formula'], str):
-                         constSeries = concatSeries(pd.Series({"Component RT":RTi,"Compound Name":"No Match",\
-                                                               "Match Factor":float('nan'),"Formula":float('nan')}),\
-                                                    best_match,worst_match)
-                         constDF = concatDF(constSeries,constDF)
-                         search_tf = False
-                         break
-
-                     #..Otherwise if the formula meets the noBar criteria, choose the row and break the loop
-                     elif donoBar(row['Formula'],noBar) and counted_loops == 0:
-                         constSeries = concatSeries(row,best_match,worst_match)
-                         constDF = concatDF(constSeries,constDF)
-                         search_tf = False
-                         break
-
-                     #..Otherwise if the loop number is greater than 0 and less than the
-                     #  number of unique softBar priorities, determine if formula meets softBar criteria
-                     elif counted_loops > 0 and counted_loops < len(priorList):
-                         #Try/except in case counted_loops goes higher than the priority list
-                         try:
-                             if dosoftBar(row['Formula'],noBar,softBar,priorList[counted_loops-1]):
-                                 constSeries = concatSeries(row,best_match,worst_match)
-                                 constDF = concatDF(constSeries,constDF)
-                                 search_tf = False
-                                 break
-                             else:
-                                 pass
-                         except:
-                             pass
-
-                 #Count one while loop
-                 counted_loops += 1
-
-         return constDF
-
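Stripped of the dataframe bookkeeping, the selection logic in `constrain` is a staged allow-list search: pass 0 accepts formulas containing only noBar elements, each later pass admits one more softBar priority tier, and exhaustion yields "No Match". A condensed sketch of that control flow (helper names are hypothetical):

```python
import re

def symbols(formula):
    return re.findall('[A-Z][a-z]?', formula)

def pick_match(formulas, no_bar, soft_tiers):
    """formulas: candidates sorted best-first; soft_tiers: e.g. [{'O'}, {'Si'}]
    ordered from highest priority (lowest number) down."""
    allowed = set(no_bar)
    for tier in [set()] + soft_tiers:      # pass 0 uses noBar alone
        allowed |= tier
        for f in formulas:
            if not set(symbols(f)) - allowed:
                return f
    return "No Match"

print(pick_match(['C6H12O', 'C6H14'], {'C', 'H'}, [{'O'}, {'Si'}]))  # C6H14
print(pick_match(['C6H12O', 'C5H5N'], {'C', 'H'}, [{'O'}, {'Si'}]))  # C6H12O
```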
-     #Function to save dataframe to .csv file
-     def outputCSV(constDF_Dict, file_directory, infilenames):
-         #Create names of exported files by adding "_UPP" to the name before .csv
-         outfilenames = [x[:x.index('.csv')] + '_UPP' + x[x.index('.csv'):] for x in infilenames]
-         #Create list of filepaths from export directory + filename.csv
-         filepathList = [os.path.join(file_directory,outfilenames[i]) for i in range(len(outfilenames))]
-
-         #For every filename, save a .csv
-         for i in range(len(infilenames)):
-             constDF_Dict[infilenames[i]].to_csv(filepathList[i])
-
-         return None
-
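The header's improvement notes already flag that `outputCSV` will silently overwrite an existing `_UPP` file. A minimal sketch of the suffix-bumping fix the note describes (a hypothetical helper, not part of the package):

```python
import os

def safe_path(directory, filename):
    """Return a collision-free path, appending _1, _2, ... before the extension."""
    stem, ext = os.path.splitext(filename)
    candidate = os.path.join(directory, filename)
    n = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{stem}_{n}{ext}")
        n += 1
    return candidate

# safe_path('/tmp', 'A_UA_Comp_UPP.csv') -> '/tmp/A_UA_Comp_UPP.csv',
# or '/tmp/A_UA_Comp_UPP_1.csv' if that file already exists
```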
-     """ CODE """
-
-     #Unpack all .csv files with ending "UA_Comp.csv" in provided directory
-     print("[uappMain] Unpacking data from provided directory...")
-     UAData_raw = {}
-
-     for i in range(len(files)):
-         UAData_raw[files[i]] = unpackUA(fileLoc[i])
-
-     print("[uappMain] Data unpacked.")
-
-     #Dictionaries for filtered and constrained data for each file
-     filterDF_Dict = {}
-     constDF_Dict = {}
-
-     #For all files, run the constraint workflow
-     for i in range(len(files)):
-
-         #Group retention times for all files
-         print("[uappMain][" + files[i] + "] Grouping retention times...")
-         filterDF = groupRT(UAData_raw[files[i]])
-         filterDF_Dict[files[i]] = filterDF
-
-         #Apply constraints to all files
-         print("[uappMain][" + files[i] + "] Applying compound constraints...")
-         constDF = constrain(filterDF, [noBar,softBar])
-         constDF_Dict[files[i]] = constDF
-
-     #Save results
-     print("[uappMain] Saving results...")
-     outputCSV(constDF_Dict, directories['raw'], files)
-     print("[uappMain] Files saved to " + str(directories['raw']))
-
-     #Complete program
-     print("[uappMain] Unknowns post processing finished.")
-
-     return None