chromaquant 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chromaquant/Handle/__init__.py +12 -0
- chromaquant/Handle/handleDirectories.py +89 -0
- chromaquant/Manual/HydroUI.py +418 -0
- chromaquant/Manual/QuantUPP.py +373 -0
- chromaquant/Manual/Quantification.py +1305 -0
- chromaquant/Manual/__init__.py +10 -0
- chromaquant/Manual/duplicateMatch.py +211 -0
- chromaquant/Manual/fpm_match.py +798 -0
- chromaquant/Manual/label-type.py +179 -0
- chromaquant/Match/AutoFpmMatch.py +1133 -0
- chromaquant/Match/__init__.py +12 -0
- chromaquant/Quant/AutoQuantification.py +1329 -0
- chromaquant/Quant/__init__.py +12 -0
- chromaquant/__init__.py +10 -0
- chromaquant/__main__.py +493 -0
- chromaquant/properties.json +4 -0
- chromaquant-0.3.1.dist-info/METADATA +189 -0
- chromaquant-0.3.1.dist-info/RECORD +22 -0
- chromaquant-0.3.1.dist-info/WHEEL +4 -0
- chromaquant-0.3.1.dist-info/entry_points.txt +2 -0
- chromaquant-0.3.1.dist-info/licenses/LICENSE.txt +18 -0
- chromaquant-0.3.1.dist-info/licenses/LICENSES_bundled.txt +1035 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
COPYRIGHT STATEMENT:
|
|
6
|
+
|
|
7
|
+
ChromaQuant – A quantification software for complex gas chromatographic data
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2024, by Julia Hancock
|
|
10
|
+
Affiliation: Dr. Julie Elaine Rorrer
|
|
11
|
+
URL: https://www.rorrerlab.com/
|
|
12
|
+
|
|
13
|
+
License: BSD 3-Clause License
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
SCRIPT THAT TAKES MATCHED FID AND MS PEAKS AND ASSESSES DUPLICATES
|
|
18
|
+
Julia Hancock
|
|
19
|
+
|
|
20
|
+
Started 7-26-2024
|
|
21
|
+
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
""" PACKAGES """
|
|
25
|
+
|
|
26
|
+
import sys
|
|
27
|
+
import pandas as pd
|
|
28
|
+
import os
|
|
29
|
+
|
|
30
|
+
""" SAMPLE INFO """
|
|
31
|
+
#Name of the sample to process; used to build the data folder and file names
sname = 'example2'

#Phase of the sample
phase = 'L'

""" DIRECTORIES """

#Directory that contains this script (derived from __file__)
cwd = os.path.dirname(__file__)

#Every directory the script relies on, built from cwd and the sample name
direcDict = dict(
    cwd=cwd,                                  #Main directory
    resources=f"{cwd}/resources/",            #Resources directory
    DF_Dir=f"{cwd}/data/{sname}/",            #Data files directory
    DF_raw=f"{cwd}/data/{sname}/raw data/",   #Raw data files directory
    DFlog_Dir=f"{cwd}/data/{sname}/log/",     #Data file log directory
)

#Suffixes appended to the sample name to form the name of each data file
sub_Dict = {
    'Gas TCD+FID': ['_GS2_TCD_CSO.csv'],
    'Gas Labelled MS Peaks': ['_GS1_UA_Comp_UPP.csv'],
    'Gas FID+MS': ['_GS2_FIDpMS.csv'],
    'Liquid FID': ['_LQ1_FID_CSO.csv'],
    'Liquid Labelled MS Peaks': ['_LQ1_UA_Comp_UPP.csv'],
    'Liquid FID+MS': ['_LQ1_FIDpMS.csv'],
    'Info': ['_INFO.json'],
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
""" FUNCTIONS """
|
|
60
|
+
|
|
61
|
+
#Define function that loops through every row and modifies rows with duplicate compounds
|
|
62
|
+
def duplicateHandle(DF):
    """
    Relabel rows that share a compound name so each name is kept on one row only.

    For every compound name that appears on more than one row, the row with the
    largest 'FID Area' keeps the name. Every other row with that name is renamed
    to 'Isomer of <name>'. If the retained row has the compound type
    abbreviation 'L', the renamed rows are given the abbreviation 'B'.

    Rows whose compound name is 'No Match', 'No match' or missing (NaN/None)
    are never treated as duplicates and are left unchanged.

    Parameters
    ----------
    DF : pandas.DataFrame
        Must contain the columns 'Compound Name', 'FID Area' and
        'Compound Type Abbreviation'. The row index is used to address rows,
        so it is assumed to hold unique labels.

    Returns
    -------
    pandas.DataFrame
        A modified copy of DF; the argument itself is not changed. When there
        is nothing to relabel, the copy is returned as-is.

    Raises
    ------
    KeyError
        If a required column is missing.
    """

    #Names that mark a peak without an assignment; such rows are skipped
    unmatched_names = ('No Match', 'No match')

    #Work on a copy so the caller's DataFrame is left untouched
    DF_in = DF.copy()

    #Compound names that have already been handled
    handled_names = set()

    #Loop over a snapshot of the names, since DF_in is edited inside the loop
    for cmp_name in DF_in['Compound Name'].tolist():

        #Skip missing names, unmatched peaks and names that were already handled
        if pd.isna(cmp_name) or cmp_name in unmatched_names or cmp_name in handled_names:
            continue

        #Remember this name so later rows with the same name are not reprocessed
        handled_names.add(cmp_name)

        #Get every row currently carrying this compound name
        DF_search = DF_in[DF_in['Compound Name'] == cmp_name]

        #A single row means there is no duplicate to resolve
        if len(DF_search) < 2:
            continue

        #The row with the largest FID area keeps the compound name
        max_index = DF_search['FID Area'].idxmax()

        #Index labels of the remaining rows, which get relabelled
        other_index = DF_search.index.drop(max_index)

        #Rename the remaining rows to 'Isomer of ...'
        DF_in.loc[other_index, 'Compound Name'] = 'Isomer of ' + cmp_name

        #If the retained row has type 'L', the remaining rows are set to type 'B'
        if DF_in.at[max_index, 'Compound Type Abbreviation'] == 'L':
            DF_in.loc[other_index, 'Compound Type Abbreviation'] = 'B'

    #Return the working copy, whether or not any duplicates were found
    return DF_in
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
""" CODE """
|
|
198
|
+
|
|
199
|
+
#Read the FID and MS matches from the raw data directory, using the 'Liquid FID+MS' file suffix
DF = pd.read_csv(f"{direcDict['DF_raw']}{sname}{sub_Dict['Liquid FID+MS'][0]}")

#Resolve rows that share a compound name
DF_done = duplicateHandle(DF)

#NOTE(review): this name is not used by anything visible after this point - confirm whether it is still needed
cmp_name = '6,6-Dimethylhepta-2,4-diene'
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
|