chromaquant 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ ChromaQuant.Manual package initialization
6
+
7
+ Julia Hancock
8
+ Created 10-18-2024
9
+
10
+ """
@@ -0,0 +1,211 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+
5
+ COPYRIGHT STATEMENT:
6
+
7
+ ChromaQuant – A quantification software for complex gas chromatographic data
8
+
9
+ Copyright (c) 2024, by Julia Hancock
10
+ Affiliation: Dr. Julie Elaine Rorrer
11
+ URL: https://www.rorrerlab.com/
12
+
13
+ License: BSD 3-Clause License
14
+
15
+ ---
16
+
17
+ SCRIPT THAT TAKES MATCHED FID AND MS PEAKS AND ASSESSES DUPLICATES
18
+ Julia Hancock
19
+
20
+ Started 7-26-2024
21
+
22
+ """
23
+
24
+ """ PACKAGES """
25
+
26
+ import sys
27
+ import pandas as pd
28
+ import os
29
+
30
+ """ SAMPLE INFO """
31
# Sample identifier: names the sample's data folder and prefixes its file names
sname = 'example2'

# Sample phase code (presumably 'L' = liquid -- TODO confirm against the rest of the package)
phase = 'L'

""" DIRECTORIES """

# Folder containing this script. Despite the variable name, this is the
# script's own location, not the process's current working directory.
cwd = os.path.dirname(__file__)

# Relevant directories, keyed by role. Each sub-directory value ends with '/'
# so that file names can be appended directly.
direcDict = {
    'cwd': cwd,                                 # Main directory
    'resources': f"{cwd}/resources/",           # Resources directory
    'DF_Dir': f"{cwd}/data/{sname}/",           # Data files directory
    'DF_raw': f"{cwd}/data/{sname}/raw data/",  # Raw data files directory
    'DFlog_Dir': f"{cwd}/data/{sname}/log/",    # Data file log directory
}

# Suffixes appended to the sample name to form each file name.
# Each value is a one-element list holding the suffix.
sub_Dict = {
    'Gas TCD+FID': ['_GS2_TCD_CSO.csv'],
    'Gas Labelled MS Peaks': ['_GS1_UA_Comp_UPP.csv'],
    'Gas FID+MS': ['_GS2_FIDpMS.csv'],
    'Liquid FID': ['_LQ1_FID_CSO.csv'],
    'Liquid Labelled MS Peaks': ['_LQ1_UA_Comp_UPP.csv'],
    'Liquid FID+MS': ['_LQ1_FIDpMS.csv'],
    'Info': ['_INFO.json'],
}
57
+
58
+
59
+ """ FUNCTIONS """
60
+
61
+ #Define function that loops through every row and modifies rows with duplicate compounds
62
def duplicateHandle(DF):
    """Rename duplicated compound assignments so each name labels one row.

    For every compound name that appears on more than one row, the row with
    the largest 'FID Area' keeps the name. Every other row with that name is
    renamed to 'Isomer of <name>'. If the kept row's
    'Compound Type Abbreviation' is 'L' (linear alkanes), the renamed rows'
    type is set to 'B' (branched alkanes); otherwise their type is untouched.
    Rows named 'No Match' or 'No match', or with a missing name, are never
    treated as duplicates.

    Parameters
    ----------
    DF : pandas.DataFrame
        Table with the columns 'Compound Name', 'FID Area' and
        'Compound Type Abbreviation'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``DF`` with duplicates renamed. If there are no duplicates
        (or the table is empty) an unchanged copy is returned.
    """

    name_col = 'Compound Name'
    type_col = 'Compound Type Abbreviation'
    area_col = 'FID Area'

    #Names that mark an unassigned peak and must never be treated as duplicates
    unmatched_names = ('No Match', 'No match')

    #Define function that searches for rows in a DataFrame with duplicate compound names
    def duplicateSearch(DF, cmp_name):
        """Return (has_duplicates, rows) for the rows of DF named cmp_name."""

        #Filter the dataframe using the provided compound name
        DF_out = DF[DF[name_col] == cmp_name]

        #More than one matching row means the compound is duplicated
        if len(DF_out) > 1:
            return True, DF_out.copy()

        #Otherwise return an empty DataFrame instance (not the DataFrame class)
        return False, pd.DataFrame()

    #Define function that handles a given DataFrame of duplicates
    def duplicateLogic(DF_search):
        """Return the non-largest duplicate rows, renamed and retyped."""

        #Get the row in the DataFrame with the largest area
        max_label = DF_search[area_col].idxmax()
        maxSeries = DF_search.loc[max_label]

        #Get the name and compound type of this compound
        max_name = maxSeries[name_col]
        max_type = maxSeries[type_col]

        #Get the remaining entries; drop returns a new DataFrame, preserving the index
        DF_logic = DF_search.drop([max_label], axis=0)

        #Rename every remaining row to 'Isomer of ...'
        DF_logic[name_col] = 'Isomer of ' + max_name

        #If the largest row is a linear alkane, the remaining rows become branched alkanes
        if max_type == 'L':
            DF_logic[type_col] = 'B'

        return DF_logic

    #Define a function that replaces rows in the primary DataFrame with matches in the secondary, assuming the indices match
    def duplicateReplace(pDF, sDF):
        """Overwrite the rows of pDF that share index labels with sDF."""

        #Write back column by column so each column keeps its own dtype
        for col in sDF.columns:
            pDF.loc[sDF.index, col] = sDF[col]

        return pDF

    #Create a copy of the argument DataFrame to be used, leaving the caller's data untouched
    DF_in = DF.copy()

    #Nothing to do for an empty table
    if DF_in.empty:
        return DF_in

    #Compound names already handled
    handled = set()

    #Walk the names of the original (unmodified) table, so names created by
    #earlier renames are never themselves picked up as new compounds
    for cmp_name in DF[name_col]:

        #Skip unassigned peaks and missing names
        if pd.isna(cmp_name) or cmp_name in unmatched_names:
            continue

        #Skip compounds that have already been handled
        if cmp_name in handled:
            continue

        #Search the working copy for rows that still carry this name
        duplicate_TF, DF_search = duplicateSearch(DF_in, cmp_name)

        if duplicate_TF:
            #Run the duplicate logic function, then write the result back
            DF_in = duplicateReplace(DF_in, duplicateLogic(DF_search))

        #Add the compound name to the handled set
        handled.add(cmp_name)

    #Always return the working copy, even when no duplicates were found
    return DF_in
195
+
196
+
197
+ """ CODE """
198
+
199
# Load the sample's matched FID+MS peak table from its raw-data folder.
# The file name is the sample name followed by the 'Liquid FID+MS' suffix.
DF = pd.read_csv("".join([direcDict['DF_raw'], sname, sub_Dict['Liquid FID+MS'][0]]))

# Resolve duplicated compound assignments in the loaded table
DF_done = duplicateHandle(DF)

# Compound name assigned at module level; nothing in the visible code reads it
cmp_name = '6,6-Dimethylhepta-2,4-diene'
206
+
207
+
208
+
209
+
210
+
211
+