chromaquant 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,179 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+
5
+ COPYRIGHT STATEMENT:
6
+
7
+ ChromaQuant – A quantification software for complex gas chromatographic data
8
+
9
+ Copyright (c) 2024, by Julia Hancock
10
+ Affiliation: Dr. Julie Elaine Rorrer
11
+ URL: https://www.rorrerlab.com/
12
+
13
+ License: BSD 3-Clause License
14
+
15
+ ---
16
+
17
+ SCRIPT FOR LABELING A LIST OF COMPOUNDS ACCORDING TO PREDEFINED
18
+ COMPOUND TYPE RULES
19
+
20
+ Julia Hancock
21
+ 7-9-2024
22
+
23
+ """
24
+
25
+ """ PACKAGES """
26
+ import pandas as pd
27
+
28
+
29
+ """ CONTAIN DICTIONARIES """
30
+ #This dictionary contains lists of substrings to be checked against compound name strings to
31
+ #assign a compound type
32
+
33
+ #Six compound types exist: linear alkanes (L), branched alkanes (B), aromatics (A), cycloalkanes (C),
34
+ #alkenes/alkynes (E), and other (O)
35
+
36
+ #Each compound type abbreviation will have an entry in the dictionary corresponding to a list of
37
+ #substrings to be checked against a compound name string
38
+
39
#Substring lookup table: each compound type abbreviation maps to the list of
#substrings that mark a compound name as belonging to that type.
#Lists are laid out one naming "decade" per row; entries and their order are unchanged.
contains = {
    #Linear alkanes
    'L': [
        'methane', 'ethane', 'propane', 'butane', 'pentane', 'hexane', 'heptane', 'octane', 'nonane',
        'decane', 'undecane', 'hendecane', 'dodecane', 'tridecane', 'tetradecane', 'pentadecane',
        'hexadecane', 'heptadecane', 'octadecane', 'nonadecane',
        'icosane', 'eicosane', 'heneicosane', 'henicosane', 'docosane', 'tricosane', 'tetracosane',
        'pentacosane', 'hexacosane', 'cerane', 'heptacosane', 'octacosane', 'nonacosane',
        'triacontane', 'hentriacontane', 'untriacontane', 'dotriacontane', 'dicetyl', 'tritriacontane',
        'tetratriacontane', 'pentatriacontane', 'hexatriacontane', 'heptatriacontane',
        'octatriacontane', 'nonatriacontane',
        'tetracontane', 'hentetracontane', 'dotetracontane', 'tritetracontane', 'tetratetracontane',
        'pentatetracontane', 'hexatetracontane', 'heptatetracontane', 'octatetracontane',
        'nonatetracontane', 'pentacontane',
    ],

    #Branched alkanes (alkyl substituent names)
    'B': [
        'iso', 'methyl', 'ethyl', 'propyl', 'butyl', 'pentyl', 'hexyl', 'heptyl', 'octyl', 'nonyl',
        'decyl', 'undecyl', 'dodecyl', 'tridecyl', 'tetradecyl', 'pentadecyl', 'hexadecyl',
        'heptadecyl', 'octadecyl', 'nonadecyl',
        'icosyl', 'eicosyl', 'heneicosyl', 'henicosyl', 'docosyl', 'tricosyl', 'tetracosyl',
        'pentacosyl', 'hexacosyl', 'heptacosyl', 'octacosyl', 'nonacosyl',
        'triacontyl', 'hentriacontyl', 'untriacontyl', 'dotriacontyl', 'tritriacontyl',
        'tetratriacontyl', 'pentatriacontyl', 'hexatriacontyl', 'heptatriacontyl',
        'octatriacontyl', 'nonatriacontyl',
        'tetracontyl', 'hentetracontyl', 'dotetracontyl', 'tritetracontyl', 'tetratetracontyl',
        'pentatetracontyl', 'hexatetracontyl', 'heptatetracontyl', 'octatetracontyl',
        'nonatetracontyl', 'pentacontyl',
    ],

    #Aromatics
    'A': [
        'benzyl', 'benzo', 'phenyl', 'benzene', 'toluene', 'xylene', 'mesitylene', 'durene',
        'naphthalene', 'fluorene', 'anthracene', 'phenanthrene', 'phenalene',
        'tetracene', 'chrysene', 'triphenylene', 'pyrene', 'pentacene', 'perylene', 'corannulene',
        'coronene', 'ovalene', 'indan', 'indene', 'tetralin',
    ],

    #Cycloalkanes
    'C': ['cyclo', 'menthane'],

    #Alkenes/alkynes
    'E': ['ene', 'yne'],

    #Other: no substrings
    'O': [],
}
59
+
60
+ #List of contains keys in order of priority
61
#Priority order in which the keys of contains are tried; 'O' is absent from this list
keyLoop = ['A', 'C', 'E', 'B', 'L']

#Element symbols that cause a formula to be labelled 'O', one periodic-table period per row.
#NOTE(review): the list stops at Zn, so heavier elements (e.g. 'I') are not listed - confirm this is intended.
elementExclude = [
    'He',
    'Li', 'Be', 'B', 'N', 'O', 'F', 'Ne',
    'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar',
    'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
]
67
+
68
+ """ FUNCTIONS """
69
+
70
+ #Function that returns a compound type abbreviation corresponding to a compound
71
def assignType(compoundName,contains,keyLoop):
    """
    Return the compound type abbreviation for a compound name.

    The keys in keyLoop are tried in order; the first key whose substring list
    has at least one entry occurring in compoundName is returned.

    Parameters
    ----------
    compoundName : str
        Compound name to classify. Matching is a plain, case-sensitive
        substring test, so the caller must normalise case beforehand.
    contains : dict
        Maps each compound type abbreviation to a list of substrings.
    keyLoop : list
        Keys of contains in order of priority.

    Returns
    -------
    str
        The first matching key from keyLoop, or 'O' if no key matches.

    Raises
    ------
    KeyError
        If a key tried from keyLoop is not present in contains.
    """

    #Function that accepts a list of substrings to check against a string and returns a boolean
    def stringSearch(string,subList):
        return any(subString in string for subString in subList)

    #Use the priority order supplied by the caller. An earlier version replaced
    #keyLoop with a hard-coded list here, which silently ignored the argument.
    for key in keyLoop:
        #The first key with a matching substring wins
        if stringSearch(compoundName,contains[key]):
            return key

    #No key matched, so fall back to the default compound type abbreviation 'O'
    return 'O'
110
+
111
+ #Function that checks if formula string contains any of a list of elements
112
def checkElements(compoundFormula,elementList):
    """
    Return True if any entry of elementList occurs in compoundFormula.

    Parameters
    ----------
    compoundFormula : str
        Formula string to search.
    elementList : list
        Element symbols (strings) to look for.

    Returns
    -------
    bool
        True if at least one entry is found, False otherwise (including
        when elementList is empty).

    Notes
    -----
    This is a plain substring test, not a parse of the formula: 'B' is also
    found in 'Br', 'N' in 'Ne', and so on.
    """
    #any() stops at the first match, like the original loop-and-break
    return any(element in compoundFormula for element in elementList)
128
+
129
+
130
+ """ CODE """
131
+
132
+ #Define file path
133
#NOTE(review): this is an absolute path on one machine and the code below runs at
#module level, so it executes whenever the file is run or imported - confirm intended.
path = "/Users/connards/Desktop/University/Rorrer Lab/Scripts/AutoQuant/data/example/raw data/example_GS2_FIDpMS.csv"

#Load the csv at the file path into DataFrame importDF
importDF = pd.read_csv(path)

#Label every csv entry with a compound type abbreviation.
#NOTE(review): importDF is only modified in memory; nothing visible here writes it back out.
for i, row in importDF.iterrows():

    #Pull the compound name and formula out of the row entry
    compoundName = row['Compound Name']
    compoundFormula = row['Formula']

    #Rows whose formula is not a string are left unlabelled
    if not isinstance(compoundFormula,str):
        continue

    #A formula containing an excluded element is labelled 'O' straight away
    if checkElements(compoundFormula,elementExclude):
        importDF.at[i,'Compound Type Abbreviation'] = 'O'
        continue

    #Rows whose name is not a string are left unlabelled
    if not isinstance(compoundName,str):
        continue

    #The substring lists are lowercase, so lowercase the name before matching
    compoundName = compoundName.lower()
    #Look up the compound type abbreviation and store it in the row's entry
    CTA = assignType(compoundName, contains, keyLoop)
    importDF.at[i,'Compound Type Abbreviation'] = CTA
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+