hjxdl 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdl/_version.py +2 -2
- hdl/datasets/city_code.json +2576 -0
- hdl/datasets/defined_BaseFeatures.fdef +236 -0
- hdl/datasets/las.tsv +0 -0
- hdl/datasets/route_template.json +113 -0
- hdl/datasets/vocab.txt +591 -0
- hdl/ju/__init__.py +0 -0
- hdl/ju/setup.py +55 -0
- hdl/jupyfuncs/__init__.py +0 -0
- hdl/jupyfuncs/chem/__init__.py +0 -0
- hdl/jupyfuncs/chem/mol.py +548 -0
- hdl/jupyfuncs/chem/norm.py +268 -0
- hdl/jupyfuncs/chem/pdb_ext.py +94 -0
- hdl/jupyfuncs/chem/scaffold.py +25 -0
- hdl/jupyfuncs/chem/shape.py +241 -0
- hdl/jupyfuncs/chem/tokenizers.py +2 -0
- hdl/jupyfuncs/dbtools/__init__.py +0 -0
- hdl/jupyfuncs/dbtools/pg.py +42 -0
- hdl/jupyfuncs/dbtools/query_info.py +150 -0
- hdl/jupyfuncs/dl/__init__.py +0 -0
- hdl/jupyfuncs/dl/cp.py +54 -0
- hdl/jupyfuncs/dl/dataframe.py +38 -0
- hdl/jupyfuncs/dl/fp.py +49 -0
- hdl/jupyfuncs/dl/list.py +20 -0
- hdl/jupyfuncs/dl/model_utils.py +97 -0
- hdl/jupyfuncs/dl/tensor.py +159 -0
- hdl/jupyfuncs/dl/uncs.py +112 -0
- hdl/jupyfuncs/llm/__init__.py +0 -0
- hdl/jupyfuncs/llm/extract.py +123 -0
- hdl/jupyfuncs/llm/openapi.py +94 -0
- hdl/jupyfuncs/network/__init__.py +0 -0
- hdl/jupyfuncs/network/proxy.py +20 -0
- hdl/jupyfuncs/path/__init__.py +0 -0
- hdl/jupyfuncs/path/glob.py +285 -0
- hdl/jupyfuncs/path/strings.py +65 -0
- hdl/jupyfuncs/show/__init__.py +0 -0
- hdl/jupyfuncs/show/pbar.py +50 -0
- hdl/jupyfuncs/show/plot.py +259 -0
- hdl/jupyfuncs/utils/__init__.py +0 -0
- hdl/jupyfuncs/utils/wrappers.py +8 -0
- hdl/utils/weather/__init__.py +0 -0
- hdl/utils/weather/weather.py +68 -0
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/METADATA +1 -1
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/RECORD +46 -5
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/WHEEL +1 -1
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,236 @@
|
|
1
|
+
# $Id$
|
2
|
+
#
|
3
|
+
# RDKit base fdef file.
|
4
|
+
# Created by Greg Landrum
|
5
|
+
#
|
6
|
+
|
7
|
+
AtomType NDonor [#7&!H0]
|
8
|
+
AtomType AmideN [$(N-C(=O))]
|
9
|
+
AtomType SulfonamideN [$([N;H0]S(=O)(=O))]
|
10
|
+
# AtomType NDonor [$([Nv3](-C)(-C)-C)]
|
11
|
+
|
12
|
+
# consider tautomer
|
13
|
+
# AtomType NDonor [$(n[n;H1]),$(nc[n;H1])]
|
14
|
+
|
15
|
+
AtomType ChalcDonor [O,S;H1;+0]
|
16
|
+
DefineFeature SingleAtomDonor [{NDonor},{ChalcDonor}]
|
17
|
+
Family Donor
|
18
|
+
Weights 1
|
19
|
+
EndFeature
|
20
|
+
|
21
|
+
# aromatic N, but not indole or pyrrole or fusing two rings
|
22
|
+
AtomType NAcceptor [n;X2;H0]
|
23
|
+
# AtomType NAcceptor [$([N;H0]#[C&v4])]
|
24
|
+
# tertiary nitrogen adjacent to aromatic carbon
|
25
|
+
|
26
|
+
# AtomType NAcceptor [N&v3;H0;$(Nc)]
|
27
|
+
AtomType NAcceptor [$(N=,#*);X2,X1]
|
28
|
+
|
29
|
+
# removes thioether and nitro oxygen
|
30
|
+
AtomType ChalcAcceptor [O;!$(O=N-*);!$(*-N=O)]
|
31
|
+
# Atomtype ChalcAcceptor [O;-;!$(*-N=O)]
|
32
|
+
|
33
|
+
# Removed aromatic sulfur from ChalcAcceptor definition
|
34
|
+
Atomtype ChalcAcceptor [o;+0]
|
35
|
+
|
36
|
+
# Hydroxyls and acids
|
37
|
+
AtomType Hydroxyl [O;H1;v2]
|
38
|
+
|
39
|
+
# F is an acceptor so long as the C has no other halogen neighbors. This is maybe
|
40
|
+
# a bit too general, but the idea is to eliminate things like CF3
|
41
|
+
AtomType HalogenAcceptor [F;$(F-[#6]);!$(FC[F,Cl,Br,I])]
|
42
|
+
|
43
|
+
DefineFeature SingleAtomAcceptor [{Hydroxyl},{ChalcAcceptor},{NAcceptor},{HalogenAcceptor}]
|
44
|
+
Family Acceptor
|
45
|
+
Weights 1
|
46
|
+
EndFeature
|
47
|
+
|
48
|
+
# this one is delightfully easy:
|
49
|
+
#DefineFeature AcidicGroup [C,S](=[O,S,P])-[O;H1,H0&-1]
|
50
|
+
# Family NegIonizable
|
51
|
+
# Weights 1.0,1.0,1.0
|
52
|
+
#EndFeature
|
53
|
+
|
54
|
+
DefineFeature AcidicGroup [O;H0&-1,$(O=[C,P,S][O-])]
|
55
|
+
Family NegIonizable
|
56
|
+
Weights 1.0
|
57
|
+
EndFeature
|
58
|
+
|
59
|
+
AtomType Carbon_NotDouble [C;!$(C=*)]
|
60
|
+
AtomType BasicNH2 [$([N;H2&+0][{Carbon_NotDouble}])]
|
61
|
+
AtomType BasicNH1 [$([N;H1&+0]([{Carbon_NotDouble}])[{Carbon_NotDouble}])]
|
62
|
+
AtomType PosNH3 [$([N;H3&+1][{Carbon_NotDouble}])]
|
63
|
+
AtomType PosNH2 [$([N;H2&+1]([{Carbon_NotDouble}])[{Carbon_NotDouble}])]
|
64
|
+
AtomType PosNH1 [$([N;H1&+1]([{Carbon_NotDouble}])([{Carbon_NotDouble}])[{Carbon_NotDouble}])]
|
65
|
+
AtomType BasicNH0 [$([N;H0&+0]([{Carbon_NotDouble}])([{Carbon_NotDouble}])[{Carbon_NotDouble}])]
|
66
|
+
AtomType QuatN [$([N;H0&+1]([{Carbon_NotDouble}])([{Carbon_NotDouble}])([{Carbon_NotDouble}])[{Carbon_NotDouble}])]
|
67
|
+
|
68
|
+
DefineFeature BasicGroup [$([#7;H3,H2,H1;+1]),$([#7]=[#6][#7;H3,H2,H1;+1])]
|
69
|
+
Family PosIonizable
|
70
|
+
Weights 1.0
|
71
|
+
EndFeature
|
72
|
+
|
73
|
+
#DefineFeature BasicGroup [{BasicNH2},{BasicNH1},{BasicNH0};!$(N[a])]
|
74
|
+
# Family PosIonizable
|
75
|
+
# Weights 1.0
|
76
|
+
#EndFeature
|
77
|
+
|
78
|
+
# 14.11.2007 (GL): add !$([N+]-[O-]) constraint so we don't match
|
79
|
+
# nitro (or similar) groups
|
80
|
+
#DefineFeature PosN [#7;+;!$([N+]-[O-])]
|
81
|
+
# Family PosIonizable
|
82
|
+
# Weights 1.0
|
83
|
+
#EndFeature
|
84
|
+
|
85
|
+
# imidazole group can be positively charged (too promiscuous?)
|
86
|
+
#DefineFeature Imidazole c1ncnc1
|
87
|
+
# Family PosIonizable
|
88
|
+
# Weights 1.0,1.0,1.0,1.0,1.0
|
89
|
+
#EndFeature
|
90
|
+
# guanidine group is positively charged (too promiscuous?)
|
91
|
+
#DefineFeature Guanidine NC(=N)N
|
92
|
+
# Family PosIonizable
|
93
|
+
# Weights 1.0,1.0,1.0,1.0
|
94
|
+
#EndFeature
|
95
|
+
|
96
|
+
# the LigZn binder features were adapted from combichem.fdl
|
97
|
+
DefineFeature ZnBinder1 [S;D1]-[#6]
|
98
|
+
Family ZnBinder
|
99
|
+
Weights 1,0
|
100
|
+
EndFeature
|
101
|
+
DefineFeature ZnBinder2 [#6]-C(=O)-C-[S;D1]
|
102
|
+
Family ZnBinder
|
103
|
+
Weights 0,0,1,0,1
|
104
|
+
EndFeature
|
105
|
+
DefineFeature ZnBinder3 [#6]-C(=O)-C-C-[S;D1]
|
106
|
+
Family ZnBinder
|
107
|
+
Weights 0,0,1,0,0,1
|
108
|
+
EndFeature
|
109
|
+
|
110
|
+
DefineFeature ZnBinder4 [#6]-C(=O)-N-[O;D1]
|
111
|
+
Family ZnBinder
|
112
|
+
Weights 0,0,1,0,1
|
113
|
+
EndFeature
|
114
|
+
DefineFeature ZnBinder5 [#6]-C(=O)-[O;D1]
|
115
|
+
Family ZnBinder
|
116
|
+
Weights 0,0,1,1
|
117
|
+
EndFeature
|
118
|
+
DefineFeature ZnBinder6 [#6]-P(=O)(-O)-[C,O,N]-[C,H]
|
119
|
+
Family ZnBinder
|
120
|
+
Weights 0,0,1,1,0,0
|
121
|
+
EndFeature
|
122
|
+
|
123
|
+
|
124
|
+
# aromatic rings of various sizes:
|
125
|
+
#
|
126
|
+
# Note that with the aromatics, it's important to include the ring-size queries along with
|
127
|
+
# the aromaticity query for two reasons:
|
128
|
+
# 1) Much of the current feature-location code assumes that the feature point is
|
129
|
+
# equidistant from the atoms defining it. Larger definitions like: a1aaaaaaaa1 will actually
|
130
|
+
# match things like 'o1c2cccc2ccc1', which have an aromatic unit spread across multiple simple
|
131
|
+
# rings and so don't fit that requirement.
|
132
|
+
# 2) It's *way* faster.
|
133
|
+
#
|
134
|
+
|
135
|
+
#
|
136
|
+
# 21.1.2008 (GL): update ring membership tests to reflect corrected meaning of
|
137
|
+
# "r" in SMARTS parser
|
138
|
+
#
|
139
|
+
AtomType AromR4 [a;r4,!R1&r3]
|
140
|
+
DefineFeature Arom4 [{AromR4}]1:[{AromR4}]:[{AromR4}]:[{AromR4}]:1
|
141
|
+
Family Aromatic
|
142
|
+
Weights 1.0,1.0,1.0,1.0
|
143
|
+
EndFeature
|
144
|
+
AtomType AromR5 [a;r5,!R1&r4,!R1&r3]
|
145
|
+
DefineFeature Arom5 [{AromR5}]1:[{AromR5}]:[{AromR5}]:[{AromR5}]:[{AromR5}]:1
|
146
|
+
Family Aromatic
|
147
|
+
Weights 1.0,1.0,1.0,1.0,1.0
|
148
|
+
EndFeature
|
149
|
+
AtomType AromR6 [a;r6,!R1&r5,!R1&r4,!R1&r3]
|
150
|
+
DefineFeature Arom6 [{AromR6}]1:[{AromR6}]:[{AromR6}]:[{AromR6}]:[{AromR6}]:[{AromR6}]:1
|
151
|
+
Family Aromatic
|
152
|
+
Weights 1.0,1.0,1.0,1.0,1.0,1.0
|
153
|
+
EndFeature
|
154
|
+
AtomType AromR7 [a;r7,!R1&r6,!R1&r5,!R1&r4,!R1&r3]
|
155
|
+
DefineFeature Arom7 [{AromR7}]1:[{AromR7}]:[{AromR7}]:[{AromR7}]:[{AromR7}]:[{AromR7}]:[{AromR7}]:1
|
156
|
+
Family Aromatic
|
157
|
+
Weights 1.0,1.0,1.0,1.0,1.0,1.0,1.0
|
158
|
+
EndFeature
|
159
|
+
AtomType AromR8 [a;r8,!R1&r7,!R1&r6,!R1&r5,!R1&r4,!R1&r3]
|
160
|
+
DefineFeature Arom8 [{AromR8}]1:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:1
|
161
|
+
Family Aromatic
|
162
|
+
Weights 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
|
163
|
+
EndFeature
|
164
|
+
|
165
|
+
# hydrophobic features
|
166
|
+
# any carbon that is not bonded to a polar atom is considered a hydrophobe
|
167
|
+
#
|
168
|
+
# 23.11.2007 (GL): match any bond (not just single bonds); add #6 at
|
169
|
+
# beginning to make it more efficient
|
170
|
+
AtomType Carbon_Polar [#6;$([#6]~[#7,#8,#9])]
|
171
|
+
# 23.11.2007 (GL): don't match charged carbon
|
172
|
+
AtomType Carbon_NonPolar [#6;+0;!{Carbon_Polar}]
|
173
|
+
|
174
|
+
DefineFeature ThreeWayAttach [D3,D4;{Carbon_NonPolar}]
|
175
|
+
Family Hydrophobe
|
176
|
+
Weights 1.0
|
177
|
+
EndFeature
|
178
|
+
|
179
|
+
DefineFeature ChainTwoWayAttach [R0;D2;{Carbon_NonPolar}]
|
180
|
+
Family Hydrophobe
|
181
|
+
Weights 1.0
|
182
|
+
EndFeature
|
183
|
+
|
184
|
+
DefineFeature AnyAtom [*]
|
185
|
+
Family Any
|
186
|
+
Weights 1.0
|
187
|
+
EndFeature
|
188
|
+
|
189
|
+
# hydrophobic atom
|
190
|
+
AtomType Hphobe [c,s,S&H0&v2,Br,I,{Carbon_NonPolar}]
|
191
|
+
AtomType RingHphobe [R;{Hphobe}]
|
192
|
+
|
193
|
+
# nitro groups in the RD code are always: *-[N+](=O)[O-]
|
194
|
+
DefineFeature Nitro2 [N;D3;+](=O)[O-]
|
195
|
+
Family LumpedHydrophobe
|
196
|
+
Weights 1.0,1.0,1.0
|
197
|
+
EndFeature
|
198
|
+
|
199
|
+
#
|
200
|
+
# 21.1.2008 (GL): update ring membership tests to reflect corrected meaning of
|
201
|
+
# "r" in SMARTS parser
|
202
|
+
#
|
203
|
+
AtomType Ring6 [r6,!R1&r5,!R1&r4,!R1&r3]
|
204
|
+
DefineFeature RH6_6 [{Ring6};{RingHphobe}]1[{Ring6};{RingHphobe}][{Ring6};{RingHphobe}][{Ring6};{RingHphobe}][{Ring6};{RingHphobe}][{Ring6};{RingHphobe}]1
|
205
|
+
Family LumpedHydrophobe
|
206
|
+
Weights 1.0,1.0,1.0,1.0,1.0,1.0
|
207
|
+
EndFeature
|
208
|
+
|
209
|
+
AtomType Ring5 [r5,!R1&r4,!R1&r3]
|
210
|
+
DefineFeature RH5_5 [{Ring5};{RingHphobe}]1[{Ring5};{RingHphobe}][{Ring5};{RingHphobe}][{Ring5};{RingHphobe}][{Ring5};{RingHphobe}]1
|
211
|
+
Family LumpedHydrophobe
|
212
|
+
Weights 1.0,1.0,1.0,1.0,1.0
|
213
|
+
EndFeature
|
214
|
+
|
215
|
+
AtomType Ring4 [r4,!R1&r3]
|
216
|
+
DefineFeature RH4_4 [{Ring4};{RingHphobe}]1[{Ring4};{RingHphobe}][{Ring4};{RingHphobe}][{Ring4};{RingHphobe}]1
|
217
|
+
Family LumpedHydrophobe
|
218
|
+
Weights 1.0,1.0,1.0,1.0
|
219
|
+
EndFeature
|
220
|
+
|
221
|
+
AtomType Ring3 [r3]
|
222
|
+
DefineFeature RH3_3 [{Ring3};{RingHphobe}]1[{Ring3};{RingHphobe}][{Ring3};{RingHphobe}]1
|
223
|
+
Family LumpedHydrophobe
|
224
|
+
Weights 1.0,1.0,1.0
|
225
|
+
EndFeature
|
226
|
+
|
227
|
+
DefineFeature tButyl [C;!R](-[CH3])(-[CH3])-[CH3]
|
228
|
+
Family LumpedHydrophobe
|
229
|
+
Weights 1.0,0.0,0.0,0.0
|
230
|
+
EndFeature
|
231
|
+
|
232
|
+
DefineFeature iPropyl [CH;!R](-[CH3])-[CH3]
|
233
|
+
Family LumpedHydrophobe
|
234
|
+
Weights 1.0,1.0,1.0
|
235
|
+
EndFeature
|
236
|
+
|
hdl/datasets/las.tsv
ADDED
File without changes
|
@@ -0,0 +1,113 @@
|
|
1
|
+
|
2
|
+
[
|
3
|
+
{
|
4
|
+
"mol_id": {
|
5
|
+
"id_type": "CAS",
|
6
|
+
"id_value": "111-222-333",
|
7
|
+
"smiles": "",
|
8
|
+
"routes": [
|
9
|
+
{
|
10
|
+
"rxn_template": "",
|
11
|
+
"rxn_type": "",
|
12
|
+
"yield": 0.0,
|
13
|
+
"children": [
|
14
|
+
{
|
15
|
+
"mol_id": {
|
16
|
+
"id_type": "CAS",
|
17
|
+
"id_value": "111-222-333",
|
18
|
+
"smiles": "",
|
19
|
+
"routes": []
|
20
|
+
}
|
21
|
+
},
|
22
|
+
{
|
23
|
+
"mol_id": {
|
24
|
+
"id_type": "CAS",
|
25
|
+
"id_value": "111-222-333",
|
26
|
+
"smiles": "",
|
27
|
+
"routes": []
|
28
|
+
}
|
29
|
+
}
|
30
|
+
]
|
31
|
+
},
|
32
|
+
{
|
33
|
+
"rxn_template": "",
|
34
|
+
"rxn_type": "",
|
35
|
+
"yield": 0.0,
|
36
|
+
"children": [
|
37
|
+
{
|
38
|
+
"mol_id": {
|
39
|
+
"id_type": "CAS",
|
40
|
+
"id_value": "111-222-333",
|
41
|
+
"smiles": "",
|
42
|
+
"routes": []
|
43
|
+
}
|
44
|
+
},
|
45
|
+
{
|
46
|
+
"mol_id": {
|
47
|
+
"id_type": "CAS",
|
48
|
+
"id_value": "111-222-333",
|
49
|
+
"smiles": "",
|
50
|
+
"routes": []
|
51
|
+
}
|
52
|
+
}
|
53
|
+
]
|
54
|
+
}
|
55
|
+
]
|
56
|
+
}
|
57
|
+
},
|
58
|
+
{
|
59
|
+
"mol_id": {
|
60
|
+
"id_type": "CAS",
|
61
|
+
"id_value": "111-222-333",
|
62
|
+
"smiles": "",
|
63
|
+
"routes": [
|
64
|
+
{
|
65
|
+
"rxn_template": "",
|
66
|
+
"rxn_type": "",
|
67
|
+
"yield": 0.0,
|
68
|
+
"children": [
|
69
|
+
{
|
70
|
+
"mol_id": {
|
71
|
+
"id_type": "CAS",
|
72
|
+
"id_value": "111-222-333",
|
73
|
+
"smiles": "",
|
74
|
+
"routes": []
|
75
|
+
}
|
76
|
+
},
|
77
|
+
{
|
78
|
+
"mol_id": {
|
79
|
+
"id_type": "CAS",
|
80
|
+
"id_value": "111-222-333",
|
81
|
+
"smiles": "",
|
82
|
+
"routes": []
|
83
|
+
}
|
84
|
+
}
|
85
|
+
]
|
86
|
+
},
|
87
|
+
{
|
88
|
+
"rxn_template": "",
|
89
|
+
"rxn_type": "",
|
90
|
+
"yield": 0.0,
|
91
|
+
"children": [
|
92
|
+
{
|
93
|
+
"mol_id": {
|
94
|
+
"id_type": "CAS",
|
95
|
+
"id_value": "111-222-333",
|
96
|
+
"smiles": "",
|
97
|
+
"routes": []
|
98
|
+
}
|
99
|
+
},
|
100
|
+
{
|
101
|
+
"mol_id": {
|
102
|
+
"id_type": "CAS",
|
103
|
+
"id_value": "111-222-333",
|
104
|
+
"smiles": "",
|
105
|
+
"routes": []
|
106
|
+
}
|
107
|
+
}
|
108
|
+
]
|
109
|
+
}
|
110
|
+
]
|
111
|
+
}
|
112
|
+
}
|
113
|
+
]
|