sapiopycommons 2024.9.4a323__py3-none-any.whl → 2024.9.6a325__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sapiopycommons might be problematic. Click here for more details.
- sapiopycommons/chem/IndigoMolecules.py +1 -0
- sapiopycommons/chem/Molecules.py +77 -19
- sapiopycommons/general/aliases.py +1 -1
- sapiopycommons/general/custom_report_util.py +2 -1
- sapiopycommons/multimodal/multimodal_data.py +6 -3
- {sapiopycommons-2024.9.4a323.dist-info → sapiopycommons-2024.9.6a325.dist-info}/METADATA +1 -1
- {sapiopycommons-2024.9.4a323.dist-info → sapiopycommons-2024.9.6a325.dist-info}/RECORD +9 -9
- {sapiopycommons-2024.9.4a323.dist-info → sapiopycommons-2024.9.6a325.dist-info}/WHEEL +0 -0
- {sapiopycommons-2024.9.4a323.dist-info → sapiopycommons-2024.9.6a325.dist-info}/licenses/LICENSE +0 -0
|
@@ -9,6 +9,7 @@ indigo.setOption("ignore-stereochemistry-errors", True)
|
|
|
9
9
|
indigo.setOption("render-stereo-style", "ext")
|
|
10
10
|
indigo.setOption("aromaticity-model", "generic")
|
|
11
11
|
indigo.setOption("render-coloring", True)
|
|
12
|
+
indigo.setOption("molfile-saving-mode", "3000")
|
|
12
13
|
indigo_inchi = IndigoInchi(indigo);
|
|
13
14
|
|
|
14
15
|
|
sapiopycommons/chem/Molecules.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# Author Yechen Qiao
|
|
2
2
|
# Common Molecule Utilities for Molecule Transfers with Sapio
|
|
3
|
+
from typing import cast
|
|
3
4
|
|
|
4
5
|
from rdkit import Chem
|
|
5
6
|
from rdkit.Chem import Crippen, MolToInchi
|
|
@@ -20,6 +21,25 @@ tautomer_params.tautomerReassignStereo = False
|
|
|
20
21
|
tautomer_params.tautomerRemoveIsotopicHs = True
|
|
21
22
|
enumerator = rdMolStandardize.TautomerEnumerator(tautomer_params)
|
|
22
23
|
|
|
24
|
+
|
|
25
|
+
def get_enhanced_stereo_reg_hash(mol: Mol, enhanced_stereo: bool) -> str:
    """
    Compute the RDKit registration hash used when registering a molecule with enhanced stereo.

    Any tautomer canonicalization or other clean-up has to be applied to the molecule before calling this
    function; no clean-up is performed here.

    :param mol: The molecule to obtain the hash for.
    :param enhanced_stereo: Whether enhanced stereo is being computed at all. When false, no hash is produced.
    :return: The registration hash, or an empty string when enhanced_stereo is false.
    """
    if not enhanced_stereo:
        return ""
    # The registration hash module is only imported when a hash is actually requested.
    from rdkit.Chem.RegistrationHash import GetMolLayers, GetMolHash, HashScheme
    mol_layers = GetMolLayers(mol, enable_tautomer_hash_v2=True)
    scheme: HashScheme = HashScheme.TAUTOMER_INSENSITIVE_LAYERS
    return GetMolHash(mol_layers, hash_scheme=scheme)
|
|
41
|
+
|
|
42
|
+
|
|
23
43
|
def neutralize_atoms(mol) -> Mol:
|
|
24
44
|
"""
|
|
25
45
|
Neutralize atoms per https://baoilleach.blogspot.com/2019/12/no-charge-simple-approach-to.html
|
|
@@ -86,7 +106,6 @@ def mol_to_img(mol_str: str) -> str:
|
|
|
86
106
|
return renderer.renderToString(mol)
|
|
87
107
|
|
|
88
108
|
|
|
89
|
-
|
|
90
109
|
def mol_to_sapio_partial_pojo(mol: Mol):
|
|
91
110
|
"""
|
|
92
111
|
Get the minimum information about molecule to Sapio, just its SMILES, V3000, and image data.
|
|
@@ -96,7 +115,7 @@ def mol_to_sapio_partial_pojo(mol: Mol):
|
|
|
96
115
|
Chem.SanitizeMol(mol)
|
|
97
116
|
mol.UpdatePropertyCache()
|
|
98
117
|
smiles = Chem.MolToSmiles(mol)
|
|
99
|
-
molBlock = Chem.MolToMolBlock(mol)
|
|
118
|
+
molBlock = Chem.MolToMolBlock(mol, forceV3000=True)
|
|
100
119
|
img = mol_to_img(mol)
|
|
101
120
|
molecule = dict()
|
|
102
121
|
molecule["smiles"] = smiles
|
|
@@ -105,23 +124,52 @@ def mol_to_sapio_partial_pojo(mol: Mol):
|
|
|
105
124
|
return molecule
|
|
106
125
|
|
|
107
126
|
|
|
108
|
-
def
|
|
127
|
+
def get_cxs_smiles_hash(mol: Mol, enhanced_stereo: bool) -> str:
    """
    Hash the canonical, isomeric CXSMILES of the molecule with SHA1.

    :param mol: The molecule to hash.
    :param enhanced_stereo: Whether enhanced stereo is enabled. When false, no hash is produced.
    :return: The hexadecimal SHA1 digest of the CXSMILES text, or an empty string when enhanced_stereo is false.
    """
    if enhanced_stereo:
        import hashlib
        cx_smiles = Chem.MolToCXSmiles(mol, canonical=True, isomericSmiles=True)
        return hashlib.sha1(cx_smiles.encode()).hexdigest()
    return ""
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def get_has_or_group(mol: Mol, enhanced_stereo: bool) -> bool:
    """
    Tell whether the molecule carries at least one OR stereo group.

    :param mol: The molecule whose stereo groups are inspected.
    :param enhanced_stereo: Whether enhanced stereochemistry is enabled. When false, the result is always False.
    :return: True if and only if enhanced stereochemistry is enabled and some stereo group of mol is of type OR.
    """
    if enhanced_stereo:
        from rdkit.Chem import StereoGroup_vect, STEREO_OR
        groups: StereoGroup_vect = mol.GetStereoGroups()
        return any(group.GetGroupType() == STEREO_OR for group in groups)
    return False
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def mol_to_sapio_substance(mol: Mol, include_stereoisomers=False,
|
|
109
152
|
normalize: bool = False, remove_salt: bool = False, make_images: bool = False,
|
|
110
|
-
salt_def: str | None = None, canonical_tautomer: bool = True
|
|
153
|
+
salt_def: str | None = None, canonical_tautomer: bool = True,
|
|
154
|
+
enhanced_stereo: bool = False, remove_atom_map: bool = True):
|
|
111
155
|
"""
|
|
112
156
|
Convert a molecule in RDKit to a molecule POJO in Sapio.
|
|
113
157
|
|
|
114
158
|
:param mol: The molecule in RDKit.
|
|
115
|
-
:param include_stereoisomers: If true, will compute all stereoisomer permutations of this molecule.
|
|
116
159
|
:param normalize: If true, will normalize the functional groups and return normalized result.
|
|
117
160
|
:param remove_salt: If true, we will remove salts iteratively from the molecule before returning their data.
|
|
118
161
|
We will also populate desaltedList with molecules we deleted.
|
|
162
|
+
:param make_images: Whether to make images as part of the result without having another script to resolve it.
|
|
119
163
|
:param salt_def: if not none, specifies custom salt to be used during the desalt process.
|
|
120
164
|
:param canonical_tautomer: if True, we will attempt to compute canonical tautomer for the molecule. Slow!
|
|
121
165
|
This is needed for a registry. Note it stops after enumeration of 1000.
|
|
166
|
+
:param enhanced_stereo: If enabled, enhanced stereo hash will be produced.
|
|
167
|
+
:param remove_atom_map: When set, clear the atom map numbers of all atoms, e.g. atom-to-atom mappings (AAM) left over from an earlier reaction the molecule was part of.
|
|
122
168
|
:return: The molecule POJO for Sapio.
|
|
123
169
|
"""
|
|
124
170
|
molecule = dict()
|
|
171
|
+
if remove_atom_map:
|
|
172
|
+
[a.SetAtomMapNum(0) for a in mol.GetAtoms()]
|
|
125
173
|
Chem.SanitizeMol(mol)
|
|
126
174
|
mol.UpdatePropertyCache()
|
|
127
175
|
Chem.GetSymmSSSR(mol)
|
|
@@ -157,7 +205,7 @@ def mol_to_sapio_substance(mol: Mol, include_stereoisomers: bool = False,
|
|
|
157
205
|
exactMass = Descriptors.ExactMolWt(mol)
|
|
158
206
|
molFormula = rdMolDescriptors.CalcMolFormula(mol)
|
|
159
207
|
charge = Chem.GetFormalCharge(mol)
|
|
160
|
-
molBlock = Chem.MolToMolBlock(mol)
|
|
208
|
+
molBlock = Chem.MolToMolBlock(mol, forceV3000=True)
|
|
161
209
|
|
|
162
210
|
molecule["cLogP"] = cLogP
|
|
163
211
|
molecule["tpsa"] = tpsa
|
|
@@ -181,28 +229,38 @@ def mol_to_sapio_substance(mol: Mol, include_stereoisomers: bool = False,
|
|
|
181
229
|
# We need to test the INCHI can be loaded back to indigo.
|
|
182
230
|
indigo_mol = indigo.loadMolecule(molBlock)
|
|
183
231
|
indigo_mol.aromatize()
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
232
|
+
if enhanced_stereo:
|
|
233
|
+
# Remove enhanced stereo layer when generating InChI as the stereo hash is generated separately for reg.
|
|
234
|
+
mol_copy: Mol = Chem.MolFromMolBlock(Chem.MolToMolBlock(mol))
|
|
235
|
+
Chem.CanonicalizeEnhancedStereo(mol_copy)
|
|
236
|
+
molecule["inchi"] = Chem.MolToInchi(mol_copy)
|
|
237
|
+
molecule["inchiKey"] = Chem.MolToInchiKey(mol_copy)
|
|
238
|
+
else:
|
|
239
|
+
indigo_inchi.resetOptions()
|
|
240
|
+
indigo_inchi_str = indigo_inchi.getInchi(indigo_mol)
|
|
241
|
+
molecule["inchi"] = indigo_inchi_str
|
|
242
|
+
indigo_inchi_key_str = indigo_inchi.getInchiKey(indigo_inchi_str)
|
|
243
|
+
molecule["inchiKey"] = indigo_inchi_key_str
|
|
189
244
|
molecule["smiles"] = indigo_mol.smiles()
|
|
245
|
+
molecule["reg_hash"] = get_enhanced_stereo_reg_hash(mol, enhanced_stereo=enhanced_stereo)
|
|
246
|
+
molecule["cxsmiles_hash"] = get_cxs_smiles_hash(mol, enhanced_stereo=enhanced_stereo)
|
|
247
|
+
molecule["has_or_group"] = get_has_or_group(mol, enhanced_stereo=enhanced_stereo)
|
|
190
248
|
|
|
191
|
-
if include_stereoisomers and has_chiral_centers(mol):
|
|
192
|
-
stereoisomers = find_all_possible_stereoisomers(mol, only_unassigned=False, try_embedding=False, unique=True)
|
|
193
|
-
molecule["stereoisomers"] = [mol_to_sapio_partial_pojo(x) for x in stereoisomers]
|
|
194
249
|
return molecule
|
|
195
250
|
|
|
196
251
|
|
|
197
|
-
def mol_to_sapio_compound(mol: Mol, include_stereoisomers: bool = False,
|
|
252
|
+
def mol_to_sapio_compound(mol: Mol, include_stereoisomers=False, enhanced_stereo: bool = False,
                          salt_def: str | None = None, resolve_canonical: bool = True,
                          make_images: bool = False, canonical_tautomer: bool = True,
                          remove_atom_map: bool = True):
    """
    Build the compound dictionary for Sapio out of an RDKit molecule.

    The result always holds the substance of the molecule as given under 'originalMol' (no normalization,
    no salt removal). When resolve_canonical is set, it also holds a normalized, desalted substance under
    'canonicalMol'.

    :param mol: The molecule in RDKit.
    :param include_stereoisomers: Accepted but not forwarded; both substance conversions are called with
        include_stereoisomers=False.
    :param enhanced_stereo: Forwarded to both substance conversions.
    :param salt_def: Custom salt definition, forwarded to the canonical conversion only.
    :param resolve_canonical: Whether to also produce the 'canonicalMol' entry.
    :param make_images: Forwarded to both substance conversions.
    :param canonical_tautomer: Forwarded to both substance conversions.
    :param remove_atom_map: Forwarded to both substance conversions.
    :return: A dictionary with 'originalMol' and, when requested, 'canonicalMol'.
    """
    # Keyword arguments that are identical for the original and the canonical conversion.
    shared_options = {
        "include_stereoisomers": False,
        "make_images": make_images,
        "canonical_tautomer": canonical_tautomer,
        "enhanced_stereo": enhanced_stereo,
        "remove_atom_map": remove_atom_map,
    }
    compound = {
        'originalMol': mol_to_sapio_substance(mol, normalize=False, remove_salt=False, **shared_options),
    }
    if resolve_canonical:
        compound['canonicalMol'] = mol_to_sapio_substance(mol, normalize=True, remove_salt=True,
                                                          salt_def=salt_def, **shared_options)
    return compound
|
|
@@ -14,7 +14,7 @@ from sapiopycommons.general.exceptions import SapioException
|
|
|
14
14
|
|
|
15
15
|
FieldValue = int | float | str | bool | None
|
|
16
16
|
"""Allowable values for fields in the system."""
|
|
17
|
-
RecordModel = PyRecordModel | WrappedRecordModel
|
|
17
|
+
RecordModel = PyRecordModel | WrappedRecordModel
|
|
18
18
|
"""Different forms that a record model could take."""
|
|
19
19
|
SapioRecord = DataRecord | RecordModel
|
|
20
20
|
"""A record could be provided as either a DataRecord, PyRecordModel, or WrappedRecordModel (WrappedType)."""
|
|
@@ -243,7 +243,8 @@ class CustomReportUtil:
|
|
|
243
243
|
else:
|
|
244
244
|
encountered_names.append(field_name)
|
|
245
245
|
|
|
246
|
-
filters:
|
|
246
|
+
if filters:
|
|
247
|
+
filters: dict[str, Iterable[FieldValue]] = AliasUtil.to_data_field_names_dict(filters)
|
|
247
248
|
|
|
248
249
|
ret: list[dict[str, FieldValue]] = []
|
|
249
250
|
for row in rows:
|
|
@@ -38,6 +38,9 @@ class PyMolecule:
|
|
|
38
38
|
normError: str | None
|
|
39
39
|
desaltError: str | None
|
|
40
40
|
desaltedList: list[str] | None
|
|
41
|
+
registrationHash: str | None
|
|
42
|
+
hasOrGroup: bool
|
|
43
|
+
CXSMILESHash: str | None
|
|
41
44
|
|
|
42
45
|
|
|
43
46
|
@dataclass
|
|
@@ -100,9 +103,9 @@ class PyMoleculeLoaderResult:
|
|
|
100
103
|
compoundList: the compounds successfully loaded.
|
|
101
104
|
errorList: an error record is added here for each one we failed to load in Sapio.
|
|
102
105
|
"""
|
|
103
|
-
compoundByStr: dict[str, PyCompound]
|
|
104
|
-
compoundList: list[PyCompound]
|
|
105
|
-
errorList: list[ChemLoadingError]
|
|
106
|
+
compoundByStr: dict[str, PyCompound] | None
|
|
107
|
+
compoundList: list[PyCompound] | None
|
|
108
|
+
errorList: list[ChemLoadingError] | None
|
|
106
109
|
|
|
107
110
|
|
|
108
111
|
@dataclass
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sapiopycommons
|
|
3
|
-
Version: 2024.9.
|
|
3
|
+
Version: 2024.9.6a325
|
|
4
4
|
Summary: Official Sapio Python API Utilities Package
|
|
5
5
|
Project-URL: Homepage, https://github.com/sapiosciences
|
|
6
6
|
Author-email: Jonathan Steck <jsteck@sapiosciences.com>, Yechen Qiao <yqiao@sapiosciences.com>
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
sapiopycommons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
sapiopycommons/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
sapiopycommons/callbacks/callback_util.py,sha256=7gUyJ-i3Owdy9bdQSOYKX_AgNRaT0PTbdhulA59tNys,63616
|
|
4
|
-
sapiopycommons/chem/IndigoMolecules.py,sha256=
|
|
5
|
-
sapiopycommons/chem/Molecules.py,sha256=
|
|
4
|
+
sapiopycommons/chem/IndigoMolecules.py,sha256=3f-aig3AJkKJhRmhlQ0cI-5G8oeaQk_3foJTDZCvoko,2040
|
|
5
|
+
sapiopycommons/chem/Molecules.py,sha256=0B_SsXB2swg2DiP50p0tcNOVO1ajlxumSI42YyDiSHI,11517
|
|
6
6
|
sapiopycommons/chem/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
sapiopycommons/customreport/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
sapiopycommons/customreport/column_builder.py,sha256=sS_wZYOR72rs3syTNjwCVP4h8M8N0b0burkTxFQItVU,3019
|
|
@@ -24,16 +24,16 @@ sapiopycommons/files/file_validator.py,sha256=4OvY98ueJWPJdpndwnKv2nqVvLP9S2W7Il
|
|
|
24
24
|
sapiopycommons/files/file_writer.py,sha256=96Xl8TTT46Krxe_J8rmmlEMtel4nzZB961f5Yqtl1-I,17616
|
|
25
25
|
sapiopycommons/general/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
26
|
sapiopycommons/general/accession_service.py,sha256=HYgyOsH_UaoRnoury-c2yTW8SeG4OtjLemdpCzoV4R8,13484
|
|
27
|
-
sapiopycommons/general/aliases.py,sha256=
|
|
27
|
+
sapiopycommons/general/aliases.py,sha256=Gih9shHsj765q4HimfFTTI7wWDPAtXjoqCgHisyIQZY,10409
|
|
28
28
|
sapiopycommons/general/audit_log.py,sha256=tJi4uU4qRY2WWcK4ItkjRvoCHCwwiU9LwCNv4lP5-QQ,8713
|
|
29
|
-
sapiopycommons/general/custom_report_util.py,sha256=
|
|
29
|
+
sapiopycommons/general/custom_report_util.py,sha256=BGu9Ki0wn3m4Nk-LKM6inDSfe8ULUSG9d-HJJNOTtGc,15653
|
|
30
30
|
sapiopycommons/general/exceptions.py,sha256=DOlLKnpCatxQF-lVCToa8ryJgusWLvip6N_1ALN00QE,1679
|
|
31
31
|
sapiopycommons/general/popup_util.py,sha256=L-4qpTemSZdlD6_6oEsDYIzLOCiZgDK6wC6DqUwzOYA,31925
|
|
32
32
|
sapiopycommons/general/sapio_links.py,sha256=o9Z-8y2rz6AI0Cy6tq58ElPge9RBnisGc9NyccbaJxs,2610
|
|
33
33
|
sapiopycommons/general/storage_util.py,sha256=ovmK_jN7v09BoX07XxwShpBUC5WYQOM7dbKV_VeLXJU,8892
|
|
34
34
|
sapiopycommons/general/time_util.py,sha256=sXThADCRAQDWYDD9C5CdhcKYIt3qOaVNyZfGBR7HW9A,8701
|
|
35
35
|
sapiopycommons/multimodal/multimodal.py,sha256=A1QsC8QTPmgZyPr7KtMbPRedn2Ie4WIErodUvQ9otgU,6724
|
|
36
|
-
sapiopycommons/multimodal/multimodal_data.py,sha256=
|
|
36
|
+
sapiopycommons/multimodal/multimodal_data.py,sha256=t-0uY4cVgm88uXaSOL4ZeB6zmdHufowXuLFlMk61wFg,15087
|
|
37
37
|
sapiopycommons/processtracking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
38
|
sapiopycommons/processtracking/endpoints.py,sha256=w5bziI2xC7450M95rCF8JpRwkoni1kEDibyAux9B12Q,10848
|
|
39
39
|
sapiopycommons/recordmodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -44,7 +44,7 @@ sapiopycommons/rules/on_save_rule_handler.py,sha256=Rkqvph20RbNq6m-RF4fbvCP-YfD2
|
|
|
44
44
|
sapiopycommons/webhook/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
45
|
sapiopycommons/webhook/webhook_handlers.py,sha256=jwc4xu-wwl8haS5k1dENZ1UIYK9GQk74TAo3CGxMW9U,16583
|
|
46
46
|
sapiopycommons/webhook/webservice_handlers.py,sha256=1J56zFI0pWl5MHoNTznvcZumITXgAHJMluj8-2BqYEw,3315
|
|
47
|
-
sapiopycommons-2024.9.
|
|
48
|
-
sapiopycommons-2024.9.
|
|
49
|
-
sapiopycommons-2024.9.
|
|
50
|
-
sapiopycommons-2024.9.
|
|
47
|
+
sapiopycommons-2024.9.6a325.dist-info/METADATA,sha256=NwL6x_WCcT82lr4t5YN-C5eIWyXrW7rKNEL2_GE9s9Y,3175
|
|
48
|
+
sapiopycommons-2024.9.6a325.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
49
|
+
sapiopycommons-2024.9.6a325.dist-info/licenses/LICENSE,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725
|
|
50
|
+
sapiopycommons-2024.9.6a325.dist-info/RECORD,,
|
|
File without changes
|
{sapiopycommons-2024.9.4a323.dist-info → sapiopycommons-2024.9.6a325.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|