@datagrok/sequence-translator 1.0.17 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +4 -3
- package/CHANGELOG.md +3 -0
- package/detectors.js +8 -0
- package/dist/package-test.js +2 -73079
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -72284
- package/dist/package.js.map +1 -0
- package/files/axolabs-style.json +97 -0
- package/files/codes-to-symbols.json +66 -0
- package/files/formats-to-helm.json +59 -0
- package/files/linkers.json +22 -0
- package/files/monomer-lib.json +1094 -0
- package/link-bio +7 -0
- package/package.json +30 -28
- package/scripts/build-monomer-lib.py +391 -122
- package/src/demo/demo-st-ui.ts +71 -0
- package/src/demo/handle-error.ts +12 -0
- package/src/model/axolabs/axolabs-tab.ts +111 -0
- package/src/model/axolabs/const.ts +33 -0
- package/src/{axolabs-tab → model/axolabs}/draw-svg.ts +1 -1
- package/src/{axolabs-tab → model/axolabs}/helpers.ts +7 -5
- package/src/model/const.ts +19 -0
- package/src/model/data-loading-utils/const.ts +8 -0
- package/src/model/data-loading-utils/json-loader.ts +38 -0
- package/src/model/data-loading-utils/types.ts +30 -0
- package/src/model/format-translation/const.ts +8 -0
- package/src/model/format-translation/conversion-utils.ts +48 -0
- package/src/model/format-translation/format-converter.ts +107 -0
- package/src/model/helpers.ts +12 -0
- package/src/model/monomer-lib/const.ts +3 -0
- package/src/model/monomer-lib/lib-wrapper.ts +106 -0
- package/src/model/parsing-validation/format-detector.ts +57 -0
- package/src/model/parsing-validation/sequence-validator.ts +52 -0
- package/src/model/sequence-to-structure-utils/const.ts +1 -0
- package/src/{utils/structures-works → model/sequence-to-structure-utils}/mol-transformations.ts +33 -41
- package/src/model/sequence-to-structure-utils/monomer-code-parser.ts +92 -0
- package/src/model/sequence-to-structure-utils/sdf-tab.ts +94 -0
- package/src/model/sequence-to-structure-utils/sequence-to-molfile.ts +409 -0
- package/src/package.ts +104 -92
- package/src/tests/const.ts +17 -0
- package/src/tests/smiles-tests.ts +32 -457
- package/src/view/const/main-tab.ts +3 -0
- package/src/view/const/view.ts +10 -0
- package/src/view/css/axolabs-tab.css +1 -0
- package/src/view/css/colored-text-input.css +27 -0
- package/src/view/css/main-tab.css +46 -0
- package/src/view/css/sdf-tab.css +39 -0
- package/src/view/monomer-lib-viewer/viewer.ts +22 -0
- package/src/view/tabs/axolabs.ts +720 -0
- package/src/view/tabs/main.ts +174 -0
- package/src/view/tabs/sdf.ts +173 -0
- package/src/view/utils/app-info-dialog.ts +18 -0
- package/src/view/utils/colored-input/colored-text-input.ts +56 -0
- package/src/view/utils/colored-input/input-painters.ts +44 -0
- package/src/view/utils/draw-molecule.ts +86 -0
- package/src/view/utils/molecule-img.ts +106 -0
- package/src/view/view.ts +129 -0
- package/tsconfig.json +12 -18
- package/webpack.config.js +17 -4
- package/README.md +0 -84
- package/css/style.css +0 -18
- package/img/Sequence Translator Axolabs.png +0 -0
- package/jest.config.js +0 -33
- package/setup-unlink-clean.cmd +0 -14
- package/setup-unlink-clean.sh +0 -21
- package/setup.cmd +0 -14
- package/setup.sh +0 -37
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -97
- package/src/apps/oligo-sd-file-app.ts +0 -58
- package/src/autostart/calculations.ts +0 -40
- package/src/autostart/constants.ts +0 -37
- package/src/autostart/registration.ts +0 -306
- package/src/axolabs-tab/axolabs-tab.ts +0 -873
- package/src/axolabs-tab/define-pattern.ts +0 -874
- package/src/hardcode-to-be-eliminated/ICDs.ts +0 -3
- package/src/hardcode-to-be-eliminated/IDPs.ts +0 -3
- package/src/hardcode-to-be-eliminated/const.ts +0 -5
- package/src/hardcode-to-be-eliminated/constants.ts +0 -101
- package/src/hardcode-to-be-eliminated/converters.ts +0 -323
- package/src/hardcode-to-be-eliminated/map.ts +0 -720
- package/src/hardcode-to-be-eliminated/salts.ts +0 -2
- package/src/hardcode-to-be-eliminated/sources.ts +0 -3
- package/src/hardcode-to-be-eliminated/users.ts +0 -3
- package/src/main-tab/main-tab.ts +0 -210
- package/src/sdf-tab/sdf-tab.ts +0 -163
- package/src/sdf-tab/sequence-codes-tools.ts +0 -347
- package/src/utils/const.ts +0 -0
- package/src/utils/helpers.ts +0 -28
- package/src/utils/parse.ts +0 -27
- package/src/utils/sdf-add-columns.ts +0 -118
- package/src/utils/sdf-save-table.ts +0 -56
- package/src/utils/structures-works/draw-molecule.ts +0 -84
- package/src/utils/structures-works/from-monomers.ts +0 -266
- package/test-SequenceTranslator-6288c2fbe346-695b7b55.html +0 -259
- package/vendors/openchemlib-full.js +0 -293
package/link-bio
ADDED
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/sequence-translator",
|
|
3
3
|
"friendlyName": "Sequence Translator",
|
|
4
|
-
"version": "1.0
|
|
4
|
+
"version": "1.1.0",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Alexey Choposky",
|
|
7
7
|
"email": "achopovsky@datagrok.ai"
|
|
@@ -13,12 +13,13 @@
|
|
|
13
13
|
"directory": "packages/SequenceTranslator"
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
|
+
"@datagrok-libraries/chem-meta": "^1.0.9",
|
|
16
17
|
"@datagrok-libraries/utils": "^1.17.2",
|
|
18
|
+
"@datagrok-libraries/tutorials": "^1.3.2",
|
|
19
|
+
"cash-dom": "^8.1.0",
|
|
20
|
+
"datagrok-api": "^1.10.2",
|
|
17
21
|
"@types/react": "^18.0.15",
|
|
18
|
-
"@datagrok-libraries/bio": "^5.
|
|
19
|
-
"@deck.gl/core": "8.8.12",
|
|
20
|
-
"@luma.gl/core": "8.5.17",
|
|
21
|
-
"datagrok-api": "^1.8.2",
|
|
22
|
+
"@datagrok-libraries/bio": "^5.32.1",
|
|
22
23
|
"datagrok-tools": "^4.1.2",
|
|
23
24
|
"npm": "^8.11.0",
|
|
24
25
|
"openchemlib": "6.0.1",
|
|
@@ -27,26 +28,23 @@
|
|
|
27
28
|
"typescript": "^4.7.4"
|
|
28
29
|
},
|
|
29
30
|
"devDependencies": {
|
|
30
|
-
"@types/jest": "^27.0.0",
|
|
31
31
|
"@types/jquery": "^3.5.14",
|
|
32
|
-
"@typescript-eslint/eslint-plugin": "^4.29.1",
|
|
33
|
-
"@typescript-eslint/parser": "^4.29.1",
|
|
34
|
-
"cash-dom": "^8.1.0",
|
|
35
|
-
"eslint": "^7.32.0",
|
|
36
|
-
"eslint-config-google": "^0.14.0",
|
|
37
|
-
"jest": "^27.0.0",
|
|
38
|
-
"jest-html-reporter": "^3.5.0",
|
|
39
|
-
"puppeteer": "^13.7.0",
|
|
40
|
-
"ts-jest": "^27.0.0",
|
|
41
|
-
"webpack": "^5.31.0",
|
|
42
|
-
"webpack-cli": "^4.6.0",
|
|
43
32
|
"@types/js-yaml": "^4.0.5",
|
|
44
|
-
"js-yaml": "^4.1.0",
|
|
45
33
|
"@types/node-fetch": "^2.6.2",
|
|
46
|
-
"
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
"
|
|
34
|
+
"@types/react": "^18.0.15",
|
|
35
|
+
"@typescript-eslint/eslint-plugin": "latest",
|
|
36
|
+
"@typescript-eslint/parser": "parser",
|
|
37
|
+
"css-loader": "^6.7.3",
|
|
38
|
+
"datagrok-tools": "^4.7.10",
|
|
39
|
+
"eslint": "^7.32.0",
|
|
40
|
+
"eslint-config-google": "latest",
|
|
41
|
+
"style-loader": "^3.3.1",
|
|
42
|
+
"ts-loader": "^9.3.1",
|
|
43
|
+
"typescript": "^4.7.4",
|
|
44
|
+
"webpack": "^5.75.0",
|
|
45
|
+
"webpack-cli": "latest",
|
|
46
|
+
"@datagrok/chem": "1.5.7",
|
|
47
|
+
"@datagrok/bio": "^2.1.12"
|
|
50
48
|
},
|
|
51
49
|
"scripts": {
|
|
52
50
|
"link-api": "npm link datagrok-api",
|
|
@@ -59,13 +57,17 @@
|
|
|
59
57
|
"release-sequencetranslator-public": "grok publish public --release",
|
|
60
58
|
"debug-sequencetranslator-local": "grok publish local",
|
|
61
59
|
"release-sequencetranslator-local": "grok publish local --release",
|
|
62
|
-
"
|
|
63
|
-
"
|
|
64
|
-
"test
|
|
60
|
+
"lint": "eslint \"./src/**/*.ts\"",
|
|
61
|
+
"lint-fix": "eslint \"./src/**/*.ts\" --fix",
|
|
62
|
+
"test": "grok test",
|
|
63
|
+
"test-dev": "grok test --host dev",
|
|
64
|
+
"test-local": "grok test --host localhost"
|
|
65
65
|
},
|
|
66
|
-
"
|
|
67
|
-
"
|
|
68
|
-
|
|
66
|
+
"canEdit": [
|
|
67
|
+
"Developers"
|
|
68
|
+
],
|
|
69
|
+
"canView": [
|
|
70
|
+
"All users"
|
|
69
71
|
],
|
|
70
72
|
"category": "Bioinformatics"
|
|
71
73
|
}
|
|
@@ -1,178 +1,447 @@
|
|
|
1
|
+
# pylint: disable=no-member
|
|
2
|
+
import os.path
|
|
3
|
+
import sys
|
|
1
4
|
from io import TextIOWrapper
|
|
5
|
+
from typing import Optional
|
|
2
6
|
|
|
3
7
|
from rdkit import Chem
|
|
8
|
+
from rdkit.Chem.rdchem import Mol
|
|
4
9
|
|
|
5
10
|
import orjson
|
|
6
11
|
|
|
7
12
|
import click
|
|
8
13
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
+
# Fixed-format line markers of an MDL V3000 (CTfile) molblock.
# Every V3000 line starts with the 7-character prefix 'M  V30 ' -- note the TWO
# spaces after 'M'. IDX_OF_FIRST_VALUE below depends on that exact width, and
# the markers are matched against molblock lines by exact string equality.
BEGIN_ATOM_LINE = 'M  V30 BEGIN ATOM'
END_ATOM_LINE = 'M  V30 END ATOM'
BEGIN_BOND_LINE = 'M  V30 BEGIN BOND'
END_BOND_LINE = 'M  V30 END BOND'
BEGIN_COLLECTION_LINE = 'M  V30 BEGIN COLLECTION'
END_COLLECTION_LINE = 'M  V30 END COLLECTION'
COLLECTION_STEABS_LINE = 'M  V30 MDLV30/STEABS'
# Offset of the first value on a V3000 line, i.e. len('M  V30 ').
IDX_OF_FIRST_VALUE = 7
# Number of leading space-separated fields of a bond line used as the bond key.
NUM_OF_BOND_POSITIONAL_ARGS = 4
# Prefix of the optional configuration keyword on atom/bond lines.
CFG = "CFG="
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def mol_add_collection(mol: Mol,
                       name: str,
                       title: Optional[str] = None,
                       src_mol: Optional[str] = None) -> str:
    """
    Get and postprocess (atom's CFG, title, etc.) molblock

    The molecule is written as a V3000 molblock by RDKit; the second line of
    the block is then replaced by ``title`` (when given) and suffixed with
    ``'|' + name`` unless it already contains ``name``.

    When ``src_mol`` is given, bond and atom ``CFG=`` values are reconciled
    with the source molblock and a ``MDLV30/STEABS`` collection line listing
    every atom that received a CFG is written (replacing an existing STEABS
    line, or inserted into an existing / newly created collection block).
    When ``src_mol`` is not given, no CFG or collection postprocessing is done.

    :param mol: Mol molecule structure / object
    :param name: Monomer name to add to molblock title string
    :param title: Title to replace in Chem.MolToMolBlock() string output
    :param src_mol: Source molblock data, to restore optional CFG
    :return: molblock string
    :raises ValueError: if source and target molblocks have different atom counts
    :raises KeyError: if a bond carrying CFG has no counterpart in the other
        molblock, or a STEABS atom is left without any CFG
    """
    res: str = Chem.MolToMolBlock(mol, forceV3000=True)  # MolToMolFile

    molblock_line_list: list[str] = res.split('\n')
    if title:
        molblock_line_list[1] = title

    if name and name not in molblock_line_list[1]:
        molblock_line_list[1] += '|' + name

    chirality = [atom.GetChiralTag() for atom in mol.GetAtoms()]

    # preserve chirality for bonds from src_mol
    tgt_mol_file_map = MolFileMap.parse(res)
    steabs = []
    if src_mol:
        src_mol_file_map = MolFileMap.parse(src_mol)
        src_mol_file = src_mol_file_map.mol_file
        tgt_mol_file = tgt_mol_file_map.mol_file
        if len(tgt_mol_file.atom_list) != len(src_mol_file.atom_list):
            raise ValueError(f"Atoms count of src and tgt differs for monomer '{name}'.")

        begin_bond_idx = tgt_mol_file_map.begin_bond_idx
        begin_atom_idx = tgt_mol_file_map.begin_atom_idx

        # restore bond cfg values lost/transformed by rdkit
        for bond_key, src_bond in src_mol_file.bonds.items():
            if not src_bond.cfg:
                continue
            if bond_key not in tgt_mol_file.bonds:
                raise KeyError(f"Bond key '{bond_key}' not found in tgt bonds.")
            tgt_bond: MolFileBond = tgt_mol_file.bonds[bond_key]
            tgt_bond_cfg_str: str = ' '.join(tgt_bond.cfg)
            src_bond_cfg_str: str = ' '.join(src_bond.cfg)
            if tgt_bond_cfg_str != src_bond_cfg_str:
                molblock_line_list[begin_bond_idx + tgt_bond.bond_idx] += f" {src_bond_cfg_str}"

        # remove bond cfg values added by rdkit
        # (runs after the restore pass on purpose: it strips the RDKit value
        # from lines that may already carry the appended source value)
        for bond_key, tgt_bond in tgt_mol_file.bonds.items():
            if not tgt_bond.cfg:
                continue
            if bond_key not in src_mol_file.bonds:
                raise KeyError(f"Bond key '{bond_key}' not found in src bonds.")
            src_bond: MolFileBond = src_mol_file.bonds[bond_key]
            src_bond_cfg_str: str = ' '.join(src_bond.cfg)
            tgt_bond_cfg_str: str = ' '.join(tgt_bond.cfg)
            if tgt_bond_cfg_str != src_bond_cfg_str:
                line_idx = begin_bond_idx + tgt_bond.bond_idx
                molblock_line_list[line_idx] = \
                    molblock_line_list[line_idx].replace(tgt_bond_cfg_str, "")

        # atom CFG: the source value wins, RDKit chiral tag is the fallback
        for tgt_atom_idx0, tgt_atom in enumerate(tgt_mol_file.atom_list):
            src_atom = src_mol_file.atom_list[tgt_atom_idx0]
            atom_chirality = chirality[tgt_atom_idx0]
            # +1 skips the BEGIN ATOM marker line itself
            atom_line_idx = begin_atom_idx + tgt_atom_idx0 + 1
            if src_atom.cfg:
                molblock_line_list[atom_line_idx] += " {0}".format(
                    ' '.join(src_atom.cfg))
                steabs.append(tgt_atom_idx0 + 1)
            elif atom_chirality != Chem.rdchem.CHI_UNSPECIFIED:
                molblock_line_list[atom_line_idx] += " CFG={0}".format(int(atom_chirality))
                steabs.append(tgt_atom_idx0 + 1)
            elif src_atom.atom_idx in src_mol_file.collection_steabs:
                raise KeyError(f"Source STEABS atom '{src_atom}' not accounted")
            elif tgt_atom.atom_idx in tgt_mol_file.collection_steabs:
                raise KeyError(f"Target STEABS atom '{tgt_atom}' not accounted")

    if steabs:
        steabs_str: str = COLLECTION_STEABS_LINE + " ATOMS=({count} {list})".format(
            count=len(steabs),
            list=' '.join([str(idx) for idx in steabs]))
        if tgt_mol_file_map.collection_steabs_idx is not None:
            # a STEABS line already exists: overwrite it in place
            molblock_line_list[tgt_mol_file_map.collection_steabs_idx] = steabs_str
        elif tgt_mol_file_map.begin_collection_idx is not None:
            # collection block exists without STEABS: insert right after BEGIN
            tgt_collection_steabs_idx = tgt_mol_file_map.begin_collection_idx + 1
            molblock_line_list = molblock_line_list[:tgt_collection_steabs_idx] + \
                [steabs_str] + \
                molblock_line_list[tgt_collection_steabs_idx:]
        else:
            # no collection block at all: create one right after END BOND
            tgt_collection_idx = tgt_mol_file_map.end_bond_idx + 1
            molblock_line_list = molblock_line_list[:tgt_collection_idx] + \
                [BEGIN_COLLECTION_LINE, steabs_str, END_COLLECTION_LINE] + \
                molblock_line_list[tgt_collection_idx:]

    return '\n'.join(molblock_line_list)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def prepare_molblock(src_molblock: str, name: str) -> str:
    """
    Loads mol from src_molblock str. Fixes title, adds chirality to atoms
    and preserves chirality for bonds.

    :param src_molblock: Source molblock text of the monomer
    :param name: Monomer name, passed on to be added to the title line
    :return: postprocessed V3000 molblock string
    :raises ValueError: if RDKit cannot parse ``src_molblock``
    """
    # Using sanitize=False leads to unwanted moving stereo (invalid?) CFGs to other bonds
    mol: Mol = Chem.MolFromMolBlock(src_molblock, removeHs=False)
    if mol is None:
        # RDKit reports parse failures by returning None; fail here with the
        # monomer name instead of an obscure error further down.
        raise ValueError(f"RDKit failed to parse molblock of monomer '{name}'.")
    # the second line of the source block is reused as the title line
    title = src_molblock.split('\n')[1]
    return mol_add_collection(mol, name, title=title, src_mol=src_molblock)
|
|
65
123
|
|
|
66
124
|
|
|
67
125
|
class Monomer:
    """
    Monomer record of the output library.

    Holds the fields written to the library JSON; ``molfile`` is stored
    after postprocessing by ``prepare_molblock``.
    """

    def __init__(self, symbol: str, name: str, molfile: str, smiles: str,
                 meta: dict):
        """
        :param symbol: Monomer symbol
        :param name: Monomer name
        :param molfile: Source molblock text, postprocessed before storing
        :param smiles: SMILES string, stored as is
        :param meta: Arbitrary metadata object, stored as is
        """
        self.monomerType = 'Backbone'
        self.smiles = smiles
        self.name = name
        self.author = 'SequenceTranslator'
        self.molfile = prepare_molblock(molfile, name)
        # the same two OH-capped R-groups are assigned to every monomer
        self.rgroups = [{
            "capGroupSmiles": "O[*:1]",
            "alternateId": "R1-OH",
            "capGroupName": "OH",
            "label": "R1"
        }, {
            "capGroupSmiles": "O[*:2]",
            "alternateId": "R2-OH",
            "capGroupName": "OH",
            "label": "R2"
        }]
        self.createDate = None
        self.id = 0
        self.polymerType = 'RNA'
        self.symbol = symbol
        self.meta = meta

    @classmethod
    def from_json(cls, src_json: dict) -> 'Monomer':
        """
        Build a monomer from a dict with keys
        'symbol', 'name', 'molfile', 'smiles' and 'meta'.

        :raises KeyError: if any of those keys is missing
        """
        return cls(src_json['symbol'], src_json['name'],
                   src_json['molfile'], src_json['smiles'],
                   src_json['meta'])

    def to_json(self) -> dict:
        """Return the monomer as a plain dict (key order is the output order)."""
        return {
            'symbol': self.symbol,
            'name': self.name,
            'molfile': self.molfile,
            'author': self.author,
            'id': self.id,
            'rgroups': self.rgroups,
            'smiles': self.smiles,
            'polymerType': self.polymerType,
            'monomerType': self.monomerType,
            'createDate': self.createDate,
            'meta': self.meta,
        }
|
|
118
171
|
|
|
119
172
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
for
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
@
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
173
|
+
class MolFileAtom:
    """
    Wrapper for data extracted from molfile atom line
    """
    def __init__(self, v3k_atom_line: str):
        """
        :param v3k_atom_line: full atom line of a V3000 molblock,
            including the line prefix
        :raises ValueError: if the first value of the line is not an integer
        """
        self._atom_line = v3k_atom_line
        # values start right after the fixed-width line prefix
        self._atom_line_splitted: list[str] = \
            self._atom_line[IDX_OF_FIRST_VALUE:].split(' ')
        self._atom_idx = int(self._atom_line_splitted[0].strip())
        # we cannot use positional argument for cfg for it is a kwarg
        self._cfg: list[str] = [
            item for item in self._atom_line_splitted if item.startswith(CFG)
        ]

    @property
    def atom_line_str(self) -> str:
        """The original, unmodified atom line."""
        return self._atom_line

    @property
    def atom_idx(self) -> int:
        """Atom index as written in the line (first value)."""
        return self._atom_idx

    @property
    def atom_line_splitted(self) -> list[str]:
        """Space-separated values of the line (prefix excluded)."""
        # must read the private attribute: returning the property itself
        # would recurse forever
        return self._atom_line_splitted

    @property
    def cfg(self) -> list[str]:
        """All 'CFG=...' tokens of the line; empty list when there are none."""
        return self._cfg

    @property
    def cfg_int(self) -> int:
        """
        Numeric value of the first 'CFG=...' token; 0 when the line has none.

        :raises ValueError: if the token value is not an integer
        """
        if not self._cfg:
            return 0
        return int(self._cfg[0][len(CFG):])

    def __str__(self):
        return self._atom_line

    def __repr__(self):
        return str(self)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class MolFileBond:
    """
    Wrapper for data extracted from molfile bond line
    """
    def __init__(self, v3k_bond_line: str):
        """
        :param v3k_bond_line: full bond line of a V3000 molblock,
            including the line prefix
        """
        self._bond_line = v3k_bond_line
        # drop the fixed-width line prefix, then tokenize on single spaces
        values = v3k_bond_line[IDX_OF_FIRST_VALUE:].split(' ')
        self._bond_line_splitted: list[str] = values
        self._bond_idx = int(values[0].strip())
        # the leading positional values identify the bond
        self._key = values[0:NUM_OF_BOND_POSITIONAL_ARGS]
        # cfg is a keyword value, so it is looked up by prefix, not position
        self._cfg = [value for value in values if value.startswith(CFG)]

    @property
    def bond_line(self) -> str:
        """The original, unmodified bond line."""
        return self._bond_line

    @property
    def bond_idx(self):
        """Bond index as written in the line (first value)."""
        return self._bond_idx

    @property
    def bond_line_splitted(self) -> list[str]:
        """Space-separated values of the line (prefix excluded)."""
        return self._bond_line_splitted

    @property
    def key(self):
        """Leading positional values of the line, as a list of strings."""
        return self._key

    @property
    def cfg(self) -> list[str]:
        """All 'CFG=...' tokens of the line; empty list when there are none."""
        return self._cfg

    def __str__(self):
        return self._bond_line

    def __repr__(self):
        return str(self)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class MolFileV3K:
    """
    Wrapper for data extracted from molfile
    """
    def __init__(
            self, title: str, atom_list: "list[MolFileAtom]",
            bond_list: "list[MolFileBond]",
            collection_steabs: "list[int]" = None
    ):
        """
        :param title: title line of the molblock
        :param atom_list: atom wrappers, in molblock order
        :param bond_list: bond wrappers, in molblock order
        :param collection_steabs: atom numbers of the STEABS collection;
            an empty list is used when omitted
        """
        self._title = title
        self._atom_list = atom_list
        self._bond_list = bond_list
        if collection_steabs is None:
            collection_steabs = []
        self.collection_steabs = collection_steabs

        # bonds indexed by their key; the key is converted to a tuple of ints
        # because a list cannot be used as a dict key
        self._bonds: dict = {
            tuple(int(value) for value in bond.key): bond
            for bond in self._bond_list
        }

    @property
    def atom_list(self):
        """Atom wrappers in molblock order."""
        return self._atom_list

    @property
    def bond_list(self):
        """Bond wrappers in molblock order."""
        return self._bond_list

    @property
    def bonds(self) -> dict:
        """Mapping of int-tuple bond key to bond wrapper."""
        return self._bonds
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class MolFileMap:
    """
    Parsed molblock together with the line indices of its blocks.

    The indices refer to the list obtained by splitting the source text on
    newlines, so they can be used to edit that list in place.
    """
    def __init__(self, src: str, mol_file_obj: MolFileV3K,
                 atom_block_idx_boundaries: tuple[int, int],
                 bond_block_idx_boundaries: tuple[int, int],
                 collection_idx_boundaries: tuple[int, int] = None,
                 collection_steabs_idx: int = None):
        """
        :param src: source molblock text
        :param mol_file_obj: wrapper with parsed atoms / bonds / STEABS
        :param atom_block_idx_boundaries: (begin, end) marker line indices of the atom block
        :param bond_block_idx_boundaries: (begin, end) marker line indices of the bond block
        :param collection_idx_boundaries: (begin, end) marker line indices of the
            collection block, or None when there is no such block
        :param collection_steabs_idx: line index of the STEABS line, or None
        """
        self._src = src
        self._mol_file = mol_file_obj
        self.begin_atom_idx, self.end_atom_idx = \
            atom_block_idx_boundaries[0], atom_block_idx_boundaries[1]
        self.begin_bond_idx, self.end_bond_idx = \
            bond_block_idx_boundaries[0], bond_block_idx_boundaries[1]
        if collection_idx_boundaries is None:
            self.begin_collection_idx = None
            self.end_collection_idx = None
        else:
            self.begin_collection_idx = collection_idx_boundaries[0]
            self.end_collection_idx = collection_idx_boundaries[1]
        self.collection_steabs_idx = collection_steabs_idx

    @property
    def src(self):
        """Source molblock text."""
        return self._src

    @property
    def mol_file(self):
        """Wrapper with the parsed molblock content."""
        return self._mol_file

    @staticmethod
    def parse(molblock_src: str):
        """
        Parse molblock text into a MolFileMap.

        :param molblock_src: V3000 molblock text
        :return: MolFileMap for the given text
        :raises ValueError: if an atom or bond block marker line is missing,
            or the collection block contains a line other than STEABS
        """
        # trailing whitespace is dropped so marker lines match exactly
        lines: list[str] = [raw.rstrip() for raw in molblock_src.split('\n')]
        title: str = lines[1]

        def find_block(begin_str: str, end_str: str):
            # positions of the BEGIN / END marker lines of a block
            return (lines.index(begin_str), lines.index(end_str))

        def wrap_block(begin_idx: int, end_idx: int, wrapper_constructor):
            # one wrapper per line strictly between the two marker lines
            return [wrapper_constructor(line)
                    for line in lines[begin_idx + 1:end_idx]]

        atom_block_idx_boundaries = find_block(BEGIN_ATOM_LINE, END_ATOM_LINE)
        bond_block_idx_boundaries = find_block(BEGIN_BOND_LINE, END_BOND_LINE)
        atom_list = wrap_block(
            atom_block_idx_boundaries[0], atom_block_idx_boundaries[1],
            MolFileAtom)
        bond_list = wrap_block(
            bond_block_idx_boundaries[0], bond_block_idx_boundaries[1],
            MolFileBond)

        collection_idx_boundaries = None
        collection_steabs_idx = None
        collection_steabs: list[int] = []
        has_collection = BEGIN_COLLECTION_LINE in lines \
            and END_COLLECTION_LINE in lines
        if has_collection:
            collection_idx_boundaries = find_block(
                BEGIN_COLLECTION_LINE, END_COLLECTION_LINE)
            first_idx, last_idx = collection_idx_boundaries
            for line_idx in range(first_idx + 1, last_idx):
                collection_line = lines[line_idx]
                if not collection_line.startswith(COLLECTION_STEABS_LINE):
                    raise ValueError(f"Unexpected collection line '{collection_line}'.")
                # text between 'ATOMS=(' and the closing ')'
                steabs_str = collection_line[len(COLLECTION_STEABS_LINE + " ATOMS=("):-1]
                # the first number is the count, the rest are atom numbers
                collection_steabs = [
                    int(atom_num_str.strip())
                    for atom_num_str in steabs_str.split(' ')[1:]
                ]
                collection_steabs_idx = line_idx

        mol_file = MolFileV3K(title, atom_list, bond_list, collection_steabs)
        return MolFileMap(
            molblock_src, mol_file,
            atom_block_idx_boundaries, bond_block_idx_boundaries,
            collection_idx_boundaries, collection_steabs_idx)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def compile_object_for_monomer(monomer_name: str):
    """
    Compile HELM library object for the given monomer from files

    Reads three files from the directory ``monomer_name``:
    ``default.json`` (base object), ``meta.json`` (stored under the 'meta'
    key) and ``molfile.mol`` (stored under the 'molfile' key with trailing
    whitespace stripped from every line).

    :param monomer_name: path of the monomer directory
    :return: dict with the content of default.json plus 'meta' and 'molfile'
    :raises FileNotFoundError: if any of the three files is missing; the
        exception argument is the path of the first missing file
    """
    default = monomer_name + '/default.json'
    meta = monomer_name + '/meta.json'
    molfile = monomer_name + '/molfile.mol'
    # check all inputs up front so nothing is read for an incomplete monomer
    for path in (default, meta, molfile):
        if not os.path.isfile(path):
            raise FileNotFoundError(path)

    # explicit encoding: the platform default is not guaranteed to be UTF-8
    with open(default, 'r', encoding='utf-8') as default_json_file:
        default_json = orjson.loads(default_json_file.read())
    with open(meta, 'r', encoding='utf-8') as meta_json_file:
        meta_json = orjson.loads(meta_json_file.read())

    monomer_json = {**default_json, 'meta': meta_json}
    with open(molfile, 'r', encoding='utf-8') as monomer_mol_f:
        monomer_json['molfile'] = '\n'.join(
            line.rstrip() for line in monomer_mol_f)
    return monomer_json
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
@click.command()
@click.option('--lib',
              'output_library',
              help='Output library (HELM format) file.',
              type=click.File('wb', 'utf-8'))
@click.option('--add-list',
              'monomer_list_file',
              multiple=False,
              help='File with list of monomer names',
              type=click.File('r', 'utf-8'))
def main(output_library: TextIOWrapper,
         monomer_list_file: TextIOWrapper):
    """
    Build the monomer library file from a list of monomer names.

    Every non-empty line of ``monomer_list_file`` is a monomer name passed to
    ``compile_object_for_monomer``. Monomers that fail to convert are reported
    on stderr and skipped; the rest are written to ``output_library`` as an
    indented JSON array sorted by monomer name.

    :param output_library: output file, opened in binary mode
    :param monomer_list_file: text file with one monomer name per line
    """
    name_to_monomer_dict: dict[str, Monomer] = {}
    monomer_name_list = [m for m in monomer_list_file.read().split('\n') if m]

    print(monomer_name_list)

    for monomer_name in monomer_name_list:
        # missing input files are not caught here and abort the whole build
        monomer_json = compile_object_for_monomer(monomer_name)
        try:
            monomer = Monomer.from_json(monomer_json)
        except Exception as ex:
            # best effort: report and go on with the remaining monomers.
            # The source dict is kept in its own variable and read with .get()
            # so that reporting cannot fail itself (e.g. on a missing 'name').
            reported_name = monomer_json.get('name', monomer_name)
            sys.stderr.write(f"Invalid monomer '{reported_name}' error:\n{str(ex)}\n")
            continue
        name_to_monomer_dict[monomer.name] = monomer

    resulting_json = sorted(
        (obj.to_json() for obj in name_to_monomer_dict.values()),
        key=lambda x: x['name'])

    # orjson.dumps returns bytes, matching the binary mode of output_library
    lib_json_txt = orjson.dumps(resulting_json, option=orjson.OPT_INDENT_2)
    output_library.write(lib_json_txt)
|
|
175
444
|
|
|
176
445
|
|
|
177
446
|
if __name__ == '__main__':
|
|
178
|
-
|
|
447
|
+
main()
|