@datagrok-libraries/bio 5.32.3 → 5.32.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +17 -12
- package/package.json +1 -1
- package/src/aminoacids.js +3 -4
- package/src/aminoacids.js.map +1 -1
- package/src/monomer-works/consts.d.ts +27 -0
- package/src/monomer-works/consts.d.ts.map +1 -0
- package/src/monomer-works/consts.js +30 -0
- package/src/monomer-works/consts.js.map +1 -0
- package/src/monomer-works/monomer-utils.js +6 -17
- package/src/monomer-works/monomer-utils.js.map +1 -1
- package/src/monomer-works/seq-to-molfile-worker.d.ts +2 -0
- package/src/monomer-works/seq-to-molfile-worker.d.ts.map +1 -0
- package/src/monomer-works/seq-to-molfile-worker.js +19 -0
- package/src/monomer-works/seq-to-molfile-worker.js.map +1 -0
- package/src/monomer-works/seq-to-molfile.d.ts +9 -0
- package/src/monomer-works/seq-to-molfile.d.ts.map +1 -0
- package/src/monomer-works/seq-to-molfile.js +31 -0
- package/src/monomer-works/seq-to-molfile.js.map +1 -0
- package/src/monomer-works/to-atomic-level-utils.d.ts +23 -0
- package/src/monomer-works/to-atomic-level-utils.d.ts.map +1 -0
- package/src/monomer-works/to-atomic-level-utils.js +284 -0
- package/src/monomer-works/to-atomic-level-utils.js.map +1 -0
- package/src/monomer-works/to-atomic-level.d.ts +48 -4
- package/src/monomer-works/to-atomic-level.d.ts.map +1 -1
- package/src/monomer-works/to-atomic-level.js +273 -503
- package/src/monomer-works/to-atomic-level.js.map +1 -1
- package/src/monomer-works/types.d.ts +71 -0
- package/src/monomer-works/types.d.ts.map +1 -0
- package/src/monomer-works/types.js +2 -0
- package/src/monomer-works/types.js.map +1 -0
- package/src/pdb/pdb-helper.js +7 -18
- package/src/pdb/pdb-helper.js.map +1 -1
- package/src/tests/palettes-tests.js +20 -33
- package/src/tests/palettes-tests.js.map +1 -1
- package/src/trees/dendrogram.js +2 -13
- package/src/trees/dendrogram.js.map +1 -1
- package/src/trees/tree-helper.js +7 -18
- package/src/trees/tree-helper.js.map +1 -1
- package/src/utils/cell-renderer.js +3 -4
- package/src/utils/cell-renderer.js.map +1 -1
- package/src/utils/macromolecule/utils.js +4 -6
- package/src/utils/macromolecule/utils.js.map +1 -1
- package/src/utils/splitter.js +1 -1
- package/src/utils/splitter.js.map +1 -1
- package/src/utils/units-handler.js +26 -38
- package/src/utils/units-handler.js.map +1 -1
- package/src/utils.js +1 -1
- package/src/utils.js.map +1 -1
- package/src/viewers/ngl-gl-viewer.js +6 -17
- package/src/viewers/ngl-gl-viewer.js.map +1 -1
- package/src/viewers/phylocanvas-gl-viewer.js +7 -18
- package/src/viewers/phylocanvas-gl-viewer.js.map +1 -1
- package/tsconfig.json +1 -1
|
@@ -1,137 +1,71 @@
|
|
|
1
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
1
|
/* Do not change these import lines to match external modules in webpack configuration */
|
|
11
2
|
import * as grok from 'datagrok-api/grok';
|
|
12
3
|
import * as DG from 'datagrok-api/dg';
|
|
13
|
-
import { HELM_CORE_FIELDS, } from '../utils/const';
|
|
14
|
-
import { getSplitter } from '../utils/macromolecule/utils';
|
|
15
4
|
import { NotationConverter } from '../utils/notation-converter';
|
|
16
|
-
import { errorToConsole } from '@datagrok-libraries/utils/src/to-console';
|
|
17
5
|
import { UnitsHandler } from '../utils/units-handler';
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
const V3K_COUNTS_SHIFT = 14;
|
|
24
|
-
const V3K_IDX_SHIFT = 7;
|
|
25
|
-
const V3K_HEADER_FIRST_LINE = '\nDatagrok macromolecule handler\n\n';
|
|
26
|
-
const V3K_HEADER_SECOND_LINE = ' 0 0 0 0 0 0 999 V3000\n';
|
|
27
|
-
const V3K_BEGIN_CTAB_BLOCK = 'M V30 BEGIN CTAB\n';
|
|
28
|
-
const V3K_END_CTAB_BLOCK = 'M V30 END CTAB\n';
|
|
29
|
-
const V3K_BEGIN_COUNTS_LINE = 'M V30 COUNTS ';
|
|
30
|
-
const V3K_COUNTS_LINE_ENDING = ' 0 0 0\n';
|
|
31
|
-
const V3K_BEGIN_ATOM_BLOCK = 'M V30 BEGIN ATOM\n';
|
|
32
|
-
const V3K_END_ATOM_BLOCK = 'M V30 END ATOM\n';
|
|
33
|
-
const V3K_BEGIN_BOND_BLOCK = 'M V30 BEGIN BOND\n';
|
|
34
|
-
const V3K_END_BOND_BLOCK = 'M V30 END BOND\n';
|
|
35
|
-
const V3K_BOND_CONFIG = ' CFG=';
|
|
36
|
-
const V3K_BEGIN_DATA_LINE = 'M V30 ';
|
|
37
|
-
const V3K_END = 'M END';
|
|
38
|
-
const PRECISION_FACTOR = 10000; // HELMCoreLibrary has 4 significant digits after decimal point in atom coordinates
|
|
39
|
-
// symbols for the corresponding monomers in HELM library
|
|
40
|
-
const DEOXYRIBOSE = 'd';
|
|
41
|
-
const RIBOSE = 'r';
|
|
42
|
-
const PHOSPHATE = 'p';
|
|
43
|
-
const OXYGEN = 'O';
|
|
44
|
-
const HYDROGEN = 'H';
|
|
6
|
+
import { getFormattedMonomerLib, keepPrecision } from './to-atomic-level-utils';
|
|
7
|
+
import { seqToMolFileWorker } from './seq-to-molfile';
|
|
8
|
+
import { monomerWorksConsts as C } from './consts';
|
|
9
|
+
import { errorToConsole } from '@datagrok-libraries/utils';
|
|
10
|
+
import { getSplitter } from '../utils/macromolecule';
|
|
45
11
|
// todo: verify that all functions have return types
|
|
46
|
-
/** Convert Macromolecule column into Molecule column storing molfile V3000 with the help of a monomer library
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
return { col: null, warnings: [msg] };
|
|
81
|
-
}
|
|
82
|
-
const monomerSequencesArray = getMonomerSequencesArray(srcCol);
|
|
83
|
-
const monomersDict = yield getMonomersDictFromLib(monomerSequencesArray, monomerLib, polymerType, alphabet);
|
|
84
|
-
const srcColLength = srcCol.length;
|
|
85
|
-
const molfileList = new Array(srcColLength);
|
|
86
|
-
const molfileWarningList = new Array(0);
|
|
87
|
-
for (let rowI = 0; rowI < srcColLength; ++rowI) {
|
|
88
|
-
try {
|
|
89
|
-
const monomerSeq = monomerSequencesArray[rowI];
|
|
90
|
-
molfileList[rowI] = monomerSeqToMolfile(monomerSeq, monomersDict, alphabet, polymerType);
|
|
91
|
-
}
|
|
92
|
-
catch (err) {
|
|
93
|
-
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
94
|
-
const msg = `Cannot get molfile of row #${rowI}: ${errMsg}.`;
|
|
95
|
-
molfileWarningList.push(msg);
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
if (molfileWarningList.length > 0.05 * srcColLength)
|
|
99
|
-
throw new Error('Too many errors getting molfiles.');
|
|
100
|
-
// exclude name collisions
|
|
101
|
-
const name = `molfile(${srcCol.name})`;
|
|
102
|
-
const resColName = df.columns.getUnusedName(name);
|
|
103
|
-
const resCol = DG.Column.fromStrings(resColName, molfileList);
|
|
104
|
-
resCol.semType = DG.SEMTYPE.MOLECULE;
|
|
105
|
-
resCol.setTag(DG.TAGS.UNITS, DG.UNITS.Molecule.MOLBLOCK);
|
|
106
|
-
return { col: resCol, warnings: molfileWarningList };
|
|
107
|
-
});
|
|
108
|
-
}
|
|
109
|
-
/** Get a mapping of peptide symbols to HELM monomer library
|
|
110
|
-
* objects with selected fields.
|
|
111
|
-
*/
|
|
112
|
-
function getFormattedMonomerLib(monomerLib, polymerType, alphabet) {
|
|
113
|
-
const map = new Map();
|
|
114
|
-
for (const monomerSymbol of monomerLib.getMonomerSymbolsByType(polymerType)) {
|
|
115
|
-
const it = monomerLib.getMonomer(polymerType, monomerSymbol);
|
|
116
|
-
if (polymerType === "RNA" /* HELM_POLYMER_TYPE.RNA */ &&
|
|
117
|
-
(it["monomerType" /* HELM_FIELDS.MONOMER_TYPE */] === "Branch" /* HELM_MONOMER_TYPE.BRANCH */ ||
|
|
118
|
-
alphabet === "DNA" /* ALPHABET.DNA */ && it["symbol" /* HELM_FIELDS.SYMBOL */] === DEOXYRIBOSE ||
|
|
119
|
-
alphabet === "RNA" /* ALPHABET.RNA */ && it["symbol" /* HELM_FIELDS.SYMBOL */] === RIBOSE ||
|
|
120
|
-
it["symbol" /* HELM_FIELDS.SYMBOL */] === PHOSPHATE) ||
|
|
121
|
-
polymerType === "PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */ &&
|
|
122
|
-
it["monomerType" /* HELM_FIELDS.MONOMER_TYPE */] !== "Branch" /* HELM_MONOMER_TYPE.BRANCH */) {
|
|
123
|
-
const monomerObject = {};
|
|
124
|
-
HELM_CORE_FIELDS.forEach((field) => {
|
|
125
|
-
//@ts-ignore
|
|
126
|
-
monomerObject[field] = it[field];
|
|
127
|
-
});
|
|
128
|
-
map.set(it["symbol" /* HELM_FIELDS.SYMBOL */], monomerObject);
|
|
129
|
-
}
|
|
12
|
+
/** Convert Macromolecule column into Molecule column storing molfile V3000 with the help of a monomer library
|
|
13
|
+
* @param {DG.DataFrame} df - DataFrame containing the column to be converted
|
|
14
|
+
* @param {DG.Column} seqCol - Column containing the macromolecule sequence
|
|
15
|
+
* @param {IMonomerLib} monomerLib - Monomer library
|
|
16
|
+
*/
|
|
17
|
+
export async function _toAtomicLevel(df, seqCol, monomerLib) {
|
|
18
|
+
// todo: remove this from the library
|
|
19
|
+
if (DG.Func.find({ package: 'Chem', name: 'getRdKitModule' }).length === 0) {
|
|
20
|
+
const msg = 'Transformation to atomic level requires the package "Chem" installed.';
|
|
21
|
+
return { col: null, warnings: [msg] };
|
|
22
|
+
}
|
|
23
|
+
if (seqCol.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
24
|
+
const msg = `Only the ${DG.SEMTYPE.MACROMOLECULE} columns can be converted to atomic level, ` +
|
|
25
|
+
`the chosen column has semType '${seqCol.semType}'`;
|
|
26
|
+
return { col: null, warnings: [msg] };
|
|
27
|
+
}
|
|
28
|
+
let srcCol = seqCol;
|
|
29
|
+
const seqUh = UnitsHandler.getOrCreate(seqCol);
|
|
30
|
+
// convert 'helm' to 'separator' units
|
|
31
|
+
if (seqUh.isHelm()) {
|
|
32
|
+
const converter = new NotationConverter(seqCol);
|
|
33
|
+
srcCol = converter.convert("separator" /* NOTATION.SEPARATOR */, '.');
|
|
34
|
+
srcCol.name = seqCol.name; // Replace converted col name 'separator(<original>)' to '<original>';
|
|
35
|
+
}
|
|
36
|
+
const srcUh = UnitsHandler.getOrCreate(srcCol);
|
|
37
|
+
const alphabet = srcUh.alphabet;
|
|
38
|
+
// determine the polymer type according to HELM specifications
|
|
39
|
+
let polymerType;
|
|
40
|
+
// todo: an exception from dart comes before this check if the alphabet is UN
|
|
41
|
+
if (alphabet === "PT" /* ALPHABET.PT */ || alphabet === "UN" /* ALPHABET.UN */) {
|
|
42
|
+
polymerType = "PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */;
|
|
43
|
+
}
|
|
44
|
+
else if (alphabet === "RNA" /* ALPHABET.RNA */ || alphabet === "DNA" /* ALPHABET.DNA */) {
|
|
45
|
+
polymerType = "RNA" /* HELM_POLYMER_TYPE.RNA */;
|
|
130
46
|
}
|
|
131
|
-
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
|
|
47
|
+
else {
|
|
48
|
+
const msg = `Unexpected column's '${srcCol.name}' alphabet '${alphabet}'.`;
|
|
49
|
+
return { col: null, warnings: [msg] };
|
|
50
|
+
}
|
|
51
|
+
const monomerSequencesArray = getMonomerSequencesArray(srcCol);
|
|
52
|
+
const monomersDict = await getMonomersDictFromLib(monomerSequencesArray, monomerLib, polymerType, alphabet);
|
|
53
|
+
const srcColLength = srcCol.length;
|
|
54
|
+
const { molfileList, molfileWarningList } = await seqToMolFileWorker(monomerSequencesArray, monomersDict, alphabet, polymerType, srcColLength);
|
|
55
|
+
if (molfileWarningList.length > 0.05 * srcColLength)
|
|
56
|
+
throw new Error('Too many errors getting molfiles.');
|
|
57
|
+
// exclude name collisions
|
|
58
|
+
const name = `molfile(${srcCol.name})`;
|
|
59
|
+
const resColName = df.columns.getUnusedName(name);
|
|
60
|
+
const resCol = DG.Column.fromStrings(resColName, molfileList);
|
|
61
|
+
resCol.semType = DG.SEMTYPE.MOLECULE;
|
|
62
|
+
resCol.setTag(DG.TAGS.UNITS, DG.UNITS.Molecule.MOLBLOCK);
|
|
63
|
+
return { col: resCol, warnings: molfileWarningList };
|
|
64
|
+
}
|
|
65
|
+
/** Get jagged array of monomer symbols for the dataframe
|
|
66
|
+
* @param {DG.Column} macroMolCol - Column with macro-molecules
|
|
67
|
+
* @return {string[]} - Jagged array of monomer symbols for the dataframe */
|
|
68
|
+
export function getMonomerSequencesArray(macroMolCol) {
|
|
135
69
|
const columnLength = macroMolCol.length;
|
|
136
70
|
const result = new Array(columnLength);
|
|
137
71
|
// split the string into monomers
|
|
@@ -147,53 +81,53 @@ function getMonomerSequencesArray(macroMolCol) {
|
|
|
147
81
|
}
|
|
148
82
|
/** Get a mapping of monomer symbols to MolGraph objects. Notice, the
|
|
149
83
|
* transformation from molfile V2000 to V3000 takes place,
|
|
150
|
-
* with the help of async function call from Chem (RdKit module)
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
84
|
+
* with the help of async function call from Chem (RdKit module)
|
|
85
|
+
* @param {string[]} monomerSequencesArray - Jagged array of monomer symbols for the dataframe
|
|
86
|
+
* @param {IMonomerLib} monomerLib - Monomer library
|
|
87
|
+
* @param {HELM_POLYMER_TYPE} polymerType - Polymer type
|
|
88
|
+
* @param {ALPHABET} alphabet - Alphabet
|
|
89
|
+
* @return {Map<string, MolGraph>} - Mapping of monomer symbols to MolGraph objects*/
|
|
90
|
+
export async function getMonomersDictFromLib(monomerSequencesArray, monomerLib, polymerType, alphabet) {
|
|
91
|
+
// todo: exception - no gaps, no empty string monomers
|
|
92
|
+
const formattedMonomerLib = getFormattedMonomerLib(monomerLib, polymerType, alphabet);
|
|
93
|
+
const monomersDict = new Map();
|
|
94
|
+
const moduleRdkit = await grok.functions.call('Chem:getRdKitModule');
|
|
95
|
+
const pointerToBranchAngle = {
|
|
96
|
+
value: null
|
|
97
|
+
};
|
|
98
|
+
// this must NOT be placed after translating monomer sequences
|
|
99
|
+
// because adding branch monomers for nucleobases relies on these data
|
|
100
|
+
if (polymerType === "RNA" /* HELM_POLYMER_TYPE.RNA */) {
|
|
101
|
+
const symbols = (alphabet === "RNA" /* ALPHABET.RNA */) ?
|
|
102
|
+
[C.RIBOSE, C.PHOSPHATE] : [C.DEOXYRIBOSE, C.PHOSPHATE];
|
|
103
|
+
for (const sym of symbols)
|
|
104
|
+
addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle);
|
|
105
|
+
}
|
|
106
|
+
for (let rowI = 0; rowI < monomerSequencesArray.length; ++rowI) {
|
|
107
|
+
const monomerSeq = monomerSequencesArray[rowI];
|
|
108
|
+
for (const sym of monomerSeq) {
|
|
109
|
+
if (sym === '')
|
|
110
|
+
continue; // Skip gap/empty monomer for MSA
|
|
111
|
+
try {
|
|
166
112
|
addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle);
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
try {
|
|
174
|
-
addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle);
|
|
175
|
-
}
|
|
176
|
-
catch (err) {
|
|
177
|
-
const errTxt = errorToConsole(err);
|
|
178
|
-
console.error(`bio lib: getMonomersDictFromLib() sym='${sym}', error:\n` + errTxt);
|
|
179
|
-
const errMsg = `Сan't get monomer '${sym}' from library: ${errTxt}`; // Text for Datagrok error baloon
|
|
180
|
-
throw new Error(errMsg);
|
|
181
|
-
}
|
|
113
|
+
}
|
|
114
|
+
catch (err) {
|
|
115
|
+
const errTxt = errorToConsole(err);
|
|
116
|
+
console.error(`bio lib: getMonomersDictFromLib() sym='${sym}', error:\n` + errTxt);
|
|
117
|
+
const errMsg = `Сan't get monomer '${sym}' from library: ${errTxt}`; // Text for Datagrok error baloon
|
|
118
|
+
throw new Error(errMsg);
|
|
182
119
|
}
|
|
183
120
|
}
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
}
|
|
187
|
-
function getAngleBetweenSugarBranchAndOY(molGraph) {
|
|
188
|
-
const x = molGraph.atoms.x;
|
|
189
|
-
const y = molGraph.atoms.y;
|
|
190
|
-
const rNode = molGraph.meta.rNodes[2] - 1;
|
|
191
|
-
const terminalNode = molGraph.meta.terminalNodes[2] - 1;
|
|
192
|
-
const xShift = x[rNode] - x[terminalNode];
|
|
193
|
-
const yShift = y[rNode] - y[terminalNode];
|
|
194
|
-
return Math.atan(yShift / xShift) + Math.PI / 2;
|
|
121
|
+
}
|
|
122
|
+
return monomersDict;
|
|
195
123
|
}
|
|
196
|
-
/** Adds MolGraph object for 'sym' to the monomers dict when necessary
|
|
124
|
+
/** Adds MolGraph object for 'sym' to the monomers dict when necessary
|
|
125
|
+
* @param {Map<string, MolGraph>} monomersDict - Monomers dictionary
|
|
126
|
+
* @param {string} sym - Monomer symbol
|
|
127
|
+
* @param {Map<string, any>} formattedMonomerLib - Formatted monomer library
|
|
128
|
+
* @param {any} moduleRdkit - RDKit module
|
|
129
|
+
* @param {HELM_POLYMER_TYPE} polymerType - Polymer type
|
|
130
|
+
* @param {NumberWrapper} pointerToBranchAngle - Pointer to branch angle*/
|
|
197
131
|
function addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle) {
|
|
198
132
|
if (!monomersDict.has(sym)) {
|
|
199
133
|
const monomerData = getMolGraph(sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle);
|
|
@@ -205,7 +139,13 @@ function addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, p
|
|
|
205
139
|
}
|
|
206
140
|
}
|
|
207
141
|
/** Construct the MolGraph object for specified monomerSymbol: the associated
|
|
208
|
-
* graph is adjusted in XY plane and filled with default R-groups
|
|
142
|
+
* graph is adjusted in XY plane and filled with default R-groups
|
|
143
|
+
* @param {string} monomerSymbol - Monomer symbol
|
|
144
|
+
* @param {Map<string, any>} formattedMonomerLib - Formatted monomer library
|
|
145
|
+
* @param {any} moduleRdkit - RDKit module
|
|
146
|
+
* @param {HELM_POLYMER_TYPE} polymerType - Polymer type
|
|
147
|
+
* @param {NumberWrapper} pointerToBranchAngle - Pointer to branch angle
|
|
148
|
+
* @return {MolGraph | null} - MolGraph object or null if monomerSymbol is absent in the library*/
|
|
209
149
|
function getMolGraph(monomerSymbol, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle) {
|
|
210
150
|
if (!formattedMonomerLib.has(monomerSymbol)) {
|
|
211
151
|
return null;
|
|
@@ -224,9 +164,9 @@ function getMolGraph(monomerSymbol, formattedMonomerLib, moduleRdkit, polymerTyp
|
|
|
224
164
|
adjustPeptideMonomerGraph(monomerGraph);
|
|
225
165
|
}
|
|
226
166
|
else { // nucleotides
|
|
227
|
-
if (monomerSymbol === RIBOSE || monomerSymbol === DEOXYRIBOSE)
|
|
167
|
+
if (monomerSymbol === C.RIBOSE || monomerSymbol === C.DEOXYRIBOSE)
|
|
228
168
|
adjustSugarMonomerGraph(monomerGraph, pointerToBranchAngle);
|
|
229
|
-
else if (monomerSymbol === PHOSPHATE)
|
|
169
|
+
else if (monomerSymbol === C.PHOSPHATE)
|
|
230
170
|
adjustPhosphateMonomerGraph(monomerGraph);
|
|
231
171
|
else
|
|
232
172
|
adjustBaseMonomerGraph(monomerGraph, pointerToBranchAngle);
|
|
@@ -244,7 +184,7 @@ function setShiftsAndTerminalNodes(polymerType, monomerGraph, monomerSymbol) {
|
|
|
244
184
|
removeNodeAndBonds(monomerGraph, monomerGraph.meta.rNodes[1]);
|
|
245
185
|
}
|
|
246
186
|
else { // nucleotides
|
|
247
|
-
if (monomerSymbol === RIBOSE || monomerSymbol === DEOXYRIBOSE) {
|
|
187
|
+
if (monomerSymbol === C.RIBOSE || monomerSymbol === C.DEOXYRIBOSE) {
|
|
248
188
|
// remove R2
|
|
249
189
|
removeNodeAndBonds(monomerGraph, monomerGraph.meta.rNodes[1]);
|
|
250
190
|
// set terminalNode2 (oxygen) as new R2
|
|
@@ -258,7 +198,7 @@ function setShiftsAndTerminalNodes(polymerType, monomerGraph, monomerSymbol) {
|
|
|
258
198
|
// remove the branching r-group
|
|
259
199
|
removeNodeAndBonds(monomerGraph, monomerGraph.meta.rNodes[2]);
|
|
260
200
|
}
|
|
261
|
-
else if (monomerSymbol === PHOSPHATE) {
|
|
201
|
+
else if (monomerSymbol === C.PHOSPHATE) {
|
|
262
202
|
monomerGraph.meta.terminalNodes[0] = monomerGraph.meta.rNodes[0];
|
|
263
203
|
shiftCoordinates(monomerGraph, -monomerGraph.atoms.x[monomerGraph.meta.terminalNodes[0] - 1], -monomerGraph.atoms.y[monomerGraph.meta.terminalNodes[0] - 1]);
|
|
264
204
|
setShifts(monomerGraph, polymerType);
|
|
@@ -269,7 +209,13 @@ function setShiftsAndTerminalNodes(polymerType, monomerGraph, monomerSymbol) {
|
|
|
269
209
|
}
|
|
270
210
|
}
|
|
271
211
|
}
|
|
272
|
-
|
|
212
|
+
/**
|
|
213
|
+
* Get monomer metadata object
|
|
214
|
+
* @param {Atoms} atoms - Atoms object
|
|
215
|
+
* @param {Bonds} bonds - Bonds object
|
|
216
|
+
* @param {string[]} capGroups - Cap groups
|
|
217
|
+
* @param {Map<number, number>} capGroupIdxMap - Cap group index map
|
|
218
|
+
* @return {MonomerMetadata}*/
|
|
273
219
|
function getMonomerMetadata(atoms, bonds, capGroups, capGroupIdxMap) {
|
|
274
220
|
const meta = {
|
|
275
221
|
backboneShift: null,
|
|
@@ -282,8 +228,9 @@ function getMonomerMetadata(atoms, bonds, capGroups, capGroupIdxMap) {
|
|
|
282
228
|
setTerminalNodes(bonds, meta);
|
|
283
229
|
return meta;
|
|
284
230
|
}
|
|
285
|
-
/** Parse element symbols for R-groups from the HELM monomer library R-
|
|
286
|
-
*
|
|
231
|
+
/** Parse element symbols for R-groups from the HELM monomer library R-group field
|
|
232
|
+
* @param {any[]} rGroupObjList - R-group object list
|
|
233
|
+
* @return {Map<number, number>} - Cap group index map*/
|
|
287
234
|
export function parseCapGroups(rGroupObjList) {
|
|
288
235
|
// specifically for HELMCoreLibrary
|
|
289
236
|
// considered only monoatomic rgroups
|
|
@@ -299,7 +246,10 @@ export function parseCapGroups(rGroupObjList) {
|
|
|
299
246
|
}
|
|
300
247
|
return capGroupsArray;
|
|
301
248
|
}
|
|
302
|
-
/** Substitute the cap group elements instead of R#
|
|
249
|
+
/** Substitute the cap group elements instead of R#
|
|
250
|
+
* @param {Atoms} atoms - Atoms object
|
|
251
|
+
* @param {string[]} capGroups - Cap groups
|
|
252
|
+
* @param {Map<number, number>} capGroupIdxMap - Cap group index map*/
|
|
303
253
|
function substituteCapGroups(atoms, capGroups, capGroupIdxMap) {
|
|
304
254
|
for (const [node, capIdx] of capGroupIdxMap)
|
|
305
255
|
atoms.atomTypes[node - 1] = capGroups[capIdx - 1]; // -1 because molfile indexing starts from 1
|
|
@@ -341,7 +291,9 @@ function setTerminalNodes(bonds, meta) {
|
|
|
341
291
|
++i;
|
|
342
292
|
}
|
|
343
293
|
}
|
|
344
|
-
/** Sets shifts in 'meta' attribute of MolGraph
|
|
294
|
+
/** Sets shifts in 'meta' attribute of MolGraph
|
|
295
|
+
* @param {MolGraph} molGraph - MolGraph object
|
|
296
|
+
* @param {HELM_POLYMER_TYPE} polymerType - Polymer type*/
|
|
345
297
|
function setShifts(molGraph, polymerType) {
|
|
346
298
|
if (molGraph.meta.rNodes.length > 1) {
|
|
347
299
|
molGraph.meta.backboneShift = getShiftBetweenNodes(molGraph, molGraph.meta.rNodes[1] - 1, molGraph.meta.terminalNodes[0] - 1);
|
|
@@ -350,7 +302,11 @@ function setShifts(molGraph, polymerType) {
|
|
|
350
302
|
molGraph.meta.branchShift = getShiftBetweenNodes(molGraph, molGraph.meta.rNodes[2] - 1, molGraph.meta.terminalNodes[0] - 1);
|
|
351
303
|
}
|
|
352
304
|
}
|
|
353
|
-
/** Returns the pair [xShift, yShift] for specified node indices
|
|
305
|
+
/** Returns the pair [xShift, yShift] for specified node indices
|
|
306
|
+
* @param {MolGraph} molGraph - MolGraph object
|
|
307
|
+
* @param {number} rightNodeIdx - Right node index
|
|
308
|
+
* @param {number} leftNodeIdx - Left node index
|
|
309
|
+
* @return {number[]} - Shift between nodes*/
|
|
354
310
|
function getShiftBetweenNodes(molGraph, rightNodeIdx, leftNodeIdx) {
|
|
355
311
|
return [
|
|
356
312
|
keepPrecision(molGraph.atoms.x[rightNodeIdx] -
|
|
@@ -360,35 +316,43 @@ function getShiftBetweenNodes(molGraph, rightNodeIdx, leftNodeIdx) {
|
|
|
360
316
|
];
|
|
361
317
|
}
|
|
362
318
|
/** Helper function necessary to build a correct V3000 molfile out of V2000 with
|
|
363
|
-
|
|
319
|
+
* specified r-groups
|
|
320
|
+
* @param {string} molfileV2K - V2000 molfile
|
|
321
|
+
* @return {string} - V2000 molfile without R-group lines*/
|
|
364
322
|
function removeRGroupLines(molfileV2K) {
|
|
365
|
-
let begin = molfileV2K.indexOf(V2K_A_LINE, 0);
|
|
323
|
+
let begin = molfileV2K.indexOf(C.V2K_A_LINE, 0);
|
|
366
324
|
if (begin === -1)
|
|
367
|
-
begin = molfileV2K.indexOf(V2K_RGP_LINE);
|
|
368
|
-
const end = molfileV2K.indexOf(V3K_END, begin);
|
|
325
|
+
begin = molfileV2K.indexOf(C.V2K_RGP_LINE);
|
|
326
|
+
const end = molfileV2K.indexOf(C.V3K_END, begin);
|
|
369
327
|
return molfileV2K.substring(0, begin) + molfileV2K.substring(end);
|
|
370
328
|
}
|
|
371
|
-
/** V2000 to V3000 converter
|
|
372
|
-
|
|
329
|
+
/** V2000 to V3000 converter
|
|
330
|
+
* @param {string} molfileV2K - V2000 molfile
|
|
331
|
+
* @param {any} moduleRdkit - RDKit module
|
|
332
|
+
* @return {string} - V3000 molfile*/
|
|
333
|
+
export function convertMolfileToV3K(molfileV2K, moduleRdkit) {
|
|
373
334
|
// The standard Chem converter is not used here because it relies on creation of moduleRdkit on each iteration
|
|
374
335
|
const molObj = moduleRdkit.get_mol(molfileV2K);
|
|
375
336
|
const molfileV3K = molObj.get_v3Kmolblock();
|
|
376
337
|
molObj.delete();
|
|
377
338
|
return molfileV3K;
|
|
378
339
|
}
|
|
379
|
-
/** Parse V3000 bond block and construct the Bonds object
|
|
380
|
-
|
|
340
|
+
/** Parse V3000 bond block and construct the Bonds object
|
|
341
|
+
* @param {string} molfileV3K - V3000 molfile
|
|
342
|
+
* @param {number} bondCount - Number of bonds
|
|
343
|
+
* @return {Bonds} - Bonds object*/
|
|
344
|
+
export function parseBondBlock(molfileV3K, bondCount) {
|
|
381
345
|
const bondTypes = new Uint32Array(bondCount);
|
|
382
346
|
const atomPairs = new Array(bondCount);
|
|
383
347
|
const bondConfiguration = new Map();
|
|
384
348
|
const kwargs = new Map();
|
|
385
|
-
let begin = molfileV3K.indexOf(V3K_BEGIN_BOND_BLOCK);
|
|
349
|
+
let begin = molfileV3K.indexOf(C.V3K_BEGIN_BOND_BLOCK);
|
|
386
350
|
begin = molfileV3K.indexOf('\n', begin);
|
|
387
351
|
let end = begin;
|
|
388
352
|
for (let i = 0; i < bondCount; ++i) {
|
|
389
353
|
// parse bond type and atom pair
|
|
390
354
|
const parsedValues = new Array(3);
|
|
391
|
-
begin = molfileV3K.indexOf(V3K_BEGIN_DATA_LINE, end) + V3K_IDX_SHIFT;
|
|
355
|
+
begin = molfileV3K.indexOf(C.V3K_BEGIN_DATA_LINE, end) + C.V3K_IDX_SHIFT;
|
|
392
356
|
end = molfileV3K.indexOf(' ', begin);
|
|
393
357
|
for (let k = 0; k < 3; ++k) {
|
|
394
358
|
begin = end + 1;
|
|
@@ -400,7 +364,7 @@ function parseBondBlock(molfileV3K, bondCount) {
|
|
|
400
364
|
// parse keyword arguments
|
|
401
365
|
const endOfLine = molfileV3K.indexOf('\n', begin);
|
|
402
366
|
let lineRemainder = molfileV3K.slice(end, endOfLine);
|
|
403
|
-
let beginCfg = lineRemainder.indexOf(V3K_BOND_CONFIG);
|
|
367
|
+
let beginCfg = lineRemainder.indexOf(C.V3K_BOND_CONFIG);
|
|
404
368
|
if (beginCfg !== -1) {
|
|
405
369
|
beginCfg = lineRemainder.indexOf('=', beginCfg) + 1;
|
|
406
370
|
let endCfg = lineRemainder.indexOf(' ', beginCfg);
|
|
@@ -408,7 +372,7 @@ function parseBondBlock(molfileV3K, bondCount) {
|
|
|
408
372
|
endCfg = lineRemainder.length;
|
|
409
373
|
const bondConfig = parseInt(lineRemainder.slice(beginCfg, endCfg));
|
|
410
374
|
bondConfiguration.set(i, bondConfig);
|
|
411
|
-
const removedSubstring = V3K_BOND_CONFIG + bondConfig.toString();
|
|
375
|
+
const removedSubstring = C.V3K_BOND_CONFIG + bondConfig.toString();
|
|
412
376
|
lineRemainder = lineRemainder.replace(removedSubstring, '');
|
|
413
377
|
}
|
|
414
378
|
if (!lineRemainder)
|
|
@@ -422,12 +386,14 @@ function parseBondBlock(molfileV3K, bondCount) {
|
|
|
422
386
|
};
|
|
423
387
|
}
|
|
424
388
|
/** Constructs mapping of r-group nodes to default capGroups, all numeration starting from 1.
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
389
|
+
* According to https://pubs.acs.org/doi/10.1021/ci3001925, R1 and R2 are the chain extending attachment points,
|
|
390
|
+
* while R3 is the branching attachment point.
|
|
391
|
+
* @param {string} molfileV2K - V2000 molfile
|
|
392
|
+
* @return {Map<number, number>} - Map of r-group nodes to default capGroups*/
|
|
393
|
+
export function parseCapGroupIdxMap(molfileV2K) {
|
|
428
394
|
const capGroupIdxMap = new Map();
|
|
429
395
|
// parse A-lines (RNA)
|
|
430
|
-
let begin = molfileV2K.indexOf(V2K_A_LINE, 0);
|
|
396
|
+
let begin = molfileV2K.indexOf(C.V2K_A_LINE, 0);
|
|
431
397
|
let end = begin;
|
|
432
398
|
while (begin !== -1) {
|
|
433
399
|
// parse the rNode to which the cap group is attached
|
|
@@ -438,13 +404,13 @@ function parseCapGroupIdxMap(molfileV2K) {
|
|
|
438
404
|
end = molfileV2K.indexOf('\n', begin);
|
|
439
405
|
const capGroup = parseInt(molfileV2K.substring(begin, end).replace(/^R/, ''));
|
|
440
406
|
capGroupIdxMap.set(rNode, capGroup);
|
|
441
|
-
begin = molfileV2K.indexOf(V2K_A_LINE, end);
|
|
407
|
+
begin = molfileV2K.indexOf(C.V2K_A_LINE, end);
|
|
442
408
|
}
|
|
443
409
|
// parse RGP lines (may be more than one in RNA monomers)
|
|
444
|
-
begin = molfileV2K.indexOf(V2K_RGP_LINE, 0);
|
|
410
|
+
begin = molfileV2K.indexOf(C.V2K_RGP_LINE, 0);
|
|
445
411
|
end = molfileV2K.indexOf('\n', begin);
|
|
446
412
|
while (begin !== -1) {
|
|
447
|
-
begin += V2K_RGP_SHIFT;
|
|
413
|
+
begin += C.V2K_RGP_SHIFT;
|
|
448
414
|
end = molfileV2K.indexOf('\n', begin);
|
|
449
415
|
const rgpStringParsed = molfileV2K.substring(begin, end)
|
|
450
416
|
.replaceAll(/\s+/g, ' ')
|
|
@@ -458,14 +424,14 @@ function parseCapGroupIdxMap(molfileV2K) {
|
|
|
458
424
|
else
|
|
459
425
|
capGroupIdxMap.set(rgpIndicesArray[i], rgpIndicesArray[i + 1]);
|
|
460
426
|
}
|
|
461
|
-
begin = molfileV2K.indexOf(V2K_RGP_LINE, end);
|
|
427
|
+
begin = molfileV2K.indexOf(C.V2K_RGP_LINE, end);
|
|
462
428
|
}
|
|
463
429
|
return capGroupIdxMap;
|
|
464
430
|
}
|
|
465
|
-
function parseAtomAndBondCounts(molfileV3K) {
|
|
431
|
+
export function parseAtomAndBondCounts(molfileV3K) {
|
|
466
432
|
molfileV3K = molfileV3K.replaceAll('\r', ''); // to handle old and new sdf standards
|
|
467
433
|
// parse atom count
|
|
468
|
-
let begin = molfileV3K.indexOf(V3K_BEGIN_COUNTS_LINE) + V3K_COUNTS_SHIFT;
|
|
434
|
+
let begin = molfileV3K.indexOf(C.V3K_BEGIN_COUNTS_LINE) + C.V3K_COUNTS_SHIFT;
|
|
469
435
|
let end = molfileV3K.indexOf(' ', begin + 1);
|
|
470
436
|
const numOfAtoms = parseInt(molfileV3K.substring(begin, end));
|
|
471
437
|
// parse bond count
|
|
@@ -475,17 +441,20 @@ function parseAtomAndBondCounts(molfileV3K) {
|
|
|
475
441
|
return { atomCount: numOfAtoms, bondCount: numOfBonds };
|
|
476
442
|
}
|
|
477
443
|
/** Parse V3000 atom block and return Atoms object. NOTICE: only atomTypes, x, y
|
|
478
|
-
|
|
444
|
+
* and kwargs fields are set in the return value, with other fields dummy initialized.
|
|
445
|
+
* @param {string} molfileV3K - V3000 molfile
|
|
446
|
+
* @param {number} atomCount - number of atoms in the molecule
|
|
447
|
+
* @return {Atoms} - Atoms object */
|
|
479
448
|
function parseAtomBlock(molfileV3K, atomCount) {
|
|
480
449
|
const atomTypes = new Array(atomCount);
|
|
481
450
|
const x = new Float32Array(atomCount);
|
|
482
451
|
const y = new Float32Array(atomCount);
|
|
483
452
|
const kwargs = new Array(atomCount);
|
|
484
|
-
let begin = molfileV3K.indexOf(V3K_BEGIN_ATOM_BLOCK); // V3000 atoms block
|
|
453
|
+
let begin = molfileV3K.indexOf(C.V3K_BEGIN_ATOM_BLOCK); // V3000 atoms block
|
|
485
454
|
begin = molfileV3K.indexOf('\n', begin);
|
|
486
455
|
let end = begin;
|
|
487
456
|
for (let i = 0; i < atomCount; i++) {
|
|
488
|
-
begin = molfileV3K.indexOf(V3K_BEGIN_DATA_LINE, begin) + V3K_IDX_SHIFT;
|
|
457
|
+
begin = molfileV3K.indexOf(C.V3K_BEGIN_DATA_LINE, begin) + C.V3K_IDX_SHIFT;
|
|
489
458
|
end = molfileV3K.indexOf(' ', begin); // skip the idx row
|
|
490
459
|
// parse atom type
|
|
491
460
|
begin = end + 1;
|
|
@@ -513,11 +482,12 @@ function parseAtomBlock(molfileV3K, atomCount) {
|
|
|
513
482
|
kwargs: kwargs,
|
|
514
483
|
};
|
|
515
484
|
}
|
|
516
|
-
/** Remove hydrogen nodes
|
|
485
|
+
/** Remove hydrogen nodes
|
|
486
|
+
* @param {MolGraph} monomerGraph - monomer graph*/
|
|
517
487
|
function removeHydrogen(monomerGraph) {
|
|
518
488
|
let i = 0;
|
|
519
489
|
while (i < monomerGraph.atoms.atomTypes.length) {
|
|
520
|
-
if (monomerGraph.atoms.atomTypes[i] === HYDROGEN) {
|
|
490
|
+
if (monomerGraph.atoms.atomTypes[i] === C.HYDROGEN) {
|
|
521
491
|
removeNodeAndBonds(monomerGraph, i + 1); // i + 1 because molfile node indexing starts from 1
|
|
522
492
|
--i;
|
|
523
493
|
// monomerGraph.atoms.atomTypes[i] = 'Li';
|
|
@@ -526,7 +496,9 @@ function removeHydrogen(monomerGraph) {
|
|
|
526
496
|
}
|
|
527
497
|
}
|
|
528
498
|
/** Remove node 'removedNode' and the associated bonds. Notice, numeration of
|
|
529
|
-
|
|
499
|
+
* nodes in molfiles starts from 1, not 0
|
|
500
|
+
* @param {MolGraph} monomerGraph - monomer graph
|
|
501
|
+
* @param {number} removedNode - node to be removed*/
|
|
530
502
|
function removeNodeAndBonds(monomerGraph, removedNode) {
|
|
531
503
|
if (typeof removedNode !== 'undefined') {
|
|
532
504
|
const removedNodeIdx = removedNode - 1;
|
|
@@ -604,7 +576,8 @@ function spliceTypedArray(TConstructor, typedArray, start, count) {
|
|
|
604
576
|
}
|
|
605
577
|
return result;
|
|
606
578
|
}
|
|
607
|
-
/** Adjust the peptide MolGraph to default/standardized position
|
|
579
|
+
/** Adjust the peptide MolGraph to default/standardized position
|
|
580
|
+
* @param {MolGraph} monomer - monomer graph*/
|
|
608
581
|
function adjustPeptideMonomerGraph(monomer) {
|
|
609
582
|
const centeredNode = monomer.meta.terminalNodes[0] - 1; // node indexing in molfiles starts from 1
|
|
610
583
|
const rotatedNode = monomer.meta.rNodes[0] - 1;
|
|
@@ -686,10 +659,12 @@ function adjustBaseMonomerGraph(monomer, pointerToBranchAngle) {
|
|
|
686
659
|
}
|
|
687
660
|
}
|
|
688
661
|
function getEuclideanDistance(p1, p2) {
|
|
689
|
-
return keepPrecision(Math.sqrt(
|
|
662
|
+
return keepPrecision(Math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2));
|
|
690
663
|
}
|
|
691
664
|
/** Flip carboxyl group with the radical in a peptide monomer in case the
|
|
692
|
-
|
|
665
|
+
* carboxyl group is in the lower half-plane
|
|
666
|
+
* @param {MolGraph}monomer - peptide monomer
|
|
667
|
+
* @param {number}doubleBondedOxygen - index of the double-bonded oxygen atom*/
|
|
693
668
|
function flipCarboxylAndRadical(monomer, doubleBondedOxygen) {
|
|
694
669
|
// verify that the carboxyl group is in the lower half-plane
|
|
695
670
|
if (monomer.atoms.y[monomer.meta.rNodes[1] - 1] < 0 &&
|
|
@@ -698,7 +673,10 @@ function flipCarboxylAndRadical(monomer, doubleBondedOxygen) {
|
|
|
698
673
|
rotateCenteredGraph(monomer.atoms, -findAngleWithOX(monomer.atoms.x[monomer.meta.terminalNodes[1] - 1], monomer.atoms.y[monomer.meta.terminalNodes[1] - 1]));
|
|
699
674
|
}
|
|
700
675
|
}
|
|
701
|
-
/** Finds angle between OY and the ray joining origin with (x, y)
|
|
676
|
+
/** Finds angle between OY and the ray joining origin with (x, y)
|
|
677
|
+
* @param {number}x
|
|
678
|
+
* @param {number}y
|
|
679
|
+
* @return {number} angle in radians*/
|
|
702
680
|
function findAngleWithOY(x, y) {
|
|
703
681
|
let angle;
|
|
704
682
|
if (x === 0) {
|
|
@@ -714,11 +692,17 @@ function findAngleWithOY(x, y) {
|
|
|
714
692
|
}
|
|
715
693
|
return angle;
|
|
716
694
|
}
|
|
717
|
-
/** Finds angle between OX and the ray joining origin with (x, y)
|
|
695
|
+
/** Finds angle between OX and the ray joining origin with (x, y)
|
|
696
|
+
* @param {number}x
|
|
697
|
+
* @param {number}y
|
|
698
|
+
* @return {number} angle in radians
|
|
699
|
+
*/
|
|
718
700
|
function findAngleWithOX(x, y) {
|
|
719
701
|
return findAngleWithOY(x, y) + Math.PI / 2;
|
|
720
702
|
}
|
|
721
|
-
/** Rotate the graph around the origin by 'angle'
|
|
703
|
+
/** Rotate the graph around the origin by 'angle'
|
|
704
|
+
* @param {Atoms}atoms - atoms of the graph
|
|
705
|
+
* @param {number}angle - angle in radians*/
|
|
722
706
|
function rotateCenteredGraph(atoms, angle) {
|
|
723
707
|
if (angle !== 0) {
|
|
724
708
|
const x = atoms.x;
|
|
@@ -732,15 +716,19 @@ function rotateCenteredGraph(atoms, angle) {
|
|
|
732
716
|
}
|
|
733
717
|
}
|
|
734
718
|
}
|
|
735
|
-
/** Flip monomer graph around OX axis preserving stereometry
|
|
719
|
+
/** Flip monomer graph around OX axis preserving stereometry
|
|
720
|
+
* @param {MolGraph}monomer - monomer graph*/
|
|
736
721
|
function flipMonomerAroundOX(monomer) {
|
|
737
722
|
flipMolGraph(monomer, true);
|
|
738
723
|
}
|
|
739
|
-
/** Flip monomer graph around OY axis preserving stereometry
|
|
724
|
+
/** Flip monomer graph around OY axis preserving stereometry
|
|
725
|
+
* @param {MolGraph}monomer - monomer graph*/
|
|
740
726
|
function flipMonomerAroundOY(monomer) {
|
|
741
727
|
flipMolGraph(monomer, false);
|
|
742
728
|
}
|
|
743
|
-
/** Flip graph around a specified axis: 'true' corresponds to OX, 'false' to OY
|
|
729
|
+
/** Flip graph around a specified axis: 'true' corresponds to OX, 'false' to OY
|
|
730
|
+
* @param {MolGraph}molGraph - graph to flip
|
|
731
|
+
* @param {boolean}axis - axis to flip around*/
|
|
744
732
|
function flipMolGraph(molGraph, axis) {
|
|
745
733
|
if (axis) { // flipping around OX
|
|
746
734
|
const y = molGraph.atoms.y;
|
|
@@ -759,9 +747,20 @@ function flipMolGraph(molGraph, axis) {
|
|
|
759
747
|
orientation.set(key, newValue);
|
|
760
748
|
}
|
|
761
749
|
}
|
|
750
|
+
function getAngleBetweenSugarBranchAndOY(molGraph) {
|
|
751
|
+
const x = molGraph.atoms.x;
|
|
752
|
+
const y = molGraph.atoms.y;
|
|
753
|
+
const rNode = molGraph.meta.rNodes[2] - 1;
|
|
754
|
+
const terminalNode = molGraph.meta.terminalNodes[2] - 1;
|
|
755
|
+
const xShift = x[rNode] - x[terminalNode];
|
|
756
|
+
const yShift = y[rNode] - y[terminalNode];
|
|
757
|
+
return Math.atan(yShift / xShift) + Math.PI / 2;
|
|
758
|
+
}
|
|
762
759
|
/** Flips double-bonded 'O' in carbonyl group with 'OH' in order for the monomers
|
|
763
|
-
|
|
764
|
-
|
|
760
|
+
* to have standard representation simplifying their concatenation. The
|
|
761
|
+
* monomer must already be adjusted with adjustPeptideMonomerGraph in order for this function to be implemented
|
|
762
|
+
* @param {MolGraph}monomer - peptide monomer
|
|
763
|
+
* @param {number}doubleBondedOxygen - index of the double-bonded oxygen atom*/
|
|
765
764
|
function flipHydroxilGroup(monomer, doubleBondedOxygen) {
|
|
766
765
|
const x = monomer.atoms.x;
|
|
767
766
|
// -1 below because indexing of nodes in molfiles starts from 1, unlike arrays
|
|
@@ -769,7 +768,9 @@ function flipHydroxilGroup(monomer, doubleBondedOxygen) {
|
|
|
769
768
|
swapNodes(monomer, doubleBondedOxygen, monomer.meta.rNodes[1]);
|
|
770
769
|
}
|
|
771
770
|
/** Determine the number of node (starting from 1) corresponding to the
|
|
772
|
-
|
|
771
|
+
* double-bonded oxygen of the carbonyl group
|
|
772
|
+
* @param {MolGraph}monomer - peptide monomer
|
|
773
|
+
* @return {number} index of the double-bonded oxygen atom*/
|
|
773
774
|
function findDoubleBondedCarbonylOxygen(monomer) {
|
|
774
775
|
const bondsMap = constructBondsMap(monomer);
|
|
775
776
|
let doubleBondedOxygen = 0;
|
|
@@ -777,13 +778,16 @@ function findDoubleBondedCarbonylOxygen(monomer) {
|
|
|
777
778
|
// iterate over the nodes bonded to the carbon and find the double one
|
|
778
779
|
while (doubleBondedOxygen === 0) {
|
|
779
780
|
const node = bondsMap.get(monomer.meta.terminalNodes[1])[i];
|
|
780
|
-
if (monomer.atoms.atomTypes[node - 1] === OXYGEN && node !== monomer.meta.rNodes[1])
|
|
781
|
+
if (monomer.atoms.atomTypes[node - 1] === C.OXYGEN && node !== monomer.meta.rNodes[1])
|
|
781
782
|
doubleBondedOxygen = node;
|
|
782
783
|
i++;
|
|
783
784
|
}
|
|
784
785
|
return doubleBondedOxygen;
|
|
785
786
|
}
|
|
786
|
-
/** Swap the Cartesian coordinates of the two specified nodes in MolGraph
|
|
787
|
+
/** Swap the Cartesian coordinates of the two specified nodes in MolGraph
|
|
788
|
+
* @param {MolGraph}monomer - monomer graph
|
|
789
|
+
* @param {number}nodeOne - index of the first node
|
|
790
|
+
* @param {number}nodeTwo - index of the second node*/
|
|
787
791
|
function swapNodes(monomer, nodeOne, nodeTwo) {
|
|
788
792
|
const nodeOneIdx = nodeOne - 1;
|
|
789
793
|
const nodeTwoIdx = nodeTwo - 1;
|
|
@@ -796,23 +800,27 @@ function swapNodes(monomer, nodeOne, nodeTwo) {
|
|
|
796
800
|
x[nodeTwoIdx] = tmpX;
|
|
797
801
|
y[nodeTwoIdx] = tmpY;
|
|
798
802
|
}
|
|
799
|
-
/** Maps a node to the list of nodes bound to it
|
|
803
|
+
/** Maps a node to the list of nodes bound to it
|
|
804
|
+
* @param {MolGraph}monomer - monomer graph
|
|
805
|
+
* @return {Map<number, Array<number>>} map of nodes to the list of nodes bound to them*/
|
|
800
806
|
function constructBondsMap(monomer) {
|
|
801
|
-
var _a;
|
|
802
807
|
const map = new Map();
|
|
803
808
|
for (const atomPairs of monomer.bonds.atomPairs) {
|
|
804
809
|
for (let i = 0; i < 2; i++) {
|
|
805
810
|
const key = atomPairs[i];
|
|
806
811
|
const value = atomPairs[(i + 1) % 2];
|
|
807
812
|
if (map.has(key))
|
|
808
|
-
|
|
813
|
+
map.get(key)?.push(value);
|
|
809
814
|
else
|
|
810
815
|
map.set(key, new Array(1).fill(value));
|
|
811
816
|
}
|
|
812
817
|
}
|
|
813
818
|
return map;
|
|
814
819
|
}
|
|
815
|
-
/** Shift molGraph in the XOY plane
|
|
820
|
+
/** Shift molGraph in the XOY plane
|
|
821
|
+
* @param {MolGraph}molGraph - graph to shift
|
|
822
|
+
* @param {number}xShift - shift along X axis
|
|
823
|
+
* @param {number}yShift - shift along Y axis*/
|
|
816
824
|
function shiftCoordinates(molGraph, xShift, yShift) {
|
|
817
825
|
const x = molGraph.atoms.x;
|
|
818
826
|
const y = molGraph.atoms.y;
|
|
@@ -822,245 +830,7 @@ function shiftCoordinates(molGraph, xShift, yShift) {
|
|
|
822
830
|
y[i] = keepPrecision(y[i] + yShift);
|
|
823
831
|
}
|
|
824
832
|
}
|
|
825
|
-
|
|
826
|
-
function monomerSeqToMolfile(monomerSeq, monomersDict, alphabet, polymerType) {
|
|
827
|
-
if (monomerSeq.length === 0) {
|
|
828
|
-
// throw new Error('monomerSeq is empty');
|
|
829
|
-
return '';
|
|
830
|
-
}
|
|
831
|
-
// define atom and bond counts, taking into account the bond type
|
|
832
|
-
const getAtomAndBondCounts = getResultingAtomBondCounts;
|
|
833
|
-
const { atomCount, bondCount } = getAtomAndBondCounts(monomerSeq, monomersDict, alphabet, polymerType);
|
|
834
|
-
// create arrays to store lines of the resulting molfile
|
|
835
|
-
const molfileAtomBlock = new Array(atomCount);
|
|
836
|
-
const molfileBondBlock = new Array(bondCount);
|
|
837
|
-
let addMonomerToMolblock; // todo: types?
|
|
838
|
-
let sugar = null;
|
|
839
|
-
let phosphate = null;
|
|
840
|
-
if (polymerType === "PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */) {
|
|
841
|
-
addMonomerToMolblock = addAminoAcidToMolblock;
|
|
842
|
-
}
|
|
843
|
-
else { // nucleotides
|
|
844
|
-
addMonomerToMolblock = addNucleotideToMolblock;
|
|
845
|
-
sugar = (alphabet === "DNA" /* ALPHABET.DNA */) ? monomersDict.get(DEOXYRIBOSE) : monomersDict.get(RIBOSE);
|
|
846
|
-
phosphate = monomersDict.get(PHOSPHATE);
|
|
847
|
-
}
|
|
848
|
-
const v = {
|
|
849
|
-
i: 0,
|
|
850
|
-
nodeShift: 0,
|
|
851
|
-
bondShift: 0,
|
|
852
|
-
backbonePositionShift: new Array(2).fill(0),
|
|
853
|
-
branchPositionShift: new Array(2).fill(0),
|
|
854
|
-
backboneAttachNode: 0,
|
|
855
|
-
branchAttachNode: 0,
|
|
856
|
-
flipFactor: 1,
|
|
857
|
-
};
|
|
858
|
-
const C = {
|
|
859
|
-
sugar: sugar,
|
|
860
|
-
phosphate: phosphate,
|
|
861
|
-
seqLength: monomerSeq.length,
|
|
862
|
-
atomCount: atomCount,
|
|
863
|
-
bondCount: bondCount,
|
|
864
|
-
};
|
|
865
|
-
for (v.i = 0; v.i < C.seqLength; ++v.i) {
|
|
866
|
-
const monomer = monomersDict.get(monomerSeq[v.i]);
|
|
867
|
-
addMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v, C);
|
|
868
|
-
}
|
|
869
|
-
capResultingMolblock(molfileAtomBlock, molfileBondBlock, v, C);
|
|
870
|
-
const molfileCountsLine = V3K_BEGIN_COUNTS_LINE + atomCount + ' ' + bondCount + V3K_COUNTS_LINE_ENDING;
|
|
871
|
-
// todo: possible optimization may be achieved by replacing .join('') with +=
|
|
872
|
-
// since counterintuitively joining an array into a new string is reportedly
|
|
873
|
-
// slower than using += as below
|
|
874
|
-
let result = '';
|
|
875
|
-
result += V3K_HEADER_FIRST_LINE;
|
|
876
|
-
result += V3K_HEADER_SECOND_LINE;
|
|
877
|
-
result += V3K_BEGIN_CTAB_BLOCK;
|
|
878
|
-
result += molfileCountsLine;
|
|
879
|
-
result += V3K_BEGIN_ATOM_BLOCK;
|
|
880
|
-
result += molfileAtomBlock.join('');
|
|
881
|
-
result += V3K_END_ATOM_BLOCK;
|
|
882
|
-
result += V3K_BEGIN_BOND_BLOCK;
|
|
883
|
-
result += molfileBondBlock.join('');
|
|
884
|
-
result += V3K_END_BOND_BLOCK;
|
|
885
|
-
result += V3K_END_CTAB_BLOCK;
|
|
886
|
-
result += V3K_END;
|
|
887
|
-
// return molfileParts.join('');
|
|
888
|
-
return result;
|
|
889
|
-
}
|
|
890
|
-
/** Cap the resulting (after sewing up all the monomers) molfile with 'O' */
|
|
891
|
-
function capResultingMolblock(molfileAtomBlock, molfileBondBlock, v, C) {
|
|
892
|
-
// add terminal oxygen
|
|
893
|
-
const atomIdx = v.nodeShift + 1;
|
|
894
|
-
molfileAtomBlock[C.atomCount] = V3K_BEGIN_DATA_LINE + atomIdx + ' ' +
|
|
895
|
-
OXYGEN + ' ' + keepPrecision(v.backbonePositionShift[0]) + ' ' +
|
|
896
|
-
v.flipFactor * keepPrecision(v.backbonePositionShift[1]) + ' ' + '0.000000 0' + '\n';
|
|
897
|
-
// add terminal bond
|
|
898
|
-
const firstAtom = v.backboneAttachNode;
|
|
899
|
-
const secondAtom = atomIdx;
|
|
900
|
-
molfileBondBlock[C.bondCount] = V3K_BEGIN_DATA_LINE + v.bondShift + ' ' +
|
|
901
|
-
1 + ' ' + firstAtom + ' ' + secondAtom + '\n';
|
|
902
|
-
}
|
|
903
|
-
function addAminoAcidToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v) {
|
|
904
|
-
v.flipFactor = Math.pow((-1), (v.i % 2)); // to flip every even monomer over OX
|
|
905
|
-
addBackboneMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v);
|
|
906
|
-
}
|
|
907
|
-
function addBackboneMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v) {
|
|
908
|
-
// todo: remove these comments to the docstrings of the corr. functions
|
|
909
|
-
// construnct the lines of V3K molfile atom block
|
|
910
|
-
fillAtomLines(monomer, molfileAtomBlock, v);
|
|
911
|
-
// construct the lines of V3K molfile bond block
|
|
912
|
-
fillBondLines(monomer, molfileBondBlock, v);
|
|
913
|
-
// peptide bond
|
|
914
|
-
fillChainExtendingBond(monomer, molfileBondBlock, v);
|
|
915
|
-
// update branch variables if necessary
|
|
916
|
-
if (monomer.meta.branchShift !== null && monomer.meta.terminalNodes.length > 2)
|
|
917
|
-
updateBranchVariables(monomer, v);
|
|
918
|
-
// update loop variables
|
|
919
|
-
updateChainExtendingVariables(monomer, v);
|
|
920
|
-
}
|
|
921
|
-
function addNucleotideToMolblock(nucleobase, molfileAtomBlock, molfileBondBlock, v, C) {
|
|
922
|
-
// construnct the lines of V3K molfile atom block corresponding to phosphate
|
|
923
|
-
// and sugar
|
|
924
|
-
if (v.i === 0) {
|
|
925
|
-
addBackboneMonomerToMolblock(C.sugar, molfileAtomBlock, molfileBondBlock, v);
|
|
926
|
-
}
|
|
927
|
-
else {
|
|
928
|
-
for (const monomer of [C.phosphate, C.sugar])
|
|
929
|
-
addBackboneMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v);
|
|
930
|
-
}
|
|
931
|
-
addBranchMonomerToMolblock(nucleobase, molfileAtomBlock, molfileBondBlock, v);
|
|
932
|
-
}
|
|
933
|
-
function addBranchMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v) {
|
|
934
|
-
fillBranchAtomLines(monomer, molfileAtomBlock, v);
|
|
935
|
-
fillBondLines(monomer, molfileBondBlock, v);
|
|
936
|
-
fillBackboneToBranchBond(monomer, molfileBondBlock, v);
|
|
937
|
-
// C-N bond
|
|
938
|
-
const bondIdx = v.bondShift;
|
|
939
|
-
const firstAtom = v.branchAttachNode;
|
|
940
|
-
const secondAtom = monomer.meta.terminalNodes[0] + v.nodeShift;
|
|
941
|
-
molfileBondBlock[bondIdx - 1] = V3K_BEGIN_DATA_LINE + bondIdx + ' ' +
|
|
942
|
-
1 + ' ' + firstAtom + ' ' + secondAtom + '\n';
|
|
943
|
-
// update loop variables
|
|
944
|
-
v.bondShift += monomer.bonds.atomPairs.length + 1;
|
|
945
|
-
v.nodeShift += monomer.atoms.atomTypes.length;
|
|
946
|
-
}
|
|
947
|
-
function updateChainExtendingVariables(monomer, v) {
|
|
948
|
-
v.backboneAttachNode = v.nodeShift + monomer.meta.terminalNodes[1];
|
|
949
|
-
v.bondShift += monomer.bonds.atomPairs.length + 1;
|
|
950
|
-
v.nodeShift += monomer.atoms.atomTypes.length;
|
|
951
|
-
v.backbonePositionShift[0] += monomer.meta.backboneShift[0]; // todo: non-null check
|
|
952
|
-
v.backbonePositionShift[1] += v.flipFactor * monomer.meta.backboneShift[1];
|
|
953
|
-
}
|
|
954
|
-
function updateBranchVariables(monomer, v) {
|
|
955
|
-
v.branchAttachNode = v.nodeShift + monomer.meta.terminalNodes[2];
|
|
956
|
-
for (let i = 0; i < 2; ++i)
|
|
957
|
-
v.branchPositionShift[i] = v.backbonePositionShift[i] + monomer.meta.branchShift[i];
|
|
958
|
-
}
|
|
959
|
-
function fillAtomLines(monomer, molfileAtomBlock, v) {
|
|
960
|
-
for (let j = 0; j < monomer.atoms.atomTypes.length; ++j) {
|
|
961
|
-
const atomIdx = v.nodeShift + j + 1;
|
|
962
|
-
molfileAtomBlock[v.nodeShift + j] = V3K_BEGIN_DATA_LINE + atomIdx + ' ' +
|
|
963
|
-
monomer.atoms.atomTypes[j] + ' ' +
|
|
964
|
-
keepPrecision(v.backbonePositionShift[0] + monomer.atoms.x[j]) + ' ' +
|
|
965
|
-
keepPrecision(v.backbonePositionShift[1] + v.flipFactor * monomer.atoms.y[j]) +
|
|
966
|
-
' ' + monomer.atoms.kwargs[j];
|
|
967
|
-
}
|
|
968
|
-
}
|
|
969
|
-
// todo: remove as quickfix
|
|
970
|
-
function fillBranchAtomLines(monomer, molfileAtomBlock, v) {
|
|
971
|
-
for (let j = 0; j < monomer.atoms.atomTypes.length; ++j) {
|
|
972
|
-
const atomIdx = v.nodeShift + j + 1;
|
|
973
|
-
molfileAtomBlock[v.nodeShift + j] = V3K_BEGIN_DATA_LINE + atomIdx + ' ' +
|
|
974
|
-
monomer.atoms.atomTypes[j] + ' ' +
|
|
975
|
-
keepPrecision(v.branchPositionShift[0] + monomer.atoms.x[j]) + ' ' +
|
|
976
|
-
keepPrecision(v.branchPositionShift[1] + v.flipFactor * monomer.atoms.y[j]) +
|
|
977
|
-
' ' + monomer.atoms.kwargs[j];
|
|
978
|
-
}
|
|
979
|
-
}
|
|
980
|
-
function fillBondLines(monomer, molfileBondBlock, v) {
|
|
981
|
-
// construct the lines of V3K molfile bond block
|
|
982
|
-
for (let j = 0; j < monomer.bonds.atomPairs.length; ++j) {
|
|
983
|
-
const bondIdx = v.bondShift + j + 1;
|
|
984
|
-
const firstAtom = monomer.bonds.atomPairs[j][0] + v.nodeShift;
|
|
985
|
-
const secondAtom = monomer.bonds.atomPairs[j][1] + v.nodeShift;
|
|
986
|
-
let bondCfg = '';
|
|
987
|
-
if (monomer.bonds.bondConfiguration.has(j)) {
|
|
988
|
-
// flip orientation when necessary
|
|
989
|
-
let orientation = monomer.bonds.bondConfiguration.get(j);
|
|
990
|
-
if (v.flipFactor < 0)
|
|
991
|
-
orientation = (orientation === 1) ? 3 : 1;
|
|
992
|
-
bondCfg = ' CFG=' + orientation;
|
|
993
|
-
}
|
|
994
|
-
const kwargs = monomer.bonds.kwargs.has(j) ?
|
|
995
|
-
' ' + monomer.bonds.kwargs.get(j) : '';
|
|
996
|
-
molfileBondBlock[v.bondShift + j] = V3K_BEGIN_DATA_LINE + bondIdx + ' ' +
|
|
997
|
-
monomer.bonds.bondTypes[j] + ' ' +
|
|
998
|
-
firstAtom + ' ' + secondAtom + bondCfg + kwargs + '\n';
|
|
999
|
-
}
|
|
1000
|
-
}
|
|
1001
|
-
function fillChainExtendingBond(monomer, molfileBondBlock, v) {
|
|
1002
|
-
if (v.backboneAttachNode !== 0) {
|
|
1003
|
-
const bondIdx = v.bondShift;
|
|
1004
|
-
const firstAtom = v.backboneAttachNode;
|
|
1005
|
-
const secondAtom = monomer.meta.terminalNodes[0] + v.nodeShift;
|
|
1006
|
-
molfileBondBlock[v.bondShift - 1] = V3K_BEGIN_DATA_LINE + bondIdx + ' ' +
|
|
1007
|
-
1 + ' ' + firstAtom + ' ' + secondAtom + '\n';
|
|
1008
|
-
}
|
|
1009
|
-
}
|
|
1010
|
-
// todo: remove
|
|
1011
|
-
function fillBackboneToBranchBond(branchMonomer, molfileBondBlock, v) {
|
|
1012
|
-
const bondIdx = v.bondShift;
|
|
1013
|
-
const firstAtom = v.branchAttachNode;
|
|
1014
|
-
const secondAtom = branchMonomer.meta.terminalNodes[0] + v.nodeShift;
|
|
1015
|
-
molfileBondBlock[bondIdx - 1] = V3K_BEGIN_DATA_LINE + bondIdx + ' ' +
|
|
1016
|
-
1 + ' ' + firstAtom + ' ' + secondAtom + '\n';
|
|
1017
|
-
}
|
|
1018
|
-
/** Compute the atom/bond counts for the resulting molfile, depending on the
|
|
1019
|
-
* type of polymer (peptide/nucleotide) */
|
|
1020
|
-
function getResultingAtomBondCounts(monomerSeq, monomersDict, alphabet, polymerType) {
|
|
1021
|
-
let atomCount = 0;
|
|
1022
|
-
let bondCount = 0;
|
|
1023
|
-
// sum up all the atoms/nodes provided by the sequence
|
|
1024
|
-
for (const monomerSymbol of monomerSeq) {
|
|
1025
|
-
if (monomerSymbol === '')
|
|
1026
|
-
continue; // Skip for gap/empty monomer in MSA
|
|
1027
|
-
const monomer = monomersDict.get(monomerSymbol);
|
|
1028
|
-
atomCount += monomer.atoms.x.length;
|
|
1029
|
-
bondCount += monomer.bonds.bondTypes.length;
|
|
1030
|
-
}
|
|
1031
|
-
// add extra values depending on the polymer type
|
|
1032
|
-
if (polymerType === "PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */) {
|
|
1033
|
-
// add the rightmost/terminating cap group 'OH' (i.e. 'O')
|
|
1034
|
-
atomCount += 1;
|
|
1035
|
-
// add chain-extending bonds (C-NH per each monomer pair and terminal C-OH)
|
|
1036
|
-
bondCount += monomerSeq.length;
|
|
1037
|
-
}
|
|
1038
|
-
else { // nucleotides
|
|
1039
|
-
const sugar = (alphabet === "DNA" /* ALPHABET.DNA */) ?
|
|
1040
|
-
monomersDict.get(DEOXYRIBOSE) : monomersDict.get(RIBOSE);
|
|
1041
|
-
const phosphate = monomersDict.get(PHOSPHATE);
|
|
1042
|
-
// add phosphate per each pair of nucleobase symbols
|
|
1043
|
-
atomCount += (monomerSeq.length - 1) * phosphate.atoms.x.length;
|
|
1044
|
-
// add sugar per each nucleobase symbol
|
|
1045
|
-
atomCount += monomerSeq.length * sugar.atoms.x.length;
|
|
1046
|
-
// add the leftmost cap group 'OH' (i.e. 'O')
|
|
1047
|
-
atomCount += 1;
|
|
1048
|
-
// add bonds from phosphate monomers
|
|
1049
|
-
bondCount += (monomerSeq.length - 1) * phosphate.bonds.bondTypes.length;
|
|
1050
|
-
// add bonds from sugar monomers
|
|
1051
|
-
bondCount += monomerSeq.length * sugar.bonds.bondTypes.length;
|
|
1052
|
-
// exclude the first chain-extending bond O-P (absent, no 'leftmost' phosphate)
|
|
1053
|
-
bondCount -= 1;
|
|
1054
|
-
// add chain-extending and branch bonds (O-P, C-O and C-N per each nucleotide)
|
|
1055
|
-
bondCount += monomerSeq.length * 3;
|
|
1056
|
-
}
|
|
1057
|
-
return { atomCount, bondCount };
|
|
1058
|
-
}
|
|
1059
|
-
/** Keep precision upon floating point operations over atom coordinates */
|
|
1060
|
-
function keepPrecision(x) {
|
|
1061
|
-
return Math.round(PRECISION_FACTOR * x) / PRECISION_FACTOR;
|
|
1062
|
-
}
|
|
1063
|
-
function convertMolGraphToMolfileV3K(molGraph) {
|
|
833
|
+
export function convertMolGraphToMolfileV3K(molGraph) {
|
|
1064
834
|
// counts line
|
|
1065
835
|
const atomType = molGraph.atoms.atomTypes;
|
|
1066
836
|
const x = molGraph.atoms.x;
|
|
@@ -1073,7 +843,7 @@ function convertMolGraphToMolfileV3K(molGraph) {
|
|
|
1073
843
|
const atomCount = atomType.length;
|
|
1074
844
|
const bondCount = molGraph.bonds.bondTypes.length;
|
|
1075
845
|
// todo rewrite using constants
|
|
1076
|
-
const molfileCountsLine = V3K_BEGIN_COUNTS_LINE + atomCount + ' ' + bondCount + V3K_COUNTS_LINE_ENDING;
|
|
846
|
+
const molfileCountsLine = C.V3K_BEGIN_COUNTS_LINE + atomCount + ' ' + bondCount + C.V3K_COUNTS_LINE_ENDING;
|
|
1077
847
|
// atom block
|
|
1078
848
|
let molfileAtomBlock = '';
|
|
1079
849
|
for (let i = 0; i < atomCount; ++i) {
|
|
@@ -1087,7 +857,7 @@ function convertMolGraphToMolfileV3K(molGraph) {
|
|
|
1087
857
|
// formatted[1] = formatted[1].padEnd(V3K_ATOM_COORDINATE_PRECISION, '0');
|
|
1088
858
|
// coordinate[k] = formatted.join('.');
|
|
1089
859
|
// }
|
|
1090
|
-
const atomLine = V3K_BEGIN_DATA_LINE + atomIdx + ' ' + atomType[i] + ' ' +
|
|
860
|
+
const atomLine = C.V3K_BEGIN_DATA_LINE + atomIdx + ' ' + atomType[i] + ' ' +
|
|
1091
861
|
coordinate[0] + ' ' + coordinate[1] + ' ' + atomKwargs[i];
|
|
1092
862
|
molfileAtomBlock += atomLine;
|
|
1093
863
|
}
|
|
@@ -1099,54 +869,54 @@ function convertMolGraphToMolfileV3K(molGraph) {
|
|
|
1099
869
|
const secondAtom = atomPair[i][1];
|
|
1100
870
|
const kwargs = bondKwargs.has(i) ? ' ' + bondKwargs.get(i) : '';
|
|
1101
871
|
const bondCfg = bondConfig.has(i) ? ' CFG=' + bondConfig.get(i) : '';
|
|
1102
|
-
const bondLine = V3K_BEGIN_DATA_LINE + bondIdx + ' ' + bondType[i] + ' ' +
|
|
872
|
+
const bondLine = C.V3K_BEGIN_DATA_LINE + bondIdx + ' ' + bondType[i] + ' ' +
|
|
1103
873
|
firstAtom + ' ' + secondAtom + bondCfg + kwargs + '\n';
|
|
1104
874
|
molfileBondBlock += bondLine;
|
|
1105
875
|
}
|
|
1106
876
|
const molfileParts = [
|
|
1107
|
-
V3K_HEADER_FIRST_LINE,
|
|
1108
|
-
V3K_HEADER_SECOND_LINE,
|
|
1109
|
-
V3K_BEGIN_CTAB_BLOCK,
|
|
877
|
+
C.V3K_HEADER_FIRST_LINE,
|
|
878
|
+
C.V3K_HEADER_SECOND_LINE,
|
|
879
|
+
C.V3K_BEGIN_CTAB_BLOCK,
|
|
1110
880
|
molfileCountsLine,
|
|
1111
|
-
V3K_BEGIN_ATOM_BLOCK,
|
|
881
|
+
C.V3K_BEGIN_ATOM_BLOCK,
|
|
1112
882
|
molfileAtomBlock,
|
|
1113
|
-
V3K_END_ATOM_BLOCK,
|
|
1114
|
-
V3K_BEGIN_BOND_BLOCK,
|
|
883
|
+
C.V3K_END_ATOM_BLOCK,
|
|
884
|
+
C.V3K_BEGIN_BOND_BLOCK,
|
|
1115
885
|
molfileBondBlock,
|
|
1116
|
-
V3K_END_BOND_BLOCK,
|
|
1117
|
-
V3K_END_CTAB_BLOCK,
|
|
1118
|
-
V3K_END,
|
|
886
|
+
C.V3K_END_BOND_BLOCK,
|
|
887
|
+
C.V3K_END_CTAB_BLOCK,
|
|
888
|
+
C.V3K_END,
|
|
1119
889
|
];
|
|
1120
890
|
const resultingMolfile = molfileParts.join('');
|
|
1121
891
|
// console.log(resultingMolfile);
|
|
1122
892
|
return resultingMolfile;
|
|
1123
893
|
}
|
|
1124
|
-
export function getSymbolToCappedMolfileMap(monomersLibList) {
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
return symbolToCappedMolfileMap;
|
|
1147
|
-
});
|
|
894
|
+
export async function getSymbolToCappedMolfileMap(monomersLibList) {
|
|
895
|
+
if (DG.Func.find({ package: 'Chem', name: 'getRdKitModule' }).length === 0) {
|
|
896
|
+
grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
|
|
897
|
+
return;
|
|
898
|
+
}
|
|
899
|
+
const symbolToCappedMolfileMap = new Map();
|
|
900
|
+
const moduleRdkit = await grok.functions.call('Chem:getRdKitModule');
|
|
901
|
+
for (const monomerLibObject of monomersLibList) {
|
|
902
|
+
const monomerSymbol = monomerLibObject["symbol" /* HELM_FIELDS.SYMBOL */];
|
|
903
|
+
const capGroups = parseCapGroups(monomerLibObject["rgroups" /* HELM_FIELDS.RGROUPS */]);
|
|
904
|
+
const capGroupIdxMap = parseCapGroupIdxMap(monomerLibObject["molfile" /* HELM_FIELDS.MOLFILE */]);
|
|
905
|
+
const molfileV3K = convertMolfileToV3K(removeRGroupLines(monomerLibObject["molfile" /* HELM_FIELDS.MOLFILE */]), moduleRdkit);
|
|
906
|
+
const counts = parseAtomAndBondCounts(molfileV3K);
|
|
907
|
+
const atoms = parseAtomBlock(molfileV3K, counts.atomCount);
|
|
908
|
+
const bonds = parseBondBlock(molfileV3K, counts.bondCount);
|
|
909
|
+
const meta = getMonomerMetadata(atoms, bonds, capGroups, capGroupIdxMap);
|
|
910
|
+
const monomerGraph = { atoms: atoms, bonds: bonds, meta: meta };
|
|
911
|
+
removeHydrogen(monomerGraph);
|
|
912
|
+
const molfile = convertMolGraphToMolfileV3K(monomerGraph);
|
|
913
|
+
symbolToCappedMolfileMap.set(monomerSymbol, molfile);
|
|
914
|
+
}
|
|
915
|
+
return symbolToCappedMolfileMap;
|
|
1148
916
|
}
|
|
1149
|
-
/** Get the V3K molfile corresponding to the capped Monomer (default cap groups)
|
|
917
|
+
/** Get the V3K molfile corresponding to the capped Monomer (default cap groups)
|
|
918
|
+
* @param {Monomer} monomer
|
|
919
|
+
* @return {string} V3K molfile*/
|
|
1150
920
|
export function capPeptideMonomer(monomer) {
|
|
1151
921
|
const funcList = DG.Func.find({ package: 'Chem', name: 'getRdKitModule' });
|
|
1152
922
|
const moduleRdkit = funcList[0].apply();
|