@datagrok-libraries/bio 5.32.3 → 5.32.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/.eslintrc.json +17 -12
  2. package/package.json +1 -1
  3. package/src/aminoacids.js +3 -4
  4. package/src/aminoacids.js.map +1 -1
  5. package/src/monomer-works/consts.d.ts +27 -0
  6. package/src/monomer-works/consts.d.ts.map +1 -0
  7. package/src/monomer-works/consts.js +30 -0
  8. package/src/monomer-works/consts.js.map +1 -0
  9. package/src/monomer-works/monomer-utils.js +6 -17
  10. package/src/monomer-works/monomer-utils.js.map +1 -1
  11. package/src/monomer-works/seq-to-molfile-worker.d.ts +2 -0
  12. package/src/monomer-works/seq-to-molfile-worker.d.ts.map +1 -0
  13. package/src/monomer-works/seq-to-molfile-worker.js +19 -0
  14. package/src/monomer-works/seq-to-molfile-worker.js.map +1 -0
  15. package/src/monomer-works/seq-to-molfile.d.ts +9 -0
  16. package/src/monomer-works/seq-to-molfile.d.ts.map +1 -0
  17. package/src/monomer-works/seq-to-molfile.js +31 -0
  18. package/src/monomer-works/seq-to-molfile.js.map +1 -0
  19. package/src/monomer-works/to-atomic-level-utils.d.ts +23 -0
  20. package/src/monomer-works/to-atomic-level-utils.d.ts.map +1 -0
  21. package/src/monomer-works/to-atomic-level-utils.js +284 -0
  22. package/src/monomer-works/to-atomic-level-utils.js.map +1 -0
  23. package/src/monomer-works/to-atomic-level.d.ts +48 -4
  24. package/src/monomer-works/to-atomic-level.d.ts.map +1 -1
  25. package/src/monomer-works/to-atomic-level.js +273 -503
  26. package/src/monomer-works/to-atomic-level.js.map +1 -1
  27. package/src/monomer-works/types.d.ts +71 -0
  28. package/src/monomer-works/types.d.ts.map +1 -0
  29. package/src/monomer-works/types.js +2 -0
  30. package/src/monomer-works/types.js.map +1 -0
  31. package/src/pdb/pdb-helper.js +7 -18
  32. package/src/pdb/pdb-helper.js.map +1 -1
  33. package/src/tests/palettes-tests.js +20 -33
  34. package/src/tests/palettes-tests.js.map +1 -1
  35. package/src/trees/dendrogram.js +2 -13
  36. package/src/trees/dendrogram.js.map +1 -1
  37. package/src/trees/tree-helper.js +7 -18
  38. package/src/trees/tree-helper.js.map +1 -1
  39. package/src/utils/cell-renderer.js +3 -4
  40. package/src/utils/cell-renderer.js.map +1 -1
  41. package/src/utils/macromolecule/utils.js +4 -6
  42. package/src/utils/macromolecule/utils.js.map +1 -1
  43. package/src/utils/splitter.js +1 -1
  44. package/src/utils/splitter.js.map +1 -1
  45. package/src/utils/units-handler.js +26 -38
  46. package/src/utils/units-handler.js.map +1 -1
  47. package/src/utils.js +1 -1
  48. package/src/utils.js.map +1 -1
  49. package/src/viewers/ngl-gl-viewer.js +6 -17
  50. package/src/viewers/ngl-gl-viewer.js.map +1 -1
  51. package/src/viewers/phylocanvas-gl-viewer.js +7 -18
  52. package/src/viewers/phylocanvas-gl-viewer.js.map +1 -1
  53. package/tsconfig.json +1 -1
@@ -1,137 +1,71 @@
1
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
- return new (P || (P = Promise))(function (resolve, reject) {
4
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
- step((generator = generator.apply(thisArg, _arguments || [])).next());
8
- });
9
- };
10
1
  /* Do not change these import lines to match external modules in webpack configuration */
11
2
  import * as grok from 'datagrok-api/grok';
12
3
  import * as DG from 'datagrok-api/dg';
13
- import { HELM_CORE_FIELDS, } from '../utils/const';
14
- import { getSplitter } from '../utils/macromolecule/utils';
15
4
  import { NotationConverter } from '../utils/notation-converter';
16
- import { errorToConsole } from '@datagrok-libraries/utils/src/to-console';
17
5
  import { UnitsHandler } from '../utils/units-handler';
18
- // constants for parsing molfile V2000
19
- const V2K_RGP_SHIFT = 8;
20
- const V2K_RGP_LINE = 'M RGP';
21
- const V2K_A_LINE = 'A ';
22
- // constants for parsing/reconstruction of molfile V3000
23
- const V3K_COUNTS_SHIFT = 14;
24
- const V3K_IDX_SHIFT = 7;
25
- const V3K_HEADER_FIRST_LINE = '\nDatagrok macromolecule handler\n\n';
26
- const V3K_HEADER_SECOND_LINE = ' 0 0 0 0 0 0 999 V3000\n';
27
- const V3K_BEGIN_CTAB_BLOCK = 'M V30 BEGIN CTAB\n';
28
- const V3K_END_CTAB_BLOCK = 'M V30 END CTAB\n';
29
- const V3K_BEGIN_COUNTS_LINE = 'M V30 COUNTS ';
30
- const V3K_COUNTS_LINE_ENDING = ' 0 0 0\n';
31
- const V3K_BEGIN_ATOM_BLOCK = 'M V30 BEGIN ATOM\n';
32
- const V3K_END_ATOM_BLOCK = 'M V30 END ATOM\n';
33
- const V3K_BEGIN_BOND_BLOCK = 'M V30 BEGIN BOND\n';
34
- const V3K_END_BOND_BLOCK = 'M V30 END BOND\n';
35
- const V3K_BOND_CONFIG = ' CFG=';
36
- const V3K_BEGIN_DATA_LINE = 'M V30 ';
37
- const V3K_END = 'M END';
38
- const PRECISION_FACTOR = 10000; // HELMCoreLibrary has 4 significant digits after decimal point in atom coordinates
39
- // symbols for the corresponding monomers in HELM library
40
- const DEOXYRIBOSE = 'd';
41
- const RIBOSE = 'r';
42
- const PHOSPHATE = 'p';
43
- const OXYGEN = 'O';
44
- const HYDROGEN = 'H';
6
+ import { getFormattedMonomerLib, keepPrecision } from './to-atomic-level-utils';
7
+ import { seqToMolFileWorker } from './seq-to-molfile';
8
+ import { monomerWorksConsts as C } from './consts';
9
+ import { errorToConsole } from '@datagrok-libraries/utils';
10
+ import { getSplitter } from '../utils/macromolecule';
45
11
  // todo: verify that all functions have return types
46
- /** Convert Macromolecule column into Molecule column storing molfile V3000 with the help of a monomer library */
47
- export function _toAtomicLevel(df, seqCol, monomerLib) {
48
- return __awaiter(this, void 0, void 0, function* () {
49
- // todo: remove this from the library
50
- if (DG.Func.find({ package: 'Chem', name: 'getRdKitModule' }).length === 0) {
51
- const msg = 'Transformation to atomic level requires the package "Chem" installed.';
52
- return { col: null, warnings: [msg] };
53
- }
54
- if (seqCol.semType !== DG.SEMTYPE.MACROMOLECULE) {
55
- const msg = `Only the ${DG.SEMTYPE.MACROMOLECULE} columns can be converted to atomic level, ` +
56
- `the chosen column has semType '${seqCol.semType}'`;
57
- return { col: null, warnings: [msg] };
58
- }
59
- let srcCol = seqCol;
60
- const seqUh = UnitsHandler.getOrCreate(seqCol);
61
- // convert 'helm' to 'separator' units
62
- if (seqUh.isHelm()) {
63
- const converter = new NotationConverter(seqCol);
64
- srcCol = converter.convert("separator" /* NOTATION.SEPARATOR */, '.');
65
- srcCol.name = seqCol.name; // Replace converted col name 'separator(<original>)' to '<original>';
66
- }
67
- const srcUh = UnitsHandler.getOrCreate(srcCol);
68
- const alphabet = srcUh.alphabet;
69
- // determine the polymer type according to HELM specifications
70
- let polymerType;
71
- // todo: an exception from dart comes before this check if the alphabet is UN
72
- if (alphabet === "PT" /* ALPHABET.PT */ || alphabet === "UN" /* ALPHABET.UN */) {
73
- polymerType = "PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */;
74
- }
75
- else if (alphabet === "RNA" /* ALPHABET.RNA */ || alphabet === "DNA" /* ALPHABET.DNA */) {
76
- polymerType = "RNA" /* HELM_POLYMER_TYPE.RNA */;
77
- }
78
- else {
79
- const msg = `Unexpected column's '${srcCol.name}' alphabet '${alphabet}'.`;
80
- return { col: null, warnings: [msg] };
81
- }
82
- const monomerSequencesArray = getMonomerSequencesArray(srcCol);
83
- const monomersDict = yield getMonomersDictFromLib(monomerSequencesArray, monomerLib, polymerType, alphabet);
84
- const srcColLength = srcCol.length;
85
- const molfileList = new Array(srcColLength);
86
- const molfileWarningList = new Array(0);
87
- for (let rowI = 0; rowI < srcColLength; ++rowI) {
88
- try {
89
- const monomerSeq = monomerSequencesArray[rowI];
90
- molfileList[rowI] = monomerSeqToMolfile(monomerSeq, monomersDict, alphabet, polymerType);
91
- }
92
- catch (err) {
93
- const errMsg = err instanceof Error ? err.message : err.toString();
94
- const msg = `Cannot get molfile of row #${rowI}: ${errMsg}.`;
95
- molfileWarningList.push(msg);
96
- }
97
- }
98
- if (molfileWarningList.length > 0.05 * srcColLength)
99
- throw new Error('Too many errors getting molfiles.');
100
- // exclude name collisions
101
- const name = `molfile(${srcCol.name})`;
102
- const resColName = df.columns.getUnusedName(name);
103
- const resCol = DG.Column.fromStrings(resColName, molfileList);
104
- resCol.semType = DG.SEMTYPE.MOLECULE;
105
- resCol.setTag(DG.TAGS.UNITS, DG.UNITS.Molecule.MOLBLOCK);
106
- return { col: resCol, warnings: molfileWarningList };
107
- });
108
- }
109
- /** Get a mapping of peptide symbols to HELM monomer library
110
- * objects with selected fields.
111
- */
112
- function getFormattedMonomerLib(monomerLib, polymerType, alphabet) {
113
- const map = new Map();
114
- for (const monomerSymbol of monomerLib.getMonomerSymbolsByType(polymerType)) {
115
- const it = monomerLib.getMonomer(polymerType, monomerSymbol);
116
- if (polymerType === "RNA" /* HELM_POLYMER_TYPE.RNA */ &&
117
- (it["monomerType" /* HELM_FIELDS.MONOMER_TYPE */] === "Branch" /* HELM_MONOMER_TYPE.BRANCH */ ||
118
- alphabet === "DNA" /* ALPHABET.DNA */ && it["symbol" /* HELM_FIELDS.SYMBOL */] === DEOXYRIBOSE ||
119
- alphabet === "RNA" /* ALPHABET.RNA */ && it["symbol" /* HELM_FIELDS.SYMBOL */] === RIBOSE ||
120
- it["symbol" /* HELM_FIELDS.SYMBOL */] === PHOSPHATE) ||
121
- polymerType === "PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */ &&
122
- it["monomerType" /* HELM_FIELDS.MONOMER_TYPE */] !== "Branch" /* HELM_MONOMER_TYPE.BRANCH */) {
123
- const monomerObject = {};
124
- HELM_CORE_FIELDS.forEach((field) => {
125
- //@ts-ignore
126
- monomerObject[field] = it[field];
127
- });
128
- map.set(it["symbol" /* HELM_FIELDS.SYMBOL */], monomerObject);
129
- }
12
+ /** Convert Macromolecule column into Molecule column storing molfile V3000 with the help of a monomer library
13
+ * @param {DG.DataFrame} df - DataFrame containing the column to be converted
14
+ * @param {DG.Column} seqCol - Column containing the macromolecule sequence
15
+ * @param {IMonomerLib} monomerLib - Monomer library
16
+ */
17
+ export async function _toAtomicLevel(df, seqCol, monomerLib) {
18
+ // todo: remove this from the library
19
+ if (DG.Func.find({ package: 'Chem', name: 'getRdKitModule' }).length === 0) {
20
+ const msg = 'Transformation to atomic level requires the package "Chem" installed.';
21
+ return { col: null, warnings: [msg] };
22
+ }
23
+ if (seqCol.semType !== DG.SEMTYPE.MACROMOLECULE) {
24
+ const msg = `Only the ${DG.SEMTYPE.MACROMOLECULE} columns can be converted to atomic level, ` +
25
+ `the chosen column has semType '${seqCol.semType}'`;
26
+ return { col: null, warnings: [msg] };
27
+ }
28
+ let srcCol = seqCol;
29
+ const seqUh = UnitsHandler.getOrCreate(seqCol);
30
+ // convert 'helm' to 'separator' units
31
+ if (seqUh.isHelm()) {
32
+ const converter = new NotationConverter(seqCol);
33
+ srcCol = converter.convert("separator" /* NOTATION.SEPARATOR */, '.');
34
+ srcCol.name = seqCol.name; // Replace converted col name 'separator(<original>)' to '<original>';
35
+ }
36
+ const srcUh = UnitsHandler.getOrCreate(srcCol);
37
+ const alphabet = srcUh.alphabet;
38
+ // determine the polymer type according to HELM specifications
39
+ let polymerType;
40
+ // todo: an exception from dart comes before this check if the alphabet is UN
41
+ if (alphabet === "PT" /* ALPHABET.PT */ || alphabet === "UN" /* ALPHABET.UN */) {
42
+ polymerType = "PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */;
43
+ }
44
+ else if (alphabet === "RNA" /* ALPHABET.RNA */ || alphabet === "DNA" /* ALPHABET.DNA */) {
45
+ polymerType = "RNA" /* HELM_POLYMER_TYPE.RNA */;
130
46
  }
131
- return map;
132
- }
133
- /** Get jagged array of monomer symbols for the dataframe */
134
- function getMonomerSequencesArray(macroMolCol) {
47
+ else {
48
+ const msg = `Unexpected column's '${srcCol.name}' alphabet '${alphabet}'.`;
49
+ return { col: null, warnings: [msg] };
50
+ }
51
+ const monomerSequencesArray = getMonomerSequencesArray(srcCol);
52
+ const monomersDict = await getMonomersDictFromLib(monomerSequencesArray, monomerLib, polymerType, alphabet);
53
+ const srcColLength = srcCol.length;
54
+ const { molfileList, molfileWarningList } = await seqToMolFileWorker(monomerSequencesArray, monomersDict, alphabet, polymerType, srcColLength);
55
+ if (molfileWarningList.length > 0.05 * srcColLength)
56
+ throw new Error('Too many errors getting molfiles.');
57
+ // exclude name collisions
58
+ const name = `molfile(${srcCol.name})`;
59
+ const resColName = df.columns.getUnusedName(name);
60
+ const resCol = DG.Column.fromStrings(resColName, molfileList);
61
+ resCol.semType = DG.SEMTYPE.MOLECULE;
62
+ resCol.setTag(DG.TAGS.UNITS, DG.UNITS.Molecule.MOLBLOCK);
63
+ return { col: resCol, warnings: molfileWarningList };
64
+ }
65
+ /** Get jagged array of monomer symbols for the dataframe
66
+ * @param {DG.Column} macroMolCol - Column with macro-molecules
67
+ * @return {string[]} - Jagged array of monomer symbols for the dataframe */
68
+ export function getMonomerSequencesArray(macroMolCol) {
135
69
  const columnLength = macroMolCol.length;
136
70
  const result = new Array(columnLength);
137
71
  // split the string into monomers
@@ -147,53 +81,53 @@ function getMonomerSequencesArray(macroMolCol) {
147
81
  }
148
82
  /** Get a mapping of monomer symbols to MolGraph objects. Notice, the
149
83
  * transformation from molfile V2000 to V3000 takes place,
150
- * with the help of async function call from Chem (RdKit module) */
151
- function getMonomersDictFromLib(monomerSequencesArray, monomerLib, polymerType, alphabet) {
152
- return __awaiter(this, void 0, void 0, function* () {
153
- // todo: exception - no gaps, no empty string monomers
154
- const formattedMonomerLib = getFormattedMonomerLib(monomerLib, polymerType, alphabet);
155
- const monomersDict = new Map();
156
- const moduleRdkit = yield grok.functions.call('Chem:getRdKitModule');
157
- const pointerToBranchAngle = {
158
- value: null
159
- };
160
- // this must NOT be placed after translating monomer sequences
161
- // because adding branch monomers for nucleobases relies on these data
162
- if (polymerType === "RNA" /* HELM_POLYMER_TYPE.RNA */) {
163
- const symbols = (alphabet === "RNA" /* ALPHABET.RNA */) ?
164
- [RIBOSE, PHOSPHATE] : [DEOXYRIBOSE, PHOSPHATE];
165
- for (const sym of symbols)
84
+ * with the help of async function call from Chem (RdKit module)
85
+ * @param {string[]} monomerSequencesArray - Jagged array of monomer symbols for the dataframe
86
+ * @param {IMonomerLib} monomerLib - Monomer library
87
+ * @param {HELM_POLYMER_TYPE} polymerType - Polymer type
88
+ * @param {ALPHABET} alphabet - Alphabet
89
+ * @return {Map<string, MolGraph>} - Mapping of monomer symbols to MolGraph objects*/
90
+ export async function getMonomersDictFromLib(monomerSequencesArray, monomerLib, polymerType, alphabet) {
91
+ // todo: exception - no gaps, no empty string monomers
92
+ const formattedMonomerLib = getFormattedMonomerLib(monomerLib, polymerType, alphabet);
93
+ const monomersDict = new Map();
94
+ const moduleRdkit = await grok.functions.call('Chem:getRdKitModule');
95
+ const pointerToBranchAngle = {
96
+ value: null
97
+ };
98
+ // this must NOT be placed after translating monomer sequences
99
+ // because adding branch monomers for nucleobases relies on these data
100
+ if (polymerType === "RNA" /* HELM_POLYMER_TYPE.RNA */) {
101
+ const symbols = (alphabet === "RNA" /* ALPHABET.RNA */) ?
102
+ [C.RIBOSE, C.PHOSPHATE] : [C.DEOXYRIBOSE, C.PHOSPHATE];
103
+ for (const sym of symbols)
104
+ addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle);
105
+ }
106
+ for (let rowI = 0; rowI < monomerSequencesArray.length; ++rowI) {
107
+ const monomerSeq = monomerSequencesArray[rowI];
108
+ for (const sym of monomerSeq) {
109
+ if (sym === '')
110
+ continue; // Skip gap/empty monomer for MSA
111
+ try {
166
112
  addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle);
167
- }
168
- for (let rowI = 0; rowI < monomerSequencesArray.length; ++rowI) {
169
- const monomerSeq = monomerSequencesArray[rowI];
170
- for (const sym of monomerSeq) {
171
- if (sym === '')
172
- continue; // Skip gap/empty monomer for MSA
173
- try {
174
- addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle);
175
- }
176
- catch (err) {
177
- const errTxt = errorToConsole(err);
178
- console.error(`bio lib: getMonomersDictFromLib() sym='${sym}', error:\n` + errTxt);
179
- const errMsg = `Сan't get monomer '${sym}' from library: ${errTxt}`; // Text for Datagrok error baloon
180
- throw new Error(errMsg);
181
- }
113
+ }
114
+ catch (err) {
115
+ const errTxt = errorToConsole(err);
116
+ console.error(`bio lib: getMonomersDictFromLib() sym='${sym}', error:\n` + errTxt);
117
+ const errMsg = `Сan't get monomer '${sym}' from library: ${errTxt}`; // Text for Datagrok error baloon
118
+ throw new Error(errMsg);
182
119
  }
183
120
  }
184
- return monomersDict;
185
- });
186
- }
187
- function getAngleBetweenSugarBranchAndOY(molGraph) {
188
- const x = molGraph.atoms.x;
189
- const y = molGraph.atoms.y;
190
- const rNode = molGraph.meta.rNodes[2] - 1;
191
- const terminalNode = molGraph.meta.terminalNodes[2] - 1;
192
- const xShift = x[rNode] - x[terminalNode];
193
- const yShift = y[rNode] - y[terminalNode];
194
- return Math.atan(yShift / xShift) + Math.PI / 2;
121
+ }
122
+ return monomersDict;
195
123
  }
196
- /** Adds MolGraph object for 'sym' to the monomers dict when necessary */
124
+ /** Adds MolGraph object for 'sym' to the monomers dict when necessary
125
+ * @param {Map<string, MolGraph>} monomersDict - Monomers dictionary
126
+ * @param {string} sym - Monomer symbol
127
+ * @param {Map<string, any>} formattedMonomerLib - Formatted monomer library
128
+ * @param {any} moduleRdkit - RDKit module
129
+ * @param {HELM_POLYMER_TYPE} polymerType - Polymer type
130
+ * @param {NumberWrapper} pointerToBranchAngle - Pointer to branch angle*/
197
131
  function addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle) {
198
132
  if (!monomersDict.has(sym)) {
199
133
  const monomerData = getMolGraph(sym, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle);
@@ -205,7 +139,13 @@ function addMonomerToDict(monomersDict, sym, formattedMonomerLib, moduleRdkit, p
205
139
  }
206
140
  }
207
141
  /** Construct the MolGraph object for specified monomerSymbol: the associated
208
- * graph is adjusted in XY plane and filled with default R-groups */
142
+ * graph is adjusted in XY plane and filled with default R-groups
143
+ * @param {string} monomerSymbol - Monomer symbol
144
+ * @param {Map<string, any>} formattedMonomerLib - Formatted monomer library
145
+ * @param {any} moduleRdkit - RDKit module
146
+ * @param {HELM_POLYMER_TYPE} polymerType - Polymer type
147
+ * @param {NumberWrapper} pointerToBranchAngle - Pointer to branch angle
148
+ * @return {MolGraph | null} - MolGraph object or null if monomerSymbol is absent in the library*/
209
149
  function getMolGraph(monomerSymbol, formattedMonomerLib, moduleRdkit, polymerType, pointerToBranchAngle) {
210
150
  if (!formattedMonomerLib.has(monomerSymbol)) {
211
151
  return null;
@@ -224,9 +164,9 @@ function getMolGraph(monomerSymbol, formattedMonomerLib, moduleRdkit, polymerTyp
224
164
  adjustPeptideMonomerGraph(monomerGraph);
225
165
  }
226
166
  else { // nucleotides
227
- if (monomerSymbol === RIBOSE || monomerSymbol === DEOXYRIBOSE)
167
+ if (monomerSymbol === C.RIBOSE || monomerSymbol === C.DEOXYRIBOSE)
228
168
  adjustSugarMonomerGraph(monomerGraph, pointerToBranchAngle);
229
- else if (monomerSymbol === PHOSPHATE)
169
+ else if (monomerSymbol === C.PHOSPHATE)
230
170
  adjustPhosphateMonomerGraph(monomerGraph);
231
171
  else
232
172
  adjustBaseMonomerGraph(monomerGraph, pointerToBranchAngle);
@@ -244,7 +184,7 @@ function setShiftsAndTerminalNodes(polymerType, monomerGraph, monomerSymbol) {
244
184
  removeNodeAndBonds(monomerGraph, monomerGraph.meta.rNodes[1]);
245
185
  }
246
186
  else { // nucleotides
247
- if (monomerSymbol === RIBOSE || monomerSymbol === DEOXYRIBOSE) {
187
+ if (monomerSymbol === C.RIBOSE || monomerSymbol === C.DEOXYRIBOSE) {
248
188
  // remove R2
249
189
  removeNodeAndBonds(monomerGraph, monomerGraph.meta.rNodes[1]);
250
190
  // set terminalNode2 (oxygen) as new R2
@@ -258,7 +198,7 @@ function setShiftsAndTerminalNodes(polymerType, monomerGraph, monomerSymbol) {
258
198
  // remove the branching r-group
259
199
  removeNodeAndBonds(monomerGraph, monomerGraph.meta.rNodes[2]);
260
200
  }
261
- else if (monomerSymbol === PHOSPHATE) {
201
+ else if (monomerSymbol === C.PHOSPHATE) {
262
202
  monomerGraph.meta.terminalNodes[0] = monomerGraph.meta.rNodes[0];
263
203
  shiftCoordinates(monomerGraph, -monomerGraph.atoms.x[monomerGraph.meta.terminalNodes[0] - 1], -monomerGraph.atoms.y[monomerGraph.meta.terminalNodes[0] - 1]);
264
204
  setShifts(monomerGraph, polymerType);
@@ -269,7 +209,13 @@ function setShiftsAndTerminalNodes(polymerType, monomerGraph, monomerSymbol) {
269
209
  }
270
210
  }
271
211
  }
272
- // todo: sdoc
212
+ /**
213
+ * Get monomer metadata object
214
+ * @param {Atoms} atoms - Atoms object
215
+ * @param {Bonds} bonds - Bonds object
216
+ * @param {string[]} capGroups - Cap groups
217
+ * @param {Map<number, number>} capGroupIdxMap - Cap group index map
218
+ * @return {MonomerMetadata}*/
273
219
  function getMonomerMetadata(atoms, bonds, capGroups, capGroupIdxMap) {
274
220
  const meta = {
275
221
  backboneShift: null,
@@ -282,8 +228,9 @@ function getMonomerMetadata(atoms, bonds, capGroups, capGroupIdxMap) {
282
228
  setTerminalNodes(bonds, meta);
283
229
  return meta;
284
230
  }
285
- /** Parse element symbols for R-groups from the HELM monomer library R-groups
286
- * field */
231
+ /** Parse element symbols for R-groups from the HELM monomer library R-group field
232
+ * @param {any[]} rGroupObjList - R-group object list
233
+ * @return {Map<number, number>} - Cap group index map*/
287
234
  export function parseCapGroups(rGroupObjList) {
288
235
  // specifically for HELMCoreLibrary
289
236
  // considered only monoatomic rgroups
@@ -299,7 +246,10 @@ export function parseCapGroups(rGroupObjList) {
299
246
  }
300
247
  return capGroupsArray;
301
248
  }
302
- /** Substitute the cap group elements instead of R# */
249
+ /** Substitute the cap group elements instead of R#
250
+ * @param {Atoms} atoms - Atoms object
251
+ * @param {string[]} capGroups - Cap groups
252
+ * @param {Map<number, number>} capGroupIdxMap - Cap group index map*/
303
253
  function substituteCapGroups(atoms, capGroups, capGroupIdxMap) {
304
254
  for (const [node, capIdx] of capGroupIdxMap)
305
255
  atoms.atomTypes[node - 1] = capGroups[capIdx - 1]; // -1 because molfile indexing starts from 1
@@ -341,7 +291,9 @@ function setTerminalNodes(bonds, meta) {
341
291
  ++i;
342
292
  }
343
293
  }
344
- /** Sets shifts in 'meta' attribute of MolGraph */
294
+ /** Sets shifts in 'meta' attribute of MolGraph
295
+ * @param {MolGraph} molGraph - MolGraph object
296
+ * @param {HELM_POLYMER_TYPE} polymerType - Polymer type*/
345
297
  function setShifts(molGraph, polymerType) {
346
298
  if (molGraph.meta.rNodes.length > 1) {
347
299
  molGraph.meta.backboneShift = getShiftBetweenNodes(molGraph, molGraph.meta.rNodes[1] - 1, molGraph.meta.terminalNodes[0] - 1);
@@ -350,7 +302,11 @@ function setShifts(molGraph, polymerType) {
350
302
  molGraph.meta.branchShift = getShiftBetweenNodes(molGraph, molGraph.meta.rNodes[2] - 1, molGraph.meta.terminalNodes[0] - 1);
351
303
  }
352
304
  }
353
- /** Returns the pair [xShift, yShift] for specified node indices */
305
+ /** Returns the pair [xShift, yShift] for specified node indices
306
+ * @param {MolGraph} molGraph - MolGraph object
307
+ * @param {number} rightNodeIdx - Right node index
308
+ * @param {number} leftNodeIdx - Left node index
309
+ * @return {number[]} - Shift between nodes*/
354
310
  function getShiftBetweenNodes(molGraph, rightNodeIdx, leftNodeIdx) {
355
311
  return [
356
312
  keepPrecision(molGraph.atoms.x[rightNodeIdx] -
@@ -360,35 +316,43 @@ function getShiftBetweenNodes(molGraph, rightNodeIdx, leftNodeIdx) {
360
316
  ];
361
317
  }
362
318
  /** Helper function necessary to build a correct V3000 molfile out of V2000 with
363
- * specified r-groups*/
319
+ * specified r-groups
320
+ * @param {string} molfileV2K - V2000 molfile
321
+ * @return {string} - V2000 molfile without R-group lines*/
364
322
  function removeRGroupLines(molfileV2K) {
365
- let begin = molfileV2K.indexOf(V2K_A_LINE, 0);
323
+ let begin = molfileV2K.indexOf(C.V2K_A_LINE, 0);
366
324
  if (begin === -1)
367
- begin = molfileV2K.indexOf(V2K_RGP_LINE);
368
- const end = molfileV2K.indexOf(V3K_END, begin);
325
+ begin = molfileV2K.indexOf(C.V2K_RGP_LINE);
326
+ const end = molfileV2K.indexOf(C.V3K_END, begin);
369
327
  return molfileV2K.substring(0, begin) + molfileV2K.substring(end);
370
328
  }
371
- /** V2000 to V3000 converter */
372
- function convertMolfileToV3K(molfileV2K, moduleRdkit) {
329
+ /** V2000 to V3000 converter
330
+ * @param {string} molfileV2K - V2000 molfile
331
+ * @param {any} moduleRdkit - RDKit module
332
+ * @return {string} - V3000 molfile*/
333
+ export function convertMolfileToV3K(molfileV2K, moduleRdkit) {
373
334
  // The standard Chem converter is not used here because it relies on creation of moduleRdkit on each iteration
374
335
  const molObj = moduleRdkit.get_mol(molfileV2K);
375
336
  const molfileV3K = molObj.get_v3Kmolblock();
376
337
  molObj.delete();
377
338
  return molfileV3K;
378
339
  }
379
- /** Parse V3000 bond block and construct the Bonds object */
380
- function parseBondBlock(molfileV3K, bondCount) {
340
+ /** Parse V3000 bond block and construct the Bonds object
341
+ * @param {string} molfileV3K - V3000 molfile
342
+ * @param {number} bondCount - Number of bonds
343
+ * @return {Bonds} - Bonds object*/
344
+ export function parseBondBlock(molfileV3K, bondCount) {
381
345
  const bondTypes = new Uint32Array(bondCount);
382
346
  const atomPairs = new Array(bondCount);
383
347
  const bondConfiguration = new Map();
384
348
  const kwargs = new Map();
385
- let begin = molfileV3K.indexOf(V3K_BEGIN_BOND_BLOCK);
349
+ let begin = molfileV3K.indexOf(C.V3K_BEGIN_BOND_BLOCK);
386
350
  begin = molfileV3K.indexOf('\n', begin);
387
351
  let end = begin;
388
352
  for (let i = 0; i < bondCount; ++i) {
389
353
  // parse bond type and atom pair
390
354
  const parsedValues = new Array(3);
391
- begin = molfileV3K.indexOf(V3K_BEGIN_DATA_LINE, end) + V3K_IDX_SHIFT;
355
+ begin = molfileV3K.indexOf(C.V3K_BEGIN_DATA_LINE, end) + C.V3K_IDX_SHIFT;
392
356
  end = molfileV3K.indexOf(' ', begin);
393
357
  for (let k = 0; k < 3; ++k) {
394
358
  begin = end + 1;
@@ -400,7 +364,7 @@ function parseBondBlock(molfileV3K, bondCount) {
400
364
  // parse keyword arguments
401
365
  const endOfLine = molfileV3K.indexOf('\n', begin);
402
366
  let lineRemainder = molfileV3K.slice(end, endOfLine);
403
- let beginCfg = lineRemainder.indexOf(V3K_BOND_CONFIG);
367
+ let beginCfg = lineRemainder.indexOf(C.V3K_BOND_CONFIG);
404
368
  if (beginCfg !== -1) {
405
369
  beginCfg = lineRemainder.indexOf('=', beginCfg) + 1;
406
370
  let endCfg = lineRemainder.indexOf(' ', beginCfg);
@@ -408,7 +372,7 @@ function parseBondBlock(molfileV3K, bondCount) {
408
372
  endCfg = lineRemainder.length;
409
373
  const bondConfig = parseInt(lineRemainder.slice(beginCfg, endCfg));
410
374
  bondConfiguration.set(i, bondConfig);
411
- const removedSubstring = V3K_BOND_CONFIG + bondConfig.toString();
375
+ const removedSubstring = C.V3K_BOND_CONFIG + bondConfig.toString();
412
376
  lineRemainder = lineRemainder.replace(removedSubstring, '');
413
377
  }
414
378
  if (!lineRemainder)
@@ -422,12 +386,14 @@ function parseBondBlock(molfileV3K, bondCount) {
422
386
  };
423
387
  }
424
388
  /** Constructs mapping of r-group nodes to default capGroups, all numeration starting from 1.
425
- * According to https://pubs.acs.org/doi/10.1021/ci3001925, R1 and R2 are the chain extending attachment points,
426
- * while R3 is the branching attachment point. */
427
- function parseCapGroupIdxMap(molfileV2K) {
389
+ * According to https://pubs.acs.org/doi/10.1021/ci3001925, R1 and R2 are the chain extending attachment points,
390
+ * while R3 is the branching attachment point.
391
+ * @param {string} molfileV2K - V2000 molfile
392
+ * @return {Map<number, number>} - Map of r-group nodes to default capGroups*/
393
+ export function parseCapGroupIdxMap(molfileV2K) {
428
394
  const capGroupIdxMap = new Map();
429
395
  // parse A-lines (RNA)
430
- let begin = molfileV2K.indexOf(V2K_A_LINE, 0);
396
+ let begin = molfileV2K.indexOf(C.V2K_A_LINE, 0);
431
397
  let end = begin;
432
398
  while (begin !== -1) {
433
399
  // parse the rNode to which the cap group is attached
@@ -438,13 +404,13 @@ function parseCapGroupIdxMap(molfileV2K) {
438
404
  end = molfileV2K.indexOf('\n', begin);
439
405
  const capGroup = parseInt(molfileV2K.substring(begin, end).replace(/^R/, ''));
440
406
  capGroupIdxMap.set(rNode, capGroup);
441
- begin = molfileV2K.indexOf(V2K_A_LINE, end);
407
+ begin = molfileV2K.indexOf(C.V2K_A_LINE, end);
442
408
  }
443
409
  // parse RGP lines (may be more than one in RNA monomers)
444
- begin = molfileV2K.indexOf(V2K_RGP_LINE, 0);
410
+ begin = molfileV2K.indexOf(C.V2K_RGP_LINE, 0);
445
411
  end = molfileV2K.indexOf('\n', begin);
446
412
  while (begin !== -1) {
447
- begin += V2K_RGP_SHIFT;
413
+ begin += C.V2K_RGP_SHIFT;
448
414
  end = molfileV2K.indexOf('\n', begin);
449
415
  const rgpStringParsed = molfileV2K.substring(begin, end)
450
416
  .replaceAll(/\s+/g, ' ')
@@ -458,14 +424,14 @@ function parseCapGroupIdxMap(molfileV2K) {
458
424
  else
459
425
  capGroupIdxMap.set(rgpIndicesArray[i], rgpIndicesArray[i + 1]);
460
426
  }
461
- begin = molfileV2K.indexOf(V2K_RGP_LINE, end);
427
+ begin = molfileV2K.indexOf(C.V2K_RGP_LINE, end);
462
428
  }
463
429
  return capGroupIdxMap;
464
430
  }
465
- function parseAtomAndBondCounts(molfileV3K) {
431
+ export function parseAtomAndBondCounts(molfileV3K) {
466
432
  molfileV3K = molfileV3K.replaceAll('\r', ''); // to handle old and new sdf standards
467
433
  // parse atom count
468
- let begin = molfileV3K.indexOf(V3K_BEGIN_COUNTS_LINE) + V3K_COUNTS_SHIFT;
434
+ let begin = molfileV3K.indexOf(C.V3K_BEGIN_COUNTS_LINE) + C.V3K_COUNTS_SHIFT;
469
435
  let end = molfileV3K.indexOf(' ', begin + 1);
470
436
  const numOfAtoms = parseInt(molfileV3K.substring(begin, end));
471
437
  // parse bond count
@@ -475,17 +441,20 @@ function parseAtomAndBondCounts(molfileV3K) {
475
441
  return { atomCount: numOfAtoms, bondCount: numOfBonds };
476
442
  }
477
443
  /** Parse V3000 atom block and return Atoms object. NOTICE: only atomTypes, x, y
478
- * and kwargs fields are set in the return value, with other fields dummy */
444
+ * and kwargs fields are set in the return value, with other fields dummy initialized.
445
+ * @param {string} molfileV3K - V3000 molfile
446
+ * @param {number} atomCount - number of atoms in the molecule
447
+ * @return {Atoms} - Atoms object */
479
448
  function parseAtomBlock(molfileV3K, atomCount) {
480
449
  const atomTypes = new Array(atomCount);
481
450
  const x = new Float32Array(atomCount);
482
451
  const y = new Float32Array(atomCount);
483
452
  const kwargs = new Array(atomCount);
484
- let begin = molfileV3K.indexOf(V3K_BEGIN_ATOM_BLOCK); // V3000 atoms block
453
+ let begin = molfileV3K.indexOf(C.V3K_BEGIN_ATOM_BLOCK); // V3000 atoms block
485
454
  begin = molfileV3K.indexOf('\n', begin);
486
455
  let end = begin;
487
456
  for (let i = 0; i < atomCount; i++) {
488
- begin = molfileV3K.indexOf(V3K_BEGIN_DATA_LINE, begin) + V3K_IDX_SHIFT;
457
+ begin = molfileV3K.indexOf(C.V3K_BEGIN_DATA_LINE, begin) + C.V3K_IDX_SHIFT;
489
458
  end = molfileV3K.indexOf(' ', begin); // skip the idx row
490
459
  // parse atom type
491
460
  begin = end + 1;
@@ -513,11 +482,12 @@ function parseAtomBlock(molfileV3K, atomCount) {
513
482
  kwargs: kwargs,
514
483
  };
515
484
  }
516
- /** Remove hydrogen nodes */
485
+ /** Remove hydrogen nodes
486
+ * @param {MolGraph} monomerGraph - monomer graph*/
517
487
  function removeHydrogen(monomerGraph) {
518
488
  let i = 0;
519
489
  while (i < monomerGraph.atoms.atomTypes.length) {
520
- if (monomerGraph.atoms.atomTypes[i] === HYDROGEN) {
490
+ if (monomerGraph.atoms.atomTypes[i] === C.HYDROGEN) {
521
491
  removeNodeAndBonds(monomerGraph, i + 1); // i + 1 because molfile node indexing starts from 1
522
492
  --i;
523
493
  // monomerGraph.atoms.atomTypes[i] = 'Li';
@@ -526,7 +496,9 @@ function removeHydrogen(monomerGraph) {
526
496
  }
527
497
  }
528
498
  /** Remove node 'removedNode' and the associated bonds. Notice, numeration of
529
- * nodes in molfiles starts from 1, not 0 */
499
+ * nodes in molfiles starts from 1, not 0
500
+ * @param {MolGraph} monomerGraph - monomer graph
501
+ * @param {number} removedNode - node to be removed*/
530
502
  function removeNodeAndBonds(monomerGraph, removedNode) {
531
503
  if (typeof removedNode !== 'undefined') {
532
504
  const removedNodeIdx = removedNode - 1;
@@ -604,7 +576,8 @@ function spliceTypedArray(TConstructor, typedArray, start, count) {
604
576
  }
605
577
  return result;
606
578
  }
607
- /** Adjust the peptide MolGraph to default/standardized position */
579
+ /** Adjust the peptide MolGraph to default/standardized position
580
+ * @param {MolGraph} monomer - monomer graph*/
608
581
  function adjustPeptideMonomerGraph(monomer) {
609
582
  const centeredNode = monomer.meta.terminalNodes[0] - 1; // node indexing in molfiles starts from 1
610
583
  const rotatedNode = monomer.meta.rNodes[0] - 1;
@@ -686,10 +659,12 @@ function adjustBaseMonomerGraph(monomer, pointerToBranchAngle) {
686
659
  }
687
660
  }
688
661
  function getEuclideanDistance(p1, p2) {
689
- return keepPrecision(Math.sqrt(Math.pow((p1.x - p2.x), 2) + Math.pow((p1.y - p2.y), 2)));
662
+ return keepPrecision(Math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2));
690
663
  }
691
664
  /** Flip carboxyl group with the radical in a peptide monomer in case the
692
- * carboxyl group is in the lower half-plane */
665
+ * carboxyl group is in the lower half-plane
666
+ * @param {MolGraph}monomer - peptide monomer
667
+ * @param {number}doubleBondedOxygen - index of the double-bonded oxygen atom*/
693
668
  function flipCarboxylAndRadical(monomer, doubleBondedOxygen) {
694
669
  // verify that the carboxyl group is in the lower half-plane
695
670
  if (monomer.atoms.y[monomer.meta.rNodes[1] - 1] < 0 &&
@@ -698,7 +673,10 @@ function flipCarboxylAndRadical(monomer, doubleBondedOxygen) {
698
673
  rotateCenteredGraph(monomer.atoms, -findAngleWithOX(monomer.atoms.x[monomer.meta.terminalNodes[1] - 1], monomer.atoms.y[monomer.meta.terminalNodes[1] - 1]));
699
674
  }
700
675
  }
701
- /** Finds angle between OY and the ray joining origin with (x, y) */
676
+ /** Finds angle between OY and the ray joining origin with (x, y)
677
+ * @param {number}x
678
+ * @param {number}y
679
+ * @return {number} angle in radians*/
702
680
  function findAngleWithOY(x, y) {
703
681
  let angle;
704
682
  if (x === 0) {
@@ -714,11 +692,17 @@ function findAngleWithOY(x, y) {
714
692
  }
715
693
  return angle;
716
694
  }
717
- /** Finds angle between OX and the ray joining origin with (x, y) */
695
+ /** Finds angle between OX and the ray joining origin with (x, y)
696
+ * @param {number}x
697
+ * @param {number}y
698
+ * @return {number} angle in radians
699
+ */
718
700
  function findAngleWithOX(x, y) {
719
701
  return findAngleWithOY(x, y) + Math.PI / 2;
720
702
  }
721
- /** Rotate the graph around the origin by 'angle' */
703
+ /** Rotate the graph around the origin by 'angle'
704
+ * @param {Atoms}atoms - atoms of the graph
705
+ * @param {number}angle - angle in radians*/
722
706
  function rotateCenteredGraph(atoms, angle) {
723
707
  if (angle !== 0) {
724
708
  const x = atoms.x;
@@ -732,15 +716,19 @@ function rotateCenteredGraph(atoms, angle) {
732
716
  }
733
717
  }
734
718
  }
735
- /** Flip monomer graph around OX axis preserving stereometry */
719
+ /** Flip monomer graph around OX axis preserving stereometry
720
+ * @param {MolGraph}monomer - monomer graph*/
736
721
  function flipMonomerAroundOX(monomer) {
737
722
  flipMolGraph(monomer, true);
738
723
  }
739
- /** Flip monomer graph around OY axis preserving stereometry */
724
+ /** Flip monomer graph around OY axis preserving stereometry
725
+ * @param {MolGraph}monomer - monomer graph*/
740
726
  function flipMonomerAroundOY(monomer) {
741
727
  flipMolGraph(monomer, false);
742
728
  }
743
- /** Flip graph around a specified axis: 'true' corresponds to OX, 'false' to OY */
729
+ /** Flip graph around a specified axis: 'true' corresponds to OX, 'false' to OY
730
+ * @param {MolGraph}molGraph - graph to flip
731
+ * @param {boolean}axis - axis to flip around*/
744
732
  function flipMolGraph(molGraph, axis) {
745
733
  if (axis) { // flipping around OX
746
734
  const y = molGraph.atoms.y;
@@ -759,9 +747,20 @@ function flipMolGraph(molGraph, axis) {
759
747
  orientation.set(key, newValue);
760
748
  }
761
749
  }
750
+ function getAngleBetweenSugarBranchAndOY(molGraph) {
751
+ const x = molGraph.atoms.x;
752
+ const y = molGraph.atoms.y;
753
+ const rNode = molGraph.meta.rNodes[2] - 1;
754
+ const terminalNode = molGraph.meta.terminalNodes[2] - 1;
755
+ const xShift = x[rNode] - x[terminalNode];
756
+ const yShift = y[rNode] - y[terminalNode];
757
+ return Math.atan(yShift / xShift) + Math.PI / 2;
758
+ }
762
759
  /** Flips double-bonded 'O' in carbonyl group with 'OH' in order for the monomers
763
- * to have standard representation simplifying their concatenation. The
764
- * monomer must already be adjusted with adjustPeptideMonomerGraph in order for this function to be implemented */
760
+ * to have standard representation simplifying their concatenation. The
761
+ * monomer must already be adjusted with adjustPeptideMonomerGraph in order for this function to be implemented
762
+ * @param {MolGraph}monomer - peptide monomer
763
+ * @param {number}doubleBondedOxygen - index of the double-bonded oxygen atom*/
765
764
  function flipHydroxilGroup(monomer, doubleBondedOxygen) {
766
765
  const x = monomer.atoms.x;
767
766
  // -1 below because indexing of nodes in molfiles starts from 1, unlike arrays
@@ -769,7 +768,9 @@ function flipHydroxilGroup(monomer, doubleBondedOxygen) {
769
768
  swapNodes(monomer, doubleBondedOxygen, monomer.meta.rNodes[1]);
770
769
  }
771
770
  /** Determine the number of node (starting from 1) corresponding to the
772
- * double-bonded oxygen of the carbonyl group */
771
+ * double-bonded oxygen of the carbonyl group
772
+ * @param {MolGraph}monomer - peptide monomer
773
+ * @return {number} index of the double-bonded oxygen atom*/
773
774
  function findDoubleBondedCarbonylOxygen(monomer) {
774
775
  const bondsMap = constructBondsMap(monomer);
775
776
  let doubleBondedOxygen = 0;
@@ -777,13 +778,16 @@ function findDoubleBondedCarbonylOxygen(monomer) {
777
778
  // iterate over the nodes bonded to the carbon and find the double one
778
779
  while (doubleBondedOxygen === 0) {
779
780
  const node = bondsMap.get(monomer.meta.terminalNodes[1])[i];
780
- if (monomer.atoms.atomTypes[node - 1] === OXYGEN && node !== monomer.meta.rNodes[1])
781
+ if (monomer.atoms.atomTypes[node - 1] === C.OXYGEN && node !== monomer.meta.rNodes[1])
781
782
  doubleBondedOxygen = node;
782
783
  i++;
783
784
  }
784
785
  return doubleBondedOxygen;
785
786
  }
786
- /** Swap the Cartesian coordinates of the two specified nodes in MolGraph */
787
+ /** Swap the Cartesian coordinates of the two specified nodes in MolGraph
788
+ * @param {MolGraph}monomer - monomer graph
789
+ * @param {number}nodeOne - index of the first node
790
+ * @param {number}nodeTwo - index of the second node*/
787
791
  function swapNodes(monomer, nodeOne, nodeTwo) {
788
792
  const nodeOneIdx = nodeOne - 1;
789
793
  const nodeTwoIdx = nodeTwo - 1;
@@ -796,23 +800,27 @@ function swapNodes(monomer, nodeOne, nodeTwo) {
796
800
  x[nodeTwoIdx] = tmpX;
797
801
  y[nodeTwoIdx] = tmpY;
798
802
  }
799
- /** Maps a node to the list of nodes bound to it */
803
+ /** Maps a node to the list of nodes bound to it
804
+ * @param {MolGraph}monomer - monomer graph
805
+ * @return {Map<number, Array<number>>} map of nodes to the list of nodes bound to them*/
800
806
  function constructBondsMap(monomer) {
801
- var _a;
802
807
  const map = new Map();
803
808
  for (const atomPairs of monomer.bonds.atomPairs) {
804
809
  for (let i = 0; i < 2; i++) {
805
810
  const key = atomPairs[i];
806
811
  const value = atomPairs[(i + 1) % 2];
807
812
  if (map.has(key))
808
- (_a = map.get(key)) === null || _a === void 0 ? void 0 : _a.push(value);
813
+ map.get(key)?.push(value);
809
814
  else
810
815
  map.set(key, new Array(1).fill(value));
811
816
  }
812
817
  }
813
818
  return map;
814
819
  }
815
- /** Shift molGraph in the XOY plane */
820
+ /** Shift molGraph in the XOY plane
821
+ * @param {MolGraph}molGraph - graph to shift
822
+ * @param {number}xShift - shift along X axis
823
+ * @param {number}yShift - shift along Y axis*/
816
824
  function shiftCoordinates(molGraph, xShift, yShift) {
817
825
  const x = molGraph.atoms.x;
818
826
  const y = molGraph.atoms.y;
@@ -822,245 +830,7 @@ function shiftCoordinates(molGraph, xShift, yShift) {
822
830
  y[i] = keepPrecision(y[i] + yShift);
823
831
  }
824
832
  }
825
- /** Translate a sequence of monomer symbols into Molfile V3000 */
826
- function monomerSeqToMolfile(monomerSeq, monomersDict, alphabet, polymerType) {
827
- if (monomerSeq.length === 0) {
828
- // throw new Error('monomerSeq is empty');
829
- return '';
830
- }
831
- // define atom and bond counts, taking into account the bond type
832
- const getAtomAndBondCounts = getResultingAtomBondCounts;
833
- const { atomCount, bondCount } = getAtomAndBondCounts(monomerSeq, monomersDict, alphabet, polymerType);
834
- // create arrays to store lines of the resulting molfile
835
- const molfileAtomBlock = new Array(atomCount);
836
- const molfileBondBlock = new Array(bondCount);
837
- let addMonomerToMolblock; // todo: types?
838
- let sugar = null;
839
- let phosphate = null;
840
- if (polymerType === "PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */) {
841
- addMonomerToMolblock = addAminoAcidToMolblock;
842
- }
843
- else { // nucleotides
844
- addMonomerToMolblock = addNucleotideToMolblock;
845
- sugar = (alphabet === "DNA" /* ALPHABET.DNA */) ? monomersDict.get(DEOXYRIBOSE) : monomersDict.get(RIBOSE);
846
- phosphate = monomersDict.get(PHOSPHATE);
847
- }
848
- const v = {
849
- i: 0,
850
- nodeShift: 0,
851
- bondShift: 0,
852
- backbonePositionShift: new Array(2).fill(0),
853
- branchPositionShift: new Array(2).fill(0),
854
- backboneAttachNode: 0,
855
- branchAttachNode: 0,
856
- flipFactor: 1,
857
- };
858
- const C = {
859
- sugar: sugar,
860
- phosphate: phosphate,
861
- seqLength: monomerSeq.length,
862
- atomCount: atomCount,
863
- bondCount: bondCount,
864
- };
865
- for (v.i = 0; v.i < C.seqLength; ++v.i) {
866
- const monomer = monomersDict.get(monomerSeq[v.i]);
867
- addMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v, C);
868
- }
869
- capResultingMolblock(molfileAtomBlock, molfileBondBlock, v, C);
870
- const molfileCountsLine = V3K_BEGIN_COUNTS_LINE + atomCount + ' ' + bondCount + V3K_COUNTS_LINE_ENDING;
871
- // todo: possible optimization may be achieved by replacing .join('') with +=
872
- // since counterintuitively joining an array into a new string is reportedly
873
- // slower than using += as below
874
- let result = '';
875
- result += V3K_HEADER_FIRST_LINE;
876
- result += V3K_HEADER_SECOND_LINE;
877
- result += V3K_BEGIN_CTAB_BLOCK;
878
- result += molfileCountsLine;
879
- result += V3K_BEGIN_ATOM_BLOCK;
880
- result += molfileAtomBlock.join('');
881
- result += V3K_END_ATOM_BLOCK;
882
- result += V3K_BEGIN_BOND_BLOCK;
883
- result += molfileBondBlock.join('');
884
- result += V3K_END_BOND_BLOCK;
885
- result += V3K_END_CTAB_BLOCK;
886
- result += V3K_END;
887
- // return molfileParts.join('');
888
- return result;
889
- }
890
- /** Cap the resulting (after sewing up all the monomers) molfile with 'O' */
891
- function capResultingMolblock(molfileAtomBlock, molfileBondBlock, v, C) {
892
- // add terminal oxygen
893
- const atomIdx = v.nodeShift + 1;
894
- molfileAtomBlock[C.atomCount] = V3K_BEGIN_DATA_LINE + atomIdx + ' ' +
895
- OXYGEN + ' ' + keepPrecision(v.backbonePositionShift[0]) + ' ' +
896
- v.flipFactor * keepPrecision(v.backbonePositionShift[1]) + ' ' + '0.000000 0' + '\n';
897
- // add terminal bond
898
- const firstAtom = v.backboneAttachNode;
899
- const secondAtom = atomIdx;
900
- molfileBondBlock[C.bondCount] = V3K_BEGIN_DATA_LINE + v.bondShift + ' ' +
901
- 1 + ' ' + firstAtom + ' ' + secondAtom + '\n';
902
- }
903
- function addAminoAcidToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v) {
904
- v.flipFactor = Math.pow((-1), (v.i % 2)); // to flip every even monomer over OX
905
- addBackboneMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v);
906
- }
907
- function addBackboneMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v) {
908
- // todo: remove these comments to the docstrings of the corr. functions
909
- // construnct the lines of V3K molfile atom block
910
- fillAtomLines(monomer, molfileAtomBlock, v);
911
- // construct the lines of V3K molfile bond block
912
- fillBondLines(monomer, molfileBondBlock, v);
913
- // peptide bond
914
- fillChainExtendingBond(monomer, molfileBondBlock, v);
915
- // update branch variables if necessary
916
- if (monomer.meta.branchShift !== null && monomer.meta.terminalNodes.length > 2)
917
- updateBranchVariables(monomer, v);
918
- // update loop variables
919
- updateChainExtendingVariables(monomer, v);
920
- }
921
- function addNucleotideToMolblock(nucleobase, molfileAtomBlock, molfileBondBlock, v, C) {
922
- // construnct the lines of V3K molfile atom block corresponding to phosphate
923
- // and sugar
924
- if (v.i === 0) {
925
- addBackboneMonomerToMolblock(C.sugar, molfileAtomBlock, molfileBondBlock, v);
926
- }
927
- else {
928
- for (const monomer of [C.phosphate, C.sugar])
929
- addBackboneMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v);
930
- }
931
- addBranchMonomerToMolblock(nucleobase, molfileAtomBlock, molfileBondBlock, v);
932
- }
933
- function addBranchMonomerToMolblock(monomer, molfileAtomBlock, molfileBondBlock, v) {
934
- fillBranchAtomLines(monomer, molfileAtomBlock, v);
935
- fillBondLines(monomer, molfileBondBlock, v);
936
- fillBackboneToBranchBond(monomer, molfileBondBlock, v);
937
- // C-N bond
938
- const bondIdx = v.bondShift;
939
- const firstAtom = v.branchAttachNode;
940
- const secondAtom = monomer.meta.terminalNodes[0] + v.nodeShift;
941
- molfileBondBlock[bondIdx - 1] = V3K_BEGIN_DATA_LINE + bondIdx + ' ' +
942
- 1 + ' ' + firstAtom + ' ' + secondAtom + '\n';
943
- // update loop variables
944
- v.bondShift += monomer.bonds.atomPairs.length + 1;
945
- v.nodeShift += monomer.atoms.atomTypes.length;
946
- }
947
- function updateChainExtendingVariables(monomer, v) {
948
- v.backboneAttachNode = v.nodeShift + monomer.meta.terminalNodes[1];
949
- v.bondShift += monomer.bonds.atomPairs.length + 1;
950
- v.nodeShift += monomer.atoms.atomTypes.length;
951
- v.backbonePositionShift[0] += monomer.meta.backboneShift[0]; // todo: non-null check
952
- v.backbonePositionShift[1] += v.flipFactor * monomer.meta.backboneShift[1];
953
- }
954
- function updateBranchVariables(monomer, v) {
955
- v.branchAttachNode = v.nodeShift + monomer.meta.terminalNodes[2];
956
- for (let i = 0; i < 2; ++i)
957
- v.branchPositionShift[i] = v.backbonePositionShift[i] + monomer.meta.branchShift[i];
958
- }
959
- function fillAtomLines(monomer, molfileAtomBlock, v) {
960
- for (let j = 0; j < monomer.atoms.atomTypes.length; ++j) {
961
- const atomIdx = v.nodeShift + j + 1;
962
- molfileAtomBlock[v.nodeShift + j] = V3K_BEGIN_DATA_LINE + atomIdx + ' ' +
963
- monomer.atoms.atomTypes[j] + ' ' +
964
- keepPrecision(v.backbonePositionShift[0] + monomer.atoms.x[j]) + ' ' +
965
- keepPrecision(v.backbonePositionShift[1] + v.flipFactor * monomer.atoms.y[j]) +
966
- ' ' + monomer.atoms.kwargs[j];
967
- }
968
- }
969
- // todo: remove as quickfix
970
- function fillBranchAtomLines(monomer, molfileAtomBlock, v) {
971
- for (let j = 0; j < monomer.atoms.atomTypes.length; ++j) {
972
- const atomIdx = v.nodeShift + j + 1;
973
- molfileAtomBlock[v.nodeShift + j] = V3K_BEGIN_DATA_LINE + atomIdx + ' ' +
974
- monomer.atoms.atomTypes[j] + ' ' +
975
- keepPrecision(v.branchPositionShift[0] + monomer.atoms.x[j]) + ' ' +
976
- keepPrecision(v.branchPositionShift[1] + v.flipFactor * monomer.atoms.y[j]) +
977
- ' ' + monomer.atoms.kwargs[j];
978
- }
979
- }
980
- function fillBondLines(monomer, molfileBondBlock, v) {
981
- // construct the lines of V3K molfile bond block
982
- for (let j = 0; j < monomer.bonds.atomPairs.length; ++j) {
983
- const bondIdx = v.bondShift + j + 1;
984
- const firstAtom = monomer.bonds.atomPairs[j][0] + v.nodeShift;
985
- const secondAtom = monomer.bonds.atomPairs[j][1] + v.nodeShift;
986
- let bondCfg = '';
987
- if (monomer.bonds.bondConfiguration.has(j)) {
988
- // flip orientation when necessary
989
- let orientation = monomer.bonds.bondConfiguration.get(j);
990
- if (v.flipFactor < 0)
991
- orientation = (orientation === 1) ? 3 : 1;
992
- bondCfg = ' CFG=' + orientation;
993
- }
994
- const kwargs = monomer.bonds.kwargs.has(j) ?
995
- ' ' + monomer.bonds.kwargs.get(j) : '';
996
- molfileBondBlock[v.bondShift + j] = V3K_BEGIN_DATA_LINE + bondIdx + ' ' +
997
- monomer.bonds.bondTypes[j] + ' ' +
998
- firstAtom + ' ' + secondAtom + bondCfg + kwargs + '\n';
999
- }
1000
- }
1001
- function fillChainExtendingBond(monomer, molfileBondBlock, v) {
1002
- if (v.backboneAttachNode !== 0) {
1003
- const bondIdx = v.bondShift;
1004
- const firstAtom = v.backboneAttachNode;
1005
- const secondAtom = monomer.meta.terminalNodes[0] + v.nodeShift;
1006
- molfileBondBlock[v.bondShift - 1] = V3K_BEGIN_DATA_LINE + bondIdx + ' ' +
1007
- 1 + ' ' + firstAtom + ' ' + secondAtom + '\n';
1008
- }
1009
- }
1010
- // todo: remove
1011
- function fillBackboneToBranchBond(branchMonomer, molfileBondBlock, v) {
1012
- const bondIdx = v.bondShift;
1013
- const firstAtom = v.branchAttachNode;
1014
- const secondAtom = branchMonomer.meta.terminalNodes[0] + v.nodeShift;
1015
- molfileBondBlock[bondIdx - 1] = V3K_BEGIN_DATA_LINE + bondIdx + ' ' +
1016
- 1 + ' ' + firstAtom + ' ' + secondAtom + '\n';
1017
- }
1018
- /** Compute the atom/bond counts for the resulting molfile, depending on the
1019
- * type of polymer (peptide/nucleotide) */
1020
- function getResultingAtomBondCounts(monomerSeq, monomersDict, alphabet, polymerType) {
1021
- let atomCount = 0;
1022
- let bondCount = 0;
1023
- // sum up all the atoms/nodes provided by the sequence
1024
- for (const monomerSymbol of monomerSeq) {
1025
- if (monomerSymbol === '')
1026
- continue; // Skip for gap/empty monomer in MSA
1027
- const monomer = monomersDict.get(monomerSymbol);
1028
- atomCount += monomer.atoms.x.length;
1029
- bondCount += monomer.bonds.bondTypes.length;
1030
- }
1031
- // add extra values depending on the polymer type
1032
- if (polymerType === "PEPTIDE" /* HELM_POLYMER_TYPE.PEPTIDE */) {
1033
- // add the rightmost/terminating cap group 'OH' (i.e. 'O')
1034
- atomCount += 1;
1035
- // add chain-extending bonds (C-NH per each monomer pair and terminal C-OH)
1036
- bondCount += monomerSeq.length;
1037
- }
1038
- else { // nucleotides
1039
- const sugar = (alphabet === "DNA" /* ALPHABET.DNA */) ?
1040
- monomersDict.get(DEOXYRIBOSE) : monomersDict.get(RIBOSE);
1041
- const phosphate = monomersDict.get(PHOSPHATE);
1042
- // add phosphate per each pair of nucleobase symbols
1043
- atomCount += (monomerSeq.length - 1) * phosphate.atoms.x.length;
1044
- // add sugar per each nucleobase symbol
1045
- atomCount += monomerSeq.length * sugar.atoms.x.length;
1046
- // add the leftmost cap group 'OH' (i.e. 'O')
1047
- atomCount += 1;
1048
- // add bonds from phosphate monomers
1049
- bondCount += (monomerSeq.length - 1) * phosphate.bonds.bondTypes.length;
1050
- // add bonds from sugar monomers
1051
- bondCount += monomerSeq.length * sugar.bonds.bondTypes.length;
1052
- // exclude the first chain-extending bond O-P (absent, no 'leftmost' phosphate)
1053
- bondCount -= 1;
1054
- // add chain-extending and branch bonds (O-P, C-O and C-N per each nucleotide)
1055
- bondCount += monomerSeq.length * 3;
1056
- }
1057
- return { atomCount, bondCount };
1058
- }
1059
- /** Keep precision upon floating point operations over atom coordinates */
1060
- function keepPrecision(x) {
1061
- return Math.round(PRECISION_FACTOR * x) / PRECISION_FACTOR;
1062
- }
1063
- function convertMolGraphToMolfileV3K(molGraph) {
833
+ export function convertMolGraphToMolfileV3K(molGraph) {
1064
834
  // counts line
1065
835
  const atomType = molGraph.atoms.atomTypes;
1066
836
  const x = molGraph.atoms.x;
@@ -1073,7 +843,7 @@ function convertMolGraphToMolfileV3K(molGraph) {
1073
843
  const atomCount = atomType.length;
1074
844
  const bondCount = molGraph.bonds.bondTypes.length;
1075
845
  // todo rewrite using constants
1076
- const molfileCountsLine = V3K_BEGIN_COUNTS_LINE + atomCount + ' ' + bondCount + V3K_COUNTS_LINE_ENDING;
846
+ const molfileCountsLine = C.V3K_BEGIN_COUNTS_LINE + atomCount + ' ' + bondCount + C.V3K_COUNTS_LINE_ENDING;
1077
847
  // atom block
1078
848
  let molfileAtomBlock = '';
1079
849
  for (let i = 0; i < atomCount; ++i) {
@@ -1087,7 +857,7 @@ function convertMolGraphToMolfileV3K(molGraph) {
1087
857
  // formatted[1] = formatted[1].padEnd(V3K_ATOM_COORDINATE_PRECISION, '0');
1088
858
  // coordinate[k] = formatted.join('.');
1089
859
  // }
1090
- const atomLine = V3K_BEGIN_DATA_LINE + atomIdx + ' ' + atomType[i] + ' ' +
860
+ const atomLine = C.V3K_BEGIN_DATA_LINE + atomIdx + ' ' + atomType[i] + ' ' +
1091
861
  coordinate[0] + ' ' + coordinate[1] + ' ' + atomKwargs[i];
1092
862
  molfileAtomBlock += atomLine;
1093
863
  }
@@ -1099,54 +869,54 @@ function convertMolGraphToMolfileV3K(molGraph) {
1099
869
  const secondAtom = atomPair[i][1];
1100
870
  const kwargs = bondKwargs.has(i) ? ' ' + bondKwargs.get(i) : '';
1101
871
  const bondCfg = bondConfig.has(i) ? ' CFG=' + bondConfig.get(i) : '';
1102
- const bondLine = V3K_BEGIN_DATA_LINE + bondIdx + ' ' + bondType[i] + ' ' +
872
+ const bondLine = C.V3K_BEGIN_DATA_LINE + bondIdx + ' ' + bondType[i] + ' ' +
1103
873
  firstAtom + ' ' + secondAtom + bondCfg + kwargs + '\n';
1104
874
  molfileBondBlock += bondLine;
1105
875
  }
1106
876
  const molfileParts = [
1107
- V3K_HEADER_FIRST_LINE,
1108
- V3K_HEADER_SECOND_LINE,
1109
- V3K_BEGIN_CTAB_BLOCK,
877
+ C.V3K_HEADER_FIRST_LINE,
878
+ C.V3K_HEADER_SECOND_LINE,
879
+ C.V3K_BEGIN_CTAB_BLOCK,
1110
880
  molfileCountsLine,
1111
- V3K_BEGIN_ATOM_BLOCK,
881
+ C.V3K_BEGIN_ATOM_BLOCK,
1112
882
  molfileAtomBlock,
1113
- V3K_END_ATOM_BLOCK,
1114
- V3K_BEGIN_BOND_BLOCK,
883
+ C.V3K_END_ATOM_BLOCK,
884
+ C.V3K_BEGIN_BOND_BLOCK,
1115
885
  molfileBondBlock,
1116
- V3K_END_BOND_BLOCK,
1117
- V3K_END_CTAB_BLOCK,
1118
- V3K_END,
886
+ C.V3K_END_BOND_BLOCK,
887
+ C.V3K_END_CTAB_BLOCK,
888
+ C.V3K_END,
1119
889
  ];
1120
890
  const resultingMolfile = molfileParts.join('');
1121
891
  // console.log(resultingMolfile);
1122
892
  return resultingMolfile;
1123
893
  }
1124
- export function getSymbolToCappedMolfileMap(monomersLibList) {
1125
- return __awaiter(this, void 0, void 0, function* () {
1126
- if (DG.Func.find({ package: 'Chem', name: 'getRdKitModule' }).length === 0) {
1127
- grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
1128
- return;
1129
- }
1130
- const symbolToCappedMolfileMap = new Map();
1131
- const moduleRdkit = yield grok.functions.call('Chem:getRdKitModule');
1132
- for (const monomerLibObject of monomersLibList) {
1133
- const monomerSymbol = monomerLibObject["symbol" /* HELM_FIELDS.SYMBOL */];
1134
- const capGroups = parseCapGroups(monomerLibObject["rgroups" /* HELM_FIELDS.RGROUPS */]);
1135
- const capGroupIdxMap = parseCapGroupIdxMap(monomerLibObject["molfile" /* HELM_FIELDS.MOLFILE */]);
1136
- const molfileV3K = convertMolfileToV3K(removeRGroupLines(monomerLibObject["molfile" /* HELM_FIELDS.MOLFILE */]), moduleRdkit);
1137
- const counts = parseAtomAndBondCounts(molfileV3K);
1138
- const atoms = parseAtomBlock(molfileV3K, counts.atomCount);
1139
- const bonds = parseBondBlock(molfileV3K, counts.bondCount);
1140
- const meta = getMonomerMetadata(atoms, bonds, capGroups, capGroupIdxMap);
1141
- const monomerGraph = { atoms: atoms, bonds: bonds, meta: meta };
1142
- removeHydrogen(monomerGraph);
1143
- const molfile = convertMolGraphToMolfileV3K(monomerGraph);
1144
- symbolToCappedMolfileMap.set(monomerSymbol, molfile);
1145
- }
1146
- return symbolToCappedMolfileMap;
1147
- });
894
+ export async function getSymbolToCappedMolfileMap(monomersLibList) {
895
+ if (DG.Func.find({ package: 'Chem', name: 'getRdKitModule' }).length === 0) {
896
+ grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
897
+ return;
898
+ }
899
+ const symbolToCappedMolfileMap = new Map();
900
+ const moduleRdkit = await grok.functions.call('Chem:getRdKitModule');
901
+ for (const monomerLibObject of monomersLibList) {
902
+ const monomerSymbol = monomerLibObject["symbol" /* HELM_FIELDS.SYMBOL */];
903
+ const capGroups = parseCapGroups(monomerLibObject["rgroups" /* HELM_FIELDS.RGROUPS */]);
904
+ const capGroupIdxMap = parseCapGroupIdxMap(monomerLibObject["molfile" /* HELM_FIELDS.MOLFILE */]);
905
+ const molfileV3K = convertMolfileToV3K(removeRGroupLines(monomerLibObject["molfile" /* HELM_FIELDS.MOLFILE */]), moduleRdkit);
906
+ const counts = parseAtomAndBondCounts(molfileV3K);
907
+ const atoms = parseAtomBlock(molfileV3K, counts.atomCount);
908
+ const bonds = parseBondBlock(molfileV3K, counts.bondCount);
909
+ const meta = getMonomerMetadata(atoms, bonds, capGroups, capGroupIdxMap);
910
+ const monomerGraph = { atoms: atoms, bonds: bonds, meta: meta };
911
+ removeHydrogen(monomerGraph);
912
+ const molfile = convertMolGraphToMolfileV3K(monomerGraph);
913
+ symbolToCappedMolfileMap.set(monomerSymbol, molfile);
914
+ }
915
+ return symbolToCappedMolfileMap;
1148
916
  }
1149
- /** Get the V3K molfile corresponding to the capped Monomer (default cap groups) */
917
+ /** Get the V3K molfile corresponding to the capped Monomer (default cap groups)
918
+ * @param {Monomer} monomer
919
+ * @return {string} V3K molfile*/
1150
920
  export function capPeptideMonomer(monomer) {
1151
921
  const funcList = DG.Func.find({ package: 'Chem', name: 'getRdKitModule' });
1152
922
  const moduleRdkit = funcList[0].apply();