@datagrok/bio 2.12.11 → 2.12.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +4 -1
- package/CHANGELOG.md +10 -0
- package/dist/246.js +2 -0
- package/dist/246.js.map +1 -0
- package/dist/42.js +1 -1
- package/dist/42.js.map +1 -1
- package/dist/545.js +3 -0
- package/dist/545.js.map +1 -0
- package/dist/590.js.map +1 -1
- package/dist/package-test.js +5 -5
- package/dist/package-test.js.LICENSE.txt +0 -8
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +5 -5
- package/dist/package.js.LICENSE.txt +0 -8
- package/dist/package.js.map +1 -1
- package/package.json +7 -6
- package/src/package.ts +2 -2
- package/src/tests/renderers-test.ts +47 -1
- package/src/utils/cell-renderer.ts +28 -3
- package/src/utils/helm-to-molfile/converter/connection-list.ts +40 -0
- package/src/utils/helm-to-molfile/converter/const.ts +4 -0
- package/src/utils/helm-to-molfile/converter/converter.ts +124 -0
- package/src/utils/helm-to-molfile/converter/helm.ts +112 -0
- package/src/utils/helm-to-molfile/converter/index.ts +1 -0
- package/src/utils/helm-to-molfile/converter/mol-atoms-v2k.ts +24 -0
- package/src/utils/helm-to-molfile/converter/mol-atoms-v3k.ts +38 -0
- package/src/utils/helm-to-molfile/converter/mol-atoms.ts +44 -0
- package/src/utils/helm-to-molfile/converter/mol-bonds-v2k.ts +26 -0
- package/src/utils/helm-to-molfile/converter/mol-bonds-v3k.ts +30 -0
- package/src/utils/helm-to-molfile/converter/mol-bonds.ts +56 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper-factory.ts +16 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper-old.ts +100 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper-v2k.ts +21 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper-v3k.ts +21 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +79 -0
- package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +103 -0
- package/src/utils/helm-to-molfile/converter/polymer.ts +99 -0
- package/src/utils/helm-to-molfile/converter/position-handler.ts +23 -0
- package/src/utils/helm-to-molfile/converter/r-group-handler.ts +122 -0
- package/src/utils/helm-to-molfile/converter/simple-polymer.ts +89 -0
- package/src/utils/helm-to-molfile/converter/types.ts +12 -0
- package/src/utils/helm-to-molfile/utils.ts +32 -0
- package/src/utils/poly-tool/const.ts +0 -4
- package/src/utils/poly-tool/transformation.ts +126 -62
- package/src/utils/sequence-to-mol.ts +1 -1
- package/webpack.config.js +4 -3
- package/dist/709.js +0 -2
- package/dist/709.js.map +0 -1
- package/dist/777.js +0 -3
- package/dist/777.js.map +0 -1
- package/link-bio +0 -7
- package/setup +0 -52
- package/src/utils/atomic-works.ts +0 -367
- package/src/utils/helm-to-molfile.ts +0 -959
- /package/dist/{777.js.LICENSE.txt → 545.js.LICENSE.txt} +0 -0
|
@@ -1,959 +0,0 @@
|
|
|
1
|
-
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
-
import * as grok from 'datagrok-api/grok';
|
|
3
|
-
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as OCL from 'openchemlib/full';
|
|
5
|
-
|
|
6
|
-
import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler';
|
|
7
|
-
import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
|
|
8
|
-
import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
9
|
-
import {HELM_POLYMER_TYPE, HELM_RGROUP_FIELDS} from '@datagrok-libraries/bio/src/utils/const';
|
|
10
|
-
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
11
|
-
|
|
12
|
-
import {MonomerLibManager} from './monomer-lib/lib-manager';
|
|
13
|
-
|
|
14
|
-
import {_package} from '../package';
|
|
15
|
-
|
|
16
|
-
/**
 * Literal values used when reading V2000 (V2K) molfiles.
 * NOTE(review): the CTfile V2000 format writes these line prefixes with two
 * spaces after the first letter; confirm the string literals below have not
 * been whitespace-collapsed before relying on them.
 */
const enum V2K_CONST {
  /** Maximum atom count */
  MAX_ATOM_COUNT = 999,
  /** Prefix of the lines that relate atoms to R group ids */
  RGP_LINE_START = 'M RGP',
  /** Prefix of an atom alias line; the alias text is on the line that follows it */
  ATOM_ALIAS_LINE_START = 'A ',
}

/** Separator between sections of a HELM string */
const HELM_SECTION_SEPARATOR = '$';

/** Separator between items within a HELM section */
const HELM_ITEM_SEPARATOR = '|';

/** Element symbol that marks an R group atom in a molfile atom line */
const R_GROUP_ELEMENT_SYMBOL = 'R#';

/** Element symbol of hydrogen; R groups capped by it are removed rather than replaced */
const HYDROGEN_SYMBOL = 'H';

/** Role of a monomer within a simple polymer */
const enum HELM_MONOMER_TYPE {
  BACKBONE = 0,
  BRANCH = 1,
}
|
|
30
|
-
|
|
31
|
-
/** One end of a bond between monomers: which monomer, and through which of its R groups */
type Bond = {
  /** Monomer index, starting from 0: global for a complex polymer, local for a simple polymer */
  monomerIdx: number;
  /** R group id, starting from 1 */
  rGroupId: number;
};

/** Location of a node within the connection list / bond block */
type PositionInBonds = {
  /** Index of the bond line, starting from 0 */
  bondLineIdx: number;
  /** Index of the atom within the bonded pair: 0 for the first atom, 1 for the second */
  nodeIdx: number;
};
|
|
43
|
-
|
|
44
|
-
/**
 * Converts a HELM column into a molfile column, adds that column to the
 * dataframe and then re-runs semantic type detection on the dataframe.
 * @param df dataframe that receives the new column
 * @param helmCol column with HELM strings
 */
export async function helm2mol(df: DG.DataFrame, helmCol: DG.Column<string>): Promise<void> {
  const molfileColumn = await getMolColumnFromHelm(df, helmCol);
  df.columns.add(molfileColumn, true);
  await grok.data.detectSemanticTypes(df);
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
/**
 * Builds a molfile column from a HELM column and tags it with the molecule
 * semantic type. The column is returned to the caller; this function does not
 * add it to the dataframe.
 * @param df dataframe handed to the converter
 * @param helmCol column with HELM strings
 * @param chiralityEngine forwarded as is to the converter
 * @returns the new molfile column
 */
export async function getMolColumnFromHelm(
  df: DG.DataFrame, helmCol: DG.Column<string>, chiralityEngine?: boolean
): Promise<DG.Column<string>> {
  const molfileColumn = await new HelmToMolfileConverter(helmCol, df)
    .convertToRdKitBeautifiedMolfileColumn(chiralityEngine);
  molfileColumn.semType = DG.SEMTYPE.MOLECULE;
  return molfileColumn;
}
|
|
61
|
-
|
|
62
|
-
/**
 * Builds a SMILES column from a HELM column and tags it with the molecule
 * semantic type. The column is returned to the caller; this function does not
 * add it to the dataframe.
 * @param df dataframe handed to the converter
 * @param helmCol column with HELM strings
 * @returns the new SMILES column
 */
export async function getSmilesColumnFromHelm(
  df: DG.DataFrame, helmCol: DG.Column<string>
): Promise<DG.Column<string>> {
  const smilesColumn = await new HelmToMolfileConverter(helmCol, df).convertToSmiles();
  smilesColumn.semType = DG.SEMTYPE.MOLECULE;
  return smilesColumn;
}
|
|
70
|
-
|
|
71
|
-
/**
 * Converts a column of HELM strings into molfile or SMILES columns.
 * The dataframe is consulted only for unused column names; none of the
 * methods adds a column to it.
 */
export class HelmToMolfileConverter {
  /**
   * @param helmColumn column with HELM strings
   * @param df dataframe whose column names are checked when naming the results
   */
  constructor(private helmColumn: DG.Column<string>, private df: DG.DataFrame) {}

  /**
   * Builds a column named `smiles(<HELM column name>)` (made unique within the
   * dataframe). `null` conversion results are stored as empty strings.
   */
  async convertToSmiles(): Promise<DG.Column<string>> {
    const smiles = await this.getSmilesList();
    const columnName = this.df.columns.getUnusedName(`smiles(${this.helmColumn.name})`);
    return DG.Column.fromStrings(columnName, smiles.map((molecule) => molecule === null ? '' : molecule));
  }

  /** Converts every V2K molfile produced from the HELM column into SMILES */
  private async getSmilesList(): Promise<string[]> {
    const molfilesV2K = (await this.convertToMolfileV2KColumn()).toList();
    const smiles = molfilesV2K.map((mol) => DG.chem.convert(mol, DG.chem.Notation.MolBlock, DG.chem.Notation.Smiles));
    return smiles;
  }

  /**
   * Serializes RDKit molecules to molblocks and re-writes them as V3000
   * molfiles with OpenChemLib, reporting progress in the task bar.
   * Every non-null molecule in `beautifiedMols` is deleted by this method.
   * @param beautifiedMols RDKit molecules; `null` entries are treated as empty molblocks
   * @param columnName name of the resulting column
   */
  async getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string) {
    const beautifiedMolV2000 = beautifiedMols.map((mol) => {
      if (mol === null)
        return '';
      try {
        return mol.get_molblock();
      } finally {
        // release the RDKit molecule even when serialization throws
        mol.delete();
      }
    });
    const molv3000Arr = new Array<string>(beautifiedMolV2000.length);
    const chiralityPb = DG.TaskBarProgressIndicator.create(`Handling chirality...`);
    try {
      for (let i = 0; i < beautifiedMolV2000.length; i++) {
        const oclMolecule = OCL.Molecule.fromMolfile(beautifiedMolV2000[i]);
        const molV3000 = oclMolecule.toMolfileV3();
        // only the first occurrence of 'STERAC1' is replaced
        // NOTE(review): presumably switches the stereo collection from racemic to absolute; confirm
        molv3000Arr[i] = molV3000.replace('STERAC1', 'STEABS');
        const progress = i / beautifiedMolV2000.length * 100;
        chiralityPb.update(progress, `${progress.toFixed(2)}% of molecules completed`);
      }
    } finally {
      // the indicator must not stay in the task bar when a conversion throws
      chiralityPb.close();
    }
    return DG.Column.fromStrings(columnName, molv3000Arr);
  }

  /**
   * Builds a column named `molfile(<HELM column name>)` (made unique within the
   * dataframe): HELM is converted to SMILES, each SMILES is parsed by RDKit and
   * its depiction is normalized and straightened. Rows whose SMILES is empty
   * or cannot be parsed get an empty string.
   * @param chiralityEngine when truthy, the molfiles are produced by
   * {@link getMolV3000ViaOCL}; otherwise by RDKit's `get_v3Kmolblock`
   */
  async convertToRdKitBeautifiedMolfileColumn(chiralityEngine?: boolean): Promise<DG.Column<string>> {
    const smiles = await this.getSmilesList();
    const rdKitModule: RDModule = await grok.functions.call('Chem:getRdKitModule');
    const beautifiedMols = smiles.map((item) => {
      if (item === '')
        return null;
      const mol = rdKitModule.get_mol(item);
      if (!mol)
        return null;
      mol.normalize_depiction(1);
      mol.straighten_depiction(true);
      return mol;
    });
    const columnName = this.df.columns.getUnusedName(`molfile(${this.helmColumn.name})`);

    if (chiralityEngine)
      return await this.getMolV3000ViaOCL(beautifiedMols, columnName);
    return DG.Column.fromStrings(columnName, beautifiedMols.map((mol) => {
      if (mol === null)
        return '';
      try {
        return mol.get_v3Kmolblock();
      } finally {
        // release the RDKit molecule even when serialization throws
        mol.delete();
      }
    }));
  }

  /**
   * Builds a column named `molfileV2K(<HELM column name>)` (made unique within
   * the dataframe). Conversion is best-effort per row: an empty HELM value
   * yields an empty string, and a row whose conversion throws is logged and
   * also yields an empty string.
   */
  async convertToMolfileV2KColumn(): Promise<DG.Column<string>> {
    const polymerGraphColumn: DG.Column<string> = await this.getPolymerGraphColumn();
    const molfileList = polymerGraphColumn.toList().map(
      (pseudoMolfile: string, idx: number) => {
        const helm = this.helmColumn.get(idx);
        if (!helm)
          return '';
        try {
          return this.getPolymerMolfile(helm, pseudoMolfile);
        } catch (err: any) {
          // returning from `catch` rather than `finally` keeps a failure of the logger visible
          const [errMsg, errStack] = errInfo(err);
          _package.logger.error(errMsg, undefined, errStack);
          return '';
        }
      });
    const molfileColName = this.df.columns.getUnusedName(`molfileV2K(${this.helmColumn.name})`);
    const molfileColumn = DG.Column.fromList('string', molfileColName, molfileList);
    return molfileColumn;
  }

  /** Calls `HELM:getMolfiles` on the HELM column; its atoms are used as monomers in {@link getPolymerMolfile} */
  private async getPolymerGraphColumn(): Promise<DG.Column<string>> {
    const polymerGraphColumn: DG.Column<string> =
      await grok.functions.call('HELM:getMolfiles', {col: this.helmColumn});
    return polymerGraphColumn;
  }

  /**
   * Assembles the molfile of one polymer.
   * @param helm HELM string of the polymer
   * @param polymerGraph pseudo-molfile whose atom types are read as monomer
   * symbols and whose atom coordinates are read as monomer shifts
   */
  private getPolymerMolfile(helm: string, polymerGraph: string): string {
    const globalPositionHandler = new GlobalMonomerPositionHandler(polymerGraph);
    const polymer = new Polymer(helm);
    globalPositionHandler.monomerSymbols.forEach((monomerSymbol: string, monomerIdx: number) => {
      const shift = globalPositionHandler.getMonomerShifts(monomerIdx);
      polymer.addMonomer(monomerSymbol, monomerIdx, shift);
    });
    const polymerMolfile = polymer.compileToMolfile();
    return polymerMolfile;
  }
}
|
|
178
|
-
|
|
179
|
-
/**
 * Reads monomer symbols and monomer positions from a pseudo-molfile in which
 * the atom types are interpreted as monomer symbols.
 */
class GlobalMonomerPositionHandler {
  private molfileHandler: MolfileHandlerBase;

  /** @param helmCoordinatesPseudoMolfile pseudo-molfile passed to `MolfileHandler.getInstance` */
  constructor(helmCoordinatesPseudoMolfile: string) {
    this.molfileHandler = MolfileHandler.getInstance(helmCoordinatesPseudoMolfile);
  }

  /** Atom types of the pseudo-molfile, used by callers as monomer symbols */
  get monomerSymbols(): string[] {
    const {atomTypes} = this.molfileHandler;
    return atomTypes;
  }

  /**
   * @param monomerIdx index of the monomer (atom) in the pseudo-molfile
   * @returns x and y coordinates stored for that index
   */
  getMonomerShifts(monomerIdx: number): {x: number, y: number} {
    return {
      x: this.molfileHandler.x[monomerIdx],
      y: this.molfileHandler.y[monomerIdx],
    };
  }
}
|
|
196
|
-
|
|
197
|
-
/**
 * A monomer taken from the monomer library: its molfile (wrapped into
 * MolfileWrapper) together with the element symbols that cap its R groups.
 * All methods except the constructor delegate to the molfile wrapper.
 */
class MonomerWrapper {
  /**
   * @param monomerSymbol symbol of the monomer to look up
   * @param polymerType polymer type the monomer is looked up under
   * @throws Error when the monomer is not in the library, or when one of its
   * R groups has no cap group SMILES under either key variant
   */
  constructor(
    monomerSymbol: string,
    polymerType: HELM_POLYMER_TYPE,
  ) {
    const monomerLib = MonomerLibManager.instance.getBioLib();
    const monomer = monomerLib.getMonomer(polymerType, monomerSymbol);
    if (!monomer)
      throw new Error(`Monomer ${monomerSymbol} is not found in the library`);
    this.molfileWrapper = new MolfileWrapper(monomer.molfile, monomerSymbol);
    this.capGroupElements = monomer.rgroups.map((rgroup) => {
      const smiles = rgroup[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES] ||
        // WARNING: ignore because both key variants coexist in HELM Core Library!
        // @ts-ignore
        rgroup[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE];
      // without this guard a missing SMILES surfaces as an opaque TypeError on `.replace`
      if (typeof smiles !== 'string')
        throw new Error(`Cap group SMILES is not specified for an R group of monomer ${monomerSymbol}`);
      // extract the element symbol: brackets, asterisks, colons and digits are
      // stripped, e.g. '[*:1][H]' becomes 'H'
      return smiles.replace(/(\[|\]|\*|:|\d)/g, '');
    });
  }

  private molfileWrapper: MolfileWrapper;
  /** Cap element symbols in the order of the monomer's `rgroups` list */
  private capGroupElements: string[] = [];

  /** Adds `shift` to the coordinates of every atom */
  shiftCoordinates(shift: {x: number, y: number}): void {
    this.molfileWrapper.shiftCoordinates(shift);
  }

  /** Atom lines of the monomer in their current state */
  getAtomLines(): string[] {
    return this.molfileWrapper.getAtomLines();
  }

  /** Bond lines of the monomer in their current state */
  getBondLines(): string[] {
    return this.molfileWrapper.getBondLines();
  }

  /** Removes the R groups with the given ids (starting from 1) */
  removeBondedRGroups(rGroupIds: number[]): void {
    this.molfileWrapper.removeRGroups(rGroupIds);
  }

  /** Caps the R groups still present with this monomer's cap group elements */
  capTrailingRGroups(): void {
    this.molfileWrapper.capRGroups(this.capGroupElements);
  }

  /** Puts `attachmentAtomIdx` into the bond position held by a removed R group */
  replaceRGroupWithAttachmentAtom(rGroupId: number, attachmentAtomIdx: number): void {
    this.molfileWrapper.replaceRGroupWithAttachmentAtom(rGroupId, attachmentAtomIdx);
  }

  /** Returns the atom bonded to the given removed R group */
  getAttachmentAtomByRGroupId(rGroupId: number): number {
    const attachmentAtom = this.molfileWrapper.getAttachmentAtomByRGroupId(rGroupId);
    return attachmentAtom;
  }

  /** Deletes the bond line that held the given removed R group */
  deleteBondLineWithSpecifiedRGroup(rGroupId: number): void {
    this.molfileWrapper.deleteBondLineWithSpecifiedRGroup(rGroupId);
  }

  /** Adds `shift` to every atom id in the bond block */
  shiftBonds(shift: number): void {
    this.molfileWrapper.shiftBonds(shift);
  }
}
|
|
257
|
-
|
|
258
|
-
/**
 * Tracks the R groups of a monomer molfile: where each R group sits in the
 * atom block and, once removed, which bond position it used to occupy.
 * Operates on the MolfileAtoms and MolfileBonds instances passed to the
 * constructor and mutates them.
 */
class RGroupHandler {
  /**
   * @param rGroupLines lines of the molfile; only R group lines and atom alias
   * lines (with the line following each alias line) are read
   * @param atoms atom block of the same molfile
   * @param bonds bond block of the same molfile
   * @throws Error when an R group id occurs twice, when an R group line is
   * malformed, or when an R group atom has no id assigned
   */
  constructor(rGroupLines: string[], private atoms: MolfileAtoms, private bonds: MolfileBonds) {
    this.rGroupIdToAtomicIndexMap = this.getRGroupIdToAtomicIdxMap(rGroupLines);
  }

  /** Relates R group id (starting from 1) to its atomic index (starting from 0)
   * within the molfile */
  rGroupIdToAtomicIndexMap: Map<number, number>;

  /** Maps the id (starting from 1) of a removed R group to the position it
   * occupied in the bond block */
  private rGroupBondPositionMap = new Map<number, PositionInBonds>();

  /** Returns the atomic index of the R group, or `null` when the id is not tracked */
  getAtomicIdx(rGroupId: number): number | null {
    const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rGroupId);
    return atomicIdx === undefined ? null : atomicIdx;
  }

  /** Deletes the atoms of the given R groups; throws before deleting anything
   * when one of the ids is not tracked */
  private removeRGroupsFromAtomBlock(rGroupIds: number[]): void {
    const rGroupAtomicIndices = rGroupIds.map((rGroupId) => {
      const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rGroupId);
      if (atomicIdx === undefined)
        throw new Error(`Cannot find atomic index for R group ${rGroupId}`);
      return atomicIdx;
    });
    this.atoms.deleteAtoms(rGroupAtomicIndices);
  }

  /**
   * Removes the given R groups: their atoms are deleted, their positions in
   * the bond block are replaced by the dummy value and remembered, and the
   * remaining atom ids are renumbered.
   */
  removeRGroups(rGroupIds: number[]): void {
    this.removeRGroupsFromAtomBlock(rGroupIds);

    rGroupIds.forEach((rGroupId) => {
      const dummyPosition = this.replaceRGroupInBondsByDummy(rGroupId);
      this.rGroupBondPositionMap.set(rGroupId, dummyPosition);
    });
  }

  /** Replace RGroups by -1, update associated maps, and return the position in
   * bond block */
  private replaceRGroupInBondsByDummy(rGroupId: number): PositionInBonds {
    if (this.rGroupBondPositionMap.has(rGroupId))
      throw new Error(`R group ${rGroupId} is already handled`);

    const rGroupAtomicIdx = this.rGroupIdToAtomicIndexMap.get(rGroupId);
    if (rGroupAtomicIdx === undefined)
      throw new Error(`Cannot find position for R group ${rGroupId}`);

    // bond block uses atom ids starting from 1
    const positions = this.bonds.getPositionsInBonds(rGroupAtomicIdx + 1);
    if (positions.length === 0)
      throw new Error(`Cannot find position for R group ${rGroupId}`);
    if (positions.length > 1)
      throw new Error(`More than one position for R group ${rGroupId}`);

    const rGroupPosition = positions[0];

    this.bonds.replacePositionsInBondsByDummy([rGroupPosition]);
    this.bonds.removeAtomIdFromBonds(rGroupAtomicIdx + 1);
    this.removeRGroupFromAtomicIdxMap(rGroupId, rGroupAtomicIdx);

    return rGroupPosition;
  }

  /** Forgets the deleted R group and decrements the atomic indices that
   * followed the deleted atom */
  private removeRGroupFromAtomicIdxMap(deletedId: number, deletedAtomicIdx: number): void {
    this.rGroupIdToAtomicIndexMap.delete(deletedId);
    for (const [rGroupId, rGroupAtomicIdx] of this.rGroupIdToAtomicIndexMap) {
      if (rGroupAtomicIdx > deletedAtomicIdx)
        this.rGroupIdToAtomicIndexMap.set(rGroupId, rGroupAtomicIdx - 1);
    }
  }

  /**
   * Builds the map from R group id to atomic index out of two sources: R group
   * lines and atom alias lines. Every R group atom of the atom block must be
   * covered by one of them.
   */
  private getRGroupIdToAtomicIdxMap(lines: string[]): Map<number, number> {
    /** Parses one R group line into [R group id, atomic index] pairs. The
     * first three tokens are skipped; the rest are read as pairs of atom id
     * (starting from 1) and R group id */
    function parseRgpLine(rgpLine: string): [number, number][] {
      const numbers = rgpLine.split(/\s+/).filter((item) => item)
        .slice(3).map((item) => parseInt(item));
      // an unpaired value means the line is malformed
      if (numbers.length % 2 !== 0)
        throw new Error(`Malformed R group line: ${rgpLine}`);
      const pairs: [number, number][] = [];
      for (let i = 0; i < numbers.length; i += 2)
        pairs.push([numbers[i + 1], numbers[i] - 1]);
      return pairs;
    }

    const map = new Map<number, number>();

    const rgroupLines = lines.filter((line: string) => line.startsWith(V2K_CONST.RGP_LINE_START));
    rgroupLines.forEach((line: string) => {
      for (const [rGroupId, atomicIdx] of parseRgpLine(line)) {
        if (map.has(rGroupId))
          throw new Error(`R group ${rGroupId} is already in the map`);
        map.set(rGroupId, atomicIdx);
      }
    });

    lines.forEach((line: string, idx: number) => {
      if (!line.startsWith(V2K_CONST.ATOM_ALIAS_LINE_START))
        return;
      // the alias line carries the atom id, the line after it carries the
      // alias text, whose first character is skipped to get the R group id
      const rgpAtomIdx = parseInt(line.split(/\s+/)[1]) - 1;
      const rgpId = parseInt(lines[idx + 1].substring(1));
      if (map.has(rgpId))
        throw new Error(`R group ${rgpId} is already in the map`);
      map.set(rgpId, rgpAtomIdx);
    });

    const accounted = new Set(map.values());
    const unaccounted = this.atoms.getRGroupAtomicIndices().filter((idx) => !accounted.has(idx));
    if (unaccounted.length !== 0)
      throw new Error(`Unaccounted R group indices: ${unaccounted}`);

    return map;
  }

  /** Deletes the bond line that held the removed R group and shifts the
   * remembered positions of the bond lines that followed it */
  deleteBondLineWithSpecifiedRGroup(rGroupId: number): void {
    const position = this.rGroupBondPositionMap.get(rGroupId);
    if (!position)
      throw new Error(`Cannot find position for R group ${rGroupId}`);
    const {bondLineIdx} = position;
    this.bonds.deleteBondLines([bondLineIdx]);
    this.rGroupBondPositionMap.delete(rGroupId);
    this.rGroupIdToAtomicIndexMap.delete(rGroupId);
    // update values of other positions
    this.rGroupBondPositionMap.forEach((otherPosition) => {
      if (otherPosition.bondLineIdx > bondLineIdx)
        otherPosition.bondLineIdx -= 1;
    });
  }

  /** Writes `externalAtom` into the bond position held by the removed R group */
  replaceRGroupWithAttachmentAtom(rGroupId: number, externalAtom: number): void {
    const position = this.rGroupBondPositionMap.get(rGroupId);
    if (!position)
      throw new Error(`Cannot find position for R group ${rGroupId}`);
    const {bondLineIdx, nodeIdx} = position;
    this.bonds.bondedAtoms[bondLineIdx][nodeIdx] = externalAtom;
  }

  /** Returns the id of the atom at the other end of the bond that held the
   * removed R group. Atom id is molfile id starting from 1 */
  getAttachmentAtomIdByRGroupId(rgroupId: number): number {
    const position = this.rGroupBondPositionMap.get(rgroupId);
    if (!position)
      throw new Error(`Cannot find position for R group ${rgroupId}`);
    const {bondLineIdx, nodeIdx} = position;
    return this.bonds.bondedAtoms[bondLineIdx][(nodeIdx + 1) % 2];
  }

  /**
   * Caps every R group that is still tracked: a hydrogen cap removes the
   * R group atom together with its bond line, any other cap replaces the
   * R group symbol in the atom line.
   * WARNING: capping RGRoups and deletion of the bonded ones don't commute
   * @param capGroupElements cap element symbols, indexed by R group id minus 1
   * @throws Error when no cap element is given for a tracked R group
   */
  capRGroups(capGroupElements: string[]): void {
    // iterate over a snapshot: removing an R group rewrites the map
    const rGroupIds = Array.from(this.rGroupIdToAtomicIndexMap.keys());
    for (const rGroupId of rGroupIds) {
      const element = capGroupElements[rGroupId - 1];
      // a missing element would otherwise be written into the atom line as the text 'undefined'
      if (!element)
        throw new Error(`Cap group element is not specified for R group ${rGroupId}`);
      if (element === HYDROGEN_SYMBOL) {
        this.removeRGroups([rGroupId]);
        this.deleteBondLineWithSpecifiedRGroup(rGroupId);
        continue;
      }
      // read the index now: earlier removals may have renumbered it
      const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rGroupId);
      if (atomicIdx === undefined)
        continue;
      this.atoms.replaceElementSymbol(atomicIdx, element);
    }
  }
}
|
|
417
|
-
|
|
418
|
-
/**
 * Bond block of a molfile. The two atom ids of every bond line are kept as
 * numbers and can be edited; the rest of each line is kept as text and is
 * re-attached when the lines are written out.
 */
class MolfileBonds {
  /** Atom ids of each bond line, parsed from its first two 3-character fields */
  private bondedPairs: number[][] = [];
  private rawBondLines: string[] = [];

  /** @param bondLines lines of the bond block */
  constructor(bondLines: string[]) {
    this.rawBondLines = bondLines;
    this.bondedPairs = bondLines.map(
      (line) => [0, 3].map((start) => parseInt(line.substring(start, start + 3)))
    );
  }

  /**
   * Bond lines with the current atom ids written into the first two fields.
   * @throws Error when a pair still contains the dummy value -1
   */
  getBondLines(): string[] {
    const lines: string[] = [];
    for (let lineIdx = 0; lineIdx < this.bondedPairs.length; lineIdx++) {
      const pair = this.bondedPairs[lineIdx];
      if (pair.includes(-1))
        throw new Error(`Bonded pair ${pair} contains -1`);
      const first = pair[0].toString().padStart(3, ' ');
      const second = pair[1].toString().padStart(3, ' ');
      lines.push(first + second + this.rawBondLines[lineIdx].substring(6));
    }
    return lines;
  }

  /** The internal list of atom id pairs; it is not copied */
  get bondedAtoms(): number[][] {
    return this.bondedPairs;
  }

  /** Drops the bond lines at the given indices (starting from 0) */
  deleteBondLines(indices: number[]): void {
    const isKept = (_: unknown, idx: number): boolean => !indices.includes(idx);
    this.rawBondLines = this.rawBondLines.filter(isKept);
    this.bondedPairs = this.bondedPairs.filter(isKept);
  }

  /** Finds every occurrence of the atom in the bond block. Atom id starts from 1 */
  getPositionsInBonds(atomId: number): PositionInBonds[] {
    const found: PositionInBonds[] = [];
    for (let bondLineIdx = 0; bondLineIdx < this.bondedPairs.length; bondLineIdx++) {
      const pair = this.bondedPairs[bondLineIdx];
      for (let nodeIdx = 0; nodeIdx < pair.length; nodeIdx++) {
        if (pair[nodeIdx] === atomId)
          found.push({bondLineIdx, nodeIdx});
      }
    }
    return found;
  }

  /** Overwrites the atom ids at the given positions with `dummy` (-1 when omitted) */
  replacePositionsInBondsByDummy(positions: PositionInBonds[], dummy?: number): void {
    const value = dummy === undefined ? -1 : dummy;
    for (const {bondLineIdx, nodeIdx} of positions)
      this.bondedPairs[bondLineIdx][nodeIdx] = value;
  }

  /** Decrements every atom id greater than `atomId`; other values are left as they are */
  removeAtomIdFromBonds(atomId: number): void {
    this.bondedPairs = this.bondedPairs.map(
      (pair) => pair.map((id) => id > atomId ? id - 1 : id)
    );
  }

  /** Adds `shift` to every value stored in the pairs */
  shift(shift: number): void {
    this.bondedPairs = this.bondedPairs.map((pair) => pair.map((id) => id + shift));
  }
}
|
|
488
|
-
|
|
489
|
-
/**
 * Atom block of a molfile. The x and y coordinates of every atom line are
 * kept as numbers and can be transformed; the rest of each line is kept as
 * text and is re-attached when the lines are written out.
 */
class MolfileAtoms {
  /** x and y of each atom, parsed from the first two 10-character fields of its line */
  private coordinates: {x: number, y: number}[] = [];
  private rawAtomLines: string[] = [];

  /** @param atomLines lines of the atom block */
  constructor(atomLines: string[]) {
    this.rawAtomLines = atomLines;
    this.coordinates = atomLines.map((line) => ({
      x: parseFloat(line.substring(0, 10)),
      y: parseFloat(line.substring(10, 20)),
    }));
  }

  /** The internal list of coordinates; it is not copied */
  get atomCoordinates(): {x: number, y: number}[] {
    return this.coordinates;
  }

  /** Atom lines with the current coordinates written with 4 decimals into the
   * first two 10-character fields */
  get atomLines(): string[] {
    const toField = (value: number): string => value.toFixed(4).padStart(10, ' ');
    return this.rawAtomLines.map((line, idx) => {
      const {x, y} = this.coordinates[idx];
      return toField(x) + toField(y) + line.substring(20);
    });
  }

  /** Replaces the first R group symbol in the atom line at `atomIdx` (starting
   * from 0) with `newElementSymbol` */
  replaceElementSymbol(atomIdx: number, newElementSymbol: string): void {
    const line = this.rawAtomLines[atomIdx];
    this.rawAtomLines[atomIdx] = line.replace(R_GROUP_ELEMENT_SYMBOL, newElementSymbol);
  }

  /** Drops the atoms at the given indices (starting from 0) */
  deleteAtoms(indices: number[]): void {
    const isKept = (_: unknown, idx: number): boolean => !indices.includes(idx);
    this.coordinates = this.coordinates.filter(isKept);
    this.rawAtomLines = this.rawAtomLines.filter(isKept);
  }

  /**
   * Adds `shift` to the coordinates of every atom.
   * @throws Error when a resulting coordinate is NaN; coordinates are then left unchanged
   */
  shift(shift: {x: number, y: number}): void {
    const moved: {x: number, y: number}[] = [];
    for (const {x, y} of this.coordinates) {
      const newX = x + shift.x;
      const newY = y + shift.y;
      if (isNaN(newX) || isNaN(newY))
        throw new Error(`Cannot shift coordinates by ${shift.x}, ${shift.y}`);
      moved.push({x: newX, y: newY});
    }
    this.coordinates = moved;
  }

  /**
   * Rotates every atom about the origin by `angle` (as taken by `Math.cos` / `Math.sin`).
   * @throws Error when a resulting coordinate is NaN; coordinates are then left unchanged
   */
  rotate(angle: number): void {
    const cos = Math.cos(angle);
    const sin = Math.sin(angle);
    const rotated: {x: number, y: number}[] = [];
    for (const {x, y} of this.coordinates) {
      const newX = x * cos - y * sin;
      const newY = x * sin + y * cos;
      if (isNaN(newX) || isNaN(newY))
        throw new Error(`Cannot rotate coordinates by ${angle}`);
      rotated.push({x: newX, y: newY});
    }
    this.coordinates = rotated;
  }

  /** Indices (starting from 0) of the atom lines that contain the R group symbol */
  getRGroupAtomicIndices(): number[] {
    const indices: number[] = [];
    this.rawAtomLines.forEach((line, idx) => {
      if (line.includes(R_GROUP_ELEMENT_SYMBOL))
        indices.push(idx);
    });
    return indices;
  }
}
|
|
553
|
-
|
|
554
|
-
/**
 * A monomer molfile split into atom block, bond block and R group
 * bookkeeping. On construction the monomer is moved to its default position:
 * R1 at the origin and, when R2 exists, R2 on the positive X axis.
 */
class MolfileWrapper {
  /**
   * @param molfileV2K monomer molfile; the counts are read from the first two
   * 3-character fields of its 4th line, the atom block starts at the 5th line
   * and the bond block follows it
   * @param monomerSymbol monomer symbol, used in error messages only
   * @throws Error when the counts line is missing or unparsable, when R1 is
   * not found, or when R2 coincides with R1
   */
  constructor(molfileV2K: string, private monomerSymbol: string) {
    const lines = molfileV2K.split('\n');

    // TODO: port to consts
    const atomCountIdx = {begin: 0, end: 3};
    const bondCountIdx = {begin: 3, end: 6};
    const countsLineIdx = 3;
    const atomBlockIdx = 4;

    const countsLine = lines[countsLineIdx];
    if (countsLine === undefined)
      throw new Error(`Molfile of monomer ${monomerSymbol} has no counts line`);
    const atomCount = parseInt(countsLine.substring(atomCountIdx.begin, atomCountIdx.end));
    const bondCount = parseInt(countsLine.substring(bondCountIdx.begin, bondCountIdx.end));
    // NaN counts would silently produce empty atom and bond blocks
    if (isNaN(atomCount) || isNaN(bondCount))
      throw new Error(`Cannot parse atom and bond counts in molfile of monomer ${monomerSymbol}`);

    const bondBlockIdx = atomBlockIdx + atomCount;
    this.atoms = new MolfileAtoms(lines.slice(atomBlockIdx, bondBlockIdx));
    this.bonds = new MolfileBonds(lines.slice(bondBlockIdx, bondBlockIdx + bondCount));

    this.rGroups = new RGroupHandler(lines, this.atoms, this.bonds);

    this.shiftMonomerToDefaultPosition();
  }

  private atoms: MolfileAtoms;
  private bonds: MolfileBonds;
  private rGroups: RGroupHandler;

  /** Deletes the bond line that held the given removed R group */
  deleteBondLineWithSpecifiedRGroup(rGroupId: number): void {
    this.rGroups.deleteBondLineWithSpecifiedRGroup(rGroupId);
  }

  /** Adds `shift` to the coordinates of every atom */
  shiftCoordinates(shift: {x: number, y: number}): void {
    this.atoms.shift(shift);
  }

  /** Rotates every atom about the origin by `angle` */
  rotateCoordinates(angle: number): void {
    this.atoms.rotate(angle);
  }

  /** Bond lines in their current state */
  getBondLines(): string[] {
    return this.bonds.getBondLines();
  }

  /** Atom lines in their current state */
  getAtomLines(): string[] {
    return this.atoms.atomLines;
  }

  /** Removes the R groups with the given ids (starting from 1) */
  removeRGroups(rGroupIds: number[]): void {
    this.rGroups.removeRGroups(rGroupIds);
  }

  /** Writes `externalAtom` into the bond position held by a removed R group */
  replaceRGroupWithAttachmentAtom(rGroupId: number, externalAtom: number): void {
    this.rGroups.replaceRGroupWithAttachmentAtom(rGroupId, externalAtom);
  }

  /** Returns the id of the atom bonded to the given removed R group */
  getAttachmentAtomByRGroupId(rgroupId: number): number {
    return this.rGroups.getAttachmentAtomIdByRGroupId(rgroupId);
  }

  /** Translates the monomer so that R1 lands on the origin */
  private shiftR1GroupToOrigin(): void {
    const r1Idx = this.rGroups.getAtomicIdx(1);
    if (r1Idx === null)
      throw new Error(`Cannot find R1 group for monomer ${this.monomerSymbol}`);
    const {x, y} = this.atoms.atomCoordinates[r1Idx];
    this.atoms.shift({x: -x, y: -y});
  }

  /** Rotates the monomer about the origin so that R2 lands on the positive X axis */
  private alignR2AlongX(): void {
    const r2Idx = this.rGroups.getAtomicIdx(2);
    if (r2Idx === null)
      throw new Error(`Cannot find R2 group for monomer ${this.monomerSymbol}`);
    const {x, y} = this.atoms.atomCoordinates[r2Idx];
    // atan2 keeps the quadrant: atan(y / x) would leave an R2 with negative x
    // on the negative X axis
    const angle = Math.atan2(y, x);
    // R2 at the origin has no direction to align
    if (isNaN(angle) || (x === 0 && y === 0))
      throw new Error(`Cannot calculate angle for R2 group for monomer ${this.monomerSymbol}`);
    this.rotateCoordinates(-angle);
  }

  /** Moves R1 to the origin and, when the monomer has R2, aligns R2 along X */
  private shiftMonomerToDefaultPosition(): void {
    this.shiftR1GroupToOrigin();
    const r2Idx = this.rGroups.getAtomicIdx(2);
    if (r2Idx !== null)
      this.alignR2AlongX();
  }

  /** Adds `shift` to every atom id in the bond block */
  shiftBonds(shift: number): void {
    this.bonds.shift(shift);
  }

  /** Caps the R groups still present with the given cap elements, indexed by
   * R group id minus 1 */
  capRGroups(capGroupElements: string[]): void {
    this.rGroups.capRGroups(capGroupElements);
  }
}
|
|
649
|
-
|
|
650
|
-
/** Wrapper over simple polymer substring of HELM, like RNA1{d(A)p} */
class SimplePolymer {
  /**
   * Parses the polymer type, the numeric index and the monomer list.
   * @param simplePolymer simple polymer in HELM notation, e.g. `PEPTIDE1{A.C.G}`
   * @throws Error when the polymer type is not PEPTIDE/RNA or the index cannot be parsed
   */
  constructor(private simplePolymer: string) {
    this.polymerType = this.getPolymerType();
    this.idx = this.getIdx();
    const {monomers, monomerTypes} = this.getMonomerSymbolsAndTypes();
    this.monomers = monomers;
    this.monomerTypes = monomerTypes;
  }

  /** Polymer type prefix found in the HELM substring */
  readonly polymerType: string;
  /** Monomer symbols in order of appearance, square brackets stripped */
  readonly monomers: string[];
  private idx: number;
  /** Backbone/branch flag for every item of `monomers` (same indexing) */
  private monomerTypes: HELM_MONOMER_TYPE[];

  /** Simple polymer id in the form 'polymer type' + 'index' */
  get id(): string {
    return this.polymerType + this.idx.toString();
  }

  /** Extracts the polymer type that precedes the numeric index and '{'.
   * @throws Error when the type is neither PEPTIDE nor RNA */
  private getPolymerType(): string {
    const regex = new RegExp(
      `(${HELM_POLYMER_TYPE.PEPTIDE}|${HELM_POLYMER_TYPE.RNA})[0-9]+\\{`
    );
    const match = this.simplePolymer.match(regex);
    if (!match)
      throw new Error(`Unsupported polymer type in ${this.simplePolymer}`);
    return match[1];
  }

  /** Extracts the numeric index that follows the polymer type.
   * @throws Error when no index is found */
  private getIdx(): number {
    const regex = new RegExp(`${this.polymerType}([0-9]+)\\{`);
    const match = this.simplePolymer.match(regex);
    if (!match)
      throw new Error(`Cannot parse simple polymer id from ${this.simplePolymer}`);
    // Explicit radix: the captured group consists of decimal digits only
    return parseInt(match[1], 10);
  }

  /** Splits the polymer body into monomer symbols and marks each of them as
   * backbone or branch. */
  private getMonomerSymbolsAndTypes(): {monomers: string[], monomerTypes: HELM_MONOMER_TYPE[]} {
    const helmWrapperRegex = new RegExp(`${this.polymerType}${this.idx}\\{|\\}`, 'g');
    const monomerGroups = this.simplePolymer.replace(helmWrapperRegex, '').split('.');
    const monomerList: string[] = [];
    const monomerTypeList: HELM_MONOMER_TYPE[] = [];
    monomerGroups.forEach((monomerGroup) => {
      const splitted = monomerGroup.split(/\(|\)/)
        .map((el) => el.replace(/[\[\]]/g, ''));
      // WARNING: only the groups of the form r(A)p, as in RNA, are supported:
      // even positions are backbone monomers, odd positions are branches
      splitted.forEach((symbol, position) => {
        // 'r(A)' (no trailing phosphate) splits into ['r', 'A', '']: the empty
        // tail is not a monomer and must not be registered as one
        if (symbol === '')
          return;
        monomerList.push(symbol);
        monomerTypeList.push(
          (position % 2 === 0) ? HELM_MONOMER_TYPE.BACKBONE : HELM_MONOMER_TYPE.BRANCH
        );
      });
    });
    return {monomers: monomerList, monomerTypes: monomerTypeList};
  }

  /** Get list of pairs for bonded monomers, monomers indexed locally
   * (within the simple polymer).
   * Backbone monomers are chained R2 -> R1; every branch monomer is bound by
   * its R1 to R3 of the monomer that directly precedes it. */
  getBondData(): Bond[][] {
    const backboneMonomerIndices: number[] = [];
    const branchMonomerIndices: number[] = [];
    this.monomerTypes.forEach((type, idx) => {
      if (type === HELM_MONOMER_TYPE.BACKBONE)
        backboneMonomerIndices.push(idx);
      else if (type === HELM_MONOMER_TYPE.BRANCH)
        branchMonomerIndices.push(idx);
    });

    const result: Bond[][] = [];
    for (let i = 0; i < backboneMonomerIndices.length - 1; i++) {
      const backboneIdx = backboneMonomerIndices[i];
      const nextBackboneIdx = backboneMonomerIndices[i + 1];
      result.push([{monomerIdx: backboneIdx, rGroupId: 2}, {monomerIdx: nextBackboneIdx, rGroupId: 1}]);
    }
    for (const branchIdx of branchMonomerIndices) {
      const backboneIdx = branchIdx - 1;
      result.push([{monomerIdx: backboneIdx, rGroupId: 3}, {monomerIdx: branchIdx, rGroupId: 1}]);
    }
    return result;
  }
}
|
|
735
|
-
|
|
736
|
-
/** Wrapper over the connection section of HELM: a list of items like
 * `PEPTIDE1,PEPTIDE2,3:R3-1:R1`, separated by HELM_ITEM_SEPARATOR */
class ConnectionList {
  /**
   * @param connectionList connection section of a HELM string
   * @throws Error when any of the items cannot be parsed
   */
  constructor(connectionList: string) {
    // Surrounding whitespace is not a part of the item
    const splitted = connectionList.split(HELM_ITEM_SEPARATOR).map((item) => item.trim());
    splitted.forEach((connectionItem: string) => this.validateConnectionItem(connectionItem));
    this.connectionItems = splitted;
  }

  private connectionItems: string[];

  /** Matches the item against the connection pattern. The pattern is anchored
   * at the start so that the captured polymer ids are exactly the ones the
   * item begins with; trailing text after the second R-group is tolerated.
   * Captures: source polymer id, target polymer id, source monomer position,
   * source R-group id, target monomer position, target R-group id. */
  private matchConnectionItem(connectionItem: string): RegExpMatchArray | null {
    const polymerId = `(?:${HELM_POLYMER_TYPE.PEPTIDE}|${HELM_POLYMER_TYPE.RNA})[0-9]+`;
    const regex = new RegExp(`^(${polymerId}),(${polymerId}),([0-9]+):R([0-9]+)-([0-9]+):R([0-9]+)`);
    return connectionItem.match(regex);
  }

  /** @throws Error when the item does not follow the connection pattern */
  private validateConnectionItem(connectionItem: string): void {
    if (!this.matchConnectionItem(connectionItem))
      throw new Error(`Cannot parse connection item from ${connectionItem}`);
  }

  /** Get the bonded pairs described by the connection items. Every pair holds
   * the source and the target end; monomers are indexed locally (within
   * their simple polymer). Fresh objects are returned on every call. */
  getConnectionData(): {polymerId: string, bond: Bond}[][] {
    return this.connectionItems.map((connectionItem: string) => {
      const match = this.matchConnectionItem(connectionItem);
      if (!match)
        throw new Error(`Cannot parse connection item from ${connectionItem}`);
      const [, sourcePolymerId, targetPolymerId, sourcePosition, sourceRGroup, targetPosition, targetRGroup] = match;
      // WARNING: monomer idx starts from 0, whereas positions in HELM start from 1
      return [
        {
          polymerId: sourcePolymerId,
          bond: {monomerIdx: parseInt(sourcePosition, 10) - 1, rGroupId: parseInt(sourceRGroup, 10)},
        },
        {
          polymerId: targetPolymerId,
          bond: {monomerIdx: parseInt(targetPosition, 10) - 1, rGroupId: parseInt(targetRGroup, 10)},
        },
      ];
    });
  }
}
|
|
771
|
-
|
|
772
|
-
/** Wrapper over a HELM string: its simple polymers and connection list */
class Helm {
  /**
   * @param helm HELM string; the first section lists the simple polymers,
   * the second one (optional) lists the connections between them
   * @throws Error when a simple polymer or a connection cannot be parsed
   */
  constructor(private helm: string) {
    const helmSections = this.helm.split(HELM_SECTION_SEPARATOR);
    this.simplePolymers = helmSections[0]
      .split(HELM_ITEM_SEPARATOR)
      .map((item) => new SimplePolymer(item));
    // The section is '' when empty and undefined when the string has no
    // section separator at all; in both cases there are no connections
    if (helmSections[1])
      this.connectionList = new ConnectionList(helmSections[1]);
    this.bondData = this.getBondData();
  }

  /** List of pairs for bonded monomers, monomers indexed globally (within the
   * complex polymer scope) */
  readonly bondData: Bond[][];

  private simplePolymers: SimplePolymer[];
  private connectionList?: ConnectionList;

  /** Returns the HELM string the instance was created from */
  toString(): string {
    return this.helm;
  }

  /** Get the type of the simple polymer that owns the monomer.
   * @param monomerGlobalIdx 0-based monomer index within the complex polymer
   * @throws Error when no simple polymer starts at or before the index */
  getPolymerTypeByMonomerIdx(monomerGlobalIdx: number): HELM_POLYMER_TYPE {
    const simplePolymer = this.getSimplePolymerByMonomerIdx(monomerGlobalIdx);
    return simplePolymer.polymerType as HELM_POLYMER_TYPE;
  }

  /** Finds the simple polymer with the greatest index shift that does not
   * exceed the monomer index, i.e. the last polymer starting at or before it.
   * Indices past the last monomer resolve to the last simple polymer. */
  private getSimplePolymerByMonomerIdx(monomerGlobalIdx: number): SimplePolymer {
    let owner: SimplePolymer | undefined;
    let shift = 0;
    for (const simplePolymer of this.simplePolymers) {
      // Shifts only grow, so the search is over at the first polymer starting
      // past the index; the negated form also stops on NaN
      if (!(monomerGlobalIdx >= shift))
        break;
      owner = simplePolymer;
      shift += simplePolymer.monomers.length;
    }
    if (owner === undefined)
      throw new Error(`Cannot find simple polymer for monomer ${monomerGlobalIdx}`);
    return owner;
  }

  /** Adds the shift to the monomer index of every bond end, in place */
  private shiftBondMonomerIds(shift: number, bonds: Bond[][]): void {
    bonds.forEach((bond) => {
      bond.forEach((bondPart) => {
        bondPart.monomerIdx += shift;
      });
    });
  }

  /** Maps simple polymer id to the global index of its first monomer */
  private getMonomerIdxShifts(): {[simplePolymerId: string]: number} {
    const result: {[simplePolymerId: string]: number} = {};
    let shift = 0;
    this.simplePolymers.forEach((simplePolymer) => {
      result[simplePolymer.id] = shift;
      shift += simplePolymer.monomers.length;
    });
    return result;
  }

  /** Collects the bonds inside every simple polymer and the bonds from the
   * connection list, converting local monomer indices to global ones.
   * @throws Error when a connection refers to an unknown simple polymer */
  private getBondData(): Bond[][] {
    const shifts = this.getMonomerIdxShifts();
    const result: Bond[][] = [];
    this.simplePolymers.forEach((simplePolymer) => {
      const bondData = simplePolymer.getBondData();
      this.shiftBondMonomerIds(shifts[simplePolymer.id], bondData);
      result.push(...bondData);
    });
    if (this.connectionList) {
      this.connectionList.getConnectionData().forEach((connection) => {
        const data: Bond[] = connection.map((connectionItem) => {
          const shift = shifts[connectionItem.polymerId];
          // Without this check the index would silently turn into NaN
          if (shift === undefined) {
            throw new Error(
              `Cannot find simple polymer ${connectionItem.polymerId} referenced in connections of ${this.helm}`
            );
          }
          const bond = connectionItem.bond;
          bond.monomerIdx += shift;
          return bond;
        });
        result.push(data);
      });
    }
    return result;
  }
}
|
|
856
|
-
|
|
857
|
-
/** Assembles monomer wrappers of a HELM polymer into a single V2000 molfile */
class Polymer {
  /**
   * @param helm HELM string of the polymer
   */
  constructor(helm: string) {
    this.helm = new Helm(helm);

    this.bondedRGroupsMap = new Map<number, number[]>();
    this.helm.bondData.forEach((bond) => {
      bond.forEach(({monomerIdx, rGroupId}) => {
        const bondedRGroups = this.bondedRGroupsMap.get(monomerIdx);
        if (bondedRGroups)
          bondedRGroups.push(rGroupId);
        else
          this.bondedRGroupsMap.set(monomerIdx, [rGroupId]);
      });
    });
  }

  private monomerWrappers: MonomerWrapper[] = [];
  private helm: Helm;
  /** Maps global monomer index to r-group ids (starting from 1) participating
   * in connection */
  private bondedRGroupsMap: Map<number, number[]>;

  /** Creates a wrapper for the monomer, shifts its coordinates and appends it
   * to the polymer. The wrapper is stored at the next free position, so
   * monomers have to be added in the order of their global indices.
   * @param monomerSymbol symbol of the monomer
   * @param monomerIdx global monomer index, used to look up the polymer type
   * @param shift coordinate shift applied to the monomer */
  addMonomer(
    monomerSymbol: string,
    monomerIdx: number,
    shift: {x: number, y: number},
  ): void {
    const polymerType = this.helm.getPolymerTypeByMonomerIdx(monomerIdx);
    const monomerWrapper = new MonomerWrapper(monomerSymbol, polymerType);
    monomerWrapper.shiftCoordinates(shift);

    this.monomerWrappers.push(monomerWrapper);
  }

  /** Removes the r-groups participating in bonds from every monomer and caps
   * the remaining ones */
  private removeRGroups(): void {
    this.monomerWrappers.forEach((monomerWrapper, monomerIdx) => {
      const bondedRGroups = this.bondedRGroupsMap.get(monomerIdx);
      if (bondedRGroups !== undefined)
        monomerWrapper.removeBondedRGroups(bondedRGroups);
      monomerWrapper.capTrailingRGroups();
    });
  }

  /** For every monomer, the number of atom lines of all preceding monomers */
  private getAtomNumberShifts(): number[] {
    const atomNumberShifts: number[] = [];
    let shift = 0;
    this.monomerWrappers.forEach((monomerWrapper) => {
      atomNumberShifts.push(shift);
      shift += monomerWrapper.getAtomLines().length;
    });
    return atomNumberShifts;
  }

  /** Re-creates the bonds between monomers: the r-group of the first monomer
   * is replaced with the attachment atom of the second one, whose own bond
   * line for that r-group is deleted.
   * @throws Error when a bond refers to a monomer that has not been added */
  private restoreBondsBetweenMonomers(): void {
    this.helm.bondData.forEach((bond) => {
      const monomerIdx = bond.map((bondPart) => bondPart.monomerIdx);
      const rGroupId = bond.map((bondPart) => bondPart.rGroupId);
      const monomer = monomerIdx.map((idx) => {
        const monomerWrapper = this.monomerWrappers[idx];
        if (monomerWrapper === undefined)
          throw new Error(`Monomer ${idx} of polymer ${this.helm.toString()} is bonded but has not been added`);
        return monomerWrapper;
      });

      const attachmentAtom = monomer[1].getAttachmentAtomByRGroupId(rGroupId[1]);
      monomer[0].replaceRGroupWithAttachmentAtom(rGroupId[0], attachmentAtom);
      monomer[1].deleteBondLineWithSpecifiedRGroup(rGroupId[1]);
    });
  }

  /** Compiles the added monomers into a V2000 molfile.
   * NOTE(review): the monomer wrappers are modified along the way (r-groups
   * removed, bonds shifted), so this looks like a one-shot operation --
   * confirm before calling it twice on the same instance.
   * @returns molfile text: 3 header lines, counts line, atom block, bond
   * block and the `M  END` terminator
   * @throws Error when the atom count exceeds V2K_CONST.MAX_ATOM_COUNT */
  compileToMolfile(): string {
    const molfileHeader = '\nDatagrok\n';
    const atomLines: string[] = [];
    const bondLines: string[] = [];

    this.removeRGroups();

    // Atom numbers in bond lines must be global before monomers get connected
    const atomNumberShifts = this.getAtomNumberShifts();
    this.monomerWrappers.forEach((monomerWrapper, idx) => {
      monomerWrapper.shiftBonds(atomNumberShifts[idx]);
    });

    this.restoreBondsBetweenMonomers();

    this.monomerWrappers.forEach((monomerWrapper) => {
      atomLines.push(...monomerWrapper.getAtomLines());
      bondLines.push(...monomerWrapper.getBondLines());
    });

    const atomCount = atomLines.length;
    if (atomCount > V2K_CONST.MAX_ATOM_COUNT) {
      throw new Error(
        `Atom count in polymer ${this.helm.toString()} is ${atomCount} and exceeds ${V2K_CONST.MAX_ATOM_COUNT}`
      );
    }

    const bondCount = bondLines.length;
    // V2000 counts line is fixed-width, 3 characters per field:
    // aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv
    // (atoms, bonds, atom lists, obsolete, chiral flag, stext, 4 obsolete
    // fields, 999 property lines, version)
    const countsLine = [atomCount, bondCount, 0, 0, 1, 0, 0, 0, 0, 0]
      .map((value) => value.toString().padStart(3, ' '))
      .join('') + '999 V2000';
    // The terminator is 'M', two spaces, 'END'
    const molfileEnd = 'M  END\n';
    const newLineChar = '\n';
    // Lines are spread so that an empty atom or bond block adds no blank line
    const blockList = [molfileHeader, countsLine, ...atomLines, ...bondLines, molfileEnd];
    return blockList.join(newLineChar);
  }
}
|