@datagrok/bio 2.12.11 → 2.12.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +4 -1
- package/CHANGELOG.md +10 -0
- package/dist/246.js +2 -0
- package/dist/246.js.map +1 -0
- package/dist/42.js +1 -1
- package/dist/42.js.map +1 -1
- package/dist/545.js +3 -0
- package/dist/545.js.map +1 -0
- package/dist/590.js.map +1 -1
- package/dist/package-test.js +5 -5
- package/dist/package-test.js.LICENSE.txt +0 -8
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +5 -5
- package/dist/package.js.LICENSE.txt +0 -8
- package/dist/package.js.map +1 -1
- package/package.json +7 -6
- package/src/package.ts +2 -2
- package/src/tests/renderers-test.ts +47 -1
- package/src/utils/cell-renderer.ts +28 -3
- package/src/utils/helm-to-molfile/converter/connection-list.ts +40 -0
- package/src/utils/helm-to-molfile/converter/const.ts +4 -0
- package/src/utils/helm-to-molfile/converter/converter.ts +124 -0
- package/src/utils/helm-to-molfile/converter/helm.ts +112 -0
- package/src/utils/helm-to-molfile/converter/index.ts +1 -0
- package/src/utils/helm-to-molfile/converter/mol-atoms-v2k.ts +24 -0
- package/src/utils/helm-to-molfile/converter/mol-atoms-v3k.ts +38 -0
- package/src/utils/helm-to-molfile/converter/mol-atoms.ts +44 -0
- package/src/utils/helm-to-molfile/converter/mol-bonds-v2k.ts +26 -0
- package/src/utils/helm-to-molfile/converter/mol-bonds-v3k.ts +30 -0
- package/src/utils/helm-to-molfile/converter/mol-bonds.ts +56 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper-factory.ts +16 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper-old.ts +100 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper-v2k.ts +21 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper-v3k.ts +21 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +79 -0
- package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +103 -0
- package/src/utils/helm-to-molfile/converter/polymer.ts +99 -0
- package/src/utils/helm-to-molfile/converter/position-handler.ts +23 -0
- package/src/utils/helm-to-molfile/converter/r-group-handler.ts +122 -0
- package/src/utils/helm-to-molfile/converter/simple-polymer.ts +89 -0
- package/src/utils/helm-to-molfile/converter/types.ts +12 -0
- package/src/utils/helm-to-molfile/utils.ts +32 -0
- package/src/utils/poly-tool/const.ts +0 -4
- package/src/utils/poly-tool/transformation.ts +126 -62
- package/src/utils/sequence-to-mol.ts +1 -1
- package/webpack.config.js +4 -3
- package/dist/709.js +0 -2
- package/dist/709.js.map +0 -1
- package/dist/777.js +0 -3
- package/dist/777.js.map +0 -1
- package/link-bio +0 -7
- package/setup +0 -52
- package/src/utils/atomic-works.ts +0 -367
- package/src/utils/helm-to-molfile.ts +0 -959
- /package/dist/{777.js.LICENSE.txt → 545.js.LICENSE.txt} +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Aleksandr Tanas",
|
|
6
6
|
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.12.11",
|
|
8
|
+
"version": "2.12.13",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,12 +34,12 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.40.
|
|
38
|
-
"@datagrok-libraries/chem-meta": "^1.2.
|
|
39
|
-
"@datagrok-libraries/
|
|
37
|
+
"@datagrok-libraries/bio": "^5.40.8",
|
|
38
|
+
"@datagrok-libraries/chem-meta": "^1.2.5",
|
|
39
|
+
"@datagrok-libraries/math": "^1.1.1",
|
|
40
|
+
"@datagrok-libraries/ml": "^6.6.0",
|
|
40
41
|
"@datagrok-libraries/tutorials": "^1.3.12",
|
|
41
42
|
"@datagrok-libraries/utils": "^4.2.0",
|
|
42
|
-
"@datagrok-libraries/math": "^1.0.8",
|
|
43
43
|
"@webgpu/types": "^0.1.40",
|
|
44
44
|
"ajv": "^8.12.0",
|
|
45
45
|
"ajv-errors": "^3.0.0",
|
|
@@ -103,7 +103,8 @@
|
|
|
103
103
|
"All users"
|
|
104
104
|
],
|
|
105
105
|
"sources": [
|
|
106
|
-
"css/helm.css"
|
|
106
|
+
"css/helm.css",
|
|
107
|
+
"common/openchemlib-full.js"
|
|
107
108
|
],
|
|
108
109
|
"category": "Bioinformatics",
|
|
109
110
|
"meta": {
|
package/src/package.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
/*
|
|
1
|
+
/* eslint max-lines: "off" */
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
@@ -486,7 +486,7 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<s
|
|
|
486
486
|
//meta.supportedSemTypes: Macromolecule
|
|
487
487
|
//meta.supportedTypes: string
|
|
488
488
|
//meta.supportedUnits: fasta,separator,helm
|
|
489
|
-
//meta.supportedDistanceFunctions: Levenshtein,
|
|
489
|
+
//meta.supportedDistanceFunctions: Hamming,Levenshtein,Monomer chemical distance,Needlemann-Wunsch
|
|
490
490
|
//input: column col {semType: Macromolecule}
|
|
491
491
|
//input: string metric
|
|
492
492
|
//input: double gapOpen = 1 {caption: Gap open penalty; default: 1; optional: true}
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
4
|
|
|
4
|
-
import
|
|
5
|
+
import $ from 'cash-dom';
|
|
6
|
+
import {fromEvent} from 'rxjs';
|
|
7
|
+
|
|
8
|
+
import {category, expect, test, delay, testEvent} from '@datagrok-libraries/utils/src/test';
|
|
5
9
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
10
|
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
11
|
|
|
@@ -12,6 +16,8 @@ import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-
|
|
|
12
16
|
import {awaitGrid} from './utils';
|
|
13
17
|
import * as C from '../utils/constants';
|
|
14
18
|
|
|
19
|
+
import {_package} from '../package-test';
|
|
20
|
+
|
|
15
21
|
category('renderers', () => {
|
|
16
22
|
test('long sequence performance ', async () => {
|
|
17
23
|
await performanceTest(generateLongSequence, 'Long sequences');
|
|
@@ -48,6 +54,10 @@ category('renderers', () => {
|
|
|
48
54
|
await _selectRendererBySemType();
|
|
49
55
|
});
|
|
50
56
|
|
|
57
|
+
test('scatterPlotTooltip', async () => {
|
|
58
|
+
await _testScatterPlotTooltip();
|
|
59
|
+
});
|
|
60
|
+
|
|
51
61
|
async function _rendererMacromoleculeFasta() {
|
|
52
62
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
|
|
53
63
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
@@ -197,4 +207,40 @@ category('renderers', () => {
|
|
|
197
207
|
`view renderer has set to '${renderer}' instead of correct 'MacromoleculeDifference'.`);
|
|
198
208
|
}
|
|
199
209
|
}
|
|
210
|
+
|
|
211
|
+
const seqCoordsCsv = `seq,x,y
|
|
212
|
+
ACGGTGTCGT,0,0
|
|
213
|
+
CGGTATCCCT,1,0
|
|
214
|
+
CTCGGCATGC,2,0
|
|
215
|
+
`;
|
|
216
|
+
|
|
217
|
+
/**
 * Docks a scatter plot next to the table built from `seqCoordsCsv`, simulates
 * a mouse move over the point with world coordinates (1, 0) and expects:
 *  - exactly one canvas inside the row tooltip table;
 *  - the scatter plot hit test at that point to return row 1.
 */
async function _testScatterPlotTooltip(): Promise<void> {
  const table = DG.DataFrame.fromCsv(seqCoordsCsv);
  table.currentRowIdx = 0;
  const tableView = grok.shell.addTableView(table);
  const scatter: DG.ScatterPlotViewer = table.plot.scatter({x: 'x', y: 'y'});
  tableView.dockManager.dock(scatter, DG.DOCK_TYPE.RIGHT, null);

  // Wait for the first scatter plot draw and for the grid, both started before awaiting
  const sceneDrawn = testEvent(scatter.onAfterDrawScene, () => {}, () => { scatter.invalidateCanvas(); }, 1000);
  const gridReady = awaitGrid(tableView.grid, 500);
  await Promise.all([sceneDrawn, gridReady]);

  // Translate the world point (1, 0) into client coordinates of the viewer root
  const rootRect = scatter.root.getBoundingClientRect();
  const pointOnScreen = scatter.worldToScreen(1, 0);
  const mouseMoveInit: MouseEventInit = {
    cancelable: true, bubbles: true, view: window, button: 0,
    clientX: rootRect.left + pointOnScreen.x, clientY: rootRect.top + pointOnScreen.y
  };
  const mouseMove = new MouseEvent('mousemove', mouseMoveInit);
  const canvas = $(scatter.root).find('canvas').get()[0] as HTMLCanvasElement;

  const checkTooltip = () => {
    _package.logger.debug(`Test: event, currentRowIdx=${table.currentRowIdx}`);
    expect($(ui.tooltip.root).find('div table.d4-row-tooltip-table tr td canvas').length, 1);
    expect(scatter.hitTest(pointOnScreen.x, pointOnScreen.y), 1);
  };
  const fireMouseMove = () => {
    canvas.dispatchEvent(mouseMove);
  };
  await testEvent(fromEvent(canvas, 'mousemove'), checkTooltip, fireMouseMove, 500);

  // TODO: Any error occurred become 'Cannot read properties of null (reading 'get$columns')' because of scatter plot
  //await testEvent(sp.onAfterDrawScene, () => {}, () => { sp.invalidateCanvas(); }, 200);
  await awaitGrid(tableView.grid, 500);
}
|
|
200
246
|
});
|
|
@@ -59,6 +59,31 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
59
59
|
return [text, simplified];
|
|
60
60
|
}
|
|
61
61
|
|
|
62
|
+
/** Shape of the `temp` storage (of a GridColumn or a Column) as used by the sequence cell renderer. */
type RendererGridCellTemp = {
  [mmcrTemps.monomerPlacer]: MonomerPlacer
}

/**
 * Gets the `temp` storage holding the MonomerPlacer for the cell being rendered.
 *
 * Primarily the store of the GridColumn is used; the store of the (table) Column
 * is the fallback (needed for the scatter plot tooltip).
 *
 * @param gridCell Grid cell being rendered.
 * @returns The `temp` object of the grid column, or of the table column as a fallback.
 * @throws Error if neither store is available.
 */
function getRendererFridCellTempTemp(gridCell: DG.GridCell): RendererGridCellTemp {
  // Property access is guarded: the original code expects these getters may throw
  let gridCol: DG.GridColumn | null = null;
  try { gridCol = gridCell.gridColumn; } catch { gridCol = null; }
  let temp: RendererGridCellTemp | null = gridCol && gridCol.dart ? gridCol.temp as RendererGridCellTemp : null;

  if (!temp) {
    let tableCol: DG.Column | null = null;
    try { tableCol = gridCell.cell.column; } catch { tableCol = null; }
    temp = tableCol ? tableCol.temp as RendererGridCellTemp : null;
  }

  // Nullish check: an undefined store must not leak to callers indexing the result
  if (temp == null)
    throw new Error(`Monomer placer store (GridColumn or Column) not found.`);
  return temp;
}
|
|
86
|
+
|
|
62
87
|
export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
63
88
|
private padding: number = 5;
|
|
64
89
|
|
|
@@ -82,7 +107,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
82
107
|
|
|
83
108
|
const tableCol: DG.Column = gridCell.cell.column;
|
|
84
109
|
//const tableColTemp: TempType = tableCol.temp;
|
|
85
|
-
const seqColTemp: MonomerPlacer = gridCell
|
|
110
|
+
const seqColTemp: MonomerPlacer = getRendererFridCellTempTemp(gridCell)[mmcrTemps.monomerPlacer];
|
|
86
111
|
if (!seqColTemp) return; // Can do nothing without precalculated data
|
|
87
112
|
|
|
88
113
|
const gridCellBounds: DG.Rect = gridCell.bounds;
|
|
@@ -165,7 +190,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
165
190
|
(!isNaN(tagMaxMonomerLength) ? tagMaxMonomerLength : _package.properties?.MaxMonomerLength) ?? 4;
|
|
166
191
|
}
|
|
167
192
|
|
|
168
|
-
let seqColTemp: MonomerPlacer = gridCell
|
|
193
|
+
let seqColTemp: MonomerPlacer = getRendererFridCellTempTemp(gridCell)[mmcrTemps.monomerPlacer];
|
|
169
194
|
if (!seqColTemp) {
|
|
170
195
|
seqColTemp = new MonomerPlacer(grid, tableCol,
|
|
171
196
|
() => {
|
|
@@ -192,7 +217,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
192
217
|
const _maxIndex = maxLengthWords.length;
|
|
193
218
|
|
|
194
219
|
// Store updated seqColTemp to the col temp
|
|
195
|
-
if (seqColTemp.updated) gridCell
|
|
220
|
+
if (seqColTemp.updated) getRendererFridCellTempTemp(gridCell)[mmcrTemps.monomerPlacer] = seqColTemp;
|
|
196
221
|
|
|
197
222
|
g.save();
|
|
198
223
|
try {
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
|
|
2
|
+
import {HELM_ITEM_SEPARATOR} from './const';
|
|
3
|
+
import {Bond} from './types';
|
|
4
|
+
|
|
5
|
+
/**
 * Connection section of a HELM string: a list of items like
 * `PEPTIDE1,PEPTIDE2,3:R3-1:R1` separated by HELM_ITEM_SEPARATOR.
 * Every item is validated on construction.
 */
export class ConnectionList {
  private connectionItems: string[];

  /**
   * @param connectionList Raw text of the connection section.
   * @throws Error if any of the items does not match the expected format.
   */
  constructor(connectionList: string) {
    const splitted = connectionList.split(HELM_ITEM_SEPARATOR);
    splitted.forEach((connectionItem: string) => this.validateConnectionItem(connectionItem));
    this.connectionItems = splitted;
  }

  /**
   * Checks that the WHOLE item has the form
   * `<type><n>,<type><n>,<monomer>:R<k>-<monomer>:R<k>`, where type is PEPTIDE or RNA.
   * The pattern is anchored: an unanchored match would accept items with extra
   * leading/trailing text, which later produce NaN monomer indices.
   */
  private validateConnectionItem(connectionItem: string): void {
    const allowedType = `(?:${HELM_POLYMER_TYPE.PEPTIDE}|${HELM_POLYMER_TYPE.RNA})`;
    const regex = new RegExp(`^${allowedType}[0-9]+,${allowedType}[0-9]+,[0-9]+:R[0-9]+-[0-9]+:R[0-9]+$`);
    if (!regex.test(connectionItem))
      throw new Error(`Cannot parse connection item from ${connectionItem}`);
  }

  /**
   * @returns For every connection item a pair of bond ends; each end holds the
   * id of the simple polymer and the bond (monomer index within that polymer and
   * r-group id).
   */
  getConnectionData(): {polymerId: string, bond: Bond}[][] {
    const result: {polymerId: string, bond: Bond}[][] = [];
    this.connectionItems.forEach((connectionItem: string) => {
      const pair: {polymerId: string, bond: Bond}[] = [];
      // splitted: [first polymer id, second polymer id, '<monomer>:R<k>-<monomer>:R<k>']
      const splitted = connectionItem.split(',');
      splitted[2].split('-').forEach((item, idx) => {
        const polymerId = splitted[idx];
        const data = item.split(':');
        // WARNING: monomer idx starts from 0
        const monomerIdx = parseInt(data[0], 10) - 1;
        // Drop the leading 'R' of the r-group label
        const rGroupId = parseInt(data[1].slice(1), 10);
        const bondData = {monomerIdx, rGroupId};
        pair.push({polymerId, bond: bondData});
      });
      result.push(pair);
    });
    return result;
  }
}
|
|
40
|
+
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as OCL from 'openchemlib/full';
|
|
5
|
+
|
|
6
|
+
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
7
|
+
import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
8
|
+
import {_package} from '../../../package';
|
|
9
|
+
import {Polymer} from './polymer';
|
|
10
|
+
import {GlobalMonomerPositionHandler} from './position-handler';
|
|
11
|
+
|
|
12
|
+
/**
 * Converts a column of HELM strings to SMILES / molfile columns.
 *
 * The monomer layout (pseudo-molfile) is requested from the `HELM:getMolfiles`
 * function; the RDKit module is obtained through `Chem:getRdKitModule`.
 */
export class HelmToMolfileConverter {
  /**
   * @param helmColumn Column with HELM strings.
   * @param df Data frame whose column list is used to pick unused names for the result columns.
   */
  constructor(private helmColumn: DG.Column<string>, private df: DG.DataFrame) {}

  /** @returns A new string column named after `smiles(<helm column name>)`, nulls replaced by ''. */
  async convertToSmiles(): Promise<DG.Column<string>> {
    const smiles = await this.getSmilesList();
    const columnName = this.df.columns.getUnusedName(`smiles(${this.helmColumn.name})`);
    return DG.Column.fromStrings(columnName, smiles.map((molecule) => molecule === null ? '' : molecule));
  }

  /** Converts every V2000 molfile built from HELM to SMILES with `DG.chem.convert`. */
  private async getSmilesList(): Promise<string[]> {
    const molfilesV2K = (await this.convertToMolfileV2KColumn()).toList();
    const smiles = molfilesV2K.map((mol) => DG.chem.convert(mol, DG.chem.Notation.MolBlock, DG.chem.Notation.Smiles));
    return smiles;
  }

  /**
   * Produces V3000 molfiles with OpenChemLib from RDKit molecules.
   * Every non-null molecule in `beautifiedMols` is deleted by this method.
   *
   * @param beautifiedMols RDKit molecules (null for rows without a molecule).
   * @param columnName Name of the resulting column.
   */
  private async getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string) {
    const beautifiedMolV2000 = beautifiedMols.map((mol) => {
      if (mol === null)
        return '';
      try {
        return mol.get_molblock();
      } finally {
        // Release the RDKit object even if serialization fails
        mol.delete();
      }
    });
    const molv3000Arr = new Array<string>(beautifiedMolV2000.length);
    const chiralityPb = DG.TaskBarProgressIndicator.create(`Handling chirality...`);
    try {
      for (let i = 0; i < beautifiedMolV2000.length; i++) {
        const oclMolecule = OCL.Molecule.fromMolfile(beautifiedMolV2000[i]);
        const molV3000 = oclMolecule.toMolfileV3();
        // Only the first 'STERAC1' occurrence is replaced (string pattern)
        molv3000Arr[i] = molV3000.replace('STERAC1', 'STEABS');
        const progress = i / beautifiedMolV2000.length * 100;
        chiralityPb.update(progress, `${progress.toFixed(2)}% of molecules completed`);
      }
    } finally {
      // The indicator must not stay in the task bar when a conversion throws
      chiralityPb.close();
    }
    return DG.Column.fromStrings(columnName, molv3000Arr);
  }

  /**
   * Builds a molfile column from HELM via SMILES and RDKit depiction
   * normalization/straightening.
   *
   * @param chiralityEngine When truthy, V3000 molfiles are produced through
   * OpenChemLib (see getMolV3000ViaOCL); otherwise RDKit `get_v3Kmolblock` is used.
   * @returns A new string column named after `molfile(<helm column name>)`;
   * rows without a molecule get ''.
   */
  async convertToRdKitBeautifiedMolfileColumn(chiralityEngine?: boolean): Promise<DG.Column<string>> {
    const smiles = await this.getSmilesList();
    const rdKitModule: RDModule = await grok.functions.call('Chem:getRdKitModule');
    const beautifiedMols = smiles.map((item) =>{
      if (item === '')
        return null;
      const mol = rdKitModule.get_mol(item);
      if (!mol)
        return null;
      try {
        mol.normalize_depiction(1);
        mol.straighten_depiction(true);
      } catch (err) {
        // Do not leak the RDKit object when the depiction step fails
        mol.delete();
        throw err;
      }
      return mol;
    });
    const columnName = this.df.columns.getUnusedName(`molfile(${this.helmColumn.name})`);

    if (chiralityEngine)
      return await this.getMolV3000ViaOCL(beautifiedMols, columnName);
    return DG.Column.fromStrings(columnName, beautifiedMols.map((mol) => {
      if (mol === null)
        return '';
      try {
        return mol.get_v3Kmolblock();
      } finally {
        mol.delete();
      }
    }));
  }

  /**
   * Builds V2000 molfiles for every HELM value. A row with an empty HELM value
   * or a row that fails to convert yields ''; conversion errors are logged.
   */
  private async convertToMolfileV2KColumn(): Promise<DG.Column<string>> {
    const polymerGraphColumn: DG.Column<string> = await this.getPolymerGraphColumn();
    const rdKitModule: RDModule = await grok.functions.call('Chem:getRdKitModule');
    const molfileList = polymerGraphColumn.toList().map(
      (pseudoMolfile: string, idx: number) => {
        const helm = this.helmColumn.get(idx);
        if (!helm)
          return '';
        try {
          return this.getPolymerMolfile(helm, pseudoMolfile, rdKitModule);
        } catch (err: any) {
          // Best effort: a single failed row must not break the whole column
          const [errMsg, errStack] = errInfo(err);
          _package.logger.error(errMsg, undefined, errStack);
          return '';
        }
      });
    const molfileColName = this.df.columns.getUnusedName(`molfileV2K(${this.helmColumn.name})`);
    const molfileColumn = DG.Column.fromList('string', molfileColName, molfileList);
    return molfileColumn;
  }

  /** Requests the column of polymer graphs (pseudo-molfiles) from `HELM:getMolfiles`. */
  private async getPolymerGraphColumn(): Promise<DG.Column<string>> {
    const polymerGraphColumn: DG.Column<string> =
      await grok.functions.call('HELM:getMolfiles', {col: this.helmColumn});
    return polymerGraphColumn;
  }

  /**
   * Assembles the molfile of one polymer: every monomer symbol of the polymer
   * graph is added to the polymer together with its shift.
   *
   * @param helm HELM string of the polymer.
   * @param polymerGraph Pseudo-molfile with monomer symbols and positions.
   * @param rdKitModule RDKit module passed to the Polymer.
   */
  private getPolymerMolfile(
    helm: string,
    polymerGraph: string,
    rdKitModule: RDModule
  ): string {
    const globalPositionHandler = new GlobalMonomerPositionHandler(polymerGraph);
    const polymer = new Polymer(helm, rdKitModule);
    globalPositionHandler.monomerSymbols.forEach((monomerSymbol: string, monomerIdx: number) => {
      const shift = globalPositionHandler.getMonomerShifts(monomerIdx);
      polymer.addMonomer(monomerSymbol, monomerIdx, shift);
    });
    const polymerMolfile = polymer.compileToMolfile();
    return polymerMolfile;
  }
}
|
|
124
|
+
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
|
|
2
|
+
import {ConnectionList} from './connection-list';
|
|
3
|
+
import {HELM_ITEM_SEPARATOR, HELM_SECTION_SEPARATOR} from './const';
|
|
4
|
+
import {SimplePolymer} from './simple-polymer';
|
|
5
|
+
import {Bond} from './types';
|
|
6
|
+
|
|
7
|
+
/**
 * Parsed HELM string: the simple polymers of the first section and the
 * (optional) connection list of the second one, with bonds re-indexed to
 * global monomer indices.
 */
export class Helm {
  /** List of pairs for bonded monomers, monomers indexed globally (within the
   * complex polymer scope) */
  readonly bondData: Bond[][];

  /** Maps global monomer index to r-group ids (starting from 1) participating
   * in connection */
  readonly bondedRGroupsMap: Map<number, number[]>;

  private simplePolymers: SimplePolymer[];
  private connectionList?: ConnectionList;

  /** @param helmString HELM string; sections are split by HELM_SECTION_SEPARATOR. */
  constructor(private helmString: string) {
    const helmSections = this.helmString.split(HELM_SECTION_SEPARATOR);
    this.simplePolymers = helmSections[0]
      .split(HELM_ITEM_SEPARATOR)
      .map((item) => new SimplePolymer(item));
    // The connection section may be empty or missing altogether
    if (helmSections[1])
      this.connectionList = new ConnectionList(helmSections[1]);
    this.bondData = this.getBondData();
    this.bondedRGroupsMap = this.getBondedRGroupsMap();
  }

  /** Collects, per global monomer index, the r-group ids used by all bonds. */
  private getBondedRGroupsMap(): Map<number, number[]> {
    const bondedRGroupsMap = new Map<number, number[]>();
    for (const bond of this.bondData) {
      for (const {monomerIdx, rGroupId} of bond) {
        const rGroups = bondedRGroupsMap.get(monomerIdx);
        if (rGroups)
          rGroups.push(rGroupId);
        else
          bondedRGroupsMap.set(monomerIdx, [rGroupId]);
      }
    }
    return bondedRGroupsMap;
  }

  /** @returns The original HELM string. */
  toString() {
    return this.helmString;
  }

  /**
   * @param monomerGlobalIdx Monomer index within the whole (complex) polymer.
   * @returns Polymer type of the simple polymer the monomer belongs to.
   * @throws Error if no simple polymer can be found for the index.
   */
  getPolymerTypeByMonomerIdx(monomerGlobalIdx: number): HELM_POLYMER_TYPE {
    const simplePolymer = this.getSimplePolymerByMonomerIdx(monomerGlobalIdx);
    const polymerType = simplePolymer.polymerType;
    return polymerType as HELM_POLYMER_TYPE;
  }

  /**
   * Finds the simple polymer with the largest index shift not exceeding the
   * monomer index, i.e. the one whose monomer range contains the index.
   * (Taking the first shift `<= idx` of the ascending list always gives 0 and
   * hence always the first simple polymer.)
   */
  private getSimplePolymerByMonomerIdx(monomerGlobalIdx: number): SimplePolymer {
    let shift = 0;
    let found: SimplePolymer | undefined;
    for (const simplePolymer of this.simplePolymers) {
      // Also stops on NaN, which then ends up in the error below
      if (!(monomerGlobalIdx >= shift))
        break;
      found = simplePolymer;
      shift += simplePolymer.monomers.length;
    }
    if (found === undefined)
      throw new Error(`Cannot find simple polymer for monomer ${monomerGlobalIdx}`);
    return found;
  }

  /** Adds `shift` to the monomer index of every bond end (in place). */
  private shiftBondMonomerIds(shift: number, bonds: Bond[][]): void {
    for (const bond of bonds) {
      for (const bondPart of bond)
        bondPart.monomerIdx += shift;
    }
  }

  /** @returns For every simple polymer id, the global index of its first monomer. */
  private getMonomerIdxShifts(): {[simplePolymerId: string]: number} {
    const result: {[simplePolymerId: string]: number} = {};
    let shift = 0;
    for (const simplePolymer of this.simplePolymers) {
      result[simplePolymer.id] = shift;
      shift += simplePolymer.monomers.length;
    }
    return result;
  }

  /**
   * Gathers bonds of all simple polymers followed by the bonds of the
   * connection list, all with global monomer indices.
   *
   * @throws Error if a connection refers to a simple polymer that is not present.
   */
  private getBondData(): Bond[][] {
    const shifts = this.getMonomerIdxShifts();
    const result: Bond[][] = [];
    for (const simplePolymer of this.simplePolymers) {
      const bondData = simplePolymer.getBondData();
      this.shiftBondMonomerIds(shifts[simplePolymer.id], bondData);
      result.push(...bondData);
    }
    if (this.connectionList) {
      for (const connection of this.connectionList.getConnectionData()) {
        const data: Bond[] = [];
        for (const connectionItem of connection) {
          const shift = shifts[connectionItem.polymerId];
          // Without this check the monomer index would silently become NaN
          if (shift === undefined)
            throw new Error(`Connection refers to unknown simple polymer ${connectionItem.polymerId}`);
          const bond = connectionItem.bond;
          bond.monomerIdx += shift;
          data.push(bond);
        }
        result.push(data);
      }
    }
    return result;
  }
}
|
|
112
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {HelmToMolfileConverter} from './converter';
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
|
|
2
|
+
import {MolfileAtoms} from './mol-atoms';
|
|
3
|
+
|
|
4
|
+
/** Atom block of a V2000 molfile: fixed-width atom lines with x in columns 0-10 and y in columns 10-20. */
export class MolfileAtomsV2K extends MolfileAtoms {
  /** @param molfileHandler Source of the raw atom lines. */
  constructor(molfileHandler: MolfileHandlerBase) {
    super();
    this.rawAtomLines = molfileHandler.getAtomLines();
    this.coordinates = this.rawAtomLines.map((atomLine: string) => ({
      x: parseFloat(atomLine.substring(0, 10)),
      y: parseFloat(atomLine.substring(10, 20)),
    }));
  }

  /** Atom lines with the x/y fields rewritten from the current coordinates (4 decimals, width 10). */
  get atomLines(): string[] {
    const formatField = (value: number): string => value.toFixed(4).padStart(10, ' ');
    return this.rawAtomLines.map((atomLine: string, atomIdx: number) => {
      const point = this.coordinates[atomIdx];
      const xField = formatField(point.x);
      const yField = formatField(point.y);
      const tail = atomLine.substring(20);
      return `${xField}${yField}${tail}`;
    });
  }
}
|
|
24
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
|
|
2
|
+
import {MolfileAtoms} from './mol-atoms';
|
|
3
|
+
|
|
4
|
+
const PRECISION = 6;
|
|
5
|
+
|
|
6
|
+
/** Atom block of a V3000 molfile; coordinates are taken from the molfile handler. */
export class MolfileAtomsV3K extends MolfileAtoms {
  /** @param molfileHandler Source of the raw atom lines and of the x/y coordinates. */
  constructor(private molfileHandler: MolfileHandlerBase) {
    super();
    this.rawAtomLines = molfileHandler.getAtomLines();
    this.coordinates = this.getCoordinates();
  }

  /** Pairs up the handler's x and y arrays by index. */
  private getCoordinates(): {x: number, y: number}[] {
    const x = this.molfileHandler.x;
    const y = this.molfileHandler.y;
    return Array.from(x).map((xCoord, idx) => {
      return {x: xCoord, y: y[idx]};
    });
  }


  /**
   * Atom lines with x/y replaced by the current coordinates (PRECISION
   * decimals) and with the `RGROUPS=(...)` attribute removed.
   */
  get atomLines(): string[] {
    // todo: optimize, optionally port to molfile-handler
    // Group 1 is lazy on purpose: a greedy `.*` also captures the sign and the
    // leading digits of x, so they get duplicated in front of the new value
    // (e.g. '12.345' -> '112.345000', '-2.5' -> '--2.500000').
    const coordinateRegex = /^(M\s+V30\s.*?)(-?\d+\.\d+)( )(-?\d+\.\d+)( -?\d+\.\d+.*)$/;
    const rGroupsRegex = /\sRGROUPS=\(\d+(\s+\d+)*\)/;

    return this.rawAtomLines.map((line: string, idx: number) => {
      const coordinates = this.coordinates[idx];
      const x = coordinates.x.toFixed(PRECISION);
      const y = coordinates.y.toFixed(PRECISION);

      // p2/p4 are the old x/y, p5 is z with the rest of the line
      return line.replace(coordinateRegex, (match, p1, p2, p3, p4, p5) => {
        return p1 + x + p3 + y + p5;
      }).replace(rGroupsRegex, '');
    });
  }
}
|
|
38
|
+
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import {R_GROUP_ELEMENT_SYMBOL} from '@datagrok-libraries/chem-meta/src/formats/molfile-const';
|
|
2
|
+
|
|
3
|
+
/**
 * Base class for the atom block of a molfile: keeps raw atom lines together
 * with their 2D coordinates and supports deleting atoms and moving them.
 */
export abstract class MolfileAtoms {
  protected coordinates: {x: number, y: number}[] = [];
  protected rawAtomLines: string[] = [];

  /** Current coordinates of the atoms. */
  get atomCoordinates(): {x: number, y: number}[] {
    return this.coordinates;
  }

  /** Atom lines reflecting the current coordinates (format specific). */
  abstract get atomLines(): string[];

  /**
   * Replaces the first occurrence of R_GROUP_ELEMENT_SYMBOL in the raw line of
   * the atom with the given element symbol.
   *
   * @param atomIdx Index of the atom line.
   * @param newElementSymbol Symbol to put instead of the r-group symbol.
   */
  replaceRGroupSymbolByElement(atomIdx: number, newElementSymbol: string): void {
    this.rawAtomLines[atomIdx] = this.rawAtomLines[atomIdx].replace(R_GROUP_ELEMENT_SYMBOL, newElementSymbol);
  }

  /** @param indices Indices of the atoms to remove (both coordinates and lines). */
  deleteAtoms(indices: number[]): void {
    // Set lookup instead of Array.includes for every atom
    const toDelete = new Set<number>(indices);
    this.coordinates = this.coordinates.filter((_, idx) => !toDelete.has(idx));
    this.rawAtomLines = this.rawAtomLines.filter((_, idx) => !toDelete.has(idx));
  }

  /**
   * Translates all atoms.
   *
   * @param shift Offsets added to x and y.
   * @throws Error if any resulting coordinate is NaN.
   */
  shift(shift: {x: number, y: number}): void {
    this.coordinates = this.coordinates.map((coordinates) => {
      const newX = coordinates.x + shift.x;
      const newY = coordinates.y + shift.y;
      if (isNaN(newX) || isNaN(newY))
        throw new Error(`Cannot shift coordinates by ${shift.x}, ${shift.y}`);
      return {x: newX, y: newY};
    });
  }

  /**
   * Rotates all atoms around the origin: x' = x cos - y sin, y' = x sin + y cos.
   *
   * @param angle Rotation angle, passed to Math.cos / Math.sin (radians).
   * @throws Error if any resulting coordinate is NaN.
   */
  rotate(angle: number): void {
    // Loop-invariant, computed once
    const cos = Math.cos(angle);
    const sin = Math.sin(angle);
    this.coordinates = this.coordinates.map(({x, y}) => {
      const newX = x * cos - y * sin;
      const newY = x * sin + y * cos;
      if (isNaN(newX) || isNaN(newY))
        throw new Error(`Cannot rotate coordinates by ${angle}`);
      return {x: newX, y: newY};
    });
  }
}
|
|
44
|
+
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
|
|
2
|
+
import {MolfileBonds} from './mol-bonds';
|
|
3
|
+
|
|
4
|
+
/** Bond block of a V2000 molfile: the bonded atoms occupy columns 0-3 and 3-6 of each bond line. */
export class MolfileBondsV2K extends MolfileBonds {
  /** @param molfileHandler Source of the raw bond lines. */
  constructor(molfileHandler: MolfileHandlerBase) {
    super();
    this.rawBondLines = molfileHandler.getBondLines();
    this.bondedAtomPairs = this.rawBondLines.map((bondLine: string) => {
      const atomFields = [bondLine.substring(0, 3), bondLine.substring(3, 6)];
      return atomFields.map((field) => parseInt(field));
    });
  }

  /**
   * Bond lines with the two atom fields rewritten from the current pairs.
   * Throws if a pair still contains -1.
   */
  getBondLines(): string[] {
    const formatField = (atom: number): string => atom.toString().padStart(3, ' ');
    return this.bondedAtomPairs.map((bondedPair, bondIdx) => {
      if (bondedPair.includes(-1))
        throw new Error(`Bonded pair ${bondedPair} contains -1`);
      const first = formatField(bondedPair[0]);
      const second = formatField(bondedPair[1]);
      const tail = this.rawBondLines[bondIdx].substring(6);
      return `${first}${second}${tail}`;
    });
  }
}
|
|
26
|
+
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
|
|
2
|
+
import {MolfileBonds} from './mol-bonds';
|
|
3
|
+
|
|
4
|
+
/** Bond block of a V3000 molfile; bonded atom pairs are taken from the molfile handler. */
export class MolfileBondsV3K extends MolfileBonds {
  /** @param molfileHandler Source of the raw bond lines and of the bonded atom pairs. */
  constructor(private molfileHandler: MolfileHandlerBase) {
    super();
    this.rawBondLines = molfileHandler.getBondLines();
    this.bondedAtomPairs = this.getBondedAtomPairs();
  }

  /** Copies the first two entries of every handler pair into a plain array. */
  private getBondedAtomPairs(): number[][] {
    return this.molfileHandler.pairsOfBondedAtoms.map((atoms) => [atoms[0], atoms[1]]);
  }

  /**
   * Bond lines with the two atom numbers rewritten from the current pairs.
   * Throws if a pair still contains -1.
   */
  getBondLines(): string[] {
    // todo: optimize
    // Groups: (prefix up to bond type)(first atom)(separator)(second atom)(rest)
    const bondLinePattern = /^(M\s+V30\s+\d+\s+\d+\s+)(\d+)(\s+)(\d+)(.*)$/;
    return this.bondedAtomPairs.map((bondedPair, bondIdx) => {
      if (bondedPair.includes(-1))
        throw new Error(`Bonded pair ${bondedPair} contains -1`);
      const [firstAtom, secondAtom] = bondedPair;
      return this.rawBondLines[bondIdx].replace(
        bondLinePattern,
        (match, prefix, oldFirst, separator, oldSecond, rest) =>
          `${prefix}${firstAtom}${separator}${secondAtom}${rest}`
      );
    });
  }
}
|
|
30
|
+
|