@datagrok/bio 2.12.11 → 2.12.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.eslintrc.json +4 -1
  2. package/CHANGELOG.md +10 -0
  3. package/dist/246.js +2 -0
  4. package/dist/246.js.map +1 -0
  5. package/dist/42.js +1 -1
  6. package/dist/42.js.map +1 -1
  7. package/dist/545.js +3 -0
  8. package/dist/545.js.map +1 -0
  9. package/dist/590.js.map +1 -1
  10. package/dist/package-test.js +5 -5
  11. package/dist/package-test.js.LICENSE.txt +0 -8
  12. package/dist/package-test.js.map +1 -1
  13. package/dist/package.js +5 -5
  14. package/dist/package.js.LICENSE.txt +0 -8
  15. package/dist/package.js.map +1 -1
  16. package/package.json +7 -6
  17. package/src/package.ts +2 -2
  18. package/src/tests/renderers-test.ts +47 -1
  19. package/src/utils/cell-renderer.ts +28 -3
  20. package/src/utils/helm-to-molfile/converter/connection-list.ts +40 -0
  21. package/src/utils/helm-to-molfile/converter/const.ts +4 -0
  22. package/src/utils/helm-to-molfile/converter/converter.ts +124 -0
  23. package/src/utils/helm-to-molfile/converter/helm.ts +112 -0
  24. package/src/utils/helm-to-molfile/converter/index.ts +1 -0
  25. package/src/utils/helm-to-molfile/converter/mol-atoms-v2k.ts +24 -0
  26. package/src/utils/helm-to-molfile/converter/mol-atoms-v3k.ts +38 -0
  27. package/src/utils/helm-to-molfile/converter/mol-atoms.ts +44 -0
  28. package/src/utils/helm-to-molfile/converter/mol-bonds-v2k.ts +26 -0
  29. package/src/utils/helm-to-molfile/converter/mol-bonds-v3k.ts +30 -0
  30. package/src/utils/helm-to-molfile/converter/mol-bonds.ts +56 -0
  31. package/src/utils/helm-to-molfile/converter/mol-wrapper-factory.ts +16 -0
  32. package/src/utils/helm-to-molfile/converter/mol-wrapper-old.ts +100 -0
  33. package/src/utils/helm-to-molfile/converter/mol-wrapper-v2k.ts +21 -0
  34. package/src/utils/helm-to-molfile/converter/mol-wrapper-v3k.ts +21 -0
  35. package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +79 -0
  36. package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +103 -0
  37. package/src/utils/helm-to-molfile/converter/polymer.ts +99 -0
  38. package/src/utils/helm-to-molfile/converter/position-handler.ts +23 -0
  39. package/src/utils/helm-to-molfile/converter/r-group-handler.ts +122 -0
  40. package/src/utils/helm-to-molfile/converter/simple-polymer.ts +89 -0
  41. package/src/utils/helm-to-molfile/converter/types.ts +12 -0
  42. package/src/utils/helm-to-molfile/utils.ts +32 -0
  43. package/src/utils/poly-tool/const.ts +0 -4
  44. package/src/utils/poly-tool/transformation.ts +126 -62
  45. package/src/utils/sequence-to-mol.ts +1 -1
  46. package/webpack.config.js +4 -3
  47. package/dist/709.js +0 -2
  48. package/dist/709.js.map +0 -1
  49. package/dist/777.js +0 -3
  50. package/dist/777.js.map +0 -1
  51. package/link-bio +0 -7
  52. package/setup +0 -52
  53. package/src/utils/atomic-works.ts +0 -367
  54. package/src/utils/helm-to-molfile.ts +0 -959
  55. /package/dist/{777.js.LICENSE.txt → 545.js.LICENSE.txt} +0 -0
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Aleksandr Tanas",
6
6
  "email": "atanas@datagrok.ai"
7
7
  },
8
- "version": "2.12.11",
8
+ "version": "2.12.13",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,12 +34,12 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.40.6",
38
- "@datagrok-libraries/chem-meta": "^1.2.3",
39
- "@datagrok-libraries/ml": "^6.5.1",
37
+ "@datagrok-libraries/bio": "^5.40.8",
38
+ "@datagrok-libraries/chem-meta": "^1.2.5",
39
+ "@datagrok-libraries/math": "^1.1.1",
40
+ "@datagrok-libraries/ml": "^6.6.0",
40
41
  "@datagrok-libraries/tutorials": "^1.3.12",
41
42
  "@datagrok-libraries/utils": "^4.2.0",
42
- "@datagrok-libraries/math": "^1.0.8",
43
43
  "@webgpu/types": "^0.1.40",
44
44
  "ajv": "^8.12.0",
45
45
  "ajv-errors": "^3.0.0",
@@ -103,7 +103,8 @@
103
103
  "All users"
104
104
  ],
105
105
  "sources": [
106
- "css/helm.css"
106
+ "css/helm.css",
107
+ "common/openchemlib-full.js"
107
108
  ],
108
109
  "category": "Bioinformatics",
109
110
  "meta": {
package/src/package.ts CHANGED
@@ -1,4 +1,4 @@
1
- /* Do not change these import lines to match external modules in webpack configuration */
1
+ /* eslint max-lines: "off" */
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
  import * as DG from 'datagrok-api/dg';
@@ -486,7 +486,7 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<s
486
486
  //meta.supportedSemTypes: Macromolecule
487
487
  //meta.supportedTypes: string
488
488
  //meta.supportedUnits: fasta,separator,helm
489
- //meta.supportedDistanceFunctions: Levenshtein,Hamming,Monomer chemical distance,Needlemann-Wunsch
489
+ //meta.supportedDistanceFunctions: Hamming,Levenshtein,Monomer chemical distance,Needlemann-Wunsch
490
490
  //input: column col {semType: Macromolecule}
491
491
  //input: string metric
492
492
  //input: double gapOpen = 1 {caption: Gap open penalty; default: 1; optional: true}
@@ -1,7 +1,11 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
3
4
 
4
- import {category, expect, test, delay} from '@datagrok-libraries/utils/src/test';
5
+ import $ from 'cash-dom';
6
+ import {fromEvent} from 'rxjs';
7
+
8
+ import {category, expect, test, delay, testEvent} from '@datagrok-libraries/utils/src/test';
5
9
  import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
10
  import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
7
11
 
@@ -12,6 +16,8 @@ import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-
12
16
  import {awaitGrid} from './utils';
13
17
  import * as C from '../utils/constants';
14
18
 
19
+ import {_package} from '../package-test';
20
+
15
21
  category('renderers', () => {
16
22
  test('long sequence performance ', async () => {
17
23
  await performanceTest(generateLongSequence, 'Long sequences');
@@ -48,6 +54,10 @@ category('renderers', () => {
48
54
  await _selectRendererBySemType();
49
55
  });
50
56
 
57
+ test('scatterPlotTooltip', async () => {
58
+ await _testScatterPlotTooltip();
59
+ });
60
+
51
61
  async function _rendererMacromoleculeFasta() {
52
62
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
53
63
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
@@ -197,4 +207,40 @@ category('renderers', () => {
197
207
  `view renderer has set to '${renderer}' instead of correct 'MacromoleculeDifference'.`);
198
208
  }
199
209
  }
210
+
211
+ const seqCoordsCsv = `seq,x,y
212
+ ACGGTGTCGT,0,0
213
+ CGGTATCCCT,1,0
214
+ CTCGGCATGC,2,0
215
+ `;
216
+
217
/**
 * Hovers the scatter plot point built from the second CSV row (x=1, y=0) and checks that
 * the row tooltip contains exactly one canvas (the rendered sequence cell) and that the
 * scatter plot hit test at that screen position resolves to row 1.
 */
async function _testScatterPlotTooltip(): Promise<void> {
  const table = DG.DataFrame.fromCsv(seqCoordsCsv);
  table.currentRowIdx = 0;
  const tableView = grok.shell.addTableView(table);
  const scatter: DG.ScatterPlotViewer = table.plot.scatter({x: 'x', y: 'y'});
  tableView.dockManager.dock(scatter, DG.DOCK_TYPE.RIGHT, null);

  // Wait for the first scatter plot draw and for the grid before measuring anything
  const scatterDrawn = testEvent(scatter.onAfterDrawScene, () => {}, () => { scatter.invalidateCanvas(); }, 1000);
  const gridReady = awaitGrid(tableView.grid, 500);
  await Promise.all([scatterDrawn, gridReady]);

  // Screen position of the point (1, 0), translated to client coordinates of the viewer root
  const rootRect = scatter.root.getBoundingClientRect();
  const point = scatter.worldToScreen(1, 0);
  const moveEvent = new MouseEvent('mousemove', {
    cancelable: true, bubbles: true, view: window, button: 0,
    clientX: rootRect.left + point.x, clientY: rootRect.top + point.y,
  });
  const canvas = $(scatter.root).find('canvas').get()[0] as HTMLCanvasElement;

  const checkTooltip = (): void => {
    _package.logger.debug(`Test: event, currentRowIdx=${table.currentRowIdx}`);
    expect($(ui.tooltip.root).find('div table.d4-row-tooltip-table tr td canvas').length, 1);
    expect(scatter.hitTest(point.x, point.y), 1);
  };
  const fireMouseMove = (): void => {
    canvas.dispatchEvent(moveEvent);
  };
  await testEvent(fromEvent(canvas, 'mousemove'), checkTooltip, fireMouseMove, 500);

  // TODO: Any error raised here is reported as
  //   'Cannot read properties of null (reading 'get$columns')' because of the scatter plot
  //await testEvent(sp.onAfterDrawScene, () => {}, () => { sp.invalidateCanvas(); }, 200);
  await awaitGrid(tableView.grid, 500);
}
200
246
  });
@@ -59,6 +59,31 @@ export function processSequence(subParts: string[]): [string[], boolean] {
59
59
  return [text, simplified];
60
60
  }
61
61
 
62
/** Shape of the temp storage (of a GridColumn or a table Column) that holds the MonomerPlacer. */
type RendererGridCellTemp = {
  [mmcrTemps.monomerPlacer]: MonomerPlacer
}

/**
 * Returns the temp storage used to store/get the MonomerPlacer for a grid cell.
 *
 * The GridColumn temp is the primary store. When the grid column is not available
 * (reading it throws, it is null, or it has no `dart` handle) or its temp is empty,
 * the temp of the table Column is used instead — this is the scatter plot tooltip case.
 *
 * @param gridCell Grid cell being rendered.
 * @returns The temp object of the GridColumn, or of the table Column as a fallback.
 * @throws Error when neither store is available.
 */
function getRendererFridCellTempTemp(gridCell: DG.GridCell): RendererGridCellTemp {
  let temp: RendererGridCellTemp | null = null;

  // Accessing gridColumn may throw for cells that are not attached to a grid
  let gridCol: DG.GridColumn | null = null;
  try { gridCol = gridCell.gridColumn; } catch { gridCol = null; }
  temp = gridCol && gridCol.dart ? gridCol.temp as RendererGridCellTemp : null;

  if (!temp) {
    let tableCol: DG.Column | null = null;
    try { tableCol = gridCell.cell.column; } catch { tableCol = null; }
    temp = tableCol ? tableCol.temp as RendererGridCellTemp : null;
  }

  // Covers both null and undefined, so callers never index into a missing store
  if (!temp)
    throw new Error(`Monomer placer store (GridColumn or Column) not found.`);
  return temp;
}
86
+
62
87
  export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
63
88
  private padding: number = 5;
64
89
 
@@ -82,7 +107,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
82
107
 
83
108
  const tableCol: DG.Column = gridCell.cell.column;
84
109
  //const tableColTemp: TempType = tableCol.temp;
85
- const seqColTemp: MonomerPlacer = gridCell.gridColumn.temp[mmcrTemps.monomerPlacer];
110
+ const seqColTemp: MonomerPlacer = getRendererFridCellTempTemp(gridCell)[mmcrTemps.monomerPlacer];
86
111
  if (!seqColTemp) return; // Can do nothing without precalculated data
87
112
 
88
113
  const gridCellBounds: DG.Rect = gridCell.bounds;
@@ -165,7 +190,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
165
190
  (!isNaN(tagMaxMonomerLength) ? tagMaxMonomerLength : _package.properties?.MaxMonomerLength) ?? 4;
166
191
  }
167
192
 
168
- let seqColTemp: MonomerPlacer = gridCell.gridColumn.temp[mmcrTemps.monomerPlacer];
193
+ let seqColTemp: MonomerPlacer = getRendererFridCellTempTemp(gridCell)[mmcrTemps.monomerPlacer];
169
194
  if (!seqColTemp) {
170
195
  seqColTemp = new MonomerPlacer(grid, tableCol,
171
196
  () => {
@@ -192,7 +217,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
192
217
  const _maxIndex = maxLengthWords.length;
193
218
 
194
219
  // Store updated seqColTemp to the col temp
195
- if (seqColTemp.updated) gridCell.gridColumn.temp[mmcrTemps.monomerPlacer] = seqColTemp;
220
+ if (seqColTemp.updated) getRendererFridCellTempTemp(gridCell)[mmcrTemps.monomerPlacer] = seqColTemp;
196
221
 
197
222
  g.save();
198
223
  try {
@@ -0,0 +1,40 @@
1
+ import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
2
+ import {HELM_ITEM_SEPARATOR} from './const';
3
+ import {Bond} from './types';
4
+
5
/**
 * Connection section of a HELM string: a list of items of the form
 * `<polymerId>,<polymerId>,<monomer>:R<n>-<monomer>:R<n>` joined by HELM_ITEM_SEPARATOR.
 * Only PEPTIDE and RNA polymer ids are accepted.
 */
export class ConnectionList {
  /**
   * @param connectionList Raw connection section of a HELM string.
   * @throws Error when any item does not start with a well-formed connection.
   */
  constructor(connectionList: string) {
    const splitted = connectionList.split(HELM_ITEM_SEPARATOR);
    // Build the pattern once for the whole list instead of once per item
    const itemRegex = ConnectionList.buildItemRegex();
    for (const connectionItem of splitted)
      this.validateConnectionItem(connectionItem, itemRegex);
    this.connectionItems = splitted;
  }

  private connectionItems: string[];

  /** Pattern of a connection item, anchored at the start so that the polymer ids
   * read later by getConnectionData() are exactly the ones that were validated. */
  private static buildItemRegex(): RegExp {
    const allowedType = `(?:${HELM_POLYMER_TYPE.PEPTIDE}|${HELM_POLYMER_TYPE.RNA})`;
    return new RegExp(`^${allowedType}[0-9]+,${allowedType}[0-9]+,[0-9]+:R[0-9]+-[0-9]+:R[0-9]+`);
  }

  private validateConnectionItem(connectionItem: string, itemRegex: RegExp): void {
    if (!itemRegex.test(connectionItem))
      throw new Error(`Cannot parse connection item from ${connectionItem}`);
  }

  /**
   * Parses every connection item into a pair of bond ends.
   * @returns For each connection, two entries (source and target) holding the polymer id
   * and the bond end; monomer indices are zero-based within their simple polymer.
   */
  getConnectionData(): {polymerId: string, bond: Bond}[][] {
    const result: {polymerId: string, bond: Bond}[][] = [];
    this.connectionItems.forEach((connectionItem: string) => {
      const pair: {polymerId: string, bond: Bond}[] = [];
      const splitted = connectionItem.split(',');
      splitted[2].split('-').forEach((item, idx) => {
        // The idx-th bond end belongs to the idx-th polymer id of the item
        const polymerId = splitted[idx];
        const data = item.split(':');
        // WARNING: monomer idx starts from 0
        const monomerIdx = parseInt(data[0], 10) - 1;
        // Drop the leading 'R' of the r-group label
        const rGroupId = parseInt(data[1].slice(1), 10);
        const bondData = {monomerIdx, rGroupId};
        pair.push({polymerId, bond: bondData});
      });
      result.push(pair);
    });
    return result;
  }
}
40
+
@@ -0,0 +1,4 @@
1
+ export const HELM_ITEM_SEPARATOR = '|';
2
+ export const HELM_SECTION_SEPARATOR = '$';
3
+
4
+ export const HYDROGEN_SYMBOL = 'H';
@@ -0,0 +1,124 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as OCL from 'openchemlib/full';
5
+
6
+ import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
7
+ import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
8
+ import {_package} from '../../../package';
9
+ import {Polymer} from './polymer';
10
+ import {GlobalMonomerPositionHandler} from './position-handler';
11
+
12
/**
 * Converts a column of HELM strings to SMILES or molfile columns.
 *
 * The pipeline is: HELM -> polymer graph (function 'HELM:getMolfiles') -> molfile V2000
 * assembled from monomers -> SMILES -> RDKit molecule with normalized depiction -> molfile.
 */
export class HelmToMolfileConverter {
  /**
   * @param helmColumn Column with HELM strings to convert.
   * @param df Data frame whose column list is used to pick unused names for the results.
   */
  constructor(private helmColumn: DG.Column<string>, private df: DG.DataFrame) {}

  /** Builds a column of SMILES strings; rows that could not be converted are empty. */
  async convertToSmiles(): Promise<DG.Column<string>> {
    const smiles = await this.getSmilesList();
    const columnName = this.df.columns.getUnusedName(`smiles(${this.helmColumn.name})`);
    return DG.Column.fromStrings(columnName, smiles.map((molecule) => molecule ?? ''));
  }

  /** Converts every assembled molfile V2000 to SMILES. */
  private async getSmilesList(): Promise<string[]> {
    const molfilesV2K = (await this.convertToMolfileV2KColumn()).toList();
    const smiles = molfilesV2K.map((mol) => DG.chem.convert(mol, DG.chem.Notation.MolBlock, DG.chem.Notation.Smiles));
    return smiles;
  }

  /**
   * Produces molfile V3000 strings through OpenChemLib, replacing the first 'STERAC1'
   * marker with 'STEABS' in every result. All passed RDKit molecules are deleted.
   */
  private async getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string) {
    const beautifiedMolV2000 = beautifiedMols.map((mol) => {
      if (mol === null)
        return '';
      try {
        return mol.get_molblock();
      } finally {
        // RDKit objects are not garbage collected, release even if serialization fails
        mol.delete();
      }
    });
    const molv3000Arr = new Array<string>(beautifiedMolV2000.length);
    const chiralityPb = DG.TaskBarProgressIndicator.create(`Handling chirality...`);
    try {
      for (let i = 0; i < beautifiedMolV2000.length; i++) {
        const oclMolecule = OCL.Molecule.fromMolfile(beautifiedMolV2000[i]);
        const molV3000 = oclMolecule.toMolfileV3();
        molv3000Arr[i] = molV3000.replace('STERAC1', 'STEABS');
        const progress = i / beautifiedMolV2000.length * 100;
        chiralityPb.update(progress, `${progress.toFixed(2)}% of molecules completed`);
      }
    } finally {
      // Do not leave the indicator hanging in the task bar when a molecule fails
      chiralityPb.close();
    }
    return DG.Column.fromStrings(columnName, molv3000Arr);
  }

  /**
   * Builds a column of molfiles with depiction normalized and straightened by RDKit.
   * @param chiralityEngine When truthy, the molfile V3000 is generated via OpenChemLib
   * (see getMolV3000ViaOCL), otherwise via RDKit `get_v3Kmolblock`.
   * @returns Column of molfiles; rows that could not be converted are empty.
   */
  async convertToRdKitBeautifiedMolfileColumn(chiralityEngine?: boolean): Promise<DG.Column<string>> {
    const smiles = await this.getSmilesList();
    const rdKitModule: RDModule = await grok.functions.call('Chem:getRdKitModule');
    const beautifiedMols = smiles.map((item) => {
      if (item === '')
        return null;
      const mol = rdKitModule.get_mol(item);
      if (!mol)
        return null;
      try {
        mol.normalize_depiction(1);
        mol.straighten_depiction(true);
      } catch (err) {
        // The molecule will not reach the code that deletes it, release it here
        mol.delete();
        throw err;
      }
      return mol;
    });
    const columnName = this.df.columns.getUnusedName(`molfile(${this.helmColumn.name})`);

    if (chiralityEngine)
      return await this.getMolV3000ViaOCL(beautifiedMols, columnName);
    return DG.Column.fromStrings(columnName, beautifiedMols.map((mol) => {
      if (mol === null)
        return '';
      try {
        return mol.get_v3Kmolblock();
      } finally {
        mol.delete();
      }
    }));
  }

  /**
   * Assembles a molfile V2000 for every row. Rows with an empty HELM value, and rows
   * whose conversion throws (the error is logged), produce an empty string.
   */
  private async convertToMolfileV2KColumn(): Promise<DG.Column<string>> {
    const polymerGraphColumn: DG.Column<string> = await this.getPolymerGraphColumn();
    const rdKitModule = await grok.functions.call('Chem:getRdKitModule');
    const molfileList = polymerGraphColumn.toList().map(
      (pseudoMolfile: string, idx: number) => {
        const helm = this.helmColumn.get(idx);
        if (!helm)
          return '';
        try {
          return this.getPolymerMolfile(helm, pseudoMolfile, rdKitModule);
        } catch (err: any) {
          const [errMsg, errStack] = errInfo(err);
          _package.logger.error(errMsg, undefined, errStack);
          // A failed row must not break the conversion of the whole column
          return '';
        }
      });
    const molfileColName = this.df.columns.getUnusedName(`molfileV2K(${this.helmColumn.name})`);
    const molfileColumn = DG.Column.fromList('string', molfileColName, molfileList);
    return molfileColumn;
  }

  /** Calls 'HELM:getMolfiles' for the HELM column; the result is used as the polymer graph. */
  private async getPolymerGraphColumn(): Promise<DG.Column<string>> {
    const polymerGraphColumn: DG.Column<string> =
      await grok.functions.call('HELM:getMolfiles', {col: this.helmColumn});
    return polymerGraphColumn;
  }

  /**
   * Assembles the molfile of a single polymer: every monomer symbol listed by the
   * position handler is added to the polymer with the shift reported for its index.
   */
  private getPolymerMolfile(
    helm: string,
    polymerGraph: string,
    rdKitModule: RDModule
  ): string {
    const globalPositionHandler = new GlobalMonomerPositionHandler(polymerGraph);
    const polymer = new Polymer(helm, rdKitModule);
    globalPositionHandler.monomerSymbols.forEach((monomerSymbol: string, monomerIdx: number) => {
      const shift = globalPositionHandler.getMonomerShifts(monomerIdx);
      polymer.addMonomer(monomerSymbol, monomerIdx, shift);
    });
    const polymerMolfile = polymer.compileToMolfile();
    return polymerMolfile;
  }
}
124
+
@@ -0,0 +1,112 @@
1
+ import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
2
+ import {ConnectionList} from './connection-list';
3
+ import {HELM_ITEM_SEPARATOR, HELM_SECTION_SEPARATOR} from './const';
4
+ import {SimplePolymer} from './simple-polymer';
5
+ import {Bond} from './types';
6
+
7
/**
 * Parsed HELM string: its simple polymers, the optional connection list and the bonds
 * between monomers, with monomers indexed globally (simple polymers are concatenated in
 * the order they appear in the HELM string).
 */
export class Helm {
  /** @param helmString HELM string; sections are separated by HELM_SECTION_SEPARATOR. */
  constructor(private helmString: string) {
    const helmSections = this.helmString.split(HELM_SECTION_SEPARATOR);
    const simplePolymers = helmSections[0].split(HELM_ITEM_SEPARATOR);
    this.simplePolymers = simplePolymers
      .map((item) => new SimplePolymer(item));
    // The section is empty when there are no connections and missing when the string
    // has no section separator at all
    if (helmSections[1])
      this.connectionList = new ConnectionList(helmSections[1]);
    this.bondData = this.getBondData();

    this.bondedRGroupsMap = this.getBondedRGroupsMap();
  }

  /** List of pairs for bonded monomers, monomers indexed globally (within the
   * complex polymer scope) */
  readonly bondData: Bond[][];

  private simplePolymers: SimplePolymer[];
  private connectionList?: ConnectionList;

  /** Maps global monomer index to r-group ids (starting from 1) participating
   * in connection */
  readonly bondedRGroupsMap: Map<number, number[]>;

  private getBondedRGroupsMap(): Map<number, number[]> {
    const bondedRGroupsMap = new Map<number, number[]>();
    for (const bond of this.bondData) {
      for (const {monomerIdx, rGroupId} of bond) {
        const rGroupIds = bondedRGroupsMap.get(monomerIdx);
        if (rGroupIds)
          rGroupIds.push(rGroupId);
        else
          bondedRGroupsMap.set(monomerIdx, [rGroupId]);
      }
    }
    return bondedRGroupsMap;
  }

  toString() {
    return this.helmString;
  }

  /** Returns the polymer type of the simple polymer the monomer belongs to.
   * @throws Error when the index precedes every simple polymer (negative or NaN). */
  getPolymerTypeByMonomerIdx(monomerGlobalIdx: number): HELM_POLYMER_TYPE {
    const simplePolymer = this.getSimplePolymerByMonomerIdx(monomerGlobalIdx);
    const polymerType = simplePolymer.polymerType;
    return polymerType as HELM_POLYMER_TYPE;
  }

  /** Finds the simple polymer with the greatest index shift not exceeding the monomer index. */
  private getSimplePolymerByMonomerIdx(monomerGlobalIdx: number): SimplePolymer {
    let found: SimplePolymer | undefined;
    let shift = 0;
    // Shifts are non-decreasing in polymer order, so the last polymer that satisfies
    // the condition is the one that owns the monomer (empty polymers are stepped over)
    for (const simplePolymer of this.simplePolymers) {
      if (!(monomerGlobalIdx >= shift))
        break;
      found = simplePolymer;
      shift += simplePolymer.monomers.length;
    }
    if (found === undefined)
      throw new Error(`Cannot find simple polymer for monomer ${monomerGlobalIdx}`);
    return found;
  }

  /** Adds the shift to the monomer index of every bond end, in place. */
  private shiftBondMonomerIds(shift: number, bonds: Bond[][]): void {
    bonds.forEach((bond) => {
      bond.forEach((bondPart) => {
        bondPart.monomerIdx += shift;
      });
    });
  }

  /** Maps simple polymer id to the global index of its first monomer. */
  private getMonomerIdxShifts(): {[simplePolymerId: string]: number} {
    const result: {[simplePolymerId: string]: number} = {};
    let shift = 0;
    this.simplePolymers.forEach((simplePolymer) => {
      result[simplePolymer.id] = shift;
      shift += simplePolymer.monomers.length;
    });
    return result;
  }

  /** Collects bonds of every simple polymer followed by the bonds of the connection
   * list, all converted to global monomer indices. */
  private getBondData(): Bond[][] {
    const shifts = this.getMonomerIdxShifts();
    const result: Bond[][] = [];
    this.simplePolymers.forEach((simplePolymer) => {
      const bondData = simplePolymer.getBondData();
      const shift = shifts[simplePolymer.id];
      this.shiftBondMonomerIds(shift, bondData);
      result.push(...bondData);
    });
    if (this.connectionList) {
      const connectionData = this.connectionList.getConnectionData();
      connectionData.forEach((connection) => {
        const data: Bond[] = [];
        connection.forEach((connectionItem) => {
          const shift = shifts[connectionItem.polymerId];
          const bond = connectionItem.bond;
          bond.monomerIdx += shift;
          data.push(bond);
        });
        result.push(data);
      });
    }
    return result;
  }
}
112
+
@@ -0,0 +1 @@
1
+ export {HelmToMolfileConverter} from './converter';
@@ -0,0 +1,24 @@
1
+ import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
2
+ import {MolfileAtoms} from './mol-atoms';
3
+
4
/**
 * Atom lines of a V2000 molfile. The x and y coordinates are read from the first two
 * 10-character fields of every atom line and written back to the same fields.
 */
export class MolfileAtomsV2K extends MolfileAtoms {
  /** @param molfileHandler Handler that supplies the raw atom lines. */
  constructor(molfileHandler: MolfileHandlerBase) {
    super();
    this.rawAtomLines = molfileHandler.getAtomLines();
    this.coordinates = this.rawAtomLines.map((atomLine: string) => ({
      x: parseFloat(atomLine.substring(0, 10)),
      y: parseFloat(atomLine.substring(10, 20)),
    }));
  }

  /** Atom lines with the current coordinates, 4 decimals right-aligned in 10 characters. */
  get atomLines(): string[] {
    const formatField = (value: number): string => value.toFixed(4).padStart(10, ' ');
    return this.rawAtomLines.map((atomLine: string, atomIdx: number) => {
      const position = this.coordinates[atomIdx];
      const xField = formatField(position.x);
      const yField = formatField(position.y);
      // Everything after the first two fields is kept as it was in the raw line
      return xField + yField + atomLine.substring(20);
    });
  }
}
24
+
@@ -0,0 +1,38 @@
1
+ import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
2
+ import {MolfileAtoms} from './mol-atoms';
3
+
4
/** Number of decimals used when writing coordinates to V3000 atom lines. */
const PRECISION = 6;

/**
 * Atom lines of a V3000 molfile. Coordinates are taken from the molfile handler and
 * substituted back into the raw atom lines; RGROUPS entries are stripped from the output.
 */
export class MolfileAtomsV3K extends MolfileAtoms {
  /** @param molfileHandler Handler that supplies raw atom lines and x/y coordinates. */
  constructor(private molfileHandler: MolfileHandlerBase) {
    super();
    this.rawAtomLines = molfileHandler.getAtomLines();
    this.coordinates = this.getCoordinates();
  }

  /** Pairs the x and y coordinates of the handler by atom index. */
  private getCoordinates(): {x: number, y: number}[] {
    const xs = this.molfileHandler.x;
    const ys = this.molfileHandler.y;
    return Array.from(xs, (xCoord, atomIdx) => ({x: xCoord, y: ys[atomIdx]}));
  }

  /** Atom lines with the current coordinates and without RGROUPS entries. */
  get atomLines(): string[] {
    // todo: optimize, optionally port to molfile-handler
    const coordinateRegex = /^(M V30 .*)(-?\d+\.\d+)( )(-?\d+\.\d+)( -?\d+\.\d+.*)$/;
    const rGroupsRegex = /\sRGROUPS=\(\d+(\s+\d+)*\)/;

    return this.rawAtomLines.map((rawLine: string, atomIdx: number) => {
      const position = this.coordinates[atomIdx];
      const xText = position.x.toFixed(PRECISION);
      const yText = position.y.toFixed(PRECISION);

      // Keep the line prefix, the separator and the tail; replace only the x and y values
      const withCoordinates = rawLine.replace(coordinateRegex,
        (_match, prefix, _oldX, separator, _oldY, tail) => prefix + xText + separator + yText + tail);
      return withCoordinates.replace(rGroupsRegex, '');
    });
  }
}
38
+
@@ -0,0 +1,44 @@
1
+ import {R_GROUP_ELEMENT_SYMBOL} from '@datagrok-libraries/chem-meta/src/formats/molfile-const';
2
+
3
/**
 * Base for the atom block of a molfile: keeps raw atom lines together with the x/y
 * coordinates of every atom (same order) and supports editing both.
 */
export abstract class MolfileAtoms {
  protected coordinates: {x: number, y: number}[] = [];
  protected rawAtomLines: string[] = [];

  /** Current coordinates of the atoms, in atom line order. */
  get atomCoordinates(): {x: number, y: number}[] {
    return this.coordinates;
  }

  /** Atom lines reflecting the current coordinates; format specific to the molfile version. */
  abstract get atomLines(): string[];

  /** Replaces the first occurrence of the r-group symbol in the atom line by an element symbol. */
  replaceRGroupSymbolByElement(atomIdx: number, newElementSymbol: string): void {
    this.rawAtomLines[atomIdx] = this.rawAtomLines[atomIdx].replace(R_GROUP_ELEMENT_SYMBOL, newElementSymbol);
  }

  /** Removes atoms by zero-based indices; duplicates and unknown indices are ignored. */
  deleteAtoms(indices: number[]): void {
    // Set lookup instead of scanning the index list for every atom
    const deleted = new Set<number>(indices);
    const isKept = (_: unknown, idx: number): boolean => !deleted.has(idx);
    this.coordinates = this.coordinates.filter(isKept);
    this.rawAtomLines = this.rawAtomLines.filter(isKept);
  }

  /**
   * Translates every atom by the given vector.
   * @throws Error when a resulting coordinate is NaN.
   */
  shift(shift: {x: number, y: number}): void {
    this.coordinates = this.coordinates.map((coordinates) => {
      const newX = coordinates.x + shift.x;
      const newY = coordinates.y + shift.y;
      if (isNaN(newX) || isNaN(newY))
        throw new Error(`Cannot shift coordinates by ${shift.x}, ${shift.y}`);
      return {x: newX, y: newY};
    });
  }

  /**
   * Rotates every atom around the origin, counter-clockwise for a positive angle.
   * @param angle Rotation angle in radians.
   * @throws Error when a resulting coordinate is NaN.
   */
  rotate(angle: number): void {
    // The angle is the same for all atoms, evaluate the trigonometry once
    const cos = Math.cos(angle);
    const sin = Math.sin(angle);
    this.coordinates = this.coordinates.map(({x, y}) => {
      const newX = x * cos - y * sin;
      const newY = x * sin + y * cos;
      if (isNaN(newX) || isNaN(newY))
        throw new Error(`Cannot rotate coordinates by ${angle}`);
      return {x: newX, y: newY};
    });
  }
}
44
+
@@ -0,0 +1,26 @@
1
+ import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
2
+ import {MolfileBonds} from './mol-bonds';
3
+
4
/**
 * Bond lines of a V2000 molfile. The two bonded atom numbers are read from the first
 * two 3-character fields of every bond line and written back to the same fields.
 */
export class MolfileBondsV2K extends MolfileBonds {
  /** @param molfileHandler Handler that supplies the raw bond lines. */
  constructor(molfileHandler: MolfileHandlerBase) {
    super();
    this.rawBondLines = molfileHandler.getBondLines();
    this.bondedAtomPairs = this.rawBondLines.map((bondLine: string) => [
      parseInt(bondLine.substring(0, 3)),
      parseInt(bondLine.substring(3, 6)),
    ]);
  }

  /** Get bond lines with new values for bonded atoms */
  getBondLines(): string[] {
    const formatAtom = (atom: number): string => atom.toString().padStart(3, ' ');
    const lines: string[] = [];
    for (let bondIdx = 0; bondIdx < this.bondedAtomPairs.length; bondIdx++) {
      const pair = this.bondedAtomPairs[bondIdx];
      // -1 is not a valid atom number here, refuse to write such a bond
      if (pair.includes(-1))
        throw new Error(`Bonded pair ${pair} contains -1`);
      const tail = this.rawBondLines[bondIdx].substring(6);
      lines.push(formatAtom(pair[0]) + formatAtom(pair[1]) + tail);
    }
    return lines;
  }
}
26
+
@@ -0,0 +1,30 @@
1
+ import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
2
+ import {MolfileBonds} from './mol-bonds';
3
+
4
/**
 * Bond lines of a V3000 molfile. Bonded atom pairs come from the molfile handler and
 * are substituted back into the raw bond lines.
 */
export class MolfileBondsV3K extends MolfileBonds {
  /** @param molfileHandler Handler that supplies raw bond lines and bonded atom pairs. */
  constructor(private molfileHandler: MolfileHandlerBase) {
    super();
    this.rawBondLines = molfileHandler.getBondLines();
    this.bondedAtomPairs = this.getBondedAtomPairs();
  }

  /** Copies the bonded atom pairs of the handler into plain two-element arrays. */
  private getBondedAtomPairs(): number[][] {
    const handlerPairs = this.molfileHandler.pairsOfBondedAtoms;
    return handlerPairs.map((atoms) => [atoms[0], atoms[1]]);
  }

  /** Get bond lines with new values for bonded atoms */
  getBondLines(): string[] {
    // todo: optimize
    const regex = /^(M\s+V30\s+\d+\s+\d+\s+)(\d+)(\s+)(\d+)(.*)$/;
    const lines: string[] = [];
    for (let bondIdx = 0; bondIdx < this.bondedAtomPairs.length; bondIdx++) {
      const pair = this.bondedAtomPairs[bondIdx];
      // -1 is not a valid atom number here, refuse to write such a bond
      if (pair.includes(-1))
        throw new Error(`Bonded pair ${pair} contains -1`);
      // Keep the prefix, the separator and the tail of the line; replace only the atom numbers
      const updated = this.rawBondLines[bondIdx].replace(regex,
        (_match, prefix, _first, separator, _second, tail) => `${prefix}${pair[0]}${separator}${pair[1]}${tail}`);
      lines.push(updated);
    }
    return lines;
  }
}
30
+