@datagrok/bio 2.25.10 → 2.25.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/detectors.js +1 -0
- package/dist/242.js +1 -1
- package/dist/242.js.map +1 -1
- package/dist/284.js.map +1 -1
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/980.js.map +1 -1
- package/dist/package-test.js +5 -5
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +3 -4
- package/src/analysis/sequence-similarity-viewer.ts +4 -4
- package/src/tests/to-atomic-level-tests.ts +121 -1
- package/src/utils/biln.ts +10 -2
- package/src/utils/convert.ts +62 -16
- package/src/utils/helm-to-molfile/converter/mol-atoms.ts +5 -0
- package/src/utils/helm-to-molfile/converter/mol-bonds.ts +5 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +4 -2
- package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +24 -10
- package/src/utils/helm-to-molfile/converter/r-group-handler.ts +100 -7
- package/src/utils/helm-to-molfile/converter/types.ts +12 -0
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +108 -11
- package/test-console-output-1.log +456 -417
- package/test-record-1.mp4 +0 -0
- package/webpack.config.js +22 -1
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.25.10",
|
|
8
|
+
"version": "2.25.12",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,10 +44,10 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.61.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.61.6",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
|
-
"@datagrok-libraries/ml": "^6.10.
|
|
50
|
+
"@datagrok-libraries/ml": "^6.10.9",
|
|
51
51
|
"@datagrok-libraries/test": "^1.1.0",
|
|
52
52
|
"@datagrok-libraries/tutorials": "^1.7.4",
|
|
53
53
|
"@datagrok-libraries/utils": "^4.6.9",
|
|
@@ -77,7 +77,6 @@
|
|
|
77
77
|
"@types/wu": "^2.1.44",
|
|
78
78
|
"@typescript-eslint/eslint-plugin": "^8.8.1",
|
|
79
79
|
"@typescript-eslint/parser": "^8.8.1",
|
|
80
|
-
"datagrok-tools": "^5.1.5",
|
|
81
80
|
"eslint": "^8.57.1",
|
|
82
81
|
"eslint-config-google": "^0.14.0",
|
|
83
82
|
"eslint-plugin-rxjs": "^5.0.3",
|
|
@@ -62,7 +62,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
62
62
|
if (!this.beforeRender())
|
|
63
63
|
return;
|
|
64
64
|
if (this.targetColumn) {
|
|
65
|
-
this.curIdx = this.dataFrame
|
|
65
|
+
this.curIdx = (this.dataFrame?.currentRowIdx ?? -1) == -1 ? 0 : this.dataFrame!.currentRowIdx;
|
|
66
66
|
|
|
67
67
|
// Force recomputation if parameters changed
|
|
68
68
|
const parametersChanged =
|
|
@@ -72,7 +72,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
72
72
|
this.lastGapExtend !== this.gapExtend;
|
|
73
73
|
|
|
74
74
|
if ((computeData && !this.gridSelect) || parametersChanged) {
|
|
75
|
-
this.targetMoleculeIdx = (this.dataFrame
|
|
75
|
+
this.targetMoleculeIdx = (this.dataFrame?.currentRowIdx ?? -1) < 0 ? 0 : this.dataFrame!.currentRowIdx; await this.computeByMM();
|
|
76
76
|
const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
|
|
77
77
|
`similar (${this.targetColumn})`;
|
|
78
78
|
this.molCol = DG.Column.string(similarColumnName,
|
|
@@ -87,9 +87,9 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
87
87
|
let prevTimer: any = null;
|
|
88
88
|
const _ = resDf.onCurrentRowChanged.subscribe((_: any) => {
|
|
89
89
|
prevTimer && clearTimeout(prevTimer);
|
|
90
|
-
if ((resDf
|
|
90
|
+
if ((resDf?.currentRowIdx ?? -1) < 0)
|
|
91
91
|
return;
|
|
92
|
-
this.dataFrame.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx);
|
|
92
|
+
this.dataFrame && (this.dataFrame.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx));
|
|
93
93
|
prevTimer = setTimeout(() => { this.createPropertyPanel(resDf); }, 300);
|
|
94
94
|
this.gridSelect = true;
|
|
95
95
|
});
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-len */
|
|
1
2
|
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
3
|
import * as grok from 'datagrok-api/grok';
|
|
3
4
|
import * as ui from 'datagrok-api/ui';
|
|
@@ -8,7 +9,7 @@ import wu from 'wu';
|
|
|
8
9
|
import {before, after, category, test, expectArray, expect} from '@datagrok-libraries/test/src/test';
|
|
9
10
|
import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
10
11
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
11
|
-
import {IMonomerLib} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
12
|
+
import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
12
13
|
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
13
14
|
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
14
15
|
import {
|
|
@@ -23,6 +24,80 @@ import {_package} from '../package-test';
|
|
|
23
24
|
const appPath = 'System:AppData/Bio';
|
|
24
25
|
const fileSource = new DG.FileSource(appPath);
|
|
25
26
|
|
|
27
|
+
const complexMonomerAllylRgroup: Monomer = {
|
|
28
|
+
'symbol': 'allyl_mon',
|
|
29
|
+
'name': 'monomer with Allyl R group',
|
|
30
|
+
'molfile': '\n RDKit 2D\n\n 9 8 0 0 0 0 0 0 0 0999 V2000\n 1.4434 -2.1667 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1.4434 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.1443 0.0833 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.1547 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.4537 0.0833 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\n -3.7528 -0.6667 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 0.1443 1.5833 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.4434 2.3333 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 2.7424 0.0833 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 2 0\n 2 3 1 0\n 3 4 1 6\n 4 5 1 0\n 5 6 1 0\n 3 7 1 0\n 7 8 1 0\n 2 9 1 0\nM RGP 3 6 3 8 1 9 2\nM END\n',
|
|
31
|
+
'smiles': 'O=C([C@H](CS[*:3])N[*:1])[*:2]',
|
|
32
|
+
'polymerType': 'PEPTIDE',
|
|
33
|
+
'monomerType': 'Backbone',
|
|
34
|
+
'naturalAnalog': 'C',
|
|
35
|
+
'id': 16,
|
|
36
|
+
'rgroups': [
|
|
37
|
+
{
|
|
38
|
+
'alternateId': 'R1-H',
|
|
39
|
+
'capGroupName': 'H',
|
|
40
|
+
'capGroupSmiles': '[H][*:1]',
|
|
41
|
+
'label': 'R1'
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
'alternateId': 'R2-OH',
|
|
45
|
+
'capGroupName': 'OH',
|
|
46
|
+
'capGroupSmiles': 'O[*:2]',
|
|
47
|
+
'label': 'R2'
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
'alternateId': 'R3-Allyl',
|
|
51
|
+
'capGroupName': 'Allyl',
|
|
52
|
+
'capGroupSmiles': 'C=C[*:3]',
|
|
53
|
+
'label': 'R3'
|
|
54
|
+
}
|
|
55
|
+
],
|
|
56
|
+
'author': 'Admin',
|
|
57
|
+
'createDate': '2026-02-18T14:48:41.723Z',
|
|
58
|
+
'meta': {}
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
const complexMonomerWithComplexRgroup: Monomer = {
|
|
62
|
+
'symbol': 'SomeComplex',
|
|
63
|
+
'name': 'Some complex monomer with complex R group',
|
|
64
|
+
'molfile': '\n RDKit 2D\n\n 10 9 0 0 0 0 0 0 0 0999 V2000\n -1.4289 -0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.7280 0.3750 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\n -4.0270 -0.3750 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n -0.1299 0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.1299 1.8750 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.1691 2.6250 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 1.1691 -0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.1691 -1.8750 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 2.4682 -2.6250 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 2.4682 0.3750 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\n 2 3 1 0\n 4 1 1 6\n 4 5 1 0\n 5 6 1 0\n 4 7 1 0\n 7 8 1 0\n 8 9 1 0\n 7 10 1 0\nM RGP 4 3 3 6 1 9 4 10 2\nM END\n',
|
|
65
|
+
'smiles': '[*:4]OC([C@H](CS[*:3])N[*:1])[*:2]',
|
|
66
|
+
'polymerType': 'PEPTIDE',
|
|
67
|
+
'monomerType': 'Backbone',
|
|
68
|
+
'naturalAnalog': 'C',
|
|
69
|
+
'id': 16,
|
|
70
|
+
'rgroups': [
|
|
71
|
+
{
|
|
72
|
+
'alternateId': 'R1-H',
|
|
73
|
+
'capGroupName': 'H',
|
|
74
|
+
'capGroupSmiles': '[H][*:1]',
|
|
75
|
+
'label': 'R1'
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
'alternateId': 'R2-OH',
|
|
79
|
+
'capGroupName': 'OH',
|
|
80
|
+
'capGroupSmiles': 'O[*:2]',
|
|
81
|
+
'label': 'R2'
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
'alternateId': 'R3-Something',
|
|
85
|
+
'capGroupName': 'Something',
|
|
86
|
+
'capGroupSmiles': 'C=CC([*:3])=C',
|
|
87
|
+
'label': 'R3'
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
'alternateId': 'R4-SomethingElse',
|
|
91
|
+
'capGroupName': 'SomethingElse',
|
|
92
|
+
'capGroupSmiles': 'ClCCCC=CC([*:4])=CCC',
|
|
93
|
+
'label': 'R4'
|
|
94
|
+
}
|
|
95
|
+
],
|
|
96
|
+
'author': 'Admin',
|
|
97
|
+
'createDate': '2026-02-18T14:48:41.723Z',
|
|
98
|
+
'meta': {}
|
|
99
|
+
};
|
|
100
|
+
|
|
26
101
|
const enum Tests {
|
|
27
102
|
PT = 'peptides-fasta',
|
|
28
103
|
DNA = 'dna-fasta',
|
|
@@ -225,6 +300,51 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
|
|
|
225
300
|
expect(polishMolfile(resCol.get(0)), polishMolfile(tgtMol));
|
|
226
301
|
});
|
|
227
302
|
|
|
303
|
+
async function _testToAtomicLevelWithCustomMonomer(srcHelm: string, expectedSmiles: string): Promise<void> {
|
|
304
|
+
let error: any = null;
|
|
305
|
+
// first, patch the monomer library with a custom monomers
|
|
306
|
+
const monomerLib = monomerLibHelper.getMonomerLib();
|
|
307
|
+
// @ts-ignore
|
|
308
|
+
monomerLib._monomers['PEPTIDE'][complexMonomerAllylRgroup.symbol] = complexMonomerAllylRgroup;
|
|
309
|
+
// @ts-ignore
|
|
310
|
+
monomerLib._monomers['PEPTIDE'][complexMonomerWithComplexRgroup.symbol] = complexMonomerWithComplexRgroup;
|
|
311
|
+
|
|
312
|
+
try {
|
|
313
|
+
const converter = await seqHelper.getHelmToMolfileConverter(monomerLib);
|
|
314
|
+
const resMolFile = seqHelper.helmToAtomicLevelSingle(srcHelm, converter, true, true);
|
|
315
|
+
const resSmiles = grok.chem.convert(resMolFile.molfile, grok.chem.Notation.Unknown, grok.chem.Notation.Smiles);
|
|
316
|
+
expect(resSmiles, expectedSmiles);
|
|
317
|
+
} catch (err) {
|
|
318
|
+
error = err;
|
|
319
|
+
}
|
|
320
|
+
// restore the monomer library to avoid affecting other tests
|
|
321
|
+
// @ts-ignore
|
|
322
|
+
delete monomerLib._monomers['PEPTIDE'][complexMonomerAllylRgroup.symbol];
|
|
323
|
+
// @ts-ignore
|
|
324
|
+
delete monomerLib._monomers['PEPTIDE'][complexMonomerWithComplexRgroup.symbol];
|
|
325
|
+
|
|
326
|
+
if (error)
|
|
327
|
+
throw error;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
test('SingleHelmMonomerWithAllylGroups', async () => {
|
|
331
|
+
const srcHelm = `PEPTIDE1{[${complexMonomerAllylRgroup.symbol}]}$$$$V2.0`;
|
|
332
|
+
const expectedSmiles = 'C=CSC[C@H](N)C(=O)O';
|
|
333
|
+
await _testToAtomicLevelWithCustomMonomer(srcHelm, expectedSmiles);
|
|
334
|
+
});
|
|
335
|
+
|
|
336
|
+
test('SingleHelmMonomerWithComplexRGroups', async () => {
|
|
337
|
+
const srcHelm = `PEPTIDE1{[${complexMonomerWithComplexRgroup.symbol}]}$$$$V2.0`;
|
|
338
|
+
const expectedSmiles = 'C=CC(=C)SC[C@H](N)C(O)OC(C=CCCCCl)=CCC';
|
|
339
|
+
await _testToAtomicLevelWithCustomMonomer(srcHelm, expectedSmiles);
|
|
340
|
+
});
|
|
341
|
+
|
|
342
|
+
test('HelmPolymerWithComplexRGroups', async () => {
|
|
343
|
+
const srcHelm = `PEPTIDE1{[dI].[Trp_Ome].[Asp_OMe].[D-Cit].[meG].[Phe_4NH2].[Phe_34diCl].[meY].[Pro_4Me3OH].[Met_O].[NMe2Abz].[Tyr_Ph4OH].[3Pal].[xiIle].[Tyr_35diI].[Ala_tBu]}|PEPTIDE2{[${complexMonomerAllylRgroup.symbol}].[${complexMonomerWithComplexRgroup.symbol}]}$PEPTIDE1,PEPTIDE1,16:R2-1:R1|PEPTIDE1,PEPTIDE2,1:R3-1:R1$$$V2.0`;
|
|
344
|
+
const expectedSmiles = 'C=CSC[C@H](NCC[C@@H](C)[C@H]1NC(=O)[C@H](C(C)(C)C)NC(=O)[C@H](Cc2cc(I)c(O)c(I)c2)NC(=O)[C@H](C(C)CC)NC(=O)[C@H](Cc2cccnc2)NC(=O)[C@H](Cc2ccc(Oc3ccc(O)cc3)cc2)NC(=O)c2ccccc2N(C)C(=O)[C@H](CCS(C)=O)NC(=O)[C@@H]2C(O)C(C)CN2C(=O)[C@H](Cc2ccc(O)cc2)N(C)C(=O)[C@H](Cc2ccc(Cl)c(Cl)c2)NC(=O)[C@H](Cc2ccc(N)cc2)NC(=O)CN(C)C(=O)[C@@H](CCCNC(N)=O)NC(=O)[C@H](CC(=O)OC)NC(=O)[C@H](Cc2cn(OC)c3ccccc23)NC1=O)C(=O)N[C@@H](CSC(=C)C=C)C(O)OC(C=CCCCCl)=CCC';
|
|
345
|
+
await _testToAtomicLevelWithCustomMonomer(srcHelm, expectedSmiles);
|
|
346
|
+
});
|
|
347
|
+
|
|
228
348
|
async function _testToAtomicLevel(
|
|
229
349
|
df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper
|
|
230
350
|
): Promise<DG.Column | null> {
|
package/src/utils/biln.ts
CHANGED
|
@@ -6,7 +6,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
6
6
|
|
|
7
7
|
/* eslint-disable max-len */
|
|
8
8
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
9
|
-
import {INotationProvider, SplitterFunc} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
9
|
+
import {INotationProvider, NotationProviderBase, SplitterFunc} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
10
10
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
11
11
|
import {CellRendererBackBase} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
|
|
12
12
|
import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
|
|
@@ -14,16 +14,24 @@ import {monomerToShort, splitterAsBiln} from '@datagrok-libraries/bio/src/utils/
|
|
|
14
14
|
import {_package} from '../package';
|
|
15
15
|
/* eslint-enable max-len */
|
|
16
16
|
|
|
17
|
-
export class BilnNotationProvider implements INotationProvider {
|
|
17
|
+
export class BilnNotationProvider extends NotationProviderBase implements INotationProvider {
|
|
18
18
|
public readonly splitter: SplitterFunc;
|
|
19
19
|
|
|
20
20
|
get defaultGapOriginal(): string { return ''; }
|
|
21
21
|
|
|
22
|
+
static override get notationName(): string { return NOTATION.BILN; }
|
|
23
|
+
|
|
24
|
+
static override get implementsFromHelm(): boolean { return false; }
|
|
25
|
+
|
|
26
|
+
static override convertFromHelm(helm: string, options: any): string {
|
|
27
|
+
throw new Error('Canonical way of converting from helm to biln must be used');
|
|
28
|
+
}
|
|
22
29
|
constructor(
|
|
23
30
|
public readonly separator: string,
|
|
24
31
|
public readonly seqHelper: ISeqHelper,
|
|
25
32
|
public readonly seqCol: DG.Column
|
|
26
33
|
) {
|
|
34
|
+
super();
|
|
27
35
|
this.splitter = splitterAsBiln.bind(this);
|
|
28
36
|
}
|
|
29
37
|
|
package/src/utils/convert.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-len */
|
|
1
2
|
import * as DG from 'datagrok-api/dg';
|
|
2
3
|
import * as ui from 'datagrok-api/ui';
|
|
3
4
|
import * as grok from 'datagrok-api/grok';
|
|
@@ -7,6 +8,7 @@ import {Subscription} from 'rxjs';
|
|
|
7
8
|
|
|
8
9
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
10
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
|
+
import {NotationProviderBase} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
10
12
|
|
|
11
13
|
|
|
12
14
|
let convertDialog: DG.Dialog | null = null;
|
|
@@ -18,7 +20,7 @@ let convertDialogSubs: Subscription[] = [];
|
|
|
18
20
|
* @param {DG.Column<string>} col Column with 'Macromolecule' semantic type
|
|
19
21
|
* @param {ISeqHelper} seqHelper
|
|
20
22
|
*/
|
|
21
|
-
export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelper): void {
|
|
23
|
+
export async function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelper): Promise<void> {
|
|
22
24
|
let srcCol = col ?? grok.shell.t.columns.bySemType('Macromolecule')!;
|
|
23
25
|
if (!srcCol)
|
|
24
26
|
throw new Error('No column with Macromolecule semantic type found');
|
|
@@ -41,6 +43,9 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
|
|
|
41
43
|
NOTATION.HELM,
|
|
42
44
|
NOTATION.BILN
|
|
43
45
|
];
|
|
46
|
+
|
|
47
|
+
const notationProviderConstructors = await NotationProviderBase.getProviderConstructors();
|
|
48
|
+
|
|
44
49
|
const toggleColumn = (newCol: DG.Column) => {
|
|
45
50
|
srcCol = newCol;
|
|
46
51
|
converterSh = seqHelper.getSeqHandler(srcCol);
|
|
@@ -49,6 +54,15 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
|
|
|
49
54
|
separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
|
|
50
55
|
dialogHeader.textContent = 'Current notation: ' + currentNotation;
|
|
51
56
|
filteredNotations = notations.filter((e) => e !== currentNotation);
|
|
57
|
+
if (currentNotation === NOTATION.CUSTOM)
|
|
58
|
+
filteredNotations = [NOTATION.HELM];
|
|
59
|
+
if (currentNotation === NOTATION.HELM) {
|
|
60
|
+
// add custom notations that
|
|
61
|
+
notationProviderConstructors.forEach((c) => {
|
|
62
|
+
if (c.implementsFromHelm)
|
|
63
|
+
filteredNotations.unshift(c.notationName as NOTATION); // hack :)
|
|
64
|
+
});
|
|
65
|
+
}
|
|
52
66
|
targetNotationInput = ui.input.choice('Convert to', {
|
|
53
67
|
value: filteredNotations[0], items: filteredNotations,
|
|
54
68
|
onValueChanged: toggleSeparator
|
|
@@ -72,6 +86,15 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
|
|
|
72
86
|
|
|
73
87
|
const separatorArray = ['-', '.', '/'];
|
|
74
88
|
let filteredNotations = notations.filter((e) => e !== currentNotation);
|
|
89
|
+
if (currentNotation === NOTATION.CUSTOM)
|
|
90
|
+
filteredNotations = [NOTATION.HELM];
|
|
91
|
+
if (currentNotation === NOTATION.HELM) {
|
|
92
|
+
// add custom notations that
|
|
93
|
+
notationProviderConstructors.forEach((c) => {
|
|
94
|
+
if (c.implementsFromHelm)
|
|
95
|
+
filteredNotations.unshift(c.notationName as NOTATION); // hack :)
|
|
96
|
+
});
|
|
97
|
+
}
|
|
75
98
|
|
|
76
99
|
const separatorInput = ui.input.choice('Separator', {value: separatorArray[0], items: separatorArray});
|
|
77
100
|
|
|
@@ -105,8 +128,11 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
|
|
|
105
128
|
.onOK(async () => {
|
|
106
129
|
const targetNotation = targetNotationInput.value as NOTATION;
|
|
107
130
|
const separator: string | undefined = targetNotation === NOTATION.SEPARATOR ? separatorInput.value! : undefined;
|
|
131
|
+
let notationProviderConstructor: typeof NotationProviderBase | undefined = undefined;
|
|
132
|
+
if (!notations.includes(targetNotation) && notationProviderConstructors.find((c) => c.notationName === targetNotation))
|
|
133
|
+
notationProviderConstructor = notationProviderConstructors.find((c) => c.notationName === targetNotation)!;
|
|
108
134
|
|
|
109
|
-
await convertDo(srcCol, seqHelper, targetNotation, separator);
|
|
135
|
+
await convertDo(srcCol, seqHelper, targetNotation, separator, notationProviderConstructor);
|
|
110
136
|
})
|
|
111
137
|
.show({x: 350, y: 100});
|
|
112
138
|
|
|
@@ -123,18 +149,38 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
|
|
|
123
149
|
* @param {NOTATION} targetNotation Target notation
|
|
124
150
|
* @param {string | null} separator Separator for SEPARATOR notation
|
|
125
151
|
*/
|
|
126
|
-
export async function convertDo(srcCol: DG.Column, seqHelper: ISeqHelper, targetNotation: NOTATION, separator?: string): Promise<DG.Column> {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
152
|
+
export async function convertDo(srcCol: DG.Column, seqHelper: ISeqHelper, targetNotation: NOTATION, separator?: string, notationProviderConstructor?: typeof NotationProviderBase): Promise<DG.Column> {
|
|
153
|
+
if (notationProviderConstructor) {
|
|
154
|
+
const newColName = srcCol.dataFrame.columns.getUnusedName(`${notationProviderConstructor.notationName}(${srcCol.name})`);
|
|
155
|
+
const newCol = DG.Column.string(newColName, srcCol.length);
|
|
156
|
+
newCol.init((i) => {
|
|
157
|
+
const seq = srcCol.get(i);// we know for sure (in Macron accent) that it is helm
|
|
158
|
+
try {
|
|
159
|
+
return notationProviderConstructor.convertFromHelm(seq, {});
|
|
160
|
+
} catch (e) {
|
|
161
|
+
console.error(`Error converting sequence at row ${i}: ${e instanceof Error ? e.message : e}`);
|
|
162
|
+
return '';
|
|
163
|
+
}
|
|
164
|
+
});
|
|
165
|
+
srcCol.dataFrame.columns.add(newCol);
|
|
166
|
+
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newCol});
|
|
167
|
+
if (semType)
|
|
168
|
+
newCol.semType = semType;
|
|
169
|
+
await grok.data.detectSemanticTypes(srcCol.dataFrame);
|
|
170
|
+
return newCol;
|
|
171
|
+
} else {
|
|
172
|
+
const converterSh = seqHelper.getSeqHandler(srcCol);
|
|
173
|
+
const newColumn = converterSh.convert(targetNotation, separator);
|
|
174
|
+
srcCol.dataFrame.columns.add(newColumn);
|
|
175
|
+
|
|
176
|
+
// Call detector directly to escape some error on detectSemanticTypes
|
|
177
|
+
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newColumn});
|
|
178
|
+
if (semType)
|
|
179
|
+
newColumn.semType = semType;
|
|
180
|
+
|
|
181
|
+
// call to calculate 'cell.renderer' tag
|
|
182
|
+
await grok.data.detectSemanticTypes(srcCol.dataFrame);
|
|
183
|
+
|
|
184
|
+
return newColumn;
|
|
185
|
+
}
|
|
140
186
|
}
|
|
@@ -16,6 +16,11 @@ export abstract class MolfileAtoms {
|
|
|
16
16
|
this.rawAtomLines[atomIdx] = this.rawAtomLines[atomIdx].replace(R_GROUP_ELEMENT_SYMBOL, newElementSymbol);
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
+
appendAtomLine(rawLine: string, x: number, y: number): void {
|
|
20
|
+
this.rawAtomLines.push(rawLine);
|
|
21
|
+
this.coordinates.push({x, y});
|
|
22
|
+
}
|
|
23
|
+
|
|
19
24
|
deleteAtoms(indices: number[]): void {
|
|
20
25
|
this.coordinates = this.coordinates.filter((_, idx) => !indices.includes(idx));
|
|
21
26
|
this.rawAtomLines = this.rawAtomLines.filter((_, idx) => !indices.includes(idx));
|
|
@@ -13,6 +13,11 @@ export abstract class MolfileBonds {
|
|
|
13
13
|
return this.bondedAtomPairs;
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
+
appendBondLine(rawLine: string, bondedPair: number[]): void {
|
|
17
|
+
this.rawBondLines.push(rawLine);
|
|
18
|
+
this.bondedAtomPairs.push(bondedPair);
|
|
19
|
+
}
|
|
20
|
+
|
|
16
21
|
deleteBondLines(indices: number[]): void {
|
|
17
22
|
this.rawBondLines = this.rawBondLines.filter((_, idx) => !indices.includes(idx));
|
|
18
23
|
this.bondedAtomPairs = this.bondedAtomPairs.filter((_, idx) => !indices.includes(idx));
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
1
2
|
import {MolfileAtoms} from './mol-atoms';
|
|
2
3
|
import {MolfileBonds} from './mol-bonds';
|
|
3
4
|
import {RGroupHandler} from './r-group-handler';
|
|
5
|
+
import {CapGroupInfo} from './types';
|
|
4
6
|
|
|
5
7
|
export abstract class MolfileWrapper {
|
|
6
8
|
constructor(protected monomerSymbol: string) { }
|
|
@@ -76,8 +78,8 @@ export abstract class MolfileWrapper {
|
|
|
76
78
|
this.bonds.shift(shift);
|
|
77
79
|
}
|
|
78
80
|
|
|
79
|
-
capRGroups(
|
|
80
|
-
this.rGroups.capRGroups(
|
|
81
|
+
capRGroups(capGroupInfo: CapGroupInfo[], rdKitModule: RDModule): void {
|
|
82
|
+
this.rGroups.capRGroups(capGroupInfo, rdKitModule);
|
|
81
83
|
}
|
|
82
84
|
}
|
|
83
85
|
|
|
@@ -6,17 +6,23 @@ import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/mo
|
|
|
6
6
|
import {Helm} from './helm';
|
|
7
7
|
import {MolfileWrapper} from './mol-wrapper';
|
|
8
8
|
import {MolfileWrapperFactory} from './mol-wrapper-factory';
|
|
9
|
+
import {CapGroupInfo} from './types';
|
|
10
|
+
|
|
11
|
+
/** Returns true if the string is a valid single element symbol (e.g. 'H', 'O', 'C', 'Cl') */
|
|
12
|
+
function isSimpleElement(s: string): boolean {
|
|
13
|
+
return /^[A-Z][a-z]?$/.test(s);
|
|
14
|
+
}
|
|
9
15
|
|
|
10
16
|
export class MonomerWrapper {
|
|
11
17
|
private readonly molfileWrapper: MolfileWrapper;
|
|
12
|
-
private
|
|
18
|
+
private capGroupInfo: CapGroupInfo[] = [];
|
|
13
19
|
private static molfileV2KToV3KCache: Map<string, string> = new Map();
|
|
14
20
|
constructor(
|
|
15
21
|
public readonly monomerSymbol: string,
|
|
16
22
|
public readonly monomerIdx: number,
|
|
17
23
|
private helm: Helm,
|
|
18
24
|
shift: { x: number, y: number },
|
|
19
|
-
rdKitModule: RDModule,
|
|
25
|
+
private readonly rdKitModule: RDModule,
|
|
20
26
|
private readonly monomerLib: IMonomerLibBase
|
|
21
27
|
) {
|
|
22
28
|
const libraryMonomerObject = this.getLibraryMonomerObject();
|
|
@@ -26,7 +32,7 @@ export class MonomerWrapper {
|
|
|
26
32
|
molfile = this.convertMolfileToV3KFormat(molfile, monomerSymbol, rdKitModule);
|
|
27
33
|
|
|
28
34
|
this.molfileWrapper = MolfileWrapperFactory.getInstance(molfile, monomerSymbol);
|
|
29
|
-
this.
|
|
35
|
+
this.capGroupInfo = this.getCapGroupInfo(libraryMonomerObject);
|
|
30
36
|
|
|
31
37
|
this.removeRGroups(helm.bondedRGroupsMap[monomerIdx]!);
|
|
32
38
|
this.capRemainingRGroups();
|
|
@@ -63,20 +69,28 @@ export class MonomerWrapper {
|
|
|
63
69
|
return monomer;
|
|
64
70
|
}
|
|
65
71
|
|
|
66
|
-
private
|
|
72
|
+
private getCapGroupInfo(
|
|
67
73
|
libraryMonomerObject: Monomer
|
|
68
|
-
):
|
|
74
|
+
): CapGroupInfo[] {
|
|
69
75
|
const rgroups = libraryMonomerObject.rgroups;
|
|
70
|
-
|
|
76
|
+
return rgroups.map((rgroup, ind) => {
|
|
71
77
|
const smiles = rgroup[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES] ||
|
|
72
78
|
// WARNING: ignore because both key variants coexist in HELM Core Library!
|
|
73
79
|
// @ts-ignore
|
|
74
80
|
rgroup[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE];
|
|
81
|
+
let rgroupId = rgroup[HELM_RGROUP_FIELDS.LABEL][1];
|
|
82
|
+
if (!rgroupId || !parseInt(rgroupId) || isNaN(parseInt(rgroupId))) {
|
|
83
|
+
// try to parse it from smiles, which can look like '[H][*:1]', 'O[*:2]', 'C=C[*:3]'
|
|
84
|
+
const match = smiles?.match(/\[\*:(\d)\]/);
|
|
85
|
+
if (match && match[1])
|
|
86
|
+
rgroupId = match[1];
|
|
87
|
+
}
|
|
88
|
+
if (!rgroupId || !parseInt(rgroupId) || isNaN(parseInt(rgroupId)))
|
|
89
|
+
rgroupId = `${ind + 1}`; // fallback to index-based id, starting from 1
|
|
75
90
|
// extract the element symbol
|
|
76
|
-
|
|
91
|
+
const element = smiles.replace(/(\[|\]|\*|:|\d)/g, '');
|
|
92
|
+
return {element, smiles, isSimple: isSimpleElement(element), rGroupId: parseInt(rgroupId)};
|
|
77
93
|
});
|
|
78
|
-
|
|
79
|
-
return result;
|
|
80
94
|
}
|
|
81
95
|
|
|
82
96
|
private shiftCoordinates(shift: { x: number, y: number }): void {
|
|
@@ -96,7 +110,7 @@ export class MonomerWrapper {
|
|
|
96
110
|
}
|
|
97
111
|
|
|
98
112
|
private capRemainingRGroups(): void {
|
|
99
|
-
this.molfileWrapper.capRGroups(this.
|
|
113
|
+
this.molfileWrapper.capRGroups(this.capGroupInfo, this.rdKitModule);
|
|
100
114
|
}
|
|
101
115
|
|
|
102
116
|
replaceRGroupWithAttachmentAtom(rGroupId: number, attachmentAtomIdx: number): void {
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
|
|
2
|
+
import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler';
|
|
3
|
+
import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
2
4
|
import {HYDROGEN_SYMBOL} from './const';
|
|
3
5
|
import {MolfileAtoms} from './mol-atoms';
|
|
4
6
|
import {MolfileBonds} from './mol-bonds';
|
|
5
|
-
import {PositionInBonds} from './types';
|
|
7
|
+
import {CapGroupInfo, PositionInBonds} from './types';
|
|
6
8
|
|
|
7
9
|
|
|
8
10
|
export class RGroupHandler {
|
|
@@ -108,15 +110,106 @@ export class RGroupHandler {
|
|
|
108
110
|
}
|
|
109
111
|
|
|
110
112
|
/** WARNING: capping RGroups and deletion of the bonded ones don't commute */
|
|
111
|
-
capRGroups(
|
|
113
|
+
capRGroups(capGroupInfo: CapGroupInfo[], rdKitModule: RDModule): void {
|
|
112
114
|
this.rGroupIdToAtomicIndexMap.forEach((atomicIdx, rGroupId) => {
|
|
113
|
-
const
|
|
114
|
-
if (
|
|
115
|
-
|
|
116
|
-
|
|
115
|
+
const info = capGroupInfo.find((info) => info.rGroupId === rGroupId) ?? capGroupInfo[rGroupId - 1];
|
|
116
|
+
if (info.isSimple) {
|
|
117
|
+
if (info.element === HYDROGEN_SYMBOL) {
|
|
118
|
+
this.removeRGroups([rGroupId]);
|
|
119
|
+
this.deleteBondLineWithSpecifiedRGroup(rGroupId);
|
|
120
|
+
} else
|
|
121
|
+
this.atoms.replaceRGroupSymbolByElement(atomicIdx, info.element);
|
|
117
122
|
} else
|
|
118
|
-
this.
|
|
123
|
+
this.capWithComplexGroup(atomicIdx, info.smiles, rdKitModule);
|
|
119
124
|
});
|
|
120
125
|
}
|
|
126
|
+
|
|
127
|
+
/** Cap an R-group with a multi-atom cap group by parsing the cap SMILES,
|
|
128
|
+
* then inserting its atoms and bonds into the monomer molfile */
|
|
129
|
+
private capWithComplexGroup(
|
|
130
|
+
rGroupAtomicIdx: number, capSmiles: string, rdKitModule: RDModule
|
|
131
|
+
): void {
|
|
132
|
+
// Replace [*:N] with placeholder element Xe so RDKit can parse the SMILES
|
|
133
|
+
const PLACEHOLDER = 'Xe';
|
|
134
|
+
const parsableSmiles = capSmiles.replace(/\[\*:\d+\]/g, `[${PLACEHOLDER}]`);
|
|
135
|
+
const capMol = rdKitModule.get_mol(parsableSmiles);
|
|
136
|
+
if (!capMol)
|
|
137
|
+
throw new Error(`Cannot parse cap group SMILES: ${capSmiles}`);
|
|
138
|
+
|
|
139
|
+
let capMolfile: string;
|
|
140
|
+
try {
|
|
141
|
+
capMolfile = capMol.get_v3Kmolblock();
|
|
142
|
+
} finally {
|
|
143
|
+
capMol.delete();
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
const capHandler = MolfileHandler.getInstance(capMolfile);
|
|
147
|
+
const capAtomLines = capHandler.getAtomLines();
|
|
148
|
+
const capBondPairs = capHandler.pairsOfBondedAtoms;
|
|
149
|
+
const capBondLines = capHandler.getBondLines();
|
|
150
|
+
const capX = capHandler.x;
|
|
151
|
+
const capY = capHandler.y;
|
|
152
|
+
const capAtomTypes = capHandler.atomTypes;
|
|
153
|
+
|
|
154
|
+
// Find the placeholder atom (was the [*:N] attachment point)
|
|
155
|
+
let dummyCapIdx = -1; // 0-based
|
|
156
|
+
for (let i = 0; i < capAtomTypes.length; i++) {
|
|
157
|
+
if (capAtomTypes[i] === PLACEHOLDER) {
|
|
158
|
+
dummyCapIdx = i;
|
|
159
|
+
break;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
if (dummyCapIdx === -1)
|
|
163
|
+
throw new Error(`Cannot find placeholder atom in cap group SMILES: ${capSmiles}`);
|
|
164
|
+
|
|
165
|
+
// Find the attachment atom (bonded to placeholder) and the bond connecting them
|
|
166
|
+
let attachmentCapIdx = -1; // 0-based
|
|
167
|
+
for (let i = 0; i < capBondPairs.length; i++) {
|
|
168
|
+
const [a1, a2] = capBondPairs[i]; // 1-based
|
|
169
|
+
if (a1 === dummyCapIdx + 1) {
|
|
170
|
+
attachmentCapIdx = a2 - 1;
|
|
171
|
+
break;
|
|
172
|
+
}
|
|
173
|
+
if (a2 === dummyCapIdx + 1) {
|
|
174
|
+
attachmentCapIdx = a1 - 1;
|
|
175
|
+
break;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
if (attachmentCapIdx === -1)
|
|
179
|
+
throw new Error(`Cannot find attachment atom in cap group SMILES: ${capSmiles}`);
|
|
180
|
+
|
|
181
|
+
// Compute coordinate translation: place cap attachment at R-group position
|
|
182
|
+
const rGroupCoords = this.atoms.atomCoordinates[rGroupAtomicIdx];
|
|
183
|
+
const tx = rGroupCoords.x - capX[attachmentCapIdx];
|
|
184
|
+
const ty = rGroupCoords.y - capY[attachmentCapIdx];
|
|
185
|
+
|
|
186
|
+
// Replace the R# atom symbol with the attachment atom's element
|
|
187
|
+
const attachmentSymbol = capAtomTypes[attachmentCapIdx];
|
|
188
|
+
this.atoms.replaceRGroupSymbolByElement(rGroupAtomicIdx, attachmentSymbol);
|
|
189
|
+
|
|
190
|
+
// Build index mapping: cap 1-based → monomer 1-based
|
|
191
|
+
const capToMonomer = new Map<number, number>();
|
|
192
|
+
capToMonomer.set(attachmentCapIdx + 1, rGroupAtomicIdx + 1);
|
|
193
|
+
|
|
194
|
+
// Append remaining cap atoms (excluding placeholder and attachment)
|
|
195
|
+
let nextMonomerIdx = this.atoms.count + 1; // 1-based
|
|
196
|
+
for (let i = 0; i < capAtomLines.length; i++) {
|
|
197
|
+
if (i === dummyCapIdx || i === attachmentCapIdx) continue;
|
|
198
|
+
const newX = capX[i] + tx;
|
|
199
|
+
const newY = capY[i] + ty;
|
|
200
|
+
this.atoms.appendAtomLine(capAtomLines[i], newX, newY);
|
|
201
|
+
capToMonomer.set(i + 1, nextMonomerIdx);
|
|
202
|
+
nextMonomerIdx++;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// Append cap bonds (excluding any bond involving the placeholder)
|
|
206
|
+
for (let i = 0; i < capBondPairs.length; i++) {
|
|
207
|
+
const [a1, a2] = capBondPairs[i]; // 1-based in cap
|
|
208
|
+
if (a1 === dummyCapIdx + 1 || a2 === dummyCapIdx + 1) continue;
|
|
209
|
+
const newA1 = capToMonomer.get(a1)!;
|
|
210
|
+
const newA2 = capToMonomer.get(a2)!;
|
|
211
|
+
this.bonds.appendBondLine(capBondLines[i], [newA1, newA2]);
|
|
212
|
+
}
|
|
213
|
+
}
|
|
121
214
|
}
|
|
122
215
|
|
|
@@ -11,3 +11,15 @@ export type PositionInBonds = {
|
|
|
11
11
|
nodeIdx: number,
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
+
/** Cap group information for an R-group */
|
|
15
|
+
export type CapGroupInfo = {
|
|
16
|
+
/** Extracted element string (e.g. 'H', 'O', 'C=C') */
|
|
17
|
+
element: string,
|
|
18
|
+
/** Raw cap group SMILES (e.g. '[H][*:1]', 'O[*:2]', 'C=C[*:3]') */
|
|
19
|
+
smiles: string,
|
|
20
|
+
/** Whether the cap is a single atom (valid element symbol) */
|
|
21
|
+
isSimple: boolean,
|
|
22
|
+
/** Number of R group, to handle cases where its not sorted */
|
|
23
|
+
rGroupId: number,
|
|
24
|
+
}
|
|
25
|
+
|