@datagrok/bio 2.14.2 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/css/monomer-manager.css +66 -0
- package/detectors.js +7 -2
- package/dist/111.js +1 -1
- package/dist/111.js.map +1 -1
- package/dist/234.js +1 -1
- package/dist/234.js.map +1 -1
- package/dist/242.js.map +1 -1
- package/dist/603.js +1 -1
- package/dist/603.js.map +1 -1
- package/dist/682.js +1 -1
- package/dist/682.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/778.js +1 -1
- package/dist/778.js.map +1 -1
- package/dist/793.js +1 -1
- package/dist/793.js.map +1 -1
- package/dist/801.js +2 -0
- package/dist/801.js.map +1 -0
- package/dist/950.js +1 -1
- package/dist/950.js.map +1 -1
- package/dist/980.js +2 -0
- package/dist/980.js.map +1 -0
- package/dist/package-test.js +6 -6
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +5 -5
- package/dist/package.js.map +1 -1
- package/files/monomer-libraries/polytool-lib.json +48 -0
- package/files/monomer-libraries/sample-lib-Aca-colored.json +2 -2
- package/package.json +20 -12
- package/src/analysis/sequence-space.ts +2 -1
- package/src/demo/bio05-helm-msa-sequence-space.ts +1 -1
- package/src/package-test.ts +3 -1
- package/src/package-types.ts +9 -1
- package/src/package.ts +77 -33
- package/src/seq_align.ts +1 -1
- package/src/substructure-search/substructure-search.ts +2 -2
- package/src/tests/WebLogo-project-tests.ts +3 -4
- package/src/tests/activity-cliffs-tests.ts +5 -18
- package/src/tests/detectors-benchmark-tests.ts +24 -9
- package/src/tests/mm-distance-tests.ts +4 -3
- package/src/tests/monomer-libraries-tests.ts +3 -3
- package/src/tests/seq-handler-get-helm-tests.ts +88 -0
- package/src/tests/sequence-space-test.ts +4 -3
- package/src/tests/to-atomic-level-tests.ts +2 -0
- package/src/tests/to-atomic-level-ui-tests.ts +74 -0
- package/src/utils/cell-renderer.ts +3 -0
- package/src/utils/convert.ts +2 -2
- package/src/utils/cyclized.ts +20 -1
- package/src/utils/dimerized.ts +12 -0
- package/src/utils/get-region-func-editor.ts +1 -1
- package/src/utils/helm-to-molfile/converter/converter.ts +58 -30
- package/src/utils/helm-to-molfile/converter/mol-atoms.ts +2 -0
- package/src/utils/helm-to-molfile/converter/mol-bonds.ts +2 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +5 -1
- package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +7 -3
- package/src/utils/helm-to-molfile/converter/polymer.ts +21 -6
- package/src/utils/helm-to-molfile/converter/types.ts +11 -0
- package/src/utils/helm-to-molfile/utils.ts +11 -15
- package/src/utils/monomer-lib/lib-manager.ts +15 -1
- package/src/utils/monomer-lib/library-file-manager/file-manager.ts +1 -1
- package/src/utils/monomer-lib/library-file-manager/file-validator.ts +8 -0
- package/src/utils/monomer-lib/library-file-manager/ui.ts +150 -3
- package/src/utils/monomer-lib/monomer-lib.ts +59 -21
- package/src/utils/monomer-lib/monomer-manager/duplicate-monomer-manager.ts +155 -0
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +924 -0
- package/src/utils/multiple-sequence-alignment-ui.ts +3 -3
- package/src/utils/seq-helper/index.ts +1 -0
- package/src/utils/seq-helper/seq-helper.ts +131 -0
- package/src/utils/sequence-to-mol.ts +47 -18
- package/src/widgets/bio-substructure-filter.ts +9 -7
- package/src/widgets/package-settings-editor-widget.ts +6 -6
- package/src/widgets/representations.ts +12 -12
- package/dist/449.js +0 -2
- package/dist/449.js.map +0 -1
- /package/src/tests/{seq-handler-get-region.ts → seq-handler-get-region-tests.ts} +0 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {after, before, category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
|
+
import {NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
+
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
9
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
10
|
+
import {getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
11
|
+
import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
12
|
+
|
|
13
|
+
category('SeqHandler: getHelm', () => {
|
|
14
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
15
|
+
let userLibSettings: UserLibSettings; // backup
|
|
16
|
+
|
|
17
|
+
before(async () => {
|
|
18
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
19
|
+
userLibSettings = await getUserLibSettings();
|
|
20
|
+
|
|
21
|
+
// Test 'helm' requires default monomer library loaded
|
|
22
|
+
await setUserLibSettingsForTests();
|
|
23
|
+
await monomerLibHelper.loadMonomerLib(true); // load default libraries
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
after(async () => {
|
|
27
|
+
await setUserLibSettings(userLibSettings);
|
|
28
|
+
await monomerLibHelper.loadMonomerLib(true); // load user settings libraries
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
const tests: {
|
|
33
|
+
[testName: string]: {
|
|
34
|
+
src: {
|
|
35
|
+
seq: string, notation: NOTATION, separator?: string
|
|
36
|
+
}, tgt: { helm: string }
|
|
37
|
+
}
|
|
38
|
+
} = {
|
|
39
|
+
'fasta': {
|
|
40
|
+
src: {seq: 'MDYKETMDYKET', notation: NOTATION.FASTA},
|
|
41
|
+
tgt: {helm: 'PEPTIDE1{M.D.Y.K.E.T.M.D.Y.K.E.T}$$$$'},
|
|
42
|
+
},
|
|
43
|
+
'separator': {
|
|
44
|
+
src: {seq: 'M-D-Y-K-E-T-M-D-Y-K-E-T', notation: NOTATION.SEPARATOR, separator: '-'},
|
|
45
|
+
tgt: {helm: 'PEPTIDE1{M.D.Y.K.E.T.M.D.Y.K.E.T}$$$$'},
|
|
46
|
+
},
|
|
47
|
+
'helm': {
|
|
48
|
+
src: {seq: 'PEPTIDE1{M.D.Y.K.E.T}$$$$', notation: NOTATION.HELM},
|
|
49
|
+
tgt: {helm: 'PEPTIDE1{M.D.Y.K.E.T}$$$$'},
|
|
50
|
+
},
|
|
51
|
+
'helm-cyclic': {
|
|
52
|
+
src: {seq: 'PEPTIDE1{M.D.Y.K.E.T}$PEPTIDE1,PEPTIDE1,6:R2-1:R1$$$V2.0', notation: NOTATION.HELM},
|
|
53
|
+
tgt: {helm: 'PEPTIDE1{M.D.Y.K.E.T}$PEPTIDE1,PEPTIDE1,6:R2-1:R1$$$V2.0'}
|
|
54
|
+
},
|
|
55
|
+
|
|
56
|
+
// TODO: Add tests for cyclized and dimerized
|
|
57
|
+
// 'separator-cyclized': {
|
|
58
|
+
// src: {seq: 'R-F-C(1)-T-G-H-F-Y-P-C(1)-meI', notation: NOTATION.SEPARATOR, separator: '-',},
|
|
59
|
+
// tgt: {helm: 'PEPTIDE1{[R].[F].[C].[T].[G].[H].[F].[Y].[P].[C].[meI]}$PEPTIDE1,PEPTIDE1,3:R3-10:R3$$$'}
|
|
60
|
+
// },
|
|
61
|
+
// 'separator-dimerized': {
|
|
62
|
+
// src: {seq: '(#2)C-{R-F-C(2)-T-G-H-F-Y-P-C(2)-Mei}', notation: NOTATION.SEPARATOR, separator: '-',},
|
|
63
|
+
// tgt: {helm: ''}
|
|
64
|
+
// },
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
for (const [testName, testData] of Object.entries(tests)) {
|
|
68
|
+
test(testName, async () => {
|
|
69
|
+
await _testSeqHandlerGetHelm(testData.src.seq, testData.src.notation, testData.src.separator, testData.tgt.helm);
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/**
 * Wraps a single sequence into a one-row dataframe, runs semantic type detection on it
 * and checks the HELM produced by SeqHandler for row 0 against the expected string.
 * @param srcSeq Source sequence text.
 * @param srcNotation Notation of the source; not used here, tags are left to the detector.
 * @param srcSeparator Separator of the source; not used here, tags are left to the detector.
 * @param tgtHelm Expected HELM string.
 */
async function _testSeqHandlerGetHelm(
  srcSeq: string, srcNotation: NOTATION, srcSeparator: string | undefined, tgtHelm: string
): Promise<void> {
  const col = DG.Column.fromStrings('seq', [srcSeq]);
  // The column is deliberately not tagged by hand (semType / units / alphabet / separator):
  // detectSemanticTypes is relied upon to set up the column.
  await grok.data.detectSemanticTypes(DG.DataFrame.fromColumns([col]));

  const actualHelm = await SeqHandler.forColumn(col).getHelm(0, false);
  expect(actualHelm, tgtHelm);
}
|
|
88
|
+
});
|
|
@@ -14,9 +14,10 @@ category('sequenceSpace', async () => {
|
|
|
14
14
|
let testHelmWithEmptyRowsTableView: DG.TableView;
|
|
15
15
|
|
|
16
16
|
test('sequenceSpaceOpens', async () => {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
const testData = !DG.Test.isInBenchmark ?
|
|
18
|
+
{fileName: 'tests/100_3_clustests.csv'} :
|
|
19
|
+
{fileName: 'tests/peptides_motif-with-random_10000.csv'};
|
|
20
|
+
testFastaDf = await readDataframe(testData.fileName);
|
|
20
21
|
testFastaTableView = grok.shell.addTableView(testFastaDf);
|
|
21
22
|
await _testSequenceSpaceReturnsResult(testFastaDf, DimReductionMethods.UMAP, 'sequence');
|
|
22
23
|
//grok.shell.closeTable(testFastaDf);
|
|
@@ -18,6 +18,7 @@ import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
|
18
18
|
|
|
19
19
|
import {toAtomicLevel} from '../package';
|
|
20
20
|
import {_package} from '../package-test';
|
|
21
|
+
import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
|
|
21
22
|
|
|
22
23
|
const appPath = 'System:AppData/Bio';
|
|
23
24
|
const fileSource = new DG.FileSource(appPath);
|
|
@@ -216,6 +217,7 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
|
|
|
216
217
|
async function _testToAtomicLevel(
|
|
217
218
|
df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper
|
|
218
219
|
): Promise<DG.Column | null> {
|
|
220
|
+
const rdKitModule = await getRdKitModule();
|
|
219
221
|
const seqCol: DG.Column<string> = df.getCol(seqColName);
|
|
220
222
|
const monomerLib: IMonomerLib = monomerLibHelper.getMonomerLib();
|
|
221
223
|
const res = await _toAtomicLevel(df, seqCol, monomerLib);
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
|
|
4
|
+
import {after, before, category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
+
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
6
|
+
import {sequenceToMolfile} from '../utils/sequence-to-mol';
|
|
7
|
+
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
8
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
9
|
+
import {getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
10
|
+
import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
11
|
+
|
|
12
|
+
import {ConverterFunc} from './types';
|
|
13
|
+
import {_package} from '../package';
|
|
14
|
+
|
|
15
|
+
category('toAtomicLevel-ui', () => {
|
|
16
|
+
|
|
17
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
18
|
+
let userLibSettings: UserLibSettings;
|
|
19
|
+
let helmHelper: IHelmHelper;
|
|
20
|
+
|
|
21
|
+
before(async () => {
|
|
22
|
+
helmHelper = await getHelmHelper(); // init Helm package
|
|
23
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
24
|
+
userLibSettings = await getUserLibSettings();
|
|
25
|
+
|
|
26
|
+
// Test 'helm' requires default monomer library loaded
|
|
27
|
+
await setUserLibSettingsForTests();
|
|
28
|
+
await monomerLibHelper.loadMonomerLib(true); // load default libraries
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
after(async () => {
|
|
32
|
+
// UserDataStorage.put() replaces existing data
|
|
33
|
+
await setUserLibSettings(userLibSettings);
|
|
34
|
+
await monomerLibHelper.loadMonomerLib(true); // load user settings libraries
|
|
35
|
+
});
|
|
36
|
+
const fastaCsv = `seq
|
|
37
|
+
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
38
|
+
MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
|
|
39
|
+
MMELVLKTIIGPIVVGVVLRIVDKWLNKDK
|
|
40
|
+
`;
|
|
41
|
+
const helmCsv = `seq
|
|
42
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Aze.dV.E.N.dV.Phe_4Me}$$$$
|
|
43
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$
|
|
44
|
+
PEPTIDE1{meI.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.D-Cit.N.D-Orn.D-aThr.Phe_4Me}$$$$
|
|
45
|
+
`;
|
|
46
|
+
|
|
47
|
+
test('toAtomicLevel-fasta-linear', async () => {
|
|
48
|
+
const df = DG.DataFrame.fromCsv(fastaCsv);
|
|
49
|
+
await grok.data.detectSemanticTypes(df);
|
|
50
|
+
const seqCol = df.getCol('seq');
|
|
51
|
+
await _testToAtomicLevelFunc(df, seqCol, false);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test('toAtomicLevel-fasta-nonlinear', async () => {
|
|
55
|
+
const df = DG.DataFrame.fromCsv(fastaCsv);
|
|
56
|
+
await grok.data.detectSemanticTypes(df);
|
|
57
|
+
const seqCol = df.getCol('seq');
|
|
58
|
+
await _testToAtomicLevelFunc(df, seqCol, true);
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
test('toAtomicLevel-helm', async () => {
|
|
62
|
+
const df = DG.DataFrame.fromCsv(helmCsv);
|
|
63
|
+
await grok.data.detectSemanticTypes(df);
|
|
64
|
+
const seqCol = df.getCol('seq');
|
|
65
|
+
await _testToAtomicLevelFunc(df, seqCol, true);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
/**
 * Converts the sequence column with sequenceToMolfile and checks that the result
 * carries the Molecule semantic type.
 * @param table Dataframe passed to sequenceToMolfile.
 * @param seqCol Sequence column to convert.
 * @param nonlinear Passed through to sequenceToMolfile unchanged.
 */
async function _testToAtomicLevelFunc(
  table: DG.DataFrame, seqCol: DG.Column<string>, nonlinear: boolean
): Promise<void> {
  const molCol = await sequenceToMolfile(table, seqCol, nonlinear, monomerLibHelper.getMonomerLib());
  // Fail with a readable message instead of a TypeError from dereferencing a missing result.
  if (molCol == null)
    throw new Error('sequenceToMolfile returned no molecule column');
  expect(molCol.semType, DG.SEMTYPE.MOLECULE);
}
|
|
74
|
+
});
|
package/src/utils/convert.ts
CHANGED
|
@@ -65,7 +65,7 @@ export function convert(col?: DG.Column): void {
|
|
|
65
65
|
};
|
|
66
66
|
|
|
67
67
|
const targetColumnInput = ui.input.column('Column', {table: grok.shell.t, value: srcCol,
|
|
68
|
-
onValueChanged: (
|
|
68
|
+
onValueChanged: (value) => toggleColumn(value)});
|
|
69
69
|
|
|
70
70
|
const separatorArray = ['-', '.', '/'];
|
|
71
71
|
let filteredNotations = notations.filter((e) => e !== currentNotation);
|
|
@@ -85,7 +85,7 @@ export function convert(col?: DG.Column): void {
|
|
|
85
85
|
// set correct visibility on init
|
|
86
86
|
toggleSeparator();
|
|
87
87
|
|
|
88
|
-
targetNotationInput.onChanged(() => {
|
|
88
|
+
targetNotationInput.onChanged.subscribe(() => {
|
|
89
89
|
toggleSeparator();
|
|
90
90
|
});
|
|
91
91
|
|
package/src/utils/cyclized.ts
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
1
5
|
import {GAP_SYMBOL, INotationProvider, ISeqSplitted, SeqSplittedBase, SplitterFunc}
|
|
2
6
|
from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
3
7
|
import {getSplitterWithSeparator, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
@@ -18,7 +22,22 @@ export class CyclizedNotationProvider implements INotationProvider {
|
|
|
18
22
|
private _splitter(seq: string): ISeqSplitted {
|
|
19
23
|
const baseSS: ISeqSplitted = this.separatorSplitter(seq);
|
|
20
24
|
return new CyclizedSeqSplitted(baseSS.originals, GapOriginals[NOTATION.SEPARATOR]);
|
|
21
|
-
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
 * Builds a HELM column for the given sequence column by delegating to the
 * 'SequenceTranslator' package: a call of 'polyToolConvert2' is prepared and then
 * handed to 'getPolyToolConvertEditor' as its 'call' argument.
 * @param seqCol Source sequence column; its dataframe is passed as the 'table' argument.
 * @param options Extra arguments spread into the 'polyToolConvert2' call.
 * @returns The output value of the editor function call, treated as a HELM column.
 * @throws Error if either of the two required functions cannot be found.
 */
public async getHelm(seqCol: DG.Column<string>, options?: any): Promise<DG.Column<string>> {
  const polyToolPackageName: string = 'SequenceTranslator';

  // Single guarded lookup so that a missing function never surfaces as an
  // "undefined is not an object" error further down.
  const findPolyToolFunc = (funcName: string): DG.Func => {
    const funcList = DG.Func.find({package: polyToolPackageName, name: funcName});
    if (funcList.length == 0)
      throw new Error(`Package '${polyToolPackageName}' must be installed for Cyclized notation provider.`);
    return funcList[0];
  };

  const func = findPolyToolFunc('polyToolConvert2');
  const ptConvertCall = func.prepare({table: seqCol.dataFrame, seqCol: seqCol, ...options});

  const editorFunc = findPolyToolFunc('getPolyToolConvertEditor');
  const editorCall = await editorFunc.prepare({call: ptConvertCall}).call();
  const resHelmCol = editorCall.getOutputParamValue() as DG.Column<string>;
  return resHelmCol;
}
|
|
22
41
|
}
|
|
23
42
|
|
|
24
43
|
/** Gets canonical monomers for original ones with cyclization marks */
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {GAP_SYMBOL, INotationProvider, ISeqSplitted, SeqSplittedBase, SplitterFunc}
|
|
6
|
+
from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
7
|
+
import {CyclizedNotationProvider} from './cyclized';
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
/**
 * Notation provider for dimerized sequences.
 * Declares no members of its own: everything is inherited from {@link CyclizedNotationProvider}.
 */
export class DimerizedNotationProvider extends CyclizedNotationProvider {}
|
|
@@ -51,7 +51,7 @@ export class GetRegionFuncEditor {
|
|
|
51
51
|
|
|
52
52
|
this.inputs.name = ui.input.string('Column name', {value: this.getDefaultName(),
|
|
53
53
|
onValueChanged: this.nameInputChanged.bind(this), clearIcon: true});
|
|
54
|
-
this.inputs.name.onInput(this.nameInputInput.bind(this)); // To catch clear event
|
|
54
|
+
// The handler must be invoked inside the subscription; merely binding it would create
// a function that is never called, and the clear event would go unnoticed.
this.inputs.name.onInput.subscribe(() => this.nameInputInput()); // To catch clear event
|
|
55
55
|
|
|
56
56
|
// tooltips
|
|
57
57
|
for (const paramName in this.call.inputParams) {
|
|
@@ -1,24 +1,30 @@
|
|
|
1
1
|
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
|
-
import * as grok from 'datagrok-api/grok';
|
|
4
3
|
import * as OCL from 'openchemlib/full';
|
|
5
4
|
|
|
6
5
|
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
7
6
|
import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
8
7
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types/index';
|
|
8
|
+
import {IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
9
|
+
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
9
10
|
|
|
10
11
|
import {Polymer} from './polymer';
|
|
11
12
|
import {GlobalMonomerPositionHandler} from './position-handler';
|
|
13
|
+
import {MolfileWithMap, MonomerMap} from './types';
|
|
14
|
+
|
|
15
|
+
import {_package} from '../../../package';
|
|
12
16
|
|
|
13
|
-
import {_package, getMonomerLibHelper} from '../../../package';
|
|
14
17
|
|
|
15
18
|
export class HelmToMolfileConverter {
|
|
16
|
-
constructor(
|
|
17
|
-
|
|
18
|
-
|
|
19
|
+
/**
 * @param helmColumn Column with the HELM strings to convert.
 * @param df Dataframe used to pick unused names for the produced columns.
 * @param libHelper Provides the monomer library used during conversion.
 * @param helmHelper Produces the polymer-graph molfiles for the HELM strings.
 */
constructor(
  private helmColumn: DG.Column<string>,
  private df: DG.DataFrame,
  private libHelper: IMonomerLibHelper,
  private helmHelper: IHelmHelper,
) {}
|
|
19
25
|
|
|
20
|
-
|
|
21
|
-
const smiles =
|
|
26
|
+
convertToSmiles(rdKitModule: RDModule): DG.Column<string> {
|
|
27
|
+
const smiles = this.getSmilesList(rdKitModule);
|
|
22
28
|
const columnName = this.df.columns.getUnusedName(`smiles(${this.helmColumn.name})`);
|
|
23
29
|
return DG.Column.fromStrings(columnName, smiles.map((molecule) => {
|
|
24
30
|
if (molecule === null)
|
|
@@ -27,13 +33,13 @@ export class HelmToMolfileConverter {
|
|
|
27
33
|
}));
|
|
28
34
|
}
|
|
29
35
|
|
|
30
|
-
private
|
|
31
|
-
const molfilesV2K =
|
|
36
|
+
/**
 * Converts every row to a molfile with convertToMolfileV3KColumn and then
 * translates each molfile to SMILES with DG.chem.convert.
 */
private getSmilesList(rdKitModule: RDModule): string[] {
  return this.convertToMolfileV3KColumn(rdKitModule).toList()
    .map((molBlock) => DG.chem.convert(molBlock, DG.chem.Notation.MolBlock, DG.chem.Notation.Smiles));
}
|
|
35
41
|
|
|
36
|
-
|
|
42
|
+
public getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string): DG.Column<string> {
|
|
37
43
|
const beautifiedMolV2000 = beautifiedMols.map((mol) => {
|
|
38
44
|
if (mol === null)
|
|
39
45
|
return '';
|
|
@@ -54,9 +60,9 @@ export class HelmToMolfileConverter {
|
|
|
54
60
|
return DG.Column.fromStrings(columnName, molv3000Arr);
|
|
55
61
|
}
|
|
56
62
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
const
|
|
63
|
+
// @deprecated Use SeqHelper.helmToAtomicLevel
|
|
64
|
+
convertToRdKitBeautifiedMolfileColumn(chiralityEngine: boolean, rdKitModule: RDModule): DG.Column<string> {
|
|
65
|
+
const molfilesV3K = this.convertToMolfileV3KColumn(rdKitModule).toList();
|
|
60
66
|
const beautifiedMols = molfilesV3K.map((item) => {
|
|
61
67
|
if (item === '')
|
|
62
68
|
return null;
|
|
@@ -71,7 +77,7 @@ export class HelmToMolfileConverter {
|
|
|
71
77
|
const columnName = this.df.columns.getUnusedName(`molfile(${this.helmColumn.name})`);
|
|
72
78
|
|
|
73
79
|
if (chiralityEngine)
|
|
74
|
-
return
|
|
80
|
+
return this.getMolV3000ViaOCL(beautifiedMols, columnName);
|
|
75
81
|
return DG.Column.fromStrings(columnName, beautifiedMols.map((mol) => {
|
|
76
82
|
if (mol === null)
|
|
77
83
|
return '';
|
|
@@ -81,34 +87,56 @@ export class HelmToMolfileConverter {
|
|
|
81
87
|
}));
|
|
82
88
|
}
|
|
83
89
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
const monomerLib: IMonomerLib =
|
|
90
|
+
|
|
91
|
+
/**
 * Converts every HELM row to a molfile and returns the molfiles as a string column.
 * Rows with an empty HELM value, and rows whose conversion throws (the error is logged),
 * yield an empty string.
 * @param rdKitModule RDKit module passed on to the per-row conversion.
 * @returns A new string column named after the HELM column (an unused name in the dataframe).
 */
public convertToMolfileV3KColumn(rdKitModule: RDModule): DG.Column<string> {
  // Reuse the per-row conversion (including its error logging) and keep only the molfile text.
  const molfileList: string[] = this.convertToMolfileV3K(rdKitModule).map((res) => res.molfile);
  const molfileColName = this.df.columns.getUnusedName(`molfileV2K(${this.helmColumn.name})`);
  return DG.Column.fromList('string', molfileColName, molfileList);
}
|
|
107
113
|
|
|
108
|
-
|
|
109
|
-
const polymerGraphColumn: DG.Column<string> =
|
|
110
|
-
|
|
111
|
-
|
|
114
|
+
/**
 * Converts every HELM row to a molfile together with its monomer map.
 * Rows with an empty HELM value, and rows whose conversion throws (the error is logged),
 * yield MolfileWithMap.empty().
 * @param rdKitModule RDKit module passed on to the polymer conversion.
 * @returns One MolfileWithMap per row, in row order.
 */
public convertToMolfileV3K(rdKitModule: RDModule): MolfileWithMap[] {
  const polymerGraphColumn: DG.Column<string> = this.getPolymerGraphColumn();
  const monomerLib: IMonomerLib = this.libHelper.getMonomerLib();
  return polymerGraphColumn.toList().map(
    (pseudoMolfile: string, idx: number): MolfileWithMap => {
      const helm = this.helmColumn.get(idx);
      // Same "no result" value as on the error path, so callers always get a class instance.
      if (!helm) return MolfileWithMap.empty();

      try {
        return this.getPolymerMolfile(helm, pseudoMolfile, rdKitModule, monomerLib);
      } catch (err: any) {
        const [errMsg, errStack] = errInfo(err);
        _package.logger.error(errMsg, undefined, errStack);
        return MolfileWithMap.empty();
      }
    });
}
|
|
134
|
+
|
|
135
|
+
/** Asks the HELM helper for the molfiles of all HELM strings and wraps them into a column named 'mols'. */
private getPolymerGraphColumn(): DG.Column<string> {
  const graphMolfiles = this.helmHelper.getMolfiles(this.helmColumn.toList());
  return DG.Column.fromStrings('mols', graphMolfiles);
}
|
|
113
141
|
|
|
114
142
|
private getPolymerMolfile(
|
|
@@ -116,14 +144,14 @@ export class HelmToMolfileConverter {
|
|
|
116
144
|
polymerGraph: string,
|
|
117
145
|
rdKitModule: RDModule,
|
|
118
146
|
monomerLib: IMonomerLib
|
|
119
|
-
):
|
|
147
|
+
): MolfileWithMap {
|
|
120
148
|
const globalPositionHandler = new GlobalMonomerPositionHandler(polymerGraph);
|
|
121
149
|
const polymer = new Polymer(helm, rdKitModule, monomerLib);
|
|
122
150
|
globalPositionHandler.monomerSymbols.forEach((monomerSymbol: string, monomerIdx: number) => {
|
|
123
151
|
const shift = globalPositionHandler.getMonomerShifts(monomerIdx);
|
|
124
152
|
polymer.addMonomer(monomerSymbol, monomerIdx, shift);
|
|
125
153
|
});
|
|
126
|
-
const polymerMolfile = polymer.compileToMolfile();
|
|
154
|
+
const polymerMolfile: MolfileWithMap = polymer.compileToMolfile();
|
|
127
155
|
return polymerMolfile;
|
|
128
156
|
}
|
|
129
157
|
}
|
|
@@ -4,6 +4,8 @@ export abstract class MolfileAtoms {
|
|
|
4
4
|
protected coordinates: {x: number, y: number}[] = [];
|
|
5
5
|
protected rawAtomLines: string[] = [];
|
|
6
6
|
|
|
7
|
+
/** Number of stored atom coordinate entries. */
get count(): number {
  return this.coordinates.length;
}
|
|
8
|
+
|
|
7
9
|
get atomCoordinates(): {x: number, y: number}[] {
|
|
8
10
|
return this.coordinates;
|
|
9
11
|
}
|
|
@@ -4,6 +4,8 @@ export abstract class MolfileBonds {
|
|
|
4
4
|
protected bondedAtomPairs: number[][] = [];
|
|
5
5
|
protected rawBondLines: string[] = [];
|
|
6
6
|
|
|
7
|
+
/** Number of stored bonded atom pairs. */
public get count(): number {
  return this.bondedAtomPairs.length;
}
|
|
8
|
+
|
|
7
9
|
/** Get bond lines with new values for bonded atoms */
|
|
8
10
|
abstract getBondLines(): string[];
|
|
9
11
|
|
|
@@ -9,6 +9,10 @@ export abstract class MolfileWrapper {
|
|
|
9
9
|
protected bonds: MolfileBonds;
|
|
10
10
|
protected rGroups: RGroupHandler;
|
|
11
11
|
|
|
12
|
+
/** Atom count as reported by the wrapped atoms object. */
public get atomCount(): number {
  return this.atoms.count;
}
|
|
13
|
+
|
|
14
|
+
/** Bond count as reported by the wrapped bonds object. */
public get bondCount(): number {
  return this.bonds.count;
}
|
|
15
|
+
|
|
12
16
|
protected shiftR1GroupToOrigin(): void {
|
|
13
17
|
const r1Idx = this.rGroups.getAtomicIdx(1);
|
|
14
18
|
if (r1Idx === null)
|
|
@@ -40,7 +44,7 @@ export abstract class MolfileWrapper {
|
|
|
40
44
|
this.rGroups.deleteBondLineWithSpecifiedRGroup(rGroupId);
|
|
41
45
|
}
|
|
42
46
|
|
|
43
|
-
shiftCoordinates(shift: {x: number, y: number}): void {
|
|
47
|
+
shiftCoordinates(shift: { x: number, y: number }): void {
|
|
44
48
|
this.atoms.shift(shift);
|
|
45
49
|
}
|
|
46
50
|
|
|
@@ -8,12 +8,12 @@ import {MolfileWrapper} from './mol-wrapper';
|
|
|
8
8
|
import {MolfileWrapperFactory} from './mol-wrapper-factory';
|
|
9
9
|
|
|
10
10
|
export class MonomerWrapper {
|
|
11
|
-
private molfileWrapper: MolfileWrapper;
|
|
11
|
+
private readonly molfileWrapper: MolfileWrapper;
|
|
12
12
|
private capGroupElements: string[] = [];
|
|
13
13
|
|
|
14
14
|
constructor(
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
public readonly monomerSymbol: string,
|
|
16
|
+
public readonly monomerIdx: number,
|
|
17
17
|
private helm: Helm,
|
|
18
18
|
shift: { x: number, y: number },
|
|
19
19
|
rdKitModule: RDModule,
|
|
@@ -35,6 +35,10 @@ export class MonomerWrapper {
|
|
|
35
35
|
this.shiftCoordinates(shift);
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/** Atom count of the underlying molfile wrapper. */
public get atomCount() {
  return this.molfileWrapper.atomCount;
}
|
|
39
|
+
|
|
40
|
+
/** Bond count of the underlying molfile wrapper. */
public get bondCount() {
  return this.molfileWrapper.bondCount;
}
|
|
41
|
+
|
|
38
42
|
private convertMolfileToV3KFormat(molfileV2K: string, monomerSymbol: string, rdKitModule: RDModule): string {
|
|
39
43
|
let mol: RDMol | null = null;
|
|
40
44
|
try {
|
|
@@ -2,8 +2,11 @@ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
|
2
2
|
import {V3K_CONST} from '@datagrok-libraries/chem-meta/src/formats/molfile-const';
|
|
3
3
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types/index';
|
|
4
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
5
7
|
import {Helm} from './helm';
|
|
6
8
|
import {MonomerWrapper} from './monomer-wrapper';
|
|
9
|
+
import {MolfileWithMap, MonomerMap} from './types';
|
|
7
10
|
|
|
8
11
|
export class Polymer {
|
|
9
12
|
constructor(
|
|
@@ -50,7 +53,7 @@ export class Polymer {
|
|
|
50
53
|
});
|
|
51
54
|
}
|
|
52
55
|
|
|
53
|
-
compileToMolfile():
|
|
56
|
+
compileToMolfile(): MolfileWithMap {
|
|
54
57
|
const atomLines: string[] = [];
|
|
55
58
|
const bondLines: string[] = [];
|
|
56
59
|
|
|
@@ -61,10 +64,22 @@ export class Polymer {
|
|
|
61
64
|
|
|
62
65
|
this.restoreBondsBetweenMonomers();
|
|
63
66
|
|
|
64
|
-
this.monomerWrappers.
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
67
|
+
const monomers: MonomerMap[] = new Array<MonomerMap>(this.monomerWrappers.length);
|
|
68
|
+
for (const [mw, mwI] of wu.enumerate(this.monomerWrappers)) {
|
|
69
|
+
const mwAtomFirst = atomLines.length;
|
|
70
|
+
const mwBondFirst = bondLines.length;
|
|
71
|
+
|
|
72
|
+
atomLines.push(...mw.getAtomLines());
|
|
73
|
+
bondLines.push(...mw.getBondLines());
|
|
74
|
+
|
|
75
|
+
monomers[mwI] = {
|
|
76
|
+
position: mwI,
|
|
77
|
+
// TODO: PolymerType
|
|
78
|
+
symbol: mw.monomerSymbol,
|
|
79
|
+
atoms: wu.count(mwAtomFirst).take(mw.atomCount).toArray(),
|
|
80
|
+
bonds: wu.count(mwBondFirst).take(mw.bondCount).toArray(),
|
|
81
|
+
};
|
|
82
|
+
}
|
|
68
83
|
|
|
69
84
|
const atomCount = atomLines.length;
|
|
70
85
|
const bondCount = bondLines.length;
|
|
@@ -75,7 +90,7 @@ export class Polymer {
|
|
|
75
90
|
const molfileEnd = V3K_CONST.END_CTAB + '\n' + V3K_CONST.END;
|
|
76
91
|
const blockList = [header, atomBlock, bondBlock, molfileEnd];
|
|
77
92
|
const molfile = blockList.join('\n');
|
|
78
|
-
return molfile;
|
|
93
|
+
// Return a real MolfileWithMap instance (not a look-alike object literal), matching the declared return type.
return new MolfileWithMap(molfile, monomers);
|
|
79
94
|
}
|
|
80
95
|
|
|
81
96
|
private getV3KHeader(atomCount: number, bondCount: number): string {
|
|
@@ -10,3 +10,14 @@ export type PositionInBonds = {
|
|
|
10
10
|
bondLineIdx: number,
|
|
11
11
|
nodeIdx: number,
|
|
12
12
|
}
|
|
13
|
+
|
|
14
|
+
/** Describes which atoms and bonds of a compiled molfile belong to one monomer. */
export type MonomerMap = {
  /** Position of the monomer within the polymer. */
  position: number,
  /** Monomer symbol. */
  symbol: string,
  /** Indices of the monomer's atoms. */
  atoms: number[],
  /** Indices of the monomer's bonds. */
  bonds: number[],
};

/** A molfile string paired with the per-monomer atom/bond map it was built from. */
export class MolfileWithMap {
  constructor(
    public readonly molfile: string,
    public readonly monomers: MonomerMap[]
  ) {}

  /** Creates an instance with an empty molfile string and no monomers. */
  static empty(): MolfileWithMap {
    return new MolfileWithMap('', []);
  }
}
|
|
@@ -1,23 +1,18 @@
|
|
|
1
|
-
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
-
import * as DG from 'datagrok-api/dg';
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
6
|
-
|
|
7
|
-
/** Translate HELM column into molfile column and append to the dataframe */
|
|
8
|
-
export async function helm2mol(df: DG.DataFrame, helmCol: DG.Column<string>): Promise<void> {
|
|
9
|
-
const molCol = await getMolColumnFromHelm(df, helmCol);
|
|
10
|
-
df.columns.add(molCol, true);
|
|
11
|
-
await grok.data.detectSemanticTypes(df);
|
|
12
|
-
}
|
|
5
|
+
import {_package} from '../../package';
|
|
6
|
+
import {SeqHelper} from '../seq-helper';
|
|
13
7
|
|
|
14
8
|
|
|
15
9
|
/**
 * Translates a HELM column into a molfile column and returns it.
 * The result is tagged with the Molecule semantic type; this function itself does not
 * add the column to the dataframe.
 * @param df Dataframe passed to the converter.
 * @param helmCol Column with HELM strings.
 * @param chiralityEngine Forwarded to convertToRdKitBeautifiedMolfileColumn; defaults to true.
 */
export async function getMolColumnFromHelm(
  df: DG.DataFrame, helmCol: DG.Column<string>, chiralityEngine: boolean = true
): Promise<DG.Column<string>> {
  const helmConverter = (await SeqHelper.getInstance()).getHelmToMolfileConverter(df, helmCol);
  const result = helmConverter.convertToRdKitBeautifiedMolfileColumn(chiralityEngine, _package.rdKitModule);
  result.semType = DG.SEMTYPE.MOLECULE;
  return result;
}
|
|
@@ -25,8 +20,9 @@ export async function getMolColumnFromHelm(
|
|
|
25
20
|
/**
 * Translates a HELM column into a SMILES column and returns it, tagged with the
 * Molecule semantic type.
 * @param df Dataframe passed to the converter.
 * @param helmCol Column with HELM strings.
 */
export async function getSmilesColumnFromHelm(
  df: DG.DataFrame, helmCol: DG.Column<string>
): Promise<DG.Column<string>> {
  const helmConverter = (await SeqHelper.getInstance()).getHelmToMolfileConverter(df, helmCol);
  const result = helmConverter.convertToSmiles(_package.rdKitModule);
  result.semType = DG.SEMTYPE.MOLECULE;
  return result;
}
|
|
@@ -83,6 +83,20 @@ export class MonomerLibManager implements IMonomerLibHelper {
|
|
|
83
83
|
return this._monomerSets;
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
+
/**
 * Duplicate monomers as reported by the underlying monomer library: per polymer type,
 * the symbols that are found in more than one library (keyed by symbol).
 */
get duplicateMonomers() { return this._monomerLib.duplicateMonomers; }
|
|
90
|
+
|
|
91
|
+
/**
 * Whether every duplicate monomer has a preference assigned (i.e. which library it is
 * taken from), as reported by the underlying monomer library.
 */
get duplicatesHandled() { return this._monomerLib.duplicatesHandled; }
|
|
95
|
+
|
|
96
|
+
/**
 * Forwards the given user library settings to the underlying monomer library
 * so it can assign preferences for duplicate monomers.
 */
assignDuplicatePreferances(settings: UserLibSettings): void {
  this._monomerLib.assignDuplicatePreferances(settings);
}
|
|
99
|
+
|
|
86
100
|
/** Instance promise of {@link getFileManager} */
|
|
87
101
|
private _fileManagerPromise?: Promise<MonomerLibFileManager>;
|
|
88
102
|
|
|
@@ -256,7 +270,7 @@ export class MonomerLibManager implements IMonomerLibHelper {
|
|
|
256
270
|
// -- Instance singleton --
|
|
257
271
|
public static async getInstance(): Promise<MonomerLibManager> {
|
|
258
272
|
let res = window.$monomerLibHelperPromise;
|
|
259
|
-
if (res
|
|
273
|
+
if (res == undefined) {
|
|
260
274
|
res = window.$monomerLibHelperPromise = (async () => {
|
|
261
275
|
const instance = new MonomerLibManager(_package.logger);
|
|
262
276
|
instance._eventManager = MonomerLibFileEventManager.getInstance();
|