@datagrok/bio 2.24.0 → 2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/detectors.js +26 -12
- package/dist/455.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/files/samples/HELM_CHEMS.csv +11 -0
- package/package.json +2 -2
- package/src/analysis/sequence-space.ts +1 -1
- package/src/demo/bio03-atomic-level.ts +1 -1
- package/src/package-types.ts +1 -1
- package/src/package.ts +1 -1
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/utils/get-region.ts +2 -2
- package/src/utils/helm-to-molfile/converter/const.ts +0 -1
- package/src/utils/helm-to-molfile/converter/converter.ts +3 -3
- package/src/utils/helm-to-molfile/converter/helm.ts +14 -6
- package/src/utils/helm-to-molfile/converter/mol-bonds.ts +1 -1
- package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +2 -2
- package/src/utils/helm-to-molfile/converter/r-group-handler.ts +2 -2
- package/src/utils/monomer-lib/library-file-manager/file-validator.ts +1 -1
- package/src/utils/monomer-lib/library-file-manager/ui.ts +22 -5
- package/src/utils/monomer-lib/monomer-lib-base.ts +31 -3
- package/src/utils/monomer-lib/monomer-lib.ts +0 -26
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +1 -1
- package/src/utils/monomer-lib/smiles2Monomer.ts +128 -0
- package/src/utils/monomer-lib/web-editor-monomer-dummy.ts +15 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +1 -1
- package/src/utils/multiple-sequence-alignment.ts +1 -1
- package/src/utils/seq-helper/seq-handler.ts +25 -16
- package/src/utils/ui-utils.ts +1 -1
- package/src/viewers/web-logo-viewer.ts +19 -8
- package/test-console-output-1.log +784 -775
- package/test-record-1.mp4 +0 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
HELM,Activity
|
|
2
|
+
"PEPTIDE1{[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}$PEPTIDE1,PEPTIDE1,8:R2-1:R1$$$",5.3075109739681280
|
|
3
|
+
"PEPTIDE1{[ac].D.A.D.E.[*N[C@H](C(=O)*)Cc1ccc(cc1)OC(C(=O)O)C(=O)O |$_R1;;;;;_R2;;;;;;;;;;;;;;;$|].L.[am]}$$$$ CHEMBL8284
|
|
4
|
+
PEPTIDE1{[N[C@](C)(C(=O)O)Cc1ccc(c(c1)O)O]}$$$$",5.7238768534315438
|
|
5
|
+
"PEPTIDE1{[meL].[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].[meL].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}|PEPTIDE2{E.[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].E.[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}$PEPTIDE2,PEPTIDE2,8:R2-1:R1|PEPTIDE1,PEPTIDE2,1:R1-3:R3|PEPTIDE2,PEPTIDE1,1:R3-8:R2$$$V2.0",5.1858112460224372
|
|
6
|
+
"PEPTIDE1{[[C@H](C(=O)*)CS* |$;;;_R2;;;_R3$|].F.I.Q.N.[dC].S.R.G.[am]}$PEPTIDE1,PEPTIDE1,1:R3-6:R3$$$V2.0",6.2235023908043692
|
|
7
|
+
"PEPTIDE1{A.[dP].D.[dW].F.[dF].N.[dY].Y.[dW].G.[dN].W.[dH].G.[*N[C@@H](C(=O)*)[C@@H](C)O |$_R1;;;;;_R2;;;$|]}$PEPTIDE1,PEPTIDE1,16:R2-1:R1$$$",3.8459123763832412
|
|
8
|
+
PEPTIDE1{N.L.E.R.E.[*N[C@H](C(=O)*)CSCNC(=O)C |$_R1;;;;;_R2;;;;;;;$|].L.E.E.P.[*N[C@H](C(=O)*)CSCNC(=O)C |$_R1;;;;;_R2;;;;;;;$|].S.R.E.E.A.F}$$$$,3.2792043882465700
|
|
9
|
+
"PEPTIDE1{[[C@H](C(=O)*)CS* |$;;;_R2;;;_R3$|].Y.[dF].G.[dN].[dC].[dP].R.G.[am]}$PEPTIDE1,PEPTIDE1,1:R3-6:R3$$$",2.1058521529925680
|
|
10
|
+
PEPTIDE1{A.[[*]C(=O)[C@H](C)N([*])C |$_R2;;;;;;_R1;;;$|].A}$$$$V2.0,1.8036950016492720
|
|
11
|
+
"CHEM1{[*N[C@H](C(=O)*)Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$|]}|PEPTIDE1{[C[C@H](N[*])C(=O)C[*] |$;;;_R1;;;;_R2$|].G.G.G.C.C.K.K.K.K}$PEPTIDE1,CHEM1,10:R3-1:R1$$$V2.0",6.3880602836120888
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.24.0",
|
|
8
|
+
"version": "2.25.1",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.61.1",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.6",
|
|
@@ -33,7 +33,7 @@ export async function getEncodedSeqSpaceCol(
|
|
|
33
33
|
for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
|
|
34
34
|
const catI = seqColRawData[rowIdx];
|
|
35
35
|
const seq = seqColCats[catI];
|
|
36
|
-
if (seq
|
|
36
|
+
if (seq == null || seqCol.isNone(rowIdx)) {
|
|
37
37
|
//@ts-ignore
|
|
38
38
|
encList[rowIdx] = null;
|
|
39
39
|
continue;
|
|
@@ -18,7 +18,7 @@ export async function demoToAtomicLevel(): Promise<void> {
|
|
|
18
18
|
adjustGridcolAfterRender(view.grid, 'molfile(HELM)', 500, 300, true);
|
|
19
19
|
adjustGridcolAfterRender(view.grid, 'HELM', 500, undefined, true);
|
|
20
20
|
grok.shell.info('Hover over monomers in HELM column to highlight them in molecular structure.', {timeout: 10});
|
|
21
|
-
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#
|
|
21
|
+
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#convert-to-atomic-level');
|
|
22
22
|
}
|
|
23
23
|
|
|
24
24
|
export async function demoBio03UI(): Promise<void> {
|
package/src/package-types.ts
CHANGED
|
@@ -41,7 +41,7 @@ export class BioPackageProperties extends Map<string, any> {
|
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
public set maxMonomerLength(value: number | null) {
|
|
44
|
-
const vs = value
|
|
44
|
+
const vs = value == null ? 'long' : value.toString();
|
|
45
45
|
super.set(BioPackagePropertiesNames.MaxMonomerLength, vs);
|
|
46
46
|
this._onPropertyChanged.next(BioPackagePropertiesNames.MaxMonomerLength);
|
|
47
47
|
}
|
package/src/package.ts
CHANGED
|
@@ -83,7 +83,7 @@ category('monomerLibraries', () => {
|
|
|
83
83
|
};
|
|
84
84
|
const monomerLib = monomerLibHelper.getMonomerLib();
|
|
85
85
|
const absentOverrideMonomer = monomerLib.getMonomer(overMon.polymerType, overMon.symbol);
|
|
86
|
-
expect(absentOverrideMonomer
|
|
86
|
+
expect(absentOverrideMonomer == null, true, `Unexpectedly found monomer '${overMon.symbol}' `);
|
|
87
87
|
|
|
88
88
|
const overriddenMonomerLib = monomerLib.override({[overMon.polymerType]: {[overMon.symbol]: overMon}}, 'test');
|
|
89
89
|
const resOverMon = overriddenMonomerLib.getMonomer(overMon.polymerType, overMon.symbol);
|
package/src/utils/get-region.ts
CHANGED
|
@@ -47,9 +47,9 @@ export function getRegionDo(
|
|
|
47
47
|
if (sh.posList[posJ] == startPosName) startPosIdx = posJ;
|
|
48
48
|
if (sh.posList[posJ] == endPosName) endPosIdx = posJ;
|
|
49
49
|
}
|
|
50
|
-
if (startPosIdx
|
|
50
|
+
if (startPosIdx == null && startPosName !== null)
|
|
51
51
|
throw new Error(`Start position ${startPosName} not found.`);
|
|
52
|
-
if (endPosIdx
|
|
52
|
+
if (endPosIdx == null && endPosName !== null)
|
|
53
53
|
throw new Error(`End position ${endPosName} not found.`);
|
|
54
54
|
|
|
55
55
|
if (sh.posList.length < endPosIdx!)
|
|
@@ -29,7 +29,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
|
|
|
29
29
|
const smilesColName = `smiles(${helmCol.name})`;
|
|
30
30
|
const smilesColNameU = df ? df.columns.getUnusedName(smilesColName) : smilesColName;
|
|
31
31
|
return DG.Column.fromStrings(smilesColNameU, smiles.map((molecule) => {
|
|
32
|
-
if (molecule
|
|
32
|
+
if (molecule == null)
|
|
33
33
|
return '';
|
|
34
34
|
return molecule;
|
|
35
35
|
}));
|
|
@@ -57,7 +57,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
|
|
|
57
57
|
|
|
58
58
|
public getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string): DG.Column<string> {
|
|
59
59
|
const beautifiedMolV2000 = beautifiedMols.map((mol) => {
|
|
60
|
-
if (mol
|
|
60
|
+
if (mol == null)
|
|
61
61
|
return '';
|
|
62
62
|
const molBlock = mol.get_v3Kmolblock();
|
|
63
63
|
mol!.delete();
|
|
@@ -97,7 +97,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
|
|
|
97
97
|
if (chiralityEngine)
|
|
98
98
|
return this.getMolV3000ViaOCL(beautifiedMols, molColNameU);
|
|
99
99
|
return DG.Column.fromStrings(molColNameU, beautifiedMols.map((mol) => {
|
|
100
|
-
if (mol
|
|
100
|
+
if (mol == null)
|
|
101
101
|
return '';
|
|
102
102
|
const molBlock = mol.get_v3Kmolblock();
|
|
103
103
|
mol!.delete();
|
|
@@ -1,16 +1,24 @@
|
|
|
1
1
|
import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
|
|
2
2
|
import {ConnectionList} from './connection-list';
|
|
3
|
-
import {HELM_ITEM_SEPARATOR
|
|
3
|
+
import {HELM_ITEM_SEPARATOR} from './const';
|
|
4
4
|
import {SimplePolymer} from './simple-polymer';
|
|
5
5
|
import {Bond} from './types';
|
|
6
6
|
|
|
7
7
|
export class Helm {
|
|
8
8
|
constructor(private helmString: string) {
|
|
9
|
-
const
|
|
10
|
-
const
|
|
11
|
-
|
|
9
|
+
const indexOfSequenceEnd = helmString.indexOf('}$');
|
|
10
|
+
const sequencePart = helmString.substring(0, indexOfSequenceEnd + 1);
|
|
11
|
+
const connectionsEndPart = helmString.indexOf('$', indexOfSequenceEnd + 2);
|
|
12
|
+
const connectionsPart = helmString.substring(indexOfSequenceEnd + 2, connectionsEndPart);
|
|
13
|
+
// const helmParts = seq.split('$');
|
|
14
|
+
const spList = sequencePart.split('}|');
|
|
15
|
+
// since we removed }|, need to add } to last part
|
|
16
|
+
for (let i = 0; i < spList.length - 1; i++)
|
|
17
|
+
spList[i] = spList[i] + '}';
|
|
18
|
+
|
|
19
|
+
this.simplePolymers = spList
|
|
12
20
|
.map((item) => new SimplePolymer(item));
|
|
13
|
-
this.connectionList = new ConnectionList(
|
|
21
|
+
this.connectionList = new ConnectionList(connectionsPart);
|
|
14
22
|
this.bondData = this.getBondData();
|
|
15
23
|
|
|
16
24
|
this.bondedRGroupsMap = this.getBondedRGroupsMap();
|
|
@@ -58,7 +66,7 @@ export class Helm {
|
|
|
58
66
|
const lowerBound = shiftValues.sort((a, b) => b - a).find( // find the largest shift not exceeding monomerGlobalIdx
|
|
59
67
|
(shift) => monomerGlobalIdx >= shift
|
|
60
68
|
);
|
|
61
|
-
if (lowerBound
|
|
69
|
+
if (lowerBound == undefined)
|
|
62
70
|
throw new Error(`Cannot find simple polymer for monomer ${monomerGlobalIdx}`);
|
|
63
71
|
const simplePolymerId = Object.keys(shifts).find((simplePolymerId) => shifts[simplePolymerId] === lowerBound)!;
|
|
64
72
|
const simplePolymer = this.simplePolymers.find((simplePolymer) => simplePolymer.id === simplePolymerId)!;
|
|
@@ -31,7 +31,7 @@ export abstract class MolfileBonds {
|
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
replacePositionsInBondsByDummy(positions: PositionInBonds[], dummy?: number): void {
|
|
34
|
-
if (dummy
|
|
34
|
+
if (dummy == undefined)
|
|
35
35
|
dummy = -1;
|
|
36
36
|
positions.forEach((position) => {
|
|
37
37
|
const {bondLineIdx, nodeIdx} = position;
|
|
@@ -15,7 +15,7 @@ export abstract class MolfileWrapper {
|
|
|
15
15
|
|
|
16
16
|
protected shiftR1GroupToOrigin(): void {
|
|
17
17
|
const r1Idx = this.rGroups.getAtomicIdx(1);
|
|
18
|
-
if (r1Idx
|
|
18
|
+
if (r1Idx == null)
|
|
19
19
|
return; // R1 group is not present, nothing to shift
|
|
20
20
|
const {x, y} = this.atoms.atomCoordinates[r1Idx];
|
|
21
21
|
this.atoms.shift({x: -x, y: -y});
|
|
@@ -23,7 +23,7 @@ export abstract class MolfileWrapper {
|
|
|
23
23
|
|
|
24
24
|
protected alignR2AlongX(): void {
|
|
25
25
|
const r2Idx = this.rGroups.getAtomicIdx(2);
|
|
26
|
-
if (r2Idx
|
|
26
|
+
if (r2Idx == null)
|
|
27
27
|
throw new Error(`Cannot find R2 group for monomer ${this.monomerSymbol}`);
|
|
28
28
|
const r2Coordinates = this.atoms.atomCoordinates[r2Idx];
|
|
29
29
|
const tan = r2Coordinates.y / r2Coordinates.x;
|
|
@@ -19,13 +19,13 @@ export class RGroupHandler {
|
|
|
19
19
|
|
|
20
20
|
getAtomicIdx(rGroupId: number): number | null {
|
|
21
21
|
const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rGroupId);
|
|
22
|
-
return atomicIdx
|
|
22
|
+
return atomicIdx == undefined ? null : atomicIdx;
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
private removeRGroupsFromAtomBlock(rGroupIds: number[]): void {
|
|
26
26
|
rGroupIds.forEach((rgroupId) => {
|
|
27
27
|
const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rgroupId);
|
|
28
|
-
if (atomicIdx
|
|
28
|
+
if (atomicIdx == undefined)
|
|
29
29
|
throw new Error(`Cannot find atomic index for R group ${rgroupId}`);
|
|
30
30
|
});
|
|
31
31
|
|
|
@@ -22,7 +22,7 @@ export class MonomerLibFileValidator {
|
|
|
22
22
|
|
|
23
23
|
validateFile(fileContent: string, fileName: string): boolean {
|
|
24
24
|
const jsonContent = this.parseJson(fileContent, fileName);
|
|
25
|
-
if (jsonContent
|
|
25
|
+
if (jsonContent == null)
|
|
26
26
|
return false;
|
|
27
27
|
|
|
28
28
|
if (!Array.isArray(jsonContent)) {
|
|
@@ -5,7 +5,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
|
|
7
7
|
import $ from 'cash-dom';
|
|
8
|
-
import {Subject} from 'rxjs';
|
|
8
|
+
import {Subject, Subscription} from 'rxjs';
|
|
9
9
|
|
|
10
10
|
import {
|
|
11
11
|
getUserLibSettings, setUserLibSettings
|
|
@@ -50,7 +50,7 @@ class MonomerLibraryManagerWidget {
|
|
|
50
50
|
private libHelper: IMonomerLibHelper;
|
|
51
51
|
|
|
52
52
|
static async getInstance(): Promise<MonomerLibraryManagerWidget> {
|
|
53
|
-
if (MonomerLibraryManagerWidget.instancePromise
|
|
53
|
+
if (MonomerLibraryManagerWidget.instancePromise == undefined) {
|
|
54
54
|
MonomerLibraryManagerWidget.instancePromise = (async () => {
|
|
55
55
|
const instance = new MonomerLibraryManagerWidget();
|
|
56
56
|
const libHelper = await getMonomerLibHelper();
|
|
@@ -67,10 +67,12 @@ class MonomerLibraryManagerWidget {
|
|
|
67
67
|
instance._widget = await instance.createWidget();
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
+
private _fileUploadSubscription: Subscription | null = null;
|
|
70
71
|
private async createWidget() {
|
|
71
72
|
const content = await this.getWidgetContent();
|
|
72
73
|
const monomerLibHelper = await getMonomerLibHelper();
|
|
73
|
-
|
|
74
|
+
this._fileUploadSubscription?.unsubscribe();
|
|
75
|
+
this._fileUploadSubscription =
|
|
74
76
|
monomerLibHelper.fileUploadRequested.subscribe(
|
|
75
77
|
() => this.promptToAddLibraryFiles()
|
|
76
78
|
);
|
|
@@ -92,14 +94,29 @@ class MonomerLibraryManagerWidget {
|
|
|
92
94
|
accept: '.json',
|
|
93
95
|
open: async (selectedFile) => {
|
|
94
96
|
const doAdd = async (provider: IMonomerLibProvider) => {
|
|
95
|
-
const content = await selectedFile.text();
|
|
96
97
|
const name = selectedFile.name;
|
|
98
|
+
const existingLibs = await provider.listLibraries();
|
|
99
|
+
// check if library already exists
|
|
100
|
+
if (existingLibs.includes(name)) {
|
|
101
|
+
const confirm = await new Promise<boolean>((resolve) => {
|
|
102
|
+
ui.dialog('Confirm Library Update')
|
|
103
|
+
.add(ui.divText(`Library '${name}' already exists. Do you want to overwrite it?`))
|
|
104
|
+
.onOK(() => resolve(true))
|
|
105
|
+
.onCancel(() => resolve(false))
|
|
106
|
+
.show();
|
|
107
|
+
});
|
|
108
|
+
if (!confirm)
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
const content = await selectedFile.text();
|
|
97
113
|
const progressIndicator = DG.TaskBarProgressIndicator.create(`Adding ${name} as a monomer library`);
|
|
98
114
|
try {
|
|
99
115
|
await provider.addOrUpdateLibraryString(name, content);
|
|
100
116
|
// this.eventManager.updateLibrarySelectionStatus(name, true);
|
|
101
117
|
} catch (e) {
|
|
102
118
|
grok.shell.error(`File ${name} is not a valid monomer library, verify it is aligned to HELM JSON schema.`);
|
|
119
|
+
console.error(e);
|
|
103
120
|
} finally {
|
|
104
121
|
progressIndicator.close();
|
|
105
122
|
}
|
|
@@ -122,7 +139,7 @@ class MonomerLibraryManagerWidget {
|
|
|
122
139
|
.onOK(async () => {
|
|
123
140
|
const provider = providers.find((p) => p.name === providersInput.value)!; // should not be null
|
|
124
141
|
await doAdd(provider);
|
|
125
|
-
});
|
|
142
|
+
}).show();
|
|
126
143
|
},
|
|
127
144
|
});
|
|
128
145
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-len */
|
|
1
2
|
/* eslint-disable max-lines */
|
|
2
3
|
import * as grok from 'datagrok-api/grok';
|
|
3
4
|
import * as ui from 'datagrok-api/ui';
|
|
@@ -18,12 +19,14 @@ import {GAP_SYMBOL, GapOriginals, NOTATION} from '@datagrok-libraries/bio/src/ut
|
|
|
18
19
|
import {Vector} from '@datagrok-libraries/utils/src/type-declarations';
|
|
19
20
|
import {vectorAdd, vectorDotProduct, vectorLength} from '@datagrok-libraries/utils/src/vector-operations';
|
|
20
21
|
|
|
21
|
-
import {AmbiguousWebEditorMonomer, GapWebEditorMonomer, MissingWebEditorMonomer} from './web-editor-monomer-dummy';
|
|
22
|
+
import {AmbiguousWebEditorMonomer, GapWebEditorMonomer, MissingWebEditorMonomer, SmilesWebEditorMonomer} from './web-editor-monomer-dummy';
|
|
22
23
|
import {LibraryWebEditorMonomer} from './web-editor-monomer-of-library';
|
|
23
24
|
import {naturalMonomerColors} from './monomer-colors';
|
|
24
25
|
|
|
25
26
|
import {_package} from '../../package';
|
|
26
27
|
import {MonomerLibData} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
28
|
+
import {smiles2Monomer} from './smiles2Monomer';
|
|
29
|
+
import {polymerTypeToHelmType} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
27
30
|
|
|
28
31
|
const monomerRe = /[\w()]+/;
|
|
29
32
|
//** Do not mess with monomer symbol with parenthesis enclosed in square brackets */
|
|
@@ -61,9 +64,15 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
61
64
|
}
|
|
62
65
|
|
|
63
66
|
getMonomerSymbolsByType(polymerType: PolymerType): string[] {
|
|
64
|
-
|
|
67
|
+
const res = Object.keys(this._monomers[polymerType]);
|
|
68
|
+
if (this._smilesMonomerCache[polymerType])
|
|
69
|
+
res.push(...Object.keys(this._smilesMonomerCache[polymerType]));
|
|
70
|
+
return res;
|
|
65
71
|
}
|
|
66
72
|
|
|
73
|
+
// smiles to symbol Mapping cache
|
|
74
|
+
private _smilesMonomerCache: {[polymerType: string]: {[smiles: string]: string}} = {};
|
|
75
|
+
|
|
67
76
|
/** Creates missing {@link Monomer} */
|
|
68
77
|
addMissingMonomer(polymerType: PolymerType, monomerSymbol: string): Monomer {
|
|
69
78
|
let mSet = this._monomers[polymerType];
|
|
@@ -78,6 +87,22 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
78
87
|
else if (polymerType === PolymerTypes.RNA && monomerSymbol === 'N')
|
|
79
88
|
monomerName = 'Any';
|
|
80
89
|
|
|
90
|
+
// test if it is smiles
|
|
91
|
+
// check if the missing monomer symbol is a valid SMILES string
|
|
92
|
+
const smilesMonomer = smiles2Monomer(monomerSymbol, polymerType);
|
|
93
|
+
if (smilesMonomer) {
|
|
94
|
+
this._smilesMonomerCache[polymerType] = this._smilesMonomerCache[polymerType] ?? {};
|
|
95
|
+
const smSet = this._smilesMonomerCache[polymerType];
|
|
96
|
+
const symbol = Object.keys(smSet).length + 1;
|
|
97
|
+
smSet[monomerSymbol] = `#${polymerType[0]}${symbol}`; // e.g. #P1, #R2, #C3, #B4
|
|
98
|
+
const m: Monomer = {...smilesMonomer, symbol: smSet[monomerSymbol]};
|
|
99
|
+
// note: for SMILES-based monomers the ID is the generated #-prefixed symbol, while the original SMILES (held in monomerSymbol) is passed as the smiles argument, to avoid key duplication
|
|
100
|
+
const wem = new SmilesWebEditorMonomer(polymerTypeToHelmType(polymerType), m.symbol, monomerSymbol, `SMILES Monomer ${m.symbol}`, m.rgroups.map((rg) => rg[RGP.LABEL]));
|
|
101
|
+
m.wem = wem;
|
|
102
|
+
mSet[m.symbol] = m;
|
|
103
|
+
return m;
|
|
104
|
+
}
|
|
105
|
+
|
|
81
106
|
const m = mSet[monomerSymbol] = {
|
|
82
107
|
[REQ.SYMBOL]: monomerSymbol,
|
|
83
108
|
[REQ.NAME]: monomerName,
|
|
@@ -124,8 +149,11 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
124
149
|
if (res) break;
|
|
125
150
|
}
|
|
126
151
|
} else {
|
|
152
|
+
// Check smiles cache, modify with mapped symbol
|
|
153
|
+
if (this._smilesMonomerCache[polymerType]?.[monomerSymbol])
|
|
154
|
+
monomerSymbol = this._smilesMonomerCache[polymerType][monomerSymbol];
|
|
127
155
|
const dict = this._monomers[polymerType];
|
|
128
|
-
res = dict
|
|
156
|
+
res = dict?.[monomerSymbol] ?? null;
|
|
129
157
|
}
|
|
130
158
|
return res;
|
|
131
159
|
}
|
|
@@ -55,32 +55,6 @@ export class MonomerLib extends MonomerLibBase implements IMonomerLib {
|
|
|
55
55
|
return resJSON;
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
getMonomer(polymerType: PolymerType | null, argMonomerSymbol: string): Monomer | null {
|
|
59
|
-
const logPrefix = `Bio: MonomerLib.getMonomer()`;
|
|
60
|
-
// Adjust RNA's 'R' for ribose to 'r' and 'P' for phosphate to 'p' for case-sensitive monomer names.
|
|
61
|
-
// There are uppercase 'R' and 'P' at RNA samples in test data 'helm2.csv' but lowercase in HELMCoreLibrary.json
|
|
62
|
-
let monomerSymbol = argMonomerSymbol;
|
|
63
|
-
if (polymerType == 'RNA' && monomerSymbol == 'R')
|
|
64
|
-
monomerSymbol = 'r';
|
|
65
|
-
if (polymerType == 'RNA' && monomerSymbol == 'P')
|
|
66
|
-
monomerSymbol = 'p';
|
|
67
|
-
|
|
68
|
-
let res: Monomer | null = null;
|
|
69
|
-
|
|
70
|
-
if (!polymerType) {
|
|
71
|
-
_package.logger.warning(`${logPrefix} symbol '${argMonomerSymbol}', polymerType not specified.`);
|
|
72
|
-
// Assume any polymer type
|
|
73
|
-
for (const [_polymerType, dict] of Object.entries(this._monomers)) {
|
|
74
|
-
res = dict[monomerSymbol];
|
|
75
|
-
if (res) break;
|
|
76
|
-
}
|
|
77
|
-
} else {
|
|
78
|
-
const dict = this._monomers[polymerType];
|
|
79
|
-
res = dict?.[monomerSymbol] ?? null;
|
|
80
|
-
}
|
|
81
|
-
return res;
|
|
82
|
-
}
|
|
83
|
-
|
|
84
58
|
private _monomerSets: { [biotype: string /*HelmType*/]: MonomerSetType } | null = null;
|
|
85
59
|
|
|
86
60
|
getMonomerSet(biotype: HelmType): MonomerSetType | null {
|
|
@@ -1178,7 +1178,7 @@ function getCorrectedSmiles(rgroups: RGroup[], smiles?: string, molBlock?: strin
|
|
|
1178
1178
|
return isSmilesMalformed ? canonical : grok.chem.convert(canonical, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles);
|
|
1179
1179
|
}
|
|
1180
1180
|
|
|
1181
|
-
function getCorrectedMolBlock(molBlock: string) {
|
|
1181
|
+
export function getCorrectedMolBlock(molBlock: string) {
|
|
1182
1182
|
// to correct molblock, we should make sure that
|
|
1183
1183
|
// 1. RGP field is present at the end, before the M END line
|
|
1184
1184
|
// 2. RGP field is present in the correct format
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/* eslint-disable camelcase */
|
|
2
|
+
/* eslint-disable max-len */
|
|
3
|
+
import {Monomer} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
4
|
+
import {_package} from '../../package';
|
|
5
|
+
import {PolymerType} from '@datagrok-libraries/bio/src/helm/types';
|
|
6
|
+
import {HELM_RGROUP_FIELDS as RGP} from '@datagrok-libraries/bio/src/utils/const';
|
|
7
|
+
import * as grok from 'datagrok-api/grok';
|
|
8
|
+
import {getCorrectedMolBlock} from './monomer-manager/monomer-manager';
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Example r groups
|
|
12
|
+
* {
|
|
13
|
+
"capGroupSMILES": "[*:1][H]",
|
|
14
|
+
"alternateId": "R1-H",
|
|
15
|
+
"capGroupName": "H",
|
|
16
|
+
"label": "R1"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"capGroupSMILES": "O[*:2]",
|
|
20
|
+
"alternateId": "R2-OH",
|
|
21
|
+
"capGroupName": "OH",
|
|
22
|
+
"label": "R2"
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
const cx_smiles_regexp = /.*\|\$.*R.*\$\|/;
|
|
28
|
+
const rgroup_regexp = /\[R(\d+)\]/;
|
|
29
|
+
const rgroup_regexpg = /\[R(\d+)\]/g;
|
|
30
|
+
const ambig_regexp = /\[\*:(\d+)\]/g;
|
|
31
|
+
|
|
32
|
+
export type MonomerWithoutSymbol = Omit<Monomer, 'symbol'>;
|
|
33
|
+
|
|
34
|
+
export function getMonomerFromRSmiles(smiles: string, polymerType?: PolymerType): MonomerWithoutSymbol | null {
|
|
35
|
+
const rgroupNumbers = Array.from(smiles.matchAll(rgroup_regexpg)).map((m) => m[1]);
|
|
36
|
+
const res: MonomerWithoutSymbol = {
|
|
37
|
+
name: 'Explicit SMILES Monomer',
|
|
38
|
+
smiles: smiles,
|
|
39
|
+
polymerType: polymerType ?? 'CHEM',
|
|
40
|
+
molfile: '',
|
|
41
|
+
rgroups: rgroupNumbers.map((numString) => ({
|
|
42
|
+
[RGP.LABEL]: `R${numString}`,
|
|
43
|
+
[RGP.CAP_GROUP_NAME]: `H`,
|
|
44
|
+
[RGP.CAP_GROUP_SMILES]: `[*:${numString}][H]`,
|
|
45
|
+
[RGP.ALTERNATE_ID]: `R${numString}-H`,
|
|
46
|
+
})),
|
|
47
|
+
author: 'Datagrok auto-generated',
|
|
48
|
+
id: 0,
|
|
49
|
+
createDate: null,
|
|
50
|
+
monomerType: 'Backbone',
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
try {
|
|
54
|
+
//try to generate corrected molfile and smiles
|
|
55
|
+
let corSmiles = smiles;
|
|
56
|
+
res.rgroups.forEach((rg) => {
|
|
57
|
+
const labelNum = rg[RGP.LABEL].substring(1); // R1 -> 1
|
|
58
|
+
corSmiles = corSmiles.replace(`[R${labelNum}]`, `[*:${labelNum}]`);
|
|
59
|
+
});
|
|
60
|
+
const molFile = getCorrectedMolBlock(grok.chem.convert(corSmiles, grok.chem.Notation.Smiles, grok.chem.Notation.MolBlock));
|
|
61
|
+
res.molfile = molFile;
|
|
62
|
+
res.smiles = corSmiles;
|
|
63
|
+
} catch (e) {
|
|
64
|
+
_package.logger.error(`getMonomerFromRSmiles: cannot convert SMILES to Molfile: ${smiles}\n${e}`);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
return res;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/** Generate Monomer Object directly from inline smiles
|
|
71
|
+
* Purely string based, no external calls
|
|
72
|
+
*
|
|
73
|
+
* Currently accepts (to be extended):
|
|
74
|
+
*
|
|
75
|
+
* cxsmiles written as *N[C@H](C(=O)*)Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$| where * are connection points
|
|
76
|
+
*
|
|
77
|
+
* or * in square brackets like [*]N[C@H](C(=O)[*])Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$|
|
|
78
|
+
*
|
|
79
|
+
* simple smiles with R notations like CCC[R1] or CCC(=O)[R2]
|
|
80
|
+
*
|
|
81
|
+
* simple smiles with ambiguity defined as [*:1] like CCC[*:1] or CCC(=O)[*:2]
|
|
82
|
+
*/
|
|
83
|
+
export function smiles2Monomer(smiles: string, polymerType?: PolymerType): MonomerWithoutSymbol | null {
|
|
84
|
+
try {
|
|
85
|
+
const isCxSmiles = cx_smiles_regexp.test(smiles);
|
|
86
|
+
if (isCxSmiles) {
|
|
87
|
+
// CXSMILES parsing
|
|
88
|
+
const parts = smiles.split('|$');
|
|
89
|
+
const molPart = parts[0].trim();
|
|
90
|
+
const rGroupPart = parts[1];
|
|
91
|
+
// make sure all R groups are captured
|
|
92
|
+
const rGroupMatches = Array.from(rGroupPart.matchAll(/R(\d+)/g));
|
|
93
|
+
const starsInMolecule = Array.from(molPart.matchAll(/(\*)/g));
|
|
94
|
+
if (rGroupMatches.length !== starsInMolecule.length)
|
|
95
|
+
return null; // make sure that number of R groups and stars are the same
|
|
96
|
+
// remove brackets from stars if any
|
|
97
|
+
let cleanMol = molPart.replaceAll(/\[\*\]/g, '*');
|
|
98
|
+
// speaking in terms of consecutiveness, R groups in definition and stars in Smiles will be in the same order
|
|
99
|
+
// so we can just iterate through them
|
|
100
|
+
const rGroupNumbers = rGroupMatches.map((m) => m[1]); // numbers as strings
|
|
101
|
+
for (let i = 0; i < rGroupNumbers.length; i++) {
|
|
102
|
+
const rNum = rGroupNumbers[i];
|
|
103
|
+
// replace first matched star with R group
|
|
104
|
+
cleanMol = cleanMol.replace('*', `[R${rNum}]`);
|
|
105
|
+
}
|
|
106
|
+
return getMonomerFromRSmiles(cleanMol, polymerType);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// simple smiles parsing
|
|
110
|
+
// to simplify, replace all ambiguous [*:1] with R1, etc
|
|
111
|
+
let cleanSmiles = smiles;
|
|
112
|
+
const ambigMatches = Array.from(smiles.matchAll(ambig_regexp));
|
|
113
|
+
for (const match of ambigMatches) {
|
|
114
|
+
const fullMatch = match[0];
|
|
115
|
+
const rNum = match[1];
|
|
116
|
+
cleanSmiles = cleanSmiles.replace(fullMatch, `[R${rNum}]`);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// make sure monomer has at least one R group
|
|
120
|
+
if (rgroup_regexp.test(cleanSmiles))
|
|
121
|
+
return getMonomerFromRSmiles(cleanSmiles, polymerType);
|
|
122
|
+
} catch (e) {
|
|
123
|
+
_package.logger.error(`smiles2Monomer: cannot parse SMILES: ${smiles}\n${e}`);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
return null;
|
|
128
|
+
}
|
|
@@ -18,7 +18,7 @@ export abstract class WebEditorMonomerDummy implements IWebEditorMonomer {
|
|
|
18
18
|
get issmiles(): boolean { return !!this.smiles; }
|
|
19
19
|
|
|
20
20
|
/** Only single-digit R-Group indices are allowed in Pistoia code */
|
|
21
|
-
|
|
21
|
+
at: WebEditorRGroups = {
|
|
22
22
|
R1: 'H', R2: 'H', R3: 'H', R4: 'H', R5: 'H', R6: 'H', R7: 'H', R8: 'H', R9: 'H'
|
|
23
23
|
};
|
|
24
24
|
|
|
@@ -80,6 +80,20 @@ export abstract class WebEditorMonomerDummy implements IWebEditorMonomer {
|
|
|
80
80
|
}
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
export class SmilesWebEditorMonomer extends WebEditorMonomerDummy {
|
|
84
|
+
public readonly backgroundcolor: string = '#808080';
|
|
85
|
+
public readonly linecolor: string = '#000000';
|
|
86
|
+
public readonly textcolor: string = '#000000';
|
|
87
|
+
|
|
88
|
+
constructor(biotype: string, id: string, smiles: string, name: string, rgpLabels: string[]) {
|
|
89
|
+
super(biotype, id, name, undefined, undefined, undefined, smiles);
|
|
90
|
+
this.at = {};
|
|
91
|
+
rgpLabels.forEach((label) => {
|
|
92
|
+
this.at[label] = 'H';
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
83
97
|
export class GapWebEditorMonomer extends WebEditorMonomerDummy {
|
|
84
98
|
public readonly backgroundcolor: string = '#FFFFFF';
|
|
85
99
|
public readonly linecolor: string = '#808080';
|
|
@@ -138,7 +138,7 @@ async function onDialogOk(
|
|
|
138
138
|
colInput.fireChanged();
|
|
139
139
|
if (colInput.value.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
140
140
|
throw new Error('Chosen column has to be of Macromolecule semantic type');
|
|
141
|
-
if (performAlignment
|
|
141
|
+
if (performAlignment == undefined) // value can only be undefined when column can't be processed with either method
|
|
142
142
|
throw new Error('Invalid column format');
|
|
143
143
|
msaCol = await performAlignment(); // progress
|
|
144
144
|
if (msaCol == null)
|
|
@@ -149,7 +149,7 @@ function parseKalignError(out: string, limit?: number): string {
|
|
|
149
149
|
const errLineList: string[] = [];
|
|
150
150
|
const errLineRe = /^.+ERROR : (.+)$/gm;
|
|
151
151
|
let ma: RegExpExecArray | null;
|
|
152
|
-
while ((ma = errLineRe.exec(out)) != null && (limit
|
|
152
|
+
while ((ma = errLineRe.exec(out)) != null && (limit == undefined || errLineList.length < limit)) {
|
|
153
153
|
//
|
|
154
154
|
errLineList.push(ma[1]);
|
|
155
155
|
}
|