@datagrok/bio 2.24.0 → 2.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/dist/455.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/files/samples/HELM_CHEMS.csv +11 -0
- package/package.json +2 -2
- package/src/analysis/sequence-space.ts +1 -1
- package/src/demo/bio03-atomic-level.ts +1 -1
- package/src/package-types.ts +1 -1
- package/src/package.ts +1 -1
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/utils/get-region.ts +2 -2
- package/src/utils/helm-to-molfile/converter/const.ts +0 -1
- package/src/utils/helm-to-molfile/converter/converter.ts +3 -3
- package/src/utils/helm-to-molfile/converter/helm.ts +14 -6
- package/src/utils/helm-to-molfile/converter/mol-bonds.ts +1 -1
- package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +2 -2
- package/src/utils/helm-to-molfile/converter/r-group-handler.ts +2 -2
- package/src/utils/monomer-lib/library-file-manager/file-validator.ts +1 -1
- package/src/utils/monomer-lib/library-file-manager/ui.ts +1 -1
- package/src/utils/monomer-lib/monomer-lib-base.ts +31 -3
- package/src/utils/monomer-lib/monomer-lib.ts +0 -26
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +1 -1
- package/src/utils/monomer-lib/smiles2Monomer.ts +128 -0
- package/src/utils/monomer-lib/web-editor-monomer-dummy.ts +15 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +1 -1
- package/src/utils/multiple-sequence-alignment.ts +1 -1
- package/src/utils/seq-helper/seq-handler.ts +10 -10
- package/src/utils/ui-utils.ts +1 -1
- package/src/viewers/web-logo-viewer.ts +19 -8
- package/test-console-output-1.log +338 -341
- package/test-record-1.mp4 +0 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
HELM,Activity
|
|
2
|
+
"PEPTIDE1{[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}$PEPTIDE1,PEPTIDE1,8:R2-1:R1$$$",5.3075109739681280
|
|
3
|
+
"PEPTIDE1{[ac].D.A.D.E.[*N[C@H](C(=O)*)Cc1ccc(cc1)OC(C(=O)O)C(=O)O |$_R1;;;;;_R2;;;;;;;;;;;;;;;$|].L.[am]}$$$$ CHEMBL8284
|
|
4
|
+
PEPTIDE1{[N[C@](C)(C(=O)O)Cc1ccc(c(c1)O)O]}$$$$",5.7238768534315438
|
|
5
|
+
"PEPTIDE1{[meL].[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].[meL].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}|PEPTIDE2{E.[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].E.[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}$PEPTIDE2,PEPTIDE2,8:R2-1:R1|PEPTIDE1,PEPTIDE2,1:R1-3:R3|PEPTIDE2,PEPTIDE1,1:R3-8:R2$$$V2.0",5.1858112460224372
|
|
6
|
+
"PEPTIDE1{[[C@H](C(=O)*)CS* |$;;;_R2;;;_R3$|].F.I.Q.N.[dC].S.R.G.[am]}$PEPTIDE1,PEPTIDE1,1:R3-6:R3$$$V2.0",6.2235023908043692
|
|
7
|
+
"PEPTIDE1{A.[dP].D.[dW].F.[dF].N.[dY].Y.[dW].G.[dN].W.[dH].G.[*N[C@@H](C(=O)*)[C@@H](C)O |$_R1;;;;;_R2;;;$|]}$PEPTIDE1,PEPTIDE1,16:R2-1:R1$$$",3.8459123763832412
|
|
8
|
+
PEPTIDE1{N.L.E.R.E.[*N[C@H](C(=O)*)CSCNC(=O)C |$_R1;;;;;_R2;;;;;;;$|].L.E.E.P.[*N[C@H](C(=O)*)CSCNC(=O)C |$_R1;;;;;_R2;;;;;;;$|].S.R.E.E.A.F}$$$$,3.2792043882465700
|
|
9
|
+
"PEPTIDE1{[[C@H](C(=O)*)CS* |$;;;_R2;;;_R3$|].Y.[dF].G.[dN].[dC].[dP].R.G.[am]}$PEPTIDE1,PEPTIDE1,1:R3-6:R3$$$",2.1058521529925680
|
|
10
|
+
PEPTIDE1{A.[[*]C(=O)[C@H](C)N([*])C |$_R2;;;;;;_R1;;;$|].A}$$$$V2.0,1.8036950016492720
|
|
11
|
+
"CHEM1{[*N[C@H](C(=O)*)Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$|]}|PEPTIDE1{[C[C@H](N[*])C(=O)C[*] |$;;;_R1;;;;_R2$|].G.G.G.C.C.K.K.K.K}$PEPTIDE1,CHEM1,10:R3-1:R1$$$V2.0",6.3880602836120888
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.24.0",
|
|
8
|
+
"version": "2.25.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.61.0",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.6",
|
|
@@ -33,7 +33,7 @@ export async function getEncodedSeqSpaceCol(
|
|
|
33
33
|
for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
|
|
34
34
|
const catI = seqColRawData[rowIdx];
|
|
35
35
|
const seq = seqColCats[catI];
|
|
36
|
-
if (seq
|
|
36
|
+
if (seq == null || seqCol.isNone(rowIdx)) {
|
|
37
37
|
//@ts-ignore
|
|
38
38
|
encList[rowIdx] = null;
|
|
39
39
|
continue;
|
|
@@ -18,7 +18,7 @@ export async function demoToAtomicLevel(): Promise<void> {
|
|
|
18
18
|
adjustGridcolAfterRender(view.grid, 'molfile(HELM)', 500, 300, true);
|
|
19
19
|
adjustGridcolAfterRender(view.grid, 'HELM', 500, undefined, true);
|
|
20
20
|
grok.shell.info('Hover over monomers in HELM column to highlight them in molecular structure.', {timeout: 10});
|
|
21
|
-
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#
|
|
21
|
+
grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#convert-to-atomic-level');
|
|
22
22
|
}
|
|
23
23
|
|
|
24
24
|
export async function demoBio03UI(): Promise<void> {
|
package/src/package-types.ts
CHANGED
|
@@ -41,7 +41,7 @@ export class BioPackageProperties extends Map<string, any> {
|
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
public set maxMonomerLength(value: number | null) {
|
|
44
|
-
const vs = value
|
|
44
|
+
const vs = value == null ? 'long' : value.toString();
|
|
45
45
|
super.set(BioPackagePropertiesNames.MaxMonomerLength, vs);
|
|
46
46
|
this._onPropertyChanged.next(BioPackagePropertiesNames.MaxMonomerLength);
|
|
47
47
|
}
|
package/src/package.ts
CHANGED
|
@@ -83,7 +83,7 @@ category('monomerLibraries', () => {
|
|
|
83
83
|
};
|
|
84
84
|
const monomerLib = monomerLibHelper.getMonomerLib();
|
|
85
85
|
const absentOverrideMonomer = monomerLib.getMonomer(overMon.polymerType, overMon.symbol);
|
|
86
|
-
expect(absentOverrideMonomer
|
|
86
|
+
expect(absentOverrideMonomer == null, true, `Unexpectedly found monomer '${overMon.symbol}' `);
|
|
87
87
|
|
|
88
88
|
const overriddenMonomerLib = monomerLib.override({[overMon.polymerType]: {[overMon.symbol]: overMon}}, 'test');
|
|
89
89
|
const resOverMon = overriddenMonomerLib.getMonomer(overMon.polymerType, overMon.symbol);
|
package/src/utils/get-region.ts
CHANGED
|
@@ -47,9 +47,9 @@ export function getRegionDo(
|
|
|
47
47
|
if (sh.posList[posJ] == startPosName) startPosIdx = posJ;
|
|
48
48
|
if (sh.posList[posJ] == endPosName) endPosIdx = posJ;
|
|
49
49
|
}
|
|
50
|
-
if (startPosIdx
|
|
50
|
+
if (startPosIdx == null && startPosName !== null)
|
|
51
51
|
throw new Error(`Start position ${startPosName} not found.`);
|
|
52
|
-
if (endPosIdx
|
|
52
|
+
if (endPosIdx == null && endPosName !== null)
|
|
53
53
|
throw new Error(`End position ${endPosName} not found.`);
|
|
54
54
|
|
|
55
55
|
if (sh.posList.length < endPosIdx!)
|
|
@@ -29,7 +29,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
|
|
|
29
29
|
const smilesColName = `smiles(${helmCol.name})`;
|
|
30
30
|
const smilesColNameU = df ? df.columns.getUnusedName(smilesColName) : smilesColName;
|
|
31
31
|
return DG.Column.fromStrings(smilesColNameU, smiles.map((molecule) => {
|
|
32
|
-
if (molecule
|
|
32
|
+
if (molecule == null)
|
|
33
33
|
return '';
|
|
34
34
|
return molecule;
|
|
35
35
|
}));
|
|
@@ -57,7 +57,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
|
|
|
57
57
|
|
|
58
58
|
public getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string): DG.Column<string> {
|
|
59
59
|
const beautifiedMolV2000 = beautifiedMols.map((mol) => {
|
|
60
|
-
if (mol
|
|
60
|
+
if (mol == null)
|
|
61
61
|
return '';
|
|
62
62
|
const molBlock = mol.get_v3Kmolblock();
|
|
63
63
|
mol!.delete();
|
|
@@ -97,7 +97,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
|
|
|
97
97
|
if (chiralityEngine)
|
|
98
98
|
return this.getMolV3000ViaOCL(beautifiedMols, molColNameU);
|
|
99
99
|
return DG.Column.fromStrings(molColNameU, beautifiedMols.map((mol) => {
|
|
100
|
-
if (mol
|
|
100
|
+
if (mol == null)
|
|
101
101
|
return '';
|
|
102
102
|
const molBlock = mol.get_v3Kmolblock();
|
|
103
103
|
mol!.delete();
|
|
@@ -1,16 +1,24 @@
|
|
|
1
1
|
import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
|
|
2
2
|
import {ConnectionList} from './connection-list';
|
|
3
|
-
import {HELM_ITEM_SEPARATOR
|
|
3
|
+
import {HELM_ITEM_SEPARATOR} from './const';
|
|
4
4
|
import {SimplePolymer} from './simple-polymer';
|
|
5
5
|
import {Bond} from './types';
|
|
6
6
|
|
|
7
7
|
export class Helm {
|
|
8
8
|
constructor(private helmString: string) {
|
|
9
|
-
const
|
|
10
|
-
const
|
|
11
|
-
|
|
9
|
+
const indexOfSequenceEnd = helmString.indexOf('}$');
|
|
10
|
+
const sequencePart = helmString.substring(0, indexOfSequenceEnd + 1);
|
|
11
|
+
const connectionsEndPart = helmString.indexOf('$', indexOfSequenceEnd + 2);
|
|
12
|
+
const connectionsPart = helmString.substring(indexOfSequenceEnd + 2, connectionsEndPart);
|
|
13
|
+
// const helmParts = seq.split('$');
|
|
14
|
+
const spList = sequencePart.split('}|');
|
|
15
|
+
// since we removed }|, need to add } to last part
|
|
16
|
+
for (let i = 0; i < spList.length - 1; i++)
|
|
17
|
+
spList[i] = spList[i] + '}';
|
|
18
|
+
|
|
19
|
+
this.simplePolymers = spList
|
|
12
20
|
.map((item) => new SimplePolymer(item));
|
|
13
|
-
this.connectionList = new ConnectionList(
|
|
21
|
+
this.connectionList = new ConnectionList(connectionsPart);
|
|
14
22
|
this.bondData = this.getBondData();
|
|
15
23
|
|
|
16
24
|
this.bondedRGroupsMap = this.getBondedRGroupsMap();
|
|
@@ -58,7 +66,7 @@ export class Helm {
|
|
|
58
66
|
const lowerBound = shiftValues.sort((a, b) => b - a).find( // find the largest shift not exceeding monomerGlobalIdx
|
|
59
67
|
(shift) => monomerGlobalIdx >= shift
|
|
60
68
|
);
|
|
61
|
-
if (lowerBound
|
|
69
|
+
if (lowerBound == undefined)
|
|
62
70
|
throw new Error(`Cannot find simple polymer for monomer ${monomerGlobalIdx}`);
|
|
63
71
|
const simplePolymerId = Object.keys(shifts).find((simplePolymerId) => shifts[simplePolymerId] === lowerBound)!;
|
|
64
72
|
const simplePolymer = this.simplePolymers.find((simplePolymer) => simplePolymer.id === simplePolymerId)!;
|
|
@@ -31,7 +31,7 @@ export abstract class MolfileBonds {
|
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
replacePositionsInBondsByDummy(positions: PositionInBonds[], dummy?: number): void {
|
|
34
|
-
if (dummy
|
|
34
|
+
if (dummy == undefined)
|
|
35
35
|
dummy = -1;
|
|
36
36
|
positions.forEach((position) => {
|
|
37
37
|
const {bondLineIdx, nodeIdx} = position;
|
|
@@ -15,7 +15,7 @@ export abstract class MolfileWrapper {
|
|
|
15
15
|
|
|
16
16
|
protected shiftR1GroupToOrigin(): void {
|
|
17
17
|
const r1Idx = this.rGroups.getAtomicIdx(1);
|
|
18
|
-
if (r1Idx
|
|
18
|
+
if (r1Idx == null)
|
|
19
19
|
return; // R1 group is not present, nothing to shift
|
|
20
20
|
const {x, y} = this.atoms.atomCoordinates[r1Idx];
|
|
21
21
|
this.atoms.shift({x: -x, y: -y});
|
|
@@ -23,7 +23,7 @@ export abstract class MolfileWrapper {
|
|
|
23
23
|
|
|
24
24
|
protected alignR2AlongX(): void {
|
|
25
25
|
const r2Idx = this.rGroups.getAtomicIdx(2);
|
|
26
|
-
if (r2Idx
|
|
26
|
+
if (r2Idx == null)
|
|
27
27
|
throw new Error(`Cannot find R2 group for monomer ${this.monomerSymbol}`);
|
|
28
28
|
const r2Coordinates = this.atoms.atomCoordinates[r2Idx];
|
|
29
29
|
const tan = r2Coordinates.y / r2Coordinates.x;
|
|
@@ -19,13 +19,13 @@ export class RGroupHandler {
|
|
|
19
19
|
|
|
20
20
|
getAtomicIdx(rGroupId: number): number | null {
|
|
21
21
|
const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rGroupId);
|
|
22
|
-
return atomicIdx
|
|
22
|
+
return atomicIdx == undefined ? null : atomicIdx;
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
private removeRGroupsFromAtomBlock(rGroupIds: number[]): void {
|
|
26
26
|
rGroupIds.forEach((rgroupId) => {
|
|
27
27
|
const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rgroupId);
|
|
28
|
-
if (atomicIdx
|
|
28
|
+
if (atomicIdx == undefined)
|
|
29
29
|
throw new Error(`Cannot find atomic index for R group ${rgroupId}`);
|
|
30
30
|
});
|
|
31
31
|
|
|
@@ -22,7 +22,7 @@ export class MonomerLibFileValidator {
|
|
|
22
22
|
|
|
23
23
|
validateFile(fileContent: string, fileName: string): boolean {
|
|
24
24
|
const jsonContent = this.parseJson(fileContent, fileName);
|
|
25
|
-
if (jsonContent
|
|
25
|
+
if (jsonContent == null)
|
|
26
26
|
return false;
|
|
27
27
|
|
|
28
28
|
if (!Array.isArray(jsonContent)) {
|
|
@@ -50,7 +50,7 @@ class MonomerLibraryManagerWidget {
|
|
|
50
50
|
private libHelper: IMonomerLibHelper;
|
|
51
51
|
|
|
52
52
|
static async getInstance(): Promise<MonomerLibraryManagerWidget> {
|
|
53
|
-
if (MonomerLibraryManagerWidget.instancePromise
|
|
53
|
+
if (MonomerLibraryManagerWidget.instancePromise == undefined) {
|
|
54
54
|
MonomerLibraryManagerWidget.instancePromise = (async () => {
|
|
55
55
|
const instance = new MonomerLibraryManagerWidget();
|
|
56
56
|
const libHelper = await getMonomerLibHelper();
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-len */
|
|
1
2
|
/* eslint-disable max-lines */
|
|
2
3
|
import * as grok from 'datagrok-api/grok';
|
|
3
4
|
import * as ui from 'datagrok-api/ui';
|
|
@@ -18,12 +19,14 @@ import {GAP_SYMBOL, GapOriginals, NOTATION} from '@datagrok-libraries/bio/src/ut
|
|
|
18
19
|
import {Vector} from '@datagrok-libraries/utils/src/type-declarations';
|
|
19
20
|
import {vectorAdd, vectorDotProduct, vectorLength} from '@datagrok-libraries/utils/src/vector-operations';
|
|
20
21
|
|
|
21
|
-
import {AmbiguousWebEditorMonomer, GapWebEditorMonomer, MissingWebEditorMonomer} from './web-editor-monomer-dummy';
|
|
22
|
+
import {AmbiguousWebEditorMonomer, GapWebEditorMonomer, MissingWebEditorMonomer, SmilesWebEditorMonomer} from './web-editor-monomer-dummy';
|
|
22
23
|
import {LibraryWebEditorMonomer} from './web-editor-monomer-of-library';
|
|
23
24
|
import {naturalMonomerColors} from './monomer-colors';
|
|
24
25
|
|
|
25
26
|
import {_package} from '../../package';
|
|
26
27
|
import {MonomerLibData} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
28
|
+
import {smiles2Monomer} from './smiles2Monomer';
|
|
29
|
+
import {polymerTypeToHelmType} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
27
30
|
|
|
28
31
|
const monomerRe = /[\w()]+/;
|
|
29
32
|
//** Do not mess with monomer symbol with parenthesis enclosed in square brackets */
|
|
@@ -61,9 +64,15 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
61
64
|
}
|
|
62
65
|
|
|
63
66
|
getMonomerSymbolsByType(polymerType: PolymerType): string[] {
|
|
64
|
-
|
|
67
|
+
const res = Object.keys(this._monomers[polymerType]);
|
|
68
|
+
if (this._smilesMonomerCache[polymerType])
|
|
69
|
+
res.push(...Object.keys(this._smilesMonomerCache[polymerType]));
|
|
70
|
+
return res;
|
|
65
71
|
}
|
|
66
72
|
|
|
73
|
+
// smiles to symbol Mapping cache
|
|
74
|
+
private _smilesMonomerCache: {[polymerType: string]: {[smiles: string]: string}} = {};
|
|
75
|
+
|
|
67
76
|
/** Creates missing {@link Monomer} */
|
|
68
77
|
addMissingMonomer(polymerType: PolymerType, monomerSymbol: string): Monomer {
|
|
69
78
|
let mSet = this._monomers[polymerType];
|
|
@@ -78,6 +87,22 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
78
87
|
else if (polymerType === PolymerTypes.RNA && monomerSymbol === 'N')
|
|
79
88
|
monomerName = 'Any';
|
|
80
89
|
|
|
90
|
+
// test if it is smiles
|
|
91
|
+
// check if the missing monomer symbol is a valid SMILES string
|
|
92
|
+
const smilesMonomer = smiles2Monomer(monomerSymbol, polymerType);
|
|
93
|
+
if (smilesMonomer) {
|
|
94
|
+
this._smilesMonomerCache[polymerType] = this._smilesMonomerCache[polymerType] ?? {};
|
|
95
|
+
const smSet = this._smilesMonomerCache[polymerType];
|
|
96
|
+
const symbol = Object.keys(smSet).length + 1;
|
|
97
|
+
smSet[monomerSymbol] = `#${polymerType[0]}${symbol}`; // e.g. #P1, #R2, #C3, #B4
|
|
98
|
+
const m: Monomer = {...smilesMonomer, symbol: smSet[monomerSymbol]};
|
|
99
|
+
// note, the ID becomes #<index> for smiles based monomers, and as the smiles, original smiles is passed (which is in monomerSymbol), to avoid key duplication
|
|
100
|
+
const wem = new SmilesWebEditorMonomer(polymerTypeToHelmType(polymerType), m.symbol, monomerSymbol, `SMILES Monomer ${m.symbol}`, m.rgroups.map((rg) => rg[RGP.LABEL]));
|
|
101
|
+
m.wem = wem;
|
|
102
|
+
mSet[m.symbol] = m;
|
|
103
|
+
return m;
|
|
104
|
+
}
|
|
105
|
+
|
|
81
106
|
const m = mSet[monomerSymbol] = {
|
|
82
107
|
[REQ.SYMBOL]: monomerSymbol,
|
|
83
108
|
[REQ.NAME]: monomerName,
|
|
@@ -124,8 +149,11 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
124
149
|
if (res) break;
|
|
125
150
|
}
|
|
126
151
|
} else {
|
|
152
|
+
// Check smiles cache, modify with mapped symbol
|
|
153
|
+
if (this._smilesMonomerCache[polymerType]?.[monomerSymbol])
|
|
154
|
+
monomerSymbol = this._smilesMonomerCache[polymerType][monomerSymbol];
|
|
127
155
|
const dict = this._monomers[polymerType];
|
|
128
|
-
res = dict
|
|
156
|
+
res = dict?.[monomerSymbol] ?? null;
|
|
129
157
|
}
|
|
130
158
|
return res;
|
|
131
159
|
}
|
|
@@ -55,32 +55,6 @@ export class MonomerLib extends MonomerLibBase implements IMonomerLib {
|
|
|
55
55
|
return resJSON;
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
getMonomer(polymerType: PolymerType | null, argMonomerSymbol: string): Monomer | null {
|
|
59
|
-
const logPrefix = `Bio: MonomerLib.getMonomer()`;
|
|
60
|
-
// Adjust RNA's 'R' for ribose to 'r' and 'P' for phosphate to 'p' for case-sensitive monomer names.
|
|
61
|
-
// There are uppercase 'R' and 'P' at RNA samples in test data 'helm2.csv' but lowercase in HELMCoreLibrary.json
|
|
62
|
-
let monomerSymbol = argMonomerSymbol;
|
|
63
|
-
if (polymerType == 'RNA' && monomerSymbol == 'R')
|
|
64
|
-
monomerSymbol = 'r';
|
|
65
|
-
if (polymerType == 'RNA' && monomerSymbol == 'P')
|
|
66
|
-
monomerSymbol = 'p';
|
|
67
|
-
|
|
68
|
-
let res: Monomer | null = null;
|
|
69
|
-
|
|
70
|
-
if (!polymerType) {
|
|
71
|
-
_package.logger.warning(`${logPrefix} symbol '${argMonomerSymbol}', polymerType not specified.`);
|
|
72
|
-
// Assume any polymer type
|
|
73
|
-
for (const [_polymerType, dict] of Object.entries(this._monomers)) {
|
|
74
|
-
res = dict[monomerSymbol];
|
|
75
|
-
if (res) break;
|
|
76
|
-
}
|
|
77
|
-
} else {
|
|
78
|
-
const dict = this._monomers[polymerType];
|
|
79
|
-
res = dict?.[monomerSymbol] ?? null;
|
|
80
|
-
}
|
|
81
|
-
return res;
|
|
82
|
-
}
|
|
83
|
-
|
|
84
58
|
private _monomerSets: { [biotype: string /*HelmType*/]: MonomerSetType } | null = null;
|
|
85
59
|
|
|
86
60
|
getMonomerSet(biotype: HelmType): MonomerSetType | null {
|
|
@@ -1178,7 +1178,7 @@ function getCorrectedSmiles(rgroups: RGroup[], smiles?: string, molBlock?: strin
|
|
|
1178
1178
|
return isSmilesMalformed ? canonical : grok.chem.convert(canonical, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles);
|
|
1179
1179
|
}
|
|
1180
1180
|
|
|
1181
|
-
function getCorrectedMolBlock(molBlock: string) {
|
|
1181
|
+
export function getCorrectedMolBlock(molBlock: string) {
|
|
1182
1182
|
// to correct molblock, we should make sure that
|
|
1183
1183
|
// 1. RGP field is present at the end, before the M END line
|
|
1184
1184
|
// 2. RGP field is present in the correct format
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/* eslint-disable camelcase */
|
|
2
|
+
/* eslint-disable max-len */
|
|
3
|
+
import {Monomer} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
4
|
+
import {_package} from '../../package';
|
|
5
|
+
import {PolymerType} from '@datagrok-libraries/bio/src/helm/types';
|
|
6
|
+
import {HELM_RGROUP_FIELDS as RGP} from '@datagrok-libraries/bio/src/utils/const';
|
|
7
|
+
import * as grok from 'datagrok-api/grok';
|
|
8
|
+
import {getCorrectedMolBlock} from './monomer-manager/monomer-manager';
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Example r groups
|
|
12
|
+
* {
|
|
13
|
+
"capGroupSMILES": "[*:1][H]",
|
|
14
|
+
"alternateId": "R1-H",
|
|
15
|
+
"capGroupName": "H",
|
|
16
|
+
"label": "R1"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"capGroupSMILES": "O[*:2]",
|
|
20
|
+
"alternateId": "R2-OH",
|
|
21
|
+
"capGroupName": "OH",
|
|
22
|
+
"label": "R2"
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
const cx_smiles_regexp = /.*\|\$.*R.*\$\|/;
|
|
28
|
+
const rgroup_regexp = /\[R(\d+)\]/;
|
|
29
|
+
const rgroup_regexpg = /\[R(\d+)\]/g;
|
|
30
|
+
const ambig_regexp = /\[\*:(\d+)\]/g;
|
|
31
|
+
|
|
32
|
+
export type MonomerWithoutSymbol = Omit<Monomer, 'symbol'>;
|
|
33
|
+
|
|
34
|
+
export function getMonomerFromRSmiles(smiles: string, polymerType?: PolymerType): MonomerWithoutSymbol | null {
|
|
35
|
+
const rgroupNumbers = Array.from(smiles.matchAll(rgroup_regexpg)).map((m) => m[1]);
|
|
36
|
+
const res: MonomerWithoutSymbol = {
|
|
37
|
+
name: 'Explicit SMILES Monomer',
|
|
38
|
+
smiles: smiles,
|
|
39
|
+
polymerType: polymerType ?? 'CHEM',
|
|
40
|
+
molfile: '',
|
|
41
|
+
rgroups: rgroupNumbers.map((numString) => ({
|
|
42
|
+
[RGP.LABEL]: `R${numString}`,
|
|
43
|
+
[RGP.CAP_GROUP_NAME]: `H`,
|
|
44
|
+
[RGP.CAP_GROUP_SMILES]: `[*:${numString}][H]`,
|
|
45
|
+
[RGP.ALTERNATE_ID]: `R${numString}-H`,
|
|
46
|
+
})),
|
|
47
|
+
author: 'Datagrok auto-generated',
|
|
48
|
+
id: 0,
|
|
49
|
+
createDate: null,
|
|
50
|
+
monomerType: 'Backbone',
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
try {
|
|
54
|
+
//try to generate corrected molfile and smiles
|
|
55
|
+
let corSmiles = smiles;
|
|
56
|
+
res.rgroups.forEach((rg) => {
|
|
57
|
+
const labelNum = rg[RGP.LABEL].substring(1); // R1 -> 1
|
|
58
|
+
corSmiles = corSmiles.replace(`[R${labelNum}]`, `[*:${labelNum}]`);
|
|
59
|
+
});
|
|
60
|
+
const molFile = getCorrectedMolBlock(grok.chem.convert(corSmiles, grok.chem.Notation.Smiles, grok.chem.Notation.MolBlock));
|
|
61
|
+
res.molfile = molFile;
|
|
62
|
+
res.smiles = corSmiles;
|
|
63
|
+
} catch (e) {
|
|
64
|
+
_package.logger.error(`getMonomerFromRSmiles: cannot convert SMILES to Molfile: ${smiles}\n${e}`);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
return res;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/** Generate Monomer Object directly from inline smiles
|
|
71
|
+
* Purely string based, no external calls
|
|
72
|
+
*
|
|
73
|
+
* Currently accepts (to be extended):
|
|
74
|
+
*
|
|
75
|
+
* cxsmiles written as *N[C@H](C(=O)*)Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$| where * are connection points
|
|
76
|
+
*
|
|
77
|
+
* or * in square brackets like [*]N[C@H](C(=O)[*])Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$|
|
|
78
|
+
*
|
|
79
|
+
* simple smiles with R notations like CCC[R1] or CCC(=O)[R2]
|
|
80
|
+
*
|
|
81
|
+
* simple smiles with ambiguity defined as [*:1] like CCC[*:1] or CCC(=O)[*:2]
|
|
82
|
+
*/
|
|
83
|
+
export function smiles2Monomer(smiles: string, polymerType?: PolymerType): MonomerWithoutSymbol | null {
|
|
84
|
+
try {
|
|
85
|
+
const isCxSmiles = cx_smiles_regexp.test(smiles);
|
|
86
|
+
if (isCxSmiles) {
|
|
87
|
+
// CXSMILES parsing
|
|
88
|
+
const parts = smiles.split('|$');
|
|
89
|
+
const molPart = parts[0].trim();
|
|
90
|
+
const rGroupPart = parts[1];
|
|
91
|
+
// make sure all R groups are captured
|
|
92
|
+
const rGroupMatches = Array.from(rGroupPart.matchAll(/R(\d+)/g));
|
|
93
|
+
const starsInMolecule = Array.from(molPart.matchAll(/(\*)/g));
|
|
94
|
+
if (rGroupMatches.length !== starsInMolecule.length)
|
|
95
|
+
return null; // make sure that number of R groups and stars are the same
|
|
96
|
+
// remove brackets from stars if any
|
|
97
|
+
let cleanMol = molPart.replaceAll(/\[\*\]/g, '*');
|
|
98
|
+
// speaking in terms of consecutiveness, R groups in definition and stars in Smiles will be in the same order
|
|
99
|
+
// so we can just iterate through them
|
|
100
|
+
const rGroupNumbers = rGroupMatches.map((m) => m[1]); // numbers as strings
|
|
101
|
+
for (let i = 0; i < rGroupNumbers.length; i++) {
|
|
102
|
+
const rNum = rGroupNumbers[i];
|
|
103
|
+
// replace first matched star with R group
|
|
104
|
+
cleanMol = cleanMol.replace('*', `[R${rNum}]`);
|
|
105
|
+
}
|
|
106
|
+
return getMonomerFromRSmiles(cleanMol, polymerType);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// simple smiles parsing
|
|
110
|
+
// to simplify, replace all ambiguous [*:1] with R1, etc
|
|
111
|
+
let cleanSmiles = smiles;
|
|
112
|
+
const ambigMatches = Array.from(smiles.matchAll(ambig_regexp));
|
|
113
|
+
for (const match of ambigMatches) {
|
|
114
|
+
const fullMatch = match[0];
|
|
115
|
+
const rNum = match[1];
|
|
116
|
+
cleanSmiles = cleanSmiles.replace(fullMatch, `[R${rNum}]`);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// make sure monomer has at least one R group
|
|
120
|
+
if (rgroup_regexp.test(cleanSmiles))
|
|
121
|
+
return getMonomerFromRSmiles(cleanSmiles, polymerType);
|
|
122
|
+
} catch (e) {
|
|
123
|
+
_package.logger.error(`smiles2Monomer: cannot parse SMILES: ${smiles}\n${e}`);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
return null;
|
|
128
|
+
}
|
|
@@ -18,7 +18,7 @@ export abstract class WebEditorMonomerDummy implements IWebEditorMonomer {
|
|
|
18
18
|
get issmiles(): boolean { return !!this.smiles; }
|
|
19
19
|
|
|
20
20
|
/** R-Group index of single digit only is allowed in Pistoia code */
|
|
21
|
-
|
|
21
|
+
at: WebEditorRGroups = {
|
|
22
22
|
R1: 'H', R2: 'H', R3: 'H', R4: 'H', R5: 'H', R6: 'H', R7: 'H', R8: 'H', R9: 'H'
|
|
23
23
|
};
|
|
24
24
|
|
|
@@ -80,6 +80,20 @@ export abstract class WebEditorMonomerDummy implements IWebEditorMonomer {
|
|
|
80
80
|
}
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
export class SmilesWebEditorMonomer extends WebEditorMonomerDummy {
|
|
84
|
+
public readonly backgroundcolor: string = '#808080';
|
|
85
|
+
public readonly linecolor: string = '#000000';
|
|
86
|
+
public readonly textcolor: string = '#000000';
|
|
87
|
+
|
|
88
|
+
constructor(biotype: string, id: string, smiles: string, name: string, rgpLabels: string[]) {
|
|
89
|
+
super(biotype, id, name, undefined, undefined, undefined, smiles);
|
|
90
|
+
this.at = {};
|
|
91
|
+
rgpLabels.forEach((label) => {
|
|
92
|
+
this.at[label] = 'H';
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
83
97
|
export class GapWebEditorMonomer extends WebEditorMonomerDummy {
|
|
84
98
|
public readonly backgroundcolor: string = '#FFFFFF';
|
|
85
99
|
public readonly linecolor: string = '#808080';
|
|
@@ -138,7 +138,7 @@ async function onDialogOk(
|
|
|
138
138
|
colInput.fireChanged();
|
|
139
139
|
if (colInput.value.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
140
140
|
throw new Error('Chosen column has to be of Macromolecule semantic type');
|
|
141
|
-
if (performAlignment
|
|
141
|
+
if (performAlignment == undefined) // value can only be undefined when column can't be processed with either method
|
|
142
142
|
throw new Error('Invalid column format');
|
|
143
143
|
msaCol = await performAlignment(); // progress
|
|
144
144
|
if (msaCol == null)
|
|
@@ -149,7 +149,7 @@ function parseKalignError(out: string, limit?: number): string {
|
|
|
149
149
|
const errLineList: string[] = [];
|
|
150
150
|
const errLineRe = /^.+ERROR : (.+)$/gm;
|
|
151
151
|
let ma: RegExpExecArray | null;
|
|
152
|
-
while ((ma = errLineRe.exec(out)) != null && (limit
|
|
152
|
+
while ((ma = errLineRe.exec(out)) != null && (limit == undefined || errLineList.length < limit)) {
|
|
153
153
|
//
|
|
154
154
|
errLineList.push(ma[1]);
|
|
155
155
|
}
|
|
@@ -132,7 +132,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
132
132
|
for (const seq of values) {
|
|
133
133
|
const mSeq = !!seq ? splitter(seq) : [];
|
|
134
134
|
|
|
135
|
-
if (firstLength
|
|
135
|
+
if (firstLength == null)
|
|
136
136
|
firstLength = mSeq.length;
|
|
137
137
|
else if (mSeq.length !== firstLength)
|
|
138
138
|
sameLength = false;
|
|
@@ -182,13 +182,13 @@ export class SeqHandler implements ISeqHandler {
|
|
|
182
182
|
throw new Error('Alphabet is empty and not annotated.');
|
|
183
183
|
|
|
184
184
|
let aligned = uh.column.getTag(TAGS.aligned);
|
|
185
|
-
if (aligned
|
|
185
|
+
if (aligned == null) {
|
|
186
186
|
aligned = uh.stats.sameLength ? ALIGNMENT.SEQ_MSA : ALIGNMENT.SEQ;
|
|
187
187
|
uh.column.setTag(TAGS.aligned, aligned);
|
|
188
188
|
}
|
|
189
189
|
|
|
190
190
|
let alphabet = uh.column.getTag(TAGS.alphabet);
|
|
191
|
-
if (alphabet
|
|
191
|
+
if (alphabet == null) {
|
|
192
192
|
alphabet = detectAlphabet(uh.stats.freq, candidateAlphabets);
|
|
193
193
|
uh.column.setTag(TAGS.alphabet, alphabet);
|
|
194
194
|
}
|
|
@@ -200,7 +200,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
200
200
|
}
|
|
201
201
|
} else if (units === NOTATION.HELM) {
|
|
202
202
|
let alphabet = uh.column.getTag(TAGS.alphabet);
|
|
203
|
-
if (alphabet
|
|
203
|
+
if (alphabet == null) {
|
|
204
204
|
// const cats = uh.column.categories;
|
|
205
205
|
// const splitter = uh.getSplitter();
|
|
206
206
|
// const samples = Array.from(new Set(
|
|
@@ -232,7 +232,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
232
232
|
|
|
233
233
|
public get separator(): string | undefined {
|
|
234
234
|
const separator: string | undefined = this.column.getTag(TAGS.separator) ?? undefined;
|
|
235
|
-
if (this.notation === NOTATION.SEPARATOR && separator
|
|
235
|
+
if (this.notation === NOTATION.SEPARATOR && separator == undefined)
|
|
236
236
|
throw new Error(`Separator is mandatory for column '${this.column.name}' of notation '${this.notation}'.`);
|
|
237
237
|
return separator;
|
|
238
238
|
}
|
|
@@ -327,7 +327,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
327
327
|
const seq = this.column.get(rowIdx);
|
|
328
328
|
return this.getSplitter(limit)(seq);
|
|
329
329
|
} else {
|
|
330
|
-
if (this.column.version !== this.columnVersion || this._splitted
|
|
330
|
+
if (this.column.version !== this.columnVersion || this._splitted == null) {
|
|
331
331
|
this.columnVersion = this.column.version;
|
|
332
332
|
this._splitted = new Array<WeakRef<ISeqSplitted>>(this.column.length);
|
|
333
333
|
}
|
|
@@ -408,7 +408,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
408
408
|
}
|
|
409
409
|
|
|
410
410
|
public get stats(): SeqColStats {
|
|
411
|
-
if (this._stats
|
|
411
|
+
if (this._stats == null) {
|
|
412
412
|
const freq: { [m: string]: number } = {};
|
|
413
413
|
let sameLength = true;
|
|
414
414
|
let firstLength = null;
|
|
@@ -435,7 +435,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
435
435
|
|
|
436
436
|
private _maxLength: number | null = null;
|
|
437
437
|
public get maxLength(): number {
|
|
438
|
-
if (this._maxLength
|
|
438
|
+
if (this._maxLength == null) {
|
|
439
439
|
this._maxLength = this.column.length === 0 ? 0 :
|
|
440
440
|
wu.count(0).take(this.column.length).map((rowIdx) => this.getSplitted(rowIdx).length).reduce((a, b) => a > b ? a : b, 0);
|
|
441
441
|
}
|
|
@@ -444,7 +444,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
444
444
|
|
|
445
445
|
private _posList: string[] | null = null;
|
|
446
446
|
public get posList(): string[] {
|
|
447
|
-
if (this._posList
|
|
447
|
+
if (this._posList == null) {
|
|
448
448
|
const posListTxt = this.column.getTag(TAGS.positionNames);
|
|
449
449
|
this._posList = posListTxt ? posListTxt.split(positionSeparator).map((p) => p.trim()) :
|
|
450
450
|
wu.count(1).take(this.maxLength).map((pos) => pos.toString()).toArray();
|
|
@@ -619,7 +619,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
619
619
|
}
|
|
620
620
|
|
|
621
621
|
get splitter(): SplitterFunc {
|
|
622
|
-
if (this._splitter
|
|
622
|
+
if (this._splitter == null)
|
|
623
623
|
this._splitter = this.getSplitter();
|
|
624
624
|
return this._splitter;
|
|
625
625
|
}
|
package/src/utils/ui-utils.ts
CHANGED
|
@@ -3,7 +3,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
|
|
4
4
|
export function getMacromoleculeColumns(): DG.Column<string>[] {
|
|
5
5
|
const columns = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
6
|
-
if (columns
|
|
6
|
+
if (columns == null) {
|
|
7
7
|
grok.shell.error('Current table does not contain macromolecules');
|
|
8
8
|
return [];
|
|
9
9
|
}
|