@datagrok/bio 2.11.2 → 2.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -0
- package/dist/196.js +1 -1
- package/dist/196.js.map +1 -1
- package/dist/361.js +1 -1
- package/dist/361.js.map +1 -1
- package/dist/381.js +1 -1
- package/dist/381.js.map +1 -1
- package/dist/770.js +1 -1
- package/dist/770.js.map +1 -1
- package/dist/79.js.map +1 -1
- package/dist/868.js +1 -1
- package/dist/868.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +3 -3
- package/src/analysis/sequence-space.ts +34 -12
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +2 -1
- package/src/package.ts +39 -34
- package/src/tests/activity-cliffs-tests.ts +5 -3
- package/src/tests/activity-cliffs-utils.ts +5 -2
- package/src/utils/helm-to-molfile.ts +37 -12
- package/src/utils/monomer-lib.ts +4 -9
- package/src/utils/poly-tool/{enumerator-tools.ts → transformation.ts} +25 -127
- package/src/utils/poly-tool/ui.ts +125 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.11.
|
|
8
|
+
"version": "2.11.5",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,9 +34,9 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.39.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.39.1",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
|
-
"@datagrok-libraries/ml": "^6.3.
|
|
39
|
+
"@datagrok-libraries/ml": "^6.3.53",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
41
41
|
"@datagrok-libraries/utils": "^4.0.17",
|
|
42
42
|
"cash-dom": "^8.0.0",
|
|
@@ -55,21 +55,19 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
|
|
|
55
55
|
return result;
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
export async function
|
|
59
|
-
|
|
60
|
-
): Promise<
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
const seqList = spaceParams.seqCol.toList();
|
|
65
|
-
|
|
58
|
+
export async function getEncodedSeqSpaceCol(
|
|
59
|
+
seqCol: DG.Column, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames
|
|
60
|
+
): Promise<{seqList:string[], options: {[_:string]: any}}> {
|
|
61
|
+
// encodes sequences using UTF characters to also support multichar and non-FASTA sequences
|
|
62
|
+
const ncUH = UnitsHandler.getOrCreate(seqCol);
|
|
63
|
+
const seqList = seqCol.toList();
|
|
66
64
|
const splitter = ncUH.getSplitter();
|
|
67
65
|
const seqColLength = seqList.length;
|
|
68
66
|
let charCodeCounter = 36;
|
|
69
67
|
const charCodeMap = new Map<string, string>();
|
|
70
68
|
for (let i = 0; i < seqColLength; i++) {
|
|
71
69
|
const seq = seqList[i];
|
|
72
|
-
if (seqList[i] === null ||
|
|
70
|
+
if (seqList[i] === null || seqCol.isNone(i)) {
|
|
73
71
|
seqList[i] = null;
|
|
74
72
|
continue;
|
|
75
73
|
}
|
|
@@ -84,8 +82,8 @@ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
|
|
|
84
82
|
seqList[i] += charCodeMap.get(char)!;
|
|
85
83
|
}
|
|
86
84
|
}
|
|
87
|
-
|
|
88
|
-
if (
|
|
85
|
+
let options = {};
|
|
86
|
+
if (similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
|
|
89
87
|
const monomers = Array.from(charCodeMap.keys());
|
|
90
88
|
const monomerRes = await calculateMonomerSimilarity(monomers);
|
|
91
89
|
// the substitution matrix contains similarity, but we need distances
|
|
@@ -98,10 +96,34 @@ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
|
|
|
98
96
|
Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
|
|
99
97
|
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
100
98
|
});
|
|
101
|
-
|
|
99
|
+
// sets distance function args in place.
|
|
100
|
+
options = {scoringMatrix: monomerRes.scoringMatrix,
|
|
102
101
|
alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
|
|
103
102
|
}
|
|
103
|
+
// else if (similarityMetric === MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH) {
|
|
104
|
+
// const alphabetIndexes: any = {};
|
|
105
|
+
// let i = 0;
|
|
106
|
+
// charCodeMap.forEach((value) => {
|
|
107
|
+
// alphabetIndexes[value] = i;
|
|
108
|
+
// i++;
|
|
109
|
+
// });
|
|
110
|
+
// options = {alphabetIndexes};
|
|
111
|
+
// }
|
|
112
|
+
return {seqList, options};
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
|
|
116
|
+
progressFunc?: (epochNum: number, epochsLength: number, embedding: number[][]) => void
|
|
117
|
+
): Promise<ISequenceSpaceResult> {
|
|
118
|
+
const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
|
|
119
|
+
if (ncUH.isHelm())
|
|
120
|
+
return await sequenceSpaceByFingerprints(spaceParams);
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
const {seqList, options} = await getEncodedSeqSpaceCol(spaceParams.seqCol, spaceParams.similarityMetric);
|
|
104
124
|
|
|
125
|
+
spaceParams.options = spaceParams.options ?? {};
|
|
126
|
+
spaceParams.options.distanceFnArgs = options;
|
|
105
127
|
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
106
128
|
seqList,
|
|
107
129
|
spaceParams.methodName,
|
|
@@ -13,6 +13,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
|
|
|
13
13
|
import {handleError} from './utils';
|
|
14
14
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
15
15
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
16
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
16
17
|
|
|
17
18
|
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
18
19
|
|
|
@@ -53,7 +54,7 @@ export async function demoBio01bUI() {
|
|
|
53
54
|
.step('Find activity cliffs', async () => {
|
|
54
55
|
activityCliffsViewer = (await activityCliffs(
|
|
55
56
|
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
56
|
-
80, dimRedMethod)) as DG.ScatterPlotViewer;
|
|
57
|
+
80, dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN)) as DG.ScatterPlotViewer;
|
|
57
58
|
view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
|
|
58
59
|
|
|
59
60
|
// Show grid viewer with the cliffs
|
package/src/package.ts
CHANGED
|
@@ -7,15 +7,14 @@ import * as DG from 'datagrok-api/dg';
|
|
|
7
7
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
8
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
9
9
|
import {Options} from '@datagrok-libraries/utils/src/type-declarations';
|
|
10
|
-
import {RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
11
10
|
import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
12
11
|
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
13
12
|
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
14
13
|
import {
|
|
15
|
-
ISequenceSpaceParams, getActivityCliffs, SequenceSpaceFunc
|
|
14
|
+
ISequenceSpaceParams, getActivityCliffs, SequenceSpaceFunc, CLIFFS_COL_ENCODE_FN
|
|
16
15
|
} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
17
16
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
18
|
-
import {BitArrayMetrics
|
|
17
|
+
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
19
18
|
import {
|
|
20
19
|
TAGS as bioTAGS, ALPHABET, NOTATION,
|
|
21
20
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
@@ -36,14 +35,14 @@ import {
|
|
|
36
35
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
37
36
|
import {SequenceAlignment} from './seq_align';
|
|
38
37
|
import {
|
|
39
|
-
ISequenceSpaceResult, getEmbeddingColsNames, getSequenceSpace, sequenceSpaceByFingerprints
|
|
38
|
+
ISequenceSpaceResult, getEmbeddingColsNames, getEncodedSeqSpaceCol, getSequenceSpace, sequenceSpaceByFingerprints
|
|
40
39
|
} from './analysis/sequence-space';
|
|
41
40
|
import {
|
|
42
41
|
createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
|
|
43
42
|
} from './analysis/sequence-activity-cliffs';
|
|
44
43
|
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
45
44
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
46
|
-
import {SubstructureSearchDialog} from './substructure-search/substructure-search';
|
|
45
|
+
import {MONOMERIC_COL_TAGS, SubstructureSearchDialog, invalidateMols} from './substructure-search/substructure-search';
|
|
47
46
|
import {convert} from './utils/convert';
|
|
48
47
|
import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
|
|
49
48
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
@@ -51,9 +50,6 @@ import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
|
51
50
|
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
52
51
|
import {
|
|
53
52
|
MonomerLibHelper,
|
|
54
|
-
getUserLibSettings,
|
|
55
|
-
setUserLibSetting,
|
|
56
|
-
getLibFileNameList,
|
|
57
53
|
getLibraryPanelUI
|
|
58
54
|
} from './utils/monomer-lib';
|
|
59
55
|
import {demoBio01UI} from './demo/bio01-similarity-diversity';
|
|
@@ -72,18 +68,18 @@ import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-wid
|
|
|
72
68
|
import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
|
|
73
69
|
import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
|
|
74
70
|
import {addCopyMenuUI} from './utils/context-menu';
|
|
75
|
-
import {getPolyToolDialog} from './utils/poly-tool/
|
|
71
|
+
import {getPolyToolDialog} from './utils/poly-tool/ui';
|
|
76
72
|
import {_setPeptideColumn} from './utils/poly-tool/utils';
|
|
77
73
|
import {getRegionDo} from './utils/get-region';
|
|
78
74
|
import {GetRegionApp} from './apps/get-region-app';
|
|
79
75
|
import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
80
|
-
import {HelmToMolfileConverter} from './utils/helm-to-molfile';
|
|
81
76
|
import {sequenceToMolfile} from './utils/sequence-to-mol';
|
|
82
77
|
import {errInfo} from './utils/err-info';
|
|
83
78
|
|
|
84
79
|
import {SHOW_SCATTERPLOT_PROGRESS} from '@datagrok-libraries/ml/src/functionEditors/seq-space-base-editor';
|
|
85
80
|
import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
|
|
86
81
|
from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
|
|
82
|
+
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
87
83
|
|
|
88
84
|
export const _package = new BioPackage();
|
|
89
85
|
|
|
@@ -394,12 +390,14 @@ export async function getRegionTopMenu(
|
|
|
394
390
|
//input: column activities
|
|
395
391
|
//input: double similarity = 80 [Similarity cutoff]
|
|
396
392
|
//input: string methodName { choices:["UMAP", "t-SNE"] }
|
|
393
|
+
//input: string similarityMetric { choices:["Hamming", "Levenshtein", "Monomer chemical distance"] }
|
|
397
394
|
//input: object options {optional: true}
|
|
398
395
|
//output: viewer result
|
|
399
396
|
//editor: Bio:SeqActivityCliffsEditor
|
|
400
397
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<string>, activities: DG.Column,
|
|
401
|
-
similarity: number, methodName: DimReductionMethods,
|
|
402
|
-
|
|
398
|
+
similarity: number, methodName: DimReductionMethods,
|
|
399
|
+
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics,
|
|
400
|
+
options?: (IUMAPOptions | ITSNEOptions) & Options): Promise<DG.Viewer | undefined> {
|
|
403
401
|
if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
|
|
404
402
|
return;
|
|
405
403
|
const axesNames = getEmbeddingColsNames(df);
|
|
@@ -409,21 +407,27 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
409
407
|
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
410
408
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
411
409
|
};
|
|
410
|
+
let cliffsEncodeFunction: (seqCol: DG.Column, similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics) => any =
|
|
411
|
+
getEncodedSeqSpaceCol;
|
|
412
412
|
const ncUH = UnitsHandler.getOrCreate(macroMolecule);
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
seqCol
|
|
421
|
-
const
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
413
|
+
const columnDistanceMetric: MmDistanceFunctionsNames | BitArrayMetrics = similarityMetric;
|
|
414
|
+
const seqCol = macroMolecule;
|
|
415
|
+
|
|
416
|
+
let sequenceSpaceFunc: SequenceSpaceFunc = getSequenceSpace;
|
|
417
|
+
if (ncUH.isHelm()) {
|
|
418
|
+
sequenceSpaceFunc = sequenceSpaceByFingerprints;
|
|
419
|
+
cliffsEncodeFunction = async (seqCol: DG.Column, similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics) => {
|
|
420
|
+
await invalidateMols(seqCol, false);
|
|
421
|
+
const molecularCol = seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS];
|
|
422
|
+
const fingerPrints: DG.Column =
|
|
423
|
+
await grok.functions.call('Chem:getMorganFingerprints', {molColumn: molecularCol});
|
|
424
|
+
const fingerPrintsBitArray = fingerPrints.toList().map((f: DG.BitSet) =>
|
|
425
|
+
BitArray.fromUint32Array(f.length, new Uint32Array(f.getBuffer().buffer)));
|
|
426
|
+
return {seqList: fingerPrintsBitArray, options: {}};
|
|
427
|
+
};
|
|
426
428
|
}
|
|
429
|
+
|
|
430
|
+
|
|
427
431
|
const runCliffs = async () => {
|
|
428
432
|
const sp = await getActivityCliffs(
|
|
429
433
|
df,
|
|
@@ -442,25 +446,26 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
442
446
|
createTooltipElement,
|
|
443
447
|
createPropPanelElement,
|
|
444
448
|
createLinesGrid,
|
|
445
|
-
options);
|
|
449
|
+
{...(options ?? {}), [CLIFFS_COL_ENCODE_FN]: cliffsEncodeFunction});
|
|
446
450
|
return sp;
|
|
447
451
|
};
|
|
448
452
|
|
|
449
|
-
const allowedRowCount =
|
|
450
|
-
const fastRowCount = methodName === DimReductionMethods.UMAP ?
|
|
453
|
+
const allowedRowCount = methodName === DimReductionMethods.UMAP ? 200_000 : 20_000;
|
|
454
|
+
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5_000 : 2_000;
|
|
451
455
|
if (df.rowCount > allowedRowCount) {
|
|
452
456
|
grok.shell.warning(`Too many rows, maximum for sequence activity cliffs is ${allowedRowCount}`);
|
|
453
457
|
return;
|
|
454
458
|
}
|
|
455
459
|
|
|
456
|
-
return new Promise<DG.Viewer>((resolve, reject) => {
|
|
460
|
+
return new Promise<DG.Viewer | undefined>((resolve, reject) => {
|
|
457
461
|
if (df.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
|
|
458
462
|
ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
|
|
459
463
|
Do you want to continue?`))
|
|
460
464
|
.onOK(async () => {
|
|
461
|
-
const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
|
|
462
|
-
runCliffs().then((res) => resolve(res)).catch((err) => reject(err)).finally(() => {
|
|
465
|
+
//const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
|
|
466
|
+
runCliffs().then((res) => resolve(res)).catch((err) => reject(err)).finally(() => {});
|
|
463
467
|
})
|
|
468
|
+
.onCancel(() => { resolve(undefined); })
|
|
464
469
|
.show();
|
|
465
470
|
} else {
|
|
466
471
|
runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
|
|
@@ -530,7 +535,7 @@ export async function sequenceSpaceTopMenu(
|
|
|
530
535
|
methodName: methodName,
|
|
531
536
|
similarityMetric: similarityMetric,
|
|
532
537
|
embedAxesNames: embedColsNames,
|
|
533
|
-
options: {...options, sparseMatrixThreshold: sparseMatrixThreshold ?? 0.
|
|
538
|
+
options: {...options, sparseMatrixThreshold: sparseMatrixThreshold ?? 0.5,
|
|
534
539
|
usingSparseMatrix: table.rowCount > 20000},
|
|
535
540
|
};
|
|
536
541
|
|
|
@@ -1089,10 +1094,10 @@ export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
|
|
|
1089
1094
|
await demoBio05UI();
|
|
1090
1095
|
}
|
|
1091
1096
|
|
|
1092
|
-
//name:
|
|
1097
|
+
//name: polyToolColumnChoice
|
|
1093
1098
|
//input: dataframe df [Input data table]
|
|
1094
1099
|
//input: column macroMolecule
|
|
1095
|
-
export async function
|
|
1100
|
+
export async function polyToolColumnChoice(df: DG.DataFrame, macroMolecule: DG.Column): Promise<void> {
|
|
1096
1101
|
_setPeptideColumn(macroMolecule);
|
|
1097
1102
|
await grok.data.detectSemanticTypes(df);
|
|
1098
1103
|
}
|
|
@@ -8,6 +8,8 @@ import {_testActivityCliffsOpen} from './activity-cliffs-utils';
|
|
|
8
8
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
9
9
|
|
|
10
10
|
import {_package} from '../package-test';
|
|
11
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
12
|
+
import {BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
category('activityCliffs', async () => {
|
|
@@ -39,7 +41,7 @@ category('activityCliffs', async () => {
|
|
|
39
41
|
const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
|
|
40
42
|
|
|
41
43
|
await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
|
|
42
|
-
'sequence', 'Activity', 90, cliffsNum);
|
|
44
|
+
'sequence', 'Activity', 90, cliffsNum, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
43
45
|
});
|
|
44
46
|
|
|
45
47
|
test('activityCliffsWithEmptyRows', async () => {
|
|
@@ -49,7 +51,7 @@ category('activityCliffs', async () => {
|
|
|
49
51
|
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
50
52
|
|
|
51
53
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, DimReductionMethods.UMAP,
|
|
52
|
-
'sequence', 'Activity', 90, 3);
|
|
54
|
+
'sequence', 'Activity', 90, 3, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
53
55
|
});
|
|
54
56
|
|
|
55
57
|
test('Helm', async () => {
|
|
@@ -57,6 +59,6 @@ category('activityCliffs', async () => {
|
|
|
57
59
|
const view = grok.shell.addTableView(df);
|
|
58
60
|
|
|
59
61
|
await _testActivityCliffsOpen(df, DimReductionMethods.UMAP,
|
|
60
|
-
'HELM', 'Activity', 90, 53);
|
|
62
|
+
'HELM', 'Activity', 90, 53, BitArrayMetricsNames.Tanimoto);
|
|
61
63
|
});
|
|
62
64
|
});
|
|
@@ -4,14 +4,17 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {activityCliffs, BYPASS_LARGE_DATA_WARNING} from '../package';
|
|
6
6
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
7
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
8
|
+
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
7
9
|
|
|
8
10
|
export async function _testActivityCliffsOpen(df: DG.DataFrame, drMethod: DimReductionMethods,
|
|
9
|
-
seqColName: string, activityColName: string, similarityThr: number, tgtNumberCliffs: number
|
|
11
|
+
seqColName: string, activityColName: string, similarityThr: number, tgtNumberCliffs: number,
|
|
12
|
+
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics
|
|
10
13
|
): Promise<void> {
|
|
11
14
|
await grok.data.detectSemanticTypes(df);
|
|
12
15
|
const scatterPlot = await activityCliffs(
|
|
13
16
|
df, df.getCol(seqColName), df.getCol(activityColName),
|
|
14
|
-
similarityThr, drMethod, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
|
|
17
|
+
similarityThr, drMethod, similarityMetric, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
|
|
15
18
|
// const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
|
|
16
19
|
// table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
|
|
17
20
|
// similarity: 50, methodName: method
|
|
@@ -43,18 +43,29 @@ type PositionInBonds = {
|
|
|
43
43
|
|
|
44
44
|
/** Translate HELM column into molfile column and append to the dataframe */
|
|
45
45
|
export async function helm2mol(df: DG.DataFrame, helmCol: DG.Column<string>): Promise<void> {
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
46
|
+
const molCol = await getMolColumnFromHelm(df, helmCol);
|
|
47
|
+
df.columns.add(molCol, true);
|
|
48
|
+
await grok.data.detectSemanticTypes(df);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
/** Translate HELM column into a molfile column and return it; the column is not added to the dataframe here */
|
|
53
|
+
export async function getMolColumnFromHelm(
|
|
54
|
+
df: DG.DataFrame, helmCol: DG.Column<string>
|
|
55
|
+
): Promise<DG.Column<string>> {
|
|
53
56
|
const converter = new HelmToMolfileConverter(helmCol, df);
|
|
54
57
|
const molCol = await converter.convertToRdKitBeautifiedMolfileColumn();
|
|
55
58
|
molCol.semType = DG.SEMTYPE.MOLECULE;
|
|
56
|
-
|
|
57
|
-
|
|
59
|
+
return molCol;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export async function getSmilesColumnFromHelm(
|
|
63
|
+
df: DG.DataFrame, helmCol: DG.Column<string>
|
|
64
|
+
): Promise<DG.Column<string>> {
|
|
65
|
+
const converter = new HelmToMolfileConverter(helmCol, df);
|
|
66
|
+
const smilesCol = await converter.convertToSmiles();
|
|
67
|
+
smilesCol.semType = DG.SEMTYPE.MOLECULE;
|
|
68
|
+
return smilesCol;
|
|
58
69
|
}
|
|
59
70
|
|
|
60
71
|
export class HelmToMolfileConverter {
|
|
@@ -62,9 +73,24 @@ export class HelmToMolfileConverter {
|
|
|
62
73
|
this.helmColumn = helmColumn;
|
|
63
74
|
}
|
|
64
75
|
|
|
65
|
-
async
|
|
76
|
+
async convertToSmiles(): Promise<DG.Column<string>> {
|
|
77
|
+
const smiles = await this.getSmilesList();
|
|
78
|
+
const columnName = this.df.columns.getUnusedName(`smiles(${this.helmColumn.name})`);
|
|
79
|
+
return DG.Column.fromStrings(columnName, smiles.map((molecule) => {
|
|
80
|
+
if (molecule === null)
|
|
81
|
+
return '';
|
|
82
|
+
return molecule;
|
|
83
|
+
}));
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
private async getSmilesList(): Promise<string[]> {
|
|
66
87
|
const molfilesV2K = (await this.convertToMolfileV2KColumn()).toList();
|
|
67
88
|
const smiles = molfilesV2K.map((mol) => DG.chem.convert(mol, DG.chem.Notation.MolBlock, DG.chem.Notation.Smiles));
|
|
89
|
+
return smiles;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
async convertToRdKitBeautifiedMolfileColumn(): Promise<DG.Column<string>> {
|
|
93
|
+
const smiles = await this.getSmilesList();
|
|
68
94
|
const rdKitModule: RDModule = await grok.functions.call('Chem:getRdKitModule');
|
|
69
95
|
const beautifiedMols = smiles.map((item) =>{
|
|
70
96
|
if (item === '')
|
|
@@ -75,8 +101,7 @@ export class HelmToMolfileConverter {
|
|
|
75
101
|
mol.normalize_depiction(1);
|
|
76
102
|
mol.straighten_depiction(true);
|
|
77
103
|
return mol;
|
|
78
|
-
}
|
|
79
|
-
);
|
|
104
|
+
});
|
|
80
105
|
const columnName = this.df.columns.getUnusedName(`molfile(${this.helmColumn.name})`);
|
|
81
106
|
return DG.Column.fromStrings(columnName, beautifiedMols.map((mol) => {
|
|
82
107
|
if (mol === null)
|
package/src/utils/monomer-lib.ts
CHANGED
|
@@ -8,9 +8,7 @@ import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/index';
|
|
|
8
8
|
import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler';
|
|
9
9
|
import {
|
|
10
10
|
createJsonMonomerLibFromSdf,
|
|
11
|
-
getJsonMonomerLibForEnumerator,
|
|
12
11
|
IMonomerLibHelper,
|
|
13
|
-
isValidEnumeratorLib,
|
|
14
12
|
} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
15
13
|
import {
|
|
16
14
|
HELM_REQUIRED_FIELDS as REQ, HELM_OPTIONAL_FIELDS as OPT, HELM_POLYMER_TYPE
|
|
@@ -18,12 +16,8 @@ import {
|
|
|
18
16
|
|
|
19
17
|
import {_package} from '../package';
|
|
20
18
|
|
|
21
|
-
|
|
22
|
-
REQ.SYMBOL, REQ.NAME, REQ.MOLFILE, REQ.AUTHOR, REQ.ID,
|
|
23
|
-
REQ.RGROUPS, REQ.SMILES, REQ.POLYMER_TYPE, REQ.MONOMER_TYPE, REQ.CREATE_DATE,
|
|
24
|
-
] as const;
|
|
19
|
+
import {PolyToolMonomerLibHandler} from '@datagrok-libraries/bio/src/utils/poly-tool/monomer-lib-handler';
|
|
25
20
|
|
|
26
|
-
const _HELM_OPTIONAL_FIELDS_ARRAY = [OPT.NATURAL_ANALOG, OPT.META] as const;
|
|
27
21
|
// -- Monomer libraries --
|
|
28
22
|
export const LIB_STORAGE_NAME = 'Libraries';
|
|
29
23
|
export const LIB_PATH = 'System:AppData/Bio/libraries/';
|
|
@@ -291,8 +285,9 @@ export class MonomerLibHelper implements IMonomerLibHelper {
|
|
|
291
285
|
}
|
|
292
286
|
const df = await fileSource.readCsv(fileName);
|
|
293
287
|
const json = toJson(df);
|
|
294
|
-
|
|
295
|
-
|
|
288
|
+
const polyToolMonomerLib = new PolyToolMonomerLibHandler(json);
|
|
289
|
+
if (polyToolMonomerLib.isValid())
|
|
290
|
+
rawLibData = polyToolMonomerLib.getJsonMonomerLib();
|
|
296
291
|
else
|
|
297
292
|
throw new Error('Invalid format of CSV monomer lib');
|
|
298
293
|
} else {
|
|
@@ -1,17 +1,14 @@
|
|
|
1
|
-
|
|
2
1
|
import * as grok from 'datagrok-api/grok';
|
|
3
2
|
import * as ui from 'datagrok-api/ui';
|
|
4
3
|
import * as DG from 'datagrok-api/dg';
|
|
5
4
|
|
|
6
5
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
6
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
-
import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
|
|
9
|
-
import {MonomerLibHelper} from '../../utils/monomer-lib';
|
|
10
7
|
import {_package} from '../../package';
|
|
11
8
|
import {addCommonTags} from './utils';
|
|
12
|
-
import
|
|
13
|
-
import {HELM_WRAPPER, ALL_MONOMERS, CYCLIZATION_TYPE, TRANSFORMATION_TYPE} from './const';
|
|
9
|
+
import {HELM_WRAPPER, ALL_MONOMERS, CYCLIZATION_TYPE} from './const';
|
|
14
10
|
import {MetaData, ConnectionData} from './types';
|
|
11
|
+
import {getMolColumnFromHelm} from '../helm-to-molfile';
|
|
15
12
|
|
|
16
13
|
abstract class TransformationBase {
|
|
17
14
|
constructor(helmColumn: DG.Column<string>, meta: MetaData) {
|
|
@@ -44,15 +41,21 @@ class TransformationNCys extends TransformationBase {
|
|
|
44
41
|
}
|
|
45
42
|
|
|
46
43
|
protected hasTerminals(helm: string): boolean {
|
|
47
|
-
if (! helm.includes(this.rightTerminal + HELM_WRAPPER.RIGHT))
|
|
48
|
-
|
|
49
|
-
if (this.leftTerminal === ALL_MONOMERS)
|
|
50
|
-
|
|
51
|
-
return helm.includes(HELM_WRAPPER.LEFT + this.leftTerminal);
|
|
44
|
+
// if (! helm.includes(this.rightTerminal + HELM_WRAPPER.RIGHT))
|
|
45
|
+
// return false;
|
|
46
|
+
// if (this.leftTerminal === ALL_MONOMERS)
|
|
47
|
+
// return true;
|
|
48
|
+
// return helm.includes(HELM_WRAPPER.LEFT + this.leftTerminal);
|
|
49
|
+
const positions = this.getLinkedPositions(helm);
|
|
50
|
+
return positions.every((el) => el > 0);
|
|
52
51
|
}
|
|
53
52
|
|
|
54
53
|
protected getLinkedPositions(helm: string): [number, number] {
|
|
55
|
-
|
|
54
|
+
const seq = helm.replace(HELM_WRAPPER.LEFT, '').replace(HELM_WRAPPER.RIGHT, '');
|
|
55
|
+
const monomers = seq.split('.');
|
|
56
|
+
const start = 0;
|
|
57
|
+
const end = monomers.findIndex((el, idx) => el === this.rightTerminal && idx > start);
|
|
58
|
+
return [start + 1, end + 1];
|
|
56
59
|
}
|
|
57
60
|
|
|
58
61
|
protected getTransformedHelm(helm: string): string {
|
|
@@ -145,133 +148,28 @@ function getHelmCycle(helm: string, source: ConnectionData, target: ConnectionDa
|
|
|
145
148
|
);
|
|
146
149
|
}
|
|
147
150
|
|
|
148
|
-
async function addTransformedColumn(
|
|
149
|
-
molColumn: DG.Column<string>, meta: MetaData
|
|
151
|
+
export async function addTransformedColumn(
|
|
152
|
+
molColumn: DG.Column<string>, meta: MetaData, addHelm: boolean
|
|
150
153
|
): Promise<void> {
|
|
151
154
|
const df = molColumn.dataFrame;
|
|
152
155
|
const uh = UnitsHandler.getOrCreate(molColumn);
|
|
153
156
|
const sourceHelmCol = uh.convert(NOTATION.HELM);
|
|
154
157
|
const pt = PolymerTransformation.getInstance(sourceHelmCol, meta);
|
|
155
158
|
const targetList = pt.transform();
|
|
156
|
-
const
|
|
157
|
-
const targetHelmCol = DG.Column.fromList('string',
|
|
159
|
+
const helmColName = df.columns.getUnusedName(`${meta.transformationType}(` + molColumn.name + ')');
|
|
160
|
+
const targetHelmCol = DG.Column.fromList('string', helmColName, targetList);
|
|
158
161
|
|
|
159
162
|
addCommonTags(targetHelmCol);
|
|
160
163
|
targetHelmCol.setTag('units', NOTATION.HELM);
|
|
161
|
-
targetHelmCol.setTag('cell.renderer', 'helm');
|
|
162
164
|
|
|
163
|
-
df
|
|
164
|
-
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
export function getPolyToolDialog(): DG.Dialog {
|
|
168
|
-
function getMonomerList(cyclizationType: CYCLIZATION_TYPE): string[] {
|
|
169
|
-
if (cyclizationType === cyclizationTypes[0]) {
|
|
170
|
-
return [ALL_MONOMERS].concat(
|
|
171
|
-
monomerLib.getMonomerSymbolsByType(HELM_POLYMER_TYPE.PEPTIDE)
|
|
172
|
-
);
|
|
173
|
-
}
|
|
174
|
-
if (cyclizationType === cyclizationTypes[1]) {
|
|
175
|
-
return [ALL_MONOMERS].concat(
|
|
176
|
-
monomerLib.getMonomerSymbolsByRGroup(3, HELM_POLYMER_TYPE.PEPTIDE)
|
|
177
|
-
);
|
|
178
|
-
}
|
|
179
|
-
return ['C'];
|
|
180
|
-
}
|
|
165
|
+
const molCol = await getMolColumnFromHelm(df, targetHelmCol);
|
|
166
|
+
molCol.name = df.columns.getUnusedName(`${meta.transformationType}_molfile(` + molColumn.name + ')');
|
|
181
167
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
monomerList2 = getMonomerList(CYCLIZATION_TYPE.NCys);
|
|
186
|
-
} else {
|
|
187
|
-
monomerList1 = getMonomerList(cyclizationTypeChoice.value as CYCLIZATION_TYPE);
|
|
188
|
-
monomerList2 = [...monomerList1];
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
leftTerminalChoice = ui.choiceInput(
|
|
192
|
-
'R1:', monomerList1[0], monomerList1, () => { onRGroupValueChange.next(); }
|
|
193
|
-
);
|
|
194
|
-
rightTerminalChoice = ui.choiceInput('R2:', monomerList2[0], monomerList2, () => { onRGroupValueChange.next(); });
|
|
195
|
-
onRGroupValueChange.next();
|
|
196
|
-
ui.empty(terminalControls);
|
|
197
|
-
[leftTerminalChoice, rightTerminalChoice].forEach((el) => { terminalControls.appendChild(el.root); });
|
|
168
|
+
if (addHelm) {
|
|
169
|
+
targetHelmCol.setTag('cell.renderer', 'helm');
|
|
170
|
+
df.columns.add(targetHelmCol);
|
|
198
171
|
}
|
|
172
|
+
df.columns.add(molCol, true);
|
|
199
173
|
|
|
200
|
-
|
|
201
|
-
const onRGroupValueChange = new rxjs.Subject<string>();
|
|
202
|
-
onCyclizationChoice.subscribe(() => {
|
|
203
|
-
meta.cyclizationType = cyclizationTypeChoice.value!;
|
|
204
|
-
updateMonomerList();
|
|
205
|
-
});
|
|
206
|
-
onRGroupValueChange.subscribe(() => {
|
|
207
|
-
meta.rightTerminal = rightTerminalChoice.value!;
|
|
208
|
-
meta.leftTerminal = leftTerminalChoice.value!;
|
|
209
|
-
});
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
const meta = {} as MetaData;
|
|
213
|
-
const transformations = [TRANSFORMATION_TYPE.CYCLIZATION];
|
|
214
|
-
const transformationChoice = ui.choiceInput(
|
|
215
|
-
'Modification', transformations[0], transformations, () => meta.transformationType = transformationChoice.value!
|
|
216
|
-
);
|
|
217
|
-
|
|
218
|
-
const cyclizationTypes = [CYCLIZATION_TYPE.NO, CYCLIZATION_TYPE.R3, CYCLIZATION_TYPE.NCys];
|
|
219
|
-
const cyclizationTypeChoice = ui.choiceInput(
|
|
220
|
-
'Type', cyclizationTypes[0], cyclizationTypes, () => { onCyclizationChoice.next(); }
|
|
221
|
-
);
|
|
222
|
-
|
|
223
|
-
const monomerLib = MonomerLibHelper.instance.getBioLib();
|
|
224
|
-
let monomerList1: string[] = [];
|
|
225
|
-
let monomerList2: string[] = [];
|
|
226
|
-
let leftTerminalChoice = ui.choiceInput(
|
|
227
|
-
'R1:', monomerList1[0], monomerList1, () => {
|
|
228
|
-
meta.leftTerminal = leftTerminalChoice.value!;
|
|
229
|
-
}
|
|
230
|
-
);
|
|
231
|
-
let rightTerminalChoice = ui.choiceInput('R2:', monomerList2[0], monomerList2, () => {
|
|
232
|
-
meta.rightTerminal = rightTerminalChoice.value!;
|
|
233
|
-
});
|
|
234
|
-
const terminalControls = ui.divV([leftTerminalChoice.root, rightTerminalChoice.root]);
|
|
235
|
-
|
|
236
|
-
function updateMeta() {
|
|
237
|
-
meta.cyclizationType = cyclizationTypeChoice.value!;
|
|
238
|
-
meta.leftTerminal = leftTerminalChoice.value!;
|
|
239
|
-
meta.rightTerminal = rightTerminalChoice.value!;
|
|
240
|
-
meta.transformationType = transformationChoice.value!;
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
updateMonomerList();
|
|
244
|
-
|
|
245
|
-
updateMeta();
|
|
246
|
-
|
|
247
|
-
const targetColumns = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
248
|
-
if (!targetColumns)
|
|
249
|
-
throw new Error('No dataframe with maceomolecule columns open');
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
const targetColumnInput = ui.columnInput(
|
|
253
|
-
'Column', grok.shell.t, targetColumns[0], null,
|
|
254
|
-
{filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE}
|
|
255
|
-
);
|
|
256
|
-
|
|
257
|
-
const div = ui.div([
|
|
258
|
-
targetColumnInput,
|
|
259
|
-
transformationChoice,
|
|
260
|
-
cyclizationTypeChoice,
|
|
261
|
-
terminalControls,
|
|
262
|
-
]);
|
|
263
|
-
|
|
264
|
-
const dialog = ui.dialog('Poly Tool')
|
|
265
|
-
.add(div)
|
|
266
|
-
.onOK(async () => {
|
|
267
|
-
const molCol = targetColumnInput.value;
|
|
268
|
-
if (!molCol) {
|
|
269
|
-
grok.shell.warning('No marcomolecule column chosen!');
|
|
270
|
-
return;
|
|
271
|
-
}
|
|
272
|
-
addTransformedColumn(molCol!, meta);
|
|
273
|
-
}
|
|
274
|
-
);
|
|
275
|
-
|
|
276
|
-
return dialog;
|
|
174
|
+
await grok.data.detectSemanticTypes(df);
|
|
277
175
|
}
|