@datagrok/bio 2.11.0 → 2.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/src/package.ts +56 -44
- package/src/tests/activity-cliffs-tests.ts +14 -3
- package/src/tests/activity-cliffs-utils.ts +6 -5
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.11.0",
|
|
8
|
+
"version": "2.11.2",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
37
|
"@datagrok-libraries/bio": "^5.39.0",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
|
-
"@datagrok-libraries/ml": "^6.3.
|
|
39
|
+
"@datagrok-libraries/ml": "^6.3.51",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
41
41
|
"@datagrok-libraries/utils": "^4.0.17",
|
|
42
42
|
"cash-dom": "^8.0.0",
|
package/src/package.ts
CHANGED
|
@@ -3,36 +3,52 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
|
|
6
|
+
|
|
7
|
+
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
|
+
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
9
|
+
import {Options} from '@datagrok-libraries/utils/src/type-declarations';
|
|
10
|
+
import {RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
11
|
+
import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
12
|
+
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
13
|
+
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
14
|
+
import {
|
|
15
|
+
ISequenceSpaceParams, getActivityCliffs, SequenceSpaceFunc
|
|
16
|
+
} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
17
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
18
|
+
import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
19
|
+
import {
|
|
20
|
+
TAGS as bioTAGS, ALPHABET, NOTATION,
|
|
21
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
22
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
23
|
+
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
24
|
+
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
25
|
+
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
|
+
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
27
|
+
import {SCORE, calculateScores} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
|
|
28
|
+
import {
|
|
29
|
+
createJsonMonomerLibFromSdf, IMonomerLibHelper
|
|
30
|
+
} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
31
|
+
|
|
32
|
+
import {getMacromoleculeColumns} from './utils/ui-utils';
|
|
6
33
|
import {
|
|
7
34
|
MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer,
|
|
8
35
|
} from './utils/cell-renderer';
|
|
9
36
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
10
37
|
import {SequenceAlignment} from './seq_align';
|
|
11
|
-
import {
|
|
12
|
-
|
|
38
|
+
import {
|
|
39
|
+
ISequenceSpaceResult, getEmbeddingColsNames, getSequenceSpace, sequenceSpaceByFingerprints
|
|
40
|
+
} from './analysis/sequence-space';
|
|
13
41
|
import {
|
|
14
42
|
createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
|
|
15
43
|
} from './analysis/sequence-activity-cliffs';
|
|
16
|
-
import {convert} from './utils/convert';
|
|
17
|
-
import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
|
|
18
|
-
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
19
|
-
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
20
|
-
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
21
|
-
|
|
22
44
|
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
23
45
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
24
46
|
import {SubstructureSearchDialog} from './substructure-search/substructure-search';
|
|
47
|
+
import {convert} from './utils/convert';
|
|
48
|
+
import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
|
|
25
49
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
26
50
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
27
|
-
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
28
|
-
import {
|
|
29
|
-
TAGS as bioTAGS, ALPHABET, NOTATION,
|
|
30
|
-
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
31
|
-
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
32
|
-
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
33
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
34
51
|
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
35
|
-
import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
36
52
|
import {
|
|
37
53
|
MonomerLibHelper,
|
|
38
54
|
getUserLibSettings,
|
|
@@ -40,12 +56,6 @@ import {
|
|
|
40
56
|
getLibFileNameList,
|
|
41
57
|
getLibraryPanelUI
|
|
42
58
|
} from './utils/monomer-lib';
|
|
43
|
-
import {getMacromoleculeColumns} from './utils/ui-utils';
|
|
44
|
-
import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
45
|
-
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
46
|
-
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
47
|
-
import {SCORE, calculateScores} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
|
|
48
|
-
|
|
49
59
|
import {demoBio01UI} from './demo/bio01-similarity-diversity';
|
|
50
60
|
import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
|
|
51
61
|
import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
|
|
@@ -53,14 +63,11 @@ import {demoBio03UI} from './demo/bio03-atomic-level';
|
|
|
53
63
|
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
54
64
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
55
65
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
56
|
-
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
57
|
-
import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
58
66
|
import {WebLogoApp} from './apps/web-logo-app';
|
|
59
67
|
import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
|
|
60
68
|
import {splitToMonomersUI} from './utils/split-to-monomers';
|
|
61
69
|
import {MonomerCellRenderer} from './utils/monomer-cell-renderer';
|
|
62
70
|
import {BioPackage, BioPackageProperties} from './package-types';
|
|
63
|
-
import {RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
64
71
|
import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-widget';
|
|
65
72
|
import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
|
|
66
73
|
import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
|
|
@@ -71,11 +78,12 @@ import {getRegionDo} from './utils/get-region';
|
|
|
71
78
|
import {GetRegionApp} from './apps/get-region-app';
|
|
72
79
|
import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
73
80
|
import {HelmToMolfileConverter} from './utils/helm-to-molfile';
|
|
74
|
-
import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
|
|
75
|
-
from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
|
|
76
|
-
import {Options} from '@datagrok-libraries/utils/src/type-declarations';
|
|
77
81
|
import {sequenceToMolfile} from './utils/sequence-to-mol';
|
|
82
|
+
import {errInfo} from './utils/err-info';
|
|
83
|
+
|
|
78
84
|
import {SHOW_SCATTERPLOT_PROGRESS} from '@datagrok-libraries/ml/src/functionEditors/seq-space-base-editor';
|
|
85
|
+
import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
|
|
86
|
+
from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
|
|
79
87
|
|
|
80
88
|
export const _package = new BioPackage();
|
|
81
89
|
|
|
@@ -404,6 +412,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
404
412
|
const ncUH = UnitsHandler.getOrCreate(macroMolecule);
|
|
405
413
|
let columnDistanceMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
|
|
406
414
|
let seqCol = macroMolecule;
|
|
415
|
+
let sequenceSpaceFunc: SequenceSpaceFunc = sequenceSpaceByFingerprints;
|
|
407
416
|
if (ncUH.isFasta() || (ncUH.isSeparator() && ncUH.alphabet && ncUH.alphabet !== ALPHABET.UN)) {
|
|
408
417
|
if (ncUH.isFasta()) {
|
|
409
418
|
columnDistanceMetric = ncUH.getDistanceFunctionName();
|
|
@@ -413,6 +422,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
413
422
|
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
414
423
|
tags.units = NOTATION.FASTA;
|
|
415
424
|
}
|
|
425
|
+
sequenceSpaceFunc = getSequenceSpace;
|
|
416
426
|
}
|
|
417
427
|
const runCliffs = async () => {
|
|
418
428
|
const sp = await getActivityCliffs(
|
|
@@ -427,7 +437,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
427
437
|
methodName,
|
|
428
438
|
DG.SEMTYPE.MACROMOLECULE,
|
|
429
439
|
tags,
|
|
430
|
-
|
|
440
|
+
sequenceSpaceFunc,
|
|
431
441
|
getChemSimilaritiesMatrix,
|
|
432
442
|
createTooltipElement,
|
|
433
443
|
createPropPanelElement,
|
|
@@ -443,20 +453,23 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
443
453
|
return;
|
|
444
454
|
}
|
|
445
455
|
|
|
446
|
-
|
|
447
|
-
|
|
456
|
+
return new Promise<DG.Viewer>((resolve, reject) => {
|
|
457
|
+
if (df.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
|
|
458
|
+
ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
|
|
448
459
|
Do you want to continue?`))
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
.
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
+
.onOK(async () => {
|
|
461
|
+
const progressBar = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
|
|
462
|
+
runCliffs().then((res) => resolve(res)).catch((err) => reject(err)).finally(() => { progressBar.close();});
|
|
463
|
+
})
|
|
464
|
+
.show();
|
|
465
|
+
} else {
|
|
466
|
+
runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
|
|
467
|
+
}
|
|
468
|
+
}).catch((err: any) => {
|
|
469
|
+
const [errMsg, errStack] = errInfo(err);
|
|
470
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
471
|
+
throw err;
|
|
472
|
+
});
|
|
460
473
|
}
|
|
461
474
|
|
|
462
475
|
//top-menu: Bio | Analyze | Sequence Space...
|
|
@@ -585,7 +598,7 @@ export async function sequenceSpaceTopMenu(
|
|
|
585
598
|
table.columns.add(embedCol);
|
|
586
599
|
}
|
|
587
600
|
embedCol.init((i) => listValues[i]);
|
|
588
|
-
|
|
601
|
+
//table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
|
|
589
602
|
}
|
|
590
603
|
if (plotEmbeddings) {
|
|
591
604
|
if (!scatterPlot) {
|
|
@@ -1084,7 +1097,6 @@ export async function enumeratorColumnChoice(df: DG.DataFrame, macroMolecule: DG
|
|
|
1084
1097
|
await grok.data.detectSemanticTypes(df);
|
|
1085
1098
|
}
|
|
1086
1099
|
|
|
1087
|
-
//top-menu: Bio | Convert | SDF to JSON Library...
|
|
1088
1100
|
//name: SDF to JSON Library
|
|
1089
1101
|
//input: dataframe table
|
|
1090
1102
|
export async function sdfToJsonLib(table: DG.DataFrame) {
|
|
package/src/tests/activity-cliffs-tests.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
2
|
import * as DG from 'datagrok-api/dg';
|
|
4
3
|
|
|
5
4
|
import {after, before, category, test} from '@datagrok-libraries/utils/src/test';
|
|
@@ -8,6 +7,8 @@ import {readDataframe} from './utils';
|
|
|
8
7
|
import {_testActivityCliffsOpen} from './activity-cliffs-utils';
|
|
9
8
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
10
9
|
|
|
10
|
+
import {_package} from '../package-test';
|
|
11
|
+
|
|
11
12
|
|
|
12
13
|
category('activityCliffs', async () => {
|
|
13
14
|
let actCliffsTableView: DG.TableView;
|
|
@@ -37,7 +38,8 @@ category('activityCliffs', async () => {
|
|
|
37
38
|
viewList.push(actCliffsTableView);
|
|
38
39
|
const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
|
|
39
40
|
|
|
40
|
-
await _testActivityCliffsOpen(actCliffsDf,
|
|
41
|
+
await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
|
|
42
|
+
'sequence', 'Activity', 90, cliffsNum);
|
|
41
43
|
});
|
|
42
44
|
|
|
43
45
|
test('activityCliffsWithEmptyRows', async () => {
|
|
@@ -46,6 +48,15 @@ category('activityCliffs', async () => {
|
|
|
46
48
|
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
47
49
|
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
48
50
|
|
|
49
|
-
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows,
|
|
51
|
+
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, DimReductionMethods.UMAP,
|
|
52
|
+
'sequence', 'Activity', 90, 3);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
test('Helm', async () => {
|
|
56
|
+
const df = await _package.files.readCsv('samples/sample_HELM.csv');
|
|
57
|
+
const view = grok.shell.addTableView(df);
|
|
58
|
+
|
|
59
|
+
await _testActivityCliffsOpen(df, DimReductionMethods.UMAP,
|
|
60
|
+
'HELM', 'Activity', 90, 53);
|
|
50
61
|
});
|
|
51
62
|
});
|
|
package/src/tests/activity-cliffs-utils.ts
CHANGED
|
@@ -5,12 +5,13 @@ import {expect} from '@datagrok-libraries/utils/src/test';
|
|
|
5
5
|
import {activityCliffs, BYPASS_LARGE_DATA_WARNING} from '../package';
|
|
6
6
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
7
7
|
|
|
8
|
-
export async function _testActivityCliffsOpen(df: DG.DataFrame,
|
|
9
|
-
|
|
8
|
+
export async function _testActivityCliffsOpen(df: DG.DataFrame, drMethod: DimReductionMethods,
|
|
9
|
+
seqColName: string, activityColName: string, similarityThr: number, tgtNumberCliffs: number
|
|
10
|
+
): Promise<void> {
|
|
10
11
|
await grok.data.detectSemanticTypes(df);
|
|
11
12
|
const scatterPlot = await activityCliffs(
|
|
12
|
-
df, df.getCol(
|
|
13
|
-
|
|
13
|
+
df, df.getCol(seqColName), df.getCol(activityColName),
|
|
14
|
+
similarityThr, drMethod, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
|
|
14
15
|
// const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
|
|
15
16
|
// table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
|
|
16
17
|
// similarity: 50, methodName: method
|
|
@@ -27,5 +28,5 @@ export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: nu
|
|
|
27
28
|
const classList: string[] = el.className.split(' ');
|
|
28
29
|
return ['ui-btn', 'ui-btn-ok'].every((reqClassName) => classList.includes(reqClassName));
|
|
29
30
|
});
|
|
30
|
-
expect((cliffsLink as HTMLElement).innerText.toLowerCase(), `${numberCliffs} cliffs`);
|
|
31
|
+
expect((cliffsLink as HTMLElement).innerText.toLowerCase(), `${tgtNumberCliffs} cliffs`);
|
|
31
32
|
}
|