@datagrok/bio 2.11.23 → 2.11.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -0
- package/dist/100.js +2 -0
- package/dist/100.js.map +1 -0
- package/dist/118.js +2 -0
- package/dist/118.js.map +1 -0
- package/dist/361.js +1 -1
- package/dist/361.js.map +1 -1
- package/dist/471.js +2 -0
- package/dist/471.js.map +1 -0
- package/dist/649.js +2 -0
- package/dist/649.js.map +1 -0
- package/dist/664.js +2 -0
- package/dist/664.js.map +1 -0
- package/dist/886.js +3 -0
- package/dist/886.js.map +1 -0
- package/dist/package-test.js +1 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -2
- package/dist/package.js.map +1 -1
- package/package.json +4 -4
- package/src/analysis/sequence-space.ts +2 -75
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +3 -2
- package/src/demo/bio05-helm-msa-sequence-space.ts +1 -1
- package/src/demo/utils.ts +6 -5
- package/src/package.ts +53 -67
- package/src/tests/activity-cliffs-tests.ts +7 -7
- package/src/tests/activity-cliffs-utils.ts +5 -4
- package/src/tests/msa-tests.ts +2 -2
- package/src/tests/pepsea-tests.ts +1 -1
- package/src/tests/sequence-space-test.ts +1 -1
- package/src/tests/sequence-space-utils.ts +1 -1
- package/src/tests/utils.ts +4 -2
- package/src/widgets/bio-substructure-filter.ts +51 -24
- package/dist/23.js +0 -2
- package/dist/23.js.map +0 -1
- package/dist/282.js +0 -2
- package/dist/282.js.map +0 -1
- package/dist/378.js +0 -2
- package/dist/378.js.map +0 -1
- package/dist/40.js +0 -2
- package/dist/40.js.map +0 -1
- package/dist/65.js +0 -2
- package/dist/65.js.map +0 -1
- package/dist/935.js +0 -3
- package/dist/935.js.map +0 -1
- package/dist/package-test.js.LICENSE.txt +0 -51
- package/dist/package.js.LICENSE.txt +0 -51
- /package/dist/{935.js.LICENSE.txt → 886.js.LICENSE.txt} +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.11.
|
|
8
|
+
"version": "2.11.26",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,9 +34,9 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.39.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.39.18",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.2.1",
|
|
39
|
-
"@datagrok-libraries/ml": "^6.
|
|
39
|
+
"@datagrok-libraries/ml": "^6.4.0",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.11",
|
|
41
41
|
"@datagrok-libraries/utils": "^4.1.36",
|
|
42
42
|
"@datagrok-libraries/math": "^1.0.7",
|
|
@@ -66,7 +66,7 @@
|
|
|
66
66
|
"webpack-bundle-analyzer": "latest",
|
|
67
67
|
"webpack-cli": "^4.9.1",
|
|
68
68
|
"@datagrok/chem": "^1.8.8",
|
|
69
|
-
"@datagrok/helm": "^2.1.
|
|
69
|
+
"@datagrok/helm": "^2.1.24",
|
|
70
70
|
"@datagrok/dendrogram": "^1.2.22"
|
|
71
71
|
},
|
|
72
72
|
"scripts": {
|
|
@@ -1,12 +1,9 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import
|
|
3
|
-
import {BitArrayMetrics
|
|
4
|
-
import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
5
|
-
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
6
4
|
import {mmDistanceFunctionArgs} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
7
5
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
6
|
import {getMonomerSubstitutionMatrix} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
9
|
-
import * as grok from 'datagrok-api/grok';
|
|
10
7
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
11
8
|
|
|
12
9
|
export interface ISequenceSpaceResult {
|
|
@@ -14,47 +11,6 @@ export interface ISequenceSpaceResult {
|
|
|
14
11
|
coordinates: DG.ColumnList;
|
|
15
12
|
}
|
|
16
13
|
|
|
17
|
-
export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
18
|
-
// code deprecated since seqCol is encoded
|
|
19
|
-
/* let preparedData: any;
|
|
20
|
-
if (!(spaceParams.seqCol!.tags[DG.TAGS.UNITS] === 'HELM')) {
|
|
21
|
-
const sep = spaceParams.seqCol.getTag(UnitsHandler.TAGS.separator);
|
|
22
|
-
const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
|
|
23
|
-
const regex = new RegExp(sepFinal, 'g');
|
|
24
|
-
if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
|
|
25
|
-
preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
|
|
26
|
-
else
|
|
27
|
-
preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
|
|
28
|
-
} else {
|
|
29
|
-
preparedData = spaceParams.seqCol.toList();
|
|
30
|
-
} */
|
|
31
|
-
|
|
32
|
-
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
33
|
-
spaceParams.seqCol.toList(),
|
|
34
|
-
spaceParams.methodName,
|
|
35
|
-
spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
|
|
36
|
-
spaceParams.options);
|
|
37
|
-
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
38
|
-
(name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
39
|
-
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
43
|
-
if (spaceParams.seqCol.version !== spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
44
|
-
//we expect only string columns here
|
|
45
|
-
await invalidateMols(spaceParams.seqCol as unknown as DG.Column<string>, false);
|
|
46
|
-
|
|
47
|
-
const result = await grok.functions.call('Chem:getChemSpaceEmbeddings', {
|
|
48
|
-
col: spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
49
|
-
methodName: spaceParams.methodName,
|
|
50
|
-
similarityMetric: spaceParams.similarityMetric,
|
|
51
|
-
xAxis: spaceParams.embedAxesNames[0],
|
|
52
|
-
yAxis: spaceParams.embedAxesNames[1],
|
|
53
|
-
options: spaceParams.options,
|
|
54
|
-
});
|
|
55
|
-
return result;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
14
|
export async function getEncodedSeqSpaceCol(
|
|
59
15
|
seqCol: DG.Column, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames, fingerprintType: string = 'Morgan'
|
|
60
16
|
): Promise<{seqList:string[], options: {[_:string]: any}}> {
|
|
@@ -118,32 +74,3 @@ export async function getEncodedSeqSpaceCol(
|
|
|
118
74
|
}
|
|
119
75
|
return {seqList, options};
|
|
120
76
|
}
|
|
121
|
-
|
|
122
|
-
export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
|
|
123
|
-
progressFunc?: (epochNum: number, epochsLength: number, embedding: number[][]) => void
|
|
124
|
-
): Promise<ISequenceSpaceResult> {
|
|
125
|
-
const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
|
|
126
|
-
if (ncUH.isHelm())
|
|
127
|
-
return await sequenceSpaceByFingerprints(spaceParams);
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
const {seqList, options} = await getEncodedSeqSpaceCol(spaceParams.seqCol, spaceParams.similarityMetric);
|
|
131
|
-
|
|
132
|
-
spaceParams.options = spaceParams.options ?? {};
|
|
133
|
-
spaceParams.options.distanceFnArgs = options;
|
|
134
|
-
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
135
|
-
seqList,
|
|
136
|
-
spaceParams.methodName,
|
|
137
|
-
spaceParams.similarityMetric,
|
|
138
|
-
spaceParams.options,
|
|
139
|
-
true, progressFunc);
|
|
140
|
-
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
141
|
-
(name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
142
|
-
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
export function getEmbeddingColsNames(df: DG.DataFrame) {
|
|
146
|
-
const axes = ['Embed_X', 'Embed_Y'];
|
|
147
|
-
const colNameInd = df.columns.names().filter((it: string) => it.includes(axes[0])).length + 1;
|
|
148
|
-
return axes.map((it) => `${it}_${colNameInd}`);
|
|
149
|
-
}
|
|
@@ -10,9 +10,9 @@ import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree
|
|
|
10
10
|
import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
|
|
11
11
|
import {handleError} from './utils';
|
|
12
12
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
13
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
14
13
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
15
14
|
import {getClusterMatrixWorker} from '@datagrok-libraries/math';
|
|
15
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
16
16
|
|
|
17
17
|
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
18
18
|
|
|
@@ -51,9 +51,10 @@ export async function demoBio01bUI() {
|
|
|
51
51
|
delay: 2000,
|
|
52
52
|
})
|
|
53
53
|
.step('Find activity cliffs', async () => {
|
|
54
|
+
const seqEncodingFunc = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
54
55
|
activityCliffsViewer = (await activityCliffs(
|
|
55
56
|
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
56
|
-
80, dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN)) as DG.ScatterPlotViewer;
|
|
57
|
+
80, dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN, seqEncodingFunc, {})) as DG.ScatterPlotViewer;
|
|
57
58
|
view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
|
|
58
59
|
|
|
59
60
|
// Show grid viewer with the cliffs
|
|
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
6
6
|
import {awaitStatus, DockerContainerStatus} from '@datagrok-libraries/bio/src/utils/docker';
|
|
7
7
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
8
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
9
8
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
10
9
|
|
|
11
10
|
import {Pepsea, pepseaMethods, runPepsea} from '../utils/pepsea';
|
|
@@ -13,6 +12,7 @@ import {sequenceSpaceTopMenu} from '../package';
|
|
|
13
12
|
import {handleError} from './utils';
|
|
14
13
|
|
|
15
14
|
import {_package} from '../package';
|
|
15
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
16
16
|
|
|
17
17
|
const helmFn: string = 'samples/HELM.csv';
|
|
18
18
|
|
package/src/demo/utils.ts
CHANGED
|
@@ -3,10 +3,11 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
|
|
5
5
|
import {_package, sequenceSpaceTopMenu} from '../package';
|
|
6
|
-
import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
|
|
7
6
|
import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
9
7
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
8
|
+
import {getNormalizedEmbeddings} from
|
|
9
|
+
'@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/embeddings-space';
|
|
10
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
10
11
|
|
|
11
12
|
enum EMBED_COL_NAMES {
|
|
12
13
|
X = 'Embed_X',
|
|
@@ -38,8 +39,8 @@ export async function demoSequenceSpace(
|
|
|
38
39
|
|
|
39
40
|
const t1: number = Date.now();
|
|
40
41
|
_package.logger.debug('Bio: demoBio01aUI(), calc reduceDimensionality start...');
|
|
41
|
-
const redDimRes = await
|
|
42
|
-
seqList, method, StringMetricsNames.Levenshtein, {});
|
|
42
|
+
const redDimRes = await getNormalizedEmbeddings( // TODO: Rename method typo
|
|
43
|
+
[seqList], method as any, [StringMetricsNames.Levenshtein], [1], 'MANHATTAN', {distanceFnArgs: [{}]});
|
|
43
44
|
const t2: number = Date.now();
|
|
44
45
|
_package.logger.debug('Bio: demoBio01aUI(), calc reduceDimensionality ' +
|
|
45
46
|
`ET: ${((t2 - t1) / 1000)} s`);
|
|
@@ -47,7 +48,7 @@ export async function demoSequenceSpace(
|
|
|
47
48
|
for (let embedI: number = 0; embedI < embedColNameList.length; embedI++) {
|
|
48
49
|
const embedColName: string = embedColNameList[embedI];
|
|
49
50
|
const embedCol: DG.Column = df.getCol(embedColName);
|
|
50
|
-
const embedColData: Float32Array = redDimRes
|
|
51
|
+
const embedColData: Float32Array = redDimRes[embedI];
|
|
51
52
|
// TODO: User DG.Column.setRawData()
|
|
52
53
|
// embedCol.setRawData(embedColData);
|
|
53
54
|
embedCol.init((rowI) => { return embedColData[rowI]; });
|
package/src/package.ts
CHANGED
|
@@ -3,23 +3,14 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
|
-
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
9
6
|
import {Options} from '@datagrok-libraries/utils/src/type-declarations';
|
|
10
|
-
import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
11
|
-
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
12
7
|
import {DimReductionBaseEditor, PreprocessFunctionReturnType}
|
|
13
8
|
from '@datagrok-libraries/ml/src/functionEditors/dimensionality-reduction-editor';
|
|
14
|
-
import {
|
|
15
|
-
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
16
|
-
import {
|
|
17
|
-
ISequenceSpaceParams, getActivityCliffs, SequenceSpaceFunc, CLIFFS_COL_ENCODE_FN
|
|
18
|
-
} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
9
|
+
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
19
10
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
20
11
|
import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
21
12
|
import {
|
|
22
|
-
TAGS as bioTAGS,
|
|
13
|
+
TAGS as bioTAGS,
|
|
23
14
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
24
15
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
25
16
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
@@ -38,11 +29,9 @@ import {
|
|
|
38
29
|
} from './utils/cell-renderer';
|
|
39
30
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
40
31
|
import {SequenceAlignment} from './seq_align';
|
|
32
|
+
import {getEncodedSeqSpaceCol} from './analysis/sequence-space';
|
|
41
33
|
import {
|
|
42
|
-
|
|
43
|
-
} from './analysis/sequence-space';
|
|
44
|
-
import {
|
|
45
|
-
createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
|
|
34
|
+
createLinesGrid, createPropPanelElement, createTooltipElement,
|
|
46
35
|
} from './analysis/sequence-activity-cliffs';
|
|
47
36
|
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
48
37
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
@@ -79,15 +68,17 @@ import {GetRegionApp} from './apps/get-region-app';
|
|
|
79
68
|
import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
80
69
|
import {sequenceToMolfile} from './utils/sequence-to-mol';
|
|
81
70
|
import {detectMacromoleculeProbeDo} from './utils/detect-macromolecule-probe';
|
|
82
|
-
|
|
83
|
-
import {SHOW_SCATTERPLOT_PROGRESS} from '@datagrok-libraries/ml/src/functionEditors/seq-space-base-editor';
|
|
84
|
-
import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
|
|
85
|
-
from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
|
|
71
|
+
import {ActivityCliffsEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-function-editor';
|
|
86
72
|
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
73
|
+
import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
|
|
74
|
+
import {getEmbeddingColsNames, multiColReduceDimensionality} from
|
|
75
|
+
'@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
|
|
76
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
77
|
+
import {ITSNEOptions, IUMAPOptions} from
|
|
78
|
+
'@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reducer';
|
|
87
79
|
|
|
88
80
|
export const _package = new BioPackage();
|
|
89
81
|
|
|
90
|
-
export const BYPASS_LARGE_DATA_WARNING = 'bypassLargeDataWarning';
|
|
91
82
|
// /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
92
83
|
// let monomerLib: MonomerLib | null = null;
|
|
93
84
|
|
|
@@ -256,16 +247,24 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
|
256
247
|
//tags: editor
|
|
257
248
|
//input: funccall call
|
|
258
249
|
export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
259
|
-
const funcEditor = new
|
|
250
|
+
const funcEditor = new ActivityCliffsEditor({semtype: DG.SEMTYPE.MACROMOLECULE});
|
|
260
251
|
ui.dialog({title: 'Activity Cliffs'})
|
|
261
|
-
.add(funcEditor.
|
|
252
|
+
.add(funcEditor.getEditor())
|
|
262
253
|
.onOK(async () => {
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
254
|
+
const params = funcEditor.getParams();
|
|
255
|
+
return call.func.prepare({
|
|
256
|
+
table: params.table,
|
|
257
|
+
molecules: params.col,
|
|
258
|
+
activities: params.activities,
|
|
259
|
+
similarity: params.similarityThreshold,
|
|
260
|
+
methodName: params.methodName,
|
|
261
|
+
similarityMetric: params.similarityMetric,
|
|
262
|
+
preprocessingFunction: params.preprocessingFunction,
|
|
263
|
+
options: params.options,
|
|
264
|
+
}).call();
|
|
265
|
+
}).show();
|
|
266
266
|
}
|
|
267
267
|
|
|
268
|
-
|
|
269
268
|
// -- Package settings editor --
|
|
270
269
|
|
|
271
270
|
// //name: packageSettingsEditor
|
|
@@ -405,74 +404,57 @@ export async function getRegionTopMenu(
|
|
|
405
404
|
//input: double similarity = 80 [Similarity cutoff]
|
|
406
405
|
//input: string methodName { choices:["UMAP", "t-SNE"] }
|
|
407
406
|
//input: string similarityMetric { choices:["Hamming", "Levenshtein", "Monomer chemical distance"] }
|
|
407
|
+
//input: func preprocessingFunction
|
|
408
408
|
//input: object options {optional: true}
|
|
409
409
|
//output: viewer result
|
|
410
410
|
//editor: Bio:SeqActivityCliffsEditor
|
|
411
|
-
export async function activityCliffs(
|
|
411
|
+
export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<string>, activities: DG.Column,
|
|
412
412
|
similarity: number, methodName: DimReductionMethods,
|
|
413
|
-
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics,
|
|
413
|
+
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics, preprocessingFunction: DG.Func,
|
|
414
414
|
options?: (IUMAPOptions | ITSNEOptions) & Options): Promise<DG.Viewer | undefined> {
|
|
415
|
-
if (!checkInputColumnUI(
|
|
415
|
+
if (!checkInputColumnUI(molecules, 'Activity Cliffs'))
|
|
416
416
|
return;
|
|
417
|
-
const axesNames = getEmbeddingColsNames(
|
|
417
|
+
const axesNames = getEmbeddingColsNames(table);
|
|
418
418
|
const tags = {
|
|
419
|
-
'units':
|
|
420
|
-
'aligned':
|
|
421
|
-
'separator':
|
|
422
|
-
'alphabet':
|
|
419
|
+
'units': molecules.getTag(DG.TAGS.UNITS),
|
|
420
|
+
'aligned': molecules.getTag(bioTAGS.aligned),
|
|
421
|
+
'separator': molecules.getTag(bioTAGS.separator),
|
|
422
|
+
'alphabet': molecules.getTag(bioTAGS.alphabet),
|
|
423
423
|
};
|
|
424
|
-
let cliffsEncodeFunction: (seqCol: DG.Column, similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics) => any =
|
|
425
|
-
getEncodedSeqSpaceCol;
|
|
426
|
-
const ncUH = UnitsHandler.getOrCreate(macroMolecule);
|
|
427
424
|
const columnDistanceMetric: MmDistanceFunctionsNames | BitArrayMetrics = similarityMetric;
|
|
428
|
-
const seqCol =
|
|
429
|
-
|
|
430
|
-
let sequenceSpaceFunc: SequenceSpaceFunc = getSequenceSpace;
|
|
431
|
-
if (ncUH.isHelm()) {
|
|
432
|
-
sequenceSpaceFunc = sequenceSpaceByFingerprints;
|
|
433
|
-
cliffsEncodeFunction = async (seqCol: DG.Column, similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics) => {
|
|
434
|
-
await invalidateMols(seqCol, false);
|
|
435
|
-
const molecularCol = seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS];
|
|
436
|
-
const fingerPrints: DG.Column =
|
|
437
|
-
await grok.functions.call('Chem:getMorganFingerprints', {molColumn: molecularCol});
|
|
438
|
-
const fingerPrintsBitArray = fingerPrints.toList().map((f: DG.BitSet) =>
|
|
439
|
-
BitArray.fromUint32Array(f.length, new Uint32Array(f.getBuffer().buffer)));
|
|
440
|
-
return {seqList: fingerPrintsBitArray, options: {}};
|
|
441
|
-
};
|
|
442
|
-
}
|
|
425
|
+
const seqCol = molecules;
|
|
443
426
|
|
|
444
427
|
const runCliffs = async () => {
|
|
445
428
|
const sp = await getActivityCliffs(
|
|
446
|
-
|
|
429
|
+
table,
|
|
447
430
|
seqCol,
|
|
448
|
-
null,
|
|
449
431
|
axesNames,
|
|
450
432
|
'Activity cliffs', //scatterTitle
|
|
451
433
|
activities,
|
|
452
434
|
similarity,
|
|
453
435
|
columnDistanceMetric, //similarityMetric
|
|
454
436
|
methodName,
|
|
437
|
+
{...(options ?? {})},
|
|
455
438
|
DG.SEMTYPE.MACROMOLECULE,
|
|
456
439
|
tags,
|
|
457
|
-
|
|
458
|
-
getChemSimilaritiesMatrix,
|
|
440
|
+
preprocessingFunction,
|
|
459
441
|
createTooltipElement,
|
|
460
442
|
createPropPanelElement,
|
|
461
443
|
createLinesGrid,
|
|
462
|
-
|
|
444
|
+
);
|
|
463
445
|
return sp;
|
|
464
446
|
};
|
|
465
447
|
|
|
466
448
|
const allowedRowCount = methodName === DimReductionMethods.UMAP ? 200_000 : 20_000;
|
|
467
449
|
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5_000 : 2_000;
|
|
468
|
-
if (
|
|
450
|
+
if (table.rowCount > allowedRowCount) {
|
|
469
451
|
grok.shell.warning(`Too many rows, maximum for sequence activity cliffs is ${allowedRowCount}`);
|
|
470
452
|
return;
|
|
471
453
|
}
|
|
472
454
|
|
|
473
455
|
const pi = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
|
|
474
456
|
return new Promise<DG.Viewer | undefined>((resolve, reject) => {
|
|
475
|
-
if (
|
|
457
|
+
if (table.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
|
|
476
458
|
ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
|
|
477
459
|
Do you want to continue?`))
|
|
478
460
|
.onOK(async () => {
|
|
@@ -495,7 +477,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
495
477
|
//meta.supportedSemTypes: Macromolecule
|
|
496
478
|
//meta.supportedTypes: string
|
|
497
479
|
//meta.supportedUnits: fasta,separator,helm
|
|
498
|
-
//meta.supportedDistanceFunctions: Hamming,
|
|
480
|
+
//meta.supportedDistanceFunctions: Levenshtein,Hamming,Monomer chemical distance,Needlemann-Wunsch
|
|
499
481
|
//input: column col {semType: Macromolecule}
|
|
500
482
|
//input: string metric
|
|
501
483
|
//input: double gapOpen = 1 {caption: Gap open penalty; default: 1; optional: true}
|
|
@@ -506,6 +488,8 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
506
488
|
export async function macromoleculePreprocessingFunction(
|
|
507
489
|
col: DG.Column, metric: MmDistanceFunctionsNames, gapOpen: number = 1, gapExtend: number = 0.6,
|
|
508
490
|
fingerprintType = 'Morgan'): Promise<PreprocessFunctionReturnType> {
|
|
491
|
+
if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
492
|
+
return {entries: col.toList(), options: {}};
|
|
509
493
|
const {seqList, options} = await getEncodedSeqSpaceCol(col, metric, fingerprintType);
|
|
510
494
|
return {entries: seqList, options: {...options, gapOpen, gapExtend}};
|
|
511
495
|
}
|
|
@@ -558,13 +542,15 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
|
|
|
558
542
|
return;
|
|
559
543
|
if (!preprocessingFunction)
|
|
560
544
|
preprocessingFunction = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
561
|
-
|
|
562
|
-
const res = await
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
545
|
+
options ??= {};
|
|
546
|
+
const res = await multiColReduceDimensionality(table, [molecules], methodName,
|
|
547
|
+
[similarityMetric as KnownMetrics], [1], [preprocessingFunction], 'MANHATTAN',
|
|
548
|
+
plotEmbeddings, clusterEmbeddings ?? false,
|
|
549
|
+
{...options, preprocessingFuncArgs: [options.preprocessingFuncArgs ?? {}]}, {
|
|
550
|
+
fastRowCount: 10000,
|
|
551
|
+
scatterPlotName: 'Sequence space',
|
|
552
|
+
bypassLargeDataWarning: options?.[BYPASS_LARGE_DATA_WARNING],
|
|
553
|
+
});
|
|
568
554
|
return res;
|
|
569
555
|
}
|
|
570
556
|
|
|
@@ -5,7 +5,6 @@ import {after, before, category, test} from '@datagrok-libraries/utils/src/test'
|
|
|
5
5
|
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
7
|
import {_testActivityCliffsOpen} from './activity-cliffs-utils';
|
|
8
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
9
8
|
|
|
10
9
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
11
10
|
import {BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
@@ -15,6 +14,7 @@ import {
|
|
|
15
14
|
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
16
15
|
|
|
17
16
|
import {_package} from '../package-test';
|
|
17
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
category('activityCliffs', async () => {
|
|
@@ -24,8 +24,8 @@ category('activityCliffs', async () => {
|
|
|
24
24
|
let monomerLibHelper: IMonomerLibHelper;
|
|
25
25
|
/** Backup actual user's monomer libraries settings */
|
|
26
26
|
let userLibSettings: LibSettings;
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
const seqEncodingFunc = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
28
|
+
const helmEncodingFunc = DG.Func.find({name: 'helmPreprocessingFunction', package: 'Bio'})[0];
|
|
29
29
|
before(async () => {
|
|
30
30
|
monomerLibHelper = await getMonomerLibHelper();
|
|
31
31
|
userLibSettings = await getUserLibSettings();
|
|
@@ -57,7 +57,7 @@ category('activityCliffs', async () => {
|
|
|
57
57
|
const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
|
|
58
58
|
|
|
59
59
|
await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
|
|
60
|
-
'sequence', 'Activity', 90, cliffsNum, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
60
|
+
'sequence', 'Activity', 90, cliffsNum, MmDistanceFunctionsNames.LEVENSHTEIN, seqEncodingFunc);
|
|
61
61
|
});
|
|
62
62
|
|
|
63
63
|
test('activityCliffsWithEmptyRows', async () => {
|
|
@@ -67,14 +67,14 @@ category('activityCliffs', async () => {
|
|
|
67
67
|
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
68
68
|
|
|
69
69
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, DimReductionMethods.UMAP,
|
|
70
|
-
'sequence', 'Activity', 90, 3, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
70
|
+
'sequence', 'Activity', 90, 3, MmDistanceFunctionsNames.LEVENSHTEIN, seqEncodingFunc);
|
|
71
71
|
});
|
|
72
72
|
|
|
73
73
|
test('Helm', async () => {
|
|
74
74
|
const df = await _package.files.readCsv('data/sample_HELM_50.csv');
|
|
75
|
-
const
|
|
75
|
+
const _view = grok.shell.addTableView(df);
|
|
76
76
|
|
|
77
77
|
await _testActivityCliffsOpen(df, DimReductionMethods.UMAP,
|
|
78
|
-
'HELM', 'Activity', 65,
|
|
78
|
+
'HELM', 'Activity', 65, 20, BitArrayMetricsNames.Tanimoto, helmEncodingFunc);
|
|
79
79
|
});
|
|
80
80
|
});
|
|
@@ -2,19 +2,20 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
4
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
5
|
-
import {activityCliffs
|
|
6
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
5
|
+
import {activityCliffs} from '../package';
|
|
7
6
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
8
7
|
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
|
+
import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
|
|
9
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
9
10
|
|
|
10
11
|
export async function _testActivityCliffsOpen(df: DG.DataFrame, drMethod: DimReductionMethods,
|
|
11
12
|
seqColName: string, activityColName: string, similarityThr: number, tgtNumberCliffs: number,
|
|
12
|
-
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics
|
|
13
|
+
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics, preprocessingFunction: DG.Func,
|
|
13
14
|
): Promise<void> {
|
|
14
15
|
await grok.data.detectSemanticTypes(df);
|
|
15
16
|
const scatterPlot = await activityCliffs(
|
|
16
17
|
df, df.getCol(seqColName), df.getCol(activityColName),
|
|
17
|
-
similarityThr, drMethod, similarityMetric, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
|
|
18
|
+
similarityThr, drMethod, similarityMetric, preprocessingFunction, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
|
|
18
19
|
// const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
|
|
19
20
|
// table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
|
|
20
21
|
// similarity: 50, methodName: method
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -81,12 +81,12 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
81
81
|
test('isCorrectHelm', async () => {
|
|
82
82
|
await awaitContainerStart();
|
|
83
83
|
await _testMSAOnColumn(helmFromCsv, helmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
|
|
84
|
-
}
|
|
84
|
+
});
|
|
85
85
|
|
|
86
86
|
test('isCorrectHelmLong', async () => {
|
|
87
87
|
await awaitContainerStart();
|
|
88
88
|
await _testMSAOnColumn(longHelmFromCsv, longHelmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
|
|
89
|
-
}
|
|
89
|
+
});
|
|
90
90
|
|
|
91
91
|
test('isCorrectSeparator', async () => {
|
|
92
92
|
await _testMSAOnColumn(
|
|
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {category, test} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
7
|
import {_testSequenceSpaceReturnsResult} from './sequence-space-utils';
|
|
8
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/
|
|
8
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
9
9
|
|
|
10
10
|
category('sequenceSpace', async () => {
|
|
11
11
|
let testFastaDf: DG.DataFrame;
|
|
@@ -3,8 +3,8 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
import {sequenceSpaceTopMenu} from '../package';
|
|
5
5
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
6
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
7
6
|
import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
|
|
7
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
8
8
|
|
|
9
9
|
export async function _testSequenceSpaceReturnsResult(
|
|
10
10
|
df: DG.DataFrame, algorithm: DimReductionMethods, colName: string,
|
package/src/tests/utils.ts
CHANGED
|
@@ -35,9 +35,11 @@ export function _testTableIsNotEmpty(table: DG.DataFrame): void {
|
|
|
35
35
|
/** Waits if container is not started
|
|
36
36
|
* @param {number} ms - time to wait in milliseconds */
|
|
37
37
|
export async function awaitContainerStart(ms: number = 10000): Promise<void> {
|
|
38
|
-
const
|
|
39
|
-
if (
|
|
38
|
+
const container = await grok.dapi.docker.dockerContainers.filter('bio').first();
|
|
39
|
+
if (container.status !== 'started' && container.status !== 'checking')
|
|
40
40
|
await delay(ms);
|
|
41
|
+
// TODO: Enable with new JS API version
|
|
42
|
+
// await grok.dapi.docker.dockerContainers.run(container.id, true);
|
|
41
43
|
}
|
|
42
44
|
|
|
43
45
|
export async function awaitGrid(grid: DG.Grid, timeout: number = 5000): Promise<void> {
|