@datagrok/bio 2.4.30 → 2.4.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +6 -8
- package/README.md +22 -7
- package/detectors.js +21 -12
- package/dist/1.js +2 -0
- package/dist/1.js.map +1 -0
- package/dist/18.js +2 -0
- package/dist/18.js.map +1 -0
- package/dist/190.js +2 -0
- package/dist/190.js.map +1 -0
- package/dist/452.js +2 -0
- package/dist/452.js.map +1 -0
- package/dist/729.js +2 -0
- package/dist/729.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/libraries/broken-lib.sdf +136 -0
- package/files/libraries/group1/mock-lib-3.json +74 -0
- package/files/libraries/mock-lib-2.json +48 -0
- package/files/tests/100_3_clustests.csv +100 -0
- package/files/tests/100_3_clustests_empty_vals.csv +100 -0
- package/files/tests/peptides_motif-with-random_10000.csv +9998 -0
- package/package.json +4 -4
- package/scripts/sequence_generator.py +185 -48
- package/src/analysis/sequence-activity-cliffs.ts +9 -11
- package/src/analysis/sequence-diversity-viewer.ts +8 -3
- package/src/analysis/sequence-search-base-viewer.ts +4 -3
- package/src/analysis/sequence-similarity-viewer.ts +13 -7
- package/src/analysis/sequence-space.ts +15 -12
- package/src/analysis/workers/mm-distance-array-service.ts +48 -0
- package/src/analysis/workers/mm-distance-array-worker.ts +29 -0
- package/src/analysis/workers/mm-distance-worker-creator.ts +6 -9
- package/src/apps/web-logo-app.ts +34 -0
- package/src/calculations/monomerLevelMols.ts +10 -12
- package/src/demo/bio01-similarity-diversity.ts +4 -5
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +6 -7
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +8 -8
- package/src/demo/bio03-atomic-level.ts +1 -4
- package/src/demo/bio05-helm-msa-sequence-space.ts +8 -5
- package/src/demo/utils.ts +4 -3
- package/src/package-test.ts +1 -2
- package/src/package.ts +138 -83
- package/src/seq_align.ts +482 -483
- package/src/substructure-search/substructure-search.ts +3 -3
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +12 -35
- package/src/tests/_first-tests.ts +1 -1
- package/src/tests/activity-cliffs-tests.ts +10 -6
- package/src/tests/activity-cliffs-utils.ts +6 -4
- package/src/tests/bio-tests.ts +20 -25
- package/src/tests/checkInputColumn-tests.ts +5 -11
- package/src/tests/converters-test.ts +19 -37
- package/src/tests/detectors-benchmark-tests.ts +35 -37
- package/src/tests/detectors-tests.ts +29 -34
- package/src/tests/detectors-weak-and-likely-tests.ts +11 -21
- package/src/tests/fasta-export-tests.ts +3 -3
- package/src/tests/fasta-handler-test.ts +2 -3
- package/src/tests/lib-tests.ts +2 -4
- package/src/tests/mm-distance-tests.ts +25 -17
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/tests/msa-tests.ts +12 -9
- package/src/tests/pepsea-tests.ts +6 -3
- package/src/tests/renderers-test.ts +13 -11
- package/src/tests/sequence-space-test.ts +10 -7
- package/src/tests/sequence-space-utils.ts +7 -3
- package/src/tests/similarity-diversity-tests.ts +47 -61
- package/src/tests/splitters-test.ts +14 -20
- package/src/tests/to-atomic-level-tests.ts +9 -17
- package/src/tests/units-handler-splitted-tests.ts +106 -0
- package/src/tests/units-handler-tests.ts +22 -26
- package/src/tests/utils/sequences-generators.ts +6 -2
- package/src/tests/utils.ts +10 -4
- package/src/tests/viewers.ts +1 -1
- package/src/utils/atomic-works.ts +49 -57
- package/src/utils/cell-renderer.ts +25 -8
- package/src/utils/check-input-column.ts +19 -4
- package/src/utils/constants.ts +3 -3
- package/src/utils/convert.ts +56 -23
- package/src/utils/monomer-lib.ts +83 -64
- package/src/utils/multiple-sequence-alignment-ui.ts +24 -21
- package/src/utils/multiple-sequence-alignment.ts +2 -2
- package/src/utils/pepsea.ts +17 -7
- package/src/utils/save-as-fasta.ts +11 -4
- package/src/utils/ui-utils.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +21 -22
- package/src/viewers/web-logo-viewer.ts +189 -154
- package/src/widgets/bio-substructure-filter.ts +9 -6
- package/src/widgets/representations.ts +11 -12
- package/tsconfig.json +1 -1
- package/dist/258.js +0 -2
- package/dist/258.js.map +0 -1
- package/dist/562.js +0 -2
- package/dist/562.js.map +0 -1
- package/dist/705.js +0 -2
- package/dist/705.js.map +0 -1
- package/dist/925.js +0 -2
- package/dist/925.js.map +0 -1
- package/src/analysis/workers/mm-distance-worker.ts +0 -16
|
@@ -1,19 +1,17 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
3
2
|
import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
|
|
4
3
|
import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
5
4
|
import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
|
-
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
7
5
|
import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
8
6
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
9
7
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
8
|
import * as grok from 'datagrok-api/grok';
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
9
|
+
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
10
|
+
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
14
12
|
|
|
15
13
|
export interface ISequenceSpaceResult {
|
|
16
|
-
distance
|
|
14
|
+
distance?: Float32Array;
|
|
17
15
|
coordinates: DG.ColumnList;
|
|
18
16
|
}
|
|
19
17
|
|
|
@@ -44,7 +42,8 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
44
42
|
|
|
45
43
|
export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
46
44
|
if (spaceParams.seqCol.version !== spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
47
|
-
|
|
45
|
+
//we expect only string columns here
|
|
46
|
+
await invalidateMols(spaceParams.seqCol as unknown as DG.Column<string>, false);
|
|
48
47
|
|
|
49
48
|
const result = await grok.functions.call('Chem:getChemSpaceEmbeddings', {
|
|
50
49
|
col: spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
@@ -52,7 +51,7 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
|
|
|
52
51
|
similarityMetric: spaceParams.similarityMetric,
|
|
53
52
|
xAxis: spaceParams.embedAxesNames[0],
|
|
54
53
|
yAxis: spaceParams.embedAxesNames[1],
|
|
55
|
-
options: spaceParams.options
|
|
54
|
+
options: spaceParams.options,
|
|
56
55
|
});
|
|
57
56
|
return result;
|
|
58
57
|
}
|
|
@@ -65,17 +64,21 @@ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams): Promi
|
|
|
65
64
|
if (nc.isSeparator()) {
|
|
66
65
|
const fastaCol = nc.convert(NOTATION.FASTA);
|
|
67
66
|
seqList = fastaCol.toList();
|
|
68
|
-
const uh =
|
|
67
|
+
const uh = UnitsHandler.getOrCreate(fastaCol);
|
|
69
68
|
distanceFName = uh.getDistanceFunctionName();
|
|
70
|
-
}
|
|
71
|
-
else {
|
|
69
|
+
} else {
|
|
72
70
|
distanceFName = nc.getDistanceFunctionName();
|
|
73
71
|
}
|
|
72
|
+
for (let i = 0; i < seqList.length; i++) {
|
|
73
|
+
// toList puts empty values in array and it causes downstream errors. replace with null
|
|
74
|
+
seqList[i] = spaceParams.seqCol.isNone(i) ? null : seqList[i];
|
|
75
|
+
}
|
|
74
76
|
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
75
77
|
seqList,
|
|
76
78
|
spaceParams.methodName,
|
|
77
79
|
distanceFName,
|
|
78
|
-
spaceParams.options
|
|
80
|
+
spaceParams.options,
|
|
81
|
+
true);
|
|
79
82
|
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
80
83
|
(name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
81
84
|
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
5
|
+
|
|
6
|
+
export async function calculateMMDistancesArray(
|
|
7
|
+
macromoleculeCol: DG.Column, templateIdx: number
|
|
8
|
+
): Promise<Float32Array> {
|
|
9
|
+
const values = macromoleculeCol.toList();
|
|
10
|
+
if (macromoleculeCol.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
11
|
+
throw new Error('Column has to be of macromolecule type');
|
|
12
|
+
const uh = UnitsHandler.getOrCreate(macromoleculeCol);
|
|
13
|
+
const fnName = uh.getDistanceFunctionName();
|
|
14
|
+
const threadCount = Math.min(Math.max(navigator.hardwareConcurrency - 2, 1), values.length);
|
|
15
|
+
const workers = new Array(threadCount).fill(null).map((_i) =>
|
|
16
|
+
new Worker(new URL('mm-distance-array-worker', import.meta.url)));
|
|
17
|
+
const res = new Float32Array(values.length);
|
|
18
|
+
let lmin = 0;
|
|
19
|
+
let lmax = Number.MIN_VALUE;
|
|
20
|
+
const promises = workers.map((worker, i) => {
|
|
21
|
+
const start = Math.floor(i * values.length / threadCount);
|
|
22
|
+
const end = i === workers.length - 1 ? Math.floor((i + 1) * values.length / threadCount) : values.length;
|
|
23
|
+
return new Promise<void>((resolve, reject) => {
|
|
24
|
+
worker.onmessage = ({data: {error, distanceArrayData, min, max}}) => {
|
|
25
|
+
if (error) {
|
|
26
|
+
reject(error);
|
|
27
|
+
} else {
|
|
28
|
+
lmin = Math.min(lmin, min);
|
|
29
|
+
lmax = Math.max(lmax, max);
|
|
30
|
+
res.set(distanceArrayData, start);
|
|
31
|
+
resolve();
|
|
32
|
+
}
|
|
33
|
+
};
|
|
34
|
+
worker.postMessage({fnName, values, templateIdx, start, end});
|
|
35
|
+
});
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
try {
|
|
39
|
+
await Promise.all(promises);
|
|
40
|
+
res.forEach((value, index) => { res[index] = (value - lmin) / (lmax - lmin); });
|
|
41
|
+
workers.forEach((worker) => worker.terminate());
|
|
42
|
+
} catch (e) {
|
|
43
|
+
workers.forEach((worker) => worker.terminate());
|
|
44
|
+
throw e;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
return res;
|
|
48
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import {isNil} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
2
|
+
import {mmDistanceFunctions} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
3
|
+
|
|
4
|
+
onmessage = (event) => {
|
|
5
|
+
const {fnName, values, templateIdx, start, end} = event.data;
|
|
6
|
+
const data: { error?: any, distanceArrayData?: Float32Array, min?: number, max?: number} = {};
|
|
7
|
+
try {
|
|
8
|
+
let lmin = 0;
|
|
9
|
+
let lmax = Number.MIN_VALUE;
|
|
10
|
+
const retVal = new Float32Array(end - start);
|
|
11
|
+
const distanceFn = mmDistanceFunctions[fnName as keyof typeof mmDistanceFunctions]();
|
|
12
|
+
|
|
13
|
+
for (let i = start; i < end; i++) {
|
|
14
|
+
const value = !isNil(values[i]) && !isNil(values[templateIdx]) ?
|
|
15
|
+
distanceFn(values[i], values[templateIdx]) : 1;
|
|
16
|
+
retVal[i - start] = value;
|
|
17
|
+
if (value < lmin)
|
|
18
|
+
lmin = value;
|
|
19
|
+
if (value > lmax)
|
|
20
|
+
lmax = value;
|
|
21
|
+
}
|
|
22
|
+
data.distanceArrayData = retVal;
|
|
23
|
+
data.min = lmin;
|
|
24
|
+
data.max = lmax;
|
|
25
|
+
} catch (e) {
|
|
26
|
+
data.error = e;
|
|
27
|
+
}
|
|
28
|
+
postMessage(data);
|
|
29
|
+
};
|
|
@@ -2,21 +2,18 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
5
|
+
import {DistanceMatrixService} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
5
6
|
|
|
6
7
|
export async function calcMmDistanceMatrix(column: DG.Column<any>): Promise<Float32Array> {
|
|
7
8
|
const values = column.toList();
|
|
8
|
-
const worker = new Worker(new URL('./mm-distance-worker.ts', import.meta.url));
|
|
9
9
|
if (column.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
10
10
|
throw new Error('Column has to be of macromolecule type');
|
|
11
|
-
const uh =
|
|
11
|
+
const uh = UnitsHandler.getOrCreate(column);
|
|
12
12
|
const fnName = uh.getDistanceFunctionName();
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
error ? reject(error) : resolve(distanceMatrixData);
|
|
18
|
-
};
|
|
19
|
-
});
|
|
13
|
+
const distanceMatrixService = new DistanceMatrixService(true, false);
|
|
14
|
+
const dm = await distanceMatrixService.calc(values, fnName);
|
|
15
|
+
distanceMatrixService.terminate();
|
|
16
|
+
return dm;
|
|
20
17
|
}
|
|
21
18
|
|
|
22
19
|
// gets index of compressed distance matrix from 2d coordinates
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
6
|
+
import {_package} from '../package';
|
|
7
|
+
|
|
8
|
+
export class WebLogoApp {
|
|
9
|
+
private _funcName: string = '';
|
|
10
|
+
|
|
11
|
+
df: DG.DataFrame;
|
|
12
|
+
view: DG.TableView;
|
|
13
|
+
|
|
14
|
+
constructor() {}
|
|
15
|
+
|
|
16
|
+
async init(df: DG.DataFrame, funcName: string): Promise<void> {
|
|
17
|
+
this._funcName = funcName;
|
|
18
|
+
this.df = df;
|
|
19
|
+
|
|
20
|
+
await this.buildView();
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// -- View --
|
|
24
|
+
|
|
25
|
+
async buildView(): Promise<void> {
|
|
26
|
+
this.view = grok.shell.addTableView(this.df);
|
|
27
|
+
this.view.path = this.view.basePath = `func/${_package.name}.${this._funcName}`;
|
|
28
|
+
|
|
29
|
+
const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot.fromType('WebLogo', {
|
|
30
|
+
sequenceColumnName: 'sequence',
|
|
31
|
+
}));
|
|
32
|
+
this.view.dockManager.dock(viewer, DG.DOCK_TYPE.DOWN, null, 'WebLogo', 0.35);
|
|
33
|
+
}
|
|
34
|
+
}
|
|
@@ -1,35 +1,33 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
|
|
5
|
-
import * as C from '../utils/constants';
|
|
6
4
|
import {getHelmMonomers} from '../package';
|
|
7
|
-
import {TAGS as bioTAGS, getSplitter,
|
|
5
|
+
import {TAGS as bioTAGS, getSplitter, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
7
|
|
|
9
8
|
const V2000_ATOM_NAME_POS = 31;
|
|
10
9
|
|
|
11
|
-
export async function getMonomericMols(
|
|
12
|
-
pattern: boolean = false, monomersDict?: Map<string, string>
|
|
13
|
-
|
|
14
|
-
const
|
|
15
|
-
const splitter = getSplitter(units, separator);
|
|
10
|
+
export async function getMonomericMols(
|
|
11
|
+
mcol: DG.Column<string>, pattern: boolean = false, monomersDict?: Map<string, string>
|
|
12
|
+
): Promise<DG.Column> {
|
|
13
|
+
const uh = UnitsHandler.getOrCreate(mcol);
|
|
16
14
|
let molV3000Array;
|
|
17
15
|
monomersDict ??= new Map();
|
|
18
|
-
const monomers = units ===
|
|
19
|
-
getHelmMonomers(mcol) : Object.keys(
|
|
16
|
+
const monomers = uh.units === NOTATION.HELM ?
|
|
17
|
+
getHelmMonomers(mcol) : Object.keys(uh.stats.freq).filter((it) => it !== '');
|
|
20
18
|
|
|
21
19
|
for (let i = 0; i < monomers.length; i++) {
|
|
22
20
|
if (!monomersDict.has(monomers[i]))
|
|
23
21
|
monomersDict.set(monomers[i], `${monomersDict.size + 1}`);
|
|
24
22
|
}
|
|
25
23
|
|
|
26
|
-
if (units ===
|
|
24
|
+
if (uh.units === NOTATION.HELM) {
|
|
27
25
|
molV3000Array = await grok.functions.call('HELM:getMolFiles', {col: mcol});
|
|
28
26
|
molV3000Array = changeV2000ToV3000(molV3000Array, monomersDict, pattern);
|
|
29
27
|
} else {
|
|
30
28
|
molV3000Array = new Array<string>(mcol.length);
|
|
31
29
|
for (let i = 0; i < mcol.length; i++) {
|
|
32
|
-
const sequenceMonomers =
|
|
30
|
+
const sequenceMonomers = uh.splitted[i].filter((it) => it !== '');
|
|
33
31
|
const molV3000 = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
|
|
34
32
|
molV3000Array[i] = molV3000;
|
|
35
33
|
}
|
|
@@ -4,7 +4,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
|
|
5
5
|
import {_package} from '../package';
|
|
6
6
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
7
|
-
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
8
7
|
import {handleError} from './utils';
|
|
9
8
|
import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
|
|
10
9
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
@@ -36,7 +35,7 @@ export async function demoBio01UI() {
|
|
|
36
35
|
// TODO: Fix column width
|
|
37
36
|
}, {
|
|
38
37
|
description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
|
|
39
|
-
delay: 2000
|
|
38
|
+
delay: 2000,
|
|
40
39
|
})
|
|
41
40
|
.step('Find the most similar sequences to the current one', async () => {
|
|
42
41
|
const simViewer = await df.plot.fromType('Sequence Similarity Search', {
|
|
@@ -46,17 +45,17 @@ export async function demoBio01UI() {
|
|
|
46
45
|
view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
|
|
47
46
|
}, {
|
|
48
47
|
description: `Add 'Sequence Similarity Search' viewer.`,
|
|
49
|
-
delay: 2000
|
|
48
|
+
delay: 2000,
|
|
50
49
|
})
|
|
51
50
|
.step('Explore most diverse sequences in a dataset', async () => {
|
|
52
51
|
const divViewer = await df.plot.fromType('Sequence Diversity Search', {
|
|
53
52
|
moleculeColumnName: 'sequence',
|
|
54
|
-
diverseColumnLabel: 'Top diverse sequences of all data'
|
|
53
|
+
diverseColumnLabel: 'Top diverse sequences of all data',
|
|
55
54
|
}) as SequenceDiversityViewer;
|
|
56
55
|
view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
|
|
57
56
|
}, {
|
|
58
57
|
description: `Add 'Sequence Deversity Search' viewer.`,
|
|
59
|
-
delay: 2000
|
|
58
|
+
delay: 2000,
|
|
60
59
|
})
|
|
61
60
|
.step('Choose another sequence for similarity search', async () => {
|
|
62
61
|
df.currentRowIdx = 3;
|
|
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {_package} from '../package';
|
|
6
6
|
|
|
7
7
|
import * as lev from 'fastest-levenshtein';
|
|
8
|
-
import {DistanceMatrix} from '@datagrok-libraries/
|
|
8
|
+
import {DistanceMatrix} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
9
9
|
import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
|
|
10
10
|
import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
|
|
11
11
|
import {demoSequenceSpace, handleError} from './utils';
|
|
@@ -19,11 +19,9 @@ export async function demoBio01aUI() {
|
|
|
19
19
|
let dendrogramSvc: IDendrogramService;
|
|
20
20
|
let view: DG.TableView;
|
|
21
21
|
let df: DG.DataFrame;
|
|
22
|
-
let
|
|
22
|
+
let _spViewer: DG.ScatterPlotViewer;
|
|
23
23
|
|
|
24
24
|
const dimRedMethod: string = 'UMAP';
|
|
25
|
-
const idRows: { [id: number]: number } = {};
|
|
26
|
-
const embedCols: { [colName: string]: DG.Column<number> } = {};
|
|
27
25
|
|
|
28
26
|
try {
|
|
29
27
|
const demoScript = new DemoScript(
|
|
@@ -34,7 +32,7 @@ export async function demoBio01aUI() {
|
|
|
34
32
|
[df, treeHelper, dendrogramSvc] = await Promise.all([
|
|
35
33
|
_package.files.readCsv(dataFn),
|
|
36
34
|
getTreeHelper(),
|
|
37
|
-
getDendrogramService()
|
|
35
|
+
getDendrogramService(),
|
|
38
36
|
]);
|
|
39
37
|
view = grok.shell.addTableView(df);
|
|
40
38
|
view.grid.props.rowHeight = 22;
|
|
@@ -49,10 +47,10 @@ export async function demoBio01aUI() {
|
|
|
49
47
|
delay: 2000,
|
|
50
48
|
})
|
|
51
49
|
.step('Build sequence space', async () => {
|
|
52
|
-
|
|
50
|
+
_spViewer = await demoSequenceSpace(view, df, seqColName, dimRedMethod);
|
|
53
51
|
}, {
|
|
54
52
|
description: `Reduce sequence space dimensionality to display on 2D representation.`,
|
|
55
|
-
delay: 2000
|
|
53
|
+
delay: 2000,
|
|
56
54
|
})
|
|
57
55
|
.step('Cluster sequences', async () => {
|
|
58
56
|
const seqCol: DG.Column<string> = df.getCol(seqColName);
|
|
@@ -80,6 +78,7 @@ export async function demoBio01aUI() {
|
|
|
80
78
|
});
|
|
81
79
|
df.currentRowIdx = 27;
|
|
82
80
|
}, {
|
|
81
|
+
// eslint-disable-next-line max-len
|
|
83
82
|
description: 'Selecting a group of rows from a data frame to show their similarity and proximity to each other on a viewer..',
|
|
84
83
|
delay: 2000,
|
|
85
84
|
})
|
|
@@ -2,16 +2,17 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {_package, activityCliffs
|
|
5
|
+
import {_package, activityCliffs} from '../package';
|
|
6
6
|
import $ from 'cash-dom';
|
|
7
7
|
|
|
8
8
|
import {TEMPS as acTEMPS} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
9
9
|
import * as lev from 'fastest-levenshtein';
|
|
10
|
-
import {DistanceMatrix} from '@datagrok-libraries/
|
|
10
|
+
import {DistanceMatrix} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
11
11
|
import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
|
|
12
12
|
import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
|
|
13
13
|
import {handleError} from './utils';
|
|
14
14
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
15
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
15
16
|
|
|
16
17
|
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
17
18
|
|
|
@@ -23,8 +24,7 @@ export async function demoBio01bUI() {
|
|
|
23
24
|
let view: DG.TableView;
|
|
24
25
|
let activityCliffsViewer: DG.ScatterPlotViewer;
|
|
25
26
|
|
|
26
|
-
const dimRedMethod:
|
|
27
|
-
const idRows: { [id: number]: number } = {};
|
|
27
|
+
const dimRedMethod: DimReductionMethods = DimReductionMethods.UMAP;
|
|
28
28
|
|
|
29
29
|
try {
|
|
30
30
|
const demoScript = new DemoScript(
|
|
@@ -38,7 +38,7 @@ export async function demoBio01bUI() {
|
|
|
38
38
|
[df, treeHelper, dendrogramSvc] = await Promise.all([
|
|
39
39
|
_package.files.readCsv(dataFn),
|
|
40
40
|
getTreeHelper(),
|
|
41
|
-
getDendrogramService()
|
|
41
|
+
getDendrogramService(),
|
|
42
42
|
]);
|
|
43
43
|
|
|
44
44
|
view = grok.shell.addTableView(df);
|
|
@@ -62,7 +62,7 @@ export async function demoBio01bUI() {
|
|
|
62
62
|
cliffsLink.click();
|
|
63
63
|
}, {
|
|
64
64
|
description: 'Reveal similar sequences with a cliff of activity.',
|
|
65
|
-
delay: 2000
|
|
65
|
+
delay: 2000,
|
|
66
66
|
})
|
|
67
67
|
.step('Cluster sequences', async () => {
|
|
68
68
|
const seqCol: DG.Column<string> = df.getCol('sequence');
|
|
@@ -79,7 +79,7 @@ export async function demoBio01bUI() {
|
|
|
79
79
|
activityGCol.scrollIntoView();
|
|
80
80
|
}, {
|
|
81
81
|
description: 'Perform hierarchical clustering to reveal relationships between sequences.',
|
|
82
|
-
delay: 2000
|
|
82
|
+
delay: 2000,
|
|
83
83
|
})
|
|
84
84
|
.step('Browse the cliff', async () => {
|
|
85
85
|
//cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
|
|
@@ -99,7 +99,7 @@ export async function demoBio01bUI() {
|
|
|
99
99
|
// }
|
|
100
100
|
}, {
|
|
101
101
|
description: 'Zoom in to explore selected activity cliff details.',
|
|
102
|
-
delay: 2000
|
|
102
|
+
delay: 2000,
|
|
103
103
|
})
|
|
104
104
|
.start();
|
|
105
105
|
} catch (err: any) {
|
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
|
|
5
|
-
|
|
6
4
|
import {_package, toAtomicLevel} from '../package';
|
|
7
|
-
import $ from 'cash-dom';
|
|
8
5
|
import {handleError} from './utils';
|
|
9
6
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
10
7
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
@@ -20,7 +17,7 @@ export async function demoBio03UI(): Promise<void> {
|
|
|
20
17
|
try {
|
|
21
18
|
await new DemoScript(
|
|
22
19
|
'Atomic Level',
|
|
23
|
-
'Atomic level structure of Macromolecules'
|
|
20
|
+
'Atomic level structure of Macromolecules',
|
|
24
21
|
)
|
|
25
22
|
.step(`Loading Macromolecules notation 'Helm'`, async () => {
|
|
26
23
|
grok.shell.windows.showContextPanel = false;
|
|
@@ -7,8 +7,9 @@ import {handleError} from './utils';
|
|
|
7
7
|
|
|
8
8
|
import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
9
9
|
import {pepseaMethods, runPepsea} from '../utils/pepsea';
|
|
10
|
-
import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
11
10
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
11
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
12
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
12
13
|
|
|
13
14
|
const helmFn: string = 'samples/sample_HELM.csv';
|
|
14
15
|
|
|
@@ -22,7 +23,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
22
23
|
|
|
23
24
|
const helmColName: string = 'HELM';
|
|
24
25
|
const msaHelmColName: string = 'msa(HELM)';
|
|
25
|
-
const dimRedMethod:
|
|
26
|
+
const dimRedMethod: DimReductionMethods = DimReductionMethods.UMAP;
|
|
26
27
|
|
|
27
28
|
try {
|
|
28
29
|
const demoScript = new DemoScript(
|
|
@@ -43,20 +44,21 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
43
44
|
const method: string = pepseaMethods[0];
|
|
44
45
|
const gapOpen: number = 1.53;
|
|
45
46
|
const gapExtend: number = 0;
|
|
46
|
-
msaHelmCol = await runPepsea(helmCol, msaHelmColName, method, gapOpen, gapExtend, undefined)
|
|
47
|
+
msaHelmCol = (await runPepsea(helmCol, msaHelmColName, method, gapOpen, gapExtend, undefined))!;
|
|
47
48
|
df.columns.add(msaHelmCol);
|
|
48
49
|
await grok.data.detectSemanticTypes(df);
|
|
49
50
|
}, {
|
|
51
|
+
// eslint-disable-next-line max-len
|
|
50
52
|
description: 'Multiple sequence alignment (MSA) performed with PepSeA tool operating on non-natural aminoacids as well.',
|
|
51
53
|
delay: 2000,
|
|
52
54
|
})
|
|
53
55
|
.step('Build sequence space', async () => {
|
|
54
56
|
ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
|
|
55
|
-
dimRedMethod,
|
|
57
|
+
dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN, true)) as DG.ScatterPlotViewer;
|
|
56
58
|
view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
57
59
|
}, {
|
|
58
60
|
description: 'Reduce sequence space dimensionality to display on 2D representation.',
|
|
59
|
-
delay: 2000
|
|
61
|
+
delay: 2000,
|
|
60
62
|
})
|
|
61
63
|
.step('Analyse sequence composition', async () => {
|
|
62
64
|
wlViewer = await df.plot.fromType('WebLogo', {
|
|
@@ -66,6 +68,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
66
68
|
}) as DG.Viewer & IWebLogoViewer;
|
|
67
69
|
view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
|
|
68
70
|
}, {
|
|
71
|
+
// eslint-disable-next-line max-len
|
|
69
72
|
description: 'Composition analysis allows to reveal functional features of sequences like motifs, or variable loops.',
|
|
70
73
|
delay: 2000,
|
|
71
74
|
})
|
package/src/demo/utils.ts
CHANGED
|
@@ -5,7 +5,8 @@ import * as ui from 'datagrok-api/ui';
|
|
|
5
5
|
import {_package, sequenceSpaceTopMenu} from '../package';
|
|
6
6
|
import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
|
|
7
7
|
import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
|
-
import {
|
|
8
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
9
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
9
10
|
|
|
10
11
|
enum EMBED_COL_NAMES {
|
|
11
12
|
X = 'Embed_X',
|
|
@@ -13,7 +14,7 @@ enum EMBED_COL_NAMES {
|
|
|
13
14
|
}
|
|
14
15
|
|
|
15
16
|
export async function demoSequenceSpace(
|
|
16
|
-
view: DG.TableView, df: DG.DataFrame, colName: string, method: string
|
|
17
|
+
view: DG.TableView, df: DG.DataFrame, colName: string, method: string,
|
|
17
18
|
): Promise<DG.ScatterPlotViewer> {
|
|
18
19
|
let resSpaceViewer: DG.ScatterPlotViewer;
|
|
19
20
|
if (true) {
|
|
@@ -63,7 +64,7 @@ export async function demoSequenceSpace(
|
|
|
63
64
|
})) as DG.ScatterPlotViewer;
|
|
64
65
|
} else {
|
|
65
66
|
resSpaceViewer = (await sequenceSpaceTopMenu(df, df.getCol(colName),
|
|
66
|
-
|
|
67
|
+
DimReductionMethods.UMAP, MmDistanceFunctionsNames.LEVENSHTEIN, true)) as DG.ScatterPlotViewer;
|
|
67
68
|
}
|
|
68
69
|
view.dockManager.dock(resSpaceViewer!, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
69
70
|
return resSpaceViewer;
|
package/src/package-test.ts
CHANGED
|
@@ -22,10 +22,9 @@ import './tests/substructure-filters-tests';
|
|
|
22
22
|
import './tests/pepsea-tests';
|
|
23
23
|
import './tests/viewers';
|
|
24
24
|
import './tests/units-handler-tests';
|
|
25
|
+
import './tests/units-handler-splitted-tests';
|
|
25
26
|
import './tests/to-atomic-level-tests';
|
|
26
27
|
import './tests/mm-distance-tests';
|
|
27
|
-
|
|
28
|
-
// Tests hanging github CI
|
|
29
28
|
import './tests/activity-cliffs-tests';
|
|
30
29
|
import './tests/sequence-space-test';
|
|
31
30
|
|