@datagrok/peptides 1.4.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -0
- package/dist/package-test.js +6093 -60759
- package/dist/package.js +5726 -60613
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +356 -267
- package/files/tests/aligned_5k.d42 +0 -0
- package/package.json +15 -22
- package/src/model.ts +644 -554
- package/src/package-test.ts +2 -0
- package/src/package.ts +35 -61
- package/src/styles.css +7 -0
- package/src/tests/algorithms.ts +29 -20
- package/src/tests/core.ts +57 -19
- package/src/tests/peptide-space-test.ts +5 -53
- package/src/tests/utils.ts +4 -4
- package/src/tests/viewers.ts +17 -0
- package/src/utils/algorithms.ts +25 -22
- package/src/utils/cell-renderer.ts +48 -43
- package/src/utils/constants.ts +14 -37
- package/src/utils/misc.ts +29 -18
- package/src/utils/peptide-similarity-space.ts +5 -8
- package/src/utils/statistics.ts +18 -9
- package/src/utils/types.ts +8 -0
- package/src/viewers/logo-summary.ts +287 -53
- package/src/viewers/peptide-space-viewer.ts +5 -5
- package/src/viewers/sar-viewer.ts +245 -58
- package/src/widgets/distribution.ts +102 -32
- package/src/widgets/manual-alignment.ts +3 -3
- package/src/widgets/mutation-cliffs.ts +8 -5
- package/src/widgets/peptides.ts +78 -40
- package/src/widgets/settings.ts +20 -16
- package/tsconfig.json +3 -3
- package/jest.config.js +0 -33
- package/src/__jest__/remote.test.ts +0 -69
- package/src/__jest__/test-node.ts +0 -97
- package/test-Peptides-62cc009524f3-d4fc804f.html +0 -276
package/src/package-test.ts
CHANGED
|
@@ -3,6 +3,8 @@ import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
|
|
|
3
3
|
|
|
4
4
|
import './tests/core';
|
|
5
5
|
import './tests/peptide-space-test';
|
|
6
|
+
import './tests/algorithms';
|
|
7
|
+
import './tests/viewers';
|
|
6
8
|
|
|
7
9
|
export const _package = new DG.Package();
|
|
8
10
|
export {tests};
|
package/src/package.ts
CHANGED
|
@@ -2,28 +2,31 @@
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
-
import * as C from './utils/constants';
|
|
6
5
|
|
|
7
6
|
import {analyzePeptidesUI} from './widgets/peptides';
|
|
8
7
|
import {PeptideSimilaritySpaceWidget} from './utils/peptide-similarity-space';
|
|
9
8
|
import {manualAlignmentWidget} from './widgets/manual-alignment';
|
|
10
|
-
import {
|
|
9
|
+
import {MonomerPosition, MostPotentResiduesViewer} from './viewers/sar-viewer';
|
|
11
10
|
|
|
12
11
|
import {PeptideSpaceViewer} from './viewers/peptide-space-viewer';
|
|
13
12
|
import {LogoSummary} from './viewers/logo-summary';
|
|
14
|
-
import {MonomerWorks} from '@datagrok-libraries/bio';
|
|
13
|
+
import {MonomerWorks} from '@datagrok-libraries/bio/src/monomer-works/monomer-works';
|
|
14
|
+
import {PeptidesModel} from './model';
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
let monomerWorks: MonomerWorks | null = null;
|
|
17
17
|
|
|
18
18
|
export const _package = new DG.Package();
|
|
19
|
-
let currentTable: DG.DataFrame;
|
|
20
|
-
let alignedSequenceColumn: DG.Column;
|
|
21
19
|
|
|
22
|
-
export function getMonomerWorks() {
|
|
20
|
+
export function getMonomerWorks(): MonomerWorks | null {
|
|
23
21
|
return monomerWorks;
|
|
24
|
-
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
//tags: init
|
|
25
|
+
export async function initPeptides(): Promise<void> {
|
|
26
|
+
monomerWorks ??= new MonomerWorks(await grok.functions.call('Bio:getBioLib'));
|
|
27
|
+
}
|
|
25
28
|
|
|
26
|
-
async function
|
|
29
|
+
async function openDemoData(chosenFile: string): Promise<void> {
|
|
27
30
|
const pi = DG.TaskBarProgressIndicator.create('Loading Peptides');
|
|
28
31
|
const path = _package.webRoot + 'files/' + chosenFile;
|
|
29
32
|
const peptides = await grok.data.loadTable(path);
|
|
@@ -37,13 +40,10 @@ async function main(chosenFile: string): Promise<void> {
|
|
|
37
40
|
|
|
38
41
|
//name: Peptides
|
|
39
42
|
//tags: app
|
|
40
|
-
export
|
|
43
|
+
export function Peptides(): void {
|
|
41
44
|
const wikiLink = ui.link('wiki', 'https://github.com/datagrok-ai/public/blob/master/help/domains/bio/peptides.md');
|
|
42
45
|
const textLink = ui.inlineText(['For more details, see our ', wikiLink, '.']);
|
|
43
|
-
|
|
44
|
-
let lib = await grok.functions.call('Bio:getBioLib');
|
|
45
|
-
monomerWorks = new MonomerWorks(lib);
|
|
46
|
-
}
|
|
46
|
+
|
|
47
47
|
const appDescription = ui.info(
|
|
48
48
|
[
|
|
49
49
|
ui.list([
|
|
@@ -73,19 +73,22 @@ export async function Peptides(): Promise<void> {
|
|
|
73
73
|
appDescription,
|
|
74
74
|
ui.info([textLink]),
|
|
75
75
|
ui.divH([
|
|
76
|
-
ui.button('Simple demo', () =>
|
|
77
|
-
ui.button('Complex demo', () =>
|
|
78
|
-
ui.button('HELM demo', () =>
|
|
76
|
+
ui.button('Simple demo', () => openDemoData('aligned.csv'), ''),
|
|
77
|
+
ui.button('Complex demo', () => openDemoData('aligned_2.csv'), ''),
|
|
78
|
+
ui.button('HELM demo', () => openDemoData('aligned_3.csv'), ''),
|
|
79
79
|
]),
|
|
80
80
|
]);
|
|
81
81
|
}
|
|
82
82
|
|
|
83
83
|
//top-menu: Bio | Peptides...
|
|
84
84
|
//name: Bio Peptides
|
|
85
|
-
export
|
|
86
|
-
const analyzeObject =
|
|
87
|
-
const dialog = ui.dialog('Analyze Peptides').add(analyzeObject.host).onOK(
|
|
88
|
-
|
|
85
|
+
export function peptidesDialog(): DG.Dialog {
|
|
86
|
+
const analyzeObject = analyzePeptidesUI(grok.shell.t);
|
|
87
|
+
const dialog = ui.dialog('Analyze Peptides').add(analyzeObject.host).onOK(async () => {
|
|
88
|
+
const startSuccess = analyzeObject.callback();
|
|
89
|
+
if (!startSuccess)
|
|
90
|
+
dialog.show();
|
|
91
|
+
});
|
|
89
92
|
return dialog.show();
|
|
90
93
|
}
|
|
91
94
|
|
|
@@ -93,9 +96,8 @@ export async function peptidesDialog(): Promise<DG.Dialog> {
|
|
|
93
96
|
//tags: panel, widgets
|
|
94
97
|
//input: column col {semType: Macromolecule}
|
|
95
98
|
//output: widget result
|
|
96
|
-
export
|
|
97
|
-
|
|
98
|
-
const analyzeObject = await analyzePeptidesUI(currentTable, alignedSequenceColumn);
|
|
99
|
+
export function peptidesPanel(col: DG.Column): DG.Widget {
|
|
100
|
+
const analyzeObject = analyzePeptidesUI(col.dataFrame, col);
|
|
99
101
|
return new DG.Widget(analyzeObject.host);
|
|
100
102
|
}
|
|
101
103
|
|
|
@@ -103,8 +105,8 @@ export async function peptidesPanel(col: DG.Column): Promise<DG.Widget> {
|
|
|
103
105
|
//description: Peptides SAR Viewer
|
|
104
106
|
//tags: viewer
|
|
105
107
|
//output: viewer result
|
|
106
|
-
export function sar():
|
|
107
|
-
return new
|
|
108
|
+
export function sar(): MonomerPosition {
|
|
109
|
+
return new MonomerPosition();
|
|
108
110
|
}
|
|
109
111
|
|
|
110
112
|
//name: peptide-sar-viewer-vertical
|
|
@@ -135,9 +137,14 @@ export function peptideSpace(): PeptideSpaceViewer {
|
|
|
135
137
|
//input: string _monomer {semType: Monomer}
|
|
136
138
|
//output: widget result
|
|
137
139
|
export function manualAlignment(_monomer: string): DG.Widget {
|
|
138
|
-
[currentTable, alignedSequenceColumn] = getOrDefine();
|
|
139
140
|
//TODO: recalculate Molfile and Molecule panels on sequence update
|
|
140
|
-
|
|
141
|
+
const df = grok.shell.t;
|
|
142
|
+
const model: PeptidesModel | null = df?.temp[PeptidesModel.modelName];
|
|
143
|
+
if (!model)
|
|
144
|
+
return new DG.Widget(ui.divText('Manual alignment works with peptides analysis'));
|
|
145
|
+
|
|
146
|
+
const col = df.getCol(model.settings.sequenceColumnName!);
|
|
147
|
+
return manualAlignmentWidget(col, df);
|
|
141
148
|
}
|
|
142
149
|
|
|
143
150
|
//name: Peptide Space
|
|
@@ -145,39 +152,6 @@ export function manualAlignment(_monomer: string): DG.Widget {
|
|
|
145
152
|
//input: column col {semType: Macromolecule}
|
|
146
153
|
//output: widget result
|
|
147
154
|
export async function peptideSpacePanel(col: DG.Column): Promise<DG.Widget> {
|
|
148
|
-
[currentTable, alignedSequenceColumn] = getOrDefine(col.dataFrame, col);
|
|
149
155
|
const widget = new PeptideSimilaritySpaceWidget(col, grok.shell.v as DG.TableView);
|
|
150
156
|
return widget.draw();
|
|
151
157
|
}
|
|
152
|
-
|
|
153
|
-
//name: Get Peptides Structure
|
|
154
|
-
//tags: panel, widgets
|
|
155
|
-
//input: column col {semType: Macromolecule}
|
|
156
|
-
//output: widget result
|
|
157
|
-
export function getPeptidesStructure(col: DG.Column): DG.Widget {
|
|
158
|
-
const getButtonTooltip = 'Retrieves peptides structure from customer database by special id column';
|
|
159
|
-
const getButton = ui.button('Get structure', async () => {
|
|
160
|
-
const progress = DG.TaskBarProgressIndicator.create('Getting structure...');
|
|
161
|
-
try {
|
|
162
|
-
const params = {peptidesTable: col.dataFrame};
|
|
163
|
-
const result = await grok.functions.call('Customerextensions:getPeptidesStructure', params);
|
|
164
|
-
const text = result ? 'Structure retreived' : 'Structure retreivial is not possible';
|
|
165
|
-
grok.shell.info(text);
|
|
166
|
-
} catch (e) {
|
|
167
|
-
console.warn(e);
|
|
168
|
-
} finally {
|
|
169
|
-
progress.close();
|
|
170
|
-
}
|
|
171
|
-
}, getButtonTooltip);
|
|
172
|
-
return new DG.Widget(getButton);
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
function getOrDefine(dataframe?: DG.DataFrame, column?: DG.Column | null): [DG.DataFrame, DG.Column] {
|
|
176
|
-
dataframe ??= grok.shell.t;
|
|
177
|
-
// column ??= dataframe.columns.bySemType(C.SEM_TYPES.MACROMOLECULE);
|
|
178
|
-
column ??= dataframe.getCol(C.COLUMNS_NAMES.MACROMOLECULE);
|
|
179
|
-
if (column === null)
|
|
180
|
-
throw new Error('Table does not contain aligned sequence columns');
|
|
181
|
-
|
|
182
|
-
return [dataframe, column];
|
|
183
|
-
}
|
package/src/styles.css
CHANGED
package/src/tests/algorithms.ts
CHANGED
|
@@ -1,29 +1,27 @@
|
|
|
1
|
-
import * as grok from 'datagrok-api/grok';
|
|
2
1
|
import * as DG from 'datagrok-api/dg';
|
|
3
2
|
|
|
4
|
-
import {category, test, expect,
|
|
3
|
+
import {category, test, expect, before} from '@datagrok-libraries/utils/src/test';
|
|
5
4
|
|
|
6
5
|
import {_package} from '../package-test';
|
|
7
|
-
import {startAnalysis} from '../widgets/peptides';
|
|
8
|
-
import {PeptidesModel} from '../model';
|
|
9
|
-
import * as C from '../utils/constants';
|
|
10
|
-
import {scaleActivity} from '../utils/misc';
|
|
11
|
-
import {ALPHABET, TAGS, NOTATION, ALIGNMENT} from '@datagrok-libraries/bio';
|
|
12
6
|
import {findMutations} from '../utils/algorithms';
|
|
13
7
|
import * as type from '../utils/types';
|
|
14
8
|
|
|
15
9
|
category('Algorithms', () => {
|
|
16
|
-
let activityCol:
|
|
17
|
-
let monomerColumns:
|
|
10
|
+
let activityCol: type.RawData;
|
|
11
|
+
let monomerColumns: type.RawColumn[];
|
|
18
12
|
let settings: type.PeptidesSettings;
|
|
19
13
|
|
|
20
14
|
before(async () => {
|
|
21
|
-
activityCol = DG.Column.fromList('int', 'test', [1, 2, 5]);
|
|
15
|
+
activityCol = DG.Column.fromList('int', 'test', [1, 2, 5]).getRawData();
|
|
22
16
|
monomerColumns = [
|
|
23
|
-
DG.Column.fromList('string', '1', '
|
|
24
|
-
DG.Column.fromList('string', '2', '
|
|
25
|
-
DG.Column.fromList('string', '3', '
|
|
26
|
-
]
|
|
17
|
+
DG.Column.fromList('string', '1', 'AAA'.split('')),
|
|
18
|
+
DG.Column.fromList('string', '2', 'BCC'.split('')),
|
|
19
|
+
DG.Column.fromList('string', '3', 'CCD'.split('')),
|
|
20
|
+
].map((col) => ({
|
|
21
|
+
name: col.name,
|
|
22
|
+
rawData: col.getRawData(),
|
|
23
|
+
cat: col.categories,
|
|
24
|
+
}));
|
|
27
25
|
settings = {maxMutations: 1, minActivityDelta: 2};
|
|
28
26
|
});
|
|
29
27
|
|
|
@@ -40,12 +38,23 @@ category('Algorithms', () => {
|
|
|
40
38
|
|
|
41
39
|
const c3 = c.get('3')!;
|
|
42
40
|
const d3 = d.get('3')!;
|
|
43
|
-
expect(c3.has(
|
|
44
|
-
expect(d3.has(
|
|
41
|
+
expect(c3.has(1), true);
|
|
42
|
+
expect(d3.has(2), true);
|
|
45
43
|
|
|
46
|
-
const
|
|
47
|
-
const
|
|
48
|
-
expect(
|
|
49
|
-
expect(
|
|
44
|
+
const c31 = c3.get(1)!;
|
|
45
|
+
const d32 = d3.get(2)!;
|
|
46
|
+
expect(c31[0], 2);
|
|
47
|
+
expect(d32[0], 1);
|
|
50
48
|
});
|
|
49
|
+
|
|
50
|
+
test('MutationCliffs - Benchmark 5k', async () => {
|
|
51
|
+
const df = (await _package.files.readBinaryDataFrames('tests/aligned_5k.d42'))[0];
|
|
52
|
+
const activityCol: type.RawData = df.getCol('Activity').getRawData();
|
|
53
|
+
const monomerCols: type.RawColumn[] = [];
|
|
54
|
+
for (let i = 1; i < 16; ++i) {
|
|
55
|
+
const col = df.getCol(i.toString());
|
|
56
|
+
monomerCols.push({name: col.name, rawData: col.getRawData(), cat: col.categories});
|
|
57
|
+
}
|
|
58
|
+
DG.time('MutationCliffs', () => findMutations(activityCol, monomerCols));
|
|
59
|
+
}, {skipReason: 'Benchmark'});
|
|
51
60
|
});
|
package/src/tests/core.ts
CHANGED
|
@@ -8,21 +8,18 @@ import {startAnalysis} from '../widgets/peptides';
|
|
|
8
8
|
import {PeptidesModel} from '../model';
|
|
9
9
|
import * as C from '../utils/constants';
|
|
10
10
|
import {scaleActivity} from '../utils/misc';
|
|
11
|
-
import {
|
|
11
|
+
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
12
|
|
|
13
13
|
category('Core', () => {
|
|
14
14
|
let simpleTable: DG.DataFrame;
|
|
15
15
|
let simpleActivityCol: DG.Column<number>;
|
|
16
16
|
let simpleAlignedSeqCol: DG.Column<string>;
|
|
17
17
|
let simpleScaledCol: DG.Column<number>;
|
|
18
|
-
let scalingFormula: (x: number) => number;
|
|
19
|
-
let simpleScaledColName: string;
|
|
20
18
|
|
|
21
19
|
let complexTable: DG.DataFrame;
|
|
22
20
|
let complexActivityCol: DG.Column<number>;
|
|
23
21
|
let complexAlignedSeqCol: DG.Column<string>;
|
|
24
22
|
let complexScaledCol: DG.Column<number>;
|
|
25
|
-
let complexScaledColName: string;
|
|
26
23
|
const alignedSequenceCol = 'AlignedSequence';
|
|
27
24
|
|
|
28
25
|
let model: PeptidesModel | null = null;
|
|
@@ -32,19 +29,17 @@ category('Core', () => {
|
|
|
32
29
|
simpleTable = DG.DataFrame.fromCsv(await _package.files.readAsText('aligned.csv'));
|
|
33
30
|
simpleActivityCol = simpleTable.getCol(simpleActivityColName);
|
|
34
31
|
simpleAlignedSeqCol = simpleTable.getCol(alignedSequenceCol);
|
|
35
|
-
simpleAlignedSeqCol.semType =
|
|
32
|
+
simpleAlignedSeqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
36
33
|
simpleAlignedSeqCol.setTag(C.TAGS.ALPHABET, ALPHABET.PT);
|
|
37
34
|
simpleAlignedSeqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
38
|
-
simpleAlignedSeqCol.setTag(
|
|
35
|
+
simpleAlignedSeqCol.setTag(bioTAGS.aligned, ALIGNMENT.SEQ_MSA);
|
|
39
36
|
simpleScaledCol = scaleActivity(simpleActivityCol, '-lg');
|
|
40
37
|
|
|
41
38
|
model = await startAnalysis(simpleActivityCol, simpleAlignedSeqCol, null, simpleTable, simpleScaledCol, '-lg');
|
|
42
39
|
expect(model instanceof PeptidesModel, true);
|
|
43
40
|
|
|
44
|
-
if (model != null)
|
|
41
|
+
if (model != null)
|
|
45
42
|
model.mutationCliffsSelection = {'11': ['D']};
|
|
46
|
-
grok.shell.closeTable(model.df);
|
|
47
|
-
}
|
|
48
43
|
});
|
|
49
44
|
|
|
50
45
|
test('Start analysis: сomplex', async () => {
|
|
@@ -52,21 +47,19 @@ category('Core', () => {
|
|
|
52
47
|
complexTable = DG.DataFrame.fromCsv(await _package.files.readAsText('aligned_2.csv'));
|
|
53
48
|
complexActivityCol = complexTable.getCol(complexActivityColName);
|
|
54
49
|
complexAlignedSeqCol = complexTable.getCol('MSA');
|
|
55
|
-
complexAlignedSeqCol.semType =
|
|
50
|
+
complexAlignedSeqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
56
51
|
complexAlignedSeqCol.setTag(C.TAGS.ALPHABET, ALPHABET.UN);
|
|
57
52
|
complexAlignedSeqCol.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
58
|
-
complexAlignedSeqCol.setTag(
|
|
59
|
-
complexAlignedSeqCol.
|
|
53
|
+
complexAlignedSeqCol.setTag(bioTAGS.aligned, ALIGNMENT.SEQ_MSA);
|
|
54
|
+
complexAlignedSeqCol.setTag(C.TAGS.SEPARATOR, '/');
|
|
60
55
|
complexScaledCol = scaleActivity(complexActivityCol, '-lg');
|
|
61
56
|
|
|
62
57
|
model = await startAnalysis(
|
|
63
58
|
complexActivityCol, complexAlignedSeqCol, null, complexTable, complexScaledCol, '-lg');
|
|
64
59
|
expect(model instanceof PeptidesModel, true);
|
|
65
60
|
|
|
66
|
-
if (model != null)
|
|
61
|
+
if (model != null)
|
|
67
62
|
model.mutationCliffsSelection = {'13': ['-']};
|
|
68
|
-
grok.shell.closeTable(model.df);
|
|
69
|
-
}
|
|
70
63
|
});
|
|
71
64
|
|
|
72
65
|
test('Save and load project', async () => {
|
|
@@ -74,10 +67,10 @@ category('Core', () => {
|
|
|
74
67
|
simpleTable = DG.DataFrame.fromCsv(await _package.files.readAsText('aligned.csv'));
|
|
75
68
|
simpleActivityCol = simpleTable.getCol(simpleActivityColName);
|
|
76
69
|
simpleAlignedSeqCol = simpleTable.getCol(alignedSequenceCol);
|
|
77
|
-
simpleAlignedSeqCol.semType =
|
|
70
|
+
simpleAlignedSeqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
78
71
|
simpleAlignedSeqCol.setTag(C.TAGS.ALPHABET, ALPHABET.PT);
|
|
79
72
|
simpleAlignedSeqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
80
|
-
simpleAlignedSeqCol.setTag(
|
|
73
|
+
simpleAlignedSeqCol.setTag(bioTAGS.aligned, ALIGNMENT.SEQ_MSA);
|
|
81
74
|
simpleScaledCol = scaleActivity(simpleActivityCol, '-lg');
|
|
82
75
|
|
|
83
76
|
model = await startAnalysis(simpleActivityCol, simpleAlignedSeqCol, null, simpleTable, simpleScaledCol, '-lg');
|
|
@@ -98,12 +91,57 @@ category('Core', () => {
|
|
|
98
91
|
grok.shell.closeTable(d);
|
|
99
92
|
await delay(500);
|
|
100
93
|
|
|
101
|
-
await
|
|
94
|
+
await sp.open();
|
|
102
95
|
v = grok.shell.getTableView('Peptides analysis');
|
|
103
|
-
grok.shell.closeTable(v.dataFrame);
|
|
104
96
|
|
|
105
97
|
await grok.dapi.layouts.delete(sl);
|
|
106
98
|
await grok.dapi.tables.delete(sti);
|
|
107
99
|
await grok.dapi.projects.delete(sp);
|
|
108
100
|
});
|
|
101
|
+
|
|
102
|
+
test('Cluster stats - Benchmark HELM 5k', async () => {
|
|
103
|
+
const df = (await _package.files.readBinaryDataFrames('tests/aligned_5k_2.d42'))[0];
|
|
104
|
+
const activityCol = df.getCol('Activity');
|
|
105
|
+
const scaledActivityCol = scaleActivity(activityCol, 'none');
|
|
106
|
+
const clustersCol = df.getCol('Cluster');
|
|
107
|
+
const sequenceCol = df.getCol('HELM');
|
|
108
|
+
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
109
|
+
sequenceCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
110
|
+
const model = await startAnalysis(activityCol, sequenceCol, clustersCol, df, scaledActivityCol, 'none');
|
|
111
|
+
|
|
112
|
+
for (let i = 0; i < 5; ++i)
|
|
113
|
+
DG.time('Cluster stats', () => model?.calculateClusterStatistics());
|
|
114
|
+
}, {skipReason: 'Benchmark'});
|
|
115
|
+
|
|
116
|
+
test('Monomer Position stats - Benchmark HELM 5k', async () => {
|
|
117
|
+
const df = (await _package.files.readBinaryDataFrames('tests/aligned_5k.d42'))[0];
|
|
118
|
+
const activityCol = df.getCol('Activity');
|
|
119
|
+
const scaledActivityCol = scaleActivity(activityCol, 'none');
|
|
120
|
+
const clustersCol = df.getCol('Cluster');
|
|
121
|
+
const sequenceCol = df.getCol('HELM');
|
|
122
|
+
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
123
|
+
sequenceCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
124
|
+
const model = await startAnalysis(activityCol, sequenceCol, clustersCol, df, scaledActivityCol, 'none');
|
|
125
|
+
|
|
126
|
+
for (let i = 0; i < 5; ++i)
|
|
127
|
+
DG.time('Monomer position stats', () => model?.calculateMonomerPositionStatistics());
|
|
128
|
+
}, {skipReason: 'Benchmark'});
|
|
129
|
+
|
|
130
|
+
test('Analysis start - Benchmark HELM 5k', async () => {
|
|
131
|
+
const df = (await _package.files.readBinaryDataFrames('tests/aligned_5k.d42'))[0];
|
|
132
|
+
const activityCol = df.getCol('Activity');
|
|
133
|
+
const scaledActivityCol = scaleActivity(activityCol, 'none');
|
|
134
|
+
const clustersCol = df.getCol('Cluster');
|
|
135
|
+
const sequenceCol = df.getCol('HELM');
|
|
136
|
+
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
137
|
+
sequenceCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
138
|
+
|
|
139
|
+
for (let i = 0; i < 5; ++i) {
|
|
140
|
+
await DG.timeAsync('Analysis start', async () => {
|
|
141
|
+
const model = await startAnalysis(activityCol, sequenceCol, clustersCol, df, scaledActivityCol, 'none');
|
|
142
|
+
if (model)
|
|
143
|
+
grok.shell.closeTable(model.df);
|
|
144
|
+
});
|
|
145
|
+
}
|
|
146
|
+
}, {skipReason: 'Benchmark'});
|
|
109
147
|
});
|
|
package/src/tests/peptide-space-test.ts
CHANGED
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
import {/*before, after, */after, category, test} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
import
|
|
3
|
-
_testViewerIsDrawing,
|
|
4
|
-
_testDimensionalityReducer,
|
|
5
|
-
_testPeptideSimilaritySpaceViewer,
|
|
6
|
-
_testTableIsNotEmpty,
|
|
7
|
-
} from './utils';
|
|
2
|
+
import * as utils from './utils';
|
|
8
3
|
import {DimensionalityReducer} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
9
4
|
import {cleanAlignedSequencesColumn} from '../utils/peptide-similarity-space';
|
|
10
5
|
import {aligned1} from './test-data';
|
|
@@ -12,7 +7,6 @@ import {aligned1} from './test-data';
|
|
|
12
7
|
import * as DG from 'datagrok-api/dg';
|
|
13
8
|
import * as grok from 'datagrok-api/grok';
|
|
14
9
|
import {StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
15
|
-
import {computeWeights} from '../viewers/peptide-space-viewer';
|
|
16
10
|
import {_package} from '../package-test';
|
|
17
11
|
|
|
18
12
|
let table: DG.DataFrame;
|
|
@@ -22,12 +16,12 @@ category('Peptide space', async () => {
|
|
|
22
16
|
table = DG.DataFrame.fromCsv(aligned1);
|
|
23
17
|
|
|
24
18
|
test('test_table.is_not_empty', async () => {
|
|
25
|
-
_testTableIsNotEmpty(table);
|
|
19
|
+
utils._testTableIsNotEmpty(table);
|
|
26
20
|
});
|
|
27
21
|
|
|
28
22
|
test('PeptideSimilaritySpaceWidget.is_drawing', async () => {
|
|
29
23
|
view = grok.shell.addTableView(table);
|
|
30
|
-
await _testViewerIsDrawing(table, view);
|
|
24
|
+
await utils._testViewerIsDrawing(table, view);
|
|
31
25
|
});
|
|
32
26
|
|
|
33
27
|
const alignedSequencesColumn = table.getCol('AlignedSequence');
|
|
@@ -37,7 +31,7 @@ category('Peptide space', async () => {
|
|
|
37
31
|
for (const method of DimensionalityReducer.availableMethods) {
|
|
38
32
|
for (const measure of DimensionalityReducer.availableMetricsByType('String')) {
|
|
39
33
|
test(`peptide_space.DimensinalityReducer.${method}.${measure}.is_numeric`, async () => {
|
|
40
|
-
await _testDimensionalityReducer(columnData, method as StringMetrics, measure);
|
|
34
|
+
await utils._testDimensionalityReducer(columnData, method as StringMetrics, measure);
|
|
41
35
|
});
|
|
42
36
|
}
|
|
43
37
|
}
|
|
@@ -47,51 +41,9 @@ category('Peptide space', async () => {
|
|
|
47
41
|
for (const method of DimensionalityReducer.availableMethods) {
|
|
48
42
|
for (const measure of DimensionalityReducer.availableMetricsByType('String')) {
|
|
49
43
|
test(`peptide_space.PeptideSimilaritySpaceViewer.${method}.${measure}.is_proper`, async () => {
|
|
50
|
-
await _testPeptideSimilaritySpaceViewer(table, alignedSequencesColumn, method, measure, 100);//, view);
|
|
44
|
+
await utils._testPeptideSimilaritySpaceViewer(table, alignedSequencesColumn, method, measure, 100);//, view);
|
|
51
45
|
});
|
|
52
46
|
}
|
|
53
47
|
}
|
|
54
48
|
});
|
|
55
|
-
|
|
56
|
-
after(async () => {
|
|
57
|
-
view?.close();
|
|
58
|
-
});
|
|
59
49
|
});
|
|
60
|
-
|
|
61
|
-
// category('Peptide Space Performance', () => {
|
|
62
|
-
// test('test_compute_weights_performance', async () => {
|
|
63
|
-
// const table = DG.DataFrame.fromCsv(await _package.files.readAsText('peptides_large.csv'));
|
|
64
|
-
// const results: {[key: string]: {[key: string]: {[key: string]: number}}} = {};
|
|
65
|
-
// const sliceVolumes = [1, 2, 3, 4, 5, 7, 10];
|
|
66
|
-
// const methods = DimensionalityReducer.availableMethods;
|
|
67
|
-
// const metrics = DimensionalityReducer.availableMetricsByType('String');
|
|
68
|
-
// const totalRuns = sliceVolumes.length * methods.length * metrics.length;
|
|
69
|
-
// console.log('Started Peptide Space Performance benchmark...');
|
|
70
|
-
|
|
71
|
-
// let run = 0;
|
|
72
|
-
// for (const slice of sliceVolumes) {
|
|
73
|
-
// const bitset = DG.BitSet.create(table.rowCount, (i) => i < slice * 1000);
|
|
74
|
-
// const tableSlice = table.clone(bitset);
|
|
75
|
-
// const col = tableSlice.getCol('sequence');
|
|
76
|
-
// const methodObj: {[key: string]: {[key: string]: number}} = {};
|
|
77
|
-
|
|
78
|
-
// for (const method of methods) {
|
|
79
|
-
// const measureObj: {[key: string]: number} = {};
|
|
80
|
-
|
|
81
|
-
// for (const metric of metrics) {
|
|
82
|
-
// console.log(`Run ${run++}/${totalRuns}`);
|
|
83
|
-
|
|
84
|
-
// const start = new Date();
|
|
85
|
-
// await computeWeights(tableSlice, method, metric, 100, col);
|
|
86
|
-
// const stop = new Date();
|
|
87
|
-
|
|
88
|
-
// measureObj[metric] = stop.getTime() - start.getTime();
|
|
89
|
-
// }
|
|
90
|
-
// methodObj[method] = measureObj;
|
|
91
|
-
// }
|
|
92
|
-
// results[`${slice}k`] = methodObj;
|
|
93
|
-
// }
|
|
94
|
-
// console.log('Peptide Space Performance benchmark finished...');
|
|
95
|
-
// console.log(results);
|
|
96
|
-
// });
|
|
97
|
-
// });
|
package/src/tests/utils.ts
CHANGED
|
@@ -45,8 +45,8 @@ export async function _testDimensionalityReducer(
|
|
|
45
45
|
|
|
46
46
|
const [X, Y] = embcols as Array<Float32Array>;
|
|
47
47
|
|
|
48
|
-
expect(X.every((v) => v !== null && v
|
|
49
|
-
expect(Y.every((v) => v !== null && v
|
|
48
|
+
expect(X.every((v) => v !== null && !Number.isNaN(v)), true);
|
|
49
|
+
expect(Y.every((v) => v !== null && !Number.isNaN(v)), true);
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
/**
|
|
@@ -64,12 +64,12 @@ export async function _testDimensionalityReducer(
|
|
|
64
64
|
export async function _testPeptideSimilaritySpaceViewer(table: DG.DataFrame, alignedSequencesColumn: DG.Column,
|
|
65
65
|
method: string, measure: string, cyclesCount: number): Promise<void> {
|
|
66
66
|
const viewer = await createPeptideSimilaritySpaceViewer(
|
|
67
|
-
table, method, measure, cyclesCount,
|
|
67
|
+
table, method, measure, cyclesCount, alignedSequencesColumn, undefined);
|
|
68
68
|
const df = viewer.dataFrame;
|
|
69
69
|
|
|
70
70
|
const axesNames = ['~X', '~Y', '~MW'];
|
|
71
71
|
const axes = axesNames.map((v) => df.getCol(v).getRawData() as Float32Array);
|
|
72
72
|
|
|
73
73
|
for (const ax of axes)
|
|
74
|
-
expect(ax.every((v) => v !== null && v
|
|
74
|
+
expect(ax.every((v) => v !== null && !Number.isNaN(v)), true);
|
|
75
75
|
}
|
|
package/src/tests/viewers.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
//import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {category, test, testViewer} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
import {aligned1} from './test-data';
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
category('Viewers', () => {
|
|
10
|
+
const df = DG.DataFrame.fromCsv(aligned1);
|
|
11
|
+
const viewers = DG.Func.find({package: 'Peptides', tags: ['viewer']}).map((f) => f.friendlyName);
|
|
12
|
+
for (const v of viewers) {
|
|
13
|
+
test(v, async () => {
|
|
14
|
+
await testViewer(v, df.clone(), true);
|
|
15
|
+
}, {skipReason: 'GROK-11534'});
|
|
16
|
+
}
|
|
17
|
+
});
|
package/src/utils/algorithms.ts
CHANGED
|
@@ -1,34 +1,32 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
|
|
3
1
|
import * as C from './constants';
|
|
4
2
|
import * as type from './types';
|
|
5
3
|
import {getTypedArrayConstructor} from './misc';
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
type MutationCliffInfo = {pos: string, seq1monomer: string, seq2monomer: string, seq1Idx: number, seq2Idx: number};
|
|
6
|
+
|
|
7
|
+
export function findMutations(activityArray: type.RawData, monomerInfoArray: type.RawColumn[],
|
|
9
8
|
settings: type.PeptidesSettings = {}): type.SubstitutionsInfo {
|
|
10
|
-
const nCols =
|
|
9
|
+
const nCols = monomerInfoArray.length;
|
|
11
10
|
if (nCols == 0)
|
|
12
11
|
throw new Error(`PepAlgorithmError: Couldn't find any column of semType '${C.SEM_TYPES.MONOMER}'`);
|
|
13
12
|
|
|
14
13
|
const substitutionsInfo: type.SubstitutionsInfo = new Map();
|
|
15
|
-
const nRows =
|
|
14
|
+
const nRows = activityArray.length;
|
|
16
15
|
for (let seq1Idx = 0; seq1Idx < nRows - 1; seq1Idx++) {
|
|
17
16
|
for (let seq2Idx = seq1Idx + 1; seq2Idx < nRows; seq2Idx++) {
|
|
18
17
|
let substCounter = 0;
|
|
19
|
-
const activityValSeq1 =
|
|
20
|
-
const activityValSeq2 =
|
|
18
|
+
const activityValSeq1 = activityArray[seq1Idx];
|
|
19
|
+
const activityValSeq2 = activityArray[seq2Idx];
|
|
21
20
|
const delta = activityValSeq1 - activityValSeq2;
|
|
22
21
|
if (Math.abs(delta) < (settings.minActivityDelta ?? 0))
|
|
23
22
|
continue;
|
|
24
23
|
|
|
25
24
|
let substCounterFlag = false;
|
|
26
|
-
const tempData:
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
if (seq1monomer == seq2monomer)
|
|
25
|
+
const tempData: MutationCliffInfo[] = [];
|
|
26
|
+
for (const monomerInfo of monomerInfoArray) {
|
|
27
|
+
const seq1category = monomerInfo.rawData[seq1Idx];
|
|
28
|
+
const seq2category = monomerInfo.rawData[seq2Idx];
|
|
29
|
+
if (seq1category == seq2category)
|
|
32
30
|
continue;
|
|
33
31
|
|
|
34
32
|
substCounter++;
|
|
@@ -37,9 +35,9 @@ export function findMutations(activityCol: DG.Column<number>, monomerColumns: DG
|
|
|
37
35
|
break;
|
|
38
36
|
|
|
39
37
|
tempData.push({
|
|
40
|
-
pos:
|
|
41
|
-
seq1monomer:
|
|
42
|
-
seq2monomer:
|
|
38
|
+
pos: monomerInfo.name,
|
|
39
|
+
seq1monomer: monomerInfo.cat![seq1category],
|
|
40
|
+
seq2monomer: monomerInfo.cat![seq2category],
|
|
43
41
|
seq1Idx: seq1Idx,
|
|
44
42
|
seq2Idx: seq2Idx,
|
|
45
43
|
});
|
|
@@ -49,20 +47,22 @@ export function findMutations(activityCol: DG.Column<number>, monomerColumns: DG
|
|
|
49
47
|
continue;
|
|
50
48
|
|
|
51
49
|
for (const tempDataElement of tempData) {
|
|
52
|
-
const position = tempDataElement.pos;
|
|
53
|
-
|
|
54
50
|
//Working with seq1monomer
|
|
55
51
|
const seq1monomer = tempDataElement.seq1monomer;
|
|
56
52
|
if (!substitutionsInfo.has(seq1monomer))
|
|
57
53
|
substitutionsInfo.set(seq1monomer, new Map());
|
|
58
54
|
|
|
55
|
+
const position = tempDataElement.pos;
|
|
56
|
+
|
|
59
57
|
let positionsMap = substitutionsInfo.get(seq1monomer)!;
|
|
60
58
|
if (!positionsMap.has(position))
|
|
61
59
|
positionsMap.set(position, new Map());
|
|
62
60
|
|
|
63
61
|
let indexes = positionsMap.get(position)!;
|
|
64
|
-
|
|
65
|
-
|
|
62
|
+
if (indexes.has(seq1Idx))
|
|
63
|
+
(indexes.get(seq1Idx)! as number[]).push(seq2Idx);
|
|
64
|
+
else
|
|
65
|
+
indexes.set(seq1Idx, [seq2Idx]);
|
|
66
66
|
|
|
67
67
|
//Working with seq2monomer
|
|
68
68
|
const seq2monomer = tempDataElement.seq2monomer;
|
|
@@ -74,7 +74,10 @@ export function findMutations(activityCol: DG.Column<number>, monomerColumns: DG
|
|
|
74
74
|
positionsMap.set(position, new Map());
|
|
75
75
|
|
|
76
76
|
indexes = positionsMap.get(position)!;
|
|
77
|
-
|
|
77
|
+
if (indexes.has(seq2Idx))
|
|
78
|
+
(indexes.get(seq2Idx)! as number[]).push(seq1Idx);
|
|
79
|
+
else
|
|
80
|
+
indexes.set(seq2Idx, [seq1Idx]);
|
|
78
81
|
}
|
|
79
82
|
}
|
|
80
83
|
}
|