@datagrok/peptides 0.8.8 → 0.8.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -1
- package/dist/package-test.js +22626 -0
- package/dist/package.js +21429 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +8840 -0
- package/jest.config.js +33 -0
- package/package.json +70 -62
- package/src/__jest__/remote.test.ts +50 -0
- package/src/__jest__/test-node.ts +96 -0
- package/src/model.ts +977 -92
- package/src/monomer-library.ts +18 -12
- package/src/package-test.ts +6 -5
- package/src/package.ts +83 -68
- package/src/peptides.ts +298 -142
- package/src/styles.css +8 -0
- package/src/tests/peptide-space-test.ts +1 -1
- package/src/tests/peptides-tests.ts +20 -81
- package/src/tests/utils.ts +4 -9
- package/src/utils/SAR-multiple-filter.ts +439 -0
- package/src/utils/SAR-multiple-selection.ts +177 -0
- package/src/utils/cell-renderer.ts +124 -97
- package/src/utils/chem-palette.ts +98 -166
- package/src/utils/constants.ts +56 -0
- package/src/utils/filtering-statistics.ts +62 -0
- package/src/utils/multiple-sequence-alignment.ts +33 -2
- package/src/utils/multivariate-analysis.ts +79 -0
- package/src/utils/peptide-similarity-space.ts +24 -53
- package/src/utils/types.ts +10 -0
- package/src/viewers/logo-viewer.ts +7 -5
- package/src/viewers/peptide-space-viewer.ts +121 -0
- package/src/viewers/sar-viewer.ts +118 -342
- package/src/viewers/stacked-barchart-viewer.ts +322 -369
- package/src/widgets/analyze-peptides.ts +50 -29
- package/src/widgets/distribution.ts +61 -0
- package/src/widgets/manual-alignment.ts +7 -4
- package/src/widgets/multiple-sequence-alignment.ts +9 -0
- package/src/widgets/peptide-molecule.ts +8 -6
- package/src/widgets/subst-table.ts +73 -0
- package/src/workers/dimensionality-reducer.ts +1 -1
- package/test-Peptides-414a1874a71a-2f1c6575.html +256 -0
- package/src/describe.ts +0 -535
- package/src/utils/split-aligned.ts +0 -72
- package/src/viewers/subst-viewer.ts +0 -285
|
@@ -10,21 +10,7 @@ import {
|
|
|
10
10
|
} from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
|
|
11
11
|
import {Measure, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
12
12
|
import {Coordinates} from '@datagrok-libraries/utils/src/type-declarations';
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
* Finds a column with an activity.
|
|
16
|
-
*
|
|
17
|
-
* @param {DG.DataFrame} table The data frame to search for.
|
|
18
|
-
* @return {(string | null)} Column name or null if not found.
|
|
19
|
-
*/
|
|
20
|
-
function inferActivityColumnsName(table: DG.DataFrame): string | null {
|
|
21
|
-
const re = /activity|ic50/i;
|
|
22
|
-
for (const name of table.columns.names()) {
|
|
23
|
-
if (name.match(re))
|
|
24
|
-
return name;
|
|
25
|
-
}
|
|
26
|
-
return null;
|
|
27
|
-
}
|
|
13
|
+
import * as C from './constants';
|
|
28
14
|
|
|
29
15
|
/**
|
|
30
16
|
* Cast an aligned sequences column to clean sequences.
|
|
@@ -52,40 +38,29 @@ export function cleanAlignedSequencesColumn(col: DG.Column): Array<string> {
|
|
|
52
38
|
* @return {Promise<DG.ScatterPlotViewer>} A viewer.
|
|
53
39
|
*/
|
|
54
40
|
export async function createPeptideSimilaritySpaceViewer(
|
|
55
|
-
table: DG.DataFrame,
|
|
56
|
-
alignedSequencesColumn: DG.Column,
|
|
57
|
-
method: string,
|
|
58
|
-
measure: string,
|
|
59
|
-
cyclesCount: number,
|
|
60
|
-
view: DG.TableView | null,
|
|
61
|
-
activityColumnName?: string | null,
|
|
41
|
+
table: DG.DataFrame, method: string, measure: string, cyclesCount: number, view?: DG.TableView, col?: DG.Column,
|
|
62
42
|
): Promise<DG.ScatterPlotViewer> {
|
|
63
|
-
const pi = DG.TaskBarProgressIndicator.create('Creating embedding
|
|
64
|
-
|
|
65
|
-
activityColumnName = activityColumnName ?? inferActivityColumnsName(table);
|
|
43
|
+
const pi = DG.TaskBarProgressIndicator.create('Creating embedding...');
|
|
66
44
|
|
|
67
45
|
const axesNames = ['~X', '~Y', '~MW'];
|
|
68
|
-
const columnData =
|
|
46
|
+
const columnData = (col ?? table.getCol(C.COLUMNS_NAMES.ALIGNED_SEQUENCE)).toList()
|
|
47
|
+
.map((v) => AlignedSequenceEncoder.clean(v));
|
|
69
48
|
|
|
70
|
-
const embcols = await createDimensinalityReducingWorker(
|
|
49
|
+
const embcols = await createDimensinalityReducingWorker(
|
|
50
|
+
{data: columnData, metric: measure as StringMetrics}, method, cyclesCount);
|
|
71
51
|
|
|
72
52
|
const columns = Array.from(
|
|
73
|
-
embcols as Coordinates,
|
|
74
|
-
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
for (let i = 0; i < sequences.length; ++i) {
|
|
82
|
-
currentSequence = sequences[i];
|
|
83
|
-
mw[i] = currentSequence == null ? 0 : getSequenceMolecularWeight(currentSequence);
|
|
84
|
-
}
|
|
53
|
+
embcols as Coordinates, (v: Float32Array, k) => DG.Column.fromFloat32Array(axesNames[k], v));
|
|
54
|
+
|
|
55
|
+
function _getMW(sequences: string[]) {
|
|
56
|
+
const mw: Float32Array = new Float32Array(sequences.length);
|
|
57
|
+
|
|
58
|
+
mw.map((_, index) => getSequenceMolecularWeight(sequences[index] ?? ''));
|
|
59
|
+
|
|
85
60
|
return mw;
|
|
86
61
|
}
|
|
87
62
|
|
|
88
|
-
columns.push(DG.Column.fromFloat32Array('~MW', _getMW()));
|
|
63
|
+
columns.push(DG.Column.fromFloat32Array('~MW', _getMW(columnData)));
|
|
89
64
|
|
|
90
65
|
const edf = DG.DataFrame.fromColumns(columns);
|
|
91
66
|
|
|
@@ -103,11 +78,13 @@ export async function createPeptideSimilaritySpaceViewer(
|
|
|
103
78
|
table.columns.insert(newCol);
|
|
104
79
|
}
|
|
105
80
|
|
|
106
|
-
const viewerOptions = {
|
|
107
|
-
|
|
81
|
+
const viewerOptions = {
|
|
82
|
+
x: '~X', y: '~Y', color: C.COLUMNS_NAMES.ACTIVITY ?? '~MW', size: '~MW', title: 'Peptide Space',
|
|
83
|
+
showYSelector: false, showXSelector: false, showColorSelector: false, showSizeSelector: false,
|
|
84
|
+
};
|
|
85
|
+
const viewer = table.plot.scatter(viewerOptions);
|
|
108
86
|
|
|
109
|
-
|
|
110
|
-
view.addViewer(viewer);
|
|
87
|
+
view?.dockManager.dock(viewer, DG.DOCK_TYPE.RIGHT, null, 'Peptide Space viewer');
|
|
111
88
|
|
|
112
89
|
pi.close();
|
|
113
90
|
return viewer;
|
|
@@ -141,7 +118,8 @@ export class PeptideSimilaritySpaceWidget {
|
|
|
141
118
|
this.availableMetrics = Measure.getMetricByDataType('String');
|
|
142
119
|
this.method = this.availableMethods[0];
|
|
143
120
|
this.metrics = this.availableMetrics[0];
|
|
144
|
-
|
|
121
|
+
const df = alignedSequencesColumn.dataFrame;
|
|
122
|
+
this.currentDf = df.clone(df.filter);
|
|
145
123
|
this.alignedSequencesColumn = alignedSequencesColumn;
|
|
146
124
|
this.viewer = ui.div([]);
|
|
147
125
|
this.view = view;
|
|
@@ -155,14 +133,7 @@ export class PeptideSimilaritySpaceWidget {
|
|
|
155
133
|
*/
|
|
156
134
|
public async drawViewer(): Promise<DG.Viewer> {
|
|
157
135
|
const viewer = await createPeptideSimilaritySpaceViewer(
|
|
158
|
-
this.currentDf,
|
|
159
|
-
this.alignedSequencesColumn,
|
|
160
|
-
this.method,
|
|
161
|
-
this.metrics,
|
|
162
|
-
this.cycles,
|
|
163
|
-
null,
|
|
164
|
-
null,
|
|
165
|
-
);
|
|
136
|
+
this.currentDf, this.method, this.metrics, this.cycles, undefined, this.alignedSequencesColumn);
|
|
166
137
|
viewer.root.style.width = 'auto';
|
|
167
138
|
return viewer;
|
|
168
139
|
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
/** Nested numeric arrays (three levels deep) stored per string key; the key is named `aar` in the original. */
export type SubstitutionCases = Record<string, number[][][]>;

/**
 * Two-level arrays of tooltip objects stored per string key (`aar`).
 * NOTE(review): `{}` accepts any non-nullish value; consider a more precise element type once callers are known.
 */
export type SubstitutionTooltips = Record<string, {}[][]>;

/** Dictionary of data frames addressed by a string key. */
export type DataFrameDict = Record<string, DG.DataFrame>;
|
|
6
|
+
|
|
7
|
+
/** Types grouped under the `BarChart` name. */
export namespace BarChart {
  /** A bar part identified by a column name and an `aaName` string. */
  export type BarPart = {
    colName: string;
    aaName: string;
  };

  /** Named statistics entry holding a total count and a selected count. */
  export type BarStatsObject = {
    name: string;
    count: number;
    selectedCount: number;
  };
}
|
|
@@ -4,8 +4,10 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
import $ from 'cash-dom';
|
|
5
5
|
|
|
6
6
|
import * as logojs from 'logojs-react';
|
|
7
|
-
import {splitAlignedPeptides} from '../utils/split-aligned';
|
|
7
|
+
// import {splitAlignedPeptides} from '../utils/split-aligned';
|
|
8
8
|
import {ChemPalette} from '../utils/chem-palette';
|
|
9
|
+
import {PeptidesController} from '../peptides';
|
|
10
|
+
import * as C from '../utils/constants';
|
|
9
11
|
|
|
10
12
|
/**
|
|
11
13
|
* Logo viewer.
|
|
@@ -33,7 +35,7 @@ export class Logo extends DG.JsViewer {
|
|
|
33
35
|
constructor() {
|
|
34
36
|
super();
|
|
35
37
|
this.initialized = false;
|
|
36
|
-
this.colSemType = this.string('colSemType',
|
|
38
|
+
this.colSemType = this.string('colSemType', C.SEM_TYPES.ALIGNED_SEQUENCE);
|
|
37
39
|
|
|
38
40
|
this.splitted = null;
|
|
39
41
|
this.ppm = [];
|
|
@@ -80,7 +82,7 @@ export class Logo extends DG.JsViewer {
|
|
|
80
82
|
this.initialized = true;
|
|
81
83
|
console.log('INIT');
|
|
82
84
|
this.target = this.dataFrame;
|
|
83
|
-
[this.splitted] = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
85
|
+
[this.splitted] = PeptidesController.splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
84
86
|
this.root.style.width = 'auto';
|
|
85
87
|
this.root.style.height = 'auto';
|
|
86
88
|
this.root.style.maxHeight = '200px';
|
|
@@ -142,8 +144,8 @@ export class Logo extends DG.JsViewer {
|
|
|
142
144
|
.aggregate();
|
|
143
145
|
}
|
|
144
146
|
if (selected)
|
|
145
|
-
[this.splitted] = splitAlignedPeptides(this.target!.columns.bySemType(this.colSemType));
|
|
146
|
-
else [this.splitted] = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
147
|
+
[this.splitted] = PeptidesController.splitAlignedPeptides(this.target!.columns.bySemType(this.colSemType));
|
|
148
|
+
else [this.splitted] = PeptidesController.splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
147
149
|
$(this.root).empty();
|
|
148
150
|
|
|
149
151
|
if (typeof this.dataFrame !== 'undefined')
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import * as ui from 'datagrok-api/ui';
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import $ from 'cash-dom';
|
|
6
|
+
|
|
7
|
+
import {getSequenceMolecularWeight} from '../utils/molecular-measure';
|
|
8
|
+
import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
|
|
9
|
+
import {
|
|
10
|
+
createDimensinalityReducingWorker,
|
|
11
|
+
} from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
|
|
12
|
+
import {StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
13
|
+
import {Coordinates} from '@datagrok-libraries/utils/src/type-declarations';
|
|
14
|
+
import * as C from '../utils/constants';
|
|
15
|
+
import {PeptidesController} from '../peptides';
|
|
16
|
+
|
|
17
|
+
/**
 * Viewer that computes an embedding for the attached data frame (via `computeWeights`) and
 * shows it as a scatter plot over the `~X`/`~Y` columns, sized by `~MW`.
 */
export class PeptideSpaceViewer extends DG.JsViewer {
  method: string;
  measure: string;
  cyclesCount: number;
  // controller: PeptidesController | null = null;
  /** Names of the properties whose change forces the embedding to be recomputed. */
  customProperties = new Set(['method', 'measure', 'cyclesCount']);
  /** Guards against starting a second computation while one is still running. */
  isEmbeddingCreating: boolean = false;

  /** Registers the viewer properties together with their defaults and allowed choices. */
  constructor() {
    super();

    const methodChoices = ['UMAP', 't-SNE', 'SPE', 'pSPE', 'OriginalSPE'];
    this.method = this.addProperty('method', DG.TYPE.STRING, 'UMAP', {choices: methodChoices});
    const measureChoices = ['Levenshtein', 'Jaro-Winkler'];
    this.measure = this.addProperty('measure', DG.TYPE.STRING, 'Levenshtein', {choices: measureChoices});
    this.cyclesCount = this.addProperty('cyclesCount', DG.TYPE.INT, 100);
  }

  /**
   * Renders the viewer once a data frame is attached. The embedding is computed only when the
   * frame's `temp[C.EMBEDDING_STATUS]` value is truthy.
   *
   * @param {DG.DataFrame} dataFrame The attached data frame.
   */
  async onFrameAttached(dataFrame: DG.DataFrame) {
    super.onFrameAttached(dataFrame);

    // this.controller = await PeptidesController.getInstance(this.dataFrame!);

    await this.render(this.dataFrame!.temp[C.EMBEDDING_STATUS]);
  }

  /**
   * Re-renders after a property change. A change to one of `customProperties` always triggers
   * a recomputation; any other change recomputes only if `temp[C.EMBEDDING_STATUS]` is truthy.
   *
   * @param {(DG.Property | null)} property The changed property, if any.
   */
  async onPropertyChanged(property: DG.Property | null) {
    super.onPropertyChanged(property);

    const isCustomProperty = this.customProperties.has(property?.name ?? '');
    await this.render(isCustomProperty || this.dataFrame!.temp[C.EMBEDDING_STATUS]);
  }

  /**
   * Recomputes the embedding and replaces the viewer content with a fresh scatter plot.
   * Does nothing when `computeData` is falsy or a computation is already in progress.
   *
   * @param {boolean} computeData Whether the embedding has to be (re)computed.
   */
  async render(computeData=false) {
    if (!computeData || this.isEmbeddingCreating)
      return;

    this.isEmbeddingCreating = true;
    $(this.root).empty();
    const viewerHost = ui.waitBox(async () => {
      try {
        await computeWeights(this.dataFrame!, this.method, this.measure, this.cyclesCount);

        // A constant column name is never nullish, so `??` could not fall back to '~MW';
        // check that the column actually exists in the data frame instead.
        const hasScaledActivity = this.dataFrame!.col(C.COLUMNS_NAMES.ACTIVITY_SCALED) !== null;
        const viewerOptions = {
          x: '~X', y: '~Y', color: hasScaledActivity ? C.COLUMNS_NAMES.ACTIVITY_SCALED : '~MW', size: '~MW',
          title: 'Peptide Space',
          showYSelector: false, showXSelector: false, showColorSelector: false, showSizeSelector: false,
        };
        const viewerRoot = this.dataFrame!.plot.scatter(viewerOptions).root;
        viewerRoot.style.width = 'auto';
        viewerHost.style.paddingLeft = 'unset';
        return viewerRoot;
      } finally {
        // Always release the guard; otherwise a single failure would block every later render.
        this.isEmbeddingCreating = false;
      }
    }) as HTMLDivElement;
    // Temporary offset applied while the wait box is shown; reset once the plot is ready.
    viewerHost.style.paddingLeft = '45%';
    this.root.appendChild(viewerHost);
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Computes embedding coordinates for the sequences and writes them, together with per-sequence
 * molecular weights, into the `~X`, `~Y` and `~MW` columns of `table` (replacing columns that
 * already exist, inserting the ones that do not).
 *
 * Failures are reported through `grok.shell.error` and logged to the console; they are not
 * rethrown. The task bar progress indicator is always closed.
 *
 * @param {DG.DataFrame} table Data frame that receives the computed columns.
 * @param {string} method Dimensionality reduction method name passed to the worker.
 * @param {string} measure String metric name, passed to the worker as `StringMetrics`.
 * @param {number} cyclesCount Passed through to the worker (presumably the iteration count).
 * @param {DG.Column} [col] Column with sequences; defaults to the table's aligned sequence column.
 * @return {Promise<void>} Resolves once the columns are written or the failure is reported.
 */
export async function computeWeights(
  table: DG.DataFrame, method: string, measure: string, cyclesCount: number, col?: DG.Column,
): Promise<void> {
  const pi = DG.TaskBarProgressIndicator.create('Creating embedding...');
  try {
    const axesNames = ['~X', '~Y', '~MW'];
    const columnData = (col ?? table.getCol(C.COLUMNS_NAMES.ALIGNED_SEQUENCE)).toList()
      .map((v) => AlignedSequenceEncoder.clean(v));

    const embcols = await createDimensinalityReducingWorker(
      {data: columnData, metric: measure as StringMetrics}, method, cyclesCount);

    const columns = Array.from(
      embcols as Coordinates, (v: Float32Array, k) => DG.Column.fromFloat32Array(axesNames[k], v));

    // TypedArray.prototype.map returns a NEW array, so mapping over a zero-filled buffer and
    // discarding the result left every weight at 0. Build the array from the mapped values instead.
    const molecularWeights = Float32Array.from(
      columnData, (sequence) => getSequenceMolecularWeight(sequence ?? ''));
    columns.push(DG.Column.fromFloat32Array('~MW', molecularWeights));

    const edf = DG.DataFrame.fromColumns(columns);

    // Add new axes, replacing the ones left over from a previous computation.
    const columnList = table.columns as DG.ColumnList;
    for (const axis of axesNames) {
      const existingCol = table.col(axis);
      const newCol = edf.getCol(axis);

      if (existingCol !== null)
        columnList.replace(existingCol, newCol);
      else
        columnList.insert(newCol);
    }
  } catch (error) {
    grok.shell.error('Could not compute embeddings. See console for details.');
    console.error(error);
  } finally {
    pi.close();
  }
}
|