@datagrok/peptides 0.4.3 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +1 -1
- package/package.json +15 -10
- package/src/describe.ts +216 -102
- package/src/{viewers/model.ts → model.ts} +28 -1
- package/src/package.ts +15 -23
- package/src/peptides.ts +60 -6
- package/src/styles.css +46 -0
- package/src/utils/cell-renderer.ts +126 -19
- package/src/utils/chem-palette.ts +317 -206
- package/src/utils/peptide-similarity-space.ts +21 -41
- package/src/utils/split-aligned.ts +8 -0
- package/src/viewers/logo-viewer.ts +48 -0
- package/src/viewers/sar-viewer.ts +263 -170
- package/src/viewers/stacked-barchart-viewer.ts +12 -2
- package/src/viewers/subst-viewer.ts +276 -0
- package/src/widgets/analyze-peptides.ts +26 -6
- package/src/widgets/manual-alignment.ts +14 -6
- package/src/widgets/peptide-molecule.ts +7 -0
- package/src/workers/dimensionality-reducer.ts +2 -2
- package/webpack.config.js +5 -1
- package/src/utils/correlation-analysis.ts +0 -123
package/detectors.js
CHANGED
|
@@ -3,7 +3,7 @@ class PeptidesPackageDetectors extends DG.Package {
|
|
|
3
3
|
//input: column col
|
|
4
4
|
//output: string semType
|
|
5
5
|
detectAligned(col) {
|
|
6
|
-
const regexp = new RegExp(/^([
|
|
6
|
+
const regexp = new RegExp(/^([^\-\n]*-){7,49}(\w|\(|\))+$/);
|
|
7
7
|
return DG.Detector.sampleCategories(col, (s) => regexp.test(s.trim())) ? 'alignedSequence' : null;
|
|
8
8
|
}
|
|
9
9
|
}
|
package/package.json
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/peptides",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.1",
|
|
4
4
|
"description": "",
|
|
5
5
|
"dependencies": {
|
|
6
|
-
"@keckelt/tsne": "^1.0.2",
|
|
7
6
|
"cash-dom": "latest",
|
|
8
7
|
"d3": "latest",
|
|
9
|
-
"datagrok-api": ">=0.
|
|
8
|
+
"datagrok-api": ">=0.104.0",
|
|
10
9
|
"dayjs": "latest",
|
|
11
|
-
"jaro-winkler-typescript": "^1.0.1",
|
|
12
10
|
"jstat": "^1.9.5",
|
|
13
11
|
"logojs-react": "^2.1.1",
|
|
14
12
|
"rxjs": "^6.5.5",
|
|
15
|
-
"
|
|
16
|
-
"@datagrok-libraries/utils": ">=0.0.11",
|
|
13
|
+
"@datagrok-libraries/utils": ">=0.0.13",
|
|
17
14
|
"@datagrok-libraries/statistics": ">=0.1.5",
|
|
15
|
+
"@datagrok-libraries/ml": ">=0.0.1",
|
|
16
|
+
"@datagrok-libraries/bio": ">=0.0.4",
|
|
18
17
|
"@types/d3": "^7.0.0",
|
|
19
18
|
"@types/jquery": "^3.5.6"
|
|
20
19
|
},
|
|
21
20
|
"devDependencies": {
|
|
22
21
|
"typescript": "^4.4.4",
|
|
23
22
|
"ts-loader": "^9.2.5",
|
|
23
|
+
"css-loader": "^5.2.4",
|
|
24
|
+
"style-loader": "^2.0.0",
|
|
24
25
|
"@typescript-eslint/eslint-plugin": "^4.29.1",
|
|
25
26
|
"@typescript-eslint/parser": "^4.29.1",
|
|
26
27
|
"eslint": "^7.32.0",
|
|
@@ -34,10 +35,12 @@
|
|
|
34
35
|
"common/ngl_viewer/ngl.js"
|
|
35
36
|
],
|
|
36
37
|
"scripts": {
|
|
38
|
+
"link-api": "npm link datagrok-api",
|
|
37
39
|
"link-utils": "npm link @datagrok-libraries/utils",
|
|
38
40
|
"link-statistics": "npm link @datagrok-libraries/statistics",
|
|
39
|
-
"link-
|
|
40
|
-
"link-
|
|
41
|
+
"link-ml": "npm link @datagrok-libraries/ml",
|
|
42
|
+
"link-bio": "npm link @datagrok-libraries/bio",
|
|
43
|
+
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/statistics link @datagrok-libraries/ml @datagrok-libraries/bio",
|
|
41
44
|
"install-dependencies": "npm install",
|
|
42
45
|
"debug-peptides": "grok publish --rebuild",
|
|
43
46
|
"release-peptides": "grok publish --rebuild --release",
|
|
@@ -51,5 +54,7 @@
|
|
|
51
54
|
"release-peptides-local": "grok publish local --rebuild --release",
|
|
52
55
|
"lint": "eslint \"./src/**/*.ts\"",
|
|
53
56
|
"lint-fix": "eslint \"./src/**/*.ts\" --fix"
|
|
54
|
-
}
|
|
55
|
-
|
|
57
|
+
},
|
|
58
|
+
"canEdit": [ "Developers" ],
|
|
59
|
+
"canView": [ "All users" ]
|
|
60
|
+
}
|
package/src/describe.ts
CHANGED
|
@@ -8,7 +8,7 @@ import {setAARRenderer} from './utils/cell-renderer';
|
|
|
8
8
|
|
|
9
9
|
const cp = new ChemPalette('grok');
|
|
10
10
|
|
|
11
|
-
const aarGroups = {
|
|
11
|
+
export const aarGroups = {
|
|
12
12
|
'R': 'PC',
|
|
13
13
|
'H': 'PC',
|
|
14
14
|
'K': 'PC',
|
|
@@ -45,27 +45,21 @@ const groupDescription: {[key: string]: {'description': string, 'aminoAcids': st
|
|
|
45
45
|
'-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
|
|
46
46
|
};
|
|
47
47
|
|
|
48
|
-
|
|
48
|
+
/*function customGridColumnHeader(cell: DG.GridCell) {
|
|
49
|
+
if (cell.isColHeader && cell.tableColumn != null) {
|
|
50
|
+
if (highlightedColumns.includes(parseInt(cell.tableColumn.name))) {
|
|
51
|
+
cell.style.backColor = 0xff1f77b4;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}*/
|
|
55
|
+
|
|
56
|
+
function joinDataFrames(
|
|
57
|
+
activityColumnScaled: string,
|
|
49
58
|
df: DG.DataFrame,
|
|
59
|
+
positionColumns: string[],
|
|
60
|
+
splitSeqDf: DG.DataFrame,
|
|
50
61
|
activityColumn: string,
|
|
51
|
-
|
|
52
|
-
sourceGrid: DG.Grid,
|
|
53
|
-
twoColorMode: boolean,
|
|
54
|
-
initialBitset: DG.BitSet | null,
|
|
55
|
-
grouping: boolean,
|
|
56
|
-
): Promise<[DG.Grid, DG.Grid, DG.DataFrame, {[key: string]: string}]> {
|
|
57
|
-
//Split the aligned sequence into separate AARs
|
|
58
|
-
let splitSeqDf: DG.DataFrame | undefined;
|
|
59
|
-
let invalidIndexes: number[];
|
|
60
|
-
const col: DG.Column = df.columns.bySemType('alignedSequence');
|
|
61
|
-
[splitSeqDf, invalidIndexes] = splitAlignedPeptides(col);
|
|
62
|
-
splitSeqDf.name = 'Split sequence';
|
|
63
|
-
const positionColumns = splitSeqDf.columns.names();
|
|
64
|
-
const activityColumnScaled = `${activityColumn}Scaled`;
|
|
65
|
-
const renderColNames: string[] = splitSeqDf.columns.names();
|
|
66
|
-
|
|
67
|
-
splitSeqDf.columns.add(df.getCol(activityColumn));
|
|
68
|
-
|
|
62
|
+
) {
|
|
69
63
|
if (df.col(activityColumnScaled)) {
|
|
70
64
|
df.columns.remove(activityColumnScaled);
|
|
71
65
|
}
|
|
@@ -80,13 +74,9 @@ export async function describe(
|
|
|
80
74
|
if (!positionColumns.every((col: string) => dfColsSet.has(col))) {
|
|
81
75
|
df.join(splitSeqDf, [activityColumn], [activityColumn], df.columns.names(), positionColumns, 'inner', true);
|
|
82
76
|
}
|
|
77
|
+
}
|
|
83
78
|
|
|
84
|
-
|
|
85
|
-
if (splitSeqDf.col(col.name) && col.name != activityColumn) {
|
|
86
|
-
setAARRenderer(col, sourceGrid);
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
|
|
79
|
+
function sortSourceGrid(sourceGrid: DG.Grid) {
|
|
90
80
|
if (sourceGrid) {
|
|
91
81
|
const colNames:string[] = [];
|
|
92
82
|
for (let i = 0; i < sourceGrid.columns.length; i++) {
|
|
@@ -106,8 +96,16 @@ export async function describe(
|
|
|
106
96
|
});
|
|
107
97
|
sourceGrid?.columns.setOrder(colNames);
|
|
108
98
|
}
|
|
99
|
+
}
|
|
109
100
|
|
|
110
|
-
|
|
101
|
+
async function scaleActivity(
|
|
102
|
+
activityScaling: string,
|
|
103
|
+
activityColumn: string,
|
|
104
|
+
activityColumnScaled: string,
|
|
105
|
+
sourceGrid: DG.Grid,
|
|
106
|
+
splitSeqDf: DG.DataFrame,
|
|
107
|
+
) {
|
|
108
|
+
const df = sourceGrid.dataFrame!;
|
|
111
109
|
switch (activityScaling) {
|
|
112
110
|
case 'lg':
|
|
113
111
|
await df.columns.addNewCalculated(activityColumnScaled, 'Log10(${' + activityColumn + '})');
|
|
@@ -128,32 +126,17 @@ export async function describe(
|
|
|
128
126
|
sourceGrid.columns.setOrder([`${activityColumn}`]);
|
|
129
127
|
break;
|
|
130
128
|
}
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
const positionColName = 'Position';
|
|
134
|
-
const aminoAcidResidue = 'AAR';
|
|
135
|
-
|
|
136
|
-
//unpivot a table and handle duplicates
|
|
137
|
-
splitSeqDf = splitSeqDf.groupBy(positionColumns)
|
|
138
|
-
.add('med', activityColumnScaled, activityColumnScaled)
|
|
139
|
-
.aggregate();
|
|
140
|
-
|
|
141
|
-
const peptidesCount = splitSeqDf.getCol(activityColumnScaled).length;
|
|
142
|
-
|
|
143
|
-
let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
|
|
144
|
-
|
|
145
|
-
//TODO: move to chem palette
|
|
146
|
-
let groupMapping: {[key: string]: string} = {};
|
|
147
|
-
if (grouping) {
|
|
148
|
-
groupMapping = aarGroups;
|
|
149
|
-
const aarCol = matrixDf.getCol(aminoAcidResidue);
|
|
150
|
-
aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
|
|
151
|
-
aarCol.compact();
|
|
152
|
-
} else {
|
|
153
|
-
Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
|
|
154
|
-
}
|
|
129
|
+
}
|
|
155
130
|
|
|
156
|
-
|
|
131
|
+
async function calculateStatistics(
|
|
132
|
+
matrixDf: DG.DataFrame,
|
|
133
|
+
positionColName: string,
|
|
134
|
+
aminoAcidResidue: string,
|
|
135
|
+
activityColumnScaled: string,
|
|
136
|
+
peptidesCount: number,
|
|
137
|
+
splitSeqDf: DG.DataFrame,
|
|
138
|
+
groupMapping: {[key: string]: string},
|
|
139
|
+
) {
|
|
157
140
|
matrixDf = matrixDf.groupBy([positionColName, aminoAcidResidue])
|
|
158
141
|
.add('count', activityColumnScaled, 'Count')
|
|
159
142
|
.aggregate();
|
|
@@ -167,39 +150,31 @@ export async function describe(
|
|
|
167
150
|
await matrixDf.columns.addNewCalculated('Ratio', '${count}/'.concat(`${peptidesCount}`));
|
|
168
151
|
|
|
169
152
|
//calculate p-values based on t-test
|
|
170
|
-
let position: string;
|
|
171
|
-
let AAR: string;
|
|
172
|
-
let currentActivity: number[];
|
|
173
|
-
let otherActivity: number[];
|
|
174
|
-
let testResult;
|
|
175
|
-
let currentMeanDiff: number;
|
|
176
153
|
let pvalues: Float32Array = new Float32Array(matrixDf.rowCount).fill(1);
|
|
177
|
-
let pvalue = 1.;
|
|
178
|
-
|
|
179
154
|
const mdCol: DG.Column = matrixDf.columns.addNewFloat('Mean difference');
|
|
180
155
|
const pValCol: DG.Column = matrixDf.columns.addNewFloat('pValue');
|
|
181
156
|
for (let i = 0; i < matrixDf.rowCount; i++) {
|
|
182
|
-
position = matrixDf.get(positionColName, i);
|
|
183
|
-
|
|
157
|
+
const position = matrixDf.get(positionColName, i);
|
|
158
|
+
const aar = matrixDf.get(aminoAcidResidue, i);
|
|
184
159
|
|
|
185
160
|
//@ts-ignore
|
|
186
|
-
splitSeqDf.rows.select((row) => groupMapping[row[position]] ===
|
|
187
|
-
currentActivity = splitSeqDf
|
|
161
|
+
splitSeqDf.rows.select((row) => groupMapping[row[position]] === aar);
|
|
162
|
+
const currentActivity: number[] = splitSeqDf
|
|
188
163
|
.clone(splitSeqDf.selection, [activityColumnScaled])
|
|
189
164
|
.getCol(activityColumnScaled)
|
|
190
165
|
.toList();
|
|
191
166
|
|
|
192
167
|
//@ts-ignore
|
|
193
|
-
splitSeqDf.rows.select((row) => groupMapping[row[position]] !==
|
|
194
|
-
otherActivity = splitSeqDf
|
|
168
|
+
splitSeqDf.rows.select((row) => groupMapping[row[position]] !== aar);
|
|
169
|
+
const otherActivity: number[] = splitSeqDf
|
|
195
170
|
.clone(splitSeqDf.selection, [activityColumnScaled])
|
|
196
171
|
.getCol(activityColumnScaled)
|
|
197
172
|
.toList();
|
|
198
173
|
|
|
199
|
-
testResult = tTest(currentActivity, otherActivity);
|
|
174
|
+
const testResult = tTest(currentActivity, otherActivity);
|
|
200
175
|
// testResult = uTest(currentActivity, otherActivity);
|
|
201
|
-
currentMeanDiff = testResult['Mean difference']!;
|
|
202
|
-
pvalue = testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'];
|
|
176
|
+
const currentMeanDiff = testResult['Mean difference']!;
|
|
177
|
+
const pvalue = testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'];
|
|
203
178
|
|
|
204
179
|
mdCol.set(i, currentMeanDiff);
|
|
205
180
|
pvalues[i] = pvalue;
|
|
@@ -213,17 +188,12 @@ export async function describe(
|
|
|
213
188
|
pValCol.set(i, pvalues[i]);
|
|
214
189
|
}
|
|
215
190
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
// SAR matrix table
|
|
219
|
-
//pivot a table to make it matrix-like
|
|
220
|
-
matrixDf = matrixDf.groupBy([aminoAcidResidue])
|
|
221
|
-
.pivot(positionColName)
|
|
222
|
-
.add('first', 'Mean difference', '')
|
|
223
|
-
.aggregate();
|
|
224
|
-
matrixDf.name = 'SAR';
|
|
191
|
+
return matrixDf.clone();
|
|
192
|
+
}
|
|
225
193
|
|
|
226
|
-
|
|
194
|
+
async function setCategoryOrder(
|
|
195
|
+
twoColorMode: boolean, statsDf: DG.DataFrame, aminoAcidResidue: string, matrixDf: DG.DataFrame,
|
|
196
|
+
) {
|
|
227
197
|
const sortArgument = twoColorMode ? 'Absolute Mean difference' : 'Mean difference';
|
|
228
198
|
if (twoColorMode) {
|
|
229
199
|
await statsDf.columns.addNewCalculated('Absolute Mean difference', 'Abs(${Mean difference})');
|
|
@@ -238,8 +208,14 @@ export async function describe(
|
|
|
238
208
|
aarList.sort((first, second) => getWeight(second) - getWeight(first));
|
|
239
209
|
|
|
240
210
|
matrixDf.getCol(aminoAcidResidue).setCategoryOrder(aarList);
|
|
211
|
+
}
|
|
241
212
|
|
|
242
|
-
|
|
213
|
+
function createVerticalTable(
|
|
214
|
+
statsDf: DG.DataFrame,
|
|
215
|
+
aminoAcidResidue: string,
|
|
216
|
+
positionColName: string,
|
|
217
|
+
twoColorMode: boolean,
|
|
218
|
+
) {
|
|
243
219
|
// TODO: acquire ALL of the positions
|
|
244
220
|
let sequenceDf = statsDf.groupBy(['Mean difference', aminoAcidResidue, positionColName, 'Count', 'Ratio', 'pValue'])
|
|
245
221
|
.where('pValue <= 0.1')
|
|
@@ -258,30 +234,51 @@ export async function describe(
|
|
|
258
234
|
sequenceDf = sequenceDf.clone(DG.BitSet.create(sequenceDf.rowCount, (i) => {
|
|
259
235
|
return sequenceDf.get('Mean difference', i) === maxAtPos[sequenceDf.get(positionColName, i)];
|
|
260
236
|
}));
|
|
261
|
-
renderColNames.push('Mean difference');
|
|
262
237
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
238
|
+
return sequenceDf;
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
function createGrids(
|
|
242
|
+
matrixDf: DG.DataFrame,
|
|
243
|
+
aminoAcidResidue: string,
|
|
244
|
+
positionColumns: string[],
|
|
245
|
+
sequenceDf: DG.DataFrame,
|
|
246
|
+
positionColName: string,
|
|
247
|
+
grouping: boolean,
|
|
248
|
+
) {
|
|
249
|
+
const sarGrid = matrixDf.plot.grid();
|
|
250
|
+
sarGrid.sort([aminoAcidResidue]);
|
|
251
|
+
sarGrid.columns.setOrder([aminoAcidResidue].concat(positionColumns));
|
|
267
252
|
|
|
268
|
-
const
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
253
|
+
const sarVGrid = sequenceDf.plot.grid();
|
|
254
|
+
sarVGrid.sort([positionColName]);
|
|
255
|
+
sarVGrid.col('pValue')!.format = 'four digits after comma';
|
|
256
|
+
sarVGrid.col('pValue')!.name = 'P-Value';
|
|
272
257
|
|
|
273
258
|
if (!grouping) {
|
|
274
259
|
let tempCol = matrixDf.columns.byName(aminoAcidResidue);
|
|
275
260
|
if (tempCol) {
|
|
276
|
-
setAARRenderer(tempCol,
|
|
261
|
+
setAARRenderer(tempCol, sarGrid);
|
|
277
262
|
}
|
|
278
263
|
tempCol = sequenceDf.columns.byName(aminoAcidResidue);
|
|
279
264
|
if (tempCol) {
|
|
280
|
-
setAARRenderer(tempCol,
|
|
265
|
+
setAARRenderer(tempCol, sarGrid);
|
|
281
266
|
}
|
|
282
267
|
}
|
|
283
268
|
|
|
284
|
-
|
|
269
|
+
return [sarGrid, sarVGrid];
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
function setCellRendererFunc(
|
|
273
|
+
renderColNames: string[],
|
|
274
|
+
positionColName: string,
|
|
275
|
+
aminoAcidResidue: string,
|
|
276
|
+
statsDf: DG.DataFrame,
|
|
277
|
+
twoColorMode: boolean,
|
|
278
|
+
sarGrid: DG.Grid,
|
|
279
|
+
sarVGrid: DG.Grid,
|
|
280
|
+
) {
|
|
281
|
+
const mdCol = statsDf.getCol('Mean difference');
|
|
285
282
|
const cellRendererFunc = function(args: DG.GridCellRenderArgs) {
|
|
286
283
|
args.g.save();
|
|
287
284
|
args.g.beginPath();
|
|
@@ -348,10 +345,20 @@ export async function describe(
|
|
|
348
345
|
}
|
|
349
346
|
args.g.restore();
|
|
350
347
|
};
|
|
351
|
-
|
|
352
|
-
|
|
348
|
+
sarGrid.onCellRender.subscribe(cellRendererFunc);
|
|
349
|
+
sarVGrid.onCellRender.subscribe(cellRendererFunc);
|
|
350
|
+
}
|
|
353
351
|
|
|
354
|
-
|
|
352
|
+
function setTooltipFunc(
|
|
353
|
+
renderColNames: string[],
|
|
354
|
+
statsDf: DG.DataFrame,
|
|
355
|
+
aminoAcidResidue: string,
|
|
356
|
+
positionColName: string,
|
|
357
|
+
peptidesCount: number,
|
|
358
|
+
grouping: boolean,
|
|
359
|
+
sarGrid: DG.Grid,
|
|
360
|
+
sarVGrid: DG.Grid,
|
|
361
|
+
) {
|
|
355
362
|
const onCellTooltipFunc = function(cell: DG.GridCell, x: number, y: number) {
|
|
356
363
|
if (
|
|
357
364
|
!cell.isRowHeader &&
|
|
@@ -402,10 +409,20 @@ export async function describe(
|
|
|
402
409
|
}
|
|
403
410
|
return true;
|
|
404
411
|
};
|
|
405
|
-
|
|
406
|
-
|
|
412
|
+
sarGrid.onCellTooltip(onCellTooltipFunc);
|
|
413
|
+
sarVGrid.onCellTooltip(onCellTooltipFunc);
|
|
414
|
+
}
|
|
407
415
|
|
|
408
|
-
|
|
416
|
+
function postProcessGrids(
|
|
417
|
+
sourceGrid: DG.Grid,
|
|
418
|
+
invalidIndexes: number[],
|
|
419
|
+
matrixDf: DG.DataFrame,
|
|
420
|
+
grouping: boolean,
|
|
421
|
+
aminoAcidResidue: string,
|
|
422
|
+
sarGrid: DG.Grid,
|
|
423
|
+
sarVGrid: DG.Grid,
|
|
424
|
+
) {
|
|
425
|
+
sourceGrid.onCellPrepare((cell: DG.GridCell) => {
|
|
409
426
|
const currentRowIndex = cell.tableRowIndex;
|
|
410
427
|
if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader) {
|
|
411
428
|
cell.style.backColor = DG.Color.lightLightGray;
|
|
@@ -413,13 +430,110 @@ export async function describe(
|
|
|
413
430
|
});
|
|
414
431
|
|
|
415
432
|
for (const col of matrixDf.columns.names()) {
|
|
416
|
-
|
|
433
|
+
sarGrid.col(col)!.width = sarGrid.props.rowHeight;
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
if (grouping) {
|
|
437
|
+
sarGrid.col(aminoAcidResidue)!.name = 'Groups';
|
|
438
|
+
sarVGrid.col(aminoAcidResidue)!.name = 'Groups';
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
sarGrid.props.allowEdit = false;
|
|
442
|
+
sarVGrid.props.allowEdit = false;
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
export async function describe(
|
|
446
|
+
df: DG.DataFrame,
|
|
447
|
+
activityColumn: string,
|
|
448
|
+
activityScaling: string,
|
|
449
|
+
sourceGrid: DG.Grid,
|
|
450
|
+
twoColorMode: boolean,
|
|
451
|
+
initialBitset: DG.BitSet | null,
|
|
452
|
+
grouping: boolean,
|
|
453
|
+
): Promise<[DG.Grid, DG.Grid, DG.DataFrame, {[key: string]: string}]> {
|
|
454
|
+
//Split the aligned sequence into separate AARs
|
|
455
|
+
let splitSeqDf: DG.DataFrame | undefined;
|
|
456
|
+
let invalidIndexes: number[];
|
|
457
|
+
const col: DG.Column = df.columns.bySemType('alignedSequence');
|
|
458
|
+
[splitSeqDf, invalidIndexes] = splitAlignedPeptides(col);
|
|
459
|
+
splitSeqDf.name = 'Split sequence';
|
|
460
|
+
|
|
461
|
+
const positionColumns = splitSeqDf.columns.names();
|
|
462
|
+
const activityColumnScaled = `${activityColumn}Scaled`;
|
|
463
|
+
const renderColNames: string[] = splitSeqDf.columns.names();
|
|
464
|
+
const positionColName = 'Position';
|
|
465
|
+
const aminoAcidResidue = 'AAR';
|
|
466
|
+
|
|
467
|
+
splitSeqDf.columns.add(df.getCol(activityColumn));
|
|
468
|
+
|
|
469
|
+
joinDataFrames(activityColumnScaled, df, positionColumns, splitSeqDf, activityColumn);
|
|
470
|
+
|
|
471
|
+
for (const col of df.columns) {
|
|
472
|
+
if (splitSeqDf.col(col.name) && col.name != activityColumn) {
|
|
473
|
+
setAARRenderer(col, sourceGrid);
|
|
474
|
+
}
|
|
417
475
|
}
|
|
418
476
|
|
|
477
|
+
sortSourceGrid(sourceGrid);
|
|
478
|
+
|
|
479
|
+
await scaleActivity(activityScaling, activityColumn, activityColumnScaled, sourceGrid, splitSeqDf);
|
|
480
|
+
splitSeqDf = splitSeqDf.clone(initialBitset);
|
|
481
|
+
|
|
482
|
+
//unpivot a table and handle duplicates
|
|
483
|
+
splitSeqDf = splitSeqDf.groupBy(positionColumns)
|
|
484
|
+
.add('med', activityColumnScaled, activityColumnScaled)
|
|
485
|
+
.aggregate();
|
|
486
|
+
|
|
487
|
+
const peptidesCount = splitSeqDf.getCol(activityColumnScaled).length;
|
|
488
|
+
|
|
489
|
+
let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
|
|
490
|
+
|
|
491
|
+
//TODO: move to chem palette
|
|
492
|
+
let groupMapping: {[key: string]: string} = {};
|
|
419
493
|
if (grouping) {
|
|
420
|
-
|
|
421
|
-
|
|
494
|
+
groupMapping = aarGroups;
|
|
495
|
+
const aarCol = matrixDf.getCol(aminoAcidResidue);
|
|
496
|
+
aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
|
|
497
|
+
aarCol.compact();
|
|
498
|
+
} else {
|
|
499
|
+
Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
|
|
422
500
|
}
|
|
423
501
|
|
|
424
|
-
|
|
502
|
+
//statistics for specific AAR at a specific position
|
|
503
|
+
const statsDf = await calculateStatistics(
|
|
504
|
+
matrixDf, positionColName, aminoAcidResidue, activityColumnScaled, peptidesCount, splitSeqDf, groupMapping,
|
|
505
|
+
);
|
|
506
|
+
|
|
507
|
+
// SAR matrix table
|
|
508
|
+
//pivot a table to make it matrix-like
|
|
509
|
+
matrixDf = statsDf.groupBy([aminoAcidResidue])
|
|
510
|
+
.pivot(positionColName)
|
|
511
|
+
.add('first', 'Mean difference', '')
|
|
512
|
+
.aggregate();
|
|
513
|
+
matrixDf.name = 'SAR';
|
|
514
|
+
|
|
515
|
+
// Setting category order
|
|
516
|
+
await setCategoryOrder(twoColorMode, statsDf, aminoAcidResidue, matrixDf);
|
|
517
|
+
|
|
518
|
+
// SAR vertical table (naive, choose best Mean difference from pVals <= 0.1)
|
|
519
|
+
const sequenceDf = createVerticalTable(statsDf, aminoAcidResidue, positionColName, twoColorMode);
|
|
520
|
+
renderColNames.push('Mean difference');
|
|
521
|
+
|
|
522
|
+
const [sarGrid, sarVGrid] = createGrids(
|
|
523
|
+
matrixDf, aminoAcidResidue, positionColumns, sequenceDf, positionColName, grouping,
|
|
524
|
+
);
|
|
525
|
+
|
|
526
|
+
setCellRendererFunc(
|
|
527
|
+
renderColNames, positionColName, aminoAcidResidue, statsDf, twoColorMode, sarGrid, sarVGrid,
|
|
528
|
+
);
|
|
529
|
+
|
|
530
|
+
// show all the statistics in a tooltip over cell
|
|
531
|
+
setTooltipFunc(
|
|
532
|
+
renderColNames, statsDf, aminoAcidResidue, positionColName, peptidesCount, grouping, sarGrid, sarVGrid,
|
|
533
|
+
);
|
|
534
|
+
|
|
535
|
+
postProcessGrids(sourceGrid, invalidIndexes, matrixDf, grouping, aminoAcidResidue, sarGrid, sarVGrid);
|
|
536
|
+
|
|
537
|
+
//TODO: return class instead
|
|
538
|
+
return [sarGrid, sarVGrid, statsDf, groupMapping];
|
|
425
539
|
}
|
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
|
|
3
|
-
import {describe} from '
|
|
3
|
+
import {describe} from './describe';
|
|
4
4
|
import {Subject} from 'rxjs';
|
|
5
5
|
|
|
6
|
+
/**
|
|
7
|
+
* Model class for SAR viewers that retrieves and stores data.
|
|
8
|
+
*
|
|
9
|
+
* @class SARViewerModel
|
|
10
|
+
*/
|
|
6
11
|
class SARViewerModel {
|
|
7
12
|
private viewerGrid: Subject<DG.Grid> = new Subject<DG.Grid>();
|
|
8
13
|
private viewerVGrid: Subject<DG.Grid> = new Subject<DG.Grid>();
|
|
@@ -21,6 +26,11 @@ class SARViewerModel {
|
|
|
21
26
|
private isUpdating = false;
|
|
22
27
|
grouping: boolean;
|
|
23
28
|
|
|
29
|
+
/**
|
|
30
|
+
* Creates an instance of SARViewerModel.
|
|
31
|
+
*
|
|
32
|
+
* @memberof SARViewerModel
|
|
33
|
+
*/
|
|
24
34
|
constructor() {
|
|
25
35
|
this.dataFrame = null;
|
|
26
36
|
this.activityColumn = null;
|
|
@@ -35,6 +45,18 @@ class SARViewerModel {
|
|
|
35
45
|
this.groupMapping$ = this.groupMapping.asObservable();
|
|
36
46
|
}
|
|
37
47
|
|
|
48
|
+
/**
|
|
49
|
+
* Updates data using the specified parameters.
|
|
50
|
+
*
|
|
51
|
+
* @param {DG.DataFrame} df Working table.
|
|
52
|
+
* @param {string} activityCol Activity column name.
|
|
53
|
+
* @param {string} activityScaling Activity scaling method.
|
|
54
|
+
* @param {DG.Grid} sourceGrid Working table grid.
|
|
55
|
+
* @param {boolean} twoColorMode Bidirectional analysis enabled.
|
|
56
|
+
* @param {(DG.BitSet | null)} initialBitset Initial bitset.
|
|
57
|
+
* @param {boolean} grouping Grouping enabled.
|
|
58
|
+
* @memberof SARViewerModel
|
|
59
|
+
*/
|
|
38
60
|
async updateData(
|
|
39
61
|
df: DG.DataFrame,
|
|
40
62
|
activityCol: string,
|
|
@@ -54,6 +76,11 @@ class SARViewerModel {
|
|
|
54
76
|
await this.updateDefault();
|
|
55
77
|
}
|
|
56
78
|
|
|
79
|
+
/**
|
|
80
|
+
* Update data using current parameters.
|
|
81
|
+
*
|
|
82
|
+
* @memberof SARViewerModel
|
|
83
|
+
*/
|
|
57
84
|
async updateDefault() {
|
|
58
85
|
if (
|
|
59
86
|
this.dataFrame && this.activityColumn && this.activityScaling &&
|
package/src/package.ts
CHANGED
|
@@ -15,7 +15,7 @@ import {PeptideSimilaritySpaceWidget} from './utils/peptide-similarity-space';
|
|
|
15
15
|
import {manualAlignmentWidget} from './widgets/manual-alignment';
|
|
16
16
|
import {SARViewer, SARViewerVertical} from './viewers/sar-viewer';
|
|
17
17
|
import {peptideMoleculeWidget} from './widgets/peptide-molecule';
|
|
18
|
-
import {
|
|
18
|
+
import {SubstViewer} from './viewers/subst-viewer';
|
|
19
19
|
|
|
20
20
|
export const _package = new DG.Package();
|
|
21
21
|
let tableGrid: DG.Grid;
|
|
@@ -63,25 +63,18 @@ export function Peptides() {
|
|
|
63
63
|
'Use and analyse peptide sequence data to support your research:',
|
|
64
64
|
);
|
|
65
65
|
|
|
66
|
-
const annotationViewerDiv = ui.div();
|
|
67
|
-
|
|
68
66
|
const windows = grok.shell.windows;
|
|
69
67
|
windows.showToolbox = false;
|
|
70
68
|
windows.showHelp = false;
|
|
71
69
|
windows.showProperties = false;
|
|
72
70
|
|
|
73
|
-
const mainDiv = ui.div();
|
|
74
71
|
grok.shell.newView('Peptides', [
|
|
75
72
|
appDescription,
|
|
76
73
|
ui.info([textLink]),
|
|
77
|
-
ui.
|
|
78
|
-
ui.
|
|
79
|
-
|
|
80
|
-
ui.button('Open complex case demo', () => main('aligned_2.csv'), ''),
|
|
81
|
-
]),
|
|
82
|
-
ui.block75([annotationViewerDiv]),
|
|
74
|
+
ui.divH([
|
|
75
|
+
ui.button('Open peptide sequences demonstration set', () => main('aligned.csv'), ''),
|
|
76
|
+
ui.button('Open complex case demo', () => main('aligned_2.csv'), ''),
|
|
83
77
|
]),
|
|
84
|
-
mainDiv,
|
|
85
78
|
]);
|
|
86
79
|
}
|
|
87
80
|
|
|
@@ -90,6 +83,9 @@ export function Peptides() {
|
|
|
90
83
|
//input: column col {semType: alignedSequence}
|
|
91
84
|
//output: widget result
|
|
92
85
|
export async function peptidesPanel(col: DG.Column): Promise<DG.Widget> {
|
|
86
|
+
if (col.getTag('isAnalysisApplicable') === 'false') {
|
|
87
|
+
return new DG.Widget(ui.divText('Analysis is not applicable'));
|
|
88
|
+
}
|
|
93
89
|
view = (grok.shell.v as DG.TableView);
|
|
94
90
|
tableGrid = view.grid;
|
|
95
91
|
currentDf = col.dataFrame;
|
|
@@ -113,6 +109,14 @@ export function sarVertical(): SARViewerVertical {
|
|
|
113
109
|
return new SARViewerVertical();
|
|
114
110
|
}
|
|
115
111
|
|
|
112
|
+
//name: substitution-analysis-viewer
|
|
113
|
+
//description: Substitution Analysis Viewer
|
|
114
|
+
//tags: viewer
|
|
115
|
+
//output: viewer result
|
|
116
|
+
export function subst(): SubstViewer {
|
|
117
|
+
return new SubstViewer();
|
|
118
|
+
}
|
|
119
|
+
|
|
116
120
|
//name: StackedBarchart Widget
|
|
117
121
|
//tags: panel, widgets
|
|
118
122
|
//input: column col {semType: aminoAcids}
|
|
@@ -177,15 +181,3 @@ export async function peptideSpacePanel(col: DG.Column): Promise<DG.Widget> {
|
|
|
177
181
|
const widget = new PeptideSimilaritySpaceWidget(col, view ?? grok.shell.v);
|
|
178
182
|
return await widget.draw();
|
|
179
183
|
}
|
|
180
|
-
|
|
181
|
-
//name: Correllation analysis
|
|
182
|
-
export async function correlationAnalysis() {
|
|
183
|
-
view = (grok.shell.v as DG.TableView);
|
|
184
|
-
|
|
185
|
-
const df = await grok.data.files.openTable('Demo:TestJobs:Files:DemoFiles/bio/peptides.csv');
|
|
186
|
-
const tview = grok.shell.addTableView(df);
|
|
187
|
-
const [cpviewer, bpviewer] = correlationAnalysisPlots(df.getCol('AlignedSequence'));
|
|
188
|
-
|
|
189
|
-
tview.dockManager.dock(cpviewer, 'right');
|
|
190
|
-
tview.dockManager.dock(bpviewer, 'down');
|
|
191
|
-
}
|