@datagrok/peptides 0.8.9 → 0.8.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -1
- package/dist/package-test.js +22626 -0
- package/dist/package.js +21429 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +8840 -0
- package/jest.config.js +33 -0
- package/package.json +75 -62
- package/src/__jest__/remote.test.ts +50 -0
- package/src/__jest__/test-node.ts +96 -0
- package/src/model.ts +950 -86
- package/src/monomer-library.ts +8 -0
- package/src/package-test.ts +3 -2
- package/src/package.ts +57 -22
- package/src/peptides.ts +165 -119
- package/src/styles.css +8 -0
- package/src/tests/peptides-tests.ts +17 -78
- package/src/tests/utils.ts +1 -7
- package/src/utils/SAR-multiple-filter.ts +439 -0
- package/src/utils/SAR-multiple-selection.ts +177 -0
- package/src/utils/cell-renderer.ts +49 -50
- package/src/utils/chem-palette.ts +61 -163
- package/src/utils/constants.ts +56 -0
- package/src/utils/filtering-statistics.ts +62 -0
- package/src/utils/multiple-sequence-alignment.ts +33 -2
- package/src/utils/multivariate-analysis.ts +79 -0
- package/src/utils/peptide-similarity-space.ts +12 -31
- package/src/utils/types.ts +10 -0
- package/src/viewers/logo-viewer.ts +2 -1
- package/src/viewers/peptide-space-viewer.ts +121 -0
- package/src/viewers/sar-viewer.ts +111 -313
- package/src/viewers/stacked-barchart-viewer.ts +126 -173
- package/src/widgets/analyze-peptides.ts +39 -18
- package/src/widgets/distribution.ts +61 -0
- package/src/widgets/manual-alignment.ts +3 -3
- package/src/widgets/peptide-molecule.ts +4 -4
- package/src/widgets/subst-table.ts +30 -22
- package/test-Peptides-f8114def7953-4bf59d70.html +256 -0
- package/src/describe.ts +0 -534
- package/src/utils/split-aligned.ts +0 -72
- package/src/viewers/subst-viewer.ts +0 -320
package/src/describe.ts
DELETED
|
@@ -1,534 +0,0 @@
|
|
|
1
|
-
import * as ui from 'datagrok-api/ui';
|
|
2
|
-
import * as DG from 'datagrok-api/dg';
|
|
3
|
-
// import {splitAlignedPeptides} from './utils/split-aligned';
|
|
4
|
-
import {tTest} from '@datagrok-libraries/statistics/src/tests';
|
|
5
|
-
import {fdrcorrection} from '@datagrok-libraries/statistics/src/multiple-tests';
|
|
6
|
-
import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
|
|
7
|
-
import {ChemPalette} from './utils/chem-palette';
|
|
8
|
-
import {setAARRenderer} from './utils/cell-renderer';
|
|
9
|
-
import {PeptidesController} from './peptides';
|
|
10
|
-
|
|
11
|
-
const cp = new ChemPalette('grok');
|
|
12
|
-
|
|
13
|
-
/**
 * Lookup from a one-letter amino acid residue code to the short code of its group.
 * The group codes are the keys of `groupDescription`; the gap symbol '-' maps to itself.
 */
export const aarGroups = {
  // 'PC' group
  R: 'PC',
  H: 'PC',
  K: 'PC',
  // 'NC' group
  D: 'NC',
  E: 'NC',
  // 'U' group
  S: 'U',
  T: 'U',
  N: 'U',
  Q: 'U',
  // 'SC' group
  C: 'SC',
  U: 'SC',
  G: 'SC',
  P: 'SC',
  // 'H' group
  A: 'H',
  V: 'H',
  I: 'H',
  L: 'H',
  M: 'H',
  F: 'H',
  Y: 'H',
  W: 'H',
  // gap / unknown residue
  '-': '-',
};
|
|
37
|
-
|
|
38
|
-
/**
 * Human-readable description and member residues for every amino acid group code,
 * keyed by the group code.
 */
const groupDescription: Record<string, {description: string, aminoAcids: string[]}> = {
  PC: {
    description: 'Positive Amino Acids, with Electrically Charged Side Chains',
    aminoAcids: ['R', 'H', 'K'],
  },
  NC: {
    description: 'Negative Amino Acids, with Electrically Charged Side Chains',
    aminoAcids: ['D', 'E'],
  },
  U: {
    description: 'Amino Acids with Polar Uncharged Side Chains',
    aminoAcids: ['S', 'T', 'N', 'Q'],
  },
  SC: {
    description: 'Special Cases',
    aminoAcids: ['C', 'U', 'G', 'P'],
  },
  H: {
    description: 'Amino Acids with Hydrophobic Side Chain',
    aminoAcids: ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
  },
  '-': {
    description: 'Unknown Amino Acid',
    aminoAcids: ['-'],
  },
};
|
|
49
|
-
|
|
50
|
-
/*function customGridColumnHeader(cell: DG.GridCell) {
|
|
51
|
-
if (cell.isColHeader && cell.tableColumn != null) {
|
|
52
|
-
if (highlightedColumns.includes(parseInt(cell.tableColumn.name))) {
|
|
53
|
-
cell.style.backColor = 0xff1f77b4;
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
}*/
|
|
57
|
-
|
|
58
|
-
/**
 * Joins the position columns of `splitSeqDf` into `df`, unless `df` already contains every one of them
 * (so the columns are not added more than once).
 *
 * The join is an inner join keyed on `activityColumn` on both sides; it keeps all current columns of `df`
 * and takes `positionColumns` from `splitSeqDf`.
 * NOTE(review): the trailing `true` passed to `join` is presumably the "in place" flag - confirm against the API.
 *
 * FIXME (carried over from the original): the scaled activity column usually ends up duplicated and should
 * be removed afterwards.
 *
 * @param df Source table that receives the position columns.
 * @param positionColumns Names of the per-position columns expected in `df`.
 * @param splitSeqDf Table holding the split sequence columns.
 * @param activityColumn Name of the column used as the join key in both tables.
 */
function joinDataFrames(df: DG.DataFrame, positionColumns: string[], splitSeqDf: DG.DataFrame, activityColumn: string) {
  const existingNames = new Set((df.columns as DG.ColumnList).names());
  const somePositionMissing = positionColumns.some((name: string) => !existingNames.has(name));
  if (!somePositionMissing)
    return;

  const sourceColumnNames = (df.columns as DG.ColumnList).names();
  df.join(splitSeqDf, [activityColumn], [activityColumn], sourceColumnNames, positionColumns, 'inner', true);
}
|
|
76
|
-
|
|
77
|
-
/**
 * Reorders the columns of the source grid so that columns whose semantic type is 'aminoAcids' come first.
 * Relative order inside each of the two groups is left to the (stable) array sort.
 * The grid column at index 0 is skipped and is not part of the new order.
 *
 * @param sourceGrid Grid to reorder; nothing happens when it is falsy.
 */
function sortSourceGrid(sourceGrid: DG.Grid) {
  if (!sourceGrid)
    return;

  const gridColumns: DG.GridColumn[] = [];
  let index = 1;
  while (index < sourceGrid.columns.length) {
    gridColumns.push(sourceGrid.columns.byIndex(index)!);
    index++;
  }

  // 0 for amino acid columns, 1 for everything else: the difference reproduces the -1 / 0 / 1 comparator.
  const rank = (gridColumn: DG.GridColumn): number => gridColumn.column!.semType == 'aminoAcids' ? 0 : 1;
  gridColumns.sort((left, right) => rank(left) - rank(right));

  const orderedNames = gridColumns.map((gridColumn) => gridColumn.name);
  sourceGrid.columns.setOrder(orderedNames);
}
|
|
96
|
-
|
|
97
|
-
// export async function scaleActivity(
|
|
98
|
-
// activityScaling: string, activityColumn: string, activityColumnScaled: string, df: DG.DataFrame,
|
|
99
|
-
// ): Promise<[DG.DataFrame, string]> {
|
|
100
|
-
// // const df = sourceGrid.dataFrame!;
|
|
101
|
-
// const tempDf = df.clone(null, [activityColumn]);
|
|
102
|
-
|
|
103
|
-
// let formula = '${' + activityColumn + '}';
|
|
104
|
-
// let newColName = activityColumn;
|
|
105
|
-
// switch (activityScaling) {
|
|
106
|
-
// case 'none':
|
|
107
|
-
// break;
|
|
108
|
-
// case 'lg':
|
|
109
|
-
// formula = `Log10(${formula})`;
|
|
110
|
-
// newColName = `Log10(${newColName})`;
|
|
111
|
-
// break;
|
|
112
|
-
// case '-lg':
|
|
113
|
-
// formula = `-1*Log10(${formula})`;
|
|
114
|
-
// newColName = `-Log10(${newColName})`;
|
|
115
|
-
// break;
|
|
116
|
-
// default:
|
|
117
|
-
// throw new Error(`ScalingError: method \`${activityScaling}\` is not available.`);
|
|
118
|
-
// }
|
|
119
|
-
|
|
120
|
-
// await (tempDf.columns as DG.ColumnList).addNewCalculated(activityColumnScaled, formula);
|
|
121
|
-
|
|
122
|
-
// return [tempDf, newColName];
|
|
123
|
-
// }
|
|
124
|
-
|
|
125
|
-
/**
 * Builds the per (position, residue) statistics table.
 *
 * Steps, in order:
 *  1. group `matrixDf` by position and residue and count the rows of each pair ('Count');
 *  2. keep only the pairs whose count lies in [4, peptidesCount - 4];
 *  3. add a calculated 'Ratio' column (count divided by `peptidesCount`);
 *  4. for every remaining pair run a t-test between the activities of the peptides whose (mapped) residue
 *     at that position equals the pair's residue and the activities of all other peptides, storing the
 *     mean difference and the one-sided p-value that matches the sign of the difference;
 *  5. apply FDR correction to the p-values and store them in 'pValue'.
 *
 * Side effect: the row selection of `splitSeqDf` is overwritten while the groups are being extracted.
 *
 * @param matrixDf Unpivoted table with one row per (peptide, position).
 * @param positionColName Name of the position column in `matrixDf`.
 * @param aminoAcidResidue Name of the residue (or group) column in `matrixDf`.
 * @param activityColumnScaled Name of the scaled activity column.
 * @param peptidesCount Total number of peptides, used for the count filter and the ratio.
 * @param splitSeqDf Table with one column per position plus the scaled activity.
 * @param groupMapping Maps a raw residue to the value used in the residue column.
 * @returns A clone of the statistics table.
 */
async function calculateStatistics(
  matrixDf: DG.DataFrame,
  positionColName: string,
  aminoAcidResidue: string,
  activityColumnScaled: string,
  peptidesCount: number,
  splitSeqDf: DG.DataFrame,
  groupMapping: StringDictionary,
) {
  const countThreshold = 4;

  let statsDf = matrixDf.groupBy([positionColName, aminoAcidResidue])
    .add('count', activityColumnScaled, 'Count')
    .aggregate();

  //@ts-ignore: rows are accessed by column name
  statsDf.rows.filter((row) => row.Count >= countThreshold && row.Count <= peptidesCount - countThreshold);
  statsDf = statsDf.clone(statsDf.filter);

  // calculate additional stats
  await (statsDf.columns as DG.ColumnList).addNewCalculated('Ratio', '${count}/' + `${peptidesCount}`);

  // Activities of the rows currently selected in splitSeqDf.
  const selectedActivities = (): number[] => splitSeqDf
    .clone(splitSeqDf.selection, [activityColumnScaled])
    .getCol(activityColumnScaled)
    .toList();

  // calculate p-values based on t-test
  const rawPValues: Float32Array = new Float32Array(statsDf.rowCount).fill(1);
  const mdCol: DG.Column = (statsDf.columns as DG.ColumnList).addNewFloat('Mean difference');
  const pValCol: DG.Column = (statsDf.columns as DG.ColumnList).addNewFloat('pValue');

  for (let rowIdx = 0; rowIdx < statsDf.rowCount; rowIdx++) {
    const position = statsDf.get(positionColName, rowIdx);
    const aar = statsDf.get(aminoAcidResidue, rowIdx);

    //@ts-ignore
    splitSeqDf.rows.select((row) => groupMapping[row[position]] === aar);
    const currentActivity = selectedActivities();

    //@ts-ignore
    splitSeqDf.rows.select((row) => groupMapping[row[position]] !== aar);
    const otherActivity = selectedActivities();

    const testResult = tTest(currentActivity, otherActivity);
    const currentMeanDiff = testResult['Mean difference']!;
    // one-sided p-value in the direction of the observed difference
    const pValueKey = currentMeanDiff >= 0 ? 'p-value more' : 'p-value less';

    mdCol.set(rowIdx, currentMeanDiff);
    rawPValues[rowIdx] = testResult[pValueKey];
  }

  const correctedPValues = fdrcorrection(rawPValues)[1];
  correctedPValues.forEach((value: number, rowIdx: number) => pValCol.set(rowIdx, value));

  return statsDf.clone();
}
|
|
187
|
-
|
|
188
|
-
/**
 * Sets the category order of the residue column of `matrixDf` so that residues are sorted by their total
 * weight, heaviest first.
 *
 * The weight of a residue is the sum over `statsDf` of 'Mean difference', or of its absolute value in
 * two-color mode (in which case an 'Absolute Mean difference' calculated column is added to `statsDf`).
 *
 * The weights are read once from the aggregated table into a map. The previous implementation issued a
 * `groupBy/where/aggregate` query, built by string interpolation, for every comparison made by the sort.
 *
 * @param twoColorMode Whether to weigh residues by the absolute mean difference.
 * @param statsDf Statistics table containing the residue column and 'Mean difference'.
 * @param aminoAcidResidue Name of the residue column in both tables.
 * @param matrixDf Table whose residue column receives the category order.
 */
async function setCategoryOrder(
  twoColorMode: boolean, statsDf: DG.DataFrame, aminoAcidResidue: string, matrixDf: DG.DataFrame,
) {
  const sortArgument = twoColorMode ? 'Absolute Mean difference' : 'Mean difference';
  if (twoColorMode)
    await (statsDf.columns as DG.ColumnList).addNewCalculated('Absolute Mean difference', 'Abs(${Mean difference})');

  // One row per residue: grouping by the residue column makes every residue unique in this table.
  const aarWeightsDf = statsDf.groupBy([aminoAcidResidue]).sum(sortArgument, 'weight').aggregate();
  const aarList: string[] = aarWeightsDf.getCol(aminoAcidResidue).toList();
  const weightList: number[] = aarWeightsDf.getCol('weight').toList();

  const weightByAar = new Map<string, number>(
    aarList.map((aar, rowIdx): [string, number] => [aar, weightList[rowIdx]]),
  );
  aarList.sort((first, second) => weightByAar.get(second)! - weightByAar.get(first)!);

  matrixDf.getCol(aminoAcidResidue).setCategoryOrder(aarList);
}
|
|
206
|
-
|
|
207
|
-
/**
 * Builds the "vertical" SAR table: for every position, the row(s) of `statsDf` with `pValue <= 0.1` whose
 * 'Mean difference' is the most pronounced at that position.
 *
 * "Most pronounced" is the maximum mean difference; in two-color mode it is the maximum when it exceeds the
 * absolute value of the minimum, and the minimum otherwise.
 *
 * @param statsDf Statistics table with 'Mean difference', 'Count', 'Ratio' and 'pValue' columns.
 * @param aminoAcidResidue Name of the residue column.
 * @param positionColName Name of the position column.
 * @param twoColorMode Whether negative differences compete with positive ones.
 * @returns A new table containing only the selected rows.
 */
function createVerticalTable(
  statsDf: DG.DataFrame,
  aminoAcidResidue: string,
  positionColName: string,
  twoColorMode: boolean,
) {
  // TODO: acquire ALL of the positions
  const significantDf = statsDf
    .groupBy(['Mean difference', aminoAcidResidue, positionColName, 'Count', 'Ratio', 'pValue'])
    .where('pValue <= 0.1')
    .aggregate();

  const bestAtPos: {[index: string]: number} = {};
  for (const position of significantDf.getCol(positionColName).categories) {
    const posStats: DG.Stats = DG.Stats.fromColumn(
      significantDf.getCol('Mean difference'),
      DG.BitSet.create(significantDf.rowCount, (rowIdx) => significantDf.get(positionColName, rowIdx) === position),
    );

    if (twoColorMode && !(posStats.max > Math.abs(posStats.min)))
      bestAtPos[position] = posStats.min;
    else
      bestAtPos[position] = posStats.max;
  }

  const isBestRow = (rowIdx: number) =>
    significantDf.get('Mean difference', rowIdx) === bestAtPos[significantDf.get(positionColName, rowIdx)];

  return significantDf.clone(DG.BitSet.create(significantDf.rowCount, isBestRow));
}
|
|
234
|
-
|
|
235
|
-
/**
 * Creates the two SAR grids: the matrix grid over `matrixDf` and the vertical grid over `sequenceDf`.
 *
 * The matrix grid is sorted by the residue column and its columns are ordered as residue column followed by
 * `positionColumns`. The vertical grid is sorted by position and its 'pValue' column is formatted and
 * renamed to 'P-Value'. When residues are not grouped, the amino acid renderer is registered for the
 * residue column of each table.
 *
 * Fix: the renderer for the `sequenceDf` column is now registered against the vertical grid that displays
 * it; it was previously registered against the matrix grid a second time.
 *
 * @param matrixDf Pivoted SAR table (residue by position).
 * @param aminoAcidResidue Name of the residue column.
 * @param positionColumns Names of the position columns, in display order.
 * @param sequenceDf Vertical SAR table.
 * @param positionColName Name of the position column in `sequenceDf`.
 * @param grouping Whether residues were replaced by their groups.
 * @returns The matrix grid and the vertical grid, in that order.
 */
function createGrids(
  matrixDf: DG.DataFrame,
  aminoAcidResidue: string,
  positionColumns: string[],
  sequenceDf: DG.DataFrame,
  positionColName: string,
  grouping: boolean,
) {
  const sarGrid = matrixDf.plot.grid();
  sarGrid.sort([aminoAcidResidue]);
  sarGrid.columns.setOrder([aminoAcidResidue].concat(positionColumns));

  const sarVGrid = sequenceDf.plot.grid();
  sarVGrid.sort([positionColName]);
  const pValueGridCol = sarVGrid.col('pValue')!;
  pValueGridCol.format = 'four digits after comma';
  pValueGridCol.name = 'P-Value';

  if (!grouping) {
    const matrixAarCol = (matrixDf.columns as DG.ColumnList).byName(aminoAcidResidue);
    if (matrixAarCol)
      setAARRenderer(matrixAarCol, sarGrid);

    const sequenceAarCol = (sequenceDf.columns as DG.ColumnList).byName(aminoAcidResidue);
    if (sequenceAarCol)
      setAARRenderer(sequenceAarCol, sarVGrid);
  }

  return [sarGrid, sarVGrid];
}
|
|
264
|
-
|
|
265
|
-
/**
 * Subscribes a custom cell renderer to both SAR grids. For every non-null table cell in one of
 * `renderColNames` it draws a filled circle whose
 *  - color encodes the p-value of the (residue, position) pair (< 0.01, < 0.05, < 0.1, otherwise gray),
 *    with a separate palette for negative values in two-color mode;
 *  - radius encodes the cell value scaled to the range of 'Mean difference' in `statsDf`
 *    (absolute values in two-color mode), with a minimum radius of 3.
 * The row header column is hidden the first time one of its cells is rendered.
 *
 * Fixes compared to the previous implementation:
 *  - the canvas state saved at the top is now always restored (`try/finally`); the row-header early return
 *    used to leave the saved state and the clip region on the context;
 *  - when the value range is empty (max equals min) the scale factor is 0 instead of NaN, so the minimum
 *    radius is drawn.
 *
 * @param renderColNames Names of the table columns rendered as circles.
 * @param positionColName Name of the position column in `statsDf` and in the vertical table.
 * @param aminoAcidResidue Name of the residue column.
 * @param statsDf Statistics table providing 'pValue' and 'Mean difference'.
 * @param twoColorMode Whether negative values get their own palette and are scaled by absolute value.
 * @param sarGrid Matrix grid.
 * @param sarVGrid Vertical grid.
 */
function setCellRendererFunc(
  renderColNames: string[], positionColName: string, aminoAcidResidue: string, statsDf: DG.DataFrame,
  twoColorMode: boolean, sarGrid: DG.Grid, sarVGrid: DG.Grid) {
  const mdCol = statsDf.getCol('Mean difference');
  const cellRendererFunc = function(args: DG.GridCellRenderArgs) {
    args.g.save();
    try {
      args.g.beginPath();
      args.g.rect(args.bounds.x, args.bounds.y, args.bounds.width, args.bounds.height);
      args.g.clip();

      if (args.cell.isRowHeader && args.cell.gridColumn.visible) {
        args.cell.gridColumn.visible = false;
        args.preventDefault();
        return;
      }

      if (
        !args.cell.isTableCell ||
        args.cell.tableRowIndex === null ||
        args.cell.tableColumn === null ||
        args.cell.cell.value === null
      )
        return;

      if (!renderColNames.includes(args.cell.tableColumn.name))
        return;

      // In the matrix grid the column name is the position; in the vertical grid it is stored in the row.
      const currentPosition = args.cell.tableColumn.name !== 'Mean difference' ?
        args.cell.tableColumn.name : args.cell.grid.table.get(positionColName, args.cell.tableRowIndex);
      const query =
        `${aminoAcidResidue} = ${args.cell.grid.table.get(aminoAcidResidue, args.cell.tableRowIndex)} ` +
        `and ${positionColName} = ${currentPosition}`;

      const pVal: number = statsDf.groupBy(['pValue']).where(query).aggregate().get('pValue', 0);

      const useNegativePalette = args.cell.cell.value < 0 && twoColorMode;
      let fillColor;
      if (pVal < 0.01)
        fillColor = useNegativePalette ? '#FF7900' : '#299617';
      else if (pVal < 0.05)
        fillColor = useNegativePalette ? '#FFA500' : '#32CD32';
      else if (pVal < 0.1)
        fillColor = useNegativePalette ? '#FBCEB1' : '#98FF98';
      else
        fillColor = DG.Color.toHtml(DG.Color.lightLightGray);

      const minValue = twoColorMode ? 0 : mdCol.min;
      const maxValue = twoColorMode ? Math.max(Math.abs(mdCol.min), mdCol.max) : mdCol.max;
      const currentValue = twoColorMode ? Math.abs(args.cell.cell.value) : args.cell.cell.value;

      // Guard against an empty range: 0 / 0 would give NaN and nothing would be drawn.
      const valueRange = maxValue - minValue;
      const rCoef = valueRange > 0 ? (currentValue - minValue) / valueRange : 0;

      const maxRadius = 0.9 * Math.min(args.bounds.width, args.bounds.height) / 2;
      const radius = Math.floor(maxRadius * rCoef);

      args.g.beginPath();
      args.g.fillStyle = fillColor;
      args.g.arc(
        args.bounds.x + args.bounds.width / 2, args.bounds.y + args.bounds.height / 2, radius < 3 ? 3 : radius, 0,
        Math.PI * 2, true,
      );
      args.g.closePath();

      args.g.fill();
      args.preventDefault();
    } finally {
      // Always undo save()/clip(), including on the early returns above.
      args.g.restore();
    }
  };
  sarGrid.onCellRender.subscribe(cellRendererFunc);
  sarVGrid.onCellRender.subscribe(cellRendererFunc);
}
|
|
334
|
-
|
|
335
|
-
/**
 * Installs the tooltip handler on both SAR grids.
 *
 *  - Over a non-null data cell in one of `renderColNames`: shows a table with every statistic of `statsDf`
 *    (all columns except the residue and position columns) for that (residue, position) pair. 'Count' is
 *    shown as "count / peptidesCount", other values with 5 decimals, and a p-value that prints as zero is
 *    shown as '<0.01'.
 *  - Over a residue cell: shows the group description when residues are grouped, otherwise delegates to
 *    the palette tooltip.
 * The handler always resolves to `true`.
 *
 * Changes compared to the previous implementation:
 *  - the position and the query are computed once per tooltip instead of once per statistics column;
 *  - a residue-column value without an entry in `groupDescription` no longer throws; no group tooltip is
 *    shown for it.
 *
 * @param renderColNames Names of the table columns that get the statistics tooltip.
 * @param statsDf Statistics table.
 * @param aminoAcidResidue Name of the residue column.
 * @param positionColName Name of the position column.
 * @param peptidesCount Total number of peptides, shown next to the count.
 * @param grouping Whether residues were replaced by their groups.
 * @param sarGrid Matrix grid.
 * @param sarVGrid Vertical grid.
 */
function setTooltipFunc(
  renderColNames: string[], statsDf: DG.DataFrame, aminoAcidResidue: string, positionColName: string,
  peptidesCount: number, grouping: boolean, sarGrid: DG.Grid, sarVGrid: DG.Grid) {
  const onCellTooltipFunc = async function(cell: DG.GridCell, x: number, y: number) {
    if (
      !cell.isRowHeader && !cell.isColHeader && cell.tableColumn !== null && cell.cell.value !== null &&
      cell.tableRowIndex !== null && renderColNames.includes(cell.tableColumn.name)) {
      // In the matrix grid the column name is the position; in the vertical grid it is stored in the row.
      const currentPosition = cell.tableColumn.name !== 'Mean difference' ?
        cell.tableColumn.name : cell.grid.table.get(positionColName, cell.tableRowIndex);
      const query =
        `${aminoAcidResidue} = ${cell.grid.table.get(aminoAcidResidue, cell.tableRowIndex)} ` +
        `and ${positionColName} = ${currentPosition}`;

      const tooltipMap: { [index: string]: string } = {};
      for (const col of (statsDf.columns as DG.ColumnList).names()) {
        if (col === aminoAcidResidue || col === positionColName)
          continue;

        const textNum = statsDf.groupBy([col]).where(query).aggregate().get(col, 0);
        let text = `${col === 'Count' ? textNum : textNum.toFixed(5)}`;

        if (col === 'Count')
          text += ` / ${peptidesCount}`;
        else if (col === 'pValue')
          text = parseFloat(text) !== 0 ? text : '<0.01';

        tooltipMap[col === 'pValue' ? 'p-value' : col] = text;
      }

      ui.tooltip.show(ui.tableFromMap(tooltipMap), x, y);
    }
    if (
      !cell.isColHeader &&
      cell.tableColumn !== null &&
      cell.tableColumn.name == aminoAcidResidue &&
      cell.cell.value !== null &&
      cell.tableRowIndex !== null
    ) {
      if (grouping) {
        const currentGroup = groupDescription[cell.cell.value];
        // Unknown group codes have no description to show.
        if (currentGroup) {
          const divText = ui.divText('Amino Acids in this group: ' + currentGroup['aminoAcids'].join(', '));
          ui.tooltip.show(ui.divV([ui.h3(currentGroup['description']), divText]), x, y);
        }
      } else
        await cp.showTooltip(cell, x, y);
    }
    return true;
  };
  sarGrid.onCellTooltip(onCellTooltipFunc);
  sarVGrid.onCellTooltip(onCellTooltipFunc);
}
|
|
385
|
-
|
|
386
|
-
/**
 * Final cosmetic pass over the source grid and the two SAR grids:
 *  - cells of rows listed in `invalidIndexes` get a light gray background in the source grid
 *    (row headers excluded);
 *  - 'Mean difference' is renamed to 'Diff' in the vertical grid;
 *  - row height, row header width and column widths are set: in the vertical grid every column except
 *    'Diff' and the residue column is 45 wide, all other columns are as wide as the row height;
 *  - when grouping is on, the residue column is renamed to 'Groups' in both SAR grids;
 *  - editing is disabled in both SAR grids.
 *
 * Fixes compared to the previous implementation:
 *  - row index 0 was never highlighted because the index was tested for truthiness; it is now compared
 *    against null/undefined;
 *  - invalid indexes are looked up in a `Set` instead of scanning the array for every prepared cell;
 *  - the residue column is identified by `aminoAcidResidue` instead of the hard-coded name 'AAR'.
 *
 * @param sourceGrid Grid of the source table.
 * @param invalidIndexes Row indexes of the source table to highlight.
 * @param grouping Whether residues were replaced by their groups.
 * @param aminoAcidResidue Name of the residue column in the SAR grids.
 * @param sarGrid Matrix grid.
 * @param sarVGrid Vertical grid.
 */
function postProcessGrids(
  sourceGrid: DG.Grid,
  invalidIndexes: number[],
  grouping: boolean,
  aminoAcidResidue: string,
  sarGrid: DG.Grid,
  sarVGrid: DG.Grid,
) {
  const invalidIndexSet = new Set<number>(invalidIndexes);
  sourceGrid.onCellPrepare((cell: DG.GridCell) => {
    const currentRowIndex = cell.tableRowIndex;
    // Explicit null check: row 0 is a valid index and must not be skipped.
    if (currentRowIndex != null && invalidIndexSet.has(currentRowIndex) && !cell.isRowHeader)
      cell.style.backColor = DG.Color.lightLightGray;
  });

  const mdCol: DG.GridColumn = sarVGrid.col('Mean difference')!;
  mdCol.name = 'Diff';

  for (const grid of [sarGrid, sarVGrid]) {
    grid.props.rowHeight = 20;
    grid.columns.rowHeader!.width = 20;
    for (let i = 0; i < grid.columns.length; ++i) {
      const col = grid.columns.byIndex(i)!;
      if (grid == sarVGrid && col.name !== 'Diff' && col.name !== aminoAcidResidue)
        col.width = 45;
      else
        col.width = grid.props.rowHeight;
    }
  }

  // Renaming happens after the width pass, which matches columns by their original names.
  if (grouping) {
    sarGrid.col(aminoAcidResidue)!.name = 'Groups';
    sarVGrid.col(aminoAcidResidue)!.name = 'Groups';
  }

  sarGrid.props.allowEdit = false;
  sarVGrid.props.allowEdit = false;
}
|
|
423
|
-
|
|
424
|
-
/**
 * Runs the SAR analysis for a table of aligned peptides and builds the two SAR grids.
 *
 * Outline:
 *  1. split the aligned sequence column into per-position columns and join them into `df`;
 *  2. register the amino acid renderer for those columns and move them to the front of the source grid;
 *  3. scale the activity column and replace the existing scaled column in `df` with the new one;
 *  4. restrict the split table to `initialBitset`, collapse duplicate sequences (median activity) and
 *     unpivot it into (position, residue) rows, optionally mapping residues to their groups;
 *  5. compute the statistics, pivot them into the SAR matrix, derive the vertical table and create,
 *     decorate and post-process both grids.
 *
 * Side effects: `df` and `sourceGrid` are modified (columns joined/replaced, grid columns renamed and
 * reordered, cell handlers installed).
 * NOTE(review): `df` must already contain a `${activityColumn}Scaled` column - it is looked up with
 * `getCol` before being replaced; confirm callers guarantee this.
 *
 * Change compared to the previous implementation: a missing aligned-sequence column is reported with an
 * explicit error instead of being passed on through a non-null assertion.
 *
 * @param df Source table containing a column with semantic type 'alignedSequence'.
 * @param activityColumn Name of the activity column.
 * @param activityScaling Scaling method passed to `PeptidesController.scaleActivity`.
 * @param sourceGrid Grid that displays `df`.
 * @param twoColorMode Whether negative mean differences are treated separately from positive ones.
 * @param initialBitset Rows of the split table to analyze, or null.
 * @param grouping Whether residues are replaced by their groups.
 * @returns The matrix grid, the vertical grid, the statistics table and the residue-to-group mapping.
 * @throws Error when `df` has no column with semantic type 'alignedSequence'.
 */
export async function describe(
  df: DG.DataFrame,
  activityColumn: string,
  activityScaling: string,
  sourceGrid: DG.Grid,
  twoColorMode: boolean,
  initialBitset: DG.BitSet | null,
  grouping: boolean,
): Promise<[DG.Grid, DG.Grid, DG.DataFrame, StringDictionary]> {
  //Split the aligned sequence into separate AARs
  const alignedSeqCol = (df.columns as DG.ColumnList).bySemType('alignedSequence');
  if (alignedSeqCol == null)
    throw new Error('describe: the table has no column with semantic type \'alignedSequence\'');

  const [initialSplitDf, invalidIndexes] = PeptidesController.splitAlignedPeptides(alignedSeqCol);
  let splitSeqDf: DG.DataFrame = initialSplitDf;
  splitSeqDf.name = 'Split sequence';

  // Both lists are taken before the activity columns are appended, so they hold position columns only.
  const positionColumns = (splitSeqDf.columns as DG.ColumnList).names();
  const activityColumnScaled = `${activityColumn}Scaled`;
  const renderColNames: string[] = (splitSeqDf.columns as DG.ColumnList).names();
  const positionColName = 'Pos';
  const aminoAcidResidue = 'AAR';

  (splitSeqDf.columns as DG.ColumnList).add(df.getCol(activityColumn));

  joinDataFrames(df, positionColumns, splitSeqDf, activityColumn);

  for (const sourceCol of (df.columns as DG.ColumnList)) {
    if (splitSeqDf.col(sourceCol.name) && sourceCol.name != activityColumn)
      setAARRenderer(sourceCol, sourceGrid);
  }

  sortSourceGrid(sourceGrid);

  const [scaledDf, newColName] = await PeptidesController.scaleActivity(
    activityScaling, activityColumn, activityColumnScaled, df);
  //TODO: make another func
  const scaledCol = scaledDf.getCol(activityColumnScaled);
  const oldScaledCol = df.getCol(activityColumnScaled);
  const oldScaledColGridName = oldScaledCol.temp['gridName'];
  const oldScaledGridCol = sourceGrid.col(oldScaledColGridName);

  (splitSeqDf.columns as DG.ColumnList).add(scaledCol);
  (df.columns as DG.ColumnList).replace(oldScaledCol, scaledCol);
  // Free the name when the scaled column is about to be displayed under the original activity name.
  if (newColName === activityColumn)
    sourceGrid.col(activityColumn)!.name = `~${activityColumn}`;
  if (oldScaledGridCol !== null) {
    oldScaledGridCol.name = newColName;
    oldScaledGridCol.visible = true;
  }
  sourceGrid.columns.setOrder([newColName]);

  splitSeqDf = splitSeqDf.clone(initialBitset);

  //unpivot a table and handle duplicates
  splitSeqDf = splitSeqDf.groupBy(positionColumns)
    .add('med', activityColumnScaled, activityColumnScaled)
    .aggregate();

  const peptidesCount = splitSeqDf.getCol(activityColumnScaled).length;

  let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);

  //TODO: move to chem palette
  let groupMapping: StringDictionary = {};
  if (grouping) {
    groupMapping = aarGroups;
    const aarCol = matrixDf.getCol(aminoAcidResidue);
    // The group is looked up by the first character of the residue; unknown residues fall into '-'.
    aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
    aarCol.compact();
  } else
    Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);

  //statistics for specific AAR at a specific position
  const statsDf = await calculateStatistics(
    matrixDf, positionColName, aminoAcidResidue, activityColumnScaled, peptidesCount, splitSeqDf, groupMapping,
  );

  // SAR matrix table
  //pivot a table to make it matrix-like
  matrixDf = statsDf.groupBy([aminoAcidResidue])
    .pivot(positionColName)
    .add('first', 'Mean difference', '')
    .aggregate();
  matrixDf.name = 'SAR';

  // Setting category order
  await setCategoryOrder(twoColorMode, statsDf, aminoAcidResidue, matrixDf);

  // SAR vertical table (naive, choose best Mean difference from pVals <= 0.1)
  const sequenceDf = createVerticalTable(statsDf, aminoAcidResidue, positionColName, twoColorMode);
  renderColNames.push('Mean difference');

  const [sarGrid, sarVGrid] = createGrids(
    matrixDf, aminoAcidResidue, positionColumns, sequenceDf, positionColName, grouping,
  );

  setCellRendererFunc(
    renderColNames, positionColName, aminoAcidResidue, statsDf, twoColorMode, sarGrid, sarVGrid,
  );

  // show all the statistics in a tooltip over cell
  setTooltipFunc(
    renderColNames, statsDf, aminoAcidResidue, positionColName, peptidesCount, grouping, sarGrid, sarVGrid,
  );

  postProcessGrids(sourceGrid, invalidIndexes, grouping, aminoAcidResidue, sarGrid, sarVGrid);

  //TODO: return class instead
  return [sarGrid, sarVGrid, statsDf, groupMapping];
}
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
// import * as DG from 'datagrok-api/dg';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Split aligned sequence string into separate parts containing amino acid residues.
|
|
5
|
-
*
|
|
6
|
-
* @export
|
|
7
|
-
* @param {DG.Column} peptideColumn Column containing aligned sequences.
|
|
8
|
-
* @param {boolean} [filter=true] Filter out columns with all the same residues.
|
|
9
|
-
* @return {[DG.DataFrame, number[]]} DataFrame containing split sequence and a list of invalid indexes.
|
|
10
|
-
*/
|
|
11
|
-
// export function splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean = true): [DG.DataFrame, number[]] {
|
|
12
|
-
// const splitPeptidesArray: string[][] = [];
|
|
13
|
-
// let currentSplitPeptide: string[];
|
|
14
|
-
// let modeMonomerCount = 0;
|
|
15
|
-
// let currentLength;
|
|
16
|
-
// const colLength = peptideColumn.length;
|
|
17
|
-
|
|
18
|
-
// // splitting data
|
|
19
|
-
// const monomerLengths: {[index: string]: number} = {};
|
|
20
|
-
// for (let i = 0; i < colLength; i++) {
|
|
21
|
-
// currentSplitPeptide = peptideColumn.get(i).split('-').map((value: string) => value ? value : '-');
|
|
22
|
-
// splitPeptidesArray.push(currentSplitPeptide);
|
|
23
|
-
// currentLength = currentSplitPeptide.length;
|
|
24
|
-
// monomerLengths[currentLength + ''] =
|
|
25
|
-
// monomerLengths[currentLength + ''] ? monomerLengths[currentLength + ''] + 1 : 1;
|
|
26
|
-
// }
|
|
27
|
-
// //@ts-ignore: what I do here is converting string to number the most effective way I could find. parseInt is slow
|
|
28
|
-
// modeMonomerCount = 1 * Object.keys(monomerLengths).reduce((a, b) => monomerLengths[a] > monomerLengths[b] ? a : b);
|
|
29
|
-
|
|
30
|
-
// // making sure all of the sequences are of the same size
|
|
31
|
-
// // and marking invalid sequences
|
|
32
|
-
// let nTerminal: string;
|
|
33
|
-
// const invalidIndexes: number[] = [];
|
|
34
|
-
// let splitColumns: string[][] = Array.from({length: modeMonomerCount}, (_) => []);
|
|
35
|
-
// modeMonomerCount--; // minus N-terminal
|
|
36
|
-
// for (let i = 0; i < colLength; i++) {
|
|
37
|
-
// currentSplitPeptide = splitPeptidesArray[i];
|
|
38
|
-
// nTerminal = currentSplitPeptide.pop()!; // it is guaranteed that there will be at least one element
|
|
39
|
-
// currentLength = currentSplitPeptide.length;
|
|
40
|
-
// if (currentLength !== modeMonomerCount)
|
|
41
|
-
// invalidIndexes.push(i);
|
|
42
|
-
|
|
43
|
-
// for (let j = 0; j < modeMonomerCount; j++)
|
|
44
|
-
// splitColumns[j].push(j < currentLength ? currentSplitPeptide[j] : '-');
|
|
45
|
-
|
|
46
|
-
// splitColumns[modeMonomerCount].push(nTerminal);
|
|
47
|
-
// }
|
|
48
|
-
// modeMonomerCount--; // minus C-terminal
|
|
49
|
-
|
|
50
|
-
// //create column names list
|
|
51
|
-
// const columnNames = Array.from({length: modeMonomerCount}, (_, index) => `${index + 1 < 10 ? 0 : ''}${index + 1 }`);
|
|
52
|
-
// columnNames.splice(0, 0, 'N-terminal');
|
|
53
|
-
// columnNames.push('C-terminal');
|
|
54
|
-
|
|
55
|
-
// // filter out the columns with the same values
|
|
56
|
-
// if (filter) {
|
|
57
|
-
// splitColumns = splitColumns.filter((positionArray, index) => {
|
|
58
|
-
// const isRetained = new Set(positionArray).size > 1;
|
|
59
|
-
// if (!isRetained)
|
|
60
|
-
// columnNames.splice(index, 1);
|
|
61
|
-
|
|
62
|
-
// return isRetained;
|
|
63
|
-
// });
|
|
64
|
-
// }
|
|
65
|
-
|
|
66
|
-
// return [
|
|
67
|
-
// DG.DataFrame.fromColumns(splitColumns.map((positionArray, index) => {
|
|
68
|
-
// return DG.Column.fromList('string', columnNames[index], positionArray);
|
|
69
|
-
// })),
|
|
70
|
-
// invalidIndexes,
|
|
71
|
-
// ];
|
|
72
|
-
// }
|