@datagrok/peptides 0.8.8 → 0.8.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/.eslintrc.json +2 -1
  2. package/dist/package-test.js +22626 -0
  3. package/dist/package.js +21429 -0
  4. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +8840 -0
  5. package/jest.config.js +33 -0
  6. package/package.json +70 -62
  7. package/src/__jest__/remote.test.ts +50 -0
  8. package/src/__jest__/test-node.ts +96 -0
  9. package/src/model.ts +977 -92
  10. package/src/monomer-library.ts +18 -12
  11. package/src/package-test.ts +6 -5
  12. package/src/package.ts +83 -68
  13. package/src/peptides.ts +298 -142
  14. package/src/styles.css +8 -0
  15. package/src/tests/peptide-space-test.ts +1 -1
  16. package/src/tests/peptides-tests.ts +20 -81
  17. package/src/tests/utils.ts +4 -9
  18. package/src/utils/SAR-multiple-filter.ts +439 -0
  19. package/src/utils/SAR-multiple-selection.ts +177 -0
  20. package/src/utils/cell-renderer.ts +124 -97
  21. package/src/utils/chem-palette.ts +98 -166
  22. package/src/utils/constants.ts +56 -0
  23. package/src/utils/filtering-statistics.ts +62 -0
  24. package/src/utils/multiple-sequence-alignment.ts +33 -2
  25. package/src/utils/multivariate-analysis.ts +79 -0
  26. package/src/utils/peptide-similarity-space.ts +24 -53
  27. package/src/utils/types.ts +10 -0
  28. package/src/viewers/logo-viewer.ts +7 -5
  29. package/src/viewers/peptide-space-viewer.ts +121 -0
  30. package/src/viewers/sar-viewer.ts +118 -342
  31. package/src/viewers/stacked-barchart-viewer.ts +322 -369
  32. package/src/widgets/analyze-peptides.ts +50 -29
  33. package/src/widgets/distribution.ts +61 -0
  34. package/src/widgets/manual-alignment.ts +7 -4
  35. package/src/widgets/multiple-sequence-alignment.ts +9 -0
  36. package/src/widgets/peptide-molecule.ts +8 -6
  37. package/src/widgets/subst-table.ts +73 -0
  38. package/src/workers/dimensionality-reducer.ts +1 -1
  39. package/test-Peptides-414a1874a71a-2f1c6575.html +256 -0
  40. package/src/describe.ts +0 -535
  41. package/src/utils/split-aligned.ts +0 -72
  42. package/src/viewers/subst-viewer.ts +0 -285
package/src/describe.ts DELETED
@@ -1,535 +0,0 @@
1
- import * as ui from 'datagrok-api/ui';
2
- import * as DG from 'datagrok-api/dg';
3
- import {splitAlignedPeptides} from './utils/split-aligned';
4
- import {tTest} from '@datagrok-libraries/statistics/src/tests';
5
- import {fdrcorrection} from '@datagrok-libraries/statistics/src/multiple-tests';
6
- import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
7
- import {ChemPalette} from './utils/chem-palette';
8
- import {setAARRenderer} from './utils/cell-renderer';
9
-
10
/** Module-wide palette instance; this file uses it to show residue tooltips via `cp.showTooltip`. */
const cp = new ChemPalette('grok');
11
-
12
/**
 * Maps a one-letter amino acid residue code to the code of the group it belongs to.
 * The group codes are the keys of `groupDescription`: `PC`, `NC`, `U`, `SC`, `H` and `-`.
 */
export const aarGroups = {
  // PC
  R: 'PC', H: 'PC', K: 'PC',
  // NC
  D: 'NC', E: 'NC',
  // U
  S: 'U', T: 'U', N: 'U', Q: 'U',
  // SC
  C: 'SC', U: 'SC', G: 'SC', P: 'SC',
  // H
  A: 'H', V: 'H', I: 'H', L: 'H', M: 'H', F: 'H', Y: 'H', W: 'H',
  // Placeholder residue maps to itself
  '-': '-',
};
36
-
37
/** Tooltip text and member residues for every group code that `aarGroups` can produce. */
const groupDescription: {[key: string]: {description: string, aminoAcids: string[]}} = {
  PC: {
    description: 'Positive Amino Acids, with Electrically Charged Side Chains',
    aminoAcids: ['R', 'H', 'K'],
  },
  NC: {
    description: 'Negative Amino Acids, with Electrically Charged Side Chains',
    aminoAcids: ['D', 'E'],
  },
  U: {
    description: 'Amino Acids with Polar Uncharged Side Chains',
    aminoAcids: ['S', 'T', 'N', 'Q'],
  },
  SC: {
    description: 'Special Cases',
    aminoAcids: ['C', 'U', 'G', 'P'],
  },
  H: {
    description: 'Amino Acids with Hydrophobic Side Chain',
    aminoAcids: ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
  },
  '-': {
    description: 'Unknown Amino Acid',
    aminoAcids: ['-'],
  },
};
48
-
49
- /*function customGridColumnHeader(cell: DG.GridCell) {
50
- if (cell.isColHeader && cell.tableColumn != null) {
51
- if (highlightedColumns.includes(parseInt(cell.tableColumn.name))) {
52
- cell.style.backColor = 0xff1f77b4;
53
- }
54
- }
55
- }*/
56
-
57
/**
 * Removes previously added scaled-activity columns from `df` and, unless every position column is already
 * present, joins the position columns of `splitSeqDf` into `df` in place.
 *
 * @param activityColumnScaled Name of the scaled activity column to drop before joining.
 * @param df Source table; modified in place.
 * @param positionColumns Names of the position columns expected in `df`.
 * @param splitSeqDf Table holding the position columns.
 * @param activityColumn Name of the column used as the join key on both sides.
 */
function joinDataFrames(
  activityColumnScaled: string,
  df: DG.DataFrame,
  positionColumns: string[],
  splitSeqDf: DG.DataFrame,
  activityColumn: string,
) {
  const sourceColumns = df.columns as DG.ColumnList;

  //FIXME: the "<name> (2)" column usually duplicates, so remove it as well
  const staleNames = [activityColumnScaled, `${activityColumnScaled} (2)`];
  for (const staleName of staleNames) {
    if (df.col(staleName))
      sourceColumns.remove(staleName);
  }

  // Join only when at least one position column is missing, so columns are never added twice.
  const presentNames = new Set(df.columns.names());
  const hasMissingPosition = positionColumns.some((name: string) => !presentNames.has(name));
  if (hasMissingPosition)
    df.join(splitSeqDf, [activityColumn], [activityColumn], df.columns.names(), positionColumns, 'inner', true);
}
78
-
79
/**
 * Reorders the columns of `sourceGrid` so that columns whose semantic type is `aminoAcids` come first.
 * The grid column at index 0 is left out of the reordering. Does nothing when `sourceGrid` is falsy.
 *
 * @param sourceGrid Grid to reorder in place.
 */
function sortSourceGrid(sourceGrid: DG.Grid) {
  if (!sourceGrid)
    return;

  const gridColumns: DG.GridColumn[] = [];
  for (let idx = 1; idx < sourceGrid.columns.length; idx++)
    gridColumns.push(sourceGrid.columns.byIndex(idx)!);

  // Amino acid columns rank 0, everything else ranks 1; equal ranks compare as 0.
  const rank = (gridCol: DG.GridColumn) => gridCol.column!.semType == 'aminoAcids' ? 0 : 1;
  gridColumns.sort((left, right) => rank(left) - rank(right));

  sourceGrid.columns.setOrder(gridColumns.map((gridCol) => gridCol.name));
}
98
-
99
/**
 * Adds a calculated scaled-activity column to the grid's data frame, adds that column to `splitSeqDf`,
 * renames the matching grid column to a readable caption and passes that caption to `setOrder`.
 *
 * @param activityScaling `'lg'` for Log10, `'-lg'` for -Log10; any other value keeps the activity as is.
 * @param activityColumn Name of the source activity column.
 * @param activityColumnScaled Name given to the calculated column.
 * @param sourceGrid Grid whose data frame receives the calculated column.
 * @param splitSeqDf Table that also receives the calculated column.
 */
async function scaleActivity(
  activityScaling: string,
  activityColumn: string,
  activityColumnScaled: string,
  sourceGrid: DG.Grid,
  splitSeqDf: DG.DataFrame,
) {
  const df = sourceGrid.dataFrame!;

  let formula: string;
  let caption: string;
  if (activityScaling === 'lg') {
    formula = 'Log10(${' + activityColumn + '})';
    caption = `Log10(${activityColumn})`;
  } else if (activityScaling === '-lg') {
    formula = '-1*Log10(${' + activityColumn + '})';
    caption = `-Log10(${activityColumn})`;
  } else {
    formula = '${' + activityColumn + '}';
    caption = `${activityColumn}`;
  }

  await df.columns.addNewCalculated(activityColumnScaled, formula);
  splitSeqDf.columns.add(df.getCol(activityColumnScaled));
  sourceGrid.col(activityColumnScaled)!.name = caption;
  sourceGrid.columns.setOrder([caption]);
}
128
-
129
/**
 * Builds the per (position, residue) statistics table: count, ratio, mean difference and corrected p-value.
 *
 * Pairs are kept only when their count lies between 4 and `peptidesCount - 4` inclusive. For each kept pair
 * the activities of matching rows are t-tested against the activities of all other rows, and the resulting
 * p-values are passed through `fdrcorrection`.
 *
 * Side effect: the selection of `splitSeqDf` is overwritten while the activity groups are collected.
 *
 * @param matrixDf Unpivoted table with position, residue and scaled activity columns.
 * @param positionColName Name of the position column in `matrixDf`.
 * @param aminoAcidResidue Name of the residue (or group) column in `matrixDf`.
 * @param activityColumnScaled Name of the scaled activity column.
 * @param peptidesCount Total number of peptides; used for the count threshold and the ratio.
 * @param splitSeqDf Table with one column per position plus the scaled activity column.
 * @param groupMapping Maps a residue found in `splitSeqDf` to the value used in the residue column.
 * @return A clone of the statistics table.
 */
async function calculateStatistics(
  matrixDf: DG.DataFrame,
  positionColName: string,
  aminoAcidResidue: string,
  activityColumnScaled: string,
  peptidesCount: number,
  splitSeqDf: DG.DataFrame,
  groupMapping: StringDictionary,
) {
  const countThreshold = 4;

  const countsDf = matrixDf.groupBy([positionColName, aminoAcidResidue])
    .add('count', activityColumnScaled, 'Count')
    .aggregate();

  //@ts-ignore: never gets old
  countsDf.rows.filter((row) => row.Count >= countThreshold && row.Count <= peptidesCount - countThreshold);
  const statsDf = countsDf.clone(countsDf.filter);

  // calculate additional stats
  await statsDf.columns.addNewCalculated('Ratio', '${count}/'.concat(`${peptidesCount}`));

  // Reads the scaled activity of the rows currently selected in splitSeqDf.
  const selectedActivity = (): number[] => splitSeqDf
    .clone(splitSeqDf.selection, [activityColumnScaled])
    .getCol(activityColumnScaled)
    .toList();

  //calculate p-values based on t-test
  const rawPValues: Float32Array = new Float32Array(statsDf.rowCount).fill(1);
  const mdCol: DG.Column = statsDf.columns.addNewFloat('Mean difference');
  const pValCol: DG.Column = statsDf.columns.addNewFloat('pValue');
  for (let rowIdx = 0; rowIdx < statsDf.rowCount; rowIdx++) {
    const position = statsDf.get(positionColName, rowIdx);
    const aar = statsDf.get(aminoAcidResidue, rowIdx);

    //@ts-ignore
    splitSeqDf.rows.select((row) => groupMapping[row[position]] === aar);
    const currentActivity = selectedActivity();

    //@ts-ignore
    splitSeqDf.rows.select((row) => groupMapping[row[position]] !== aar);
    const otherActivity = selectedActivity();

    const testResult = tTest(currentActivity, otherActivity);
    const currentMeanDiff = testResult['Mean difference']!;
    // One-sided p-value chosen by the sign of the mean difference.
    const pvalue = testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'];

    mdCol.set(rowIdx, currentMeanDiff);
    rawPValues[rowIdx] = pvalue;
  }

  const correctedPValues = fdrcorrection(rawPValues)[1];
  for (let rowIdx = 0; rowIdx < correctedPValues.length; ++rowIdx)
    pValCol.set(rowIdx, correctedPValues[rowIdx]);

  return statsDf.clone();
}
191
-
192
/**
 * Orders the categories of the residue column in `matrixDf` by descending summed mean difference.
 *
 * In two-color mode the absolute mean difference is summed instead; for that purpose an
 * `Absolute Mean difference` calculated column is added to `statsDf`.
 *
 * @param twoColorMode Whether to rank by absolute mean difference.
 * @param statsDf Statistics table with the residue column and a `Mean difference` column.
 * @param aminoAcidResidue Name of the residue column in both tables.
 * @param matrixDf Table whose residue column receives the category order.
 */
async function setCategoryOrder(
  twoColorMode: boolean, statsDf: DG.DataFrame, aminoAcidResidue: string, matrixDf: DG.DataFrame,
) {
  const sortArgument = twoColorMode ? 'Absolute Mean difference' : 'Mean difference';
  if (twoColorMode)
    await statsDf.columns.addNewCalculated('Absolute Mean difference', 'Abs(${Mean difference})');

  const aarWeightsDf = statsDf.groupBy([aminoAcidResidue]).sum(sortArgument, 'weight').aggregate();
  const aarList: string[] = aarWeightsDf.getCol(aminoAcidResidue).toList();
  const weightList: number[] = aarWeightsDf.getCol('weight').toList();

  // The aggregate has one row per residue, so both lists are row-aligned. Building the lookup once avoids
  // running a data frame query (with an unquoted residue name) on every sort comparison.
  const weightByAar = new Map<string, number>();
  aarList.forEach((aar, rowIdx) => weightByAar.set(aar, weightList[rowIdx]));

  const weightOf = (aar: string): number => weightByAar.get(aar) ?? 0;
  aarList.sort((first, second) => weightOf(second) - weightOf(first));

  matrixDf.getCol(aminoAcidResidue).setCategoryOrder(aarList);
}
210
-
211
/**
 * Builds the vertical SAR table: for every position, the row(s) holding the most pronounced mean difference
 * among the statistics whose p-value does not exceed 0.1.
 *
 * "Most pronounced" is the maximum mean difference; in two-color mode it is the minimum instead whenever
 * the maximum is not greater than the absolute value of the minimum.
 *
 * @param statsDf Statistics table with `Mean difference`, `Count`, `Ratio` and `pValue` columns.
 * @param aminoAcidResidue Name of the residue column.
 * @param positionColName Name of the position column.
 * @param twoColorMode Whether negative differences compete with positive ones by magnitude.
 * @return A new table containing only the selected rows.
 */
function createVerticalTable(
  statsDf: DG.DataFrame,
  aminoAcidResidue: string,
  positionColName: string,
  twoColorMode: boolean,
) {
  // TODO: acquire ALL of the positions
  const significantDf = statsDf
    .groupBy(['Mean difference', aminoAcidResidue, positionColName, 'Count', 'Ratio', 'pValue'])
    .where('pValue <= 0.1')
    .aggregate();

  const bestAtPos: {[index: string]: number} = {};
  for (const pos of significantDf.getCol(positionColName).categories) {
    const posStats: DG.Stats = DG.Stats.fromColumn(
      significantDf.getCol('Mean difference'),
      DG.BitSet.create(significantDf.rowCount, (i) => significantDf.get(positionColName, i) === pos),
    );
    if (twoColorMode && !(posStats.max > Math.abs(posStats.min)))
      bestAtPos[pos] = posStats.min;
    else
      bestAtPos[pos] = posStats.max;
  }

  const isBestRow = (i: number) =>
    significantDf.get('Mean difference', i) === bestAtPos[significantDf.get(positionColName, i)];
  return significantDf.clone(DG.BitSet.create(significantDf.rowCount, isBestRow));
}
238
-
239
/**
 * Creates the SAR matrix grid from `matrixDf` and the vertical SAR grid from `sequenceDf`.
 *
 * The matrix grid is sorted by residue with the residue column first; the vertical grid is sorted by
 * position and its `pValue` column is formatted and renamed to `P-Value`. When `grouping` is off, the
 * residue column of each table is passed to `setAARRenderer` together with the grid built from that table.
 *
 * @param matrixDf Pivoted SAR table.
 * @param aminoAcidResidue Name of the residue column in both tables.
 * @param positionColumns Position column names, in the order they should follow the residue column.
 * @param sequenceDf Vertical SAR table.
 * @param positionColName Name of the position column in `sequenceDf`.
 * @param grouping Whether residues are shown as groups (no residue renderer is set then).
 * @return The pair `[sarGrid, sarVGrid]`.
 */
function createGrids(
  matrixDf: DG.DataFrame,
  aminoAcidResidue: string,
  positionColumns: string[],
  sequenceDf: DG.DataFrame,
  positionColName: string,
  grouping: boolean,
): [DG.Grid, DG.Grid] {
  const sarGrid = matrixDf.plot.grid();
  sarGrid.sort([aminoAcidResidue]);
  sarGrid.columns.setOrder([aminoAcidResidue].concat(positionColumns));

  const sarVGrid = sequenceDf.plot.grid();
  sarVGrid.sort([positionColName]);
  sarVGrid.col('pValue')!.format = 'four digits after comma';
  sarVGrid.col('pValue')!.name = 'P-Value';

  if (!grouping) {
    const matrixAarCol = matrixDf.columns.byName(aminoAcidResidue);
    if (matrixAarCol)
      setAARRenderer(matrixAarCol, sarGrid);

    // This column belongs to sequenceDf, so it must be paired with the grid built from sequenceDf.
    // The previous code passed sarGrid here a second time.
    const sequenceAarCol = sequenceDf.columns.byName(aminoAcidResidue);
    if (sequenceAarCol)
      setAARRenderer(sequenceAarCol, sarVGrid);
  }

  return [sarGrid, sarVGrid];
}
268
-
269
/**
 * Subscribes a custom cell renderer to both SAR grids. For cells of the columns listed in
 * `renderColNames` it draws a filled circle whose color reflects the p-value of the (residue, position)
 * pair and whose radius reflects the cell's mean difference relative to the range found in `statsDf`.
 *
 * The first time a visible row header cell is rendered, its grid column is hidden instead.
 *
 * @param renderColNames Names of the table columns rendered as circles.
 * @param positionColName Name of the position column.
 * @param aminoAcidResidue Name of the residue column.
 * @param statsDf Statistics table providing `pValue` and `Mean difference`.
 * @param twoColorMode Whether negative values get their own color scale and radii use absolute values.
 * @param sarGrid SAR matrix grid.
 * @param sarVGrid Vertical SAR grid.
 */
function setCellRendererFunc(
  renderColNames: string[],
  positionColName: string,
  aminoAcidResidue: string,
  statsDf: DG.DataFrame,
  twoColorMode: boolean,
  sarGrid: DG.Grid,
  sarVGrid: DG.Grid,
) {
  const mdCol = statsDf.getCol('Mean difference');
  const minRadius = 3;

  const cellRendererFunc = function(args: DG.GridCellRenderArgs) {
    const g = args.g;
    const bounds = args.bounds;
    const cell = args.cell;

    g.save();
    // try/finally keeps save() and restore() balanced on every exit path. The row header branch used to
    // return without restoring, leaving the clip region and saved state on the canvas.
    try {
      g.beginPath();
      g.rect(bounds.x, bounds.y, bounds.width, bounds.height);
      g.clip();

      if (cell.isRowHeader && cell.gridColumn.visible) {
        cell.gridColumn.visible = false;
        args.preventDefault();
        return;
      }

      if (!(cell.isTableCell && cell.tableRowIndex !== null && cell.tableColumn !== null &&
        cell.cell.value !== null))
        return;
      if (!renderColNames.includes(cell.tableColumn.name))
        return;

      // In the vertical table the position comes from the row; in the matrix it is the column name.
      const currentPosition = cell.tableColumn.name !== 'Mean difference' ?
        cell.tableColumn.name : cell.grid.table.get(positionColName, cell.tableRowIndex);
      const query =
        `${aminoAcidResidue} = ${cell.grid.table.get(aminoAcidResidue, cell.tableRowIndex)} ` +
        `and ${positionColName} = ${currentPosition}`;
      const pVal: number = statsDf.groupBy(['pValue']).where(query).aggregate().get('pValue', 0);

      const isNegative = cell.cell.value < 0;
      const useNegativeScale = isNegative && twoColorMode;
      let fillColor: string;
      if (pVal < 0.01)
        fillColor = useNegativeScale ? '#FF7900' : '#299617';
      else if (pVal < 0.05)
        fillColor = useNegativeScale ? '#FFA500' : '#32CD32';
      else if (pVal < 0.1)
        fillColor = useNegativeScale ? '#FBCEB1' : '#98FF98';
      else
        fillColor = DG.Color.toHtml(DG.Color.lightLightGray);

      const lower = twoColorMode ? 0 : mdCol.min;
      const upper = twoColorMode ? Math.max(Math.abs(mdCol.min), mdCol.max) : mdCol.max;
      const current = twoColorMode ? Math.abs(cell.cell.value) : cell.cell.value;
      const span = upper - lower;
      // With no spread, 0/0 would give a NaN radius, for which canvas arc() draws nothing.
      // Use the smallest circle in that case.
      const rCoef = span === 0 ? 0 : (current - lower) / span;

      const maxRadius = 0.9 * Math.min(bounds.width, bounds.height) / 2;
      const radius = Math.floor(maxRadius * rCoef);

      g.beginPath();
      g.fillStyle = fillColor;
      g.arc(
        bounds.x + bounds.width / 2,
        bounds.y + bounds.height / 2,
        radius < minRadius ? minRadius : radius,
        0,
        Math.PI * 2,
        true,
      );
      g.closePath();

      g.fill();
      args.preventDefault();
    } finally {
      g.restore();
    }
  };

  sarGrid.onCellRender.subscribe(cellRendererFunc);
  sarVGrid.onCellRender.subscribe(cellRendererFunc);
}
348
-
349
/**
 * Registers a tooltip handler on both SAR grids.
 *
 * For cells of the columns listed in `renderColNames` the tooltip lists every column of `statsDf` except
 * the residue and position columns, for the cell's (residue, position) pair. For cells of the residue
 * column it shows either the group description (when `grouping` is on) or the palette tooltip from `cp`.
 *
 * @param renderColNames Names of the table columns that show statistics tooltips.
 * @param statsDf Statistics table queried for the tooltip values.
 * @param aminoAcidResidue Name of the residue column.
 * @param positionColName Name of the position column.
 * @param peptidesCount Total number of peptides, appended to the `Count` value.
 * @param grouping Whether the residue column holds group codes.
 * @param sarGrid SAR matrix grid.
 * @param sarVGrid Vertical SAR grid.
 */
function setTooltipFunc(
  renderColNames: string[],
  statsDf: DG.DataFrame,
  aminoAcidResidue: string,
  positionColName: string,
  peptidesCount: number,
  grouping: boolean,
  sarGrid: DG.Grid,
  sarVGrid: DG.Grid,
) {
  const onCellTooltipFunc = async function(cell: DG.GridCell, x: number, y: number) {
    if (
      !cell.isRowHeader &&
      !cell.isColHeader &&
      cell.tableColumn !== null &&
      cell.cell.value !== null &&
      cell.tableRowIndex !== null &&
      renderColNames.includes(cell.tableColumn.name)
    ) {
      // The query depends only on the cell, so it is built once rather than once per statistics column.
      const currentPosition = cell.tableColumn.name !== 'Mean difference' ?
        cell.tableColumn.name : cell.grid.table.get(positionColName, cell.tableRowIndex);
      const query =
        `${aminoAcidResidue} = ${cell.grid.table.get(aminoAcidResidue, cell.tableRowIndex)} ` +
        `and ${positionColName} = ${currentPosition}`;

      const tooltipMap: {[index: string]: string} = {};
      for (const col of statsDf.columns.names()) {
        if (col === aminoAcidResidue || col === positionColName)
          continue;

        const textNum = statsDf.groupBy([col]).where(query).aggregate().get(col, 0);
        let text = `${col === 'Count' ? textNum : textNum.toFixed(5)}`;

        if (col === 'Count')
          text += ` / ${peptidesCount}`;
        else if (col === 'pValue')
          text = parseFloat(text) !== 0 ? text : '<0.01';

        tooltipMap[col === 'pValue' ? 'p-value' : col] = text;
      }

      ui.tooltip.show(ui.tableFromMap(tooltipMap), x, y);
    }

    if (
      !cell.isColHeader &&
      cell.tableColumn !== null &&
      cell.tableColumn.name === aminoAcidResidue &&
      cell.cell.value !== null &&
      cell.tableRowIndex !== null
    ) {
      if (grouping) {
        const currentGroup = groupDescription[cell.cell.value];
        const divText = ui.divText('Amino Acids in this group: ' + currentGroup['aminoAcids'].join(', '));
        ui.tooltip.show(ui.divV([ui.h3(currentGroup['description']), divText]), x, y);
      } else
        await cp.showTooltip(cell, x, y);
    }
    return true;
  };

  sarGrid.onCellTooltip(onCellTooltipFunc);
  sarVGrid.onCellTooltip(onCellTooltipFunc);
}
411
-
412
/**
 * Applies final presentation settings to the source grid and both SAR grids.
 *
 * Rows of the source grid listed in `invalidIndexes` get a light gray background (row header cells
 * excluded), every SAR matrix column is made as wide as a row is high, the residue column is renamed to
 * `Groups` when `grouping` is on, editing is disabled and `Mean difference` is renamed to `Diff`.
 *
 * @param sourceGrid Grid of the source table.
 * @param invalidIndexes Row indexes to highlight; read once when this function is called.
 * @param matrixDf SAR matrix table, used for its column names.
 * @param grouping Whether the residue column holds group codes.
 * @param aminoAcidResidue Name of the residue column.
 * @param sarGrid SAR matrix grid.
 * @param sarVGrid Vertical SAR grid.
 */
function postProcessGrids(
  sourceGrid: DG.Grid,
  invalidIndexes: number[],
  matrixDf: DG.DataFrame,
  grouping: boolean,
  aminoAcidResidue: string,
  sarGrid: DG.Grid,
  sarVGrid: DG.Grid,
) {
  // The handler runs for every prepared cell, so use a constant-time lookup.
  const invalidRows = new Set(invalidIndexes);
  sourceGrid.onCellPrepare((cell: DG.GridCell) => {
    const currentRowIndex = cell.tableRowIndex;
    // Compare against null explicitly: a truthiness test would skip row 0.
    if (currentRowIndex != null && invalidRows.has(currentRowIndex) && !cell.isRowHeader)
      cell.style.backColor = DG.Color.lightLightGray;
  });

  for (const colName of matrixDf.columns.names())
    sarGrid.col(colName)!.width = sarGrid.props.rowHeight;

  if (grouping) {
    sarGrid.col(aminoAcidResidue)!.name = 'Groups';
    sarVGrid.col(aminoAcidResidue)!.name = 'Groups';
  }

  sarGrid.props.allowEdit = false;
  sarVGrid.props.allowEdit = false;

  sarVGrid.col('Mean difference')!.name = 'Diff';
}
441
-
442
/**
 * Runs the SAR analysis pipeline for a table with an aligned peptide sequence column.
 *
 * Steps, in order: split the aligned sequences into position columns, join them into `df`, set residue
 * renderers and column order on the source grid, add the scaled activity column, aggregate duplicate
 * sequences by median activity, unpivot to (position, residue) rows, compute statistics, pivot them into
 * the SAR matrix, order the residue categories, build the vertical table, then create and decorate both
 * grids.
 *
 * @param df Source table; receives the position columns and the scaled activity column.
 * @param activityColumn Name of the activity column in `df`.
 * @param activityScaling Scaling mode forwarded to `scaleActivity`.
 * @param sourceGrid Grid showing `df`.
 * @param twoColorMode Whether negative and positive mean differences are colored and ranked separately.
 * @param initialBitset Row mask applied to the split table before aggregation.
 * @param grouping Whether residues are replaced with their `aarGroups` group code.
 * @return `[sarGrid, sarVGrid, statsDf, groupMapping]`.
 */
export async function describe(
  df: DG.DataFrame,
  activityColumn: string,
  activityScaling: string,
  sourceGrid: DG.Grid,
  twoColorMode: boolean,
  initialBitset: DG.BitSet | null,
  grouping: boolean,
): Promise<[DG.Grid, DG.Grid, DG.DataFrame, StringDictionary]> {
  const positionColName = 'Position';
  const aminoAcidResidue = 'AAR';
  const activityColumnScaled = `${activityColumn}Scaled`;

  //Split the aligned sequence into separate AARs
  const alignedCol: DG.Column = df.columns.bySemType('alignedSequence');
  const [initialSplitDf, invalidIndexes] = splitAlignedPeptides(alignedCol);
  let splitSeqDf: DG.DataFrame = initialSplitDf;
  splitSeqDf.name = 'Split sequence';

  // Captured before the activity columns are added, so this holds position columns only.
  const positionColumns = splitSeqDf.columns.names();

  splitSeqDf.columns.add(df.getCol(activityColumn));

  joinDataFrames(activityColumnScaled, df, positionColumns, splitSeqDf, activityColumn);

  for (const sourceCol of df.columns) {
    if (splitSeqDf.col(sourceCol.name) && sourceCol.name != activityColumn)
      setAARRenderer(sourceCol, sourceGrid);
  }

  sortSourceGrid(sourceGrid);

  await scaleActivity(activityScaling, activityColumn, activityColumnScaled, sourceGrid, splitSeqDf);
  splitSeqDf = splitSeqDf.clone(initialBitset);

  //unpivot a table and handle duplicates
  splitSeqDf = splitSeqDf.groupBy(positionColumns)
    .add('med', activityColumnScaled, activityColumnScaled)
    .aggregate();

  const peptidesCount = splitSeqDf.getCol(activityColumnScaled).length;

  let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);

  //TODO: move to chem palette
  const groupMapping: StringDictionary = grouping ? aarGroups : {};
  if (grouping) {
    // Replace every residue with the group of its first character; unmapped residues become '-'.
    const aarCol = matrixDf.getCol(aminoAcidResidue);
    aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
    aarCol.compact();
  } else {
    // Identity mapping over the residues known to aarGroups.
    for (const residue of Object.keys(aarGroups))
      groupMapping[residue] = residue;
  }

  //statistics for specific AAR at a specific position
  const statsDf = await calculateStatistics(
    matrixDf, positionColName, aminoAcidResidue, activityColumnScaled, peptidesCount, splitSeqDf, groupMapping,
  );

  // SAR matrix table
  //pivot a table to make it matrix-like
  matrixDf = statsDf.groupBy([aminoAcidResidue])
    .pivot(positionColName)
    .add('first', 'Mean difference', '')
    .aggregate();
  matrixDf.name = 'SAR';

  // Setting category order
  await setCategoryOrder(twoColorMode, statsDf, aminoAcidResidue, matrixDf);

  // SAR vertical table (naive, choose best Mean difference from pVals <= 0.1)
  const sequenceDf = createVerticalTable(statsDf, aminoAcidResidue, positionColName, twoColorMode);
  const renderColNames: string[] = [...positionColumns, 'Mean difference'];

  const [sarGrid, sarVGrid] = createGrids(
    matrixDf, aminoAcidResidue, positionColumns, sequenceDf, positionColName, grouping,
  );

  setCellRendererFunc(
    renderColNames, positionColName, aminoAcidResidue, statsDf, twoColorMode, sarGrid, sarVGrid,
  );

  // show all the statistics in a tooltip over cell
  setTooltipFunc(
    renderColNames, statsDf, aminoAcidResidue, positionColName, peptidesCount, grouping, sarGrid, sarVGrid,
  );

  postProcessGrids(sourceGrid, invalidIndexes, matrixDf, grouping, aminoAcidResidue, sarGrid, sarVGrid);

  //TODO: return class instead
  return [sarGrid, sarVGrid, statsDf, groupMapping];
}
package/src/utils/split-aligned.ts DELETED
@@ -1,72 +0,0 @@
1
- import * as DG from 'datagrok-api/dg';
2
-
3
/**
 * Splits `-`-separated aligned sequences into one string column per monomer position.
 *
 * The column count equals the most frequent monomer count among the sequences (ties resolve to the
 * larger count). The last monomer of every sequence always goes to the last column (`C-terminal`);
 * the remaining monomers fill the columns from the left (`N-terminal`, `01`, `02`, ...), padded with `-`
 * or truncated as needed. Empty monomers are replaced with `-`. A sequence whose monomer count differs
 * from the most frequent one is reported as invalid.
 *
 * Throws a `TypeError` for an empty column, because the most frequent length cannot be determined.
 *
 * @export
 * @param {DG.Column} peptideColumn Column containing aligned sequences.
 * @param {boolean} [filter=true] Filter out columns with all the same residues.
 * @return {[DG.DataFrame, number[]]} DataFrame containing split sequence and a list of invalid indexes.
 */
export function splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean = true): [DG.DataFrame, number[]] {
  const rowCount = peptideColumn.length;

  // Split every sequence and count how often each monomer count occurs.
  const splitPeptides: string[][] = [];
  const lengthCounts: {[index: string]: number} = {};
  for (let i = 0; i < rowCount; i++) {
    const monomers: string[] = peptideColumn.get(i).split('-').map((value: string) => value ? value : '-');
    splitPeptides.push(monomers);
    const lengthKey = `${monomers.length}`;
    lengthCounts[lengthKey] = (lengthCounts[lengthKey] ?? 0) + 1;
  }
  const columnCount = Number(
    Object.keys(lengthCounts).reduce((a, b) => lengthCounts[a] > lengthCounts[b] ? a : b),
  );

  // Bring all sequences to the same size and mark the ones that had a different size.
  const leadingCount = columnCount - 1; // every column except the last one
  const invalidIndexes: number[] = [];
  let splitColumns: string[][] = Array.from({length: columnCount}, () => []);
  for (let i = 0; i < rowCount; i++) {
    const monomers = splitPeptides[i];
    const lastMonomer = monomers.pop()!; // split() always yields at least one element
    if (monomers.length !== leadingCount)
      invalidIndexes.push(i);

    for (let j = 0; j < leadingCount; j++)
      splitColumns[j].push(j < monomers.length ? monomers[j] : '-');

    splitColumns[leadingCount].push(lastMonomer);
  }

  // Column names: terminals at both ends, zero-padded 1-based positions in between.
  const innerNames = Array.from({length: columnCount - 2}, (_, index) => `${index + 1}`.padStart(2, '0'));
  let columnNames = ['N-terminal', ...innerNames, 'C-terminal'];

  // Filter out the columns with the same values. The keep/drop decision is computed once per original
  // index and applied to names and data alike; splicing names while filtering shifted the indexes and
  // removed the wrong names after the first dropped column.
  if (filter) {
    const isRetained = splitColumns.map((positionArray) => new Set(positionArray).size > 1);
    columnNames = columnNames.filter((_, index) => isRetained[index]);
    splitColumns = splitColumns.filter((_, index) => isRetained[index]);
  }

  return [
    DG.DataFrame.fromColumns(splitColumns.map((positionArray, index) => {
      return DG.Column.fromList('string', columnNames[index], positionArray);
    })),
    invalidIndexes,
  ];
}