@datagrok/peptides 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/package-test.js +9821 -5237
  2. package/dist/package.js +9737 -4734
  3. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +450 -360
  4. package/files/aligned.csv +648 -648
  5. package/files/aligned_2.csv +541 -10275
  6. package/files/aligned_3.csv +335 -0
  7. package/helm/JSDraw/Pistoia.HELM.js +27 -0
  8. package/package.json +24 -16
  9. package/src/__jest__/remote.test.ts +33 -15
  10. package/src/__jest__/test-node.ts +3 -2
  11. package/src/model.ts +416 -521
  12. package/src/package-test.ts +0 -2
  13. package/src/package.ts +7 -126
  14. package/src/tests/core.ts +60 -18
  15. package/src/tests/peptide-space-test.ts +7 -7
  16. package/src/tests/utils.ts +3 -19
  17. package/src/utils/cell-renderer.ts +140 -262
  18. package/src/utils/constants.ts +7 -4
  19. package/src/utils/filtering-statistics.ts +21 -53
  20. package/src/utils/misc.ts +80 -16
  21. package/src/utils/peptide-similarity-space.ts +1 -1
  22. package/src/utils/types.ts +7 -5
  23. package/src/viewers/peptide-space-viewer.ts +18 -20
  24. package/src/viewers/sar-viewer.ts +33 -22
  25. package/src/widgets/analyze-peptides.ts +34 -10
  26. package/src/widgets/distribution.ts +169 -60
  27. package/src/widgets/manual-alignment.ts +5 -4
  28. package/src/widgets/subst-table.ts +6 -2
  29. package/{test-Peptides-69a4761f6044-40ac3a0c.html → test-Peptides-eb4783c07294-f4162403.html} +43 -22
  30. package/detectors.js +0 -9
  31. package/src/monomer-library.ts +0 -193
  32. package/src/tests/msa-tests.ts +0 -27
  33. package/src/utils/chem-palette.ts +0 -280
  34. package/src/utils/multiple-sequence-alignment.ts +0 -106
  35. package/src/utils/multivariate-analysis.ts +0 -76
  36. package/src/viewers/stacked-barchart-viewer.ts +0 -339
  37. package/src/widgets/multiple-sequence-alignment.ts +0 -9
  38. package/src/widgets/peptide-molecule.ts +0 -82
package/src/utils/cell-renderer.ts CHANGED
@@ -1,8 +1,9 @@
1
- import {ChemPalette} from './chem-palette';
1
+ import {SeqPaletteBase} from '@datagrok-libraries/bio/src/seq-palettes';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
 
4
4
  import * as C from './constants';
5
- import {getSeparator} from './misc';
5
+ import {getPalleteByType} from './misc';
6
+ import * as types from './types';
6
7
 
7
8
  /**
8
9
  * A function to expand column size based on its contents.
@@ -16,10 +17,8 @@ import {getSeparator} from './misc';
16
17
  * @param {number} [maxSize=650] Maximum column width.
17
18
  * @param {number} [timeout=500] Timeout value.
18
19
  */
19
- export function expandColumn(
20
- col: DG.Column, grid: DG.Grid, cellRenderSize: (cellVal: string) => number,
21
- textSizeMult = 10, minSize = 30, maxSize = 650, timeout = 500,
22
- ): void {
20
+ export function expandColumn(col: DG.Column, grid: DG.Grid, cellRenderSize: (cellVal: string) => number,
21
+ textSizeMult = 10, minSize = 30, maxSize = 650, timeout = 500): void {
23
22
  let maxLen = 0;
24
23
  col.categories.forEach((ent: string) => {
25
24
  const len = cellRenderSize(ent);
@@ -40,11 +39,10 @@ export function expandColumn(
40
39
  * @param {(DG.Grid | null)} [grid=null] Grid that contains the col column.
41
40
  * @param {boolean} [grouping=false] Is grouping enabled.
42
41
  */
43
- export function setAARRenderer(col: DG.Column, grid: DG.Grid | null = null, grouping = false): void {
44
- col.semType = C.SEM_TYPES.AMINO_ACIDS;
45
- col.setTag('cell.renderer', C.SEM_TYPES.AMINO_ACIDS);
46
- if (grouping)
47
- col.setTag('groups', `${grouping}`);
42
+ export function setAARRenderer(col: DG.Column, alphabet: string, grid?: DG.Grid): void {
43
+ col.semType = C.SEM_TYPES.MONOMER;
44
+ col.setTag('cell.renderer', C.SEM_TYPES.MONOMER);
45
+ col.tags[C.TAGS.ALPHABET] = alphabet;
48
46
 
49
47
  if (grid)
50
48
  expandColumn(col, grid, (ent) => measureAAR(ent));
@@ -63,266 +61,146 @@ export function measureAAR(s: string): number {
63
61
  return end == beg ? s.length : s.length - (end - beg) + 1;
64
62
  }
65
63
 
66
- /**
67
- * A function that prints a string aligned to left or centered.
68
- *
69
- * @param {number} x x coordinate.
70
- * @param {number} y y coordinate.
71
- * @param {number} w Width.
72
- * @param {number} h Height.
73
- * @param {CanvasRenderingContext2D} g Canvas rendering context.
74
- * @param {string} s String to print.
75
- * @param {string} [color=ChemPalette.undefinedColor] String color.
76
- * @param {number} [pivot=0] Pirvot.
77
- * @param {boolean} [left=false] Is left aligned.
78
- * @param {boolean} [hideMod=false] Hide amino acid redidue modifications.
79
- * @param {number} [transparencyRate=0.0] Transparency rate where 1.0 is fully transparent
80
- * @return {number} x coordinate to start printing at.
81
- */
82
- function printLeftOrCentered(
83
- x: number, y: number, w: number, h: number,
84
- g: CanvasRenderingContext2D, s: string, color = ChemPalette.undefinedColor,
85
- pivot: number = 0, left = false, hideMod = false, transparencyRate: number = 1.0,
86
- ): number {
87
- g.textAlign = 'start';
88
- let colorPart = pivot == -1 ? s.substring(0) : s.substring(0, pivot);
89
- if (colorPart.length == 1)
90
- colorPart = colorPart.toUpperCase();
91
-
92
- if (colorPart.length >= 3) {
93
- if (colorPart.substring(0, 3) in ChemPalette.AAFullNames)
94
- colorPart = ChemPalette.AAFullNames[s.substring(0, 3)] + colorPart.substring(3);
95
- else if (colorPart.substring(1, 4) in ChemPalette.AAFullNames)
96
- colorPart = colorPart[0] + ChemPalette.AAFullNames[s.substring(1, 4)] + colorPart.substring(4);
97
- }
98
- let grayPart = pivot == -1 ? '' : s.substring(pivot);
99
- if (hideMod) {
100
- let end = colorPart.lastIndexOf(')');
101
- let beg = colorPart.indexOf('(');
102
- if (beg > -1 && end > -1 && end - beg > 2)
103
- colorPart = colorPart.substring(0, beg) + '(+)' + colorPart.substring(end + 1);
104
-
105
-
106
- end = grayPart.lastIndexOf(')');
107
- beg = grayPart.indexOf('(');
108
- if (beg > -1 && end > -1 && end - beg > 2)
109
- grayPart = grayPart.substring(0, beg) + '(+)' + grayPart.substring(end + 1);
110
- }
111
- const textSize = g.measureText(colorPart + grayPart);
112
- const indent = 5;
113
-
114
- const colorTextSize = g.measureText(colorPart);
115
- const dy = (textSize.fontBoundingBoxAscent + textSize.fontBoundingBoxDescent) / 2;
116
-
117
- function draw(dx1: number, dx2: number): void {
118
- g.fillStyle = color;
119
- g.globalAlpha = transparencyRate;
120
- g.fillText(colorPart, x + dx1, y + dy);
121
- g.fillStyle = ChemPalette.undefinedColor;
122
- g.fillText(grayPart, x + dx2, y + dy);
123
- }
124
-
125
-
126
- if (left || textSize.width > w) {
127
- draw(indent, indent + colorTextSize.width);
128
- return x + colorTextSize.width + g.measureText(grayPart).width;
129
- } else {
130
- const dx = (w - textSize.width) / 2;
131
- draw(dx, dx + colorTextSize.width);
132
- return x + dx + colorTextSize.width;
64
+ export function renderSARCell(canvasContext: CanvasRenderingContext2D, currentAAR: string, currentPosition: string,
65
+ statsDf: DG.DataFrame, twoColorMode: boolean, mdCol: DG.Column<number>, bound: DG.Rect, cellValue: number,
66
+ currentSelection: types.SelectionObject, substitutionsInfo: types.SubstitutionsInfo | null): void {
67
+ const queryAAR = `${C.COLUMNS_NAMES.AMINO_ACID_RESIDUE} = ${currentAAR}`;
68
+ const query = `${queryAAR} and ${C.COLUMNS_NAMES.POSITION} = ${currentPosition}`;
69
+ const pVal: number = statsDf
70
+ .groupBy([C.COLUMNS_NAMES.P_VALUE])
71
+ .where(query)
72
+ .aggregate()
73
+ .get(C.COLUMNS_NAMES.P_VALUE, 0);
74
+
75
+ let coef: string;
76
+ const variant = cellValue < 0;
77
+ if (pVal < 0.01)
78
+ coef = variant && twoColorMode ? '#FF7900' : '#299617';
79
+ else if (pVal < 0.05)
80
+ coef = variant && twoColorMode ? '#FFA500' : '#32CD32';
81
+ else if (pVal < 0.1)
82
+ coef = variant && twoColorMode ? '#FBCEB1' : '#98FF98';
83
+ else
84
+ coef = DG.Color.toHtml(DG.Color.lightLightGray);
85
+
86
+
87
+ const chooseMin = (): number => twoColorMode ? 0 : mdCol.min;
88
+ const chooseMax = (): number => twoColorMode ? Math.max(Math.abs(mdCol.min), mdCol.max) : mdCol.max;
89
+ const chooseCurrent = (): any => twoColorMode ? Math.abs(cellValue) : cellValue;
90
+
91
+ const rCoef = (chooseCurrent() - chooseMin()) / (chooseMax() - chooseMin());
92
+
93
+ const maxRadius = 0.9 * (bound.width > bound.height ? bound.height : bound.width) / 2;
94
+ const radius = Math.floor(maxRadius * rCoef);
95
+
96
+ const midX = bound.x + bound.width / 2;
97
+ const midY = bound.y + bound.height / 2;
98
+ canvasContext.beginPath();
99
+ canvasContext.fillStyle = coef;
100
+ canvasContext.arc(midX, midY, radius < 3 ? 3 : radius, 0, Math.PI * 2, true);
101
+ canvasContext.closePath();
102
+
103
+ canvasContext.fill();
104
+ if (substitutionsInfo) {
105
+ canvasContext.textBaseline = 'middle';
106
+ canvasContext.textAlign = 'center';
107
+ canvasContext.fillStyle = DG.Color.toHtml(DG.Color.getContrastColor(DG.Color.fromHtml(coef)));
108
+ canvasContext.font = '13px Roboto, Roboto Local, sans-serif';
109
+ let substValue = 0;
110
+ substitutionsInfo.get(currentAAR)?.get(currentPosition)?.forEach((idxs) => substValue += idxs.length);
111
+ if (substValue && substValue != 0)
112
+ canvasContext.fillText(substValue.toString(), midX, midY);
133
113
  }
134
- }
135
-
136
- export class AminoAcidsCellRenderer extends DG.GridCellRenderer {
137
- chemPalette: ChemPalette | null;
138
-
139
- get name(): string {return 'aminoAcidsCR';}
140
-
141
- get cellType(): string {return C.SEM_TYPES.AMINO_ACIDS;}
142
-
143
- get defaultHeight(): number {return 15;}
144
-
145
- get defaultWidth(): number {return 30;}
146
-
147
- constructor() {
148
- super();
149
- this.chemPalette = null;
150
- }
151
-
152
- /**
153
- * Cell renderer function.
154
- *
155
- * @param {CanvasRenderingContext2D} g Canvas rendering context.
156
- * @param {number} x x coordinate on the canvas.
157
- * @param {number} y y coordinate on the canvas.
158
- * @param {number} w width of the cell.
159
- * @param {number} h height of the cell.
160
- * @param {DG.GridCell} gridCell Grid cell.
161
- * @param {DG.GridCellStyle} cellStyle Cell style.
162
- */
163
- render(
164
- g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
165
- cellStyle: DG.GridCellStyle): void {
166
- y -= 2;
167
- g.save();
168
- g.beginPath();
169
- g.rect(x, y, w, h);
170
- g.clip();
171
- g.font = `12px monospace`;
172
- g.textBaseline = 'top';
173
- const s: string = gridCell.cell.value ? gridCell.cell.value : '-';
174
- let [color, outerS, innerS, pivot] = ChemPalette.getColorAAPivot(s);
175
- if (innerS)
176
- outerS = s;
177
114
 
178
- printLeftOrCentered(x, y, w, h, g, outerS, color, pivot, false, true);
179
- g.restore();
115
+ //TODO: frame based on currentSelection
116
+ const aarSelection = currentSelection[currentPosition];
117
+ if (aarSelection && aarSelection.includes(currentAAR)) {
118
+ canvasContext.strokeStyle = '#000';
119
+ canvasContext.lineWidth = 1;
120
+ canvasContext.strokeRect(bound.x + 1, bound.y + 1, bound.width - 1, bound.height - 1);
180
121
  }
181
122
  }
182
123
 
183
- export class AlignedSequenceCellRenderer extends DG.GridCellRenderer {
184
- get name(): string {return 'alignedSequenceCR';}
185
-
186
- get cellType(): string {return C.SEM_TYPES.ALIGNED_SEQUENCE;}
187
-
188
- get defaultHeight(): number {return 30;}
189
-
190
- get defaultWidth(): number {return 230;}
191
-
192
- /**
193
- * Cell renderer function.
194
- *
195
- * @param {CanvasRenderingContext2D} g Canvas rendering context.
196
- * @param {number} x x coordinate on the canvas.
197
- * @param {number} y y coordinate on the canvas.
198
- * @param {number} w width of the cell.
199
- * @param {number} h height of the cell.
200
- * @param {DG.GridCell} gridCell Grid cell.
201
- * @param {DG.GridCellStyle} cellStyle Cell style.
202
- * @memberof AlignedSequenceCellRenderer
203
- */
204
- render(
205
- g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
206
- cellStyle: DG.GridCellStyle,
207
- ): void {
208
- const grid = gridCell.grid;
209
- const cell = gridCell.cell;
210
- w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
211
- g.save();
212
- g.beginPath();
213
- g.rect(x, y, w, h);
214
- g.clip();
215
- g.font = '12px monospace';
216
- g.textBaseline = 'top';
217
- const s: string = cell.value ?? '';
124
+ export function renderBarchart(ctx: CanvasRenderingContext2D, col: DG.Column, monomerColStats: types.MonomerColStats,
125
+ bounds: DG.Rect, max: number): types.BarCoordinates {
126
+ let sum = col.length - (monomerColStats['-']?.count ?? 0);
127
+ const colorPalette = getPalleteByType(col.tags[C.TAGS.ALPHABET]);
128
+ const name = col.name;
129
+ const colNameSize = ctx.measureText(name);
130
+ const margin = 0.2;
131
+ const innerMargin = 0.02;
132
+ const selectLineRatio = 0.1;
133
+ const fontSize = 11;
134
+
135
+ const xMargin = bounds.x + bounds.width * margin;
136
+ const yMargin = bounds.y + bounds.height * margin / 4;
137
+ const wMargin = bounds.width - bounds.width * margin * 2;
138
+ const hMargin = bounds.height - bounds.height * margin;
139
+ const barWidth = 10;
140
+ ctx.fillStyle = 'black';
141
+ ctx.textBaseline = 'top';
142
+ ctx.font = `${hMargin * margin / 2}px`;
143
+ ctx.fillText(name, xMargin + (wMargin - colNameSize.width) / 2, yMargin + hMargin + hMargin * margin / 4);
144
+
145
+
146
+ const barCoordinates: types.BarCoordinates = {};
147
+
148
+ const xStart = xMargin + (wMargin - barWidth) / 2;
149
+ for (const [monomer, monomerStats] of Object.entries(monomerColStats)) {
150
+ if (monomer == '-')
151
+ continue;
152
+
153
+ const count = monomerStats.count;
154
+ const sBarHeight = hMargin * count / max;
155
+ const gapSize = sBarHeight * innerMargin;
156
+ const verticalShift = (max - sum) / max;
157
+ const textSize = ctx.measureText(monomer);
158
+ const subBarHeight = sBarHeight - gapSize;
159
+ const yStart = yMargin + hMargin * verticalShift + gapSize / 2;
160
+ barCoordinates[monomer] = new DG.Rect(xStart, yStart, barWidth, subBarHeight);
161
+
162
+ const color = colorPalette.get(monomer);
163
+ ctx.strokeStyle = color;
164
+ ctx.fillStyle = color;
165
+
166
+ if (textSize.width <= subBarHeight) {
167
+ if (color != SeqPaletteBase.undefinedColor)
168
+ ctx.fillRect(xStart, yStart, barWidth, subBarHeight);
169
+ else {
170
+ ctx.strokeRect(xStart + 0.5, yStart, barWidth - 1, subBarHeight);
171
+ barCoordinates[monomer].x -= 0.5;
172
+ barCoordinates[monomer].width -= 1;
173
+ }
218
174
 
219
- //TODO: can this be replaced/merged with splitSequence?
220
- const subParts = s.split(getSeparator(cell.column));
221
- const [text, simplified] = processSequence(subParts);
222
- const textSize = g.measureText(text.join(''));
223
- let x1 = Math.max(x, x + (w - textSize.width) / 2);
175
+ const leftMargin = (wMargin - (monomer.length > 1 ? fontSize : textSize.width - 8)) / 2;
176
+ const absX = xMargin + leftMargin;
177
+ const absY = yStart + subBarHeight / 2 + (monomer.length == 1 ? 4 : 0);
178
+ const origTransform = ctx.getTransform();
224
179
 
225
- subParts.forEach((amino, index) => {
226
- let [color, outerAmino,, pivot] = ChemPalette.getColorAAPivot(amino);
227
- g.fillStyle = ChemPalette.undefinedColor;
228
- if (index + 1 < subParts.length) {
229
- const gap = simplified ? '' : ' ';
230
- outerAmino += `${outerAmino ? '' : '-'}${gap}`;
180
+ if (monomer.length > 1) {
181
+ ctx.translate(absX, absY);
182
+ ctx.rotate(Math.PI / 2);
183
+ ctx.translate(-absX, -absY);
231
184
  }
232
- x1 = printLeftOrCentered(x1, y, w, h, g, outerAmino, color, pivot, true);
233
- });
234
185
 
235
- g.restore();
186
+ ctx.fillStyle = 'black';
187
+ ctx.font = `${fontSize}px monospace`;
188
+ ctx.textAlign = 'center';
189
+ ctx.textBaseline = 'bottom';
190
+ ctx.fillText(monomer, absX, absY);
191
+ ctx.setTransform(origTransform);
192
+ } else
193
+ ctx.fillRect(xStart, yStart, barWidth, subBarHeight);
194
+
195
+ const selectedCount = monomerStats.selected;
196
+ if (selectedCount) {
197
+ ctx.fillStyle = 'rgb(255,165,0)';
198
+ ctx.fillRect(xStart - wMargin * selectLineRatio * 2, yStart,
199
+ barWidth * selectLineRatio, hMargin * selectedCount / max - gapSize);
200
+ }
201
+
202
+ sum -= count;
236
203
  }
237
- }
238
-
239
- export function processSequence(subParts: string[]): [string[], boolean] {
240
- const simplified = !subParts.some((amino, index) =>
241
- amino.length > 1 &&
242
- index != 0 &&
243
- index != subParts.length - 1);
244
-
245
- const text: string[] = [];
246
- const gap = simplified ? '' : ' ';
247
- subParts.forEach((amino: string, index) => {
248
- if (index < subParts.length)
249
- amino += `${amino ? '' : '-'}${gap}`;
250
-
251
- text.push(amino);
252
- });
253
- return [text, simplified];
254
- }
255
204
 
256
- export class AlignedSequenceDifferenceCellRenderer extends DG.GridCellRenderer {
257
- get name(): string {return 'alignedSequenceDifferenceCR';}
258
-
259
- get cellType(): string {return C.SEM_TYPES.ALIGNED_SEQUENCE_DIFFERENCE;}
260
-
261
- get defaultHeight(): number {return 30;}
262
-
263
- get defaultWidth(): number {return 230;}
264
-
265
- /**
266
- * Cell renderer function.
267
- *
268
- * @param {CanvasRenderingContext2D} g Canvas rendering context.
269
- * @param {number} x x coordinate on the canvas.
270
- * @param {number} y y coordinate on the canvas.
271
- * @param {number} w width of the cell.
272
- * @param {number} h height of the cell.
273
- * @param {DG.GridCell} gridCell Grid cell.
274
- * @param {DG.GridCellStyle} cellStyle Cell style.
275
- * @memberof AlignedSequenceDifferenceCellRenderer
276
- */
277
- render(
278
- g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
279
- cellStyle: DG.GridCellStyle): void {
280
- const grid = gridCell.grid;
281
- const cell = gridCell.cell;
282
-
283
- w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
284
- g.save();
285
- g.beginPath();
286
- g.rect(x, y, w, h);
287
- g.clip();
288
- g.font = '12px monospace';
289
- g.textBaseline = 'top';
290
- const s: string = cell.value ?? '';
291
-
292
- //TODO: can this be replaced/merged with splitSequence?
293
- const [s1, s2] = s.split('#');
294
- const separator = getSeparator(gridCell.tableColumn!);
295
- const subParts1 = s1.split(separator);
296
- const subParts2 = s2.split(separator);
297
- const [text] = processSequence(subParts1);
298
- const textSize = g.measureText(text.join(''));
299
- let updatedX = Math.max(x, x + (w - (textSize.width + subParts1.length * 4)) / 2);
300
- // 28 is the height of the two substitutions on top of each other + space
301
- const updatedY = Math.max(y, y + (h - 28) / 2);
302
-
303
- let amino2;
304
- let updatedAmino1: string;
305
- let updatedAmino2: string;
306
- subParts1.forEach((amino1: string, index) => {
307
- amino2 = subParts2[index];
308
- const [color1, amino1Outer, amino1Inner, pivot1] = ChemPalette.getColorAAPivot(amino1);
309
- const [color2, amino2Outer, amino2Inner, pivot2] = ChemPalette.getColorAAPivot(amino2);
310
-
311
- updatedAmino1 = amino1Outer + (amino1Inner !== '' ? '(' + amino1Inner + ')' : '');
312
- updatedAmino1 = updatedAmino1 === '' ? '-' : updatedAmino1;
313
-
314
- if (amino1 != amino2) {
315
- updatedAmino2 = amino2Outer + (amino2Inner !== '' ? '(' + amino2Inner + ')' : '');
316
- updatedAmino2 = updatedAmino2 === '' ? '-' : updatedAmino2;
317
-
318
- const vShift = 7;
319
- const subX0 = printLeftOrCentered(updatedX, updatedY - vShift, w, h, g, updatedAmino1, color1, pivot1, true);
320
- const subX1 = printLeftOrCentered(updatedX, updatedY + vShift, w, h, g, updatedAmino2, color2, pivot2, true);
321
- updatedX = Math.max(subX1, subX0);
322
- } else
323
- updatedX = printLeftOrCentered(updatedX, updatedY, w, h, g, updatedAmino1, color1, pivot1, true, true, 0.5);
324
- updatedX += 4;
325
- });
326
- g.restore();
327
- }
205
+ return barCoordinates;
328
206
  }
package/src/utils/constants.ts CHANGED
@@ -7,6 +7,8 @@ export enum COLUMNS_NAMES {
7
7
  POSITION = 'Pos',
8
8
  P_VALUE = 'pValue',
9
9
  MEAN_DIFFERENCE = 'Mean difference',
10
+ COUNT = 'Count',
11
+ RATIO = 'Ratio',
10
12
  }
11
13
 
12
14
  export enum CATEGORIES {
@@ -17,14 +19,15 @@ export enum CATEGORIES {
17
19
  export enum TAGS {
18
20
  AAR = 'AAR',
19
21
  POSITION = 'Pos',
20
- SEPARATOR = 'monomer-separator',
22
+ SEPARATOR = 'separator',
21
23
  SELECTION = 'selection',
24
+ ALPHABET = 'alphabet',
22
25
  }
23
26
 
24
27
  export enum SEM_TYPES {
25
- AMINO_ACIDS = 'aminoAcids',
26
- ALIGNED_SEQUENCE = 'alignedSequence',
27
- ALIGNED_SEQUENCE_DIFFERENCE = 'alignedSequenceDifference',
28
+ MONOMER = 'Monomer',
29
+ MACROMOLECULE = 'Macromolecule',
30
+ MACROMOLECULE_DIFFERENCE = 'MacromoleculeDifference',
28
31
  ACTIVITY = 'activity',
29
32
  ACTIVITY_SCALED = 'activityScaled',
30
33
  }
package/src/utils/filtering-statistics.ts CHANGED
@@ -2,61 +2,29 @@ import * as DG from 'datagrok-api/dg';
2
2
 
3
3
  import {tTest} from '@datagrok-libraries/statistics/src/tests';
4
4
 
5
- /** Column statistics helper. */
6
- export class FilteringStatistics {
7
- private data?: Float32Array;
8
- private stats: Stats = {
9
- count: 0,
10
- pValue: 1.,
11
- meanDifference: 0.,
12
- };
13
-
14
- /**
15
- * Creates an instance of FilteringStatistics.
16
- * @param {Float32Array} [data] Numeric values to consider.
17
- */
18
- constructor(data?: Float32Array) {this.data = data;}
19
-
20
- /**
21
- * Sets values to make statistical analysis.
22
- * @param {Float32Array} data Those values.
23
- */
24
- setData(data: Float32Array): void {this.data = data;}
25
-
26
- /**
27
- * Sets bit mask to split population into two groups.
28
- * @param {DG.BitSet} mask The mask to perform splitting.
29
- */
30
- setMask(mask: DG.BitSet): void {
31
- if (!this.data)
32
- return;
33
- const selected = this.data.filter((_, i) => mask.get(i));
34
- const rest = this.data.filter((_, i) => !mask.get(i));
35
- this.stats = this.calcStats(selected, rest);
36
- }
37
-
38
- /**
39
- * Calculates simple statistics on two samples.
40
- * @param {Float32Array} selected First sample.
41
- * @param {Float32Array} rest Second sample.
42
- * @return {Stats} Statistics.
43
- */
44
- calcStats(selected: Float32Array, rest: Float32Array): Stats {
45
- const testResult = tTest(selected, rest);
46
- const currentMeanDiff = testResult['Mean difference']!;
47
- return {
48
- count: selected.length,
49
- pValue: testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'],
50
- meanDifference: currentMeanDiff,
51
- };
52
- }
53
-
54
- /** Returns calculated statistics. */
55
- get result(): Stats {return this.stats;}
56
- }
57
-
58
5
  export type Stats = {
59
6
  count: number,
60
7
  pValue: number,
61
8
  meanDifference: number,
9
+ ratio: number,
62
10
  };
11
+
12
+ type StatsData = Float32Array | Float64Array | Int32Array | Uint32Array | number[];
13
+
14
+ export function getStats(data: StatsData, mask: DG.BitSet): Stats {
15
+ const selected = new Float32Array(mask.trueCount);
16
+ const rest = new Float32Array(mask.falseCount);
17
+ let selectedIndex = 0;
18
+ let restIndex = 0;
19
+ data.forEach((v, i) => mask.get(i) ? selected[selectedIndex++] = v : rest[restIndex++] = v);
20
+
21
+ const testResult = tTest(selected, rest);
22
+ const currentMeanDiff = testResult['Mean difference']!;
23
+ const realCount = selected.length || data.length;
24
+ return {
25
+ count: realCount,
26
+ pValue: testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'],
27
+ meanDifference: currentMeanDiff,
28
+ ratio: realCount / data.length,
29
+ };
30
+ }
package/src/utils/misc.ts CHANGED
@@ -1,24 +1,24 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
-
3
2
  import * as C from './constants';
3
+ import * as type from './types';
4
4
 
5
- export function stringToBool(str: string): boolean {
6
- return str === 'true' ? true : false;
7
- }
5
+ import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
6
+ import {NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
7
+ import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
8
+ import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
8
9
 
9
- export function getSeparator(col: DG.Column<string>): string {
10
- const separator = col.tags[C.TAGS.SEPARATOR];
11
- if (separator)
12
- return separator as string;
13
-
14
- const defaultSeparators = ['.', '-', ' '];
15
- const categories = col.categories;
16
- const catLen = categories.length;
17
- for (const potentialSeparator of defaultSeparators) {
18
- if (categories.filter((sequence) => sequence.includes(potentialSeparator)).length == catLen)
19
- return potentialSeparator;
10
+ export function getPalleteByType(paletteType: string): SeqPalette {
11
+ switch (paletteType) {
12
+ case 'PT':
13
+ return AminoacidsPalettes.GrokGroups;
14
+ case 'NT':
15
+ case 'DNA':
16
+ case 'RNA':
17
+ return NucleotidesPalettes.Chromatogram;
18
+ // other
19
+ default:
20
+ return UnknownSeqPalettes.Color;
20
21
  }
21
- return separator as string ?? '';
22
22
  }
23
23
 
24
24
  export function getTypedArrayConstructor(
@@ -27,3 +27,67 @@ export function getTypedArrayConstructor(
27
27
  maxNum < 65536 ? Uint16Array :
28
28
  Uint32Array;
29
29
  }
30
+
31
+ export function getSeparator(col: DG.Column<string>): string {
32
+ return col.getTag(C.TAGS.SEPARATOR) ?? '';
33
+ }
34
+
35
+ export function scaleActivity(
36
+ activityScaling: string, df: DG.DataFrame, originalActivityName?: string, cloneBitset = false,
37
+ ): [DG.DataFrame, string] {
38
+ let currentActivityColName = originalActivityName ?? C.COLUMNS_NAMES.ACTIVITY;
39
+ const flag = df.columns.names().includes(currentActivityColName) &&
40
+ currentActivityColName === originalActivityName;
41
+ currentActivityColName = flag ? currentActivityColName : C.COLUMNS_NAMES.ACTIVITY;
42
+ const tempDf = df.clone(cloneBitset ? df.filter : null, [currentActivityColName]);
43
+
44
+ let formula = (v: number): number => v;
45
+ let newColName = 'activity';
46
+ switch (activityScaling) {
47
+ case 'none':
48
+ break;
49
+ case 'lg':
50
+ formula = (v: number): number => Math.log10(v);
51
+ newColName = `Log10(${newColName})`;
52
+ break;
53
+ case '-lg':
54
+ formula = (v: number): number => -Math.log10(v);
55
+ newColName = `-Log10(${newColName})`;
56
+ break;
57
+ default:
58
+ throw new Error(`ScalingError: method \`${activityScaling}\` is not available.`);
59
+ }
60
+
61
+ const asCol = tempDf.columns.addNewFloat(C.COLUMNS_NAMES.ACTIVITY_SCALED);
62
+ const activityCol = df.getCol(currentActivityColName);
63
+ asCol.init((i) => formula(activityCol.get(i)));
64
+ df.tags['scaling'] = activityScaling;
65
+
66
+ return [tempDf, newColName];
67
+ }
68
+
69
+ export function calculateBarsData(columns: DG.Column<string>[], selection: DG.BitSet): type.MonomerDfStats {
70
+ const dfStats: type.MonomerDfStats = {};
71
+ const columnsLen = columns.length;
72
+
73
+ for (let colIndex = 0; colIndex < columnsLen; colIndex++) {
74
+ const col = columns[colIndex];
75
+ dfStats[col.name] = calculateSingleBarData(col, selection);
76
+ }
77
+
78
+ return dfStats;
79
+ }
80
+
81
+ export function calculateSingleBarData(col: DG.Column<string>, selection: DG.BitSet): type.MonomerColStats {
82
+ const colLen = col.length;
83
+ const colStats: type.MonomerColStats = {};
84
+ col.categories.forEach((monomer) => colStats[monomer] = {count: 0, selected: 0});
85
+
86
+ for (let rowIndex = 0; rowIndex < colLen; rowIndex++) {
87
+ const monomerStats = colStats[col.get(rowIndex)!];
88
+ monomerStats.count += 1;
89
+ monomerStats.selected += +selection.get(rowIndex);
90
+ }
91
+
92
+ return colStats;
93
+ }