@datagrok/peptides 0.6.1 → 0.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintrc.json CHANGED
@@ -23,7 +23,17 @@
23
23
  "error",
24
24
  120
25
25
  ],
26
+ "require-jsdoc": "off",
26
27
  "spaced-comment": "off",
27
- "require-jsdoc": "off"
28
+ "linebreak-style": "off",
29
+ "curly": [
30
+ "error",
31
+ "multi-or-nest"
32
+ ],
33
+ "brace-style": [
34
+ "error",
35
+ "1tbs",
36
+ { "allowSingleLine": true }
37
+ ]
28
38
  }
29
- }
39
+ }
package/detectors.js CHANGED
@@ -3,7 +3,7 @@ class PeptidesPackageDetectors extends DG.Package {
3
3
  //input: column col
4
4
  //output: string semType
5
5
  detectAligned(col) {
6
- const regexp = new RegExp(/^([^-^\n]*-){7,49}(\w|\(|\))+$/);
6
+ const regexp = new RegExp(/^([^\-\n]*-){7,49}(\w|\(|\))+$/);
7
7
  return DG.Detector.sampleCategories(col, (s) => regexp.test(s.trim())) ? 'alignedSequence' : null;
8
8
  }
9
9
  }
@@ -10272,4 +10272,4 @@ ID,AlignedSequence,Measured,Value
10272
10272
  1357,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-H-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,86.31581627936768
10273
10273
  1359,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-homobAla-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,93.44441627936769
10274
10274
  1360,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-D(NPyr)-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,88.34951627936769
10275
- 1361,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-3OHPhe-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,98.38061627936769
10275
+ 1361,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-3OHPhe-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,98.38061627936769
package/package.json CHANGED
@@ -1,32 +1,33 @@
1
1
  {
2
2
  "name": "@datagrok/peptides",
3
- "version": "0.6.1",
3
+ "version": "0.8.6",
4
4
  "description": "",
5
5
  "dependencies": {
6
- "@keckelt/tsne": "^1.0.2",
6
+ "@biowasm/aioli": ">=2.4.0",
7
+ "@datagrok-libraries/bio": ">=0.0.4",
8
+ "@datagrok-libraries/ml": ">=0.0.10",
9
+ "@datagrok-libraries/statistics": ">=0.1.5",
10
+ "@datagrok-libraries/utils": ">=0.0.18",
11
+ "@types/d3": "^7.0.0",
12
+ "@types/jquery": "^3.5.6",
7
13
  "cash-dom": "latest",
8
14
  "d3": "latest",
9
- "datagrok-api": ">=0.104.0",
15
+ "datagrok-api": ">=0.115.0",
10
16
  "dayjs": "latest",
11
- "jaro-winkler-typescript": "^1.0.1",
17
+ "file-loader": "^6.2.0",
12
18
  "jstat": "^1.9.5",
13
19
  "logojs-react": "^2.1.1",
14
- "rxjs": "^6.5.5",
15
- "umap-js": "^1.3.3",
16
- "@datagrok-libraries/utils": ">=0.0.13",
17
- "@datagrok-libraries/statistics": ">=0.1.5",
18
- "@types/d3": "^7.0.0",
19
- "@types/jquery": "^3.5.6"
20
+ "rxjs": "^6.5.5"
20
21
  },
21
22
  "devDependencies": {
22
- "typescript": "^4.4.4",
23
- "ts-loader": "^9.2.5",
24
- "css-loader": "^5.2.4",
25
- "style-loader": "^2.0.0",
26
23
  "@typescript-eslint/eslint-plugin": "^4.29.1",
27
24
  "@typescript-eslint/parser": "^4.29.1",
25
+ "css-loader": "^5.2.4",
28
26
  "eslint": "^7.32.0",
29
27
  "eslint-config-google": "^0.14.0",
28
+ "style-loader": "^2.0.0",
29
+ "ts-loader": "^9.2.5",
30
+ "typescript": "^4.4.4",
30
31
  "webpack": "latest",
31
32
  "webpack-cli": "latest"
32
33
  },
@@ -36,10 +37,12 @@
36
37
  "common/ngl_viewer/ngl.js"
37
38
  ],
38
39
  "scripts": {
40
+ "link-api": "npm link datagrok-api",
39
41
  "link-utils": "npm link @datagrok-libraries/utils",
40
42
  "link-statistics": "npm link @datagrok-libraries/statistics",
41
- "link-api": "npm link datagrok-api",
42
- "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/statistics",
43
+ "link-ml": "npm link @datagrok-libraries/ml",
44
+ "link-bio": "npm link @datagrok-libraries/bio",
45
+ "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/statistics link @datagrok-libraries/ml @datagrok-libraries/bio",
43
46
  "install-dependencies": "npm install",
44
47
  "debug-peptides": "grok publish --rebuild",
45
48
  "release-peptides": "grok publish --rebuild --release",
@@ -53,5 +56,11 @@
53
56
  "release-peptides-local": "grok publish local --rebuild --release",
54
57
  "lint": "eslint \"./src/**/*.ts\"",
55
58
  "lint-fix": "eslint \"./src/**/*.ts\" --fix"
56
- }
57
- }
59
+ },
60
+ "canEdit": [
61
+ "Developers"
62
+ ],
63
+ "canView": [
64
+ "All users"
65
+ ]
66
+ }
package/setup.sh ADDED
@@ -0,0 +1,15 @@
1
+ npm unlink datagrok-api
2
+ npm unlink @datagrok-libraries/utils
3
+ npm unlink @datagrok-libraries/ml
4
+ cd ../../js-api
5
+ npm install
6
+ npm link
7
+ cd ../libraries/utils
8
+ npm install
9
+ npm link
10
+ cd ../../libraries/ml
11
+ npm install
12
+ npm link datagrok-api @datagrok-libraries/utils
13
+ cd ../../packages/Peptides
14
+ npm install
15
+ npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/ml
package/src/describe.ts CHANGED
@@ -53,71 +53,56 @@ const groupDescription: {[key: string]: {'description': string, 'aminoAcids': st
53
53
  }
54
54
  }*/
55
55
 
56
- //TODO: decomposition!
57
- export async function describe(
56
+ function joinDataFrames(
57
+ activityColumnScaled: string,
58
58
  df: DG.DataFrame,
59
+ positionColumns: string[],
60
+ splitSeqDf: DG.DataFrame,
59
61
  activityColumn: string,
60
- activityScaling: string,
61
- sourceGrid: DG.Grid,
62
- twoColorMode: boolean,
63
- initialBitset: DG.BitSet | null,
64
- grouping: boolean,
65
- ): Promise<[DG.Grid, DG.Grid, DG.DataFrame, {[key: string]: string}]> {
66
- //Split the aligned sequence into separate AARs
67
- let splitSeqDf: DG.DataFrame | undefined;
68
- let invalidIndexes: number[];
69
- const col: DG.Column = df.columns.bySemType('alignedSequence');
70
- [splitSeqDf, invalidIndexes] = splitAlignedPeptides(col);
71
- splitSeqDf.name = 'Split sequence';
62
+ ) {
63
+ if (df.col(activityColumnScaled))
64
+ (df.columns as DG.ColumnList).remove(activityColumnScaled);
72
65
 
73
- const positionColumns = splitSeqDf.columns.names();
74
- const activityColumnScaled = `${activityColumn}Scaled`;
75
- const renderColNames: string[] = splitSeqDf.columns.names();
76
-
77
- splitSeqDf.columns.add(df.getCol(activityColumn));
78
-
79
- if (df.col(activityColumnScaled)) {
80
- df.columns.remove(activityColumnScaled);
81
- }
82
66
 
83
67
  //FIXME: this column usually gets duplicated, so remove the duplicate too
84
- if (df.col(`${activityColumnScaled} (2)`)) {
85
- df.columns.remove(`${activityColumnScaled} (2)`);
86
- }
68
+ if (df.col(`${activityColumnScaled} (2)`))
69
+ (df.columns as DG.ColumnList).remove(`${activityColumnScaled} (2)`);
70
+
87
71
 
88
72
  // append splitSeqDf columns to source table and make sure columns are not added more than once
89
73
  const dfColsSet = new Set(df.columns.names());
90
- if (!positionColumns.every((col: string) => dfColsSet.has(col))) {
74
+ if (!positionColumns.every((col: string) => dfColsSet.has(col)))
91
75
  df.join(splitSeqDf, [activityColumn], [activityColumn], df.columns.names(), positionColumns, 'inner', true);
92
- }
93
-
94
- for (const col of df.columns) {
95
- if (splitSeqDf.col(col.name) && col.name != activityColumn) {
96
- setAARRenderer(col, sourceGrid);
97
- }
98
- }
76
+ }
99
77
 
78
+ function sortSourceGrid(sourceGrid: DG.Grid) {
100
79
  if (sourceGrid) {
101
- const colNames:string[] = [];
102
- for (let i = 0; i < sourceGrid.columns.length; i++) {
103
- colNames.push(sourceGrid.columns.byIndex(i)!.name);
104
- }
80
+ const colNames: DG.GridColumn[] = [];
81
+ for (let i = 1; i < sourceGrid.columns.length; i++)
82
+ colNames.push(sourceGrid.columns.byIndex(i)!);
83
+
105
84
  colNames.sort((a, b)=>{
106
- if (sourceGrid.columns.byName(a)?.column?.semType == 'aminoAcids') {
107
- if (sourceGrid.columns.byName(b)?.column?.semType == 'aminoAcids') {
85
+ if (a.column!.semType == 'aminoAcids') {
86
+ if (b.column!.semType == 'aminoAcids')
108
87
  return 0;
109
- }
110
88
  return -1;
111
89
  }
112
- if (sourceGrid.columns.byName(b)?.column?.semType == 'aminoAcids') {
90
+ if (b.column!.semType == 'aminoAcids')
113
91
  return 1;
114
- }
115
92
  return 0;
116
93
  });
117
- sourceGrid?.columns.setOrder(colNames);
94
+ sourceGrid.columns.setOrder(colNames.map((v) => v.name));
118
95
  }
96
+ }
119
97
 
120
- // scale activity
98
+ async function scaleActivity(
99
+ activityScaling: string,
100
+ activityColumn: string,
101
+ activityColumnScaled: string,
102
+ sourceGrid: DG.Grid,
103
+ splitSeqDf: DG.DataFrame,
104
+ ) {
105
+ const df = sourceGrid.dataFrame!;
121
106
  switch (activityScaling) {
122
107
  case 'lg':
123
108
  await df.columns.addNewCalculated(activityColumnScaled, 'Log10(${' + activityColumn + '})');
@@ -138,32 +123,17 @@ export async function describe(
138
123
  sourceGrid.columns.setOrder([`${activityColumn}`]);
139
124
  break;
140
125
  }
141
- splitSeqDf = splitSeqDf.clone(initialBitset);
142
-
143
- const positionColName = 'Position';
144
- const aminoAcidResidue = 'AAR';
145
-
146
- //unpivot a table and handle duplicates
147
- splitSeqDf = splitSeqDf.groupBy(positionColumns)
148
- .add('med', activityColumnScaled, activityColumnScaled)
149
- .aggregate();
150
-
151
- const peptidesCount = splitSeqDf.getCol(activityColumnScaled).length;
152
-
153
- let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
154
-
155
- //TODO: move to chem palette
156
- let groupMapping: {[key: string]: string} = {};
157
- if (grouping) {
158
- groupMapping = aarGroups;
159
- const aarCol = matrixDf.getCol(aminoAcidResidue);
160
- aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
161
- aarCol.compact();
162
- } else {
163
- Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
164
- }
126
+ }
165
127
 
166
- //statistics for specific AAR at a specific position
128
+ async function calculateStatistics(
129
+ matrixDf: DG.DataFrame,
130
+ positionColName: string,
131
+ aminoAcidResidue: string,
132
+ activityColumnScaled: string,
133
+ peptidesCount: number,
134
+ splitSeqDf: DG.DataFrame,
135
+ groupMapping: {[key: string]: string},
136
+ ) {
167
137
  matrixDf = matrixDf.groupBy([positionColName, aminoAcidResidue])
168
138
  .add('count', activityColumnScaled, 'Count')
169
139
  .aggregate();
@@ -177,67 +147,54 @@ export async function describe(
177
147
  await matrixDf.columns.addNewCalculated('Ratio', '${count}/'.concat(`${peptidesCount}`));
178
148
 
179
149
  //calculate p-values based on t-test
180
- let position: string;
181
- let aar: string;
182
- let currentActivity: number[];
183
- let otherActivity: number[];
184
- let testResult;
185
- let currentMeanDiff: number;
186
150
  let pvalues: Float32Array = new Float32Array(matrixDf.rowCount).fill(1);
187
- let pvalue = 1.;
188
-
189
151
  const mdCol: DG.Column = matrixDf.columns.addNewFloat('Mean difference');
190
152
  const pValCol: DG.Column = matrixDf.columns.addNewFloat('pValue');
191
153
  for (let i = 0; i < matrixDf.rowCount; i++) {
192
- position = matrixDf.get(positionColName, i);
193
- aar = matrixDf.get(aminoAcidResidue, i);
154
+ const position = matrixDf.get(positionColName, i);
155
+ const aar = matrixDf.get(aminoAcidResidue, i);
194
156
 
195
157
  //@ts-ignore
196
158
  splitSeqDf.rows.select((row) => groupMapping[row[position]] === aar);
197
- currentActivity = splitSeqDf
159
+ const currentActivity: number[] = splitSeqDf
198
160
  .clone(splitSeqDf.selection, [activityColumnScaled])
199
161
  .getCol(activityColumnScaled)
200
162
  .toList();
201
163
 
202
164
  //@ts-ignore
203
165
  splitSeqDf.rows.select((row) => groupMapping[row[position]] !== aar);
204
- otherActivity = splitSeqDf
166
+ const otherActivity: number[] = splitSeqDf
205
167
  .clone(splitSeqDf.selection, [activityColumnScaled])
206
168
  .getCol(activityColumnScaled)
207
169
  .toList();
208
170
 
209
- testResult = tTest(currentActivity, otherActivity);
171
+ const testResult = tTest(currentActivity, otherActivity);
210
172
  // testResult = uTest(currentActivity, otherActivity);
211
- currentMeanDiff = testResult['Mean difference']!;
212
- pvalue = testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'];
173
+ const currentMeanDiff = testResult['Mean difference']!;
174
+ const pvalue = testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'];
213
175
 
214
176
  mdCol.set(i, currentMeanDiff);
215
177
  pvalues[i] = pvalue;
216
178
  }
217
179
 
218
- if (true) {
180
+ if (true)
219
181
  pvalues = fdrcorrection(pvalues)[1];
220
- }
221
182
 
222
- for (let i = 0; i < pvalues.length; ++i) {
183
+
184
+ for (let i = 0; i < pvalues.length; ++i)
223
185
  pValCol.set(i, pvalues[i]);
224
- }
225
186
 
226
- const statsDf = matrixDf.clone();
227
187
 
228
- // SAR matrix table
229
- //pivot a table to make it matrix-like
230
- matrixDf = matrixDf.groupBy([aminoAcidResidue])
231
- .pivot(positionColName)
232
- .add('first', 'Mean difference', '')
233
- .aggregate();
234
- matrixDf.name = 'SAR';
188
+ return matrixDf.clone();
189
+ }
235
190
 
236
- // Setting category order
191
+ async function setCategoryOrder(
192
+ twoColorMode: boolean, statsDf: DG.DataFrame, aminoAcidResidue: string, matrixDf: DG.DataFrame,
193
+ ) {
237
194
  const sortArgument = twoColorMode ? 'Absolute Mean difference' : 'Mean difference';
238
- if (twoColorMode) {
195
+ if (twoColorMode)
239
196
  await statsDf.columns.addNewCalculated('Absolute Mean difference', 'Abs(${Mean difference})');
240
- }
197
+
241
198
  const aarWeightsDf = statsDf.groupBy([aminoAcidResidue]).sum(sortArgument, 'weight').aggregate();
242
199
  const aarList = aarWeightsDf.getCol(aminoAcidResidue).toList();
243
200
  const getWeight = (aar: string) => aarWeightsDf
@@ -248,8 +205,14 @@ export async function describe(
248
205
  aarList.sort((first, second) => getWeight(second) - getWeight(first));
249
206
 
250
207
  matrixDf.getCol(aminoAcidResidue).setCategoryOrder(aarList);
208
+ }
251
209
 
252
- // SAR vertical table (naive, choose best Mean difference from pVals <= 0.01)
210
+ function createVerticalTable(
211
+ statsDf: DG.DataFrame,
212
+ aminoAcidResidue: string,
213
+ positionColName: string,
214
+ twoColorMode: boolean,
215
+ ) {
253
216
  // TODO: acquire ALL of the positions
254
217
  let sequenceDf = statsDf.groupBy(['Mean difference', aminoAcidResidue, positionColName, 'Count', 'Ratio', 'pValue'])
255
218
  .where('pValue <= 0.1')
@@ -268,9 +231,18 @@ export async function describe(
268
231
  sequenceDf = sequenceDf.clone(DG.BitSet.create(sequenceDf.rowCount, (i) => {
269
232
  return sequenceDf.get('Mean difference', i) === maxAtPos[sequenceDf.get(positionColName, i)];
270
233
  }));
271
- renderColNames.push('Mean difference');
272
234
 
273
- // !!! DRAWING PHASE !!!
235
+ return sequenceDf;
236
+ }
237
+
238
+ function createGrids(
239
+ matrixDf: DG.DataFrame,
240
+ aminoAcidResidue: string,
241
+ positionColumns: string[],
242
+ sequenceDf: DG.DataFrame,
243
+ positionColName: string,
244
+ grouping: boolean,
245
+ ) {
274
246
  const sarGrid = matrixDf.plot.grid();
275
247
  sarGrid.sort([aminoAcidResidue]);
276
248
  sarGrid.columns.setOrder([aminoAcidResidue].concat(positionColumns));
@@ -282,16 +254,27 @@ export async function describe(
282
254
 
283
255
  if (!grouping) {
284
256
  let tempCol = matrixDf.columns.byName(aminoAcidResidue);
285
- if (tempCol) {
257
+ if (tempCol)
286
258
  setAARRenderer(tempCol, sarGrid);
287
- }
259
+
288
260
  tempCol = sequenceDf.columns.byName(aminoAcidResidue);
289
- if (tempCol) {
261
+ if (tempCol)
290
262
  setAARRenderer(tempCol, sarGrid);
291
- }
292
263
  }
293
264
 
294
- //render column headers and AAR symbols centered
265
+ return [sarGrid, sarVGrid];
266
+ }
267
+
268
+ function setCellRendererFunc(
269
+ renderColNames: string[],
270
+ positionColName: string,
271
+ aminoAcidResidue: string,
272
+ statsDf: DG.DataFrame,
273
+ twoColorMode: boolean,
274
+ sarGrid: DG.Grid,
275
+ sarVGrid: DG.Grid,
276
+ ) {
277
+ const mdCol = statsDf.getCol('Mean difference');
295
278
  const cellRendererFunc = function(args: DG.GridCellRenderArgs) {
296
279
  args.g.save();
297
280
  args.g.beginPath();
@@ -321,15 +304,15 @@ export async function describe(
321
304
 
322
305
  let coef;
323
306
  const variant = args.cell.cell.value < 0;
324
- if (pVal < 0.01) {
307
+ if (pVal < 0.01)
325
308
  coef = variant && twoColorMode ? '#FF7900' : '#299617';
326
- } else if (pVal < 0.05) {
309
+ else if (pVal < 0.05)
327
310
  coef = variant && twoColorMode ? '#FFA500' : '#32CD32';
328
- } else if (pVal < 0.1) {
311
+ else if (pVal < 0.1)
329
312
  coef = variant && twoColorMode ? '#FBCEB1' : '#98FF98';
330
- } else {
313
+ else
331
314
  coef = DG.Color.toHtml(DG.Color.lightLightGray);
332
- }
315
+
333
316
 
334
317
  const chooseMin = () => twoColorMode ? 0 : mdCol.min;
335
318
  const chooseMax = () => twoColorMode ? Math.max(Math.abs(mdCol.min), mdCol.max) : mdCol.max;
@@ -360,8 +343,18 @@ export async function describe(
360
343
  };
361
344
  sarGrid.onCellRender.subscribe(cellRendererFunc);
362
345
  sarVGrid.onCellRender.subscribe(cellRendererFunc);
346
+ }
363
347
 
364
- // show all the statistics in a tooltip over cell
348
+ function setTooltipFunc(
349
+ renderColNames: string[],
350
+ statsDf: DG.DataFrame,
351
+ aminoAcidResidue: string,
352
+ positionColName: string,
353
+ peptidesCount: number,
354
+ grouping: boolean,
355
+ sarGrid: DG.Grid,
356
+ sarVGrid: DG.Grid,
357
+ ) {
365
358
  const onCellTooltipFunc = function(cell: DG.GridCell, x: number, y: number) {
366
359
  if (
367
360
  !cell.isRowHeader &&
@@ -383,11 +376,11 @@ export async function describe(
383
376
  const textNum = statsDf.groupBy([col]).where(query).aggregate().get(col, 0);
384
377
  let text = `${col === 'Count' ? textNum : textNum.toFixed(5)}`;
385
378
 
386
- if (col === 'Count') {
379
+ if (col === 'Count')
387
380
  text += ` / ${peptidesCount}`;
388
- } else if (col === 'pValue') {
381
+ else if (col === 'pValue')
389
382
  text = parseFloat(text) !== 0 ? text : '<0.01';
390
- }
383
+
391
384
 
392
385
  tooltipMap[col === 'pValue' ? 'p-value' : col] = text;
393
386
  }
@@ -406,25 +399,33 @@ export async function describe(
406
399
  const currentGroup = groupDescription[cell.cell.value];
407
400
  const divText = ui.divText('Amino Acids in this group: ' + currentGroup['aminoAcids'].join(', '));
408
401
  ui.tooltip.show(ui.divV([ui.h3(currentGroup['description']), divText]), x, y);
409
- } else {
402
+ } else
410
403
  cp.showTooltip(cell, x, y);
411
- }
412
404
  }
413
405
  return true;
414
406
  };
415
407
  sarGrid.onCellTooltip(onCellTooltipFunc);
416
408
  sarVGrid.onCellTooltip(onCellTooltipFunc);
409
+ }
417
410
 
411
+ function postProcessGrids(
412
+ sourceGrid: DG.Grid,
413
+ invalidIndexes: number[],
414
+ matrixDf: DG.DataFrame,
415
+ grouping: boolean,
416
+ aminoAcidResidue: string,
417
+ sarGrid: DG.Grid,
418
+ sarVGrid: DG.Grid,
419
+ ) {
418
420
  sourceGrid.onCellPrepare((cell: DG.GridCell) => {
419
421
  const currentRowIndex = cell.tableRowIndex;
420
- if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader) {
422
+ if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader)
421
423
  cell.style.backColor = DG.Color.lightLightGray;
422
- }
423
424
  });
424
425
 
425
- for (const col of matrixDf.columns.names()) {
426
+ for (const col of matrixDf.columns.names())
426
427
  sarGrid.col(col)!.width = sarGrid.props.rowHeight;
427
- }
428
+
428
429
 
429
430
  if (grouping) {
430
431
  sarGrid.col(aminoAcidResidue)!.name = 'Groups';
@@ -434,5 +435,100 @@ export async function describe(
434
435
  sarGrid.props.allowEdit = false;
435
436
  sarVGrid.props.allowEdit = false;
436
437
 
438
+ sarVGrid.col('Mean difference')!.name = 'Diff';
439
+ }
440
+
441
+ export async function describe(
442
+ df: DG.DataFrame,
443
+ activityColumn: string,
444
+ activityScaling: string,
445
+ sourceGrid: DG.Grid,
446
+ twoColorMode: boolean,
447
+ initialBitset: DG.BitSet | null,
448
+ grouping: boolean,
449
+ ): Promise<[DG.Grid, DG.Grid, DG.DataFrame, {[key: string]: string}]> {
450
+ //Split the aligned sequence into separate AARs
451
+ let splitSeqDf: DG.DataFrame | undefined;
452
+ let invalidIndexes: number[];
453
+ const col: DG.Column = df.columns.bySemType('alignedSequence');
454
+ [splitSeqDf, invalidIndexes] = splitAlignedPeptides(col);
455
+ splitSeqDf.name = 'Split sequence';
456
+
457
+ const positionColumns = splitSeqDf.columns.names();
458
+ const activityColumnScaled = `${activityColumn}Scaled`;
459
+ const renderColNames: string[] = splitSeqDf.columns.names();
460
+ const positionColName = 'Position';
461
+ const aminoAcidResidue = 'AAR';
462
+
463
+ splitSeqDf.columns.add(df.getCol(activityColumn));
464
+
465
+ joinDataFrames(activityColumnScaled, df, positionColumns, splitSeqDf, activityColumn);
466
+
467
+ for (const col of df.columns) {
468
+ if (splitSeqDf.col(col.name) && col.name != activityColumn)
469
+ setAARRenderer(col, sourceGrid);
470
+ }
471
+
472
+ sortSourceGrid(sourceGrid);
473
+
474
+ await scaleActivity(activityScaling, activityColumn, activityColumnScaled, sourceGrid, splitSeqDf);
475
+ splitSeqDf = splitSeqDf.clone(initialBitset);
476
+
477
+ //unpivot a table and handle duplicates
478
+ splitSeqDf = splitSeqDf.groupBy(positionColumns)
479
+ .add('med', activityColumnScaled, activityColumnScaled)
480
+ .aggregate();
481
+
482
+ const peptidesCount = splitSeqDf.getCol(activityColumnScaled).length;
483
+
484
+ let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
485
+
486
+ //TODO: move to chem palette
487
+ let groupMapping: {[key: string]: string} = {};
488
+ if (grouping) {
489
+ groupMapping = aarGroups;
490
+ const aarCol = matrixDf.getCol(aminoAcidResidue);
491
+ aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
492
+ aarCol.compact();
493
+ } else
494
+ Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
495
+
496
+
497
+ //statistics for specific AAR at a specific position
498
+ const statsDf = await calculateStatistics(
499
+ matrixDf, positionColName, aminoAcidResidue, activityColumnScaled, peptidesCount, splitSeqDf, groupMapping,
500
+ );
501
+
502
+ // SAR matrix table
503
+ //pivot a table to make it matrix-like
504
+ matrixDf = statsDf.groupBy([aminoAcidResidue])
505
+ .pivot(positionColName)
506
+ .add('first', 'Mean difference', '')
507
+ .aggregate();
508
+ matrixDf.name = 'SAR';
509
+
510
+ // Setting category order
511
+ await setCategoryOrder(twoColorMode, statsDf, aminoAcidResidue, matrixDf);
512
+
513
+ // SAR vertical table (naive, choose best Mean difference from pVals <= 0.01)
514
+ const sequenceDf = createVerticalTable(statsDf, aminoAcidResidue, positionColName, twoColorMode);
515
+ renderColNames.push('Mean difference');
516
+
517
+ const [sarGrid, sarVGrid] = createGrids(
518
+ matrixDf, aminoAcidResidue, positionColumns, sequenceDf, positionColName, grouping,
519
+ );
520
+
521
+ setCellRendererFunc(
522
+ renderColNames, positionColName, aminoAcidResidue, statsDf, twoColorMode, sarGrid, sarVGrid,
523
+ );
524
+
525
+ // show all the statistics in a tooltip over cell
526
+ setTooltipFunc(
527
+ renderColNames, statsDf, aminoAcidResidue, positionColName, peptidesCount, grouping, sarGrid, sarVGrid,
528
+ );
529
+
530
+ postProcessGrids(sourceGrid, invalidIndexes, matrixDf, grouping, aminoAcidResidue, sarGrid, sarVGrid);
531
+
532
+ //TODO: return class instead
437
533
  return [sarGrid, sarVGrid, statsDf, groupMapping];
438
534
  }
@@ -1,6 +1,6 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
 
3
- import {describe} from '../describe';
3
+ import {describe} from './describe';
4
4
  import {Subject} from 'rxjs';
5
5
 
6
6
  /**