@datagrok/peptides 0.3.0 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintrc.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "env": {
3
+ "browser": true,
4
+ "es2021": true
5
+ },
6
+ "extends": [
7
+ "google"
8
+ ],
9
+ "parser": "@typescript-eslint/parser",
10
+ "parserOptions": {
11
+ "ecmaVersion": 12,
12
+ "sourceType": "module"
13
+ },
14
+ "plugins": [
15
+ "@typescript-eslint"
16
+ ],
17
+ "rules": {
18
+ "indent": [
19
+ "error",
20
+ 2
21
+ ],
22
+ "max-len": [
23
+ "error",
24
+ 120
25
+ ],
26
+ "spaced-comment": "off",
27
+ "require-jsdoc": "off"
28
+ }
29
+ }
package/package.json CHANGED
@@ -1,20 +1,20 @@
1
1
  {
2
2
  "name": "@datagrok/peptides",
3
- "version": "0.3.0",
3
+ "version": "0.5.6",
4
4
  "description": "",
5
5
  "dependencies": {
6
6
  "@keckelt/tsne": "^1.0.2",
7
7
  "cash-dom": "latest",
8
8
  "d3": "latest",
9
- "datagrok-api": ">0.95.4",
9
+ "datagrok-api": ">=0.104.0",
10
10
  "dayjs": "latest",
11
11
  "jaro-winkler-typescript": "^1.0.1",
12
12
  "jstat": "^1.9.5",
13
13
  "logojs-react": "^2.1.1",
14
14
  "rxjs": "^6.5.5",
15
15
  "umap-js": "^1.3.3",
16
- "@datagrok-libraries/utils": ">=0.0.10",
17
- "@datagrok-libraries/statistics": ">=0.1.4",
16
+ "@datagrok-libraries/utils": ">=0.0.13",
17
+ "@datagrok-libraries/statistics": ">=0.1.5",
18
18
  "@types/d3": "^7.0.0",
19
19
  "@types/jquery": "^3.5.6"
20
20
  },
package/src/describe.ts CHANGED
@@ -1,15 +1,59 @@
1
- // eslint-disable-next-line no-unused-vars
2
- import * as grok from 'datagrok-api/grok';
3
1
  import * as ui from 'datagrok-api/ui';
4
2
  import * as DG from 'datagrok-api/dg';
5
3
  import {splitAlignedPeptides} from './utils/split-aligned';
6
4
  import {tTest} from '@datagrok-libraries/statistics/src/tests';
7
- import {fdrcorrection} from '@datagrok-libraries/statistics/src/multiple-tests.js';
5
+ import {fdrcorrection} from '@datagrok-libraries/statistics/src/multiple-tests';
8
6
  import {ChemPalette} from './utils/chem-palette';
9
7
  import {setAARRenderer} from './utils/cell-renderer';
10
8
 
11
9
  const cp = new ChemPalette('grok');
12
10
 
11
+ const aarGroups = {
12
+ 'R': 'PC',
13
+ 'H': 'PC',
14
+ 'K': 'PC',
15
+ 'D': 'NC',
16
+ 'E': 'NC',
17
+ 'S': 'U',
18
+ 'T': 'U',
19
+ 'N': 'U',
20
+ 'Q': 'U',
21
+ 'C': 'SC',
22
+ 'U': 'SC',
23
+ 'G': 'SC',
24
+ 'P': 'SC',
25
+ 'A': 'H',
26
+ 'V': 'H',
27
+ 'I': 'H',
28
+ 'L': 'H',
29
+ 'M': 'H',
30
+ 'F': 'H',
31
+ 'Y': 'H',
32
+ 'W': 'H',
33
+ '-': '-',
34
+ };
35
+
36
+ const groupDescription: {[key: string]: {'description': string, 'aminoAcids': string[]}} = {
37
+ 'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
38
+ 'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
39
+ 'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
40
+ 'SC': {'description': 'Special Cases', 'aminoAcids': ['C', 'U', 'G', 'P']},
41
+ 'H': {
42
+ 'description': 'Amino Acids with Hydrophobic Side Chain',
43
+ 'aminoAcids': ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
44
+ },
45
+ '-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
46
+ };
47
+
48
+ /*function customGridColumnHeader(cell: DG.GridCell) {
49
+ if (cell.isColHeader && cell.tableColumn != null) {
50
+ if (highlightedColumns.includes(parseInt(cell.tableColumn.name))) {
51
+ cell.style.backColor = 0xff1f77b4;
52
+ }
53
+ }
54
+ }*/
55
+
56
+ //TODO: decomposition!
13
57
  export async function describe(
14
58
  df: DG.DataFrame,
15
59
  activityColumn: string,
@@ -17,13 +61,15 @@ export async function describe(
17
61
  sourceGrid: DG.Grid,
18
62
  twoColorMode: boolean,
19
63
  initialBitset: DG.BitSet | null,
20
- ): Promise<[DG.Grid, DG.Grid, DG.DataFrame]> {
64
+ grouping: boolean,
65
+ ): Promise<[DG.Grid, DG.Grid, DG.DataFrame, {[key: string]: string}]> {
21
66
  //Split the aligned sequence into separate AARs
22
67
  let splitSeqDf: DG.DataFrame | undefined;
23
68
  let invalidIndexes: number[];
24
69
  const col: DG.Column = df.columns.bySemType('alignedSequence');
25
70
  [splitSeqDf, invalidIndexes] = splitAlignedPeptides(col);
26
71
  splitSeqDf.name = 'Split sequence';
72
+
27
73
  const positionColumns = splitSeqDf.columns.names();
28
74
  const activityColumnScaled = `${activityColumn}Scaled`;
29
75
  const renderColNames: string[] = splitSeqDf.columns.names();
@@ -50,6 +96,7 @@ export async function describe(
50
96
  setAARRenderer(col, sourceGrid);
51
97
  }
52
98
  }
99
+
53
100
  if (sourceGrid) {
54
101
  const colNames:string[] = [];
55
102
  for (let i = 0; i < sourceGrid.columns.length; i++) {
@@ -105,6 +152,17 @@ export async function describe(
105
152
 
106
153
  let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
107
154
 
155
+ //TODO: move to chem palette
156
+ let groupMapping: {[key: string]: string} = {};
157
+ if (grouping) {
158
+ groupMapping = aarGroups;
159
+ const aarCol = matrixDf.getCol(aminoAcidResidue);
160
+ aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
161
+ aarCol.compact();
162
+ } else {
163
+ Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
164
+ }
165
+
108
166
  //statistics for specific AAR at a specific position
109
167
  matrixDf = matrixDf.groupBy([positionColName, aminoAcidResidue])
110
168
  .add('count', activityColumnScaled, 'Count')
@@ -135,14 +193,14 @@ export async function describe(
135
193
  AAR = matrixDf.get(aminoAcidResidue, i);
136
194
 
137
195
  //@ts-ignore
138
- splitSeqDf.rows.select((row) => row[position] === AAR);
196
+ splitSeqDf.rows.select((row) => groupMapping[row[position]] === AAR);
139
197
  currentActivity = splitSeqDf
140
198
  .clone(splitSeqDf.selection, [activityColumnScaled])
141
199
  .getCol(activityColumnScaled)
142
200
  .toList();
143
201
 
144
202
  //@ts-ignore
145
- splitSeqDf.rows.select((row) => row[position] !== AAR);
203
+ splitSeqDf.rows.select((row) => groupMapping[row[position]] !== AAR);
146
204
  otherActivity = splitSeqDf
147
205
  .clone(splitSeqDf.selection, [activityColumnScaled])
148
206
  .getCol(activityColumnScaled)
@@ -190,11 +248,9 @@ export async function describe(
190
248
  aarList.sort((first, second) => getWeight(second) - getWeight(first));
191
249
 
192
250
  matrixDf.getCol(aminoAcidResidue).setCategoryOrder(aarList);
193
- //const sequenceDf = segregateBestAtAllCateg(statsDf, twoColorMode);
194
251
 
195
252
  // SAR vertical table (naive, choose best Mean difference from pVals <= 0.1)
196
253
  // TODO: acquire ALL of the positions
197
-
198
254
  let sequenceDf = statsDf.groupBy(['Mean difference', aminoAcidResidue, positionColName, 'Count', 'Ratio', 'pValue'])
199
255
  .where('pValue <= 0.1')
200
256
  .aggregate();
@@ -224,17 +280,14 @@ export async function describe(
224
280
  SARVgrid.col('pValue')!.format = 'four digits after comma';
225
281
  SARVgrid.col('pValue')!.name = 'P-Value';
226
282
 
227
- //FIXME: looks inefficient
228
- for (const col of matrixDf.columns) {
229
- if (col.name === aminoAcidResidue) {
230
- setAARRenderer(col, SARgrid);
231
- break;
283
+ if (!grouping) {
284
+ let tempCol = matrixDf.columns.byName(aminoAcidResidue);
285
+ if (tempCol) {
286
+ setAARRenderer(tempCol, SARgrid);
232
287
  }
233
- }
234
- for (const col of sequenceDf.columns) {
235
- if (col.name === aminoAcidResidue) {
236
- setAARRenderer(col, SARVgrid);
237
- break;
288
+ tempCol = sequenceDf.columns.byName(aminoAcidResidue);
289
+ if (tempCol) {
290
+ setAARRenderer(tempCol, SARgrid);
238
291
  }
239
292
  }
240
293
 
@@ -251,19 +304,6 @@ export async function describe(
251
304
  return;
252
305
  }
253
306
 
254
- // if (args.cell.isColHeader) {
255
- // if (args.cell.gridColumn.name != aminoAcidResidue) {
256
- // const textSize = args.g.measureText(args.cell.gridColumn.name);
257
- // args.g.fillStyle = '#4b4b4a';
258
- // args.g.fillText(
259
- // args.cell.gridColumn.name,
260
- // args.bounds.x + (args.bounds.width - textSize.width) / 2,
261
- // args.bounds.y + (textSize.actualBoundingBoxAscent + textSize.actualBoundingBoxDescent),
262
- // );
263
- // }
264
- // args.preventDefault();
265
- // }
266
-
267
307
  if (
268
308
  args.cell.isTableCell &&
269
309
  args.cell.tableRowIndex !== null &&
@@ -357,28 +397,39 @@ export async function describe(
357
397
  }
358
398
  if (
359
399
  !cell.isColHeader &&
360
- cell.tableColumn !== null &&
361
- cell.tableColumn.name == aminoAcidResidue &&
362
- cell.cell.value !== null &&
363
- cell.tableRowIndex !== null
400
+ cell.tableColumn !== null &&
401
+ cell.tableColumn.name == aminoAcidResidue &&
402
+ cell.cell.value !== null &&
403
+ cell.tableRowIndex !== null
364
404
  ) {
365
- cp.showTooltip(cell, x, y);
405
+ if (grouping) {
406
+ const currentGroup = groupDescription[cell.cell.value];
407
+ const divText = ui.divText('Amino Acids in this group: ' + currentGroup['aminoAcids'].join(', '));
408
+ ui.tooltip.show(ui.divV([ui.h3(currentGroup['description']), divText]), x, y);
409
+ } else {
410
+ cp.showTooltip(cell, x, y);
411
+ }
366
412
  }
367
413
  return true;
368
414
  };
369
415
  SARgrid.onCellTooltip(onCellTooltipFunc);
370
416
  SARVgrid.onCellTooltip(onCellTooltipFunc);
371
417
 
372
- sourceGrid.onCellPrepare((cell) => {
418
+ sourceGrid.onCellPrepare((cell: DG.GridCell) => {
373
419
  const currentRowIndex = cell.tableRowIndex;
374
420
  if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader) {
375
421
  cell.style.backColor = DG.Color.lightLightGray;
376
422
  }
377
423
  });
378
424
 
379
- // for (const col of matrixDf.columns.names()) {
380
- // SARgrid.col(col)!.width = SARgrid.props.rowHeight;
381
- // }
425
+ for (const col of matrixDf.columns.names()) {
426
+ SARgrid.col(col)!.width = SARgrid.props.rowHeight;
427
+ }
428
+
429
+ if (grouping) {
430
+ SARgrid.col(aminoAcidResidue)!.name = 'Groups';
431
+ SARVgrid.col(aminoAcidResidue)!.name = 'Groups';
432
+ }
382
433
 
383
- return [SARgrid, SARVgrid, statsDf];
434
+ return [SARgrid, SARVgrid, statsDf, groupMapping];
384
435
  }
package/src/package.ts CHANGED
@@ -15,7 +15,7 @@ import {PeptideSimilaritySpaceWidget} from './utils/peptide-similarity-space';
15
15
  import {manualAlignmentWidget} from './widgets/manual-alignment';
16
16
  import {SARViewer, SARViewerVertical} from './viewers/sar-viewer';
17
17
  import {peptideMoleculeWidget} from './widgets/peptide-molecule';
18
- import {correlationAnalysisPlots} from './utils/correlation-analysis';
18
+ import {SpiralPlot} from './viewers/spiral-plot';
19
19
 
20
20
  export const _package = new DG.Package();
21
21
  let tableGrid: DG.Grid;
@@ -25,35 +25,13 @@ let view: DG.TableView;
25
25
 
26
26
  async function main(chosenFile: string) {
27
27
  const pi = DG.TaskBarProgressIndicator.create('Loading Peptides');
28
- //let peptides =
29
- // await grok.data.loadTable('https://datagrok.jnj.com/p/ejaeger.il23peptideidp5562/il-23_peptide_idp-5562');
30
28
  const path = _package.webRoot + 'files/' + chosenFile;
31
29
  const peptides = (await grok.data.loadTable(path));
32
30
  peptides.name = 'Peptides';
33
31
  peptides.setTag('dataType', 'peptides');
34
32
  const view = grok.shell.addTableView(peptides);
35
33
  tableGrid = view.grid;
36
- // peptides.onSemanticTypeDetecting.subscribe((_: any) => {
37
- // const regexp = new RegExp(/^([^-^\n]*-){2,49}(\w|\(|\))+$/);
38
- // for (const col of peptides.columns) {
39
- // col.semType = DG.Detector.sampleCategories(col, (s: any) => regexp.test(s.trim())) ? 'alignedSequence' : null;
40
- // if (col.semType == 'alignedSequence') {
41
- // expandColumn(col, tableGrid, (ent)=>{
42
- // const subParts:string[] = ent.split('-');
43
- // // eslint-disable-next-line no-unused-vars
44
- // const [text, _] = processSequence(subParts);
45
- // let textSize = 0;
46
- // text.forEach((aar)=>{
47
- // textSize += aar.length;
48
- // });
49
- // return textSize;
50
- // });
51
- // }
52
- // }
53
- // });
54
-
55
34
  view.name = 'PeptidesView';
56
-
57
35
  grok.shell.windows.showProperties = true;
58
36
 
59
37
  pi.close();
@@ -67,7 +45,6 @@ export function Peptides() {
67
45
 
68
46
  const appDescription = ui.info(
69
47
  [
70
- // ui.divText('\n To start the application :', {style: {'font-weight': 'bolder'}}),
71
48
  ui.list([
72
49
  '- automatic recognition of peptide sequences',
73
50
  '- native integration with tons of Datagrok out-of-the box features (visualization, filtering, clustering, ' +
@@ -197,18 +174,19 @@ export function manualAlignment(monomer: string) {
197
174
  //input: column col {semType: alignedSequence}
198
175
  //output: widget result
199
176
  export async function peptideSpacePanel(col: DG.Column): Promise<DG.Widget> {
200
- const widget = new PeptideSimilaritySpaceWidget(col);
177
+ const widget = new PeptideSimilaritySpaceWidget(col, view ?? grok.shell.v);
201
178
  return await widget.draw();
202
179
  }
203
180
 
204
- //name: Correllation analysis
205
- export async function correlationAnalysis() {
206
- view = (grok.shell.v as DG.TableView);
207
-
208
- const df = await grok.data.files.openTable('Demo:TestJobs:Files:DemoFiles/bio/peptides.csv');
209
- const tview = grok.shell.addTableView(df);
210
- const [cpviewer, bpviewer] = correlationAnalysisPlots(df.getCol('AlignedSequence'));
211
-
212
- tview.dockManager.dock(cpviewer, 'right');
213
- tview.dockManager.dock(bpviewer, 'down');
181
+ //name: Spiral Plot
182
+ ////input: dataframe table
183
+ ////input: column activity
184
+ //tags: viewer, panel
185
+ //output: viewer result
186
+ export async function spiralPlot(): Promise<DG.Viewer> {//(table: DG.DataFrame, activity: DG.Column) {
187
+ // Read as dataframe
188
+ const table = await grok.data.files.openTable('Demo:TestJobs:Files:DemoFiles/bio/peptides.csv');
189
+ const activity = await table.columns.addNewCalculated('-log10(Activity)', '0-Log10(${Activity})');
190
+ view = grok.shell.addTableView(table);
191
+ return view.addViewer(SpiralPlot.fromTable(table, {valuesColumnName: activity.name}));
214
192
  }
@@ -0,0 +1,93 @@
1
+ import * as ui from 'datagrok-api/ui';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import {createPeptideSimilaritySpaceViewer} from './utils/peptide-similarity-space';
4
+ import {addViewerToHeader} from './viewers/stacked-barchart-viewer';
5
+
6
+ /**
7
+ * Peptides controller class.
8
+ *
9
+ * @export
10
+ * @class Peptides
11
+ */
12
+ export class Peptides {
13
+ /**
14
+ * Class initializer
15
+ *
16
+ * @param {DG.Grid} tableGrid Working table grid.
17
+ * @param {DG.TableView} view Working view.
18
+ * @param {DG.DataFrame} currentDf Working table.
19
+ * @param {{[key: string]: string}} options SAR viewer options
20
+ * @param {DG.Column} col Aligned sequences column.
21
+ * @param {string} activityColumnChoice Activity column name.
22
+ * @memberof Peptides
23
+ */
24
+ async init(
25
+ tableGrid: DG.Grid,
26
+ view: DG.TableView,
27
+ currentDf: DG.DataFrame,
28
+ options: {[key: string]: string},
29
+ col: DG.Column,
30
+ activityColumnChoice: string,
31
+ ) {
32
+ for (let i = 0; i < tableGrid.columns.length; i++) {
33
+ const aarCol = tableGrid.columns.byIndex(i);
34
+ if (aarCol &&
35
+ aarCol.name &&
36
+ aarCol.column?.semType != 'aminoAcids'
37
+ ) {
38
+ //@ts-ignore
39
+ tableGrid.columns.byIndex(i)?.visible = false;
40
+ }
41
+ }
42
+
43
+ const originalDfColumns = (currentDf.columns as DG.ColumnList).names();
44
+
45
+ const sarViewer = view.addViewer('peptide-sar-viewer', options);
46
+ const sarNode = view.dockManager.dock(sarViewer, DG.DOCK_TYPE.DOWN, null, 'SAR Viewer');
47
+
48
+ const sarViewerVertical = view.addViewer('peptide-sar-viewer-vertical');
49
+ view.dockManager.dock(sarViewerVertical, DG.DOCK_TYPE.RIGHT, sarNode, 'SAR Vertical Viewer');
50
+
51
+ const peptideSpaceViewer = await createPeptideSimilaritySpaceViewer(
52
+ currentDf,
53
+ col,
54
+ 't-SNE',
55
+ 'Levenshtein',
56
+ 100,
57
+ view,
58
+ `${activityColumnChoice}Scaled`,
59
+ );
60
+ view.dockManager.dock(peptideSpaceViewer, DG.DOCK_TYPE.LEFT, sarNode, 'Peptide Space Viewer', 0.3);
61
+
62
+ const StackedBarchartProm = currentDf.plot.fromType('StackedBarChartAA');
63
+ addViewerToHeader(tableGrid, StackedBarchartProm);
64
+
65
+ const hideIcon = ui.iconFA('window-close', () => { //undo?, times?
66
+ const viewers = [];
67
+ for (const viewer of view.viewers) {
68
+ if (viewer.type !== DG.VIEWER.GRID) {
69
+ viewers.push(viewer);
70
+ }
71
+ }
72
+ viewers.forEach((v) => v.close());
73
+
74
+ const cols = (currentDf.columns as DG.ColumnList);
75
+ for (const colName of cols.names()) {
76
+ if (!originalDfColumns.includes(colName)) {
77
+ cols.remove(colName);
78
+ }
79
+ }
80
+
81
+ currentDf.selection.setAll(false);
82
+ currentDf.filter.setAll(true);
83
+
84
+ tableGrid.setOptions({'colHeaderHeight': 20});
85
+ tableGrid.columns.setVisible(originalDfColumns);
86
+
87
+ view.setRibbonPanels(ribbonPanels);
88
+ }, 'Close viewers and restore dataframe');
89
+
90
+ const ribbonPanels = view.getRibbonPanels();
91
+ view.setRibbonPanels([[hideIcon]]);
92
+ }
93
+ }