@datagrok/peptides 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@datagrok/peptides",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "",
5
5
  "dependencies": {
6
6
  "@keckelt/tsne": "^1.0.2",
@@ -13,7 +13,7 @@
13
13
  "logojs-react": "^2.1.1",
14
14
  "rxjs": "^6.5.5",
15
15
  "umap-js": "^1.3.3",
16
- "@datagrok-libraries/utils": ">=0.0.9",
16
+ "@datagrok-libraries/utils": ">=0.0.10",
17
17
  "@datagrok-libraries/statistics": ">=0.1.4",
18
18
  "@types/d3": "^7.0.0",
19
19
  "@types/jquery": "^3.5.6"
@@ -52,4 +52,4 @@
52
52
  "lint": "eslint \"./src/**/*.ts\"",
53
53
  "lint-fix": "eslint \"./src/**/*.ts\" --fix"
54
54
  }
55
- }
55
+ }
package/src/package.ts CHANGED
@@ -14,7 +14,8 @@ import {analyzePeptidesWidget} from './widgets/analyze-peptides';
14
14
  import {PeptideSimilaritySpaceWidget} from './utils/peptide-similarity-space';
15
15
  import {manualAlignmentWidget} from './widgets/manual-alignment';
16
16
  import {SARViewer, SARViewerVertical} from './viewers/sar-viewer';
17
- import { peptideMoleculeWidget } from './widgets/peptide-molecule';
17
+ import {peptideMoleculeWidget} from './widgets/peptide-molecule';
18
+ import {correlationAnalysisPlots} from './utils/correlation-analysis';
18
19
 
19
20
  export const _package = new DG.Package();
20
21
  let tableGrid: DG.Grid;
@@ -199,3 +200,15 @@ export async function peptideSpacePanel(col: DG.Column): Promise<DG.Widget> {
199
200
  const widget = new PeptideSimilaritySpaceWidget(col);
200
201
  return await widget.draw();
201
202
  }
203
+
204
+ //name: Correllation analysis
205
+ export async function correlationAnalysis() {
206
+ view = (grok.shell.v as DG.TableView); // NOTE(review): 'view' is not declared in this function and is not read again below - confirm it is needed
207
+
208
+ const df = await grok.data.files.openTable('Demo:TestJobs:Files:DemoFiles/bio/peptides.csv'); // always loads this fixed demo table
209
+ const tview = grok.shell.addTableView(df); // opens the loaded table in a new table view
210
+ const [cpviewer, bpviewer] = correlationAnalysisPlots(df.getCol('AlignedSequence')); // viewers are built from the 'AlignedSequence' column
211
+
212
+ tview.dockManager.dock(cpviewer, 'right'); // first returned viewer is docked with the 'right' dock type
213
+ tview.dockManager.dock(bpviewer, 'down'); // second returned viewer is docked with the 'down' dock type
214
+ }
@@ -0,0 +1,109 @@
1
+ /* Do not change these import lines. Datagrok will import API library in exactly the same manner */
2
+ //import * as grok from 'datagrok-api/grok';
3
+ //import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {AlignedSequenceEncoder} from '@datagrok-libraries/utils/src/sequence-encoder';
7
+ import {assert, transposeMatrix, matrix2DataFrame} from '@datagrok-libraries/utils/src/operations';
8
+ import {Vector} from '@datagrok-libraries/utils/src/type_declarations';
9
+
10
+ /**
11
+ * Encodes amino acid sequences into a numeric representation: each value is cleaned, encoded, and the encoded matrix is transposed before being converted to a data frame.
12
+ *
13
+ * @export
14
+ * @param {DG.Column} col A column containing the sequences.
15
+ * @return {DG.DataFrame} The data frame built from the transposed encoding (presumably one column per sequence position - confirm against matrix2DataFrame).
16
+ */
17
+ export function calcPositions(col: DG.Column): DG.DataFrame {
18
+ const sequences = col.toList().map((v, _) => AlignedSequenceEncoder.clean(v)); // clean every value before encoding
19
+ const enc = new AlignedSequenceEncoder();
20
+ const encSeqs = sequences.map((v) => Vector.from(enc.encode(v))); // one encoded vector per sequence
21
+ const positions = transposeMatrix(encSeqs); // swap the sequence and position axes
22
+ return matrix2DataFrame(positions);
23
+ }
24
+
25
+ /**
26
+ * Unfolds a data frame into <category>-<value> format: a 'keys' column repeating each source column name and a 'values' column holding the columns' raw data laid end to end.
27
+ *
28
+ * @export
29
+ * @param {DG.DataFrame} df A data frame to unfold. NOTE(review): raw column data is copied into a Float32Array, so columns are presumably expected to be numeric - confirm.
30
+ * @return {DG.DataFrame} A two-column data frame with a string column 'keys' and a float column 'values'.
31
+ */
32
+ export function melt(df: DG.DataFrame): DG.DataFrame {
33
+ let keys: string[] = [];
34
+ const values: Float32Array = new Float32Array(df.columns.length*df.rowCount); // room for every cell of the data frame
35
+ let i = 0; // write offset into values
36
+
37
+ for (const c of df.columns.toList()) {
38
+ keys = keys.concat(Array<string>(c.length).fill(c.name)); // repeat the column name once per element of the column
39
+ values.set(c.getRawData(), i); // copy the column's raw data starting at offset i
40
+ i += df.rowCount; // advance by one column's worth of rows
41
+ }
42
+ assert(keys.length == values.length); // every value must have a matching key
43
+ return DG.DataFrame.fromColumns([DG.Column.fromStrings('keys', keys), DG.Column.fromFloat32Array('values', values)]);
44
+ }
45
+
46
+ /*export async function calcSpearmanRhoMatrixExt(positions: Matrix): Promise<Matrix> {
47
+ const Spearman = require('spearman-rho');
48
+ const nItems = positions.length;
49
+ const rho = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0));
50
+
51
+ for (let i = 0; i < nItems; ++i) {
52
+ for (let j = i+1; j < nItems; ++j) {
53
+ rho[i][j] = await(new Spearman(positions[i], positions[j])).calc();
54
+ rho[j][i] = rho[i][j];
55
+ }
56
+ }
57
+ return rho;
58
+ }*/
59
+
60
+ /**
61
+ * Calculates Spearman's rho rank correlation coefficient for every pair of distinct columns of a data frame.
62
+ *
63
+ * @export
64
+ * @param {DG.DataFrame} df A data frame whose columns are correlated with each other.
65
+ * @return {DG.DataFrame} The symmetric correlation matrix as a data frame; diagonal entries are left at 0.
66
+ */
67
+ export function calcSpearmanRhoMatrix(df: DG.DataFrame): DG.DataFrame {
68
+ const nItems = df.columns.length;
69
+ const rho = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0)); // nItems x nItems matrix initialised with zeros
70
+
71
+ for (let i = 0; i < nItems; ++i) {
72
+ for (let j = i+1; j < nItems; ++j) { // only pairs with j > i are computed
73
+ rho[i][j] = df.columns.byIndex(i).stats.spearmanCorr(df.columns.byIndex(j));
74
+ rho[j][i] = rho[i][j]; // mirror the value so the matrix is symmetric
75
+ }
76
+ }
77
+ return matrix2DataFrame(rho);
78
+ }
79
+
80
+ /**
81
+ * Creates a correlation plot and a box plot to perform correlation analysis.
82
+ *
83
+ * @export
84
+ * @param {DG.Column} sequencesColumn A column containing amino acid sequences.
85
+ * @return {[DG.Viewer, DG.Viewer]} The correlation plot and the box plot, in that order.
86
+ */
87
+ export function correlationAnalysisPlots(sequencesColumn: DG.Column): [DG.Viewer, DG.Viewer] {
88
+ const posDF = calcPositions(sequencesColumn); // numeric encoding of the sequences
89
+ const cpviewer = DG.Viewer.fromType(
90
+ DG.VIEWER.CORR_PLOT,
91
+ posDF,
92
+ {
93
+ 'xColumnNames': posDF.columns.names(), // all columns of posDF on both axes
94
+ 'yColumnNames': posDF.columns.names(),
95
+ 'correlationType': 'Spearman',
96
+ });
97
+
98
+ const rhoDF = calcSpearmanRhoMatrix(posDF); // pairwise Spearman matrix of the posDF columns
99
+ const meltDF = melt(rhoDF); // unfolded into 'keys' / 'values' columns for the box plot
100
+
101
+ const bpviewer = DG.Viewer.fromType(
102
+ DG.VIEWER.BOX_PLOT,
103
+ meltDF, {
104
+ 'categoryColumnName': 'keys',
105
+ 'valueColumnName': 'values',
106
+ 'statistics': ['min', 'max', 'avg', 'med'],
107
+ });
108
+ return [cpviewer, bpviewer];
109
+ }