@datagrok/peptides 0.4.3 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,276 @@
1
+ // import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import $ from 'cash-dom';
6
+
7
+ // import {aarGroups} from '../describe';
8
+ import {setAARRenderer} from '../utils/cell-renderer';
9
+
10
+ export class SubstViewer extends DG.JsViewer {
11
+ viewerGrid: DG.Grid | null;
12
+ maxSubstitutions: number;
13
+ activityLimit: number;
14
+ activityColumnName: string;
15
+ casesGrid: DG.Grid | null;
16
+
17
+ constructor() {
18
+ super();
19
+
20
+ this.activityColumnName = this.string('activityColumnName');
21
+
22
+ this.maxSubstitutions = this.int('maxSubstitutions', 1);
23
+ this.activityLimit = this.float('activityLimit', 2);
24
+
25
+ this.viewerGrid = null;
26
+ this.casesGrid = null;
27
+ }
28
+
29
+ onPropertyChanged(property: DG.Property): void {
30
+ this.calcSubstitutions();
31
+ }
32
+
33
+ calcSubstitutions() {
34
+ const aarColName = 'AAR';
35
+ const df: DG.DataFrame = this.dataFrame!;
36
+ const col: DG.Column = df.columns.bySemType('alignedSequence');
37
+ // let values: number[] = df.columns.byName('IC50').toList();
38
+ const values = df.getCol(this.activityColumnName).toList().map((x) => -Math.log10(x));
39
+ // values = values;
40
+ const splitedMatrix = this.split(col);
41
+
42
+ const tableValues: { [aar: string]: number[] } = {};
43
+ const tableTooltips: { [aar: string]: {[index: string]: string}[][] } = {};
44
+ const tableCases: { [aar: string]: number[][][] } = {};
45
+
46
+ const nRows = splitedMatrix.length;
47
+ const nCols = splitedMatrix[0].length;
48
+ const nColsArray = Array(nCols);
49
+
50
+ for (let i = 0; i < nRows - 1; i++) {
51
+ for (let j = i + 1; j < nRows; j++) {
52
+ let substCounter = 0;
53
+ const subst1: { [pos: number]: [string, {[index: string]: string}] } = {};
54
+ const subst2: { [pos: number]: [string, {[index: string]: string}] } = {};
55
+ const delta = values[i] - values[j];
56
+
57
+ for (let k = 0; k < nCols; k++) {
58
+ const smik = splitedMatrix[i][k];
59
+ const smjk = splitedMatrix[j][k];
60
+ if (smik != smjk && Math.abs(delta) >= this.activityLimit) {
61
+ const vi = values[i].toFixed(2);
62
+ const vj = values[j].toFixed(2);
63
+ substCounter++;
64
+ subst1[k] = [
65
+ smik,
66
+ {key: `${smik === '-' ? 'Empty' : smik} → ${smjk === '-' ? 'Empty' : smjk}`, value: `${vi} → ${vj}`},
67
+ ];
68
+ subst2[k] = [
69
+ smjk,
70
+ {key: `${smjk === '-' ? 'Empty' : smjk} → ${smik === '-' ? 'Empty' : smik}`, value: `${vj} → ${vi}`},
71
+ ];
72
+ }
73
+ }
74
+
75
+ if (substCounter <= this.maxSubstitutions && substCounter > 0) {
76
+ Object.keys(subst1).forEach((pos) => {
77
+ const posInt = parseInt(pos);
78
+ const aar = subst1[posInt][0];
79
+ if (!Object.keys(tableValues).includes(aar)) {
80
+ tableValues[aar] = Array.apply(null, nColsArray).map(function() {
81
+ return DG.INT_NULL;
82
+ });
83
+ tableTooltips[aar] = Array.apply(null, nColsArray).map(function() {
84
+ return [];
85
+ });
86
+ tableCases[aar] = Array.apply(null, nColsArray).map(function() {
87
+ return [];
88
+ });
89
+ }
90
+
91
+ tableValues[aar][posInt] = tableValues[aar][posInt] === DG.INT_NULL ? 1 : tableValues[aar][posInt] + 1;
92
+ tableTooltips[aar][posInt] = !tableTooltips[aar][posInt].length ?
93
+ [{key: 'Substitution', value: 'Values'}] : tableTooltips[aar][posInt];
94
+ tableTooltips[aar][posInt].push(subst1[posInt][1]);
95
+ tableCases[aar][posInt].push([i, j, delta]);
96
+ });
97
+ Object.keys(subst2).forEach((pos) => {
98
+ const posInt = parseInt(pos);
99
+ const aar = subst2[posInt][0];
100
+ if (!Object.keys(tableValues).includes(aar)) {
101
+ tableValues[aar] = Array.apply(null, nColsArray).map(function() {
102
+ return DG.INT_NULL;
103
+ });
104
+ tableTooltips[aar] = Array.apply(null, nColsArray).map(function() {
105
+ return [];
106
+ });
107
+ tableCases[aar] = Array.apply(null, nColsArray).map(function() {
108
+ return [];
109
+ });
110
+ }
111
+
112
+ tableValues[aar][posInt] = tableValues[aar][posInt] === DG.INT_NULL ? 1 : tableValues[aar][posInt] + 1;
113
+ // tableValues[aar][posInt]++;
114
+ tableTooltips[aar][posInt] = !tableTooltips[aar][posInt].length ?
115
+ [{key: 'Substitution', value: 'Values'}] : tableTooltips[aar][posInt];
116
+ tableTooltips[aar][posInt].push(subst2[posInt][1]);
117
+ tableCases[aar][posInt].push([j, i, -delta]);
118
+ });
119
+ }
120
+ }
121
+ }
122
+
123
+ const tableValuesKeys = Object.keys(tableValues);
124
+ const dfLength = tableValuesKeys.length;
125
+ const cols = [...nColsArray.keys()].map((v) => DG.Column.int(v.toString(), dfLength));
126
+ const aarCol = DG.Column.string(aarColName, dfLength);
127
+ cols.splice(0, 1, aarCol);
128
+ const table = DG.DataFrame.fromColumns(cols);
129
+
130
+ for (let i = 0; i < dfLength; i++) {
131
+ const aar = tableValuesKeys[i];
132
+ tableValues[aar].splice(0, 1);
133
+ table.rows.setValues(i, [aar, ...tableValues[aar]]);
134
+ }
135
+
136
+ // let groupMapping: { [key: string]: string } = {};
137
+
138
+ //TODO: enable grouping
139
+ // Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
140
+
141
+ this.viewerGrid = table.plot.grid();
142
+
143
+ setAARRenderer(aarCol, this.viewerGrid);
144
+
145
+ this.viewerGrid.onCellTooltip(
146
+ (gCell, x, y) => {
147
+ if (gCell.cell.value !== DG.INT_NULL && gCell.tableColumn !== null && gCell.tableRowIndex !== null) {
148
+ const colName = gCell.tableColumn.name;
149
+ if (colName !== aarColName) {
150
+ const aar = this.viewerGrid!.table.get(aarColName, gCell.tableRowIndex);
151
+ const pos = parseInt(colName);
152
+ const tooltipText = tableTooltips[aar][pos].length ?
153
+ DG.HtmlTable.create(
154
+ tableTooltips[aar][pos], (item: {[index: string]: string}, idx: number) => [item.key, item.value],
155
+ ).root : ui.divText('No substitutions');
156
+ ui.tooltip.show(tooltipText, x, y);
157
+ }
158
+ }
159
+ return true;
160
+ },
161
+ );
162
+
163
+ for (const col of table.columns.names()) {
164
+ this.viewerGrid.col(col)!.width = this.viewerGrid.props.rowHeight;
165
+ }
166
+
167
+ this.viewerGrid.onCellRender.subscribe((args) => {
168
+ if (args.cell.isRowHeader && args.cell.gridColumn.visible) {
169
+ args.cell.gridColumn.visible = false;
170
+ args.preventDefault();
171
+ }
172
+ });
173
+
174
+ this.viewerGrid.props.allowEdit = false;
175
+
176
+ table.onCurrentCellChanged.subscribe((_) => {
177
+ if (table.currentCol !== null && table.currentCol.name !== aarColName && table.currentCell.value !== null) {
178
+ const aar = table.get(aarColName, table.currentRowIdx);
179
+ const pos = parseInt(table.currentCol.name);
180
+ const currentCase = tableCases[aar][pos];
181
+ const tempDfLength = currentCase.length;
182
+ const initCol = DG.Column.string('Initial', tempDfLength);
183
+ const subsCol = DG.Column.string('Substituted', tempDfLength);
184
+
185
+ const tempDf = DG.DataFrame.fromColumns([
186
+ initCol,
187
+ subsCol,
188
+ DG.Column.float('Difference', tempDfLength),
189
+ ]);
190
+
191
+ for (let i = 0; i < tempDfLength; i++) {
192
+ const row = currentCase[i];
193
+ tempDf.rows.setValues(i, [col.get(row[0]), col.get(row[1]), row[2]]);
194
+ }
195
+
196
+ initCol.semType = 'alignedSequence';
197
+ initCol.setTag('isAnalysisApplicable', 'false');
198
+ subsCol.semType = 'alignedSequence';
199
+ subsCol.setTag('isAnalysisApplicable', 'false');
200
+
201
+ this.casesGrid = tempDf.plot.grid();
202
+ this.casesGrid.props.allowEdit = false;
203
+ } else {
204
+ this.casesGrid = null;
205
+ }
206
+ this.render();
207
+ });
208
+
209
+ this.render();
210
+ }
211
+
212
+ render() {
213
+ $(this.root).empty();
214
+ this.root.appendChild(this.casesGrid === null ?
215
+ this.viewerGrid!.root : ui.splitH([this.viewerGrid!.root, this.casesGrid.root]),
216
+ );
217
+ }
218
+
219
+ split(peptideColumn: DG.Column, filter: boolean = true): string[][] {
220
+ const splitPeptidesArray: string[][] = [];
221
+ let currentSplitPeptide: string[];
222
+ let modeMonomerCount = 0;
223
+ let currentLength;
224
+ const colLength = peptideColumn.length;
225
+
226
+ // splitting data
227
+ const monomerLengths: { [index: string]: number } = {};
228
+ for (let i = 0; i < colLength; i++) {
229
+ currentSplitPeptide = peptideColumn.get(i).split('-').map((value: string) => value ? value : '-');
230
+ splitPeptidesArray.push(currentSplitPeptide);
231
+ currentLength = currentSplitPeptide.length;
232
+ monomerLengths[currentLength + ''] =
233
+ monomerLengths[currentLength + ''] ? monomerLengths[currentLength + ''] + 1 : 1;
234
+ }
235
+ //@ts-ignore: what I do here is converting string to number the most effective way I could find. parseInt is slow
236
+ modeMonomerCount = 1 * Object.keys(monomerLengths).reduce((a, b) => monomerLengths[a] > monomerLengths[b] ? a : b);
237
+
238
+ // making sure all of the sequences are of the same size
239
+ // and marking invalid sequences
240
+ let nTerminal: string;
241
+ const invalidIndexes: number[] = [];
242
+ let splitColumns: string[][] = Array.from({length: modeMonomerCount}, (_) => []);
243
+ modeMonomerCount--; // minus N-terminal
244
+ for (let i = 0; i < colLength; i++) {
245
+ currentSplitPeptide = splitPeptidesArray[i];
246
+ nTerminal = currentSplitPeptide.pop()!; // it is guaranteed that there will be at least one element
247
+ currentLength = currentSplitPeptide.length;
248
+ if (currentLength !== modeMonomerCount) {
249
+ invalidIndexes.push(i);
250
+ }
251
+ for (let j = 0; j < modeMonomerCount; j++) {
252
+ splitColumns[j].push(j < currentLength ? currentSplitPeptide[j] : '-');
253
+ }
254
+ splitColumns[modeMonomerCount].push(nTerminal);
255
+ }
256
+ modeMonomerCount--; // minus C-terminal
257
+
258
+ //create column names list
259
+ const columnNames = Array.from({length: modeMonomerCount}, (_, index) => `${index + 1 < 10 ? 0 : ''}${index + 1}`);
260
+ columnNames.splice(0, 0, 'N-terminal');
261
+ columnNames.push('C-terminal');
262
+
263
+ // filter out the columns with the same values
264
+ if (filter) {
265
+ splitColumns = splitColumns.filter((positionArray, index) => {
266
+ const isRetained = new Set(positionArray).size > 1;
267
+ if (!isRetained) {
268
+ columnNames.splice(index, 1);
269
+ }
270
+ return isRetained;
271
+ });
272
+ }
273
+
274
+ return splitPeptidesArray;
275
+ }
276
+ }
@@ -2,7 +2,18 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
  import {Peptides} from '../peptides';
5
+ import '../styles.css';
5
6
 
7
+ /**
8
+ * Peptide analysis widget.
9
+ *
10
+ * @export
11
+ * @param {DG.Column} col Aligned sequence column.
12
+ * @param {DG.TableView} view Working view.
13
+ * @param {DG.Grid} tableGrid Working table grid.
14
+ * @param {DG.DataFrame} currentDf Working table.
15
+ * @return {Promise<DG.Widget>} Widget containing peptide analysis.
16
+ */
6
17
  export async function analyzePeptidesWidget(
7
18
  col: DG.Column, view: DG.TableView, tableGrid: DG.Grid, currentDf: DG.DataFrame,
8
19
  ): Promise<DG.Widget> {
@@ -11,12 +22,12 @@ export async function analyzePeptidesWidget(
11
22
  tempCol = column.type === DG.TYPE.FLOAT ? column : null;
12
23
  }
13
24
  const defaultColumn: DG.Column = currentDf.col('activity') || currentDf.col('IC50') || tempCol;
14
- const histogramHost = ui.div([]);
25
+ const histogramHost = ui.div([], {id: 'pep-hist-host'});
15
26
 
16
27
  let hist: DG.Viewer;
17
28
 
18
29
  const activityScalingMethod = ui.choiceInput(
19
- 'Activity scaling',
30
+ 'Scaling',
20
31
  'none',
21
32
  ['none', 'lg', '-lg'],
22
33
  async (currentMethod: string) => {
@@ -41,6 +52,7 @@ export async function analyzePeptidesWidget(
41
52
  showXAxis: true,
42
53
  showColumnSelector: false,
43
54
  showRangeSlider: false,
55
+ showBinSelector: false,
44
56
  // bins: b,
45
57
  });
46
58
  histogramHost.lastChild?.remove();
@@ -54,7 +66,7 @@ export async function analyzePeptidesWidget(
54
66
  activityScalingMethod.fireChanged();
55
67
  };
56
68
  const activityColumnChoice = ui.columnInput(
57
- 'Activity column',
69
+ 'Activity',
58
70
  currentDf,
59
71
  defaultColumn,
60
72
  activityScalingMethodState,
@@ -66,8 +78,8 @@ export async function analyzePeptidesWidget(
66
78
  if (activityColumnChoice.value.type === DG.TYPE.FLOAT) {
67
79
  const progress = DG.TaskBarProgressIndicator.create('Loading SAR...');
68
80
  const options: {[key: string]: string} = {
69
- 'activityColumnColumnName': activityColumnChoice.value.name,
70
- 'activityScalingMethod': activityScalingMethod.value,
81
+ 'activityColumnName': activityColumnChoice.value.name,
82
+ 'scaling': activityScalingMethod.value,
71
83
  };
72
84
 
73
85
  const peptides = new Peptides();
@@ -78,10 +90,18 @@ export async function analyzePeptidesWidget(
78
90
  grok.shell.error('The activity column must be of floating point number type!');
79
91
  }
80
92
  });
93
+ startBtn.style.alignSelf = 'center';
81
94
 
82
95
  const viewer = await currentDf.plot.fromType('peptide-logo-viewer');
83
96
 
84
97
  return new DG.Widget(
85
- ui.divV([viewer.root, ui.inputs([activityColumnChoice, activityScalingMethod]), startBtn, histogramHost]),
98
+ ui.divV([
99
+ viewer.root,
100
+ ui.splitH([
101
+ ui.splitV([ui.inputs([activityColumnChoice, activityScalingMethod]), startBtn]),
102
+ histogramHost,
103
+ ], {style: {height: 'unset'}}),
104
+ // histogramHost,
105
+ ]),
86
106
  );
87
107
  }
@@ -2,13 +2,20 @@ import * as ui from 'datagrok-api/ui';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
 
4
4
  import $ from 'cash-dom';
5
- import {model} from '../viewers/model';
5
+ import {Peptides} from '../peptides';
6
6
  import {splitAlignedPeptides} from '../utils/split-aligned';
7
+ import '../styles.css';
7
8
 
9
+ /**
10
+ * Manual sequence alignment widget.
11
+ *
12
+ * @param {DG.Column} alignedSequenceCol Aligned sequence column.
13
+ * @param {DG.DataFrame} currentDf Working table.
14
+ * @return {DG.Widget} Widget for manual sequence alignment.
15
+ */
8
16
  export function manualAlignmentWidget(alignedSequenceCol: DG.Column, currentDf: DG.DataFrame) {
9
17
  const sequenceInput = ui.textInput('', alignedSequenceCol.get(currentDf.currentRowIdx));
10
- (sequenceInput.input as HTMLElement).style.height = '50px';
11
- (sequenceInput.input as HTMLElement).style.overflow = 'hidden';
18
+ $(sequenceInput.root).addClass('pep-textinput');
12
19
 
13
20
  const applyChangesBtn = ui.button('Apply', async () => {
14
21
  const newSequence = sequenceInput.value;
@@ -22,7 +29,8 @@ export function manualAlignmentWidget(alignedSequenceCol: DG.Column, currentDf:
22
29
  }
23
30
  }
24
31
 
25
- await model.updateDefault();
32
+ // await model.updateDefault();
33
+ await Peptides.recalculate();
26
34
  });
27
35
 
28
36
  const resetBtn = ui.button(
@@ -30,7 +38,7 @@ export function manualAlignmentWidget(alignedSequenceCol: DG.Column, currentDf:
30
38
  () => sequenceInput.value = alignedSequenceCol.get(currentDf.currentRowIdx),
31
39
  'Reset',
32
40
  );
33
- $(resetBtn).addClass('dt-snippet-editor-icon dt-reset-icon');
41
+ $(resetBtn).addClass('pep-snippet-editor-icon pep-reset-icon');
34
42
 
35
- return new DG.Widget(ui.divV([resetBtn, sequenceInput.root, applyChangesBtn], 'dt-textarea-box'));
43
+ return new DG.Widget(ui.divV([resetBtn, sequenceInput.root, applyChangesBtn], 'pep-textarea-box'));
36
44
  }
@@ -3,6 +3,13 @@ import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
  import {ChemPalette} from '../utils/chem-palette';
5
5
 
6
+ /**
7
+ * 3D representation widget of peptide molecule.
8
+ *
9
+ * @export
10
+ * @param {string} pep Peptide string.
11
+ * @return {Promise<DG.Widget>} Widget.
12
+ */
6
13
  export async function peptideMoleculeWidget(pep: string): Promise<DG.Widget> {
7
14
  const pi = DG.TaskBarProgressIndicator.create('Creating NGL view');
8
15
 
@@ -1,4 +1,4 @@
1
- import {DimensionalityReducer} from '@datagrok-libraries/utils/src/reduce-dimensionality';
1
+ import {DimensionalityReducer, KnownMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
2
2
  import {Coordinates} from '@datagrok-libraries/utils/src/type-declarations';
3
3
 
4
4
  /**
@@ -10,7 +10,7 @@ import {Coordinates} from '@datagrok-libraries/utils/src/type-declarations';
10
10
  * @param {number} cyclesCount Number of cycles to repeat.
11
11
  * @return {Coordinates} Embedding.
12
12
  */
13
- function onMessage(columnData: [], method: string, measure: string, cyclesCount: number): Coordinates {
13
+ function onMessage(columnData: [], method: KnownMethods, measure: string, cyclesCount: number): Coordinates {
14
14
  const reducer = new DimensionalityReducer(
15
15
  columnData,
16
16
  method,
package/webpack.config.js CHANGED
@@ -12,10 +12,14 @@ module.exports = {
12
12
  use: 'ts-loader',
13
13
  exclude: /node_modules/,
14
14
  },
15
+ {
16
+ test: /\.css$/i,
17
+ use: ['style-loader', 'css-loader'],
18
+ },
15
19
  ],
16
20
  },
17
21
  resolve: {
18
- extensions: ['.tsx', '.js', '.ts'],
22
+ extensions: ['.tsx', '.ts', '.js'],
19
23
  },
20
24
  devtool: 'inline-source-map',
21
25
  externals: {
@@ -1,123 +0,0 @@
1
- /* Do not change these import lines. Datagrok will import API library in exactly the same manner */
2
- import * as DG from 'datagrok-api/dg';
3
-
4
- import {AlignedSequenceEncoder} from '@datagrok-libraries/utils/src/sequence-encoder';
5
- import {assert, transposeMatrix} from '@datagrok-libraries/utils/src/operations';
6
- import {Vector, Matrix} from '@datagrok-libraries/utils/src/type-declarations';
7
- import {kendallsTau} from '@datagrok-libraries/statistics/src/correlation-coefficient';
8
-
9
- /**
10
- * Converts a Matrix into a DataFrame.
11
- *
12
- * @export
13
- * @param {Matrix} matrix A matrix.
14
- * @return {DG.DataFrame} The data frame.
15
- */
16
- export function matrix2DataFrame(matrix: Matrix): DG.DataFrame {
17
- return DG.DataFrame.fromColumns(matrix.map((v, i) => DG.Column.fromFloat32Array(`${i+1}`, v)));
18
- }
19
-
20
- /**
21
- * Encodes amino acid sequences into a numeric representation.
22
- *
23
- * @param {DG.Column} col A column containing the sequences.
24
- * @return {DG.DataFrame} The resulting data frame.
25
- */
26
- function calcPositions(col: DG.Column): DG.DataFrame {
27
- const sequences = col.toList().map((v, _) => AlignedSequenceEncoder.clean(v));
28
- const enc = new AlignedSequenceEncoder();
29
- const encSeqs = sequences.map((v) => Vector.from(enc.encode(v)));
30
- const positions = transposeMatrix(encSeqs);
31
- return matrix2DataFrame(positions);
32
- }
33
-
34
- /**
35
- * Unfolds a data frame into <category>-<value> format.
36
- *
37
- * @param {DG.DataFrame} df A data frame to unfold.
38
- * @return {DG.DataFrame} The resulting data frame.
39
- */
40
- function melt(df: DG.DataFrame): DG.DataFrame {
41
- let keys: string[] = [];
42
- const values: Float32Array = new Float32Array(df.columns.length*df.rowCount);
43
- let i = 0;
44
-
45
- for (const c of df.columns.toList()) {
46
- keys = keys.concat(Array<string>(c.length).fill(c.name));
47
- values.set(c.getRawData(), i);
48
- i += df.rowCount;
49
- }
50
- assert(keys.length == values.length);
51
- return DG.DataFrame.fromColumns([DG.Column.fromStrings('keys', keys), DG.Column.fromFloat32Array('values', values)]);
52
- }
53
-
54
- /**
55
- * Calculates Spearman's rho rank correlation coefficient.
56
- *
57
- * @param {DG.DataFrame} df A data frame to process.
58
- * @return {DG.DataFrame} The correlation matrix.
59
- */
60
- function calcSpearmanRhoMatrix(df: DG.DataFrame): DG.DataFrame {
61
- const nItems = df.columns.length;
62
- const rho = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0));
63
-
64
- for (let i = 0; i < nItems; ++i) {
65
- for (let j = i+1; j < nItems; ++j) {
66
- rho[i][j] = df.columns.byIndex(i).stats.spearmanCorr(df.columns.byIndex(j));
67
- rho[j][i] = rho[i][j];
68
- }
69
- }
70
- return matrix2DataFrame(rho);
71
- }
72
-
73
- /**
74
- * Calculates Kendall's tau rank correlation coefficient.
75
- *
76
- * @param {DG.DataFrame} df A data frame to process.
77
- * @param {number} [alpha=0.05] The significance threshold.
78
- * @return {DG.DataFrame} The correlation matrix.
79
- */
80
- function calcKendallTauMatrix(df: DG.DataFrame, alpha: number = 0.05): DG.DataFrame {
81
- const nItems = df.columns.length;
82
- const tau = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0));
83
-
84
- for (let i = 0; i < nItems; ++i) {
85
- for (let j = i+1; j < nItems; ++j) {
86
- const res = kendallsTau(df.columns.byIndex(i).getRawData(), df.columns.byIndex(j).getRawData());
87
- tau[i][j] = res.prob < alpha ? res.test : 0;
88
- tau[j][i] = tau[i][j];
89
- }
90
- }
91
- return matrix2DataFrame(tau);
92
- }
93
-
94
- /**
95
- * Creates acorrelation plot and a box plot to perform correlation analysis.
96
- *
97
- * @export
98
- * @param {DG.Column} sequencesColumn A column containing amino acid sequences.
99
- * @return {[DG.Viewer, DG.Viewer]} These two plots.
100
- */
101
- export function correlationAnalysisPlots(sequencesColumn: DG.Column): [DG.Viewer, DG.Viewer] {
102
- const posDF = calcPositions(sequencesColumn);
103
- const cpviewer = DG.Viewer.fromType(
104
- DG.VIEWER.CORR_PLOT,
105
- posDF,
106
- {
107
- 'xColumnNames': posDF.columns.names(),
108
- 'yColumnNames': posDF.columns.names(),
109
- 'correlationType': 'Spearman',
110
- });
111
-
112
- const rhoDF = calcKendallTauMatrix(posDF);
113
- const meltDF = melt(rhoDF);
114
-
115
- const bpviewer = DG.Viewer.fromType(
116
- DG.VIEWER.BOX_PLOT,
117
- meltDF, {
118
- 'categoryColumnName': 'keys',
119
- 'valueColumnName': 'values',
120
- 'statistics': ['min', 'max', 'avg', 'med'],
121
- });
122
- return [cpviewer, bpviewer];
123
- }