@datagrok/peptides 0.4.2 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,9 @@
1
1
  /* Do not change these import lines. Datagrok will import API library in exactly the same manner */
2
- //import * as grok from 'datagrok-api/grok';
3
- //import * as ui from 'datagrok-api/ui';
4
2
  import * as DG from 'datagrok-api/dg';
5
3
 
6
4
  import {AlignedSequenceEncoder} from '@datagrok-libraries/utils/src/sequence-encoder';
7
- import {assert, transposeMatrix} from '@datagrok-libraries/utils/src/operations';
8
- import {Vector, Matrix} from '@datagrok-libraries/utils/src/type-declarations';
5
+ import {assert} from '@datagrok-libraries/utils/src/operations';
6
+ import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
9
7
  import {kendallsTau} from '@datagrok-libraries/statistics/src/correlation-coefficient';
10
8
 
11
9
  /**
@@ -15,112 +13,192 @@ import {kendallsTau} from '@datagrok-libraries/statistics/src/correlation-coeffi
15
13
  * @param {Matrix} matrix A matrix.
16
14
  * @return {DG.DataFrame} The data frame.
17
15
  */
18
- export function matrix2DataFrame(matrix: Matrix): DG.DataFrame {
16
+ function matrix2DataFrame(matrix: Matrix): DG.DataFrame {
19
17
  return DG.DataFrame.fromColumns(matrix.map((v, i) => DG.Column.fromFloat32Array(`${i+1}`, v)));
20
18
  }
21
19
 
22
20
  /**
23
- * Encodes amino acid sequences into a numeric representation.
21
+ * Encodes sequence into a certain scale.
24
22
  *
25
- * @param {DG.Column} col A column containing the sequences.
26
- * @return {DG.DataFrame} The resulting data frame.
23
+ * @param {DG.DataFrame} df A data frame containing the sequences.
24
+ * @param {string[]} [positionColumns] If given, specifies which columns to treat as containing the sequences.
25
+ * @return {DG.DataFrame} The data frame with sequences encoded.
27
26
  */
28
- function calcPositions(col: DG.Column): DG.DataFrame {
29
- const sequences = col.toList().map((v, _) => AlignedSequenceEncoder.clean(v));
30
- const enc = new AlignedSequenceEncoder();
31
- const encSeqs = sequences.map((v) => Vector.from(enc.encode(v)));
32
- const positions = transposeMatrix(encSeqs);
33
- return matrix2DataFrame(positions);
27
+ function encodeSequences(df: DG.DataFrame, positionColumns?: string[]): DG.DataFrame {
28
+ const [nCols, nRows] = [positionColumns ? positionColumns.length : df.columns.length, df.rowCount];
29
+ const enc = new AlignedSequenceEncoder('WimleyWhite');
30
+ const positions = new Array(nCols).fill(0).map((_) => new Float32Array(nRows));
31
+
32
+ for (let i = 0; i < nCols; ++i) {
33
+ const col: DG.Column = positionColumns ? df.getCol(positionColumns[i]) : df.columns.byIndex(i);
34
+
35
+ for (let j = 0; j < nRows; ++j) {
36
+ const letter = col.get(j);
37
+ positions[i][j] = enc.encodeLettter(letter);
38
+ }
39
+ }
40
+ const posDF = DG.DataFrame.fromColumns(positions.map((v, i) => DG.Column.fromFloat32Array(df.columns.names()[i], v)));
41
+ return posDF;
34
42
  }
35
43
 
36
44
  /**
37
- * Unfolds a data frame into <category>-<value> format.
45
+ * Formats an adjacency matrix into <category1>-<category2>-<value> format.
38
46
  *
39
- * @param {DG.DataFrame} df A data frame to unfold.
47
+ * @param {DG.DataFrame} adjMatrix A data matrix to deal with.
40
48
  * @return {DG.DataFrame} The resulting data frame.
41
49
  */
42
- function melt(df: DG.DataFrame): DG.DataFrame {
43
- let keys: string[] = [];
44
- const values: Float32Array = new Float32Array(df.columns.length*df.rowCount);
45
- let i = 0;
46
-
47
- for (const c of df.columns.toList()) {
48
- keys = keys.concat(Array<string>(c.length).fill(c.name));
49
- values.set(c.getRawData(), i);
50
- i += df.rowCount;
50
+ function createNetwork(adjMatrix: DG.DataFrame): DG.DataFrame {
51
+ const nCols = adjMatrix.columns.length;
52
+ const nRows = adjMatrix.rowCount;
53
+
54
+ assert(nCols == nRows);
55
+
56
+ const pos1: Array<number> = [];
57
+ const pos2: Array<number> = [];
58
+ const weight: Array<number> = [];
59
+
60
+ for (let i = 0; i < nCols; ++i) {
61
+ const c = adjMatrix.columns.byIndex(i);
62
+
63
+ for (let j = i+1; j < nRows; ++j) {
64
+ const r = c.getRawData()[j];
65
+
66
+ if (Math.abs(r) > 0) {
67
+ pos1.push(i+1);
68
+ pos2.push(j+1);
69
+ weight.push(r);
70
+ }
71
+ }
51
72
  }
52
- assert(keys.length == values.length);
53
- return DG.DataFrame.fromColumns([DG.Column.fromStrings('keys', keys), DG.Column.fromFloat32Array('values', values)]);
73
+
74
+ const pos1Col = DG.Column.fromList('int', 'pos1', pos1);
75
+ const pos2Col = DG.Column.fromList('int', 'pos2', pos2);
76
+ const weightCol = DG.Column.fromList('double', 'weight', weight);
77
+
78
+ return DG.DataFrame.fromColumns([pos1Col, pos2Col, weightCol]);
54
79
  }
55
80
 
56
81
  /**
57
- * Calculates Spearman's rho rank correlation coefficient.
82
+ * Calculates Kendall's tau rank correlation matrix.
58
83
  *
59
84
  * @param {DG.DataFrame} df A data frame to process.
85
+ * @param {number} [alpha=0.05] The significance threshold.
86
+ * @param {number} [rAbsCutoff=0.5] The absolute R cutoff.
60
87
  * @return {DG.DataFrame} The correlation matrix.
61
88
  */
62
- // eslint-disable-next-line no-unused-vars
63
- function calcSpearmanRhoMatrix(df: DG.DataFrame): DG.DataFrame {
89
+ function calcKendallTauMatrix(df: DG.DataFrame, alpha: number = 0.05, rAbsCutoff = 0.5): DG.DataFrame {
64
90
  const nItems = df.columns.length;
65
- const rho = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0));
91
+ const tau = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0));
66
92
 
67
93
  for (let i = 0; i < nItems; ++i) {
68
94
  for (let j = i+1; j < nItems; ++j) {
69
- rho[i][j] = df.columns.byIndex(i).stats.spearmanCorr(df.columns.byIndex(j));
70
- rho[j][i] = rho[i][j];
95
+ const res = kendallsTau(df.columns.byIndex(i).getRawData(), df.columns.byIndex(j).getRawData());
96
+ tau[i][j] = (res.prob < alpha) && (Math.abs(res.test) >= rAbsCutoff) ? res.test : 0;
97
+ tau[j][i] = tau[i][j];
71
98
  }
72
99
  }
73
- return matrix2DataFrame(rho);
100
+ return matrix2DataFrame(tau);
74
101
  }
75
102
 
76
103
  /**
77
- * Calculates Kendall's tau rank correlation coefficient.
104
+ * Calculates a correlation matrix using the chosen method.
78
105
  *
79
- * @param {DG.DataFrame} df A data frame to process.
80
- * @param {number} [alpha=0.05] The significance threshold.
106
+ * @param {DG.DataFrame} df A data frame.
81
107
  * @return {DG.DataFrame} The correlation matrix.
82
108
  */
83
- function calcKendallTauMatrix(df: DG.DataFrame, alpha: number = 0.05): DG.DataFrame {
84
- const nItems = df.columns.length;
85
- const tau = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0));
109
+ function calcCorrelationMatrix(df: DG.DataFrame): DG.DataFrame {
110
+ return calcKendallTauMatrix(df);
111
+ }
86
112
 
87
- for (let i = 0; i < nItems; ++i) {
88
- for (let j = i+1; j < nItems; ++j) {
89
- const res = kendallsTau(df.columns.byIndex(i).getRawData(), df.columns.byIndex(j).getRawData());
90
- tau[i][j] = res.prob < alpha ? res.test : 0;
91
- tau[j][i] = tau[i][j];
113
+ type Weights = {[pos: number]: number};
114
+ type Guide = {[pos: number]: Weights};
115
+
116
+ /**
117
+ * Calculates a dictionary with the keys containing the first correlating positions.
118
+ * Values correspond to a dictionary containing the positions and corresponding R-value
119
+ * that the given position correlates with.
120
+ *
121
+ * @param {DG.DataFrame} network A network to process.
122
+ * @return {Guide} The formatted dictionary.
123
+ */
124
+ function calcGuide(network: DG.DataFrame): Guide {
125
+ assert(network.columns.length == 3);
126
+
127
+ const guide: Guide = {};
128
+ let [pos1Col, pos2Col, weightCol] = Array.from(network.columns);
129
+
130
+ pos1Col = pos1Col.getRawData();
131
+ pos2Col = pos2Col.getRawData();
132
+ weightCol = weightCol.getRawData();
133
+
134
+ function _addWeight(pos1: number, pos2: number, weight: number) {
135
+ if (guide[pos1] == undefined) {
136
+ guide[pos1] = {};
92
137
  }
138
+ guide[pos1][pos2] = weight;
93
139
  }
94
- return matrix2DataFrame(tau);
140
+
141
+ for (let i = 0; i < network.rowCount; ++i) {
142
+ const [pos1, pos2, weight] = [pos1Col[i], pos2Col[i], weightCol[i]];
143
+ _addWeight(pos1, pos2, weight);
144
+ _addWeight(pos2, pos1, weight);
145
+ }
146
+ return guide;
147
+ }
148
+
149
+ function calcCorrelations(df: DG.DataFrame, positionColumns?: string[]): Guide {
150
+ const posDF = encodeSequences(df, positionColumns);
151
+ const ccDF = calcCorrelationMatrix(posDF);
152
+ const nwDF = createNetwork(ccDF);
153
+ const guide = calcGuide(nwDF);
154
+ return guide;
95
155
  }
96
156
 
97
157
  /**
98
- * Creates acorrelation plot and a box plot to perform correlation analysis.
158
+ * Formats correlating positions to place in the corresponding tooltips.
159
+ * Highlights correlating positions' headers.
99
160
  *
100
161
  * @export
101
- * @param {DG.Column} sequencesColumn A column containing amino acid sequences.
102
- * @return {[DG.Viewer, DG.Viewer]} These two plots.
162
+ * @class CorrelationAnalysisVisualizer
103
163
  */
104
- export function correlationAnalysisPlots(sequencesColumn: DG.Column): [DG.Viewer, DG.Viewer] {
105
- const posDF = calcPositions(sequencesColumn);
106
- const cpviewer = DG.Viewer.fromType(
107
- DG.VIEWER.CORR_PLOT,
108
- posDF,
109
- {
110
- 'xColumnNames': posDF.columns.names(),
111
- 'yColumnNames': posDF.columns.names(),
112
- 'correlationType': 'Spearman',
113
- });
114
-
115
- const rhoDF = calcKendallTauMatrix(posDF);
116
- const meltDF = melt(rhoDF);
117
-
118
- const bpviewer = DG.Viewer.fromType(
119
- DG.VIEWER.BOX_PLOT,
120
- meltDF, {
121
- 'categoryColumnName': 'keys',
122
- 'valueColumnName': 'values',
123
- 'statistics': ['min', 'max', 'avg', 'med'],
124
- });
125
- return [cpviewer, bpviewer];
164
+ export class CorrelationAnalysisVisualizer {
165
+ protected guide: Guide;
166
+ protected highlightedColumns: number[];
167
+
168
+ /**
169
+ * Creates an instance of CorrelationAnalysisVisualizer.
170
+ * @param {DG.DataFrame} df A data frame to take sequences from.
171
+ * @param {string[]} positionColumns Optional columns list to take the sequences from.
172
+ * @memberof CorrelationAnalysisVisualizer
173
+ */
174
+ constructor(df: DG.DataFrame, positionColumns: string[]) {
175
+ if (df) {
176
+ this.guide = calcCorrelations(df, positionColumns);
177
+ this.highlightedColumns = Object.keys(this.guide).map((v) => parseInt(v));
178
+ } else {
179
+ throw new Error('Dataframe was not found in the grid.');
180
+ }
181
+ }
182
+
183
+ /**
184
+ * Returns a dictionary with the correlating positions and their R-value.
185
+ *
186
+ * @readonly
187
+ * @type {Guide} The dictionary.
188
+ * @memberof CorrelationAnalysisVisualizer
189
+ */
190
+ get path(): Guide {
191
+ return this.guide;
192
+ }
193
+
194
+ /**
195
+ * Checks if the position column name is found among correlating ones.
196
+ *
197
+ * @param {string} name The name of the column.
198
+ * @return {boolean} True if the position is correlating with any other.
199
+ * @memberof CorrelationAnalysisVisualizer
200
+ */
201
+ public isPositionCorrelating(name: string): boolean {
202
+ return this.highlightedColumns.includes(parseInt(name));
203
+ }
126
204
  }
@@ -1,6 +1,4 @@
1
1
  /* Do not change these import lines. Datagrok will import API library in exactly the same manner */
2
- // eslint-disable-next-line no-unused-vars
3
- import * as grok from 'datagrok-api/grok';
4
2
  import * as ui from 'datagrok-api/ui';
5
3
  import * as DG from 'datagrok-api/dg';
6
4
 
@@ -10,6 +8,15 @@ import {DimensionalityReducer} from '@datagrok-libraries/utils/src/reduce-dimens
10
8
  import {Measurer} from '@datagrok-libraries/utils/src/string-measure';
11
9
  import {Coordinates} from '@datagrok-libraries/utils/src/type-declarations';
12
10
 
11
+ /**
12
+ * A worker to perform dimensionality reduction.
13
+ *
14
+ * @param {any[]} columnData The data to process.
15
+ * @param {string} method A method of dimensionality reduction.
16
+ * @param {string} measure A distance metric.
17
+ * @param {number} cyclesCount Number of iterations to run.
18
+ * @return {Promise<unknown>} Resulting embedding.
19
+ */
13
20
  function createDimensinalityReducingWorker(
14
21
  columnData: any[],
15
22
  method: string,
@@ -40,7 +47,6 @@ function inferActivityColumnsName(table: DG.DataFrame): string | null {
40
47
  const re = /activity|ic50/i;
41
48
  for (const name of table.columns.names()) {
42
49
  if (name.match(re)) {
43
- console.log(`${name} found.`);
44
50
  return name;
45
51
  }
46
52
  }
@@ -103,29 +109,27 @@ export async function createPeptideSimilaritySpaceViewer(
103
109
  // Add new axes.
104
110
  for (const axis of axesNames) {
105
111
  const col = table.col(axis);
112
+ const newCol = edf.getCol(axis);
106
113
 
107
- if (col == null) {
108
- table.columns.insert(edf.getCol(axis));
114
+ if (col != null) {
115
+ for (let i = 0; i < newCol.length; ++i) {
116
+ const v = newCol.get(i);
117
+ table.set(axis, i, v);
118
+ }
109
119
  } else {
110
- table.columns.replace(col, edf.getCol(axis));
120
+ table.columns.insert(newCol);
111
121
  }
112
122
  }
113
123
 
114
- // const viewer = DG.Viewer.scatterPlot(table, {x: '~X', y: '~Y', color: activityColumnName ?? '~MW', size: '~MW'});
115
124
  const viewerOptions = {x: '~X', y: '~Y', color: activityColumnName ?? '~MW', size: '~MW'};
116
- const viewer = view !== null ?
117
- view.addViewer(DG.VIEWER.SCATTER_PLOT, viewerOptions) : DG.Viewer.scatterPlot(table, viewerOptions);
118
- // Fit view if needed.
119
- /*if (zoom) {
120
- viewer.zoom(
121
- table.getCol('~X').min,
122
- table.getCol('~Y').min,
123
- table.getCol('~X').max,
124
- table.getCol('~Y').max,
125
- );
126
- }*/
125
+ const viewer = DG.Viewer.scatterPlot(table, viewerOptions);
126
+
127
+ if (view !== null) {
128
+ view.addViewer(viewer);
129
+ }
130
+
127
131
  pi.close();
128
- return (viewer as DG.ScatterPlotViewer);
132
+ return viewer;
129
133
  }
130
134
 
131
135
  /**
@@ -1,5 +1,13 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
 
3
+ /**
4
+ * Split aligned sequence string into separate parts containing amino acid residues.
5
+ *
6
+ * @export
7
+ * @param {DG.Column} peptideColumn Column containing aligned sequences.
8
+ * @param {boolean} [filter=true] Filter out columns with all the same residues.
9
+ * @return {[DG.DataFrame, number[]]} DataFrame containing split sequence and a list of invalid indexes.
10
+ */
3
11
  export function splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean = true): [DG.DataFrame, number[]] {
4
12
  const splitPeptidesArray: string[][] = [];
5
13
  let currentSplitPeptide: string[];
@@ -45,7 +53,6 @@ export function splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean =
45
53
  columnNames.push('C-terminal');
46
54
 
47
55
  // filter out the columns with the same values
48
-
49
56
  if (filter) {
50
57
  splitColumns = splitColumns.filter((positionArray, index) => {
51
58
  const isRetained = new Set(positionArray).size > 1;
@@ -7,6 +7,13 @@ import * as logojs from 'logojs-react';
7
7
  import {splitAlignedPeptides} from '../utils/split-aligned';
8
8
  import {ChemPalette} from '../utils/chem-palette';
9
9
 
10
+ /**
11
+ * Logo viewer.
12
+ *
13
+ * @export
14
+ * @class Logo
15
+ * @extends {DG.JsViewer}
16
+ */
10
17
  export class Logo extends DG.JsViewer {
11
18
  initialized: boolean;
12
19
  option: any;
@@ -18,6 +25,11 @@ export class Logo extends DG.JsViewer {
18
25
  LET_COLORS: Array<any>;
19
26
  target: DG.DataFrame | undefined | null;
20
27
 
28
+ /**
29
+ * Creates an instance of Logo.
30
+ *
31
+ * @memberof Logo
32
+ */
21
33
  constructor() {
22
34
  super();
23
35
  this.initialized = false;
@@ -59,9 +71,13 @@ export class Logo extends DG.JsViewer {
59
71
  ];
60
72
  }
61
73
 
74
+ /**
75
+ * Initializer function.
76
+ *
77
+ * @memberof Logo
78
+ */
62
79
  init() {
63
80
  this.initialized = true;
64
- // this.reactHost = ui.div([]);
65
81
  console.log('INIT');
66
82
  this.target = this.dataFrame;
67
83
  [this.splitted] = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
@@ -70,6 +86,11 @@ export class Logo extends DG.JsViewer {
70
86
  this.root.style.maxHeight = '200px';
71
87
  }
72
88
 
89
+ /**
90
+ * Function to execute when the table is attached.
91
+ *
92
+ * @memberof Logo
93
+ */
73
94
  onTableAttached() {
74
95
  if (typeof this.dataFrame !== 'undefined') {
75
96
  if (!this.initialized) {
@@ -84,16 +105,32 @@ export class Logo extends DG.JsViewer {
84
105
  this.render();
85
106
  }
86
107
 
108
+ /**
109
+ * Function that is executed when the viewer is detached.
110
+ *
111
+ * @memberof Logo
112
+ */
87
113
  detach() {
88
114
  this.subs.forEach((sub) => sub.unsubscribe());
89
115
  }
90
116
 
117
+ /**
118
+ * Function that is executed when the viewer property is changed.
119
+ *
120
+ * @param {DG.Property} property
121
+ * @memberof Logo
122
+ */
91
123
  onPropertyChanged(property: DG.Property) {
92
124
  super.onPropertyChanged(property);
93
125
 
94
126
  this.render();
95
127
  }
96
128
 
129
+ /**
130
+ * Function that renders the viewer.
131
+ *
132
+ * @memberof Logo
133
+ */
97
134
  async render() {
98
135
  const bits = this.dataFrame!.selection;
99
136
  let selected = false;
@@ -111,18 +148,24 @@ export class Logo extends DG.JsViewer {
111
148
 
112
149
  if (typeof this.dataFrame !== 'undefined') {
113
150
  this.findLogo();
114
-
115
- // if (this.reactHost !== null) {
116
- // this.root.appendChild(this.reactHost);
117
- // }
118
151
  }
119
152
  }
120
153
 
154
+ /**
155
+ * Create logo.
156
+ *
157
+ * @memberof Logo
158
+ */
121
159
  async findLogo() {
122
160
  this.getInfoFromDf();
123
161
  logojs.embedProteinLogo(this.root, {alphabet: this.LET_COLORS, ppm: this.ppm});
124
162
  }
125
163
 
164
+ /**
165
+ * Retrieves information for building logo from the dataframe.
166
+ *
167
+ * @memberof Logo
168
+ */
126
169
  getInfoFromDf() {
127
170
  let index: number = 0;
128
171
  this.ppm = [];
@@ -3,6 +3,11 @@ import * as DG from 'datagrok-api/dg';
3
3
  import {describe} from '../describe';
4
4
  import {Subject} from 'rxjs';
5
5
 
6
+ /**
7
+ * Model class for SAR viewers that retrieves and stores data.
8
+ *
9
+ * @class SARViewerModel
10
+ */
6
11
  class SARViewerModel {
7
12
  private viewerGrid: Subject<DG.Grid> = new Subject<DG.Grid>();
8
13
  private viewerVGrid: Subject<DG.Grid> = new Subject<DG.Grid>();
@@ -21,6 +26,11 @@ class SARViewerModel {
21
26
  private isUpdating = false;
22
27
  grouping: boolean;
23
28
 
29
+ /**
30
+ * Creates an instance of SARViewerModel.
31
+ *
32
+ * @memberof SARViewerModel
33
+ */
24
34
  constructor() {
25
35
  this.dataFrame = null;
26
36
  this.activityColumn = null;
@@ -35,6 +45,18 @@ class SARViewerModel {
35
45
  this.groupMapping$ = this.groupMapping.asObservable();
36
46
  }
37
47
 
48
+ /**
49
+ * Updates data using the specified parameters.
50
+ *
51
+ * @param {DG.DataFrame} df Working table.
52
+ * @param {string} activityCol Activity column name.
53
+ * @param {string} activityScaling Activity scaling method.
54
+ * @param {DG.Grid} sourceGrid Working table grid.
55
+ * @param {boolean} twoColorMode Bidirectional analysis enabled.
56
+ * @param {(DG.BitSet | null)} initialBitset Initial bitset.
57
+ * @param {boolean} grouping Grouping enabled.
58
+ * @memberof SARViewerModel
59
+ */
38
60
  async updateData(
39
61
  df: DG.DataFrame,
40
62
  activityCol: string,
@@ -54,6 +76,11 @@ class SARViewerModel {
54
76
  await this.updateDefault();
55
77
  }
56
78
 
79
+ /**
80
+ * Update data using current parameters.
81
+ *
82
+ * @memberof SARViewerModel
83
+ */
57
84
  async updateDefault() {
58
85
  if (
59
86
  this.dataFrame && this.activityColumn && this.activityScaling &&