@datagrok/peptides 0.6.1 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +1 -1
- package/package.json +11 -8
- package/src/describe.ts +184 -83
- package/src/{viewers/model.ts → model.ts} +1 -1
- package/src/package.ts +0 -14
- package/src/peptides.ts +27 -37
- package/src/styles.css +9 -0
- package/src/utils/peptide-similarity-space.ts +7 -33
- package/src/viewers/sar-viewer.ts +191 -180
- package/src/viewers/stacked-barchart-viewer.ts +0 -69
- package/src/widgets/analyze-peptides.ts +12 -2
- package/src/widgets/manual-alignment.ts +3 -2
- package/src/workers/dimensionality-reducer.ts +2 -2
- package/webpack.config.js +1 -1
- package/src/utils/correlation-analysis.ts +0 -204
- package/src/viewers/spiral-plot.ts +0 -97
|
@@ -1,204 +0,0 @@
|
|
|
1
|
-
/* Do not change these import lines. Datagrok will import API library in exactly the same manner */
|
|
2
|
-
import * as DG from 'datagrok-api/dg';
|
|
3
|
-
|
|
4
|
-
import {AlignedSequenceEncoder} from '@datagrok-libraries/utils/src/sequence-encoder';
|
|
5
|
-
import {assert} from '@datagrok-libraries/utils/src/operations';
|
|
6
|
-
import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
|
|
7
|
-
import {kendallsTau} from '@datagrok-libraries/statistics/src/correlation-coefficient';
|
|
8
|
-
|
|
9
|
-
/**
 * Builds a data frame out of a matrix.
 *
 * Every element of the matrix becomes one Float32 column; columns are named
 * after their 1-based position in the matrix ('1', '2', ...).
 *
 * @param {Matrix} matrix The matrix to convert.
 * @return {DG.DataFrame} The resulting data frame.
 */
function matrix2DataFrame(matrix: Matrix): DG.DataFrame {
  const columns = matrix.map((values, index) => {
    const name = String(index + 1);
    return DG.Column.fromFloat32Array(name, values);
  });
  return DG.DataFrame.fromColumns(columns);
}
|
|
19
|
-
|
|
20
|
-
/**
 * Encodes sequence positions into numeric values.
 *
 * Each selected column is treated as one position: every cell is passed to an
 * AlignedSequenceEncoder constructed with 'WimleyWhite' and the result is stored
 * as a Float32 value.
 *
 * @param {DG.DataFrame} df A data frame containing the sequences.
 * @param {string[]} [positionColumns] Names of the columns to encode; when omitted, all columns of df are encoded.
 * @return {DG.DataFrame} A data frame with one encoded column per source column, carrying the source column's name.
 */
function encodeSequences(df: DG.DataFrame, positionColumns?: string[]): DG.DataFrame {
  // The encoded columns must be named after the columns they were built from.
  // Taking names from df.columns.names()[i] is only correct when every column is encoded,
  // so resolve the name list once and use it both for lookup and for naming.
  const names: string[] = positionColumns ? positionColumns : df.columns.names();
  const nCols = names.length;
  const nRows = df.rowCount;
  const enc = new AlignedSequenceEncoder('WimleyWhite');
  const encoded: DG.Column[] = [];

  for (let i = 0; i < nCols; ++i) {
    const col: DG.Column = positionColumns ? df.getCol(names[i]) : df.columns.byIndex(i);
    const values = new Float32Array(nRows);

    for (let j = 0; j < nRows; ++j) {
      // 'encodeLettter' (three t's) is the encoder's method name as used by this file.
      values[j] = enc.encodeLettter(col.get(j));
    }
    encoded.push(DG.Column.fromFloat32Array(names[i], values));
  }
  return DG.DataFrame.fromColumns(encoded);
}
|
|
43
|
-
|
|
44
|
-
/**
 * Converts a square adjacency matrix into an edge list of the form <pos1>-<pos2>-<weight>.
 *
 * Only cells with row index greater than column index are visited, so every
 * unordered pair is reported at most once. Positions are 1-based. Cells for which
 * Math.abs(value) > 0 is false (zero, and also NaN) produce no edge.
 *
 * @param {DG.DataFrame} adjMatrix The adjacency matrix; must have as many columns as rows.
 * @return {DG.DataFrame} A data frame with the columns 'pos1' (int), 'pos2' (int) and 'weight' (double).
 */
function createNetwork(adjMatrix: DG.DataFrame): DG.DataFrame {
  const nCols = adjMatrix.columns.length;
  const nRows = adjMatrix.rowCount;

  assert(nCols == nRows);

  const pos1: number[] = [];
  const pos2: number[] = [];
  const weight: number[] = [];

  for (let i = 0; i < nCols; ++i) {
    // Fetch the raw values once per column instead of once per visited cell.
    const values = adjMatrix.columns.byIndex(i).getRawData();

    for (let j = i + 1; j < nRows; ++j) {
      const r = values[j];

      if (Math.abs(r) > 0) {
        pos1.push(i + 1);
        pos2.push(j + 1);
        weight.push(r);
      }
    }
  }

  return DG.DataFrame.fromColumns([
    DG.Column.fromList('int', 'pos1', pos1),
    DG.Column.fromList('int', 'pos2', pos2),
    DG.Column.fromList('double', 'weight', weight),
  ]);
}
|
|
80
|
-
|
|
81
|
-
/**
 * Calculates a thresholded Kendall's tau rank correlation matrix between the columns of a data frame.
 *
 * For every pair of columns the coefficient returned by kendallsTau is kept only when
 * its `prob` is below alpha and the absolute value of its `test` is at least rAbsCutoff;
 * otherwise the cell is 0. The matrix is symmetric and its diagonal is left at 0.
 *
 * @param {DG.DataFrame} df A data frame to process.
 * @param {number} [alpha=0.05] The significance threshold.
 * @param {number} [rAbsCutoff=0.5] The minimal absolute value of the coefficient to keep.
 * @return {DG.DataFrame} The correlation matrix.
 */
function calcKendallTauMatrix(df: DG.DataFrame, alpha: number = 0.05, rAbsCutoff = 0.5): DG.DataFrame {
  const nItems = df.columns.length;
  // Fetch the raw data of every column once, rather than twice per pair inside the O(n^2) loop.
  const series = Array.from({length: nItems}, (_, i) => df.columns.byIndex(i).getRawData());
  // Typed arrays are zero-initialised, so pairs that fail the thresholds need no explicit write.
  const tau = Array.from({length: nItems}, () => new Float32Array(nItems));

  for (let i = 0; i < nItems; ++i) {
    for (let j = i + 1; j < nItems; ++j) {
      const res = kendallsTau(series[i], series[j]);
      const keep = (res.prob < alpha) && (Math.abs(res.test) >= rAbsCutoff);

      tau[i][j] = keep ? res.test : 0;
      tau[j][i] = tau[i][j];
    }
  }
  return matrix2DataFrame(tau);
}
|
|
102
|
-
|
|
103
|
-
/**
 * Calculates a correlation matrix between the columns of a data frame.
 *
 * Delegates to calcKendallTauMatrix with its default thresholds.
 *
 * @param {DG.DataFrame} df A data frame.
 * @return {DG.DataFrame} The correlation matrix.
 */
function calcCorrelationMatrix(df: DG.DataFrame): DG.DataFrame {
  const correlationMatrix = calcKendallTauMatrix(df);
  return correlationMatrix;
}
|
|
112
|
-
|
|
113
|
-
// Maps a position to the weight of its link with some other position.
type Weights = {[pos: number]: number};
|
|
114
|
-
// Maps a position to the weights of all positions linked with it.
type Guide = {[pos: number]: Weights};
|
|
115
|
-
|
|
116
|
-
/**
 * Turns an edge list into a lookup dictionary.
 *
 * Every edge is registered in both directions, so each position that takes part
 * in at least one edge becomes a key whose value maps the linked positions to
 * the weight of the link.
 *
 * @param {DG.DataFrame} network An edge list with exactly three columns: first position, second position, weight.
 * @return {Guide} The lookup dictionary.
 */
function calcGuide(network: DG.DataFrame): Guide {
  assert(network.columns.length == 3);

  const [firstCol, secondCol, valueCol] = Array.from(network.columns);
  const firstPositions = firstCol.getRawData();
  const secondPositions = secondCol.getRawData();
  const weights = valueCol.getRawData();
  const guide: Guide = {};

  // Registers a one-directional link, creating the entry for `from` on first use.
  const link = (from: number, to: number, weight: number): void => {
    if (guide[from] == undefined) {
      guide[from] = {};
    }
    guide[from][to] = weight;
  };

  for (let row = 0; row < network.rowCount; ++row) {
    const a = firstPositions[row];
    const b = secondPositions[row];
    const w = weights[row];

    link(a, b, w);
    link(b, a, w);
  }
  return guide;
}
|
|
148
|
-
|
|
149
|
-
/**
 * Runs the whole correlation pipeline: encodes the sequences, calculates the
 * correlation matrix, converts it into an edge list and builds the lookup dictionary.
 *
 * @param {DG.DataFrame} df A data frame containing the sequences.
 * @param {string[]} [positionColumns] Names of the columns to take into account; passed through to encodeSequences.
 * @return {Guide} The dictionary of correlating positions.
 */
function calcCorrelations(df: DG.DataFrame, positionColumns?: string[]): Guide {
  const encoded = encodeSequences(df, positionColumns);
  const network = createNetwork(calcCorrelationMatrix(encoded));
  return calcGuide(network);
}
|
|
156
|
-
|
|
157
|
-
/**
 * Holds the correlating positions of a set of sequences so that they can be
 * shown in tooltips and their column headers can be highlighted.
 *
 * @export
 * @class CorrelationAnalysisVisualizer
 */
export class CorrelationAnalysisVisualizer {
  /** Correlating positions and the weights of their links, as returned by calcCorrelations. */
  protected guide: Guide;
  /** Numeric keys of the guide, i.e. all positions that correlate with at least one other position. */
  protected highlightedColumns: number[];

  /**
   * Creates an instance of CorrelationAnalysisVisualizer.
   *
   * @param {DG.DataFrame} df A data frame to take sequences from.
   * @param {string[]} [positionColumns] Optional columns list to take the sequences from.
   * @throws {Error} If df is falsy.
   * @memberof CorrelationAnalysisVisualizer
   */
  constructor(df: DG.DataFrame, positionColumns?: string[]) {
    if (!df) {
      throw new Error('Dataframe was not found in the grid.');
    }
    this.guide = calcCorrelations(df, positionColumns);
    // Explicit radix: keys must always be read as decimal numbers.
    this.highlightedColumns = Object.keys(this.guide).map((key) => parseInt(key, 10));
  }

  /**
   * Returns a dictionary with the correlating positions and their R-value.
   *
   * @readonly
   * @type {Guide} The dictionary.
   * @memberof CorrelationAnalysisVisualizer
   */
  get path(): Guide {
    return this.guide;
  }

  /**
   * Checks if the position column name is found among the correlating ones.
   *
   * The name is parsed as a decimal integer and compared against the guide's keys.
   *
   * @param {string} name The name of the column.
   * @return {boolean} True if the position is correlating with any other.
   * @memberof CorrelationAnalysisVisualizer
   */
  public isPositionCorrelating(name: string): boolean {
    return this.highlightedColumns.includes(parseInt(name, 10));
  }
}
|
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {_toJson} from 'datagrok-api/src/utils';
|
|
3
|
-
|
|
4
|
-
import {assert, argSort} from '@datagrok-libraries/utils/src/operations';
|
|
5
|
-
import {Options} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
|
-
|
|
7
|
-
// Untyped handle to the global `window` object; used in this file to call `grok_Viewer_ScatterPlot`.
const api = <any>window;
|
|
8
|
-
|
|
9
|
-
/**
 * A scatter plot that lays a 1D series out in 2D.
 *
 * @export
 * @class SpiralPlot
 * @extends {DG.ScatterPlotViewer}
 */
export class SpiralPlot extends DG.ScatterPlotViewer {
  /** Names of the columns holding the calculated X and Y coordinates. */
  static axesNames = ['~X', '~Y'];
  /** Key of the option that holds the name of the values column. */
  static valuesKey = 'valuesColumnName';

  /**
   * Calculates the spiral projection of the values column and merges the
   * resulting coordinate columns into the data frame.
   *
   * @static
   * @param {DG.DataFrame} t Source data frame.
   * @param {Options} options Options to read the values column name from. Must include {valuesColumnName: string}.
   * @return {DG.DataFrame} The data frame returned by _updateCoordinates.
   * @memberof SpiralPlot
   */
  static updateCoordinates(t: DG.DataFrame, options: Options): DG.DataFrame {
    const valuesColumnName = options[SpiralPlot.valuesKey];
    assert(valuesColumnName != undefined);

    const series = t.getCol(valuesColumnName).getRawData() as Float32Array;
    const [xs, ys] = _calcSpiralProjection(series);
    const coords = DG.DataFrame.fromColumns([
      DG.Column.fromFloat32Array(SpiralPlot.axesNames[0], xs),
      DG.Column.fromFloat32Array(SpiralPlot.axesNames[1], ys),
    ]);
    return _updateCoordinates(t, coords);
  }

  /**
   * Creates a new SpiralPlot from a data frame with the selected values column.
   *
   * Note that the passed options object is modified in place: its `x`, `y`
   * and `color` entries are overwritten before the viewer is created.
   *
   * @static
   * @param {DG.DataFrame} t A data frame.
   * @param {Options} options Controlling options. Must include {valuesColumnName: string}.
   * @return {SpiralPlot} The plot.
   * @memberof SpiralPlot
   */
  static fromTable(t: DG.DataFrame, options: Options): SpiralPlot {
    const table = SpiralPlot.updateCoordinates(t, options);
    const [xName, yName] = SpiralPlot.axesNames;

    options.x = xName;
    options.y = yName;
    options.color = options[SpiralPlot.valuesKey];

    const viewer = api.grok_Viewer_ScatterPlot(table.dart, _toJson(options));
    return new SpiralPlot(viewer);
  }
}
|
|
56
|
-
|
|
57
|
-
/**
 * Calculates a 2D projection of a 1D series as a spiral.
 *
 * Each value is converted into the distance `d` from a reference value (the element
 * ranked first by argSort; presumably the maximum, confirm against argSort's second
 * argument). The point is then placed at radius `d` and angle `PI * d`, and a random
 * shift in the range (-0.75, 0.75] is added to each coordinate independently.
 *
 * @param {(number[] | Float32Array)} values The series.
 * @return {[Float32Array, Float32Array]} X and Y components of the projection, indexed like the input.
 */
function _calcSpiralProjection(values: number[] | Float32Array): [Float32Array, Float32Array] {
  const count = values.length;
  const ranking = argSort(Array.from(values), true);
  const reference = values[ranking[0]];
  // Freshly allocated typed arrays are already filled with zeros.
  const xs = new Float32Array(count);
  const ys = new Float32Array(count);

  for (const idx of ranking) {
    const distance = reference - values[idx];
    const angle = Math.PI * distance;

    xs[idx] = distance * Math.cos(angle) - Math.random() * 1.5 + 0.75;
    ys[idx] = distance * Math.sin(angle) - Math.random() * 1.5 + 0.75;
  }
  return [xs, ys];
}
|
|
77
|
-
|
|
78
|
-
/**
 * Brings the columns of the coordinates data frame into the destination data frame.
 *
 * When none of the coordinate column names exists in the destination yet, the columns
 * are added to it and the same data frame instance is returned. Otherwise the result
 * of joining the destination with the coordinates is returned.
 * NOTE(review): the join uses the coordinate columns themselves as keys with the
 * 'right' join type; confirm this yields the intended refresh of existing coordinates.
 *
 * @param {DG.DataFrame} table Destination data frame.
 * @param {DG.DataFrame} coords Source data frame.
 * @return {DG.DataFrame} Updated data frame.
 */
function _updateCoordinates(table: DG.DataFrame, coords: DG.DataFrame): DG.DataFrame {
  const incomingNames: string[] = coords.columns.names();
  const existingNames: string[] = table.columns.names();
  const untouchedNames = existingNames.filter((name: string) => !incomingNames.includes(name));
  // If filtering removed anything, at least one coordinate column is already present.
  const hasClash = existingNames.length != untouchedNames.length;

  if (hasClash) {
    return table.join(coords, incomingNames, incomingNames, untouchedNames, [], 'right', false);
  }

  for (const column of coords.columns) {
    table.columns.add(column);
  }
  return table;
}
|