@datagrok/peptides 0.0.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -1
- package/detectors.js +2 -2
- package/package.json +19 -12
- package/scripts/smiles-to-3D.py +13 -0
- package/src/{peptide-sar-viewer/describe.ts → describe.ts} +104 -78
- package/src/package.ts +79 -137
- package/src/peptides.ts +76 -0
- package/src/utils/cell-renderer.ts +77 -101
- package/src/utils/chem-palette.ts +80 -53
- package/src/utils/correlation-analysis.ts +126 -0
- package/src/utils/molecular-measure.ts +175 -0
- package/src/utils/peptide-similarity-space.ts +242 -0
- package/src/utils/split-aligned.ts +65 -0
- package/src/{peptide-logo-viewer → viewers}/logo-viewer.ts +6 -4
- package/src/viewers/model.ts +76 -0
- package/src/{peptide-sar-viewer → viewers}/sar-viewer.ts +67 -23
- package/src/{stacked-barchart → viewers}/stacked-barchart-viewer.ts +29 -31
- package/src/widgets/analyze-peptides.ts +87 -0
- package/src/widgets/manual-alignment.ts +36 -0
- package/src/widgets/peptide-molecule.ts +42 -0
- package/src/workers/dimensionality-reducer.ts +29 -0
- package/tsconfig.json +12 -13
- package/webpack.config.js +4 -4
- package/src/split-aligned.ts +0 -42
- package/src/utils/misc.ts +0 -101
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/* Do not change these import lines. Datagrok will import API library in exactly the same manner */
|
|
2
|
+
//import * as grok from 'datagrok-api/grok';
|
|
3
|
+
//import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {AlignedSequenceEncoder} from '@datagrok-libraries/utils/src/sequence-encoder';
|
|
7
|
+
import {assert, transposeMatrix} from '@datagrok-libraries/utils/src/operations';
|
|
8
|
+
import {Vector, Matrix} from '@datagrok-libraries/utils/src/type-declarations';
|
|
9
|
+
import {kendallsTau} from '@datagrok-libraries/statistics/src/correlation-coefficient';
|
|
10
|
+
|
|
11
|
+
/**
 * Builds a data frame out of a matrix: every element of the matrix becomes
 * one float column, named by its 1-based position ('1', '2', ...).
 *
 * @export
 * @param {Matrix} matrix Source matrix.
 * @return {DG.DataFrame} Data frame holding the matrix contents.
 */
export function matrix2DataFrame(matrix: Matrix): DG.DataFrame {
  const columns = matrix.map((row, index) => {
    const name = `${index + 1}`;
    return DG.Column.fromFloat32Array(name, row);
  });
  return DG.DataFrame.fromColumns(columns);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Turns a column of amino acid sequences into a numeric data frame.
 *
 * Every sequence is cleaned and encoded with AlignedSequenceEncoder, the
 * encoded vectors are transposed and the result is converted to a data frame.
 *
 * @param {DG.Column} col Column containing the sequences.
 * @return {DG.DataFrame} Data frame built from the transposed encodings.
 */
function calcPositions(col: DG.Column): DG.DataFrame {
  const cleaned = col.toList().map((raw) => AlignedSequenceEncoder.clean(raw));
  const encoder = new AlignedSequenceEncoder();
  const encoded = cleaned.map((sequence) => Vector.from(encoder.encode(sequence)));
  return matrix2DataFrame(transposeMatrix(encoded));
}
|
|
35
|
+
|
|
36
|
+
/**
 * Unfolds a data frame into <category>-<value> (long) format.
 *
 * The result has two columns: 'keys' holds the name of the source column and
 * 'values' holds the raw value, with source columns laid out one after another.
 *
 * @param {DG.DataFrame} df A data frame to unfold.
 * @return {DG.DataFrame} The resulting two-column data frame.
 */
function melt(df: DG.DataFrame): DG.DataFrame {
  const rowCount = df.rowCount;
  const sourceColumns = df.columns.toList();
  const total = sourceColumns.length * rowCount;

  // Both buffers are allocated once; repeatedly concatenating the keys array
  // would copy everything accumulated so far for every source column.
  const keys = new Array<string>(total);
  const values: Float32Array = new Float32Array(total);
  let offset = 0;

  for (const c of sourceColumns) {
    keys.fill(c.name, offset, offset + rowCount);
    values.set(c.getRawData(), offset);
    offset += rowCount;
  }
  assert(keys.length == values.length);
  return DG.DataFrame.fromColumns([DG.Column.fromStrings('keys', keys), DG.Column.fromFloat32Array('values', values)]);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Computes the pairwise Spearman's rho rank correlation between all columns.
 *
 * Only the pairs above the diagonal are calculated and then mirrored; the
 * diagonal itself is left at zero.
 *
 * @param {DG.DataFrame} df A data frame to process.
 * @return {DG.DataFrame} The symmetric correlation matrix as a data frame.
 */
// eslint-disable-next-line no-unused-vars
function calcSpearmanRhoMatrix(df: DG.DataFrame): DG.DataFrame {
  const size = df.columns.length;
  const matrix = Array.from({length: size}, () => new Float32Array(size));

  for (let row = 0; row < size; ++row) {
    for (let col = row + 1; col < size; ++col) {
      const value = df.columns.byIndex(row).stats.spearmanCorr(df.columns.byIndex(col));
      matrix[row][col] = value;
      matrix[col][row] = value;
    }
  }
  return matrix2DataFrame(matrix);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Computes the pairwise Kendall's tau rank correlation between all columns.
 *
 * A coefficient is kept only when its probability is below `alpha`; otherwise
 * the cell is zero. Only the pairs above the diagonal are calculated and then
 * mirrored; the diagonal itself is left at zero.
 *
 * @param {DG.DataFrame} df A data frame to process.
 * @param {number} [alpha=0.05] The significance threshold.
 * @return {DG.DataFrame} The symmetric correlation matrix as a data frame.
 */
function calcKendallTauMatrix(df: DG.DataFrame, alpha: number = 0.05): DG.DataFrame {
  const size = df.columns.length;
  const matrix = Array.from({length: size}, () => new Float32Array(size));

  for (let row = 0; row < size; ++row) {
    for (let col = row + 1; col < size; ++col) {
      const first = df.columns.byIndex(row).getRawData();
      const second = df.columns.byIndex(col).getRawData();
      const result = kendallsTau(first, second);
      const value = result.prob < alpha ? result.test : 0;
      matrix[row][col] = value;
      matrix[col][row] = value;
    }
  }
  return matrix2DataFrame(matrix);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Creates a correlation plot and a box plot to perform correlation analysis.
 *
 * The correlation plot is built over the encoded sequences with the
 * 'Spearman' correlation type. The box plot shows the melted Kendall's tau
 * matrix calculated over the same encoded data.
 *
 * @export
 * @param {DG.Column} sequencesColumn A column containing amino acid sequences.
 * @return {[DG.Viewer, DG.Viewer]} The correlation plot and the box plot.
 */
export function correlationAnalysisPlots(sequencesColumn: DG.Column): [DG.Viewer, DG.Viewer] {
  const positions = calcPositions(sequencesColumn);

  const corrPlotOptions = {
    'xColumnNames': positions.columns.names(),
    'yColumnNames': positions.columns.names(),
    'correlationType': 'Spearman',
  };
  const corrPlot = DG.Viewer.fromType(DG.VIEWER.CORR_PLOT, positions, corrPlotOptions);

  const tauMatrix = calcKendallTauMatrix(positions);
  const longFormat = melt(tauMatrix);

  const boxPlotOptions = {
    'categoryColumnName': 'keys',
    'valueColumnName': 'values',
    'statistics': ['min', 'max', 'avg', 'med'],
  };
  const boxPlot = DG.Viewer.fromType(DG.VIEWER.BOX_PLOT, longFormat, boxPlotOptions);

  return [corrPlot, boxPlot];
}
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
 * Reference table of amino acids: full name, three-letter code, one-letter
 * code and weight written as '<number>Da'.
 * @link https://worldwide.promega.com/resources/tools/amino-acid-chart-amino-acid-structure */
const _lib = [
  {'Name': 'Alanine', 'Three-letter': 'Ala', 'One-letter': 'A', 'Weight': '89Da'},
  {'Name': 'Arginine', 'Three-letter': 'Arg', 'One-letter': 'R', 'Weight': '174Da'},
  {'Name': 'Asparagine', 'Three-letter': 'Asn', 'One-letter': 'N', 'Weight': '132Da'},
  {'Name': 'Aspartic acid', 'Three-letter': 'Asp', 'One-letter': 'D', 'Weight': '133Da'},
  {'Name': 'Asparagine or aspartic acid', 'Three-letter': 'Asx', 'One-letter': 'B', 'Weight': '133Da'},
  {'Name': 'Cysteine', 'Three-letter': 'Cys', 'One-letter': 'C', 'Weight': '121Da'},
  {'Name': 'Glutamine', 'Three-letter': 'Gln', 'One-letter': 'Q', 'Weight': '146Da'},
  {'Name': 'Glutamic acid', 'Three-letter': 'Glu', 'One-letter': 'E', 'Weight': '147Da'},
  {'Name': 'Glutamine or glutamic acid', 'Three-letter': 'Glx', 'One-letter': 'Z', 'Weight': '147Da'},
  {'Name': 'Glycine', 'Three-letter': 'Gly', 'One-letter': 'G', 'Weight': '75Da'},
  {'Name': 'Histidine', 'Three-letter': 'His', 'One-letter': 'H', 'Weight': '155Da'},
  {'Name': 'Isoleucine', 'Three-letter': 'Ile', 'One-letter': 'I', 'Weight': '131Da'},
  {'Name': 'Leucine', 'Three-letter': 'Leu', 'One-letter': 'L', 'Weight': '131Da'},
  {'Name': 'Lysine', 'Three-letter': 'Lys', 'One-letter': 'K', 'Weight': '146Da'},
  {'Name': 'Methionine', 'Three-letter': 'Met', 'One-letter': 'M', 'Weight': '149Da'},
  {'Name': 'Phenylalanine', 'Three-letter': 'Phe', 'One-letter': 'F', 'Weight': '165Da'},
  {'Name': 'Proline', 'Three-letter': 'Pro', 'One-letter': 'P', 'Weight': '115Da'},
  {'Name': 'Serine', 'Three-letter': 'Ser', 'One-letter': 'S', 'Weight': '105Da'},
  {'Name': 'Threonine', 'Three-letter': 'Thr', 'One-letter': 'T', 'Weight': '119Da'},
  {'Name': 'Tryptophan', 'Three-letter': 'Trp', 'One-letter': 'W', 'Weight': '204Da'},
  {'Name': 'Tyrosine', 'Three-letter': 'Tyr', 'One-letter': 'Y', 'Weight': '181Da'},
  {'Name': 'Valine', 'Three-letter': 'Val', 'One-letter': 'V', 'Weight': '117Da'},
];
|
|
138
|
+
|
|
139
|
+
/**
 * Lookup from one-letter amino acid code to its weight in daltons,
 * derived from the `_lib` table.
 * @type {{[name: string]: number}} */
const weightsLib : {[name: string]: number} = {};

// Fill the lookup: strip the trailing two-character unit ('Da') and parse the number.
for (const {'One-letter': code, 'Weight': weight} of _lib) {
  weightsLib[code] = parseFloat(weight.slice(0, -2));
}
|
|
148
|
+
|
|
149
|
+
/**
 * Calculates molecular weight of the given peptide in daltons.
 *
 * A leading 'NH2' adds 16.02 and a trailing 'COOH' adds 45.02; both are
 * stripped before the remaining characters are looked up one by one in
 * `weightsLib`. Characters without an entry contribute nothing.
 *
 * NOTE(review): the table values look like weights of free amino acids rather
 * than of residues within a chain — confirm this is the intended approximation.
 *
 * @export
 * @param {string} sequence Peptide sequence.
 * @return {number} Molecular weight in Da.
 */
export function getSequenceMolecularWeight(sequence: string): number {
  let total = 0;
  let body = sequence;

  if (body.startsWith('NH2')) {
    total += 16.02;
    body = body.slice(3);
  }

  if (body.endsWith('COOH')) {
    total += 45.02;
    body = body.slice(0, body.length - 4);
  }

  return Array.from(body).reduce(
    (acc, residue) => (residue in weightsLib ? acc + weightsLib[residue] : acc),
    total,
  );
}
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/* Do not change these import lines. Datagrok will import API library in exactly the same manner */
|
|
2
|
+
// eslint-disable-next-line no-unused-vars
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
import {getSequenceMolecularWeight} from './molecular-measure';
|
|
8
|
+
import {AlignedSequenceEncoder} from '@datagrok-libraries/utils/src/sequence-encoder';
|
|
9
|
+
import {DimensionalityReducer} from '@datagrok-libraries/utils/src/reduce-dimensionality';
|
|
10
|
+
import {Measurer} from '@datagrok-libraries/utils/src/string-measure';
|
|
11
|
+
import {Coordinates} from '@datagrok-libraries/utils/src/type-declarations';
|
|
12
|
+
|
|
13
|
+
/**
 * Runs dimensionality reduction in a dedicated web worker.
 *
 * A new worker is spawned for every call and is terminated as soon as it
 * either replies or fails, so that workers do not pile up across calls.
 *
 * @param {any[]} columnData Samples to embed.
 * @param {string} method Embedding method to apply.
 * @param {string} measure Distance metric.
 * @param {number} cyclesCount Number of cycles to repeat.
 * @return {Promise<unknown>} Resolves with the `embedding` field of the worker's reply;
 * rejects if the worker reports an error.
 */
function createDimensinalityReducingWorker(
  columnData: any[],
  method: string,
  measure: string,
  cyclesCount: number,
): Promise<unknown> {
  return new Promise(function(resolve, reject) {
    const worker = new Worker(new URL('../workers/dimensionality-reducer.ts', import.meta.url));

    // Handlers are attached before the job is posted.
    worker.onmessage = ({data: {embedding}}) => {
      worker.terminate();
      resolve(embedding);
    };
    // Without this, a failure inside the worker would leave the promise pending forever.
    worker.onerror = (event) => {
      worker.terminate();
      reject(new Error(`Dimensionality reducing worker failed: ${event.message}`));
    };

    worker.postMessage({
      columnData: columnData,
      method: method,
      measure: measure,
      cyclesCount: cyclesCount,
    });
  });
}
|
|
32
|
+
|
|
33
|
+
/**
 * Looks for the first column whose name contains 'activity' or 'ic50'
 * (case-insensitive) and logs the match to the console.
 *
 * @param {DG.DataFrame} table The data frame to search in.
 * @return {(string | null)} Column name or null if not found.
 */
function inferActivityColumnsName(table: DG.DataFrame): string | null {
  const pattern = /activity|ic50/i;
  const found = table.columns.names().find((candidate) => pattern.test(candidate));

  if (found === undefined) {
    return null;
  }
  console.log(`${found} found.`);
  return found;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Creates a scatter plot with the sequences embedded into a 2D space.
 *
 * The embedding is calculated in a web worker. Its components are stored in
 * the '~X' and '~Y' columns of `table`, and the molecular weight of each
 * sequence in '~MW'; existing columns with these names are replaced.
 * The task bar progress indicator is closed even if the embedding fails.
 *
 * @export
 * @param {DG.DataFrame} table The table containing samples.
 * @param {DG.Column} alignedSequencesColumn Samples column.
 * @param {string} method Embedding method to apply.
 * @param {string} measure Distance metric.
 * @param {number} cyclesCount Number of cycles to repeat.
 * @param {(DG.TableView | null)} view View to add the scatter plot to; when null a standalone viewer is created.
 * @param {(string | null)} [activityColumnName] Column used for point color;
 * inferred from the table when omitted, falling back to '~MW'.
 * @param {boolean} [zoom=false] Whether to fit view. Currently has no effect.
 * @return {Promise<DG.ScatterPlotViewer>} A viewer.
 */
export async function createPeptideSimilaritySpaceViewer(
  table: DG.DataFrame,
  alignedSequencesColumn: DG.Column,
  method: string,
  measure: string,
  cyclesCount: number,
  view: DG.TableView | null,
  activityColumnName?: string | null,
  zoom: boolean = false,
): Promise<DG.ScatterPlotViewer> {
  const pi = DG.TaskBarProgressIndicator.create('Creating embedding.');

  try {
    activityColumnName = activityColumnName ?? inferActivityColumnsName(table);

    const axesNames = ['~X', '~Y', '~MW'];
    const columnData = alignedSequencesColumn.toList().map((v, _) => AlignedSequenceEncoder.clean(v));

    const embcols = await createDimensinalityReducingWorker(columnData, method, measure, cyclesCount);

    const columns = Array.from(
      embcols as Coordinates,
      (v: Float32Array, k) => (DG.Column.fromFloat32Array(axesNames[k], v)),
    );

    // Molecular weight per sequence; missing sequences count as zero.
    const mw: Float32Array = new Float32Array(columnData.length);
    for (let i = 0; i < columnData.length; ++i) {
      const currentSequence = columnData[i];
      mw[i] = currentSequence == null ? 0 : getSequenceMolecularWeight(currentSequence);
    }
    columns.push(DG.Column.fromFloat32Array('~MW', mw));

    const edf = DG.DataFrame.fromColumns(columns);

    // Add new axes, replacing the ones left from a previous run.
    for (const axis of axesNames) {
      const col = table.col(axis);

      if (col == null) {
        table.columns.insert(edf.getCol(axis));
      } else {
        table.columns.replace(col, edf.getCol(axis));
      }
    }

    const viewerOptions = {x: '~X', y: '~Y', color: activityColumnName ?? '~MW', size: '~MW'};
    const viewer = view !== null ?
      view.addViewer(DG.VIEWER.SCATTER_PLOT, viewerOptions) : DG.Viewer.scatterPlot(table, viewerOptions);

    // TODO: fitting the view to the '~X'/'~Y' ranges when `zoom` is set is disabled for now.

    return (viewer as DG.ScatterPlotViewer);
  } finally {
    // Always release the progress indicator, including when the worker rejects.
    pi.close();
  }
}
|
|
130
|
+
|
|
131
|
+
/**
 * Controls creation of the peptide similarity space viewer.
 *
 * The scatter plot lives inside a host element (`viewer`) that is part of the
 * widget, so that redrawing on an option change replaces what is displayed.
 *
 * @export
 * @class PeptideSimilaritySpaceWidget
 */
export class PeptideSimilaritySpaceWidget {
  protected method: string;
  protected metrics: string;
  protected cycles: number = 100;
  protected currentDf: DG.DataFrame;
  protected alignedSequencesColumn: DG.Column;
  protected availableMethods: string[];
  protected availableMetrics: string[];
  /** Host element holding the root of the current scatter plot. */
  protected viewer: HTMLElement;
  view: DG.TableView;

  /**
   * Creates an instance of PeptideSimilaritySpaceWidget.
   * @param {DG.Column} alignedSequencesColumn The column to get amino acid sequences from.
   * @param {DG.TableView} view Current view
   * @memberof PeptideSimilaritySpaceWidget
   */
  constructor(alignedSequencesColumn: DG.Column, view: DG.TableView) {
    this.availableMethods = DimensionalityReducer.availableMethods;
    this.availableMetrics = Measurer.availableMeasures;
    this.method = this.availableMethods[0];
    this.metrics = this.availableMetrics[0];
    this.currentDf = alignedSequencesColumn.dataFrame;
    this.alignedSequencesColumn = alignedSequencesColumn;
    this.viewer = ui.div([]);
    this.view = view;
  }

  /**
   * Creates the scatter plot for the current method, metric and cycles count.
   *
   * @return {Promise<DG.Viewer>} the viewer.
   * @memberof PeptideSimilaritySpaceWidget
   */
  public async drawViewer(): Promise<DG.Viewer> {
    const viewer = await createPeptideSimilaritySpaceViewer(
      this.currentDf,
      this.alignedSequencesColumn,
      this.method,
      this.metrics,
      this.cycles,
      null,
      null,
      true,
    );
    viewer.root.style.width = 'auto';
    return viewer;
  }

  /**
   * Puts the given viewer into the host element, dropping whatever was shown before.
   *
   * @private
   * @param {DG.Viewer} viewer The viewer to display.
   * @memberof PeptideSimilaritySpaceWidget
   */
  private showViewer(viewer: DG.Viewer): void {
    while (this.viewer.lastChild) {
      this.viewer.lastChild.remove();
    }
    this.viewer.appendChild(viewer.root);
  }

  /**
   * Updates the viewer on options changes.
   * The previous plot stays visible until the new one is ready.
   *
   * @protected
   * @memberof PeptideSimilaritySpaceWidget
   */
  protected async updateViewer() {
    const viewer = await this.drawViewer();
    this.showViewer(viewer);
  }

  /**
   * Adds input controls to manage the viewer's parameters.
   * Every change stores the new value and redraws the viewer.
   *
   * @protected
   * @return {Promise<HTMLElement>} Bunch of control elements.
   * @memberof PeptideSimilaritySpaceWidget
   */
  protected async drawInputs(): Promise<HTMLElement> {
    const methodsList = ui.choiceInput('Embedding method', this.method, this.availableMethods,
      async (currentMethod: string) => {
        this.method = currentMethod;
        await this.updateViewer();
      },
    );
    methodsList.setTooltip('Embedding method to apply to the dataset.');

    const metricsList = ui.choiceInput('Distance metric', this.metrics, this.availableMetrics,
      async (currentMetrics: string) => {
        this.metrics = currentMetrics;
        await this.updateViewer();
      },
    );
    metricsList.setTooltip('Custom distance metric to pass to the embedding procedure.');

    const cyclesSlider = ui.intInput('Cycles count', this.cycles,
      async (currentCycles: number) => {
        this.cycles = currentCycles;
        await this.updateViewer();
      },
    );
    cyclesSlider.setTooltip('Number of cycles affects the embedding quality.');

    return ui.inputs([methodsList, metricsList, cyclesSlider]);
  }

  /**
   * Draws a viewer on property panel.
   *
   * The plot is placed inside the host element rather than directly into the
   * widget; otherwise later redraws would modify a detached element and the
   * displayed plot would never change.
   *
   * @return {Promise<DG.Widget>} The corresponding widget.
   * @memberof PeptideSimilaritySpaceWidget
   */
  public async draw(): Promise<DG.Widget> {
    this.showViewer(await this.drawViewer());
    const inputs = await this.drawInputs();
    return new DG.Widget(ui.divV([this.viewer, inputs]));
  }
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
/**
 * Splits aligned peptide sequences ('-'-separated monomers) into per-position columns.
 *
 * The expected number of parts is the most frequent one among all rows (ties
 * go to the larger count). Rows with a different number of parts are reported
 * as invalid; they are padded with '-' or truncated, while their last part
 * always goes to the last column. Empty parts are replaced with '-'.
 *
 * Columns are named 'N-terminal', '01', '02', ..., 'C-terminal'. With `filter`
 * set, columns holding a single distinct value are dropped and the remaining
 * columns keep their own names.
 *
 * An empty column is not supported: the most frequent length is undefined
 * and the reduction over an empty list throws.
 *
 * @export
 * @param {DG.Column} peptideColumn Column with aligned sequences.
 * @param {boolean} [filter=true] Whether to drop columns with a single distinct value.
 * @return {[DG.DataFrame, number[]]} The split data frame and indexes of invalid rows.
 */
export function splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean = true): [DG.DataFrame, number[]] {
  const colLength = peptideColumn.length;
  const splitPeptidesArray: string[][] = [];

  // splitting data and counting how often each number of parts occurs
  const monomerLengths: {[index: string]: number} = {};
  for (let i = 0; i < colLength; i++) {
    const parts: string[] = peptideColumn.get(i).split('-').map((value: string) => value ? value : '-');
    splitPeptidesArray.push(parts);
    const key = `${parts.length}`;
    monomerLengths[key] = (monomerLengths[key] ?? 0) + 1;
  }
  const modeKey = Object.keys(monomerLengths).reduce((a, b) => monomerLengths[a] > monomerLengths[b] ? a : b);
  const columnCount = Number(modeKey);
  const lastColumn = columnCount - 1; // the last part of every sequence goes here

  // making sure all of the sequences are of the same size
  // and marking invalid sequences
  const invalidIndexes: number[] = [];
  let splitColumns: string[][] = Array.from({length: columnCount}, () => []);
  for (let i = 0; i < colLength; i++) {
    const parts = splitPeptidesArray[i];
    const lastPart = parts.pop()!; // split() always yields at least one element
    if (parts.length !== lastColumn) {
      invalidIndexes.push(i);
    }
    for (let j = 0; j < lastColumn; j++) {
      splitColumns[j].push(j < parts.length ? parts[j] : '-');
    }
    splitColumns[lastColumn].push(lastPart);
  }

  // create column names list: terminals around zero-padded position numbers
  let columnNames = Array.from({length: columnCount - 2}, (_, index) => `${index + 1 < 10 ? 0 : ''}${index + 1}`);
  columnNames.splice(0, 0, 'N-terminal');
  columnNames.push('C-terminal');

  // filter out the columns with the same values; names are collected together
  // with the data so that every surviving column keeps its own name
  if (filter) {
    const keptColumns: string[][] = [];
    const keptNames: string[] = [];
    splitColumns.forEach((positionArray, index) => {
      if (new Set(positionArray).size > 1) {
        keptColumns.push(positionArray);
        keptNames.push(columnNames[index]);
      }
    });
    splitColumns = keptColumns;
    columnNames = keptNames;
  }

  const columns = splitColumns.map((positionArray, index) => {
    return DG.Column.fromList('string', columnNames[index], positionArray);
  });
  return [DG.DataFrame.fromColumns(columns), invalidIndexes];
}
|
|
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
import $ from 'cash-dom';
|
|
5
5
|
|
|
6
6
|
import * as logojs from 'logojs-react';
|
|
7
|
-
import {splitAlignedPeptides} from '../split-aligned';
|
|
7
|
+
import {splitAlignedPeptides} from '../utils/split-aligned';
|
|
8
8
|
import {ChemPalette} from '../utils/chem-palette';
|
|
9
9
|
|
|
10
10
|
export class Logo extends DG.JsViewer {
|
|
@@ -31,6 +31,7 @@ export class Logo extends DG.JsViewer {
|
|
|
31
31
|
'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'K': 9, 'L': 10, 'M': 11,
|
|
32
32
|
'N': 12, 'P': 13, 'Q': 14, 'R': 15, 'S': 16, 'T': 17, 'U': 18, 'V': 19, 'W': 20, 'Y': 21, 'Z': 22,
|
|
33
33
|
};
|
|
34
|
+
//TODO: use chem palette
|
|
34
35
|
this.LET_COLORS = [
|
|
35
36
|
{color: 'rgb(44,160,44)', regex: 'A'},
|
|
36
37
|
{color: 'rgb(44,160,44)', regex: 'B'},
|
|
@@ -63,9 +64,10 @@ export class Logo extends DG.JsViewer {
|
|
|
63
64
|
// this.reactHost = ui.div([]);
|
|
64
65
|
console.log('INIT');
|
|
65
66
|
this.target = this.dataFrame;
|
|
66
|
-
this.splitted = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
67
|
+
[this.splitted] = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
67
68
|
this.root.style.width = 'auto';
|
|
68
69
|
this.root.style.height = 'auto';
|
|
70
|
+
this.root.style.maxHeight = '200px';
|
|
69
71
|
}
|
|
70
72
|
|
|
71
73
|
onTableAttached() {
|
|
@@ -103,8 +105,8 @@ export class Logo extends DG.JsViewer {
|
|
|
103
105
|
.aggregate();
|
|
104
106
|
}
|
|
105
107
|
if (selected) {
|
|
106
|
-
this.splitted = splitAlignedPeptides(this.target!.columns.bySemType(this.colSemType));
|
|
107
|
-
} else this.splitted = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
108
|
+
[this.splitted] = splitAlignedPeptides(this.target!.columns.bySemType(this.colSemType));
|
|
109
|
+
} else [this.splitted] = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
108
110
|
$(this.root).empty();
|
|
109
111
|
|
|
110
112
|
if (typeof this.dataFrame !== 'undefined') {
|