@datagrok/peptides 0.0.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -1
- package/detectors.js +2 -2
- package/package.json +19 -12
- package/scripts/smiles-to-3D.py +13 -0
- package/src/{peptide-sar-viewer/describe.ts → describe.ts} +104 -78
- package/src/package.ts +79 -137
- package/src/peptides.ts +76 -0
- package/src/utils/cell-renderer.ts +77 -101
- package/src/utils/chem-palette.ts +80 -53
- package/src/utils/correlation-analysis.ts +126 -0
- package/src/utils/molecular-measure.ts +175 -0
- package/src/utils/peptide-similarity-space.ts +242 -0
- package/src/utils/split-aligned.ts +65 -0
- package/src/{peptide-logo-viewer → viewers}/logo-viewer.ts +6 -4
- package/src/viewers/model.ts +76 -0
- package/src/{peptide-sar-viewer → viewers}/sar-viewer.ts +67 -23
- package/src/{stacked-barchart → viewers}/stacked-barchart-viewer.ts +29 -31
- package/src/widgets/analyze-peptides.ts +87 -0
- package/src/widgets/manual-alignment.ts +36 -0
- package/src/widgets/peptide-molecule.ts +42 -0
- package/src/workers/dimensionality-reducer.ts +29 -0
- package/tsconfig.json +12 -13
- package/webpack.config.js +4 -4
- package/src/split-aligned.ts +0 -42
- package/src/utils/misc.ts +0 -101
package/.eslintrc.json
CHANGED
package/detectors.js
CHANGED
|
@@ -3,7 +3,7 @@ class PeptidesPackageDetectors extends DG.Package {
|
|
|
3
3
|
//input: column col
|
|
4
4
|
//output: string semType
|
|
5
5
|
detectAligned(col) {
|
|
6
|
-
const regexp = new RegExp(
|
|
7
|
-
return DG.Detector.sampleCategories(col, (s) =>
|
|
6
|
+
const regexp = new RegExp(/^([^-^\n]*-){2,49}(\w|\(|\))+$/);
|
|
7
|
+
return DG.Detector.sampleCategories(col, (s) => regexp.test(s.trim())) ? 'alignedSequence' : null;
|
|
8
8
|
}
|
|
9
9
|
}
|
package/package.json
CHANGED
|
@@ -1,27 +1,30 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/peptides",
|
|
3
|
-
"
|
|
4
|
-
"version": "0.0.1",
|
|
3
|
+
"version": "0.4.2",
|
|
5
4
|
"description": "",
|
|
6
5
|
"dependencies": {
|
|
6
|
+
"@keckelt/tsne": "^1.0.2",
|
|
7
7
|
"cash-dom": "latest",
|
|
8
8
|
"d3": "latest",
|
|
9
|
-
"datagrok-api": "
|
|
9
|
+
"datagrok-api": ">=0.95.11",
|
|
10
10
|
"dayjs": "latest",
|
|
11
|
+
"jaro-winkler-typescript": "^1.0.1",
|
|
11
12
|
"jstat": "^1.9.5",
|
|
12
13
|
"logojs-react": "^2.1.1",
|
|
13
|
-
"rxjs": "^
|
|
14
|
-
"
|
|
14
|
+
"rxjs": "^6.5.5",
|
|
15
|
+
"umap-js": "^1.3.3",
|
|
16
|
+
"@datagrok-libraries/utils": ">=0.0.11",
|
|
17
|
+
"@datagrok-libraries/statistics": ">=0.1.5",
|
|
18
|
+
"@types/d3": "^7.0.0",
|
|
19
|
+
"@types/jquery": "^3.5.6"
|
|
15
20
|
},
|
|
16
21
|
"devDependencies": {
|
|
17
|
-
"
|
|
18
|
-
"
|
|
22
|
+
"typescript": "^4.4.4",
|
|
23
|
+
"ts-loader": "^9.2.5",
|
|
19
24
|
"@typescript-eslint/eslint-plugin": "^4.29.1",
|
|
20
25
|
"@typescript-eslint/parser": "^4.29.1",
|
|
21
26
|
"eslint": "^7.32.0",
|
|
22
27
|
"eslint-config-google": "^0.14.0",
|
|
23
|
-
"ts-loader": "^9.2.5",
|
|
24
|
-
"typescript": "^4.4.3",
|
|
25
28
|
"webpack": "latest",
|
|
26
29
|
"webpack-cli": "latest"
|
|
27
30
|
},
|
|
@@ -31,6 +34,10 @@
|
|
|
31
34
|
"common/ngl_viewer/ngl.js"
|
|
32
35
|
],
|
|
33
36
|
"scripts": {
|
|
37
|
+
"link-utils": "npm link @datagrok-libraries/utils",
|
|
38
|
+
"link-statistics": "npm link @datagrok-libraries/statistics",
|
|
39
|
+
"link-api": "npm link datagrok-api",
|
|
40
|
+
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/statistics",
|
|
34
41
|
"install-dependencies": "npm install",
|
|
35
42
|
"debug-peptides": "grok publish --rebuild",
|
|
36
43
|
"release-peptides": "grok publish --rebuild --release",
|
|
@@ -42,7 +49,7 @@
|
|
|
42
49
|
"release-peptides-public": "grok publish public --rebuild --release",
|
|
43
50
|
"debug-peptides-local": "grok publish local --rebuild",
|
|
44
51
|
"release-peptides-local": "grok publish local --rebuild --release",
|
|
45
|
-
"lint": "eslint
|
|
46
|
-
"lint-fix": "eslint ./src
|
|
52
|
+
"lint": "eslint \"./src/**/*.ts\"",
|
|
53
|
+
"lint-fix": "eslint \"./src/**/*.ts\" --fix"
|
|
47
54
|
}
|
|
48
|
-
}
|
|
55
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#name: smiTo3D
|
|
2
|
+
#language: python
|
|
3
|
+
#input: string smiles
|
|
4
|
+
#output: string sdf
|
|
5
|
+
|
|
6
|
+
from rdkit.Chem import AllChem
|
|
7
|
+
from rdkit import Chem
|
|
8
|
+
|
|
9
|
+
mol = AllChem.MolFromSmiles(smiles)
|
|
10
|
+
AllChem.EmbedMolecule(mol, AllChem.ETKDG())
|
|
11
|
+
#AllChem.UFFOptimizeMolecule(mol)
|
|
12
|
+
#mol = Chem.RemoveHs(mol)
|
|
13
|
+
sdf = Chem.MolToMolBlock(mol)
|
|
@@ -1,13 +1,52 @@
|
|
|
1
|
+
// eslint-disable-next-line no-unused-vars
|
|
1
2
|
import * as grok from 'datagrok-api/grok';
|
|
2
3
|
import * as ui from 'datagrok-api/ui';
|
|
3
4
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import {splitAlignedPeptides} from '
|
|
5
|
-
import {tTest} from '
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
5
|
+
import {splitAlignedPeptides} from './utils/split-aligned';
|
|
6
|
+
import {tTest} from '@datagrok-libraries/statistics/src/tests';
|
|
7
|
+
import {fdrcorrection} from '@datagrok-libraries/statistics/src/multiple-tests';
|
|
8
|
+
import {ChemPalette} from './utils/chem-palette';
|
|
9
|
+
import {setAARRenderer} from './utils/cell-renderer';
|
|
8
10
|
|
|
9
11
|
const cp = new ChemPalette('grok');
|
|
10
12
|
|
|
13
|
+
const aarGroups = {
|
|
14
|
+
'R': 'PC',
|
|
15
|
+
'H': 'PC',
|
|
16
|
+
'K': 'PC',
|
|
17
|
+
'D': 'NC',
|
|
18
|
+
'E': 'NC',
|
|
19
|
+
'S': 'U',
|
|
20
|
+
'T': 'U',
|
|
21
|
+
'N': 'U',
|
|
22
|
+
'Q': 'U',
|
|
23
|
+
'C': 'SC',
|
|
24
|
+
'U': 'SC',
|
|
25
|
+
'G': 'SC',
|
|
26
|
+
'P': 'SC',
|
|
27
|
+
'A': 'H',
|
|
28
|
+
'V': 'H',
|
|
29
|
+
'I': 'H',
|
|
30
|
+
'L': 'H',
|
|
31
|
+
'M': 'H',
|
|
32
|
+
'F': 'H',
|
|
33
|
+
'Y': 'H',
|
|
34
|
+
'W': 'H',
|
|
35
|
+
'-': '-',
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
const groupDescription: {[key: string]: {'description': string, 'aminoAcids': string[]}} = {
|
|
39
|
+
'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
|
|
40
|
+
'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
|
|
41
|
+
'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
|
|
42
|
+
'SC': {'description': 'Special Cases', 'aminoAcids': ['C', 'U', 'G', 'P']},
|
|
43
|
+
'H': {
|
|
44
|
+
'description': 'Amino Acids with Hydrophobic Side Chain',
|
|
45
|
+
'aminoAcids': ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
|
|
46
|
+
},
|
|
47
|
+
'-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
|
|
48
|
+
};
|
|
49
|
+
|
|
11
50
|
export async function describe(
|
|
12
51
|
df: DG.DataFrame,
|
|
13
52
|
activityColumn: string,
|
|
@@ -15,19 +54,14 @@ export async function describe(
|
|
|
15
54
|
sourceGrid: DG.Grid,
|
|
16
55
|
twoColorMode: boolean,
|
|
17
56
|
initialBitset: DG.BitSet | null,
|
|
18
|
-
|
|
57
|
+
grouping: boolean,
|
|
58
|
+
): Promise<[DG.Grid, DG.Grid, DG.DataFrame, {[key: string]: string}]> {
|
|
19
59
|
//Split the aligned sequence into separate AARs
|
|
20
60
|
let splitSeqDf: DG.DataFrame | undefined;
|
|
61
|
+
let invalidIndexes: number[];
|
|
21
62
|
const col: DG.Column = df.columns.bySemType('alignedSequence');
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
splitSeqDf.name = 'Split sequence';
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
if (typeof splitSeqDf === 'undefined') {
|
|
28
|
-
return [null, null, null];
|
|
29
|
-
}
|
|
30
|
-
|
|
63
|
+
[splitSeqDf, invalidIndexes] = splitAlignedPeptides(col);
|
|
64
|
+
splitSeqDf.name = 'Split sequence';
|
|
31
65
|
const positionColumns = splitSeqDf.columns.names();
|
|
32
66
|
const activityColumnScaled = `${activityColumn}Scaled`;
|
|
33
67
|
const renderColNames: string[] = splitSeqDf.columns.names();
|
|
@@ -54,6 +88,7 @@ export async function describe(
|
|
|
54
88
|
setAARRenderer(col, sourceGrid);
|
|
55
89
|
}
|
|
56
90
|
}
|
|
91
|
+
|
|
57
92
|
if (sourceGrid) {
|
|
58
93
|
const colNames:string[] = [];
|
|
59
94
|
for (let i = 0; i < sourceGrid.columns.length; i++) {
|
|
@@ -109,6 +144,17 @@ export async function describe(
|
|
|
109
144
|
|
|
110
145
|
let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
|
|
111
146
|
|
|
147
|
+
//TODO: move to chem palette
|
|
148
|
+
let groupMapping: {[key: string]: string} = {};
|
|
149
|
+
if (grouping) {
|
|
150
|
+
groupMapping = aarGroups;
|
|
151
|
+
const aarCol = matrixDf.getCol(aminoAcidResidue);
|
|
152
|
+
aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
|
|
153
|
+
aarCol.compact();
|
|
154
|
+
} else {
|
|
155
|
+
Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
|
|
156
|
+
}
|
|
157
|
+
|
|
112
158
|
//statistics for specific AAR at a specific position
|
|
113
159
|
matrixDf = matrixDf.groupBy([positionColName, aminoAcidResidue])
|
|
114
160
|
.add('count', activityColumnScaled, 'Count')
|
|
@@ -129,6 +175,8 @@ export async function describe(
|
|
|
129
175
|
let otherActivity: number[];
|
|
130
176
|
let testResult;
|
|
131
177
|
let currentMeanDiff: number;
|
|
178
|
+
let pvalues: Float32Array = new Float32Array(matrixDf.rowCount).fill(1);
|
|
179
|
+
let pvalue = 1.;
|
|
132
180
|
|
|
133
181
|
const mdCol: DG.Column = matrixDf.columns.addNewFloat('Mean difference');
|
|
134
182
|
const pValCol: DG.Column = matrixDf.columns.addNewFloat('pValue');
|
|
@@ -137,14 +185,14 @@ export async function describe(
|
|
|
137
185
|
AAR = matrixDf.get(aminoAcidResidue, i);
|
|
138
186
|
|
|
139
187
|
//@ts-ignore
|
|
140
|
-
splitSeqDf.rows.select((row) => row[position] === AAR);
|
|
188
|
+
splitSeqDf.rows.select((row) => groupMapping[row[position]] === AAR);
|
|
141
189
|
currentActivity = splitSeqDf
|
|
142
190
|
.clone(splitSeqDf.selection, [activityColumnScaled])
|
|
143
191
|
.getCol(activityColumnScaled)
|
|
144
192
|
.toList();
|
|
145
193
|
|
|
146
194
|
//@ts-ignore
|
|
147
|
-
splitSeqDf.rows.select((row) => row[position] !== AAR);
|
|
195
|
+
splitSeqDf.rows.select((row) => groupMapping[row[position]] !== AAR);
|
|
148
196
|
otherActivity = splitSeqDf
|
|
149
197
|
.clone(splitSeqDf.selection, [activityColumnScaled])
|
|
150
198
|
.getCol(activityColumnScaled)
|
|
@@ -153,9 +201,18 @@ export async function describe(
|
|
|
153
201
|
testResult = tTest(currentActivity, otherActivity);
|
|
154
202
|
// testResult = uTest(currentActivity, otherActivity);
|
|
155
203
|
currentMeanDiff = testResult['Mean difference']!;
|
|
204
|
+
pvalue = testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'];
|
|
156
205
|
|
|
157
206
|
mdCol.set(i, currentMeanDiff);
|
|
158
|
-
|
|
207
|
+
pvalues[i] = pvalue;
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
if (true) {
|
|
211
|
+
pvalues = fdrcorrection(pvalues)[1];
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
for (let i = 0; i < pvalues.length; ++i) {
|
|
215
|
+
pValCol.set(i, pvalues[i]);
|
|
159
216
|
}
|
|
160
217
|
|
|
161
218
|
const statsDf = matrixDf.clone();
|
|
@@ -217,17 +274,14 @@ export async function describe(
|
|
|
217
274
|
SARVgrid.col('pValue')!.format = 'four digits after comma';
|
|
218
275
|
SARVgrid.col('pValue')!.name = 'P-Value';
|
|
219
276
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
if (
|
|
223
|
-
setAARRenderer(
|
|
224
|
-
break;
|
|
277
|
+
if (!grouping) {
|
|
278
|
+
let tempCol = matrixDf.columns.byName(aminoAcidResidue);
|
|
279
|
+
if (tempCol) {
|
|
280
|
+
setAARRenderer(tempCol, SARgrid);
|
|
225
281
|
}
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
setAARRenderer(col, SARVgrid);
|
|
230
|
-
break;
|
|
282
|
+
tempCol = sequenceDf.columns.byName(aminoAcidResidue);
|
|
283
|
+
if (tempCol) {
|
|
284
|
+
setAARRenderer(tempCol, SARgrid);
|
|
231
285
|
}
|
|
232
286
|
}
|
|
233
287
|
|
|
@@ -336,7 +390,6 @@ export async function describe(
|
|
|
336
390
|
const textNum = statsDf.groupBy([col]).where(query).aggregate().get(col, 0);
|
|
337
391
|
let text = `${col === 'Count' ? textNum : textNum.toFixed(5)}`;
|
|
338
392
|
|
|
339
|
-
//@ts-ignore: I'm sure it's gonna be fine, text contains a number
|
|
340
393
|
if (col === 'Count') {
|
|
341
394
|
text += ` / ${peptidesCount}`;
|
|
342
395
|
} else if (col === 'pValue') {
|
|
@@ -351,66 +404,39 @@ export async function describe(
|
|
|
351
404
|
}
|
|
352
405
|
if (
|
|
353
406
|
!cell.isColHeader &&
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
407
|
+
cell.tableColumn !== null &&
|
|
408
|
+
cell.tableColumn.name == aminoAcidResidue &&
|
|
409
|
+
cell.cell.value !== null &&
|
|
410
|
+
cell.tableRowIndex !== null
|
|
358
411
|
) {
|
|
359
|
-
|
|
412
|
+
if (grouping) {
|
|
413
|
+
const currentGroup = groupDescription[cell.cell.value];
|
|
414
|
+
const divText = ui.divText('Amino Acids in this group: ' + currentGroup['aminoAcids'].join(', '));
|
|
415
|
+
ui.tooltip.show(ui.divV([ui.h3(currentGroup['description']), divText]), x, y);
|
|
416
|
+
} else {
|
|
417
|
+
cp.showTooltip(cell, x, y);
|
|
418
|
+
}
|
|
360
419
|
}
|
|
361
420
|
return true;
|
|
362
421
|
};
|
|
363
422
|
SARgrid.onCellTooltip(onCellTooltipFunc);
|
|
364
423
|
SARVgrid.onCellTooltip(onCellTooltipFunc);
|
|
365
424
|
|
|
425
|
+
sourceGrid.onCellPrepare((cell) => {
|
|
426
|
+
const currentRowIndex = cell.tableRowIndex;
|
|
427
|
+
if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader) {
|
|
428
|
+
cell.style.backColor = DG.Color.lightLightGray;
|
|
429
|
+
}
|
|
430
|
+
});
|
|
431
|
+
|
|
366
432
|
for (const col of matrixDf.columns.names()) {
|
|
367
433
|
SARgrid.col(col)!.width = SARgrid.props.rowHeight;
|
|
368
434
|
}
|
|
369
435
|
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
//Selects best (by mean difference) amino acids in all positions in all categories(p-value)
|
|
374
|
-
function segregateBestAtAllCateg(originalDf: DG.DataFrame, twoColorMode:boolean):DG.DataFrame {
|
|
375
|
-
//todo: make with group by + refactor
|
|
376
|
-
const filteredDf = originalDf.clone(DG.BitSet.create(originalDf.rowCount, (i) => {
|
|
377
|
-
return originalDf.get('Count', i) > 3;
|
|
378
|
-
}));
|
|
379
|
-
const pValueFilteredDF = filteredDf.clone(DG.BitSet.create(filteredDf.rowCount, (i) => {
|
|
380
|
-
return filteredDf.get('pValue', i) >= 0.1 && filteredDf.get('Mean difference', i) > 0;
|
|
381
|
-
}));
|
|
382
|
-
let statsDfAgr = grok.data.joinTables(pValueFilteredDF, pValueFilteredDF.groupBy(['Position'])
|
|
383
|
-
.max('Mean difference')
|
|
384
|
-
.aggregate(), ['Mean difference', 'Position'],
|
|
385
|
-
['max(Mean difference)', 'Position'], pValueFilteredDF.columns.names(), [], 'inner', false);
|
|
386
|
-
//and 'pValue' > 0.1 'pValue' < ${coef}`
|
|
387
|
-
let lastCoef = 0.0;
|
|
388
|
-
[0.01, 0.05, 0.1].forEach((coef)=>{
|
|
389
|
-
const pValueFilteredDF = filteredDf.clone(DG.BitSet.create(filteredDf.rowCount, (i) => {
|
|
390
|
-
return filteredDf.get('pValue', i) >= lastCoef &&
|
|
391
|
-
filteredDf.get('pValue', i)< coef &&
|
|
392
|
-
(filteredDf.get('Mean difference', i) > 0 || !twoColorMode);
|
|
393
|
-
}));
|
|
394
|
-
statsDfAgr = statsDfAgr.append(grok.data.joinTables(pValueFilteredDF, pValueFilteredDF.groupBy(['Position'])
|
|
395
|
-
.max('Mean difference')
|
|
396
|
-
.aggregate(), ['Mean difference', 'Position'],
|
|
397
|
-
['max(Mean difference)', 'Position'], pValueFilteredDF.columns.names(), [], 'inner', false));
|
|
398
|
-
lastCoef = coef;
|
|
399
|
-
});
|
|
400
|
-
if (twoColorMode) {
|
|
401
|
-
lastCoef = 0.0;
|
|
402
|
-
[0.01, 0.05, 0.1, 1.01].forEach((coef) => {
|
|
403
|
-
const pValueFilteredDF = filteredDf.clone(DG.BitSet.create(filteredDf.rowCount, (i) => {
|
|
404
|
-
return filteredDf.get('pValue', i) >= lastCoef &&
|
|
405
|
-
filteredDf.get('pValue', i) < coef &&
|
|
406
|
-
filteredDf.get('Mean difference', i) <= 0;
|
|
407
|
-
}));
|
|
408
|
-
statsDfAgr = statsDfAgr.append(grok.data.joinTables(pValueFilteredDF, pValueFilteredDF.groupBy(['Position'])
|
|
409
|
-
.min('Mean difference')
|
|
410
|
-
.aggregate(), ['Mean difference', 'position'],
|
|
411
|
-
['min(Mean difference)', 'position'], pValueFilteredDF.columns.names(), [], 'inner', false));
|
|
412
|
-
lastCoef = coef;
|
|
413
|
-
});
|
|
436
|
+
if (grouping) {
|
|
437
|
+
SARgrid.col(aminoAcidResidue)!.name = 'Groups';
|
|
438
|
+
SARVgrid.col(aminoAcidResidue)!.name = 'Groups';
|
|
414
439
|
}
|
|
415
|
-
|
|
440
|
+
|
|
441
|
+
return [SARgrid, SARVgrid, statsDf, groupMapping];
|
|
416
442
|
}
|
package/src/package.ts
CHANGED
|
@@ -3,20 +3,25 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
|
|
6
|
-
import {SARViewerBase} from './peptide-sar-viewer/sar-viewer';
|
|
7
6
|
import {
|
|
8
7
|
AlignedSequenceCellRenderer,
|
|
9
8
|
AminoAcidsCellRenderer,
|
|
10
|
-
expandColumn,
|
|
11
|
-
processSequence,
|
|
12
9
|
} from './utils/cell-renderer';
|
|
13
|
-
import {Logo} from './
|
|
14
|
-
import {StackedBarChart
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
import {Logo} from './viewers/logo-viewer';
|
|
11
|
+
import {StackedBarChart} from './viewers/stacked-barchart-viewer';
|
|
12
|
+
|
|
13
|
+
import {analyzePeptidesWidget} from './widgets/analyze-peptides';
|
|
14
|
+
import {PeptideSimilaritySpaceWidget} from './utils/peptide-similarity-space';
|
|
15
|
+
import {manualAlignmentWidget} from './widgets/manual-alignment';
|
|
16
|
+
import {SARViewer, SARViewerVertical} from './viewers/sar-viewer';
|
|
17
|
+
import {peptideMoleculeWidget} from './widgets/peptide-molecule';
|
|
18
|
+
import {correlationAnalysisPlots} from './utils/correlation-analysis';
|
|
17
19
|
|
|
18
20
|
export const _package = new DG.Package();
|
|
19
21
|
let tableGrid: DG.Grid;
|
|
22
|
+
let currentDf: DG.DataFrame;
|
|
23
|
+
let alignedSequenceCol: DG.Column;
|
|
24
|
+
let view: DG.TableView;
|
|
20
25
|
|
|
21
26
|
async function main(chosenFile: string) {
|
|
22
27
|
const pi = DG.TaskBarProgressIndicator.create('Loading Peptides');
|
|
@@ -28,25 +33,24 @@ async function main(chosenFile: string) {
|
|
|
28
33
|
peptides.setTag('dataType', 'peptides');
|
|
29
34
|
const view = grok.shell.addTableView(peptides);
|
|
30
35
|
tableGrid = view.grid;
|
|
31
|
-
peptides.onSemanticTypeDetecting.subscribe((_) => {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
}
|
|
49
|
-
);
|
|
36
|
+
// peptides.onSemanticTypeDetecting.subscribe((_: any) => {
|
|
37
|
+
// const regexp = new RegExp(/^([^-^\n]*-){2,49}(\w|\(|\))+$/);
|
|
38
|
+
// for (const col of peptides.columns) {
|
|
39
|
+
// col.semType = DG.Detector.sampleCategories(col, (s: any) => regexp.test(s.trim())) ? 'alignedSequence' : null;
|
|
40
|
+
// if (col.semType == 'alignedSequence') {
|
|
41
|
+
// expandColumn(col, tableGrid, (ent)=>{
|
|
42
|
+
// const subParts:string[] = ent.split('-');
|
|
43
|
+
// // eslint-disable-next-line no-unused-vars
|
|
44
|
+
// const [text, _] = processSequence(subParts);
|
|
45
|
+
// let textSize = 0;
|
|
46
|
+
// text.forEach((aar)=>{
|
|
47
|
+
// textSize += aar.length;
|
|
48
|
+
// });
|
|
49
|
+
// return textSize;
|
|
50
|
+
// });
|
|
51
|
+
// }
|
|
52
|
+
// }
|
|
53
|
+
// });
|
|
50
54
|
|
|
51
55
|
view.name = 'PeptidesView';
|
|
52
56
|
|
|
@@ -55,7 +59,7 @@ async function main(chosenFile: string) {
|
|
|
55
59
|
pi.close();
|
|
56
60
|
}
|
|
57
61
|
|
|
58
|
-
//name: Peptides
|
|
62
|
+
//name: Peptides App
|
|
59
63
|
//tags: app
|
|
60
64
|
export function Peptides() {
|
|
61
65
|
const wikiLink = ui.link('wiki', 'https://github.com/datagrok-ai/public/blob/master/help/domains/bio/peptides.md');
|
|
@@ -82,7 +86,6 @@ export function Peptides() {
|
|
|
82
86
|
'Use and analyse peptide sequence data to support your research:',
|
|
83
87
|
);
|
|
84
88
|
|
|
85
|
-
|
|
86
89
|
const annotationViewerDiv = ui.div();
|
|
87
90
|
|
|
88
91
|
const windows = grok.shell.windows;
|
|
@@ -94,7 +97,6 @@ export function Peptides() {
|
|
|
94
97
|
grok.shell.newView('Peptides', [
|
|
95
98
|
appDescription,
|
|
96
99
|
ui.info([textLink]),
|
|
97
|
-
//ui.h2('Choose .csv file'),
|
|
98
100
|
ui.div([
|
|
99
101
|
ui.block25([
|
|
100
102
|
ui.button('Open peptide sequences demonstration set', () => main('aligned.csv'), ''),
|
|
@@ -110,106 +112,34 @@ export function Peptides() {
|
|
|
110
112
|
//tags: panel, widgets
|
|
111
113
|
//input: column col {semType: alignedSequence}
|
|
112
114
|
//output: widget result
|
|
113
|
-
export async function
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
const defaultColumn: DG.Column = col.dataFrame.col('activity') || col.dataFrame.col('IC50') || tempCol;
|
|
120
|
-
|
|
121
|
-
const activityScalingMethod = ui.choiceInput('Activity scaling', 'none', ['none', 'lg', '-lg']);
|
|
122
|
-
activityScalingMethod.setTooltip('Function to apply for each value in activity column');
|
|
123
|
-
|
|
124
|
-
const activityScalingMethodState = function(_: any) {
|
|
125
|
-
activityScalingMethod.enabled =
|
|
126
|
-
activityColumnChoice.value && DG.Stats.fromColumn(activityColumnChoice.value, col.dataFrame.filter).min > 0;
|
|
127
|
-
};
|
|
128
|
-
const activityColumnChoice = ui.columnInput(
|
|
129
|
-
'Activity column',
|
|
130
|
-
col.dataFrame,
|
|
131
|
-
defaultColumn,
|
|
132
|
-
activityScalingMethodState,
|
|
133
|
-
);
|
|
134
|
-
activityColumnChoice.fireChanged();
|
|
135
|
-
|
|
136
|
-
const startBtn = ui.button('Launch SAR', async () => {
|
|
137
|
-
if (activityColumnChoice.value.type === DG.TYPE.FLOAT) {
|
|
138
|
-
const options = {
|
|
139
|
-
'activityColumnColumnName': activityColumnChoice.value.name,
|
|
140
|
-
'activityScalingMethod': activityScalingMethod.value,
|
|
141
|
-
};
|
|
142
|
-
// @ts-ignore
|
|
143
|
-
for (let i = 0; i < tableGrid.columns.length; i++) {
|
|
144
|
-
const col = tableGrid.columns.byIndex(i);
|
|
145
|
-
if (col &&
|
|
146
|
-
col.name &&
|
|
147
|
-
col.name != 'IC50'&&
|
|
148
|
-
col.column?.semType != 'aminoAcids') {
|
|
149
|
-
// @ts-ignore
|
|
150
|
-
tableGrid.columns.byIndex(i)?.visible = false;
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
//await describe(col.dataFrame, activityColumnChoice.value.name, activityScalingMethod.value, false, tableGrid);
|
|
155
|
-
|
|
156
|
-
// @ts-ignore
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
// const viewer = DG.Viewer.fromType('peptide-sar-viewer', tableGrid.table, options);
|
|
160
|
-
// (grok.shell.v as DG.TableView).addViewer(viewer);
|
|
161
|
-
// const refNode = (grok.shell.v as DG.TableView).dockManager.dock(viewer, 'right');
|
|
162
|
-
|
|
163
|
-
// const hist = DG.Viewer.fromType('peptide-sar-viewer-vertical', tableGrid.table, options);
|
|
164
|
-
// (grok.shell.v as DG.TableView).addViewer(hist);
|
|
165
|
-
// (grok.shell.v as DG.TableView).dockManager.dock(hist, DG.DOCK_TYPE.DOWN, refNode);
|
|
166
|
-
|
|
167
|
-
(grok.shell.v as DG.TableView).addViewer('peptide-sar-viewer', options);
|
|
168
|
-
// (grok.shell.v as DG.TableView).addViewer('peptide-sar-viewer-vertical', options);
|
|
169
|
-
// @ts-ignore
|
|
170
|
-
//view.dockManager.dock(ui.divText('bottom'), 'down');
|
|
171
|
-
|
|
172
|
-
// @ts-ignore
|
|
173
|
-
//console.error(sarViewer.view.dockNode);
|
|
174
|
-
|
|
175
|
-
const StackedBarchartProm = col.dataFrame.plot.fromType('StackedBarChartAA');
|
|
176
|
-
addViewerToHeader(tableGrid, StackedBarchartProm);
|
|
177
|
-
|
|
178
|
-
// tableGrid.dataFrame!.columns.names().forEach((name:string)=>{
|
|
179
|
-
// col = tableGrid.dataFrame!.columns.byName(name);
|
|
180
|
-
// if (col.semType == 'aminoAcids') {
|
|
181
|
-
// let maxLen = 0;
|
|
182
|
-
// col.categories.forEach( (ent:string)=>{
|
|
183
|
-
// if ( ent.length> maxLen) {
|
|
184
|
-
// maxLen = ent.length;
|
|
185
|
-
// }
|
|
186
|
-
// });
|
|
187
|
-
// tableGrid.columns.byName(name)!.width = maxLen*10;
|
|
188
|
-
// }
|
|
189
|
-
// });
|
|
190
|
-
} else {
|
|
191
|
-
grok.shell.error('The activity column must be of floating point number type!');
|
|
192
|
-
}
|
|
193
|
-
});
|
|
194
|
-
|
|
195
|
-
const viewer = await col.dataFrame.plot.fromType('peptide-logo-viewer');
|
|
196
|
-
|
|
197
|
-
return new DG.Widget(ui.divV([viewer.root, ui.inputs([activityColumnChoice, activityScalingMethod]), startBtn]));
|
|
115
|
+
export async function peptidesPanel(col: DG.Column): Promise<DG.Widget> {
|
|
116
|
+
view = (grok.shell.v as DG.TableView);
|
|
117
|
+
tableGrid = view.grid;
|
|
118
|
+
currentDf = col.dataFrame;
|
|
119
|
+
alignedSequenceCol = col;
|
|
120
|
+
return await analyzePeptidesWidget(col, view, tableGrid, currentDf);
|
|
198
121
|
}
|
|
199
122
|
|
|
200
123
|
//name: peptide-sar-viewer
|
|
201
124
|
//description: Peptides SAR Viewer
|
|
202
125
|
//tags: viewer
|
|
203
126
|
//output: viewer result
|
|
204
|
-
export function sar():
|
|
205
|
-
return new
|
|
127
|
+
export function sar(): SARViewer {
|
|
128
|
+
return new SARViewer();
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
//name: peptide-sar-viewer-vertical
|
|
132
|
+
//description: Peptides Vertical SAR Viewer
|
|
133
|
+
//tags: viewer
|
|
134
|
+
//output: viewer result
|
|
135
|
+
export function sarVertical(): SARViewerVertical {
|
|
136
|
+
return new SARViewerVertical();
|
|
206
137
|
}
|
|
207
138
|
|
|
208
139
|
//name: StackedBarchart Widget
|
|
209
140
|
//tags: panel, widgets
|
|
210
141
|
//input: column col {semType: aminoAcids}
|
|
211
142
|
//output: widget result
|
|
212
|
-
|
|
213
143
|
export async function stackedBarchartWidget(col: DG.Column): Promise<DG.Widget> {
|
|
214
144
|
const viewer = await col.dataFrame.plot.fromType('StackedBarChartAA');
|
|
215
145
|
const panel = ui.divH([viewer.root]);
|
|
@@ -218,30 +148,13 @@ export async function stackedBarchartWidget(col: DG.Column): Promise<DG.Widget>
|
|
|
218
148
|
|
|
219
149
|
//name: Peptide Molecule
|
|
220
150
|
//tags: panel, widgets
|
|
221
|
-
//input: string
|
|
151
|
+
//input: string peptide {semType: alignedSequence}
|
|
222
152
|
//output: widget result
|
|
223
|
-
export async function
|
|
224
|
-
|
|
225
|
-
const mols = [];
|
|
226
|
-
for (let i = 1; i < split.length - 1; i++) {
|
|
227
|
-
if (split[i] in ChemPalette.AASmiles) {
|
|
228
|
-
const aar = ChemPalette.AASmiles[split[i]];
|
|
229
|
-
mols[i] = aar.substr(0, aar.length - 1);
|
|
230
|
-
} else if (!split[i] || split[i] == '-') {
|
|
231
|
-
mols[i] = '';
|
|
232
|
-
} else {
|
|
233
|
-
return new DG.Widget(ui.divH([]));
|
|
234
|
-
}
|
|
235
|
-
}
|
|
236
|
-
console.error(mols);
|
|
237
|
-
console.error(mols.join('') + 'COOH');
|
|
238
|
-
const sketch = grok.chem.svgMol(mols.join('') + 'O');
|
|
239
|
-
const panel = ui.divH([sketch]);
|
|
240
|
-
return new DG.Widget(panel);
|
|
153
|
+
export async function peptideMolecule(peptide: string): Promise<DG.Widget> {
|
|
154
|
+
return await peptideMoleculeWidget(peptide);
|
|
241
155
|
}
|
|
242
156
|
|
|
243
157
|
//name: StackedBarChartAA
|
|
244
|
-
//description: Creates an awesome viewer
|
|
245
158
|
//tags: viewer
|
|
246
159
|
//output: viewer result
|
|
247
160
|
export function stackedBarChart(): DG.JsViewer {
|
|
@@ -270,3 +183,32 @@ export function aminoAcidsCellRenderer() {
|
|
|
270
183
|
export function logov() {
|
|
271
184
|
return new Logo();
|
|
272
185
|
}
|
|
186
|
+
|
|
187
|
+
//name: Manual Alignment
|
|
188
|
+
//tags: panel, widgets
|
|
189
|
+
//input: string monomer {semType: aminoAcids}
|
|
190
|
+
//output: widget result
|
|
191
|
+
export function manualAlignment(monomer: string) {
|
|
192
|
+
return manualAlignmentWidget(alignedSequenceCol, currentDf);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
//name: Peptide Space
|
|
196
|
+
//tags: panel, widgets
|
|
197
|
+
//input: column col {semType: alignedSequence}
|
|
198
|
+
//output: widget result
|
|
199
|
+
export async function peptideSpacePanel(col: DG.Column): Promise<DG.Widget> {
|
|
200
|
+
const widget = new PeptideSimilaritySpaceWidget(col, view ?? grok.shell.v);
|
|
201
|
+
return await widget.draw();
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
//name: Correllation analysis
|
|
205
|
+
export async function correlationAnalysis() {
|
|
206
|
+
view = (grok.shell.v as DG.TableView);
|
|
207
|
+
|
|
208
|
+
const df = await grok.data.files.openTable('Demo:TestJobs:Files:DemoFiles/bio/peptides.csv');
|
|
209
|
+
const tview = grok.shell.addTableView(df);
|
|
210
|
+
const [cpviewer, bpviewer] = correlationAnalysisPlots(df.getCol('AlignedSequence'));
|
|
211
|
+
|
|
212
|
+
tview.dockManager.dock(cpviewer, 'right');
|
|
213
|
+
tview.dockManager.dock(bpviewer, 'down');
|
|
214
|
+
}
|