@datagrok/peptides 0.6.1 → 0.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +12 -2
- package/detectors.js +1 -1
- package/files/aligned_2.csv +1 -1
- package/package.json +27 -18
- package/setup.sh +15 -0
- package/src/describe.ts +219 -123
- package/src/{viewers/model.ts → model.ts} +1 -1
- package/src/monomer-library.ts +187 -0
- package/src/package-test.ts +18 -0
- package/src/package.ts +45 -19
- package/src/peptides.ts +68 -37
- package/src/semantics.ts +5 -0
- package/src/styles.css +9 -0
- package/src/tests/peptide-space-test.ts +40 -0
- package/src/tests/peptides-tests.ts +120 -0
- package/src/tests/utils.ts +86 -0
- package/src/utils/cell-renderer.ts +27 -26
- package/src/utils/chem-palette.ts +5 -6
- package/src/utils/molecular-measure.ts +3 -4
- package/src/utils/multiple-sequence-alignment.ts +89 -0
- package/src/utils/peptide-similarity-space.ts +22 -41
- package/src/utils/split-aligned.ts +6 -6
- package/src/viewers/logo-viewer.ts +10 -12
- package/src/viewers/sar-viewer.ts +191 -182
- package/src/viewers/stacked-barchart-viewer.ts +21 -95
- package/src/viewers/subst-viewer.ts +10 -10
- package/src/widgets/analyze-peptides.ts +16 -7
- package/src/widgets/manual-alignment.ts +7 -4
- package/src/widgets/peptide-molecule.ts +21 -13
- package/src/workers/dimensionality-reducer.ts +2 -2
- package/tsconfig.json +1 -1
- package/webpack.config.js +16 -2
- package/src/utils/correlation-analysis.ts +0 -204
- package/src/viewers/spiral-plot.ts +0 -97
package/.eslintrc.json
CHANGED
|
@@ -23,7 +23,17 @@
|
|
|
23
23
|
"error",
|
|
24
24
|
120
|
|
25
25
|
],
|
|
26
|
+
"require-jsdoc": "off",
|
|
26
27
|
"spaced-comment": "off",
|
|
27
|
-
"
|
|
28
|
+
"linebreak-style": "off",
|
|
29
|
+
"curly": [
|
|
30
|
+
"error",
|
|
31
|
+
"multi-or-nest"
|
|
32
|
+
],
|
|
33
|
+
"brace-style": [
|
|
34
|
+
"error",
|
|
35
|
+
"1tbs",
|
|
36
|
+
{ "allowSingleLine": true }
|
|
37
|
+
]
|
|
28
38
|
}
|
|
29
|
-
}
|
|
39
|
+
}
|
package/detectors.js
CHANGED
|
@@ -3,7 +3,7 @@ class PeptidesPackageDetectors extends DG.Package {
|
|
|
3
3
|
//input: column col
|
|
4
4
|
//output: string semType
|
|
5
5
|
detectAligned(col) {
|
|
6
|
-
const regexp = new RegExp(/^([
|
|
6
|
+
const regexp = new RegExp(/^([^\-\n]*-){7,49}(\w|\(|\))+$/);
|
|
7
7
|
return DG.Detector.sampleCategories(col, (s) => regexp.test(s.trim())) ? 'alignedSequence' : null;
|
|
8
8
|
}
|
|
9
9
|
}
|
package/files/aligned_2.csv
CHANGED
|
@@ -10272,4 +10272,4 @@ ID,AlignedSequence,Measured,Value
|
|
|
10272
10272
|
1357,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-H-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,86.31581627936768
|
|
10273
10273
|
1359,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-homobAla-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,93.44441627936769
|
|
10274
10274
|
1360,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-D(NPyr)-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,88.34951627936769
|
|
10275
|
-
1361,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-3OHPhe-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,98.38061627936769
|
|
10275
|
+
1361,HOC18gEPEG2PEG2-k(Me3)-Aib-K(PEG2PEG2PgEC18OH)-Hyp-7(4OCF3Ph)W-K(Me)3-S5H(4)-mTyr-6OH2Nal 6OH2Nal-aMeK-S5H-3Pya-3OHPhe-4diFPro-PEG2PEG2eKC16OH-ON(Me)2A,SGF (No protein present) % Remaining 24 hr,98.38061627936769
|
package/package.json
CHANGED
|
@@ -1,32 +1,33 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/peptides",
|
|
3
|
-
"version": "0.6
|
|
3
|
+
"version": "0.8.6",
|
|
4
4
|
"description": "",
|
|
5
5
|
"dependencies": {
|
|
6
|
-
"@
|
|
6
|
+
"@biowasm/aioli": ">=2.4.0",
|
|
7
|
+
"@datagrok-libraries/bio": ">=0.0.4",
|
|
8
|
+
"@datagrok-libraries/ml": ">=0.0.10",
|
|
9
|
+
"@datagrok-libraries/statistics": ">=0.1.5",
|
|
10
|
+
"@datagrok-libraries/utils": ">=0.0.18",
|
|
11
|
+
"@types/d3": "^7.0.0",
|
|
12
|
+
"@types/jquery": "^3.5.6",
|
|
7
13
|
"cash-dom": "latest",
|
|
8
14
|
"d3": "latest",
|
|
9
|
-
"datagrok-api": ">=0.
|
|
15
|
+
"datagrok-api": ">=0.115.0",
|
|
10
16
|
"dayjs": "latest",
|
|
11
|
-
"
|
|
17
|
+
"file-loader": "^6.2.0",
|
|
12
18
|
"jstat": "^1.9.5",
|
|
13
19
|
"logojs-react": "^2.1.1",
|
|
14
|
-
"rxjs": "^6.5.5"
|
|
15
|
-
"umap-js": "^1.3.3",
|
|
16
|
-
"@datagrok-libraries/utils": ">=0.0.13",
|
|
17
|
-
"@datagrok-libraries/statistics": ">=0.1.5",
|
|
18
|
-
"@types/d3": "^7.0.0",
|
|
19
|
-
"@types/jquery": "^3.5.6"
|
|
20
|
+
"rxjs": "^6.5.5"
|
|
20
21
|
},
|
|
21
22
|
"devDependencies": {
|
|
22
|
-
"typescript": "^4.4.4",
|
|
23
|
-
"ts-loader": "^9.2.5",
|
|
24
|
-
"css-loader": "^5.2.4",
|
|
25
|
-
"style-loader": "^2.0.0",
|
|
26
23
|
"@typescript-eslint/eslint-plugin": "^4.29.1",
|
|
27
24
|
"@typescript-eslint/parser": "^4.29.1",
|
|
25
|
+
"css-loader": "^5.2.4",
|
|
28
26
|
"eslint": "^7.32.0",
|
|
29
27
|
"eslint-config-google": "^0.14.0",
|
|
28
|
+
"style-loader": "^2.0.0",
|
|
29
|
+
"ts-loader": "^9.2.5",
|
|
30
|
+
"typescript": "^4.4.4",
|
|
30
31
|
"webpack": "latest",
|
|
31
32
|
"webpack-cli": "latest"
|
|
32
33
|
},
|
|
@@ -36,10 +37,12 @@
|
|
|
36
37
|
"common/ngl_viewer/ngl.js"
|
|
37
38
|
],
|
|
38
39
|
"scripts": {
|
|
40
|
+
"link-api": "npm link datagrok-api",
|
|
39
41
|
"link-utils": "npm link @datagrok-libraries/utils",
|
|
40
42
|
"link-statistics": "npm link @datagrok-libraries/statistics",
|
|
41
|
-
"link-
|
|
42
|
-
"link-
|
|
43
|
+
"link-ml": "npm link @datagrok-libraries/ml",
|
|
44
|
+
"link-bio": "npm link @datagrok-libraries/bio",
|
|
45
|
+
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/statistics link @datagrok-libraries/ml @datagrok-libraries/bio",
|
|
43
46
|
"install-dependencies": "npm install",
|
|
44
47
|
"debug-peptides": "grok publish --rebuild",
|
|
45
48
|
"release-peptides": "grok publish --rebuild --release",
|
|
@@ -53,5 +56,11 @@
|
|
|
53
56
|
"release-peptides-local": "grok publish local --rebuild --release",
|
|
54
57
|
"lint": "eslint \"./src/**/*.ts\"",
|
|
55
58
|
"lint-fix": "eslint \"./src/**/*.ts\" --fix"
|
|
56
|
-
}
|
|
57
|
-
|
|
59
|
+
},
|
|
60
|
+
"canEdit": [
|
|
61
|
+
"Developers"
|
|
62
|
+
],
|
|
63
|
+
"canView": [
|
|
64
|
+
"All users"
|
|
65
|
+
]
|
|
66
|
+
}
|
package/setup.sh
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
npm unlink datagrok-api
|
|
2
|
+
npm unlink @datagrok-libraries/utils
|
|
3
|
+
npm unlink @datagrok-libraries/ml
|
|
4
|
+
cd ../../js-api
|
|
5
|
+
npm install
|
|
6
|
+
npm link
|
|
7
|
+
cd ../libraries/utils
|
|
8
|
+
npm install
|
|
9
|
+
npm link
|
|
10
|
+
cd ../../libraries/ml
|
|
11
|
+
npm install
|
|
12
|
+
npm link datagrok-api @datagrok-libraries/utils
|
|
13
|
+
cd ../../packages/Peptides
|
|
14
|
+
npm install
|
|
15
|
+
npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/ml
|
package/src/describe.ts
CHANGED
|
@@ -53,71 +53,56 @@ const groupDescription: {[key: string]: {'description': string, 'aminoAcids': st
|
|
|
53
53
|
}
|
|
54
54
|
}*/
|
|
55
55
|
|
|
56
|
-
|
|
57
|
-
|
|
56
|
+
function joinDataFrames(
|
|
57
|
+
activityColumnScaled: string,
|
|
58
58
|
df: DG.DataFrame,
|
|
59
|
+
positionColumns: string[],
|
|
60
|
+
splitSeqDf: DG.DataFrame,
|
|
59
61
|
activityColumn: string,
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
initialBitset: DG.BitSet | null,
|
|
64
|
-
grouping: boolean,
|
|
65
|
-
): Promise<[DG.Grid, DG.Grid, DG.DataFrame, {[key: string]: string}]> {
|
|
66
|
-
//Split the aligned sequence into separate AARs
|
|
67
|
-
let splitSeqDf: DG.DataFrame | undefined;
|
|
68
|
-
let invalidIndexes: number[];
|
|
69
|
-
const col: DG.Column = df.columns.bySemType('alignedSequence');
|
|
70
|
-
[splitSeqDf, invalidIndexes] = splitAlignedPeptides(col);
|
|
71
|
-
splitSeqDf.name = 'Split sequence';
|
|
62
|
+
) {
|
|
63
|
+
if (df.col(activityColumnScaled))
|
|
64
|
+
(df.columns as DG.ColumnList).remove(activityColumnScaled);
|
|
72
65
|
|
|
73
|
-
const positionColumns = splitSeqDf.columns.names();
|
|
74
|
-
const activityColumnScaled = `${activityColumn}Scaled`;
|
|
75
|
-
const renderColNames: string[] = splitSeqDf.columns.names();
|
|
76
|
-
|
|
77
|
-
splitSeqDf.columns.add(df.getCol(activityColumn));
|
|
78
|
-
|
|
79
|
-
if (df.col(activityColumnScaled)) {
|
|
80
|
-
df.columns.remove(activityColumnScaled);
|
|
81
|
-
}
|
|
82
66
|
|
|
83
67
|
//FIXME: this column usually duplicates, so remove it then
|
|
84
|
-
if (df.col(`${activityColumnScaled} (2)`))
|
|
85
|
-
df.columns.remove(`${activityColumnScaled} (2)`);
|
|
86
|
-
|
|
68
|
+
if (df.col(`${activityColumnScaled} (2)`))
|
|
69
|
+
(df.columns as DG.ColumnList).remove(`${activityColumnScaled} (2)`);
|
|
70
|
+
|
|
87
71
|
|
|
88
72
|
// append splitSeqDf columns to source table and make sure columns are not added more than once
|
|
89
73
|
const dfColsSet = new Set(df.columns.names());
|
|
90
|
-
if (!positionColumns.every((col: string) => dfColsSet.has(col)))
|
|
74
|
+
if (!positionColumns.every((col: string) => dfColsSet.has(col)))
|
|
91
75
|
df.join(splitSeqDf, [activityColumn], [activityColumn], df.columns.names(), positionColumns, 'inner', true);
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
for (const col of df.columns) {
|
|
95
|
-
if (splitSeqDf.col(col.name) && col.name != activityColumn) {
|
|
96
|
-
setAARRenderer(col, sourceGrid);
|
|
97
|
-
}
|
|
98
|
-
}
|
|
76
|
+
}
|
|
99
77
|
|
|
78
|
+
function sortSourceGrid(sourceGrid: DG.Grid) {
|
|
100
79
|
if (sourceGrid) {
|
|
101
|
-
const colNames:
|
|
102
|
-
for (let i =
|
|
103
|
-
colNames.push(sourceGrid.columns.byIndex(i)
|
|
104
|
-
|
|
80
|
+
const colNames: DG.GridColumn[] = [];
|
|
81
|
+
for (let i = 1; i < sourceGrid.columns.length; i++)
|
|
82
|
+
colNames.push(sourceGrid.columns.byIndex(i)!);
|
|
83
|
+
|
|
105
84
|
colNames.sort((a, b)=>{
|
|
106
|
-
if (
|
|
107
|
-
if (
|
|
85
|
+
if (a.column!.semType == 'aminoAcids') {
|
|
86
|
+
if (b.column!.semType == 'aminoAcids')
|
|
108
87
|
return 0;
|
|
109
|
-
}
|
|
110
88
|
return -1;
|
|
111
89
|
}
|
|
112
|
-
if (
|
|
90
|
+
if (b.column!.semType == 'aminoAcids')
|
|
113
91
|
return 1;
|
|
114
|
-
}
|
|
115
92
|
return 0;
|
|
116
93
|
});
|
|
117
|
-
sourceGrid
|
|
94
|
+
sourceGrid.columns.setOrder(colNames.map((v) => v.name));
|
|
118
95
|
}
|
|
96
|
+
}
|
|
119
97
|
|
|
120
|
-
|
|
98
|
+
async function scaleActivity(
|
|
99
|
+
activityScaling: string,
|
|
100
|
+
activityColumn: string,
|
|
101
|
+
activityColumnScaled: string,
|
|
102
|
+
sourceGrid: DG.Grid,
|
|
103
|
+
splitSeqDf: DG.DataFrame,
|
|
104
|
+
) {
|
|
105
|
+
const df = sourceGrid.dataFrame!;
|
|
121
106
|
switch (activityScaling) {
|
|
122
107
|
case 'lg':
|
|
123
108
|
await df.columns.addNewCalculated(activityColumnScaled, 'Log10(${' + activityColumn + '})');
|
|
@@ -138,32 +123,17 @@ export async function describe(
|
|
|
138
123
|
sourceGrid.columns.setOrder([`${activityColumn}`]);
|
|
139
124
|
break;
|
|
140
125
|
}
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
const positionColName = 'Position';
|
|
144
|
-
const aminoAcidResidue = 'AAR';
|
|
145
|
-
|
|
146
|
-
//unpivot a table and handle duplicates
|
|
147
|
-
splitSeqDf = splitSeqDf.groupBy(positionColumns)
|
|
148
|
-
.add('med', activityColumnScaled, activityColumnScaled)
|
|
149
|
-
.aggregate();
|
|
150
|
-
|
|
151
|
-
const peptidesCount = splitSeqDf.getCol(activityColumnScaled).length;
|
|
152
|
-
|
|
153
|
-
let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
|
|
154
|
-
|
|
155
|
-
//TODO: move to chem palette
|
|
156
|
-
let groupMapping: {[key: string]: string} = {};
|
|
157
|
-
if (grouping) {
|
|
158
|
-
groupMapping = aarGroups;
|
|
159
|
-
const aarCol = matrixDf.getCol(aminoAcidResidue);
|
|
160
|
-
aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
|
|
161
|
-
aarCol.compact();
|
|
162
|
-
} else {
|
|
163
|
-
Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
|
|
164
|
-
}
|
|
126
|
+
}
|
|
165
127
|
|
|
166
|
-
|
|
128
|
+
async function calculateStatistics(
|
|
129
|
+
matrixDf: DG.DataFrame,
|
|
130
|
+
positionColName: string,
|
|
131
|
+
aminoAcidResidue: string,
|
|
132
|
+
activityColumnScaled: string,
|
|
133
|
+
peptidesCount: number,
|
|
134
|
+
splitSeqDf: DG.DataFrame,
|
|
135
|
+
groupMapping: {[key: string]: string},
|
|
136
|
+
) {
|
|
167
137
|
matrixDf = matrixDf.groupBy([positionColName, aminoAcidResidue])
|
|
168
138
|
.add('count', activityColumnScaled, 'Count')
|
|
169
139
|
.aggregate();
|
|
@@ -177,67 +147,54 @@ export async function describe(
|
|
|
177
147
|
await matrixDf.columns.addNewCalculated('Ratio', '${count}/'.concat(`${peptidesCount}`));
|
|
178
148
|
|
|
179
149
|
//calculate p-values based on t-test
|
|
180
|
-
let position: string;
|
|
181
|
-
let aar: string;
|
|
182
|
-
let currentActivity: number[];
|
|
183
|
-
let otherActivity: number[];
|
|
184
|
-
let testResult;
|
|
185
|
-
let currentMeanDiff: number;
|
|
186
150
|
let pvalues: Float32Array = new Float32Array(matrixDf.rowCount).fill(1);
|
|
187
|
-
let pvalue = 1.;
|
|
188
|
-
|
|
189
151
|
const mdCol: DG.Column = matrixDf.columns.addNewFloat('Mean difference');
|
|
190
152
|
const pValCol: DG.Column = matrixDf.columns.addNewFloat('pValue');
|
|
191
153
|
for (let i = 0; i < matrixDf.rowCount; i++) {
|
|
192
|
-
position = matrixDf.get(positionColName, i);
|
|
193
|
-
aar = matrixDf.get(aminoAcidResidue, i);
|
|
154
|
+
const position = matrixDf.get(positionColName, i);
|
|
155
|
+
const aar = matrixDf.get(aminoAcidResidue, i);
|
|
194
156
|
|
|
195
157
|
//@ts-ignore
|
|
196
158
|
splitSeqDf.rows.select((row) => groupMapping[row[position]] === aar);
|
|
197
|
-
currentActivity = splitSeqDf
|
|
159
|
+
const currentActivity: number[] = splitSeqDf
|
|
198
160
|
.clone(splitSeqDf.selection, [activityColumnScaled])
|
|
199
161
|
.getCol(activityColumnScaled)
|
|
200
162
|
.toList();
|
|
201
163
|
|
|
202
164
|
//@ts-ignore
|
|
203
165
|
splitSeqDf.rows.select((row) => groupMapping[row[position]] !== aar);
|
|
204
|
-
otherActivity = splitSeqDf
|
|
166
|
+
const otherActivity: number[] = splitSeqDf
|
|
205
167
|
.clone(splitSeqDf.selection, [activityColumnScaled])
|
|
206
168
|
.getCol(activityColumnScaled)
|
|
207
169
|
.toList();
|
|
208
170
|
|
|
209
|
-
testResult = tTest(currentActivity, otherActivity);
|
|
171
|
+
const testResult = tTest(currentActivity, otherActivity);
|
|
210
172
|
// testResult = uTest(currentActivity, otherActivity);
|
|
211
|
-
currentMeanDiff = testResult['Mean difference']!;
|
|
212
|
-
pvalue = testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'];
|
|
173
|
+
const currentMeanDiff = testResult['Mean difference']!;
|
|
174
|
+
const pvalue = testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'];
|
|
213
175
|
|
|
214
176
|
mdCol.set(i, currentMeanDiff);
|
|
215
177
|
pvalues[i] = pvalue;
|
|
216
178
|
}
|
|
217
179
|
|
|
218
|
-
if (true)
|
|
180
|
+
if (true)
|
|
219
181
|
pvalues = fdrcorrection(pvalues)[1];
|
|
220
|
-
}
|
|
221
182
|
|
|
222
|
-
|
|
183
|
+
|
|
184
|
+
for (let i = 0; i < pvalues.length; ++i)
|
|
223
185
|
pValCol.set(i, pvalues[i]);
|
|
224
|
-
}
|
|
225
186
|
|
|
226
|
-
const statsDf = matrixDf.clone();
|
|
227
187
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
matrixDf = matrixDf.groupBy([aminoAcidResidue])
|
|
231
|
-
.pivot(positionColName)
|
|
232
|
-
.add('first', 'Mean difference', '')
|
|
233
|
-
.aggregate();
|
|
234
|
-
matrixDf.name = 'SAR';
|
|
188
|
+
return matrixDf.clone();
|
|
189
|
+
}
|
|
235
190
|
|
|
236
|
-
|
|
191
|
+
async function setCategoryOrder(
|
|
192
|
+
twoColorMode: boolean, statsDf: DG.DataFrame, aminoAcidResidue: string, matrixDf: DG.DataFrame,
|
|
193
|
+
) {
|
|
237
194
|
const sortArgument = twoColorMode ? 'Absolute Mean difference' : 'Mean difference';
|
|
238
|
-
if (twoColorMode)
|
|
195
|
+
if (twoColorMode)
|
|
239
196
|
await statsDf.columns.addNewCalculated('Absolute Mean difference', 'Abs(${Mean difference})');
|
|
240
|
-
|
|
197
|
+
|
|
241
198
|
const aarWeightsDf = statsDf.groupBy([aminoAcidResidue]).sum(sortArgument, 'weight').aggregate();
|
|
242
199
|
const aarList = aarWeightsDf.getCol(aminoAcidResidue).toList();
|
|
243
200
|
const getWeight = (aar: string) => aarWeightsDf
|
|
@@ -248,8 +205,14 @@ export async function describe(
|
|
|
248
205
|
aarList.sort((first, second) => getWeight(second) - getWeight(first));
|
|
249
206
|
|
|
250
207
|
matrixDf.getCol(aminoAcidResidue).setCategoryOrder(aarList);
|
|
208
|
+
}
|
|
251
209
|
|
|
252
|
-
|
|
210
|
+
function createVerticalTable(
|
|
211
|
+
statsDf: DG.DataFrame,
|
|
212
|
+
aminoAcidResidue: string,
|
|
213
|
+
positionColName: string,
|
|
214
|
+
twoColorMode: boolean,
|
|
215
|
+
) {
|
|
253
216
|
// TODO: acquire ALL of the positions
|
|
254
217
|
let sequenceDf = statsDf.groupBy(['Mean difference', aminoAcidResidue, positionColName, 'Count', 'Ratio', 'pValue'])
|
|
255
218
|
.where('pValue <= 0.1')
|
|
@@ -268,9 +231,18 @@ export async function describe(
|
|
|
268
231
|
sequenceDf = sequenceDf.clone(DG.BitSet.create(sequenceDf.rowCount, (i) => {
|
|
269
232
|
return sequenceDf.get('Mean difference', i) === maxAtPos[sequenceDf.get(positionColName, i)];
|
|
270
233
|
}));
|
|
271
|
-
renderColNames.push('Mean difference');
|
|
272
234
|
|
|
273
|
-
|
|
235
|
+
return sequenceDf;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
function createGrids(
|
|
239
|
+
matrixDf: DG.DataFrame,
|
|
240
|
+
aminoAcidResidue: string,
|
|
241
|
+
positionColumns: string[],
|
|
242
|
+
sequenceDf: DG.DataFrame,
|
|
243
|
+
positionColName: string,
|
|
244
|
+
grouping: boolean,
|
|
245
|
+
) {
|
|
274
246
|
const sarGrid = matrixDf.plot.grid();
|
|
275
247
|
sarGrid.sort([aminoAcidResidue]);
|
|
276
248
|
sarGrid.columns.setOrder([aminoAcidResidue].concat(positionColumns));
|
|
@@ -282,16 +254,27 @@ export async function describe(
|
|
|
282
254
|
|
|
283
255
|
if (!grouping) {
|
|
284
256
|
let tempCol = matrixDf.columns.byName(aminoAcidResidue);
|
|
285
|
-
if (tempCol)
|
|
257
|
+
if (tempCol)
|
|
286
258
|
setAARRenderer(tempCol, sarGrid);
|
|
287
|
-
|
|
259
|
+
|
|
288
260
|
tempCol = sequenceDf.columns.byName(aminoAcidResidue);
|
|
289
|
-
if (tempCol)
|
|
261
|
+
if (tempCol)
|
|
290
262
|
setAARRenderer(tempCol, sarGrid);
|
|
291
|
-
}
|
|
292
263
|
}
|
|
293
264
|
|
|
294
|
-
|
|
265
|
+
return [sarGrid, sarVGrid];
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
function setCellRendererFunc(
|
|
269
|
+
renderColNames: string[],
|
|
270
|
+
positionColName: string,
|
|
271
|
+
aminoAcidResidue: string,
|
|
272
|
+
statsDf: DG.DataFrame,
|
|
273
|
+
twoColorMode: boolean,
|
|
274
|
+
sarGrid: DG.Grid,
|
|
275
|
+
sarVGrid: DG.Grid,
|
|
276
|
+
) {
|
|
277
|
+
const mdCol = statsDf.getCol('Mean difference');
|
|
295
278
|
const cellRendererFunc = function(args: DG.GridCellRenderArgs) {
|
|
296
279
|
args.g.save();
|
|
297
280
|
args.g.beginPath();
|
|
@@ -321,15 +304,15 @@ export async function describe(
|
|
|
321
304
|
|
|
322
305
|
let coef;
|
|
323
306
|
const variant = args.cell.cell.value < 0;
|
|
324
|
-
if (pVal < 0.01)
|
|
307
|
+
if (pVal < 0.01)
|
|
325
308
|
coef = variant && twoColorMode ? '#FF7900' : '#299617';
|
|
326
|
-
|
|
309
|
+
else if (pVal < 0.05)
|
|
327
310
|
coef = variant && twoColorMode ? '#FFA500' : '#32CD32';
|
|
328
|
-
|
|
311
|
+
else if (pVal < 0.1)
|
|
329
312
|
coef = variant && twoColorMode ? '#FBCEB1' : '#98FF98';
|
|
330
|
-
|
|
313
|
+
else
|
|
331
314
|
coef = DG.Color.toHtml(DG.Color.lightLightGray);
|
|
332
|
-
|
|
315
|
+
|
|
333
316
|
|
|
334
317
|
const chooseMin = () => twoColorMode ? 0 : mdCol.min;
|
|
335
318
|
const chooseMax = () => twoColorMode ? Math.max(Math.abs(mdCol.min), mdCol.max) : mdCol.max;
|
|
@@ -360,8 +343,18 @@ export async function describe(
|
|
|
360
343
|
};
|
|
361
344
|
sarGrid.onCellRender.subscribe(cellRendererFunc);
|
|
362
345
|
sarVGrid.onCellRender.subscribe(cellRendererFunc);
|
|
346
|
+
}
|
|
363
347
|
|
|
364
|
-
|
|
348
|
+
function setTooltipFunc(
|
|
349
|
+
renderColNames: string[],
|
|
350
|
+
statsDf: DG.DataFrame,
|
|
351
|
+
aminoAcidResidue: string,
|
|
352
|
+
positionColName: string,
|
|
353
|
+
peptidesCount: number,
|
|
354
|
+
grouping: boolean,
|
|
355
|
+
sarGrid: DG.Grid,
|
|
356
|
+
sarVGrid: DG.Grid,
|
|
357
|
+
) {
|
|
365
358
|
const onCellTooltipFunc = function(cell: DG.GridCell, x: number, y: number) {
|
|
366
359
|
if (
|
|
367
360
|
!cell.isRowHeader &&
|
|
@@ -383,11 +376,11 @@ export async function describe(
|
|
|
383
376
|
const textNum = statsDf.groupBy([col]).where(query).aggregate().get(col, 0);
|
|
384
377
|
let text = `${col === 'Count' ? textNum : textNum.toFixed(5)}`;
|
|
385
378
|
|
|
386
|
-
if (col === 'Count')
|
|
379
|
+
if (col === 'Count')
|
|
387
380
|
text += ` / ${peptidesCount}`;
|
|
388
|
-
|
|
381
|
+
else if (col === 'pValue')
|
|
389
382
|
text = parseFloat(text) !== 0 ? text : '<0.01';
|
|
390
|
-
|
|
383
|
+
|
|
391
384
|
|
|
392
385
|
tooltipMap[col === 'pValue' ? 'p-value' : col] = text;
|
|
393
386
|
}
|
|
@@ -406,25 +399,33 @@ export async function describe(
|
|
|
406
399
|
const currentGroup = groupDescription[cell.cell.value];
|
|
407
400
|
const divText = ui.divText('Amino Acids in this group: ' + currentGroup['aminoAcids'].join(', '));
|
|
408
401
|
ui.tooltip.show(ui.divV([ui.h3(currentGroup['description']), divText]), x, y);
|
|
409
|
-
} else
|
|
402
|
+
} else
|
|
410
403
|
cp.showTooltip(cell, x, y);
|
|
411
|
-
}
|
|
412
404
|
}
|
|
413
405
|
return true;
|
|
414
406
|
};
|
|
415
407
|
sarGrid.onCellTooltip(onCellTooltipFunc);
|
|
416
408
|
sarVGrid.onCellTooltip(onCellTooltipFunc);
|
|
409
|
+
}
|
|
417
410
|
|
|
411
|
+
function postProcessGrids(
|
|
412
|
+
sourceGrid: DG.Grid,
|
|
413
|
+
invalidIndexes: number[],
|
|
414
|
+
matrixDf: DG.DataFrame,
|
|
415
|
+
grouping: boolean,
|
|
416
|
+
aminoAcidResidue: string,
|
|
417
|
+
sarGrid: DG.Grid,
|
|
418
|
+
sarVGrid: DG.Grid,
|
|
419
|
+
) {
|
|
418
420
|
sourceGrid.onCellPrepare((cell: DG.GridCell) => {
|
|
419
421
|
const currentRowIndex = cell.tableRowIndex;
|
|
420
|
-
if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader)
|
|
422
|
+
if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader)
|
|
421
423
|
cell.style.backColor = DG.Color.lightLightGray;
|
|
422
|
-
}
|
|
423
424
|
});
|
|
424
425
|
|
|
425
|
-
for (const col of matrixDf.columns.names())
|
|
426
|
+
for (const col of matrixDf.columns.names())
|
|
426
427
|
sarGrid.col(col)!.width = sarGrid.props.rowHeight;
|
|
427
|
-
|
|
428
|
+
|
|
428
429
|
|
|
429
430
|
if (grouping) {
|
|
430
431
|
sarGrid.col(aminoAcidResidue)!.name = 'Groups';
|
|
@@ -434,5 +435,100 @@ export async function describe(
|
|
|
434
435
|
sarGrid.props.allowEdit = false;
|
|
435
436
|
sarVGrid.props.allowEdit = false;
|
|
436
437
|
|
|
438
|
+
sarVGrid.col('Mean difference')!.name = 'Diff';
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
export async function describe(
|
|
442
|
+
df: DG.DataFrame,
|
|
443
|
+
activityColumn: string,
|
|
444
|
+
activityScaling: string,
|
|
445
|
+
sourceGrid: DG.Grid,
|
|
446
|
+
twoColorMode: boolean,
|
|
447
|
+
initialBitset: DG.BitSet | null,
|
|
448
|
+
grouping: boolean,
|
|
449
|
+
): Promise<[DG.Grid, DG.Grid, DG.DataFrame, {[key: string]: string}]> {
|
|
450
|
+
//Split the aligned sequence into separate AARs
|
|
451
|
+
let splitSeqDf: DG.DataFrame | undefined;
|
|
452
|
+
let invalidIndexes: number[];
|
|
453
|
+
const col: DG.Column = df.columns.bySemType('alignedSequence');
|
|
454
|
+
[splitSeqDf, invalidIndexes] = splitAlignedPeptides(col);
|
|
455
|
+
splitSeqDf.name = 'Split sequence';
|
|
456
|
+
|
|
457
|
+
const positionColumns = splitSeqDf.columns.names();
|
|
458
|
+
const activityColumnScaled = `${activityColumn}Scaled`;
|
|
459
|
+
const renderColNames: string[] = splitSeqDf.columns.names();
|
|
460
|
+
const positionColName = 'Position';
|
|
461
|
+
const aminoAcidResidue = 'AAR';
|
|
462
|
+
|
|
463
|
+
splitSeqDf.columns.add(df.getCol(activityColumn));
|
|
464
|
+
|
|
465
|
+
joinDataFrames(activityColumnScaled, df, positionColumns, splitSeqDf, activityColumn);
|
|
466
|
+
|
|
467
|
+
for (const col of df.columns) {
|
|
468
|
+
if (splitSeqDf.col(col.name) && col.name != activityColumn)
|
|
469
|
+
setAARRenderer(col, sourceGrid);
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
sortSourceGrid(sourceGrid);
|
|
473
|
+
|
|
474
|
+
await scaleActivity(activityScaling, activityColumn, activityColumnScaled, sourceGrid, splitSeqDf);
|
|
475
|
+
splitSeqDf = splitSeqDf.clone(initialBitset);
|
|
476
|
+
|
|
477
|
+
//unpivot a table and handle duplicates
|
|
478
|
+
splitSeqDf = splitSeqDf.groupBy(positionColumns)
|
|
479
|
+
.add('med', activityColumnScaled, activityColumnScaled)
|
|
480
|
+
.aggregate();
|
|
481
|
+
|
|
482
|
+
const peptidesCount = splitSeqDf.getCol(activityColumnScaled).length;
|
|
483
|
+
|
|
484
|
+
let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
|
|
485
|
+
|
|
486
|
+
//TODO: move to chem palette
|
|
487
|
+
let groupMapping: {[key: string]: string} = {};
|
|
488
|
+
if (grouping) {
|
|
489
|
+
groupMapping = aarGroups;
|
|
490
|
+
const aarCol = matrixDf.getCol(aminoAcidResidue);
|
|
491
|
+
aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
|
|
492
|
+
aarCol.compact();
|
|
493
|
+
} else
|
|
494
|
+
Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
//statistics for specific AAR at a specific position
|
|
498
|
+
const statsDf = await calculateStatistics(
|
|
499
|
+
matrixDf, positionColName, aminoAcidResidue, activityColumnScaled, peptidesCount, splitSeqDf, groupMapping,
|
|
500
|
+
);
|
|
501
|
+
|
|
502
|
+
// SAR matrix table
|
|
503
|
+
//pivot a table to make it matrix-like
|
|
504
|
+
matrixDf = statsDf.groupBy([aminoAcidResidue])
|
|
505
|
+
.pivot(positionColName)
|
|
506
|
+
.add('first', 'Mean difference', '')
|
|
507
|
+
.aggregate();
|
|
508
|
+
matrixDf.name = 'SAR';
|
|
509
|
+
|
|
510
|
+
// Setting category order
|
|
511
|
+
await setCategoryOrder(twoColorMode, statsDf, aminoAcidResidue, matrixDf);
|
|
512
|
+
|
|
513
|
+
// SAR vertical table (naive, choose best Mean difference from pVals <= 0.1)
|
|
514
|
+
const sequenceDf = createVerticalTable(statsDf, aminoAcidResidue, positionColName, twoColorMode);
|
|
515
|
+
renderColNames.push('Mean difference');
|
|
516
|
+
|
|
517
|
+
const [sarGrid, sarVGrid] = createGrids(
|
|
518
|
+
matrixDf, aminoAcidResidue, positionColumns, sequenceDf, positionColName, grouping,
|
|
519
|
+
);
|
|
520
|
+
|
|
521
|
+
setCellRendererFunc(
|
|
522
|
+
renderColNames, positionColName, aminoAcidResidue, statsDf, twoColorMode, sarGrid, sarVGrid,
|
|
523
|
+
);
|
|
524
|
+
|
|
525
|
+
// show all the statistics in a tooltip over cell
|
|
526
|
+
setTooltipFunc(
|
|
527
|
+
renderColNames, statsDf, aminoAcidResidue, positionColName, peptidesCount, grouping, sarGrid, sarVGrid,
|
|
528
|
+
);
|
|
529
|
+
|
|
530
|
+
postProcessGrids(sourceGrid, invalidIndexes, matrixDf, grouping, aminoAcidResidue, sarGrid, sarVGrid);
|
|
531
|
+
|
|
532
|
+
//TODO: return class instead
|
|
437
533
|
return [sarGrid, sarVGrid, statsDf, groupMapping];
|
|
438
534
|
}
|