@datagrok/peptides 1.12.0 → 1.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/dist/535.js +2 -2
- package/dist/package-test.js +2 -2
- package/dist/package.js +2 -2
- package/package.json +7 -7
- package/src/demo/fasta.ts +6 -25
- package/src/model.ts +275 -296
- package/src/package.ts +1 -1
- package/src/tests/core.ts +2 -10
- package/src/tests/table-view.ts +48 -48
- package/src/tests/viewers.ts +15 -13
- package/src/tests/widgets.ts +3 -4
- package/src/utils/cell-renderer.ts +33 -39
- package/src/utils/constants.ts +1 -0
- package/src/utils/misc.ts +2 -5
- package/src/utils/statistics.ts +22 -3
- package/src/utils/types.ts +6 -5
- package/src/viewers/logo-summary.ts +55 -42
- package/src/viewers/sar-viewer.ts +167 -107
- package/src/widgets/distribution.ts +60 -59
- package/src/widgets/mutation-cliffs.ts +2 -2
- package/src/widgets/peptides.ts +18 -11
|
@@ -14,44 +14,47 @@ const allConst = 'All';
|
|
|
14
14
|
const otherConst = 'Other';
|
|
15
15
|
|
|
16
16
|
export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel): DG.Widget {
|
|
17
|
+
if (!table.selection.anyTrue)
|
|
18
|
+
return new DG.Widget(ui.divText('No distribution'));
|
|
19
|
+
|
|
17
20
|
const activityCol = table.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
|
|
18
21
|
const activityColData = activityCol.getRawData();
|
|
19
22
|
const rowCount = activityCol.length;
|
|
20
|
-
const selectionObject = model.
|
|
23
|
+
const selectionObject = model.invariantMapSelection;
|
|
21
24
|
const clustersColName = model.settings.clustersColumnName;
|
|
22
25
|
let clustersProcessedObject: string[] = [];
|
|
23
26
|
if (clustersColName)
|
|
24
|
-
clustersProcessedObject = model.clusterSelection;
|
|
27
|
+
clustersProcessedObject = Object.values(model.clusterSelection).flat();
|
|
25
28
|
|
|
26
29
|
const positions = Object.keys(selectionObject);
|
|
27
|
-
let
|
|
30
|
+
let monomerStr = allConst;
|
|
28
31
|
let otherStr = '';
|
|
29
32
|
|
|
30
33
|
const updateDistributionHost = (): void => {
|
|
31
34
|
model.splitByPos = splitByPosition.value!;
|
|
32
|
-
model.
|
|
35
|
+
model.splitByMonomer = splitByMonomer.value!;
|
|
33
36
|
const res: HTMLDivElement[] = [];
|
|
34
|
-
if (splitByPosition.value &&
|
|
37
|
+
if (splitByPosition.value && splitByMonomer.value) {
|
|
35
38
|
otherStr = otherConst;
|
|
36
39
|
for (const position of positions) {
|
|
37
|
-
const
|
|
38
|
-
if (
|
|
40
|
+
const monomerList = selectionObject[position];
|
|
41
|
+
if (monomerList.length === 0)
|
|
39
42
|
continue;
|
|
40
43
|
|
|
41
44
|
const posCol = table.getCol(position);
|
|
42
45
|
const posColCategories = posCol.categories;
|
|
43
46
|
const posColData = posCol.getRawData();
|
|
44
47
|
|
|
45
|
-
for (const
|
|
46
|
-
const labels = getDistributionLegend(`${position} : ${
|
|
48
|
+
for (const monomer of monomerList) {
|
|
49
|
+
const labels = getDistributionLegend(`${position} : ${monomer}`, otherStr);
|
|
47
50
|
|
|
48
|
-
const
|
|
49
|
-
const mask = DG.BitSet.create(rowCount, (i) => posColData[i] ===
|
|
51
|
+
const monomerCategoryIndex = posColCategories.indexOf(monomer);
|
|
52
|
+
const mask = DG.BitSet.create(rowCount, (i) => posColData[i] === monomerCategoryIndex);
|
|
50
53
|
const distributionTable = DG.DataFrame.fromColumns(
|
|
51
54
|
[activityCol, DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask)]);
|
|
52
55
|
const hist = getActivityDistribution(prepareTableForHistogram(distributionTable));
|
|
53
56
|
|
|
54
|
-
const stats = model.monomerPositionStats[position][
|
|
57
|
+
const stats = model.monomerPositionStats[position]![monomer]!;
|
|
55
58
|
const tableMap = getStatsTableMap(stats);
|
|
56
59
|
|
|
57
60
|
const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
|
|
@@ -66,18 +69,18 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
|
|
|
66
69
|
} else if (splitByPosition.value) {
|
|
67
70
|
otherStr = otherConst;
|
|
68
71
|
for (const position of positions) {
|
|
69
|
-
const
|
|
70
|
-
if (
|
|
72
|
+
const monomerList = selectionObject[position];
|
|
73
|
+
if (monomerList.length === 0)
|
|
71
74
|
continue;
|
|
72
75
|
|
|
73
|
-
|
|
74
|
-
const labels = getDistributionLegend(
|
|
76
|
+
monomerStr = `${position}: {${monomerList.join(', ')}}`;
|
|
77
|
+
const labels = getDistributionLegend(monomerStr, otherStr);
|
|
75
78
|
|
|
76
79
|
const posCol = table.getCol(position);
|
|
77
80
|
const posColCategories = posCol.categories;
|
|
78
81
|
const posColData = posCol.getRawData();
|
|
79
|
-
const
|
|
80
|
-
const mask = DG.BitSet.create(rowCount, (i) =>
|
|
82
|
+
const monomerIndexesList = monomerList.map((monomer) => posColCategories.indexOf(monomer));
|
|
83
|
+
const mask = DG.BitSet.create(rowCount, (i) => monomerIndexesList.includes(posColData[i]));
|
|
81
84
|
const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
|
|
82
85
|
|
|
83
86
|
const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
|
|
@@ -95,34 +98,34 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
|
|
|
95
98
|
|
|
96
99
|
res.push(distributionRoot);
|
|
97
100
|
}
|
|
98
|
-
} else if (
|
|
99
|
-
const reversedSelectionObject: {[
|
|
100
|
-
const
|
|
101
|
+
} else if (splitByMonomer.value) {
|
|
102
|
+
const reversedSelectionObject: {[monomer: string]: string[]} = {};
|
|
103
|
+
const monomers = [];
|
|
101
104
|
for (const position of positions) {
|
|
102
|
-
for (const
|
|
103
|
-
if (!reversedSelectionObject.hasOwnProperty(
|
|
104
|
-
reversedSelectionObject[
|
|
105
|
-
|
|
105
|
+
for (const monomer of selectionObject[position]) {
|
|
106
|
+
if (!reversedSelectionObject.hasOwnProperty(monomer)) {
|
|
107
|
+
reversedSelectionObject[monomer] = [position];
|
|
108
|
+
monomers.push(monomer);
|
|
106
109
|
continue;
|
|
107
110
|
}
|
|
108
|
-
if (!reversedSelectionObject[
|
|
109
|
-
reversedSelectionObject[
|
|
111
|
+
if (!reversedSelectionObject[monomer].includes(position))
|
|
112
|
+
reversedSelectionObject[monomer].push(position);
|
|
110
113
|
}
|
|
111
114
|
}
|
|
112
115
|
|
|
113
116
|
otherStr = otherConst;
|
|
114
|
-
for (const
|
|
115
|
-
const posList = reversedSelectionObject[
|
|
117
|
+
for (const monomer of monomers) {
|
|
118
|
+
const posList = reversedSelectionObject[monomer];
|
|
116
119
|
const posColList = posList.map((pos) => table.getCol(pos));
|
|
117
120
|
const posColCategoriesList = posColList.map((posCol) => posCol.categories);
|
|
118
121
|
const posColDataList = posColList.map((posCol) => posCol.getRawData());
|
|
119
|
-
const
|
|
122
|
+
const monomerCategoryIndexList = posColCategoriesList.map((posColCategories) => posColCategories.indexOf(monomer));
|
|
120
123
|
|
|
121
|
-
|
|
122
|
-
const labels = getDistributionLegend(
|
|
124
|
+
monomerStr = `${monomer}: {${posList.join(', ')}}`;
|
|
125
|
+
const labels = getDistributionLegend(monomerStr, otherStr);
|
|
123
126
|
|
|
124
127
|
const mask = DG.BitSet.create(rowCount,
|
|
125
|
-
(i) => posColDataList.some((posColData, j) => posColData[i] ===
|
|
128
|
+
(i) => posColDataList.some((posColData, j) => posColData[i] === monomerCategoryIndexList[j]));
|
|
126
129
|
const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
|
|
127
130
|
|
|
128
131
|
const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
|
|
@@ -140,36 +143,33 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
|
|
|
140
143
|
res.push(distributionRoot);
|
|
141
144
|
}
|
|
142
145
|
} else {
|
|
143
|
-
|
|
144
|
-
if (!splitCol)
|
|
146
|
+
if (!table.selection.anyTrue)
|
|
145
147
|
res.push(ui.divText('No distribution'));
|
|
146
148
|
else {
|
|
147
149
|
otherStr = '';
|
|
148
150
|
if (Object.values(selectionObject).some((selectedAar) => selectedAar.length !== 0) ||
|
|
149
151
|
clustersProcessedObject.length !== 0) {
|
|
150
|
-
|
|
152
|
+
monomerStr = '';
|
|
151
153
|
for (const position of positions) {
|
|
152
|
-
const
|
|
153
|
-
if (
|
|
154
|
-
|
|
154
|
+
const monomerList = selectionObject[position];
|
|
155
|
+
if (monomerList.length !== 0)
|
|
156
|
+
monomerStr += `${position}: {${monomerList.join(', ')}}; `;
|
|
155
157
|
}
|
|
156
158
|
if (clustersProcessedObject.length !== 0)
|
|
157
|
-
|
|
159
|
+
monomerStr += `Clusters: ${clustersProcessedObject.join(', ')}`;
|
|
158
160
|
otherStr = otherConst;
|
|
159
161
|
}
|
|
160
|
-
const labels = getDistributionLegend(
|
|
161
|
-
|
|
162
|
-
const distributionTable = DG.DataFrame.fromColumns([activityCol, splitCol]);
|
|
162
|
+
const labels = getDistributionLegend(monomerStr, otherStr);
|
|
163
163
|
|
|
164
|
+
const distributionTable = DG.DataFrame.fromColumns([activityCol, DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, table.selection)]);
|
|
164
165
|
const hist = getActivityDistribution(prepareTableForHistogram(distributionTable));
|
|
165
|
-
|
|
166
|
-
const
|
|
167
|
-
|
|
166
|
+
const bitArray = BitArray.fromString(table.selection.toBinaryString());
|
|
167
|
+
const mask = DG.BitSet.create(rowCount,
|
|
168
|
+
bitArray.allFalse ? (_): boolean => true : (i): boolean => bitArray.getBit(i));
|
|
168
169
|
const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
|
|
169
|
-
|
|
170
|
-
|
|
170
|
+
const stats = bitArray.allFalse ? {count: rowCount, pValue: null, meanDifference: 0, ratio: 1, mask: bitArray} :
|
|
171
|
+
getStats(activityColData, bitArray);
|
|
171
172
|
const tableMap = getStatsTableMap(stats);
|
|
172
|
-
|
|
173
173
|
const resultMap: {[key: string]: any} = {...tableMap, ...aggregatedColMap};
|
|
174
174
|
const distributionRoot = getStatsSummary(labels, hist, resultMap);
|
|
175
175
|
$(distributionRoot).addClass('d4-flex-col');
|
|
@@ -187,10 +187,10 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
|
|
|
187
187
|
};
|
|
188
188
|
|
|
189
189
|
let defaultValuePos = model.splitByPos;
|
|
190
|
-
let
|
|
190
|
+
let defaultValueMonomer = model.splitByMonomer;
|
|
191
191
|
if (!model.isClusterSelectionEmpty && model.isMonomerPositionSelectionEmpty) {
|
|
192
192
|
defaultValuePos = false;
|
|
193
|
-
|
|
193
|
+
defaultValueMonomer = false;
|
|
194
194
|
}
|
|
195
195
|
|
|
196
196
|
const splitByPosition = ui.boolInput('', defaultValuePos, updateDistributionHost);
|
|
@@ -198,14 +198,14 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
|
|
|
198
198
|
splitByPosition.setTooltip('Constructs distribution for each position separately');
|
|
199
199
|
setDefaultProperties(splitByPosition);
|
|
200
200
|
$(splitByPosition.root).css('margin-right', '10px');
|
|
201
|
-
const
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
setDefaultProperties(
|
|
201
|
+
const splitByMonomer = ui.boolInput('', defaultValueMonomer, updateDistributionHost);
|
|
202
|
+
splitByMonomer.addPostfix('Split by monomer');
|
|
203
|
+
splitByMonomer.setTooltip('Constructs distribution for each monomer separately');
|
|
204
|
+
setDefaultProperties(splitByMonomer);
|
|
205
205
|
|
|
206
|
-
const controlsHost = ui.divH([splitByPosition.root,
|
|
206
|
+
const controlsHost = ui.divH([splitByPosition.root, splitByMonomer.root]);
|
|
207
207
|
const distributionHost = ui.div([], 'd4-flex-wrap');
|
|
208
|
-
|
|
208
|
+
splitByMonomer.fireChanged();
|
|
209
209
|
|
|
210
210
|
return new DG.Widget(ui.divV([controlsHost, distributionHost]));
|
|
211
211
|
}
|
|
@@ -229,11 +229,12 @@ export function getActivityDistribution(table: DG.DataFrame, isTooltip: boolean
|
|
|
229
229
|
|
|
230
230
|
export function getStatsTableMap(stats: Stats, options: {fractionDigits?: number} = {}): StringDictionary {
|
|
231
231
|
options.fractionDigits ??= 3;
|
|
232
|
-
const tableMap = {
|
|
232
|
+
const tableMap: StringDictionary = {
|
|
233
233
|
'Count': `${stats.count} (${stats.ratio.toFixed(options.fractionDigits)}%)`,
|
|
234
|
-
'p-value': stats.pValue < 0.01 ? '<0.01' : stats.pValue.toFixed(options.fractionDigits),
|
|
235
234
|
'Mean difference': stats.meanDifference.toFixed(options.fractionDigits),
|
|
236
235
|
};
|
|
236
|
+
if (stats.pValue !== null)
|
|
237
|
+
tableMap['p-value'] = stats.pValue < 0.01 ? '<0.01' : stats.pValue.toFixed(options.fractionDigits);
|
|
237
238
|
return tableMap;
|
|
238
239
|
}
|
|
239
240
|
|
|
@@ -33,8 +33,8 @@ export function mutationCliffsWidget(table: DG.DataFrame, model: PeptidesModel):
|
|
|
33
33
|
const posColCategories = posCol.categories;
|
|
34
34
|
const posColData = posCol.getRawData();
|
|
35
35
|
|
|
36
|
-
for (const
|
|
37
|
-
const substitutionsMap = substInfo.get(
|
|
36
|
+
for (const monomer of currentCell[pos]) {
|
|
37
|
+
const substitutionsMap = substInfo.get(monomer)?.get(pos) as Map<number, type.UTypedArray> | undefined;
|
|
38
38
|
if (typeof substitutionsMap === 'undefined')
|
|
39
39
|
continue;
|
|
40
40
|
|
package/src/widgets/peptides.ts
CHANGED
|
@@ -20,11 +20,12 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
|
|
|
20
20
|
const logoHost = ui.div();
|
|
21
21
|
let seqColInput: DG.InputBase | null = null;
|
|
22
22
|
if (typeof col === 'undefined') {
|
|
23
|
-
const sequenceColumns = df.columns.toList()
|
|
24
|
-
.filter((dfCol) => dfCol.semType === DG.SEMTYPE.MACROMOLECULE && dfCol.stats.missingValueCount === 0);
|
|
23
|
+
const sequenceColumns = df.columns.toList().filter((dfCol) => dfCol.semType === DG.SEMTYPE.MACROMOLECULE);
|
|
25
24
|
const potentialCol = DG.Utils.firstOrNull(sequenceColumns);
|
|
26
25
|
if (potentialCol === null)
|
|
27
26
|
throw new Error('Peptides Error: table doesn\'t contain sequence columns');
|
|
27
|
+
else if (potentialCol.stats.missingValueCount !== 0)
|
|
28
|
+
grok.shell.info('Sequences column contains missing values. They will be ignored during analysis');
|
|
28
29
|
|
|
29
30
|
seqColInput = ui.columnInput('Sequence', df, potentialCol, () => {
|
|
30
31
|
const seqCol = seqColInput!.value;
|
|
@@ -37,8 +38,9 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
|
|
|
37
38
|
viewer.root.style.setProperty('height', '130px');
|
|
38
39
|
return viewer.root;
|
|
39
40
|
}));
|
|
40
|
-
|
|
41
|
-
|
|
41
|
+
if (seqCol.stats.missingValueCount !== 0)
|
|
42
|
+
grok.shell.info('Sequences column contains missing values. They will be ignored during analysis');
|
|
43
|
+
}, {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
|
|
42
44
|
seqColInput.setTooltip('Macromolecule column in FASTA, HELM or separated format');
|
|
43
45
|
} else if (!(col.getTag(bioTAGS.aligned) === ALIGNMENT.SEQ_MSA) &&
|
|
44
46
|
col.getTag(DG.TAGS.UNITS) !== NOTATION.HELM) {
|
|
@@ -89,21 +91,22 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
|
|
|
89
91
|
activityScalingMethod.setTooltip('Activity column transformation method');
|
|
90
92
|
|
|
91
93
|
const activityScalingMethodState = (): void => {
|
|
92
|
-
activityScalingMethod.enabled = (activityColumnChoice.value ?? false) &&
|
|
94
|
+
activityScalingMethod.enabled = (activityColumnChoice.value ?? false) && activityColumnChoice.value!.stats.min > 0;
|
|
93
95
|
activityScalingMethod.value = C.SCALING_METHODS.NONE;
|
|
96
|
+
if (activityColumnChoice.value!.stats.missingValueCount !== 0)
|
|
97
|
+
grok.shell.info('Activity column contains missing values. They will be ignored during analysis');
|
|
94
98
|
};
|
|
95
99
|
//TODO: add when new version of datagrok-api is available
|
|
96
100
|
const activityColumnChoice = ui.columnInput('Activity', df, defaultActivityColumn, activityScalingMethodState,
|
|
97
|
-
{filter: (col: DG.Column) =>
|
|
101
|
+
{filter: (col: DG.Column) => col.type === DG.TYPE.INT || col.type === DG.TYPE.FLOAT});
|
|
98
102
|
activityColumnChoice.setTooltip('Numerical activity column');
|
|
99
|
-
const clustersColumnChoice = ui.columnInput('Clusters', df, null, null
|
|
103
|
+
const clustersColumnChoice = ui.columnInput('Clusters', df, null, null);
|
|
100
104
|
clustersColumnChoice.setTooltip('Optional. Clusters column is used to create Logo Summary Table');
|
|
101
105
|
clustersColumnChoice.nullable = true;
|
|
102
106
|
activityColumnChoice.fireChanged();
|
|
103
107
|
activityScalingMethod.fireChanged();
|
|
104
108
|
|
|
105
|
-
const targetColumnChoice = ui.columnInput('Target', df, null, null,
|
|
106
|
-
{filter: (col: DG.Column) => col.type === DG.TYPE.STRING && col.stats.missingValueCount === 0});
|
|
109
|
+
const targetColumnChoice = ui.columnInput('Target', df, null, null, {filter: (col: DG.Column) => col.type === DG.TYPE.STRING});
|
|
107
110
|
targetColumnChoice.setTooltip('Optional. Target represents a unique binding construct for every peptide in the data. ' +
|
|
108
111
|
'Target can be used to split mutation cliff analysis for peptides specific to a certain set of targets');
|
|
109
112
|
targetColumnChoice.nullable = true;
|
|
@@ -199,10 +202,14 @@ export async function startAnalysis(activityColumn: DG.Column<number>, peptidesC
|
|
|
199
202
|
newDf.setTag(C.TAGS.UUID, dfUuid);
|
|
200
203
|
newDf.setTag('monomerType', monomerType);
|
|
201
204
|
|
|
205
|
+
const bitset = DG.BitSet.create(currentDf.rowCount,
|
|
206
|
+
(i) => !activityColumn.isNone(i) && !peptidesCol.isNone(i) && currentDf.filter.get(i));
|
|
207
|
+
|
|
202
208
|
// Cloning dataframe with applied filter. If filter is not applied, cloning is
|
|
203
209
|
// needed anyway to allow filtering on the original dataframe
|
|
204
|
-
model = PeptidesModel.getInstance(newDf.clone(
|
|
205
|
-
if (clustersColumn)
|
|
210
|
+
model = PeptidesModel.getInstance(newDf.clone(bitset));
|
|
211
|
+
if (clustersColumn)
|
|
212
|
+
await model.addLogoSummaryTable();
|
|
206
213
|
await model.addMonomerPosition();
|
|
207
214
|
await model.addMostPotentResidues();
|
|
208
215
|
} else
|