@datagrok/peptides 1.12.0 → 1.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,44 +14,47 @@ const allConst = 'All';
14
14
  const otherConst = 'Other';
15
15
 
16
16
  export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel): DG.Widget {
17
+ if (!table.selection.anyTrue)
18
+ return new DG.Widget(ui.divText('No distribution'));
19
+
17
20
  const activityCol = table.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
18
21
  const activityColData = activityCol.getRawData();
19
22
  const rowCount = activityCol.length;
20
- const selectionObject = model.mutationCliffsSelection;
23
+ const selectionObject = model.invariantMapSelection;
21
24
  const clustersColName = model.settings.clustersColumnName;
22
25
  let clustersProcessedObject: string[] = [];
23
26
  if (clustersColName)
24
- clustersProcessedObject = model.clusterSelection;
27
+ clustersProcessedObject = Object.values(model.clusterSelection).flat();
25
28
 
26
29
  const positions = Object.keys(selectionObject);
27
- let aarStr = allConst;
30
+ let monomerStr = allConst;
28
31
  let otherStr = '';
29
32
 
30
33
  const updateDistributionHost = (): void => {
31
34
  model.splitByPos = splitByPosition.value!;
32
- model.splitByAAR = splitByAAR.value!;
35
+ model.splitByMonomer = splitByMonomer.value!;
33
36
  const res: HTMLDivElement[] = [];
34
- if (splitByPosition.value && splitByAAR.value) {
37
+ if (splitByPosition.value && splitByMonomer.value) {
35
38
  otherStr = otherConst;
36
39
  for (const position of positions) {
37
- const aarList = selectionObject[position];
38
- if (aarList.length === 0)
40
+ const monomerList = selectionObject[position];
41
+ if (monomerList.length === 0)
39
42
  continue;
40
43
 
41
44
  const posCol = table.getCol(position);
42
45
  const posColCategories = posCol.categories;
43
46
  const posColData = posCol.getRawData();
44
47
 
45
- for (const aar of aarList) {
46
- const labels = getDistributionLegend(`${position} : ${aar}`, otherStr);
48
+ for (const monomer of monomerList) {
49
+ const labels = getDistributionLegend(`${position} : ${monomer}`, otherStr);
47
50
 
48
- const aarCategoryIndex = posColCategories.indexOf(aar);
49
- const mask = DG.BitSet.create(rowCount, (i) => posColData[i] === aarCategoryIndex);
51
+ const monomerCategoryIndex = posColCategories.indexOf(monomer);
52
+ const mask = DG.BitSet.create(rowCount, (i) => posColData[i] === monomerCategoryIndex);
50
53
  const distributionTable = DG.DataFrame.fromColumns(
51
54
  [activityCol, DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask)]);
52
55
  const hist = getActivityDistribution(prepareTableForHistogram(distributionTable));
53
56
 
54
- const stats = model.monomerPositionStats[position][aar];
57
+ const stats = model.monomerPositionStats[position]![monomer]!;
55
58
  const tableMap = getStatsTableMap(stats);
56
59
 
57
60
  const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
@@ -66,18 +69,18 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
66
69
  } else if (splitByPosition.value) {
67
70
  otherStr = otherConst;
68
71
  for (const position of positions) {
69
- const aarList = selectionObject[position];
70
- if (aarList.length === 0)
72
+ const monomerList = selectionObject[position];
73
+ if (monomerList.length === 0)
71
74
  continue;
72
75
 
73
- aarStr = `${position}: {${aarList.join(', ')}}`;
74
- const labels = getDistributionLegend(aarStr, otherStr);
76
+ monomerStr = `${position}: {${monomerList.join(', ')}}`;
77
+ const labels = getDistributionLegend(monomerStr, otherStr);
75
78
 
76
79
  const posCol = table.getCol(position);
77
80
  const posColCategories = posCol.categories;
78
81
  const posColData = posCol.getRawData();
79
- const aarIndexesList = aarList.map((aar) => posColCategories.indexOf(aar));
80
- const mask = DG.BitSet.create(rowCount, (i) => aarIndexesList.includes(posColData[i]));
82
+ const monomerIndexesList = monomerList.map((monomer) => posColCategories.indexOf(monomer));
83
+ const mask = DG.BitSet.create(rowCount, (i) => monomerIndexesList.includes(posColData[i]));
81
84
  const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
82
85
 
83
86
  const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
@@ -95,34 +98,34 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
95
98
 
96
99
  res.push(distributionRoot);
97
100
  }
98
- } else if (splitByAAR.value) {
99
- const reversedSelectionObject: {[aar: string]: string[]} = {};
100
- const aars = [];
101
+ } else if (splitByMonomer.value) {
102
+ const reversedSelectionObject: {[monomer: string]: string[]} = {};
103
+ const monomers = [];
101
104
  for (const position of positions) {
102
- for (const aar of selectionObject[position]) {
103
- if (!reversedSelectionObject.hasOwnProperty(aar)) {
104
- reversedSelectionObject[aar] = [position];
105
- aars.push(aar);
105
+ for (const monomer of selectionObject[position]) {
106
+ if (!reversedSelectionObject.hasOwnProperty(monomer)) {
107
+ reversedSelectionObject[monomer] = [position];
108
+ monomers.push(monomer);
106
109
  continue;
107
110
  }
108
- if (!reversedSelectionObject[aar].includes(position))
109
- reversedSelectionObject[aar].push(position);
111
+ if (!reversedSelectionObject[monomer].includes(position))
112
+ reversedSelectionObject[monomer].push(position);
110
113
  }
111
114
  }
112
115
 
113
116
  otherStr = otherConst;
114
- for (const aar of aars) {
115
- const posList = reversedSelectionObject[aar];
117
+ for (const monomer of monomers) {
118
+ const posList = reversedSelectionObject[monomer];
116
119
  const posColList = posList.map((pos) => table.getCol(pos));
117
120
  const posColCategoriesList = posColList.map((posCol) => posCol.categories);
118
121
  const posColDataList = posColList.map((posCol) => posCol.getRawData());
119
- const aarCategoryIndexList = posColCategoriesList.map((posColCategories) => posColCategories.indexOf(aar));
122
+ const monomerCategoryIndexList = posColCategoriesList.map((posColCategories) => posColCategories.indexOf(monomer));
120
123
 
121
- aarStr = `${aar}: {${posList.join(', ')}}`;
122
- const labels = getDistributionLegend(aarStr, otherStr);
124
+ monomerStr = `${monomer}: {${posList.join(', ')}}`;
125
+ const labels = getDistributionLegend(monomerStr, otherStr);
123
126
 
124
127
  const mask = DG.BitSet.create(rowCount,
125
- (i) => posColDataList.some((posColData, j) => posColData[i] === aarCategoryIndexList[j]));
128
+ (i) => posColDataList.some((posColData, j) => posColData[i] === monomerCategoryIndexList[j]));
126
129
  const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
127
130
 
128
131
  const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
@@ -140,36 +143,33 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
140
143
  res.push(distributionRoot);
141
144
  }
142
145
  } else {
143
- const splitCol = table.col(C.COLUMNS_NAMES.SPLIT_COL);
144
- if (!splitCol)
146
+ if (!table.selection.anyTrue)
145
147
  res.push(ui.divText('No distribution'));
146
148
  else {
147
149
  otherStr = '';
148
150
  if (Object.values(selectionObject).some((selectedAar) => selectedAar.length !== 0) ||
149
151
  clustersProcessedObject.length !== 0) {
150
- aarStr = '';
152
+ monomerStr = '';
151
153
  for (const position of positions) {
152
- const aarList = selectionObject[position];
153
- if (aarList.length !== 0)
154
- aarStr += `${position}: {${aarList.join(', ')}}; `;
154
+ const monomerList = selectionObject[position];
155
+ if (monomerList.length !== 0)
156
+ monomerStr += `${position}: {${monomerList.join(', ')}}; `;
155
157
  }
156
158
  if (clustersProcessedObject.length !== 0)
157
- aarStr += `Clusters: ${clustersProcessedObject.join(', ')}`;
159
+ monomerStr += `Clusters: ${clustersProcessedObject.join(', ')}`;
158
160
  otherStr = otherConst;
159
161
  }
160
- const labels = getDistributionLegend(aarStr, otherStr);
161
-
162
- const distributionTable = DG.DataFrame.fromColumns([activityCol, splitCol]);
162
+ const labels = getDistributionLegend(monomerStr, otherStr);
163
163
 
164
+ const distributionTable = DG.DataFrame.fromColumns([activityCol, DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, table.selection)]);
164
165
  const hist = getActivityDistribution(prepareTableForHistogram(distributionTable));
165
-
166
- const bitArray = BitArray.fromUint32Array(rowCount, splitCol.getRawData() as Uint32Array);
167
- const mask = DG.BitSet.create(rowCount, (i) => bitArray.getBit(i));
166
+ const bitArray = BitArray.fromString(table.selection.toBinaryString());
167
+ const mask = DG.BitSet.create(rowCount,
168
+ bitArray.allFalse ? (_): boolean => true : (i): boolean => bitArray.getBit(i));
168
169
  const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
169
-
170
- const stats = getStats(activityColData, bitArray);
170
+ const stats = bitArray.allFalse ? {count: rowCount, pValue: null, meanDifference: 0, ratio: 1, mask: bitArray} :
171
+ getStats(activityColData, bitArray);
171
172
  const tableMap = getStatsTableMap(stats);
172
-
173
173
  const resultMap: {[key: string]: any} = {...tableMap, ...aggregatedColMap};
174
174
  const distributionRoot = getStatsSummary(labels, hist, resultMap);
175
175
  $(distributionRoot).addClass('d4-flex-col');
@@ -187,10 +187,10 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
187
187
  };
188
188
 
189
189
  let defaultValuePos = model.splitByPos;
190
- let defaultValueAAR = model.splitByAAR;
190
+ let defaultValueMonomer = model.splitByMonomer;
191
191
  if (!model.isClusterSelectionEmpty && model.isMonomerPositionSelectionEmpty) {
192
192
  defaultValuePos = false;
193
- defaultValueAAR = false;
193
+ defaultValueMonomer = false;
194
194
  }
195
195
 
196
196
  const splitByPosition = ui.boolInput('', defaultValuePos, updateDistributionHost);
@@ -198,14 +198,14 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
198
198
  splitByPosition.setTooltip('Constructs distribution for each position separately');
199
199
  setDefaultProperties(splitByPosition);
200
200
  $(splitByPosition.root).css('margin-right', '10px');
201
- const splitByAAR = ui.boolInput('', defaultValueAAR, updateDistributionHost);
202
- splitByAAR.addPostfix('Split by monomer');
203
- splitByAAR.setTooltip('Constructs distribution for each monomer separately');
204
- setDefaultProperties(splitByAAR);
201
+ const splitByMonomer = ui.boolInput('', defaultValueMonomer, updateDistributionHost);
202
+ splitByMonomer.addPostfix('Split by monomer');
203
+ splitByMonomer.setTooltip('Constructs distribution for each monomer separately');
204
+ setDefaultProperties(splitByMonomer);
205
205
 
206
- const controlsHost = ui.divH([splitByPosition.root, splitByAAR.root]);
206
+ const controlsHost = ui.divH([splitByPosition.root, splitByMonomer.root]);
207
207
  const distributionHost = ui.div([], 'd4-flex-wrap');
208
- splitByAAR.fireChanged();
208
+ splitByMonomer.fireChanged();
209
209
 
210
210
  return new DG.Widget(ui.divV([controlsHost, distributionHost]));
211
211
  }
@@ -229,11 +229,12 @@ export function getActivityDistribution(table: DG.DataFrame, isTooltip: boolean
229
229
 
230
230
  export function getStatsTableMap(stats: Stats, options: {fractionDigits?: number} = {}): StringDictionary {
231
231
  options.fractionDigits ??= 3;
232
- const tableMap = {
232
+ const tableMap: StringDictionary = {
233
233
  'Count': `${stats.count} (${stats.ratio.toFixed(options.fractionDigits)}%)`,
234
- 'p-value': stats.pValue < 0.01 ? '<0.01' : stats.pValue.toFixed(options.fractionDigits),
235
234
  'Mean difference': stats.meanDifference.toFixed(options.fractionDigits),
236
235
  };
236
+ if (stats.pValue !== null)
237
+ tableMap['p-value'] = stats.pValue < 0.01 ? '<0.01' : stats.pValue.toFixed(options.fractionDigits);
237
238
  return tableMap;
238
239
  }
239
240
 
@@ -33,8 +33,8 @@ export function mutationCliffsWidget(table: DG.DataFrame, model: PeptidesModel):
33
33
  const posColCategories = posCol.categories;
34
34
  const posColData = posCol.getRawData();
35
35
 
36
- for (const aar of currentCell[pos]) {
37
- const substitutionsMap = substInfo.get(aar)?.get(pos) as Map<number, type.UTypedArray> | undefined;
36
+ for (const monomer of currentCell[pos]) {
37
+ const substitutionsMap = substInfo.get(monomer)?.get(pos) as Map<number, type.UTypedArray> | undefined;
38
38
  if (typeof substitutionsMap === 'undefined')
39
39
  continue;
40
40
 
@@ -20,11 +20,12 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
20
20
  const logoHost = ui.div();
21
21
  let seqColInput: DG.InputBase | null = null;
22
22
  if (typeof col === 'undefined') {
23
- const sequenceColumns = df.columns.toList()
24
- .filter((dfCol) => dfCol.semType === DG.SEMTYPE.MACROMOLECULE && dfCol.stats.missingValueCount === 0);
23
+ const sequenceColumns = df.columns.toList().filter((dfCol) => dfCol.semType === DG.SEMTYPE.MACROMOLECULE);
25
24
  const potentialCol = DG.Utils.firstOrNull(sequenceColumns);
26
25
  if (potentialCol === null)
27
26
  throw new Error('Peptides Error: table doesn\'t contain sequence columns');
27
+ else if (potentialCol.stats.missingValueCount !== 0)
28
+ grok.shell.info('Sequences column contains missing values. They will be ignored during analysis');
28
29
 
29
30
  seqColInput = ui.columnInput('Sequence', df, potentialCol, () => {
30
31
  const seqCol = seqColInput!.value;
@@ -37,8 +38,9 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
37
38
  viewer.root.style.setProperty('height', '130px');
38
39
  return viewer.root;
39
40
  }));
40
- //TODO: add when new version of datagrok-api is available
41
- }, {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE && col.stats.missingValueCount === 0});
41
+ if (seqCol.stats.missingValueCount !== 0)
42
+ grok.shell.info('Sequences column contains missing values. They will be ignored during analysis');
43
+ }, {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
42
44
  seqColInput.setTooltip('Macromolecule column in FASTA, HELM or separated format');
43
45
  } else if (!(col.getTag(bioTAGS.aligned) === ALIGNMENT.SEQ_MSA) &&
44
46
  col.getTag(DG.TAGS.UNITS) !== NOTATION.HELM) {
@@ -89,21 +91,22 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
89
91
  activityScalingMethod.setTooltip('Activity column transformation method');
90
92
 
91
93
  const activityScalingMethodState = (): void => {
92
- activityScalingMethod.enabled = (activityColumnChoice.value ?? false) && DG.Stats.fromColumn(activityColumnChoice.value!).min > 0;
94
+ activityScalingMethod.enabled = (activityColumnChoice.value ?? false) && activityColumnChoice.value!.stats.min > 0;
93
95
  activityScalingMethod.value = C.SCALING_METHODS.NONE;
96
+ if (activityColumnChoice.value!.stats.missingValueCount !== 0)
97
+ grok.shell.info('Activity column contains missing values. They will be ignored during analysis');
94
98
  };
95
99
  //TODO: add when new version of datagrok-api is available
96
100
  const activityColumnChoice = ui.columnInput('Activity', df, defaultActivityColumn, activityScalingMethodState,
97
- {filter: (col: DG.Column) => (col.type === DG.TYPE.INT || col.type === DG.TYPE.FLOAT) && col.stats.missingValueCount === 0});
101
+ {filter: (col: DG.Column) => col.type === DG.TYPE.INT || col.type === DG.TYPE.FLOAT});
98
102
  activityColumnChoice.setTooltip('Numerical activity column');
99
- const clustersColumnChoice = ui.columnInput('Clusters', df, null, null, {filter: (col: DG.Column) => col.stats.missingValueCount === 0});
103
+ const clustersColumnChoice = ui.columnInput('Clusters', df, null, null);
100
104
  clustersColumnChoice.setTooltip('Optional. Clusters column is used to create Logo Summary Table');
101
105
  clustersColumnChoice.nullable = true;
102
106
  activityColumnChoice.fireChanged();
103
107
  activityScalingMethod.fireChanged();
104
108
 
105
- const targetColumnChoice = ui.columnInput('Target', df, null, null,
106
- {filter: (col: DG.Column) => col.type === DG.TYPE.STRING && col.stats.missingValueCount === 0});
109
+ const targetColumnChoice = ui.columnInput('Target', df, null, null, {filter: (col: DG.Column) => col.type === DG.TYPE.STRING});
107
110
  targetColumnChoice.setTooltip('Optional. Target represents a unique binding construct for every peptide in the data. ' +
108
111
  'Target can be used to split mutation cliff analysis for peptides specific to a certain set of targets');
109
112
  targetColumnChoice.nullable = true;
@@ -199,10 +202,14 @@ export async function startAnalysis(activityColumn: DG.Column<number>, peptidesC
199
202
  newDf.setTag(C.TAGS.UUID, dfUuid);
200
203
  newDf.setTag('monomerType', monomerType);
201
204
 
205
+ const bitset = DG.BitSet.create(currentDf.rowCount,
206
+ (i) => !activityColumn.isNone(i) && !peptidesCol.isNone(i) && currentDf.filter.get(i));
207
+
202
208
  // Cloning dataframe with applied filter. If filter is not applied, cloning is
203
209
  // needed anyway to allow filtering on the original dataframe
204
- model = PeptidesModel.getInstance(newDf.clone(currentDf.filter));
205
- if (clustersColumn) await model.addLogoSummaryTable();
210
+ model = PeptidesModel.getInstance(newDf.clone(bitset));
211
+ if (clustersColumn)
212
+ await model.addLogoSummaryTable();
206
213
  await model.addMonomerPosition();
207
214
  await model.addMostPotentResidues();
208
215
  } else