npm - @datagrok/peptides - Versions diffs - 1.12.0 → 1.13.1 - Mend

@datagrok/peptides 1.12.0 → 1.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/CHANGELOG.md +22 -0
package/dist/535.js +2 -2
package/dist/package-test.js +2 -2
package/dist/package.js +2 -2
package/package.json +7 -7
package/src/demo/fasta.ts +6 -25
package/src/model.ts +275 -296
package/src/package.ts +1 -1
package/src/tests/core.ts +2 -10
package/src/tests/table-view.ts +48 -48
package/src/tests/viewers.ts +15 -13
package/src/tests/widgets.ts +3 -4
package/src/utils/cell-renderer.ts +33 -39
package/src/utils/constants.ts +1 -0
package/src/utils/misc.ts +2 -5
package/src/utils/statistics.ts +22 -3
package/src/utils/types.ts +6 -5
package/src/viewers/logo-summary.ts +55 -42
package/src/viewers/sar-viewer.ts +167 -107
package/src/widgets/distribution.ts +60 -59
package/src/widgets/mutation-cliffs.ts +2 -2
package/src/widgets/peptides.ts +18 -11

package/src/widgets/distribution.ts CHANGED

@@ -14,44 +14,47 @@ const allConst = 'All';
 const otherConst = 'Other';
 export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel): DG.Widget {
+  if (!table.selection.anyTrue)
+    return new DG.Widget(ui.divText('No distribution'));
   const activityCol = table.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
   const activityColData = activityCol.getRawData();
   const rowCount = activityCol.length;
-  const selectionObject = model.mutationCliffsSelection;
+  const selectionObject = model.invariantMapSelection;
   const clustersColName = model.settings.clustersColumnName;
   let clustersProcessedObject: string[] = [];
   if (clustersColName)
-    clustersProcessedObject = model.clusterSelection;
+    clustersProcessedObject = Object.values(model.clusterSelection).flat();
   const positions = Object.keys(selectionObject);
-  let aarStr = allConst;
+  let monomerStr = allConst;
   let otherStr = '';
   const updateDistributionHost = (): void => {
     model.splitByPos = splitByPosition.value!;
-    model.splitByAAR = splitByAAR.value!;
+    model.splitByMonomer = splitByMonomer.value!;
     const res: HTMLDivElement[] = [];
-    if (splitByPosition.value && splitByAAR.value) {
+    if (splitByPosition.value && splitByMonomer.value) {
       otherStr = otherConst;
       for (const position of positions) {
-        const aarList = selectionObject[position];
-        if (aarList.length === 0)
+        const monomerList = selectionObject[position];
+        if (monomerList.length === 0)
           continue;
         const posCol = table.getCol(position);
         const posColCategories = posCol.categories;
         const posColData = posCol.getRawData();
-        for (const aar of aarList) {
-          const labels = getDistributionLegend(`${position} : ${aar}`, otherStr);
+        for (const monomer of monomerList) {
+          const labels = getDistributionLegend(`${position} : ${monomer}`, otherStr);
-          const aarCategoryIndex = posColCategories.indexOf(aar);
-          const mask = DG.BitSet.create(rowCount, (i) => posColData[i] === aarCategoryIndex);
+          const monomerCategoryIndex = posColCategories.indexOf(monomer);
+          const mask = DG.BitSet.create(rowCount, (i) => posColData[i] === monomerCategoryIndex);
           const distributionTable = DG.DataFrame.fromColumns(
             [activityCol, DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask)]);
           const hist = getActivityDistribution(prepareTableForHistogram(distributionTable));
-          const stats = model.monomerPositionStats[position][aar];
+          const stats = model.monomerPositionStats[position]![monomer]!;
           const tableMap = getStatsTableMap(stats);
           const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
@@ -66,18 +69,18 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
     } else if (splitByPosition.value) {
       otherStr = otherConst;
       for (const position of positions) {
-        const aarList = selectionObject[position];
-        if (aarList.length === 0)
+        const monomerList = selectionObject[position];
+        if (monomerList.length === 0)
           continue;
-        aarStr = `${position}: {${aarList.join(', ')}}`;
-        const labels = getDistributionLegend(aarStr, otherStr);
+        monomerStr = `${position}: {${monomerList.join(', ')}}`;
+        const labels = getDistributionLegend(monomerStr, otherStr);
         const posCol = table.getCol(position);
         const posColCategories = posCol.categories;
         const posColData = posCol.getRawData();
-        const aarIndexesList = aarList.map((aar) => posColCategories.indexOf(aar));
-        const mask = DG.BitSet.create(rowCount, (i) => aarIndexesList.includes(posColData[i]));
+        const monomerIndexesList = monomerList.map((monomer) => posColCategories.indexOf(monomer));
+        const mask = DG.BitSet.create(rowCount, (i) => monomerIndexesList.includes(posColData[i]));
         const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
         const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
@@ -95,34 +98,34 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
         res.push(distributionRoot);
       }
-    } else if (splitByAAR.value) {
-      const reversedSelectionObject: {[aar: string]: string[]} = {};
-      const aars = [];
+    } else if (splitByMonomer.value) {
+      const reversedSelectionObject: {[monomer: string]: string[]} = {};
+      const monomers = [];
       for (const position of positions) {
-        for (const aar of selectionObject[position]) {
-          if (!reversedSelectionObject.hasOwnProperty(aar)) {
-            reversedSelectionObject[aar] = [position];
-            aars.push(aar);
+        for (const monomer of selectionObject[position]) {
+          if (!reversedSelectionObject.hasOwnProperty(monomer)) {
+            reversedSelectionObject[monomer] = [position];
+            monomers.push(monomer);
             continue;
           }
-          if (!reversedSelectionObject[aar].includes(position))
-            reversedSelectionObject[aar].push(position);
+          if (!reversedSelectionObject[monomer].includes(position))
+            reversedSelectionObject[monomer].push(position);
         }
       }
       otherStr = otherConst;
-      for (const aar of aars) {
-        const posList = reversedSelectionObject[aar];
+      for (const monomer of monomers) {
+        const posList = reversedSelectionObject[monomer];
         const posColList = posList.map((pos) => table.getCol(pos));
         const posColCategoriesList = posColList.map((posCol) => posCol.categories);
         const posColDataList = posColList.map((posCol) => posCol.getRawData());
-        const aarCategoryIndexList = posColCategoriesList.map((posColCategories) => posColCategories.indexOf(aar));
+        const monomerCategoryIndexList = posColCategoriesList.map((posColCategories) => posColCategories.indexOf(monomer));
-        aarStr = `${aar}: {${posList.join(', ')}}`;
-        const labels = getDistributionLegend(aarStr, otherStr);
+        monomerStr = `${monomer}: {${posList.join(', ')}}`;
+        const labels = getDistributionLegend(monomerStr, otherStr);
         const mask = DG.BitSet.create(rowCount,
-          (i) => posColDataList.some((posColData, j) => posColData[i] === aarCategoryIndexList[j]));
+          (i) => posColDataList.some((posColData, j) => posColData[i] === monomerCategoryIndexList[j]));
         const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
         const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
@@ -140,36 +143,33 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
         res.push(distributionRoot);
       }
     } else {
-      const splitCol = table.col(C.COLUMNS_NAMES.SPLIT_COL);
-      if (!splitCol)
+      if (!table.selection.anyTrue)
         res.push(ui.divText('No distribution'));
       else {
         otherStr = '';
         if (Object.values(selectionObject).some((selectedAar) => selectedAar.length !== 0) ||
           clustersProcessedObject.length !== 0) {
-          aarStr = '';
+          monomerStr = '';
           for (const position of positions) {
-            const aarList = selectionObject[position];
-            if (aarList.length !== 0)
-              aarStr += `${position}: {${aarList.join(', ')}}; `;
+            const monomerList = selectionObject[position];
+            if (monomerList.length !== 0)
+              monomerStr += `${position}: {${monomerList.join(', ')}}; `;
           }
           if (clustersProcessedObject.length !== 0)
-            aarStr += `Clusters: ${clustersProcessedObject.join(', ')}`;
+            monomerStr += `Clusters: ${clustersProcessedObject.join(', ')}`;
           otherStr = otherConst;
         }
-        const labels = getDistributionLegend(aarStr, otherStr);
-        const distributionTable = DG.DataFrame.fromColumns([activityCol, splitCol]);
+        const labels = getDistributionLegend(monomerStr, otherStr);
+        const distributionTable = DG.DataFrame.fromColumns([activityCol, DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, table.selection)]);
         const hist = getActivityDistribution(prepareTableForHistogram(distributionTable));
-        const bitArray = BitArray.fromUint32Array(rowCount, splitCol.getRawData() as Uint32Array);
-        const mask = DG.BitSet.create(rowCount, (i) => bitArray.getBit(i));
+        const bitArray = BitArray.fromString(table.selection.toBinaryString());
+        const mask = DG.BitSet.create(rowCount,
+          bitArray.allFalse ? (_): boolean => true : (i): boolean => bitArray.getBit(i));
         const aggregatedColMap = model.getAggregatedColumnValues({filterDf: true, mask});
-        const stats = getStats(activityColData, bitArray);
+        const stats = bitArray.allFalse ? {count: rowCount, pValue: null, meanDifference: 0, ratio: 1, mask: bitArray} :
+          getStats(activityColData, bitArray);
         const tableMap = getStatsTableMap(stats);
         const resultMap: {[key: string]: any} = {...tableMap, ...aggregatedColMap};
         const distributionRoot = getStatsSummary(labels, hist, resultMap);
         $(distributionRoot).addClass('d4-flex-col');
@@ -187,10 +187,10 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
   };
   let defaultValuePos = model.splitByPos;
-  let defaultValueAAR = model.splitByAAR;
+  let defaultValueMonomer = model.splitByMonomer;
   if (!model.isClusterSelectionEmpty && model.isMonomerPositionSelectionEmpty) {
     defaultValuePos = false;
-    defaultValueAAR = false;
+    defaultValueMonomer = false;
   }
   const splitByPosition = ui.boolInput('', defaultValuePos, updateDistributionHost);
@@ -198,14 +198,14 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
   splitByPosition.setTooltip('Constructs distribution for each position separately');
   setDefaultProperties(splitByPosition);
   $(splitByPosition.root).css('margin-right', '10px');
-  const splitByAAR = ui.boolInput('', defaultValueAAR, updateDistributionHost);
-  splitByAAR.addPostfix('Split by monomer');
-  splitByAAR.setTooltip('Constructs distribution for each monomer separately');
-  setDefaultProperties(splitByAAR);
+  const splitByMonomer = ui.boolInput('', defaultValueMonomer, updateDistributionHost);
+  splitByMonomer.addPostfix('Split by monomer');
+  splitByMonomer.setTooltip('Constructs distribution for each monomer separately');
+  setDefaultProperties(splitByMonomer);
-  const controlsHost = ui.divH([splitByPosition.root, splitByAAR.root]);
+  const controlsHost = ui.divH([splitByPosition.root, splitByMonomer.root]);
   const distributionHost = ui.div([], 'd4-flex-wrap');
-  splitByAAR.fireChanged();
+  splitByMonomer.fireChanged();
   return new DG.Widget(ui.divV([controlsHost, distributionHost]));
 }
@@ -229,11 +229,12 @@ export function getActivityDistribution(table: DG.DataFrame, isTooltip: boolean
 export function getStatsTableMap(stats: Stats, options: {fractionDigits?: number} = {}): StringDictionary {
   options.fractionDigits ??= 3;
-  const tableMap = {
+  const tableMap: StringDictionary = {
     'Count': `${stats.count} (${stats.ratio.toFixed(options.fractionDigits)}%)`,
-    'p-value': stats.pValue < 0.01 ? '<0.01' : stats.pValue.toFixed(options.fractionDigits),
     'Mean difference': stats.meanDifference.toFixed(options.fractionDigits),
   };
+  if (stats.pValue !== null)
+    tableMap['p-value'] = stats.pValue < 0.01 ? '<0.01' : stats.pValue.toFixed(options.fractionDigits);
   return tableMap;
 }

package/src/widgets/mutation-cliffs.ts CHANGED

@@ -33,8 +33,8 @@ export function mutationCliffsWidget(table: DG.DataFrame, model: PeptidesModel):
     const posColCategories = posCol.categories;
     const posColData = posCol.getRawData();
-    for (const aar of currentCell[pos]) {
-      const substitutionsMap = substInfo.get(aar)?.get(pos) as Map<number, type.UTypedArray> | undefined;
+    for (const monomer of currentCell[pos]) {
+      const substitutionsMap = substInfo.get(monomer)?.get(pos) as Map<number, type.UTypedArray> | undefined;
       if (typeof substitutionsMap === 'undefined')
         continue;

package/src/widgets/peptides.ts CHANGED

@@ -20,11 +20,12 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
   const logoHost = ui.div();
   let seqColInput: DG.InputBase | null = null;
   if (typeof col === 'undefined') {
-    const sequenceColumns = df.columns.toList()
-      .filter((dfCol) => dfCol.semType === DG.SEMTYPE.MACROMOLECULE && dfCol.stats.missingValueCount === 0);
+    const sequenceColumns = df.columns.toList().filter((dfCol) => dfCol.semType === DG.SEMTYPE.MACROMOLECULE);
     const potentialCol = DG.Utils.firstOrNull(sequenceColumns);
     if (potentialCol === null)
       throw new Error('Peptides Error: table doesn\'t contain sequence columns');
+    else if (potentialCol.stats.missingValueCount !== 0)
+      grok.shell.info('Sequences column contains missing values. They will be ignored during analysis');
     seqColInput = ui.columnInput('Sequence', df, potentialCol, () => {
       const seqCol = seqColInput!.value;
@@ -37,8 +38,9 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
         viewer.root.style.setProperty('height', '130px');
         return viewer.root;
       }));
-      //TODO: add when new version of datagrok-api is available
-    }, {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE && col.stats.missingValueCount === 0});
+      if (seqCol.stats.missingValueCount !== 0)
+        grok.shell.info('Sequences column contains missing values. They will be ignored during analysis');
+    }, {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
     seqColInput.setTooltip('Macromolecule column in FASTA, HELM or separated format');
   } else if (!(col.getTag(bioTAGS.aligned) === ALIGNMENT.SEQ_MSA) &&
     col.getTag(DG.TAGS.UNITS) !== NOTATION.HELM) {
@@ -89,21 +91,22 @@ export function analyzePeptidesUI(df: DG.DataFrame, col?: DG.Column<string>): {h
   activityScalingMethod.setTooltip('Activity column transformation method');
   const activityScalingMethodState = (): void => {
-    activityScalingMethod.enabled = (activityColumnChoice.value ?? false) && DG.Stats.fromColumn(activityColumnChoice.value!).min > 0;
+    activityScalingMethod.enabled = (activityColumnChoice.value ?? false) && activityColumnChoice.value!.stats.min > 0;
     activityScalingMethod.value = C.SCALING_METHODS.NONE;
+    if (activityColumnChoice.value!.stats.missingValueCount !== 0)
+      grok.shell.info('Activity column contains missing values. They will be ignored during analysis');
   };
   //TODO: add when new version of datagrok-api is available
   const activityColumnChoice = ui.columnInput('Activity', df, defaultActivityColumn, activityScalingMethodState,
-    {filter: (col: DG.Column) => (col.type === DG.TYPE.INT || col.type === DG.TYPE.FLOAT) && col.stats.missingValueCount === 0});
+    {filter: (col: DG.Column) => col.type === DG.TYPE.INT || col.type === DG.TYPE.FLOAT});
   activityColumnChoice.setTooltip('Numerical activity column');
-  const clustersColumnChoice = ui.columnInput('Clusters', df, null, null, {filter: (col: DG.Column) => col.stats.missingValueCount === 0});
+  const clustersColumnChoice = ui.columnInput('Clusters', df, null, null);
   clustersColumnChoice.setTooltip('Optional. Clusters column is used to create Logo Summary Table');
   clustersColumnChoice.nullable = true;
   activityColumnChoice.fireChanged();
   activityScalingMethod.fireChanged();
-  const targetColumnChoice = ui.columnInput('Target', df, null, null,
-    {filter: (col: DG.Column) => col.type === DG.TYPE.STRING && col.stats.missingValueCount === 0});
+  const targetColumnChoice = ui.columnInput('Target', df, null, null, {filter: (col: DG.Column) => col.type === DG.TYPE.STRING});
   targetColumnChoice.setTooltip('Optional. Target represents a unique binding construct for every peptide in the data. ' +
     'Target can be used to split mutation cliff analysis for peptides specific to a certain set of targets');
   targetColumnChoice.nullable = true;
@@ -199,10 +202,14 @@ export async function startAnalysis(activityColumn: DG.Column<number>, peptidesC
     newDf.setTag(C.TAGS.UUID, dfUuid);
     newDf.setTag('monomerType', monomerType);
+    const bitset = DG.BitSet.create(currentDf.rowCount,
+      (i) => !activityColumn.isNone(i) && !peptidesCol.isNone(i) && currentDf.filter.get(i));
     // Cloning dataframe with applied filter. If filter is not applied, cloning is
     // needed anyway to allow filtering on the original dataframe
-    model = PeptidesModel.getInstance(newDf.clone(currentDf.filter));
-    if (clustersColumn) await model.addLogoSummaryTable();
+    model = PeptidesModel.getInstance(newDf.clone(bitset));
+    if (clustersColumn)
+      await model.addLogoSummaryTable();
     await model.addMonomerPosition();
     await model.addMostPotentResidues();
   } else