npm - @datagrok/eda - Versions diffs - 1.4.13 → 1.5.1 - Mend

@datagrok/eda 1.4.13 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/CHANGELOG.md +11 -5
package/dist/111.js +1 -1
package/dist/111.js.map +1 -1
package/dist/128.js +1 -1
package/dist/128.js.map +1 -1
package/dist/153.js +1 -1
package/dist/153.js.map +1 -1
package/dist/23.js +1 -1
package/dist/23.js.map +1 -1
package/dist/234.js +1 -1
package/dist/234.js.map +1 -1
package/dist/242.js +1 -1
package/dist/242.js.map +1 -1
package/dist/260.js +1 -1
package/dist/260.js.map +1 -1
package/dist/33.js +1 -1
package/dist/33.js.map +1 -1
package/dist/348.js +1 -1
package/dist/348.js.map +1 -1
package/dist/377.js +1 -1
package/dist/377.js.map +1 -1
package/dist/397.js +2 -0
package/dist/397.js.map +1 -0
package/dist/412.js +1 -1
package/dist/412.js.map +1 -1
package/dist/415.js +1 -1
package/dist/415.js.map +1 -1
package/dist/501.js +1 -1
package/dist/501.js.map +1 -1
package/dist/531.js +1 -1
package/dist/531.js.map +1 -1
package/dist/583.js +1 -1
package/dist/583.js.map +1 -1
package/dist/589.js +1 -1
package/dist/589.js.map +1 -1
package/dist/603.js +1 -1
package/dist/603.js.map +1 -1
package/dist/656.js +1 -1
package/dist/656.js.map +1 -1
package/dist/682.js +1 -1
package/dist/682.js.map +1 -1
package/dist/705.js +1 -1
package/dist/705.js.map +1 -1
package/dist/727.js +1 -1
package/dist/727.js.map +1 -1
package/dist/731.js +1 -1
package/dist/731.js.map +1 -1
package/dist/738.js +1 -1
package/dist/738.js.map +1 -1
package/dist/763.js +1 -1
package/dist/763.js.map +1 -1
package/dist/778.js +1 -1
package/dist/778.js.map +1 -1
package/dist/783.js +1 -1
package/dist/783.js.map +1 -1
package/dist/793.js +1 -1
package/dist/793.js.map +1 -1
package/dist/810.js +1 -1
package/dist/810.js.map +1 -1
package/dist/860.js +1 -1
package/dist/860.js.map +1 -1
package/dist/907.js +1 -1
package/dist/907.js.map +1 -1
package/dist/950.js +1 -1
package/dist/950.js.map +1 -1
package/dist/980.js +1 -1
package/dist/980.js.map +1 -1
package/dist/990.js +1 -1
package/dist/990.js.map +1 -1
package/dist/package-test.js +1 -1
package/dist/package-test.js.map +1 -1
package/dist/package.js +1 -1
package/dist/package.js.map +1 -1
package/package.json +5 -5
package/src/package.ts +2 -1
package/src/pareto-optimization/pareto-optimizer.ts +1 -1
package/src/pls/pls-constants.ts +8 -1
package/src/pls/pls-tools.ts +176 -74
package/src/probabilistic-scoring/data-generator.ts +48 -3
package/src/probabilistic-scoring/pmpo-defs.ts +30 -2
package/src/probabilistic-scoring/pmpo-utils.ts +143 -52
package/src/probabilistic-scoring/prob-scoring.ts +477 -104
package/src/probabilistic-scoring/stat-tools.ts +1 -1
package/src/tests/pareto-tests.ts +13 -15
package/src/tests/pmpo-tests.ts +643 -3
package/test-console-output-1.log +224 -86
package/test-record-1.mp4 +0 -0

package/package.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
   "name": "@datagrok/eda",
   "friendlyName": "EDA",
-  "version": "1.4.13",
+  "version": "1.5.1",
   "description": "Exploratory Data Analysis Tools",
   "dependencies": {
     "@datagrok-libraries/math": "^1.2.6",
-    "@datagrok-libraries/ml": "^6.10.8",
-    "@datagrok-libraries/statistics": "^1.10.0",
+    "@datagrok-libraries/ml": "^6.10.10",
+    "@datagrok-libraries/statistics": "^1.12.1",
     "@datagrok-libraries/tutorials": "^1.7.4",
-    "@datagrok-libraries/utils": "^4.6.5",
+    "@datagrok-libraries/utils": "^4.7.0",
     "@keckelt/tsne": "^1.0.2",
     "@webgpu/types": "^0.1.40",
     "cash-dom": "^8.1.1",
-    "datagrok-api": "^1.26.3",
+    "datagrok-api": "^1.27.0",
     "dayjs": "^1.11.9",
     "jstat": "^1.9.6",
     "mathjs": "^15.1.0",

package/src/package.ts CHANGED Viewed

@@ -1027,8 +1027,9 @@ export class PackageFunctions {
     'outputs': [{name: 'Synthetic', type: 'dataframe'}],
   })
   static async generatePmpoDataset(@grok.decorators.param({'type': 'int'}) samples: number): Promise<DG.DataFrame> {
-    const df = await getSynteticPmpoData(samples);
+    const df = await getSynteticPmpoData(samples, false);
     df.name = 'Synthetic';
     return df;
   }
 }

package/src/pareto-optimization/pareto-optimizer.ts CHANGED Viewed

@@ -19,7 +19,7 @@ export class ParetoOptimizer {
   private toUpdatePcCols = false;
   private paretoFrontViewer: DG.Viewer;
   private resultColName: string;
-  private intervalId: NodeJS.Timeout | null = null;
+  private intervalId: ReturnType<typeof setInterval> | null = null;
   private inputsMap = new Map<string, DG.InputBase>();
   private pcPlotNode: DG.DockNode | null = null;
   private inputFormNode: DG.DockNode | null = null;

package/src/pls/pls-constants.ts CHANGED Viewed

@@ -17,8 +17,10 @@ export enum ERROR_MSG {
   ENOUGH = 'Not enough of features',
   COMP_LIN_PLS = 'Components count must be less than the number of features',
   COMP_QUA_PLS = 'Too large components count for the quadratic PLS regression',
-  COMPONENTS = 'Components count must be greater than 1',
+  COMP_ROWS = 'Components count must not exceed the number of rows',
+  COMPONENTS = 'Components count must be at least 1',
   INV_INP = 'Invalid inputs',
+  NULL_COMPS = 'Components count is not specified',
 }
 /** Widget titles */
@@ -44,6 +46,7 @@ export enum TITLE {
   BROWSE = 'Browse',
   ANALYSIS = 'Features Analysis',
   QUADRATIC = 'Quadratic',
+  BIAS = 'bias',
 }
 /** Tooltips */
@@ -100,6 +103,10 @@ export const X_COORD = 200;
 export const Y_COORD = 200;
 export const DELAY = 2000;
+export const MAX_ROWS_IN_PREDICTION_TOOLTIP = 20;
+export const NUMS_AFTER_COMMA = 3;
 /** Curves colors */
 export enum COLOR {
   AXIS = '#838383',

package/src/pls/pls-tools.ts CHANGED Viewed

@@ -4,9 +4,10 @@ import * as grok from 'datagrok-api/grok';
 import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
-import {PLS_ANALYSIS, ERROR_MSG, TITLE, HINT, LINK, COMPONENTS, INT, TIMEOUT,
+import {PLS_ANALYSIS, ERROR_MSG, TITLE, HINT, LINK, COMPONENTS,
   RESULT_NAMES, WASM_OUTPUT_IDX, RADIUS, LINE_WIDTH, COLOR, X_COORD, Y_COORD,
-  DEMO_INTRO_MD, DEMO_RESULTS_MD, DEMO_RESULTS} from './pls-constants';
+  DEMO_INTRO_MD, DEMO_RESULTS_MD, DEMO_RESULTS, NUMS_AFTER_COMMA,
+  MAX_ROWS_IN_PREDICTION_TOOLTIP} from './pls-constants';
 import {checkWasmDimensionReducerInputs, checkColumnType, checkMissingVals, describeElements} from '../utils';
 import {_partialLeastSquareRegressionInWebWorker} from '../../wasm/EDAAPI';
 import {carsDataframe} from '../data-generators';
@@ -36,6 +37,37 @@ export type PlsInput = {
 type TypedArray = Int32Array | Float32Array | Uint32Array | Float64Array;
+/** Set style for input element depending on the validity of the value */
+function setStyle(valid: boolean, element: HTMLElement, tooltip: string, errorMsg: string) {
+  if (valid) {
+    element.style.color = COLOR.VALID_TEXT;
+    element.style.borderBottomColor = COLOR.VALID_LINE;
+    ui.tooltip.bind(element, tooltip);
+  } else {
+    element.style.color = COLOR.INVALID;
+    element.style.borderBottomColor = COLOR.INVALID;
+    ui.tooltip.bind(element, () => {
+      const hint = ui.label(tooltip);
+      const err = ui.label(errorMsg);
+      err.style.color = COLOR.INVALID;
+      return ui.divV([hint, err]);
+    });
+  }
+};
+function getModelFormulaTerms(loadingsRegrCoefsTable: DG.DataFrame, bias: number): Map<string, number> {
+  const featureNames = loadingsRegrCoefsTable.col(TITLE.FEATURE)!.toList() as string[];
+  const regrCoefs = loadingsRegrCoefsTable.col(TITLE.REGR_COEFS)!.getRawData();
+  const terms = new Map([[TITLE.BIAS as string, bias]]);
+  featureNames.forEach((name, idx) => {
+    terms.set(name, regrCoefs[idx]);
+  });
+  return terms;
+}
 /** Return lines */
 export function getLines(names: string[]): DG.FormulaLine[] {
   const lines: DG.FormulaLine[] = [];
@@ -97,7 +129,7 @@ export async function getPlsAnalysis(input: PlsInput): Promise<PlsOutput> {
 /** Return debiased predction by PLS regression */
 function debiasedPrediction(features: DG.ColumnList, params: DG.Column,
-  target: DG.Column, biasedPrediction: DG.Column): DG.Column {
+  target: DG.Column, biasedPrediction: DG.Column): {debiased: DG.Column, bias: number} {
   const samples = target.length;
   const dim = features.length;
   const rawParams = params.getRawData();
@@ -113,7 +145,7 @@ function debiasedPrediction(features: DG.ColumnList, params: DG.Column,
   for (let i = 0; i < samples; ++i)
     debiased[i] = bias + biased[i];
-  return DG.Column.fromFloat32Array('Debiased', debiased, samples);
+  return {debiased: DG.Column.fromFloat32Array('Debiased', debiased, samples), bias: bias};
 }
 /** Return an input for the quadratic PLS regression */
@@ -137,7 +169,7 @@ function getQuadraticPlsInput(input: PlsInput): PlsInput {
     for (let j = i; j < colsCount; ++j) {
       col2 = cols[j];
-      raw2 = col2.getRawData();
+      raw2 = col2.getRawData();
       qaudrRaw = new Float32Array(rowsCount);
       for (let k = 0; k < rowsCount; ++k)
@@ -205,7 +237,8 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
   // 1. Predicted vs Reference scatter plot
   // Debias prediction (since PLS center data)
-  const pred = debiasedPrediction(features, result.regressionCoefficients, input.predict, result.prediction);
+  const debiased = debiasedPrediction(features, result.regressionCoefficients, input.predict, result.prediction);
+  const pred = debiased.debiased;
   pred.name = cols.getUnusedName(`${input.predict.name} ${RESULT_NAMES.SUFFIX}`);
   cols.add(pred);
   const predictVsReferScatter = view.addViewer(DG.Viewer.scatterPlot(sourceTable, {
@@ -232,6 +265,9 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
     help: LINK.COEFFS,
     showValueSelector: false,
     showStackSelector: false,
+    description: `bias = ${debiased.bias.toFixed(NUMS_AFTER_COMMA)}`,
+    descriptionVisibilityMode: 'Always',
+    descriptionPosition: 'Bottom',
   }));
   // 3. Loadings Scatter Plot
@@ -268,7 +304,7 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
   });
-  // 4.3) create lines & circles
+  // 4.3) create lines & circles
   view.addViewer(scoresScatter);
   scoresScatter.meta.formulaLines.addAll(getLines(scoreNames));
@@ -321,7 +357,7 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
   }));
   // emphasize viewers in the demo case
-  if (analysisType === PLS_ANALYSIS.DEMO) {
+  if (analysisType === PLS_ANALYSIS.DEMO) {
     grok.shell.windows.help.showHelp(ui.markdown(DEMO_RESULTS_MD));
     describeElements(
@@ -330,6 +366,10 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
       ['left', 'left', 'right', 'right', 'left'],
     );
   }
+  // Add formula tooltip to the prediction column
+  const modelFormulaTerms = getModelFormulaTerms(loadingsRegrCoefsTable, debiased.bias);
+  setPredictionTooltip(view, pred, modelFormulaTerms);
 } // performMVA
 /** Run multivariate analysis (PLS) */
@@ -372,38 +412,32 @@ export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
     return;
   }
-  let features: DG.Column[] = numCols.slice(0, numCols.length - 1);
-  let predict = numCols[numCols.length - 1];
-  let components = min(numColNames.length - 1, COMPONENTS.DEFAULT as number);
-  let isQuadratic = false;
-  const isPredictValid = () => {
-    for (const col of features)
-      if (col.name === predict.name)
-        return false;
-    return true;
+  const doFeaturesIncludePredict = () => {
+    return featuresInput.value.some((col) => col.name === predictInput.value!.name);
   };
   const isCompConsistent = () => {
-    if (components < 1)
+    if (componentsInput.value! < 1)
       return false;
-    const n = features.length;
+    if (componentsInput.value! > table.rowCount)
+      return false;
-    if (isQuadratic)
-      return components <= (n + 1) * n / 2 + n;
+    const n = featuresInput.value.length;
-    return components <= n;
-  }
+    if (isQuadraticInput.value)
+      return componentsInput.value! <= (n + 1) * n / 2 + n;
+    return componentsInput.value! <= n;
+  };
   // response (to predict)
   const predictInput = ui.input.column(TITLE.PREDICT, {
     table: table,
-    value: predict,
+    value: numCols[numCols.length - 1],
     nullable: false,
-    onValueChanged: (value) => {
-      predict = value;
-      updateIputs();
+    onValueChanged: (_) => {
+      updateInputStyles();
     },
     filter: (col: DG.Column) => isValidNumeric(col),
     tooltipText: HINT.PREDICT,
@@ -413,21 +447,21 @@ export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
   const featuresInput = ui.input.columns(TITLE.USING, {
     table: table,
     available: numColNames,
-    value: features,
-    onValueChanged: (val) => {
-      features = val;
-      updateIputs();
+    value: numCols.slice(0, numCols.length - 1),
+    onValueChanged: (_) => {
+      updateInputStyles();
     },
     tooltipText: HINT.FEATURES,
+    nullable: false,
   });
   // components count
   const componentsInput = ui.input.int(TITLE.COMPONENTS, {
-    value: components,
+    value: min(numColNames.length - 1, COMPONENTS.DEFAULT as number),
     showPlusMinus: true,
-    onValueChanged: (val) => {
-      components = val;
-      updateIputs();
+    nullable: false,
+    onValueChanged: (_) => {
+      updateInputStyles();
     },
     tooltipText: HINT.COMPONENTS,
   });
@@ -446,28 +480,14 @@ export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
     dlgRunBtnTooltip = HINT.MVA;
   }
-  const setStyle = (valid: boolean, element: HTMLElement, tooltip: string, errorMsg: string) => {
-    if (valid) {
-      element.style.color = COLOR.VALID_TEXT;
-      element.style.borderBottomColor = COLOR.VALID_LINE;
-      ui.tooltip.bind(element, tooltip);
-    } else {
-      element.style.color = COLOR.INVALID;
-      element.style.borderBottomColor = COLOR.INVALID;
-      ui.tooltip.bind(element, () => {
-        const hint = ui.label(tooltip);
-        const err = ui.label(errorMsg);
-        err.style.color = COLOR.INVALID;
-        return ui.divV([hint, err]);
-      });
-    }
-  };
-  const updateIputs = () => {
-    const predValid = isPredictValid();
+  const updateInputStyles = () => {
+    const featuresValid = featuresInput.value.length >= 1;
+    const predValid = featuresValid && !doFeaturesIncludePredict();
     let compValid: boolean;
-    if (predValid) {
+    if (!featuresValid)
+      setStyle(false, featuresInput.input, HINT.FEATURES, ERROR_MSG.ENOUGH);
+    else if (predValid) {
       setStyle(true, predictInput.input, HINT.PREDICT, '');
       setStyle(true, featuresInput.input, HINT.FEATURES, '');
     } else {
@@ -475,9 +495,12 @@ export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
       setStyle(false, featuresInput.input, HINT.FEATURES, ERROR_MSG.PREDICT);
     }
-    if (components < 1) {
+    if (componentsInput.value == null) {
+      setStyle(false, componentsInput.input, HINT.COMPONENTS, ERROR_MSG.NULL_COMPS);
+      compValid = false;
+    } else if (componentsInput.value < 1) {
       setStyle(false, componentsInput.input, HINT.COMPONENTS, ERROR_MSG.COMPONENTS);
-      compValid = false;
+      compValid = false;
     } else {
       compValid = isCompConsistent();
@@ -486,7 +509,9 @@ export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
         if (predValid)
           setStyle(true, featuresInput.input, HINT.FEATURES, '');
       } else {
-        const errMsg = isQuadratic ? ERROR_MSG.COMP_QUA_PLS : ERROR_MSG.COMP_LIN_PLS;
+        const errMsg = componentsInput.value! > table.rowCount ?
+          ERROR_MSG.COMP_ROWS :
+          isQuadraticInput.value ? ERROR_MSG.COMP_QUA_PLS : ERROR_MSG.COMP_LIN_PLS;
         setStyle(false, componentsInput.input, HINT.COMPONENTS, errMsg);
         setStyle(false, featuresInput.input, HINT.FEATURES, ERROR_MSG.ENOUGH);
       }
@@ -497,10 +522,18 @@ export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
     dlg.getButton(TITLE.RUN).disabled = !isValid;
     return isValid;
+  }; // updateInputStyles
+  const getStrColWithUniqueVals = () => {
+    for (const col of strCols) {
+      if (col.stats.uniqueCount === table.rowCount)
+        return col;
+    }
+    return undefined;
   };
   // names of samples
-  let names = (strCols.length > 0) ? strCols[0] : undefined;
+  let names = getStrColWithUniqueVals();
   const namesInputs = ui.input.column(TITLE.NAMES, {
     table: table,
     value: names,
@@ -512,11 +545,10 @@ export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
   // quadratic/linear model
   const isQuadraticInput = ui.input.bool(TITLE.QUADRATIC, {
-    value: isQuadratic,
+    value: false,
     tooltipText: HINT.QUADRATIC,
-    onValueChanged: (val) => {
-      isQuadratic = val;
-      updateIputs();
+    onValueChanged: (_) => {
+      updateInputStyles();
     },
   });
@@ -527,21 +559,15 @@ export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
       await performMVA({
         table: table,
-        features: DG.DataFrame.fromColumns(features).columns,
-        predict: predict,
-        components: components,
-        isQuadratic: isQuadratic,
+        features: DG.DataFrame.fromColumns(featuresInput.value).columns,
+        predict: predictInput.value!,
+        components: componentsInput.value!,
+        isQuadratic: isQuadraticInput.value,
         names: names,
       }, analysisType);
     }, undefined, dlgRunBtnTooltip)
     .show({x: X_COORD, y: Y_COORD});
-  // the following delay provides correct styles (see https://reddata.atlassian.net/browse/GROK-15196)
-  setTimeout(() => {
-    featuresInput.value = numCols.filter((col) => col !== predict);
-    features = featuresInput.value;
-  }, TIMEOUT);
   grok.shell.v.append(dlg.root);
 } // runMVA
@@ -555,3 +581,79 @@ export async function runDemoMVA(): Promise<void> {
   await runMVA(PLS_ANALYSIS.DEMO);
 }
+function setPredictionTooltip(view: DG.TableView, predCol: DG.Column, modelTerms: Map<string, number>): void {
+  view.grid.onCellTooltip((cell, x, y) => {
+    if (cell.isColHeader) {
+      const cellCol = cell.tableColumn;
+      if (cellCol == null)
+        return false;
+      if (cellCol.name === predCol.name) {
+        ui.tooltip.show(getPredictionTooltip(modelTerms, predCol), x, y);
+        return true;
+      }
+    }
+    return false;
+  });
+}
+function getPredictionTooltip(modelTerms: Map<string, number>, predCol: DG.Column): HTMLElement {
+  let idx = 0;
+  const bias = modelTerms.get(TITLE.BIAS) ?? 0;
+  const elements: HTMLElement[] = [];
+  if (Math.abs(bias) > 0) {
+    const biasEl = ui.divText(`${bias}`);
+    biasEl.style.marginTop = '2px';
+    biasEl.style.marginLeft = '4px';
+    elements.push(biasEl);
+    ++idx;
+  }
+  const sortedTerms = [...modelTerms.entries()]
+    .filter(([key]) => key !== TITLE.BIAS)
+    .sort((a, b) => Math.abs(b[1]) - Math.abs(a[1]));
+  const maxFeatureRows = MAX_ROWS_IN_PREDICTION_TOOLTIP - elements.length;
+  const hasOverflow = sortedTerms.length > maxFeatureRows;
+  const visibleTerms = hasOverflow ? sortedTerms.slice(0, maxFeatureRows - 1) : sortedTerms;
+  for (const [key, value] of visibleTerms) {
+    const signEl = ui.divText(idx > 0 ? '+ ' : '');
+    signEl.style.marginRight = '4px';
+    signEl.style.marginLeft = '4px';
+    const featureEl = ui.divText(`${key}`);
+    featureEl.style.fontWeight = 'bold';
+    const valueEl = ui.divText(` * ${value > 0 ? value : `(${value})`}`);
+    valueEl.style.marginLeft = '4px';
+    const rowEl = ui.divH([signEl, featureEl, valueEl]);
+    rowEl.style.marginTop = '4px';
+    elements.push(rowEl);
+    ++idx;
+  }
+  if (hasOverflow) {
+    const hidden = sortedTerms.length - visibleTerms.length;
+    const ellipsisEl = ui.divText(`(${hidden} more term${hidden > 1 ? 's' : ''})`);
+    ellipsisEl.style.marginTop = '4px';
+    ellipsisEl.style.marginLeft = '4px';
+    ellipsisEl.style.fontStyle = 'italic';
+    elements.push(ellipsisEl);
+  }
+  const headerEl = ui.divText('Formula:');
+  const leftEl = ui.divText(`${predCol.name} = `);
+  leftEl.style.fontWeight = 'bold';
+  leftEl.style.marginTop = '4px';
+  const elementsContainer = ui.divV(elements);
+  elementsContainer.style.marginTop = '4px';
+  return ui.divV([headerEl, leftEl, elementsContainer]);
+}

package/src/probabilistic-scoring/data-generator.ts CHANGED Viewed

@@ -11,12 +11,57 @@ import * as jStat from 'jstat';
 /** Generates synthetic data for pMPO model training and testing
  * @param samplesCount Number of samples to generate
  * @returns DataFrame with generated data */
-export async function getSynteticPmpoData(samplesCount: number): Promise<DG.DataFrame> {
+export async function getSynteticPmpoData(samplesCount: number, isTest: boolean = true): Promise<DG.DataFrame> {
   const df = await grok.dapi.files.readCsv(SOURCE_PATH);
   const generator = new PmpoDataGenerator(df, 'Drug', 'CNS', 'Smiles');
+  const genTable = generator.getGenerated(samplesCount);
+  if (!isTest) {
+    genTable.columns.add(DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'Const bool', new Array(samplesCount).fill(true)));
+    genTable.columns.add(DG.Column.fromInt32Array('Const int', new Int32Array(samplesCount).fill(1)));
+    // Add a copy of the first numeric column with 5 missing values
+    const firstNumCol = genTable.columns.toList().find((col) => col.isNumerical);
+    if (firstNumCol) {
+      const colWithMissing = firstNumCol.clone();
+      colWithMissing.name = `${firstNumCol.name} (missing)`;
+      for (let i = 0; i < Math.min(5, colWithMissing.length); ++i)
+        colWithMissing.set(i, DG.FLOAT_NULL);
+      genTable.columns.add(colWithMissing);
+    }
+    // Add a column with all null values
+    genTable.columns.add(DG.Column.fromFloat32Array('Nulls', new Float32Array(samplesCount).fill(DG.FLOAT_NULL)));
+    // Add categorical columns
+    const categoricalCols = getCategoricalColumns(genTable.col('CNS')!, samplesCount);
+    for (const col of categoricalCols)
+      genTable.columns.add(col);
+  }
+  return genTable;
+} // getSynteticPmpoData
+/** Generates categorical columns based on a boolean source column
+ * @param sourceBoolCol Source boolean column to base the categorical columns on
+ * @param samplesCount Number of samples to generate
+ * @returns Array of generated categorical columns */
+function getCategoricalColumns(sourceBoolCol: DG.Column, samplesCount: number): DG.Column[] {
+  const source = sourceBoolCol.toList();
+  const stringLabels = new Array<string>(samplesCount);
+  const threeCats = new Array<string>(samplesCount);
+  for (let i = 0; i < samplesCount; ++i) {
+    stringLabels[i] = source[i] ? 'active' : 'non-active';
+    threeCats[i] = source[i] ? (Math.random() < 0.5 ? 'perfect' : 'good') : (Math.random() < 0.5 ? 'bad' : 'worst');
+  }
-  return generator.getGenerated(samplesCount);
-}
+  return [
+    DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'CNS (strings)', stringLabels),
+    DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'CNS (4 categories)', threeCats),
+    DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'Single category', new Array<string>(samplesCount).fill('Unknown')),
+  ];
+} // getCategoricalColumns
 /** Class for generating synthetic data for pMPO model training and testing */
 export class PmpoDataGenerator {

package/src/probabilistic-scoring/pmpo-defs.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 // Constants and type definitions for probabilistic scoring (pMPO)
-// Link: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
+// Source paper https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
 /** Minimum number of samples required to compute pMPO */
 export const MIN_SAMPLES_COUNT = 10;
@@ -208,7 +208,8 @@ export type OptimalPoint = {
   pValTresh: number,
   r2Tresh: number,
   qCutoff: number,
-  success: boolean,
+  state: 'success' | 'canceled' | 'failed',
+  msg: string,
 };
 /** Minimum bounds for pMPO parameters during optimization */
@@ -216,3 +217,30 @@ export const LOW_PARAMS_BOUNDS = new Float32Array([0.5, Q_CUTOFF_MIN]);
 /** Maximum bounds for pMPO parameters during optimization */
 export const HIGH_PARAMS_BOUNDS = new Float32Array([R2_MAX, Q_CUTOFF_MAX]);
+export enum EQUALITY_SIGN {
+  GREATER = '>',
+  LESS = '<',
+  GREATER_OR_EQUAL = '≥',
+  LESS_OR_EQUAL = '≤',
+  DEFAULT = LESS_OR_EQUAL,
+};
+export const SIGN_OPTIONS = [
+  EQUALITY_SIGN.GREATER,
+  EQUALITY_SIGN.LESS,
+  EQUALITY_SIGN.GREATER_OR_EQUAL,
+  EQUALITY_SIGN.LESS_OR_EQUAL,
+];
+export const THRESHOLDED_DESIRABILITY_COL_NAME = 'Desirability';
+export const PREFERABLE_CATEGORIES = ['perfect', 'good', 'true', 't', 'g', 'active', 'a', 'yes', 'y'];
+export type PmpoInputId = 'descriptors' | 'desirability' | 'threshold' | 'categories';
+export type TooltipContent = string | (() => HTMLElement);
+export interface PmpoValidationResult {
+  valid: boolean;
+  errors: Map<PmpoInputId, TooltipContent>;
+}