npm - @datagrok/eda - Versions diffs - 1.1.33 → 1.1.35 - Mend

@datagrok/eda 1.1.33 → 1.1.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
   "name": "@datagrok/eda",
   "friendlyName": "EDA",
-  "version": "1.1.33",
+  "version": "1.1.35",
   "description": "Exploratory Data Analysis Tools",
   "dependencies": {
     "@datagrok-libraries/math": "^1.1.11",
-    "@datagrok-libraries/ml": "^6.6.21",
+    "@datagrok-libraries/ml": "^6.6.23",
     "@datagrok-libraries/tutorials": "^1.3.13",
-    "@datagrok-libraries/utils": "^4.2.20",
+    "@datagrok-libraries/utils": "^4.2.29",
     "@keckelt/tsne": "^1.0.2",
     "@webgpu/types": "^0.1.40",
     "cash-dom": "^8.1.1",

package/src/package.ts CHANGED Viewed

@@ -173,6 +173,7 @@ export async function reduceDimensionality(): Promise<void> {
     else
       okButton.classList.remove('disabled');
   };
+  dialog.history(() => ({editorSettings: editor.getStringInput()}), (x: any) => editor.applyStringInput(x['editorSettings']));
   editor.onColumnsChanged.subscribe(() => {
     try {
       validate();
@@ -688,11 +689,12 @@ export function isInteractiveSoftmax(df: DG.DataFrame, predictColumn: DG.Column)
 export async function trainPLSRegression(df: DG.DataFrame, predictColumn: DG.Column, components: number): Promise<Uint8Array> {
   const features = df.columns;
-  if (components > features.length)
-    throw new Error('Number of components is greater than features count');
   const model = new PlsModel();
-  await model.fit(features, predictColumn, components);
+  await model.fit(
+    features,
+    predictColumn,
+    Math.min(components, features.length),
+  );
   return model.toBytes();
 }

package/src/pls/pls-tools.ts CHANGED Viewed

@@ -92,13 +92,35 @@ export async function getPlsAnalysis(input: PlsInput): Promise<PlsOutput> {
   };
 }
+/** Return debiased prediction by PLS regression */
+function debiasedPrediction(features: DG.ColumnList, params: DG.Column,
+  target: DG.Column, biasedPrediction: DG.Column): DG.Column {
+  const samples = target.length;
+  const dim = features.length;
+  const rawParams = params.getRawData();
+  const debiased = new Float32Array(samples);
+  const biased = biasedPrediction.getRawData();
+  // Compute bias
+  let bias = target.stats.avg;
+  for (let i = 0; i < dim; ++i)
+    bias -= rawParams[i] * features.byIndex(i).stats.avg;
+  // Compute debiased prediction
+  for (let i = 0; i < samples; ++i)
+    debiased[i] = bias + biased[i];
+  return DG.Column.fromFloat32Array('Debiased', debiased, samples);
+}
 /** Perform multivariate analysis using the PLS regression */
 async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<void> {
   const result = await getPlsAnalysis(input);
   const plsCols = result.tScores;
   const cols = input.table.columns;
-  const featuresNames = input.features.names();
+  const features = input.features;
+  const featuresNames = features.names();
   const prefix = (analysisType === PLS_ANALYSIS.COMPUTE_COMPONENTS) ? RESULT_NAMES.PREFIX : TITLE.XSCORE;
   // add PLS components to the table
@@ -129,7 +151,8 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
   });
   // 1. Predicted vs Reference scatter plot
-  const pred = result.prediction;
+  // Debias prediction (since PLS centers data)
+  const pred = debiasedPrediction(features, result.regressionCoefficients, input.predict, result.prediction);
   pred.name = cols.getUnusedName(`${input.predict.name} ${RESULT_NAMES.SUFFIX}`);
   cols.add(pred);
   const predictVsReferScatter = view.addViewer(DG.Viewer.scatterPlot(input.table, {

package/src/regression.ts CHANGED Viewed

@@ -7,19 +7,9 @@ import * as DG from 'datagrok-api/dg';
 import {_fitLinearRegressionParamsWithDataNormalizing} from '../wasm/EDAAPI';
 import {getPlsAnalysis} from './pls/pls-tools';
-// Linear regression computations limits
-const FATURES_COUNT_LIMIT = 1000;
-const SAMPLES_COUNT_LIMIT = 1000000;
 // Default PLS components count
 const PLS_COMPONENTS_COUNT = 10;
-// Wasm computations specific constants (see https://eigen.tuxfamily.org/dox/classEigen_1_1LDLT.html)
-const BYTES_PER_VALUE = 4; // wasm computations operates 4-byte floats
-const MEMORY_SCALE = 2; // due to the features of the Eigen lib decomposition
-const BUFFERS_COUNT = 1; // due to the features of the Eigen lib decomposition
-const WASM_MEMORY = 268435456; // wasm buffer size specified in '../scripts/module.json'
 /** Compute coefficients of linear regression */
 export async function getLinearRegressionParams(features: DG.ColumnList, targets: DG.Column): Promise<Float32Array> {
   const featuresCount = features.length;
@@ -37,24 +27,6 @@ export async function getLinearRegressionParams(features: DG.ColumnList, targets
   try {
     // Analyze inputs sizes
-    const inputsAnalysis = getInputsAnalysis(featuresCount, samplesCount);
-    if (inputsAnalysis.toApplyPLS) {
-      // Apply the PLS method
-      const paramsByPLS = await getLinearRegressionParamsUsingPLS(features, targets, inputsAnalysis.components);
-      let tmpSum = 0;
-      // Compute bias (due to the centering feature of PLS)
-      for (let i = 0; i < featuresCount; ++i) {
-        params[i] = paramsByPLS[i];
-        tmpSum += paramsByPLS[i] * features.byIndex(i).stats.avg;
-      }
-      params[featuresCount] -= tmpSum;
-      return params;
-    }
     // Non-constant columns data
     const nonConstFeatureColsIndeces: number[] = [];
@@ -101,7 +73,22 @@ export async function getLinearRegressionParams(features: DG.ColumnList, targets
     params[featuresCount] = tempParams[nonConstFeaturesCount];
   } catch (e) {
-    grok.shell.error(`Fitted the trivial model: ${e instanceof Error ? e.message : 'due to the platform issue'}`);
+    // Apply PLS regression if regular linear regression failed
+    const paramsByPLS = await getLinearRegressionParamsUsingPLS(
+      features,
+      targets,
+      componentsCount(features.length, targets.length),
+    );
+    let tmpSum = 0;
+    // Compute bias (due to the centering feature of PLS)
+    for (let i = 0; i < featuresCount; ++i) {
+      params[i] = paramsByPLS[i];
+      tmpSum += paramsByPLS[i] * features.byIndex(i).stats.avg;
+    }
+    params[featuresCount] -= tmpSum;
   }
   return params;
@@ -197,36 +184,10 @@ async function getLinearRegressionParamsUsingPLS(features: DG.ColumnList,
   return plsAnalysis.regressionCoefficients.getRawData() as Float32Array;
 }
-/** Check wasm-buffer overflow */
-const wasmBufferOverflow = (featuresCount: number, samplesCount: number) => {
-  return MEMORY_SCALE * BYTES_PER_VALUE * samplesCount * (featuresCount + BUFFERS_COUNT) >= WASM_MEMORY;
-};
-/** Check whether to apply the PLS method & how many components to use */
-const getInputsAnalysis = (featuresCount: number, samplesCount: number) => {
-  if (wasmBufferOverflow(featuresCount, samplesCount) || (featuresCount >= FATURES_COUNT_LIMIT)) {
-    return {
-      toApplyPLS: true,
-      components: PLS_COMPONENTS_COUNT,
-    };
-  }
+/** Return number of PLS components to be used */
+const componentsCount = (featuresCount: number, samplesCount: number) => {
+  if (samplesCount <= featuresCount)
+    return Math.min(PLS_COMPONENTS_COUNT, samplesCount);
-  if (samplesCount >= SAMPLES_COUNT_LIMIT) {
-    return {
-      toApplyPLS: true,
-      components: Math.min(PLS_COMPONENTS_COUNT, featuresCount),
-    };
-  }
-  if (samplesCount <= featuresCount) {
-    return {
-      toApplyPLS: true,
-      components: Math.min(PLS_COMPONENTS_COUNT, samplesCount),
-    };
-  }
-  return {
-    toApplyPLS: false,
-    components: PLS_COMPONENTS_COUNT,
-  };
-}; // getInputsAnalysis
+  return Math.min(PLS_COMPONENTS_COUNT, featuresCount);
+};