npm - @datagrok/eda - Versions diffs - 1.2.2 → 1.2.4 - Mend

@datagrok/eda 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)

package/CHANGELOG.md +15 -4
package/dist/111.js +1 -1
package/dist/111.js.map +1 -1
package/dist/128.js +2 -0
package/dist/128.js.map +1 -0
package/dist/153.js +1 -1
package/dist/153.js.map +1 -1
package/dist/23.js +1 -1
package/dist/23.js.map +1 -1
package/dist/234.js +1 -1
package/dist/234.js.map +1 -1
package/dist/260.js +1 -1
package/dist/260.js.map +1 -1
package/dist/348.js +1 -1
package/dist/348.js.map +1 -1
package/dist/377.js +1 -1
package/dist/377.js.map +1 -1
package/dist/{12a82b8001995d426ed2.wasm → 3cec7d4ab7dacdcb37e6.wasm} +0 -0
package/dist/412.js +1 -1
package/dist/412.js.map +1 -1
package/dist/531.js +1 -1
package/dist/531.js.map +1 -1
package/dist/583.js +1 -1
package/dist/583.js.map +1 -1
package/dist/603.js +1 -1
package/dist/603.js.map +1 -1
package/dist/656.js +1 -1
package/dist/656.js.map +1 -1
package/dist/682.js +1 -1
package/dist/682.js.map +1 -1
package/dist/705.js +1 -1
package/dist/705.js.map +1 -1
package/dist/727.js +1 -1
package/dist/727.js.map +1 -1
package/dist/763.js +1 -1
package/dist/763.js.map +1 -1
package/dist/778.js +1 -1
package/dist/778.js.map +1 -1
package/dist/783.js +1 -1
package/dist/783.js.map +1 -1
package/dist/793.js +1 -1
package/dist/793.js.map +1 -1
package/dist/860.js +2 -0
package/dist/860.js.map +1 -0
package/dist/950.js +1 -1
package/dist/950.js.map +1 -1
package/dist/980.js +1 -1
package/dist/980.js.map +1 -1
package/dist/990.js +1 -1
package/dist/990.js.map +1 -1
package/dist/package-test.js +1 -1
package/dist/package-test.js.map +1 -1
package/dist/package.js +1 -1
package/dist/package.js.map +1 -1
package/package.json +1 -95
package/scripts/command.txt +1 -1
package/scripts/func.json +1 -664
package/src/anova/anova-tools.ts +0 -4
package/src/eda-tools.ts +52 -17
package/src/eda-ui.ts +0 -114
package/src/global.d.ts +13 -0
package/src/missing-values-imputation/ui-constants.ts +3 -1
package/src/missing-values-imputation/ui.ts +7 -7
package/src/package.ts +21 -17
package/src/pls/pls-constants.ts +7 -7
package/src/pls/pls-ml.ts +2 -1
package/src/pls/pls-tools.ts +8 -3
package/src/tests/anova-tests.ts +1 -1
package/src/tests/linear-methods-tests.ts +6 -1
package/src/utils.ts +90 -0
package/wasm/EDA.js +28 -1
package/wasm/EDA.wasm +0 -0
package/wasm/EDAAPI.js +22 -4
package/wasm/EDAForWebWorker.js +1 -1
package/wasm/PCA/PCA.cpp +49 -58
package/wasm/PCA/PCA.h +19 -0
package/wasm/pcaExport.cpp +25 -1
package/wasm/workers/{principalComponentAnalysisWorker.js → principalComponentAnalysisNipalsWorker.js} +1 -1
package/wasm/workers/principalComponentAnalysisWorkerUpd.js +16 -0
package/dist/91.js +0 -2
package/dist/91.js.map +0 -1

package/src/anova/anova-tools.ts CHANGED

@@ -273,10 +273,6 @@ export class FactorizedData {
       this.sumsOfSquares = sumsOfSquares;
       this.subSampleSizes = subSampleSizes;
-      console.log(sums);
-      console.log(sumsOfSquares);
-      console.log(subSampleSizes);
       break;
     default:

package/src/eda-tools.ts CHANGED

@@ -4,32 +4,67 @@ import * as grok from 'datagrok-api/grok';
 import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
-import {_principalComponentAnalysisInWebWorker,
-  _partialLeastSquareRegressionInWebWorker} from '../wasm/EDAAPI';
+import {_principalComponentAnalysisInWebWorker, _principalComponentAnalysis,
+  _partialLeastSquareRegressionInWebWorker,
+  _principalComponentAnalysisNipals, _principalComponentAnalysisNipalsInWebWorker,
+} from '../wasm/EDAAPI';
-import {checkWasmDimensionReducerInputs, checkUMAPinputs, checkTSNEinputs,
-  getRowsOfNumericalColumnns} from './utils';
+import {checkWasmDimensionReducerInputs, checkUMAPinputs, checkTSNEinputs, NIPALS_PREFER_COLS_COUNT,
+  getRowsOfNumericalColumnns, centerScaleDataFrame, extractNonConstantColsDf} from './utils';
 // Principal components analysis (PCA)
 export async function computePCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
-  center: boolean, scale: boolean): Promise<DG.DataFrame> {
+  toCenter: boolean, toScale: boolean): Promise<DG.DataFrame> {
   checkWasmDimensionReducerInputs(features, components);
-  const centerNum = center ? 1 : 0;
-  const scaleNum = scale ? 1 : 0;
+  const rowCount = table.rowCount;
-  return await _principalComponentAnalysisInWebWorker(table, features, components, centerNum, scaleNum);
-}
+  // Extract non-const cols dataframe
+  const nonConstData = extractNonConstantColsDf(features);
+  const nonConstColsCount = nonConstData.columns.length;
-// Partial least square regression (PLS): TO REMOVE
-export async function computePLS(
-  table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, components: number,
-): Promise<any> {
-  // Inputs are checked in the same manner as in PCA, since the same computations are applied.
-  checkWasmDimensionReducerInputs(features, components);
+  // Return zero columns if data is constant
+  if (nonConstColsCount === 0) {
+    const cols: DG.Column[] = [];
+    for (let i = 0; i < components; ++i)
+      cols.push(DG.Column.fromFloat32Array(`${i + 1}`, new Float32Array(rowCount).fill(0)));
+    return DG.DataFrame.fromColumns(cols);
+  }
+  const zeroColsToAdd = (nonConstColsCount < components) ? (components - nonConstColsCount) : 0;
+  const componentsToCompute = Math.min(components, nonConstColsCount);
+  let output: DG.DataFrame | undefined = undefined;
+  // PCA
+  if (nonConstColsCount > NIPALS_PREFER_COLS_COUNT)
+    output = await _principalComponentAnalysisNipalsInWebWorker(table, features, componentsToCompute);
+  else {
+    //try to apply the classic algorithm
+    const res = await _principalComponentAnalysisInWebWorker(table, features, componentsToCompute);
+    if (res !== -1) // the classic succeed
+      output = centerScaleDataFrame(res, toCenter, toScale);
+    else // the classic failed
+      output = await _principalComponentAnalysisNipalsInWebWorker(table, features, componentsToCompute);
+  }
+  if (output === undefined)
+    throw new Error('Failed to compute PCA');
+  output = centerScaleDataFrame(output, toCenter, toScale);
+  const cols = output.columns;
+  const count = cols.length;
+  // Add zero columns (with respect to the const cols count)
+  for (let i = 0; i < zeroColsToAdd; ++i)
+    cols.add(DG.Column.fromFloat32Array(`${count + i + 1}`, new Float32Array(rowCount).fill(0)));
-  return await _partialLeastSquareRegressionInWebWorker(table, features, predict, components);
-}
+  return output;
+} // computePCA
 // Uniform Manifold Approximation and Projection (UMAP)
 export async function computeUMAP(features: DG.ColumnList, components: number, epochs: number,

package/src/eda-ui.ts CHANGED

@@ -17,117 +17,3 @@ export function addPrefixToEachColumnName(prefix: string, columns: DG.ColumnList
   for (const col of columns.toList())
     col.name = prefix + col.name;
 }
-// Predicted vs Reference scatter plot
-export function predictedVersusReferenceScatterPlot(
-  samplesNames: DG.Column, reference: DG.Column, prediction: DG.Column,
-): DG.Viewer {
-  prediction.name = reference.name + '(predicted)';
-  const dfReferencePrediction = DG.DataFrame.fromColumns([samplesNames, reference, prediction]);
-  dfReferencePrediction.name = 'Reference vs. Predicted';
-  return DG.Viewer.scatterPlot(dfReferencePrediction,
-    {title: dfReferencePrediction.name,
-      x: reference.name,
-      y: prediction.name,
-      showRegressionLine: true,
-      markerType: 'circle',
-      labels: samplesNames.name,
-    });
-}
-// Regression Coefficients Bar Chart
-export function regressionCoefficientsBarChart(features: DG.ColumnList, regressionCoeffs: DG.Column): DG.Viewer {
-  regressionCoeffs.name = 'regression coefficient';
-  const namesOfPredictors = [];
-  for (const col of features)
-    namesOfPredictors.push(col.name);
-  const predictorNamesColumn = DG.Column.fromStrings('feature', namesOfPredictors);
-  const dfRegrCoefs = DG.DataFrame.fromColumns([predictorNamesColumn, regressionCoeffs]);
-  dfRegrCoefs.name = 'Regression Coefficients';
-  return DG.Viewer.barChart(dfRegrCoefs,
-    {title: dfRegrCoefs.name, split: 'feature',
-      value: 'regression coefficient', valueAggrType: 'avg'});
-}
-// Scores Scatter Plot
-export function scoresScatterPlot(
-  samplesNames: DG.Column, xScores: Array<DG.Column>, yScores: Array<DG.Column>,
-): DG.Viewer {
-  const scoresColumns = [samplesNames];
-  for (let i = 0; i < xScores.length; i++) {
-    xScores[i].name = `x.score.t${i+1}`;
-    scoresColumns.push(xScores[i]);
-  }
-  for (let i = 0; i < yScores.length; i++) {
-    yScores[i].name = `y.score.u${i+1}`;
-    scoresColumns.push(yScores[i]);
-  }
-  const scores = DG.DataFrame.fromColumns(scoresColumns);
-  scores.name = 'Scores';
-  //grok.shell.addTableView(scores);
-  const index = xScores.length > 1 ? 1 : 0;
-  return DG.Viewer.scatterPlot(scores,
-    {title: scores.name,
-      x: xScores[0].name,
-      y: xScores[index].name,
-      markerType: 'circle',
-      labels: samplesNames.name,
-    });
-}
-// Loading Scatter Plot
-export function loadingScatterPlot(features: DG.ColumnList, xLoadings: Array<DG.Column>): DG.Viewer {
-  const loadingCols = [];
-  const loadingLabels = [];
-  for (const col of features)
-    loadingLabels.push(col.name);
-  loadingCols.push(DG.Column.fromStrings('labels', loadingLabels));
-  for (let i = 0; i < xLoadings.length; i++) {
-    xLoadings[i].name = `x.loading.p${i+1}`;
-    loadingCols.push(xLoadings[i]);
-  }
-  const dfLoadings = DG.DataFrame.fromColumns(loadingCols);
-  dfLoadings.name = 'Loadings';
-  return DG.Viewer.scatterPlot(dfLoadings,
-    {title: dfLoadings.name,
-      x: xLoadings[0].name,
-      y: xLoadings[xLoadings.length - 1].name,
-      markerType: 'circle',
-      labels: 'labels',
-    });
-}
-// Add PLS visualization
-export function addPLSvisualization(
-  table: DG.DataFrame, samplesNames: DG.Column, features: DG.ColumnList, predict: DG.Column, plsOutput: any,
-): void {
-  const view = (table.id !== null) ? grok.shell.getTableView(table.name) : grok.shell.addTableView(table);
-  // 1. Predicted vs Reference scatter plot
-  view.addViewer(predictedVersusReferenceScatterPlot(samplesNames, predict, plsOutput[0]));
-  // 2. Regression Coefficients Bar Chart
-  view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]));
-  // 3. Loading Scatter Plot
-  view.addViewer(loadingScatterPlot(features, plsOutput[4]));
-  // 4. Scores Scatter Plot
-  view.addViewer(scoresScatterPlot(samplesNames, plsOutput[2], plsOutput[3]));
-}

package/src/global.d.ts ADDED

@@ -0,0 +1,13 @@
+import * as grokNamespace from 'datagrok-api/grok';
+import * as uiNamespace from 'datagrok-api/ui';
+import * as DGNamespace from 'datagrok-api/dg';
+import * as rxjsNamespace from 'rxjs';
+import $Namespace from 'cash-dom';
+declare global {
+    const grok: typeof grokNamespace;
+    const ui: typeof uiNamespace;
+    const DG: typeof DGNamespace;
+    const rjxs: typeof rxjsNamespace;
+    const $: typeof $Namespace;
+}

package/src/missing-values-imputation/ui-constants.ts CHANGED

@@ -26,7 +26,7 @@ export const COPY_SUFFIX = 'copy';
 /** UI titles */
 export enum TITLE {
-  KNN_IMPUTER = 'Impute',
+  KNN_IMPUTER = 'KNN Imputation',
   TABLE = 'Table',
   IN_PLACE = 'In-place',
   COLUMNS = 'Impute',
@@ -62,3 +62,5 @@ export enum HINT {
   IMPUTATION_SETTINGS = 'Simple imputation settings',
   KEEP_EMPTY = 'Defines whether to keep empty missing values failed to be imputed OR fill them using simple imputation',
 };
+export const MAX_INPUT_NAME_LENGTH = 15;

package/src/missing-values-imputation/ui.ts CHANGED

@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
 import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
-import {TITLE, KNN_IMPUTER, ERROR_MSG, HINT} from './ui-constants';
+import {TITLE, KNN_IMPUTER, ERROR_MSG, HINT, MAX_INPUT_NAME_LENGTH} from './ui-constants';
 import {SUPPORTED_COLUMN_TYPES, METRIC_TYPE, DISTANCE_TYPE, MetricInfo, DEFAULT, MIN_NEIGHBORS,
   impute, getMissingValsIndices, areThereFails, imputeFailed} from './knn-imputer';
@@ -190,7 +190,7 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
   // Metrics components
   const featuresMetrics = new Map<string, MetricInfo>();
-  const metricInfoInputs = new Map<string, HTMLDivElement>();
+  const metricInfoInputs = new Map<string, HTMLElement>();
   const metricsDiv = ui.divV([]);
   metricsDiv.style.overflow = 'auto';
@@ -214,7 +214,7 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
     // The following should provide a slider (see th bug https://reddata.atlassian.net/browse/GROK-14431)
     const prop = DG.Property.fromOptions({
-      'name': name,
+      'name': name.length < MAX_INPUT_NAME_LENGTH ? name : name.slice(0, MAX_INPUT_NAME_LENGTH).concat('...'),
       'inputType': 'Float',
       'min': 0,
       'max': 10,
@@ -229,11 +229,11 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
       distInfo.weight = value ?? settings.defaultWeight;
       featuresMetrics.set(name, distInfo);
     });
-    weightInput.setTooltip(HINT.WEIGHT);
+    ui.tooltip.bind(weightInput.captionLabel, name);
+    ui.tooltip.bind(weightInput.input, HINT.WEIGHT);
-    const div = ui.divH([distTypeInput.root, weightInput.root]);
-    metricInfoInputs.set(name, div);
-    metricsDiv.append(div);
+    metricInfoInputs.set(name, weightInput.root);
+    metricsDiv.append(weightInput.root);
   });
   // The main dialog

package/src/package.ts CHANGED

@@ -73,23 +73,27 @@ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column,
 //name: PCA
 //description: Principal component analysis (PCA)
 //input: dataframe table
-//input: column_list features {type: numerical}
-//input: int components = 2 {caption: Components} [Number of components.]
+//input: column_list features {type: numerical; allowNulls: false}
+//input: int components = 2 {caption: Components; nullable: false; min: 1} [Number of components.]
 //input: bool center = false [Indicating whether the variables should be shifted to be zero centered.]
 //input: bool scale = false [Indicating whether the variables should be scaled to have unit variance.]
 export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number, center: boolean, scale: boolean): Promise<void> {
-  const pcaTable = await computePCA(table, features, components, center, scale);
-  addPrefixToEachColumnName('PC', pcaTable.columns);
-  if (table.id === null) // table is loaded from a local file
-    grok.shell.addTableView(pcaTable);
-  else {
-    const cols = table.columns;
-    for (const col of pcaTable.columns) {
-      col.name = cols.getUnusedName(col.name);
-      cols.add(col);
+  try {
+    const pcaTable = await computePCA(table, features, components, center, scale);
+    addPrefixToEachColumnName('PC', pcaTable.columns);
+    if (table.id === null) // table is loaded from a local file
+      grok.shell.addTableView(pcaTable);
+    else {
+      const cols = table.columns;
+      for (const col of pcaTable.columns) {
+        col.name = cols.getUnusedName(col.name);
+        cols.add(col);
+      }
     }
+  } catch (error) {
+    grok.shell.warning(`Failed to compute PCA: ${error instanceof Error ? error.message : 'platform issue'}`);
   }
 }
@@ -304,7 +308,7 @@ export async function MVA(): Promise<void> {
 //name: MVA demo
 //description: Multidimensional data analysis using partial least squares (PLS) regression. It identifies latent factors and constructs a linear model based on them.
-//meta.demoPath: Compute | Multivariate analysis
+//meta.demoPath: Compute | Multivariate Analysis
 export async function demoMultivariateAnalysis(): Promise<any> {
   await runDemoMVA();
 }
@@ -552,15 +556,15 @@ export function anova(): void {
   runOneWayAnova();
 }
-//top-menu: ML | Missing Values Imputation ...
+//top-menu: ML | Impute Missing Values...
 //name: KNN impute
-//desription: Missing values imputation using the k-nearest neighbors method
+//description: Missing values imputation using the k-nearest neighbors method (KNN)
 export function kNNImputation() {
   runKNNImputer();
 }
 //name: KNN imputation for a table
-//desription: Missing values imputation using the k-nearest neighbors method for a given table
+//description: Missing values imputation using the k-nearest neighbors method
 //input: dataframe table
 export async function kNNImputationForTable(table: DG.DataFrame) {
   await runKNNImputer(table);

package/src/pls/pls-constants.ts CHANGED

@@ -50,13 +50,13 @@ export enum HINT {
 /** Links to help */
 export enum LINK {
-  PLS = 'https://datagrok.ai/help/explore/multivariate-analysis/pls#pls-components',
-  MVA = 'https://datagrok.ai/help/explore/multivariate-analysis/pls',
-  MODEL = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/predicted-vs-reference',
-  COEFFS = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/regression-coefficients',
-  LOADINGS = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/loadings',
-  EXPL_VARS = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/explained-variance',
-  SCORES = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/scores',
+  PLS = '/help/explore/multivariate-analysis/pls#pls-components',
+  MVA = '/help/explore/multivariate-analysis/pls',
+  MODEL = '/help/explore/multivariate-analysis/plots/predicted-vs-reference',
+  COEFFS = '/help/explore/multivariate-analysis/plots/regression-coefficients',
+  LOADINGS = '/help/explore/multivariate-analysis/plots/loadings',
+  EXPL_VARS = '/help/explore/multivariate-analysis/plots/explained-variance',
+  SCORES = '/help/explore/multivariate-analysis/plots/scores',
 }
 /** Components consts */

package/src/pls/pls-ml.ts CHANGED

@@ -276,7 +276,8 @@ export class PlsModel {
       xColumnName: columns.byIndex(shift).name,
       yColumnName: columns.byIndex(shift + (components > 1 ? 1 : 0)).name,
       markerType: DG.MARKER_TYPE.CIRCLE,
-      labels: TITLE.FEATURES,
+      //@ts-ignore
+      labelFormColumnNames: [TITLE.FEATURES],
       help: LINK.LOADINGS,
     }));

package/src/pls/pls-tools.ts CHANGED

@@ -161,10 +161,13 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
     yColumnName: pred.name,
     showRegressionLine: true,
     markerType: DG.MARKER_TYPE.CIRCLE,
-    labels: input.names?.name,
+    showLabels: 'Always',
     help: LINK.MODEL,
   }));
+  if ((input.names !== undefined) && (input.names !== null))
+    predictVsReferScatter.setOptions({labelFormColumnNames: [input.names?.name]});
   // 2. Regression Coefficients Bar Chart
   result.regressionCoefficients.name = TITLE.REGR_COEFS;
   const regrCoeffsBar = view.addViewer(DG.Viewer.barChart(buffer, {
@@ -184,7 +187,7 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
     xColumnName: `${TITLE.XLOADING}1`,
     yColumnName: `${TITLE.XLOADING}${result.xLoadings.length > 1 ? '2' : '1'}`,
     markerType: DG.MARKER_TYPE.CIRCLE,
-    labels: TITLE.FEATURE,
+    labelFormColumnNames: [TITLE.FEATURE],
     help: LINK.LOADINGS,
   }));
@@ -204,11 +207,13 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
     xColumnName: plsCols[0].name,
     yColumnName: (plsCols.length > 1) ? plsCols[1].name : result.uScores[0].name,
     markerType: DG.MARKER_TYPE.CIRCLE,
-    labels: input.names?.name,
     help: LINK.SCORES,
     showViewerFormulaLines: true,
   });
+  if ((input.names !== undefined) && (input.names !== null))
+    predictVsReferScatter.setOptions({labelFormColumnNames: [input.names?.name]});
   // 4.3) create lines & circles
   scoresScatter.meta.formulaLines.addAll(getLines(scoreNames));
   view.addViewer(scoresScatter);

package/src/tests/anova-tests.ts CHANGED

@@ -83,5 +83,5 @@ category('ANOVA', () => {
     // check F-critical
     expect(eq(analysis.fCritical, EXPECTED.F_CRIT), true, 'Incorrect F-critical');
-  }, {timeout: TIMEOUT, benchmark: true});
+  }, {timeout: TIMEOUT});
 });

package/src/tests/linear-methods-tests.ts CHANGED

@@ -16,7 +16,7 @@ const ROWS = 100;
 const ROWS_K = 100;
 const COLS = 100;
 const COMPONENTS = 3;
-const TIMEOUT = 4000;
+const TIMEOUT = 9000;
 const INDEP_COLS = 2;
 const DEP_COLS = 5;
 const ERROR = 0.1;
@@ -27,6 +27,11 @@ category('Principal component analysis', () => {
     await computePCA(df, df.columns, COMPONENTS, false, false);
   }, {timeout: TIMEOUT, benchmark: true});
+  test(`Performance: 1K rows, 5K cols, ${COMPONENTS} components`, async () => {
+    const df = grok.data.demo.randomWalk(1000, 5000);
+    await computePCA(df, df.columns, COMPONENTS, false, false);
+  }, {timeout: TIMEOUT, benchmark: true});
   test('Correctness', async () => {
     // Data
     const df = regressionDataset(ROWS, COMPONENTS, DEP_COLS);

package/src/utils.ts CHANGED

@@ -11,6 +11,9 @@ const FEATURES_COUNT_MIN = 1;
 const PERCENTAGE_MIN = 0;
 const PERCENTAGE_MAX = 100;
 const MAX_ELEMENTS_COUNT = 100000000;
+export const NIPALS_PREFER_COLS_COUNT = 900;
+const TINY = 0.000001;
 // Error messages
 const COMP_POSITVE_MES = 'components must be positive.';
@@ -180,3 +183,90 @@ export function getRowsOfNumericalColumnns(columnList: DG.ColumnList): any[][] {
   return output;
 }
+/** Return centered data */
+function centerDf(df: DG.DataFrame): DG.DataFrame {
+  const rowCount = df.rowCount;
+  for (const col of df.columns) {
+    if (col.isNumerical) {
+      const avg = col.stats.avg;
+      if (Math.abs(avg) > TINY) {
+        const raw = col.getRawData();
+        for (let i = 0; i < rowCount; ++i)
+          raw[i] -= avg;
+      }
+    }
+  }
+  return df;
+}
+/** Return scaled & centered data */
+function centerScaleDf(df: DG.DataFrame): DG.DataFrame {
+  const rowCount = df.rowCount;
+  for (const col of df.columns) {
+    if (col.isNumerical) {
+      const stdev = col.stats.stdev;
+      const avg = col.stats.avg;
+      const raw = col.getRawData();
+      if (stdev > 0) {
+        for (let i = 0; i < rowCount; ++i)
+          raw[i] = (raw[i] - avg) / stdev;
+      } else {
+        for (let i = 0; i < rowCount; ++i)
+          raw[i] -= avg;
+      }
+    }
+  }
+  return df;
+}
+/** Return scaled data */
+function scaleDf(df: DG.DataFrame): DG.DataFrame {
+  const rowCount = df.rowCount;
+  for (const col of df.columns) {
+    if (col.isNumerical) {
+      const stdev = col.stats.stdev;
+      if (Math.abs(stdev - 1) > TINY && (stdev > 0)) {
+        const raw = col.getRawData();
+        for (let i = 0; i < rowCount; ++i)
+          raw[i] /= stdev;
+      }
+    }
+  }
+  return df;
+}
+/** Return standartized dataframe */
+export function centerScaleDataFrame(df: DG.DataFrame, toCenter: boolean, toScale: boolean): DG.DataFrame {
+  if (toCenter) {
+    if (toScale)
+      return centerScaleDf(df);
+    else
+      return centerDf(df);
+  }
+  if (toScale)
+    return scaleDf(df);
+  return df;
+}
+/** Return table of columns with non-zero variance */
+export function extractNonConstantColsDf(features: DG.ColumnList): DG.DataFrame {
+  const cols: DG.Column[]= [];
+  for (const col of features) {
+    if ((col.stats.stdev > 0) && (col.stats.missingValueCount < 1))
+      cols.push(col);
+  }
+  return DG.DataFrame.fromColumns(cols);
+}