@datagrok/eda 1.1.18 → 1.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@datagrok/eda",
3
3
  "friendlyName": "EDA",
4
- "version": "1.1.18",
4
+ "version": "1.1.19",
5
5
  "description": "Exploratory Data Analysis Tools",
6
6
  "dependencies": {
7
7
  "@datagrok-libraries/math": "^1.1.2",
package/scripts/func.json CHANGED
@@ -1 +1 @@
1
- {"pcaExport.cpp": {"principalComponentAnalysis": {"arguments": {"columns": {"type": "floatColumns"}, "componentsCount": {"type": "num"}, "centerNum": {"type": "num"}, "scaleNum": {"type": "num"}, "components": {"type": "newFloatColumns", "numOfRows": {"ref": "columns", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "tableFromColumns", "source": "components"}, "annotation": ["//name: principalComponentAnalysis", "//input: dataframe table", "//input: column_list columns", "//input: int componentsCount", "//input: int centerNum", "//input: int scaleNum", "//output: dataframe result "], "prototype": "principalComponentAnalysis(table, columns, componentsCount, centerNum, scaleNum)", "prototypeForWebWorker": "principalComponentAnalysisInWebWorker(table, columns, componentsCount, centerNum, scaleNum)", "callArgs": "[columns, componentsCount, centerNum, scaleNum]"}, "error": {"arguments": {"col1": {"type": "floatColumn"}, "col2": {"type": "floatColumn"}}, "output": {"type": "double", "source": "_callResult"}, "annotation": ["//name: error", "//input: dataframe df", "//input: column col1", "//input: column col2", "//output: double mad "], "prototype": "error(df, col1, col2)", "prototypeForWebWorker": "errorInWebWorker(df, col1, col2)", "callArgs": "[col1, col2]"}}, "PCA/PCA.cpp": {}, "plsExport.cpp": {"partialLeastSquareRegression": {"arguments": {"features": {"type": "floatColumns"}, "predict": {"type": "floatColumn"}, "componentsCount": {"type": "num"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "predict", "value": "numOfRows"}}, "regressionCoefficients": {"type": "newFloatColumn", "numOfRows": {"ref": "features", "value": "numOfColumns"}}, "tScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "uScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, 
"numOfColumns": {"ref": "componentsCount", "value": "data"}}, "xLoadings": {"type": "newFloatColumns", "numOfRows": {"ref": "features", "value": "numOfColumns"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings']"}, "annotation": ["//name: partialLeastSquareRegression", "//input: dataframe table", "//input: column_list features", "//input: column predict", "//input: int componentsCount"], "prototype": "partialLeastSquareRegression(table, features, predict, componentsCount)", "prototypeForWebWorker": "partialLeastSquareRegressionInWebWorker(table, features, predict, componentsCount)", "callArgs": "[features, predict, componentsCount]"}}, "PLS/PLS.cpp": {}, "svmApi.cpp": {"generateDataset": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "samplesCount": {"type": "num"}, "featuresCount": {"type": "num"}, "min": {"type": "num"}, "max": {"type": "num"}, "violatorsPercentage": {"type": "num"}, "dataset": {"type": "newFloatColumns", "numOfRows": {"ref": "samplesCount", "value": "data"}, "numOfColumns": {"ref": "featuresCount", "value": "data"}}, "labels": {"type": "newFloatColumn", "numOfRows": {"ref": "samplesCount", "value": "data"}}}, "output": {"type": "objects", "source": "['dataset', 'labels']"}, "annotation": ["//name: generateDataset", "//input: int kernel", "//input: column kernelParams", "//input: int samplesCount", "//input: int featuresCount", "//input: double min", "//input: double max", "//input: double violatorsPercentage"], "prototype": "generateDataset(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "prototypeForWebWorker": "generateDatasetInWebWorker(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "callArgs": "[kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage]"}, "normalizeDataset": 
{"arguments": {"data": {"type": "floatColumns"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "data", "value": "numOfColumns"}, "numOfColumns": {"ref": "data", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs']"}, "annotation": ["//name: normalizeDataset", "//input: column_list data"], "prototype": "normalizeDataset(data)", "prototypeForWebWorker": "normalizeDatasetInWebWorker(data)", "callArgs": "[data]"}, "trainLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights']"}, "annotation": ["//name: trainLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, 
labels)", "prototypeForWebWorker": "trainLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels]"}, "predictByLSSVM": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "normalizedData": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "means": {"type": "floatColumn"}, "stdDevs": {"type": "floatColumn"}, "modelParams": {"type": "floatColumn"}, "precomputedWeights": {"type": "floatColumn"}, "targetData": {"type": "floatColumns"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "targetData", "value": "numOfRows"}}}, "output": {"type": "column", "source": "prediction"}, "annotation": ["//name: predictByLSSVM", "//input: int kernel", "//input: column kernelParams", "//input: column_list normalizedData", "//input: column labels", "//input: column means", "//input: column stdDevs", "//input: column modelParams", "//input: column precomputedWeights", "//input: column_list targetData", "//output: column prediction"], "prototype": "predictByLSSVM(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "prototypeForWebWorker": "predictByLSSVMInWebWorker(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "callArgs": "[kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData]"}, "trainAndAnalyzeLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "confusionMatrixElementsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, 
"numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}, "predictedLabels": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "correctness": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "consfusionMatrix": {"type": "newIntColumn", "numOfRows": {"ref": "confusionMatrixElementsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights', 'predictedLabels', 'correctness', 'consfusionMatrix']"}, "annotation": ["//name: trainAndAnalyzeLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: int confusionMatrixElementsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainAndAnalyzeLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "prototypeForWebWorker": "trainAndAnalyzeLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels]"}}}
1
+ {"pcaExport.cpp": {"principalComponentAnalysis": {"arguments": {"columns": {"type": "floatColumns"}, "componentsCount": {"type": "num"}, "centerNum": {"type": "num"}, "scaleNum": {"type": "num"}, "components": {"type": "newFloatColumns", "numOfRows": {"ref": "columns", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "tableFromColumns", "source": "components"}, "annotation": ["//name: principalComponentAnalysis", "//input: dataframe table", "//input: column_list columns", "//input: int componentsCount", "//input: int centerNum", "//input: int scaleNum", "//output: dataframe result "], "prototype": "principalComponentAnalysis(table, columns, componentsCount, centerNum, scaleNum)", "prototypeForWebWorker": "principalComponentAnalysisInWebWorker(table, columns, componentsCount, centerNum, scaleNum)", "callArgs": "[columns, componentsCount, centerNum, scaleNum]"}, "error": {"arguments": {"col1": {"type": "floatColumn"}, "col2": {"type": "floatColumn"}}, "output": {"type": "double", "source": "_callResult"}, "annotation": ["//name: error", "//input: dataframe df", "//input: column col1", "//input: column col2", "//output: double mad "], "prototype": "error(df, col1, col2)", "prototypeForWebWorker": "errorInWebWorker(df, col1, col2)", "callArgs": "[col1, col2]"}}, "PCA/PCA.cpp": {}, "plsExport.cpp": {"partialLeastSquareRegression": {"arguments": {"features": {"type": "floatColumns"}, "predict": {"type": "floatColumn"}, "componentsCount": {"type": "num"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "predict", "value": "numOfRows"}}, "regressionCoefficients": {"type": "newFloatColumn", "numOfRows": {"ref": "features", "value": "numOfColumns"}}, "tScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "uScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, 
"numOfColumns": {"ref": "componentsCount", "value": "data"}}, "xLoadings": {"type": "newFloatColumns", "numOfRows": {"ref": "features", "value": "numOfColumns"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "yLoadings": {"type": "newFloatColumn", "numOfRows": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings', 'yLoadings']"}, "annotation": ["//name: partialLeastSquareRegression", "//input: dataframe table", "//input: column_list features", "//input: column predict", "//input: int componentsCount"], "prototype": "partialLeastSquareRegression(table, features, predict, componentsCount)", "prototypeForWebWorker": "partialLeastSquareRegressionInWebWorker(table, features, predict, componentsCount)", "callArgs": "[features, predict, componentsCount]"}}, "PLS/PLS.cpp": {}, "svmApi.cpp": {"generateDataset": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "samplesCount": {"type": "num"}, "featuresCount": {"type": "num"}, "min": {"type": "num"}, "max": {"type": "num"}, "violatorsPercentage": {"type": "num"}, "dataset": {"type": "newFloatColumns", "numOfRows": {"ref": "samplesCount", "value": "data"}, "numOfColumns": {"ref": "featuresCount", "value": "data"}}, "labels": {"type": "newFloatColumn", "numOfRows": {"ref": "samplesCount", "value": "data"}}}, "output": {"type": "objects", "source": "['dataset', 'labels']"}, "annotation": ["//name: generateDataset", "//input: int kernel", "//input: column kernelParams", "//input: int samplesCount", "//input: int featuresCount", "//input: double min", "//input: double max", "//input: double violatorsPercentage"], "prototype": "generateDataset(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "prototypeForWebWorker": "generateDatasetInWebWorker(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "callArgs": 
"[kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage]"}, "normalizeDataset": {"arguments": {"data": {"type": "floatColumns"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "data", "value": "numOfColumns"}, "numOfColumns": {"ref": "data", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs']"}, "annotation": ["//name: normalizeDataset", "//input: column_list data"], "prototype": "normalizeDataset(data)", "prototypeForWebWorker": "normalizeDatasetInWebWorker(data)", "callArgs": "[data]"}, "trainLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights']"}, "annotation": ["//name: trainLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: column_list dataset", "//input: column labels"], 
"prototype": "trainLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "prototypeForWebWorker": "trainLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels]"}, "predictByLSSVM": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "normalizedData": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "means": {"type": "floatColumn"}, "stdDevs": {"type": "floatColumn"}, "modelParams": {"type": "floatColumn"}, "precomputedWeights": {"type": "floatColumn"}, "targetData": {"type": "floatColumns"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "targetData", "value": "numOfRows"}}}, "output": {"type": "column", "source": "prediction"}, "annotation": ["//name: predictByLSSVM", "//input: int kernel", "//input: column kernelParams", "//input: column_list normalizedData", "//input: column labels", "//input: column means", "//input: column stdDevs", "//input: column modelParams", "//input: column precomputedWeights", "//input: column_list targetData", "//output: column prediction"], "prototype": "predictByLSSVM(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "prototypeForWebWorker": "predictByLSSVMInWebWorker(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "callArgs": "[kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData]"}, "trainAndAnalyzeLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "confusionMatrixElementsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, 
"normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}, "predictedLabels": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "correctness": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "consfusionMatrix": {"type": "newIntColumn", "numOfRows": {"ref": "confusionMatrixElementsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights', 'predictedLabels', 'correctness', 'consfusionMatrix']"}, "annotation": ["//name: trainAndAnalyzeLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: int confusionMatrixElementsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainAndAnalyzeLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "prototypeForWebWorker": "trainAndAnalyzeLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels]"}}}
package/src/eda-tools.ts CHANGED
@@ -21,7 +21,7 @@ export async function computePCA(table: DG.DataFrame, features: DG.ColumnList, c
21
21
  return await _principalComponentAnalysisInWebWorker(table, features, components, centerNum, scaleNum);
22
22
  }
23
23
 
24
- // Partial least square regression (PLS)
24
+ // Partial least square regression (PLS): TO REMOVE
25
25
  export async function computePLS(
26
26
  table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, components: number,
27
27
  ): Promise<any> {
package/src/package.ts CHANGED
@@ -5,16 +5,16 @@ import * as grok from 'datagrok-api/grok';
5
5
  import * as ui from 'datagrok-api/ui';
6
6
  import * as DG from 'datagrok-api/dg';
7
7
 
8
- import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
9
-
10
8
  import {_initEDAAPI} from '../wasm/EDAAPI';
11
- import {computePCA, computePLS} from './eda-tools';
12
- import {addPrefixToEachColumnName, addPLSvisualization, regressionCoefficientsBarChart,
13
- scoresScatterPlot, predictedVersusReferenceScatterPlot, addOneWayAnovaVizualization} from './eda-ui';
14
- import {carsDataframe, testDataForBinaryClassification} from './data-generators';
9
+ import {computePCA} from './eda-tools';
10
+ import {addPrefixToEachColumnName, addOneWayAnovaVizualization} from './eda-ui';
11
+ import {testDataForBinaryClassification} from './data-generators';
15
12
  import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
16
13
  getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
17
14
 
15
+ import {PLS_ANALYSIS} from './pls/pls-constants';
16
+ import {runMVA, runDemoMVA, getPlsAnalysis, PlsOutput} from './pls/pls-tools';
17
+
18
18
  import {oneWayAnova} from './stat-tools';
19
19
  import {getDbscanWorker} from '@datagrok-libraries/math';
20
20
 
@@ -71,7 +71,7 @@ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column,
71
71
  //input: bool scale = false [Indicating whether the variables should be scaled to have unit variance.]
72
72
  export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number, center: boolean, scale: boolean): Promise<void> {
73
73
  const pcaTable = await computePCA(table, features, components, center, scale);
74
- addPrefixToEachColumnName('PCA', pcaTable.columns);
74
+ addPrefixToEachColumnName('PC', pcaTable.columns);
75
75
 
76
76
  if (table.id === null) // table is loaded from a local file
77
77
  grok.shell.addTableView(pcaTable);
@@ -202,68 +202,43 @@ export async function MCL(df: DG.DataFrame, cols: DG.Column[], metrics: KnownMet
202
202
  return res?.sc;
203
203
  }
204
204
 
205
- //top-menu: ML | Analyze | Multivariate Analysis...
206
- //name: Multivariate Analysis (PLS)
207
- //description: Multidimensional data analysis using partial least squares (PLS) regression. It reduces the predictors to a smaller set of uncorrelated components and performs least squares regression on them.
205
+ //name: PLS
206
+ //description: Compute partial least squares (PLS) regression analysis components: prediction, regression coefficients, T- & U-scores, X-loadings.
208
207
  //input: dataframe table
209
- //input: column names
210
208
  //input: column_list features {type: numerical}
211
209
  //input: column predict {type: numerical}
212
210
  //input: int components = 3
213
- export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.ColumnList,
214
- predict: DG.Column, components: number): Promise<void> {
215
- const plsResults = await computePLS(table, features, predict, components);
216
- addPLSvisualization(table, names, features, predict, plsResults);
211
+ //input: column names {type: string}
212
+ //output: object plsResults
213
+ export async function PLS(table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, components: number, names: DG.Column): Promise<PlsOutput> {
214
+ return await getPlsAnalysis({
215
+ table: table,
216
+ features: features,
217
+ predict: predict,
218
+ components: components,
219
+ names: names,
220
+ });
221
+ }
222
+
223
+ //top-menu: ML | Analyze | PLS...
224
+ //name: topMenuPLS
225
+ //description: Compute partial least squares (PLS) regression components. They maximally summarize the variation of the predictors while maximizing correlation with the response variable.
226
+ export async function topMenuPLS(): Promise<void> {
227
+ await runMVA(PLS_ANALYSIS.COMPUTE_COMPONENTS);
228
+ }
229
+
230
+ //top-menu: ML | Analyze | Multivariate Analysis...
231
+ //name: multivariateAnalysis
232
+ //description: Multidimensional data analysis using partial least squares (PLS) regression.
233
+ export async function MVA(): Promise<void> {
234
+ await runMVA(PLS_ANALYSIS.PERFORM_MVA);
217
235
  }
218
236
 
219
237
  //name: MVA demo
220
- //description: Multidimensional data analysis using partial least squares (PLS) regression. It reduces the predictors to a smaller set of uncorrelated components and performs least squares regression on them.
238
+ //description: Multidimensional data analysis using partial least squares (PLS) regression. It identifies latent factors and constructs a linear model based on them.
221
239
  //meta.demoPath: Compute | Multivariate analysis
222
- //meta.isDemoScript: True
223
240
  export async function demoMultivariateAnalysis(): Promise<any> {
224
- const demoScript = new DemoScript('Partial least squares regression',
225
- 'Analysis of multidimensional data.');
226
-
227
- const cars = carsDataframe();
228
-
229
- const components = 3;
230
- const names = cars.columns.byName('model');
231
- const predict = cars.columns.byName('price');
232
- const features = cars.columns.remove('price').remove('model');
233
- const plsOutput = await computePLS(cars, features, predict, components);
234
-
235
- const sourceCars = carsDataframe();
236
- sourceCars.name = 'Cars';
237
- let view: any;
238
- let dialog: any;
239
-
240
- await demoScript
241
- .step('Data', async () => {
242
- grok.shell.addTableView(sourceCars);
243
- view = grok.shell.getTableView(sourceCars.name);
244
- }, {description: 'Each car has many features - patterns extraction is complicated.', delay: 0})
245
- .step('Model', async () => {
246
- dialog = ui.dialog({title: 'Multivariate Analysis (PLS)'})
247
- .add(ui.tableInput('Table', sourceCars))
248
- .add(ui.columnsInput('Features', cars, features.toList, {available: undefined, checked: features.names()}))
249
- .add(ui.columnInput('Names', cars, names, undefined))
250
- .add(ui.columnInput('Predict', cars, predict, undefined))
251
- .add(ui.intInput('Components', components, undefined))
252
- .onOK(() => {
253
- grok.shell.info('Multivariate analysis has been already performed.');
254
- })
255
- .show({x: 400, y: 140});
256
- }, {description: 'Predict car price by its other features.', delay: 0})
257
- .step('Regression coeffcicients', async () => {
258
- dialog.close();
259
- view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]));
260
- },
261
- {description: 'The feature "diesel" affects the price the most.', delay: 0})
262
- .step('Scores', async () => {view.addViewer(scoresScatterPlot(names, plsOutput[2], plsOutput[3]));},
263
- {description: 'Similarities & dissimilarities: alfaromeo and mercedes are different.', delay: 0})
264
- .step('Prediction', async () => {view.addViewer(predictedVersusReferenceScatterPlot(names, predict, plsOutput[0]));},
265
- {description: 'Closer to the line means better price prediction.', delay: 0})
266
- .start();
241
+ runDemoMVA();
267
242
  }
268
243
 
269
244
  //name: Generate linear separable dataset
@@ -0,0 +1,129 @@
1
+ // PLS-specific constants
2
+
3
+ /** Types of analysis using PLS */
4
+ export enum PLS_ANALYSIS {
5
+ COMPUTE_COMPONENTS,
6
+ PERFORM_MVA,
7
+ DEMO,
8
+ }
9
+
10
+ /** Errors & warnings */
11
+ export enum ERROR_MSG {
12
+ NO_DF = 'No dataframe is opened',
13
+ NO_COLS = 'No numeric columns without missing values',
14
+ ONE_COL = 'No columns to be used as features (just one numeric columns without missing values)',
15
+ EMPTY_DF = 'Dataframe is empty',
16
+ }
17
+
18
+ /** Widget titles */
19
+ export enum TITLE {
20
+ PREDICT = 'Predict',
21
+ USING = 'Using',
22
+ COMPONENTS = 'Components',
23
+ PLS = 'PLS',
24
+ MVA = 'Multivariate Analysis (PLS)',
25
+ RUN = 'RUN',
26
+ NAMES = 'Names',
27
+ MODEL = 'Observed vs. Predicted',
28
+ FEATURE = 'Feature',
29
+ REGR_COEFS = 'Regression Coefficients',
30
+ XLOADING = 'x.loading.p',
31
+ LOADINGS = 'Loadings',
32
+ XSCORE = 'x.score.t',
33
+ YSCORE = 'y.score.u',
34
+ SCORES = 'Scores',
35
+ EXPL_VAR = 'Explained Variance',
36
+ EXPLORE = 'Explore',
37
+ }
38
+
39
+ /** Tooltips */
40
+ export enum HINT {
41
+ PREDICT = 'Column with the response variable',
42
+ FEATURES = 'Predictors (features)',
43
+ COMPONENTS = 'Number of PLS components',
44
+ PLS = 'Compute PLS components',
45
+ MVA = 'Perform multivariate analysis',
46
+ NAMES = 'Names of data samples',
47
+ }
48
+
49
+ /** Links to help */
50
+ export enum LINK {
51
+ PLS = 'https://datagrok.ai/help/explore/multivariate-analysis/pls#pls-components',
52
+ MVA = 'https://datagrok.ai/help/explore/multivariate-analysis/pls',
53
+ MODEL = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/predicted-vs-reference',
54
+ COEFFS = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/regression-coefficients',
55
+ LOADINGS = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/loadings',
56
+ EXPL_VARS = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/explained-variance',
57
+ SCORES = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/scores',
58
+ }
59
+
60
+ /** Components consts */
61
+ export enum COMPONENTS {
62
+ DEFAULT = 3,
63
+ MIN = 1,
64
+ }
65
+
66
+ /** Items used for naming results */
67
+ export enum RESULT_NAMES {
68
+ PREFIX = 'PLS',
69
+ SUFFIX = '(predicted)',
70
+ COMP = 'component',
71
+ COMPS = 'components',
72
+ }
73
+
74
+ /** Indices of wasm-computation output */
75
+ export enum WASM_OUTPUT_IDX {
76
+ PREDICTION = 0,
77
+ REGR_COEFFS = 1,
78
+ T_SCORES = 2,
79
+ U_SCORES = 3,
80
+ X_LOADINGS = 4,
81
+ Y_LOADINGS = 5,
82
+ }
83
+
84
+ export const INT = 'Int';
85
+ export const TIMEOUT = 6;
86
+ export const RADIUS = [0.49, 0.79, 0.99];
87
+ export const LINE_WIDTH = 1;
88
+ export const X_COORD = 200;
89
+ export const Y_COORD = 200;
90
+ export const DELAY = 2000;
91
+
92
+ /** Curve colors */
93
+ export enum COLOR {
94
+ AXIS = '#838383',
95
+ CIRCLE = '#0000FF',
96
+ };
97
+
98
+ /** Intro markdown for demo app */
99
+ export const DEMO_INTRO_MD = `# Data
100
+ Each car has many features - patterns extraction is complicated.
101
+
102
+ # Model
103
+ Predict car price by its other features.
104
+
105
+ # Try
106
+ Press 'RUN' to perform multivariate analysis using partial least squares
107
+ ([PLS](https://en.wikipedia.org/wiki/Partial_least_squares_regression)) regression.
108
+
109
+ # Essence
110
+ The method finds the latent factors that
111
+
112
+ * capture the maximum variance in the features
113
+ * maximize correlation with the response variable`;
114
+
115
+ /** Description of demo results: wizard components */
116
+ export const DEMO_RESULTS = [
117
+ {caption: TITLE.MODEL, text: 'Closer to the line means better price prediction.'},
118
+ {caption: TITLE.SCORES, text: 'The latent factor values for each data sample reflect the similarities and dissimilarities among observations.'},
119
+ {caption: TITLE.LOADINGS, text: 'The impact of each feature on the latent factors: higher loading means stronger influence.'},
120
+ {caption: TITLE.REGR_COEFS, text: 'Parameters of the obtained linear model: features make different contribution to the prediction.'},
121
+ {caption: TITLE.EXPL_VAR, text: 'How well the latent components fit source data: closer to one means better fit.'},
122
+ ];
123
+
124
+ /** Results markdown for the demo app, built from DEMO_RESULTS */
125
+ export const DEMO_RESULTS_MD = DEMO_RESULTS.map((item) => `# ${item.caption}\n\n${item.text}`)
126
+ .join('\n\n') + `\n\n# Learn more
127
+
128
+ * [Multivariate analysis](${LINK.MVA}),
129
+ * [ANOVA](https://datagrok.ai/help/explore/anova)`;