npm - @datagrok/eda - Versions diffs - 1.1.28 → 1.1.29 - Mend

@datagrok/eda 1.1.28 → 1.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/CHANGELOG.md +5 -0
package/README.md +1 -0
package/dist/{05e5e0770f54f07e9474.wasm → 12a82b8001995d426ed2.wasm} +0 -0
package/dist/23.js +1 -1
package/dist/23.js.map +1 -1
package/dist/501.js +2 -0
package/dist/501.js.map +1 -0
package/dist/727.js +2 -0
package/dist/727.js.map +1 -0
package/dist/package.js +1 -1
package/dist/package.js.map +1 -1
package/package.json +5 -5
package/scripts/command.txt +1 -1
package/scripts/func.json +664 -1
package/scripts/module.json +1 -1
package/src/data-generators.ts +1 -44
package/src/missing-values-imputation/ui.ts +16 -6
package/src/package.ts +60 -78
package/src/regression.ts +1 -1
package/src/softmax-classifier.ts +412 -0
package/src/svm.ts +11 -33
package/src/workers/softmax-worker.ts +146 -0
package/wasm/EDA.js +55 -1
package/wasm/EDA.wasm +0 -0
package/wasm/EDAAPI.js +15 -0
package/wasm/EDAForWebWorker.js +1 -1
package/wasm/regression.h +2 -5
package/wasm/softmax-api.cpp +49 -0
package/wasm/softmax.h +156 -0
package/wasm/workers/fitSoftmaxWorker.js +13 -0
package/webpack.config.js +3 -2

package/scripts/module.json CHANGED

@@ -1,7 +1,7 @@
 {
     "name": "EDA",
     "folder": "../wasm",
-    "source": ["pcaExport.cpp", "PCA/PCA.cpp", "plsExport.cpp", "PLS/PLS.cpp", "svmApi.cpp", "regression-api.cpp"],
+    "source": ["pcaExport.cpp", "PCA/PCA.cpp", "plsExport.cpp", "PLS/PLS.cpp", "svmApi.cpp", "regression-api.cpp", "softmax-api.cpp"],
     "optimizationMode": "-O3",
     "packageFile": "../src/package.ts",
     "packageJsonFile": "../package.json",

package/src/data-generators.ts CHANGED

@@ -6,15 +6,7 @@ import * as grok from 'datagrok-api/grok';
 import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
-import {checkGeneratorSVMinputs} from './utils';
-import {_generateDatasetInWebWorker} from '../wasm/EDAAPI';
-const SVM_GEN_FEATURES_INDEX = 0;
-const SVM_GEN_LABELS_INDEX = 1;
-const SVM_FEATURE_NAME = 'Feature #';
-const SVM_LABEL_NAME = 'Label';
-// Returns the dataframe "cars"
+/**  Returns the dataframe "cars" */
 export function carsDataframe(): DG.DataFrame {
   return DG.DataFrame.fromColumns(
     [
@@ -37,38 +29,3 @@ export function carsDataframe(): DG.DataFrame {
       DG.Column.fromInt32Array('price', new Int32Array([16500, 17710, 16430, 6575, 7957, 6229, 7129, 8845, 6785, 35550, 7395, 31600, 16503, 5389, 7349, 7299, 6229, 13860, 37028, 12170, 7775, 5348, 7898, 9989, 10698, 7775, 13295, 12940, 19045, 22470])),
     ]);
 } // carsDataframe
-// Generate dataset for testing binary classifiers
-export async function testDataForBinaryClassification(kernel: number, kernelParams: Array<number>,
-  name: string, samplesCount: number, featuresCount: number, min: number,
-  max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
-  // check inputs
-  checkGeneratorSVMinputs(samplesCount, featuresCount, min, max, violatorsPercentage);
-  // kernel params column
-  const kernelParamsCol = DG.Column.fromList('double', 'kernelParams', kernelParams);
-  // CALL WASM-COMPUTATIONS
-  let _output: any;
-  const _promise = _generateDatasetInWebWorker(kernel, kernelParamsCol,
-    samplesCount, featuresCount, min, max, violatorsPercentage);
-  await _promise.then(
-    (_result) => {_output = _result;},
-    (_error) => {throw new Error(`Error: ${_error}`);},
-  );
-  // Rename labels column
-  _output[SVM_GEN_LABELS_INDEX].name = SVM_LABEL_NAME;
-  // Rename feature columns
-  for (const col of _output[SVM_GEN_FEATURES_INDEX])
-    col.name = SVM_FEATURE_NAME + col.name;
-  // Create dataframe
-  const df = DG.DataFrame.fromColumns(_output[SVM_GEN_FEATURES_INDEX]);
-  df.name = name;
-  df.columns.add(_output[SVM_GEN_LABELS_INDEX]);
-  return df;
-} // testDataForMachineLearning

package/src/missing-values-imputation/ui.ts CHANGED

@@ -109,8 +109,10 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
   // Distance components
   let distType = DISTANCE_TYPE.EUCLIDEAN;
-  const distTypeInput: DG.ChoiceInput<DISTANCE_TYPE> = ui.input.choice(TITLE.DISTANCE, {value: distType,
-    items: [DISTANCE_TYPE.EUCLIDEAN, DISTANCE_TYPE.MANHATTAN], onValueChanged: () => distType = distTypeInput.value ?? DISTANCE_TYPE.EUCLIDEAN}) as DG.ChoiceInput<DISTANCE_TYPE>;
+  const distTypeInput: DG.ChoiceInput<DISTANCE_TYPE> = ui.input.choice(TITLE.DISTANCE, {
+    value: distType,
+    items: [DISTANCE_TYPE.EUCLIDEAN, DISTANCE_TYPE.MANHATTAN],
+    onValueChanged: () => distType = distTypeInput.value ?? DISTANCE_TYPE.EUCLIDEAN}) as DG.ChoiceInput<DISTANCE_TYPE>;
   distTypeInput.setTooltip(HINT.DISTANCE);
   // Target columns components (cols with missing values to be imputed)
@@ -193,8 +195,15 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
     distTypeInput.root.hidden = true; // this input will be used further
     // The following should provide a slider (see th bug https://reddata.atlassian.net/browse/GROK-14431)
-    // @ts-ignore
-    const prop = DG.Property.fromOptions({'name': name, 'inputType': 'Float', 'min': 0, 'max': 10, 'showSlider': true, 'step': 1});
+    const prop = DG.Property.fromOptions({
+      'name': name,
+      'inputType': 'Float',
+      'min': 0,
+      'max': 10,
+      // @ts-ignore
+      'showSlider': true,
+      'step': 1,
+    });
     const weightInput = ui.input.forProperty(prop);
     weightInput.value = settings.defaultWeight;
     weightInput.onChanged(() => {
@@ -239,7 +248,8 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
     .onOK(() => {
       okClicked = true;
       dlg.close();
-      availableFeatureColsNames.filter((name) => !selectedFeatureColNames.includes(name)).forEach((name) => featuresMetrics.delete(name));
+      availableFeatureColsNames.filter((name) => !selectedFeatureColNames.includes(name))
+        .forEach((name) => featuresMetrics.delete(name));
       try {
         const failedToImpute = impute(df!, targetColNames, featuresMetrics, misValsInds, distType, neighbors, inPlace);
@@ -256,5 +266,5 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
       }
     }).onClose.subscribe(() => !okClicked && reject());
-    return promise;
+  return promise;
 } // runKNNImputer

package/src/package.ts CHANGED

@@ -8,7 +8,6 @@ import * as DG from 'datagrok-api/dg';
 import {_initEDAAPI} from '../wasm/EDAAPI';
 import {computePCA} from './eda-tools';
 import {addPrefixToEachColumnName, addOneWayAnovaVizualization} from './eda-ui';
-import {testDataForBinaryClassification} from './data-generators';
 import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
   getTrainedModel, getPrediction, isApplicableSVM, isInteractiveSVM, showTrainReport, getPackedModel} from './svm';
@@ -31,7 +30,8 @@ import {MCLEditor} from '@datagrok-libraries/ml/src/MCL/mcl-editor';
 import {markovCluster} from '@datagrok-libraries/ml/src/MCL/clustering-view';
 import {MCL_OPTIONS_TAG, MCLSerializableOptions} from '@datagrok-libraries/ml/src/MCL';
-import {getLinearRegressionParams, getPredictionByLinearRegression, getTestDatasetForLinearRegression} from './regression';
+import {getLinearRegressionParams, getPredictionByLinearRegression} from './regression';
+import {SoftmaxClassifier} from './softmax-classifier';
 export const _package = new DG.Package();
@@ -299,37 +299,6 @@ export async function demoMultivariateAnalysis(): Promise<any> {
   runDemoMVA();
 }
-//name: Generate linear separable dataset
-//description: Generates linear separble dataset for testing binary classificators
-//input: string name = 'Data' {caption: name; category: Dataset}
-//input: int samplesCount = 1000 {caption: samples; category: Size}
-//input: int featuresCount = 2 {caption: features; category: Size}
-//input: double min = -39 {caption: min; category: Range}
-//input: double max = 173 {caption: max; category: Range}
-//input: double violatorsPercentage = 5 {caption: violators; units: %; category: Dataset}
-//output: dataframe df
-export async function testDataLinearSeparable(name: string, samplesCount: number, featuresCount: number,
-  min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
-  return await testDataForBinaryClassification(LINEAR, [0, 0], name, samplesCount, featuresCount,
-    min, max, violatorsPercentage);
-}
-//name: Generate linear non-separable dataset
-//description: Generates linear non-separble dataset for testing binary classificators
-//input: string name = 'Data' {caption: name; category: Dataset}
-//input: double sigma = 90  {caption: sigma; category: Hyperparameters} [RBF-kernel paramater]
-//input: int samplesCount = 1000 {caption: samples; category: Size}
-//input: int featuresCount = 2 {caption: features; category: Size}
-//input: double min = -39 {caption: min; category: Range}
-//input: double max = 173 {caption: max; category: Range}
-//input: double violatorsPercentage = 5 {caption: violators; units: %; category: Dataset}
-//output: dataframe df
-export async function testDataLinearNonSeparable(name: string, sigma: number, samplesCount: number,
-  featuresCount: number, min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
-  return await testDataForBinaryClassification(RBF, [sigma, 0], name, samplesCount, featuresCount,
-    min, max, violatorsPercentage);
-}
 //name: trainLinearKernelSVM
 //meta.mlname: linear kernel LS-SVM
 //meta.mlrole: train
@@ -593,48 +562,6 @@ export async function kNNImputationForTable(table: DG.DataFrame) {
   await runKNNImputer(table);
 }
-//name: linearRegression
-//description: Linear Regression demo
-//input: dataframe table
-//input: column_list features {type: numerical}
-//input: column target {type: numerical}
-//input: bool plot = true {caption: plot}
-export async function linearRegression(table: DG.DataFrame, features: DG.ColumnList, target: DG.Column, plot: boolean): Promise<void> {
-  const t1 = performance.now();
-  const params = await getLinearRegressionParams(features, target);
-  const t2 = performance.now();
-  console.log(`Fit: ${t2 - t1} ms.`);
-  const prediction = getPredictionByLinearRegression(features, params);
-  console.log(`Predict: ${performance.now() - t2} ms.`);
-  prediction.name = table.columns.getUnusedName(prediction.name);
-  table.columns.add(prediction);
-  if (plot) {
-    const view = grok.shell.tableView(table.name);
-    view.addViewer(DG.VIEWER.SCATTER_PLOT, {
-      xColumnName: target.name,
-      yColumnName: prediction.name,
-      showRegressionLine: true,
-    });
-  }
-}
-//name: generateDatasetForLinearRegressionTest
-//description: Create demo dataset for linear regression
-//input: int rowCount = 10000 {min: 1000; max: 10000000; step: 10000}
-//input: int colCount = 10 {min: 1; max: 1000; step: 10}
-//input: double featuresScale = 10 {min: -1000; max: 1000; step: 10}
-//input: double featuresBias = 10 {min: -1000; max: 1000; step: 10}
-//input: double paramsScale = 10 {min: -1000; max: 1000; step: 10}
-//input: double paramsBias = 10 {min: -1000; max: 1000; step: 10}
-//output: dataframe table
-export function generateDatasetForLinearRegressionTest(rowCount: number, colCount: number,
-  featuresScale: number, featuresBias: number, paramsScale: number, paramsBias: number): DG.DataFrame {
-  return getTestDatasetForLinearRegression(rowCount, colCount, featuresScale, featuresBias, paramsScale, paramsBias);
-}
 //name: trainLinearRegression
 //meta.mlname: Linear Regression
 //meta.mlrole: train
@@ -671,10 +598,8 @@ export function isApplicableLinearRegression(df: DG.DataFrame, predictColumn: DG
     if (!col.matches('numerical'))
       return false;
   }
-  if (!predictColumn.matches('numerical'))
-    return false;
-  return true;
+  return predictColumn.matches('numerical');
 }
 //name: isInteractiveLinearRegression
@@ -686,3 +611,60 @@ export function isApplicableLinearRegression(df: DG.DataFrame, predictColumn: DG
 export function isInteractiveLinearRegression(df: DG.DataFrame, predictColumn: DG.Column): boolean {
   return df.rowCount <= 100000;
 }
+//name: trainSoftmax
+//meta.mlname: Softmax
+//meta.mlrole: train
+//input: dataframe df
+//input: column predictColumn
+//input: double rate = 1.0 {category: Hyperparameters; min: 0.001; max: 20} [Learning rate]
+//input: int iterations = 100 {category: Hyperparameters; min: 1; max: 10000; step: 10} [Fitting iterations count]
+//input: double penalty = 0.1 {category: Hyperparameters; min: 0.0001; max: 1} [Regularization rate]
+//input: double tolerance = 0.001 {category: Hyperparameters; min: 0.00001; max: 0.1} [Fitting tolerance]
+//output: dynamic model
+export async function trainSoftmax(df: DG.DataFrame, predictColumn: DG.Column, rate: number,
+  iterations: number, penalty: number, tolerance: number): Promise<Uint8Array> {
+  const features = df.columns;
+  const model = new SoftmaxClassifier({
+    classesCount: predictColumn.categories.length,
+    featuresCount: features.length,
+  });
+  await model.fit(features, predictColumn, rate, iterations, penalty, tolerance);
+  return model.toBytes();
+}
+//name: applySoftmax
+//meta.mlname: Softmax
+//meta.mlrole: apply
+//input: dataframe df
+//input: dynamic model
+//output: dataframe table
+export function applySoftmax(df: DG.DataFrame, model: any): DG.DataFrame {
+  const features = df.columns;
+  const unpackedModel = new SoftmaxClassifier(undefined, model);
+  return DG.DataFrame.fromColumns([unpackedModel.predict(features)]);
+}
+//name: isApplicableSoftmax
+//meta.mlname: Softmax
+//meta.mlrole: isApplicable
+//input: dataframe df
+//input: column predictColumn
+//output: bool result
+export function isApplicableSoftmax(df: DG.DataFrame, predictColumn: DG.Column): boolean {
+  return SoftmaxClassifier.isApplicable(df.columns, predictColumn);
+}
+//name: isInteractiveSoftmax
+//meta.mlname: Softmax
+//meta.mlrole: isInteractive
+//input: dataframe df
+//input: column predictColumn
+//output: bool result
+export function isInteractiveSoftmax(df: DG.DataFrame, predictColumn: DG.Column): boolean {
+  return SoftmaxClassifier.isInteractive(df.columns, predictColumn);
+}

package/src/regression.ts CHANGED

@@ -1,4 +1,4 @@
-// Linear regression tools
+// Regression tools
 import * as grok from 'datagrok-api/grok';
 import * as ui from 'datagrok-api/ui';