npm - @datagrok/eda - Versions diffs - 1.0.3 - Mend

@datagrok/eda 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

package/README.md +3 -0
package/detectors.js +9 -0
package/dist/111.js +2 -0
package/dist/146.js +2 -0
package/dist/155.js +2 -0
package/dist/355.js +2 -0
package/dist/584.js +2 -0
package/dist/604.js +2 -0
package/dist/632.js +2 -0
package/dist/645.js +2 -0
package/dist/93.js +2 -0
package/dist/d711f70338306e5bddc4.wasm +0 -0
package/dist/package-test.js +2 -0
package/dist/package.js +2 -0
package/package.json +49 -0
package/package.png +0 -0
package/scripts/command.txt +1 -0
package/scripts/exportForTS.py +862 -0
package/scripts/exportForTSConstants.py +93 -0
package/scripts/func.json +1 -0
package/scripts/module.json +11 -0
package/src/EDAtools.ts +46 -0
package/src/EDAui.ts +118 -0
package/src/dataGenerators.ts +74 -0
package/src/demos.ts +38 -0
package/src/package-test.ts +12 -0
package/src/package.ts +248 -0
package/src/svm.ts +485 -0
package/src/utils.ts +51 -0
package/tsconfig.json +71 -0
package/wasm/EDA.js +443 -0
package/wasm/EDA.wasm +0 -0
package/wasm/EDAAPI.js +131 -0
package/wasm/EDAForWebWorker.js +21 -0
package/wasm/PCA/PCA.cpp +151 -0
package/wasm/PCA/PCA.h +48 -0
package/wasm/PLS/PLS.h +64 -0
package/wasm/PLS/pls.cpp +393 -0
package/wasm/callWasm.js +475 -0
package/wasm/callWasmForWebWorker.js +706 -0
package/wasm/dataGenerators.h +169 -0
package/wasm/dataMining.h +116 -0
package/wasm/pcaExport.cpp +64 -0
package/wasm/plsExport.cpp +75 -0
package/wasm/svm.h +608 -0
package/wasm/svmApi.cpp +323 -0
package/wasm/workers/errorWorker.js +13 -0
package/wasm/workers/generateDatasetWorker.js +13 -0
package/wasm/workers/normalizeDatasetWorker.js +13 -0
package/wasm/workers/partialLeastSquareRegressionWorker.js +13 -0
package/wasm/workers/predictByLSSVMWorker.js +13 -0
package/wasm/workers/principalComponentAnalysisWorker.js +13 -0
package/wasm/workers/trainAndAnalyzeLSSVMWorker.js +13 -0
package/wasm/workers/trainLSSVMWorker.js +13 -0
package/webpack.config.js +37 -0

package/wasm/dataGenerators.h ADDED Viewed

@@ -0,0 +1,169 @@
+// dataGenerators.h
+// Tools for generating datasets for testing SVM.
+#ifndef DATA_GENERATORS_H
+#define DATA_GENERATORS_H
+#include<cstdlib>
+using namespace std;
+#include "../../../../Eigen/Eigen/Dense"
+using namespace Eigen;
+#include "svm.h"
+namespace svm
+{
+	// Constants for random generation.
+	// SEED is the base value passed to srand(); the generators below add the
+	// dataset sizes to it before seeding.
+	const unsigned SEED = 10214313;
+	// rand() is reduced modulo RAND_SCALE and then divided by RAND_SCALE,
+	// which gives a value in [0, 1) with a step of 1 / RAND_SCALE.
+	const int RAND_SCALE = 1000;
+	/* Randomly flip data labels.
+	   Every label is independently replaced by its negation with the given probability.
+		  labels - array of labels, modified in place
+		  samplesCount - number of labels
+		  changeProbability - probability of flipping each label, expected within [0, 1]
+	   Returns INCORRECT_PROBABILITY if the probability is below 0 or above 1,
+	   INCORRECT_SIZE if samplesCount < 1, and NO_ERRORS otherwise.
+	   REMARK. The generator is re-seeded with SEED + samplesCount on every call. */
+	template<typename Float>
+	int changeLabels(Float* labels, int samplesCount, Float changeProbability) noexcept
+	{
+		const Float lowerBound = static_cast<Float>(0);
+		const Float upperBound = static_cast<Float>(1);
+
+		// reject a probability outside [0, 1]
+		if (changeProbability < lowerBound || changeProbability > upperBound)
+			return INCORRECT_PROBABILITY;
+
+		// reject an empty set of labels
+		if (samplesCount < 1)
+			return INCORRECT_SIZE;
+
+		// seed the generator
+		srand(SEED + samplesCount);
+
+		// walk through the labels and negate those selected by the random draw
+		Float* const labelsEnd = labels + samplesCount;
+		for (Float* label = labels; label != labelsEnd; ++label)
+		{
+			// draw is one of 0, 1/RAND_SCALE, ..., (RAND_SCALE - 1)/RAND_SCALE
+			const Float draw = static_cast<Float>(rand() % RAND_SCALE) / RAND_SCALE;
+			if (draw < changeProbability)
+				*label = -(*label);
+		}
+
+		return NO_ERRORS;
+	} // changeLabels
+	/* Generate dataset: separable case. Features are generated randomly using the uniform distribution.
+	   Each feature belongs to the corresponding segment [min, max].
+	      kernel - type of kernel
+	      kernelParams - parameters of kernel
+	      featuresCount - number of features, i.e. dimension
+	      samplesCount - number of the generated samples
+	      minVal - min value
+	      maxVal - max value
+	      data - generated data (written as a column-major samplesCount x featuresCount matrix)
+	      labels - generated labels (1 or -1 for each sample)
+	   Returns INCORRECT_PARAMETER_OF_KERNEL or INCORRECT_SIZE for invalid arguments,
+	   NO_ERRORS otherwise.
+	   A random reference vector v is generated; a sample x gets the label 1 if
+	   kernelFunc(x, v) - bias > 0 and the label -1 otherwise.
+	   WARNING. Memory for data and labels must be allocated outside this function.  */
+	template<typename Float>
+	int generateSeparable(int kernel, float kernelParams[MAX_NUM_OF_KERNEL_PARAM],
+		int featuresCount, int samplesCount,
+		Float minVal, Float maxVal,
+		Float* data, Float* labels) noexcept
+	{
+		using namespace svm;
+		// check parameters correctness
+		if (!areKernelParametersCorrect(kernel, kernelParams))
+			return INCORRECT_PARAMETER_OF_KERNEL;
+		// check sizes
+		if ((featuresCount < 1) || (samplesCount < 1))
+			return INCORRECT_SIZE;
+		// randomize
+		srand(SEED + samplesCount + featuresCount);
+		// assign data pointer with a matrix
+		Map<Matrix<Float, Dynamic, Dynamic, ColMajor>> X(data, samplesCount, featuresCount);
+		// generate random matrix: values from [-1, 1] are generated
+		// NOTE(review): presumably Eigen's Random() draws from rand(), so the seed above
+		// makes the data reproducible - confirm for the Eigen version in use.
+		X = Matrix<Float, Dynamic, Dynamic, ColMajor>::Random(samplesCount, featuresCount);
+		// reference ("core") vector; it has the same scalar type as the data,
+		// otherwise the row assignment below compiles only for Float = float
+		RowVector<Float, Dynamic> v(featuresCount);
+		// linear transform coefficients
+		Float c1 = (maxVal - minVal) / 2;
+		Float c2 = (maxVal + minVal) / 2;
+		// rescale data: each feature should belong to the corresponding [min, max] segment
+		for (int i = 0; i < featuresCount; ++i)
+		{
+			// linear [-1,1]-to-[min,max] transform
+			X.col(i) = X.col(i) * c1 + c2 * Vector<Float, Dynamic>::Ones(samplesCount);
+			// randNum lies in [-0.5, 0.5), so v(i) falls into the middle half of [min, max]
+			Float randNum = static_cast<Float>(-0.5) + static_cast<Float>(rand() % RAND_SCALE) / RAND_SCALE;
+			v(i) = randNum * c1 + c2;
+		}
+		// bias value
+		Float bias = kernelFunc(kernel, kernelParams, v, v);
+		// This is a heuristic
+		if (kernel == RBF)
+			bias /= 2;
+		// auxiliary vector: holds a copy of the current sample
+		RowVector<Float, Dynamic> w(featuresCount);
+		// compute labels
+		for (int i = 0; i < samplesCount; ++i)
+		{
+			w = X.row(i);
+			Float val = kernelFunc(kernel, kernelParams, w, v) - bias;
+			labels[i] = (val > static_cast<Float>(0)) ? static_cast<Float>(1) : static_cast<Float>(-1);
+		}
+		return NO_ERRORS;
+	} // generateSeparable
+	/* Generate dataset: non-separable case.
+	   A separable dataset is generated first (see generateSeparable), then
+	   labels are flipped at random (see changeLabels).
+		  kernel - type of kernel
+		  kernelParams - parameters of kernel
+		  featuresCount - number of features, i.e. dimension
+		  samplesCount - number of the generated samples
+		  minVal - min value
+		  maxVal - max value
+		  data - generated data
+		  labels - generated labels
+		  violatorsPercentage - percentage of labels that violate separability, within [0, 100]
+	   Returns INCORRECT_PERCENTAGE if the percentage is below 0 or above 100;
+	   otherwise the result code of generateSeparable if it is not NO_ERRORS,
+	   and the result code of changeLabels in the remaining case.
+	   WARNINGS. 1. Memory for data and labels must be allocated outside this function.
+				 2. Since violators are chosen randomly, the actual number of violators
+					may differ from the given percentage. */
+	template<typename Float>
+	int generateNonSeparable(int kernel, float kernelParams[MAX_NUM_OF_KERNEL_PARAM],
+		int featuresCount, int samplesCount,
+		Float minVal, Float maxVal,
+		Float* data, Float* labels,
+		Float violatorsPercentage) noexcept
+	{
+		using namespace svm;
+
+		const Float minPercentage = static_cast<Float>(0);
+		const Float maxPercentage = static_cast<Float>(100);
+
+		// the percentage must belong to [0, 100]
+		if (violatorsPercentage < minPercentage || violatorsPercentage > maxPercentage)
+			return INCORRECT_PERCENTAGE;
+
+		// step 1: separable dataset
+		const int status = generateSeparable(kernel, kernelParams, featuresCount, samplesCount,
+			minVal, maxVal, data, labels);
+
+		// step 2: violators are created only if step 1 succeeded;
+		// the percentage is converted to a probability
+		return (status == NO_ERRORS)
+			? changeLabels(labels, samplesCount, violatorsPercentage / 100)
+			: status;
+	} // generateNonSeparable
+}; // svm
+#endif // DATA_GENERATORS_H

package/wasm/dataMining.h ADDED Viewed

@@ -0,0 +1,116 @@
+// dataMining.h
+// Data mining tools
+#ifndef DATA_MINING_H
+#define DATA_MINING_H
+#include "../../../../Eigen/Eigen/Dense"
+using namespace Eigen;
+// data mining tools
+namespace dmt {
+	// Result codes returned by the functions of this namespace.
+	enum ResultCode {
+		NO_ERRORS = 0,
+		UNKNOWN_PROBLEM,
+		INCORRECT_SIZE
+   	};
+	// confusion matrix constants:
+	// the matrix is stored as a flat array of CONFUSION_MATR_SIZE counters,
+	// the constants below are the positions of the counters in this array
+	const int CONFUSION_MATR_SIZE = 4;
+	const int TRUE_POSITIVE_INDEX = 0;
+	const int FALSE_NEGATIVE_INDEX = 1;
+	const int FALSE_POSITIVE_INDEX = 2;
+	const int TRUE_NEGATIVE_INDEX = 3;
+	/* Create normalized dataset from columns data.
+	   Each column of the output is centered and, if its standard deviation
+	   is positive, divided by this standard deviation.
+		  columsData - pointer to columns data (column-major, rowCount x colCount)
+		  rowCount - number of rows
+		  colCount - number of columns
+		  normalizedDataRows - pointer to normalized data rows (row-major, rowCount x colCount)
+		  means - mean values of source columns (colCount values)
+		  stdDevs - standard deviations of source columns (colCount values)
+	   Returns INCORRECT_SIZE if rowCount < 1 or colCount < 1, NO_ERRORS otherwise.
+	 REMARKS. 1. In DATAGROK, column-oriented data storage is used,
+	        	 but row-oriented approach is preferred in SVM, and
+				 this function provides it.
+			  2. Row-oriented data storage is a result.
+			  3. Memory for all the outputs must be allocated outside this function. */
+	template<typename Float>
+	int getNormalizedDataset(Float* columsData, int rowCount, int colCount,
+		Float* normalizedDataRows, Float* means, Float* stdDevs) noexcept
+	{
+		// check sizes
+		if ((rowCount < 1) || (colCount < 1))
+			return INCORRECT_SIZE;
+		// pointers-to-matrices assignment
+		Map < Matrix<Float, Dynamic, Dynamic, ColMajor>> A(columsData, rowCount, colCount);
+		Map < Matrix<Float, Dynamic, Dynamic, RowMajor>> B(normalizedDataRows, rowCount, colCount);
+		Map < Vector<Float, Dynamic> > mu(means, colCount);
+		Map < Vector<Float, Dynamic> > sigma(stdDevs, colCount);
+		// compute mean values & standard deviations
+		for (int i = 0; i < colCount; ++i)
+		{
+			mu(i) = A.col(i).mean();
+			// variance computed as (mean of squares) - (square of mean)
+			Float variance = A.col(i).squaredNorm() / rowCount - mu(i) * mu(i);
+			// For a (nearly) constant column, rounding may make this difference
+			// slightly negative; clamp it so that sqrt does not produce NaN.
+			// A NaN variance (NaN in the input) is not affected by this check.
+			if (variance < static_cast<Float>(0))
+				variance = static_cast<Float>(0);
+			sigma(i) = sqrt(variance);
+		}
+		// get A centered
+		B = A.rowwise() - mu.transpose();
+		// norm columns of B; columns with zero deviation are left centered only
+		for (int i = 0; i < colCount; ++i)
+		{
+			Float current = sigma(i);
+			if (current > static_cast<Float>(0))
+				B.col(i) /= current;
+		}
+		return NO_ERRORS;
+	} // getNormalizedDataset
+	/* Compare labels and their predictions: BINARY CLASSIFICATION CASE.
+	      labels - actual labels
+		  predictions - predicted labels
+		  correctness - output array; correctness[i] receives the product
+		                labels[i] * predictions[i]
+		                (NOTE(review): for labels and predictions equal to 1 or -1 this is
+		                1 for a correct prediction and -1 for an incorrect one - confirm
+		                what the callers expect)
+		  samplesCount - number of samples
+		  confusionMatrix - output confusion matrix: counters of true positives,
+		                    false negatives, false positives and true negatives
+	   A value is treated as positive if it is greater than zero, as negative otherwise.
+	   Always returns NO_ERRORS. */
+	template<typename Float>
+	int compareLabelsAndTheirPredictions(Float* labels, Float* predictions,
+		Float* correctness, int samplesCount,
+		int confusionMatrix[CONFUSION_MATR_SIZE])
+	{
+		const Float zero = static_cast<Float>(0);
+
+		// reset all counters
+		for (int cell = 0; cell < CONFUSION_MATR_SIZE; ++cell)
+			confusionMatrix[cell] = 0;
+
+		// process samples one by one
+		for (int k = 0; k < samplesCount; ++k)
+		{
+			// the product is stored before the signs are examined
+			correctness[k] = labels[k] * predictions[k];
+
+			const bool isActualPositive = (labels[k] > zero);
+			const bool isPredictedPositive = (predictions[k] > zero);
+
+			// choose the counter that corresponds to the (actual, predicted) pair
+			int cell;
+			if (isActualPositive)
+			{
+				cell = isPredictedPositive ? TRUE_POSITIVE_INDEX : FALSE_NEGATIVE_INDEX;
+			}
+			else
+			{
+				cell = isPredictedPositive ? FALSE_POSITIVE_INDEX : TRUE_NEGATIVE_INDEX;
+			}
+
+			++confusionMatrix[cell];
+		}
+
+		return NO_ERRORS;
+	} // compareLabelsAndTheirPredictions
+} // dmt
+#endif // DATA_MINING_H

package/wasm/pcaExport.cpp ADDED Viewed

@@ -0,0 +1,64 @@
+// This file contains C++-functions that are exported to wasm.
+// The tool Emscripten is applied (the header emscripten.h is included
+// and each exported function is marked by EMSCRIPTEN_KEEPALIVE).
+// Also, each function has a special DATAGROK annotation for C++-functions.
+// This approach makes it possible to use the C++-to-wasm export script,
+// which performs all the routine steps.
+#include <emscripten.h>
+// The following provides convenient naming of the exported functions:
+// they are declared with C linkage, so their names are not mangled.
+// These declarations must match the definitions given below in this file.
+extern "C" {
+    int principalComponentAnalysis(float * data,
+	      int dataNumOfRows,
+	      int dataNumOfColumns,
+	      int numOfPrincipalComponents,
+		  int centerNum,
+	      int scaleNum,
+	      float * principalComponents,
+	      int principalComponentsNumOfRows,
+	      int principalComponentsNumOfColumns);
+	float error(float * data1, int data1Length, float * data2, int data2Length);
+}
+#include "PCA\PCA.h"
+//name: principalComponentAnalysis
+//input: dataframe table
+//input: column_list columns
+//input: int componentsCount
+//input: int centerNum
+//input: int scaleNum
+//output: column_list components [new(columns.rowCount, componentsCount)]
+//output: dataframe result [components]
+EMSCRIPTEN_KEEPALIVE
+int principalComponentAnalysis(float * data,
+      int dataNumOfRows,
+	  int dataNumOfColumns,
+	  int numOfPrincipalComponents,
+	  int centerNum,
+	  int scaleNum,
+	  float * principalComponents,
+	  int principalComponentsNumOfRows,
+	  int principalComponentsNumOfColumns)
+{
+	// Thin wrapper: forwards the call to pca::pcaUsingCorrelationMatrix and
+	// returns its result code unchanged.
+	// principalComponentsNumOfRows and principalComponentsNumOfColumns are
+	// accepted but not used here.
+	// NOTE(review): the column count is passed before the row count, and the last
+	// argument is the literal 0 - confirm both against the declaration in PCA.h.
+	return pca::pcaUsingCorrelationMatrix(data, dataNumOfColumns, dataNumOfRows,
+	  numOfPrincipalComponents, centerNum, scaleNum, principalComponents, 0);
+}
+//name: error
+//input: dataframe df
+//input: column col1
+//input: column col2
+//output: double mad [_callResult]
+EMSCRIPTEN_KEEPALIVE
+float error(float * data1, int data1Length, float * data2, int data2Length)
+{
+	// Thin wrapper over pca::mad applied to two arrays.
+	// Only the elements present in both arrays are compared: if data2 is shorter
+	// than data1, passing data1Length would make the callee read past the end of data2.
+	// For arrays of equal length (the expected case: two columns of one dataframe)
+	// the result is the same as before.
+	const int commonLength = (data2Length < data1Length) ? data2Length : data1Length;
+	return pca::mad(data1, data2, commonLength);
+}

package/wasm/plsExport.cpp ADDED Viewed

@@ -0,0 +1,75 @@
+// This file contains C++-functions that are exported to wasm.
+// The tool Emscripten is applied (the header emscripten.h is included
+// and each exported function is marked by EMSCRIPTEN_KEEPALIVE).
+// Also, each function has a special DATAGROK annotation for C++-functions.
+// This approach makes it possible to use the C++-to-wasm export script,
+// which performs all the routine steps.
+#include <emscripten.h>
+// The following provides convenient naming of the exported function:
+// it is declared with C linkage, so its name is not mangled.
+// This declaration must match the definition given below in this file.
+extern "C" {
+	int partialLeastSquareRegression(float * predictorColumns,
+	       int rowCount,
+		   int columnCount,
+		   float * responseColumn,
+		   int responceColumnLength,
+		   int componentsCount,
+		   float * predictionColumn,
+		   int predictionColumnLength,
+		   float * regressionCoefficients,
+		   int regressionCoefficientsLength,
+		   float * predictorScoresColumns,
+		   int predictorScoresColumnsRowCount,
+		   int predictorScoresColumnsColumnCount,
+		   float * predictionScoresColumns,
+		   int predictionScoresColumnsRowCount,
+		   int predictionScoresColumnsColumnCount,
+	       float * predictionLoadingsColumns,
+	       int predictionLoadingsColumnsRowCount,
+	       int predictionLoadingsColumnsColumnCount);
+}
+#include "PLS\PLS.h"
+//name: partialLeastSquareRegression
+//input: dataframe table
+//input: column_list features
+//input: column predict
+//input: int componentsCount
+//output: column prediction [new(predict.rowCount)]
+//output: column regressionCoefficients [new(features.columnCount)]
+//output: column_list tScores [new(predict.rowCount, componentsCount)]
+//output: column_list uScores [new(predict.rowCount, componentsCount)]
+//output: column_list xLoadings [new(features.columnCount, componentsCount)]
+EMSCRIPTEN_KEEPALIVE
+int partialLeastSquareRegression(float * predictorColumns,
+	   int rowCount,
+	   int columnCount,
+	   float * responseColumn,
+	   int responceColumnLength,
+	   int componentsCount,
+	   float * predictionColumn,
+	   int predictionColumnLength,
+	   float * regressionCoefficients,
+	   int regressionCoefficientsLength,
+	   float * predictorScoresColumns,
+	   int predictorScoresColumnsRowCount,
+	   int predictorScoresColumnsColumnCount,
+	   float * predictionScoresColumns,
+	   int predictionScoresColumnsRowCount,
+	   int predictionScoresColumnsColumnCount,
+	   float * predictionLoadingsColumns,
+	   int predictionLoadingsColumnsRowCount,
+	   int predictionLoadingsColumnsColumnCount)
+{
+	// Thin wrapper: forwards the call to pls::partialLeastSquareExtended and
+	// returns its result code unchanged.
+	// Only the data pointers, rowCount, columnCount and componentsCount are passed on;
+	// the remaining length / row-count / column-count arguments are accepted
+	// but not used here.
+	// NOTE(review): the sizes of the output buffers are therefore not checked in this
+	// function - presumably the caller allocates them consistently; confirm.
+	return pls::partialLeastSquareExtended(predictorColumns, rowCount, columnCount,
+		responseColumn, componentsCount, predictionColumn, regressionCoefficients,
+		predictorScoresColumns, predictionScoresColumns, predictionLoadingsColumns);
+}