@datagrok/eda 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/detectors.js +9 -0
- package/dist/111.js +2 -0
- package/dist/146.js +2 -0
- package/dist/155.js +2 -0
- package/dist/355.js +2 -0
- package/dist/584.js +2 -0
- package/dist/604.js +2 -0
- package/dist/632.js +2 -0
- package/dist/645.js +2 -0
- package/dist/93.js +2 -0
- package/dist/d711f70338306e5bddc4.wasm +0 -0
- package/dist/package-test.js +2 -0
- package/dist/package.js +2 -0
- package/package.json +49 -0
- package/package.png +0 -0
- package/scripts/command.txt +1 -0
- package/scripts/exportForTS.py +862 -0
- package/scripts/exportForTSConstants.py +93 -0
- package/scripts/func.json +1 -0
- package/scripts/module.json +11 -0
- package/src/EDAtools.ts +46 -0
- package/src/EDAui.ts +118 -0
- package/src/dataGenerators.ts +74 -0
- package/src/demos.ts +38 -0
- package/src/package-test.ts +12 -0
- package/src/package.ts +248 -0
- package/src/svm.ts +485 -0
- package/src/utils.ts +51 -0
- package/tsconfig.json +71 -0
- package/wasm/EDA.js +443 -0
- package/wasm/EDA.wasm +0 -0
- package/wasm/EDAAPI.js +131 -0
- package/wasm/EDAForWebWorker.js +21 -0
- package/wasm/PCA/PCA.cpp +151 -0
- package/wasm/PCA/PCA.h +48 -0
- package/wasm/PLS/PLS.h +64 -0
- package/wasm/PLS/pls.cpp +393 -0
- package/wasm/callWasm.js +475 -0
- package/wasm/callWasmForWebWorker.js +706 -0
- package/wasm/dataGenerators.h +169 -0
- package/wasm/dataMining.h +116 -0
- package/wasm/pcaExport.cpp +64 -0
- package/wasm/plsExport.cpp +75 -0
- package/wasm/svm.h +608 -0
- package/wasm/svmApi.cpp +323 -0
- package/wasm/workers/errorWorker.js +13 -0
- package/wasm/workers/generateDatasetWorker.js +13 -0
- package/wasm/workers/normalizeDatasetWorker.js +13 -0
- package/wasm/workers/partialLeastSquareRegressionWorker.js +13 -0
- package/wasm/workers/predictByLSSVMWorker.js +13 -0
- package/wasm/workers/principalComponentAnalysisWorker.js +13 -0
- package/wasm/workers/trainAndAnalyzeLSSVMWorker.js +13 -0
- package/wasm/workers/trainLSSVMWorker.js +13 -0
- package/webpack.config.js +37 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
""" exportForTSConstants.py
Constants for the C/C++-to-wasm export script (exportForTS.py).
"""

# export settings constants
# (the string values of NAME .. PACKAGE_JSON_FILE match the keys used in scripts/module.json)
NAME = 'name'
FOLDER = 'folder'
RUNTIME_SYSTEM = 'runtimeSystemFile'
RUNTIME_SYSTEM_FOR_WEBWORKER= 'runtimeSystemForWebWorker'
PACKAGE_FILE = 'packageFile'
OPTIMIZATION_MODE = 'optimizationMode'
TOTAL_MEMORY = 'totalMemory'
PACKAGE_JSON_FILE = 'packageJsonFile'
WORKERS_FOLDER = 'workers'

# constants for function specification
# (several of these values appear as keys/values in scripts/func.json,
# e.g. 'arguments', 'output', 'annotation', 'prototype', 'callArgs')
ARGUMENTS = 'arguments'
OUTPUT = 'output'
ANNOTATION = 'annotation'
PROTOTYPE = 'prototype'
CALL_ARGS = 'callArgs'
WW_PROTOTYPE = 'prototypeForWebWorker'
TYPE = 'type'
COLUMN = 'Column'
COLUMNS = 'Columns'
NUM = 'num'
DATA = 'data'
NEW = 'new'
NUM_OF_ROWS = 'numOfRows'
NUM_OF_COLUMNS = 'numOfColumns'
REF = 'ref'
VALUE = 'value'
SOURCE = 'source'
TABLE_FROM_COLUMNS = 'tableFromColumns'
OBJECTS = 'objects'

# marker comment line (JS/TS syntax) announcing automatically generated code
AUTOMATIC_GENERATION_LINE = '// The following code is generated automatically.'

# constants for processing code that is generated by Emscripten
EM_LIB_EXTENSION = '.js'
WW_FILE_SUFFIX = 'ForWebWorker'
NUM_OF_LINE_TO_MODIFY = 1
KEY_WORD_TO_ADD = 'export '
LINE_TO_REPLACE = 'fetch(wasmBinaryFile,{credentials:"same-origin"})'

# constants for generating JS-code
CALL_WASM = 'callWasm'
WORKER_SUFFIX = 'Worker'
IN_WEBWORKER_SUFFIX = 'InWebWorker'
WORKER_EXTENSION = '.js'
CPP_WRAPPER_FUNCTION = 'cppWrapper'
GET_CPP_INPUT = 'getCppInput'
GET_RESULT = 'getResult'
# indentation strings: 2, 4, 6 and 8 spaces
WW_SPACE = ' ' * 2
WW_SUBSPACE = ' ' * 4
WW_SUBSUBSPACE = ' ' * 6
SPACE = ' ' * 2
SUBSPACE = ' ' * 4
SUBSUBSPACE = ' ' * 6
SUBSUBSUBSPACE = ' ' * 8
API_SUFFIX = 'API'
# NOTE(review): 'PREFFIX' is misspelled; the name is kept as is because other
# scripts presumably import it - verify all usages before renaming
SERVICE_PREFFIX = '_'
ANY_TYPE = 'any'
# identifiers of the local variables used in the generated TS code
OUTPUT_VARIABLE = '_output'
PROMISE_VARIABLE = '_promise'
# NOTE(review): 'VARIBLABLE' is misspelled; kept for the same reason as above
RESULT_VARIBLABLE = '_result'
ERROR_VARIABLE = '_error'

# file operating constants
READ_MODE = 'r'
WRITE_MODE = 'w'
APPEND_MODE = 'a'

# annotation constants
ANNOT_INPUT = '//input:'
ANNOT_OUTPUT = '//output:'
ANNOT_NAME = '//name:'
ANNOT_NEW = 'new'
ANNOT_DATAFRAME = 'dataframe'
ANNOT_COLUMN = 'column'
ANNOT_COLUMN_LIST = 'column_list'
ANNOT_DOT = '.'
ANNOT_OBJECTS = 'objects'

# auxiliary maps
sizesMap = {'rowCount': 'numOfRows', 'columnCount': 'numOfColumns', 'data': 'data'}
# annotation type name -> TypeScript type name
typesMap = {'int': 'number', 'double': 'number', 'column': 'DG.Column',
            'column_list': 'DG.ColumnList', 'dataframe': 'DG.DataFrame'}

# Emscripten constants
EM_MACROS = 'EMSCRIPTEN_KEEPALIVE'

# presumably a key of package.json - TODO confirm against exportForTS.py
PJSN_SOURCES = 'sources'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"pcaExport.cpp": {"principalComponentAnalysis": {"arguments": {"columns": {"type": "floatColumns"}, "componentsCount": {"type": "num"}, "centerNum": {"type": "num"}, "scaleNum": {"type": "num"}, "components": {"type": "newFloatColumns", "numOfRows": {"ref": "columns", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "tableFromColumns", "source": "components"}, "annotation": ["//name: principalComponentAnalysis", "//input: dataframe table", "//input: column_list columns", "//input: int componentsCount", "//input: int centerNum", "//input: int scaleNum", "//output: dataframe result "], "prototype": "principalComponentAnalysis(table, columns, componentsCount, centerNum, scaleNum)", "prototypeForWebWorker": "principalComponentAnalysisInWebWorker(table, columns, componentsCount, centerNum, scaleNum)", "callArgs": "[columns, componentsCount, centerNum, scaleNum]"}, "error": {"arguments": {"col1": {"type": "floatColumn"}, "col2": {"type": "floatColumn"}}, "output": {"type": "double", "source": "_callResult"}, "annotation": ["//name: error", "//input: dataframe df", "//input: column col1", "//input: column col2", "//output: double mad "], "prototype": "error(df, col1, col2)", "prototypeForWebWorker": "errorInWebWorker(df, col1, col2)", "callArgs": "[col1, col2]"}}, "PCA/PCA.cpp": {}, "plsExport.cpp": {"partialLeastSquareRegression": {"arguments": {"features": {"type": "floatColumns"}, "predict": {"type": "floatColumn"}, "componentsCount": {"type": "num"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "predict", "value": "numOfRows"}}, "regressionCoefficients": {"type": "newFloatColumn", "numOfRows": {"ref": "features", "value": "numOfColumns"}}, "tScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "uScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, 
"numOfColumns": {"ref": "componentsCount", "value": "data"}}, "xLoadings": {"type": "newFloatColumns", "numOfRows": {"ref": "features", "value": "numOfColumns"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings']"}, "annotation": ["//name: partialLeastSquareRegression", "//input: dataframe table", "//input: column_list features", "//input: column predict", "//input: int componentsCount"], "prototype": "partialLeastSquareRegression(table, features, predict, componentsCount)", "prototypeForWebWorker": "partialLeastSquareRegressionInWebWorker(table, features, predict, componentsCount)", "callArgs": "[features, predict, componentsCount]"}}, "PLS/PLS.cpp": {}, "svmApi.cpp": {"generateDataset": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "samplesCount": {"type": "num"}, "featuresCount": {"type": "num"}, "min": {"type": "num"}, "max": {"type": "num"}, "violatorsPercentage": {"type": "num"}, "dataset": {"type": "newFloatColumns", "numOfRows": {"ref": "samplesCount", "value": "data"}, "numOfColumns": {"ref": "featuresCount", "value": "data"}}, "labels": {"type": "newFloatColumn", "numOfRows": {"ref": "samplesCount", "value": "data"}}}, "output": {"type": "objects", "source": "['dataset', 'labels']"}, "annotation": ["//name: generateDataset", "//input: int kernel", "//input: column kernelParams", "//input: int samplesCount", "//input: int featuresCount", "//input: double min", "//input: double max", "//input: double violatorsPercentage"], "prototype": "generateDataset(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "prototypeForWebWorker": "generateDatasetInWebWorker(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "callArgs": "[kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage]"}, "normalizeDataset": 
{"arguments": {"data": {"type": "floatColumns"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "data", "value": "numOfColumns"}, "numOfColumns": {"ref": "data", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs']"}, "annotation": ["//name: normalizeDataset", "//input: column_list data"], "prototype": "normalizeDataset(data)", "prototypeForWebWorker": "normalizeDatasetInWebWorker(data)", "callArgs": "[data]"}, "trainLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights']"}, "annotation": ["//name: trainLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, 
labels)", "prototypeForWebWorker": "trainLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels]"}, "predictByLSSVM": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "normalizedData": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "means": {"type": "floatColumn"}, "stdDevs": {"type": "floatColumn"}, "modelParams": {"type": "floatColumn"}, "precomputedWeights": {"type": "floatColumn"}, "targetData": {"type": "floatColumns"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "targetData", "value": "numOfRows"}}}, "output": {"type": "column", "source": "prediction"}, "annotation": ["//name: predictByLSSVM", "//input: int kernel", "//input: column kernelParams", "//input: column_list normalizedData", "//input: column labels", "//input: column means", "//input: column stdDevs", "//input: column modelParams", "//input: column precomputedWeights", "//input: column_list targetData", "//output: column prediction"], "prototype": "predictByLSSVM(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "prototypeForWebWorker": "predictByLSSVMInWebWorker(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "callArgs": "[kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData]"}, "trainAndAnalyzeLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "confusionMatrixElementsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, 
"numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}, "predictedLabels": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "correctness": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "consfusionMatrix": {"type": "newIntColumn", "numOfRows": {"ref": "confusionMatrixElementsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights', 'predictedLabels', 'correctness', 'consfusionMatrix']"}, "annotation": ["//name: trainAndAnalyzeLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: int confusionMatrixElementsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainAndAnalyzeLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "prototypeForWebWorker": "trainAndAnalyzeLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels]"}}}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "EDA",
|
|
3
|
+
"folder": "../wasm",
|
|
4
|
+
"source": ["pcaExport.cpp", "PCA/PCA.cpp", "plsExport.cpp", "PLS/PLS.cpp", "svmApi.cpp"],
|
|
5
|
+
"optimizationMode": "-O3",
|
|
6
|
+
"packageFile": "../src/package.ts",
|
|
7
|
+
"packageJsonFile": "../package.json",
|
|
8
|
+
"runtimeSystemFile": "../wasm/callWasm.js",
|
|
9
|
+
"runtimeSystemForWebWorker": "../wasm/callWasmForWebWorker.js",
|
|
10
|
+
"totalMemory": "268435456"
|
|
11
|
+
}
|
package/src/EDAtools.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// Exploratory data analysis (EDA) tools
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
import {_principalComponentAnalysisInWebWorker,
|
|
8
|
+
_partialLeastSquareRegressionInWebWorker} from '../wasm/EDAAPI';
|
|
9
|
+
|
|
10
|
+
import {checkComponenets, checkGeneratorSVMinputs} from './utils';
|
|
11
|
+
|
|
12
|
+
// Principal components analysis (PCA).
// Calls checkComponenets(features, components) first, then runs the wasm
// computation in a web worker with the center/scale flags passed as 1/0.
// Resolves with the worker's result; a rejection of the worker promise is
// rethrown as Error(`Error: <reason>`).
export async function computePCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
  center: boolean, scale: boolean): Promise<DG.DataFrame> {
  checkComponenets(features, components);

  // the worker is started outside of try: only its rejection gets wrapped
  const pending = _principalComponentAnalysisInWebWorker(
    table, features, components, center ? 1 : 0, scale ? 1 : 0);

  try {
    return await pending;
  } catch (reason) {
    throw new Error(`Error: ${reason}`);
  }
}
|
|
31
|
+
|
|
32
|
+
// Partial least squares regression (PLS).
// Calls checkComponenets(features, components) first, then runs the wasm
// computation in a web worker. Resolves with the worker's result; a rejection
// of the worker promise is rethrown as Error(`Error: <reason>`).
export async function computePLS(table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column,
  components: number): Promise<any> {
  checkComponenets(features, components);

  // the worker is started outside of try: only its rejection gets wrapped
  const pending = _partialLeastSquareRegressionInWebWorker(table, features, predict, components);

  try {
    return await pending;
  } catch (reason) {
    throw new Error(`Error: ${reason}`);
  }
}
|
package/src/EDAui.ts
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// Custom UI for Exploratory data analysis (EDA) tools
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
// Rename PCA columns: every column name of the given table gets the '_PCA' prefix.
// The table is modified in place and the same object is returned.
export function renamePCAcolumns(pcaTable: DG.DataFrame): DG.DataFrame {
  pcaTable.columns.toList().forEach((column) => {
    column.name = `_PCA${column.name}`;
  });

  return pcaTable;
}
|
|
14
|
+
|
|
15
|
+
// Predicted vs Reference scatter plot.
// Renames the prediction column to '<reference name>(predicted)', places both
// columns into a new dataframe and returns a scatter plot of that dataframe
// (reference on x, prediction on y, regression line shown).
export function predictedVersusReferenceScatterPlot(reference: DG.Column, prediction: DG.Column): DG.Viewer {
  prediction.name = `${reference.name}(predicted)`;

  const comparison = DG.DataFrame.fromColumns([reference, prediction]);
  comparison.name = 'Reference vs. Predicted';

  return DG.Viewer.scatterPlot(comparison, {
    title: comparison.name,
    x: reference.name,
    y: prediction.name,
    showRegressionLine: true,
    markerType: 'circle',
  });
}
|
|
30
|
+
|
|
31
|
+
// Regression Coefficients Bar Chart.
// Renames the coefficients column to 'regression coefficient', pairs it with a
// string column holding the feature names and returns a bar chart split by feature.
export function regressionCoefficientsBarChart(features: DG.ColumnList, regressionCoeffs: DG.Column): DG.Viewer {
  regressionCoeffs.name = 'regression coefficient';

  // names of the predictors, in the iteration order of the column list
  const featureNames = Array.from(features, (column) => column.name);

  const coefficients = DG.DataFrame.fromColumns([
    DG.Column.fromStrings('feature', featureNames),
    regressionCoeffs,
  ]);
  coefficients.name = 'Regression Coefficients';

  return DG.Viewer.barChart(coefficients, {
    title: coefficients.name,
    split: 'feature',
    value: 'regression coefficient',
    valueAggrType: 'avg',
  });
}
|
|
47
|
+
|
|
48
|
+
// Scores Scatter Plot.
// Renames the x-score columns to 'x.score.t1', 'x.score.t2', ... and the y-score
// columns to 'y.score.u1', 'y.score.u2', ..., combines them into one dataframe
// (x-scores first) and returns a scatter plot of the first x-score versus the
// first y-score.
export function scoresScatterPlot(xScores: Array<DG.Column>, yScores: Array<DG.Column>): DG.Viewer {
  xScores.forEach((column, index) => {
    column.name = `x.score.t${index + 1}`;
  });
  yScores.forEach((column, index) => {
    column.name = `y.score.u${index + 1}`;
  });

  const scores = DG.DataFrame.fromColumns([...xScores, ...yScores]);
  scores.name = 'Scores';

  return DG.Viewer.scatterPlot(scores, {
    title: scores.name,
    x: xScores[0].name,
    y: yScores[0].name,
    markerType: 'circle',
  });
}
|
|
74
|
+
|
|
75
|
+
// Loading Scatter Plot.
// Builds a dataframe whose first column ('labels') holds the feature names and
// whose remaining columns are the loadings renamed to 'x.loading.p1',
// 'x.loading.p2', ...; returns a scatter plot of the first loading versus the
// last one, labelled by feature name.
export function loadingScatterPlot(features: DG.ColumnList, xLoadings: Array<DG.Column>): DG.Viewer {
  const labelsColumn = DG.Column.fromStrings('labels', Array.from(features, (column) => column.name));

  xLoadings.forEach((column, index) => {
    column.name = `x.loading.p${index + 1}`;
  });

  const loadings = DG.DataFrame.fromColumns([labelsColumn, ...xLoadings]);
  loadings.name = 'Loadings';

  return DG.Viewer.scatterPlot(loadings, {
    title: loadings.name,
    x: xLoadings[0].name,
    y: xLoadings[xLoadings.length - 1].name,
    markerType: 'circle',
    labels: 'labels',
  });
}
|
|
101
|
+
|
|
102
|
+
// Add PLS visualization.
// Looks up the table view by the table's name and adds four viewers to it.
// plsOutput is read by index: [0] is used as the prediction column, [1] as the
// regression coefficients, [2] and [3] as the x- and y-scores, [4] as the x-loadings.
export function addPLSvisualization(table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column,
  plsOutput: any): void {
  const tableView = grok.shell.getTableView(table.name);

  // each viewer is created right before it is added, in this order
  const viewerFactories = [
    // 1. Predicted vs Reference scatter plot
    () => predictedVersusReferenceScatterPlot(predict, plsOutput[0]),
    // 2. Regression Coefficients Bar Chart
    () => regressionCoefficientsBarChart(features, plsOutput[1]),
    // 3. Scores Scatter Plot
    () => scoresScatterPlot(plsOutput[2], plsOutput[3]),
    // 4. Loading Scatter Plot
    () => loadingScatterPlot(features, plsOutput[4]),
  ];

  for (const createViewer of viewerFactories)
    tableView.addViewer(createViewer());
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
// Test data generation tools
|
|
2
|
+
|
|
3
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
4
|
+
import * as grok from 'datagrok-api/grok';
|
|
5
|
+
import * as ui from 'datagrok-api/ui';
|
|
6
|
+
import * as DG from 'datagrok-api/dg';
|
|
7
|
+
|
|
8
|
+
import {checkGeneratorSVMinputs} from './utils';
|
|
9
|
+
import {_generateDatasetInWebWorker} from '../wasm/EDAAPI';
|
|
10
|
+
|
|
11
|
+
// Position of the feature columns in the output of the dataset generator
const SVM_GEN_FEATURES_INDEX = 0;
// Position of the labels column in the output of the dataset generator
const SVM_GEN_LABELS_INDEX = 1;
// Prefix that is prepended to the name of each generated feature column
const SVM_FEATURE_NAME = 'Feature #';
// Name that is assigned to the generated labels column
const SVM_LABEL_NAME = 'Label';
|
|
15
|
+
|
|
16
|
+
// Returns the dataframe "cars": 30 rows, a string column 'model' followed by
// 16 numeric columns (Int32 or Float32) built from the literals below.
export function carsDataframe(): DG.DataFrame {
  // shorthand builders for the two numeric column kinds used here
  const int32 = (name: string, values: number[]): DG.Column =>
    DG.Column.fromInt32Array(name, new Int32Array(values));
  const float32 = (name: string, values: number[]): DG.Column =>
    DG.Column.fromFloat32Array(name, new Float32Array(values));

  const columns = [
    DG.Column.fromStrings('model', ['alfaromeo', 'audi', 'bmw', 'chevrolet', 'dodge1', 'dodge2', 'honda1', 'honda2', 'isuzu', 'jaguar', 'mazda', 'mercedes', 'mercury', 'mitsubishi', 'nissan1', 'nissan2', 'peugot', 'plymouth', 'porsche', 'saab', 'subaru', 'toyota1', 'toyota2', 'toyota3', 'toyota4', 'volkswagen1', 'volkswagen2', 'volvo1', 'volvo2', 'volvo3']),
    int32('diesel', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1]),
    int32('turbo', [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1]),
    int32('two.doors', [1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0]),
    int32('hatchback', [1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0]),
    float32('wheel.base', [94.5, 105.80000305175781, 101.19999694824219, 94.5, 93.69999694824219, 93.69999694824219, 93.69999694824219, 96.5, 94.30000305175781, 113, 93.0999984741211, 115.5999984741211, 102.69999694824219, 93.69999694824219, 94.5, 94.5, 93.69999694824219, 114.19999694824219, 89.5, 99.0999984741211, 97.19999694824219, 95.69999694824219, 95.69999694824219, 98.4000015258789, 102.4000015258789, 97.30000305175781, 100.4000015258789, 104.30000305175781, 109.0999984741211, 109.0999984741211]),
    float32('length', [171.1999969482422, 192.6999969482422, 176.8000030517578, 158.8000030517578, 157.3000030517578, 157.3000030517578, 150, 175.39999389648438, 170.6999969482422, 199.60000610351562, 166.8000030517578, 202.60000610351562, 178.39999389648438, 157.3000030517578, 170.1999969482422, 165.3000030517578, 157.3000030517578, 198.89999389648438, 168.89999389648438, 186.60000610351562, 172, 158.6999969482422, 166.3000030517578, 176.1999969482422, 175.60000610351562, 171.6999969482422, 180.1999969482422, 188.8000030517578, 188.8000030517578, 188.8000030517578]),
    float32('width', [65.5, 71.4000015258789, 64.80000305175781, 63.599998474121094, 63.79999923706055, 63.79999923706055, 64, 65.19999694824219, 61.79999923706055, 69.5999984741211, 64.19999694824219, 71.69999694824219, 68, 64.4000015258789, 63.79999923706055, 63.79999923706055, 63.79999923706055, 68.4000015258789, 65, 66.5, 65.4000015258789, 63.599998474121094, 64.4000015258789, 65.5999984741211, 66.5, 65.5, 66.9000015258789, 67.19999694824219, 68.80000305175781, 68.9000015258789]),
    float32('height', [52.400001525878906, 55.70000076293945, 54.29999923706055, 52, 50.79999923706055, 50.599998474121094, 52.599998474121094, 54.099998474121094, 53.5, 52.79999923706055, 54.099998474121094, 56.29999923706055, 54.79999923706055, 50.79999923706055, 53.5, 54.5, 50.599998474121094, 58.70000076293945, 51.599998474121094, 56.099998474121094, 52.5, 54.5, 53, 52, 54.900001525878906, 55.70000076293945, 55.099998474121094, 56.20000076293945, 55.5, 55.5]),
    int32('curb.weight', [2823, 2844, 2395, 1909, 2128, 1967, 1956, 2304, 2337, 4066, 1950, 3770, 2910, 1918, 2024, 1951, 1967, 3430, 2800, 2695, 2190, 1985, 2275, 2551, 2480, 2261, 2661, 2912, 3049, 3217]),
    int32('eng.size', [152, 136, 108, 90, 98, 90, 92, 110, 111, 258, 91, 183, 140, 92, 97, 97, 90, 152, 194, 121, 108, 92, 110, 146, 110, 97, 136, 141, 141, 145]),
    int32('horsepower', [154, 110, 101, 70, 102, 68, 76, 86, 78, 176, 68, 123, 175, 68, 69, 69, 68, 95, 207, 110, 82, 62, 56, 116, 73, 52, 110, 114, 160, 106]),
    int32('peak.rpm', [5000, 5500, 5800, 5400, 5500, 5500, 6000, 5800, 4800, 4750, 5000, 4350, 5000, 5500, 5200, 5200, 5500, 4150, 5900, 5250, 4400, 4800, 4500, 4800, 4500, 4800, 5500, 5400, 5300, 4800]),
    int32('symbol', [1, 1, 2, 0, 1, 1, 1, 0, 0, 0, 1, -1, 1, 2, 1, 1, 1, 0, 3, 2, 0, 1, 0, 2, -1, 2, 0, -2, -1, -1]),
    int32('city.mpg', [19, 19, 23, 38, 24, 31, 30, 27, 24, 15, 31, 22, 19, 37, 31, 31, 31, 25, 17, 21, 28, 35, 34, 24, 30, 37, 19, 23, 19, 26]),
    int32('highway.mpg', [26, 25, 29, 43, 30, 38, 34, 33, 29, 19, 38, 25, 24, 41, 37, 37, 38, 25, 25, 28, 33, 39, 36, 30, 33, 46, 24, 28, 25, 27]),
    int32('price', [16500, 17710, 16430, 6575, 7957, 6229, 7129, 8845, 6785, 35550, 7395, 31600, 16503, 5389, 7349, 7299, 6229, 13860, 37028, 12170, 7775, 5348, 7898, 9989, 10698, 7775, 13295, 12940, 19045, 22470]),
  ];

  return DG.DataFrame.fromColumns(columns);
} // carsDataframe
|
|
39
|
+
|
|
40
|
+
// Generate dataset for testing binary classifiers.
// Checks the inputs with checkGeneratorSVMinputs, wraps the kernel parameters in
// a 'double' column and runs the wasm generator in a web worker. The generated
// feature columns get the 'Feature #' name prefix and the labels column is named
// 'Label'; the returned dataframe has the given name, with the feature columns
// first and the labels column appended last. A rejection of the worker promise
// is rethrown as Error(`Error: <reason>`).
export async function testDataForBinaryClassification(kernel: number, kernelParams: Array<number>,
  name: string, samplesCount: number, featuresCount: number, min: number,
  max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
  // check inputs
  checkGeneratorSVMinputs(samplesCount, featuresCount, min, max, violatorsPercentage);

  // kernel params column
  const kernelParamsCol = DG.Column.fromList('double', 'kernelParams', kernelParams);

  // CALL WASM-COMPUTATIONS
  // (started outside of try: only the rejection of the worker promise gets wrapped)
  const pending = _generateDatasetInWebWorker(kernel, kernelParamsCol,
    samplesCount, featuresCount, min, max, violatorsPercentage);

  let generated: any;
  try {
    generated = await pending;
  } catch (reason) {
    throw new Error(`Error: ${reason}`);
  }

  // Rename labels column
  const labels = generated[SVM_GEN_LABELS_INDEX];
  labels.name = SVM_LABEL_NAME;

  // Rename feature columns
  const featureColumns = generated[SVM_GEN_FEATURES_INDEX];
  for (const column of featureColumns)
    column.name = SVM_FEATURE_NAME + column.name;

  // Create dataframe
  const df = DG.DataFrame.fromColumns(featureColumns);
  df.name = name;
  df.columns.add(labels);

  return df;
} // testDataForBinaryClassification
|
package/src/demos.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {computePLS} from './EDAtools';
|
|
7
|
+
import {addPLSvisualization} from './EDAui';
|
|
8
|
+
|
|
9
|
+
// Demo multivariate analysis (PLS).
// Generates a 'random walk' test table of rowCount x colCount, shows it in a
// table view, uses its first column (renamed to 'Reference') as the value to
// predict and the remaining columns as features, runs PLS and adds the PLS
// viewers to the table view.
// Shows an error balloon and returns without doing anything when any input is
// non-positive or when componentsCount exceeds colCount.
// Errors of computePLS / addPLSvisualization propagate to the caller; the
// 'Reference' column is put back into the table in either case.
export async function demoPLS(rowCount: number, colCount: number, componentsCount: number): Promise<void> {
  // check inputs
  if ((rowCount <= 0) || (colCount <= 0) || (componentsCount <= 0) || (componentsCount > colCount)) {
    const bal = new DG.Balloon;
    bal.error('Incorrect inputs.');
    return;
  }

  // further, custom interface is provided

  const PREDICT = 'Reference';

  const bigDemoTable = grok.data.testData('random walk', rowCount, colCount);
  bigDemoTable.name = `${rowCount} x ${colCount}`;

  for (const col of bigDemoTable.columns)
    col.name = 'Feature ' + col.name;
  bigDemoTable.columns.byIndex(0).name = PREDICT;

  grok.shell.addTableView(bigDemoTable);

  // the reference column is taken out of the table so that the remaining
  // column list can be passed as the features
  const predict = bigDemoTable.columns.byName(PREDICT);
  const features = bigDemoTable.columns.remove(PREDICT);

  try {
    const plsResults = await computePLS(bigDemoTable, features, predict, componentsCount);

    addPLSvisualization(bigDemoTable, features, predict, plsResults);
  } finally {
    // restore the reference column even if the computation or visualization
    // failed; otherwise the displayed table would stay without it
    bigDemoTable.columns.add(predict);
  }
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import * as DG from "datagrok-api/dg";
|
|
2
|
+
import {runTests, tests} from '@datagrok-libraries/utils/src/test';
|
|
3
|
+
|
|
4
|
+
export let _package = new DG.Package(); // DG.Package instance exported by this test entry module
|
|
5
|
+
export {tests};
|
|
6
|
+
|
|
7
|
+
//name: test
|
|
8
|
+
//output: dataframe result
|
|
9
|
+
export async function test(): Promise<DG.DataFrame> {
  // await runTests() and wrap the returned records into a dataframe
  return DG.DataFrame.fromObjects(await runTests())!;
}
|