@datagrok/eda 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.md +3 -0
  2. package/detectors.js +9 -0
  3. package/dist/111.js +2 -0
  4. package/dist/146.js +2 -0
  5. package/dist/155.js +2 -0
  6. package/dist/355.js +2 -0
  7. package/dist/584.js +2 -0
  8. package/dist/604.js +2 -0
  9. package/dist/632.js +2 -0
  10. package/dist/645.js +2 -0
  11. package/dist/93.js +2 -0
  12. package/dist/d711f70338306e5bddc4.wasm +0 -0
  13. package/dist/package-test.js +2 -0
  14. package/dist/package.js +2 -0
  15. package/package.json +49 -0
  16. package/package.png +0 -0
  17. package/scripts/command.txt +1 -0
  18. package/scripts/exportForTS.py +862 -0
  19. package/scripts/exportForTSConstants.py +93 -0
  20. package/scripts/func.json +1 -0
  21. package/scripts/module.json +11 -0
  22. package/src/EDAtools.ts +46 -0
  23. package/src/EDAui.ts +118 -0
  24. package/src/dataGenerators.ts +74 -0
  25. package/src/demos.ts +38 -0
  26. package/src/package-test.ts +12 -0
  27. package/src/package.ts +248 -0
  28. package/src/svm.ts +485 -0
  29. package/src/utils.ts +51 -0
  30. package/tsconfig.json +71 -0
  31. package/wasm/EDA.js +443 -0
  32. package/wasm/EDA.wasm +0 -0
  33. package/wasm/EDAAPI.js +131 -0
  34. package/wasm/EDAForWebWorker.js +21 -0
  35. package/wasm/PCA/PCA.cpp +151 -0
  36. package/wasm/PCA/PCA.h +48 -0
  37. package/wasm/PLS/PLS.h +64 -0
  38. package/wasm/PLS/pls.cpp +393 -0
  39. package/wasm/callWasm.js +475 -0
  40. package/wasm/callWasmForWebWorker.js +706 -0
  41. package/wasm/dataGenerators.h +169 -0
  42. package/wasm/dataMining.h +116 -0
  43. package/wasm/pcaExport.cpp +64 -0
  44. package/wasm/plsExport.cpp +75 -0
  45. package/wasm/svm.h +608 -0
  46. package/wasm/svmApi.cpp +323 -0
  47. package/wasm/workers/errorWorker.js +13 -0
  48. package/wasm/workers/generateDatasetWorker.js +13 -0
  49. package/wasm/workers/normalizeDatasetWorker.js +13 -0
  50. package/wasm/workers/partialLeastSquareRegressionWorker.js +13 -0
  51. package/wasm/workers/predictByLSSVMWorker.js +13 -0
  52. package/wasm/workers/principalComponentAnalysisWorker.js +13 -0
  53. package/wasm/workers/trainAndAnalyzeLSSVMWorker.js +13 -0
  54. package/wasm/workers/trainLSSVMWorker.js +13 -0
  55. package/webpack.config.js +37 -0
@@ -0,0 +1,93 @@
1
+ """ exportForTSConstants.py
2
+ Constants for C/C++-to-wasm export script.
3
+ """
4
+
5
+ # export settings constants
6
+ NAME = 'name'
7
+ FOLDER = 'folder'
8
+ RUNTIME_SYSTEM = 'runtimeSystemFile'
9
+ RUNTIME_SYSTEM_FOR_WEBWORKER= 'runtimeSystemForWebWorker'
10
+ PACKAGE_FILE = 'packageFile'
11
+ OPTIMIZATION_MODE = 'optimizationMode'
12
+ TOTAL_MEMORY = 'totalMemory'
13
+ PACKAGE_JSON_FILE = 'packageJsonFile'
14
+ WORKERS_FOLDER = 'workers'
15
+
16
+ # constants for function specification
17
+ ARGUMENTS = 'arguments'
18
+ OUTPUT = 'output'
19
+ ANNOTATION = 'annotation'
20
+ PROTOTYPE = 'prototype'
21
+ CALL_ARGS = 'callArgs'
22
+ WW_PROTOTYPE = 'prototypeForWebWorker'
23
+ TYPE = 'type'
24
+ COLUMN = 'Column'
25
+ COLUMNS = 'Columns'
26
+ NUM = 'num'
27
+ DATA = 'data'
28
+ NEW = 'new'
29
+ NUM_OF_ROWS = 'numOfRows'
30
+ NUM_OF_COLUMNS = 'numOfColumns'
31
+ REF = 'ref'
32
+ VALUE = 'value'
33
+ SOURCE = 'source'
34
+ TABLE_FROM_COLUMNS = 'tableFromColumns'
35
+ OBJECTS = 'objects'
36
+
37
+ AUTOMATIC_GENERATION_LINE = '// The following code is generated automatically.'
38
+
39
+ # constants for processing code that is generated by Emscripten
40
+ EM_LIB_EXTENSION = '.js'
41
+ WW_FILE_SUFFIX = 'ForWebWorker'
42
+ NUM_OF_LINE_TO_MODIFY = 1
43
+ KEY_WORD_TO_ADD = 'export '
44
+ LINE_TO_REPLACE = 'fetch(wasmBinaryFile,{credentials:"same-origin"})'
45
+
46
+ # constants for generating JS-code
47
+ CALL_WASM = 'callWasm'
48
+ WORKER_SUFFIX = 'Worker'
49
+ IN_WEBWORKER_SUFFIX = 'InWebWorker'
50
+ WORKER_EXTENSION = '.js'
51
+ CPP_WRAPPER_FUNCTION = 'cppWrapper'
52
+ GET_CPP_INPUT = 'getCppInput'
53
+ GET_RESULT = 'getResult'
54
+ WW_SPACE = ' ' * 2
55
+ WW_SUBSPACE = ' ' * 4
56
+ WW_SUBSUBSPACE = ' ' * 6
57
+ SPACE = ' ' * 2
58
+ SUBSPACE = ' ' * 4
59
+ SUBSUBSPACE = ' ' * 6
60
+ SUBSUBSUBSPACE = ' ' * 8
61
+ API_SUFFIX = 'API'
62
+ SERVICE_PREFFIX = '_'
63
+ ANY_TYPE = 'any'
64
+ OUTPUT_VARIABLE = '_output'
65
+ PROMISE_VARIABLE = '_promise'
66
+ RESULT_VARIBLABLE = '_result'
67
+ ERROR_VARIABLE = '_error'
68
+
69
+ # file operating constants
70
+ READ_MODE = 'r'
71
+ WRITE_MODE = 'w'
72
+ APPEND_MODE = 'a'
73
+
74
+ # annotation constants
75
+ ANNOT_INPUT = '//input:'
76
+ ANNOT_OUTPUT = '//output:'
77
+ ANNOT_NAME = '//name:'
78
+ ANNOT_NEW = 'new'
79
+ ANNOT_DATAFRAME = 'dataframe'
80
+ ANNOT_COLUMN = 'column'
81
+ ANNOT_COLUMN_LIST = 'column_list'
82
+ ANNOT_DOT = '.'
83
+ ANNOT_OBJECTS = 'objects'
84
+
85
+ # auxiliary maps
86
+ sizesMap = {'rowCount': 'numOfRows', 'columnCount': 'numOfColumns', 'data': 'data'}
87
+ typesMap = {'int': 'number', 'double': 'number', 'column': 'DG.Column',
88
+ 'column_list': 'DG.ColumnList', 'dataframe': 'DG.DataFrame'}
89
+
90
+ # Emscripten constants
91
+ EM_MACROS = 'EMSCRIPTEN_KEEPALIVE'
92
+
93
+ PJSN_SOURCES = 'sources'
@@ -0,0 +1 @@
1
+ {"pcaExport.cpp": {"principalComponentAnalysis": {"arguments": {"columns": {"type": "floatColumns"}, "componentsCount": {"type": "num"}, "centerNum": {"type": "num"}, "scaleNum": {"type": "num"}, "components": {"type": "newFloatColumns", "numOfRows": {"ref": "columns", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "tableFromColumns", "source": "components"}, "annotation": ["//name: principalComponentAnalysis", "//input: dataframe table", "//input: column_list columns", "//input: int componentsCount", "//input: int centerNum", "//input: int scaleNum", "//output: dataframe result "], "prototype": "principalComponentAnalysis(table, columns, componentsCount, centerNum, scaleNum)", "prototypeForWebWorker": "principalComponentAnalysisInWebWorker(table, columns, componentsCount, centerNum, scaleNum)", "callArgs": "[columns, componentsCount, centerNum, scaleNum]"}, "error": {"arguments": {"col1": {"type": "floatColumn"}, "col2": {"type": "floatColumn"}}, "output": {"type": "double", "source": "_callResult"}, "annotation": ["//name: error", "//input: dataframe df", "//input: column col1", "//input: column col2", "//output: double mad "], "prototype": "error(df, col1, col2)", "prototypeForWebWorker": "errorInWebWorker(df, col1, col2)", "callArgs": "[col1, col2]"}}, "PCA/PCA.cpp": {}, "plsExport.cpp": {"partialLeastSquareRegression": {"arguments": {"features": {"type": "floatColumns"}, "predict": {"type": "floatColumn"}, "componentsCount": {"type": "num"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "predict", "value": "numOfRows"}}, "regressionCoefficients": {"type": "newFloatColumn", "numOfRows": {"ref": "features", "value": "numOfColumns"}}, "tScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "uScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, 
"numOfColumns": {"ref": "componentsCount", "value": "data"}}, "xLoadings": {"type": "newFloatColumns", "numOfRows": {"ref": "features", "value": "numOfColumns"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings']"}, "annotation": ["//name: partialLeastSquareRegression", "//input: dataframe table", "//input: column_list features", "//input: column predict", "//input: int componentsCount"], "prototype": "partialLeastSquareRegression(table, features, predict, componentsCount)", "prototypeForWebWorker": "partialLeastSquareRegressionInWebWorker(table, features, predict, componentsCount)", "callArgs": "[features, predict, componentsCount]"}}, "PLS/PLS.cpp": {}, "svmApi.cpp": {"generateDataset": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "samplesCount": {"type": "num"}, "featuresCount": {"type": "num"}, "min": {"type": "num"}, "max": {"type": "num"}, "violatorsPercentage": {"type": "num"}, "dataset": {"type": "newFloatColumns", "numOfRows": {"ref": "samplesCount", "value": "data"}, "numOfColumns": {"ref": "featuresCount", "value": "data"}}, "labels": {"type": "newFloatColumn", "numOfRows": {"ref": "samplesCount", "value": "data"}}}, "output": {"type": "objects", "source": "['dataset', 'labels']"}, "annotation": ["//name: generateDataset", "//input: int kernel", "//input: column kernelParams", "//input: int samplesCount", "//input: int featuresCount", "//input: double min", "//input: double max", "//input: double violatorsPercentage"], "prototype": "generateDataset(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "prototypeForWebWorker": "generateDatasetInWebWorker(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "callArgs": "[kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage]"}, "normalizeDataset": 
{"arguments": {"data": {"type": "floatColumns"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "data", "value": "numOfColumns"}, "numOfColumns": {"ref": "data", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs']"}, "annotation": ["//name: normalizeDataset", "//input: column_list data"], "prototype": "normalizeDataset(data)", "prototypeForWebWorker": "normalizeDatasetInWebWorker(data)", "callArgs": "[data]"}, "trainLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights']"}, "annotation": ["//name: trainLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, 
labels)", "prototypeForWebWorker": "trainLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels]"}, "predictByLSSVM": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "normalizedData": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "means": {"type": "floatColumn"}, "stdDevs": {"type": "floatColumn"}, "modelParams": {"type": "floatColumn"}, "precomputedWeights": {"type": "floatColumn"}, "targetData": {"type": "floatColumns"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "targetData", "value": "numOfRows"}}}, "output": {"type": "column", "source": "prediction"}, "annotation": ["//name: predictByLSSVM", "//input: int kernel", "//input: column kernelParams", "//input: column_list normalizedData", "//input: column labels", "//input: column means", "//input: column stdDevs", "//input: column modelParams", "//input: column precomputedWeights", "//input: column_list targetData", "//output: column prediction"], "prototype": "predictByLSSVM(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "prototypeForWebWorker": "predictByLSSVMInWebWorker(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "callArgs": "[kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData]"}, "trainAndAnalyzeLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "confusionMatrixElementsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, 
"numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}, "predictedLabels": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "correctness": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "consfusionMatrix": {"type": "newIntColumn", "numOfRows": {"ref": "confusionMatrixElementsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights', 'predictedLabels', 'correctness', 'consfusionMatrix']"}, "annotation": ["//name: trainAndAnalyzeLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: int confusionMatrixElementsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainAndAnalyzeLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "prototypeForWebWorker": "trainAndAnalyzeLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels]"}}}
@@ -0,0 +1,11 @@
1
+ {
2
+ "name": "EDA",
3
+ "folder": "../wasm",
4
+ "source": ["pcaExport.cpp", "PCA/PCA.cpp", "plsExport.cpp", "PLS/PLS.cpp", "svmApi.cpp"],
5
+ "optimizationMode": "-O3",
6
+ "packageFile": "../src/package.ts",
7
+ "packageJsonFile": "../package.json",
8
+ "runtimeSystemFile": "../wasm/callWasm.js",
9
+ "runtimeSystemForWebWorker": "../wasm/callWasmForWebWorker.js",
10
+ "totalMemory": "268435456"
11
+ }
@@ -0,0 +1,46 @@
1
+ // Exploratory data analysis (EDA) tools
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ import {_principalComponentAnalysisInWebWorker,
8
+ _partialLeastSquareRegressionInWebWorker} from '../wasm/EDAAPI';
9
+
10
+ import {checkComponenets, checkGeneratorSVMinputs} from './utils';
11
+
12
/**
 * Principal component analysis (PCA) of the given feature columns.
 *
 * The requested number of components is first validated with `checkComponenets`;
 * the computation itself is delegated to `_principalComponentAnalysisInWebWorker`.
 *
 * @param table - source dataframe, forwarded to the worker call
 * @param features - columns to analyze
 * @param components - number of principal components to compute
 * @param center - forwarded as the numeric flag 1 (truthy) or 0 (falsy)
 * @param scale - forwarded as the numeric flag 1 (truthy) or 0 (falsy)
 * @returns the value the worker promise resolves with
 * @throws Error with message `Error: <reason>` when the worker promise rejects
 */
export async function computePCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
  center: boolean, scale: boolean): Promise<DG.DataFrame> {
  checkComponenets(features, components);

  // The worker API takes numeric flags rather than booleans.
  let centerNum = 0;
  if (center)
    centerNum = 1;

  let scaleNum = 0;
  if (scale)
    scaleNum = 1;

  // Started outside the try block so that only a rejection of the promise is re-wrapped.
  const pending = _principalComponentAnalysisInWebWorker(table, features, components, centerNum, scaleNum);

  try {
    const pcaResult: any = await pending;
    return pcaResult;
  } catch (reason) {
    throw new Error(`Error: ${reason}`);
  }
}
31
+
32
/**
 * Partial least squares regression (PLS).
 *
 * The requested number of components is first validated with `checkComponenets`;
 * the computation itself is delegated to `_partialLeastSquareRegressionInWebWorker`.
 *
 * @param table - source dataframe, forwarded to the worker call
 * @param features - predictor columns
 * @param predict - column to be predicted
 * @param components - number of components to use
 * @returns the value the worker promise resolves with
 * @throws Error with message `Error: <reason>` when the worker promise rejects
 */
export async function computePLS(table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, components: number): Promise<any> {
  checkComponenets(features, components);

  // Started outside the try block so that only a rejection of the promise is re-wrapped.
  const pending = _partialLeastSquareRegressionInWebWorker(table, features, predict, components);

  try {
    const plsResult: any = await pending;
    return plsResult;
  } catch (reason) {
    throw new Error(`Error: ${reason}`);
  }
}
package/src/EDAui.ts ADDED
@@ -0,0 +1,118 @@
1
+ // Custom UI for Exploratory data analysis (EDA) tools
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
/**
 * Prefixes the name of every column of the given table with `_PCA`.
 * The table is modified in place.
 *
 * @param pcaTable - dataframe whose columns are renamed
 * @returns the same dataframe instance that was passed in
 */
export function renamePCAcolumns(pcaTable: DG.DataFrame): DG.DataFrame {
  const allColumns = pcaTable.columns.toList();

  allColumns.forEach((column) => {
    column.name = `_PCA${column.name}`;
  });

  return pcaTable;
}
14
+
15
/**
 * Builds the "Reference vs. Predicted" scatter plot.
 *
 * Side effect: `prediction` is renamed to `<reference name>(predicted)`.
 * Both columns are placed into a new dataframe that backs the viewer.
 *
 * @param reference - column with reference values (x axis)
 * @param prediction - column with predicted values (y axis)
 * @returns scatter plot viewer with the regression line shown
 */
export function predictedVersusReferenceScatterPlot(reference: DG.Column, prediction: DG.Column): DG.Viewer {
  prediction.name = `${reference.name}(predicted)`;

  const comparisonTable = DG.DataFrame.fromColumns([reference, prediction]);
  comparisonTable.name = 'Reference vs. Predicted';

  // Names are read back from the objects after the dataframe has been created.
  return DG.Viewer.scatterPlot(comparisonTable, {
    title: comparisonTable.name,
    x: reference.name,
    y: prediction.name,
    showRegressionLine: true,
    markerType: 'circle',
  });
}
30
+
31
/**
 * Builds the "Regression Coefficients" bar chart.
 *
 * Side effect: `regressionCoeffs` is renamed to `regression coefficient`.
 * A new dataframe is created from a string column with the feature names and
 * the coefficients column.
 *
 * @param features - predictor columns; only their names are used
 * @param regressionCoeffs - column with regression coefficients
 * @returns bar chart viewer split by feature
 */
export function regressionCoefficientsBarChart(features: DG.ColumnList, regressionCoeffs: DG.Column): DG.Viewer {
  regressionCoeffs.name = 'regression coefficient';

  const featureNames: string[] = [];
  for (const feature of features)
    featureNames.push(feature.name);

  const featureColumn = DG.Column.fromStrings('feature', featureNames);
  const coefficientsTable = DG.DataFrame.fromColumns([featureColumn, regressionCoeffs]);
  coefficientsTable.name = 'Regression Coefficients';

  return DG.Viewer.barChart(coefficientsTable, {
    title: coefficientsTable.name,
    split: 'feature',
    value: 'regression coefficient',
    valueAggrType: 'avg',
  });
}
47
+
48
/**
 * Builds the "Scores" scatter plot.
 *
 * Side effect: the given columns are renamed to `x.score.t1`, `x.score.t2`, ...
 * and `y.score.u1`, `y.score.u2`, ... respectively. All of them are placed into
 * a new dataframe; the plot shows the first x-score against the first y-score.
 *
 * @param xScores - x-score columns
 * @param yScores - y-score columns
 * @returns scatter plot viewer
 */
export function scoresScatterPlot(xScores: Array<DG.Column>, yScores: Array<DG.Column>): DG.Viewer {
  const collected: DG.Column[] = [];

  // Gives every column a 1-based sequential name and appends it to the common list.
  const renameAndCollect = (columns: Array<DG.Column>, prefix: string): void => {
    for (let idx = 0; idx < columns.length; ++idx) {
      columns[idx].name = `${prefix}${idx + 1}`;
      collected.push(columns[idx]);
    }
  };

  renameAndCollect(xScores, 'x.score.t');
  renameAndCollect(yScores, 'y.score.u');

  const scoresTable = DG.DataFrame.fromColumns(collected);
  scoresTable.name = 'Scores';

  return DG.Viewer.scatterPlot(scoresTable, {
    title: scoresTable.name,
    x: xScores[0].name,
    y: yScores[0].name,
    markerType: 'circle',
  });
}
74
+
75
/**
 * Builds the "Loadings" scatter plot.
 *
 * Side effect: the loading columns are renamed to `x.loading.p1`, `x.loading.p2`, ...
 * A new dataframe is created from a `labels` string column (feature names)
 * followed by the loading columns; the plot shows the first loading column
 * against the last one, labelled by feature name.
 *
 * @param features - predictor columns; only their names are used
 * @param xLoadings - x-loading columns
 * @returns scatter plot viewer
 */
export function loadingScatterPlot(features: DG.ColumnList, xLoadings: Array<DG.Column>): DG.Viewer {
  const featureNames: string[] = [];
  for (const feature of features)
    featureNames.push(feature.name);

  // The labels column goes first, the loading columns follow.
  const tableColumns: DG.Column[] = [DG.Column.fromStrings('labels', featureNames)];

  for (let k = 0; k < xLoadings.length; ++k) {
    const loading = xLoadings[k];
    loading.name = `x.loading.p${k + 1}`;
    tableColumns.push(loading);
  }

  const loadingsTable = DG.DataFrame.fromColumns(tableColumns);
  loadingsTable.name = 'Loadings';

  const lastIndex = xLoadings.length - 1;

  return DG.Viewer.scatterPlot(loadingsTable, {
    title: loadingsTable.name,
    x: xLoadings[0].name,
    y: xLoadings[lastIndex].name,
    markerType: 'circle',
    labels: 'labels',
  });
}
101
+
102
/**
 * Adds the four PLS viewers to the table view of the given dataframe.
 *
 * The view is looked up by the table name. `plsOutput` is indexed positionally:
 * [0] is passed as the prediction, [1] as the regression coefficients,
 * [2] and [3] as the x- and y-scores, [4] as the x-loadings.
 *
 * @param table - dataframe whose table view receives the viewers
 * @param features - predictor columns
 * @param predict - reference column
 * @param plsOutput - result of the PLS computation
 */
export function addPLSvisualization(table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, plsOutput: any): void {
  const tableView = grok.shell.getTableView(table.name);

  // Each viewer is built right before it is added, in this order.
  const viewerBuilders: Array<() => DG.Viewer> = [
    // 1. Predicted vs Reference scatter plot
    () => predictedVersusReferenceScatterPlot(predict, plsOutput[0]),
    // 2. Regression Coefficients Bar Chart
    () => regressionCoefficientsBarChart(features, plsOutput[1]),
    // 3. Scores Scatter Plot
    () => scoresScatterPlot(plsOutput[2], plsOutput[3]),
    // 4. Loading Scatter Plot
    () => loadingScatterPlot(features, plsOutput[4]),
  ];

  for (const buildViewer of viewerBuilders)
    tableView.addViewer(buildViewer());
}
@@ -0,0 +1,74 @@
1
+ // Test data generation tools
2
+
3
+ /* Do not change these import lines to match external modules in webpack configuration */
4
+ import * as grok from 'datagrok-api/grok';
5
+ import * as ui from 'datagrok-api/ui';
6
+ import * as DG from 'datagrok-api/dg';
7
+
8
+ import {checkGeneratorSVMinputs} from './utils';
9
+ import {_generateDatasetInWebWorker} from '../wasm/EDAAPI';
10
+
11
+ const SVM_GEN_FEATURES_INDEX = 0;
12
+ const SVM_GEN_LABELS_INDEX = 1;
13
+ const SVM_FEATURE_NAME = 'Feature #';
14
+ const SVM_LABEL_NAME = 'Label';
15
+
16
/**
 * Creates the built-in "cars" sample dataframe.
 *
 * The table has 30 rows: a string column `model`, followed by Int32 and
 * Float32 columns holding the values listed below.
 *
 * @returns a new dataframe built from the literal data
 */
export function carsDataframe(): DG.DataFrame {
  // Local shorthands for typed-array backed columns.
  const int32 = (name: string, values: number[]) =>
    DG.Column.fromInt32Array(name, new Int32Array(values));
  const float32 = (name: string, values: number[]) =>
    DG.Column.fromFloat32Array(name, new Float32Array(values));

  const carColumns = [
    DG.Column.fromStrings('model', ['alfaromeo', 'audi', 'bmw', 'chevrolet', 'dodge1', 'dodge2', 'honda1', 'honda2', 'isuzu', 'jaguar', 'mazda', 'mercedes', 'mercury', 'mitsubishi', 'nissan1', 'nissan2', 'peugot', 'plymouth', 'porsche', 'saab', 'subaru', 'toyota1', 'toyota2', 'toyota3', 'toyota4', 'volkswagen1', 'volkswagen2', 'volvo1', 'volvo2', 'volvo3']),
    int32('diesel', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1]),
    int32('turbo', [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1]),
    int32('two.doors', [1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0]),
    int32('hatchback', [1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0]),
    float32('wheel.base', [94.5, 105.80000305175781, 101.19999694824219, 94.5, 93.69999694824219, 93.69999694824219, 93.69999694824219, 96.5, 94.30000305175781, 113, 93.0999984741211, 115.5999984741211, 102.69999694824219, 93.69999694824219, 94.5, 94.5, 93.69999694824219, 114.19999694824219, 89.5, 99.0999984741211, 97.19999694824219, 95.69999694824219, 95.69999694824219, 98.4000015258789, 102.4000015258789, 97.30000305175781, 100.4000015258789, 104.30000305175781, 109.0999984741211, 109.0999984741211]),
    float32('length', [171.1999969482422, 192.6999969482422, 176.8000030517578, 158.8000030517578, 157.3000030517578, 157.3000030517578, 150, 175.39999389648438, 170.6999969482422, 199.60000610351562, 166.8000030517578, 202.60000610351562, 178.39999389648438, 157.3000030517578, 170.1999969482422, 165.3000030517578, 157.3000030517578, 198.89999389648438, 168.89999389648438, 186.60000610351562, 172, 158.6999969482422, 166.3000030517578, 176.1999969482422, 175.60000610351562, 171.6999969482422, 180.1999969482422, 188.8000030517578, 188.8000030517578, 188.8000030517578]),
    float32('width', [65.5, 71.4000015258789, 64.80000305175781, 63.599998474121094, 63.79999923706055, 63.79999923706055, 64, 65.19999694824219, 61.79999923706055, 69.5999984741211, 64.19999694824219, 71.69999694824219, 68, 64.4000015258789, 63.79999923706055, 63.79999923706055, 63.79999923706055, 68.4000015258789, 65, 66.5, 65.4000015258789, 63.599998474121094, 64.4000015258789, 65.5999984741211, 66.5, 65.5, 66.9000015258789, 67.19999694824219, 68.80000305175781, 68.9000015258789]),
    float32('height', [52.400001525878906, 55.70000076293945, 54.29999923706055, 52, 50.79999923706055, 50.599998474121094, 52.599998474121094, 54.099998474121094, 53.5, 52.79999923706055, 54.099998474121094, 56.29999923706055, 54.79999923706055, 50.79999923706055, 53.5, 54.5, 50.599998474121094, 58.70000076293945, 51.599998474121094, 56.099998474121094, 52.5, 54.5, 53, 52, 54.900001525878906, 55.70000076293945, 55.099998474121094, 56.20000076293945, 55.5, 55.5]),
    int32('curb.weight', [2823, 2844, 2395, 1909, 2128, 1967, 1956, 2304, 2337, 4066, 1950, 3770, 2910, 1918, 2024, 1951, 1967, 3430, 2800, 2695, 2190, 1985, 2275, 2551, 2480, 2261, 2661, 2912, 3049, 3217]),
    int32('eng.size', [152, 136, 108, 90, 98, 90, 92, 110, 111, 258, 91, 183, 140, 92, 97, 97, 90, 152, 194, 121, 108, 92, 110, 146, 110, 97, 136, 141, 141, 145]),
    int32('horsepower', [154, 110, 101, 70, 102, 68, 76, 86, 78, 176, 68, 123, 175, 68, 69, 69, 68, 95, 207, 110, 82, 62, 56, 116, 73, 52, 110, 114, 160, 106]),
    int32('peak.rpm', [5000, 5500, 5800, 5400, 5500, 5500, 6000, 5800, 4800, 4750, 5000, 4350, 5000, 5500, 5200, 5200, 5500, 4150, 5900, 5250, 4400, 4800, 4500, 4800, 4500, 4800, 5500, 5400, 5300, 4800]),
    int32('symbol', [1, 1, 2, 0, 1, 1, 1, 0, 0, 0, 1, -1, 1, 2, 1, 1, 1, 0, 3, 2, 0, 1, 0, 2, -1, 2, 0, -2, -1, -1]),
    int32('city.mpg', [19, 19, 23, 38, 24, 31, 30, 27, 24, 15, 31, 22, 19, 37, 31, 31, 31, 25, 17, 21, 28, 35, 34, 24, 30, 37, 19, 23, 19, 26]),
    int32('highway.mpg', [26, 25, 29, 43, 30, 38, 34, 33, 29, 19, 38, 25, 24, 41, 37, 37, 38, 25, 25, 28, 33, 39, 36, 30, 33, 46, 24, 28, 25, 27]),
    int32('price', [16500, 17710, 16430, 6575, 7957, 6229, 7129, 8845, 6785, 35550, 7395, 31600, 16503, 5389, 7349, 7299, 6229, 13860, 37028, 12170, 7775, 5348, 7898, 9989, 10698, 7775, 13295, 12940, 19045, 22470]),
  ];

  return DG.DataFrame.fromColumns(carColumns);
}
39
+
40
/**
 * Generates a dataset for testing binary classifiers.
 *
 * Inputs are validated with `checkGeneratorSVMinputs`; the data itself is produced
 * by `_generateDatasetInWebWorker`. In its result, the element at
 * `SVM_GEN_FEATURES_INDEX` is used as the collection of feature columns and the
 * element at `SVM_GEN_LABELS_INDEX` as the labels column.
 *
 * @param kernel - kernel identifier, forwarded to the generator
 * @param kernelParams - kernel parameters, wrapped into a 'double' column named `kernelParams`
 * @param name - name assigned to the resulting dataframe
 * @param samplesCount - forwarded to the generator
 * @param featuresCount - forwarded to the generator
 * @param min - forwarded to the generator
 * @param max - forwarded to the generator
 * @param violatorsPercentage - forwarded to the generator
 * @returns dataframe with the renamed feature columns followed by the labels column
 * @throws Error with message `Error: <reason>` when the worker promise rejects
 */
export async function testDataForBinaryClassification(kernel: number, kernelParams: Array<number>,
  name: string, samplesCount: number, featuresCount: number, min: number,
  max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
  checkGeneratorSVMinputs(samplesCount, featuresCount, min, max, violatorsPercentage);

  // The generator expects the kernel parameters as a column.
  const kernelParamsCol = DG.Column.fromList('double', 'kernelParams', kernelParams);

  // Started outside the try block so that only a rejection of the promise is re-wrapped.
  const pending = _generateDatasetInWebWorker(kernel, kernelParamsCol,
    samplesCount, featuresCount, min, max, violatorsPercentage);

  let generated: any;
  try {
    generated = await pending;
  } catch (reason) {
    throw new Error(`Error: ${reason}`);
  }

  const labelsColumn = generated[SVM_GEN_LABELS_INDEX];
  const featureColumns = generated[SVM_GEN_FEATURES_INDEX];

  labelsColumn.name = SVM_LABEL_NAME;

  // Existing feature column names are kept as a suffix of the new name.
  for (const feature of featureColumns)
    feature.name = SVM_FEATURE_NAME + feature.name;

  const df = DG.DataFrame.fromColumns(featureColumns);
  df.name = name;
  df.columns.add(labelsColumn);

  return df;
}
package/src/demos.ts ADDED
@@ -0,0 +1,38 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {computePLS} from './EDAtools';
7
+ import {addPLSvisualization} from './EDAui';
8
+
9
/**
 * Demo of multivariate analysis (PLS) on a generated table.
 *
 * Requests a 'random walk' test table, prefixes every column name with `Feature `,
 * renames the first column to `Reference`, opens the table in a view, runs PLS with
 * the first column as the predicted one and the remaining columns as features,
 * and adds the PLS viewers to the view.
 *
 * The `Reference` column is taken out of the table for the duration of the
 * computation and is always put back afterwards (as the last column), even when
 * the computation or the visualization fails.
 *
 * @param rowCount - number of rows requested for the test table; must be positive
 * @param colCount - number of columns requested for the test table; must be positive
 * @param componentsCount - number of PLS components; must be in 1..colCount
 * @throws whatever `computePLS` or `addPLSvisualization` throws
 */
export async function demoPLS(rowCount: number, colCount: number, componentsCount: number): Promise<void> {
  // Invalid inputs are reported with an error balloon rather than an exception.
  // NOTE(review): once the reference column is removed, presumably only colCount - 1
  // feature columns remain — confirm whether componentsCount === colCount should be rejected here.
  if ((rowCount <= 0) || (colCount <= 0) || (componentsCount <= 0) || (componentsCount > colCount)) {
    const bal = new DG.Balloon();
    bal.error('Incorrect inputs.');
    return;
  }

  const PREDICT = 'Reference';

  const bigDemoTable = grok.data.testData('random walk', rowCount, colCount);
  bigDemoTable.name = `${rowCount} x ${colCount}`;

  for (const col of bigDemoTable.columns)
    col.name = 'Feature ' + col.name;
  bigDemoTable.columns.byIndex(0).name = PREDICT;

  grok.shell.addTableView(bigDemoTable);

  // The reference column is detached so that the column list holds features only.
  const predict = bigDemoTable.columns.byName(PREDICT);
  const features = bigDemoTable.columns.remove(PREDICT);

  try {
    const plsResults = await computePLS(bigDemoTable, features, predict, componentsCount);

    addPLSvisualization(bigDemoTable, features, predict, plsResults);
  } finally {
    // Restore the reference column on failure too, so the opened table is never left without it.
    bigDemoTable.columns.add(predict);
  }
}
@@ -0,0 +1,12 @@
1
+ import * as DG from "datagrok-api/dg";
2
+ import {runTests, tests} from '@datagrok-libraries/utils/src/test';
3
+
4
+ export let _package = new DG.Package();
5
+ export {tests};
6
+
7
//name: test
//output: dataframe result
export async function test(): Promise<DG.DataFrame> {
  // Run the registered tests and return their results as a dataframe.
  return DG.DataFrame.fromObjects(await runTests())!;
}