@datagrok/eda 1.1.24 → 1.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.vscode/settings.json +5 -0
- package/CHANGELOG.md +12 -0
- package/README.md +1 -0
- package/dist/05e5e0770f54f07e9474.wasm +0 -0
- package/dist/231.js +2 -2
- package/dist/445.js +2 -0
- package/dist/523.js +2 -2
- package/dist/901.js +2 -0
- package/dist/902.js +2 -2
- package/dist/package.js +2 -2
- package/package.json +92 -92
- package/scripts/command.txt +1 -1
- package/scripts/func.json +1 -1
- package/scripts/module.json +1 -1
- package/src/package.ts +218 -26
- package/src/pls/pls-tools.ts +11 -9
- package/src/regression.ts +232 -0
- package/src/svm.ts +65 -27
- package/wasm/EDA.js +65 -1
- package/wasm/EDA.wasm +0 -0
- package/wasm/EDAAPI.js +30 -0
- package/wasm/EDAForWebWorker.js +1 -1
- package/wasm/callWasm.js +384 -393
- package/wasm/regression-api.cpp +66 -0
- package/wasm/regression.h +128 -0
- package/wasm/workers/fitLinearRegressionParamsWithDataNormalizingWorker.js +13 -0
- package/wasm/workers/fitLinearRegressionParamsWorker.js +13 -0
- package/dist/f5343e2c2e15952ce916.wasm +0 -0
package/package.json
CHANGED
|
@@ -1,94 +1,94 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
2
|
+
"name": "@datagrok/eda",
|
|
3
|
+
"friendlyName": "EDA",
|
|
4
|
+
"version": "1.1.27",
|
|
5
|
+
"description": "Exploratory Data Analysis Tools",
|
|
6
|
+
"dependencies": {
|
|
7
|
+
"@datagrok-libraries/math": "^1.1.10",
|
|
8
|
+
"@datagrok-libraries/ml": "^6.6.11",
|
|
9
|
+
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
10
|
+
"@datagrok-libraries/utils": "^4.1.44",
|
|
11
|
+
"@keckelt/tsne": "^1.0.2",
|
|
12
|
+
"@webgpu/types": "^0.1.40",
|
|
13
|
+
"cash-dom": "^8.1.1",
|
|
14
|
+
"datagrok-api": "^1.16.0",
|
|
15
|
+
"dayjs": "^1.11.9",
|
|
16
|
+
"jstat": "^1.9.6",
|
|
17
|
+
"source-map-loader": "^4.0.1",
|
|
18
|
+
"umap-js": "^1.3.3",
|
|
19
|
+
"worker-loader": "latest"
|
|
20
|
+
},
|
|
21
|
+
"author": {
|
|
22
|
+
"name": "Viktor Makarichev",
|
|
23
|
+
"email": "vmakarichev@datagrok.ai"
|
|
24
|
+
},
|
|
25
|
+
"devDependencies": {
|
|
26
|
+
"@typescript-eslint/eslint-plugin": "^5.32.0",
|
|
27
|
+
"@typescript-eslint/parser": "^5.32.0",
|
|
28
|
+
"eslint": "^8.21.0",
|
|
29
|
+
"eslint-config-google": "^0.14.0",
|
|
30
|
+
"ts-loader": "latest",
|
|
31
|
+
"typescript": "latest",
|
|
32
|
+
"webpack": "latest",
|
|
33
|
+
"webpack-cli": "latest",
|
|
34
|
+
"css-loader": "latest",
|
|
35
|
+
"style-loader": "latest"
|
|
36
|
+
},
|
|
37
|
+
"scripts": {
|
|
38
|
+
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/tutorials",
|
|
39
|
+
"debug-eda": "webpack && grok publish",
|
|
40
|
+
"release-eda": "webpack && grok publish --release",
|
|
41
|
+
"build-eda": "webpack",
|
|
42
|
+
"build": "webpack",
|
|
43
|
+
"debug-eda-dev": "webpack && grok publish dev",
|
|
44
|
+
"release-eda-dev": "webpack && grok publish dev --release",
|
|
45
|
+
"debug-eda-local": "webpack && grok publish local",
|
|
46
|
+
"release-eda-local": "webpack && grok publish local --release",
|
|
47
|
+
"build-all": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/tutorials run build && npm run build"
|
|
48
|
+
},
|
|
49
|
+
"canEdit": [
|
|
50
|
+
"Developers"
|
|
51
|
+
],
|
|
52
|
+
"canView": [
|
|
53
|
+
"All users"
|
|
54
|
+
],
|
|
55
|
+
"repository": {
|
|
56
|
+
"type": "git",
|
|
57
|
+
"url": "https://github.com/datagrok-ai/public.git",
|
|
58
|
+
"directory": "packages/EDA"
|
|
59
|
+
},
|
|
60
|
+
"category": "Machine Learning",
|
|
61
|
+
"sources": [
|
|
62
|
+
"wasm/EDA.js"
|
|
63
|
+
],
|
|
64
|
+
"meta": {
|
|
65
|
+
"menu": {
|
|
66
|
+
"ML": {
|
|
67
|
+
"Tools": {
|
|
68
|
+
"Impute Missing Values...": null,
|
|
69
|
+
"Random Data...": null
|
|
70
|
+
},
|
|
71
|
+
"Cluster": {
|
|
72
|
+
"Cluster...": null,
|
|
73
|
+
"DBSCAN...": null
|
|
74
|
+
},
|
|
75
|
+
"Notebooks": {
|
|
76
|
+
"Browse Notebooks": null,
|
|
77
|
+
"Open in Notebook": null,
|
|
78
|
+
"New Notebook": null
|
|
79
|
+
},
|
|
80
|
+
"Models": {
|
|
81
|
+
"Browse Models": null,
|
|
82
|
+
"Train Model...": null,
|
|
83
|
+
"Apply Model...": null
|
|
84
|
+
},
|
|
85
|
+
"Analyse": {
|
|
86
|
+
"PCA...": null,
|
|
87
|
+
"ANOVA...": null,
|
|
88
|
+
"Multivariate Analysis...": null
|
|
89
|
+
},
|
|
90
|
+
"Reduce Dimensionality": null
|
|
91
|
+
}
|
|
92
|
+
}
|
|
92
93
|
}
|
|
93
|
-
|
|
94
|
-
}
|
|
94
|
+
}
|
package/scripts/command.txt
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
em++ -O3 ../wasm/pcaExport.cpp ../wasm/PCA/PCA.cpp ../wasm/plsExport.cpp ../wasm/PLS/PLS.cpp ../wasm/svmApi.cpp -o ../wasm/EDA.js -s TOTAL_MEMORY=268435456 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME="exportEDA" -s EXPORTED_FUNCTIONS=["_principalComponentAnalysis","_error","_partialLeastSquareRegression","_generateDataset","_normalizeDataset","_trainLSSVM","_predictByLSSVM","_trainAndAnalyzeLSSVM","_malloc","_free"] -s EXPORTED_RUNTIME_METHODS=["cwrap","ccall"] -sENVIRONMENT=web,worker
|
|
1
|
+
em++ -O3 ../wasm/pcaExport.cpp ../wasm/PCA/PCA.cpp ../wasm/plsExport.cpp ../wasm/PLS/PLS.cpp ../wasm/svmApi.cpp ../wasm/regression-api.cpp -o ../wasm/EDA.js -s TOTAL_MEMORY=268435456 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME="exportEDA" -s EXPORTED_FUNCTIONS=["_principalComponentAnalysis","_error","_partialLeastSquareRegression","_generateDataset","_normalizeDataset","_trainLSSVM","_predictByLSSVM","_trainAndAnalyzeLSSVM","_fitLinearRegressionParamsWithDataNormalizing","_fitLinearRegressionParams","_malloc","_free"] -s EXPORTED_RUNTIME_METHODS=["cwrap","ccall"] -sENVIRONMENT=web,worker
|
package/scripts/func.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"pcaExport.cpp": {"principalComponentAnalysis": {"arguments": {"columns": {"type": "floatColumns"}, "componentsCount": {"type": "num"}, "centerNum": {"type": "num"}, "scaleNum": {"type": "num"}, "components": {"type": "newFloatColumns", "numOfRows": {"ref": "columns", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "tableFromColumns", "source": "components"}, "annotation": ["//name: principalComponentAnalysis", "//input: dataframe table", "//input: column_list columns", "//input: int componentsCount", "//input: int centerNum", "//input: int scaleNum", "//output: dataframe result "], "prototype": "principalComponentAnalysis(table, columns, componentsCount, centerNum, scaleNum)", "prototypeForWebWorker": "principalComponentAnalysisInWebWorker(table, columns, componentsCount, centerNum, scaleNum)", "callArgs": "[columns, componentsCount, centerNum, scaleNum]"}, "error": {"arguments": {"col1": {"type": "floatColumn"}, "col2": {"type": "floatColumn"}}, "output": {"type": "double", "source": "_callResult"}, "annotation": ["//name: error", "//input: dataframe df", "//input: column col1", "//input: column col2", "//output: double mad "], "prototype": "error(df, col1, col2)", "prototypeForWebWorker": "errorInWebWorker(df, col1, col2)", "callArgs": "[col1, col2]"}}, "PCA/PCA.cpp": {}, "plsExport.cpp": {"partialLeastSquareRegression": {"arguments": {"features": {"type": "floatColumns"}, "predict": {"type": "floatColumn"}, "componentsCount": {"type": "num"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "predict", "value": "numOfRows"}}, "regressionCoefficients": {"type": "newFloatColumn", "numOfRows": {"ref": "features", "value": "numOfColumns"}}, "tScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "uScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, 
"numOfColumns": {"ref": "componentsCount", "value": "data"}}, "xLoadings": {"type": "newFloatColumns", "numOfRows": {"ref": "features", "value": "numOfColumns"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "yLoadings": {"type": "newFloatColumn", "numOfRows": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings', 'yLoadings']"}, "annotation": ["//name: partialLeastSquareRegression", "//input: dataframe table", "//input: column_list features", "//input: column predict", "//input: int componentsCount"], "prototype": "partialLeastSquareRegression(table, features, predict, componentsCount)", "prototypeForWebWorker": "partialLeastSquareRegressionInWebWorker(table, features, predict, componentsCount)", "callArgs": "[features, predict, componentsCount]"}}, "PLS/PLS.cpp": {}, "svmApi.cpp": {"generateDataset": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "samplesCount": {"type": "num"}, "featuresCount": {"type": "num"}, "min": {"type": "num"}, "max": {"type": "num"}, "violatorsPercentage": {"type": "num"}, "dataset": {"type": "newFloatColumns", "numOfRows": {"ref": "samplesCount", "value": "data"}, "numOfColumns": {"ref": "featuresCount", "value": "data"}}, "labels": {"type": "newFloatColumn", "numOfRows": {"ref": "samplesCount", "value": "data"}}}, "output": {"type": "objects", "source": "['dataset', 'labels']"}, "annotation": ["//name: generateDataset", "//input: int kernel", "//input: column kernelParams", "//input: int samplesCount", "//input: int featuresCount", "//input: double min", "//input: double max", "//input: double violatorsPercentage"], "prototype": "generateDataset(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "prototypeForWebWorker": "generateDatasetInWebWorker(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "callArgs": 
"[kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage]"}, "normalizeDataset": {"arguments": {"data": {"type": "floatColumns"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "data", "value": "numOfColumns"}, "numOfColumns": {"ref": "data", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs']"}, "annotation": ["//name: normalizeDataset", "//input: column_list data"], "prototype": "normalizeDataset(data)", "prototypeForWebWorker": "normalizeDatasetInWebWorker(data)", "callArgs": "[data]"}, "trainLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights']"}, "annotation": ["//name: trainLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: column_list dataset", "//input: column labels"], 
"prototype": "trainLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "prototypeForWebWorker": "trainLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels]"}, "predictByLSSVM": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "normalizedData": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "means": {"type": "floatColumn"}, "stdDevs": {"type": "floatColumn"}, "modelParams": {"type": "floatColumn"}, "precomputedWeights": {"type": "floatColumn"}, "targetData": {"type": "floatColumns"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "targetData", "value": "numOfRows"}}}, "output": {"type": "column", "source": "prediction"}, "annotation": ["//name: predictByLSSVM", "//input: int kernel", "//input: column kernelParams", "//input: column_list normalizedData", "//input: column labels", "//input: column means", "//input: column stdDevs", "//input: column modelParams", "//input: column precomputedWeights", "//input: column_list targetData", "//output: column prediction"], "prototype": "predictByLSSVM(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "prototypeForWebWorker": "predictByLSSVMInWebWorker(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "callArgs": "[kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData]"}, "trainAndAnalyzeLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "confusionMatrixElementsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, 
"normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}, "predictedLabels": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "correctness": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "consfusionMatrix": {"type": "newIntColumn", "numOfRows": {"ref": "confusionMatrixElementsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights', 'predictedLabels', 'correctness', 'consfusionMatrix']"}, "annotation": ["//name: trainAndAnalyzeLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: int confusionMatrixElementsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainAndAnalyzeLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "prototypeForWebWorker": "trainAndAnalyzeLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels]"}}}
|
|
1
|
+
{"pcaExport.cpp": {"principalComponentAnalysis": {"arguments": {"columns": {"type": "floatColumns"}, "componentsCount": {"type": "num"}, "centerNum": {"type": "num"}, "scaleNum": {"type": "num"}, "components": {"type": "newFloatColumns", "numOfRows": {"ref": "columns", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "tableFromColumns", "source": "components"}, "annotation": ["//name: principalComponentAnalysis", "//input: dataframe table", "//input: column_list columns", "//input: int componentsCount", "//input: int centerNum", "//input: int scaleNum", "//output: dataframe result "], "prototype": "principalComponentAnalysis(table, columns, componentsCount, centerNum, scaleNum)", "prototypeForWebWorker": "principalComponentAnalysisInWebWorker(table, columns, componentsCount, centerNum, scaleNum)", "callArgs": "[columns, componentsCount, centerNum, scaleNum]"}, "error": {"arguments": {"col1": {"type": "floatColumn"}, "col2": {"type": "floatColumn"}}, "output": {"type": "double", "source": "_callResult"}, "annotation": ["//name: error", "//input: dataframe df", "//input: column col1", "//input: column col2", "//output: double mad "], "prototype": "error(df, col1, col2)", "prototypeForWebWorker": "errorInWebWorker(df, col1, col2)", "callArgs": "[col1, col2]"}}, "PCA/PCA.cpp": {}, "plsExport.cpp": {"partialLeastSquareRegression": {"arguments": {"features": {"type": "floatColumns"}, "predict": {"type": "floatColumn"}, "componentsCount": {"type": "num"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "predict", "value": "numOfRows"}}, "regressionCoefficients": {"type": "newFloatColumn", "numOfRows": {"ref": "features", "value": "numOfColumns"}}, "tScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "uScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, 
"numOfColumns": {"ref": "componentsCount", "value": "data"}}, "xLoadings": {"type": "newFloatColumns", "numOfRows": {"ref": "features", "value": "numOfColumns"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "yLoadings": {"type": "newFloatColumn", "numOfRows": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings', 'yLoadings']"}, "annotation": ["//name: partialLeastSquareRegression", "//input: dataframe table", "//input: column_list features", "//input: column predict", "//input: int componentsCount"], "prototype": "partialLeastSquareRegression(table, features, predict, componentsCount)", "prototypeForWebWorker": "partialLeastSquareRegressionInWebWorker(table, features, predict, componentsCount)", "callArgs": "[features, predict, componentsCount]"}}, "PLS/PLS.cpp": {}, "svmApi.cpp": {"generateDataset": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "samplesCount": {"type": "num"}, "featuresCount": {"type": "num"}, "min": {"type": "num"}, "max": {"type": "num"}, "violatorsPercentage": {"type": "num"}, "dataset": {"type": "newFloatColumns", "numOfRows": {"ref": "samplesCount", "value": "data"}, "numOfColumns": {"ref": "featuresCount", "value": "data"}}, "labels": {"type": "newFloatColumn", "numOfRows": {"ref": "samplesCount", "value": "data"}}}, "output": {"type": "objects", "source": "['dataset', 'labels']"}, "annotation": ["//name: generateDataset", "//input: int kernel", "//input: column kernelParams", "//input: int samplesCount", "//input: int featuresCount", "//input: double min", "//input: double max", "//input: double violatorsPercentage"], "prototype": "generateDataset(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "prototypeForWebWorker": "generateDatasetInWebWorker(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "callArgs": 
"[kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage]"}, "normalizeDataset": {"arguments": {"data": {"type": "floatColumns"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "data", "value": "numOfColumns"}, "numOfColumns": {"ref": "data", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs']"}, "annotation": ["//name: normalizeDataset", "//input: column_list data"], "prototype": "normalizeDataset(data)", "prototypeForWebWorker": "normalizeDatasetInWebWorker(data)", "callArgs": "[data]"}, "trainLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights']"}, "annotation": ["//name: trainLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: column_list dataset", "//input: column labels"], 
"prototype": "trainLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "prototypeForWebWorker": "trainLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels]"}, "predictByLSSVM": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "normalizedData": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "means": {"type": "floatColumn"}, "stdDevs": {"type": "floatColumn"}, "modelParams": {"type": "floatColumn"}, "precomputedWeights": {"type": "floatColumn"}, "targetData": {"type": "floatColumns"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "targetData", "value": "numOfRows"}}}, "output": {"type": "column", "source": "prediction"}, "annotation": ["//name: predictByLSSVM", "//input: int kernel", "//input: column kernelParams", "//input: column_list normalizedData", "//input: column labels", "//input: column means", "//input: column stdDevs", "//input: column modelParams", "//input: column precomputedWeights", "//input: column_list targetData", "//output: column prediction"], "prototype": "predictByLSSVM(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "prototypeForWebWorker": "predictByLSSVMInWebWorker(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "callArgs": "[kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData]"}, "trainAndAnalyzeLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "confusionMatrixElementsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, 
"normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}, "predictedLabels": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "correctness": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "consfusionMatrix": {"type": "newIntColumn", "numOfRows": {"ref": "confusionMatrixElementsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights', 'predictedLabels', 'correctness', 'consfusionMatrix']"}, "annotation": ["//name: trainAndAnalyzeLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: int confusionMatrixElementsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainAndAnalyzeLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "prototypeForWebWorker": "trainAndAnalyzeLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels]"}}, "regression-api.cpp": {"fitLinearRegressionParamsWithDataNormalizing": {"arguments": {"features": {"type": "floatColumns"}, "featureAvgs": {"type": 
"floatColumn"}, "featureStdDevs": {"type": "floatColumn"}, "targets": {"type": "floatColumn"}, "targetsAvg": {"type": "num"}, "targetsStdDev": {"type": "num"}, "paramsCount": {"type": "num"}, "params": {"type": "newFloatColumn", "numOfRows": {"ref": "paramsCount", "value": "data"}}}, "output": {"type": "column", "source": "params"}, "annotation": ["//name: fitLinearRegressionParamsWithDataNormalizing", "//input: column_list features", "//input: column featureAvgs", "//input: column featureStdDevs", "//input: column targets", "//input: double targetsAvg", "//input: double targetsStdDev", "//input: int paramsCount", "//output: column params"], "prototype": "fitLinearRegressionParamsWithDataNormalizing(features, featureAvgs, featureStdDevs, targets, targetsAvg, targetsStdDev, paramsCount)", "prototypeForWebWorker": "fitLinearRegressionParamsWithDataNormalizingInWebWorker(features, featureAvgs, featureStdDevs, targets, targetsAvg, targetsStdDev, paramsCount)", "callArgs": "[features, featureAvgs, featureStdDevs, targets, targetsAvg, targetsStdDev, paramsCount]"}, "fitLinearRegressionParams": {"arguments": {"features": {"type": "floatColumns"}, "targets": {"type": "floatColumn"}, "paramsCount": {"type": "num"}, "params": {"type": "newFloatColumn", "numOfRows": {"ref": "paramsCount", "value": "data"}}}, "output": {"type": "column", "source": "params"}, "annotation": ["//name: fitLinearRegressionParams", "//input: column_list features", "//input: column targets", "//input: int paramsCount", "//output: column params"], "prototype": "fitLinearRegressionParams(features, targets, paramsCount)", "prototypeForWebWorker": "fitLinearRegressionParamsInWebWorker(features, targets, paramsCount)", "callArgs": "[features, targets, paramsCount]"}}}
|
package/scripts/module.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "EDA",
|
|
3
3
|
"folder": "../wasm",
|
|
4
|
-
"source": ["pcaExport.cpp", "PCA/PCA.cpp", "plsExport.cpp", "PLS/PLS.cpp", "svmApi.cpp"],
|
|
4
|
+
"source": ["pcaExport.cpp", "PCA/PCA.cpp", "plsExport.cpp", "PLS/PLS.cpp", "svmApi.cpp", "regression-api.cpp"],
|
|
5
5
|
"optimizationMode": "-O3",
|
|
6
6
|
"packageFile": "../src/package.ts",
|
|
7
7
|
"packageJsonFile": "../package.json",
|
package/src/package.ts
CHANGED
|
@@ -10,7 +10,7 @@ import {computePCA} from './eda-tools';
|
|
|
10
10
|
import {addPrefixToEachColumnName, addOneWayAnovaVizualization} from './eda-ui';
|
|
11
11
|
import {testDataForBinaryClassification} from './data-generators';
|
|
12
12
|
import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
|
|
13
|
-
getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
|
|
13
|
+
getTrainedModel, getPrediction, isApplicableSVM, showTrainReport, getPackedModel} from './svm';
|
|
14
14
|
|
|
15
15
|
import {PLS_ANALYSIS} from './pls/pls-constants';
|
|
16
16
|
import {runMVA, runDemoMVA, getPlsAnalysis, PlsOutput} from './pls/pls-tools';
|
|
@@ -29,6 +29,9 @@ import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimen
|
|
|
29
29
|
import {runKNNImputer} from './missing-values-imputation/ui';
|
|
30
30
|
import {MCLEditor} from '@datagrok-libraries/ml/src/MCL/mcl-editor';
|
|
31
31
|
import {markovCluster} from '@datagrok-libraries/ml/src/MCL/clustering-view';
|
|
32
|
+
import {MCL_OPTIONS_TAG, MCLSerializableOptions} from '@datagrok-libraries/ml/src/MCL';
|
|
33
|
+
|
|
34
|
+
import {getLinearRegressionParams, getPredictionByLinearRegression, getTestDatasetForLinearRegression} from './regression';
|
|
32
35
|
|
|
33
36
|
export const _package = new DG.Package();
|
|
34
37
|
|
|
@@ -189,7 +192,7 @@ export function GetMCLEditor(call: DG.FuncCall): void {
|
|
|
189
192
|
df: params.table, cols: params.columns, metrics: params.distanceMetrics,
|
|
190
193
|
weights: params.weights, aggregationMethod: params.aggreaggregationMethod, preprocessingFuncs: params.preprocessingFunctions,
|
|
191
194
|
preprocessingFuncArgs: params.preprocessingFuncArgs, threshold: params.threshold, maxIterations: params.maxIterations,
|
|
192
|
-
useWebGPU: params.useWebGPU,
|
|
195
|
+
useWebGPU: params.useWebGPU, inflate: params.inflateFactor,
|
|
193
196
|
}).call(true);
|
|
194
197
|
}).show();
|
|
195
198
|
} catch (err: any) {
|
|
@@ -214,13 +217,46 @@ export function GetMCLEditor(call: DG.FuncCall): void {
|
|
|
214
217
|
//input: int threshold = 80
|
|
215
218
|
//input: int maxIterations = 10
|
|
216
219
|
//input: bool useWebGPU = false
|
|
220
|
+
//input: double inflate = 2
|
|
217
221
|
//editor: EDA: GetMCLEditor
|
|
218
222
|
export async function MCL(df: DG.DataFrame, cols: DG.Column[], metrics: KnownMetrics[],
  weights: number[], aggregationMethod: DistanceAggregationMethod, preprocessingFuncs: (DG.Func | null | undefined)[],
  preprocessingFuncArgs: any[], threshold: number = 80, maxIterations: number = 10, useWebGPU: boolean = false, inflate: number = 2,
): Promise< DG.ScatterPlotViewer | undefined> {
  // Stores the clustering options on the dataframe as a JSON tag and adds a scatter plot
  // whose initialization function ('EDA:MCLInitializationFunction') is named in the viewer settings.
  // The default of `inflate` matches the value declared in the function annotation
  // ('//input: double inflate = 2'), so direct TypeScript callers and annotation-driven calls agree.

  // Reuse the table view already opened for this dataframe name; otherwise open a new one.
  const tv = grok.shell.tableView(df.name) ?? grok.shell.addTableView(df);

  // Columns and functions are stored by name so that the options are JSON-serializable.
  const serializedOptions: string = JSON.stringify({
    cols: cols.map((col) => col.name),
    metrics: metrics,
    weights: weights,
    aggregationMethod: aggregationMethod,
    preprocessingFuncs: preprocessingFuncs.map((func) => func?.name ?? null),
    preprocessingFuncArgs: preprocessingFuncArgs,
    threshold: threshold,
    maxIterations: maxIterations,
    useWebGPU: useWebGPU,
    inflate: inflate,
  } satisfies MCLSerializableOptions);
  df.setTag(MCL_OPTIONS_TAG, serializedOptions);

  const sc = tv.addViewer(DG.VIEWER.SCATTER_PLOT, {title: 'MCL', initializationFunction: 'EDA:MCLInitializationFunction'}) as DG.ScatterPlotViewer;
  return sc;
}
|
|
244
|
+
|
|
245
|
+
//name: MCLInitializationFunction
|
|
246
|
+
//input: viewer sc
|
|
247
|
+
export async function MCLInitializationFunction(sc: DG.ScatterPlotViewer) {
  // Reads the MCL options tag from the scatter plot's dataframe, resolves the stored
  // column and function names, and runs markovCluster with the given viewer.
  const table = sc.dataFrame;
  if (table === null)
    throw new Error('Data frame of the scatter plot is null');

  const serialized = table.getTag(MCL_OPTIONS_TAG);
  if (!serialized)
    throw new Error('MCL options tag on the dataFrame is not found');

  const opts: MCLSerializableOptions = JSON.parse(serialized);

  // Names stored in the tag are mapped back to columns / functions.
  const columns = opts.cols.map((name) => table.columns.byName(name));
  const funcs = opts.preprocessingFuncs.map((name) => (name ? DG.Func.byName(name) : null));

  const clustering = await markovCluster(
    table, columns, opts.metrics, opts.weights, opts.aggregationMethod,
    funcs, opts.preprocessingFuncArgs, opts.threshold, opts.maxIterations,
    opts.useWebGPU, opts.inflate, sc,
  );

  return clustering?.sc;
}
|
|
226
262
|
|
|
@@ -300,15 +336,10 @@ export async function testDataLinearNonSeparable(name: string, sigma: number, sa
|
|
|
300
336
|
//input: dataframe df
|
|
301
337
|
//input: string predict_column
|
|
302
338
|
//input: double gamma = 1.0 {category: Hyperparameters}
|
|
303
|
-
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
304
339
|
//output: dynamic model
|
|
305
340
|
export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: string,
  gamma: number): Promise<any> {
  // Trains a model with the LINEAR kernel and the given gamma, then returns it in packed form.
  const fitted = await getTrainedModel({gamma, kernel: LINEAR}, df, predict_column);
  const packed = getPackedModel(fitted);
  return packed;
}
|
|
314
345
|
|
|
@@ -322,6 +353,29 @@ export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promis
|
|
|
322
353
|
return await getPrediction(df, model);
|
|
323
354
|
}
|
|
324
355
|
|
|
356
|
+
//name: isApplicableLinearKernelSVM
|
|
357
|
+
//meta.mlname: linear kernel LS-SVM
|
|
358
|
+
//meta.mlrole: isApplicable
|
|
359
|
+
//input: dataframe df
|
|
360
|
+
//input: string predict_column
|
|
361
|
+
//output: bool result
|
|
362
|
+
export async function isApplicableLinearKernelSVM(df: DG.DataFrame, predict_column: string): Promise<boolean> {
  // The applicability check is delegated to the shared isApplicableSVM helper.
  const verdict = isApplicableSVM(df, predict_column);
  return verdict;
}
|
|
365
|
+
|
|
366
|
+
//name: visualizeLinearKernelSVM
|
|
367
|
+
//meta.mlname: linear kernel LS-SVM
|
|
368
|
+
//meta.mlrole: visualize
|
|
369
|
+
//input: dataframe df
|
|
370
|
+
//input: string target_column
|
|
371
|
+
//input: string predict_column
|
|
372
|
+
//input: dynamic model
|
|
373
|
+
//output: dynamic widget
|
|
374
|
+
export async function visualizeLinearKernelSVM(df: DG.DataFrame, target_column: string, predict_column: string, model: any): Promise<any> {
  // Only df and model are forwarded to showTrainReport;
  // target_column and predict_column are accepted but not used here.
  const report = showTrainReport(df, model);
  return report;
}
|
|
377
|
+
|
|
378
|
+
|
|
325
379
|
//name: trainRBFkernelSVM
|
|
326
380
|
//meta.mlname: RBF-kernel LS-SVM
|
|
327
381
|
//meta.mlrole: train
|
|
@@ -329,17 +383,13 @@ export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promis
|
|
|
329
383
|
//input: string predict_column
|
|
330
384
|
//input: double gamma = 1.0 {category: Hyperparameters}
|
|
331
385
|
//input: double sigma = 1.5 {category: Hyperparameters}
|
|
332
|
-
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
333
386
|
//output: dynamic model
|
|
334
387
|
export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string,
  gamma: number, sigma: number): Promise<any> {
  // Trains a model with the RBF kernel (hyperparameters: gamma, sigma) and returns it in packed form.
  const fitted = await getTrainedModel({gamma, kernel: RBF, sigma}, df, predict_column);
  const packed = getPackedModel(fitted);
  return packed;
}
|
|
345
395
|
|
|
@@ -353,6 +403,28 @@ export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<D
|
|
|
353
403
|
return await getPrediction(df, model);
|
|
354
404
|
}
|
|
355
405
|
|
|
406
|
+
//name: isApplicableRBFkernelSVM
|
|
407
|
+
//meta.mlname: RBF-kernel LS-SVM
|
|
408
|
+
//meta.mlrole: isApplicable
|
|
409
|
+
//input: dataframe df
|
|
410
|
+
//input: string predict_column
|
|
411
|
+
//output: bool result
|
|
412
|
+
export async function isApplicableRBFkernelSVM(df: DG.DataFrame, predict_column: string): Promise<boolean> {
  // The applicability check is delegated to the shared isApplicableSVM helper.
  const verdict = isApplicableSVM(df, predict_column);
  return verdict;
}
|
|
415
|
+
|
|
416
|
+
//name: visualizeRBFkernelSVM
|
|
417
|
+
//meta.mlname: RBF-kernel LS-SVM
|
|
418
|
+
//meta.mlrole: visualize
|
|
419
|
+
//input: dataframe df
|
|
420
|
+
//input: string target_column
|
|
421
|
+
//input: string predict_column
|
|
422
|
+
//input: dynamic model
|
|
423
|
+
//output: dynamic widget
|
|
424
|
+
export async function visualizeRBFkernelSVM(df: DG.DataFrame, target_column: string, predict_column: string, model: any): Promise<any> {
  // Only df and model are forwarded to showTrainReport;
  // target_column and predict_column are accepted but not used here.
  const report = showTrainReport(df, model);
  return report;
}
|
|
427
|
+
|
|
356
428
|
//name: trainPolynomialKernelSVM
|
|
357
429
|
//meta.mlname: polynomial kernel LS-SVM
|
|
358
430
|
//meta.mlrole: train
|
|
@@ -361,17 +433,13 @@ export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<D
|
|
|
361
433
|
//input: double gamma = 1.0 {category: Hyperparameters}
|
|
362
434
|
//input: double c = 1 {category: Hyperparameters}
|
|
363
435
|
//input: double d = 2 {category: Hyperparameters}
|
|
364
|
-
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
365
436
|
//output: dynamic model
|
|
366
437
|
export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column: string,
  gamma: number, c: number, d: number): Promise<any> {
  // Trains a model with the POLYNOMIAL kernel; c and d are passed as cParam and dParam.
  // The trained model is returned in packed form.
  const fitted = await getTrainedModel({gamma, kernel: POLYNOMIAL, cParam: c, dParam: d}, df, predict_column);
  const packed = getPackedModel(fitted);
  return packed;
} // trainPolynomialKernelSVM
|
|
377
445
|
|
|
@@ -385,6 +453,28 @@ export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Pr
|
|
|
385
453
|
return await getPrediction(df, model);
|
|
386
454
|
}
|
|
387
455
|
|
|
456
|
+
//name: isApplicablePolynomialKernelSVM
|
|
457
|
+
//meta.mlname: polynomial kernel LS-SVM
|
|
458
|
+
//meta.mlrole: isApplicable
|
|
459
|
+
//input: dataframe df
|
|
460
|
+
//input: string predict_column
|
|
461
|
+
//output: bool result
|
|
462
|
+
export async function isApplicablePolynomialKernelSVM(df: DG.DataFrame, predict_column: string): Promise<boolean> {
  // The applicability check is delegated to the shared isApplicableSVM helper.
  const verdict = isApplicableSVM(df, predict_column);
  return verdict;
}
|
|
465
|
+
|
|
466
|
+
//name: visualizePolynomialKernelSVM
|
|
467
|
+
//meta.mlname: polynomial kernel LS-SVM
|
|
468
|
+
//meta.mlrole: visualize
|
|
469
|
+
//input: dataframe df
|
|
470
|
+
//input: string target_column
|
|
471
|
+
//input: string predict_column
|
|
472
|
+
//input: dynamic model
|
|
473
|
+
//output: dynamic widget
|
|
474
|
+
export async function visualizePolynomialKernelSVM(df: DG.DataFrame, target_column: string, predict_column: string, model: any): Promise<any> {
  // Only df and model are forwarded to showTrainReport;
  // target_column and predict_column are accepted but not used here.
  const report = showTrainReport(df, model);
  return report;
}
|
|
477
|
+
|
|
388
478
|
//name: trainSigmoidKernelSVM
|
|
389
479
|
//meta.mlname: sigmoid kernel LS-SVM
|
|
390
480
|
//meta.mlrole: train
|
|
@@ -393,17 +483,13 @@ export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Pr
|
|
|
393
483
|
//input: double gamma = 1.0 {category: Hyperparameters}
|
|
394
484
|
//input: double kappa = 1 {category: Hyperparameters}
|
|
395
485
|
//input: double theta = 1 {category: Hyperparameters}
|
|
396
|
-
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
397
486
|
//output: dynamic model
|
|
398
487
|
export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: string,
  gamma: number, kappa: number, theta: number): Promise<any> {
  // Trains a model with the SIGMOID kernel (hyperparameters: gamma, kappa, theta)
  // and returns it in packed form.
  const fitted = await getTrainedModel({gamma, kernel: SIGMOID, kappa, theta}, df, predict_column);
  const packed = getPackedModel(fitted);
  return packed;
} // trainSigmoidKernelSVM
|
|
409
495
|
|
|
@@ -417,6 +503,28 @@ export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promi
|
|
|
417
503
|
return await getPrediction(df, model);
|
|
418
504
|
}
|
|
419
505
|
|
|
506
|
+
//name: isApplicableSigmoidKernelSVM
|
|
507
|
+
//meta.mlname: sigmoid kernel LS-SVM
|
|
508
|
+
//meta.mlrole: isApplicable
|
|
509
|
+
//input: dataframe df
|
|
510
|
+
//input: string predict_column
|
|
511
|
+
//output: bool result
|
|
512
|
+
export async function isApplicableSigmoidKernelSVM(df: DG.DataFrame, predict_column: string): Promise<boolean> {
  // The applicability check is delegated to the shared isApplicableSVM helper.
  const verdict = isApplicableSVM(df, predict_column);
  return verdict;
}
|
|
515
|
+
|
|
516
|
+
//name: visualizeSigmoidKernelSVM
|
|
517
|
+
//meta.mlname: sigmoid kernel LS-SVM
|
|
518
|
+
//meta.mlrole: visualize
|
|
519
|
+
//input: dataframe df
|
|
520
|
+
//input: string target_column
|
|
521
|
+
//input: string predict_column
|
|
522
|
+
//input: dynamic model
|
|
523
|
+
//output: dynamic widget
|
|
524
|
+
export async function visualizeSigmoidKernelSVM(df: DG.DataFrame, target_column: string, predict_column: string, model: any): Promise<any> {
  // Only df and model are forwarded to showTrainReport;
  // target_column and predict_column are accepted but not used here.
  const report = showTrainReport(df, model);
  return report;
}
|
|
527
|
+
|
|
420
528
|
//top-menu: ML | Analyze | ANOVA...
|
|
421
529
|
//name: ANOVA
|
|
422
530
|
//description: One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the studied feature.
|
|
@@ -436,3 +544,87 @@ export function anova(table: DG.DataFrame, factor: DG.Column, feature: DG.Column
|
|
|
436
544
|
export function kNNImputation() {
|
|
437
545
|
runKNNImputer();
|
|
438
546
|
}
|
|
547
|
+
|
|
548
|
+
//name: linearRegression
|
|
549
|
+
//description: Linear Regression demo
|
|
550
|
+
//input: dataframe table
|
|
551
|
+
//input: column_list features {type: numerical}
|
|
552
|
+
//input: column target {type: numerical}
|
|
553
|
+
//input: bool plot = true {caption: plot}
|
|
554
|
+
export async function linearRegression(table: DG.DataFrame, features: DG.ColumnList, target: DG.Column, plot: boolean): Promise<void> {
  // Fits linear regression parameters for `target` on `features`, appends the prediction
  // column to `table` and, when `plot` is set, adds a target-vs-prediction scatter plot.
  // Fit and predict durations are logged to the console.
  const t1 = performance.now();
  const params = await getLinearRegressionParams(features, target);
  const t2 = performance.now();
  console.log(`Fit: ${t2 - t1} ms.`);
  const prediction = getPredictionByLinearRegression(features, params);
  console.log(`Predict: ${performance.now() - t2} ms.`);

  // Rename the prediction column so it does not clash with an existing column name.
  prediction.name = table.columns.getUnusedName(prediction.name);

  table.columns.add(prediction);

  if (plot) {
    // Same fallback as MCL uses: if no view is opened for this table name, open one
    // instead of calling addViewer on a missing view.
    const view = grok.shell.tableView(table.name) ?? grok.shell.addTableView(table);
    view.addViewer(DG.VIEWER.SCATTER_PLOT, {
      xColumnName: target.name,
      yColumnName: prediction.name,
      showRegressionLine: true,
    });
  }
}
|
|
575
|
+
|
|
576
|
+
//name: generateDatasetForLinearRegressionTest
|
|
577
|
+
//description: Create demo dataset for linear regression
|
|
578
|
+
//input: int rowCount = 10000 {min: 1000; max: 10000000; step: 10000}
|
|
579
|
+
//input: int colCount = 10 {min: 1; max: 1000; step: 10}
|
|
580
|
+
//input: double featuresScale = 10 {min: -1000; max: 1000; step: 10}
|
|
581
|
+
//input: double featuresBias = 10 {min: -1000; max: 1000; step: 10}
|
|
582
|
+
//input: double paramsScale = 10 {min: -1000; max: 1000; step: 10}
|
|
583
|
+
//input: double paramsBias = 10 {min: -1000; max: 1000; step: 10}
|
|
584
|
+
//output: dataframe table
|
|
585
|
+
export function generateDatasetForLinearRegressionTest(rowCount: number, colCount: number,
  featuresScale: number, featuresBias: number, paramsScale: number, paramsBias: number): DG.DataFrame {
  // All arguments are passed through unchanged to getTestDatasetForLinearRegression.
  const dataset = getTestDatasetForLinearRegression(
    rowCount, colCount,
    featuresScale, featuresBias,
    paramsScale, paramsBias,
  );
  return dataset;
}
|
|
589
|
+
|
|
590
|
+
//name: trainLinearRegression
|
|
591
|
+
//meta.mlname: Linear Regression
|
|
592
|
+
//meta.mlrole: train
|
|
593
|
+
//input: dataframe df
|
|
594
|
+
//input: string predict_column
|
|
595
|
+
//output: dynamic model
|
|
596
|
+
export async function trainLinearRegression(df: DG.DataFrame, predict_column: string): Promise<Uint8Array> {
  // Fits linear regression parameters for the column named `predict_column`
  // and returns them as raw bytes.
  const features = df.columns;
  const target = features.byName(predict_column);

  // Fail early with a clear message instead of passing a missing column to the fitting routine.
  if (target == null)
    throw new Error(`Column '${predict_column}' is not found in the dataframe`);

  // NOTE: this removes the target column from `df` itself, so that the column list
  // passed below contains the feature columns only.
  features.remove(predict_column);

  const params = await getLinearRegressionParams(features, target);

  // Expose exactly the bytes occupied by `params`: `.buffer` alone refers to the whole
  // underlying ArrayBuffer, which can be larger than the typed-array view.
  return new Uint8Array(params.buffer, params.byteOffset, params.byteLength);
}
|
|
605
|
+
|
|
606
|
+
//name: applyLinearRegression
|
|
607
|
+
//meta.mlname: Linear Regression
|
|
608
|
+
//meta.mlrole: apply
|
|
609
|
+
//input: dataframe df
|
|
610
|
+
//input: dynamic model
|
|
611
|
+
//output: dataframe table
|
|
612
|
+
export function applyLinearRegression(df: DG.DataFrame, model: any): DG.DataFrame {
  // Restores Float32 regression parameters from the packed model bytes and returns
  // a dataframe with the single prediction column computed from the columns of `df`.
  const features = df.columns;

  // `slice()` copies the bytes of the view into a fresh, zero-offset buffer.
  // Reading `model.buffer` directly would ignore the view's byteOffset/byteLength and
  // could pick up unrelated bytes (or throw on a misaligned length) when the model
  // is a view into a larger buffer.
  const bytes = (model as Uint8Array).slice();
  const params = new Float32Array(bytes.buffer);

  return DG.DataFrame.fromColumns([getPredictionByLinearRegression(features, params)]);
}
|
|
617
|
+
|
|
618
|
+
//name: isApplicableLinearRegression
|
|
619
|
+
//meta.mlname: Linear Regression
|
|
620
|
+
//meta.mlrole: isApplicable
|
|
621
|
+
//input: dataframe df
|
|
622
|
+
//input: string predict_column
|
|
623
|
+
//output: bool result
|
|
624
|
+
export function isApplicableLinearRegression(df: DG.DataFrame, predict_column: string): boolean {
  // Returns true only when every column of `df` has one of the types listed below.
  // `predict_column` is not consulted: all columns are checked alike.
  const numericTypes = new Set<string>([
    DG.COLUMN_TYPE.INT,
    DG.COLUMN_TYPE.FLOAT,
    DG.COLUMN_TYPE.QNUM,
    DG.COLUMN_TYPE.BIG_INT,
  ]);

  for (const column of df.columns) {
    const isNumeric = numericTypes.has(column.type);
    if (!isNumeric)
      return false;
  }

  return true;
}
|