@datagrok/eda 1.1.24 → 1.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,94 +1,94 @@
1
1
  {
2
- "name": "@datagrok/eda",
3
- "friendlyName": "EDA",
4
- "version": "1.1.24",
5
- "description": "Exploratory Data Analysis Tools",
6
- "dependencies": {
7
- "@datagrok-libraries/math": "^1.1.7",
8
- "@datagrok-libraries/ml": "^6.6.7",
9
- "@datagrok-libraries/tutorials": "^1.3.6",
10
- "@datagrok-libraries/utils": "^4.1.44",
11
- "@keckelt/tsne": "^1.0.2",
12
- "@webgpu/types": "^0.1.40",
13
- "cash-dom": "^8.1.1",
14
- "datagrok-api": "^1.16.0",
15
- "dayjs": "^1.11.9",
16
- "jstat": "^1.9.6",
17
- "source-map-loader": "^4.0.1",
18
- "umap-js": "^1.3.3",
19
- "worker-loader": "latest"
20
- },
21
- "author": {
22
- "name": "Viktor Makarichev",
23
- "email": "vmakarichev@datagrok.ai"
24
- },
25
- "devDependencies": {
26
- "@typescript-eslint/eslint-plugin": "^5.32.0",
27
- "@typescript-eslint/parser": "^5.32.0",
28
- "eslint": "^8.21.0",
29
- "eslint-config-google": "^0.14.0",
30
- "ts-loader": "latest",
31
- "typescript": "latest",
32
- "webpack": "latest",
33
- "webpack-cli": "latest",
34
- "css-loader": "latest",
35
- "style-loader": "latest"
36
- },
37
- "scripts": {
38
- "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/tutorials",
39
- "debug-eda": "webpack && grok publish",
40
- "release-eda": "webpack && grok publish --release",
41
- "build-eda": "webpack",
42
- "build": "webpack",
43
- "debug-eda-dev": "webpack && grok publish dev",
44
- "release-eda-dev": "webpack && grok publish dev --release",
45
- "debug-eda-local": "webpack && grok publish local",
46
- "release-eda-local": "webpack && grok publish local --release",
47
- "build-all": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/tutorials run build && npm run build"
48
- },
49
- "canEdit": [
50
- "Developers"
51
- ],
52
- "canView": [
53
- "All users"
54
- ],
55
- "repository": {
56
- "type": "git",
57
- "url": "https://github.com/datagrok-ai/public.git",
58
- "directory": "packages/EDA"
59
- },
60
- "category": "Machine Learning",
61
- "sources": [
62
- "wasm/EDA.js"
63
- ],
64
- "meta": {
65
- "menu": {
66
- "ML": {
67
- "Tools": {
68
- "Impute Missing Values...": null,
69
- "Random Data...": null
70
- },
71
- "Cluster": {
72
- "Cluster...": null,
73
- "DBSCAN...": null
74
- },
75
- "Notebooks": {
76
- "Browse Notebooks": null,
77
- "Open in Notebook": null,
78
- "New Notebook": null
79
- },
80
- "Models": {
81
- "Browse Models": null,
82
- "Train Model...": null,
83
- "Apply Model...": null
84
- },
85
- "Analyse": {
86
- "PCA...": null,
87
- "ANOVA...": null,
88
- "Multivariate Analysis...": null
89
- },
90
- "Reduce Dimensionality": null
91
- }
2
+ "name": "@datagrok/eda",
3
+ "friendlyName": "EDA",
4
+ "version": "1.1.27",
5
+ "description": "Exploratory Data Analysis Tools",
6
+ "dependencies": {
7
+ "@datagrok-libraries/math": "^1.1.10",
8
+ "@datagrok-libraries/ml": "^6.6.11",
9
+ "@datagrok-libraries/tutorials": "^1.3.6",
10
+ "@datagrok-libraries/utils": "^4.1.44",
11
+ "@keckelt/tsne": "^1.0.2",
12
+ "@webgpu/types": "^0.1.40",
13
+ "cash-dom": "^8.1.1",
14
+ "datagrok-api": "^1.16.0",
15
+ "dayjs": "^1.11.9",
16
+ "jstat": "^1.9.6",
17
+ "source-map-loader": "^4.0.1",
18
+ "umap-js": "^1.3.3",
19
+ "worker-loader": "latest"
20
+ },
21
+ "author": {
22
+ "name": "Viktor Makarichev",
23
+ "email": "vmakarichev@datagrok.ai"
24
+ },
25
+ "devDependencies": {
26
+ "@typescript-eslint/eslint-plugin": "^5.32.0",
27
+ "@typescript-eslint/parser": "^5.32.0",
28
+ "eslint": "^8.21.0",
29
+ "eslint-config-google": "^0.14.0",
30
+ "ts-loader": "latest",
31
+ "typescript": "latest",
32
+ "webpack": "latest",
33
+ "webpack-cli": "latest",
34
+ "css-loader": "latest",
35
+ "style-loader": "latest"
36
+ },
37
+ "scripts": {
38
+ "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/tutorials",
39
+ "debug-eda": "webpack && grok publish",
40
+ "release-eda": "webpack && grok publish --release",
41
+ "build-eda": "webpack",
42
+ "build": "webpack",
43
+ "debug-eda-dev": "webpack && grok publish dev",
44
+ "release-eda-dev": "webpack && grok publish dev --release",
45
+ "debug-eda-local": "webpack && grok publish local",
46
+ "release-eda-local": "webpack && grok publish local --release",
47
+ "build-all": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/tutorials run build && npm run build"
48
+ },
49
+ "canEdit": [
50
+ "Developers"
51
+ ],
52
+ "canView": [
53
+ "All users"
54
+ ],
55
+ "repository": {
56
+ "type": "git",
57
+ "url": "https://github.com/datagrok-ai/public.git",
58
+ "directory": "packages/EDA"
59
+ },
60
+ "category": "Machine Learning",
61
+ "sources": [
62
+ "wasm/EDA.js"
63
+ ],
64
+ "meta": {
65
+ "menu": {
66
+ "ML": {
67
+ "Tools": {
68
+ "Impute Missing Values...": null,
69
+ "Random Data...": null
70
+ },
71
+ "Cluster": {
72
+ "Cluster...": null,
73
+ "DBSCAN...": null
74
+ },
75
+ "Notebooks": {
76
+ "Browse Notebooks": null,
77
+ "Open in Notebook": null,
78
+ "New Notebook": null
79
+ },
80
+ "Models": {
81
+ "Browse Models": null,
82
+ "Train Model...": null,
83
+ "Apply Model...": null
84
+ },
85
+ "Analyse": {
86
+ "PCA...": null,
87
+ "ANOVA...": null,
88
+ "Multivariate Analysis...": null
89
+ },
90
+ "Reduce Dimensionality": null
91
+ }
92
+ }
92
93
  }
93
- }
94
- }
94
+ }
@@ -1 +1 @@
1
- em++ -O3 ../wasm/pcaExport.cpp ../wasm/PCA/PCA.cpp ../wasm/plsExport.cpp ../wasm/PLS/PLS.cpp ../wasm/svmApi.cpp -o ../wasm/EDA.js -s TOTAL_MEMORY=268435456 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME="exportEDA" -s EXPORTED_FUNCTIONS=["_principalComponentAnalysis","_error","_partialLeastSquareRegression","_generateDataset","_normalizeDataset","_trainLSSVM","_predictByLSSVM","_trainAndAnalyzeLSSVM","_malloc","_free"] -s EXPORTED_RUNTIME_METHODS=["cwrap","ccall"] -sENVIRONMENT=web,worker
1
+ em++ -O3 ../wasm/pcaExport.cpp ../wasm/PCA/PCA.cpp ../wasm/plsExport.cpp ../wasm/PLS/PLS.cpp ../wasm/svmApi.cpp ../wasm/regression-api.cpp -o ../wasm/EDA.js -s TOTAL_MEMORY=268435456 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME="exportEDA" -s EXPORTED_FUNCTIONS=["_principalComponentAnalysis","_error","_partialLeastSquareRegression","_generateDataset","_normalizeDataset","_trainLSSVM","_predictByLSSVM","_trainAndAnalyzeLSSVM","_fitLinearRegressionParamsWithDataNormalizing","_fitLinearRegressionParams","_malloc","_free"] -s EXPORTED_RUNTIME_METHODS=["cwrap","ccall"] -sENVIRONMENT=web,worker
package/scripts/func.json CHANGED
@@ -1 +1 @@
1
- {"pcaExport.cpp": {"principalComponentAnalysis": {"arguments": {"columns": {"type": "floatColumns"}, "componentsCount": {"type": "num"}, "centerNum": {"type": "num"}, "scaleNum": {"type": "num"}, "components": {"type": "newFloatColumns", "numOfRows": {"ref": "columns", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "tableFromColumns", "source": "components"}, "annotation": ["//name: principalComponentAnalysis", "//input: dataframe table", "//input: column_list columns", "//input: int componentsCount", "//input: int centerNum", "//input: int scaleNum", "//output: dataframe result "], "prototype": "principalComponentAnalysis(table, columns, componentsCount, centerNum, scaleNum)", "prototypeForWebWorker": "principalComponentAnalysisInWebWorker(table, columns, componentsCount, centerNum, scaleNum)", "callArgs": "[columns, componentsCount, centerNum, scaleNum]"}, "error": {"arguments": {"col1": {"type": "floatColumn"}, "col2": {"type": "floatColumn"}}, "output": {"type": "double", "source": "_callResult"}, "annotation": ["//name: error", "//input: dataframe df", "//input: column col1", "//input: column col2", "//output: double mad "], "prototype": "error(df, col1, col2)", "prototypeForWebWorker": "errorInWebWorker(df, col1, col2)", "callArgs": "[col1, col2]"}}, "PCA/PCA.cpp": {}, "plsExport.cpp": {"partialLeastSquareRegression": {"arguments": {"features": {"type": "floatColumns"}, "predict": {"type": "floatColumn"}, "componentsCount": {"type": "num"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "predict", "value": "numOfRows"}}, "regressionCoefficients": {"type": "newFloatColumn", "numOfRows": {"ref": "features", "value": "numOfColumns"}}, "tScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "uScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "xLoadings": {"type": "newFloatColumns", "numOfRows": {"ref": "features", "value": "numOfColumns"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "yLoadings": {"type": "newFloatColumn", "numOfRows": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings', 'yLoadings']"}, "annotation": ["//name: partialLeastSquareRegression", "//input: dataframe table", "//input: column_list features", "//input: column predict", "//input: int componentsCount"], "prototype": "partialLeastSquareRegression(table, features, predict, componentsCount)", "prototypeForWebWorker": "partialLeastSquareRegressionInWebWorker(table, features, predict, componentsCount)", "callArgs": "[features, predict, componentsCount]"}}, "PLS/PLS.cpp": {}, "svmApi.cpp": {"generateDataset": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "samplesCount": {"type": "num"}, "featuresCount": {"type": "num"}, "min": {"type": "num"}, "max": {"type": "num"}, "violatorsPercentage": {"type": "num"}, "dataset": {"type": "newFloatColumns", "numOfRows": {"ref": "samplesCount", "value": "data"}, "numOfColumns": {"ref": "featuresCount", "value": "data"}}, "labels": {"type": "newFloatColumn", "numOfRows": {"ref": "samplesCount", "value": "data"}}}, "output": {"type": "objects", "source": "['dataset', 'labels']"}, "annotation": ["//name: generateDataset", "//input: int kernel", "//input: column kernelParams", "//input: int samplesCount", "//input: int featuresCount", "//input: double min", "//input: double max", "//input: double violatorsPercentage"], "prototype": "generateDataset(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "prototypeForWebWorker": "generateDatasetInWebWorker(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "callArgs": "[kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage]"}, "normalizeDataset": {"arguments": {"data": {"type": "floatColumns"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "data", "value": "numOfColumns"}, "numOfColumns": {"ref": "data", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs']"}, "annotation": ["//name: normalizeDataset", "//input: column_list data"], "prototype": "normalizeDataset(data)", "prototypeForWebWorker": "normalizeDatasetInWebWorker(data)", "callArgs": "[data]"}, "trainLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights']"}, "annotation": ["//name: trainLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "prototypeForWebWorker": "trainLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels]"}, "predictByLSSVM": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "normalizedData": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "means": {"type": "floatColumn"}, "stdDevs": {"type": "floatColumn"}, "modelParams": {"type": "floatColumn"}, "precomputedWeights": {"type": "floatColumn"}, "targetData": {"type": "floatColumns"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "targetData", "value": "numOfRows"}}}, "output": {"type": "column", "source": "prediction"}, "annotation": ["//name: predictByLSSVM", "//input: int kernel", "//input: column kernelParams", "//input: column_list normalizedData", "//input: column labels", "//input: column means", "//input: column stdDevs", "//input: column modelParams", "//input: column precomputedWeights", "//input: column_list targetData", "//output: column prediction"], "prototype": "predictByLSSVM(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "prototypeForWebWorker": "predictByLSSVMInWebWorker(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "callArgs": "[kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData]"}, "trainAndAnalyzeLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "confusionMatrixElementsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}, "predictedLabels": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "correctness": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "consfusionMatrix": {"type": "newIntColumn", "numOfRows": {"ref": "confusionMatrixElementsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights', 'predictedLabels', 'correctness', 'consfusionMatrix']"}, "annotation": ["//name: trainAndAnalyzeLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: int confusionMatrixElementsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainAndAnalyzeLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "prototypeForWebWorker": "trainAndAnalyzeLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels]"}}}
1
+ {"pcaExport.cpp": {"principalComponentAnalysis": {"arguments": {"columns": {"type": "floatColumns"}, "componentsCount": {"type": "num"}, "centerNum": {"type": "num"}, "scaleNum": {"type": "num"}, "components": {"type": "newFloatColumns", "numOfRows": {"ref": "columns", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "tableFromColumns", "source": "components"}, "annotation": ["//name: principalComponentAnalysis", "//input: dataframe table", "//input: column_list columns", "//input: int componentsCount", "//input: int centerNum", "//input: int scaleNum", "//output: dataframe result "], "prototype": "principalComponentAnalysis(table, columns, componentsCount, centerNum, scaleNum)", "prototypeForWebWorker": "principalComponentAnalysisInWebWorker(table, columns, componentsCount, centerNum, scaleNum)", "callArgs": "[columns, componentsCount, centerNum, scaleNum]"}, "error": {"arguments": {"col1": {"type": "floatColumn"}, "col2": {"type": "floatColumn"}}, "output": {"type": "double", "source": "_callResult"}, "annotation": ["//name: error", "//input: dataframe df", "//input: column col1", "//input: column col2", "//output: double mad "], "prototype": "error(df, col1, col2)", "prototypeForWebWorker": "errorInWebWorker(df, col1, col2)", "callArgs": "[col1, col2]"}}, "PCA/PCA.cpp": {}, "plsExport.cpp": {"partialLeastSquareRegression": {"arguments": {"features": {"type": "floatColumns"}, "predict": {"type": "floatColumn"}, "componentsCount": {"type": "num"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "predict", "value": "numOfRows"}}, "regressionCoefficients": {"type": "newFloatColumn", "numOfRows": {"ref": "features", "value": "numOfColumns"}}, "tScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "uScores": {"type": "newFloatColumns", "numOfRows": {"ref": "predict", "value": "numOfRows"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "xLoadings": {"type": "newFloatColumns", "numOfRows": {"ref": "features", "value": "numOfColumns"}, "numOfColumns": {"ref": "componentsCount", "value": "data"}}, "yLoadings": {"type": "newFloatColumn", "numOfRows": {"ref": "componentsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['prediction', 'regressionCoefficients', 'tScores', 'uScores', 'xLoadings', 'yLoadings']"}, "annotation": ["//name: partialLeastSquareRegression", "//input: dataframe table", "//input: column_list features", "//input: column predict", "//input: int componentsCount"], "prototype": "partialLeastSquareRegression(table, features, predict, componentsCount)", "prototypeForWebWorker": "partialLeastSquareRegressionInWebWorker(table, features, predict, componentsCount)", "callArgs": "[features, predict, componentsCount]"}}, "PLS/PLS.cpp": {}, "svmApi.cpp": {"generateDataset": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "samplesCount": {"type": "num"}, "featuresCount": {"type": "num"}, "min": {"type": "num"}, "max": {"type": "num"}, "violatorsPercentage": {"type": "num"}, "dataset": {"type": "newFloatColumns", "numOfRows": {"ref": "samplesCount", "value": "data"}, "numOfColumns": {"ref": "featuresCount", "value": "data"}}, "labels": {"type": "newFloatColumn", "numOfRows": {"ref": "samplesCount", "value": "data"}}}, "output": {"type": "objects", "source": "['dataset', 'labels']"}, "annotation": ["//name: generateDataset", "//input: int kernel", "//input: column kernelParams", "//input: int samplesCount", "//input: int featuresCount", "//input: double min", "//input: double max", "//input: double violatorsPercentage"], "prototype": "generateDataset(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "prototypeForWebWorker": "generateDatasetInWebWorker(kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage)", "callArgs": "[kernel, kernelParams, samplesCount, featuresCount, min, max, violatorsPercentage]"}, "normalizeDataset": {"arguments": {"data": {"type": "floatColumns"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "data", "value": "numOfColumns"}, "numOfColumns": {"ref": "data", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "data", "value": "numOfColumns"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs']"}, "annotation": ["//name: normalizeDataset", "//input: column_list data"], "prototype": "normalizeDataset(data)", "prototypeForWebWorker": "normalizeDatasetInWebWorker(data)", "callArgs": "[data]"}, "trainLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights']"}, "annotation": ["//name: trainLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "prototypeForWebWorker": "trainLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, dataset, labels]"}, "predictByLSSVM": {"arguments": {"kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "normalizedData": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "means": {"type": "floatColumn"}, "stdDevs": {"type": "floatColumn"}, "modelParams": {"type": "floatColumn"}, "precomputedWeights": {"type": "floatColumn"}, "targetData": {"type": "floatColumns"}, "prediction": {"type": "newFloatColumn", "numOfRows": {"ref": "targetData", "value": "numOfRows"}}}, "output": {"type": "column", "source": "prediction"}, "annotation": ["//name: predictByLSSVM", "//input: int kernel", "//input: column kernelParams", "//input: column_list normalizedData", "//input: column labels", "//input: column means", "//input: column stdDevs", "//input: column modelParams", "//input: column precomputedWeights", "//input: column_list targetData", "//output: column prediction"], "prototype": "predictByLSSVM(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "prototypeForWebWorker": "predictByLSSVMInWebWorker(kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData)", "callArgs": "[kernel, kernelParams, normalizedData, labels, means, stdDevs, modelParams, precomputedWeights, targetData]"}, "trainAndAnalyzeLSSVM": {"arguments": {"gamma": {"type": "num"}, "kernel": {"type": "num"}, "kernelParams": {"type": "floatColumn"}, "modelParamsCount": {"type": "num"}, "precomputedWeightsCount": {"type": "num"}, "confusionMatrixElementsCount": {"type": "num"}, "dataset": {"type": "floatColumns"}, "labels": {"type": "floatColumn"}, "normalizedData": {"type": "newFloatColumns", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}, "numOfColumns": {"ref": "dataset", "value": "numOfRows"}}, "means": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "stdDevs": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfColumns"}}, "modelParams": {"type": "newFloatColumn", "numOfRows": {"ref": "modelParamsCount", "value": "data"}}, "precomputedWeights": {"type": "newFloatColumn", "numOfRows": {"ref": "precomputedWeightsCount", "value": "data"}}, "predictedLabels": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "correctness": {"type": "newFloatColumn", "numOfRows": {"ref": "dataset", "value": "numOfRows"}}, "consfusionMatrix": {"type": "newIntColumn", "numOfRows": {"ref": "confusionMatrixElementsCount", "value": "data"}}}, "output": {"type": "objects", "source": "['normalizedData', 'means', 'stdDevs', 'modelParams', 'precomputedWeights', 'predictedLabels', 'correctness', 'consfusionMatrix']"}, "annotation": ["//name: trainAndAnalyzeLSSVM", "//input: double gamma", "//input: int kernel", "//input: column kernelParams", "//input: int modelParamsCount", "//input: int precomputedWeightsCount", "//input: int confusionMatrixElementsCount", "//input: column_list dataset", "//input: column labels"], "prototype": "trainAndAnalyzeLSSVM(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "prototypeForWebWorker": "trainAndAnalyzeLSSVMInWebWorker(gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels)", "callArgs": "[gamma, kernel, kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount, dataset, labels]"}}, "regression-api.cpp": {"fitLinearRegressionParamsWithDataNormalizing": {"arguments": {"features": {"type": "floatColumns"}, "featureAvgs": {"type": "floatColumn"}, "featureStdDevs": {"type": "floatColumn"}, "targets": {"type": "floatColumn"}, "targetsAvg": {"type": "num"}, "targetsStdDev": {"type": "num"}, "paramsCount": {"type": "num"}, "params": {"type": "newFloatColumn", "numOfRows": {"ref": "paramsCount", "value": "data"}}}, "output": {"type": "column", "source": "params"}, "annotation": ["//name: fitLinearRegressionParamsWithDataNormalizing", "//input: column_list features", "//input: column featureAvgs", "//input: column featureStdDevs", "//input: column targets", "//input: double targetsAvg", "//input: double targetsStdDev", "//input: int paramsCount", "//output: column params"], "prototype": "fitLinearRegressionParamsWithDataNormalizing(features, featureAvgs, featureStdDevs, targets, targetsAvg, targetsStdDev, paramsCount)", "prototypeForWebWorker": "fitLinearRegressionParamsWithDataNormalizingInWebWorker(features, featureAvgs, featureStdDevs, targets, targetsAvg, targetsStdDev, paramsCount)", "callArgs": "[features, featureAvgs, featureStdDevs, targets, targetsAvg, targetsStdDev, paramsCount]"}, "fitLinearRegressionParams": {"arguments": {"features": {"type": "floatColumns"}, "targets": {"type": "floatColumn"}, "paramsCount": {"type": "num"}, "params": {"type": "newFloatColumn", "numOfRows": {"ref": "paramsCount", "value": "data"}}}, "output": {"type": "column", "source": "params"}, "annotation": ["//name: fitLinearRegressionParams", "//input: column_list features", "//input: column targets", "//input: int paramsCount", "//output: column params"], "prototype": "fitLinearRegressionParams(features, targets, paramsCount)", "prototypeForWebWorker": "fitLinearRegressionParamsInWebWorker(features, targets, paramsCount)", "callArgs": "[features, targets, paramsCount]"}}}
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "EDA",
3
3
  "folder": "../wasm",
4
- "source": ["pcaExport.cpp", "PCA/PCA.cpp", "plsExport.cpp", "PLS/PLS.cpp", "svmApi.cpp"],
4
+ "source": ["pcaExport.cpp", "PCA/PCA.cpp", "plsExport.cpp", "PLS/PLS.cpp", "svmApi.cpp", "regression-api.cpp"],
5
5
  "optimizationMode": "-O3",
6
6
  "packageFile": "../src/package.ts",
7
7
  "packageJsonFile": "../package.json",
package/src/package.ts CHANGED
@@ -10,7 +10,7 @@ import {computePCA} from './eda-tools';
10
10
  import {addPrefixToEachColumnName, addOneWayAnovaVizualization} from './eda-ui';
11
11
  import {testDataForBinaryClassification} from './data-generators';
12
12
  import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
13
- getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
13
+ getTrainedModel, getPrediction, isApplicableSVM, showTrainReport, getPackedModel} from './svm';
14
14
 
15
15
  import {PLS_ANALYSIS} from './pls/pls-constants';
16
16
  import {runMVA, runDemoMVA, getPlsAnalysis, PlsOutput} from './pls/pls-tools';
@@ -29,6 +29,9 @@ import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimen
29
29
  import {runKNNImputer} from './missing-values-imputation/ui';
30
30
  import {MCLEditor} from '@datagrok-libraries/ml/src/MCL/mcl-editor';
31
31
  import {markovCluster} from '@datagrok-libraries/ml/src/MCL/clustering-view';
32
+ import {MCL_OPTIONS_TAG, MCLSerializableOptions} from '@datagrok-libraries/ml/src/MCL';
33
+
34
+ import {getLinearRegressionParams, getPredictionByLinearRegression, getTestDatasetForLinearRegression} from './regression';
32
35
 
33
36
  export const _package = new DG.Package();
34
37
 
@@ -189,7 +192,7 @@ export function GetMCLEditor(call: DG.FuncCall): void {
189
192
  df: params.table, cols: params.columns, metrics: params.distanceMetrics,
190
193
  weights: params.weights, aggregationMethod: params.aggreaggregationMethod, preprocessingFuncs: params.preprocessingFunctions,
191
194
  preprocessingFuncArgs: params.preprocessingFuncArgs, threshold: params.threshold, maxIterations: params.maxIterations,
192
- useWebGPU: params.useWebGPU,
195
+ useWebGPU: params.useWebGPU, inflate: params.inflateFactor,
193
196
  }).call(true);
194
197
  }).show();
195
198
  } catch (err: any) {
@@ -214,13 +217,46 @@ export function GetMCLEditor(call: DG.FuncCall): void {
214
217
  //input: int threshold = 80
215
218
  //input: int maxIterations = 10
216
219
  //input: bool useWebGPU = false
220
+ //input: double inflate = 2
217
221
  //editor: EDA: GetMCLEditor
218
222
  export async function MCL(df: DG.DataFrame, cols: DG.Column[], metrics: KnownMetrics[],
219
223
  weights: number[], aggregationMethod: DistanceAggregationMethod, preprocessingFuncs: (DG.Func | null | undefined)[],
220
- preprocessingFuncArgs: any[], threshold: number = 80, maxIterations: number = 10, useWebGPU: boolean = false,
224
+ preprocessingFuncArgs: any[], threshold: number = 80, maxIterations: number = 10, useWebGPU: boolean = false, inflate: number = 0,
221
225
  ): Promise< DG.ScatterPlotViewer | undefined> {
222
- const res = (await markovCluster(df, cols, metrics, weights,
223
- aggregationMethod, preprocessingFuncs, preprocessingFuncArgs, threshold, maxIterations, useWebGPU));
226
+ const tv = grok.shell.tableView(df.name) ?? grok.shell.addTableView(df);
227
+ const serializedOptions: string = JSON.stringify({
228
+ cols: cols.map((col) => col.name),
229
+ metrics: metrics,
230
+ weights: weights,
231
+ aggregationMethod: aggregationMethod,
232
+ preprocessingFuncs: preprocessingFuncs.map((func) => func?.name ?? null),
233
+ preprocessingFuncArgs: preprocessingFuncArgs,
234
+ threshold: threshold,
235
+ maxIterations: maxIterations,
236
+ useWebGPU: useWebGPU,
237
+ inflate: inflate,
238
+ } satisfies MCLSerializableOptions);
239
+ df.setTag(MCL_OPTIONS_TAG, serializedOptions);
240
+
241
+ const sc = tv.addViewer(DG.VIEWER.SCATTER_PLOT, {title: 'MCL', initializationFunction: 'EDA:MCLInitializationFunction'}) as DG.ScatterPlotViewer;
242
+ return sc;
243
+ }
244
+
245
+ //name: MCLInitializationFunction
246
+ //input: viewer sc
247
+ export async function MCLInitializationFunction(sc: DG.ScatterPlotViewer) {
248
+ const df = sc.dataFrame;
249
+ if (df === null)
250
+ throw new Error('Data frame of the scatter plot is null');
251
+ const mclTag = df.getTag(MCL_OPTIONS_TAG);
252
+ if (!mclTag)
253
+ throw new Error('MCL options tag on the dataFrame is not found');
254
+ const options: MCLSerializableOptions = JSON.parse(mclTag);
255
+ const cols = options.cols.map((colName) => df.columns.byName(colName));
256
+ const preprocessingFuncs = options.preprocessingFuncs.map((funcName) => funcName ? DG.Func.byName(funcName) : null);
257
+ const res = await markovCluster(df, cols, options.metrics, options.weights,
258
+ options.aggregationMethod, preprocessingFuncs, options.preprocessingFuncArgs, options.threshold,
259
+ options.maxIterations, options.useWebGPU, options.inflate, sc);
224
260
  return res?.sc;
225
261
  }
226
262
 
@@ -300,15 +336,10 @@ export async function testDataLinearNonSeparable(name: string, sigma: number, sa
300
336
  //input: dataframe df
301
337
  //input: string predict_column
302
338
  //input: double gamma = 1.0 {category: Hyperparameters}
303
- //input: bool toShowReport = false {caption: to show report; category: Report}
304
339
  //output: dynamic model
305
340
  export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: string,
306
- gamma: number, toShowReport: boolean): Promise<any> {
341
+ gamma: number): Promise<any> {
307
342
  const trainedModel = await getTrainedModel({gamma: gamma, kernel: LINEAR}, df, predict_column);
308
-
309
- if (toShowReport)
310
- showTrainReport(df, trainedModel);
311
-
312
343
  return getPackedModel(trainedModel);
313
344
  }
314
345
 
@@ -322,6 +353,29 @@ export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promis
322
353
  return await getPrediction(df, model);
323
354
  }
324
355
 
356
+ //name: isApplicableLinearKernelSVM
357
+ //meta.mlname: linear kernel LS-SVM
358
+ //meta.mlrole: isApplicable
359
+ //input: dataframe df
360
+ //input: string predict_column
361
+ //output: bool result
362
+ export async function isApplicableLinearKernelSVM(df: DG.DataFrame, predict_column: string): Promise<boolean> {
363
+ return isApplicableSVM(df, predict_column);
364
+ }
365
+
366
+ //name: visualizeLinearKernelSVM
367
+ //meta.mlname: linear kernel LS-SVM
368
+ //meta.mlrole: visualize
369
+ //input: dataframe df
370
+ //input: string target_column
371
+ //input: string predict_column
372
+ //input: dynamic model
373
+ //output: dynamic widget
374
+ export async function visualizeLinearKernelSVM(df: DG.DataFrame, target_column: string, predict_column: string, model: any): Promise<any> {
375
+ return showTrainReport(df, model);
376
+ }
377
+
378
+
325
379
  //name: trainRBFkernelSVM
326
380
  //meta.mlname: RBF-kernel LS-SVM
327
381
  //meta.mlrole: train
@@ -329,17 +383,13 @@ export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promis
329
383
  //input: string predict_column
330
384
  //input: double gamma = 1.0 {category: Hyperparameters}
331
385
  //input: double sigma = 1.5 {category: Hyperparameters}
332
- //input: bool toShowReport = false {caption: to show report; category: Report}
333
386
  //output: dynamic model
334
387
  export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string,
335
- gamma: number, sigma: number, toShowReport: boolean): Promise<any> {
388
+ gamma: number, sigma: number): Promise<any> {
336
389
  const trainedModel = await getTrainedModel(
337
390
  {gamma: gamma, kernel: RBF, sigma: sigma},
338
391
  df, predict_column);
339
392
 
340
- if (toShowReport)
341
- showTrainReport(df, trainedModel);
342
-
343
393
  return getPackedModel(trainedModel);
344
394
  }
345
395
 
@@ -353,6 +403,28 @@ export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<D
353
403
  return await getPrediction(df, model);
354
404
  }
355
405
 
406
+ //name: isApplicableRBFkernelSVM
407
+ //meta.mlname: RBF-kernel LS-SVM
408
+ //meta.mlrole: isApplicable
409
+ //input: dataframe df
410
+ //input: string predict_column
411
+ //output: bool result
412
+ export async function isApplicableRBFkernelSVM(df: DG.DataFrame, predict_column: string): Promise<boolean> {
413
+ return isApplicableSVM(df, predict_column);
414
+ }
415
+
416
+ //name: visualizeRBFkernelSVM
417
+ //meta.mlname: RBF-kernel LS-SVM
418
+ //meta.mlrole: visualize
419
+ //input: dataframe df
420
+ //input: string target_column
421
+ //input: string predict_column
422
+ //input: dynamic model
423
+ //output: dynamic widget
424
+ export async function visualizeRBFkernelSVM(df: DG.DataFrame, target_column: string, predict_column: string, model: any): Promise<any> {
425
+ return showTrainReport(df, model);
426
+ }
427
+
356
428
  //name: trainPolynomialKernelSVM
357
429
  //meta.mlname: polynomial kernel LS-SVM
358
430
  //meta.mlrole: train
@@ -361,17 +433,13 @@ export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<D
361
433
  //input: double gamma = 1.0 {category: Hyperparameters}
362
434
  //input: double c = 1 {category: Hyperparameters}
363
435
  //input: double d = 2 {category: Hyperparameters}
364
- //input: bool toShowReport = false {caption: to show report; category: Report}
365
436
  //output: dynamic model
366
437
  export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column: string,
367
- gamma: number, c: number, d: number, toShowReport: boolean): Promise<any> {
438
+ gamma: number, c: number, d: number): Promise<any> {
368
439
  const trainedModel = await getTrainedModel(
369
440
  {gamma: gamma, kernel: POLYNOMIAL, cParam: c, dParam: d},
370
441
  df, predict_column);
371
442
 
372
- if (toShowReport)
373
- showTrainReport(df, trainedModel);
374
-
375
443
  return getPackedModel(trainedModel);
376
444
  } // trainPolynomialKernelSVM
377
445
 
@@ -385,6 +453,28 @@ export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Pr
385
453
  return await getPrediction(df, model);
386
454
  }
387
455
 
456
+ //name: isApplicablePolynomialKernelSVM
457
+ //meta.mlname: polynomial kernel LS-SVM
458
+ //meta.mlrole: isApplicable
459
+ //input: dataframe df
460
+ //input: string predict_column
461
+ //output: bool result
462
+ export async function isApplicablePolynomialKernelSVM(df: DG.DataFrame, predict_column: string): Promise<boolean> {
463
+ return isApplicableSVM(df, predict_column);
464
+ }
465
+
466
+ //name: visualizePolynomialKernelSVM
467
+ //meta.mlname: polynomial kernel LS-SVM
468
+ //meta.mlrole: visualize
469
+ //input: dataframe df
470
+ //input: string target_column
471
+ //input: string predict_column
472
+ //input: dynamic model
473
+ //output: dynamic widget
474
+ export async function visualizePolynomialKernelSVM(df: DG.DataFrame, target_column: string, predict_column: string, model: any): Promise<any> {
475
+ return showTrainReport(df, model);
476
+ }
477
+
388
478
  //name: trainSigmoidKernelSVM
389
479
  //meta.mlname: sigmoid kernel LS-SVM
390
480
  //meta.mlrole: train
@@ -393,17 +483,13 @@ export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Pr
393
483
  //input: double gamma = 1.0 {category: Hyperparameters}
394
484
  //input: double kappa = 1 {category: Hyperparameters}
395
485
  //input: double theta = 1 {category: Hyperparameters}
396
- //input: bool toShowReport = false {caption: to show report; category: Report}
397
486
  //output: dynamic model
398
487
  export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: string,
399
- gamma: number, kappa: number, theta: number, toShowReport: boolean): Promise<any> {
488
+ gamma: number, kappa: number, theta: number): Promise<any> {
400
489
  const trainedModel = await getTrainedModel(
401
490
  {gamma: gamma, kernel: SIGMOID, kappa: kappa, theta: theta},
402
491
  df, predict_column);
403
492
 
404
- if (toShowReport)
405
- showTrainReport(df, trainedModel);
406
-
407
493
  return getPackedModel(trainedModel);
408
494
  } // trainSigmoidKernelSVM
409
495
 
@@ -417,6 +503,28 @@ export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promi
417
503
  return await getPrediction(df, model);
418
504
  }
419
505
 
506
+ //name: isApplicableSigmoidKernelSVM
507
+ //meta.mlname: sigmoid kernel LS-SVM
508
+ //meta.mlrole: isApplicable
509
+ //input: dataframe df
510
+ //input: string predict_column
511
+ //output: bool result
512
+ export async function isApplicableSigmoidKernelSVM(df: DG.DataFrame, predict_column: string): Promise<boolean> {
513
+ return isApplicableSVM(df, predict_column);
514
+ }
515
+
516
+ //name: visualizeSigmoidKernelSVM
517
+ //meta.mlname: sigmoid kernel LS-SVM
518
+ //meta.mlrole: visualize
519
+ //input: dataframe df
520
+ //input: string target_column
521
+ //input: string predict_column
522
+ //input: dynamic model
523
+ //output: dynamic widget
524
+ export async function visualizeSigmoidKernelSVM(df: DG.DataFrame, target_column: string, predict_column: string, model: any): Promise<any> {
525
+ return showTrainReport(df, model);
526
+ }
527
+
420
528
  //top-menu: ML | Analyze | ANOVA...
421
529
  //name: ANOVA
422
530
  //description: One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the studied feature.
@@ -436,3 +544,87 @@ export function anova(table: DG.DataFrame, factor: DG.Column, feature: DG.Column
436
544
  export function kNNImputation() {
437
545
  runKNNImputer();
438
546
  }
547
+
548
+ //name: linearRegression
549
+ //description: Linear Regression demo
550
+ //input: dataframe table
551
+ //input: column_list features {type: numerical}
552
+ //input: column target {type: numerical}
553
+ //input: bool plot = true {caption: plot}
554
+ export async function linearRegression(table: DG.DataFrame, features: DG.ColumnList, target: DG.Column, plot: boolean): Promise<void> {
555
+ const t1 = performance.now();
556
+ const params = await getLinearRegressionParams(features, target);
557
+ const t2 = performance.now();
558
+ console.log(`Fit: ${t2 - t1} ms.`);
559
+ const prediction = getPredictionByLinearRegression(features, params);
560
+ console.log(`Predict: ${performance.now() - t2} ms.`);
561
+
562
+ prediction.name = table.columns.getUnusedName(prediction.name);
563
+
564
+ table.columns.add(prediction);
565
+
566
+ if (plot) {
567
+ const view = grok.shell.tableView(table.name);
568
+ view.addViewer(DG.VIEWER.SCATTER_PLOT, {
569
+ xColumnName: target.name,
570
+ yColumnName: prediction.name,
571
+ showRegressionLine: true,
572
+ });
573
+ }
574
+ }
575
+
576
+ //name: generateDatasetForLinearRegressionTest
577
+ //description: Create demo dataset for linear regression
578
+ //input: int rowCount = 10000 {min: 1000; max: 10000000; step: 10000}
579
+ //input: int colCount = 10 {min: 1; max: 1000; step: 10}
580
+ //input: double featuresScale = 10 {min: -1000; max: 1000; step: 10}
581
+ //input: double featuresBias = 10 {min: -1000; max: 1000; step: 10}
582
+ //input: double paramsScale = 10 {min: -1000; max: 1000; step: 10}
583
+ //input: double paramsBias = 10 {min: -1000; max: 1000; step: 10}
584
+ //output: dataframe table
585
+ export function generateDatasetForLinearRegressionTest(rowCount: number, colCount: number,
586
+ featuresScale: number, featuresBias: number, paramsScale: number, paramsBias: number): DG.DataFrame {
587
+ return getTestDatasetForLinearRegression(rowCount, colCount, featuresScale, featuresBias, paramsScale, paramsBias);
588
+ }
589
+
590
+ //name: trainLinearRegression
591
+ //meta.mlname: Linear Regression
592
+ //meta.mlrole: train
593
+ //input: dataframe df
594
+ //input: string predict_column
595
+ //output: dynamic model
596
+ export async function trainLinearRegression(df: DG.DataFrame, predict_column: string): Promise<Uint8Array> {
597
+ const features = df.columns;
598
+ const target = features.byName(predict_column);
599
+ features.remove(predict_column);
600
+
601
+ const params = await getLinearRegressionParams(features, target);
602
+
603
+ return new Uint8Array(params.buffer);
604
+ }
605
+
606
+ //name: applyLinearRegression
607
+ //meta.mlname: Linear Regression
608
+ //meta.mlrole: apply
609
+ //input: dataframe df
610
+ //input: dynamic model
611
+ //output: dataframe table
612
+ export function applyLinearRegression(df: DG.DataFrame, model: any): DG.DataFrame {
613
+ const features = df.columns;
614
+ const params = new Float32Array((model as Uint8Array).buffer);
615
+ return DG.DataFrame.fromColumns([getPredictionByLinearRegression(features, params)]);
616
+ }
617
+
618
+ //name: isApplicableLinearRegression
619
+ //meta.mlname: Linear Regression
620
+ //meta.mlrole: isApplicable
621
+ //input: dataframe df
622
+ //input: string predict_column
623
+ //output: bool result
624
+ export function isApplicableLinearRegression(df: DG.DataFrame, predict_column: string): boolean {
625
+ for (const col of df.columns) {
626
+ if ((col.type !== DG.COLUMN_TYPE.INT) && (col.type !== DG.COLUMN_TYPE.FLOAT) && (col.type !== DG.COLUMN_TYPE.QNUM) && (col.type !== DG.COLUMN_TYPE.BIG_INT))
627
+ return false;
628
+ }
629
+ return true;
630
+ }