@datagrok/eda 1.4.3 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/dist/_d4c0.js +279 -0
- package/dist/_d4c0.js.map +1 -0
- package/dist/node_modules_datagrok-libraries_math_src_dbscan_wasm_clustering-worker_js.js +279 -0
- package/dist/node_modules_datagrok-libraries_math_src_dbscan_wasm_clustering-worker_js.js.map +1 -0
- package/dist/node_modules_datagrok-libraries_ml_src_MCL_mcl-sparse-matrix-mult-worker_js.js +59 -0
- package/dist/node_modules_datagrok-libraries_ml_src_MCL_mcl-sparse-matrix-mult-worker_js.js.map +1 -0
- package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_distance-matrix-worker_js.js +284 -0
- package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_distance-matrix-worker_js.js.map +1 -0
- package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_single-value-knn-worker_js.js +265 -0
- package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_single-value-knn-worker_js.js.map +1 -0
- package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-worker_js.js +287 -0
- package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-worker_js.js.map +1 -0
- package/dist/package-test.js +26140 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +30337 -1
- package/dist/package.js.map +1 -1
- package/dist/src_workers_softmax-worker_ts.js +154 -0
- package/dist/src_workers_softmax-worker_ts.js.map +1 -0
- package/dist/src_workers_tsne-worker_ts.js +244 -0
- package/dist/src_workers_tsne-worker_ts.js.map +1 -0
- package/dist/src_workers_umap-worker_ts.js +252 -0
- package/dist/src_workers_umap-worker_ts.js.map +1 -0
- package/dist/vendors-node_modules_datagrok-libraries_math_src_dbscan_wasm_dbscan_js.js +1253 -0
- package/dist/vendors-node_modules_datagrok-libraries_math_src_dbscan_wasm_dbscan_js.js.map +1 -0
- package/dist/vendors-node_modules_datagrok-libraries_math_src_hierarchical-clustering_wasm_clustering-worker_js.js +942 -0
- package/dist/vendors-node_modules_datagrok-libraries_math_src_hierarchical-clustering_wasm_clustering-worker_js.js.map +1 -0
- package/dist/vendors-node_modules_datagrok-libraries_math_src_webGPU_sparse-matrix_webGPU-sparse-matrix_js-07693f.js +1525 -0
- package/dist/vendors-node_modules_datagrok-libraries_math_src_webGPU_sparse-matrix_webGPU-sparse-matrix_js-07693f.js.map +1 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_MCL_mcl-worker_js-node_modules_datagrok-librar-e4203d.js +2244 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_MCL_mcl-worker_js-node_modules_datagrok-librar-e4203d.js.map +1 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-threshold-worker_js.js +286 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-threshold-worker_js.js.map +1 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-worker_js.js +280 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-worker_js.js.map +1 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-threshold-worker_js.js +282 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-threshold-worker_js.js.map +1 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_utils_js-node_modules_datagrok-72c7b2.js +1821 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_utils_js-node_modules_datagrok-72c7b2.js.map +1 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_multi-column-dimensionality-reduction_mulit-co-3800a0.js +7776 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_multi-column-dimensionality-reduction_mulit-co-3800a0.js.map +1 -0
- package/dist/vendors-node_modules_keckelt_tsne_lib_index_js.js +379 -0
- package/dist/vendors-node_modules_keckelt_tsne_lib_index_js.js.map +1 -0
- package/dist/vendors-node_modules_ml-matrix_matrix_mjs.js +5946 -0
- package/dist/vendors-node_modules_ml-matrix_matrix_mjs.js.map +1 -0
- package/dist/vendors-node_modules_umap-js_dist_index_js.js +2284 -0
- package/dist/vendors-node_modules_umap-js_dist_index_js.js.map +1 -0
- package/dist/wasm_EDAForWebWorker_js-wasm_callWasmForWebWorker_js.js +779 -0
- package/dist/wasm_EDAForWebWorker_js-wasm_callWasmForWebWorker_js.js.map +1 -0
- package/dist/wasm_workers_errorWorker_js.js +267 -0
- package/dist/wasm_workers_errorWorker_js.js.map +1 -0
- package/dist/wasm_workers_fitLinearRegressionParamsWithDataNormalizingWorker_js.js +267 -0
- package/dist/wasm_workers_fitLinearRegressionParamsWithDataNormalizingWorker_js.js.map +1 -0
- package/dist/wasm_workers_fitLinearRegressionParamsWorker_js.js +267 -0
- package/dist/wasm_workers_fitLinearRegressionParamsWorker_js.js.map +1 -0
- package/dist/wasm_workers_fitSoftmaxWorker_js.js +267 -0
- package/dist/wasm_workers_fitSoftmaxWorker_js.js.map +1 -0
- package/dist/wasm_workers_generateDatasetWorker_js.js +267 -0
- package/dist/wasm_workers_generateDatasetWorker_js.js.map +1 -0
- package/dist/wasm_workers_normalizeDatasetWorker_js.js +267 -0
- package/dist/wasm_workers_normalizeDatasetWorker_js.js.map +1 -0
- package/dist/wasm_workers_partialLeastSquareRegressionWorker_js.js +267 -0
- package/dist/wasm_workers_partialLeastSquareRegressionWorker_js.js.map +1 -0
- package/dist/wasm_workers_predictByLSSVMWorker_js.js +267 -0
- package/dist/wasm_workers_predictByLSSVMWorker_js.js.map +1 -0
- package/dist/wasm_workers_principalComponentAnalysisNipalsWorker_js.js +267 -0
- package/dist/wasm_workers_principalComponentAnalysisNipalsWorker_js.js.map +1 -0
- package/dist/wasm_workers_principalComponentAnalysisWorkerUpd_js.js +271 -0
- package/dist/wasm_workers_principalComponentAnalysisWorkerUpd_js.js.map +1 -0
- package/dist/wasm_workers_trainAndAnalyzeLSSVMWorker_js.js +267 -0
- package/dist/wasm_workers_trainAndAnalyzeLSSVMWorker_js.js.map +1 -0
- package/dist/wasm_workers_trainLSSVMWorker_js.js +267 -0
- package/dist/wasm_workers_trainLSSVMWorker_js.js.map +1 -0
- package/dist/wasm_workers_xgboostWorker_js.js +279 -0
- package/dist/wasm_workers_xgboostWorker_js.js.map +1 -0
- package/package.json +5 -4
- package/src/package-api.ts +259 -0
- package/src/package.g.ts +522 -0
- package/src/package.ts +907 -678
- package/test-console-output-1.log +78 -93
- package/test-record-1.mp4 +0 -0
- package/tsconfig.json +2 -2
- package/webpack.config.js +5 -0
- package/dist/111.js +0 -2
- package/dist/111.js.map +0 -1
- package/dist/128.js +0 -2
- package/dist/128.js.map +0 -1
- package/dist/153.js +0 -2
- package/dist/153.js.map +0 -1
- package/dist/23.js +0 -2
- package/dist/23.js.map +0 -1
- package/dist/234.js +0 -2
- package/dist/234.js.map +0 -1
- package/dist/242.js +0 -2
- package/dist/242.js.map +0 -1
- package/dist/260.js +0 -2
- package/dist/260.js.map +0 -1
- package/dist/33.js +0 -2
- package/dist/33.js.map +0 -1
- package/dist/348.js +0 -2
- package/dist/348.js.map +0 -1
- package/dist/377.js +0 -2
- package/dist/377.js.map +0 -1
- package/dist/412.js +0 -2
- package/dist/412.js.map +0 -1
- package/dist/415.js +0 -2
- package/dist/415.js.map +0 -1
- package/dist/501.js +0 -2
- package/dist/501.js.map +0 -1
- package/dist/531.js +0 -2
- package/dist/531.js.map +0 -1
- package/dist/583.js +0 -2
- package/dist/583.js.map +0 -1
- package/dist/589.js +0 -2
- package/dist/589.js.map +0 -1
- package/dist/603.js +0 -2
- package/dist/603.js.map +0 -1
- package/dist/656.js +0 -2
- package/dist/656.js.map +0 -1
- package/dist/682.js +0 -2
- package/dist/682.js.map +0 -1
- package/dist/705.js +0 -2
- package/dist/705.js.map +0 -1
- package/dist/727.js +0 -2
- package/dist/727.js.map +0 -1
- package/dist/731.js +0 -2
- package/dist/731.js.map +0 -1
- package/dist/738.js +0 -3
- package/dist/738.js.LICENSE.txt +0 -51
- package/dist/738.js.map +0 -1
- package/dist/763.js +0 -2
- package/dist/763.js.map +0 -1
- package/dist/778.js +0 -2
- package/dist/778.js.map +0 -1
- package/dist/783.js +0 -2
- package/dist/783.js.map +0 -1
- package/dist/793.js +0 -2
- package/dist/793.js.map +0 -1
- package/dist/801.js +0 -2
- package/dist/801.js.map +0 -1
- package/dist/810.js +0 -2
- package/dist/810.js.map +0 -1
- package/dist/860.js +0 -2
- package/dist/860.js.map +0 -1
- package/dist/907.js +0 -2
- package/dist/907.js.map +0 -1
- package/dist/950.js +0 -2
- package/dist/950.js.map +0 -1
- package/dist/980.js +0 -2
- package/dist/980.js.map +0 -1
- package/dist/990.js +0 -2
- package/dist/990.js.map +0 -1
package/src/package.ts
CHANGED
|
@@ -38,750 +38,979 @@ import {initXgboost} from '../wasm/xgbooster';
|
|
|
38
38
|
import {XGBooster} from './xgbooster';
|
|
39
39
|
|
|
40
40
|
export const _package = new DG.Package();
|
|
41
|
+
export * from './package.g';
|
|
41
42
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
}
|
|
43
|
+
export class PackageFunctions {
|
|
44
|
+
@grok.decorators.func({
|
|
45
|
+
'name': 'info'
|
|
46
|
+
})
|
|
47
|
+
static info() {
|
|
46
48
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
await _initEDAAPI();
|
|
50
|
-
await initXgboost();
|
|
51
|
-
}
|
|
49
|
+
grok.shell.info(_package.webRoot);
|
|
50
|
+
}
|
|
52
51
|
|
|
53
|
-
//top-menu: ML | Cluster | DBSCAN...
|
|
54
|
-
//name: DBSCAN
|
|
55
|
-
//description: Density-based spatial clustering of applications with noise (DBSCAN)
|
|
56
|
-
//input: dataframe df
|
|
57
|
-
//input: column xCol {type: numerical}
|
|
58
|
-
//input: column yCol {type: numerical}
|
|
59
|
-
//input: double epsilon = 0.02 {caption: Epsilon} [The maximum distance between two samples for them to be considered as in the same neighborhood.]
|
|
60
|
-
//input: int minPts = 4 {caption: Minimum points} [The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.]
|
|
61
|
-
//output: column cluster
|
|
62
|
-
export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column, epsilon: number, minPts: number) {
|
|
63
|
-
const x = xCol.getRawData() as Float32Array;
|
|
64
|
-
const y = yCol.getRawData() as Float32Array;
|
|
65
|
-
const res = await getDbscanWorker(x, y, epsilon, minPts);
|
|
66
|
-
const clusterColName = df.columns.getUnusedName('Cluster (DBSCAN)');
|
|
67
|
-
const cluster = DG.Column.fromInt32Array(clusterColName, res);
|
|
68
|
-
df.columns.add(cluster);
|
|
69
|
-
return cluster;
|
|
70
|
-
}
|
|
71
52
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
53
|
+
@grok.decorators.init({})
|
|
54
|
+
static async init(): Promise<void> {
|
|
55
|
+
|
|
56
|
+
await _initEDAAPI();
|
|
57
|
+
await initXgboost();
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@grok.decorators.func({
|
|
62
|
+
'top-menu': 'ML | Cluster | DBSCAN...',
|
|
63
|
+
'name': 'DBSCAN',
|
|
64
|
+
'description': 'Density-based spatial clustering of applications with noise (DBSCAN)'
|
|
65
|
+
})
|
|
66
|
+
static async dbScan(
|
|
67
|
+
df: DG.DataFrame,
|
|
68
|
+
@grok.decorators.param({'options':{'type':'numerical'}}) xCol: DG.Column,
|
|
69
|
+
@grok.decorators.param({'options':{'type':'numerical'}}) yCol: DG.Column,
|
|
70
|
+
@grok.decorators.param({'options':{'caption':'Epsilon','initialValue':'0.02', description: 'The maximum distance between two samples for them to be considered as in the same neighborhood.'}}) epsilon: number,
|
|
71
|
+
@grok.decorators.param({'type':'int','options':{'caption':'Minimum points','initialValue':'4', description: 'The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.'}}) minPts: number) : Promise<DG.Column> {
|
|
72
|
+
|
|
73
|
+
const x = xCol.getRawData() as Float32Array;
|
|
74
|
+
const y = yCol.getRawData() as Float32Array;
|
|
75
|
+
const res = await getDbscanWorker(x, y, epsilon, minPts);
|
|
76
|
+
const clusterColName = df.columns.getUnusedName('Cluster (DBSCAN)');
|
|
77
|
+
const cluster = DG.Column.fromInt32Array(clusterColName, res);
|
|
78
|
+
df.columns.add(cluster);
|
|
79
|
+
return cluster;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
@grok.decorators.func({
|
|
83
|
+
'top-menu': 'ML | Analyze | PCA...',
|
|
84
|
+
'description': 'Principal component analysis (PCA)',
|
|
85
|
+
helpUrl: '/help/explore/dim-reduction#pca',
|
|
86
|
+
})
|
|
87
|
+
static async PCA(
|
|
88
|
+
@grok.decorators.param({'type':'dataframe','options':{'caption':'Table'}}) table: DG.DataFrame,
|
|
89
|
+
@grok.decorators.param({'type':'column_list','options':{'type':'numerical', 'nullable': false}}) features: DG.ColumnList,
|
|
90
|
+
//@ts-ignore
|
|
91
|
+
@grok.decorators.param({'type':'int','options':{'showPlusMinus': true, 'caption':'Components', 'nullable':false, 'min':'1', 'initialValue':'2', description: 'Number of components.'}}) components: number,
|
|
92
|
+
@grok.decorators.param({'type':'bool', 'options':{'caption':'Center', 'initialValue':'false', description: 'Indicating whether the variables should be shifted to be zero centered.'}}) center: boolean,
|
|
93
|
+
@grok.decorators.param({'type':'bool','options':{'caption':'Scale', 'initialValue':'false', description: 'Indicating whether the variables should be scaled to have unit variance.'}}) scale: boolean): Promise<void> {
|
|
94
|
+
|
|
95
|
+
try {
|
|
96
|
+
const pcaTable = await computePCA(table, features, components, center, scale);
|
|
97
|
+
addPrefixToEachColumnName('PC', pcaTable.columns);
|
|
98
|
+
|
|
99
|
+
if (table.id === null) // table is loaded from a local file
|
|
100
|
+
grok.shell.addTableView(pcaTable);
|
|
101
|
+
else {
|
|
102
|
+
const cols = table.columns;
|
|
103
|
+
const pcaTableCols = pcaTable.columns.toList();
|
|
104
|
+
|
|
105
|
+
for (const col of pcaTableCols) {
|
|
106
|
+
pcaTable.columns.remove(col.name);
|
|
107
|
+
col.name = cols.getUnusedName(col.name);
|
|
108
|
+
cols.add(col);
|
|
109
|
+
}
|
|
93
110
|
}
|
|
111
|
+
} catch (error) {
|
|
112
|
+
grok.shell.warning(`Failed to compute PCA: ${error instanceof Error ? error.message : 'platform issue'}`);
|
|
94
113
|
}
|
|
95
|
-
} catch (error) {
|
|
96
|
-
grok.shell.warning(`Failed to compute PCA: ${error instanceof Error ? error.message : 'platform issue'}`);
|
|
97
114
|
}
|
|
98
|
-
}
|
|
99
115
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
for (const v of tv.viewers) {
|
|
119
|
-
if (v instanceof DG.ScatterPlotViewer && colNames.includes(v.props.xColumnName) && colNames.includes(v.props.yColumnName)) {
|
|
120
|
-
v.props.colorColumnName = resCol.name;
|
|
116
|
+
|
|
117
|
+
@grok.decorators.func({
|
|
118
|
+
'meta': {
|
|
119
|
+
'defaultPostProcessingFunction': 'true'
|
|
120
|
+
},
|
|
121
|
+
'tags': [
|
|
122
|
+
'dim-red-postprocessing-function'
|
|
123
|
+
],
|
|
124
|
+
'name': 'DBSCAN clustering'
|
|
125
|
+
})
|
|
126
|
+
static async dbscanPostProcessingFunction(
|
|
127
|
+
col1: DG.Column,
|
|
128
|
+
col2: DG.Column,
|
|
129
|
+
@grok.decorators.param({'options':{'initialValue':'0.01', description: 'Minimum distance between two points to be considered as in the same neighborhood.'}}) epsilon: number,
|
|
130
|
+
@grok.decorators.param({'type':'int','options':{'initialValue':'5', description: 'Minimum number of points to form a dense region.'}}) minimumPoints: number) {
|
|
131
|
+
|
|
132
|
+
const df = col1.dataFrame;
|
|
133
|
+
if (df === null)
|
|
121
134
|
return;
|
|
135
|
+
const resCol = await PackageFunctions.dbScan(df, col1, col2, epsilon, minimumPoints);
|
|
136
|
+
df.changeColumnType(resCol, 'string');
|
|
137
|
+
const colNames = [col1.name, col2.name];
|
|
138
|
+
const tv = grok.shell.tableView(df.name);
|
|
139
|
+
if (!tv)
|
|
140
|
+
return;
|
|
141
|
+
for (const v of tv.viewers) {
|
|
142
|
+
if (v instanceof DG.ScatterPlotViewer && colNames.includes(v.props.xColumnName) && colNames.includes(v.props.yColumnName)) {
|
|
143
|
+
v.props.colorColumnName = resCol.name;
|
|
144
|
+
return;
|
|
145
|
+
}
|
|
122
146
|
}
|
|
123
147
|
}
|
|
124
|
-
}
|
|
125
148
|
|
|
126
|
-
//name: None (number)
|
|
127
|
-
//tags: dim-red-preprocessing-function
|
|
128
|
-
//meta.supportedTypes: int,float,double,qnum
|
|
129
|
-
//meta.supportedDistanceFunctions: Difference
|
|
130
|
-
//input: column col
|
|
131
|
-
//input: string _metric {optional: true}
|
|
132
|
-
//output: object result
|
|
133
|
-
export function numberPreprocessingFunction(col: DG.Column, _metric: string) {
|
|
134
|
-
const range = col.stats.max - col.stats.min;
|
|
135
|
-
const entries = col.toList();
|
|
136
|
-
return {entries, options: {range}};
|
|
137
|
-
}
|
|
138
149
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
+
@grok.decorators.func({
|
|
151
|
+
'meta': {
|
|
152
|
+
'supportedTypes': 'int,float,double,qnum',
|
|
153
|
+
'supportedDistanceFunctions': 'Difference'
|
|
154
|
+
},
|
|
155
|
+
'tags': ['dim-red-preprocessing-function'],
|
|
156
|
+
'name': 'None (number)',
|
|
157
|
+
'outputs': [{name: 'result', type:'object'}]
|
|
158
|
+
})
|
|
159
|
+
static numberPreprocessingFunction(
|
|
160
|
+
col: DG.Column,
|
|
161
|
+
@grok.decorators.param({'options':{'optional':true}}) _metric: string) {
|
|
162
|
+
|
|
163
|
+
const range = col.stats.max - col.stats.min;
|
|
164
|
+
const entries = col.toList();
|
|
165
|
+
return {entries, options: {range}};
|
|
166
|
+
}
|
|
150
167
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
168
|
+
|
|
169
|
+
@grok.decorators.func({
|
|
170
|
+
'meta': {
|
|
171
|
+
'supportedTypes': 'string',
|
|
172
|
+
'supportedDistanceFunctions': 'One-Hot,Levenshtein,Hamming'
|
|
173
|
+
},
|
|
174
|
+
'tags': ['dim-red-preprocessing-function'],
|
|
175
|
+
'name': 'None (string)',
|
|
176
|
+
'outputs': [{name: 'result', type:'object'}]
|
|
177
|
+
})
|
|
178
|
+
static stringPreprocessingFunction(
|
|
179
|
+
col: DG.Column,
|
|
180
|
+
@grok.decorators.param({'options':{'optional':true}}) _metric: string) {
|
|
181
|
+
|
|
182
|
+
const entries = col.toList();
|
|
183
|
+
return {entries, options: {}};
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
@grok.decorators.func({
|
|
188
|
+
'top-menu': 'ML | Reduce Dimensionality...',
|
|
189
|
+
'name': 'Multi Column Dimensionality Reduction'
|
|
190
|
+
})
|
|
191
|
+
static async reduceDimensionality(): Promise<void> {
|
|
192
|
+
|
|
193
|
+
const editor = new MultiColumnDimReductionEditor();
|
|
194
|
+
const dialog = ui.dialog('Dimensionality reduction')
|
|
195
|
+
.add(editor.getEditor())
|
|
196
|
+
.onOK(async () => {
|
|
197
|
+
const params = editor.getParams();
|
|
198
|
+
if (params.columns.length === 0)
|
|
199
|
+
return;
|
|
200
|
+
await multiColReduceDimensionality(params.table, params.columns, params.methodName as DimReductionMethods,
|
|
201
|
+
params.distanceMetrics as KnownMetrics[],
|
|
202
|
+
params.weights, params.preprocessingFunctions, params.aggreaggregationMethod as DistanceAggregationMethods,
|
|
203
|
+
!!params.plotEmbeddings, !!params.clusterEmbeddings, params.options, {
|
|
204
|
+
fastRowCount: 10000,
|
|
205
|
+
}, params.postProcessingFunction, params.postProcessingFunctionArgs, params.vectorDistanceMetric);
|
|
206
|
+
}).show();
|
|
207
|
+
dialog.helpUrl = 'https://datagrok.ai/help/explore/dim-reduction.md';
|
|
208
|
+
const validate = () => {
|
|
209
|
+
const cols = editor.columnsInput.value;
|
|
210
|
+
const okButton = dialog.getButton('OK');
|
|
211
|
+
if (!okButton)
|
|
160
212
|
return;
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
}).
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
213
|
+
const isDisabled = !cols || cols.length === 0;
|
|
214
|
+
if (isDisabled)
|
|
215
|
+
okButton.classList.add('disabled');
|
|
216
|
+
else
|
|
217
|
+
okButton.classList.remove('disabled');
|
|
218
|
+
};
|
|
219
|
+
dialog.history(() => ({editorSettings: editor.getStringInput()}), (x: any) => editor.applyStringInput(x['editorSettings']));
|
|
220
|
+
editor.onColumnsChanged.subscribe(() => {
|
|
221
|
+
try {
|
|
222
|
+
validate();
|
|
223
|
+
} catch (e) {
|
|
224
|
+
console.error(e);
|
|
225
|
+
}
|
|
226
|
+
});
|
|
227
|
+
validate();
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
@grok.decorators.editor()
|
|
232
|
+
static GetMCLEditor(
|
|
233
|
+
call: DG.FuncCall): void {
|
|
234
|
+
|
|
182
235
|
try {
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
236
|
+
const funcEditor = new MCLEditor();
|
|
237
|
+
const dialog = ui.dialog('Markov clustering')
|
|
238
|
+
.add(funcEditor.getEditor())
|
|
239
|
+
.onOK(async () => {
|
|
240
|
+
const params = funcEditor.params;
|
|
241
|
+
return call.func.prepare({
|
|
242
|
+
df: params.table, cols: params.columns, metrics: params.distanceMetrics,
|
|
243
|
+
weights: params.weights, aggregationMethod: params.aggreaggregationMethod, preprocessingFuncs: params.preprocessingFunctions,
|
|
244
|
+
preprocessingFuncArgs: params.preprocessingFuncArgs, threshold: params.threshold, maxIterations: params.maxIterations,
|
|
245
|
+
useWebGPU: params.useWebGPU, inflate: params.inflateFactor, minClusterSize: params.minClusterSize,
|
|
246
|
+
}).call(true);
|
|
247
|
+
}).show();
|
|
248
|
+
dialog.history(() => ({editorSettings: funcEditor.getStringInput()}), (x: any) => funcEditor.applyStringInput(x['editorSettings']));
|
|
249
|
+
} catch (err: any) {
|
|
250
|
+
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
251
|
+
const errStack = err instanceof Error ? err.stack : undefined;
|
|
252
|
+
grok.shell.error(`Get region editor error: ${errMsg}`);
|
|
253
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
186
254
|
}
|
|
187
|
-
}
|
|
188
|
-
validate();
|
|
189
|
-
}
|
|
255
|
+
}
|
|
190
256
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
grok.
|
|
213
|
-
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
@grok.decorators.func({
|
|
260
|
+
'top-menu': 'ML | Cluster | MCL...',
|
|
261
|
+
'name': 'MCLClustering',
|
|
262
|
+
'description': 'Markov clustering (MCL) is an unsupervised clustering algorithm for graphs based on simulation of stochastic flow.',
|
|
263
|
+
'editor': 'EDA:GetMCLEditor',
|
|
264
|
+
'outputs': [],
|
|
265
|
+
})
|
|
266
|
+
static async MCLClustering(
|
|
267
|
+
df: DG.DataFrame,
|
|
268
|
+
cols: DG.Column[],
|
|
269
|
+
@grok.decorators.param({'type':'list<string>'}) metrics: KnownMetrics[],
|
|
270
|
+
weights: number[],
|
|
271
|
+
@grok.decorators.param({'type':'string'}) aggregationMethod: DistanceAggregationMethod,
|
|
272
|
+
@grok.decorators.param({'type':'list<func>'}) preprocessingFuncs: any[],
|
|
273
|
+
@grok.decorators.param({'type':'object'}) preprocessingFuncArgs: any[],
|
|
274
|
+
@grok.decorators.param({'type':'int','options':{'initialValue':'80'}}) threshold: number = 80,
|
|
275
|
+
@grok.decorators.param({'type':'int','options':{'initialValue':'10'}}) maxIterations: number = 10,
|
|
276
|
+
@grok.decorators.param({'type':'bool','options':{'initialValue':'false'}}) useWebGPU: boolean = false,
|
|
277
|
+
@grok.decorators.param({'type':'double','options':{'initialValue':'2'}}) inflate: number = 0,
|
|
278
|
+
@grok.decorators.param({'type':'int','options':{'initialValue':'5'}}) minClusterSize: number = 5): Promise<MCLViewer> {
|
|
279
|
+
const tv = grok.shell.tableView(df.name) ?? grok.shell.addTableView(df);
|
|
280
|
+
const serializedOptions: string = JSON.stringify({
|
|
281
|
+
cols: cols.map((col) => col.name),
|
|
282
|
+
metrics: metrics,
|
|
283
|
+
weights: weights,
|
|
284
|
+
aggregationMethod: aggregationMethod,
|
|
285
|
+
preprocessingFuncs: preprocessingFuncs.map((func) => func?.name ?? null),
|
|
286
|
+
preprocessingFuncArgs: preprocessingFuncArgs,
|
|
287
|
+
threshold: threshold,
|
|
288
|
+
maxIterations: maxIterations,
|
|
289
|
+
useWebGPU: useWebGPU,
|
|
290
|
+
inflate: inflate,
|
|
291
|
+
minClusterSize: minClusterSize ?? 5,
|
|
292
|
+
} satisfies MCLSerializableOptions);
|
|
293
|
+
|
|
294
|
+
const viewer = tv.addViewer('MCL', {mclProps: serializedOptions}) as MCLViewer;
|
|
295
|
+
return viewer;
|
|
214
296
|
}
|
|
215
|
-
}
|
|
216
297
|
|
|
298
|
+
@grok.decorators.func({
|
|
299
|
+
'outputs': [{'name': 'result','type': 'viewer'}],
|
|
300
|
+
'tags': [
|
|
301
|
+
'viewer'
|
|
302
|
+
],
|
|
303
|
+
'name': 'MCL',
|
|
304
|
+
'description': 'Markov clustering viewer'
|
|
305
|
+
})
|
|
306
|
+
static markovClusteringViewer(): MCLViewer {
|
|
307
|
+
return new MCLViewer();
|
|
308
|
+
}
|
|
217
309
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
): Promise<MCLViewer> {
|
|
239
|
-
const tv = grok.shell.tableView(df.name) ?? grok.shell.addTableView(df);
|
|
240
|
-
const serializedOptions: string = JSON.stringify({
|
|
241
|
-
cols: cols.map((col) => col.name),
|
|
242
|
-
metrics: metrics,
|
|
243
|
-
weights: weights,
|
|
244
|
-
aggregationMethod: aggregationMethod,
|
|
245
|
-
preprocessingFuncs: preprocessingFuncs.map((func) => func?.name ?? null),
|
|
246
|
-
preprocessingFuncArgs: preprocessingFuncArgs,
|
|
247
|
-
threshold: threshold,
|
|
248
|
-
maxIterations: maxIterations,
|
|
249
|
-
useWebGPU: useWebGPU,
|
|
250
|
-
inflate: inflate,
|
|
251
|
-
minClusterSize: minClusterSize ?? 5,
|
|
252
|
-
} satisfies MCLSerializableOptions);
|
|
253
|
-
//df.setTag(MCL_OPTIONS_TAG, serializedOptions);
|
|
254
|
-
|
|
255
|
-
const viewer = tv.addViewer('MCL', {mclProps: serializedOptions}) as MCLViewer;
|
|
256
|
-
return viewer;
|
|
257
|
-
}
|
|
310
|
+
@grok.decorators.func({
|
|
311
|
+
'outputs': [{'name': 'plsResults', 'type': 'object'}],
|
|
312
|
+
'description': 'Compute partial least squares (PLS) regression analysis components: prediction, regression coefficients, T- & U-scores, X-loadings.'
|
|
313
|
+
})
|
|
314
|
+
static async PLS(
|
|
315
|
+
table: DG.DataFrame,
|
|
316
|
+
@grok.decorators.param({'type':'column_list','options':{'type':'numerical'}}) features: DG.ColumnList,
|
|
317
|
+
@grok.decorators.param({'type':'column','options':{'type':'numerical'}}) predict: DG.Column,
|
|
318
|
+
@grok.decorators.param({'type':'int','options':{'initialValue':'3'}}) components: number,
|
|
319
|
+
@grok.decorators.param({'type':'column','options':{'type':'string'}}) names: DG.Column): Promise<PlsOutput> {
|
|
320
|
+
|
|
321
|
+
return await getPlsAnalysis({
|
|
322
|
+
table: table,
|
|
323
|
+
features: features,
|
|
324
|
+
predict: predict,
|
|
325
|
+
components: components,
|
|
326
|
+
isQuadratic: false,
|
|
327
|
+
names: names,
|
|
328
|
+
});
|
|
329
|
+
}
|
|
258
330
|
|
|
259
|
-
//name: MCL
|
|
260
|
-
//description: Markov clustering viewer
|
|
261
|
-
//tags: viewer
|
|
262
|
-
//output: viewer result
|
|
263
|
-
export function markovClusteringViewer(): MCLViewer {
|
|
264
|
-
return new MCLViewer();
|
|
265
|
-
}
|
|
266
331
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
//input: int components = 3
|
|
273
|
-
//input: column names {type: string}
|
|
274
|
-
//output: object plsResults
|
|
275
|
-
export async function PLS(table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, components: number, names: DG.Column): Promise<PlsOutput> {
|
|
276
|
-
return await getPlsAnalysis({
|
|
277
|
-
table: table,
|
|
278
|
-
features: features,
|
|
279
|
-
predict: predict,
|
|
280
|
-
components: components,
|
|
281
|
-
isQuadratic: false,
|
|
282
|
-
names: names,
|
|
283
|
-
});
|
|
284
|
-
}
|
|
332
|
+
@grok.decorators.func({
|
|
333
|
+
'top-menu': 'ML | Analyze | PLS...',
|
|
334
|
+
'description': 'Compute partial least squares (PLS) regression components. They maximally summarize the variation of the predictors while maximizing correlation with the response variable.'
|
|
335
|
+
})
|
|
336
|
+
static async topMenuPLS(): Promise<void> {
|
|
285
337
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
//description: Compute partial least squares (PLS) regression components. They maximally summarize the variation of the predictors while maximizing correlation with the response variable.
|
|
289
|
-
export async function topMenuPLS(): Promise<void> {
|
|
290
|
-
await runMVA(PLS_ANALYSIS.COMPUTE_COMPONENTS);
|
|
291
|
-
}
|
|
338
|
+
await runMVA(PLS_ANALYSIS.COMPUTE_COMPONENTS);
|
|
339
|
+
}
|
|
292
340
|
|
|
293
|
-
//top-menu: ML | Analyze | Multivariate Analysis...
|
|
294
|
-
//name: multivariateAnalysis
|
|
295
|
-
//description: Multidimensional data analysis using partial least squares (PLS) regression.
|
|
296
|
-
export async function MVA(): Promise<void> {
|
|
297
|
-
await runMVA(PLS_ANALYSIS.PERFORM_MVA);
|
|
298
|
-
}
|
|
299
341
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
342
|
+
@grok.decorators.func({
|
|
343
|
+
'top-menu': 'ML | Analyze | Multivariate Analysis...',
|
|
344
|
+
'name': 'multivariateAnalysis',
|
|
345
|
+
'description': 'Multidimensional data analysis using partial least squares (PLS) regression.'
|
|
346
|
+
})
|
|
347
|
+
static async MVA(): Promise<void> {
|
|
306
348
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
//meta.mlrole: train
|
|
310
|
-
//input: dataframe df
|
|
311
|
-
//input: column predictColumn
|
|
312
|
-
//input: double gamma = 1.0 {category: Hyperparameters}
|
|
313
|
-
//output: dynamic model
|
|
314
|
-
export async function trainLinearKernelSVM(df: DG.DataFrame, predictColumn: DG.Column,
|
|
315
|
-
gamma: number): Promise<any> {
|
|
316
|
-
const trainedModel = await getTrainedModel({gamma: gamma, kernel: LINEAR}, df, predictColumn);
|
|
317
|
-
return getPackedModel(trainedModel);
|
|
318
|
-
}
|
|
349
|
+
await runMVA(PLS_ANALYSIS.PERFORM_MVA);
|
|
350
|
+
}
|
|
319
351
|
|
|
320
|
-
//name: applyLinearKernelSVM
|
|
321
|
-
//meta.mlname: linear kernel LS-SVM
|
|
322
|
-
//meta.mlrole: apply
|
|
323
|
-
//input: dataframe df
|
|
324
|
-
//input: dynamic model
|
|
325
|
-
//output: dataframe table
|
|
326
|
-
export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
327
|
-
return await getPrediction(df, model);
|
|
328
|
-
}
|
|
329
352
|
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
353
|
+
@grok.decorators.func({
|
|
354
|
+
'meta': {
|
|
355
|
+
'demoPath': 'Compute | Multivariate Analysis'
|
|
356
|
+
},
|
|
357
|
+
'name': 'MVA demo',
|
|
358
|
+
'description': 'Multidimensional data analysis using partial least squares (PLS) regression. It identifies latent factors and constructs a linear model based on them.'
|
|
359
|
+
})
|
|
360
|
+
static async demoMultivariateAnalysis(): Promise<void> {
|
|
361
|
+
await runDemoMVA();
|
|
362
|
+
}
|
|
339
363
|
|
|
340
|
-
//name: isInteractiveLinearKernelSVM
|
|
341
|
-
//meta.mlname: linear kernel LS-SVM
|
|
342
|
-
//meta.mlrole: isInteractive
|
|
343
|
-
//input: dataframe df
|
|
344
|
-
//input: column predictColumn
|
|
345
|
-
//output: bool result
|
|
346
|
-
export async function isInteractiveLinearKernelSVM(df: DG.DataFrame, predictColumn: DG.Column): Promise<boolean> {
|
|
347
|
-
return isInteractiveSVM(df, predictColumn);
|
|
348
|
-
}
|
|
349
364
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
365
|
+
/**
 * Trains an LS-SVM model with the linear kernel
 * (tagged `mlname: 'linear kernel LS-SVM'`, `mlrole: 'train'`).
 *
 * @param df - dataframe forwarded to `getTrainedModel`
 *   (presumably the feature columns; confirm against the helper)
 * @param predictColumn - column forwarded to `getTrainedModel` as the prediction target
 * @param gamma - hyperparameter passed on as `gamma`; its parameter decorator declares
 *   the initial value '1.0' in the 'Hyperparameters' category
 * @returns whatever `getPackedModel` produces for the trained model
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'linear kernel LS-SVM',
    'mlrole': 'train'
  }
})
static async trainLinearKernelSVM(
  df: DG.DataFrame,
  predictColumn: DG.Column,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '1.0'}}) gamma: number): Promise<any> {
  const fitted = await getTrainedModel({gamma, kernel: LINEAR}, df, predictColumn);
  const packed = getPackedModel(fitted);
  return packed;
}
|
|
361
379
|
|
|
362
380
|
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
gamma: number, sigma: number): Promise<any> {
|
|
373
|
-
const trainedModel = await getTrainedModel(
|
|
374
|
-
{gamma: gamma, kernel: RBF, sigma: sigma},
|
|
375
|
-
df, predictColumn);
|
|
376
|
-
|
|
377
|
-
return getPackedModel(trainedModel);
|
|
378
|
-
}
|
|
381
|
+
@grok.decorators.func({
|
|
382
|
+
'meta': {
|
|
383
|
+
'mlname': 'linear kernel LS-SVM',
|
|
384
|
+
'mlrole': 'apply'
|
|
385
|
+
}
|
|
386
|
+
})
|
|
387
|
+
static async applyLinearKernelSVM(
|
|
388
|
+
df: DG.DataFrame,
|
|
389
|
+
model: any): Promise<DG.DataFrame> {
|
|
379
390
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
//meta.mlrole: apply
|
|
383
|
-
//input: dataframe df
|
|
384
|
-
//input: dynamic model
|
|
385
|
-
//output: dataframe table
|
|
386
|
-
export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
387
|
-
return await getPrediction(df, model);
|
|
388
|
-
}
|
|
391
|
+
return await getPrediction(df, model);
|
|
392
|
+
}
|
|
389
393
|
|
|
390
|
-
//name: isApplicableRBFkernelSVM
|
|
391
|
-
//meta.mlname: RBF-kernel LS-SVM
|
|
392
|
-
//meta.mlrole: isApplicable
|
|
393
|
-
//input: dataframe df
|
|
394
|
-
//input: column predictColumn
|
|
395
|
-
//output: bool result
|
|
396
|
-
export async function isApplicableRBFkernelSVM(df: DG.DataFrame, predictColumn: DG.Column): Promise<boolean> {
|
|
397
|
-
return isApplicableSVM(df, predictColumn);
|
|
398
|
-
}
|
|
399
394
|
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
395
|
+
/**
 * Applicability check for the linear kernel LS-SVM
 * (tagged `mlname: 'linear kernel LS-SVM'`, `mlrole: 'isApplicable'`).
 *
 * @param df - dataframe passed to `isApplicableSVM`
 * @param predictColumn - column passed to `isApplicableSVM`
 * @returns the verdict of `isApplicableSVM(df, predictColumn)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'linear kernel LS-SVM',
    'mlrole': 'isApplicable'
  }
})
static async isApplicableLinearKernelSVM(df: DG.DataFrame, predictColumn: DG.Column): Promise<boolean> {
  // The check is shared by all SVM kernels; nothing kernel-specific happens here.
  const applicable = isApplicableSVM(df, predictColumn);
  return applicable;
}
|
|
410
407
|
|
|
411
|
-
//name: visualizeRBFkernelSVM
|
|
412
|
-
//meta.mlname: RBF-kernel LS-SVM
|
|
413
|
-
//meta.mlrole: visualize
|
|
414
|
-
//input: dataframe df
|
|
415
|
-
//input: column targetColumn
|
|
416
|
-
//input: column predictColumn
|
|
417
|
-
//input: dynamic model
|
|
418
|
-
//output: dynamic widget
|
|
419
|
-
export async function visualizeRBFkernelSVM(df: DG.DataFrame, targetColumn: DG.Column, predictColumn: DG.Column, model: any): Promise<any> {
|
|
420
|
-
return showTrainReport(df, model);
|
|
421
|
-
}
|
|
422
408
|
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
export async function trainPolynomialKernelSVM(df: DG.DataFrame, predictColumn: DG.Column,
|
|
433
|
-
gamma: number, c: number, d: number): Promise<any> {
|
|
434
|
-
const trainedModel = await getTrainedModel(
|
|
435
|
-
{gamma: gamma, kernel: POLYNOMIAL, cParam: c, dParam: d},
|
|
436
|
-
df, predictColumn);
|
|
437
|
-
|
|
438
|
-
return getPackedModel(trainedModel);
|
|
439
|
-
} // trainPolynomialKernelSVM
|
|
440
|
-
|
|
441
|
-
//name: applyPolynomialKernelSVM
|
|
442
|
-
//meta.mlname: polynomial kernel LS-SVM
|
|
443
|
-
//meta.mlrole: apply
|
|
444
|
-
//input: dataframe df
|
|
445
|
-
//input: dynamic model
|
|
446
|
-
//output: dataframe table
|
|
447
|
-
export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
448
|
-
return await getPrediction(df, model);
|
|
449
|
-
}
|
|
409
|
+
@grok.decorators.func({
|
|
410
|
+
'meta': {
|
|
411
|
+
'mlname': 'linear kernel LS-SVM',
|
|
412
|
+
'mlrole': 'isInteractive'
|
|
413
|
+
}
|
|
414
|
+
})
|
|
415
|
+
static async isInteractiveLinearKernelSVM(
|
|
416
|
+
df: DG.DataFrame,
|
|
417
|
+
predictColumn: DG.Column): Promise<boolean> {
|
|
450
418
|
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
//meta.mlrole: isApplicable
|
|
454
|
-
//input: dataframe df
|
|
455
|
-
//input: column predictColumn
|
|
456
|
-
//output: bool result
|
|
457
|
-
export async function isApplicablePolynomialKernelSVM(df: DG.DataFrame, predictColumn: DG.Column): Promise<boolean> {
|
|
458
|
-
return isApplicableSVM(df, predictColumn);
|
|
459
|
-
}
|
|
419
|
+
return isInteractiveSVM(df, predictColumn);
|
|
420
|
+
}
|
|
460
421
|
|
|
461
|
-
//name: isInteractivePolynomialKernelSVM
|
|
462
|
-
//meta.mlname: polynomial kernel LS-SVM
|
|
463
|
-
//meta.mlrole: isInteractive
|
|
464
|
-
//input: dataframe df
|
|
465
|
-
//input: column predictColumn
|
|
466
|
-
//output: bool result
|
|
467
|
-
export async function isInteractivePolynomialKernelSVM(df: DG.DataFrame, predictColumn: DG.Column): Promise<boolean> {
|
|
468
|
-
return isInteractiveSVM(df, predictColumn);
|
|
469
|
-
}
|
|
470
422
|
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
423
|
+
/**
 * Produces the training report for a linear kernel LS-SVM model
 * (tagged `mlname: 'linear kernel LS-SVM'`, `mlrole: 'visualize'`).
 *
 * @param df - dataframe passed to `showTrainReport`
 * @param targetColumn - part of the signature; not read by this method
 * @param predictColumn - part of the signature; not read by this method
 * @param model - model passed to `showTrainReport`
 * @returns the result of `showTrainReport(df, model)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'linear kernel LS-SVM',
    'mlrole': 'visualize'
  }
})
static async visualizeLinearKernelSVM(df: DG.DataFrame, targetColumn: DG.Column,
  predictColumn: DG.Column, model: any): Promise<any> {
  const report = showTrainReport(df, model);
  return report;
}
|
|
482
436
|
|
|
483
|
-
//name: trainSigmoidKernelSVM
|
|
484
|
-
//meta.mlname: sigmoid kernel LS-SVM
|
|
485
|
-
//meta.mlrole: train
|
|
486
|
-
//input: dataframe df
|
|
487
|
-
//input: column predictColumn
|
|
488
|
-
//input: double gamma = 1.0 {category: Hyperparameters}
|
|
489
|
-
//input: double kappa = 1 {category: Hyperparameters}
|
|
490
|
-
//input: double theta = 1 {category: Hyperparameters}
|
|
491
|
-
//output: dynamic model
|
|
492
|
-
export async function trainSigmoidKernelSVM(df: DG.DataFrame, predictColumn: DG.Column,
|
|
493
|
-
gamma: number, kappa: number, theta: number): Promise<any> {
|
|
494
|
-
const trainedModel = await getTrainedModel(
|
|
495
|
-
{gamma: gamma, kernel: SIGMOID, kappa: kappa, theta: theta},
|
|
496
|
-
df, predictColumn);
|
|
497
|
-
|
|
498
|
-
return getPackedModel(trainedModel);
|
|
499
|
-
} // trainSigmoidKernelSVM
|
|
500
|
-
|
|
501
|
-
//name: applySigmoidKernelSVM
|
|
502
|
-
//meta.mlname: sigmoid kernel LS-SVM
|
|
503
|
-
//meta.mlrole: apply
|
|
504
|
-
//input: dataframe df
|
|
505
|
-
//input: dynamic model
|
|
506
|
-
//output: dataframe table
|
|
507
|
-
export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
508
|
-
return await getPrediction(df, model);
|
|
509
|
-
}
|
|
510
437
|
|
|
511
|
-
//name: isApplicableSigmoidKernelSVM
|
|
512
|
-
//meta.mlname: sigmoid kernel LS-SVM
|
|
513
|
-
//meta.mlrole: isApplicable
|
|
514
|
-
//input: dataframe df
|
|
515
|
-
//input: column predictColumn
|
|
516
|
-
//output: bool result
|
|
517
|
-
export async function isApplicableSigmoidKernelSVM(df: DG.DataFrame, predictColumn: DG.Column): Promise<boolean> {
|
|
518
|
-
return isApplicableSVM(df, predictColumn);
|
|
519
|
-
}
|
|
520
438
|
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
}
|
|
439
|
+
@grok.decorators.func({
|
|
440
|
+
'meta': {
|
|
441
|
+
'mlname': 'RBF-kernel LS-SVM',
|
|
442
|
+
'mlrole': 'train'
|
|
443
|
+
}
|
|
444
|
+
})
|
|
445
|
+
static async trainRBFkernelSVM(df: DG.DataFrame, predictColumn: DG.Column,
|
|
446
|
+
@grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '1.0'}}) gamma: number,
|
|
447
|
+
@grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '1.5'}}) sigma: number): Promise<any> {
|
|
530
448
|
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
//input: dataframe df
|
|
535
|
-
//input: column targetColumn
|
|
536
|
-
//input: column predictColumn
|
|
537
|
-
//input: dynamic model
|
|
538
|
-
//output: dynamic widget
|
|
539
|
-
export async function visualizeSigmoidKernelSVM(df: DG.DataFrame, targetColumn: DG.Column, predictColumn: DG.Column, model: any): Promise<any> {
|
|
540
|
-
return showTrainReport(df, model);
|
|
541
|
-
}
|
|
449
|
+
const trainedModel = await getTrainedModel(
|
|
450
|
+
{gamma: gamma, kernel: RBF, sigma: sigma},
|
|
451
|
+
df, predictColumn);
|
|
542
452
|
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
//description: One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the explored feature.
|
|
546
|
-
export function anova(): void {
|
|
547
|
-
runOneWayAnova();
|
|
548
|
-
}
|
|
453
|
+
return getPackedModel(trainedModel);
|
|
454
|
+
}
|
|
549
455
|
|
|
550
|
-
//top-menu: ML | Impute Missing Values...
|
|
551
|
-
//name: KNN impute
|
|
552
|
-
//description: Missing values imputation using the k-nearest neighbors method (KNN)
|
|
553
|
-
export function kNNImputation() {
|
|
554
|
-
runKNNImputer();
|
|
555
|
-
}
|
|
556
456
|
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
}
|
|
457
|
+
@grok.decorators.func({
|
|
458
|
+
'meta': {
|
|
459
|
+
'mlname': 'RBF-kernel LS-SVM',
|
|
460
|
+
'mlrole': 'apply'
|
|
461
|
+
}
|
|
462
|
+
})
|
|
463
|
+
static async applyRBFkernelSVM(
|
|
464
|
+
df: DG.DataFrame,
|
|
465
|
+
model: any): Promise<DG.DataFrame> {
|
|
563
466
|
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
//meta.mlrole: train
|
|
567
|
-
//input: dataframe df
|
|
568
|
-
//input: column predictColumn
|
|
569
|
-
//output: dynamic model
|
|
570
|
-
export async function trainLinearRegression(df: DG.DataFrame, predictColumn: DG.Column): Promise<Uint8Array> {
|
|
571
|
-
const features = df.columns;
|
|
572
|
-
const params = await getLinearRegressionParams(features, predictColumn);
|
|
573
|
-
|
|
574
|
-
return new Uint8Array(params.buffer);
|
|
575
|
-
}
|
|
467
|
+
return await getPrediction(df, model);
|
|
468
|
+
}
|
|
576
469
|
|
|
577
|
-
//name: applyLinearRegression
|
|
578
|
-
//meta.mlname: Linear Regression
|
|
579
|
-
//meta.mlrole: apply
|
|
580
|
-
//input: dataframe df
|
|
581
|
-
//input: dynamic model
|
|
582
|
-
//output: dataframe table
|
|
583
|
-
export function applyLinearRegression(df: DG.DataFrame, model: any): DG.DataFrame {
|
|
584
|
-
const features = df.columns;
|
|
585
|
-
const params = new Float32Array((model as Uint8Array).buffer);
|
|
586
|
-
return DG.DataFrame.fromColumns([getPredictionByLinearRegression(features, params)]);
|
|
587
|
-
}
|
|
588
470
|
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
471
|
+
/**
 * Applicability check for the RBF-kernel LS-SVM
 * (tagged `mlname: 'RBF-kernel LS-SVM'`, `mlrole: 'isApplicable'`).
 *
 * @param df - dataframe passed to `isApplicableSVM`
 * @param predictColumn - column passed to `isApplicableSVM`
 * @returns the verdict of `isApplicableSVM(df, predictColumn)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'RBF-kernel LS-SVM',
    'mlrole': 'isApplicable'
  }
})
static async isApplicableRBFkernelSVM(df: DG.DataFrame, predictColumn: DG.Column): Promise<boolean> {
  // Same shared SVM check as the other kernels.
  const applicable = isApplicableSVM(df, predictColumn);
  return applicable;
}
|
|
600
483
|
|
|
601
|
-
return predictColumn.matches('numerical');
|
|
602
|
-
}
|
|
603
484
|
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
485
|
+
@grok.decorators.func({
|
|
486
|
+
'meta': {
|
|
487
|
+
'mlname': 'RBF-kernel LS-SVM',
|
|
488
|
+
'mlrole': 'isInteractive'
|
|
489
|
+
}
|
|
490
|
+
})
|
|
491
|
+
static async isInteractiveRBFkernelSVM(
|
|
492
|
+
df: DG.DataFrame,
|
|
493
|
+
predictColumn: DG.Column): Promise<boolean> {
|
|
613
494
|
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
//meta.mlrole: train
|
|
617
|
-
//input: dataframe df
|
|
618
|
-
//input: column predictColumn
|
|
619
|
-
//input: double rate = 1.0 {category: Hyperparameters; min: 0.001; max: 20} [Learning rate]
|
|
620
|
-
//input: int iterations = 100 {category: Hyperparameters; min: 1; max: 10000; step: 10} [Fitting iterations count]
|
|
621
|
-
//input: double penalty = 0.1 {category: Hyperparameters; min: 0.0001; max: 1} [Regularization rate]
|
|
622
|
-
//input: double tolerance = 0.001 {category: Hyperparameters; min: 0.00001; max: 0.1} [Fitting tolerance]
|
|
623
|
-
//output: dynamic model
|
|
624
|
-
export async function trainSoftmax(df: DG.DataFrame, predictColumn: DG.Column, rate: number,
|
|
625
|
-
iterations: number, penalty: number, tolerance: number): Promise<Uint8Array> {
|
|
626
|
-
const features = df.columns;
|
|
627
|
-
|
|
628
|
-
const model = new SoftmaxClassifier({
|
|
629
|
-
classesCount: predictColumn.categories.length,
|
|
630
|
-
featuresCount: features.length,
|
|
631
|
-
});
|
|
632
|
-
|
|
633
|
-
await model.fit(features, predictColumn, rate, iterations, penalty, tolerance);
|
|
634
|
-
|
|
635
|
-
return model.toBytes();
|
|
636
|
-
}
|
|
495
|
+
return isInteractiveSVM(df, predictColumn);
|
|
496
|
+
}
|
|
637
497
|
|
|
638
|
-
//name: applySoftmax
|
|
639
|
-
//meta.mlname: Softmax
|
|
640
|
-
//meta.mlrole: apply
|
|
641
|
-
//input: dataframe df
|
|
642
|
-
//input: dynamic model
|
|
643
|
-
//output: dataframe table
|
|
644
|
-
export function applySoftmax(df: DG.DataFrame, model: any): DG.DataFrame {
|
|
645
|
-
const features = df.columns;
|
|
646
|
-
const unpackedModel = new SoftmaxClassifier(undefined, model);
|
|
647
|
-
|
|
648
|
-
return DG.DataFrame.fromColumns([unpackedModel.predict(features)]);
|
|
649
|
-
}
|
|
650
498
|
|
|
651
|
-
//name: isApplicableSoftmax
|
|
652
|
-
//meta.mlname: Softmax
|
|
653
|
-
//meta.mlrole: isApplicable
|
|
654
|
-
//input: dataframe df
|
|
655
|
-
//input: column predictColumn
|
|
656
|
-
//output: bool result
|
|
657
|
-
export function isApplicableSoftmax(df: DG.DataFrame, predictColumn: DG.Column): boolean {
|
|
658
|
-
return SoftmaxClassifier.isApplicable(df.columns, predictColumn);
|
|
659
|
-
}
|
|
660
499
|
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
500
|
+
/**
 * Produces the training report for an RBF-kernel LS-SVM model
 * (tagged `mlname: 'RBF-kernel LS-SVM'`, `mlrole: 'visualize'`).
 *
 * @param df - dataframe passed to `showTrainReport`
 * @param targetColumn - part of the signature; not read by this method
 * @param predictColumn - part of the signature; not read by this method
 * @param model - model passed to `showTrainReport`
 * @returns the result of `showTrainReport(df, model)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'RBF-kernel LS-SVM',
    'mlrole': 'visualize'
  }
})
static async visualizeRBFkernelSVM(df: DG.DataFrame, targetColumn: DG.Column,
  predictColumn: DG.Column, model: any): Promise<any> {
  const report = showTrainReport(df, model);
  return report;
}
|
|
670
514
|
|
|
671
|
-
//name: trainPLSRegression
|
|
672
|
-
//meta.mlname: PLS Regression
|
|
673
|
-
//meta.mlrole: train
|
|
674
|
-
//input: dataframe df
|
|
675
|
-
//input: column predictColumn
|
|
676
|
-
//input: int components = 3 {min: 1; max: 10} [Number of latent components]
|
|
677
|
-
//output: dynamic model
|
|
678
|
-
export async function trainPLSRegression(df: DG.DataFrame, predictColumn: DG.Column, components: number): Promise<Uint8Array> {
|
|
679
|
-
const features = df.columns;
|
|
680
|
-
|
|
681
|
-
const model = new PlsModel();
|
|
682
|
-
await model.fit(
|
|
683
|
-
features,
|
|
684
|
-
predictColumn,
|
|
685
|
-
Math.min(components, features.length),
|
|
686
|
-
);
|
|
687
|
-
|
|
688
|
-
return model.toBytes();
|
|
689
|
-
}
|
|
690
515
|
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
}
|
|
516
|
+
/**
 * Trains an LS-SVM model with the polynomial kernel
 * (tagged `mlname: 'polynomial kernel LS-SVM'`, `mlrole: 'train'`).
 *
 * @param df - dataframe forwarded to `getTrainedModel`
 * @param predictColumn - column forwarded to `getTrainedModel` as the prediction target
 * @param gamma - passed on as `gamma` (declared initial value '1.0')
 * @param c - passed on as `cParam` (declared initial value '1')
 * @param d - passed on as `dParam` (declared initial value '2')
 * @returns whatever `getPackedModel` produces for the trained model
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'polynomial kernel LS-SVM',
    'mlrole': 'train'
  },
})
static async trainPolynomialKernelSVM(
  df: DG.DataFrame,
  predictColumn: DG.Column,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '1.0'}}) gamma: number,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '1'}}) c: number,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '2'}}) d: number): Promise<any> {
  const fitted = await getTrainedModel({gamma, kernel: POLYNOMIAL, cParam: c, dParam: d}, df, predictColumn);
  const packed = getPackedModel(fitted);
  return packed;
}
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
/**
 * Applies a polynomial kernel LS-SVM model
 * (tagged `mlname: 'polynomial kernel LS-SVM'`, `mlrole: 'apply'`).
 *
 * @param df - dataframe passed to `getPrediction`
 * @param model - model passed to `getPrediction`
 * @returns the dataframe resolved by `getPrediction(df, model)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'polynomial kernel LS-SVM',
    'mlrole': 'apply'
  },
})
static async applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
  const prediction = await getPrediction(df, model);
  return prediction;
}
|
|
701
547
|
|
|
702
|
-
//name: isApplicablePLSRegression
|
|
703
|
-
//meta.mlname: PLS Regression
|
|
704
|
-
//meta.mlrole: isApplicable
|
|
705
|
-
//input: dataframe df
|
|
706
|
-
//input: column predictColumn
|
|
707
|
-
//output: bool result
|
|
708
|
-
export function isApplicablePLSRegression(df: DG.DataFrame, predictColumn: DG.Column): boolean {
|
|
709
|
-
return PlsModel.isApplicable(df.columns, predictColumn);
|
|
710
|
-
}
|
|
711
548
|
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
const unpackedModel = new PlsModel(model);
|
|
722
|
-
const viewers = unpackedModel.viewers();
|
|
723
|
-
|
|
724
|
-
return viewers.map((v) => v.root);
|
|
725
|
-
}
|
|
549
|
+
@grok.decorators.func({
|
|
550
|
+
'meta': {
|
|
551
|
+
'mlname': 'polynomial kernel LS-SVM',
|
|
552
|
+
'mlrole': 'isApplicable'
|
|
553
|
+
}
|
|
554
|
+
})
|
|
555
|
+
static async isApplicablePolynomialKernelSVM(
|
|
556
|
+
df: DG.DataFrame,
|
|
557
|
+
predictColumn: DG.Column): Promise<boolean> {
|
|
726
558
|
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
//meta.mlrole: isInteractive
|
|
730
|
-
//input: dataframe df
|
|
731
|
-
//input: column predictColumn
|
|
732
|
-
//output: bool result
|
|
733
|
-
export function isInteractivePLSRegression(df: DG.DataFrame, predictColumn: DG.Column): boolean {
|
|
734
|
-
return PlsModel.isInteractive(df.columns, predictColumn);
|
|
735
|
-
}
|
|
559
|
+
return isApplicableSVM(df, predictColumn);
|
|
560
|
+
}
|
|
736
561
|
|
|
737
|
-
//name: trainXGBooster
|
|
738
|
-
//meta.mlname: XGBoost
|
|
739
|
-
//meta.mlrole: train
|
|
740
|
-
//input: dataframe df
|
|
741
|
-
//input: column predictColumn
|
|
742
|
-
//input: int iterations = 20 {min: 1; max: 100} [Number of training iterations]
|
|
743
|
-
//input: double eta = 0.3 {caption: Rate; min: 0; max: 1} [Learning rate]
|
|
744
|
-
//input: int maxDepth = 6 {min: 0; max: 20} [Maximum depth of a tree]
|
|
745
|
-
//input: double lambda = 1 {min: 0; max: 100} [L2 regularization term]
|
|
746
|
-
//input: double alpha = 0 {min: 0; max: 100} [L1 regularization term]
|
|
747
|
-
//output: dynamic model
|
|
748
|
-
export async function trainXGBooster(df: DG.DataFrame, predictColumn: DG.Column,
|
|
749
|
-
iterations: number, eta: number, maxDepth: number, lambda: number, alpha: number): Promise<Uint8Array> {
|
|
750
|
-
const features = df.columns;
|
|
751
|
-
|
|
752
|
-
const booster = new XGBooster();
|
|
753
|
-
await booster.fit(features, predictColumn, iterations, eta, maxDepth, lambda, alpha);
|
|
754
|
-
|
|
755
|
-
return booster.toBytes();
|
|
756
|
-
}
|
|
757
562
|
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
}
|
|
563
|
+
@grok.decorators.func({
|
|
564
|
+
'meta': {
|
|
565
|
+
'mlname': 'polynomial kernel LS-SVM',
|
|
566
|
+
'mlrole': 'isInteractive'
|
|
567
|
+
}
|
|
568
|
+
})
|
|
569
|
+
static async isInteractivePolynomialKernelSVM(
|
|
570
|
+
df: DG.DataFrame,
|
|
571
|
+
predictColumn: DG.Column): Promise<boolean> {
|
|
768
572
|
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
}
|
|
573
|
+
return isInteractiveSVM(df, predictColumn);
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
/**
 * Produces the training report for a polynomial kernel LS-SVM model
 * (tagged `mlname: 'polynomial kernel LS-SVM'`, `mlrole: 'visualize'`;
 * the decorator declares one `dynamic` output named 'widget').
 *
 * @param df - dataframe passed to `showTrainReport`
 * @param targetColumn - part of the signature; not read by this method
 * @param predictColumn - part of the signature; not read by this method
 * @param model - model passed to `showTrainReport`
 * @returns the result of `showTrainReport(df, model)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'polynomial kernel LS-SVM',
    'mlrole': 'visualize'
  },
  'outputs': [{'name': 'widget','type': 'dynamic'}],
  'name': 'visualizePolynomialKernelSVM'
})
static async visualizePolynomialKernelSVM(df: DG.DataFrame, targetColumn: DG.Column,
  predictColumn: DG.Column, model: any): Promise<any> {
  const report = showTrainReport(df, model);
  return report;
}
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
/**
 * Trains an LS-SVM model with the sigmoid kernel
 * (tagged `mlname: 'sigmoid kernel LS-SVM'`, `mlrole: 'train'`).
 *
 * @param df - dataframe forwarded to `getTrainedModel`
 * @param predictColumn - column forwarded to `getTrainedModel` as the prediction target
 * @param gamma - passed on as `gamma` (declared initial value '1.0')
 * @param kappa - passed on as `kappa` (declared initial value '1')
 * @param theta - passed on as `theta` (declared initial value '1')
 * @returns whatever `getPackedModel` produces for the trained model
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'sigmoid kernel LS-SVM',
    'mlrole': 'train'
  },
  'name': 'trainSigmoidKernelSVM'
})
static async trainSigmoidKernelSVM(
  df: DG.DataFrame,
  predictColumn: DG.Column,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '1.0'}}) gamma: number,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '1'}}) kappa: number,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '1'}}) theta: number): Promise<any> {
  const fitted = await getTrainedModel({gamma, kernel: SIGMOID, kappa, theta}, df, predictColumn);
  const packed = getPackedModel(fitted);
  return packed;
}
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
/**
 * Applies a sigmoid kernel LS-SVM model
 * (tagged `mlname: 'sigmoid kernel LS-SVM'`, `mlrole: 'apply'`).
 *
 * @param df - dataframe passed to `getPrediction`
 * @param model - model passed to `getPrediction`
 * @returns the dataframe resolved by `getPrediction(df, model)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'sigmoid kernel LS-SVM',
    'mlrole': 'apply'
  },
  'name': 'applySigmoidKernelSVM'
})
static async applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
  const prediction = await getPrediction(df, model);
  return prediction;
}
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
/**
 * Applicability check for the sigmoid kernel LS-SVM
 * (tagged `mlname: 'sigmoid kernel LS-SVM'`, `mlrole: 'isApplicable'`).
 *
 * @param df - dataframe passed to `isApplicableSVM`
 * @param predictColumn - column passed to `isApplicableSVM`
 * @returns the verdict of `isApplicableSVM(df, predictColumn)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'sigmoid kernel LS-SVM',
    'mlrole': 'isApplicable'
  },
  'name': 'isApplicableSigmoidKernelSVM'
})
static async isApplicableSigmoidKernelSVM(df: DG.DataFrame, predictColumn: DG.Column): Promise<boolean> {
  // Same shared SVM check as the other kernels.
  const applicable = isApplicableSVM(df, predictColumn);
  return applicable;
}
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
/**
 * Interactivity check for the sigmoid kernel LS-SVM
 * (tagged `mlname: 'sigmoid kernel LS-SVM'`, `mlrole: 'isInteractive'`).
 *
 * @param df - dataframe passed to `isInteractiveSVM`
 * @param predictColumn - column passed to `isInteractiveSVM`
 * @returns the verdict of `isInteractiveSVM(df, predictColumn)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'sigmoid kernel LS-SVM',
    'mlrole': 'isInteractive'
  },
  'name': 'isInteractiveSigmoidKernelSVM'
})
static async isInteractiveSigmoidKernelSVM(df: DG.DataFrame, predictColumn: DG.Column): Promise<boolean> {
  const interactive = isInteractiveSVM(df, predictColumn);
  return interactive;
}
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
/**
 * Produces the training report for a sigmoid kernel LS-SVM model
 * (tagged `mlname: 'sigmoid kernel LS-SVM'`, `mlrole: 'visualize'`).
 *
 * @param df - dataframe passed to `showTrainReport`
 * @param targetColumn - part of the signature; not read by this method
 * @param predictColumn - part of the signature; not read by this method
 * @param model - model passed to `showTrainReport`
 * @returns the result of `showTrainReport(df, model)`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'sigmoid kernel LS-SVM',
    'mlrole': 'visualize'
  },
  'name': 'visualizeSigmoidKernelSVM'
})
static async visualizeSigmoidKernelSVM(df: DG.DataFrame, targetColumn: DG.Column,
  predictColumn: DG.Column, model: any): Promise<any> {
  const report = showTrainReport(df, model);
  return report;
}
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
/**
 * Top-menu entry 'ML | Analyze | ANOVA...': starts the one-way ANOVA workflow.
 *
 * Takes no arguments and returns nothing; it only invokes `runOneWayAnova()`.
 */
@grok.decorators.func({
  'top-menu': 'ML | Analyze | ANOVA...',
  'name': 'ANOVA',
  'description': 'One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the explored feature.'
})
static anova(): void {
  // The helper owns the whole workflow; its return value (if any) is ignored.
  runOneWayAnova();
}
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
/**
 * Top-menu entry 'ML | Impute Missing Values...': starts KNN imputation
 * by calling `runKNNImputer()` with no arguments.
 *
 * NOTE(review): the call is not awaited here. If `runKNNImputer` is asynchronous,
 * this method returns before it finishes and a rejection would not reach the
 * caller. Confirm that this is intended.
 */
@grok.decorators.func({
  'top-menu': 'ML | Impute Missing Values...',
  'name': 'KNN impute',
  'description': 'Missing values imputation using the k-nearest neighbors method (KNN)'
})
static kNNImputation() {
  runKNNImputer();
}
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
/**
 * Runs KNN imputation for an explicitly supplied table
 * (function name 'KNN imputation for a table').
 *
 * @param table - dataframe handed to `runKNNImputer`
 * @returns a promise that settles after `runKNNImputer(table)` has been awaited
 */
@grok.decorators.func({
  'name': 'KNN imputation for a table',
  'description': 'Missing values imputation using the k-nearest neighbors method'
})
static async kNNImputationForTable(table: DG.DataFrame) {
  // Unlike the menu entry, this variant passes the table in and waits for completion.
  await runKNNImputer(table);
}
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
/**
 * Trains a linear regression model
 * (tagged `mlname: 'Linear Regression'`, `mlrole: 'train'`; one `dynamic` output named 'model').
 *
 * @param df - dataframe whose `columns` are passed to `getLinearRegressionParams` as features
 * @param predictColumn - column passed to `getLinearRegressionParams` as the target
 * @returns the raw bytes of the fitted parameters as a `Uint8Array`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'Linear Regression',
    'mlrole': 'train'
  },
  'name': 'trainLinearRegression',
  'outputs': [{'type': 'dynamic', 'name': 'model'}]
})
static async trainLinearRegression(
  df: DG.DataFrame,
  predictColumn: DG.Column): Promise<Uint8Array> {
  const features = df.columns;
  const params = await getLinearRegressionParams(features, predictColumn);

  // Expose exactly the bytes that belong to `params`. A typed array may be a view
  // into a larger buffer, and `new Uint8Array(params.buffer)` would then serialize
  // unrelated bytes. For a non-view array this is identical to the previous result.
  return new Uint8Array(params.buffer, params.byteOffset, params.byteLength);
}
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
/**
 * Applies a linear regression model
 * (tagged `mlname: 'Linear Regression'`, `mlrole: 'apply'`).
 *
 * @param df - dataframe whose `columns` are used as features
 * @param model - packed model; treated as a `Uint8Array` holding the bytes of
 *   32-bit float parameters
 * @returns a dataframe built from the single column returned by
 *   `getPredictionByLinearRegression`
 * @throws RangeError if the model's byte length is not a multiple of 4
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'Linear Regression',
    'mlrole': 'apply'
  },
  'name': 'applyLinearRegression'
})
static applyLinearRegression(
  df: DG.DataFrame,
  model: any): DG.DataFrame {
  const features = df.columns;
  const bytes = model as Uint8Array;

  // Decode only the window this view covers. `bytes.buffer` alone would include any
  // bytes before or after the view when the model is a slice of a larger buffer.
  // Copying the window also guarantees the 4-byte alignment Float32Array requires.
  const window = bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
  const params = new Float32Array(window);

  return DG.DataFrame.fromColumns([getPredictionByLinearRegression(features, params)]);
}
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
/**
 * Applicability check for linear regression
 * (tagged `mlname: 'Linear Regression'`, `mlrole: 'isApplicable'`).
 *
 * @param df - dataframe whose columns are each tested with `matches('numerical')`
 * @param predictColumn - column tested with `matches('numerical')`
 * @returns `true` only when every column of `df` and `predictColumn` match 'numerical'
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'Linear Regression',
    'mlrole': 'isApplicable'
  },
  'name': 'isApplicableLinearRegression'
})
static isApplicableLinearRegression(df: DG.DataFrame, predictColumn: DG.Column): boolean {
  let allFeaturesNumerical = true;

  // Stop at the first non-numerical feature column.
  for (const column of df.columns) {
    if (!column.matches('numerical')) {
      allFeaturesNumerical = false;
      break;
    }
  }

  // The target is only inspected when all features passed.
  return allFeaturesNumerical ? predictColumn.matches('numerical') : false;
}
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
/**
 * Interactivity check for linear regression
 * (tagged `mlname: 'Linear Regression'`, `mlrole: 'isInteractive'`).
 *
 * @param df - dataframe whose `rowCount` is compared with the limit
 * @param predictColumn - part of the signature; not read by this method
 * @returns `true` when `df.rowCount` is at most 100000
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'Linear Regression',
    'mlrole': 'isInteractive'
  },
  'name': 'isInteractiveLinearRegression'
})
static isInteractiveLinearRegression(df: DG.DataFrame, predictColumn: DG.Column): boolean {
  // Largest row count still reported as interactive.
  const maxInteractiveRows = 100000;
  return df.rowCount <= maxInteractiveRows;
}
|
|
778
779
|
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
780
|
+
|
|
781
|
+
/**
 * Trains a softmax classifier
 * (tagged `mlname: 'Softmax'`, `mlrole: 'train'`; one `dynamic` output named 'model').
 *
 * @param df - dataframe whose `columns` are used as features
 * @param predictColumn - target column; its `categories.length` sets the class count
 * @param rate - learning rate, forwarded to `fit`
 * @param iterations - fitting iterations count, forwarded to `fit`
 * @param penalty - regularization rate, forwarded to `fit`
 * @param tolerance - fitting tolerance, forwarded to `fit`
 * @returns the fitted classifier serialized by `toBytes()`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'Softmax',
    'mlrole': 'train'
  },
  'name': 'trainSoftmax',
  'outputs': [{'type': 'dynamic', 'name': 'model'}]
})
static async trainSoftmax(
  df: DG.DataFrame,
  predictColumn: DG.Column,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '1.0', 'min': '0.001', 'max': '20', description: 'Learning rate.'}}) rate: number,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '100', 'min': '1', 'max': '10000', 'step': '10', description: 'Fitting iterations count'}}) iterations: number,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '0.1', 'min': '0.0001', 'max': '1', description: 'Regularization rate.'}}) penalty: number,
  @grok.decorators.param({'options':{'category':'Hyperparameters', 'initialValue': '0.001', 'min': '0.00001', 'max': '0.1', description: 'Fitting tolerance.'}}) tolerance: number): Promise<Uint8Array> {
  const featureColumns = df.columns;

  // The classifier is sized from the data: one class per target category,
  // one input per feature column.
  const classifier = new SoftmaxClassifier({
    classesCount: predictColumn.categories.length,
    featuresCount: featureColumns.length,
  });
  await classifier.fit(featureColumns, predictColumn, rate, iterations, penalty, tolerance);

  const packed = classifier.toBytes();
  return packed;
}
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
/**
 * Applies a softmax classifier
 * (tagged `mlname: 'Softmax'`, `mlrole: 'apply'`).
 *
 * @param df - dataframe whose `columns` are passed to `predict`
 * @param model - packed model given to the `SoftmaxClassifier` constructor as its second argument
 * @returns a dataframe built from the single column returned by `predict`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'Softmax',
    'mlrole': 'apply'
  },
  'name': 'applySoftmax'
})
static applySoftmax(df: DG.DataFrame, model: any): DG.DataFrame {
  const featureColumns = df.columns;

  // First constructor argument is left undefined; the classifier is built from the packed model.
  const classifier = new SoftmaxClassifier(undefined, model);
  const predicted = classifier.predict(featureColumns);

  return DG.DataFrame.fromColumns([predicted]);
}
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
/**
 * Applicability check for the softmax classifier
 * (tagged `mlname: 'Softmax'`, `mlrole: 'isApplicable'`).
 *
 * @param df - dataframe whose `columns` are passed to `SoftmaxClassifier.isApplicable`
 * @param predictColumn - column passed to `SoftmaxClassifier.isApplicable`
 * @returns the verdict of `SoftmaxClassifier.isApplicable`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'Softmax',
    'mlrole': 'isApplicable'
  },
  'name': 'isApplicableSoftmax'
})
static isApplicableSoftmax(df: DG.DataFrame, predictColumn: DG.Column): boolean {
  const applicable = SoftmaxClassifier.isApplicable(df.columns, predictColumn);
  return applicable;
}
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
/**
 * Interactivity check for the softmax classifier
 * (tagged `mlname: 'Softmax'`, `mlrole: 'isInteractive'`).
 *
 * @param df - dataframe whose `columns` are passed to `SoftmaxClassifier.isInteractive`
 * @param predictColumn - column passed to `SoftmaxClassifier.isInteractive`
 * @returns the verdict of `SoftmaxClassifier.isInteractive`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'Softmax',
    'mlrole': 'isInteractive'
  },
  'name': 'isInteractiveSoftmax'
})
static isInteractiveSoftmax(df: DG.DataFrame, predictColumn: DG.Column): boolean {
  const interactive = SoftmaxClassifier.isInteractive(df.columns, predictColumn);
  return interactive;
}
|
|
854
|
+
|
|
855
|
+
|
|
856
|
+
/**
 * Trains a PLS regression model
 * (tagged `mlname: 'PLS Regression'`, `mlrole: 'train'`; one `dynamic` output named 'model').
 *
 * @param df - dataframe whose `columns` are used as features
 * @param predictColumn - target column passed to `fit`
 * @param components - requested number of latent components; capped at the number of feature columns
 * @returns the fitted model serialized by `toBytes()`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'PLS Regression',
    'mlrole': 'train'
  },
  'name': 'trainPLSRegression',
  'outputs': [{'name': 'model', 'type': 'dynamic'}],
})
static async trainPLSRegression(
  df: DG.DataFrame,
  predictColumn: DG.Column,
  @grok.decorators.param({'type':'int','options':{'min':'1','max':'10','initialValue':'3', description: 'Number of latent components.'}}) components: number): Promise<Uint8Array> {
  const featureColumns = df.columns;
  const pls = new PlsModel();

  // Never ask for more latent components than there are feature columns.
  const componentsToUse = Math.min(components, featureColumns.length);
  await pls.fit(featureColumns, predictColumn, componentsToUse);

  const packed = pls.toBytes();
  return packed;
}
|
|
880
|
+
|
|
881
|
+
|
|
882
|
+
/**
 * Applies a PLS regression model
 * (tagged `mlname: 'PLS Regression'`, `mlrole: 'apply'`).
 *
 * @param df - dataframe whose `columns` are passed to `predict`
 * @param model - packed model given to the `PlsModel` constructor
 * @returns a dataframe built from the single column returned by `predict`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'PLS Regression',
    'mlrole': 'apply'
  },
  'name': 'applyPLSRegression'
})
static applyPLSRegression(df: DG.DataFrame, model: any): DG.DataFrame {
  const pls = new PlsModel(model);
  const predicted = pls.predict(df.columns);
  return DG.DataFrame.fromColumns([predicted]);
}
|
|
896
|
+
|
|
897
|
+
|
|
898
|
+
/**
 * Function registered with ML name 'PLS Regression' and ML role 'isApplicable'.
 *
 * Forwards the column list of `df` together with `predictColumn` to
 * `PlsModel.isApplicable` and returns its verdict unchanged.
 *
 * @param df - table whose columns are handed to the check
 * @param predictColumn - column handed to the check as the prediction target
 * @returns the boolean produced by `PlsModel.isApplicable`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'PLS Regression',
    'mlrole': 'isApplicable'
  },
  'name': 'isApplicablePLSRegression'
})
static isApplicablePLSRegression(
  df: DG.DataFrame,
  predictColumn: DG.Column): boolean {
  const candidateColumns = df.columns;
  return PlsModel.isApplicable(candidateColumns, predictColumn);
}
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
/**
 * Function registered with ML name 'PLS Regression' and ML role 'visualize'.
 *
 * Builds a `PlsModel` from `model`, asks it for its viewers, and returns the
 * `root` of each viewer. `df`, `targetColumn` and `predictColumn` belong to
 * the signature but are not read by this body.
 *
 * @param df - not used by this implementation
 * @param targetColumn - not used by this implementation
 * @param predictColumn - not used by this implementation
 * @param model - value passed to the `PlsModel` constructor
 * @returns an array with the `root` of every viewer returned by `viewers()`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'PLS Regression',
    'mlrole': 'visualize'
  },
  'name': 'visualizePLSRegression'
})
static async visualizePLSRegression(
  df: DG.DataFrame,
  targetColumn: DG.Column,
  predictColumn: DG.Column,
  model: any): Promise<any> {
  const pls = new PlsModel(model);
  const roots = pls.viewers().map((viewer) => viewer.root);
  return roots;
}
|
|
931
|
+
|
|
932
|
+
|
|
933
|
+
/**
 * Function registered with ML name 'PLS Regression' and ML role 'isInteractive'.
 *
 * Forwards the column list of `df` together with `predictColumn` to
 * `PlsModel.isInteractive` and returns its verdict unchanged.
 *
 * @param df - table whose columns are handed to the check
 * @param predictColumn - column handed to the check as the prediction target
 * @returns the boolean produced by `PlsModel.isInteractive`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'PLS Regression',
    'mlrole': 'isInteractive'
  },
  'name': 'isInteractivePLSRegression'
})
static isInteractivePLSRegression(
  df: DG.DataFrame,
  predictColumn: DG.Column): boolean {
  const candidateColumns = df.columns;
  return PlsModel.isInteractive(candidateColumns, predictColumn);
}
|
|
946
|
+
|
|
947
|
+
/**
 * Function registered with ML name 'XGBoost' and ML role 'train'.
 *
 * Creates a fresh `XGBooster`, fits it on the columns of `df` against
 * `predictColumn` with the given hyperparameters, and returns the result of
 * `toBytes()` on the fitted booster.
 *
 * @param df - table whose columns are used as features
 * @param predictColumn - column passed to `fit` as the target
 * @param iterations - number of training iterations
 * @param eta - learning rate
 * @param maxDepth - maximum depth of a tree
 * @param lambda - L2 regularization term
 * @param alpha - L1 regularization term
 * @returns the fitted booster in the form produced by `XGBooster.toBytes()`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'XGBoost',
    'mlrole': 'train'
  },
  'name': 'trainXGBooster',
  'outputs': [{'name': 'model', 'type': 'dynamic'}],
})
static async trainXGBooster(
  df: DG.DataFrame,
  predictColumn: DG.Column,
  @grok.decorators.param({'type':'int','options':{'min':'1','max':'100','initialValue':'20', description: 'Number of training iterations.'}}) iterations: number,
  @grok.decorators.param({'type':'double','options':{'caption':'Rate','min':'0','max':'1','initialValue':'0.3', description: 'Learning rate.'}}) eta: number,
  @grok.decorators.param({'type':'int','options':{'min':'0','max':'20','initialValue':'6', description: 'Maximum depth of a tree.'}}) maxDepth: number,
  @grok.decorators.param({'type':'double','options':{'min':'0','max':'100','initialValue':'1', description: 'L2 regularization term.'}}) lambda: number,
  @grok.decorators.param({'type':'double','options':{'min':'0','max':'100','initialValue':'0', description: 'L1 regularization term.'}}) alpha: number): Promise<Uint8Array> {
  const featureColumns = df.columns;
  const xgb = new XGBooster();

  await xgb.fit(
    featureColumns,
    predictColumn,
    iterations,
    eta,
    maxDepth,
    lambda,
    alpha,
  );

  return xgb.toBytes();
}
|
|
970
|
+
|
|
971
|
+
|
|
972
|
+
/**
 * Function registered with ML name 'XGBoost' and ML role 'apply'.
 *
 * Builds an `XGBooster` from `model`, runs `predict` on the columns of `df`,
 * and wraps the single predicted column into a new dataframe.
 *
 * @param df - table whose columns are passed to `predict`
 * @param model - value passed to the `XGBooster` constructor
 * @returns a dataframe containing only the column returned by `predict`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'XGBoost',
    'mlrole': 'apply'
  },
  'name': 'applyXGBooster'
})
static applyXGBooster(
  df: DG.DataFrame,
  model: any): DG.DataFrame {
  const xgb = new XGBooster(model);
  const prediction = xgb.predict(df.columns);
  return DG.DataFrame.fromColumns([prediction]);
}
|
|
986
|
+
|
|
987
|
+
|
|
988
|
+
/**
 * Function registered with ML name 'XGBoost' and ML role 'isInteractive'.
 *
 * Forwards the column list of `df` together with `predictColumn` to
 * `XGBooster.isInteractive` and returns its verdict unchanged.
 *
 * @param df - table whose columns are handed to the check
 * @param predictColumn - column handed to the check as the prediction target
 * @returns the boolean produced by `XGBooster.isInteractive`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'XGBoost',
    'mlrole': 'isInteractive'
  },
  'name': 'isInteractiveXGBooster'
})
static isInteractiveXGBooster(
  df: DG.DataFrame,
  predictColumn: DG.Column): boolean {
  const candidateColumns = df.columns;
  return XGBooster.isInteractive(candidateColumns, predictColumn);
}
|
|
1001
|
+
|
|
1002
|
+
|
|
1003
|
+
/**
 * Function registered with ML name 'XGBoost' and ML role 'isApplicable'.
 *
 * Forwards the column list of `df` together with `predictColumn` to
 * `XGBooster.isApplicable` and returns its verdict unchanged.
 *
 * @param df - table whose columns are handed to the check
 * @param predictColumn - column handed to the check as the prediction target
 * @returns the boolean produced by `XGBooster.isApplicable`
 */
@grok.decorators.func({
  'meta': {
    'mlname': 'XGBoost',
    'mlrole': 'isApplicable'
  },
  'name': 'isApplicableXGBooster'
})
static isApplicableXGBooster(
  df: DG.DataFrame,
  predictColumn: DG.Column): boolean {
  const candidateColumns = df.columns;
  return XGBooster.isApplicable(candidateColumns, predictColumn);
}
|
|
787
1016
|
}
|