@datagrok/eda 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/detectors.js +9 -0
- package/dist/111.js +2 -0
- package/dist/146.js +2 -0
- package/dist/155.js +2 -0
- package/dist/355.js +2 -0
- package/dist/584.js +2 -0
- package/dist/604.js +2 -0
- package/dist/632.js +2 -0
- package/dist/645.js +2 -0
- package/dist/93.js +2 -0
- package/dist/d711f70338306e5bddc4.wasm +0 -0
- package/dist/package-test.js +2 -0
- package/dist/package.js +2 -0
- package/package.json +49 -0
- package/package.png +0 -0
- package/scripts/command.txt +1 -0
- package/scripts/exportForTS.py +862 -0
- package/scripts/exportForTSConstants.py +93 -0
- package/scripts/func.json +1 -0
- package/scripts/module.json +11 -0
- package/src/EDAtools.ts +46 -0
- package/src/EDAui.ts +118 -0
- package/src/dataGenerators.ts +74 -0
- package/src/demos.ts +38 -0
- package/src/package-test.ts +12 -0
- package/src/package.ts +248 -0
- package/src/svm.ts +485 -0
- package/src/utils.ts +51 -0
- package/tsconfig.json +71 -0
- package/wasm/EDA.js +443 -0
- package/wasm/EDA.wasm +0 -0
- package/wasm/EDAAPI.js +131 -0
- package/wasm/EDAForWebWorker.js +21 -0
- package/wasm/PCA/PCA.cpp +151 -0
- package/wasm/PCA/PCA.h +48 -0
- package/wasm/PLS/PLS.h +64 -0
- package/wasm/PLS/pls.cpp +393 -0
- package/wasm/callWasm.js +475 -0
- package/wasm/callWasmForWebWorker.js +706 -0
- package/wasm/dataGenerators.h +169 -0
- package/wasm/dataMining.h +116 -0
- package/wasm/pcaExport.cpp +64 -0
- package/wasm/plsExport.cpp +75 -0
- package/wasm/svm.h +608 -0
- package/wasm/svmApi.cpp +323 -0
- package/wasm/workers/errorWorker.js +13 -0
- package/wasm/workers/generateDatasetWorker.js +13 -0
- package/wasm/workers/normalizeDatasetWorker.js +13 -0
- package/wasm/workers/partialLeastSquareRegressionWorker.js +13 -0
- package/wasm/workers/predictByLSSVMWorker.js +13 -0
- package/wasm/workers/principalComponentAnalysisWorker.js +13 -0
- package/wasm/workers/trainAndAnalyzeLSSVMWorker.js +13 -0
- package/wasm/workers/trainLSSVMWorker.js +13 -0
- package/webpack.config.js +37 -0
package/src/svm.ts
ADDED
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
/* Support vector machine (SVM) tools.
|
|
2
|
+
It is designed to be used in combination with the DATAGROK predictive tools.
|
|
3
|
+
|
|
4
|
+
Training & predicting are provided by wasm-computations.
|
|
5
|
+
|
|
6
|
+
Least square support vector machine (LS-SVM) is implemented:
|
|
7
|
+
[1] Suykens, J., Vandewalle, J. "Least Squares Support Vector Machine Classifiers",
|
|
8
|
+
Neural Processing Letters 9, 293-300 (1999). https://doi.org/10.1023/A:1018628609742
|
|
9
|
+
*/
|
|
10
|
+
import * as grok from 'datagrok-api/grok';
|
|
11
|
+
import * as ui from 'datagrok-api/ui';
|
|
12
|
+
import * as DG from 'datagrok-api/dg';
|
|
13
|
+
|
|
14
|
+
import {_trainAndAnalyzeLSSVMInWebWorker, _predictByLSSVMInWebWorker} from '../wasm/EDAAPI';
|
|
15
|
+
|
|
16
|
+
// 1. CONSTANTS

// Kernel types (exported: callers select the kernel by these codes).
export const LINEAR = 0;
export const POLYNOMIAL = 1;
export const RBF = 2;
export const SIGMOID = 3;

// Output-related: positions of the items in the array returned by the training call.
const CONFUSION_MATR_SIZE = 4;
const NORMALIZED_DATA_INDEX = 0;
const MEANS_INDEX = 1;
const STD_DEVS_INDEX = 2;
const MODEL_PARAMS_INDEX = 3;
const MODEL_WEIGHTS_INDEX = 4;
const PREDICTED_LABELS_INDEX = 5;
const CORRECTNESS_INDEX = 6;
const CONFUSION_MATRIX_INDEX = 7;

// Positions of the counters inside the flat confusion matrix.
const TRUE_POSITIVE_INDEX = 0;
const FALSE_NEGATIVE_INDEX = 1;
const FALSE_POSITIVE_INDEX = 2;
const TRUE_NEGATIVE_INDEX = 3;

// Kernel parameter indices inside the two-element kernel params array.
const RBF_SIGMA_INDEX = 0;
const POLYNOMIAL_C_INDEX = 0;
const POLYNOMIAL_D_INDEX = 1;
const SIGMOID_KAPPA_INDEX = 0;
const SIGMOID_THETA_INDEX = 1;

// Hyperparameter limits: values must be strictly greater than these.
const GAMMA_INFIMUM_LIMIT = 0;
const RBF_SIGMA_INFIMUM_LIMIT = 0;
const POLYNOMIAL_C_INFIMUM_LIMIT = 0;
const POLYNOMIAL_D_INFIMUM_LIMIT = 0;

// Error messages.
const WRONG_GAMMA_MESSAGE = 'gamma must be strictly positive.';
const WRONG_RBF_SIGMA_MESSAGE = 'sigma must be strictly positive.';
const WRONG_POLYNOMIAL_C_MESSAGE = 'c must be strictly positive.';
const WRONG_POLYNOMIAL_D_MESSAGE = 'd must be strictly positive.';
const WRONG_KERNEL_MESSAGE = 'incorrect kernel.';

// Names of columns, dataframes and viewers.
const LABELS = 'Labels';
const PREDICTED = 'predicted';
const CORRECTNESS = 'correctness';
const CONFUSION_MATRIX_NAME = 'Confusion matrix';
const MEAN = 'mean';
const STD_DEV = 'std dev';
const MODEL_PARAMS_NAME = 'alpha';
const MODEL_WEIGHTS_NAME = 'weight';
const GAMMA = 'gamma';
const KERNEL = 'kernel';
const KERNEL_PARAMS = 'kernel params';
const KERNEL_PARAM_1 = 'kernel param 1';
const KERNEL_PARAM_2 = 'kernel param 2';
const FEATURES_COUNT_NAME = 'features count';
const TRAIN_SAMPLES_COUNT_NAME = 'train samples count';
const TRAIN_ERROR = 'Train error,%';
// Indexed by kernel type code (LINEAR, POLYNOMIAL, RBF, SIGMOID).
const KERNEL_TYPE_TO_NAME_MAP = ['linear', 'polynomial', 'RBF', 'sigmoid'];
const POSITIVE_NAME = 'positive (P)';
const NEGATIVE_NAME = 'negative (N)';
const PREDICTED_POSITIVE_NAME = 'predicted positive (PP)';
const PREDICTED_NEGATIVE_NAME = 'predicted negative (PN)';
const SENSITIVITY = 'Sensitivity';
const SPECIFICITY = 'Specificity';
const BALANCED_ACCURACY = 'Balanced accuracy';
// Spelling of the two labels below was corrected ("predicitve" -> "predictive").
const POSITIVE_PREDICTIVE_VALUE = 'Positive predictive value';
const NEGATIVE_PREDICTIVE_VALUE = 'Negative predictive value';
const ML_REPORT = 'Model report';
const ML_REPORT_PREDICTED_LABELS = 'Predicted labels';
const ML_REPORT_TRAIN_LABELS = 'Train labels';
const ML_REPORT_CORRECTNESS = 'Prediction correctness';
const PREDICTION = 'prediction';

// Pack/unpack constants.
const BYTES = 4; // size in bytes of one packed value (Int32 / Float32)
const INTS_COUNT = 3; // header length: kernel type, samples count, features count
const KER_PARAMS_COUNT = 2;
const MODEL_KERNEL_INDEX = 0;
const SAMPLES_COUNT_INDEX = 1;
const FEATURES_COUNT_INDEX = 2;

// Misc.
const INIT_VALUE = 0; // any number can be used
const LS_SVM_ADD_CONST = 1; // see [1] for more details
|
|
103
|
+
|
|
104
|
+
// 2. TOOLS
|
|
105
|
+
|
|
106
|
+
/**
 * Validates LS-SVM training hyperparameters.
 *
 * Always checks `gamma` and `kernel`. Additionally checks `sigma` for the RBF kernel
 * and `cParam` / `dParam` for the polynomial kernel. The linear and sigmoid kernels
 * have no constrained kernel parameters.
 *
 * @param hyperparameters object holding `gamma`, `kernel` and the kernel-specific fields
 * @throws Error when a checked value is not strictly greater than its infimum limit
 *   (NaN and missing values are rejected as well) or when the kernel code is unknown
 */
function checkHyperparameters(hyperparameters: any): void {
  // `!(value > limit)` is used instead of `value <= limit`: NaN and undefined compare
  // false in both directions, so only the negated form rejects them.
  const isNotAbove = (value: any, limit: number): boolean => !(value > limit);

  if (isNotAbove(hyperparameters.gamma, GAMMA_INFIMUM_LIMIT))
    throw new Error(WRONG_GAMMA_MESSAGE);

  switch (hyperparameters.kernel) {
    case LINEAR: // linear kernel: no kernel parameters
    case SIGMOID: // sigmoid kernel: kappa and theta are not constrained
      return;

    case RBF: // RBF kernel: sigma
      if (isNotAbove(hyperparameters.sigma, RBF_SIGMA_INFIMUM_LIMIT))
        throw new Error(WRONG_RBF_SIGMA_MESSAGE);
      return;

    case POLYNOMIAL: // polynomial kernel: c and d
      if (isNotAbove(hyperparameters.cParam, POLYNOMIAL_C_INFIMUM_LIMIT))
        throw new Error(WRONG_POLYNOMIAL_C_MESSAGE);
      if (isNotAbove(hyperparameters.dParam, POLYNOMIAL_D_INFIMUM_LIMIT))
        throw new Error(WRONG_POLYNOMIAL_D_MESSAGE);
      return;

    default: // incorrect kernel
      throw new Error(WRONG_KERNEL_MESSAGE);
  }
} // checkHyperparameters
|
|
138
|
+
|
|
139
|
+
/**
 * Returns labels predicted by the specified model.
 *
 * The computation is delegated to `_predictByLSSVMInWebWorker`; this function only
 * forwards the model fields and the dataset and unwraps the result.
 *
 * @param model trained model; its kernelType, kernelParams, normalizedTrainData,
 *   trainLabels, means, stdDevs, modelParams and modelWeights fields are read
 * @param dataset feature columns to compute predictions for
 * @returns the value the worker call resolves with
 * @throws Error with the text `Error: <reason>` when the worker call rejects
 */
async function predict(model: any, dataset: DG.ColumnList): Promise<DG.Column> {
  // The call is made outside of the try block so that only a rejection is re-wrapped.
  const pending = _predictByLSSVMInWebWorker(
    model.kernelType,
    model.kernelParams,
    model.normalizedTrainData.columns,
    model.trainLabels,
    model.means,
    model.stdDevs,
    model.modelParams,
    model.modelWeights,
    dataset);

  try {
    return await pending;
  } catch (reason) {
    throw new Error(`Error: ${reason}`);
  }
} // predict
|
|
155
|
+
|
|
156
|
+
/**
 * Computes quality metrics from the model's confusion matrix and stores them on the model.
 *
 * The classic notation is used, see https://en.wikipedia.org/wiki/Sensitivity_and_specificity
 *
 * Fields written: sensitivity, specificity, balancedAccuracy, positivePredicitveValue,
 * negativePredicitveValue and trainError (in percent).
 *
 * @param model model whose `confusionMatrix.getRawData()` provides the four counters
 */
function evaluateAccuracy(model: any): void {
  const counts = model.confusionMatrix.getRawData();

  const truePositive = counts[TRUE_POSITIVE_INDEX];
  const trueNegative = counts[TRUE_NEGATIVE_INDEX];
  const falsePositive = counts[FALSE_POSITIVE_INDEX];
  const falseNegative = counts[FALSE_NEGATIVE_INDEX];

  // totals of actually positive / actually negative samples
  const positive = truePositive + falseNegative;
  const negative = falsePositive + trueNegative;

  const truePositiveRate = truePositive / positive;
  const trueNegativeRate = trueNegative / negative;
  const accuracy = (truePositive + trueNegative) / (positive + negative);

  model.sensitivity = truePositiveRate;
  model.specificity = trueNegativeRate;
  model.balancedAccuracy = (truePositiveRate + trueNegativeRate) / 2;
  // NOTE: the misspelled property names are read elsewhere in this file; keep them as is.
  model.positivePredicitveValue = truePositive / (truePositive + falsePositive);
  model.negativePredicitveValue = trueNegative / (trueNegative + falseNegative);
  model.trainError = (1 - accuracy) * 100; // train error, %
} // evaluateAccuracy
|
|
186
|
+
|
|
187
|
+
/**
 * Trains an LS-SVM model and returns it together with its training analysis.
 *
 * Steps: validate the hyperparameters, put the kernel parameters into a two-element
 * column, run the training via `_trainAndAnalyzeLSSVMInWebWorker`, name the returned
 * columns, assemble the model object and compute accuracy metrics for it.
 *
 * @param hyperparameters object with `gamma`, `kernel` and kernel-specific fields
 *   (`sigma` for RBF; `cParam`, `dParam` for polynomial; `kappa`, `theta` for sigmoid)
 * @param dataset feature columns of the training data
 * @param labels column with training labels
 * @returns the trained model object
 * @throws Error when the hyperparameters are invalid, the kernel is unknown, the dataset
 *   has no columns, or the worker call rejects (text `Error: <reason>`)
 */
async function trainAndAnalyzeModel(hyperparameters: any, dataset: DG.ColumnList,
  labels: DG.Column): Promise<any> {
  // check correctness of hyperparameters
  checkHyperparameters(hyperparameters);

  // kernel params array with default values; unused slots keep INIT_VALUE
  const kernelParamsArray = [INIT_VALUE, INIT_VALUE];

  switch (hyperparameters.kernel) {
    case LINEAR: // no kernel parameters in the case of linear kernel
      break;

    case RBF: // sigma parameter in the case of RBF kernel
      kernelParamsArray[RBF_SIGMA_INDEX] = hyperparameters.sigma;
      break;

    case POLYNOMIAL: // c and d parameters in the case of polynomial kernel
      kernelParamsArray[POLYNOMIAL_C_INDEX] = hyperparameters.cParam;
      kernelParamsArray[POLYNOMIAL_D_INDEX] = hyperparameters.dParam;
      break;

    case SIGMOID: // kappa and theta parameters in the case of sigmoid kernel
      kernelParamsArray[SIGMOID_KAPPA_INDEX] = hyperparameters.kappa;
      kernelParamsArray[SIGMOID_THETA_INDEX] = hyperparameters.theta;
      break;

    default: // incorrect kernel
      throw new Error(WRONG_KERNEL_MESSAGE);
  }

  // create kernel params column
  const kernelParams = DG.Column.fromList('double', KERNEL_PARAMS, kernelParamsArray);

  const trainCols = dataset.toList();

  // Without this guard an empty dataset fails below with an opaque TypeError.
  if (trainCols.length === 0)
    throw new Error('dataset must contain at least one feature column.');

  const featuresCount = trainCols.length;
  const trainSamplesCount = trainCols[0].length;

  // sizes of model params & precomputed weights
  const modelParamsCount = trainSamplesCount + LS_SVM_ADD_CONST;
  const precomputedWeightsCount = featuresCount + LS_SVM_ADD_CONST;
  const confusionMatrixElementsCount = CONFUSION_MATR_SIZE;

  // call webassembly training function; only a rejection is re-wrapped
  const pending = _trainAndAnalyzeLSSVMInWebWorker(hyperparameters.gamma, hyperparameters.kernel,
    kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount,
    dataset, labels);

  let output: any;
  try {
    output = await pending;
  } catch (reason) {
    throw new Error(`Error: ${reason}`);
  }

  // rename output columns
  output[MEANS_INDEX].name = MEAN;
  output[STD_DEVS_INDEX].name = STD_DEV;
  output[MODEL_PARAMS_INDEX].name = MODEL_PARAMS_NAME;
  output[MODEL_WEIGHTS_INDEX].name = MODEL_WEIGHTS_NAME;

  output[PREDICTED_LABELS_INDEX].name = PREDICTED;
  output[CORRECTNESS_INDEX].name = CORRECTNESS;
  output[CONFUSION_MATRIX_INDEX].name = CONFUSION_MATRIX_NAME;

  // complete model
  const model = {
    trainGamma: hyperparameters.gamma,
    kernelType: hyperparameters.kernel,
    kernelParams: kernelParams,
    trainLabels: labels,
    normalizedTrainData: DG.DataFrame.fromColumns(output[NORMALIZED_DATA_INDEX]),
    means: output[MEANS_INDEX],
    stdDevs: output[STD_DEVS_INDEX],
    modelParams: output[MODEL_PARAMS_INDEX],
    modelWeights: output[MODEL_WEIGHTS_INDEX],
    predictedLabels: output[PREDICTED_LABELS_INDEX],
    correctness: output[CORRECTNESS_INDEX],
    confusionMatrix: output[CONFUSION_MATRIX_INDEX],
    trainError: undefined, // filled in by evaluateAccuracy
    featuresCount: featuresCount,
    trainSamplesCount: trainSamplesCount
  };

  evaluateAccuracy(model);

  return model;
} // trainAndAnalyzeModel
|
|
275
|
+
|
|
276
|
+
/**
 * Wrapper that combines `trainAndAnalyzeModel` with the Datagrok predictive tools.
 *
 * Side effect: the column named `predict_column` is removed from `df`, so that the
 * remaining columns of `df` are used as features.
 *
 * @param hyperparameters LS-SVM hyperparameters, passed through to the training routine
 * @param df dataframe with the feature columns and the label column
 * @param predict_column name of the label column
 * @returns the trained model
 */
export async function getTrainedModel(hyperparameters: any, df: DG.DataFrame, predict_column: string): Promise<any> {
  const featureColumns = df.columns;

  // take the labels first, then detach them from the feature set
  const labelColumn = featureColumns.byName(predict_column);
  featureColumns.remove(predict_column);

  const trainedModel = await trainAndAnalyzeModel(hyperparameters, featureColumns, labelColumn);
  return trainedModel;
}
|
|
284
|
+
|
|
285
|
+
/**
 * Returns a one-row dataframe with short info about the model: gamma, kernel name,
 * both kernel parameters, features and train samples counts, and the accuracy metrics.
 *
 * @param model trained model with the metrics already computed
 */
function getModelInfo(model: any): DG.DataFrame {
  const kernelParamValues = model.kernelParams.getRawData();

  // every numeric item is a single-value column of type 'double'
  const numericColumn = (name: string, value: any) => DG.Column.fromList('double', name, [value]);

  const infoColumns = [
    numericColumn(GAMMA, model.trainGamma),
    DG.Column.fromStrings(KERNEL, [KERNEL_TYPE_TO_NAME_MAP[model.kernelType]]),
    numericColumn(KERNEL_PARAM_1, kernelParamValues[0]),
    numericColumn(KERNEL_PARAM_2, kernelParamValues[1]),
    numericColumn(FEATURES_COUNT_NAME, model.featuresCount),
    numericColumn(TRAIN_SAMPLES_COUNT_NAME, model.trainSamplesCount),
    numericColumn(TRAIN_ERROR, model.trainError),
    numericColumn(BALANCED_ACCURACY, model.balancedAccuracy),
    numericColumn(SENSITIVITY, model.sensitivity),
    numericColumn(SPECIFICITY, model.specificity),
    numericColumn(POSITIVE_PREDICTIVE_VALUE, model.positivePredicitveValue),
    numericColumn(NEGATIVE_PREDICTIVE_VALUE, model.negativePredicitveValue)
  ];

  return DG.DataFrame.fromColumns(infoColumns);
}
|
|
304
|
+
|
|
305
|
+
/**
 * Returns a dataframe with the confusion matrix of the model.
 *
 * Rows are the actual classes (positive, negative); the two numeric columns are the
 * predicted classes (predicted positive, predicted negative).
 *
 * @param model trained model whose `confusionMatrix` column holds the four counters
 */
function getConfusionMatrixDF(model: any): DG.DataFrame {
  const counts = model.confusionMatrix.getRawData();

  const rowNames = DG.Column.fromStrings('', [POSITIVE_NAME, NEGATIVE_NAME]);

  const predictedPositive = DG.Column.fromList('int', PREDICTED_POSITIVE_NAME,
    [counts[TRUE_POSITIVE_INDEX], counts[FALSE_POSITIVE_INDEX]]);

  const predictedNegative = DG.Column.fromList('int', PREDICTED_NEGATIVE_NAME,
    [counts[FALSE_NEGATIVE_INDEX], counts[TRUE_NEGATIVE_INDEX]]);

  return DG.DataFrame.fromColumns([rowNames, predictedPositive, predictedNegative]);
}
|
|
318
|
+
|
|
319
|
+
/**
 * Shows the training report.
 *
 * Renames `df`, appends the train labels, predicted labels and correctness columns to
 * it, opens it in a table view and adds viewers: a form with the model info, scatter
 * plots colored by predicted labels, train labels and correctness, and a grid with
 * the confusion matrix.
 *
 * @param df dataframe to show; it is modified (name and columns)
 * @param model trained model
 */
export function showTrainReport(df: DG.DataFrame, model: any): void {
  df.name = ML_REPORT;

  for (const column of [model.trainLabels, model.predictedLabels, model.correctness])
    df.columns.add(column);

  const reportView = grok.shell.addTableView(df);

  // adds a scatter plot of df colored by the given column
  const addColoredScatterPlot = (plotTitle: string, colorColumn: any): void => {
    reportView.addViewer(DG.Viewer.scatterPlot(df, {title: plotTitle, color: colorColumn.name}));
  };

  // the order of the calls below defines the order in which viewers are added
  reportView.addViewer(DG.Viewer.form(getModelInfo(model)));
  addColoredScatterPlot(ML_REPORT_PREDICTED_LABELS, model.predictedLabels);
  addColoredScatterPlot(ML_REPORT_TRAIN_LABELS, model.trainLabels);
  reportView.addViewer(DG.Viewer.grid(getConfusionMatrixDF(model)));
  addColoredScatterPlot(ML_REPORT_CORRECTNESS, model.correctness);
} // showTrainReport
|
|
341
|
+
|
|
342
|
+
/**
 * Returns the trained model packed into a Uint8Array.
 *
 * Layout (every value takes BYTES bytes):
 *   1. Int32 header: kernel type, samples count, features count;
 *   2. Float32 kernel parameters (KER_PARAMS_COUNT values);
 *   3. Float32 training labels (samplesCount values);
 *   4. Float32 mean values (featuresCount values);
 *   5. Float32 standard deviations (featuresCount values);
 *   6. Float32 model parameters (samplesCount + LS_SVM_ADD_CONST values);
 *   7. Float32 precomputed weights (featuresCount + LS_SVM_ADD_CONST values);
 *   8. Float32 normalized training data: featuresCount values per column of
 *      `model.normalizedTrainData`.
 *
 * @param model trained model
 * @returns Uint8Array with the packed model
 */
export function getPackedModel(model: any): any {
  const dataCols = model.normalizedTrainData.columns;
  const samplesCount = model.trainSamplesCount;
  const featuresCount = model.featuresCount;

  // size of the packed model: the terms follow the layout described above
  const valuesCount = INTS_COUNT + KER_PARAMS_COUNT +
    samplesCount + featuresCount + featuresCount + samplesCount + LS_SVM_ADD_CONST +
    featuresCount + LS_SVM_ADD_CONST + featuresCount * samplesCount;

  const packed = new Uint8Array(BYTES * valuesCount);
  const bytes = packed.buffer;
  let position = 0;

  // copies raw data of the column into the next `count` Float32 slots
  const writeColumn = (column: any, count: number): void => {
    const target = new Float32Array(bytes, position, count);
    target.set(column.getRawData());
    position += count * BYTES;
  };

  // kernel type and sizes
  const header = new Int32Array(bytes, position, INTS_COUNT);
  header[MODEL_KERNEL_INDEX] = model.kernelType;
  header[SAMPLES_COUNT_INDEX] = samplesCount;
  header[FEATURES_COUNT_INDEX] = featuresCount;
  position += INTS_COUNT * BYTES;

  writeColumn(model.kernelParams, KER_PARAMS_COUNT); // kernel parameters
  writeColumn(model.trainLabels, samplesCount); // labels of training data
  writeColumn(model.means, featuresCount); // mean values of training data
  writeColumn(model.stdDevs, featuresCount); // standard deviations of training data
  writeColumn(model.modelParams, samplesCount + LS_SVM_ADD_CONST); // model parameters
  writeColumn(model.modelWeights, featuresCount + LS_SVM_ADD_CONST); // precomputed weights

  // training dataset
  for (const dataCol of dataCols)
    writeColumn(dataCol, featuresCount);

  return packed;
} // getPackedModel
|
|
409
|
+
|
|
410
|
+
/**
 * Returns the model unpacked from its binary representation.
 *
 * The values are read in this order: Int32 header (kernel type, samples count, features
 * count), then Float32 kernel parameters, training labels, mean values, standard
 * deviations, model parameters, precomputed weights and, finally, `samplesCount`
 * columns of normalized training data with `featuresCount` values each.
 *
 * @param packedModel typed-array view (e.g. Uint8Array) over the packed model bytes
 * @returns model object with kernelType, kernelParams, trainLabels, means, stdDevs,
 *   modelParams, modelWeights and normalizedTrainData
 */
function getUnpackedModel(packedModel: any): any {
  // The view may start inside a larger buffer, so reading must begin at its byteOffset.
  // Int32/Float32 views need a BYTES-aligned offset; a misaligned view is copied first
  // (the copy starts at offset 0 of its own buffer).
  const source = (packedModel.byteOffset % BYTES === 0) ? packedModel : packedModel.slice();
  const modelBytes = source.buffer;
  let offset = source.byteOffset;

  // wraps the next `count` Float32 values into a column and advances the offset
  const readColumn = (name: string, count: number): DG.Column => {
    const column = DG.Column.fromFloat32Array(name, new Float32Array(modelBytes, offset, count));
    offset += count * BYTES;
    return column;
  };

  // extract kernel type and sizes
  const header = new Int32Array(modelBytes, offset, INTS_COUNT);
  offset += INTS_COUNT * BYTES;
  const samplesCount = header[SAMPLES_COUNT_INDEX];
  const featuresCount = header[FEATURES_COUNT_INDEX];

  const kernelParams = readColumn(KERNEL_PARAMS, KER_PARAMS_COUNT); // parameters of kernel
  const trainLabels = readColumn(LABELS, samplesCount); // training labels
  const means = readColumn(MEAN, featuresCount); // mean values of training data
  const stdDevs = readColumn(STD_DEV, featuresCount); // standard deviations of training data
  const modelParams = readColumn(MODEL_PARAMS_NAME, samplesCount + LS_SVM_ADD_CONST); // parameters of model
  const modelWeights = readColumn(MODEL_WEIGHTS_NAME, featuresCount + LS_SVM_ADD_CONST); // precomputed weights

  // extract training data columns; columns are named by their index
  const dataCols: DG.Column[] = [];

  for (let i = 0; i < samplesCount; i++)
    dataCols.push(readColumn(i.toString(), featuresCount));

  const normalizedTrainData = DG.DataFrame.fromColumns(dataCols);

  const model = {
    kernelType: header[MODEL_KERNEL_INDEX],
    kernelParams: kernelParams,
    trainLabels: trainLabels,
    means: means,
    stdDevs: stdDevs,
    modelParams: modelParams,
    modelWeights: modelWeights,
    normalizedTrainData: normalizedTrainData
  };

  return model;
} // getUnpackedModel
|
|
475
|
+
|
|
476
|
+
/**
 * Wrapper that combines `predict` with the Datagrok predictive tools.
 *
 * Unpacks the model, computes predictions for the columns of `df` and returns them
 * as a dataframe with a single column named by the PREDICTION constant.
 *
 * @param df dataframe with the feature columns
 * @param packedModel packed model; it is passed to the Uint8Array constructor
 * @returns dataframe containing the prediction column
 */
export async function getPrediction(df: DG.DataFrame, packedModel: any): Promise<DG.DataFrame> {
  const unpackedModel = getUnpackedModel(new Uint8Array(packedModel));

  const predictedColumn = await predict(unpackedModel, df.columns);
  predictedColumn.name = PREDICTION;

  return DG.DataFrame.fromColumns([predictedColumn]);
} // getPrediction
|
package/src/utils.ts
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
// Inputs correctness check tools
|
|
6
|
+
|
|
7
|
+
// Limitation constants
const COMP_MIN = 1;
const SAMPLES_COUNT_MIN = 1;
const FEATURES_COUNT_MIN = 1;
const PERCENTAGE_MIN = 0;
const PERCENTAGE_MAX = 100;
const MAX_ELEMENTS_COUNT = 100000000;

// Error messages
// NOTE: the constant names are misspelled, but they are referenced by the check
// functions of this file, so they are kept as is; only the message text was corrected.
const COMP_POSITVE_MES = 'components must be positive.';
const COMP_EXCESS = 'components must not be greater than features count.';
const INCORERRECT_MIN_MAX_MES = 'min must be less than max.';
const INCORERRECT_FEATURES_MES = 'features must be positive.';
const INCORERRECT_SAMPLES_MES = 'samples must be positive.';
const INCORERRECT_PERCENTAGE_MES = 'violators percentage must be from the range from 0 to 100.';
const DATAFRAME_IS_TOO_BIG_MES = 'dataframe is too big.';
|
|
23
|
+
|
|
24
|
+
/**
 * Checks the components count (PCA, PLS) against the given features.
 *
 * @param features feature columns
 * @param components requested number of components
 * @throws Error when `components` is less than COMP_MIN, when it exceeds the number of
 *   feature columns, or when the total number of elements (columns count multiplied
 *   by the length of the first column) exceeds MAX_ELEMENTS_COUNT
 */
export function checkComponenets(features: DG.ColumnList, components: number): void {
  if (components < COMP_MIN) {
    throw new Error(COMP_POSITVE_MES);
  }

  if (components > features.length) {
    throw new Error(COMP_EXCESS);
  }

  // the first column exists here: components >= COMP_MIN and components <= features.length
  const elementsCount = features.length * features.byIndex(0).length;

  if (elementsCount > MAX_ELEMENTS_COUNT) {
    throw new Error(DATAFRAME_IS_TOO_BIG_MES);
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Checks the inputs of the generator of data for SVM testing.
 *
 * @param samplesCount number of samples to generate
 * @param featuresCount number of features to generate
 * @param min lower bound; must be less than `max`
 * @param max upper bound
 * @param violatorsPercentage percentage of violators; must be within
 *   [PERCENTAGE_MIN, PERCENTAGE_MAX]
 * @throws Error with the message of the first violated rule
 */
export function checkGeneratorSVMinputs(samplesCount: number, featuresCount: number,
  min: number, max: number, violatorsPercentage: number): void {
  // [rule is violated, message]; the first violated rule in this order is reported
  const rules: [boolean, string][] = [
    [min >= max, INCORERRECT_MIN_MAX_MES],
    [featuresCount < FEATURES_COUNT_MIN, INCORERRECT_FEATURES_MES],
    [samplesCount < SAMPLES_COUNT_MIN, INCORERRECT_SAMPLES_MES],
    [(violatorsPercentage < PERCENTAGE_MIN) || (violatorsPercentage > PERCENTAGE_MAX),
      INCORERRECT_PERCENTAGE_MES]
  ];

  for (const [isViolated, message] of rules) {
    if (isViolated)
      throw new Error(message);
  }
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
/* Visit https://aka.ms/tsconfig.json to read more about this file */
|
|
4
|
+
|
|
5
|
+
/* Basic Options */
|
|
6
|
+
// "incremental": true, /* Enable incremental compilation */
|
|
7
|
+
"target": "es6", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */
|
|
8
|
+
"module": "es2020", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */
|
|
9
|
+
"lib": ["es2020", "dom"], /* Specify library files to be included in the compilation. */
|
|
10
|
+
"allowJs": true, /* Allow javascript files to be compiled. */
|
|
11
|
+
// "checkJs": true, /* Report errors in .js files. */
|
|
12
|
+
// "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', 'react', 'react-jsx' or 'react-jsxdev'. */
|
|
13
|
+
// "declaration": true, /* Generates corresponding '.d.ts' file. */
|
|
14
|
+
// "declarationMap": true, /* Generates a sourcemap for each corresponding '.d.ts' file. */
|
|
15
|
+
// "sourceMap": true, /* Generates corresponding '.map' file. */
|
|
16
|
+
// "outFile": "./", /* Concatenate and emit output to single file. */
|
|
17
|
+
// "outDir": "./", /* Redirect output structure to the directory. */
|
|
18
|
+
// "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */
|
|
19
|
+
// "composite": true, /* Enable project compilation */
|
|
20
|
+
// "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */
|
|
21
|
+
// "removeComments": true, /* Do not emit comments to output. */
|
|
22
|
+
// "noEmit": true, /* Do not emit outputs. */
|
|
23
|
+
// "importHelpers": true, /* Import emit helpers from 'tslib'. */
|
|
24
|
+
// "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */
|
|
25
|
+
// "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */
|
|
26
|
+
|
|
27
|
+
/* Strict Type-Checking Options */
|
|
28
|
+
"strict": true, /* Enable all strict type-checking options. */
|
|
29
|
+
// "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */
|
|
30
|
+
// "strictNullChecks": true, /* Enable strict null checks. */
|
|
31
|
+
// "strictFunctionTypes": true, /* Enable strict checking of function types. */
|
|
32
|
+
// "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */
|
|
33
|
+
// "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */
|
|
34
|
+
// "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */
|
|
35
|
+
// "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */
|
|
36
|
+
|
|
37
|
+
/* Additional Checks */
|
|
38
|
+
// "noUnusedLocals": true, /* Report errors on unused locals. */
|
|
39
|
+
// "noUnusedParameters": true, /* Report errors on unused parameters. */
|
|
40
|
+
// "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */
|
|
41
|
+
// "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */
|
|
42
|
+
// "noUncheckedIndexedAccess": true, /* Include 'undefined' in index signature results */
|
|
43
|
+
// "noPropertyAccessFromIndexSignature": true, /* Require undeclared properties from index signatures to use element accesses. */
|
|
44
|
+
|
|
45
|
+
/* Module Resolution Options */
|
|
46
|
+
"moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */
|
|
47
|
+
// "baseUrl": "./", /* Base directory to resolve non-absolute module names. */
|
|
48
|
+
// "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
|
|
49
|
+
// "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */
|
|
50
|
+
// "typeRoots": [], /* List of folders to include type definitions from. */
|
|
51
|
+
// "types": [], /* Type declaration files to be included in compilation. */
|
|
52
|
+
// "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */
|
|
53
|
+
"esModuleInterop": true, /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */
|
|
54
|
+
// "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */
|
|
55
|
+
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
|
|
56
|
+
|
|
57
|
+
/* Source Map Options */
|
|
58
|
+
// "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */
|
|
59
|
+
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
|
|
60
|
+
// "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */
|
|
61
|
+
// "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */
|
|
62
|
+
|
|
63
|
+
/* Experimental Options */
|
|
64
|
+
// "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */
|
|
65
|
+
// "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */
|
|
66
|
+
|
|
67
|
+
/* Advanced Options */
|
|
68
|
+
"skipLibCheck": false, /* Skip type checking of declaration files. */
|
|
69
|
+
"forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */
|
|
70
|
+
}
|
|
71
|
+
}
|