@datagrok/eda 1.1.9 → 1.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +1 -0
- package/.eslintrc.json +45 -0
- package/dist/42.js +2 -0
- package/dist/729.js +1 -1
- package/dist/943.js +2 -2
- package/dist/package-test.js +2 -2
- package/dist/package.js +2 -2
- package/package.json +7 -3
- package/src/data-generators.ts +13 -13
- package/src/eda-tools.ts +42 -42
- package/src/eda-ui.ts +65 -58
- package/src/package-test.ts +2 -2
- package/src/package.ts +52 -60
- package/src/stat-tools.ts +72 -61
- package/src/svm.ts +144 -151
- package/src/utils.ts +13 -17
- package/src/workers/tsne-worker.ts +6 -6
- package/src/workers/umap-worker.ts +3 -3
package/src/svm.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
Least square support vector machine (LS-SVM) is implemented:
|
|
7
7
|
[1] Suykens, J., Vandewalle, J. "Least Squares Support Vector Machine Classifiers",
|
|
8
|
-
|
|
8
|
+
Neural Processing Letters 9, 293-300 (1999). https://doi.org/10.1023/A:1018628609742
|
|
9
9
|
*/
|
|
10
10
|
import * as grok from 'datagrok-api/grok';
|
|
11
11
|
import * as ui from 'datagrok-api/ui';
|
|
@@ -67,7 +67,7 @@ const MODEL_PARAMS_NAME = 'alpha';
|
|
|
67
67
|
const MODEL_WEIGHTS_NAME = 'weight';
|
|
68
68
|
const GAMMA = 'gamma';
|
|
69
69
|
const KERNEL = 'kernel';
|
|
70
|
-
const KERNEL_PARAMS = 'kernel params';
|
|
70
|
+
const KERNEL_PARAMS = 'kernel params';
|
|
71
71
|
const KERNEL_PARAM_1 = 'kernel param 1';
|
|
72
72
|
const KERNEL_PARAM_2 = 'kernel param 2';
|
|
73
73
|
const FEATURES_COUNT_NAME = 'features count';
|
|
@@ -111,43 +111,42 @@ function checkHyperparameters(hyperparameters: any): void {
|
|
|
111
111
|
|
|
112
112
|
// check kernel & its parameters
|
|
113
113
|
switch (hyperparameters.kernel) {
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
114
|
+
case LINEAR: // the case of linear kernel
|
|
115
|
+
return;
|
|
116
|
+
|
|
117
|
+
case RBF: // the case of RBF kernel
|
|
118
|
+
if (hyperparameters.sigma <= RBF_SIGMA_INFIMUM_LIMIT)
|
|
119
|
+
throw new Error(WRONG_RBF_SIGMA_MESSAGE);
|
|
120
|
+
return;
|
|
121
|
+
|
|
122
|
+
case POLYNOMIAL: // the case of polynomial kernel
|
|
123
|
+
// check c
|
|
124
|
+
if (hyperparameters.cParam <= POLYNOMIAL_C_INFIMUM_LIMIT)
|
|
125
|
+
throw new Error(WRONG_POLYNOMIAL_C_MESSAGE);
|
|
126
126
|
// check d
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
127
|
+
if (hyperparameters.dParam <= POLYNOMIAL_D_INFIMUM_LIMIT)
|
|
128
|
+
throw new Error(WRONG_POLYNOMIAL_D_MESSAGE);
|
|
129
|
+
return;
|
|
130
|
+
|
|
131
|
+
case SIGMOID: // the case of sigmoid kernel
|
|
132
|
+
return;
|
|
133
|
+
|
|
134
|
+
default: // incorrect kernel
|
|
135
|
+
throw new Error(WRONG_KERNEL_MESSAGE);
|
|
136
|
+
} // switch
|
|
137
137
|
} // checkHyperparameters
|
|
138
138
|
|
|
139
139
|
// Returns labels predicted by the specified model
|
|
140
|
-
async function predict(model: any, dataset: DG.ColumnList): Promise<DG.Column>
|
|
141
|
-
{
|
|
140
|
+
async function predict(model: any, dataset: DG.ColumnList): Promise<DG.Column> {
|
|
142
141
|
let _output: any;
|
|
143
142
|
|
|
144
|
-
|
|
145
|
-
model.normalizedTrainData.columns, model.trainLabels, model.means, model.stdDevs,
|
|
143
|
+
const _promise = _predictByLSSVMInWebWorker(model.kernelType, model.kernelParams,
|
|
144
|
+
model.normalizedTrainData.columns, model.trainLabels, model.means, model.stdDevs,
|
|
146
145
|
model.modelParams, model.modelWeights, dataset);
|
|
147
146
|
|
|
148
147
|
await _promise.then(
|
|
149
|
-
_result => {
|
|
150
|
-
_error => {
|
|
148
|
+
(_result) => {_output = _result;},
|
|
149
|
+
(_error) => {throw new Error(`Error: ${_error}`);},
|
|
151
150
|
);
|
|
152
151
|
|
|
153
152
|
return _output;
|
|
@@ -155,26 +154,26 @@ async function predict(model: any, dataset: DG.ColumnList): Promise<DG.Column>
|
|
|
155
154
|
|
|
156
155
|
// Evaluate accuracy of the model
|
|
157
156
|
function evaluateAccuracy(model: any): void {
|
|
158
|
-
|
|
157
|
+
const data = model.confusionMatrix.getRawData();
|
|
159
158
|
|
|
160
159
|
// here, the classic notation is used (see https://en.wikipedia.org/wiki/Sensitivity_and_specificity)
|
|
161
160
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
161
|
+
const TP = data[TRUE_POSITIVE_INDEX]; // true positive
|
|
162
|
+
const TN = data[TRUE_NEGATIVE_INDEX]; // true negative
|
|
163
|
+
const FP = data[FALSE_POSITIVE_INDEX]; // false positive
|
|
164
|
+
const FN = data[FALSE_NEGATIVE_INDEX]; // false negative
|
|
166
165
|
|
|
167
|
-
|
|
168
|
-
|
|
166
|
+
const P = TP + FN; // positive
|
|
167
|
+
const N = FP + TN; // negative
|
|
169
168
|
|
|
170
|
-
|
|
171
|
-
|
|
169
|
+
const TPR = TP / P; // true positive rate
|
|
170
|
+
const TNR = TN / N; // true negative rate
|
|
172
171
|
|
|
173
|
-
|
|
174
|
-
|
|
172
|
+
const PPV = TP / (TP + FP); // positive predictive value
|
|
173
|
+
const NPV = TN / (TN + FN); // negative predictive value
|
|
175
174
|
|
|
176
|
-
|
|
177
|
-
|
|
175
|
+
const ACC = (TP + TN) / (P + N); // accuracy
|
|
176
|
+
const BA = (TPR + TNR) / 2; // balanced accuracy
|
|
178
177
|
|
|
179
178
|
model.sensitivity = TPR;
|
|
180
179
|
model.specificity = TNR;
|
|
@@ -185,59 +184,57 @@ function evaluateAccuracy(model: any): void {
|
|
|
185
184
|
} // evaluateAccuracy
|
|
186
185
|
|
|
187
186
|
// Returns trained LS-SVM model.
|
|
188
|
-
async function trainAndAnalyzeModel(hyperparameters: any, dataset: DG.ColumnList,
|
|
189
|
-
labels: DG.Column): Promise<any>
|
|
190
|
-
{
|
|
187
|
+
async function trainAndAnalyzeModel(hyperparameters: any, dataset: DG.ColumnList,
|
|
188
|
+
labels: DG.Column): Promise<any> {
|
|
191
189
|
// check correctness of hyperparameter gamma
|
|
192
|
-
checkHyperparameters(hyperparameters)
|
|
190
|
+
checkHyperparameters(hyperparameters);
|
|
193
191
|
|
|
194
192
|
// create default kernel params array
|
|
195
193
|
const kernelParamsArray = [INIT_VALUE, INIT_VALUE];
|
|
196
194
|
|
|
197
195
|
// fill kernelParams
|
|
198
|
-
switch (hyperparameters.kernel)
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
throw new Error(WRONG_KERNEL_MESSAGE);
|
|
196
|
+
switch (hyperparameters.kernel) {
|
|
197
|
+
case LINEAR: // no kernel parameters in the case of linear kernel
|
|
198
|
+
break;
|
|
199
|
+
|
|
200
|
+
case RBF: // sigma parameter in the case of RBF-kernel
|
|
201
|
+
kernelParamsArray[RBF_SIGMA_INDEX] = hyperparameters.sigma;
|
|
202
|
+
break;
|
|
203
|
+
|
|
204
|
+
case POLYNOMIAL: // c and d parameters in the case of polynomial kernel
|
|
205
|
+
kernelParamsArray[POLYNOMIAL_C_INDEX] = hyperparameters.cParam;
|
|
206
|
+
kernelParamsArray[POLYNOMIAL_D_INDEX] = hyperparameters.dParam;
|
|
207
|
+
break;
|
|
208
|
+
|
|
209
|
+
case SIGMOID: // kappa and theta parameters in the case of sigmoid kernel
|
|
210
|
+
kernelParamsArray[SIGMOID_KAPPA_INDEX] = hyperparameters.kappa;
|
|
211
|
+
kernelParamsArray[SIGMOID_THETA_INDEX] = hyperparameters.theta;
|
|
212
|
+
break;
|
|
213
|
+
|
|
214
|
+
default: // incorrect kernel
|
|
215
|
+
throw new Error(WRONG_KERNEL_MESSAGE);
|
|
219
216
|
};
|
|
220
217
|
|
|
221
218
|
// create kernel params column
|
|
222
|
-
|
|
219
|
+
const kernelParams = DG.Column.fromList('double', KERNEL_PARAMS, kernelParamsArray);
|
|
223
220
|
|
|
224
221
|
// compute size of model params & precomputed weights
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
222
|
+
const trainCols = dataset.toList();
|
|
223
|
+
const modelParamsCount = trainCols[0].length + LS_SVM_ADD_CONST;
|
|
224
|
+
const precomputedWeightsCount = trainCols.length + LS_SVM_ADD_CONST;
|
|
225
|
+
const confusionMatrixElementsCount = CONFUSION_MATR_SIZE;
|
|
229
226
|
|
|
230
227
|
// call webassembly training function
|
|
231
228
|
|
|
232
229
|
let output: any;
|
|
233
|
-
|
|
234
|
-
kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount,
|
|
230
|
+
const _promise = _trainAndAnalyzeLSSVMInWebWorker(hyperparameters.gamma, hyperparameters.kernel,
|
|
231
|
+
kernelParams, modelParamsCount, precomputedWeightsCount, confusionMatrixElementsCount,
|
|
235
232
|
dataset, labels);
|
|
236
|
-
|
|
233
|
+
|
|
237
234
|
await _promise.then(
|
|
238
|
-
_result => {
|
|
239
|
-
_error => {
|
|
240
|
-
);
|
|
235
|
+
(_result) => {output = _result;},
|
|
236
|
+
(_error) => {throw new Error(`Error: ${_error}`);},
|
|
237
|
+
);
|
|
241
238
|
|
|
242
239
|
// rename output columns
|
|
243
240
|
output[MEANS_INDEX].name = MEAN;
|
|
@@ -250,7 +247,7 @@ async function trainAndAnalyzeModel(hyperparameters: any, dataset: DG.ColumnList
|
|
|
250
247
|
output[CONFUSION_MATRIX_INDEX].name = CONFUSION_MATRIX_NAME;
|
|
251
248
|
|
|
252
249
|
// complete model
|
|
253
|
-
|
|
250
|
+
const model = {
|
|
254
251
|
trainGamma: hyperparameters.gamma,
|
|
255
252
|
kernelType: hyperparameters.kernel,
|
|
256
253
|
kernelParams: kernelParams,
|
|
@@ -265,7 +262,7 @@ async function trainAndAnalyzeModel(hyperparameters: any, dataset: DG.ColumnList
|
|
|
265
262
|
confusionMatrix: output[CONFUSION_MATRIX_INDEX],
|
|
266
263
|
trainError: undefined,
|
|
267
264
|
featuresCount: trainCols.length,
|
|
268
|
-
trainSamplesCount: trainCols[0].length
|
|
265
|
+
trainSamplesCount: trainCols[0].length,
|
|
269
266
|
};
|
|
270
267
|
|
|
271
268
|
evaluateAccuracy(model);
|
|
@@ -274,17 +271,17 @@ async function trainAndAnalyzeModel(hyperparameters: any, dataset: DG.ColumnList
|
|
|
274
271
|
} // trainAndAnalyzeModel
|
|
275
272
|
|
|
276
273
|
// Wrapper for combining the function "trainAndAnalyzeModel" with Datagrok predictive tools
|
|
277
|
-
export async function getTrainedModel(hyperparameters: any, df: DG.DataFrame,
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
columns.remove(
|
|
274
|
+
export async function getTrainedModel(hyperparameters: any, df: DG.DataFrame, predictColumn: string): Promise<any> {
|
|
275
|
+
const columns = df.columns;
|
|
276
|
+
const labels = columns.byName(predictColumn);
|
|
277
|
+
columns.remove(predictColumn);
|
|
281
278
|
|
|
282
|
-
return await trainAndAnalyzeModel(hyperparameters, columns, labels);
|
|
279
|
+
return await trainAndAnalyzeModel(hyperparameters, columns, labels);
|
|
283
280
|
}
|
|
284
281
|
|
|
285
282
|
// Returns dataframe with short info about model
|
|
286
283
|
function getModelInfo(model: any): DG.DataFrame {
|
|
287
|
-
|
|
284
|
+
const kernelParams = model.kernelParams.getRawData();
|
|
288
285
|
|
|
289
286
|
return DG.DataFrame.fromColumns([
|
|
290
287
|
DG.Column.fromList('double', GAMMA, [model.trainGamma]),
|
|
@@ -296,72 +293,70 @@ function getModelInfo(model: any): DG.DataFrame {
|
|
|
296
293
|
DG.Column.fromList('double', TRAIN_ERROR, [model.trainError]),
|
|
297
294
|
DG.Column.fromList('double', BALANCED_ACCURACY, [model.balancedAccuracy]),
|
|
298
295
|
DG.Column.fromList('double', SENSITIVITY, [model.sensitivity]),
|
|
299
|
-
DG.Column.fromList('double', SPECIFICITY, [model.specificity]),
|
|
300
|
-
DG.Column.fromList('double', POSITIVE_PREDICTIVE_VALUE, [model.positivePredicitveValue]),
|
|
301
|
-
DG.Column.fromList('double', NEGATIVE_PREDICTIVE_VALUE, [model.negativePredicitveValue])
|
|
302
|
-
]);
|
|
303
|
-
}
|
|
296
|
+
DG.Column.fromList('double', SPECIFICITY, [model.specificity]),
|
|
297
|
+
DG.Column.fromList('double', POSITIVE_PREDICTIVE_VALUE, [model.positivePredicitveValue]),
|
|
298
|
+
DG.Column.fromList('double', NEGATIVE_PREDICTIVE_VALUE, [model.negativePredicitveValue]),
|
|
299
|
+
]);
|
|
300
|
+
}
|
|
304
301
|
|
|
305
302
|
// Get dataframe with confusion matrix
|
|
306
|
-
function getConfusionMatrixDF(model: any): DG.DataFrame
|
|
307
|
-
|
|
308
|
-
let data = model.confusionMatrix.getRawData();
|
|
303
|
+
function getConfusionMatrixDF(model: any): DG.DataFrame {
|
|
304
|
+
const data = model.confusionMatrix.getRawData();
|
|
309
305
|
|
|
310
306
|
return DG.DataFrame.fromColumns([
|
|
311
307
|
DG.Column.fromStrings('', [POSITIVE_NAME, NEGATIVE_NAME]),
|
|
312
|
-
DG.Column.fromList('int', PREDICTED_POSITIVE_NAME,
|
|
308
|
+
DG.Column.fromList('int', PREDICTED_POSITIVE_NAME,
|
|
313
309
|
[data[TRUE_POSITIVE_INDEX], data[FALSE_POSITIVE_INDEX]]),
|
|
314
|
-
DG.Column.fromList('int', PREDICTED_NEGATIVE_NAME,
|
|
315
|
-
[data[FALSE_NEGATIVE_INDEX], data[TRUE_NEGATIVE_INDEX]])
|
|
310
|
+
DG.Column.fromList('int', PREDICTED_NEGATIVE_NAME,
|
|
311
|
+
[data[FALSE_NEGATIVE_INDEX], data[TRUE_NEGATIVE_INDEX]]),
|
|
316
312
|
]);
|
|
317
313
|
}
|
|
318
314
|
|
|
319
315
|
// Show training report
|
|
320
316
|
export function showTrainReport(df: DG.DataFrame, model: any): void {
|
|
321
317
|
df.name = ML_REPORT;
|
|
322
|
-
df.columns.add(model.trainLabels);
|
|
323
|
-
df.columns.add(model.predictedLabels);
|
|
324
|
-
df.columns.add(model.correctness);
|
|
325
|
-
|
|
318
|
+
df.columns.add(model.trainLabels);
|
|
319
|
+
df.columns.add(model.predictedLabels);
|
|
320
|
+
df.columns.add(model.correctness);
|
|
321
|
+
const dfView = grok.shell.addTableView(df);
|
|
326
322
|
dfView.addViewer(DG.Viewer.form(getModelInfo(model)));
|
|
327
|
-
dfView.addViewer(DG.Viewer.scatterPlot(df,
|
|
328
|
-
{
|
|
329
|
-
color: model.predictedLabels.name
|
|
330
|
-
}));
|
|
331
|
-
dfView.addViewer(DG.Viewer.scatterPlot(df,
|
|
332
|
-
{
|
|
333
|
-
color: model.trainLabels.name
|
|
323
|
+
dfView.addViewer(DG.Viewer.scatterPlot(df,
|
|
324
|
+
{title: ML_REPORT_PREDICTED_LABELS,
|
|
325
|
+
color: model.predictedLabels.name,
|
|
326
|
+
}));
|
|
327
|
+
dfView.addViewer(DG.Viewer.scatterPlot(df,
|
|
328
|
+
{title: ML_REPORT_TRAIN_LABELS,
|
|
329
|
+
color: model.trainLabels.name,
|
|
334
330
|
}));
|
|
335
|
-
dfView.addViewer(DG.Viewer.grid(getConfusionMatrixDF(model)));
|
|
336
|
-
dfView.addViewer(DG.Viewer.scatterPlot(df,
|
|
337
|
-
{
|
|
338
|
-
color: model.correctness.name
|
|
331
|
+
dfView.addViewer(DG.Viewer.grid(getConfusionMatrixDF(model)));
|
|
332
|
+
dfView.addViewer(DG.Viewer.scatterPlot(df,
|
|
333
|
+
{title: ML_REPORT_CORRECTNESS,
|
|
334
|
+
color: model.correctness.name,
|
|
339
335
|
}));
|
|
340
336
|
} // showTrainReport
|
|
341
337
|
|
|
342
338
|
// Returns trained model packed into UInt8Array
|
|
343
339
|
export function getPackedModel(model: any): any {
|
|
344
|
-
|
|
345
340
|
// get principal data
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
341
|
+
const dataCols = model.normalizedTrainData.columns;
|
|
342
|
+
const samplesCount = model.trainSamplesCount;
|
|
343
|
+
const featuresCount = model.featuresCount;
|
|
349
344
|
|
|
350
|
-
/*let bufferSize = BYTES * (7 + featuresCount * samplesCount
|
|
345
|
+
/*let bufferSize = BYTES * (7 + featuresCount * samplesCount
|
|
351
346
|
+ 3 * featuresCount + 2 * samplesCount);*/
|
|
352
347
|
|
|
353
348
|
// compute size of packed model
|
|
354
|
-
|
|
355
|
-
samplesCount + featuresCount + featuresCount + samplesCount + LS_SVM_ADD_CONST
|
|
356
|
-
|
|
349
|
+
const bufferSize = BYTES * (INTS_COUNT + KER_PARAMS_COUNT +
|
|
350
|
+
samplesCount + featuresCount + featuresCount + samplesCount + LS_SVM_ADD_CONST +
|
|
351
|
+
featuresCount + LS_SVM_ADD_CONST + featuresCount * samplesCount);
|
|
357
352
|
|
|
358
353
|
// packed model
|
|
359
|
-
|
|
360
|
-
|
|
354
|
+
const result = new Uint8Array(bufferSize);
|
|
355
|
+
const buffer = result.buffer;
|
|
361
356
|
let offset = 0;
|
|
362
357
|
|
|
363
358
|
// pack kernel type and sizes
|
|
364
|
-
|
|
359
|
+
const ints = new Int32Array(buffer, offset, INTS_COUNT);
|
|
365
360
|
ints[MODEL_KERNEL_INDEX] = model.kernelType;
|
|
366
361
|
ints[SAMPLES_COUNT_INDEX] = samplesCount;
|
|
367
362
|
ints[FEATURES_COUNT_INDEX] = featuresCount;
|
|
@@ -379,7 +374,7 @@ export function getPackedModel(model: any): any {
|
|
|
379
374
|
|
|
380
375
|
// pack mean values of training data
|
|
381
376
|
floats = new Float32Array(buffer, offset, featuresCount);
|
|
382
|
-
floats.set(model.means.getRawData());
|
|
377
|
+
floats.set(model.means.getRawData());
|
|
383
378
|
offset += featuresCount * BYTES;
|
|
384
379
|
|
|
385
380
|
// pack standard deviations of training data
|
|
@@ -389,7 +384,7 @@ export function getPackedModel(model: any): any {
|
|
|
389
384
|
|
|
390
385
|
// pack model parameters
|
|
391
386
|
floats = new Float32Array(buffer, offset, samplesCount + LS_SVM_ADD_CONST);
|
|
392
|
-
floats.set(model.modelParams.getRawData());
|
|
387
|
+
floats.set(model.modelParams.getRawData());
|
|
393
388
|
offset += (samplesCount + LS_SVM_ADD_CONST) * BYTES;
|
|
394
389
|
|
|
395
390
|
// pack model's precomputed weights
|
|
@@ -403,40 +398,39 @@ export function getPackedModel(model: any): any {
|
|
|
403
398
|
floats.set(col.getRawData());
|
|
404
399
|
offset += featuresCount * BYTES;
|
|
405
400
|
}
|
|
406
|
-
|
|
407
|
-
return result;
|
|
401
|
+
|
|
402
|
+
return result;
|
|
408
403
|
} // getPackedModel
|
|
409
404
|
|
|
410
405
|
// Returns unpacked model
|
|
411
406
|
function getUnpackedModel(packedModel: any): any {
|
|
412
|
-
|
|
413
|
-
let modelBytes = packedModel.buffer;
|
|
407
|
+
const modelBytes = packedModel.buffer;
|
|
414
408
|
let offset = 0;
|
|
415
409
|
|
|
416
410
|
// extract kernel type and sizes
|
|
417
|
-
|
|
411
|
+
const header = new Int32Array(modelBytes, offset, INTS_COUNT);
|
|
418
412
|
offset += INTS_COUNT * BYTES;
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
413
|
+
const samplesCount = header[SAMPLES_COUNT_INDEX];
|
|
414
|
+
const featuresCount = header[FEATURES_COUNT_INDEX];
|
|
415
|
+
|
|
422
416
|
// extract parameters of kernel
|
|
423
|
-
const kernelParams = DG.Column.fromFloat32Array(KERNEL_PARAMS,
|
|
417
|
+
const kernelParams = DG.Column.fromFloat32Array(KERNEL_PARAMS,
|
|
424
418
|
new Float32Array(modelBytes, offset, KER_PARAMS_COUNT));
|
|
425
419
|
offset += KER_PARAMS_COUNT * BYTES;
|
|
426
420
|
|
|
427
421
|
// extract training labels
|
|
428
422
|
const trainLabels = DG.Column.fromFloat32Array(LABELS,
|
|
429
|
-
new Float32Array(modelBytes, offset, samplesCount));
|
|
430
|
-
offset += samplesCount * BYTES;
|
|
423
|
+
new Float32Array(modelBytes, offset, samplesCount));
|
|
424
|
+
offset += samplesCount * BYTES;
|
|
431
425
|
|
|
432
426
|
// extract mean values of training data
|
|
433
|
-
const means = DG.Column.fromFloat32Array( MEAN,
|
|
434
|
-
|
|
427
|
+
const means = DG.Column.fromFloat32Array( MEAN,
|
|
428
|
+
new Float32Array(modelBytes, offset, featuresCount));
|
|
435
429
|
offset += featuresCount * BYTES;
|
|
436
430
|
|
|
437
431
|
// extract standard deviations of training data
|
|
438
432
|
const stdDevs = DG.Column.fromFloat32Array( STD_DEV,
|
|
439
|
-
new Float32Array(modelBytes, offset, featuresCount));
|
|
433
|
+
new Float32Array(modelBytes, offset, featuresCount));
|
|
440
434
|
offset += featuresCount * BYTES;
|
|
441
435
|
|
|
442
436
|
// extract parameters of model
|
|
@@ -446,11 +440,11 @@ function getUnpackedModel(packedModel: any): any {
|
|
|
446
440
|
|
|
447
441
|
// extract model's precomputed weights
|
|
448
442
|
const modelWeights = DG.Column.fromFloat32Array( MODEL_WEIGHTS_NAME,
|
|
449
|
-
new Float32Array(modelBytes, offset, featuresCount + LS_SVM_ADD_CONST));
|
|
443
|
+
new Float32Array(modelBytes, offset, featuresCount + LS_SVM_ADD_CONST));
|
|
450
444
|
offset += (featuresCount + LS_SVM_ADD_CONST) * BYTES;
|
|
451
445
|
|
|
452
446
|
// extract training data columns
|
|
453
|
-
|
|
447
|
+
const dataCols = [];
|
|
454
448
|
|
|
455
449
|
for (let i = 0; i < samplesCount; i++) {
|
|
456
450
|
dataCols.push( DG.Column.fromFloat32Array( i.toString(),
|
|
@@ -458,28 +452,27 @@ function getUnpackedModel(packedModel: any): any {
|
|
|
458
452
|
offset += featuresCount * BYTES;
|
|
459
453
|
}
|
|
460
454
|
|
|
461
|
-
const normalizedTrainData = DG.DataFrame.fromColumns(dataCols);
|
|
455
|
+
const normalizedTrainData = DG.DataFrame.fromColumns(dataCols);
|
|
462
456
|
|
|
463
|
-
|
|
457
|
+
const model = {kernelType: header[MODEL_KERNEL_INDEX],
|
|
464
458
|
kernelParams: kernelParams,
|
|
465
459
|
trainLabels: trainLabels,
|
|
466
460
|
means: means,
|
|
467
461
|
stdDevs: stdDevs,
|
|
468
462
|
modelParams: modelParams,
|
|
469
463
|
modelWeights: modelWeights,
|
|
470
|
-
normalizedTrainData: normalizedTrainData
|
|
464
|
+
normalizedTrainData: normalizedTrainData,
|
|
471
465
|
};
|
|
472
466
|
|
|
473
|
-
return model;
|
|
467
|
+
return model;
|
|
474
468
|
} // getUnpackedModel
|
|
475
469
|
|
|
476
470
|
// Wrapper for combining the function "predict" with Datagrok predictive tools
|
|
477
|
-
export async function getPrediction(df: DG.DataFrame, packedModel: any): Promise<DG.DataFrame> {
|
|
478
|
-
|
|
479
|
-
let model = getUnpackedModel(new Uint8Array(packedModel));
|
|
471
|
+
export async function getPrediction(df: DG.DataFrame, packedModel: any): Promise<DG.DataFrame> {
|
|
472
|
+
const model = getUnpackedModel(new Uint8Array(packedModel));
|
|
480
473
|
|
|
481
|
-
|
|
482
|
-
res.name = PREDICTION;
|
|
474
|
+
const res = await predict(model, df.columns);
|
|
475
|
+
res.name = PREDICTION;
|
|
483
476
|
|
|
484
477
|
return DG.DataFrame.fromColumns([res]);
|
|
485
478
|
} // getPrediction
|
package/src/utils.ts
CHANGED
|
@@ -30,7 +30,7 @@ const INCORRECT_LEARNING_RATE_MES = 'learning rate must be positive.';
|
|
|
30
30
|
const INCORRECT_PERPLEXITY_MES = 'perplexity must be at least 2 and not greater than samples count.';
|
|
31
31
|
const INCORRECT_STEPS_MES = 'steps must be non-negative.';
|
|
32
32
|
const INCORRECT_CYCLES_MES = 'cycles must be positive.';
|
|
33
|
-
const INCORRECT_CUTOFF_MES = 'cutoff must be non-negative.'
|
|
33
|
+
const INCORRECT_CUTOFF_MES = 'cutoff must be non-negative.';
|
|
34
34
|
|
|
35
35
|
// Check column type
|
|
36
36
|
export function checkColumnType(col: DG.Column): void {
|
|
@@ -52,8 +52,7 @@ export function checkDimensionReducerInputs(features: DG.ColumnList, components:
|
|
|
52
52
|
|
|
53
53
|
// Check UMAP inputs
|
|
54
54
|
export function checkUMAPinputs(features: DG.ColumnList, components: number, epochs: number,
|
|
55
|
-
neighbors: number, minDist: number, spread: number): void
|
|
56
|
-
{
|
|
55
|
+
neighbors: number, minDist: number, spread: number): void {
|
|
57
56
|
// General dim reducer checks
|
|
58
57
|
checkDimensionReducerInputs(features, components);
|
|
59
58
|
|
|
@@ -73,13 +72,12 @@ export function checkUMAPinputs(features: DG.ColumnList, components: number, epo
|
|
|
73
72
|
throw new Error(INCORRECT_EPOCH_MES);
|
|
74
73
|
|
|
75
74
|
if ((neighbors < 2) || (neighbors > features.byIndex(0).length))
|
|
76
|
-
throw new Error(INCORRECT_NEIBORS_MES);
|
|
75
|
+
throw new Error(INCORRECT_NEIBORS_MES);
|
|
77
76
|
}
|
|
78
77
|
|
|
79
78
|
// Check t-SNE inputs
|
|
80
|
-
export function checkTSNEinputs(features: DG.ColumnList, components: number,
|
|
81
|
-
learningRate: number, perplexity: number, iterations: number): void
|
|
82
|
-
{
|
|
79
|
+
export function checkTSNEinputs(features: DG.ColumnList, components: number,
|
|
80
|
+
learningRate: number, perplexity: number, iterations: number): void {
|
|
83
81
|
// General dim reducer checks
|
|
84
82
|
checkDimensionReducerInputs(features, components);
|
|
85
83
|
|
|
@@ -96,13 +94,12 @@ export function checkTSNEinputs(features: DG.ColumnList, components: number,
|
|
|
96
94
|
throw new Error(INCORRECT_ITERATIONS_MES);
|
|
97
95
|
|
|
98
96
|
if ((perplexity < 2) || (perplexity > features.byIndex(0).length))
|
|
99
|
-
throw new Error(INCORRECT_PERPLEXITY_MES);
|
|
97
|
+
throw new Error(INCORRECT_PERPLEXITY_MES);
|
|
100
98
|
}
|
|
101
99
|
|
|
102
100
|
// Check SPE inputs
|
|
103
101
|
export function checkSPEinputs(features: DG.ColumnList, dimension: number,
|
|
104
|
-
steps: number, cycles: number, cutoff: number, lambda: number): void
|
|
105
|
-
{
|
|
102
|
+
steps: number, cycles: number, cutoff: number, lambda: number): void {
|
|
106
103
|
// General dim reducer checks
|
|
107
104
|
checkDimensionReducerInputs(features, dimension);
|
|
108
105
|
|
|
@@ -136,16 +133,15 @@ export function checkWasmDimensionReducerInputs(features: DG.ColumnList, compone
|
|
|
136
133
|
}
|
|
137
134
|
|
|
138
135
|
// Check inputs of data for SVM testing generator
|
|
139
|
-
export function checkGeneratorSVMinputs(samplesCount: number, featuresCount: number,
|
|
140
|
-
min: number, max: number, violatorsPercentage: number): void
|
|
141
|
-
{
|
|
136
|
+
export function checkGeneratorSVMinputs(samplesCount: number, featuresCount: number,
|
|
137
|
+
min: number, max: number, violatorsPercentage: number): void {
|
|
142
138
|
if (min >= max)
|
|
143
139
|
throw new Error(INCORERRECT_MIN_MAX_MES);
|
|
144
|
-
|
|
145
|
-
if (featuresCount < FEATURES_COUNT_MIN)
|
|
140
|
+
|
|
141
|
+
if (featuresCount < FEATURES_COUNT_MIN)
|
|
146
142
|
throw new Error(INCORERRECT_FEATURES_MES);
|
|
147
143
|
|
|
148
|
-
if (samplesCount < SAMPLES_COUNT_MIN)
|
|
144
|
+
if (samplesCount < SAMPLES_COUNT_MIN)
|
|
149
145
|
throw new Error(INCORERRECT_SAMPLES_MES);
|
|
150
146
|
|
|
151
147
|
if ((violatorsPercentage < PERCENTAGE_MIN) || (violatorsPercentage > PERCENTAGE_MAX))
|
|
@@ -156,7 +152,7 @@ export function checkGeneratorSVMinputs(samplesCount: number, featuresCount: num
|
|
|
156
152
|
export function getRowsOfNumericalColumnns(columnList: DG.ColumnList): any[][] {
|
|
157
153
|
const columns = columnList.toList();
|
|
158
154
|
const rowCount = columns[0].length;
|
|
159
|
-
const colCount = columns.length;
|
|
155
|
+
const colCount = columns.length;
|
|
160
156
|
|
|
161
157
|
const output = [] as any[][];
|
|
162
158
|
|
|
@@ -2,19 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
import {TSNE} from '@keckelt/tsne';
|
|
4
4
|
|
|
5
|
-
onmessage = async function
|
|
5
|
+
onmessage = async function(evt) {
|
|
6
6
|
const tsne = new TSNE({
|
|
7
7
|
epsilon: evt.data.options.learningRate,
|
|
8
8
|
perplexity: evt.data.options.perplexity,
|
|
9
|
-
dim: evt.data.options.components
|
|
9
|
+
dim: evt.data.options.components,
|
|
10
10
|
});
|
|
11
|
-
|
|
11
|
+
|
|
12
12
|
tsne.initDataRaw(evt.data.data);
|
|
13
13
|
|
|
14
14
|
const iterCount = evt.data.options.iterations;
|
|
15
|
-
|
|
16
|
-
for(let i = 0; i < iterCount; ++i)
|
|
15
|
+
|
|
16
|
+
for (let i = 0; i < iterCount; ++i)
|
|
17
17
|
tsne.step();
|
|
18
18
|
|
|
19
19
|
postMessage({'embeddings': tsne.getSolution()});
|
|
20
|
-
}
|
|
20
|
+
};
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
// Worker for the method UMAP
|
|
2
2
|
|
|
3
|
-
import {
|
|
3
|
+
import {UMAP} from 'umap-js';
|
|
4
4
|
|
|
5
|
-
onmessage = async function
|
|
5
|
+
onmessage = async function(evt) {
|
|
6
6
|
const umap = new UMAP(evt.data.options);
|
|
7
7
|
const embeddings = umap.fit(evt.data.data);
|
|
8
8
|
postMessage({'embeddings': embeddings});
|
|
9
|
-
}
|
|
9
|
+
};
|