@datagrok/eda 1.1.31 → 1.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,94 +1,95 @@
1
1
  {
2
- "name": "@datagrok/eda",
3
- "friendlyName": "EDA",
4
- "version": "1.1.31",
5
- "description": "Exploratory Data Analysis Tools",
6
- "dependencies": {
7
- "@datagrok-libraries/math": "^1.1.11",
8
- "@datagrok-libraries/ml": "^6.6.15",
9
- "@datagrok-libraries/tutorials": "^1.3.6",
10
- "@datagrok-libraries/utils": "^4.2.20",
11
- "@keckelt/tsne": "^1.0.2",
12
- "@webgpu/types": "^0.1.40",
13
- "cash-dom": "^8.1.1",
14
- "datagrok-api": "^1.20.0",
15
- "dayjs": "^1.11.9",
16
- "jstat": "^1.9.6",
17
- "source-map-loader": "^4.0.1",
18
- "umap-js": "^1.3.3",
19
- "worker-loader": "latest"
20
- },
21
- "author": {
22
- "name": "Viktor Makarichev",
23
- "email": "vmakarichev@datagrok.ai"
24
- },
25
- "devDependencies": {
26
- "@typescript-eslint/eslint-plugin": "^5.32.0",
27
- "@typescript-eslint/parser": "^5.32.0",
28
- "css-loader": "latest",
29
- "eslint": "^8.21.0",
30
- "eslint-config-google": "^0.14.0",
31
- "style-loader": "latest",
32
- "ts-loader": "latest",
33
- "typescript": "latest",
34
- "webpack": "latest",
35
- "webpack-cli": "latest"
36
- },
37
- "scripts": {
38
- "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/tutorials",
39
- "debug-eda": "webpack && grok publish",
40
- "release-eda": "webpack && grok publish --release",
41
- "build-eda": "webpack",
42
- "build": "webpack",
43
- "debug-eda-dev": "webpack && grok publish dev",
44
- "release-eda-dev": "webpack && grok publish dev --release",
45
- "debug-eda-local": "webpack && grok publish local",
46
- "release-eda-local": "webpack && grok publish local --release",
47
- "build-all": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/tutorials run build && npm run build"
48
- },
49
- "canEdit": [
50
- "Developers"
51
- ],
52
- "canView": [
53
- "All users"
54
- ],
55
- "repository": {
56
- "type": "git",
57
- "url": "https://github.com/datagrok-ai/public.git",
58
- "directory": "packages/EDA"
59
- },
60
- "category": "Machine Learning",
61
- "sources": [
62
- "wasm/EDA.js"
63
- ],
64
- "meta": {
65
- "menu": {
66
- "ML": {
67
- "Tools": {
68
- "Impute Missing Values...": null,
69
- "Random Data...": null
70
- },
71
- "Cluster": {
72
- "Cluster...": null,
73
- "DBSCAN...": null
74
- },
75
- "Notebooks": {
76
- "Browse Notebooks": null,
77
- "Open in Notebook": null,
78
- "New Notebook": null
79
- },
80
- "Models": {
81
- "Browse Models": null,
82
- "Train Model...": null,
83
- "Apply Model...": null
84
- },
85
- "Analyse": {
86
- "PCA...": null,
87
- "ANOVA...": null,
88
- "Multivariate Analysis...": null
89
- },
90
- "Reduce Dimensionality": null
91
- }
92
- }
2
+ "name": "@datagrok/eda",
3
+ "friendlyName": "EDA",
4
+ "version": "1.1.32",
5
+ "description": "Exploratory Data Analysis Tools",
6
+ "dependencies": {
7
+ "@datagrok-libraries/math": "^1.1.11",
8
+ "@datagrok-libraries/ml": "^6.6.15",
9
+ "@datagrok-libraries/tutorials": "^1.3.13",
10
+ "@datagrok-libraries/utils": "^4.2.20",
11
+ "@keckelt/tsne": "^1.0.2",
12
+ "@webgpu/types": "^0.1.40",
13
+ "cash-dom": "^8.1.1",
14
+ "datagrok-api": "^1.20.1",
15
+ "dayjs": "^1.11.9",
16
+ "jstat": "^1.9.6",
17
+ "source-map-loader": "^4.0.1",
18
+ "umap-js": "^1.3.3",
19
+ "worker-loader": "latest"
20
+ },
21
+ "author": {
22
+ "name": "Viktor Makarichev",
23
+ "email": "vmakarichev@datagrok.ai"
24
+ },
25
+ "devDependencies": {
26
+ "@typescript-eslint/eslint-plugin": "^5.32.0",
27
+ "@typescript-eslint/parser": "^5.32.0",
28
+ "css-loader": "latest",
29
+ "eslint": "^8.21.0",
30
+ "eslint-config-google": "^0.14.0",
31
+ "style-loader": "latest",
32
+ "ts-loader": "latest",
33
+ "typescript": "latest",
34
+ "webpack": "latest",
35
+ "webpack-cli": "latest"
36
+ },
37
+ "scripts": {
38
+ "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/tutorials",
39
+ "debug-eda": "webpack && grok publish",
40
+ "release-eda": "webpack && grok publish --release",
41
+ "build-eda": "webpack",
42
+ "build": "webpack",
43
+ "debug-eda-dev": "webpack && grok publish dev",
44
+ "release-eda-dev": "webpack && grok publish dev --release",
45
+ "debug-eda-local": "webpack && grok publish local",
46
+ "release-eda-local": "webpack && grok publish local --release",
47
+ "build-all": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/tutorials run build && npm run build"
48
+ },
49
+ "canEdit": [
50
+ "Developers"
51
+ ],
52
+ "canView": [
53
+ "All users"
54
+ ],
55
+ "repository": {
56
+ "type": "git",
57
+ "url": "https://github.com/datagrok-ai/public.git",
58
+ "directory": "packages/EDA"
59
+ },
60
+ "category": "Machine Learning",
61
+ "sources": [
62
+ "wasm/EDA.js",
63
+ "wasm/XGBoostAPI.js"
64
+ ],
65
+ "meta": {
66
+ "menu": {
67
+ "ML": {
68
+ "Tools": {
69
+ "Impute Missing Values...": null,
70
+ "Random Data...": null
71
+ },
72
+ "Cluster": {
73
+ "Cluster...": null,
74
+ "DBSCAN...": null
75
+ },
76
+ "Notebooks": {
77
+ "Browse Notebooks": null,
78
+ "Open in Notebook": null,
79
+ "New Notebook": null
80
+ },
81
+ "Models": {
82
+ "Browse Models": null,
83
+ "Train Model...": null,
84
+ "Apply Model...": null
85
+ },
86
+ "Analyse": {
87
+ "PCA...": null,
88
+ "ANOVA...": null,
89
+ "Multivariate Analysis...": null
90
+ },
91
+ "Reduce Dimensionality": null
92
+ }
93
93
  }
94
+ }
94
95
  }
@@ -1,6 +1,8 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
3
3
  import './tests/dim-reduction-tests';
4
+ import './tests/linear-methods-tests';
5
+ import './tests/classifiers-tests';
4
6
  export const _package = new DG.Package();
5
7
  export {tests};
6
8
 
package/src/package.ts CHANGED
@@ -34,6 +34,9 @@ import {getLinearRegressionParams, getPredictionByLinearRegression} from './regr
34
34
  import {PlsModel} from './pls/pls-ml';
35
35
  import {SoftmaxClassifier} from './softmax-classifier';
36
36
 
37
+ import {initXgboost} from '../wasm/xgbooster';
38
+ import {XGBooster} from './xgbooster';
39
+
37
40
  export const _package = new DG.Package();
38
41
 
39
42
  //name: info
@@ -44,6 +47,7 @@ export function info() {
44
47
  //tags: init
45
48
  export async function init(): Promise<void> {
46
49
  await _initEDAAPI();
50
+ await initXgboost();
47
51
  }
48
52
 
49
53
  //top-menu: ML | Cluster | DBSCAN...
@@ -258,9 +262,12 @@ export async function MCLInitializationFunction(sc: DG.ScatterPlotViewer) {
258
262
  const options: MCLSerializableOptions = JSON.parse(mclTag);
259
263
  const cols = options.cols.map((colName) => df.columns.byName(colName));
260
264
  const preprocessingFuncs = options.preprocessingFuncs.map((funcName) => funcName ? DG.Func.byName(funcName) : null);
265
+ // let presetMatrix = null;
266
+ // if (df.temp['sparseMatrix'])
267
+ // presetMatrix = df.temp['sparseMatrix'];
261
268
  const res = await markovCluster(df, cols, options.metrics, options.weights,
262
269
  options.aggregationMethod, preprocessingFuncs, options.preprocessingFuncArgs, options.threshold,
263
- options.maxIterations, options.useWebGPU, options.inflate, options.minClusterSize, sc);
270
+ options.maxIterations, options.useWebGPU, options.inflate, options.minClusterSize, sc /**presetMatrix */);
264
271
  return res?.sc;
265
272
  }
266
273
 
@@ -300,7 +307,7 @@ export async function MVA(): Promise<void> {
300
307
  //description: Multidimensional data analysis using partial least squares (PLS) regression. It identifies latent factors and constructs a linear model based on them.
301
308
  //meta.demoPath: Compute | Multivariate analysis
302
309
  export async function demoMultivariateAnalysis(): Promise<any> {
303
- runDemoMVA();
310
+ await runDemoMVA();
304
311
  }
305
312
 
306
313
  //name: trainLinearKernelSVM
@@ -737,3 +744,55 @@ export async function visualizePLSRegression(df: DG.DataFrame, targetColumn: DG.
737
744
  export function isInteractivePLSRegression(df: DG.DataFrame, predictColumn: DG.Column): boolean {
738
745
  return PlsModel.isInteractive(df.columns, predictColumn);
739
746
  }
747
+
748
+ //name: trainXGBooster
749
+ //meta.mlname: XGBoost
750
+ //meta.mlrole: train
751
+ //input: dataframe df
752
+ //input: column predictColumn
753
+ //input: int iterations = 20 {min: 1; max: 100} [Number of training iterations]
754
+ //input: double eta = 0.3 {caption: Rate; min: 0; max: 1} [Learning rate]
755
+ //input: int maxDepth = 6 {min: 0; max: 20} [Maximum depth of a tree]
756
+ //input: double lambda = 1 {min: 0; max: 100} [L2 regularization term]
757
+ //input: double alpha = 0 {min: 0; max: 100} [L1 regularization term]
758
+ //output: dynamic model
759
+ export async function trainXGBooster(df: DG.DataFrame, predictColumn: DG.Column,
760
+ iterations: number, eta: number, maxDepth: number, lambda: number, alpha: number): Promise<Uint8Array> {
761
+ const features = df.columns; // every column of df is handed to fit() as a feature
762
+ // NOTE(review): assumes df does not contain predictColumn itself — confirm against the platform's train contract
763
+ const booster = new XGBooster();
764
+ await booster.fit(features, predictColumn, iterations, eta, maxDepth, lambda, alpha);
765
+ // The trained booster is returned in its serialized (toBytes) form
766
+ return booster.toBytes();
767
+ }
768
+
769
+ //name: applyXGBooster
770
+ //meta.mlname: XGBoost
771
+ //meta.mlrole: apply
772
+ //input: dataframe df
773
+ //input: dynamic model
774
+ //output: dataframe table
775
+ export function applyXGBooster(df: DG.DataFrame, model: any): DG.DataFrame { // 'apply' role of the XGBoost model
776
+ const unpackedModel = new XGBooster(model); // rebuild a booster from the packed model passed in
777
+ return DG.DataFrame.fromColumns([unpackedModel.predict(df.columns)]); // one-column table holding the prediction
778
+ }
779
+
780
+ //name: isInteractiveXGBooster
781
+ //meta.mlname: XGBoost
782
+ //meta.mlrole: isInteractive
783
+ //input: dataframe df
784
+ //input: column predictColumn
785
+ //output: bool result
786
+ export function isInteractiveXGBooster(df: DG.DataFrame, predictColumn: DG.Column): boolean { // 'isInteractive' role
787
+ return XGBooster.isInteractive(df.columns, predictColumn); // decision is delegated to the static XGBooster check
788
+ }
789
+
790
+ //name: isApplicableXGBooster
791
+ //meta.mlname: XGBoost
792
+ //meta.mlrole: isApplicable
793
+ //input: dataframe df
794
+ //input: column predictColumn
795
+ //output: bool result
796
+ export function isApplicableXGBooster(df: DG.DataFrame, predictColumn: DG.Column): boolean { // 'isApplicable' role
797
+ return XGBooster.isApplicable(df.columns, predictColumn); // decision is delegated to the static XGBooster check
798
+ }
@@ -35,6 +35,7 @@ export enum TITLE {
35
35
  EXPL_VAR = 'Explained Variance',
36
36
  EXPLORE = 'Explore',
37
37
  FEATURES = 'Feature names',
38
+ BROWSE = 'Browse',
38
39
  }
39
40
 
40
41
  /** Tooltips */
@@ -115,11 +116,26 @@ The method finds the latent factors that
115
116
 
116
117
  /** Description of demo results: wizard components */
117
118
  export const DEMO_RESULTS = [
118
- {caption: TITLE.MODEL, text: 'Closer to the line means better price prediction.'},
119
- {caption: TITLE.SCORES, text: 'The latent factor values for each data sample reflect the similarities and dissimilarities among observations.'},
120
- {caption: TITLE.LOADINGS, text: 'The impact of each feature on the latent factors: higher loading means stronger influence.'},
121
- {caption: TITLE.REGR_COEFS, text: 'Parameters of the obtained linear model: features make different contribution to the prediction.'},
122
- {caption: TITLE.EXPL_VAR, text: 'How well the latent components fit source data: closer to one means better fit.'},
119
+ {
120
+ caption: TITLE.MODEL,
121
+ text: 'Closer to the line means better price prediction.',
122
+ },
123
+ {
124
+ caption: TITLE.SCORES,
125
+ text: 'The latent factor values for each sample reflect the similarities and dissimilarities among observations.',
126
+ },
127
+ {
128
+ caption: TITLE.LOADINGS,
129
+ text: 'The impact of each feature on the latent factors: higher loading means stronger influence.',
130
+ },
131
+ {
132
+ caption: TITLE.REGR_COEFS,
133
+ text: 'Parameters of the obtained linear model: features make different contribution to the prediction.',
134
+ },
135
+ {
136
+ caption: TITLE.EXPL_VAR,
137
+ text: 'How well the latent components fit source data: closer to one means better fit.',
138
+ },
123
139
  ];
124
140
 
125
141
  /** Form results markdown for demo app */
@@ -110,7 +110,11 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
110
110
  if (analysisType === PLS_ANALYSIS.COMPUTE_COMPONENTS)
111
111
  return;
112
112
 
113
- const view = grok.shell.tableView(input.table.name);
113
+ //const view = grok.shell.tableView(input.table.name);
114
+
115
+ const view = (analysisType === PLS_ANALYSIS.DEMO) ?
116
+ (grok.shell.view(TITLE.BROWSE) as DG.BrowseView).preview as DG.TableView :
117
+ grok.shell.tableView(input.table.name);
114
118
 
115
119
  // 0.1 Buffer table
116
120
  const buffer = DG.DataFrame.fromColumns([
@@ -248,7 +252,9 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
248
252
 
249
253
  /** Run multivariate analysis (PLS) */
250
254
  export async function runMVA(analysisType: PLS_ANALYSIS): Promise<void> {
251
- const table = grok.shell.t;
255
+ const table = (analysisType === PLS_ANALYSIS.DEMO) ?
256
+ ((grok.shell.view(TITLE.BROWSE) as DG.BrowseView).preview as DG.TableView).table :
257
+ grok.shell.t;
252
258
 
253
259
  if (table === null) {
254
260
  grok.shell.warning(ERROR_MSG.NO_DF);
@@ -0,0 +1,114 @@
1
+ // Tests for classifiers
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+ import {_package} from '../package-test';
7
+
8
+ import {category, expect, test} from '@datagrok-libraries/utils/src/test';
9
+
10
+ import {classificationDataset, accuracy} from './utils';
11
+ import {SoftmaxClassifier} from '../softmax-classifier';
12
+ import {XGBooster} from '../xgbooster';
13
+
14
+ const ROWS_K = 50; // multiplied by 1000 for the performance datasets; passed unscaled in the 'Correctness' tests
15
+ const MIN_COLS = 2; // size argument of the small correctness datasets; also the index of their target column
16
+ const COLS = 100; // feature count of the performance datasets; also the index of their target column
17
+ const TIMEOUT = 8000; // value of the per-test `timeout` option; presumably milliseconds — TODO confirm
18
+ const MIN_ACCURACY = 0.9; // 'Correctness' tests require accuracy strictly greater than this
19
+
20
+ category('Softmax', () => {
21
+ test(`Performance: ${ROWS_K}K samples, ${COLS} features`, async () => {
22
+ // Benchmark dataset: the column at index COLS is the target and is removed from the feature list
23
+ const df = classificationDataset(ROWS_K * 1000, COLS, false);
24
+ const features = df.columns;
25
+ const target = features.byIndex(COLS);
26
+ features.remove(target.name);
27
+
28
+ // Fit & pack trained model (full train -> toBytes round trip is what gets timed)
29
+ const model = new SoftmaxClassifier({
30
+ classesCount: target.categories.length,
31
+ featuresCount: features.length,
32
+ });
33
+ await model.fit(features, target);
34
+ const modelBytes = model.toBytes();
35
+
36
+ // Unpack & apply model; the prediction itself is not checked in the benchmark
37
+ const unpackedModel = new SoftmaxClassifier(undefined, modelBytes);
38
+ unpackedModel.predict(features);
39
+ }, {timeout: TIMEOUT, benchmark: true});
40
+
41
+ test('Correctness', async () => {
42
+ // Small dataset: the column at index MIN_COLS is the target and is removed from the feature list
43
+ const df = classificationDataset(ROWS_K, MIN_COLS, true);
44
+ const features = df.columns;
45
+ const target = features.byIndex(MIN_COLS);
46
+ features.remove(target.name);
47
+
48
+ // Fit & pack trained model
49
+ const model = new SoftmaxClassifier({
50
+ classesCount: target.categories.length,
51
+ featuresCount: features.length,
52
+ });
53
+
54
+ await model.fit(features, target);
55
+ const modelBytes = model.toBytes();
56
+
57
+ // Unpack & apply model: the prediction comes from the deserialized copy, so packing is verified too
58
+ const unpackedModel = new SoftmaxClassifier(undefined, modelBytes);
59
+ const prediction = unpackedModel.predict(features);
60
+
61
+ // Evaluate accuracy: must be strictly greater than MIN_ACCURACY (the message states the same bound)
62
+ const acc = accuracy(target, prediction);
63
+ expect(
64
+ acc > MIN_ACCURACY,
65
+ true,
66
+ `Softmax failed, too small accuracy: ${acc}; expected: > ${MIN_ACCURACY}`,
67
+ );
68
+ }, {timeout: TIMEOUT});
69
+ }); // Softmax
70
+
71
+ category('XGBoost', () => {
72
+ test(`Performance: ${ROWS_K}K samples, ${COLS} features`, async () => {
73
+ // Benchmark dataset: the column at index COLS is the target and is removed from the feature list
74
+ const df = classificationDataset(ROWS_K * 1000, COLS, false);
75
+ const features = df.columns;
76
+ const target = features.byIndex(COLS);
77
+ features.remove(target.name);
78
+
79
+ // Fit & pack trained model (fit is called without explicit hyperparameters)
80
+ const model = new XGBooster();
81
+ await model.fit(features, target);
82
+ const modelBytes = model.toBytes();
83
+
84
+ // Unpack & apply model; the prediction itself is not checked in the benchmark
85
+ const unpackedModel = new XGBooster(modelBytes);
86
+ unpackedModel.predict(features);
87
+ }, {timeout: TIMEOUT, benchmark: true});
88
+
89
+ test('Correctness', async () => {
90
+ // Small dataset: the column at index MIN_COLS is the target and is removed from the feature list
91
+ const df = classificationDataset(ROWS_K, MIN_COLS, true);
92
+ const features = df.columns;
93
+ const target = features.byIndex(MIN_COLS);
94
+ features.remove(target.name);
95
+
96
+ // Fit & pack trained model
97
+ const model = new XGBooster();
98
+
99
+ await model.fit(features, target);
100
+ const modelBytes = model.toBytes();
101
+
102
+ // Unpack & apply model: the prediction comes from the deserialized copy, so packing is verified too
103
+ const unpackedModel = new XGBooster(modelBytes);
104
+ const prediction = unpackedModel.predict(features);
105
+
106
+ // Evaluate accuracy: must be strictly greater than MIN_ACCURACY (the message states the same bound)
107
+ const acc = accuracy(target, prediction);
108
+ expect(
109
+ acc > MIN_ACCURACY,
110
+ true,
111
+ `XGBoost failed, too small accuracy: ${acc}; expected: > ${MIN_ACCURACY}`,
112
+ );
113
+ }, {timeout: TIMEOUT});
114
+ }); // XGBoost
@@ -0,0 +1,150 @@
1
+ // Tests for PCA, PLS & linear regression
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+ import {_package} from '../package-test';
7
+
8
+ import {category, expect, test} from '@datagrok-libraries/utils/src/test';
9
+ import {computePCA} from '../eda-tools';
10
+ import {getPlsAnalysis} from '../pls/pls-tools';
11
+ import {PlsModel} from '../pls/pls-ml';
12
+ import {getLinearRegressionParams, getPredictionByLinearRegression} from '../regression';
13
+ import {regressionDataset, madNorm, madError} from './utils';
14
+
15
+ const ROWS = 100; // row count of the small datasets in the PCA and linear regression 'Correctness' tests
16
+ const ROWS_K = 100; // multiplied by 1000 for performance datasets; the PLS 'Correctness' test passes it unscaled
17
+ const COLS = 100; // column / feature count of the performance datasets
18
+ const COMPONENTS = 3; // components requested from PCA / PLS; also the second argument of regressionDataset there
19
+ const TIMEOUT = 4000; // value of the per-test `timeout` option; presumably milliseconds — TODO confirm
20
+ const INDEP_COLS = 2; // second argument of regressionDataset in the linear regression 'Correctness' test; also the target index
21
+ const DEP_COLS = 5; // third argument of regressionDataset in the PCA and PLS 'Correctness' tests
22
+ const ERROR = 0.1; // exclusive upper bound for the norms / deviations checked by the tests
+
24
+ category('Principal component analysis', () => {
25
+ test(`Performance: ${ROWS_K}K rows, ${COLS} cols, ${COMPONENTS} components`, async () => {
26
+ const walk = grok.data.demo.randomWalk(ROWS_K * 1000, COLS);
27
+ await computePCA(walk, walk.columns, COMPONENTS, false, false);
28
+ }, {timeout: TIMEOUT, benchmark: true});
29
+
30
+ test('Correctness', async () => {
31
+ // Source table built by regressionDataset
32
+ const table = regressionDataset(ROWS, COMPONENTS, DEP_COLS);
33
+
34
+ // Request one component more than COMPONENTS
35
+ const components = await computePCA(table, table.columns, COMPONENTS + 1, false, false);
36
+
37
+ // Measure the extra (last requested) component
38
+ const extraComponent = components.columns.byIndex(COMPONENTS);
39
+ const extraNorm = madNorm(extraComponent);
40
+
41
+ // by construction of the table, the extra component is expected to be small
42
+ expect(extraNorm < ERROR, true, 'Incorrect PCA computations');
43
+ }, {timeout: TIMEOUT});
44
+ }); // PCA
45
+
46
+ category('Partial least squares regression', () => {
47
+ test(`Performance: ${ROWS_K}K rows, ${COLS} cols, ${COMPONENTS} components`, async () => {
48
+ // Random-walk table used as benchmark input
49
+ const table = grok.data.demo.randomWalk(ROWS_K * 1000, COLS);
50
+ const columns = table.columns;
51
+
52
+ // Run the analysis; the last column is predicted
53
+ await getPlsAnalysis({
54
+ table,
55
+ features: columns,
56
+ predict: columns.byIndex(COLS - 1),
57
+ components: COMPONENTS,
58
+ names: undefined,
59
+ });
60
+ }, {timeout: TIMEOUT, benchmark: true});
61
+
62
+ test('Correctness', async () => {
63
+ // Table built by regressionDataset; its last column is the response
64
+ const table = regressionDataset(ROWS_K, COMPONENTS, DEP_COLS);
65
+ const columns = table.columns;
66
+ const response = columns.byIndex(COMPONENTS + DEP_COLS - 1);
67
+
68
+ // Run the analysis
69
+ const analysis = await getPlsAnalysis({
70
+ table,
71
+ features: columns,
72
+ predict: response,
73
+ components: COMPONENTS,
74
+ names: undefined,
75
+ });
76
+
77
+ // The prediction must stay within ERROR of the response
78
+ const mad = madError(response, analysis.prediction);
79
+ expect(
80
+ mad < ERROR,
81
+ true,
82
+ `Incorrect PLS computations, error is too big: ${mad}; expected: < ${ERROR}`,
83
+ );
84
+ }, {timeout: TIMEOUT});
85
+
86
+ test(`Predictive modeling: ${ROWS_K}K samples, ${COLS} features, ${COMPONENTS} components`, async () => {
87
+ // Table with COLS + 1 columns: the one at index COLS is the response and leaves the feature list
88
+ const table = regressionDataset(ROWS_K * 1000, COMPONENTS, COLS - COMPONENTS + 1);
89
+ const featureCols = table.columns;
90
+ const response = featureCols.byIndex(COLS);
91
+ featureCols.remove(response.name);
92
+
93
+ // Fit the model and serialize it
94
+ const trainer = new PlsModel();
95
+ await trainer.fit(featureCols, response, COMPONENTS);
96
+ const bytes = trainer.toBytes();
97
+
98
+ // Restore the model from bytes and apply it
99
+ const restored = new PlsModel(bytes);
100
+ const predicted = restored.predict(featureCols);
101
+
102
+ // The prediction must stay within ERROR of the response
103
+ const mad = madError(response, predicted);
104
+ expect(
105
+ mad < ERROR,
106
+ true,
107
+ `Incorrect PLS (ML) computations, error is too big: ${mad}; expected: < ${ERROR}`,
108
+ );
109
+ }, {timeout: TIMEOUT, benchmark: true});
110
+ }); // PLS
111
+
112
+ category('Linear regression', () => {
113
+ test(`Performance: ${ROWS_K}K samples, ${COLS} features`, async () => {
114
+ // Prepare data: the column at index COLS is the target
115
+ const df = regressionDataset(ROWS_K * 1000, COLS, 1);
116
+ const features = df.columns;
117
+ const target = features.byIndex(COLS);
118
+
119
+ // Train & pack model: parameters are viewed as raw bytes over the same buffer
120
+ const params = await getLinearRegressionParams(features, target);
121
+ const packed = new Uint8Array(params.buffer);
122
+
123
+ // Unpack & apply model; the prediction itself is not checked in the benchmark
124
+ const unpackedParams = new Float32Array(packed.buffer);
125
+ getPredictionByLinearRegression(features, unpackedParams);
126
+ }, {timeout: TIMEOUT, benchmark: true});
127
+
128
+ test('Correctness', async () => {
129
+ // Prepare data: the column at index INDEP_COLS is the target
130
+ const df = regressionDataset(ROWS, INDEP_COLS, 1);
131
+ const features = df.columns; // NOTE(review): target stays among the features here, unlike the PLS model test — confirm intended
132
+ const target = features.byIndex(INDEP_COLS);
133
+
134
+ // Train & pack model
135
+ const params = await getLinearRegressionParams(features, target);
136
+ const packed = new Uint8Array(params.buffer);
137
+
138
+ // Unpack & apply model: the prediction comes from the round-tripped parameters
139
+ const unpackedParams = new Float32Array(packed.buffer);
140
+ const prediction = getPredictionByLinearRegression(features, unpackedParams);
141
+
142
+ // Evaluate model against the target (comparing the prediction with itself would pass for any model)
143
+ const error = madError(target, prediction);
144
+ expect(
145
+ error < ERROR,
146
+ true,
147
+ `Incorrect linear regression computations, error is too big: ${error}; expected: < ${ERROR}`,
148
+ );
149
+ }, {timeout: TIMEOUT});
150
+ }); // Linear regression