@datagrok/eda 1.1.2 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/LICENSE.txt +202 -0
- package/README.md +13 -1
- package/dist/221.js +2 -0
- package/dist/694.js +2 -0
- package/dist/729.js +2 -0
- package/dist/80.js +2 -0
- package/dist/package-test.js +2 -2
- package/dist/package.js +2 -2
- package/package.json +13 -9
- package/src/eda-tools.ts +185 -0
- package/src/{EDAui.ts → eda-ui.ts} +14 -1
- package/src/package.ts +77 -10
- package/src/stat-tools.ts +266 -0
- package/src/utils.ts +130 -3
- package/src/workers/tsne-worker.ts +20 -0
- package/src/workers/umap-worker.ts +9 -0
- package/src/EDAtools.ts +0 -46
- /package/src/{dataGenerators.ts → data-generators.ts} +0 -0
package/src/utils.ts
CHANGED
|
@@ -14,21 +14,123 @@ const MAX_ELEMENTS_COUNT = 100000000;
|
|
|
14
14
|
|
|
15
15
|
// Error messages
// NOTE: some identifiers below are misspelled (POSITVE, INCORERRECT, NEIBORS). They are kept
// unchanged, since they are referenced by the check functions of this file.
const COMP_POSITVE_MES = 'components must be positive.';
const COMP_EXCESS = 'components must not be greater than features count.';
const INCORERRECT_MIN_MAX_MES = 'min must be less than max.';
const INCORERRECT_FEATURES_MES = 'features must be positive.';
const INCORERRECT_SAMPLES_MES = 'samples must be positive.';
const INCORERRECT_PERCENTAGE_MES = 'violators percentage must be from the range from 0 to 100.';
const DATAFRAME_IS_TOO_BIG_MES = 'dataframe is too big.';
// Used as a prefix: the actual column type is appended to it
const UNSUPPORTED_COLUMN_TYPE_MES = 'unsupported column type: ';
const INCORRECT_MIN_DIST_MES = 'min distance must be positive.';
const INCORRECT_SPREAD_MES = 'spread must be positive.';
const INCORRECT_EPOCH_MES = 'number of epoch must be at least 1.';
const INCORRECT_NEIBORS_MES = 'number of neighbors must be at least 2 and not greater than samples count.';
const INCORRECT_ITERATIONS_MES = 'number of iterations must be at least 1.';
const INCORRECT_LEARNING_RATE_MES = 'learning rate must be positive.';
const INCORRECT_PERPLEXITY_MES = 'perplexity must be at least 2 and not greater than samples count.';
const INCORRECT_STEPS_MES = 'steps must be non-negative.';
const INCORRECT_CYCLES_MES = 'cycles must be positive.';
const INCORRECT_CUTOFF_MES = 'cutoff must be non-negative.';
|
|
23
34
|
|
|
24
|
-
// Check
|
|
25
|
-
export function
|
|
35
|
+
/**
 * Checks that a column has a supported type.
 *
 * Only float and int columns are accepted.
 *
 * @param col - column to be checked
 * @throws Error with the column type in its message, if the type is neither float nor int
 */
export function checkColumnType(col: DG.Column): void {
  const isSupported = (col.type === DG.COLUMN_TYPE.FLOAT) || (col.type === DG.COLUMN_TYPE.INT);

  if (!isSupported)
    throw new Error(UNSUPPORTED_COLUMN_TYPE_MES + col.type);
}
|
|
40
|
+
|
|
41
|
+
/**
 * Performs the checks shared by all the dimensionality reducers.
 *
 * @param features - columns with features
 * @param components - requested number of components
 * @throws Error if components is less than COMP_MIN, if it exceeds the number of feature
 *   columns, or if any feature column has an unsupported type
 */
export function checkDimensionReducerInputs(features: DG.ColumnList, components: number): void {
  // Lower bound of the components count
  if (COMP_MIN > components)
    throw new Error(COMP_POSITVE_MES);

  // Upper bound: there cannot be more components than feature columns
  if (features.length < components)
    throw new Error(COMP_EXCESS);

  // Each feature column must be of a supported type
  for (const feature of features) {
    checkColumnType(feature);
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Validates inputs of the UMAP method.
 *
 * The checks are applied in the following order: general dimension reducer checks,
 * total data size, min distance, spread, epochs, neighbors. The first failed check throws.
 *
 * @param features - columns with features
 * @param components - number of components
 * @param epochs - number of epochs, must be at least 1
 * @param neighbors - number of neighbors, must be from 2 to the samples count
 * @param minDist - min distance, must be positive
 * @param spread - spread, must be positive
 * @throws Error describing the first violated requirement
 */
export function checkUMAPinputs(features: DG.ColumnList, components: number, epochs: number,
  neighbors: number, minDist: number, spread: number): void {
  // Components range & column types
  checkDimensionReducerInputs(features, components);

  // The length of the first column is used as the samples count
  const samplesCount = features.byIndex(0).length;

  // Total number of elements must not exceed the limit
  if (features.length * samplesCount > MAX_ELEMENTS_COUNT)
    throw new Error(DATAFRAME_IS_TOO_BIG_MES);

  // UMAP specific requirements
  if (minDist <= 0)
    throw new Error(INCORRECT_MIN_DIST_MES);

  if (spread <= 0)
    throw new Error(INCORRECT_SPREAD_MES);

  if (epochs < 1)
    throw new Error(INCORRECT_EPOCH_MES);

  if ((samplesCount < neighbors) || (neighbors < 2))
    throw new Error(INCORRECT_NEIBORS_MES);
}
|
|
78
|
+
|
|
79
|
+
/**
 * Validates inputs of the t-SNE method.
 *
 * The checks are applied in the following order: general dimension reducer checks,
 * total data size, learning rate, iterations, perplexity. The first failed check throws.
 *
 * @param features - columns with features
 * @param components - number of components
 * @param learningRate - learning rate, must be positive
 * @param perplexity - perplexity, must be from 2 to the samples count
 * @param iterations - number of iterations, must be at least 1
 * @throws Error describing the first violated requirement
 */
export function checkTSNEinputs(features: DG.ColumnList, components: number,
  learningRate: number, perplexity: number, iterations: number): void {
  // General dim reducer checks
  checkDimensionReducerInputs(features, components);

  // The length of the first column is used as the samples count
  const samplesCount = features.byIndex(0).length;

  // Check data total size
  if (features.length * samplesCount > MAX_ELEMENTS_COUNT)
    throw new Error(DATAFRAME_IS_TOO_BIG_MES);

  // t-SNE specific checks

  // Zero is rejected too: the message requires a positive value,
  // which also agrees with the learning rate check of the SPE inputs
  if (learningRate <= 0)
    throw new Error(INCORRECT_LEARNING_RATE_MES);

  if (iterations < 1)
    throw new Error(INCORRECT_ITERATIONS_MES);

  if ((perplexity < 2) || (perplexity > samplesCount))
    throw new Error(INCORRECT_PERPLEXITY_MES);
}
|
|
101
|
+
|
|
102
|
+
/**
 * Validates inputs of the SPE method.
 *
 * The checks are applied in the following order: general dimension reducer checks,
 * total data size, steps, cycles, cutoff, lambda. The first failed check throws.
 *
 * @param features - columns with features
 * @param dimension - target dimension, checked in the same way as the components count
 * @param steps - number of steps, must be non-negative
 * @param cycles - number of cycles, must be positive
 * @param cutoff - cutoff, must be non-negative
 * @param lambda - must be positive; a violation is reported with the learning rate message
 * @throws Error describing the first violated requirement
 */
export function checkSPEinputs(features: DG.ColumnList, dimension: number,
  steps: number, cycles: number, cutoff: number, lambda: number): void {
  // Dimension range & column types
  checkDimensionReducerInputs(features, dimension);

  // Total number of elements: columns count times the length of the first column
  const elementsCount = features.length * features.byIndex(0).length;

  if (elementsCount > MAX_ELEMENTS_COUNT)
    throw new Error(DATAFRAME_IS_TOO_BIG_MES);

  // SPE specific requirements
  if (steps < 0)
    throw new Error(INCORRECT_STEPS_MES);

  if (cycles <= 0)
    throw new Error(INCORRECT_CYCLES_MES);

  if (cutoff < 0)
    throw new Error(INCORRECT_CUTOFF_MES);

  if (lambda <= 0)
    throw new Error(INCORRECT_LEARNING_RATE_MES);
}
|
|
127
|
+
|
|
128
|
+
/**
 * Validates inputs of a wasm dimensionality reducer.
 *
 * @param features - columns with features
 * @param components - number of components
 * @throws Error if the general dimension reducer checks fail, or if the total number
 *   of elements exceeds MAX_ELEMENTS_COUNT
 */
export function checkWasmDimensionReducerInputs(features: DG.ColumnList, components: number): void {
  // Components range & column types
  checkDimensionReducerInputs(features, components);

  // Total number of elements: columns count times the length of the first column
  const elementsCount = features.length * features.byIndex(0).length;

  if (elementsCount > MAX_ELEMENTS_COUNT)
    throw new Error(DATAFRAME_IS_TOO_BIG_MES);
}
|
|
@@ -49,3 +151,28 @@ export function checkGeneratorSVMinputs(samplesCount: number, featuresCount: num
|
|
|
49
151
|
if ((violatorsPercentage < PERCENTAGE_MIN) || (violatorsPercentage > PERCENTAGE_MAX))
|
|
50
152
|
throw new Error(INCORERRECT_PERCENTAGE_MES);
|
|
51
153
|
}
|
|
154
|
+
|
|
155
|
+
/**
 * Returns the data of the given columns arranged by rows.
 *
 * The element [i][j] of the result is the i-th element of the raw data of the j-th column.
 * The length of the first column is used as the rows count.
 *
 * @param columnList - columns to be processed; each of them must pass checkColumnType
 * @returns array of rows; an empty array, if there are no columns
 * @throws Error if any column has an unsupported type
 */
export function getRowsOfNumericalColumnns(columnList: DG.ColumnList): any[][] {
  const columns = columnList.toList();
  const colCount = columns.length;

  // No columns: there is no first column to take the rows count from
  if (colCount === 0)
    return [];

  // Validate all the columns before any allocation is done
  for (const col of columns)
    checkColumnType(col);

  const rowCount = columns[0].length;

  // A separate array is created for each row
  const output: any[][] = Array.from({length: rowCount}, () => new Array<any>(colCount));

  // Columns are traversed one by one, so that raw data of each column is requested once
  for (let j = 0; j < colCount; ++j) {
    const array = columns[j].getRawData();

    for (let i = 0; i < rowCount; ++i)
      output[i][j] = array[i];
  }

  return output;
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
// Worker for the method t-SNE
|
|
2
|
+
|
|
3
|
+
import {TSNE} from '@keckelt/tsne';
|
|
4
|
+
|
|
5
|
+
// Handles a message with t-SNE inputs: evt.data.data is passed to the solver as raw data,
// evt.data.options provides learningRate, perplexity, components & iterations.
// The solution is posted back in the 'embeddings' field.
//
// The handler is intentionally NOT async: nothing is awaited here, and an async handler
// would turn any exception into an unhandled promise rejection inside the worker
// instead of an uncaught error that is reported to the owner of the worker.
onmessage = function(evt) {
  const options = evt.data.options;

  const tsne = new TSNE({
    epsilon: options.learningRate,
    perplexity: options.perplexity,
    dim: options.components,
  });

  tsne.initDataRaw(evt.data.data);

  const iterCount = options.iterations;

  for (let i = 0; i < iterCount; ++i)
    tsne.step();

  postMessage({'embeddings': tsne.getSolution()});
};
|
package/src/EDAtools.ts
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
// Exploratory data analysis (EDA) tools
|
|
2
|
-
|
|
3
|
-
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
import * as ui from 'datagrok-api/ui';
|
|
5
|
-
import * as DG from 'datagrok-api/dg';
|
|
6
|
-
|
|
7
|
-
import {_principalComponentAnalysisInWebWorker,
|
|
8
|
-
_partialLeastSquareRegressionInWebWorker} from '../wasm/EDAAPI';
|
|
9
|
-
|
|
10
|
-
import {checkComponenets, checkGeneratorSVMinputs} from './utils';
|
|
11
|
-
|
|
12
|
-
/**
 * Principal components analysis (PCA).
 *
 * Checks the components count and runs the computation in a web worker.
 *
 * @param table - source dataframe
 * @param features - columns with features
 * @param components - number of principal components
 * @param center - passed to the computation as 1 if true, and as 0 otherwise
 * @param scale - passed to the computation as 1 if true, and as 0 otherwise
 * @returns result of the web worker computation
 * @throws Error if the components check fails, or if the computation is rejected
 *   (the rejection reason is wrapped into the message `Error: ...`)
 */
export async function computePCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
  center: boolean, scale: boolean): Promise<DG.DataFrame> {
  checkComponenets(features, components);

  // The call is done outside of try-catch: only a rejection of the promise is wrapped
  const computation = _principalComponentAnalysisInWebWorker(table, features, components,
    center ? 1 : 0, scale ? 1 : 0);

  try {
    const result: any = await computation;
    return result;
  } catch (error) {
    throw new Error(`Error: ${error}`);
  }
}
|
|
31
|
-
|
|
32
|
-
/**
 * Partial least square regression (PLS).
 *
 * Checks the components count and runs the computation in a web worker.
 *
 * @param table - source dataframe
 * @param features - columns with features
 * @param predict - column passed to the computation along with the features
 * @param components - number of components
 * @returns result of the web worker computation
 * @throws Error if the components check fails, or if the computation is rejected
 *   (the rejection reason is wrapped into the message `Error: ...`)
 */
export async function computePLS(table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, components: number): Promise<any> {
  checkComponenets(features, components);

  // The call is done outside of try-catch: only a rejection of the promise is wrapped
  const computation = _partialLeastSquareRegressionInWebWorker(table, features, predict, components);

  try {
    const result: any = await computation;
    return result;
  } catch (error) {
    throw new Error(`Error: ${error}`);
  }
}
|
|
File without changes
|