@datagrok/eda 1.1.2 → 1.1.4

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries and as they appear in that public registry. It is provided for informational purposes only.
package/src/utils.ts CHANGED
@@ -14,21 +14,123 @@ const MAX_ELEMENTS_COUNT = 100000000;
14
14
 
15
15
  // Error messages
16
16
  const COMP_POSITVE_MES = 'components must be positive.';
17
- const COMP_EXCESS = 'components must not be greater than feautures count.';
17
+ const COMP_EXCESS = 'components must not be greater than features count.';
18
18
  const INCORERRECT_MIN_MAX_MES = 'min must be less than max.';
19
19
  const INCORERRECT_FEATURES_MES = 'features must be positive.';
20
20
  const INCORERRECT_SAMPLES_MES = 'samples must be positive.';
21
21
  const INCORERRECT_PERCENTAGE_MES = 'violators percentage must be from the range from 0 to 100.';
22
22
  const DATAFRAME_IS_TOO_BIG_MES = 'dataframe is too big.';
23
+ const UNSUPPORTED_COLUMN_TYPE_MES = 'unsupported column type: ';
24
+ const INCORRECT_MIN_DIST_MES = 'min distance must be positive.';
25
+ const INCORRECT_SPREAD_MES = 'spread must be positive.';
26
+ const INCORRECT_EPOCH_MES = 'number of epoch must be at least 1.';
27
+ const INCORRECT_NEIBORS_MES = 'number of neibors must be at least 2 and not greater than samples count.';
28
+ const INCORRECT_ITERATIONS_MES = 'number of iterations must be at least 1.';
29
+ const INCORRECT_LEARNING_RATE_MES = 'learning rate must be positive.';
30
+ const INCORRECT_PERPLEXITY_MES = 'perplexity must be at least 2 and not greater than samples count.';
31
+ const INCORRECT_STEPS_MES = 'steps must be non-negative.';
32
+ const INCORRECT_CYCLES_MES = 'cycles must be positive.';
33
+ const INCORRECT_CUTOFF_MES = 'cutoff must be non-negative.'
23
34
 
24
- // Check components count (PCA, PLS)
25
- export function checkComponenets(features: DG.ColumnList, components: number): void {
35
+ // Check column type
36
+ export function checkColumnType(col: DG.Column): void {
37
+ if ((col.type != DG.COLUMN_TYPE.FLOAT) && (col.type != DG.COLUMN_TYPE.INT))
38
+ throw new Error(UNSUPPORTED_COLUMN_TYPE_MES + col.type);
39
+ }
40
+
41
+ // Check dimension reducer inputs
42
+ export function checkDimensionReducerInputs(features: DG.ColumnList, components: number): void {
26
43
  if (components < COMP_MIN)
27
44
  throw new Error(COMP_POSITVE_MES);
28
45
 
29
46
  if (components > features.length)
30
47
  throw new Error(COMP_EXCESS);
31
48
 
49
+ for (const col of features)
50
+ checkColumnType(col);
51
+ }
52
+
53
+ // Check UMAP inputs
54
+ export function checkUMAPinputs(features: DG.ColumnList, components: number, epochs: number,
55
+ neighbors: number, minDist: number, spread: number): void
56
+ {
57
+ // General dim reducer checks
58
+ checkDimensionReducerInputs(features, components);
59
+
60
+ // Check data total size
61
+ if (features.length * features.byIndex(0).length > MAX_ELEMENTS_COUNT)
62
+ throw new Error(DATAFRAME_IS_TOO_BIG_MES);
63
+
64
+ // UMAP specific checks
65
+
66
+ if (minDist <= 0)
67
+ throw new Error(INCORRECT_MIN_DIST_MES);
68
+
69
+ if (spread <= 0)
70
+ throw new Error(INCORRECT_SPREAD_MES);
71
+
72
+ if (epochs < 1)
73
+ throw new Error(INCORRECT_EPOCH_MES);
74
+
75
+ if ((neighbors < 2) || (neighbors > features.byIndex(0).length))
76
+ throw new Error(INCORRECT_NEIBORS_MES);
77
+ }
78
+
79
+ // Check t-SNE inputs
80
+ export function checkTSNEinputs(features: DG.ColumnList, components: number,
81
+ learningRate: number, perplexity: number, iterations: number): void
82
+ {
83
+ // General dim reducer checks
84
+ checkDimensionReducerInputs(features, components);
85
+
86
+ // Check data total size
87
+ if (features.length * features.byIndex(0).length > MAX_ELEMENTS_COUNT)
88
+ throw new Error(DATAFRAME_IS_TOO_BIG_MES);
89
+
90
+ // t-SNE specific checks
91
+
92
+ if (learningRate < 0)
93
+ throw new Error(INCORRECT_LEARNING_RATE_MES);
94
+
95
+ if (iterations < 1)
96
+ throw new Error(INCORRECT_ITERATIONS_MES);
97
+
98
+ if ((perplexity < 2) || (perplexity > features.byIndex(0).length))
99
+ throw new Error(INCORRECT_PERPLEXITY_MES);
100
+ }
101
+
102
+ // Check SPE inputs
103
+ export function checkSPEinputs(features: DG.ColumnList, dimension: number,
104
+ steps: number, cycles: number, cutoff: number, lambda: number): void
105
+ {
106
+ // General dim reducer checks
107
+ checkDimensionReducerInputs(features, dimension);
108
+
109
+ // Check data total size
110
+ if (features.length * features.byIndex(0).length > MAX_ELEMENTS_COUNT)
111
+ throw new Error(DATAFRAME_IS_TOO_BIG_MES);
112
+
113
+ // SPE specific checks
114
+
115
+ if (steps < 0)
116
+ throw new Error(INCORRECT_STEPS_MES);
117
+
118
+ if (cycles <= 0)
119
+ throw new Error(INCORRECT_CYCLES_MES);
120
+
121
+ if (cutoff < 0)
122
+ throw new Error(INCORRECT_CUTOFF_MES);
123
+
124
+ if (lambda <= 0)
125
+ throw new Error(INCORRECT_LEARNING_RATE_MES);
126
+ }
127
+
128
+ // Check wasm dimension reducer inputs
129
+ export function checkWasmDimensionReducerInputs(features: DG.ColumnList, components: number): void {
130
+ // General dim reducer checks
131
+ checkDimensionReducerInputs(features, components);
132
+
133
+ // Check data total size
32
134
  if (features.length * features.byIndex(0).length > MAX_ELEMENTS_COUNT)
33
135
  throw new Error(DATAFRAME_IS_TOO_BIG_MES);
34
136
  }
@@ -49,3 +151,28 @@ export function checkGeneratorSVMinputs(samplesCount: number, featuresCount: num
49
151
  if ((violatorsPercentage < PERCENTAGE_MIN) || (violatorsPercentage > PERCENTAGE_MAX))
50
152
  throw new Error(INCORERRECT_PERCENTAGE_MES);
51
153
  }
154
+
155
+ // Returns rows of column data
156
+ export function getRowsOfNumericalColumnns(columnList: DG.ColumnList): any[][] {
157
+ const columns = columnList.toList();
158
+ const rowCount = columns[0].length;
159
+ const colCount = columns.length;
160
+
161
+ const output = [] as any[][];
162
+
163
+ for (let i = 0; i < rowCount; ++i)
164
+ output.push(Array(colCount));
165
+
166
+ for (let j = 0; j < colCount; ++j) {
167
+ const col = columns[j];
168
+
169
+ checkColumnType(col);
170
+
171
+ const array = col.getRawData();
172
+
173
+ for (let i = 0; i < rowCount; ++i)
174
+ output[i][j] = array[i];
175
+ }
176
+
177
+ return output;
178
+ }
@@ -0,0 +1,20 @@
1
+ // Worker for the method t-SNE
2
+
3
+ import {TSNE} from '@keckelt/tsne';
4
+
5
+ onmessage = async function (evt) {
6
+ const tsne = new TSNE({
7
+ epsilon: evt.data.options.learningRate,
8
+ perplexity: evt.data.options.perplexity,
9
+ dim: evt.data.options.components
10
+ });
11
+
12
+ tsne.initDataRaw(evt.data.data);
13
+
14
+ const iterCount = evt.data.options.iterations;
15
+
16
+ for(let i = 0; i < iterCount; ++i)
17
+ tsne.step();
18
+
19
+ postMessage({'embeddings': tsne.getSolution()});
20
+ }
@@ -0,0 +1,9 @@
1
+ // Worker for the method UMAP
2
+
3
+ import { UMAP } from 'umap-js';
4
+
5
+ onmessage = async function (evt) {
6
+ const umap = new UMAP(evt.data.options);
7
+ const embeddings = umap.fit(evt.data.data);
8
+ postMessage({'embeddings': embeddings});
9
+ }
package/src/EDAtools.ts DELETED
@@ -1,46 +0,0 @@
1
- // Exploratory data analysis (EDA) tools
2
-
3
- import * as grok from 'datagrok-api/grok';
4
- import * as ui from 'datagrok-api/ui';
5
- import * as DG from 'datagrok-api/dg';
6
-
7
- import {_principalComponentAnalysisInWebWorker,
8
- _partialLeastSquareRegressionInWebWorker} from '../wasm/EDAAPI';
9
-
10
- import {checkComponenets, checkGeneratorSVMinputs} from './utils';
11
-
12
- // Principal components analysis (PCA)
13
- export async function computePCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
14
- center: boolean, scale: boolean): Promise<DG.DataFrame>
15
- {
16
- checkComponenets(features, components);
17
-
18
- const centerNum = center ? 1 : 0;
19
- const scaleNum = scale ? 1 : 0;
20
-
21
- let _output: any;
22
- let _promise = _principalComponentAnalysisInWebWorker(table, features, components, centerNum, scaleNum);
23
-
24
- await _promise.then(
25
- _result => { _output = _result; },
26
- _error => { throw new Error (`Error: ${_error}`); }
27
- );
28
-
29
- return _output;
30
- }
31
-
32
- // Partial least square regression (PLS)
33
- export async function computePLS(table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, components: number): Promise<any>
34
- {
35
- checkComponenets(features, components);
36
-
37
- let _output: any;
38
- let _promise = _partialLeastSquareRegressionInWebWorker(table, features, predict, components);
39
-
40
- await _promise.then(
41
- _result => { _output = _result; },
42
- _error => { throw new Error (`Error: ${_error}`); }
43
- );
44
-
45
- return _output;
46
- }
File without changes