@datagrok/eda 1.1.5 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,12 +1,13 @@
1
1
  {
2
2
  "name": "@datagrok/eda",
3
3
  "friendlyName": "EDA",
4
- "version": "1.1.5",
4
+ "version": "1.1.7",
5
5
  "description": "Exploratory Data Analysis Tools",
6
6
  "dependencies": {
7
7
  "@datagrok-libraries/ml": "^6.3.39",
8
8
  "@datagrok-libraries/tutorials": "^1.3.6",
9
9
  "@datagrok-libraries/utils": "^4.1.4",
10
+ "@datagrok-libraries/math": "^1.0.3",
10
11
  "@keckelt/tsne": "^1.0.2",
11
12
  "cash-dom": "^8.1.1",
12
13
  "datagrok-api": "^1.16.0",
package/src/package.ts CHANGED
@@ -14,6 +14,7 @@ import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
14
14
  getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
15
15
 
16
16
  import {oneWayAnova} from './stat-tools';
17
+ import { getDbscanWorker } from '@datagrok-libraries/math';
17
18
 
18
19
  export const _package = new DG.Package();
19
20
 
@@ -27,9 +28,26 @@ export async function init(): Promise<void> {
27
28
  await _initEDAAPI();
28
29
  }
29
30
 
31
+ //top-menu: ML | Cluster | DBSCAN...
32
+ //name: DBSCAN
33
+ //description: Density-based spatial clustering of applications with noise (DBSCAN)
34
+ //input: dataframe df
35
+ //input: column xCol {type: numerical}
36
+ //input: column yCol {type: numerical}
37
+ //input: double epsilon = 0.02 {caption: Epsilon} [The maximum distance between two samples for them to be considered as in the same neighborhood.]
38
+ //input: int minPts = 4 {caption: Minimum points} [The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.]
39
+ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column, epsilon: number, minPts: number) {
40
+ const x = xCol.getRawData() as Float32Array;
41
+ const y = yCol.getRawData() as Float32Array;
42
+ const res = await getDbscanWorker(x, y, epsilon, minPts);
43
+ const clusterColName = df.columns.getUnusedName('Cluster');
44
+ const cluster = DG.Column.fromInt32Array(clusterColName, res);
45
+ df.columns.add(cluster);
46
+ }
47
+
30
48
  //top-menu: ML | Dimensionality Reduction | PCA...
31
49
  //name: PCA
32
- //description: Principal component analysis (PCA).
50
+ //description: Principal component analysis (PCA)
33
51
  //input: dataframe table
34
52
  //input: column_list features {type: numerical}
35
53
  //input: int components = 2 {caption: Components} [Number of components.]
@@ -46,7 +64,7 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
46
64
 
47
65
  //top-menu: ML | Dimensionality Reduction | UMAP...
48
66
  //name: UMAP
49
- //description: Uniform Manifold Approximation and Projection (UMAP).
67
+ //description: Uniform Manifold Approximation and Projection (UMAP)
50
68
  //input: dataframe table {category: Data}
51
69
  //input: column_list features {type: numerical; category: Data}
52
70
  //input: int components = 2 {caption: Components; min: 1; max: 20; category: Hyperparameters} [The number of components (dimensions) to project the data to.]
@@ -63,7 +81,7 @@ export async function UMAP(table: DG.DataFrame, features: DG.ColumnList, compone
63
81
 
64
82
  //top-menu: ML | Dimensionality Reduction | t-SNE...
65
83
  //name: t-SNE
66
- //description: t-distributed stochastic neighbor embedding (t-SNE).
84
+ //description: t-distributed stochastic neighbor embedding (t-SNE)
67
85
  //input: dataframe table {category: Data}
68
86
  //input: column_list features {type: numerical; category: Data}
69
87
  //input: int components = 2 {caption: Components; category: Hyperparameters} [Dimension of the embedded space.]
@@ -79,7 +97,7 @@ export async function tSNE(table: DG.DataFrame, features: DG.ColumnList, compone
79
97
 
80
98
  //top-menu: ML | Dimensionality Reduction | SPE...
81
99
  //name: SPE
82
- //description: Stochastic proximity embedding (SPE).
100
+ //description: Stochastic proximity embedding (SPE)
83
101
  //input: dataframe table {category: Data}
84
102
  //input: column_list features {type: numerical; category: Data}
85
103
  //input: int dimension = 2 {caption: Dimension; category: Hyperparameters} [Dimension of the embedded space.]
@@ -162,7 +180,7 @@ export async function demoMultivariateAnalysis(): Promise<any> {
162
180
  }
163
181
 
164
182
  //name: Generate linear separable dataset
165
- //description: Generates linear separble dataset for testing binary classificators.
183
+ //description: Generates linear separble dataset for testing binary classificators
166
184
  //input: string name = 'Data' {caption: name; category: Dataset}
167
185
  //input: int samplesCount = 1000 {caption: samples; category: Size}
168
186
  //input: int featuresCount = 2 {caption: features; category: Size}
@@ -178,7 +196,7 @@ export async function testDataLinearSeparable(name: string, samplesCount: number
178
196
  }
179
197
 
180
198
  //name: Generate linear non-separable dataset
181
- //description: Generates linear non-separble dataset for testing binary classificators.
199
+ //description: Generates linear non-separble dataset for testing binary classificators
182
200
  //input: string name = 'Data' {caption: name; category: Dataset}
183
201
  //input: double sigma = 90 {caption: sigma; category: Hyperparameters} [RBF-kernel paramater]
184
202
  //input: int samplesCount = 1000 {caption: samples; category: Size}
@@ -322,7 +340,7 @@ export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promi
322
340
  }
323
341
 
324
342
  //top-menu: ML | Analyze | ANOVA...
325
- //name: One-way ANOVA
343
+ //name: ANOVA
326
344
  //description: One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the studied feature.
327
345
  //input: dataframe table
328
346
  //input: column factor {type: categorical}