@datagrok/eda 1.1.6 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,12 +1,13 @@
1
1
  {
2
2
  "name": "@datagrok/eda",
3
3
  "friendlyName": "EDA",
4
- "version": "1.1.6",
4
+ "version": "1.1.8",
5
5
  "description": "Exploratory Data Analysis Tools",
6
6
  "dependencies": {
7
- "@datagrok-libraries/ml": "^6.3.39",
7
+ "@datagrok-libraries/ml": "^6.3.70",
8
8
  "@datagrok-libraries/tutorials": "^1.3.6",
9
9
  "@datagrok-libraries/utils": "^4.1.4",
10
+ "@datagrok-libraries/math": "^1.0.7",
10
11
  "@keckelt/tsne": "^1.0.2",
11
12
  "cash-dom": "^8.1.1",
12
13
  "datagrok-api": "^1.16.0",
@@ -50,5 +51,35 @@
50
51
  "category": "Machine Learning",
51
52
  "sources": [
52
53
  "wasm/EDA.js"
53
- ]
54
+ ],
55
+ "meta": {
56
+ "menu": {
57
+ "ML": {
58
+ "Tools": {
59
+ "Impute Missing Values...": null,
60
+ "Random Data...": null
61
+ },
62
+ "Cluster": {
63
+ "Cluster...": null,
64
+ "DBSCAN...": null
65
+ },
66
+ "Notebooks": {
67
+ "Browse Notebooks": null,
68
+ "Open in Notebook": null,
69
+ "New Notebook": null
70
+ },
71
+ "Models": {
72
+ "Browse Models": null,
73
+ "Train Model...": null,
74
+ "Apply Model...": null
75
+ },
76
+ "Analyse": {
77
+ "PCA...": null,
78
+ "ANOVA...": null,
79
+ "Multivariate Analysis...": null
80
+ },
81
+ "Reduce Dimensionality": null
82
+ }
83
+ }
84
+ }
54
85
  }
package/src/package.ts CHANGED
@@ -14,6 +14,15 @@ import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
14
14
  getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
15
15
 
16
16
  import {oneWayAnova} from './stat-tools';
17
+ import { getDbscanWorker } from '@datagrok-libraries/math';
18
+
19
+ import {DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
20
+ import {MultiColumnDimReductionEditor} from
21
+ '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reduction-editor';
22
+ import {multiColReduceDimensionality} from
23
+ '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
24
+ import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
25
+ import { KnownMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
17
26
 
18
27
  export const _package = new DG.Package();
19
28
 
@@ -27,7 +36,24 @@ export async function init(): Promise<void> {
27
36
  await _initEDAAPI();
28
37
  }
29
38
 
30
- //top-menu: ML | Dimensionality Reduction | PCA...
39
+ //top-menu: ML | Cluster | DBSCAN...
40
+ //name: DBSCAN
41
+ //description: Density-based spatial clustering of applications with noise (DBSCAN)
42
+ //input: dataframe df
43
+ //input: column xCol {type: numerical}
44
+ //input: column yCol {type: numerical}
45
+ //input: double epsilon = 0.02 {caption: Epsilon} [The maximum distance between two samples for them to be considered as in the same neighborhood.]
46
+ //input: int minPts = 4 {caption: Minimum points} [The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.]
47
+ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column, epsilon: number, minPts: number) {
48
+ const x = xCol.getRawData() as Float32Array;
49
+ const y = yCol.getRawData() as Float32Array;
50
+ const res = await getDbscanWorker(x, y, epsilon, minPts);
51
+ const clusterColName = df.columns.getUnusedName('Cluster');
52
+ const cluster = DG.Column.fromInt32Array(clusterColName, res);
53
+ df.columns.add(cluster);
54
+ }
55
+
56
+ //top-menu: ML | Analyze | PCA...
31
57
  //name: PCA
32
58
  //description: Principal component analysis (PCA)
33
59
  //input: dataframe table
@@ -44,54 +70,46 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
44
70
  return pcaTable;
45
71
  }
46
72
 
47
- //top-menu: ML | Dimensionality Reduction | UMAP...
48
- //name: UMAP
49
- //description: Uniform Manifold Approximation and Projection (UMAP)
50
- //input: dataframe table {category: Data}
51
- //input: column_list features {type: numerical; category: Data}
52
- //input: int components = 2 {caption: Components; min: 1; max: 20; category: Hyperparameters} [The number of components (dimensions) to project the data to.]
53
- //input: int epochs = 100 {caption: Epochs; category: Hyperparameters} [The number of epochs to optimize embeddings.]
54
- //input: int neighbors = 15 {caption: Neighbors; category: Hyperparameters} [The number of nearest neighbors to construct the fuzzy manifold.]
55
- //input: double minDist = 0.1 {caption: Minimum distance; min: 0; max: 1; category: Hyperparameters} [The effective minimum distance between embedded points.]
56
- //input: double spread = 1.0 {caption: Spread; category: Hyperparameters} [The effective scale of embedded points.]
57
- //output: dataframe result {action:join(table)}
58
- export async function UMAP(table: DG.DataFrame, features: DG.ColumnList, components: number,
59
- epochs: number, neighbors: number, minDist: number, spread: number): Promise<DG.DataFrame>
60
- {
61
- return await computeUMAP(features, components, epochs, neighbors, minDist, spread);
73
+
74
+ //name: None (number)
75
+ //tags: dim-red-preprocessing-function
76
+ //meta.supportedTypes: int,float,double,qnum
77
+ //meta.supportedDistanceFunctions: Difference
78
+ //input: column col
79
+ //input: string _metric {optional: true}
80
+ //output: object result
81
+ export function numberPreprocessingFunction(col: DG.Column, _metric: string) {
82
+ const entries = col.toList();
83
+ return {entries, options: {}};
62
84
  }
63
85
 
64
- //top-menu: ML | Dimensionality Reduction | t-SNE...
65
- //name: t-SNE
66
- //description: t-distributed stochastic neighbor embedding (t-SNE)
67
- //input: dataframe table {category: Data}
68
- //input: column_list features {type: numerical; category: Data}
69
- //input: int components = 2 {caption: Components; category: Hyperparameters} [Dimension of the embedded space.]
70
- //input: double learningRate = 10 {caption: Learning rate; category: Hyperparameters} [Optimization tuning parameter. Should be in the range 10...1000.]
71
- //input: int perplexity = 30 {caption: Perplexity; category: Hyperparameters} [The number of nearest neighbors. Should be less than the number of samples.]
72
- //input: int iterations = 500 {caption: Iterations; category: Hyperparameters} [Maximum number of iterations for the optimization. Should be at least 250.]
73
- //output: dataframe result {action:join(table)}
74
- export async function tSNE(table: DG.DataFrame, features: DG.ColumnList, components: number,
75
- learningRate: number, perplexity: number, iterations: number): Promise<DG.DataFrame>
76
- {
77
- return await computeTSNE(features, components, learningRate, perplexity, iterations);
86
+ //name: None (string)
87
+ //tags: dim-red-preprocessing-function
88
+ //meta.supportedTypes: string
89
+ //meta.supportedDistanceFunctions: Levenshtein,Hamming,One-Hot
90
+ //input: column col
91
+ //input: string _metric {optional: true}
92
+ //output: object result
93
+ export function stringPreprocessingFunction(col: DG.Column, _metric: string) {
94
+ const entries = col.toList();
95
+ return {entries, options: {}};
78
96
  }
79
97
 
80
- //top-menu: ML | Dimensionality Reduction | SPE...
81
- //name: SPE
82
- //description: Stochastic proximity embedding (SPE)
83
- //input: dataframe table {category: Data}
84
- //input: column_list features {type: numerical; category: Data}
85
- //input: int dimension = 2 {caption: Dimension; category: Hyperparameters} [Dimension of the embedded space.]
86
- //input: int steps = 0 {caption: Steps; category: Hyperparameters} [Number of random selections of point pairs and distance computations between them.]
87
- //input: int cycles = 1000000 {caption: Cycles; category: Hyperparameters} [Number of the method cycles.]
88
- //input: double cutoff = 0.0 {caption: Cutoff; category: Hyperparameters} [Cutoff distance between points.]
89
- //input: double lambda = 2.0 {caption: Learning rate; category: Hyperparameters} [Optimization tuning parameter.]
90
- //output: dataframe result {action:join(table)}
91
- export async function SPE(table: DG.DataFrame, features: DG.ColumnList, dimension: number,
92
- steps: number, cycles: number, cutoff: number, lambda: number): Promise<DG.DataFrame>
93
- {
94
- return await computeSPE(features, dimension, steps, cycles, cutoff, lambda);
98
+ //top-menu: ML | Reduce Dimensionality...
99
+ //name: Multi Column Dimensionality Reduction
100
+ export async function reduceDimensionality(): Promise<void> {
101
+ const editor = new MultiColumnDimReductionEditor();
102
+ ui.dialog('Dimensionality reduction').add(editor.getEditor()).onOK(async () => {
103
+ const params = editor.getParams();
104
+ if (params.columns.length === 0)
105
+ return;
106
+ await multiColReduceDimensionality(params.table, params.columns, params.methodName as DimReductionMethods,
107
+ params.distanceMetrics as KnownMetrics[],
108
+ params.weights, params.preprocessingFunctions, params.aggreaggregationMethod as DistanceAggregationMethods,
109
+ !!params.plotEmbeddings, !!params.clusterEmbeddings, params.options, {
110
+ fastRowCount: 10000,
111
+ });
112
+ }).show();
95
113
  }
96
114
 
97
115
  //top-menu: ML | Analyze | Multivariate Analysis...