@datagrok/eda 1.1.7 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "@datagrok/eda",
3
3
  "friendlyName": "EDA",
4
- "version": "1.1.7",
4
+ "version": "1.1.8",
5
5
  "description": "Exploratory Data Analysis Tools",
6
6
  "dependencies": {
7
- "@datagrok-libraries/ml": "^6.3.39",
7
+ "@datagrok-libraries/ml": "^6.3.70",
8
8
  "@datagrok-libraries/tutorials": "^1.3.6",
9
9
  "@datagrok-libraries/utils": "^4.1.4",
10
- "@datagrok-libraries/math": "^1.0.3",
10
+ "@datagrok-libraries/math": "^1.0.7",
11
11
  "@keckelt/tsne": "^1.0.2",
12
12
  "cash-dom": "^8.1.1",
13
13
  "datagrok-api": "^1.16.0",
@@ -51,5 +51,35 @@
51
51
  "category": "Machine Learning",
52
52
  "sources": [
53
53
  "wasm/EDA.js"
54
- ]
54
+ ],
55
+ "meta": {
56
+ "menu": {
57
+ "ML": {
58
+ "Tools": {
59
+ "Impute Missing Values...": null,
60
+ "Random Data...": null
61
+ },
62
+ "Cluster": {
63
+ "Cluster...": null,
64
+ "DBSCAN...": null
65
+ },
66
+ "Notebooks": {
67
+ "Browse Notebooks": null,
68
+ "Open in Notebook": null,
69
+ "New Notebook": null
70
+ },
71
+ "Models": {
72
+ "Browse Models": null,
73
+ "Train Model...": null,
74
+ "Apply Model...": null
75
+ },
76
+ "Analyze": {
77
+ "PCA...": null,
78
+ "ANOVA...": null,
79
+ "Multivariate Analysis...": null
80
+ },
81
+ "Reduce Dimensionality...": null
82
+ }
83
+ }
84
+ }
55
85
  }
package/src/package.ts CHANGED
@@ -16,6 +16,14 @@ import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
16
16
  import {oneWayAnova} from './stat-tools';
17
17
  import { getDbscanWorker } from '@datagrok-libraries/math';
18
18
 
19
+ import {DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
20
+ import {MultiColumnDimReductionEditor} from
21
+ '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reduction-editor';
22
+ import {multiColReduceDimensionality} from
23
+ '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
24
+ import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
25
+ import { KnownMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
26
+
19
27
  export const _package = new DG.Package();
20
28
 
21
29
  //name: info
@@ -45,7 +53,7 @@ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column,
45
53
  df.columns.add(cluster);
46
54
  }
47
55
 
48
- //top-menu: ML | Dimensionality Reduction | PCA...
56
+ //top-menu: ML | Analyze | PCA...
49
57
  //name: PCA
50
58
  //description: Principal component analysis (PCA)
51
59
  //input: dataframe table
@@ -62,54 +70,46 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
62
70
  return pcaTable;
63
71
  }
64
72
 
65
- //top-menu: ML | Dimensionality Reduction | UMAP...
66
- //name: UMAP
67
- //description: Uniform Manifold Approximation and Projection (UMAP)
68
- //input: dataframe table {category: Data}
69
- //input: column_list features {type: numerical; category: Data}
70
- //input: int components = 2 {caption: Components; min: 1; max: 20; category: Hyperparameters} [The number of components (dimensions) to project the data to.]
71
- //input: int epochs = 100 {caption: Epochs; category: Hyperparameters} [The number of epochs to optimize embeddings.]
72
- //input: int neighbors = 15 {caption: Neighbors; category: Hyperparameters} [The number of nearest neighbors to construct the fuzzy manifold.]
73
- //input: double minDist = 0.1 {caption: Minimum distance; min: 0; max: 1; category: Hyperparameters} [The effective minimum distance between embedded points.]
74
- //input: double spread = 1.0 {caption: Spread; category: Hyperparameters} [The effective scale of embedded points.]
75
- //output: dataframe result {action:join(table)}
76
- export async function UMAP(table: DG.DataFrame, features: DG.ColumnList, components: number,
77
- epochs: number, neighbors: number, minDist: number, spread: number): Promise<DG.DataFrame>
78
- {
79
- return await computeUMAP(features, components, epochs, neighbors, minDist, spread);
73
+
74
+ //name: None (number)
75
+ //tags: dim-red-preprocessing-function
76
+ //meta.supportedTypes: int,float,double,qnum
77
+ //meta.supportedDistanceFunctions: Difference
78
+ //input: column col
79
+ //input: string _metric {optional: true}
80
+ //output: object result
81
+ export function numberPreprocessingFunction(col: DG.Column, _metric: string) {
82
+ const entries = col.toList();
83
+ return {entries, options: {}};
80
84
  }
81
85
 
82
- //top-menu: ML | Dimensionality Reduction | t-SNE...
83
- //name: t-SNE
84
- //description: t-distributed stochastic neighbor embedding (t-SNE)
85
- //input: dataframe table {category: Data}
86
- //input: column_list features {type: numerical; category: Data}
87
- //input: int components = 2 {caption: Components; category: Hyperparameters} [Dimension of the embedded space.]
88
- //input: double learningRate = 10 {caption: Learning rate; category: Hyperparameters} [Optimization tuning parameter. Should be in the range 10...1000.]
89
- //input: int perplexity = 30 {caption: Perplexity; category: Hyperparameters} [The number of nearest neighbors. Should be less than the number of samples.]
90
- //input: int iterations = 500 {caption: Iterations; category: Hyperparameters} [Maximum number of iterations for the optimization. Should be at least 250.]
91
- //output: dataframe result {action:join(table)}
92
- export async function tSNE(table: DG.DataFrame, features: DG.ColumnList, components: number,
93
- learningRate: number, perplexity: number, iterations: number): Promise<DG.DataFrame>
94
- {
95
- return await computeTSNE(features, components, learningRate, perplexity, iterations);
86
+ //name: None (string)
87
+ //tags: dim-red-preprocessing-function
88
+ //meta.supportedTypes: string
89
+ //meta.supportedDistanceFunctions: Levenshtein,Hamming,One-Hot
90
+ //input: column col
91
+ //input: string _metric {optional: true}
92
+ //output: object result
93
+ export function stringPreprocessingFunction(col: DG.Column, _metric: string) {
94
+ const entries = col.toList();
95
+ return {entries, options: {}};
96
96
  }
97
97
 
98
- //top-menu: ML | Dimensionality Reduction | SPE...
99
- //name: SPE
100
- //description: Stochastic proximity embedding (SPE)
101
- //input: dataframe table {category: Data}
102
- //input: column_list features {type: numerical; category: Data}
103
- //input: int dimension = 2 {caption: Dimension; category: Hyperparameters} [Dimension of the embedded space.]
104
- //input: int steps = 0 {caption: Steps; category: Hyperparameters} [Number of random selections of point pairs and distance computations between them.]
105
- //input: int cycles = 1000000 {caption: Cycles; category: Hyperparameters} [Number of the method cycles.]
106
- //input: double cutoff = 0.0 {caption: Cutoff; category: Hyperparameters} [Cutoff distance between points.]
107
- //input: double lambda = 2.0 {caption: Learning rate; category: Hyperparameters} [Optimization tuning parameter.]
108
- //output: dataframe result {action:join(table)}
109
- export async function SPE(table: DG.DataFrame, features: DG.ColumnList, dimension: number,
110
- steps: number, cycles: number, cutoff: number, lambda: number): Promise<DG.DataFrame>
111
- {
112
- return await computeSPE(features, dimension, steps, cycles, cutoff, lambda);
98
+ //top-menu: ML | Reduce Dimensionality...
99
+ //name: Multi Column Dimensionality Reduction
100
+ export async function reduceDimensionality(): Promise<void> {
101
+ const editor = new MultiColumnDimReductionEditor();
102
+ ui.dialog('Dimensionality reduction').add(editor.getEditor()).onOK(async () => {
103
+ const params = editor.getParams();
104
+ if (params.columns.length === 0)
105
+ return;
106
+ await multiColReduceDimensionality(params.table, params.columns, params.methodName as DimReductionMethods,
107
+ params.distanceMetrics as KnownMetrics[],
108
+ params.weights, params.preprocessingFunctions, params.aggreaggregationMethod as DistanceAggregationMethods,
109
+ !!params.plotEmbeddings, !!params.clusterEmbeddings, params.options, {
110
+ fastRowCount: 10000,
111
+ });
112
+ }).show();
113
113
  }
114
114
 
115
115
  //top-menu: ML | Analyze | Multivariate Analysis...