@datagrok/eda 1.1.13 → 1.1.15

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@datagrok/eda",
3
3
  "friendlyName": "EDA",
4
- "version": "1.1.13",
4
+ "version": "1.1.15",
5
5
  "description": "Exploratory Data Analysis Tools",
6
6
  "dependencies": {
7
- "@datagrok-libraries/ml": "^6.4.5",
7
+ "@datagrok-libraries/ml": "^6.4.11",
8
8
  "@datagrok-libraries/tutorials": "^1.3.6",
9
- "@datagrok-libraries/utils": "^4.1.42",
9
+ "@datagrok-libraries/utils": "^4.1.44",
10
10
  "@datagrok-libraries/math": "^1.0.7",
11
11
  "@keckelt/tsne": "^1.0.2",
12
12
  "cash-dom": "^8.1.1",
package/src/package.ts CHANGED
@@ -50,13 +50,15 @@ export async function init(): Promise<void> {
50
50
  //input: column yCol {type: numerical}
51
51
  //input: double epsilon = 0.02 {caption: Epsilon} [The maximum distance between two samples for them to be considered as in the same neighborhood.]
52
52
  //input: int minPts = 4 {caption: Minimum points} [The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.]
53
+ //output: column cluster
53
54
  export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column, epsilon: number, minPts: number) {
54
55
  const x = xCol.getRawData() as Float32Array;
55
56
  const y = yCol.getRawData() as Float32Array;
56
57
  const res = await getDbscanWorker(x, y, epsilon, minPts);
57
- const clusterColName = df.columns.getUnusedName('Cluster');
58
+ const clusterColName = df.columns.getUnusedName('Cluster (DBSCAN)');
58
59
  const cluster = DG.Column.fromInt32Array(clusterColName, res);
59
60
  df.columns.add(cluster);
61
+ return cluster;
60
62
  }
61
63
 
62
64
  //top-menu: ML | Analyze | PCA...
@@ -83,6 +85,31 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
83
85
  }
84
86
  }
85
87
 
88
+ //name: DBSCAN clustering
89
+ //tags: dim-red-postprocessing-function
90
+ //meta.defaultPostProcessingFunction: true
91
+ //input: column col1
92
+ //input: column col2
93
+ //input: double epsilon = 0.01 {default: 0.01}[Maximum distance between two points for them to be considered as in the same neighborhood.]
94
+ //input: int minimumPoints = 5 {default: 5}[Minimum number of points to form a dense region.]
95
+ export async function dbscanPostProcessingFunction(col1: DG.Column, col2: DG.Column, epsilon: number, minimumPoints: number) {
96
+ const df = col1.dataFrame;
97
+ if (df === null)
98
+ return;
99
+ const resCol = await dbScan(df, col1, col2, epsilon, minimumPoints);
100
+ df.changeColumnType(resCol, 'string');
101
+ const colNames = [col1.name, col2.name];
102
+ const tv = grok.shell.tableView(df.name);
103
+ if (!tv)
104
+ return;
105
+ // find the correct scatterPlotViewer and set the colorColumnName
106
+ for (const v of tv.viewers) {
107
+ if (v instanceof DG.ScatterPlotViewer && colNames.includes(v.props.xColumnName) && colNames.includes(v.props.yColumnName)) {
108
+ v.props.colorColumnName = resCol.name;
109
+ return;
110
+ }
111
+ }
112
+ }
86
113
 
87
114
  //name: None (number)
88
115
  //tags: dim-red-preprocessing-function
@@ -122,7 +149,7 @@ export async function reduceDimensionality(): Promise<void> {
122
149
  params.weights, params.preprocessingFunctions, params.aggreaggregationMethod as DistanceAggregationMethods,
123
150
  !!params.plotEmbeddings, !!params.clusterEmbeddings, params.options, {
124
151
  fastRowCount: 10000,
125
- });
152
+ }, params.postProcessingFunction, params.postProcessingFunctionArgs);
126
153
  }).show();
127
154
  }
128
155