@datagrok/eda 1.1.11 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@datagrok/eda",
3
3
  "friendlyName": "EDA",
4
- "version": "1.1.11",
4
+ "version": "1.1.13",
5
5
  "description": "Exploratory Data Analysis Tools",
6
6
  "dependencies": {
7
- "@datagrok-libraries/ml": "^6.4.1",
7
+ "@datagrok-libraries/ml": "^6.4.5",
8
8
  "@datagrok-libraries/tutorials": "^1.3.6",
9
- "@datagrok-libraries/utils": "^4.1.4",
9
+ "@datagrok-libraries/utils": "^4.1.42",
10
10
  "@datagrok-libraries/math": "^1.0.7",
11
11
  "@keckelt/tsne": "^1.0.2",
12
12
  "cash-dom": "^8.1.1",
package/src/eda-ui.ts CHANGED
@@ -117,7 +117,7 @@ export function loadingScatterPlot(features: DG.ColumnList, xLoadings: Array<DG.
117
117
  export function addPLSvisualization(
118
118
  table: DG.DataFrame, samplesNames: DG.Column, features: DG.ColumnList, predict: DG.Column, plsOutput: any,
119
119
  ): void {
120
- const view = grok.shell.getTableView(table.name);
120
+ const view = (table.id !== null) ? grok.shell.getTableView(table.name) : grok.shell.addTableView(table);
121
121
 
122
122
  // 1. Predicted vs Reference scatter plot
123
123
  view.addViewer(predictedVersusReferenceScatterPlot(samplesNames, predict, plsOutput[0]));
package/src/package-test.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
3
-
3
+ import './tests/dim-reduction-tests';
4
4
  export const _package = new DG.Package();
5
5
  export {tests};
6
6
 
package/src/package.ts CHANGED
@@ -18,7 +18,7 @@ import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
18
18
  import {oneWayAnova} from './stat-tools';
19
19
  import {getDbscanWorker} from '@datagrok-libraries/math';
20
20
 
21
- import {DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
21
+ import {DistanceAggregationMethod, DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
22
22
  import {MultiColumnDimReductionEditor} from
23
23
  '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reduction-editor';
24
24
  import {multiColReduceDimensionality} from
@@ -27,6 +27,8 @@ import {KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
27
27
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
28
28
 
29
29
  import {runKNNImputer} from './missing-values-imputation/ui';
30
+ import {MCLEditor} from '@datagrok-libraries/ml/src/MCL/mcl-editor';
31
+ import {markovCluster} from '@datagrok-libraries/ml/src/MCL/clustering-view';
30
32
 
31
33
  export const _package = new DG.Package();
32
34
 
@@ -65,12 +67,20 @@ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column,
65
67
  //input: int components = 2 {caption: Components} [Number of components.]
66
68
  //input: bool center = false [Indicating whether the variables should be shifted to be zero centered.]
67
69
  //input: bool scale = false [Indicating whether the variables should be scaled to have unit variance.]
68
- //output: dataframe result {action:join(table)}
69
- export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
70
- center: boolean, scale: boolean): Promise<DG.DataFrame> {
70
+ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number, center: boolean, scale: boolean): Promise<void> {
71
71
  const pcaTable = await computePCA(table, features, components, center, scale);
72
72
  addPrefixToEachColumnName('PCA', pcaTable.columns);
73
- return pcaTable;
73
+
74
+ if (table.id === null) // table is loaded from a local file
75
+ grok.shell.addTableView(pcaTable);
76
+ else {
77
+ const cols = table.columns;
78
+
79
+ for (const col of pcaTable.columns) {
80
+ col.name = cols.getUnusedName(col.name);
81
+ cols.add(col);
82
+ }
83
+ }
74
84
  }
75
85
 
76
86
 
@@ -82,8 +92,9 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
82
92
  //input: string _metric {optional: true}
83
93
  //output: object result
84
94
  export function numberPreprocessingFunction(col: DG.Column, _metric: string) {
95
+ const range = col.stats.max - col.stats.min;
85
96
  const entries = col.toList();
86
- return {entries, options: {}};
97
+ return {entries, options: {range}};
87
98
  }
88
99
 
89
100
  //name: None (string)
@@ -115,6 +126,51 @@ export async function reduceDimensionality(): Promise<void> {
115
126
  }).show();
116
127
  }
117
128
 
129
+ //name: GetMCLEditor
130
+ //tags: editor
131
+ //input: funccall call
132
+ export function GetMCLEditor(call: DG.FuncCall): void {
133
+ try {
134
+ const funcEditor = new MCLEditor();
135
+ ui.dialog('Markov clustering')
136
+ .add(funcEditor.getEditor())
137
+ .onOK(async () => {
138
+ const params = funcEditor.params;
139
+ return call.func.prepare({
140
+ df: params.table, cols: params.columns, metrics: params.distanceMetrics,
141
+ weights: params.weights, aggregationMethod: params.aggreaggregationMethod, preprocessingFuncs: params.preprocessingFunctions,
142
+ preprocessingFuncArgs: params.preprocessingFuncArgs, threshold: params.threshold, maxIterations: params.maxIterations,
143
+ }).call(true);
144
+ }).show();
145
+ } catch (err: any) {
146
+ const errMsg = err instanceof Error ? err.message : err.toString();
147
+ const errStack = err instanceof Error ? err.stack : undefined;
148
+ grok.shell.error(`Get region editor error: ${errMsg}`);
149
+ _package.logger.error(errMsg, undefined, errStack);
150
+ }
151
+ }
152
+
153
+
154
+ //top-menu: ML | Cluster | MCL...
155
+ //name: MCL
156
+ //description: Markov clustering (MCL) is an unsupervised clustering algorithm for graphs based on simulation of stochastic flow.
157
+ //input: dataframe df
158
+ //input: list<column> cols
159
+ //input: list<string> metrics
160
+ //input: list<double> weights
161
+ //input: string aggregationMethod
162
+ //input: list<func> preprocessingFuncs
163
+ //input: object preprocessingFuncArgs
164
+ //input: int threshold = 80
165
+ //input: int maxIterations = 10
166
+ //editor: EDA: GetMCLEditor
167
+ export async function MCL(df: DG.DataFrame, cols: DG.Column[], metrics: KnownMetrics[],
168
+ weights: number[], aggregationMethod: DistanceAggregationMethod, preprocessingFuncs: (DG.Func | null | undefined)[],
169
+ preprocessingFuncArgs: any[], threshold: number = 80, maxIterations: number = 10) {
170
+ return await markovCluster(df, cols, metrics, weights,
171
+ aggregationMethod, preprocessingFuncs, preprocessingFuncArgs, threshold, maxIterations);
172
+ }
173
+
118
174
  //top-menu: ML | Analyze | Multivariate Analysis...
119
175
  //name: Multivariate Analysis (PLS)
120
176
  //description: Multidimensional data analysis using partial least squares (PLS) regression. It reduces the predictors to a smaller set of uncorrelated components and performs least squares regression on them.
@@ -349,6 +405,6 @@ export function anova(table: DG.DataFrame, factor: DG.Column, feature: DG.Column
349
405
  //top-menu: ML | Missing Values Imputation ...
350
406
  //name: KNN impute
351
407
  //desription: Missing values imputation using the k-nearest neighbors method
352
- export function kNNImputation() {
353
- runKNNImputer();
408
+ export function kNNImputation() {
409
+ runKNNImputer();
354
410
  }
package/src/tests/dim-reduction-tests.ts ADDED
@@ -0,0 +1,99 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+ import {_package} from '../package-test';
5
+
6
+ // tests for dimensionality reduction
7
+
8
+ import {category, expect, test} from '@datagrok-libraries/utils/src/test';
9
+ import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
10
+ import {KnownMetrics, NumberMetricsNames, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
11
+ import {multiColReduceDimensionality}
12
+ from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
13
+
14
+ const DEMOG_COLNAMES = {
15
+ SUBJ: 'subj',
16
+ STUDY: 'study',
17
+ SITE: 'site',
18
+ AGE: 'age',
19
+ SEX: 'sex',
20
+ RACE: 'race',
21
+ DISEASE: 'disease',
22
+ WEIGHT: 'weight',
23
+ HEIGHT: 'height',
24
+ } as const;
25
+ category('Dimensionality reduction: UMAP', () => {
26
+ test('Numeric column', async () => {
27
+ await testDimensionalityReductionUI(
28
+ [DEMOG_COLNAMES.AGE], DimReductionMethods.UMAP, [NumberMetricsNames.Difference]);
29
+ }, {timeout: 30000});
30
+
31
+ test('String column', async () => {
32
+ await testDimensionalityReductionUI(
33
+ [DEMOG_COLNAMES.SEX], DimReductionMethods.UMAP, [StringMetricsNames.Onehot]);
34
+ }, {timeout: 30000});
35
+
36
+ test('Numeric and string columns', async () => {
37
+ await testDimensionalityReductionUI(
38
+ [DEMOG_COLNAMES.SEX, DEMOG_COLNAMES.AGE], DimReductionMethods.UMAP,
39
+ [StringMetricsNames.Onehot, NumberMetricsNames.Difference]);
40
+ });
41
+
42
+ test('All demog columns', async () => {
43
+ const allDemogCols = grok.data.demo.demog(10).columns.toList()
44
+ .filter((col) => Object.values(DEMOG_COLNAMES).includes(col.name as any)); ;
45
+ const distFuncs = allDemogCols.map((col) => col.type === DG.COLUMN_TYPE.STRING ?
46
+ StringMetricsNames.Onehot : NumberMetricsNames.Difference);
47
+ const colNames = allDemogCols.map((col) => col.name);
48
+ await testDimensionalityReductionUI( colNames, DimReductionMethods.UMAP, distFuncs);
49
+ });
50
+ });
51
+
52
+ category('Dimensionality reduction: T-SNE', () => {
53
+ test('Numeric column', async () => {
54
+ await testDimensionalityReductionUI(
55
+ [DEMOG_COLNAMES.AGE], DimReductionMethods.T_SNE, [NumberMetricsNames.Difference]);
56
+ }, {timeout: 30000});
57
+
58
+ test('String column', async () => {
59
+ await testDimensionalityReductionUI(
60
+ [DEMOG_COLNAMES.SEX], DimReductionMethods.T_SNE, [StringMetricsNames.Onehot]);
61
+ }, {timeout: 30000});
62
+
63
+ test('Numeric and string columns', async () => {
64
+ await testDimensionalityReductionUI(
65
+ [DEMOG_COLNAMES.SEX, DEMOG_COLNAMES.AGE], DimReductionMethods.T_SNE,
66
+ [StringMetricsNames.Onehot, NumberMetricsNames.Difference]);
67
+ });
68
+
69
+ test('All demog columns', async () => {
70
+ const allDemogCols = grok.data.demo.demog(10).columns.toList()
71
+ .filter((col) => Object.values(DEMOG_COLNAMES).includes(col.name as any));
72
+ const distFuncs = allDemogCols.map((col) => col.type === DG.COLUMN_TYPE.STRING ?
73
+ StringMetricsNames.Onehot : NumberMetricsNames.Difference);
74
+ const colNames = allDemogCols.map((col) => col.name);
75
+ await testDimensionalityReductionUI(colNames, DimReductionMethods.T_SNE, distFuncs);
76
+ });
77
+ });
78
+
79
+ async function testDimensionalityReductionUI(
80
+ columns: string[], methodName: DimReductionMethods, metrics: KnownMetrics[],
81
+ ) {
82
+ const df = grok.data.demo.demog(100);
83
+ const _tv = grok.shell.addTableView(df);
84
+ const dimRedResult = await multiColReduceDimensionality(
85
+ df, columns.map((c) => df.col(c)!), methodName, metrics,
86
+ columns.map(() => 1), columns.map(() => undefined),
87
+ 'EUCLIDEAN', true, true, {preprocessingFuncArgs: columns.map(() => ({}))});
88
+ expect(!!dimRedResult, true, 'No scatterplot returned');
89
+ const addedEmbeddingsCols = df.columns.names().filter((c) => c.toLowerCase().startsWith('embed'));
90
+ expect(addedEmbeddingsCols.length, 2, 'Wrong number of embeddings added');
91
+ const clusterColName = df.columns.names().find((c) => c.toLowerCase().startsWith('cluster'));
92
+ expect(!!clusterColName, true, 'No cluster column added');
93
+ for (const embedColName of addedEmbeddingsCols) {
94
+ const c = df.col(embedColName)!;
95
+ expect(new Array(c.length).fill(null).every((_, i) => !c.isNone(i) && !isNaN(c.get(i))), true,
96
+ 'Embedding column has null-ish values');
97
+ }
98
+ await new Promise((resolve) => setTimeout(resolve, 500));
99
+ }
package/dist/100.js DELETED
@@ -1,2 +0,0 @@
1
- var eda;(()=>{"use strict";var e,r,t={7100:(e,r,t)=>{var a;!function(e){e.EUCLIDEAN="EUCLIDEAN",e.MANHATTAN="MANHATTAN"}(a||(a={}));const n=e=>null==e;var o=t(6361),i=t(9657);onmessage=e=>{const{values:r,fnNames:t,startRow:s,startCol:c,chunckSize:l,opts:u,weights:p,aggregationMethod:f}=e.data,h={};try{let e=s,g=c,v=0,d=0,m=Number.MIN_VALUE;const w=function(e,r){return e===a.MANHATTAN?e=>e.reduce(((e,t,a)=>e+t*r[a]),0):e=>{const t=e.reduce(((e,t,a)=>e+(t*r[a])**2),0);return Math.sqrt(t)}}(f,p);r.forEach(((e,a)=>{if((0,o.sQ)(t[a]))for(let t=0;t<e.length;++t)n(e[t])||(r[a][t]=new i.Z(r[a][t]._data,r[a][t]._length))}));const b=new Array(t.length).fill(null).map(((e,r)=>new o.UH(t[r]).getMeasure(u[r]))),A=new Float32Array(l);for(;v<l;){const t=b.map(((t,a)=>n(r[a][e])||n(r[a][g])?1:t(r[a][e],r[a][g]))),a=1===t.length?t[0]:w(t);A[v]=a,a<d&&(d=a),a>m&&(m=a),v++,g++,g===r[0].length&&(e++,g=e+1)}h.distanceMatrixData=A,h.min=d,h.max=m}catch(e){h.error=e}postMessage(h)}}},a={};function n(e){var r=a[e];if(void 0!==r)return r.exports;var o=a[e]={exports:{}};return t[e](o,o.exports,n),o.exports}n.m=t,n.x=()=>{var e=n.O(void 0,[483,361],(()=>n(7100)));return n.O(e)},e=[],n.O=(r,t,a,o)=>{if(!t){var i=1/0;for(u=0;u<e.length;u++){for(var[t,a,o]=e[u],s=!0,c=0;c<t.length;c++)(!1&o||i>=o)&&Object.keys(n.O).every((e=>n.O[e](t[c])))?t.splice(c--,1):(s=!1,o<i&&(i=o));if(s){e.splice(u--,1);var l=a();void 0!==l&&(r=l)}}return r}o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[t,a,o]},n.d=(e,r)=>{for(var t in r)n.o(r,t)&&!n.o(e,t)&&Object.defineProperty(e,t,{enumerable:!0,get:r[t]})},n.f={},n.e=e=>Promise.all(Object.keys(n.f).reduce(((r,t)=>(n.f[t](e,r),r)),[])),n.u=e=>e+".js",n.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),n.o=(e,r)=>Object.prototype.hasOwnProperty.call(e,r),(()=>{var e;n.g.importScripts&&(e=n.g.location+"");var 
r=n.g.document;if(!e&&r&&(r.currentScript&&(e=r.currentScript.src),!e)){var t=r.getElementsByTagName("script");if(t.length)for(var a=t.length-1;a>-1&&!e;)e=t[a--].src}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),n.p=e})(),(()=>{var e={100:1};n.f.i=(r,t)=>{e[r]||importScripts(n.p+n.u(r))};var r=self.webpackChunkeda=self.webpackChunkeda||[],t=r.push.bind(r);r.push=r=>{var[a,o,i]=r;for(var s in o)n.o(o,s)&&(n.m[s]=o[s]);for(i&&i(n);a.length;)e[a.pop()]=1;t(r)}})(),r=n.x,n.x=()=>Promise.all([n.e(483),n.e(361)]).then(r);var o=n.x();eda=o})();
2
- //# sourceMappingURL=data:application/json;charset=utf-8;base64,
package/dist/118.js DELETED
@@ -1,2 +0,0 @@
1
- var eda;(()=>{"use strict";var e,r,t={7118:(e,r,t)=>{var o,a;(a=o||(o={})).EUCLIDEAN="EUCLIDEAN",a.MANHATTAN="MANHATTAN";const n=e=>null==e;var s=t(6361),l=t(9657);onmessage=async e=>{const{values:r,startIdx:t,endIdx:o,sampleLength:a,fnName:i,opts:c}=e.data;try{let e=new Float32Array(a);const p=o-t;if((0,s.sQ)(i))for(let e=0;e<r.length;++e)n(r[e])||(r[e]=new l.Z(r[e]._data,r[e]._length));let f=0;const h=Math.floor(p/a),u=new s.UH(i).getMeasure(c),g=r.length-2-Math.floor(Math.sqrt(-8*t+4*r.length*(r.length-1)-7)/2-.5),v=t-r.length*g+Math.floor((g+1)*(g+2)/2);let d=g,b=v,m=0;for(;f<p&&m<a;){const t=n(r[d])||n(r[b])?1:u(r[d],r[b]);for(e[m]=t,m++,f+=h,b+=h;b>=r.length&&f<p;)d++,b=d+1+(b-r.length)}m<a&&(e=e.slice(0,m)),postMessage({distance:e})}catch(e){postMessage({error:e})}}}},o={};function a(e){var r=o[e];if(void 0!==r)return r.exports;var n=o[e]={exports:{}};return t[e](n,n.exports,a),n.exports}a.m=t,a.x=()=>{var e=a.O(void 0,[483,361],(()=>a(7118)));return a.O(e)},e=[],a.O=(r,t,o,n)=>{if(!t){var s=1/0;for(p=0;p<e.length;p++){for(var[t,o,n]=e[p],l=!0,i=0;i<t.length;i++)(!1&n||s>=n)&&Object.keys(a.O).every((e=>a.O[e](t[i])))?t.splice(i--,1):(l=!1,n<s&&(s=n));if(l){e.splice(p--,1);var c=o();void 0!==c&&(r=c)}}return r}n=n||0;for(var p=e.length;p>0&&e[p-1][2]>n;p--)e[p]=e[p-1];e[p]=[t,o,n]},a.d=(e,r)=>{for(var t in r)a.o(r,t)&&!a.o(e,t)&&Object.defineProperty(e,t,{enumerable:!0,get:r[t]})},a.f={},a.e=e=>Promise.all(Object.keys(a.f).reduce(((r,t)=>(a.f[t](e,r),r)),[])),a.u=e=>e+".js",a.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),a.o=(e,r)=>Object.prototype.hasOwnProperty.call(e,r),(()=>{var e;a.g.importScripts&&(e=a.g.location+"");var r=a.g.document;if(!e&&r&&(r.currentScript&&(e=r.currentScript.src),!e)){var t=r.getElementsByTagName("script");if(t.length)for(var o=t.length-1;o>-1&&!e;)e=t[o--].src}if(!e)throw new Error("Automatic publicPath is not 
supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),a.p=e})(),(()=>{var e={118:1};a.f.i=(r,t)=>{e[r]||importScripts(a.p+a.u(r))};var r=self.webpackChunkeda=self.webpackChunkeda||[],t=r.push.bind(r);r.push=r=>{var[o,n,s]=r;for(var l in n)a.o(n,l)&&(a.m[l]=n[l]);for(s&&s(a);o.length;)e[o.pop()]=1;t(r)}})(),r=a.x,a.x=()=>Promise.all([a.e(483),a.e(361)]).then(r);var n=a.x();eda=n})();
2
- //# sourceMappingURL=data:application/json;charset=utf-8;base64,