@datagrok/eda 1.4.11 → 1.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,11 +1,12 @@
1
1
  {
2
2
  "name": "@datagrok/eda",
3
3
  "friendlyName": "EDA",
4
- "version": "1.4.11",
4
+ "version": "1.4.12",
5
5
  "description": "Exploratory Data Analysis Tools",
6
6
  "dependencies": {
7
7
  "@datagrok-libraries/math": "^1.2.6",
8
8
  "@datagrok-libraries/ml": "^6.10.8",
9
+ "@datagrok-libraries/statistics": "^1.10.0",
9
10
  "@datagrok-libraries/tutorials": "^1.7.4",
10
11
  "@datagrok-libraries/utils": "^4.6.5",
11
12
  "@keckelt/tsne": "^1.0.2",
@@ -14,6 +15,7 @@
14
15
  "datagrok-api": "^1.26.3",
15
16
  "dayjs": "^1.11.9",
16
17
  "jstat": "^1.9.6",
18
+ "mathjs": "^15.1.0",
17
19
  "source-map-loader": "^4.0.1",
18
20
  "umap-js": "^1.3.3",
19
21
  "worker-loader": "^3.0.8",
@@ -1,4 +1,4 @@
1
- // Analysis of Variances (ANOVA): computations
1
+ // Analysis of Variances (ANOVA) - computations
2
2
 
3
3
  /* REFERENCES
4
4
 
@@ -1,4 +1,4 @@
1
- // Analysis of Variances (ANOVA): UI
1
+ // Analysis of Variances (ANOVA) - UI
2
2
 
3
3
  import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
@@ -274,4 +274,18 @@ export namespace funcs {
274
274
  export async function paretoFrontViewer(): Promise<any> {
275
275
  return await grok.functions.call('EDA:ParetoFrontViewer', {});
276
276
  }
277
+
278
+ /**
279
+ Train probabilistic multi-parameter optimization (pMPO) model
280
+ */
281
+ export async function trainPmpo(): Promise<void> {
282
+ return await grok.functions.call('EDA:TrainPmpo', {});
283
+ }
284
+
285
+ /**
286
+ Apply trained probabilistic multi-parameter optimization (pMPO) model to score samples
287
+ */
288
+ export async function applyPmpo(table: DG.DataFrame , file: DG.FileInfo ): Promise<void> {
289
+ return await grok.functions.call('EDA:ApplyPmpo', { table, file });
290
+ }
277
291
  }
package/src/package.g.ts CHANGED
@@ -37,34 +37,34 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
37
37
  }
38
38
 
39
39
  //name: DBSCAN clustering
40
- //tags: dim-red-postprocessing-function
41
40
  //input: column col1
42
41
  //input: column col2
43
42
  //input: double epsilon = 0.01 { description: Minimum distance between two points to be considered as in the same neighborhood. }
44
43
  //input: int minimumPoints = 5 { description: Minimum number of points to form a dense region. }
45
44
  //meta.defaultPostProcessingFunction: true
45
+ //meta.role: dimRedPostprocessingFunction
46
46
  export async function dbscanPostProcessingFunction(col1: DG.Column, col2: DG.Column, epsilon: number, minimumPoints: number) : Promise<void> {
47
47
  await PackageFunctions.dbscanPostProcessingFunction(col1, col2, epsilon, minimumPoints);
48
48
  }
49
49
 
50
50
  //name: None (number)
51
- //tags: dim-red-preprocessing-function
52
51
  //input: column col
53
52
  //input: string _metric { optional: true }
54
53
  //output: object result
55
54
  //meta.supportedTypes: int,float,double,qnum
56
55
  //meta.supportedDistanceFunctions: Difference
56
+ //meta.role: dimRedPreprocessingFunction
57
57
  export function numberPreprocessingFunction(col: DG.Column, _metric: string) {
58
58
  return PackageFunctions.numberPreprocessingFunction(col, _metric);
59
59
  }
60
60
 
61
61
  //name: None (string)
62
- //tags: dim-red-preprocessing-function
63
62
  //input: column col
64
63
  //input: string _metric { optional: true }
65
64
  //output: object result
66
65
  //meta.supportedTypes: string
67
66
  //meta.supportedDistanceFunctions: One-Hot,Levenshtein,Hamming
67
+ //meta.role: dimRedPreprocessingFunction
68
68
  export function stringPreprocessingFunction(col: DG.Column, _metric: string) {
69
69
  return PackageFunctions.stringPreprocessingFunction(col, _metric);
70
70
  }
@@ -102,9 +102,9 @@ export async function MCLClustering(df: DG.DataFrame, cols: DG.Column[], metrics
102
102
 
103
103
  //name: MCL
104
104
  //description: Markov clustering viewer
105
- //tags: viewer
106
105
  //output: viewer result
107
106
  //meta.showInGallery: false
107
+ //meta.role: viewer
108
108
  export function markovClusteringViewer() : any {
109
109
  return PackageFunctions.markovClusteringViewer();
110
110
  }
@@ -532,9 +532,22 @@ export function paretoFront() : void {
532
532
 
533
533
  //name: Pareto front
534
534
  //description: Pareto front viewer
535
- //tags: viewer
536
535
  //output: viewer result
537
536
  //meta.icon: icons/pareto-front-viewer.svg
537
+ //meta.role: viewer
538
538
  export function paretoFrontViewer() : any {
539
539
  return PackageFunctions.paretoFrontViewer();
540
540
  }
541
+
542
+ //description: Train probabilistic multi-parameter optimization (pMPO) model
543
+ //top-menu: Chem | Calculate | Train pMPO...
544
+ export function trainPmpo() : void {
545
+ PackageFunctions.trainPmpo();
546
+ }
547
+
548
+ //description: Apply trained probabilistic multi-parameter optimization (pMPO) model to score samples
549
+ //input: dataframe table
550
+ //input: file file
551
+ export async function applyPmpo(table: DG.DataFrame, file: DG.FileInfo) : Promise<void> {
552
+ await PackageFunctions.applyPmpo(table, file);
553
+ }
package/src/package.ts CHANGED
@@ -38,6 +38,8 @@ import {initXgboost} from '../wasm/xgbooster';
38
38
  import {XGBooster} from './xgbooster';
39
39
  import {ParetoOptimizer} from './pareto-optimization/pareto-optimizer';
40
40
  import {ParetoFrontViewer} from './pareto-optimization/pareto-front-viewer';
41
+ import {Pmpo} from './probabilistic-scoring/prob-scoring';
42
+ import {loadPmpoParams} from './probabilistic-scoring/pmpo-utils';
41
43
 
42
44
  export const _package = new DG.Package();
43
45
  export * from './package.g';
@@ -113,12 +115,7 @@ export class PackageFunctions {
113
115
 
114
116
 
115
117
  @grok.decorators.func({
116
- 'meta': {
117
- 'defaultPostProcessingFunction': 'true',
118
- },
119
- 'tags': [
120
- 'dim-red-postprocessing-function',
121
- ],
118
+ 'meta': {'defaultPostProcessingFunction': 'true', role: 'dimRedPostprocessingFunction'},
122
119
  'name': 'DBSCAN clustering',
123
120
  })
124
121
  static async dbscanPostProcessingFunction(
@@ -148,8 +145,8 @@ export class PackageFunctions {
148
145
  'meta': {
149
146
  'supportedTypes': 'int,float,double,qnum',
150
147
  'supportedDistanceFunctions': 'Difference',
148
+ 'role': 'dimRedPreprocessingFunction'
151
149
  },
152
- 'tags': ['dim-red-preprocessing-function'],
153
150
  'name': 'None (number)',
154
151
  'outputs': [{name: 'result', type: 'object'}],
155
152
  })
@@ -166,8 +163,8 @@ export class PackageFunctions {
166
163
  'meta': {
167
164
  'supportedTypes': 'string',
168
165
  'supportedDistanceFunctions': 'One-Hot,Levenshtein,Hamming',
166
+ 'role': 'dimRedPreprocessingFunction'
169
167
  },
170
- 'tags': ['dim-red-preprocessing-function'],
171
168
  'name': 'None (string)',
172
169
  'outputs': [{name: 'result', type: 'object'}],
173
170
  })
@@ -289,10 +286,7 @@ export class PackageFunctions {
289
286
 
290
287
  @grok.decorators.func({
291
288
  'outputs': [{'name': 'result', 'type': 'viewer'}],
292
- 'tags': [
293
- 'viewer',
294
- ],
295
- 'meta': {showInGallery: 'false'},
289
+ 'meta': {showInGallery: 'false', role: 'viewer'},
296
290
  'name': 'MCL',
297
291
  'description': 'Markov clustering viewer',
298
292
  })
@@ -984,11 +978,48 @@ export class PackageFunctions {
984
978
  @grok.decorators.func({
985
979
  'name': 'Pareto front',
986
980
  'description': 'Pareto front viewer',
987
- 'tags': ['viewer'],
988
981
  'outputs': [{'name': 'result', 'type': 'viewer'}],
989
- 'meta': {'icon': 'icons/pareto-front-viewer.svg'},
982
+ 'meta': {'icon': 'icons/pareto-front-viewer.svg', role: 'viewer'},
990
983
  })
991
984
  static paretoFrontViewer(): DG.Viewer {
992
985
  return new ParetoFrontViewer();
993
986
  }
987
+
988
+ @grok.decorators.func({
989
+ 'top-menu': 'Chem | Calculate | Train pMPO...',
990
+ 'name': 'trainPmpo',
991
+ 'description': 'Train probabilistic multi-parameter optimization (pMPO) model',
992
+ })
993
+ static trainPmpo(): void {
994
+ const df = grok.shell.t;
995
+ if (df === null) {
996
+ grok.shell.warning('No dataframe is opened');
997
+ return;
998
+ }
999
+
1000
+ if (!Pmpo.isTableValid(df))
1001
+ return;
1002
+
1003
+ const pMPO = new Pmpo(df);
1004
+ pMPO.runTrainingApp();
1005
+ }
1006
+
1007
+ @grok.decorators.func({
1008
+ //'top-menu': 'ML | Apply pMPO...',
1009
+ 'name': 'applyPmpo',
1010
+ 'description': 'Apply trained probabilistic multi-parameter optimization (pMPO) model to score samples',
1011
+ })
1012
+ static async applyPmpo(
1013
+ @grok.decorators.param({'type': 'dataframe'}) table: DG.DataFrame,
1014
+ @grok.decorators.param({'type': 'file'}) file: DG.FileInfo,
1015
+ ): Promise<void> {
1016
+ try {
1017
+ const params = await loadPmpoParams(file);
1018
+ const predName = table.columns.getUnusedName('pMPO score');
1019
+ const prediction = Pmpo.predict(table, params, predName);
1020
+ table.columns.add(prediction, true);
1021
+ } catch (err) {
1022
+ grok.shell.warning(`Failed to apply pMPO: ${err instanceof Error ? err.message : 'the platform issue.'}`);
1023
+ }
1024
+ }
994
1025
  }
@@ -6,6 +6,7 @@ import {OPT_TYPE} from './defs';
6
6
 
7
7
  export const PALETTE = [DG.Color.darkGreen, DG.Color.yellow, DG.Color.darkRed];
8
8
 
9
+ /** Return output color palette w.r.t. the specified type of optimization */
9
10
  export function getOutputPalette(type: OPT_TYPE): number[] {
10
11
  if (type === OPT_TYPE.MIN)
11
12
  return [...PALETTE];
@@ -13,13 +14,14 @@ export function getOutputPalette(type: OPT_TYPE): number[] {
13
14
  return [...PALETTE].reverse();
14
15
  }
15
16
 
16
- export function getColorScaleDiv(type: OPT_TYPE): HTMLElement {
17
+ /** Return div with color scale description */
18
+ export function getColorScaleDiv(type: OPT_TYPE, useMinMax: boolean = true): HTMLElement {
17
19
  const scale = ui.label('Color scale:');
18
20
  scale.style.paddingRight = '7px';
19
21
  const elems = [scale];
20
- const minLbl = ui.label('min');
22
+ const minLbl = ui.label(useMinMax ? 'min' : 'worst');
21
23
  const midLbl = ui.label('. . .');
22
- const maxLbl = ui.label('max');
24
+ const maxLbl = ui.label(useMinMax ? 'max' : 'best');
23
25
  const palette = getOutputPalette(type);
24
26
 
25
27
  const colorElems = [minLbl, midLbl, maxLbl].map((el, idx) => {
@@ -36,4 +38,4 @@ export function getColorScaleDiv(type: OPT_TYPE): HTMLElement {
36
38
  elems.push(...colorElems);
37
39
 
38
40
  return ui.divH(elems);
39
- }
41
+ } // getColorScaleDiv
@@ -0,0 +1,108 @@
1
+ // Constants and type definitions for probabilistic scoring (pMPO)
2
+ // Link: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
3
+
4
+ /** Minimum number of samples required to compute pMPO */
5
+ export const MIN_SAMPLES_COUNT = 10;
6
+
7
+ export const PMPO_NON_APPLICABLE = 'pMPO is not applicable';
8
+ export const PMPO_COMPUTE_FAILED = 'Failed to compute pMPO parameters';
9
+
10
+ /** Basic statistics for desired and non-desired compounds */
11
+ export type BasicStats = {
12
+ desAvg: number,
13
+ desStd: number,
14
+ nonDesAvg: number,
15
+ nonDesStd: number,
16
+ };
17
+
18
+ /** Descriptor statistics including basic stats, t-statistics and p-value */
19
+ export type DescriptorStatistics = BasicStats & {
20
+ desLen: number,
21
+ nonSesLen: number,
22
+ tstat: number,
23
+ pValue: number,
24
+ };
25
+
26
+ /** Cutoff parameters for the basic functions of the pMPO model */
27
+ export type Cutoff = {
28
+ cutoff: number,
29
+ cutoffDesired: number,
30
+ cutoffNotDesired: number,
31
+ };
32
+
33
+ /** Generalized Sigmoid parameters for the desirability functions of the pMPO model */
34
+ export type SigmoidParams = {
35
+ pX0: number,
36
+ b: number,
37
+ c: number,
38
+ };
39
+
40
+ /** pMPO parameters including basic stats, cutoffs, sigmoid params, z-score, weight, intersections */
41
+ export type PmpoParams = BasicStats & Cutoff & SigmoidParams & {
42
+ zScore: number,
43
+ weight: number,
44
+ intersections: number[],
45
+ x0: number,
46
+ xBound: number,
47
+ };
48
+
49
+ export type CorrelationTriple = [string, string, number];
50
+
51
+ const DESIRED = 'desired';
52
+ const NON_DESIRED = 'non-desired';
53
+ const MEAN = 'Mean';
54
+ const STD = 'Std';
55
+ const T_STAT = 't-statistics';
56
+ export const P_VAL = 'p-value';
57
+ const MEAN_DES = `${MEAN}(${DESIRED})`;
58
+ const MEAN_NON_DES = `${MEAN}(${NON_DESIRED})`;
59
+ const STD_DES = `${STD}(${DESIRED})`;
60
+ const STD_NON_DES = `${STD}(${NON_DESIRED})`;
61
+
62
+ /** Map of statistic field names to their display titles */
63
+ export const STAT_TO_TITLE_MAP = new Map([
64
+ ['desAvg', MEAN_DES],
65
+ ['desStd', STD_DES],
66
+ ['nonDesAvg', MEAN_NON_DES],
67
+ ['nonDesStd', STD_NON_DES],
68
+ ['tstat', T_STAT],
69
+ ['pValue', P_VAL],
70
+ ]);
71
+
72
+ export const DESCR_TITLE = 'Descriptor';
73
+ export const DESCR_TABLE_TITLE = DESCR_TITLE + ' Statistics';
74
+ export const SELECTED_TITLE = 'Selected';
75
+ export const WEIGHT_TITLE = 'Weight';
76
+ export const SCORES_TITLE = 'pMPO score';
77
+ export const DESIRABILITY_COL_NAME = 'Desirability';
78
+
79
+ /** Minimum p-value threshold for filtering descriptors */
80
+ export const P_VAL_TRES_MIN = 0.01;
81
+
82
+ /** Minimum R-squared threshold for filtering correlated descriptors */
83
+ export const R2_MIN = 0.01;
84
+
85
+ /** Minimum q-cutoff for descriptors in the pMPO model */
86
+ export const Q_CUTOFF_MIN = 0.01;
87
+
88
+ /** Colors used for selected and skipped descriptors */
89
+ export enum COLORS {
90
+ SELECTED = 'rgb(26, 146, 26)',
91
+ SKIPPED = 'rgb(208, 57, 67)',
92
+ };
93
+
94
+ export const TINY = 1e-8;
95
+
96
+ /** Folder path for storing pMPO models */
97
+ export const FOLDER = 'System:AppData/Chem/mpo';
98
+
99
+ /** Desirability profile properties type */
100
+ export type DesirabilityProfileProperties = Record<string, {
101
+ line: [number, number][],
102
+ weight: number,
103
+ min?: number,
104
+ max?: number,
105
+ }>;
106
+
107
+ export const STAT_GRID_HEIGHT = 75;
108
+ export const DESIRABILITY_COLUMN_WIDTH = 305;