@datagrok/eda 1.4.11 → 1.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/README.md +8 -0
- package/css/pmpo.css +26 -0
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/eslintrc.json +46 -0
- package/files/drugs-props-test.csv +126 -0
- package/files/drugs-props-train.csv +664 -0
- package/files/mpo-done.ipynb +2123 -0
- package/package.json +3 -1
- package/src/anova/anova-tools.ts +1 -1
- package/src/anova/anova-ui.ts +1 -1
- package/src/package-api.ts +14 -0
- package/src/package.g.ts +18 -5
- package/src/package.ts +45 -14
- package/src/pareto-optimization/utils.ts +6 -4
- package/src/probabilistic-scoring/pmpo-defs.ts +108 -0
- package/src/probabilistic-scoring/pmpo-utils.ts +580 -0
- package/src/probabilistic-scoring/prob-scoring.ts +637 -0
- package/src/probabilistic-scoring/stat-tools.ts +168 -0
- package/src/softmax-classifier.ts +1 -1
- package/test-console-output-1.log +77 -47
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/eda",
|
|
3
3
|
"friendlyName": "EDA",
|
|
4
|
-
"version": "1.4.11",
|
|
4
|
+
"version": "1.4.12",
|
|
5
5
|
"description": "Exploratory Data Analysis Tools",
|
|
6
6
|
"dependencies": {
|
|
7
7
|
"@datagrok-libraries/math": "^1.2.6",
|
|
8
8
|
"@datagrok-libraries/ml": "^6.10.8",
|
|
9
|
+
"@datagrok-libraries/statistics": "^1.10.0",
|
|
9
10
|
"@datagrok-libraries/tutorials": "^1.7.4",
|
|
10
11
|
"@datagrok-libraries/utils": "^4.6.5",
|
|
11
12
|
"@keckelt/tsne": "^1.0.2",
|
|
@@ -14,6 +15,7 @@
|
|
|
14
15
|
"datagrok-api": "^1.26.3",
|
|
15
16
|
"dayjs": "^1.11.9",
|
|
16
17
|
"jstat": "^1.9.6",
|
|
18
|
+
"mathjs": "^15.1.0",
|
|
17
19
|
"source-map-loader": "^4.0.1",
|
|
18
20
|
"umap-js": "^1.3.3",
|
|
19
21
|
"worker-loader": "^3.0.8",
|
package/src/anova/anova-tools.ts
CHANGED
package/src/anova/anova-ui.ts
CHANGED
package/src/package-api.ts
CHANGED
|
@@ -274,4 +274,18 @@ export namespace funcs {
|
|
|
274
274
|
export async function paretoFrontViewer(): Promise<any> {
|
|
275
275
|
return await grok.functions.call('EDA:ParetoFrontViewer', {});
|
|
276
276
|
}
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
Train probabilistic multi-parameter optimization (pMPO) model
|
|
280
|
+
*/
|
|
281
|
+
export async function trainPmpo(): Promise<void> {
|
|
282
|
+
return await grok.functions.call('EDA:TrainPmpo', {});
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
/**
|
|
286
|
+
Apply trained probabilistic multi-parameter optimization (pMPO) model to score samples
|
|
287
|
+
*/
|
|
288
|
+
export async function applyPmpo(table: DG.DataFrame , file: DG.FileInfo ): Promise<void> {
|
|
289
|
+
return await grok.functions.call('EDA:ApplyPmpo', { table, file });
|
|
290
|
+
}
|
|
277
291
|
}
|
package/src/package.g.ts
CHANGED
|
@@ -37,34 +37,34 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
//name: DBSCAN clustering
|
|
40
|
-
//tags: dim-red-postprocessing-function
|
|
41
40
|
//input: column col1
|
|
42
41
|
//input: column col2
|
|
43
42
|
//input: double epsilon = 0.01 { description: Minimum distance between two points to be considered as in the same neighborhood. }
|
|
44
43
|
//input: int minimumPoints = 5 { description: Minimum number of points to form a dense region. }
|
|
45
44
|
//meta.defaultPostProcessingFunction: true
|
|
45
|
+
//meta.role: dimRedPostprocessingFunction
|
|
46
46
|
export async function dbscanPostProcessingFunction(col1: DG.Column, col2: DG.Column, epsilon: number, minimumPoints: number) : Promise<void> {
|
|
47
47
|
await PackageFunctions.dbscanPostProcessingFunction(col1, col2, epsilon, minimumPoints);
|
|
48
48
|
}
|
|
49
49
|
|
|
50
50
|
//name: None (number)
|
|
51
|
-
//tags: dim-red-preprocessing-function
|
|
52
51
|
//input: column col
|
|
53
52
|
//input: string _metric { optional: true }
|
|
54
53
|
//output: object result
|
|
55
54
|
//meta.supportedTypes: int,float,double,qnum
|
|
56
55
|
//meta.supportedDistanceFunctions: Difference
|
|
56
|
+
//meta.role: dimRedPreprocessingFunction
|
|
57
57
|
export function numberPreprocessingFunction(col: DG.Column, _metric: string) {
|
|
58
58
|
return PackageFunctions.numberPreprocessingFunction(col, _metric);
|
|
59
59
|
}
|
|
60
60
|
|
|
61
61
|
//name: None (string)
|
|
62
|
-
//tags: dim-red-preprocessing-function
|
|
63
62
|
//input: column col
|
|
64
63
|
//input: string _metric { optional: true }
|
|
65
64
|
//output: object result
|
|
66
65
|
//meta.supportedTypes: string
|
|
67
66
|
//meta.supportedDistanceFunctions: One-Hot,Levenshtein,Hamming
|
|
67
|
+
//meta.role: dimRedPreprocessingFunction
|
|
68
68
|
export function stringPreprocessingFunction(col: DG.Column, _metric: string) {
|
|
69
69
|
return PackageFunctions.stringPreprocessingFunction(col, _metric);
|
|
70
70
|
}
|
|
@@ -102,9 +102,9 @@ export async function MCLClustering(df: DG.DataFrame, cols: DG.Column[], metrics
|
|
|
102
102
|
|
|
103
103
|
//name: MCL
|
|
104
104
|
//description: Markov clustering viewer
|
|
105
|
-
//tags: viewer
|
|
106
105
|
//output: viewer result
|
|
107
106
|
//meta.showInGallery: false
|
|
107
|
+
//meta.role: viewer
|
|
108
108
|
export function markovClusteringViewer() : any {
|
|
109
109
|
return PackageFunctions.markovClusteringViewer();
|
|
110
110
|
}
|
|
@@ -532,9 +532,22 @@ export function paretoFront() : void {
|
|
|
532
532
|
|
|
533
533
|
//name: Pareto front
|
|
534
534
|
//description: Pareto front viewer
|
|
535
|
-
//tags: viewer
|
|
536
535
|
//output: viewer result
|
|
537
536
|
//meta.icon: icons/pareto-front-viewer.svg
|
|
537
|
+
//meta.role: viewer
|
|
538
538
|
export function paretoFrontViewer() : any {
|
|
539
539
|
return PackageFunctions.paretoFrontViewer();
|
|
540
540
|
}
|
|
541
|
+
|
|
542
|
+
//description: Train probabilistic multi-parameter optimization (pMPO) model
|
|
543
|
+
//top-menu: Chem | Calculate | Train pMPO...
|
|
544
|
+
export function trainPmpo() : void {
|
|
545
|
+
PackageFunctions.trainPmpo();
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
//description: Apply trained probabilistic multi-parameter optimization (pMPO) model to score samples
|
|
549
|
+
//input: dataframe table
|
|
550
|
+
//input: file file
|
|
551
|
+
export async function applyPmpo(table: DG.DataFrame, file: DG.FileInfo) : Promise<void> {
|
|
552
|
+
await PackageFunctions.applyPmpo(table, file);
|
|
553
|
+
}
|
package/src/package.ts
CHANGED
|
@@ -38,6 +38,8 @@ import {initXgboost} from '../wasm/xgbooster';
|
|
|
38
38
|
import {XGBooster} from './xgbooster';
|
|
39
39
|
import {ParetoOptimizer} from './pareto-optimization/pareto-optimizer';
|
|
40
40
|
import {ParetoFrontViewer} from './pareto-optimization/pareto-front-viewer';
|
|
41
|
+
import {Pmpo} from './probabilistic-scoring/prob-scoring';
|
|
42
|
+
import {loadPmpoParams} from './probabilistic-scoring/pmpo-utils';
|
|
41
43
|
|
|
42
44
|
export const _package = new DG.Package();
|
|
43
45
|
export * from './package.g';
|
|
@@ -113,12 +115,7 @@ export class PackageFunctions {
|
|
|
113
115
|
|
|
114
116
|
|
|
115
117
|
@grok.decorators.func({
|
|
116
|
-
'meta': {
|
|
117
|
-
'defaultPostProcessingFunction': 'true',
|
|
118
|
-
},
|
|
119
|
-
'tags': [
|
|
120
|
-
'dim-red-postprocessing-function',
|
|
121
|
-
],
|
|
118
|
+
'meta': {'defaultPostProcessingFunction': 'true', role: 'dimRedPostprocessingFunction'},
|
|
122
119
|
'name': 'DBSCAN clustering',
|
|
123
120
|
})
|
|
124
121
|
static async dbscanPostProcessingFunction(
|
|
@@ -148,8 +145,8 @@ export class PackageFunctions {
|
|
|
148
145
|
'meta': {
|
|
149
146
|
'supportedTypes': 'int,float,double,qnum',
|
|
150
147
|
'supportedDistanceFunctions': 'Difference',
|
|
148
|
+
'role': 'dimRedPreprocessingFunction'
|
|
151
149
|
},
|
|
152
|
-
'tags': ['dim-red-preprocessing-function'],
|
|
153
150
|
'name': 'None (number)',
|
|
154
151
|
'outputs': [{name: 'result', type: 'object'}],
|
|
155
152
|
})
|
|
@@ -166,8 +163,8 @@ export class PackageFunctions {
|
|
|
166
163
|
'meta': {
|
|
167
164
|
'supportedTypes': 'string',
|
|
168
165
|
'supportedDistanceFunctions': 'One-Hot,Levenshtein,Hamming',
|
|
166
|
+
'role': 'dimRedPreprocessingFunction'
|
|
169
167
|
},
|
|
170
|
-
'tags': ['dim-red-preprocessing-function'],
|
|
171
168
|
'name': 'None (string)',
|
|
172
169
|
'outputs': [{name: 'result', type: 'object'}],
|
|
173
170
|
})
|
|
@@ -289,10 +286,7 @@ export class PackageFunctions {
|
|
|
289
286
|
|
|
290
287
|
@grok.decorators.func({
|
|
291
288
|
'outputs': [{'name': 'result', 'type': 'viewer'}],
|
|
292
|
-
'tags': [
|
|
293
|
-
'viewer',
|
|
294
|
-
],
|
|
295
|
-
'meta': {showInGallery: 'false'},
|
|
289
|
+
'meta': {showInGallery: 'false', role: 'viewer'},
|
|
296
290
|
'name': 'MCL',
|
|
297
291
|
'description': 'Markov clustering viewer',
|
|
298
292
|
})
|
|
@@ -984,11 +978,48 @@ export class PackageFunctions {
|
|
|
984
978
|
@grok.decorators.func({
|
|
985
979
|
'name': 'Pareto front',
|
|
986
980
|
'description': 'Pareto front viewer',
|
|
987
|
-
'tags': ['viewer'],
|
|
988
981
|
'outputs': [{'name': 'result', 'type': 'viewer'}],
|
|
989
|
-
'meta': {'icon': 'icons/pareto-front-viewer.svg'},
|
|
982
|
+
'meta': {'icon': 'icons/pareto-front-viewer.svg', role: 'viewer'},
|
|
990
983
|
})
|
|
991
984
|
static paretoFrontViewer(): DG.Viewer {
|
|
992
985
|
return new ParetoFrontViewer();
|
|
993
986
|
}
|
|
987
|
+
|
|
988
|
+
@grok.decorators.func({
|
|
989
|
+
'top-menu': 'Chem | Calculate | Train pMPO...',
|
|
990
|
+
'name': 'trainPmpo',
|
|
991
|
+
'description': 'Train probabilistic multi-parameter optimization (pMPO) model',
|
|
992
|
+
})
|
|
993
|
+
static trainPmpo(): void {
|
|
994
|
+
const df = grok.shell.t;
|
|
995
|
+
if (df === null) {
|
|
996
|
+
grok.shell.warning('No dataframe is opened');
|
|
997
|
+
return;
|
|
998
|
+
}
|
|
999
|
+
|
|
1000
|
+
if (!Pmpo.isTableValid(df))
|
|
1001
|
+
return;
|
|
1002
|
+
|
|
1003
|
+
const pMPO = new Pmpo(df);
|
|
1004
|
+
pMPO.runTrainingApp();
|
|
1005
|
+
}
|
|
1006
|
+
|
|
1007
|
+
@grok.decorators.func({
|
|
1008
|
+
//'top-menu': 'ML | Apply pMPO...',
|
|
1009
|
+
'name': 'applyPmpo',
|
|
1010
|
+
'description': 'Apply trained probabilistic multi-parameter optimization (pMPO) model to score samples',
|
|
1011
|
+
})
|
|
1012
|
+
static async applyPmpo(
|
|
1013
|
+
@grok.decorators.param({'type': 'dataframe'}) table: DG.DataFrame,
|
|
1014
|
+
@grok.decorators.param({'type': 'file'}) file: DG.FileInfo,
|
|
1015
|
+
): Promise<void> {
|
|
1016
|
+
try {
|
|
1017
|
+
const params = await loadPmpoParams(file);
|
|
1018
|
+
const predName = table.columns.getUnusedName('pMPO score');
|
|
1019
|
+
const prediction = Pmpo.predict(table, params, predName);
|
|
1020
|
+
table.columns.add(prediction, true);
|
|
1021
|
+
} catch (err) {
|
|
1022
|
+
grok.shell.warning(`Failed to apply pMPO: ${err instanceof Error ? err.message : 'the platform issue.'}`);
|
|
1023
|
+
}
|
|
1024
|
+
}
|
|
994
1025
|
}
|
|
package/src/pareto-optimization/utils.ts
CHANGED
|
@@ -6,6 +6,7 @@ import {OPT_TYPE} from './defs';
|
|
|
6
6
|
|
|
7
7
|
export const PALETTE = [DG.Color.darkGreen, DG.Color.yellow, DG.Color.darkRed];
|
|
8
8
|
|
|
9
|
+
/** Return output color palette w.r.t. the specified type of optimization */
|
|
9
10
|
export function getOutputPalette(type: OPT_TYPE): number[] {
|
|
10
11
|
if (type === OPT_TYPE.MIN)
|
|
11
12
|
return [...PALETTE];
|
|
@@ -13,13 +14,14 @@ export function getOutputPalette(type: OPT_TYPE): number[] {
|
|
|
13
14
|
return [...PALETTE].reverse();
|
|
14
15
|
}
|
|
15
16
|
|
|
16
|
-
export function getColorScaleDiv(type: OPT_TYPE): HTMLElement {
|
17
|
+
/** Return div with color scale description */
|
|
18
|
+
export function getColorScaleDiv(type: OPT_TYPE, useMinMax: boolean = true): HTMLElement {
|
|
17
19
|
const scale = ui.label('Color scale:');
|
|
18
20
|
scale.style.paddingRight = '7px';
|
|
19
21
|
const elems = [scale];
|
|
20
|
-
const minLbl = ui.label('min');
|
|
22
|
+
const minLbl = ui.label(useMinMax ? 'min' : 'worst');
|
|
21
23
|
const midLbl = ui.label('. . .');
|
|
22
|
-
const maxLbl = ui.label('max');
|
|
24
|
+
const maxLbl = ui.label(useMinMax ? 'max' : 'best');
|
|
23
25
|
const palette = getOutputPalette(type);
|
|
24
26
|
|
|
25
27
|
const colorElems = [minLbl, midLbl, maxLbl].map((el, idx) => {
|
|
@@ -36,4 +38,4 @@ export function getColorScaleDiv(type: OPT_TYPE): HTMLElement {
|
|
|
36
38
|
elems.push(...colorElems);
|
|
37
39
|
|
|
38
40
|
return ui.divH(elems);
|
|
39
|
-
}
|
|
41
|
+
} // getColorScaleDiv
|
|
package/src/probabilistic-scoring/pmpo-defs.ts
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
// Constants and type definitions for probabilistic scoring (pMPO)
|
|
2
|
+
// Link: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
|
|
3
|
+
|
|
4
|
+
/** Minimum number of samples required to compute pMPO */
|
|
5
|
+
export const MIN_SAMPLES_COUNT = 10;
|
|
6
|
+
|
|
7
|
+
export const PMPO_NON_APPLICABLE = 'pMPO is not applicable';
|
|
8
|
+
export const PMPO_COMPUTE_FAILED = 'Failed to compute pMPO parameters';
|
|
9
|
+
|
|
10
|
+
/** Basic statistics for desired and non-desired compounds */
|
|
11
|
+
export type BasicStats = {
|
|
12
|
+
desAvg: number,
|
|
13
|
+
desStd: number,
|
|
14
|
+
nonDesAvg: number,
|
|
15
|
+
nonDesStd: number,
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
/** Descriptor statistics including basic stats, t-statistics and p-value */
|
|
19
|
+
export type DescriptorStatistics = BasicStats & {
|
|
20
|
+
desLen: number,
|
|
21
|
+
nonSesLen: number,
|
|
22
|
+
tstat: number,
|
|
23
|
+
pValue: number,
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
/** Cutoff parameters for the basic functions of the pMPO model */
|
|
27
|
+
export type Cutoff = {
|
|
28
|
+
cutoff: number,
|
|
29
|
+
cutoffDesired: number,
|
|
30
|
+
cutoffNotDesired: number,
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
/** Generalized Sigmoid parameters for the desirability functions of the pMPO model */
|
|
34
|
+
export type SigmoidParams = {
|
|
35
|
+
pX0: number,
|
|
36
|
+
b: number,
|
|
37
|
+
c: number,
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
/** pMPO parameters including basic stats, cutoffs, sigmoid params, z-score, weight, intersections */
|
|
41
|
+
export type PmpoParams = BasicStats & Cutoff & SigmoidParams & {
|
|
42
|
+
zScore: number,
|
|
43
|
+
weight: number,
|
|
44
|
+
intersections: number[],
|
|
45
|
+
x0: number,
|
|
46
|
+
xBound: number,
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
export type CorrelationTriple = [string, string, number];
|
|
50
|
+
|
|
51
|
+
const DESIRED = 'desired';
|
|
52
|
+
const NON_DESIRED = 'non-desired';
|
|
53
|
+
const MEAN = 'Mean';
|
|
54
|
+
const STD = 'Std';
|
|
55
|
+
const T_STAT = 't-statistics';
|
|
56
|
+
export const P_VAL = 'p-value';
|
|
57
|
+
const MEAN_DES = `${MEAN}(${DESIRED})`;
|
|
58
|
+
const MEAN_NON_DES = `${MEAN}(${NON_DESIRED})`;
|
|
59
|
+
const STD_DES = `${STD}(${DESIRED})`;
|
|
60
|
+
const STD_NON_DES = `${STD}(${NON_DESIRED})`;
|
|
61
|
+
|
|
62
|
+
/** Map of statistic field names to their display titles */
|
|
63
|
+
export const STAT_TO_TITLE_MAP = new Map([
|
|
64
|
+
['desAvg', MEAN_DES],
|
|
65
|
+
['desStd', STD_DES],
|
|
66
|
+
['nonDesAvg', MEAN_NON_DES],
|
|
67
|
+
['nonDesStd', STD_NON_DES],
|
|
68
|
+
['tstat', T_STAT],
|
|
69
|
+
['pValue', P_VAL],
|
|
70
|
+
]);
|
|
71
|
+
|
|
72
|
+
export const DESCR_TITLE = 'Descriptor';
|
|
73
|
+
export const DESCR_TABLE_TITLE = DESCR_TITLE + ' Statistics';
|
|
74
|
+
export const SELECTED_TITLE = 'Selected';
|
|
75
|
+
export const WEIGHT_TITLE = 'Weight';
|
|
76
|
+
export const SCORES_TITLE = 'pMPO score';
|
|
77
|
+
export const DESIRABILITY_COL_NAME = 'Desirability';
|
|
78
|
+
|
|
79
|
+
/** Minimum p-value threshold for filtering descriptors */
|
|
80
|
+
export const P_VAL_TRES_MIN = 0.01;
|
|
81
|
+
|
|
82
|
+
/** Minimum R-squared threshold for filtering correlated descriptors */
|
|
83
|
+
export const R2_MIN = 0.01;
|
|
84
|
+
|
|
85
|
+
/** Minimum q-cutoff for descriptors in the pMPO model */
|
|
86
|
+
export const Q_CUTOFF_MIN = 0.01;
|
|
87
|
+
|
|
88
|
+
/** Colors used for selected and skipped descriptors */
|
|
89
|
+
export enum COLORS {
|
|
90
|
+
SELECTED = 'rgb(26, 146, 26)',
|
|
91
|
+
SKIPPED = 'rgb(208, 57, 67)',
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
export const TINY = 1e-8;
|
|
95
|
+
|
|
96
|
+
/** Folder path for storing pMPO models */
|
|
97
|
+
export const FOLDER = 'System:AppData/Chem/mpo';
|
|
98
|
+
|
|
99
|
+
/** Desirability profile properties type */
|
|
100
|
+
export type DesirabilityProfileProperties = Record<string, {
|
|
101
|
+
line: [number, number][],
|
|
102
|
+
weight: number,
|
|
103
|
+
min?: number,
|
|
104
|
+
max?: number,
|
|
105
|
+
}>;
|
|
106
|
+
|
|
107
|
+
export const STAT_GRID_HEIGHT = 75;
|
|
108
|
+
export const DESIRABILITY_COLUMN_WIDTH = 305;
|