@datagrok/eda 1.4.10 → 1.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,272 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import '../../css/pareto.css';
6
+ import {NumericFeature, OPT_TYPE, DIFFERENCE, RATIO, COL_NAME, PC_MAX_COLS, ColorOpt} from './defs';
7
+ import {getColorScaleDiv, getOutputPalette} from './utils';
8
+
9
+ /** Pareto front optimization app: docks a Pareto front viewer, a PC plot and an inputs form to the table view of the dataframe */
10
+ export class ParetoOptimizer {
11
+ private df: DG.DataFrame; // table to be analyzed
12
+ private numCols: DG.Column[]; // numerical columns of the table
13
+ private numColNames: string[] = []; // names of the numerical columns
14
+ private numColsCount: number;
15
+ private rowCount: number;
16
+ private features = new Map<string, NumericFeature>(); // optimization settings by column name
17
+ private view: DG.TableView;
18
+ private pcPlot: DG.Viewer<DG.IPcPlotSettings>;
19
+ private toUpdatePcCols = false; // true if the number of numerical columns exceeds PC_MAX_COLS
20
+ private paretoFrontViewer: DG.Viewer;
21
+ private resultColName: string; // name of the column with the results
22
+ private intervalId: ReturnType<typeof setInterval> | null = null; // timer started in showResultOptCol; portable across DOM and Node typings
23
+ private inputsMap = new Map<string, DG.InputBase>(); // choice inputs by column name
24
+ private pcPlotNode: DG.DockNode | null = null;
25
+ private inputFormNode: DG.DockNode | null = null;
26
+ private toChangeParetoViewerOptions = true; // when false, computeParetoFront does not touch the viewer options
27
+ /** Create the Pareto front viewer and the PC plot for the dataframe, dock them to its table view and subscribe to events */
28
+ constructor(df: DG.DataFrame) {
29
+ this.df = df;
30
+ const cols = df.columns;
31
+ const colList = cols.toList();
32
+ this.numCols = colList.filter((col) => col.isNumerical);
33
+ this.numColNames = this.numCols.map((col) => col.name);
34
+ this.numColsCount = this.numCols.length;
35
+ this.rowCount = df.rowCount;
36
+ this.view = grok.shell.getTableView(df.name);
37
+
38
+ this.paretoFrontViewer = DG.Viewer.fromType('Pareto front', df);
39
+
40
+ const paretoFrontViewerNode = this.view.dockManager.dock(
41
+ this.paretoFrontViewer,
42
+ DG.DOCK_TYPE.RIGHT,
43
+ null,
44
+ undefined,
45
+ RATIO.VIEWER,
46
+ );
47
+
48
+ this.pcPlot = DG.Viewer.pcPlot(df, {legendPosition: 'Top'});
49
+ const gridNode = this.view.dockManager.findNode(this.view.grid.root) ?? paretoFrontViewerNode;
50
+ this.pcPlotNode = this.view.dockManager.dock(this.pcPlot, DG.DOCK_TYPE.DOWN, gridNode, undefined, RATIO.VIEWER);
51
+ this.toUpdatePcCols = this.numColNames.length > PC_MAX_COLS;
52
+
53
+ this.resultColName = this.df.columns.getUnusedName(COL_NAME.OPT);
54
+ this.showResultOptCol();
55
+
56
+ this.view.subs.push(...this.getSubscriptions());
57
+ } // constructor
58
+ /** Check that the table is non-empty and has at least two numeric columns; show a warning and return false otherwise */
59
+ private isApplicable(): boolean {
60
+ if (this.rowCount < 1) {
61
+ grok.shell.warning('Cannot compute Pareto front: the table is empty.');
62
+ return false;
63
+ }
64
+
65
+ if (this.numColsCount < 2) {
66
+ grok.shell.warning('Cannot compute Pareto front: at least two numeric columns are required.');
67
+ return false;
68
+ }
69
+
70
+ return true;
71
+ } // isApplicable
72
+ /** Build the inputs form, pass the optimization settings to the Pareto front viewer and refresh the visualization */
73
+ public run(): void {
74
+ if (!this.isApplicable())
75
+ return;
76
+
77
+ this.buildInputsForm();
78
+ this.computeParetoFront();
79
+ this.updateVisualization();
80
+ } // run
81
+ /** Return subscriptions that clean up docked nodes, the polling timer, column colors and features once the Pareto front viewer is detached */
82
+ private getSubscriptions() {
83
+ return [
84
+ this.paretoFrontViewer.onDetached.subscribe(() => {
85
+ if (this.pcPlotNode !== null) {
86
+ this.view.dockManager.close(this.pcPlotNode);
87
+ this.pcPlotNode = null;
88
+ }
89
+
90
+ if (this.inputFormNode !== null) {
91
+ this.view.dockManager.close(this.inputFormNode);
92
+ this.inputFormNode = null;
93
+ }
94
+ this.stopChecking(); // the timer must not outlive the viewer: otherwise it keeps polling the grid forever
95
+ this.numCols.forEach((col) => col.colors.setDisabled());
96
+ this.features.clear();
97
+ }),
98
+ ];
99
+ } // getSubscriptions
100
+ /** Create a form with a nullable min/max choice input per numeric column and dock it to the left of the view */
101
+ private buildInputsForm(): void {
102
+ const form = ui.form([]);
103
+ form.classList.add('pareto-input-form');
104
+ form.append(ui.h1('Optimize'));
105
+
106
+ this.numCols.forEach((col, idx) => {
107
+ const feature: NumericFeature = {
108
+ toOptimize: this.numColsCount - idx - 1 < DIFFERENCE, // initially, only the last DIFFERENCE numeric columns are optimized
109
+ optType: OPT_TYPE.MIN,
110
+ };
111
+
112
+ const name = col.name;
113
+
114
+ const optimizationTypeInput = ui.input.choice(name, {
115
+ value: feature.toOptimize ? feature.optType : null,
116
+ nullable: true,
117
+ items: [null, OPT_TYPE.MIN, OPT_TYPE.MAX],
118
+ onValueChanged: (val) => {
119
+ if (val == null) // null choice: the column is excluded from optimization
120
+ feature.toOptimize = false;
121
+ else {
122
+ feature.toOptimize = true;
123
+ feature.optType = val;
124
+ }
125
+
126
+ this.computeParetoFront();
127
+ this.updateVisualization();
128
+ },
129
+ });
130
+ ui.tooltip.bind(optimizationTypeInput.input, () => {
131
+ if (feature.toOptimize)
132
+ return ui.markdown(`M${feature.optType.slice(1)} **${name}** during Pareto optimization`); // first character of the type is replaced with 'M'
133
+
134
+ return ui.markdown(`Ignore **${name}** during Pareto optimization`);
135
+ });
136
+
137
+ this.inputsMap.set(name, optimizationTypeInput);
138
+
139
+ form.append(optimizationTypeInput.root);
140
+ this.features.set(name, feature);
141
+ });
142
+
143
+ this.inputFormNode = this.view.dockManager.dock(form, DG.DOCK_TYPE.LEFT, null, undefined, RATIO.FORM);
144
+ } // buildInputsForm
145
+ /** Pass the names of the columns to minimize and to maximize to the Pareto front viewer (skipped if toChangeParetoViewerOptions is false) */
146
+ private computeParetoFront(): void {
147
+ if (!this.toChangeParetoViewerOptions)
148
+ return;
149
+
150
+ const featureNames = this.getMinMaxFeatureNames();
151
+
152
+ this.paretoFrontViewer.setOptions({
153
+ minimizeColumnNames: featureNames.toMin,
154
+ maximizeColumnNames: featureNames.toMax,
155
+ });
156
+ } // computeParetoFront
157
+ /** Apply the color options to the PC plot and, when columns are limited, make sure all optimized columns (colNames) are shown */
158
+ private updatePcPlot(colNames: string[], colorOpt: ColorOpt): void {
159
+ this.pcPlot.setOptions(colorOpt);
160
+
161
+ // update value columns: check that optimized cols are included
162
+ if (this.toUpdatePcCols) {
163
+ const prevColNames = this.pcPlot.getOptions().look['columnNames'];
164
+
165
+ let toUpdatePcPlotColNames = false;
166
+
167
+ colNames.forEach((name) => {
168
+ if (!prevColNames.includes(name))
169
+ toUpdatePcPlotColNames = true;
170
+ });
171
+
172
+ if (toUpdatePcPlotColNames) {
173
+ const valColNames = [...colNames];
174
+ const notIncluded = this.numColNames.filter((name) => !valColNames.includes(name));
175
+ valColNames.push(...notIncluded.slice(0, Math.max(0, PC_MAX_COLS - colNames.length))); // clamp: a negative end would append almost all remaining columns
176
+ this.pcPlot.setOptions({columnNames: valColNames});
177
+ }
178
+ }
179
+ } // updatePcPlot
180
+ /** Refresh the PC plot, the color coding of the optimized columns and the grid tooltips w.r.t. the current features */
181
+ private updateVisualization(): void {
182
+ const colNames: string[] = [];
183
+
184
+ this.features.forEach((fea, name) => {
185
+ if (fea.toOptimize)
186
+ colNames.push(name);
187
+ });
188
+
189
+ const colorOpt: ColorOpt = {'colorColumnName': (colNames.length > 0) ? this.resultColName: undefined}; // no coloring if nothing is optimized
190
+ this.updatePcPlot(colNames, colorOpt);
191
+ this.markOptColsWithColor();
192
+ this.updateTooltips();
193
+ } // updateVisualization
194
+ /** Check the grid every second until the result column appears; then make it visible and stop checking */
195
+ private showResultOptCol(): void {
196
+ // show a column with the results, once it is added
197
+ this.intervalId = setInterval(() => {
198
+ const gridCol = this.view.grid.columns.byName(this.resultColName);
199
+
200
+ if (gridCol !== null) {
201
+ gridCol.visible = true;
202
+ this.stopChecking();
203
+ }
204
+ }, 1000);
205
+ } // showResultOptCol
206
+ /** Stop the timer started in showResultOptCol (no-op if it is not running) */
207
+ private stopChecking(): void {
208
+ if (this.intervalId) {
209
+ clearInterval(this.intervalId);
210
+ this.intervalId = null;
211
+ }
212
+ }
213
+ /** Disable color coding of all numeric columns and apply linear color coding to the optimized ones */
214
+ private markOptColsWithColor(): void {
215
+ this.numCols.forEach((col) => col.colors.setDisabled());
216
+
217
+ this.features.forEach((fea, name) => {
218
+ if (!fea.toOptimize)
219
+ return;
220
+
221
+ const col = this.df.col(name);
222
+
223
+ if (col != null)
224
+ col.colors.setLinear(getOutputPalette(fea.optType), {min: col.stats.min, max: col.stats.max});
225
+ });
226
+ } // markOptColsWithColor
227
+ /** Set the grid tooltip for headers of feature columns: the column name plus, for optimized features, the type and color scale */
228
+ private updateTooltips(): void {
229
+ const features = this.features;
230
+
231
+ this.view.grid.onCellTooltip(function(cell, x, y) {
232
+ if (cell.isColHeader) {
233
+ const cellCol = cell.tableColumn;
234
+ if (cellCol) {
235
+ const name = cellCol.name; // use the narrowed value instead of re-reading the nullable property
236
+ const feature = features.get(name);
237
+
238
+ if (feature !== undefined) {
239
+ const elems = [ui.markdown(`**${name}**`)];
240
+
241
+ if (feature.toOptimize) {
242
+ elems.push(ui.markdown(`This feature is **${feature.optType}d** during Pareto optimization.`));
243
+ elems.push(getColorScaleDiv(feature.optType));
244
+ }
245
+
246
+ ui.tooltip.show(ui.divV(elems), x, y);
247
+
248
+ return true;
249
+ }
250
+
251
+ return false;
252
+ }
253
+ }
254
+ });
255
+ } // updateTooltips
256
+ /** Split the names of the optimized features into the ones to minimize (toMin) and the ones to maximize (toMax) */
257
+ private getMinMaxFeatureNames() {
258
+ const minimizeColumnNames: string[] = [];
259
+ const maximizeColumnNames: string[] = [];
260
+
261
+ this.features.forEach((fea, name) => {
262
+ if (fea.toOptimize) {
263
+ if (fea.optType === OPT_TYPE.MIN)
264
+ minimizeColumnNames.push(name);
265
+ else
266
+ maximizeColumnNames.push(name);
267
+ }
268
+ });
269
+
270
+ return {toMin: minimizeColumnNames, toMax: maximizeColumnNames};
271
+ }
272
+ } // ParetoOptimizer
@@ -0,0 +1,41 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {OPT_TYPE} from './defs';
6
+
7
+ export const PALETTE = [DG.Color.darkGreen, DG.Color.yellow, DG.Color.darkRed]; // base colors; getOutputPalette returns them as is for OPT_TYPE.MIN and reversed otherwise
8
+
9
+ /** Get a fresh copy of the palette: in the original order for minimization, in the reversed order otherwise */
10
+ export function getOutputPalette(type: OPT_TYPE): number[] {
11
+ const colors = [...PALETTE];
12
+ if (type !== OPT_TYPE.MIN)
13
+ colors.reverse();
14
+ return colors;
15
+ }
16
+
17
+ /** Build a horizontal div describing the color scale: a title followed by the colored captions of the scale ends */
18
+ export function getColorScaleDiv(type: OPT_TYPE, useMinMax: boolean = true): HTMLElement {
19
+ const lowText = useMinMax ? 'min' : 'worst';
20
+ const highText = useMinMax ? 'max' : 'best';
21
+
22
+ const title = ui.label('Color scale:');
23
+ title.style.paddingRight = '7px';
24
+
25
+ const labels = [ui.label(lowText), ui.label('. . .'), ui.label(highText)];
26
+ const colors = getOutputPalette(type);
27
+
28
+ labels.forEach((label, pos) => {
29
+ label.style.marginRight = '5px';
30
+
31
+ // the middle label is a separator: it is neither bold nor colored
32
+ if (pos === 1)
33
+ return;
34
+
35
+ label.style.fontWeight = 'bold';
36
+ label.style.color = DG.Color.toRgb(colors[pos]);
37
+ });
38
+
39
+ // the title goes first, then the three captions
40
+ return ui.divH([title, ...labels]);
41
+ } // getColorScaleDiv
@@ -0,0 +1,108 @@
1
+ // Constants and type definitions for probabilistic scoring (pMPO)
2
+ // Link: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
3
+
4
+ /** Minimum number of samples required to compute pMPO */
5
+ export const MIN_SAMPLES_COUNT = 10;
6
+ // Message texts for the "not applicable" and "computation failed" cases
7
+ export const PMPO_NON_APPLICABLE = 'pMPO is not applicable';
8
+ export const PMPO_COMPUTE_FAILED = 'Failed to compute pMPO parameters';
9
+
10
+ /** Basic statistics for desired and non-desired compounds (titles of the fields are given in STAT_TO_TITLE_MAP) */
11
+ export type BasicStats = {
12
+ desAvg: number, // shown as 'Mean(desired)'
13
+ desStd: number, // shown as 'Std(desired)'
14
+ nonDesAvg: number, // shown as 'Mean(non-desired)'
15
+ nonDesStd: number, // shown as 'Std(non-desired)'
16
+ };
17
+
18
+ /** Descriptor statistics including basic stats, t-statistics and p-value */
19
+ export type DescriptorStatistics = BasicStats & {
20
+ desLen: number, // presumably the number of desired samples - TODO confirm
21
+ nonSesLen: number, // NOTE(review): looks like a typo for "nonDesLen" (cf. desLen); renaming would affect all users of this type
22
+ tstat: number, // shown as 't-statistics'
23
+ pValue: number, // shown as 'p-value'
24
+ };
25
+
26
+ /** Cutoff parameters for the basic functions of the pMPO model */
27
+ export type Cutoff = {
28
+ cutoff: number,
29
+ cutoffDesired: number, // NOTE(review): presumably the cutoff related to the desired compounds - confirm
30
+ cutoffNotDesired: number, // NOTE(review): presumably the cutoff related to the non-desired compounds - confirm
31
+ };
32
+
33
+ /** Generalized Sigmoid parameters for the desirability functions of the pMPO model */
34
+ export type SigmoidParams = {
35
+ pX0: number, // NOTE(review): the exact meaning of pX0, b and c is defined by the sigmoid formula used elsewhere - confirm there
36
+ b: number,
37
+ c: number,
38
+ };
39
+
40
+ /** pMPO parameters including basic stats, cutoffs, sigmoid params, z-score, weight, intersections */
41
+ export type PmpoParams = BasicStats & Cutoff & SigmoidParams & {
42
+ zScore: number,
43
+ weight: number,
44
+ intersections: number[],
45
+ x0: number, // NOTE(review): x0 and xBound are not described in the type's summary - document once their meaning is confirmed
46
+ xBound: number,
47
+ };
48
+
49
+ export type CorrelationTriple = [string, string, number]; // presumably two descriptor names and their correlation value - TODO confirm
50
+ // Parts of the display titles of the statistics
51
+ const DESIRED = 'desired';
52
+ const NON_DESIRED = 'non-desired';
53
+ const MEAN = 'Mean';
54
+ const STD = 'Std';
55
+ const T_STAT = 't-statistics';
56
+ export const P_VAL = 'p-value';
57
+ const MEAN_DES = `${MEAN}(${DESIRED})`; // 'Mean(desired)'
58
+ const MEAN_NON_DES = `${MEAN}(${NON_DESIRED})`; // 'Mean(non-desired)'
59
+ const STD_DES = `${STD}(${DESIRED})`; // 'Std(desired)'
60
+ const STD_NON_DES = `${STD}(${NON_DESIRED})`; // 'Std(non-desired)'
61
+
62
+ /** Map of statistic field names to their display titles */
63
+ export const STAT_TO_TITLE_MAP = new Map([
64
+ ['desAvg', MEAN_DES],
65
+ ['desStd', STD_DES],
66
+ ['nonDesAvg', MEAN_NON_DES],
67
+ ['nonDesStd', STD_NON_DES],
68
+ ['tstat', T_STAT],
69
+ ['pValue', P_VAL],
70
+ ]); // the keys match the field names of BasicStats and DescriptorStatistics (desLen and nonSesLen have no title)
71
+
72
+ export const DESCR_TITLE = 'Descriptor'; // titles and names used for the pMPO outputs
73
+ export const DESCR_TABLE_TITLE = DESCR_TITLE + ' Statistics'; // 'Descriptor Statistics'
74
+ export const SELECTED_TITLE = 'Selected';
75
+ export const WEIGHT_TITLE = 'Weight';
76
+ export const SCORES_TITLE = 'pMPO score';
77
+ export const DESIRABILITY_COL_NAME = 'Desirability';
78
+
79
+ /** Minimum p-value threshold for filtering descriptors */
80
+ export const P_VAL_TRES_MIN = 0.01; // NOTE(review): "TRES" presumably abbreviates "threshold"
81
+
82
+ /** Minimum R-squared threshold for filtering correlated descriptors */
83
+ export const R2_MIN = 0.01;
84
+
85
+ /** Minimum q-cutoff for descriptors in the pMPO model */
86
+ export const Q_CUTOFF_MIN = 0.01;
87
+
88
+ /** Colors used for selected and skipped descriptors (CSS rgb() strings) */
89
+ export enum COLORS {
90
+ SELECTED = 'rgb(26, 146, 26)', // green
91
+ SKIPPED = 'rgb(208, 57, 67)', // red
92
+ };
93
+
94
+ export const TINY = 1e-8; // small positive constant; NOTE(review): presumably a numeric tolerance - confirm at the usage sites
95
+
96
+ /** Folder path for storing pMPO models */
97
+ export const FOLDER = 'System:AppData/Chem/mpo';
98
+
99
+ /** Desirability profile properties type: a record with a line, a weight and optional min/max bounds per string key */
100
+ export type DesirabilityProfileProperties = Record<string, {
101
+ line: [number, number][], // list of number pairs; presumably (x, y) points of the desirability line - TODO confirm
102
+ weight: number,
103
+ min?: number,
104
+ max?: number,
105
+ }>;
106
+ // Sizes of UI elements; NOTE(review): presumably in pixels - confirm at the usage sites
107
+ export const STAT_GRID_HEIGHT = 75;
108
+ export const DESIRABILITY_COLUMN_WIDTH = 305;