@datagrok/eda 1.4.13 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -5
- package/dist/111.js +1 -1
- package/dist/111.js.map +1 -1
- package/dist/128.js +1 -1
- package/dist/128.js.map +1 -1
- package/dist/153.js +1 -1
- package/dist/153.js.map +1 -1
- package/dist/23.js +1 -1
- package/dist/23.js.map +1 -1
- package/dist/234.js +1 -1
- package/dist/234.js.map +1 -1
- package/dist/242.js +1 -1
- package/dist/242.js.map +1 -1
- package/dist/260.js +1 -1
- package/dist/260.js.map +1 -1
- package/dist/33.js +1 -1
- package/dist/33.js.map +1 -1
- package/dist/348.js +1 -1
- package/dist/348.js.map +1 -1
- package/dist/377.js +1 -1
- package/dist/377.js.map +1 -1
- package/dist/397.js +2 -0
- package/dist/397.js.map +1 -0
- package/dist/412.js +1 -1
- package/dist/412.js.map +1 -1
- package/dist/415.js +1 -1
- package/dist/415.js.map +1 -1
- package/dist/501.js +1 -1
- package/dist/501.js.map +1 -1
- package/dist/531.js +1 -1
- package/dist/531.js.map +1 -1
- package/dist/583.js +1 -1
- package/dist/583.js.map +1 -1
- package/dist/589.js +1 -1
- package/dist/589.js.map +1 -1
- package/dist/603.js +1 -1
- package/dist/603.js.map +1 -1
- package/dist/656.js +1 -1
- package/dist/656.js.map +1 -1
- package/dist/682.js +1 -1
- package/dist/682.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/727.js +1 -1
- package/dist/727.js.map +1 -1
- package/dist/731.js +1 -1
- package/dist/731.js.map +1 -1
- package/dist/738.js +1 -1
- package/dist/738.js.map +1 -1
- package/dist/763.js +1 -1
- package/dist/763.js.map +1 -1
- package/dist/778.js +1 -1
- package/dist/778.js.map +1 -1
- package/dist/783.js +1 -1
- package/dist/783.js.map +1 -1
- package/dist/793.js +1 -1
- package/dist/793.js.map +1 -1
- package/dist/810.js +1 -1
- package/dist/810.js.map +1 -1
- package/dist/860.js +1 -1
- package/dist/860.js.map +1 -1
- package/dist/907.js +1 -1
- package/dist/907.js.map +1 -1
- package/dist/950.js +1 -1
- package/dist/950.js.map +1 -1
- package/dist/980.js +1 -1
- package/dist/980.js.map +1 -1
- package/dist/990.js +1 -1
- package/dist/990.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +5 -5
- package/src/package.ts +2 -1
- package/src/pareto-optimization/pareto-optimizer.ts +1 -1
- package/src/pls/pls-constants.ts +3 -1
- package/src/pls/pls-tools.ts +73 -69
- package/src/probabilistic-scoring/data-generator.ts +48 -3
- package/src/probabilistic-scoring/pmpo-defs.ts +30 -2
- package/src/probabilistic-scoring/pmpo-utils.ts +143 -52
- package/src/probabilistic-scoring/prob-scoring.ts +475 -102
- package/src/probabilistic-scoring/stat-tools.ts +1 -1
- package/src/tests/pareto-tests.ts +13 -15
- package/src/tests/pmpo-tests.ts +643 -3
- package/test-console-output-1.log +221 -93
- package/test-record-1.mp4 +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/* eslint-disable max-len */
|
|
2
2
|
// Probabilistic scoring (pMPO) features
|
|
3
|
-
//
|
|
3
|
+
// Source paper https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
|
|
4
4
|
|
|
5
5
|
import * as grok from 'datagrok-api/grok';
|
|
6
6
|
import * as ui from 'datagrok-api/ui';
|
|
@@ -12,17 +12,24 @@ import '../../css/pmpo.css';
|
|
|
12
12
|
|
|
13
13
|
import {getDesiredTables, getDescriptorStatistics, getBoolPredictionColumn, getPmpoEvaluation} from './stat-tools';
|
|
14
14
|
import {MIN_SAMPLES_COUNT, PMPO_NON_APPLICABLE, DescriptorStatistics, P_VAL_TRES_MIN, DESCR_TITLE,
|
|
15
|
-
R2_MIN, Q_CUTOFF_MIN, PmpoParams, SCORES_TITLE, DESCR_TABLE_TITLE,
|
|
15
|
+
R2_MIN, Q_CUTOFF_MIN, PmpoParams, SCORES_TITLE, DESCR_TABLE_TITLE, SELECTED_TITLE,
|
|
16
16
|
P_VAL, DESIRABILITY_COL_NAME, STAT_GRID_HEIGHT, DESIRABILITY_COLUMN_WIDTH, WEIGHT_TITLE,
|
|
17
17
|
P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT, USE_SIGMOID_DEFAULT, ROC_TRESHOLDS,
|
|
18
18
|
FPR_TITLE, TPR_TITLE, COLORS, THRESHOLD, AUTO_TUNE_MAX_APPLICABLE_ROWS, DEFAULT_OPTIMIZATION_SETTINGS,
|
|
19
|
-
P_VAL_TRES_MAX, R2_MAX, Q_CUTOFF_MAX, OptimalPoint, LOW_PARAMS_BOUNDS, HIGH_PARAMS_BOUNDS, FORMAT
|
|
19
|
+
P_VAL_TRES_MAX, R2_MAX, Q_CUTOFF_MAX, OptimalPoint, LOW_PARAMS_BOUNDS, HIGH_PARAMS_BOUNDS, FORMAT,
|
|
20
|
+
EQUALITY_SIGN, SIGN_OPTIONS, THRESHOLDED_DESIRABILITY_COL_NAME, PMPO_COMPUTE_FAILED,
|
|
21
|
+
PmpoInputId, TooltipContent, PmpoValidationResult} from './pmpo-defs';
|
|
20
22
|
import {addSelectedDescriptorsCol, getDescriptorStatisticsTable, getFilteredByPvalue, getFilteredByCorrelations,
|
|
21
23
|
getModelParams, getDescrTooltip, saveModel, getScoreTooltip, getDesirabilityProfileJson, getCorrelationTriples,
|
|
22
|
-
addCorrelationColumns, setPvalColumnColorCoding, setCorrColumnColorCoding, PmpoError
|
|
24
|
+
addCorrelationColumns, setPvalColumnColorCoding, setCorrColumnColorCoding, PmpoError, getInitCol,
|
|
25
|
+
getBoolDesirabilityColData, isDesirabilityValid,
|
|
26
|
+
getDesirabilityColumnFromCategories,
|
|
27
|
+
getSelectedCategories} from './pmpo-utils';
|
|
23
28
|
import {getOutputPalette} from '../pareto-optimization/utils';
|
|
24
29
|
import {OPT_TYPE} from '../pareto-optimization/defs';
|
|
25
30
|
import {optimizeNM} from './nelder-mead';
|
|
31
|
+
import {getMissingValsIndices} from '../missing-values-imputation/knn-imputer';
|
|
32
|
+
import {DesirabilityProfile} from '@datagrok-libraries/statistics/src/mpo/mpo';
|
|
26
33
|
|
|
27
34
|
export type PmpoTrainingResult = {
|
|
28
35
|
params: Map<string, PmpoParams>,
|
|
@@ -40,6 +47,7 @@ export type PmpoAppItems = {
|
|
|
40
47
|
rocCurve: DG.Viewer;
|
|
41
48
|
confusionMatrix: DG.Viewer;
|
|
42
49
|
controls: Controls;
|
|
50
|
+
profile: DesirabilityProfile | null;
|
|
43
51
|
};
|
|
44
52
|
|
|
45
53
|
/** Class implementing probabilistic MPO (pMPO) model training and prediction */
|
|
@@ -78,12 +86,6 @@ export class Pmpo {
|
|
|
78
86
|
return false;
|
|
79
87
|
}
|
|
80
88
|
|
|
81
|
-
// Check desirability
|
|
82
|
-
if (desirability.type !== DG.COLUMN_TYPE.BOOL) {
|
|
83
|
-
showWarning(`: "${desirability.name}" must be boolean column.`);
|
|
84
|
-
return false;
|
|
85
|
-
}
|
|
86
|
-
|
|
87
89
|
if (desirability.stats.stdev === 0) { // TRUE & FALSE
|
|
88
90
|
showWarning(`: "${desirability.name}" has a single category.`);
|
|
89
91
|
return false;
|
|
@@ -98,8 +100,8 @@ export class Pmpo {
|
|
|
98
100
|
return false;
|
|
99
101
|
}
|
|
100
102
|
|
|
101
|
-
if (col.stats.missingValueCount
|
|
102
|
-
showWarning(`: "${col.name}" contains missing values.`);
|
|
103
|
+
if (col.stats.missingValueCount === col.length) {
|
|
104
|
+
showWarning(`: "${col.name}" contains only missing values.`);
|
|
103
105
|
return false;
|
|
104
106
|
}
|
|
105
107
|
|
|
@@ -124,29 +126,20 @@ export class Pmpo {
|
|
|
124
126
|
return false;
|
|
125
127
|
}
|
|
126
128
|
|
|
127
|
-
let
|
|
128
|
-
let validNumericColsCount = 0;
|
|
129
|
+
let validColsCount = 0;
|
|
129
130
|
|
|
130
131
|
// Check numeric columns and boolean columns
|
|
131
132
|
for (const col of df.columns) {
|
|
132
|
-
if (col.isNumerical) {
|
|
133
|
-
if (
|
|
134
|
-
++
|
|
135
|
-
}
|
|
136
|
-
++boolColsCount;
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
// Check boolean columns count
|
|
140
|
-
if (boolColsCount < 1) {
|
|
141
|
-
if (toShowMsg)
|
|
142
|
-
grok.shell.warning(PMPO_NON_APPLICABLE + ': no boolean columns.');
|
|
143
|
-
return false;
|
|
133
|
+
if (col.isNumerical || (col.type === DG.TYPE.BOOL)) {
|
|
134
|
+
if (col.stats.stdev > 0)
|
|
135
|
+
++validColsCount;
|
|
136
|
+
}
|
|
144
137
|
}
|
|
145
138
|
|
|
146
139
|
// Check valid numeric columns count
|
|
147
|
-
if (
|
|
140
|
+
if (validColsCount < 2) {
|
|
148
141
|
if (toShowMsg)
|
|
149
|
-
grok.shell.warning(PMPO_NON_APPLICABLE + ':
|
|
142
|
+
grok.shell.warning(PMPO_NON_APPLICABLE + ': not enough of non-constant columns.');
|
|
150
143
|
return false;
|
|
151
144
|
}
|
|
152
145
|
|
|
@@ -210,10 +203,12 @@ export class Pmpo {
|
|
|
210
203
|
static predict(df: DG.DataFrame, params: Map<string, PmpoParams>, useSigmoid: boolean, predictionName: string): DG.Column {
|
|
211
204
|
const count = df.rowCount;
|
|
212
205
|
const scores = new Float64Array(count).fill(0);
|
|
206
|
+
const colsWithMissingVals: DG.Column[] = [];
|
|
213
207
|
|
|
214
208
|
// Compute pMPO scores (see https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/)
|
|
215
209
|
params.forEach((param, name) => {
|
|
216
210
|
const col = df.col(name);
|
|
211
|
+
|
|
217
212
|
const b = param.b;
|
|
218
213
|
const c = param.c;
|
|
219
214
|
const x0 = param.cutoff;
|
|
@@ -225,6 +220,9 @@ export class Pmpo {
|
|
|
225
220
|
if (col == null)
|
|
226
221
|
throw new Error(`Failed to apply pMPO: inconsistent data, no column "${name}" in the table "${df.name}"`);
|
|
227
222
|
|
|
223
|
+
if (col.stats.missingValueCount > 0)
|
|
224
|
+
colsWithMissingVals.push(col);
|
|
225
|
+
|
|
228
226
|
const vals = col.getRawData();
|
|
229
227
|
|
|
230
228
|
if (useSigmoid) {
|
|
@@ -247,11 +245,13 @@ export class Pmpo {
|
|
|
247
245
|
} // predict
|
|
248
246
|
|
|
249
247
|
private params: Map<string, PmpoParams> | null = null;
|
|
248
|
+
private desirabilityProfile: DesirabilityProfile | null = null;
|
|
250
249
|
|
|
251
250
|
private table: DG.DataFrame;
|
|
252
251
|
private view: DG.TableView;
|
|
253
|
-
private
|
|
252
|
+
private desirabilityColumns: DG.Column[];
|
|
254
253
|
private numericCols: DG.Column[];
|
|
254
|
+
private missingValsIndeces: Map<string, number[]>;
|
|
255
255
|
|
|
256
256
|
private initTable = DG.DataFrame.create();
|
|
257
257
|
|
|
@@ -262,6 +262,9 @@ export class Pmpo {
|
|
|
262
262
|
|
|
263
263
|
private desirabilityProfileRoots = new Map<string, HTMLElement>();
|
|
264
264
|
|
|
265
|
+
private tresholdedColumn: DG.Column | null = null;
|
|
266
|
+
private threshColTooltip: string | null = null;
|
|
267
|
+
|
|
265
268
|
private rocCurve = DG.Viewer.scatterPlot(this.initTable, {
|
|
266
269
|
showTitle: true,
|
|
267
270
|
showSizeSelector: false,
|
|
@@ -281,9 +284,10 @@ export class Pmpo {
|
|
|
281
284
|
constructor(df: DG.DataFrame, view?: DG.TableView) {
|
|
282
285
|
this.table = df;
|
|
283
286
|
this.view = view ?? (grok.shell.tableView(df.name) ?? grok.shell.addTableView(df));
|
|
284
|
-
this.
|
|
287
|
+
this.desirabilityColumns = this.getDesirabilityColumns();
|
|
285
288
|
this.numericCols = this.getValidNumericCols();
|
|
286
289
|
this.predictionName = df.columns.getUnusedName(SCORES_TITLE);
|
|
290
|
+
this.missingValsIndeces = getMissingValsIndices(this.numericCols);
|
|
287
291
|
};
|
|
288
292
|
|
|
289
293
|
/** Sets the ribbon panels in the table view (removes the first panel) */
|
|
@@ -476,7 +480,9 @@ export class Pmpo {
|
|
|
476
480
|
|
|
477
481
|
grid.sort([this.predictionName], [false]);
|
|
478
482
|
|
|
479
|
-
grid.col(name)
|
|
483
|
+
const scoresCol = grid.col(name);
|
|
484
|
+
scoresCol!.format = '0.0000';
|
|
485
|
+
scoresCol!.isTextColorCoded = true;
|
|
480
486
|
|
|
481
487
|
// set tooltips
|
|
482
488
|
grid.onCellTooltip((cell, x, y) => {
|
|
@@ -487,6 +493,12 @@ export class Pmpo {
|
|
|
487
493
|
ui.tooltip.show(getScoreTooltip(), x, y);
|
|
488
494
|
|
|
489
495
|
return true;
|
|
496
|
+
} else {
|
|
497
|
+
if (this.tresholdedColumn != null && cell.tableColumn.name === this.tresholdedColumn.name) {
|
|
498
|
+
ui.tooltip.show(ui.markdown(this.threshColTooltip ?? ''), x, y);
|
|
499
|
+
|
|
500
|
+
return true;
|
|
501
|
+
}
|
|
490
502
|
}
|
|
491
503
|
|
|
492
504
|
return false;
|
|
@@ -505,13 +517,15 @@ export class Pmpo {
|
|
|
505
517
|
this.desirabilityProfileRoots.clear();
|
|
506
518
|
|
|
507
519
|
const desirabilityProfile = getDesirabilityProfileJson(this.params, useSigmoidalCorrection, '', '', true);
|
|
520
|
+
this.desirabilityProfile = getDesirabilityProfileJson(this.params, useSigmoidalCorrection, '', '', false);
|
|
508
521
|
|
|
509
522
|
// Set weights
|
|
510
523
|
const descrNames = descrStatsTable.col(DESCR_TITLE)!.toList();
|
|
511
524
|
const weightsRaw = descrStatsTable.col(WEIGHT_TITLE)!.getRawData();
|
|
512
525
|
const props = desirabilityProfile.properties;
|
|
526
|
+
const names: string[] = Object.keys(props);
|
|
513
527
|
|
|
514
|
-
for (const name of
|
|
528
|
+
for (const name of names)
|
|
515
529
|
weightsRaw[descrNames.indexOf(name)] = props[name].weight;
|
|
516
530
|
|
|
517
531
|
// Set HTML elements
|
|
@@ -523,20 +537,16 @@ export class Pmpo {
|
|
|
523
537
|
if (rootsCol == null)
|
|
524
538
|
return;
|
|
525
539
|
|
|
526
|
-
const rows = rootsCol.querySelectorAll('div.d4-flex-row.ui-div');
|
|
540
|
+
const rows = rootsCol.querySelectorAll('div.d4-flex-row.ui-div.statistics-mpo-row');
|
|
527
541
|
|
|
528
|
-
rows.forEach((row) => {
|
|
542
|
+
rows.forEach((row, idx) => {
|
|
529
543
|
const children = row.children;
|
|
530
544
|
if (children.length < 2) // expecting descriptor name, weight & profile
|
|
531
545
|
return;
|
|
532
546
|
|
|
533
|
-
const
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
const descrName = (descrDivChildren[0] as HTMLElement).innerText;
|
|
538
|
-
|
|
539
|
-
this.desirabilityProfileRoots.set(descrName, children[2] as HTMLElement);
|
|
547
|
+
const profileRoot = children[2] as HTMLElement;
|
|
548
|
+
profileRoot.style.width = '100%';
|
|
549
|
+
this.desirabilityProfileRoots.set(names[idx], profileRoot);
|
|
540
550
|
});
|
|
541
551
|
} // updateDesirabilityProfileData
|
|
542
552
|
|
|
@@ -586,6 +596,26 @@ export class Pmpo {
|
|
|
586
596
|
});
|
|
587
597
|
} // updateConfusionMatrix
|
|
588
598
|
|
|
599
|
+
/** Returns indices of the rows with missing values in any of the specified columns (collected per column, so an index may occur more than once) */
|
|
600
|
+
private getIndecesOfMissingValues(colNames: string[]): number[] {
|
|
601
|
+
const indeces: number[] = [];
|
|
602
|
+
|
|
603
|
+
colNames.forEach((name) => {
|
|
604
|
+
const inds = this.missingValsIndeces.get(name);
|
|
605
|
+
|
|
606
|
+
if (inds != null)
|
|
607
|
+
indeces.push(...inds);
|
|
608
|
+
});
|
|
609
|
+
|
|
610
|
+
return indeces;
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
/** Sets null values for the predicted scores in rows with missing values in any of the descriptors */
|
|
614
|
+
private setNulls(scores: DG.Column, indeces: number[]): void {
|
|
615
|
+
const raw = scores.getRawData();
|
|
616
|
+
indeces.forEach((ind) => raw[ind] = DG.FLOAT_NULL);
|
|
617
|
+
}
|
|
618
|
+
|
|
589
619
|
/** Fits the pMPO model to the given data and updates the viewers accordingly */
|
|
590
620
|
private fitAndUpdateViewers(df: DG.DataFrame, descriptors: DG.ColumnList, desirability: DG.Column,
|
|
591
621
|
pValTresh: number, r2Tresh: number, qCutoff: number, useSigmoid: boolean): void {
|
|
@@ -599,6 +629,10 @@ export class Pmpo {
|
|
|
599
629
|
|
|
600
630
|
const prediction = Pmpo.predict(df, this.params, useSigmoid, this.predictionName);
|
|
601
631
|
|
|
632
|
+
// Set nulls for rows with missing values in any of the selected descriptors
|
|
633
|
+
const indecesOfMissingVals = this.getIndecesOfMissingValues(selectedByCorr);
|
|
634
|
+
this.setNulls(prediction, indecesOfMissingVals);
|
|
635
|
+
|
|
602
636
|
// Mark predictions with a color
|
|
603
637
|
prediction.colors.setLinear(getOutputPalette(OPT_TYPE.MAX), {min: prediction.stats.min, max: prediction.stats.max});
|
|
604
638
|
|
|
@@ -663,6 +697,7 @@ export class Pmpo {
|
|
|
663
697
|
rocCurve: this.rocCurve,
|
|
664
698
|
confusionMatrix: this.confusionMatrix,
|
|
665
699
|
controls: this.getInputForm(false),
|
|
700
|
+
profile: this.desirabilityProfile,
|
|
666
701
|
};
|
|
667
702
|
} // getViewers
|
|
668
703
|
|
|
@@ -670,35 +705,76 @@ export class Pmpo {
|
|
|
670
705
|
private getInputForm(addBtn: boolean): Controls {
|
|
671
706
|
const form = ui.form([]);
|
|
672
707
|
form.append(ui.h2('Training data'));
|
|
673
|
-
const
|
|
708
|
+
const initDesirability = getInitCol(this.desirabilityColumns);
|
|
709
|
+
|
|
710
|
+
// returns the desirability column to be used for computations, based on the input desirability column and threshold settings
|
|
711
|
+
const getDesirabilityColumn = (): DG.Column => {
|
|
712
|
+
// remove existing thresholded column if exists
|
|
713
|
+
if (this.tresholdedColumn != null) {
|
|
714
|
+
this.table.columns.remove(this.tresholdedColumn.name);
|
|
715
|
+
this.tresholdedColumn = null;
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
if (desInput.value!.type === DG.COLUMN_TYPE.BOOL)
|
|
719
|
+
return desInput.value!;
|
|
720
|
+
|
|
721
|
+
const boolDesirabilityData = (desInput.value!.type === DG.COLUMN_TYPE.STRING) ?
|
|
722
|
+
getDesirabilityColumnFromCategories(desInput.value!, desirableCategoriesInput!.value!) :
|
|
723
|
+
getBoolDesirabilityColData(
|
|
724
|
+
desInput.value!,
|
|
725
|
+
desirabilityThresholdInput.value!,
|
|
726
|
+
signInput.value as EQUALITY_SIGN,
|
|
727
|
+
);
|
|
728
|
+
|
|
729
|
+
this.tresholdedColumn = boolDesirabilityData.column;
|
|
730
|
+
this.threshColTooltip = boolDesirabilityData.tooltip;
|
|
731
|
+
|
|
732
|
+
this.tresholdedColumn.name = this.table.columns.getUnusedName(THRESHOLDED_DESIRABILITY_COL_NAME);
|
|
733
|
+
this.table.columns.add(this.tresholdedColumn);
|
|
734
|
+
|
|
735
|
+
return this.tresholdedColumn;
|
|
736
|
+
}; // getDesirabilityColumn
|
|
674
737
|
|
|
675
738
|
// Function to run computations on input changes
|
|
676
739
|
const runComputations = () => {
|
|
677
|
-
|
|
678
|
-
|
|
740
|
+
if (!areInputsValid())
|
|
741
|
+
return;
|
|
679
742
|
|
|
743
|
+
try {
|
|
680
744
|
this.fitAndUpdateViewers(
|
|
681
745
|
this.table,
|
|
682
746
|
DG.DataFrame.fromColumns(descrInput.value).columns,
|
|
683
|
-
|
|
747
|
+
getDesirabilityColumn(),
|
|
684
748
|
pInput.value!,
|
|
685
749
|
rInput.value!,
|
|
686
750
|
qInput.value!,
|
|
687
751
|
useSigmoidInput.value,
|
|
688
752
|
);
|
|
689
753
|
} catch (err) {
|
|
690
|
-
err instanceof PmpoError
|
|
691
|
-
grok.shell.warning(err.message)
|
|
692
|
-
|
|
754
|
+
if (err instanceof PmpoError) {
|
|
755
|
+
grok.shell.warning(err.message);
|
|
756
|
+
ui.tooltip.bind(desInput.input, err.message);
|
|
757
|
+
ui.tooltip.bind(descrInput.input, err.message);
|
|
758
|
+
} else {
|
|
759
|
+
const msg = err instanceof Error ? err.message : PMPO_COMPUTE_FAILED + ': the platform issue.';
|
|
760
|
+
grok.shell.error(msg);
|
|
761
|
+
ui.tooltip.bind(desInput.input, msg);
|
|
762
|
+
ui.tooltip.bind(descrInput.input, msg);
|
|
763
|
+
};
|
|
764
|
+
|
|
765
|
+
desInput.input.classList.add('d4-invalid');
|
|
766
|
+
descrInput.input.classList.add('d4-invalid');
|
|
693
767
|
}
|
|
694
|
-
};
|
|
768
|
+
}; // runComputations
|
|
695
769
|
|
|
696
770
|
// Descriptor columns input
|
|
697
771
|
const descrInput = ui.input.columns('Descriptors', {
|
|
698
772
|
table: this.table,
|
|
699
773
|
nullable: false,
|
|
700
|
-
available:
|
|
701
|
-
checked:
|
|
774
|
+
available: this.numericCols.map((col) => col.name),
|
|
775
|
+
checked: this.numericCols.filter((col) => {
|
|
776
|
+
return (col.name !== initDesirability.name) && (col.stats.stdev > 0) && (col.stats.missingValueCount < col.length);
|
|
777
|
+
}).map((col) => col.name),
|
|
702
778
|
tooltipText: 'Descriptor columns used for model construction.',
|
|
703
779
|
onValueChanged: (value) => {
|
|
704
780
|
if (value != null) {
|
|
@@ -709,21 +785,163 @@ export class Pmpo {
|
|
|
709
785
|
});
|
|
710
786
|
form.append(descrInput.root);
|
|
711
787
|
|
|
712
|
-
|
|
713
|
-
|
|
788
|
+
descrInput.addValidator(() => {
|
|
789
|
+
if (descrInput.value == null || descrInput.value.length < 1)
|
|
790
|
+
return 'Select at least one descriptor column.';
|
|
791
|
+
if (desInput.value != null && descrInput.value.includes(desInput.value))
|
|
792
|
+
return 'Desirability column cannot be used as a descriptor.';
|
|
793
|
+
const zeroStdevCols = descrInput.value.filter((col) => col.stats.stdev === 0).map((col) => col.name);
|
|
794
|
+
if (zeroStdevCols.length > 0)
|
|
795
|
+
return `Descriptor columns with zero variance: ${zeroStdevCols.join(', ')}`;
|
|
796
|
+
const nullCols = descrInput.value.filter((col) => col.stats.missingValueCount === col.length).map((col) => col.name);
|
|
797
|
+
if (nullCols.length > 0)
|
|
798
|
+
return `Descriptor columns with only missing values: ${nullCols.join(', ')}`;
|
|
799
|
+
return null;
|
|
800
|
+
});
|
|
801
|
+
|
|
802
|
+
// Desirability column input and related controls
|
|
803
|
+
const setVisibilityOfDesirabilityAuxInputs = (value: DG.Column) => {
|
|
804
|
+
if (value.type === DG.COLUMN_TYPE.BOOL)
|
|
805
|
+
desOptionsInputDiv.hidden = true;
|
|
806
|
+
else {
|
|
807
|
+
desOptionsInputDiv.hidden = false;
|
|
808
|
+
const isString = (value.type === DG.COLUMN_TYPE.STRING);
|
|
809
|
+
desirabilityThresholdInput.root.hidden = isString;
|
|
810
|
+
signInput.root.hidden = isString;
|
|
811
|
+
}
|
|
812
|
+
}; // setVisibilityOfDesirabilityAuxInputs
|
|
813
|
+
|
|
814
|
+
const desInput = ui.input.column('Desirability', {
|
|
714
815
|
nullable: false,
|
|
715
|
-
value:
|
|
716
|
-
|
|
816
|
+
value: initDesirability,
|
|
817
|
+
table: this.table,
|
|
818
|
+
filter: (col) => this.desirabilityColumns.includes(col),
|
|
717
819
|
tooltipText: 'Desirability column.',
|
|
718
820
|
onValueChanged: (value) => {
|
|
719
821
|
if (value != null) {
|
|
822
|
+
updateDesirableCategoriesInput();
|
|
823
|
+
setVisibilityOfDesirabilityAuxInputs(value);
|
|
824
|
+
areComputationsBlocked = true;
|
|
825
|
+
desirabilityThresholdInput.value = Math.round(value.stats.avg * 100) / 100;
|
|
826
|
+
areComputationsBlocked = false;
|
|
720
827
|
areTunedSettingsUsed = false;
|
|
721
828
|
checkAutoTuneAndRun();
|
|
722
829
|
}
|
|
723
|
-
},
|
|
830
|
+
}, // onValueChanged
|
|
724
831
|
});
|
|
725
832
|
form.append(desInput.root);
|
|
726
833
|
|
|
834
|
+
desInput.addValidator(() => {
|
|
835
|
+
if (desInput.value == null)
|
|
836
|
+
return 'Select a desirability column.';
|
|
837
|
+
if (descrInput.value != null && descrInput.value.includes(desInput.value))
|
|
838
|
+
return 'Desirability column cannot be used as a descriptor.';
|
|
839
|
+
if (desInput.value.type === DG.COLUMN_TYPE.BOOL) {
|
|
840
|
+
if (desInput.value.stats.stdev === 0)
|
|
841
|
+
return 'All desirability values are the same - scoring is not feasible.';
|
|
842
|
+
} else if (desInput.value.type === DG.COLUMN_TYPE.STRING) {
|
|
843
|
+
if (desInput.value.categories.length < 2)
|
|
844
|
+
return 'String desirability column must have at least 2 categories.';
|
|
845
|
+
} else {
|
|
846
|
+
if (desInput.value.stats.stdev === 0) {
|
|
847
|
+
return desInput.value.stats.missingValueCount < desInput.value.length ?
|
|
848
|
+
'All desirability values are the same - scoring is not feasible.' :
|
|
849
|
+
'Empty column cannot be used as desirability column.';
|
|
850
|
+
}
|
|
851
|
+
if (desirabilityThresholdInput.value == null)
|
|
852
|
+
return 'Specify non-null desirability threshold.';
|
|
853
|
+
if (!isDesirabilityValid(desInput.value, desirabilityThresholdInput.value, signInput.value as EQUALITY_SIGN)) {
|
|
854
|
+
return `All compounds are either desired or non-desired for ${desInput.value.name} ` +
|
|
855
|
+
`${signInput.value} ${desirabilityThresholdInput.value}. Adjust the threshold or condition.`;
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
return null;
|
|
859
|
+
});
|
|
860
|
+
|
|
861
|
+
let areComputationsBlocked = false;
|
|
862
|
+
|
|
863
|
+
const signInput = ui.input.choice('Condition', {
|
|
864
|
+
value: EQUALITY_SIGN.DEFAULT,
|
|
865
|
+
items: SIGN_OPTIONS,
|
|
866
|
+
nullable: false,
|
|
867
|
+
tooltipText: 'How to compare numeric Desirability column values against the threshold.',
|
|
868
|
+
onValueChanged: (_value) => {
|
|
869
|
+
areTunedSettingsUsed = false;
|
|
870
|
+
checkAutoTuneAndRun();
|
|
871
|
+
},
|
|
872
|
+
});
|
|
873
|
+
|
|
874
|
+
const desirabilityThresholdInput = ui.input.float('Threshold', {
|
|
875
|
+
value: Math.round(initDesirability.stats.avg * 100) / 100,
|
|
876
|
+
nullable: false,
|
|
877
|
+
tooltipText: 'Boundary value that separates desired from non-desired compounds.',
|
|
878
|
+
format: '0.00',
|
|
879
|
+
onValueChanged: (value) => {
|
|
880
|
+
if (value != null) {
|
|
881
|
+
if (areComputationsBlocked)
|
|
882
|
+
return;
|
|
883
|
+
areTunedSettingsUsed = false;
|
|
884
|
+
checkAutoTuneAndRun();
|
|
885
|
+
}
|
|
886
|
+
},
|
|
887
|
+
});
|
|
888
|
+
|
|
889
|
+
desirabilityThresholdInput.addValidator(() => {
|
|
890
|
+
if (desInput.value == null || desInput.value.type === DG.COLUMN_TYPE.BOOL ||
|
|
891
|
+
desInput.value.type === DG.COLUMN_TYPE.STRING)
|
|
892
|
+
return null;
|
|
893
|
+
if (desirabilityThresholdInput.value == null)
|
|
894
|
+
return 'Specify non-null desirability threshold.';
|
|
895
|
+
if (!isDesirabilityValid(desInput.value, desirabilityThresholdInput.value, signInput.value as EQUALITY_SIGN))
|
|
896
|
+
return 'Adjust the threshold to get both desired and non-desired groups.';
|
|
897
|
+
return null;
|
|
898
|
+
});
|
|
899
|
+
|
|
900
|
+
const desOptionsInputDiv = ui.divV([signInput.root, desirabilityThresholdInput.root]);
|
|
901
|
+
|
|
902
|
+
form.append(desOptionsInputDiv);
|
|
903
|
+
|
|
904
|
+
let desirableCategoriesInput: DG.InputBase<string[] | null> | null = null;
|
|
905
|
+
|
|
906
|
+
// For string columns - input for selecting which categories are considered desirable
|
|
907
|
+
const updateDesirableCategoriesInput = () => {
|
|
908
|
+
if (desirableCategoriesInput != null) {
|
|
909
|
+
desirableCategoriesInput.root.remove();
|
|
910
|
+
desirableCategoriesInput = null;
|
|
911
|
+
}
|
|
912
|
+
|
|
913
|
+
if (desInput.value?.type === DG.COLUMN_TYPE.STRING) {
|
|
914
|
+
desirableCategoriesInput = ui.input.multiChoice('Preferred', {
|
|
915
|
+
value: getSelectedCategories(desInput.value!.categories),
|
|
916
|
+
items: desInput.value!.categories,
|
|
917
|
+
nullable: false,
|
|
918
|
+
tooltipText: 'Select which categories should be treated as desirable.',
|
|
919
|
+
onValueChanged: (value) => {
|
|
920
|
+
if (value != null) {
|
|
921
|
+
if (areComputationsBlocked)
|
|
922
|
+
return;
|
|
923
|
+
areTunedSettingsUsed = false;
|
|
924
|
+
checkAutoTuneAndRun();
|
|
925
|
+
}
|
|
926
|
+
},
|
|
927
|
+
});
|
|
928
|
+
|
|
929
|
+
desirableCategoriesInput.addValidator(() => {
|
|
930
|
+
if (desirableCategoriesInput!.value == null || desirableCategoriesInput!.value.length === 0)
|
|
931
|
+
return 'Select at least one preferable category.';
|
|
932
|
+
if (desInput.value != null && desirableCategoriesInput!.value.length === desInput.value.categories.length)
|
|
933
|
+
return 'At least one category must be non-preferable.';
|
|
934
|
+
return null;
|
|
935
|
+
});
|
|
936
|
+
|
|
937
|
+
desOptionsInputDiv.append(desirableCategoriesInput.root);
|
|
938
|
+
}
|
|
939
|
+
}; // updateDesirableCategoriesInput
|
|
940
|
+
|
|
941
|
+
setVisibilityOfDesirabilityAuxInputs(desInput.value!);
|
|
942
|
+
|
|
943
|
+
// Settings inputs
|
|
944
|
+
|
|
727
945
|
const header = ui.h2('Settings');
|
|
728
946
|
form.append(header);
|
|
729
947
|
ui.tooltip.bind(header, 'Settings of the pMPO model.');
|
|
@@ -744,25 +962,52 @@ export class Pmpo {
|
|
|
744
962
|
// Flag indicating whether optimal parameters from auto-tuning are currently used
|
|
745
963
|
let areTunedSettingsUsed = false;
|
|
746
964
|
|
|
965
|
+
// Auto-tune parameters and run computations; if auto-tune is not applicable, just run computations with current settings
|
|
747
966
|
const setOptimalParametersAndRun = async () => {
|
|
967
|
+
await new Promise((resolve) => setTimeout(resolve, 50));
|
|
968
|
+
|
|
969
|
+
if (!areInputsValid())
|
|
970
|
+
return;
|
|
971
|
+
|
|
748
972
|
if (!areTunedSettingsUsed) {
|
|
749
973
|
const optimalSettings = await this.getOptimalSettings(
|
|
750
974
|
DG.DataFrame.fromColumns(descrInput.value).columns,
|
|
751
|
-
|
|
975
|
+
getDesirabilityColumn(),
|
|
752
976
|
useSigmoidInput.value,
|
|
753
977
|
);
|
|
754
978
|
|
|
755
|
-
if (optimalSettings.success) {
|
|
979
|
+
if (optimalSettings.state === 'success') {
|
|
756
980
|
pInput.value = Math.max(optimalSettings.pValTresh, P_VAL_TRES_MIN);
|
|
757
981
|
rInput.value = Math.max(optimalSettings.r2Tresh, R2_MIN);
|
|
758
982
|
qInput.value = Math.max(optimalSettings.qCutoff, Q_CUTOFF_MIN);
|
|
759
983
|
areTunedSettingsUsed = true;
|
|
984
|
+
runComputations();
|
|
760
985
|
} else
|
|
761
|
-
|
|
762
|
-
|
|
986
|
+
grok.shell.warning(optimalSettings.msg);
|
|
987
|
+
/*descrInput.input.classList.add('d4-invalid');
|
|
988
|
+
desInput.input.classList.add('d4-invalid');
|
|
989
|
+
ui.tooltip.bind(descrInput.input, optimalSettings.msg);
|
|
990
|
+
ui.tooltip.bind(desInput.input, optimalSettings.msg);*/
|
|
991
|
+
} else
|
|
992
|
+
runComputations();
|
|
993
|
+
}; // setOptimalParametersAndRun
|
|
763
994
|
|
|
764
|
-
|
|
765
|
-
|
|
995
|
+
// Validates all inputs before running computations using registered validators
|
|
996
|
+
const areInputsValid = (): boolean => {
|
|
997
|
+
const results = [
|
|
998
|
+
descrInput.validate(),
|
|
999
|
+
desInput.validate(),
|
|
1000
|
+
desirabilityThresholdInput.validate(),
|
|
1001
|
+
pInput.validate(),
|
|
1002
|
+
rInput.validate(),
|
|
1003
|
+
qInput.validate(),
|
|
1004
|
+
];
|
|
1005
|
+
|
|
1006
|
+
if (desirableCategoriesInput != null)
|
|
1007
|
+
results.push(desirableCategoriesInput.validate());
|
|
1008
|
+
|
|
1009
|
+
return results.every((r) => r);
|
|
1010
|
+
}; // areInputsValid
|
|
766
1011
|
|
|
767
1012
|
const checkAutoTuneAndRun = () => {
|
|
768
1013
|
if (autoTuneInput.value)
|
|
@@ -784,6 +1029,8 @@ export class Pmpo {
|
|
|
784
1029
|
// If auto-tuning is turned on, set optimal parameters and run computations
|
|
785
1030
|
if (value)
|
|
786
1031
|
await setOptimalParametersAndRun();
|
|
1032
|
+
else
|
|
1033
|
+
runComputations();
|
|
787
1034
|
},
|
|
788
1035
|
});
|
|
789
1036
|
form.append(autoTuneInput.root);
|
|
@@ -810,6 +1057,14 @@ export class Pmpo {
|
|
|
810
1057
|
});
|
|
811
1058
|
form.append(pInput.root);
|
|
812
1059
|
|
|
1060
|
+
pInput.addValidator(() => {
|
|
1061
|
+
if (pInput.value == null)
|
|
1062
|
+
return 'P-value is required.';
|
|
1063
|
+
if (pInput.value < P_VAL_TRES_MIN || pInput.value > P_VAL_TRES_MAX)
|
|
1064
|
+
return `P-value must be between ${P_VAL_TRES_MIN} and ${P_VAL_TRES_MAX}.`;
|
|
1065
|
+
return null;
|
|
1066
|
+
});
|
|
1067
|
+
|
|
813
1068
|
// R² threshold input
|
|
814
1069
|
const rInput = ui.input.float('R²', {
|
|
815
1070
|
// @ts-ignore
|
|
@@ -834,6 +1089,14 @@ export class Pmpo {
|
|
|
834
1089
|
});
|
|
835
1090
|
form.append(rInput.root);
|
|
836
1091
|
|
|
1092
|
+
rInput.addValidator(() => {
|
|
1093
|
+
if (rInput.value == null)
|
|
1094
|
+
return 'R² is required.';
|
|
1095
|
+
if (rInput.value < R2_MIN || rInput.value > R2_MAX)
|
|
1096
|
+
return `R² must be between ${R2_MIN} and ${R2_MAX}.`;
|
|
1097
|
+
return null;
|
|
1098
|
+
});
|
|
1099
|
+
|
|
837
1100
|
// q-cutoff input
|
|
838
1101
|
const qInput = ui.input.float('q-cutoff', {
|
|
839
1102
|
// @ts-ignore
|
|
@@ -857,6 +1120,14 @@ export class Pmpo {
|
|
|
857
1120
|
});
|
|
858
1121
|
form.append(qInput.root);
|
|
859
1122
|
|
|
1123
|
+
qInput.addValidator(() => {
|
|
1124
|
+
if (qInput.value == null)
|
|
1125
|
+
return 'Q-cutoff is required.';
|
|
1126
|
+
if (qInput.value < Q_CUTOFF_MIN || qInput.value > Q_CUTOFF_MAX)
|
|
1127
|
+
return `Q-cutoff must be between ${Q_CUTOFF_MIN} and ${Q_CUTOFF_MAX}.`;
|
|
1128
|
+
return null;
|
|
1129
|
+
});
|
|
1130
|
+
|
|
860
1131
|
const setEnability = (toEnable: boolean) => {
|
|
861
1132
|
pInput.enabled = toEnable;
|
|
862
1133
|
rInput.enabled = toEnable;
|
|
@@ -894,24 +1165,110 @@ export class Pmpo {
|
|
|
894
1165
|
};
|
|
895
1166
|
} // getInputForm
|
|
896
1167
|
|
|
897
|
-
/**
|
|
898
|
-
|
|
1168
|
+
/**
 * Validates all pMPO inputs and returns structured errors without mutating the DOM.
 *
 * Checks are performed in order; the first three groups short-circuit:
 *  1. settings (`pValue`, `r2`, `qCutoff`) must be non-null and inside their ranges;
 *  2. `descriptors` and `desirability` must be non-null;
 *  3. at least one descriptor must be selected, and the desirability column must not be among them;
 *  4. descriptor columns must not be constant or consist of missing values only;
 *  5. the desirability column must split the data into two non-empty groups (rules depend on its type).
 *
 * @param params - current values of the form inputs
 * @returns `valid` is `true` only if no error was registered. Note that for groups 1 and 2
 *   the result is `{valid: false}` with an EMPTY `errors` map - no message is attached here
 *   (presumably those inputs report the problem via their own validators - verify against the caller).
 */
static validateInputs(params: {
  descriptors: DG.Column[] | null,
  desirability: DG.Column | null,
  threshold: number | null,
  sign: EQUALITY_SIGN,
  desirableCategories: string[] | null,
  pValue: number | null,
  r2: number | null,
  qCutoff: number | null,
}): PmpoValidationResult {
  const errors = new Map<PmpoInputId, TooltipContent>();
  const {descriptors, desirability, threshold, sign, desirableCategories, pValue, r2, qCutoff} = params;

  // Settings null or out of range
  if (pValue == null || r2 == null || qCutoff == null)
    return {valid: false, errors};

  if ((pValue <= 0) || (pValue > 1) || (r2 < 0) || (r2 > 1) || (qCutoff <= 0) || (qCutoff > 1))
    return {valid: false, errors};

  // Column inputs null
  if (descriptors == null || desirability == null)
    return {valid: false, errors};

  // At least one descriptor
  if (descriptors.length < 1) {
    errors.set('descriptors', 'Select at least one descriptor column.');
    return {valid: false, errors};
  }

  // Desirability column must not be among descriptors
  if (descriptors.includes(desirability)) {
    const msg = 'Desirability column cannot be used as a descriptor.';
    errors.set('descriptors', msg);
    errors.set('desirability', msg);
    return {valid: false, errors};
  }

  // Unusable descriptor columns. Both findings are stored under the same 'descriptors' key,
  // so they are merged into one tooltip: setting them one after another would make
  // the second message silently replace the first.
  const emptyCols = descriptors.filter((col) => col.stats.missingValueCount === col.length);
  // Columns with only missing values are reported once, by the more specific message below
  const constantCols = descriptors.filter((col) => (col.stats.stdev === 0) && !emptyCols.includes(col));
  const descriptorIssues: string[] = [];

  if (constantCols.length > 0) {
    const names = constantCols.map((col) => col.name).join(', ');
    descriptorIssues.push(`Descriptor columns with zero variance cannot be used: **${names}**`);
  }

  if (emptyCols.length > 0) {
    const names = emptyCols.map((col) => col.name).join(', ');
    descriptorIssues.push(`Descriptor columns with only missing values cannot be used: **${names}**`);
  }

  if (descriptorIssues.length > 0)
    errors.set('descriptors', () => ui.markdown(descriptorIssues.join('\n\n')));

  // Validate desirability column based on its type
  if (desirability.type === DG.COLUMN_TYPE.BOOL) {
    if (desirability.stats.stdev === 0)
      errors.set('desirability', 'All desirability values are the same - scoring is not feasible.');
  } else if (desirability.type === DG.COLUMN_TYPE.STRING) {
    const catsCount = desirability.categories.length;
    const selectedCatsCount = desirableCategories?.length ?? 0;

    if (catsCount < 2)
      errors.set('desirability', 'String desirability column must have at least 2 categories.');
    else if (selectedCatsCount === 0)
      errors.set('desirability', 'Select at least one preferable category.');
    else if (selectedCatsCount === catsCount)
      errors.set('desirability', 'At least one category must be non-preferable.');
  } else {
    // Numeric desirability
    if (desirability.stats.stdev === 0) {
      // Zero stdev is reported either as a constant column or as an empty one
      errors.set('desirability',
        desirability.stats.missingValueCount < desirability.length ?
          'All desirability values are the same - scoring is not feasible.' :
          'Empty column cannot be used as desirability column.',
      );
    } else if (threshold == null)
      errors.set('desirability', 'Specify non-null desirability threshold.');
    else if (!isDesirabilityValid(desirability, threshold, sign)) {
      // The condition puts every row into the same group: flag both the column and the threshold
      errors.set('desirability', () => ui.markdown(`All compounds are either desired or non-desired for
<div align="center">
**${desirability.name} ${sign} ${threshold}.**
</div>
Adjust the threshold or condition to get both groups.`));
      errors.set('threshold', 'Adjust the threshold to get both desired and non-desired groups.');
    }
  }

  return {valid: !errors.size, errors};
} // validateInputs
|
|
1253
|
+
|
|
1254
|
+
/**
 * Retrieves columns of the data frame that can be offered as the desirability column:
 * boolean, numerical, or string ones.
 *
 * Only the column type is checked here; the content (constant or empty columns,
 * number of categories) is verified separately in `validateInputs`.
 *
 * @returns matching columns, in the order they appear in the table
 */
private getDesirabilityColumns(): DG.Column[] {
  const res: DG.Column[] = [];

  for (const col of this.table.columns) {
    if ((col.type === DG.COLUMN_TYPE.BOOL) || col.isNumerical || (col.type === DG.COLUMN_TYPE.STRING))
      res.push(col);
  }

  return res;
} // getDesirabilityColumns
|
|
908
1265
|
|
|
909
1266
|
/** Retrieves all numerical columns from the data frame */
|
|
910
1267
|
private getValidNumericCols(): DG.Column[] {
|
|
911
1268
|
const res: DG.Column[] = [];
|
|
912
1269
|
|
|
913
1270
|
for (const col of this.table.columns) {
|
|
914
|
-
if (
|
|
1271
|
+
if (col.isNumerical)
|
|
915
1272
|
res.push(col);
|
|
916
1273
|
}
|
|
917
1274
|
|
|
@@ -920,47 +1277,49 @@ export class Pmpo {
|
|
|
920
1277
|
|
|
921
1278
|
/** Searches for the pMPO settings (R² threshold and q-cutoff, with the default p-value threshold) that maximize the AUC of predictions */
|
|
922
1279
|
private async getOptimalSettings(descriptors: DG.ColumnList, desirability: DG.Column, useSigmoid: boolean): Promise<OptimalPoint> {
|
|
923
|
-
const
|
|
924
|
-
pValTresh: 0,
|
|
925
|
-
r2Tresh: 0,
|
|
926
|
-
qCutoff: 0,
|
|
927
|
-
success: false,
|
|
928
|
-
};
|
|
1280
|
+
const pi = DG.TaskBarProgressIndicator.create('Optimizing... ', {cancelable: true});
|
|
929
1281
|
|
|
930
|
-
|
|
931
|
-
|
|
1282
|
+
try {
|
|
1283
|
+
const descriptorNames = descriptors.names();
|
|
1284
|
+
const {desired, nonDesired} = getDesiredTables(this.table, desirability);
|
|
932
1285
|
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
1286
|
+
// Compute descriptors' statistics
|
|
1287
|
+
const descrStats = new Map<string, DescriptorStatistics>();
|
|
1288
|
+
descriptorNames.forEach((name) => {
|
|
1289
|
+
descrStats.set(name, getDescriptorStatistics(desired.col(name)!, nonDesired.col(name)!));
|
|
1290
|
+
});
|
|
1291
|
+
const descrStatsTable = getDescriptorStatisticsTable(descrStats);
|
|
939
1292
|
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
1293
|
+
// Filter by p-value
|
|
1294
|
+
const selectedByPvalue = getFilteredByPvalue(descrStatsTable, P_VAL_TRES_DEFAULT);
|
|
1295
|
+
if (selectedByPvalue.length < 1) {
|
|
1296
|
+
pi.close();
|
|
944
1297
|
|
|
945
|
-
|
|
1298
|
+
return {
|
|
1299
|
+
pValTresh: 0,
|
|
1300
|
+
r2Tresh: 0,
|
|
1301
|
+
qCutoff: 0,
|
|
1302
|
+
state: 'failed',
|
|
1303
|
+
msg: 'No descriptors passed the p-value threshold filter.',
|
|
1304
|
+
};
|
|
1305
|
+
}
|
|
946
1306
|
|
|
947
|
-
|
|
948
|
-
// Filter by correlations
|
|
949
|
-
const selectedByCorr = getFilteredByCorrelations(descriptors, selectedByPvalue, descrStats, point[0], correlationTriples);
|
|
1307
|
+
const correlationTriples = getCorrelationTriples(descriptors, selectedByPvalue);
|
|
950
1308
|
|
|
951
|
-
|
|
952
|
-
|
|
1309
|
+
const funcToBeMinimized = (point: Float32Array) => {
|
|
1310
|
+
// Filter by correlations
|
|
1311
|
+
const selectedByCorr = getFilteredByCorrelations(descriptors, selectedByPvalue, descrStats, point[0], correlationTriples);
|
|
953
1312
|
|
|
954
|
-
|
|
955
|
-
|
|
1313
|
+
// Compute pMPO parameters - training
|
|
1314
|
+
const params = getModelParams(desired, nonDesired, selectedByCorr, point[1]);
|
|
956
1315
|
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
}; // funcToBeMinimized
|
|
1316
|
+
// Get predictions
|
|
1317
|
+
const prediction = Pmpo.predict(this.table, params, useSigmoid, this.predictionName);
|
|
960
1318
|
|
|
961
|
-
|
|
1319
|
+
// Evaluate predictions and return 1 - AUC (since optimization minimizes the function, but we want to maximize AUC)
|
|
1320
|
+
return 1 - getPmpoEvaluation(desirability, prediction).auc;
|
|
1321
|
+
}; // funcToBeMinimized
|
|
962
1322
|
|
|
963
|
-
try {
|
|
964
1323
|
const optimalResult = await optimizeNM(
|
|
965
1324
|
pi,
|
|
966
1325
|
funcToBeMinimized,
|
|
@@ -978,14 +1337,28 @@ export class Pmpo {
|
|
|
978
1337
|
pValTresh: P_VAL_TRES_DEFAULT,
|
|
979
1338
|
r2Tresh: optimalResult.optimalPoint[0],
|
|
980
1339
|
qCutoff: optimalResult.optimalPoint[1],
|
|
981
|
-
|
|
1340
|
+
state: 'success',
|
|
1341
|
+
msg: 'Optimization completed successfully.',
|
|
982
1342
|
};
|
|
983
|
-
} else
|
|
984
|
-
return
|
|
1343
|
+
} else {
|
|
1344
|
+
return {
|
|
1345
|
+
pValTresh: 0,
|
|
1346
|
+
r2Tresh: 0,
|
|
1347
|
+
qCutoff: 0,
|
|
1348
|
+
state: 'canceled',
|
|
1349
|
+
msg: 'Auto-tuning was canceled by the user.',
|
|
1350
|
+
};
|
|
1351
|
+
}
|
|
985
1352
|
} catch (err) {
|
|
986
1353
|
pi.close();
|
|
987
1354
|
|
|
988
|
-
return
|
|
1355
|
+
return {
|
|
1356
|
+
pValTresh: 0,
|
|
1357
|
+
r2Tresh: 0,
|
|
1358
|
+
qCutoff: 0,
|
|
1359
|
+
state: 'failed',
|
|
1360
|
+
msg: err instanceof Error ? err.message : 'Optimization failed due to an unexpected error.',
|
|
1361
|
+
};
|
|
989
1362
|
}
|
|
990
1363
|
} // getOptimalSettings
|
|
991
1364
|
}; // Pmpo
|