@datagrok/eda 1.4.13 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/CHANGELOG.md +11 -5
  2. package/dist/111.js +1 -1
  3. package/dist/111.js.map +1 -1
  4. package/dist/128.js +1 -1
  5. package/dist/128.js.map +1 -1
  6. package/dist/153.js +1 -1
  7. package/dist/153.js.map +1 -1
  8. package/dist/23.js +1 -1
  9. package/dist/23.js.map +1 -1
  10. package/dist/234.js +1 -1
  11. package/dist/234.js.map +1 -1
  12. package/dist/242.js +1 -1
  13. package/dist/242.js.map +1 -1
  14. package/dist/260.js +1 -1
  15. package/dist/260.js.map +1 -1
  16. package/dist/33.js +1 -1
  17. package/dist/33.js.map +1 -1
  18. package/dist/348.js +1 -1
  19. package/dist/348.js.map +1 -1
  20. package/dist/377.js +1 -1
  21. package/dist/377.js.map +1 -1
  22. package/dist/397.js +2 -0
  23. package/dist/397.js.map +1 -0
  24. package/dist/412.js +1 -1
  25. package/dist/412.js.map +1 -1
  26. package/dist/415.js +1 -1
  27. package/dist/415.js.map +1 -1
  28. package/dist/501.js +1 -1
  29. package/dist/501.js.map +1 -1
  30. package/dist/531.js +1 -1
  31. package/dist/531.js.map +1 -1
  32. package/dist/583.js +1 -1
  33. package/dist/583.js.map +1 -1
  34. package/dist/589.js +1 -1
  35. package/dist/589.js.map +1 -1
  36. package/dist/603.js +1 -1
  37. package/dist/603.js.map +1 -1
  38. package/dist/656.js +1 -1
  39. package/dist/656.js.map +1 -1
  40. package/dist/682.js +1 -1
  41. package/dist/682.js.map +1 -1
  42. package/dist/705.js +1 -1
  43. package/dist/705.js.map +1 -1
  44. package/dist/727.js +1 -1
  45. package/dist/727.js.map +1 -1
  46. package/dist/731.js +1 -1
  47. package/dist/731.js.map +1 -1
  48. package/dist/738.js +1 -1
  49. package/dist/738.js.map +1 -1
  50. package/dist/763.js +1 -1
  51. package/dist/763.js.map +1 -1
  52. package/dist/778.js +1 -1
  53. package/dist/778.js.map +1 -1
  54. package/dist/783.js +1 -1
  55. package/dist/783.js.map +1 -1
  56. package/dist/793.js +1 -1
  57. package/dist/793.js.map +1 -1
  58. package/dist/810.js +1 -1
  59. package/dist/810.js.map +1 -1
  60. package/dist/860.js +1 -1
  61. package/dist/860.js.map +1 -1
  62. package/dist/907.js +1 -1
  63. package/dist/907.js.map +1 -1
  64. package/dist/950.js +1 -1
  65. package/dist/950.js.map +1 -1
  66. package/dist/980.js +1 -1
  67. package/dist/980.js.map +1 -1
  68. package/dist/990.js +1 -1
  69. package/dist/990.js.map +1 -1
  70. package/dist/package-test.js +1 -1
  71. package/dist/package-test.js.map +1 -1
  72. package/dist/package.js +1 -1
  73. package/dist/package.js.map +1 -1
  74. package/package.json +5 -5
  75. package/src/package.ts +2 -1
  76. package/src/pareto-optimization/pareto-optimizer.ts +1 -1
  77. package/src/pls/pls-constants.ts +8 -1
  78. package/src/pls/pls-tools.ts +176 -74
  79. package/src/probabilistic-scoring/data-generator.ts +48 -3
  80. package/src/probabilistic-scoring/pmpo-defs.ts +30 -2
  81. package/src/probabilistic-scoring/pmpo-utils.ts +143 -52
  82. package/src/probabilistic-scoring/prob-scoring.ts +477 -104
  83. package/src/probabilistic-scoring/stat-tools.ts +1 -1
  84. package/src/tests/pareto-tests.ts +13 -15
  85. package/src/tests/pmpo-tests.ts +643 -3
  86. package/test-console-output-1.log +224 -86
  87. package/test-record-1.mp4 +0 -0
@@ -1,6 +1,6 @@
1
1
  /* eslint-disable max-len */
2
2
  // Probabilistic scoring (pMPO) features
3
- // Link: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
3
+ // Source paper https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
4
4
 
5
5
  import * as grok from 'datagrok-api/grok';
6
6
  import * as ui from 'datagrok-api/ui';
@@ -12,17 +12,24 @@ import '../../css/pmpo.css';
12
12
 
13
13
  import {getDesiredTables, getDescriptorStatistics, getBoolPredictionColumn, getPmpoEvaluation} from './stat-tools';
14
14
  import {MIN_SAMPLES_COUNT, PMPO_NON_APPLICABLE, DescriptorStatistics, P_VAL_TRES_MIN, DESCR_TITLE,
15
- R2_MIN, Q_CUTOFF_MIN, PmpoParams, SCORES_TITLE, DESCR_TABLE_TITLE, PMPO_COMPUTE_FAILED, SELECTED_TITLE,
15
+ R2_MIN, Q_CUTOFF_MIN, PmpoParams, SCORES_TITLE, DESCR_TABLE_TITLE, SELECTED_TITLE,
16
16
  P_VAL, DESIRABILITY_COL_NAME, STAT_GRID_HEIGHT, DESIRABILITY_COLUMN_WIDTH, WEIGHT_TITLE,
17
17
  P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT, USE_SIGMOID_DEFAULT, ROC_TRESHOLDS,
18
18
  FPR_TITLE, TPR_TITLE, COLORS, THRESHOLD, AUTO_TUNE_MAX_APPLICABLE_ROWS, DEFAULT_OPTIMIZATION_SETTINGS,
19
- P_VAL_TRES_MAX, R2_MAX, Q_CUTOFF_MAX, OptimalPoint, LOW_PARAMS_BOUNDS, HIGH_PARAMS_BOUNDS, FORMAT} from './pmpo-defs';
19
+ P_VAL_TRES_MAX, R2_MAX, Q_CUTOFF_MAX, OptimalPoint, LOW_PARAMS_BOUNDS, HIGH_PARAMS_BOUNDS, FORMAT,
20
+ EQUALITY_SIGN, SIGN_OPTIONS, THRESHOLDED_DESIRABILITY_COL_NAME, PMPO_COMPUTE_FAILED,
21
+ PmpoInputId, TooltipContent, PmpoValidationResult} from './pmpo-defs';
20
22
  import {addSelectedDescriptorsCol, getDescriptorStatisticsTable, getFilteredByPvalue, getFilteredByCorrelations,
21
23
  getModelParams, getDescrTooltip, saveModel, getScoreTooltip, getDesirabilityProfileJson, getCorrelationTriples,
22
- addCorrelationColumns, setPvalColumnColorCoding, setCorrColumnColorCoding, PmpoError} from './pmpo-utils';
24
+ addCorrelationColumns, setPvalColumnColorCoding, setCorrColumnColorCoding, PmpoError, getInitCol,
25
+ getBoolDesirabilityColData, isDesirabilityValid,
26
+ getDesirabilityColumnFromCategories,
27
+ getSelectedCategories} from './pmpo-utils';
23
28
  import {getOutputPalette} from '../pareto-optimization/utils';
24
29
  import {OPT_TYPE} from '../pareto-optimization/defs';
25
30
  import {optimizeNM} from './nelder-mead';
31
+ import {getMissingValsIndices} from '../missing-values-imputation/knn-imputer';
32
+ import {DesirabilityProfile} from '@datagrok-libraries/statistics/src/mpo/mpo';
26
33
 
27
34
  export type PmpoTrainingResult = {
28
35
  params: Map<string, PmpoParams>,
@@ -40,6 +47,7 @@ export type PmpoAppItems = {
40
47
  rocCurve: DG.Viewer;
41
48
  confusionMatrix: DG.Viewer;
42
49
  controls: Controls;
50
+ profile: DesirabilityProfile | null;
43
51
  };
44
52
 
45
53
  /** Class implementing probabilistic MPO (pMPO) model training and prediction */
@@ -78,12 +86,6 @@ export class Pmpo {
78
86
  return false;
79
87
  }
80
88
 
81
- // Check desirability
82
- if (desirability.type !== DG.COLUMN_TYPE.BOOL) {
83
- showWarning(`: "${desirability.name}" must be boolean column.`);
84
- return false;
85
- }
86
-
87
89
  if (desirability.stats.stdev === 0) { // TRUE & FALSE
88
90
  showWarning(`: "${desirability.name}" has a single category.`);
89
91
  return false;
@@ -98,8 +100,8 @@ export class Pmpo {
98
100
  return false;
99
101
  }
100
102
 
101
- if (col.stats.missingValueCount > 0) {
102
- showWarning(`: "${col.name}" contains missing values.`);
103
+ if (col.stats.missingValueCount === col.length) {
104
+ showWarning(`: "${col.name}" contains only missing values.`);
103
105
  return false;
104
106
  }
105
107
 
@@ -124,29 +126,20 @@ export class Pmpo {
124
126
  return false;
125
127
  }
126
128
 
127
- let boolColsCount = 0;
128
- let validNumericColsCount = 0;
129
+ let validColsCount = 0;
129
130
 
130
131
  // Check numeric columns and boolean columns
131
132
  for (const col of df.columns) {
132
- if (col.isNumerical) {
133
- if ((col.stats.missingValueCount < 1) && (col.stats.stdev > 0))
134
- ++validNumericColsCount;
135
- } else if (col.type == DG.COLUMN_TYPE.BOOL)
136
- ++boolColsCount;
137
- }
138
-
139
- // Check boolean columns count
140
- if (boolColsCount < 1) {
141
- if (toShowMsg)
142
- grok.shell.warning(PMPO_NON_APPLICABLE + ': no boolean columns.');
143
- return false;
133
+ if (col.isNumerical || (col.type === DG.TYPE.BOOL)) {
134
+ if (col.stats.stdev > 0)
135
+ ++validColsCount;
136
+ }
144
137
  }
145
138
 
146
139
  // Check valid numeric columns count
147
- if (validNumericColsCount < 1) {
140
+ if (validColsCount < 2) {
148
141
  if (toShowMsg)
149
- grok.shell.warning(PMPO_NON_APPLICABLE + ': no numeric columns without missing values and non-zero variance.');
142
+ grok.shell.warning(PMPO_NON_APPLICABLE + ': not enough of non-constant columns.');
150
143
  return false;
151
144
  }
152
145
 
@@ -210,10 +203,12 @@ export class Pmpo {
210
203
  static predict(df: DG.DataFrame, params: Map<string, PmpoParams>, useSigmoid: boolean, predictionName: string): DG.Column {
211
204
  const count = df.rowCount;
212
205
  const scores = new Float64Array(count).fill(0);
206
+ const colsWithMissingVals: DG.Column[] = [];
213
207
 
214
208
  // Compute pMPO scores (see https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/)
215
209
  params.forEach((param, name) => {
216
210
  const col = df.col(name);
211
+
217
212
  const b = param.b;
218
213
  const c = param.c;
219
214
  const x0 = param.cutoff;
@@ -225,6 +220,9 @@ export class Pmpo {
225
220
  if (col == null)
226
221
  throw new Error(`Failed to apply pMPO: inconsistent data, no column "${name}" in the table "${df.name}"`);
227
222
 
223
+ if (col.stats.missingValueCount > 0)
224
+ colsWithMissingVals.push(col);
225
+
228
226
  const vals = col.getRawData();
229
227
 
230
228
  if (useSigmoid) {
@@ -247,11 +245,13 @@ export class Pmpo {
247
245
  } // predict
248
246
 
249
247
  private params: Map<string, PmpoParams> | null = null;
248
+ private desirabilityProfile: DesirabilityProfile | null = null;
250
249
 
251
250
  private table: DG.DataFrame;
252
251
  private view: DG.TableView;
253
- private boolCols: DG.Column[];
252
+ private desirabilityColumns: DG.Column[];
254
253
  private numericCols: DG.Column[];
254
+ private missingValsIndeces: Map<string, number[]>;
255
255
 
256
256
  private initTable = DG.DataFrame.create();
257
257
 
@@ -262,6 +262,9 @@ export class Pmpo {
262
262
 
263
263
  private desirabilityProfileRoots = new Map<string, HTMLElement>();
264
264
 
265
+ private tresholdedColumn: DG.Column | null = null;
266
+ private threshColTooltip: string | null = null;
267
+
265
268
  private rocCurve = DG.Viewer.scatterPlot(this.initTable, {
266
269
  showTitle: true,
267
270
  showSizeSelector: false,
@@ -281,9 +284,10 @@ export class Pmpo {
281
284
  constructor(df: DG.DataFrame, view?: DG.TableView) {
282
285
  this.table = df;
283
286
  this.view = view ?? (grok.shell.tableView(df.name) ?? grok.shell.addTableView(df));
284
- this.boolCols = this.getBoolCols();
287
+ this.desirabilityColumns = this.getDesirabilityColumns();
285
288
  this.numericCols = this.getValidNumericCols();
286
289
  this.predictionName = df.columns.getUnusedName(SCORES_TITLE);
290
+ this.missingValsIndeces = getMissingValsIndices(this.numericCols);
287
291
  };
288
292
 
289
293
  /** Sets the ribbon panels in the table view (removes the first panel) */
@@ -476,7 +480,9 @@ export class Pmpo {
476
480
 
477
481
  grid.sort([this.predictionName], [false]);
478
482
 
479
- grid.col(name)!.format = '0.0000';
483
+ const scoresCol = grid.col(name);
484
+ scoresCol!.format = '0.0000';
485
+ scoresCol!.isTextColorCoded = true;
480
486
 
481
487
  // set tooltips
482
488
  grid.onCellTooltip((cell, x, y) => {
@@ -487,6 +493,12 @@ export class Pmpo {
487
493
  ui.tooltip.show(getScoreTooltip(), x, y);
488
494
 
489
495
  return true;
496
+ } else {
497
+ if (this.tresholdedColumn != null && cell.tableColumn.name === this.tresholdedColumn.name) {
498
+ ui.tooltip.show(ui.markdown(this.threshColTooltip ?? ''), x, y);
499
+
500
+ return true;
501
+ }
490
502
  }
491
503
 
492
504
  return false;
@@ -505,13 +517,15 @@ export class Pmpo {
505
517
  this.desirabilityProfileRoots.clear();
506
518
 
507
519
  const desirabilityProfile = getDesirabilityProfileJson(this.params, useSigmoidalCorrection, '', '', true);
520
+ this.desirabilityProfile = getDesirabilityProfileJson(this.params, useSigmoidalCorrection, '', '', false);
508
521
 
509
522
  // Set weights
510
523
  const descrNames = descrStatsTable.col(DESCR_TITLE)!.toList();
511
524
  const weightsRaw = descrStatsTable.col(WEIGHT_TITLE)!.getRawData();
512
525
  const props = desirabilityProfile.properties;
526
+ const names: string[] = Object.keys(props);
513
527
 
514
- for (const name of Object.keys(props))
528
+ for (const name of names)
515
529
  weightsRaw[descrNames.indexOf(name)] = props[name].weight;
516
530
 
517
531
  // Set HTML elements
@@ -523,20 +537,16 @@ export class Pmpo {
523
537
  if (rootsCol == null)
524
538
  return;
525
539
 
526
- const rows = rootsCol.querySelectorAll('div.d4-flex-row.ui-div');
527
-
528
- rows.forEach((row) => {
529
- const children = row.children;
530
- if (children.length < 2) // expecting descriptor name, weight & profile
531
- return;
540
+ const rows = rootsCol.querySelectorAll('div.d4-flex-row.ui-div.statistics-mpo-row');
532
541
 
533
- const descrDivChildren = (children[0] as HTMLElement).children;
534
- if (descrDivChildren.length < 1) // expecting 1 div with descriptor name
542
+ rows.forEach((row, idx) => {
543
+ const editor = row.querySelector('.statistics-mpo-line-editor') as HTMLElement;
544
+ if (!editor)
535
545
  return;
536
546
 
537
- const descrName = (descrDivChildren[0] as HTMLElement).innerText;
538
-
539
- this.desirabilityProfileRoots.set(descrName, children[2] as HTMLElement);
547
+ editor.style.width = '100%';
548
+ editor.style.height = '100%';
549
+ this.desirabilityProfileRoots.set(names[idx], editor);
540
550
  });
541
551
  } // updateDesirabilityProfileData
542
552
 
@@ -586,6 +596,26 @@ export class Pmpo {
586
596
  });
587
597
  } // updateConfusionMatrix
588
598
 
599
+ /** Collects the stored missing-value indices for the given column names (concatenated, duplicates are not removed) */
600
+ private getIndecesOfMissingValues(colNames: string[]): number[] {
601
+ const indeces: number[] = [];
602
+
603
+ colNames.forEach((name) => {
604
+ const inds = this.missingValsIndeces.get(name);
605
+
606
+ if (inds != null)
607
+ indeces.push(...inds);
608
+ });
609
+
610
+ return indeces;
611
+ }
612
+
613
+ /** Sets null values for the predicted scores in rows with missing values in any of the descriptors */
614
+ private setNulls(scores: DG.Column, indeces: number[]): void {
615
+ const raw = scores.getRawData();
616
+ indeces.forEach((ind) => raw[ind] = DG.FLOAT_NULL);
617
+ }
618
+
589
619
  /** Fits the pMPO model to the given data and updates the viewers accordingly */
590
620
  private fitAndUpdateViewers(df: DG.DataFrame, descriptors: DG.ColumnList, desirability: DG.Column,
591
621
  pValTresh: number, r2Tresh: number, qCutoff: number, useSigmoid: boolean): void {
@@ -599,6 +629,10 @@ export class Pmpo {
599
629
 
600
630
  const prediction = Pmpo.predict(df, this.params, useSigmoid, this.predictionName);
601
631
 
632
+ // Set nulls for rows with missing values in any of the selected descriptors
633
+ const indecesOfMissingVals = this.getIndecesOfMissingValues(selectedByCorr);
634
+ this.setNulls(prediction, indecesOfMissingVals);
635
+
602
636
  // Mark predictions with a color
603
637
  prediction.colors.setLinear(getOutputPalette(OPT_TYPE.MAX), {min: prediction.stats.min, max: prediction.stats.max});
604
638
 
@@ -663,6 +697,7 @@ export class Pmpo {
663
697
  rocCurve: this.rocCurve,
664
698
  confusionMatrix: this.confusionMatrix,
665
699
  controls: this.getInputForm(false),
700
+ profile: this.desirabilityProfile,
666
701
  };
667
702
  } // getViewers
668
703
 
@@ -670,35 +705,76 @@ export class Pmpo {
670
705
  private getInputForm(addBtn: boolean): Controls {
671
706
  const form = ui.form([]);
672
707
  form.append(ui.h2('Training data'));
673
- const numericColNames = this.numericCols.map((col) => col.name);
708
+ const initDesirability = getInitCol(this.desirabilityColumns);
709
+
710
+ // returns the desirability column to be used for computations, based on the input desirability column and threshold settings
711
+ const getDesirabilityColumn = (): DG.Column => {
712
+ // remove existing thresholded column if exists
713
+ if (this.tresholdedColumn != null) {
714
+ this.table.columns.remove(this.tresholdedColumn.name);
715
+ this.tresholdedColumn = null;
716
+ }
717
+
718
+ if (desInput.value!.type === DG.COLUMN_TYPE.BOOL)
719
+ return desInput.value!;
720
+
721
+ const boolDesirabilityData = (desInput.value!.type === DG.COLUMN_TYPE.STRING) ?
722
+ getDesirabilityColumnFromCategories(desInput.value!, desirableCategoriesInput!.value!) :
723
+ getBoolDesirabilityColData(
724
+ desInput.value!,
725
+ desirabilityThresholdInput.value!,
726
+ signInput.value as EQUALITY_SIGN,
727
+ );
728
+
729
+ this.tresholdedColumn = boolDesirabilityData.column;
730
+ this.threshColTooltip = boolDesirabilityData.tooltip;
731
+
732
+ this.tresholdedColumn.name = this.table.columns.getUnusedName(THRESHOLDED_DESIRABILITY_COL_NAME);
733
+ this.table.columns.add(this.tresholdedColumn);
734
+
735
+ return this.tresholdedColumn;
736
+ }; // getDesirabilityColumn
674
737
 
675
738
  // Function to run computations on input changes
676
739
  const runComputations = () => {
677
- try {
678
- //grok.shell.info('Running...');
740
+ if (!areInputsValid())
741
+ return;
679
742
 
743
+ try {
680
744
  this.fitAndUpdateViewers(
681
745
  this.table,
682
746
  DG.DataFrame.fromColumns(descrInput.value).columns,
683
- this.table.col(desInput.value!)!,
747
+ getDesirabilityColumn(),
684
748
  pInput.value!,
685
749
  rInput.value!,
686
750
  qInput.value!,
687
751
  useSigmoidInput.value,
688
752
  );
689
753
  } catch (err) {
690
- err instanceof PmpoError ?
691
- grok.shell.warning(err.message) :
692
- grok.shell.error(err instanceof Error ? err.message : PMPO_COMPUTE_FAILED + ': the platform issue.');
754
+ if (err instanceof PmpoError) {
755
+ grok.shell.warning(err.message);
756
+ ui.tooltip.bind(desInput.input, err.message);
757
+ ui.tooltip.bind(descrInput.input, err.message);
758
+ } else {
759
+ const msg = err instanceof Error ? err.message : PMPO_COMPUTE_FAILED + ': the platform issue.';
760
+ grok.shell.error(msg);
761
+ ui.tooltip.bind(desInput.input, msg);
762
+ ui.tooltip.bind(descrInput.input, msg);
763
+ };
764
+
765
+ desInput.input.classList.add('d4-invalid');
766
+ descrInput.input.classList.add('d4-invalid');
693
767
  }
694
- };
768
+ }; // runComputations
695
769
 
696
770
  // Descriptor columns input
697
771
  const descrInput = ui.input.columns('Descriptors', {
698
772
  table: this.table,
699
773
  nullable: false,
700
- available: numericColNames,
701
- checked: numericColNames,
774
+ available: this.numericCols.map((col) => col.name),
775
+ checked: this.numericCols.filter((col) => {
776
+ return (col.name !== initDesirability.name) && (col.stats.stdev > 0) && (col.stats.missingValueCount < col.length);
777
+ }).map((col) => col.name),
702
778
  tooltipText: 'Descriptor columns used for model construction.',
703
779
  onValueChanged: (value) => {
704
780
  if (value != null) {
@@ -709,21 +785,163 @@ export class Pmpo {
709
785
  });
710
786
  form.append(descrInput.root);
711
787
 
712
- // Desirability column input
713
- const desInput = ui.input.choice('Desirability', {
788
+ descrInput.addValidator(() => {
789
+ if (descrInput.value == null || descrInput.value.length < 1)
790
+ return 'Select at least one descriptor column.';
791
+ if (desInput.value != null && descrInput.value.includes(desInput.value))
792
+ return 'Desirability column cannot be used as a descriptor.';
793
+ const zeroStdevCols = descrInput.value.filter((col) => col.stats.stdev === 0).map((col) => col.name);
794
+ if (zeroStdevCols.length > 0)
795
+ return `Descriptor columns with zero variance: ${zeroStdevCols.join(', ')}`;
796
+ const nullCols = descrInput.value.filter((col) => col.stats.missingValueCount === col.length).map((col) => col.name);
797
+ if (nullCols.length > 0)
798
+ return `Descriptor columns with only missing values: ${nullCols.join(', ')}`;
799
+ return null;
800
+ });
801
+
802
+ // Desirability column input and related controls
803
+ const setVisibilityOfDesirabilityAuxInputs = (value: DG.Column) => {
804
+ if (value.type === DG.COLUMN_TYPE.BOOL)
805
+ desOptionsInputDiv.hidden = true;
806
+ else {
807
+ desOptionsInputDiv.hidden = false;
808
+ const isString = (value.type === DG.COLUMN_TYPE.STRING);
809
+ desirabilityThresholdInput.root.hidden = isString;
810
+ signInput.root.hidden = isString;
811
+ }
812
+ }; // setVisibilityOfDesirabilityAuxInputs
813
+
814
+ const desInput = ui.input.column('Desirability', {
714
815
  nullable: false,
715
- value: this.boolCols[0].name,
716
- items: this.boolCols.map((col) => col.name),
816
+ value: initDesirability,
817
+ table: this.table,
818
+ filter: (col) => this.desirabilityColumns.includes(col),
717
819
  tooltipText: 'Desirability column.',
718
820
  onValueChanged: (value) => {
719
821
  if (value != null) {
822
+ updateDesirableCategoriesInput();
823
+ setVisibilityOfDesirabilityAuxInputs(value);
824
+ areComputationsBlocked = true;
825
+ desirabilityThresholdInput.value = Math.round(value.stats.avg * 100) / 100;
826
+ areComputationsBlocked = false;
720
827
  areTunedSettingsUsed = false;
721
828
  checkAutoTuneAndRun();
722
829
  }
723
- },
830
+ }, // onValueChanged
724
831
  });
725
832
  form.append(desInput.root);
726
833
 
834
+ desInput.addValidator(() => {
835
+ if (desInput.value == null)
836
+ return 'Select a desirability column.';
837
+ if (descrInput.value != null && descrInput.value.includes(desInput.value))
838
+ return 'Desirability column cannot be used as a descriptor.';
839
+ if (desInput.value.type === DG.COLUMN_TYPE.BOOL) {
840
+ if (desInput.value.stats.stdev === 0)
841
+ return 'All desirability values are the same - scoring is not feasible.';
842
+ } else if (desInput.value.type === DG.COLUMN_TYPE.STRING) {
843
+ if (desInput.value.categories.length < 2)
844
+ return 'String desirability column must have at least 2 categories.';
845
+ } else {
846
+ if (desInput.value.stats.stdev === 0) {
847
+ return desInput.value.stats.missingValueCount < desInput.value.length ?
848
+ 'All desirability values are the same - scoring is not feasible.' :
849
+ 'Empty column cannot be used as desirability column.';
850
+ }
851
+ if (desirabilityThresholdInput.value == null)
852
+ return 'Specify non-null desirability threshold.';
853
+ if (!isDesirabilityValid(desInput.value, desirabilityThresholdInput.value, signInput.value as EQUALITY_SIGN)) {
854
+ return `All compounds are either desired or non-desired for ${desInput.value.name} ` +
855
+ `${signInput.value} ${desirabilityThresholdInput.value}. Adjust the threshold or condition.`;
856
+ }
857
+ }
858
+ return null;
859
+ });
860
+
861
+ let areComputationsBlocked = false;
862
+
863
+ const signInput = ui.input.choice('Condition', {
864
+ value: EQUALITY_SIGN.DEFAULT,
865
+ items: SIGN_OPTIONS,
866
+ nullable: false,
867
+ tooltipText: 'How to compare numeric Desirability column values against the threshold.',
868
+ onValueChanged: (_value) => {
869
+ areTunedSettingsUsed = false;
870
+ checkAutoTuneAndRun();
871
+ },
872
+ });
873
+
874
+ const desirabilityThresholdInput = ui.input.float('Threshold', {
875
+ value: Math.round(initDesirability.stats.avg * 100) / 100,
876
+ nullable: false,
877
+ tooltipText: 'Boundary value that separates desired from non-desired compounds.',
878
+ format: '0.00',
879
+ onValueChanged: (value) => {
880
+ if (value != null) {
881
+ if (areComputationsBlocked)
882
+ return;
883
+ areTunedSettingsUsed = false;
884
+ checkAutoTuneAndRun();
885
+ }
886
+ },
887
+ });
888
+
889
+ desirabilityThresholdInput.addValidator(() => {
890
+ if (desInput.value == null || desInput.value.type === DG.COLUMN_TYPE.BOOL ||
891
+ desInput.value.type === DG.COLUMN_TYPE.STRING)
892
+ return null;
893
+ if (desirabilityThresholdInput.value == null)
894
+ return 'Specify non-null desirability threshold.';
895
+ if (!isDesirabilityValid(desInput.value, desirabilityThresholdInput.value, signInput.value as EQUALITY_SIGN))
896
+ return 'Adjust the threshold to get both desired and non-desired groups.';
897
+ return null;
898
+ });
899
+
900
+ const desOptionsInputDiv = ui.divV([signInput.root, desirabilityThresholdInput.root]);
901
+
902
+ form.append(desOptionsInputDiv);
903
+
904
+ let desirableCategoriesInput: DG.InputBase<string[] | null> | null = null;
905
+
906
+ // For string columns - input for selecting which categories are considered desirable
907
+ const updateDesirableCategoriesInput = () => {
908
+ if (desirableCategoriesInput != null) {
909
+ desirableCategoriesInput.root.remove();
910
+ desirableCategoriesInput = null;
911
+ }
912
+
913
+ if (desInput.value?.type === DG.COLUMN_TYPE.STRING) {
914
+ desirableCategoriesInput = ui.input.multiChoice('Preferred', {
915
+ value: getSelectedCategories(desInput.value!.categories),
916
+ items: desInput.value!.categories,
917
+ nullable: false,
918
+ tooltipText: 'Select which categories should be treated as desirable.',
919
+ onValueChanged: (value) => {
920
+ if (value != null) {
921
+ if (areComputationsBlocked)
922
+ return;
923
+ areTunedSettingsUsed = false;
924
+ checkAutoTuneAndRun();
925
+ }
926
+ },
927
+ });
928
+
929
+ desirableCategoriesInput.addValidator(() => {
930
+ if (desirableCategoriesInput!.value == null || desirableCategoriesInput!.value.length === 0)
931
+ return 'Select at least one preferable category.';
932
+ if (desInput.value != null && desirableCategoriesInput!.value.length === desInput.value.categories.length)
933
+ return 'At least one category must be non-preferable.';
934
+ return null;
935
+ });
936
+
937
+ desOptionsInputDiv.append(desirableCategoriesInput.root);
938
+ }
939
+ }; // updateDesirableCategoriesInput
940
+
941
+ setVisibilityOfDesirabilityAuxInputs(desInput.value!);
942
+
943
+ // Settings inputs
944
+
727
945
  const header = ui.h2('Settings');
728
946
  form.append(header);
729
947
  ui.tooltip.bind(header, 'Settings of the pMPO model.');
@@ -744,25 +962,52 @@ export class Pmpo {
744
962
  // Flag indicating whether optimal parameters from auto-tuning are currently used
745
963
  let areTunedSettingsUsed = false;
746
964
 
965
+ // Auto-tune parameters and run computations; if auto-tune is not applicable, just run computations with current settings
747
966
  const setOptimalParametersAndRun = async () => {
967
+ await new Promise((resolve) => setTimeout(resolve, 50));
968
+
969
+ if (!areInputsValid())
970
+ return;
971
+
748
972
  if (!areTunedSettingsUsed) {
749
973
  const optimalSettings = await this.getOptimalSettings(
750
974
  DG.DataFrame.fromColumns(descrInput.value).columns,
751
- this.table.col(desInput.value!)!,
975
+ getDesirabilityColumn(),
752
976
  useSigmoidInput.value,
753
977
  );
754
978
 
755
- if (optimalSettings.success) {
979
+ if (optimalSettings.state === 'success') {
756
980
  pInput.value = Math.max(optimalSettings.pValTresh, P_VAL_TRES_MIN);
757
981
  rInput.value = Math.max(optimalSettings.r2Tresh, R2_MIN);
758
982
  qInput.value = Math.max(optimalSettings.qCutoff, Q_CUTOFF_MIN);
759
983
  areTunedSettingsUsed = true;
984
+ runComputations();
760
985
  } else
761
- autoTuneInput.value = false; // revert to manual mode if optimization failed
762
- }
986
+ grok.shell.warning(optimalSettings.msg);
987
+ /*descrInput.input.classList.add('d4-invalid');
988
+ desInput.input.classList.add('d4-invalid');
989
+ ui.tooltip.bind(descrInput.input, optimalSettings.msg);
990
+ ui.tooltip.bind(desInput.input, optimalSettings.msg);*/
991
+ } else
992
+ runComputations();
993
+ }; // setOptimalParametersAndRun
763
994
 
764
- runComputations();
765
- };
995
+ // Validates all inputs before running computations using registered validators
996
+ const areInputsValid = (): boolean => {
997
+ const results = [
998
+ descrInput.validate(),
999
+ desInput.validate(),
1000
+ desirabilityThresholdInput.validate(),
1001
+ pInput.validate(),
1002
+ rInput.validate(),
1003
+ qInput.validate(),
1004
+ ];
1005
+
1006
+ if (desirableCategoriesInput != null)
1007
+ results.push(desirableCategoriesInput.validate());
1008
+
1009
+ return results.every((r) => r);
1010
+ }; // areInputsValid
766
1011
 
767
1012
  const checkAutoTuneAndRun = () => {
768
1013
  if (autoTuneInput.value)
@@ -784,6 +1029,8 @@ export class Pmpo {
784
1029
  // If auto-tuning is turned on, set optimal parameters and run computations
785
1030
  if (value)
786
1031
  await setOptimalParametersAndRun();
1032
+ else
1033
+ runComputations();
787
1034
  },
788
1035
  });
789
1036
  form.append(autoTuneInput.root);
@@ -810,6 +1057,14 @@ export class Pmpo {
810
1057
  });
811
1058
  form.append(pInput.root);
812
1059
 
1060
+ pInput.addValidator(() => {
1061
+ if (pInput.value == null)
1062
+ return 'P-value is required.';
1063
+ if (pInput.value < P_VAL_TRES_MIN || pInput.value > P_VAL_TRES_MAX)
1064
+ return `P-value must be between ${P_VAL_TRES_MIN} and ${P_VAL_TRES_MAX}.`;
1065
+ return null;
1066
+ });
1067
+
813
1068
  // R² threshold input
814
1069
  const rInput = ui.input.float('R²', {
815
1070
  // @ts-ignore
@@ -834,6 +1089,14 @@ export class Pmpo {
834
1089
  });
835
1090
  form.append(rInput.root);
836
1091
 
1092
+ rInput.addValidator(() => {
1093
+ if (rInput.value == null)
1094
+ return 'R² is required.';
1095
+ if (rInput.value < R2_MIN || rInput.value > R2_MAX)
1096
+ return `R² must be between ${R2_MIN} and ${R2_MAX}.`;
1097
+ return null;
1098
+ });
1099
+
837
1100
  // q-cutoff input
838
1101
  const qInput = ui.input.float('q-cutoff', {
839
1102
  // @ts-ignore
@@ -857,6 +1120,14 @@ export class Pmpo {
857
1120
  });
858
1121
  form.append(qInput.root);
859
1122
 
1123
+ qInput.addValidator(() => {
1124
+ if (qInput.value == null)
1125
+ return 'Q-cutoff is required.';
1126
+ if (qInput.value < Q_CUTOFF_MIN || qInput.value > Q_CUTOFF_MAX)
1127
+ return `Q-cutoff must be between ${Q_CUTOFF_MIN} and ${Q_CUTOFF_MAX}.`;
1128
+ return null;
1129
+ });
1130
+
860
1131
  const setEnability = (toEnable: boolean) => {
861
1132
  pInput.enabled = toEnable;
862
1133
  rInput.enabled = toEnable;
@@ -894,24 +1165,110 @@ export class Pmpo {
894
1165
  };
895
1166
  } // getInputForm
896
1167
 
897
- /** Retrieves boolean columns from the data frame */
898
- private getBoolCols(): DG.Column[] {
1168
+ /** Validates all pMPO inputs and returns structured errors without mutating the DOM */
1169
+ static validateInputs(params: {
1170
+ descriptors: DG.Column[] | null,
1171
+ desirability: DG.Column | null,
1172
+ threshold: number | null,
1173
+ sign: EQUALITY_SIGN,
1174
+ desirableCategories: string[] | null,
1175
+ pValue: number | null,
1176
+ r2: number | null,
1177
+ qCutoff: number | null,
1178
+ }): PmpoValidationResult {
1179
+ const errors = new Map<PmpoInputId, TooltipContent>();
1180
+ const {descriptors, desirability, threshold, sign, desirableCategories, pValue, r2, qCutoff} = params;
1181
+
1182
+ // Settings null or out of range
1183
+ if (pValue == null || r2 == null || qCutoff == null)
1184
+ return {valid: false, errors};
1185
+
1186
+ if ((pValue <= 0) || (pValue > 1) || (r2 < 0) || (r2 > 1) || (qCutoff <= 0) || (qCutoff > 1))
1187
+ return {valid: false, errors};
1188
+
1189
+ // Column inputs null
1190
+ if (descriptors == null || desirability == null)
1191
+ return {valid: false, errors};
1192
+
1193
+ // At least one descriptor
1194
+ if (descriptors.length < 1) {
1195
+ errors.set('descriptors', 'Select at least one descriptor column.');
1196
+ return {valid: false, errors};
1197
+ }
1198
+
1199
+ // Desirability column must not be among descriptors
1200
+ if (descriptors.includes(desirability)) {
1201
+ const msg = 'Desirability column cannot be used as a descriptor.';
1202
+ errors.set('descriptors', msg);
1203
+ errors.set('desirability', msg);
1204
+ return {valid: false, errors};
1205
+ }
1206
+
1207
+ // No zero-variance descriptor columns
1208
+ const zeroStdevCols = descriptors.filter((col) => col.stats.stdev === 0).map((col) => col.name);
1209
+ if (zeroStdevCols.length > 0)
1210
+ errors.set('descriptors', () => ui.markdown(`Descriptor columns with zero variance cannot be used: **${zeroStdevCols.join(', ')}**`));
1211
+
1212
+ // No all-null descriptor columns
1213
+ const nullCols = descriptors.filter((col) => col.stats.missingValueCount === col.length).map((col) => col.name);
1214
+ if (nullCols.length > 0)
1215
+ errors.set('descriptors', () => ui.markdown(`Descriptor columns with only missing values cannot be used: **${nullCols.join(', ')}**`));
1216
+
1217
+ // Validate desirability column based on its type
1218
+ if (desirability.type === DG.COLUMN_TYPE.BOOL) {
1219
+ if (desirability.stats.stdev === 0)
1220
+ errors.set('desirability', 'All desirability values are the same - scoring is not feasible.');
1221
+ } else if (desirability.type === DG.COLUMN_TYPE.STRING) {
1222
+ const catsCount = desirability.categories.length;
1223
+ const selectedCatsCount = desirableCategories?.length ?? 0;
1224
+
1225
+ if (catsCount < 2)
1226
+ errors.set('desirability', 'String desirability column must have at least 2 categories.');
1227
+ else if (selectedCatsCount === 0)
1228
+ errors.set('desirability', 'Select at least one preferable category.');
1229
+ else if (selectedCatsCount === catsCount)
1230
+ errors.set('desirability', 'At least one category must be non-preferable.');
1231
+ } else {
1232
+ // Numeric desirability
1233
+ if (desirability.stats.stdev === 0) {
1234
+ errors.set('desirability',
1235
+ desirability.stats.missingValueCount < desirability.length ?
1236
+ 'All desirability values are the same - scoring is not feasible.' :
1237
+ 'Empty column cannot be used as desirability column.',
1238
+ );
1239
+ } else if (threshold == null)
1240
+ errors.set('desirability', 'Specify non-null desirability threshold.');
1241
+ else if (!isDesirabilityValid(desirability, threshold, sign)) {
1242
+ errors.set('desirability', () => ui.markdown(`All compounds are either desired or non-desired for
1243
+ <div align="center">
1244
+ **${desirability.name} ${sign} ${threshold}.**
1245
+ </div>
1246
+ Adjust the threshold or condition to get both groups.`));
1247
+ errors.set('threshold', 'Adjust the threshold to get both desired and non-desired groups.');
1248
+ }
1249
+ }
1250
+
1251
+ return {valid: !errors.size, errors};
1252
+ } // validateInputs
1253
+
1254
+ /** Retrieves acceptable desirability columns (boolean or numerical with non-zero standard deviation) from the data frame */
1255
+ private getDesirabilityColumns(): DG.Column[] {
899
1256
  const res: DG.Column[] = [];
900
1257
 
901
1258
  for (const col of this.table.columns) {
902
- if ((col.type === DG.COLUMN_TYPE.BOOL) && (col.stats.stdev > 0))
1259
+ if (((col.type === DG.COLUMN_TYPE.BOOL) || (col.isNumerical) || (col.type === DG.COLUMN_TYPE.STRING)))
903
1260
  res.push(col);
904
1261
  }
905
1262
 
906
1263
  return res;
907
- } // getBoolCols
1264
+ } // getDesirabilityColumns
908
1265
 
909
1266
  /** Retrieves valid (numerical, no missing values, non-zero standard deviation) numeric columns from the data frame */
910
1267
  private getValidNumericCols(): DG.Column[] {
911
1268
  const res: DG.Column[] = [];
912
1269
 
913
1270
  for (const col of this.table.columns) {
914
- if ((col.isNumerical) && (col.stats.missingValueCount < 1) && (col.stats.stdev > 0))
1271
+ if (col.isNumerical)
915
1272
  res.push(col);
916
1273
  }
917
1274
 
@@ -920,47 +1277,49 @@ export class Pmpo {
920
1277
 
921
1278
  /** Fits the pMPO model to the given data and updates the viewers accordingly */
922
1279
  private async getOptimalSettings(descriptors: DG.ColumnList, desirability: DG.Column, useSigmoid: boolean): Promise<OptimalPoint> {
923
- const failedResult: OptimalPoint = {
924
- pValTresh: 0,
925
- r2Tresh: 0,
926
- qCutoff: 0,
927
- success: false,
928
- };
1280
+ const pi = DG.TaskBarProgressIndicator.create('Optimizing... ', {cancelable: true});
929
1281
 
930
- const descriptorNames = descriptors.names();
931
- const {desired, nonDesired} = getDesiredTables(this.table, desirability);
1282
+ try {
1283
+ const descriptorNames = descriptors.names();
1284
+ const {desired, nonDesired} = getDesiredTables(this.table, desirability);
932
1285
 
933
- // Compute descriptors' statistics
934
- const descrStats = new Map<string, DescriptorStatistics>();
935
- descriptorNames.forEach((name) => {
936
- descrStats.set(name, getDescriptorStatistics(desired.col(name)!, nonDesired.col(name)!));
937
- });
938
- const descrStatsTable = getDescriptorStatisticsTable(descrStats);
1286
+ // Compute descriptors' statistics
1287
+ const descrStats = new Map<string, DescriptorStatistics>();
1288
+ descriptorNames.forEach((name) => {
1289
+ descrStats.set(name, getDescriptorStatistics(desired.col(name)!, nonDesired.col(name)!));
1290
+ });
1291
+ const descrStatsTable = getDescriptorStatisticsTable(descrStats);
939
1292
 
940
- // Filter by p-value
941
- const selectedByPvalue = getFilteredByPvalue(descrStatsTable, P_VAL_TRES_DEFAULT);
942
- if (selectedByPvalue.length < 1)
943
- return failedResult;
1293
+ // Filter by p-value
1294
+ const selectedByPvalue = getFilteredByPvalue(descrStatsTable, P_VAL_TRES_DEFAULT);
1295
+ if (selectedByPvalue.length < 1) {
1296
+ pi.close();
944
1297
 
945
- const correlationTriples = getCorrelationTriples(descriptors, selectedByPvalue);
1298
+ return {
1299
+ pValTresh: 0,
1300
+ r2Tresh: 0,
1301
+ qCutoff: 0,
1302
+ state: 'failed',
1303
+ msg: 'No descriptors passed the p-value threshold filter.',
1304
+ };
1305
+ }
946
1306
 
947
- const funcToBeMinimized = (point: Float32Array) => {
948
- // Filter by correlations
949
- const selectedByCorr = getFilteredByCorrelations(descriptors, selectedByPvalue, descrStats, point[0], correlationTriples);
1307
+ const correlationTriples = getCorrelationTriples(descriptors, selectedByPvalue);
950
1308
 
951
- // Compute pMPO parameters - training
952
- const params = getModelParams(desired, nonDesired, selectedByCorr, point[1]);
1309
+ const funcToBeMinimized = (point: Float32Array) => {
1310
+ // Filter by correlations
1311
+ const selectedByCorr = getFilteredByCorrelations(descriptors, selectedByPvalue, descrStats, point[0], correlationTriples);
953
1312
 
954
- // Get predictions
955
- const prediction = Pmpo.predict(this.table, params, useSigmoid, this.predictionName);
1313
+ // Compute pMPO parameters - training
1314
+ const params = getModelParams(desired, nonDesired, selectedByCorr, point[1]);
956
1315
 
957
- // Evaluate predictions and return 1 - AUC (since optimization minimizes the function, but we want to maximize AUC)
958
- return 1 - getPmpoEvaluation(desirability, prediction).auc;
959
- }; // funcToBeMinimized
1316
+ // Get predictions
1317
+ const prediction = Pmpo.predict(this.table, params, useSigmoid, this.predictionName);
960
1318
 
961
- const pi = DG.TaskBarProgressIndicator.create('Optimizing... ', {cancelable: true});
1319
+ // Evaluate predictions and return 1 - AUC (since optimization minimizes the function, but we want to maximize AUC)
1320
+ return 1 - getPmpoEvaluation(desirability, prediction).auc;
1321
+ }; // funcToBeMinimized
962
1322
 
963
- try {
964
1323
  const optimalResult = await optimizeNM(
965
1324
  pi,
966
1325
  funcToBeMinimized,
@@ -978,14 +1337,28 @@ export class Pmpo {
978
1337
  pValTresh: P_VAL_TRES_DEFAULT,
979
1338
  r2Tresh: optimalResult.optimalPoint[0],
980
1339
  qCutoff: optimalResult.optimalPoint[1],
981
- success: true,
1340
+ state: 'success',
1341
+ msg: 'Optimization completed successfully.',
982
1342
  };
983
- } else
984
- return failedResult;
1343
+ } else {
1344
+ return {
1345
+ pValTresh: 0,
1346
+ r2Tresh: 0,
1347
+ qCutoff: 0,
1348
+ state: 'canceled',
1349
+ msg: 'Auto-tuning was canceled by the user.',
1350
+ };
1351
+ }
985
1352
  } catch (err) {
986
1353
  pi.close();
987
1354
 
988
- return failedResult;
1355
+ return {
1356
+ pValTresh: 0,
1357
+ r2Tresh: 0,
1358
+ qCutoff: 0,
1359
+ state: 'failed',
1360
+ msg: err instanceof Error ? err.message : 'Optimization failed due to an unexpected error.',
1361
+ };
989
1362
  }
990
1363
  } // getOptimalSettings
991
1364
  }; // Pmpo