@datagrok/eda 1.4.12 → 1.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,8 @@ export type BasicStats = {
13
13
  desStd: number,
14
14
  nonDesAvg: number,
15
15
  nonDesStd: number,
16
+ min: number,
17
+ max: number,
16
18
  };
17
19
 
18
20
  /** Descriptor statistics including basic stats, t-statistics and p-value */
@@ -44,6 +46,7 @@ export type PmpoParams = BasicStats & Cutoff & SigmoidParams & {
44
46
  intersections: number[],
45
47
  x0: number,
46
48
  xBound: number,
49
+ inflection: number,
47
50
  };
48
51
 
49
52
  export type CorrelationTriple = [string, string, number];
@@ -74,17 +77,40 @@ export const DESCR_TABLE_TITLE = DESCR_TITLE + ' Statistics';
74
77
  export const SELECTED_TITLE = 'Selected';
75
78
  export const WEIGHT_TITLE = 'Weight';
76
79
  export const SCORES_TITLE = 'pMPO score';
77
- export const DESIRABILITY_COL_NAME = 'Desirability';
80
+ export const DESIRABILITY_COL_NAME = 'Desirability Curve';
81
+
82
+ /** Default p-value threshold for filtering descriptors */
83
+ export const P_VAL_TRES_DEFAULT = 0.001;
78
84
 
79
85
  /** Minimum p-value threshold for filtering descriptors */
80
- export const P_VAL_TRES_MIN = 0.01;
86
+ export const P_VAL_TRES_MIN = 0.001;
87
+
88
+ /** Maximum p-value threshold for filtering descriptors */
89
+ export const P_VAL_TRES_MAX = 1;
90
+
91
+ /** Default R-squared threshold for filtering correlated descriptors */
92
+ export const R2_DEFAULT = 0.53;
81
93
 
82
94
  /** Minimum R-squared threshold for filtering correlated descriptors */
83
95
  export const R2_MIN = 0.01;
84
96
 
97
+ /** Maximum R-squared threshold for filtering correlated descriptors */
98
+ export const R2_MAX = 1.0;
99
+
100
+ /** Default q-cutoff for descriptors in the pMPO model */
101
+ export const Q_CUTOFF_DEFAULT = 0.05;
102
+
85
103
  /** Minimum q-cutoff for descriptors in the pMPO model */
86
104
  export const Q_CUTOFF_MIN = 0.01;
87
105
 
106
+ /** Maximum q-cutoff for descriptors in the pMPO model */
107
+ export const Q_CUTOFF_MAX = 1;
108
+
109
+ /** Default setting for using sigmoid correction in pMPO */
110
+ export const USE_SIGMOID_DEFAULT = true;
111
+
112
+ export const FORMAT = '0.000';
113
+
88
114
  /** Colors used for selected and skipped descriptors */
89
115
  export enum COLORS {
90
116
  SELECTED = 'rgb(26, 146, 26)',
@@ -106,3 +132,87 @@ export type DesirabilityProfileProperties = Record<string, {
106
132
 
107
133
  export const STAT_GRID_HEIGHT = 75;
108
134
  export const DESIRABILITY_COLUMN_WIDTH = 305;
135
+
136
+ const POSITIVE_BASIC_RANGE_SIGMA_COEFFS = [0, 0.25, 0.5, 1, 1.5, 2, 2.5, 3, 4, 5];
137
+
138
+ /** Basic range sigma coefficients for desirability profile points */
139
+ export const BASIC_RANGE_SIGMA_COEFFS = POSITIVE_BASIC_RANGE_SIGMA_COEFFS
140
+ .slice(1)
141
+ .map((v) => -v)
142
+ .reverse()
143
+ .concat(POSITIVE_BASIC_RANGE_SIGMA_COEFFS);
144
+
145
+ const EXTRA_RANGE_SIGMA_COEFFS = [0.12, 0.37, 0.63, 0.75, 0.88, 1.25, 1.75, 2.25, 2.75]; // extra non-negative sigma multipliers merged into the basic ones
146
+ const EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS = POSITIVE_BASIC_RANGE_SIGMA_COEFFS.concat(EXTRA_RANGE_SIGMA_COEFFS).sort((a, b) => a - b); // numeric comparator: the default sort compares elements as strings
147
+
148
+ /** Extended range sigma coefficients for desirability profile points */
149
+ export const EXTENDED_RANGE_SIGMA_COEFFS = EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS
150
+ .slice(1)
151
+ .map((v) => -v)
152
+ .reverse()
153
+ .concat(EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS);
154
+
155
+ /** Confusion matrix type */
156
+ export type ConfusionMatrix = {
157
+ TP: number,
158
+ TN: number,
159
+ FP: number,
160
+ FN: number,
161
+ };
162
+
163
+ // Titles for ROC curve columns
164
+ export const TPR_TITLE = 'TPR (Sensitivity)';
165
+ export const FPR_TITLE = 'FPR (1 - Specificity)';
166
+ export const THRESHOLD = 'Threshold';
167
+
168
+ // Number of points in ROC curve
169
+ const ROC_POINTS = 100;
170
+ export const ROC_TRESHOLDS_COUNT = ROC_POINTS + 1;
171
+
172
+ /** ROC curve thresholds from 0.0 to 1.0 */
173
+ export const ROC_TRESHOLDS = new Float32Array(Array.from({length: ROC_TRESHOLDS_COUNT}, (_, i) => i / ROC_POINTS));
174
+
175
+ /** Sample dataframe for pMPO training: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/ */
176
+ export const SOURCE_PATH = 'System:AppData/Eda/drugs-props-train.csv';
177
+
178
+ /** Scores of the sample dataframe computed using https://github.com/Merck/pmpo */
179
+ export const SCORES_PATH = 'System:AppData/Eda/drugs-props-train-scores.csv';
180
+
181
+ /** Name of the synthetic drug used in the sample dataframe */
182
+ export const SYNTHETIC_DRUG_NAME = 'Synthetic drug';
183
+
184
+ /** pMPO model evaluation result type */
185
+ export type ModelEvaluationResult = {
186
+ auc: number,
187
+ threshold: number,
188
+ tpr: Float32Array,
189
+ fpr: Float32Array,
190
+ };
191
+
192
+ /** Maximum number of rows for which auto-tuning is applicable */
193
+ export const AUTO_TUNE_MAX_APPLICABLE_ROWS = 10000;
194
+
195
+ /** Default settings for optimization in pMPO parameter tuning */
196
+ export const DEFAULT_OPTIMIZATION_SETTINGS = new Map<string, number>([
197
+ ['tolerance', 0.001],
198
+ ['maxIter', 25],
199
+ ['nonZeroParam', 0.0001],
200
+ ['initialScale', 0.02],
201
+ ['scaleReflaction', 1],
202
+ ['scaleExpansion', 2],
203
+ ['scaleContraction', -0.5],
204
+ ]);
205
+
206
+ /** Optimal point type for pMPO parameter tuning */
207
+ export type OptimalPoint = {
208
+ pValTresh: number,
209
+ r2Tresh: number,
210
+ qCutoff: number,
211
+ success: boolean,
212
+ };
213
+
214
+ /** Minimum bounds for pMPO parameters during optimization */
215
+ export const LOW_PARAMS_BOUNDS = new Float32Array([0.5, Q_CUTOFF_MIN]);
216
+
217
+ /** Maximum bounds for pMPO parameters during optimization */
218
+ export const HIGH_PARAMS_BOUNDS = new Float32Array([R2_MAX, Q_CUTOFF_MAX]);
@@ -9,9 +9,10 @@ import '../../css/pmpo.css';
9
9
 
10
10
  import {COLORS, DESCR_TABLE_TITLE, DESCR_TITLE, DescriptorStatistics, DesirabilityProfileProperties,
11
11
  DESIRABILITY_COL_NAME, FOLDER, P_VAL, PMPO_COMPUTE_FAILED, PmpoParams, SCORES_TITLE,
12
- SELECTED_TITLE, STAT_TO_TITLE_MAP, TINY, WEIGHT_TITLE,
13
- CorrelationTriple} from './pmpo-defs';
14
- import {computeSigmoidParamsFromX0, getCutoffs, normalPdf, sigmoidS, solveNormalIntersection} from './stat-tools';
12
+ SELECTED_TITLE, STAT_TO_TITLE_MAP, TINY, WEIGHT_TITLE, CorrelationTriple,
13
+ BASIC_RANGE_SIGMA_COEFFS, EXTENDED_RANGE_SIGMA_COEFFS} from './pmpo-defs';
14
+ import {computeSigmoidParamsFromX0, getCutoffs, gaussDesirabilityFunc, sigmoidS,
15
+ solveNormalIntersection} from './stat-tools';
15
16
  import {getColorScaleDiv} from '../pareto-optimization/utils';
16
17
  import {OPT_TYPE} from '../pareto-optimization/defs';
17
18
 
@@ -243,11 +244,22 @@ export function getModelParams(desired: DG.DataFrame, nonDesired: DG.DataFrame,
243
244
  // Unbiased standard deviation
244
245
  const sigmaNonDes = nonDesCol.stats.stdev * Math.sqrt((nonDesLen - 1) / nonDesLen);
245
246
 
246
- // Compute cutoffs and intersections
247
+ // Compute cutoffs
247
248
  const cutoffs = getCutoffs(muDes, sigmaDes, muNonDes, sigmaNonDes);
249
+
250
+ // column_stats['inflection'] = np.exp(-np.square((column_stats['cutoff'] - column_stats['good_mean'])) /
251
+ // (2 * np.square(column_stats['good_std'])))
252
+ // Compute inflection point
253
+ const inflection = Math.exp(-((cutoffs.cutoff - muDes) ** 2) / (2 * (sigmaDes ** 2)));
254
+
255
+ // Compute intersections of the two normal distributions
248
256
  const intersections = solveNormalIntersection(muDes, sigmaDes, muNonDes, sigmaNonDes);
249
257
 
250
- // Compute parameters for the generalized sigmoid function
258
+ const b = (Math.pow(inflection, -1.0) - 1.0);
259
+ const n = (Math.pow(qCutoff, -1.0) - 1.0);
260
+ const c = Math.pow(10.0, ((Math.log10(n / b)) / (-1.0 * (muNonDes - cutoffs.cutoff))));
261
+
262
+ // Compute parameters for the generalized sigmoid function TODO: delete
251
263
 
252
264
  let x0: number | null = null;
253
265
 
@@ -279,17 +291,20 @@ export function getModelParams(desired: DG.DataFrame, nonDesired: DG.DataFrame,
279
291
  desStd: sigmaDes,
280
292
  nonDesAvg: muNonDes,
281
293
  nonDesStd: sigmaNonDes,
294
+ min: Math.min(desCol.stats.min, nonDesCol.stats.min),
295
+ max: Math.max(desCol.stats.max, nonDesCol.stats.max),
282
296
  cutoff: cutoffs.cutoff,
283
297
  cutoffDesired: cutoffs.cutoffDesired,
284
298
  cutoffNotDesired: cutoffs.cutoffNotDesired,
285
299
  pX0: sigmoidParams.pX0,
286
- b: sigmoidParams.b,
287
- c: sigmoidParams.c,
300
+ b: b,
301
+ c: c,
288
302
  zScore: z,
289
303
  weight: z,
290
304
  intersections: intersections,
291
305
  x0: x0,
292
306
  xBound: xBound,
307
+ inflection: inflection,
293
308
  });
294
309
  });
295
310
 
@@ -338,12 +353,13 @@ export async function loadPmpoParams(file: DG.FileInfo): Promise<Map<string, Pmp
338
353
  * @param name Name of the desirability profile.
339
354
  * @param description Description of the desirability profile.
340
355
  */
341
- export function getDesirabilityProfileJson(params: Map<string, PmpoParams>, name: string, description: string) {
356
+ export function getDesirabilityProfileJson(params: Map<string, PmpoParams>, useSigmoidalCorrection: boolean,
357
+ name: string, description: string, truncatedRange: boolean): any {
342
358
  return {
343
359
  'type': 'MPO Desirability Profile',
344
360
  'name': name,
345
361
  'description': description,
346
- 'properties': getDesirabilityProfileProperties(params),
362
+ 'properties': getDesirabilityProfileProperties(params, useSigmoidalCorrection, truncatedRange),
347
363
  };
348
364
  }
349
365
 
@@ -351,7 +367,8 @@ export function getDesirabilityProfileJson(params: Map<string, PmpoParams>, name
351
367
  * @param params Map of descriptor names to their pMPO parameters.
352
368
  * @param modelName Suggested model name (used as default file name).
353
369
  */
354
- export async function saveModel(params: Map<string, PmpoParams>, modelName: string): Promise<void> {
370
+ export async function saveModel(params: Map<string, PmpoParams>, modelName: string,
371
+ useSigmoidalCorrection: boolean): Promise<void> {
355
372
  let fileName = modelName;
356
373
  const nameInput = ui.input.string('File', {
357
374
  value: fileName,
@@ -388,8 +405,10 @@ export async function saveModel(params: Map<string, PmpoParams>, modelName: stri
388
405
  if (typeInput.value) {
389
406
  return getDesirabilityProfileJson(
390
407
  params,
408
+ useSigmoidalCorrection,
391
409
  nameInput.value,
392
410
  descriptionInput.value,
411
+ false,
393
412
  );
394
413
  }
395
414
 
@@ -462,7 +481,10 @@ export function addCorrelationColumns(df: DG.DataFrame, descriptorNames: string[
462
481
  return df;
463
482
  } // addCorrelationColumns
464
483
 
465
- /* Sets color coding for the p-value column in the statistics table */
484
+ /** Sets color coding for the p-value column in the statistics table
485
+ * @param table DataFrame with descriptor statistics.
486
+ * @param pValTresh P-value threshold.
487
+ */
466
488
  export function setPvalColumnColorCoding(table: DG.DataFrame, pValTresh: number): void {
467
489
  const pValCol = table.col(P_VAL);
468
490
  if (pValCol == null)
@@ -475,7 +497,11 @@ export function setPvalColumnColorCoding(table: DG.DataFrame, pValTresh: number)
475
497
  pValCol.meta.colors.setConditional(rules);
476
498
  } // setPvalColumnColorCoding
477
499
 
478
- /* Sets color coding for the p-value column in the statistics table */
500
+ /** Sets color coding for the correlation columns in the statistics table.
501
+ * @param table DataFrame with descriptor statistics.
502
+ * @param descriptorNames List of descriptor names.
503
+ * @param r2Tresh R-squared threshold.
504
+ */
479
505
  export function setCorrColumnColorCoding(table: DG.DataFrame, descriptorNames: string[], r2Tresh: number): void {
480
506
  descriptorNames.forEach((name) => {
481
507
  const col = table.col(name);
@@ -483,8 +509,8 @@ export function setCorrColumnColorCoding(table: DG.DataFrame, descriptorNames: s
483
509
  return;
484
510
 
485
511
  const rules: Record<string, string> = {};
486
- rules[`>${r2Tresh}`] = COLORS.SKIPPED;
487
- rules[`=<${r2Tresh}`] = COLORS.SELECTED;
512
+ rules[`>=${r2Tresh}`] = COLORS.SKIPPED;
513
+ rules[`<${r2Tresh}`] = COLORS.SELECTED;
488
514
 
489
515
  col.meta.colors.setConditional(rules);
490
516
  });
@@ -492,20 +518,18 @@ export function setCorrColumnColorCoding(table: DG.DataFrame, descriptorNames: s
492
518
 
493
519
  /** Returns desirability profile properties for the given pMPO parameters.
494
520
  * @param params Map of descriptor names to their pMPO parameters.
521
+ * @param useSigmoidalCorrection Whether to use sigmoidal correction in desirability functions.
522
+ * @param truncatedRange Whether to build the profile over the truncated range shown in the stat grid (true) or over the extended range used for the full profile (false).
495
523
  */
496
- function getDesirabilityProfileProperties(params: Map<string, PmpoParams>) {
524
+ function getDesirabilityProfileProperties(params: Map<string, PmpoParams>,
525
+ useSigmoidalCorrection: boolean, truncatedRange: boolean): DesirabilityProfileProperties {
497
526
  const props: DesirabilityProfileProperties = {};
498
527
 
499
- let maxWeight = 0;
500
- params.forEach((param) => maxWeight = Math.max(maxWeight, param.weight));
501
-
502
- const scale = (maxWeight > 0) ? (1 / maxWeight) : 1;
503
-
504
528
  params.forEach((param, name) => {
505
- const range = significantPoints(param);
529
+ const range = significantPoints(param, truncatedRange);
506
530
  props[name] = {
507
- weight: param.weight * scale,
508
- line: getLine(param),
531
+ weight: param.weight,
532
+ line: getLine(param, useSigmoidalCorrection, truncatedRange),
509
533
  min: Math.min(...range),
510
534
  max: Math.max(...range),
511
535
  };
@@ -514,67 +538,66 @@ function getDesirabilityProfileProperties(params: Map<string, PmpoParams>) {
514
538
  return props;
515
539
  } // getDesirabilityProfileProperties
516
540
 
517
- /** Returns array of arguments for Gaussian function centered at mu with stddev sigma. */
518
- function getArgsOfGaussFunc(mu: number, sigma: number): number[] {
519
- return [
520
- mu - 3 * sigma,
521
- mu - 2.5 * sigma,
522
- mu - 2 * sigma,
523
- mu - 1.5 * sigma,
524
- mu - sigma,
525
- mu - 0.5 * sigma,
526
- mu - 0.25 * sigma,
527
- mu,
528
- mu + 0.25 * sigma,
529
- mu + 0.5 * sigma,
530
- mu + sigma,
531
- mu + 1.5 * sigma,
532
- mu + 2 * sigma,
533
- mu + 2.5 * sigma,
534
- mu + 3 * sigma,
535
- ];
541
+ /** Returns array of arguments for Gaussian function centered at mu with stddev sigma.
542
+ * @param mu Mean of the Gaussian function.
543
+ * @param sigma Standard deviation of the Gaussian function.
544
+ * @param truncatedRange Whether to use truncated range (for interactive app) or extended range (for full profile).
545
+ * @return Array of arguments for the Gaussian function.
546
+ */
547
+ function getArgsOfGaussFunc(mu: number, sigma: number, truncatedRange: boolean): number[] {
548
+ return truncatedRange ?
549
+ BASIC_RANGE_SIGMA_COEFFS.map((coeff) => mu + coeff * sigma) : // range for interactive app
550
+ EXTENDED_RANGE_SIGMA_COEFFS.map((coeff) => mu + coeff * sigma); // actual full range for desirability profile
536
551
  } // getArgsOfGaussFunc
537
552
 
538
- /** Returns scale factor for the given pMPO parameters and range of x values. */
539
- function getScale(param: PmpoParams, range: number[]): number {
540
- const values = range.map((x) => basicFunction(x, param));
541
-
542
- return Math.max(...values);
543
- }
544
-
545
- /** Basic pMPO function combining Gaussian and sigmoid functions. */
546
- function basicFunction(x: number, param: PmpoParams): number {
547
- return normalPdf(x, param.desAvg, param.desStd) * sigmoidS(x, param.x0, param.b, param.c);
553
+ /** Basic pMPO function combining Gaussian and sigmoid functions.
554
+ * @param x Argument.
555
+ * @param param pMPO parameters.
556
+ * @param useSigmoidalCorrection Whether to use sigmoidal correction.
557
+ * @return Value of the basic pMPO function at x.
558
+ */
559
+ function basicFunction(x: number, param: PmpoParams, useSigmoidalCorrection: boolean): number {
560
+ return gaussDesirabilityFunc(x, param.desAvg, param.desStd) *
561
+ (useSigmoidalCorrection ? sigmoidS(x, param.cutoff, param.b, param.c) : 1);
548
562
  }
549
563
 
550
- /** Returns line points for the given pMPO parameters. */
551
- function getLine(param: PmpoParams): [number, number][] {
552
- //const range = getArgsOfGaussFunc(param.desAvg, param.desStd);
553
- const range = significantPoints(param);
554
- const scale = getScale(param, range);
564
+ /** Returns line points for the given pMPO parameters.
565
+ * @param param pMPO parameters.
566
+ * @param useSigmoidalCorrection Whether to use sigmoidal correction.
567
+ * @param truncatedRange Whether to use truncated range (for interactive app) or extended range (for full profile).
568
+ * @return Array of [x, y] points representing the desirability function line.
569
+ */
570
+ function getLine(param: PmpoParams, useSigmoidalCorrection: boolean, truncatedRange: boolean): [number, number][] {
571
+ const range = significantPoints(param, truncatedRange);
555
572
 
556
- return range.map((x) => [x, basicFunction(x, param) / scale]);
573
+ return range.map((x) => [x, basicFunction(x, param, useSigmoidalCorrection)]);
557
574
  }
558
575
 
559
- /** Returns significant points for the given pMPO parameters. */
560
- function significantPoints(param: PmpoParams): number[] {
561
- const start = param.desAvg - 10 * param.desStd;
562
- const end = param.desAvg + 10 * param.desStd;
563
- const steps = 1000;
564
-
565
- let arg = start;
566
- let func = basicFunction(arg, param);
567
- let x = 0;
568
- let y = 0;
569
-
570
- for (let i = 0; i <= steps; i++) {
571
- x = start + ((end - start) * i) / steps;
572
- y = basicFunction(x, param);
573
- if (y > func) {
574
- arg = x;
575
- func = y;
576
- }
576
+ /** Returns significant points (x values, in ascending numeric order) for the given pMPO parameters.
577
+ * @param param pMPO parameters.
578
+ * @param truncatedRange Whether to use truncated range (for interactive app) or extended range (for full profile).
579
+ * @return Array of significant points for the desirability function.
580
+ */
581
+ function significantPoints(param: PmpoParams, truncatedRange: boolean): number[] {
582
+ const points = getArgsOfGaussFunc(param.desAvg, param.desStd, truncatedRange);
583
+
584
+ /* Truncate range to show fewer points */
585
+ if (truncatedRange) {
586
+ const min = Math.min(param.min, param.desAvg - 3 * param.desStd);
587
+ const max = Math.max(param.max, param.desAvg + 3 * param.desStd);
588
+
589
+ return points
590
+ .filter((x) => (min <= x) && (x <= max))
591
+ .sort((a, b) => a - b); // numeric comparator: the default sort orders numbers as strings
577
592
  }
578
593
 
579
- return getArgsOfGaussFunc(arg, param.desStd);
594
+ return points;
580
595
  } // significantPoints
596
+
597
+ /** Custom error class for pMPO-related errors. */
598
+ export class PmpoError extends Error {
599
+ constructor(message: string) {
600
+ super(message);
601
+ this.name = 'PmpoError';
602
+ }
603
+ }