@datagrok/eda 1.4.12 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/.eslintrc.json +0 -1
  2. package/CHANGELOG.md +10 -0
  3. package/CLAUDE.md +185 -0
  4. package/css/pmpo.css +9 -0
  5. package/dist/111.js +1 -1
  6. package/dist/111.js.map +1 -1
  7. package/dist/128.js +1 -1
  8. package/dist/128.js.map +1 -1
  9. package/dist/153.js +1 -1
  10. package/dist/153.js.map +1 -1
  11. package/dist/23.js +1 -1
  12. package/dist/23.js.map +1 -1
  13. package/dist/234.js +1 -1
  14. package/dist/234.js.map +1 -1
  15. package/dist/242.js +1 -1
  16. package/dist/242.js.map +1 -1
  17. package/dist/260.js +1 -1
  18. package/dist/260.js.map +1 -1
  19. package/dist/33.js +1 -1
  20. package/dist/33.js.map +1 -1
  21. package/dist/348.js +1 -1
  22. package/dist/348.js.map +1 -1
  23. package/dist/377.js +1 -1
  24. package/dist/377.js.map +1 -1
  25. package/dist/397.js +2 -0
  26. package/dist/397.js.map +1 -0
  27. package/dist/412.js +1 -1
  28. package/dist/412.js.map +1 -1
  29. package/dist/415.js +1 -1
  30. package/dist/415.js.map +1 -1
  31. package/dist/501.js +1 -1
  32. package/dist/501.js.map +1 -1
  33. package/dist/531.js +1 -1
  34. package/dist/531.js.map +1 -1
  35. package/dist/583.js +1 -1
  36. package/dist/583.js.map +1 -1
  37. package/dist/589.js +1 -1
  38. package/dist/589.js.map +1 -1
  39. package/dist/603.js +1 -1
  40. package/dist/603.js.map +1 -1
  41. package/dist/656.js +1 -1
  42. package/dist/656.js.map +1 -1
  43. package/dist/682.js +1 -1
  44. package/dist/682.js.map +1 -1
  45. package/dist/705.js +1 -1
  46. package/dist/705.js.map +1 -1
  47. package/dist/727.js +1 -1
  48. package/dist/727.js.map +1 -1
  49. package/dist/731.js +1 -1
  50. package/dist/731.js.map +1 -1
  51. package/dist/738.js +1 -1
  52. package/dist/738.js.map +1 -1
  53. package/dist/763.js +1 -1
  54. package/dist/763.js.map +1 -1
  55. package/dist/778.js +1 -1
  56. package/dist/778.js.map +1 -1
  57. package/dist/783.js +1 -1
  58. package/dist/783.js.map +1 -1
  59. package/dist/793.js +1 -1
  60. package/dist/793.js.map +1 -1
  61. package/dist/810.js +1 -1
  62. package/dist/810.js.map +1 -1
  63. package/dist/860.js +1 -1
  64. package/dist/860.js.map +1 -1
  65. package/dist/907.js +1 -1
  66. package/dist/907.js.map +1 -1
  67. package/dist/950.js +1 -1
  68. package/dist/950.js.map +1 -1
  69. package/dist/980.js +1 -1
  70. package/dist/980.js.map +1 -1
  71. package/dist/990.js +1 -1
  72. package/dist/990.js.map +1 -1
  73. package/dist/package-test.js +1 -1
  74. package/dist/package-test.js.map +1 -1
  75. package/dist/package.js +1 -1
  76. package/dist/package.js.map +1 -1
  77. package/eslintrc.json +0 -1
  78. package/files/drugs-props-train-scores.csv +664 -0
  79. package/package.json +11 -7
  80. package/src/package-api.ts +7 -3
  81. package/src/package-test.ts +4 -1
  82. package/src/package.g.ts +21 -9
  83. package/src/package.ts +33 -23
  84. package/src/pareto-optimization/pareto-computations.ts +6 -0
  85. package/src/pareto-optimization/pareto-optimizer.ts +1 -1
  86. package/src/pls/pls-constants.ts +3 -1
  87. package/src/pls/pls-tools.ts +73 -69
  88. package/src/probabilistic-scoring/data-generator.ts +202 -0
  89. package/src/probabilistic-scoring/nelder-mead.ts +204 -0
  90. package/src/probabilistic-scoring/pmpo-defs.ts +141 -3
  91. package/src/probabilistic-scoring/pmpo-utils.ts +240 -126
  92. package/src/probabilistic-scoring/prob-scoring.ts +862 -135
  93. package/src/probabilistic-scoring/stat-tools.ts +141 -6
  94. package/src/tests/anova-tests.ts +1 -1
  95. package/src/tests/classifiers-tests.ts +1 -1
  96. package/src/tests/dim-reduction-tests.ts +1 -1
  97. package/src/tests/linear-methods-tests.ts +1 -1
  98. package/src/tests/mis-vals-imputation-tests.ts +1 -1
  99. package/src/tests/pareto-tests.ts +251 -0
  100. package/src/tests/pmpo-tests.ts +797 -0
  101. package/test-console-output-1.log +303 -239
  102. package/test-record-1.mp4 +0 -0
  103. package/files/mpo-done.ipynb +0 -2123
@@ -0,0 +1,202 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {DescriptorStatistics, SOURCE_PATH, SYNTHETIC_DRUG_NAME} from './pmpo-defs';
6
+ import {getDescriptorStatistics, getDesiredTables} from './stat-tools';
7
+
8
+ //@ts-ignore: no types
9
+ import * as jStat from 'jstat';
10
+
11
/** Builds a synthetic dataset for pMPO model training and testing.
 *
 * The sample table is read from `SOURCE_PATH` and passed to `PmpoDataGenerator`
 * (drug name column 'Drug', desirability column 'CNS', SMILES column 'Smiles').
 * @param samplesCount Number of samples to generate
 * @param isTest If `true` (default), the generated table is returned as is. If `false`, extra columns
 * are appended: a constant bool column, a constant int column, a copy of the first numerical column
 * with up to 5 leading values set to null, an all-null column and categorical columns derived from 'CNS'
 * @returns DataFrame with generated data */
export async function getSynteticPmpoData(samplesCount: number, isTest: boolean = true): Promise<DG.DataFrame> {
  const sourceTable = await grok.dapi.files.readCsv(SOURCE_PATH);
  const table = new PmpoDataGenerator(sourceTable, 'Drug', 'CNS', 'Smiles').getGenerated(samplesCount);

  // Test mode: nothing else to add
  if (isTest)
    return table;

  // Constant columns
  const constBool = DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'Const bool', new Array(samplesCount).fill(true));
  table.columns.add(constBool);
  const constInt = DG.Column.fromInt32Array('Const int', new Int32Array(samplesCount).fill(1));
  table.columns.add(constInt);

  // Copy of the first numeric column with (at most) 5 leading missing values
  const numericCol = table.columns.toList().find((col) => col.isNumerical);
  if (numericCol) {
    const withGaps = numericCol.clone();
    withGaps.name = `${numericCol.name} (missing)`;

    const gapsCount = Math.min(5, withGaps.length);
    for (let row = 0; row < gapsCount; ++row)
      withGaps.set(row, DG.FLOAT_NULL);

    table.columns.add(withGaps);
  }

  // Column consisting of nulls only
  const nulls = new Float32Array(samplesCount).fill(DG.FLOAT_NULL);
  table.columns.add(DG.Column.fromFloat32Array('Nulls', nulls));

  // Categorical columns derived from the boolean 'CNS' column
  for (const categorical of getCategoricalColumns(table.col('CNS')!, samplesCount))
    table.columns.add(categorical);

  return table;
} // getSynteticPmpoData
44
+
45
/** Creates string columns that mirror a boolean source column.
 *
 * Three columns are produced:
 * - 'CNS (strings)': 'active' for truthy source values, 'non-active' otherwise;
 * - 'CNS (4 categories)': 'perfect' or 'good' (picked at random) for truthy source values,
 *   'bad' or 'worst' (picked at random) otherwise;
 * - 'Single category': the constant value 'Unknown'.
 * @param sourceBoolCol Source boolean column to base the categorical columns on
 * @param samplesCount Number of samples to generate
 * @returns Array of generated categorical columns */
function getCategoricalColumns(sourceBoolCol: DG.Column, samplesCount: number): DG.Column[] {
  const flags = sourceBoolCol.toList();
  const twoCats = new Array<string>(samplesCount);
  const fourCats = new Array<string>(samplesCount);

  for (let row = 0; row < samplesCount; ++row) {
    // Exactly one random draw per row
    const coin = Math.random() < 0.5;

    if (flags[row]) {
      twoCats[row] = 'active';
      fourCats[row] = coin ? 'perfect' : 'good';
    } else {
      twoCats[row] = 'non-active';
      fourCats[row] = coin ? 'bad' : 'worst';
    }
  }

  const singleCat = new Array<string>(samplesCount).fill('Unknown');

  return [
    DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'CNS (strings)', twoCats),
    DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'CNS (4 categories)', fourCats),
    DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'Single category', singleCat),
  ];
} // getCategoricalColumns
65
+
66
/** Class for generating synthetic data for pMPO model training and testing.
 *
 * Statistics of every numerical column of the source dataframe are computed once in the constructor,
 * separately for the desired and non-desired rows. Synthetic rows are then sampled from normal
 * distributions with the corresponding means and standard deviations. */
export class PmpoDataGenerator {
  private readonly sourceDf: DG.DataFrame;
  private readonly drugName: string;
  private readonly desirabilityColName: string;
  private readonly smilesColName: string;
  /** Share of desired rows in the source dataframe */
  private readonly desiredProbability: number;
  /** Statistics of numerical columns of the source dataframe, keyed by column name */
  private readonly descriptorStats: Map<string, DescriptorStatistics>;

  /** @param df Source dataframe
   * @param drugName Name of the column with drug names
   * @param desirabilityColName Name of the desirability column
   * @param smilesColName Name of the column with SMILES strings */
  constructor(df: DG.DataFrame, drugName: string, desirabilityColName: string, smilesColName: string) {
    this.sourceDf = df;
    this.drugName = drugName;
    this.desirabilityColName = desirabilityColName;
    this.smilesColName = smilesColName;

    const descriptorNames = df.columns.toList().filter((col) => col.isNumerical).map((col) => col.name);
    const {desired, nonDesired} = getDesiredTables(df, df.col(desirabilityColName)!);

    // Compute descriptors' statistics
    this.descriptorStats = new Map<string, DescriptorStatistics>();
    descriptorNames.forEach((name) => {
      this.descriptorStats.set(name, getDescriptorStatistics(desired.col(name)!, nonDesired.col(name)!));
    });

    // Probability of desired class
    this.desiredProbability = desired.rowCount / df.rowCount;
  } // constructor

  /** Generates synthetic data for pMPO model training and testing
   * @param samplesCount Number of samples to generate
   * @returns DataFrame with generated data
   * @throws Error if `samplesCount` is not greater than 1 */
  public getGenerated(samplesCount: number): DG.DataFrame {
    if (samplesCount <= 1)
      throw new Error('Failed to generate pMPO data: sample count must be greater than 1.');

    let result: DG.DataFrame;

    /* Use rows from the source dataframe if the requested sample count
      is less than or equal to the source dataframe row count */
    if (samplesCount <= this.sourceDf.rowCount) {
      const rowMask = DG.BitSet.create(this.sourceDf.rowCount);

      for (let i = 0; i < samplesCount; ++i)
        rowMask.set(i, true);

      result = this.sourceDf.clone(rowMask);
    } else {
      // Otherwise, keep all source rows and append the missing number of synthetic rows
      const cloneDf = this.getClonedSourceDfWithFloatNumericCols();
      result = cloneDf.append(this.getSyntheticTable(samplesCount - this.sourceDf.rowCount));
    }

    // Check boolean columns and ensure non-zero stdev
    for (const col of result.columns) {
      if (col.type === DG.COLUMN_TYPE.BOOL && col.stats.stdev === 0) {
        // All values are the same: flip the first two values
        // NOTE(review): for a table of exactly 2 rows this leaves the column constant - confirm intent
        let value = col.get(0);
        col.set(0, !value);

        value = col.get(1);
        col.set(1, !value);
      }
    }

    return result;
  } // getGenerated

  /** Generates a synthetic data table
   * @param samplesCount Number of samples to generate
   * @returns DataFrame with synthetic data */
  private getSyntheticTable(samplesCount: number): DG.DataFrame {
    const desirabilityRaw = new Array<boolean>(samplesCount);

    for (let i = 0; i < samplesCount; ++i)
      desirabilityRaw[i] = (Math.random() < this.desiredProbability);

    // Explicitly typed, so that numeric columns can be pushed without suppressing type checks
    const cols: DG.Column[] = [
      this.getDrugColumn(samplesCount),
      this.getSmilesColumn(samplesCount),
      DG.Column.fromList(DG.COLUMN_TYPE.BOOL, this.desirabilityColName, desirabilityRaw),
    ];

    this.descriptorStats.forEach((stat, name) => {
      const arr = new Float32Array(samplesCount);

      // Sample each value from the distribution of the class assigned to the row
      for (let i = 0; i < samplesCount; ++i) {
        if (desirabilityRaw[i])
          arr[i] = jStat.normal.sample(stat.desAvg, stat.desStd);
        else
          arr[i] = jStat.normal.sample(stat.nonDesAvg, stat.nonDesStd);
      }

      cols.push(DG.Column.fromFloat32Array(name, arr));
    });

    return DG.DataFrame.fromColumns(cols);
  } // getSyntheticTable

  /** Generates a column with synthetic drug names
   * @param samplesCount Number of samples to generate
   * @returns Column with synthetic drug names */
  private getDrugColumn(samplesCount: number): DG.Column<string> {
    return DG.Column.fromList(
      DG.COLUMN_TYPE.STRING,
      this.drugName,
      Array.from({length: samplesCount}, (_, i) => `${SYNTHETIC_DRUG_NAME} ${i + 1}`));
  }

  /** Generates a column with synthetic SMILES strings (the same string 'C' in every row)
   * @param samplesCount Number of samples to generate
   * @returns Column with synthetic SMILES strings */
  private getSmilesColumn(samplesCount: number): DG.Column<string> {
    return DG.Column.fromList(
      DG.COLUMN_TYPE.STRING,
      this.smilesColName,
      Array.from({length: samplesCount}, () => 'C'));
  }

  /** Clones the source dataframe converting numerical columns to Float type
   * @returns Cloned dataframe */
  private getClonedSourceDfWithFloatNumericCols(): DG.DataFrame {
    const cols: DG.Column[] = [];

    this.sourceDf.columns.toList().forEach((col) => {
      if (col.isNumerical)
        cols.push(col.clone().convertTo(DG.COLUMN_TYPE.FLOAT));
      else
        cols.push(col.clone());
    });

    const clone = DG.DataFrame.fromColumns(cols);
    clone.name = this.sourceDf.name;

    return clone;
  }
} // PmpoDataGenerator
@@ -0,0 +1,204 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
/** Builds the initial simplex and evaluates the objective function at each of its vertices.
 *
 * Vertex 0 is a copy of `paramsInitial`. Vertex `k + 1` is a copy of `paramsInitial` with the k-th
 * coordinate perturbed: set to the 'nonZeroParam' setting if it is zero, otherwise increased by
 * 'initialScale' times its value. Only the perturbed coordinate is clamped to the given bounds.
 * @param objectiveFunc Objective function
 * @param settings Settings; the keys 'nonZeroParam' and 'initialScale' are read
 * @param paramsInitial Initial point
 * @param restrictionsBottom Lower bounds of the parameters
 * @param restrictionsTop Upper bounds of the parameters
 * @returns Simplex vertices and the objective values at them */
function getInitialParams(
  objectiveFunc: (x: Float32Array) => number,
  settings: Map<string, number>,
  paramsInitial: Float32Array,
  restrictionsBottom: Float32Array,
  restrictionsTop: Float32Array): [Float32Array[], number[]] {
  const dimParams = paramsInitial.length;
  const verticesCount = dimParams + 1;
  const nonZeroParam = settings.get('nonZeroParam')!;
  const initScale = settings.get('initialScale')!;

  const vertices = new Array<Float32Array>(verticesCount);
  const objectives = new Array<number>(verticesCount);

  for (let v = 0; v < verticesCount; v++) {
    // Start from a copy of the initial point
    const vertex = new Float32Array(paramsInitial);

    // Each vertex, except the first one, differs from the initial point in a single coordinate
    if (v > 0) {
      const k = v - 1;

      if (paramsInitial[k] === 0)
        vertex[k] = nonZeroParam;
      else
        vertex[k] += initScale * paramsInitial[k];

      const shifted = vertex[k];
      if (shifted < restrictionsBottom[k])
        vertex[k] = restrictionsBottom[k];
      else if (shifted > restrictionsTop[k])
        vertex[k] = restrictionsTop[k];
    }

    vertices[v] = vertex;
    objectives[v] = objectiveFunc(vertex);
  }

  return [vertices, objectives];
} // getInitialParams
41
+
42
/** Computes the centroid of all simplex vertices except the specified one.
 * @param centroid Output buffer for the centroid coordinates
 * @param dimParams Number of parameters (the simplex has `dimParams + 1` vertices)
 * @param lastIndex Index of the vertex to exclude
 * @param optParams Simplex vertices */
function fillCentroid(centroid: Float32Array, dimParams: number, lastIndex: number, optParams: Float32Array[]): void {
  const verticesCount = dimParams + 1;

  for (let coord = 0; coord < dimParams; coord++) {
    let sum = 0;

    for (let vertex = 0; vertex < verticesCount; vertex++) {
      if (vertex === lastIndex)
        continue;

      sum += optParams[vertex][coord];
    }

    // dimParams vertices contribute to the sum
    centroid[coord] = sum / dimParams;
  }
} // fillCentroid
53
+
54
/** Computes a trial point on the line through the centroid and the specified vertex:
 * `point = centroid + scale * (centroid - optParams[lastIndex])`, with each coordinate clamped to the bounds.
 * @param centroid Centroid coordinates
 * @param point Output buffer for the trial point
 * @param lastIndex Index of the vertex the point is computed relative to
 * @param optParams Simplex vertices
 * @param scale Step scale
 * @param dimParams Number of parameters
 * @param restrictionsBottom Lower bounds of the parameters
 * @param restrictionsTop Upper bounds of the parameters */
function fillPoint(
  centroid: Float32Array, point: Float32Array,
  lastIndex: number, optParams: Float32Array[],
  scale: number, dimParams: number,
  restrictionsBottom: Float32Array,
  restrictionsTop: Float32Array): void {
  const vertex = optParams[lastIndex];

  for (let k = 0; k < dimParams; k++) {
    const center = centroid[k];

    point[k] = center;
    point[k] += scale * (center - vertex[k]);

    // Keep the coordinate inside [bottom, top]
    const moved = point[k];
    const bottom = restrictionsBottom[k];
    const top = restrictionsTop[k];

    if (moved < bottom)
      point[k] = bottom;
    else if (moved > top)
      point[k] = top;
  }
} // fillPoint
70
+
71
/** Minimizes the objective function using a simplex search (reflection, expansion and contraction steps)
 * with box constraints on the parameters.
 *
 * On each pass the simplex vertices are ranked by objective value and the worst vertex is replaced by
 * the expansion, reflection or contraction point, whichever is accepted. The search stops when
 * - the progress indicator is canceled;
 * - the iteration counter exceeds 'maxIter';
 * - the best value has not improved by more than 'tolerance' for more than `2 * (n + 1)` passes in a row,
 *   where `n` is the number of parameters;
 * - none of the trial points improves the worst vertex.
 *
 * Progress is reported via `pi`; control is yielded with a 1 ms timeout on every pass so that
 * a cancel request can be processed.
 * @param pi Progress indicator
 * @param objectiveFunc Objective function to minimize
 * @param paramsInitial Initial point
 * @param settings Optimizer settings; the keys 'tolerance', 'maxIter', 'scaleReflaction', 'scaleExpansion'
 * and 'scaleContraction' are read here (the spelling 'scaleReflaction' is the actual key)
 * @param restrictionsBottom Lower bounds of the parameters
 * @param restrictionsTop Upper bounds of the parameters
 * @returns The best simplex vertex found and the number of passes performed */
export async function optimizeNM(pi: DG.ProgressIndicator,
  objectiveFunc: (x: Float32Array) => number, paramsInitial: Float32Array,
  settings: Map<string, number>, restrictionsBottom: Float32Array, restrictionsTop: Float32Array,
): Promise<{optimalPoint: Float32Array, iterations: number}> {
  // Settings initialization
  const tolerance = settings.get('tolerance')!;
  const maxIter = settings.get('maxIter')!;
  const scaleReflection = settings.get('scaleReflaction')!;
  const scaleExpansion = settings.get('scaleExpansion')!;
  const scaleContraction = settings.get('scaleContraction')!;

  const dimParams = paramsInitial.length;
  const dim = dimParams + 1;

  const [optParams, pointObjectives] = getInitialParams(
    objectiveFunc,
    settings,
    paramsInitial,
    restrictionsBottom,
    restrictionsTop,
  );

  // Vertex indices; sorted by objective value (ascending) at the start of each pass
  const indexes = new Array<number>(dim);
  for (let i = 0; i < dim; i++)
    indexes[i] = i;

  const lastIndex = indexes.length - 1;

  let iteration = 0;
  let previousBest = 0;
  let noImprovement = 0;

  const centroid = new Float32Array(dimParams);
  const reflectionPoint = new Float32Array(dimParams);
  const expansionPoint = new Float32Array(dimParams);
  const contractionPoint = new Float32Array(dimParams);

  /** Overwrites the specified simplex vertex and its objective value */
  const replaceVertex = (vertexIdx: number, point: Float32Array, score: number): void => {
    pointObjectives[vertexIdx] = score;

    for (let i = 0; i < dimParams; i++)
      optParams[vertexIdx][i] = point[i];
  };

  if (dim > 1) {
    while (true) {
      indexes.sort((a: number, b: number) => pointObjectives[a] - pointObjectives[b]);

      const percentage = Math.min(100, Math.floor(100 * iteration / maxIter));
      pi.update(percentage, `Optimizing pMPO... (${percentage}%)`);

      // Yield control, so that the cancel request can be processed
      await new Promise((r) => setTimeout(r, 1));

      if (pi.canceled)
        break;

      // NOTE(review): with this check, up to maxIter + 1 passes are performed - confirm it is intended
      if (iteration > maxIter)
        break;

      if (iteration === 0)
        previousBest = 2 * pointObjectives[indexes[0]];

      ++iteration;

      // Stagnation check
      const best = pointObjectives[indexes[0]];
      if (previousBest - best > tolerance)
        noImprovement = 0;
      else {
        ++noImprovement;
        if (noImprovement > 2 * dim)
          break;
      }

      previousBest = best;

      const worst = indexes[lastIndex];

      // Centroid of all vertices except the worst one
      fillCentroid(centroid, dimParams, worst, optParams);

      // Reflection
      fillPoint(centroid, reflectionPoint, worst,
        optParams, scaleReflection, dimParams, restrictionsBottom, restrictionsTop);
      const reflectionScore = objectiveFunc(reflectionPoint);

      // Expansion: tried only if the reflection point is better than the worst vertex
      if (reflectionScore < pointObjectives[worst]) {
        fillPoint(centroid, expansionPoint, worst,
          optParams, scaleExpansion, dimParams, restrictionsBottom, restrictionsTop);
        const expansionScore = objectiveFunc(expansionPoint);

        if (expansionScore < reflectionScore)
          replaceVertex(worst, expansionPoint, expansionScore);
        else
          replaceVertex(worst, reflectionPoint, reflectionScore);

        continue;
      }

      // Contraction
      fillPoint(centroid, contractionPoint, worst,
        optParams, scaleContraction, dimParams, restrictionsBottom, restrictionsTop);
      const contractionScore = objectiveFunc(contractionPoint);

      if (contractionScore < pointObjectives[worst]) {
        replaceVertex(worst, contractionPoint, contractionScore);
        continue;
      }

      // No trial point improves the worst vertex
      break;
    } // while
  } // if

  return {
    optimalPoint: optParams[indexes[0]],
    iterations: iteration,
  };
} // optimizeNM
@@ -1,5 +1,5 @@
1
1
  // Constants and type definitions for probabilistic scoring (pMPO)
2
- // Link: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
2
+ // Source paper https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
3
3
 
4
4
  /** Minimum number of samples required to compute pMPO */
5
5
  export const MIN_SAMPLES_COUNT = 10;
@@ -13,6 +13,8 @@ export type BasicStats = {
13
13
  desStd: number,
14
14
  nonDesAvg: number,
15
15
  nonDesStd: number,
16
+ min: number,
17
+ max: number,
16
18
  };
17
19
 
18
20
  /** Descriptor statistics including basic stats, t-statistics and p-value */
@@ -44,6 +46,7 @@ export type PmpoParams = BasicStats & Cutoff & SigmoidParams & {
44
46
  intersections: number[],
45
47
  x0: number,
46
48
  xBound: number,
49
+ inflection: number,
47
50
  };
48
51
 
49
52
  export type CorrelationTriple = [string, string, number];
@@ -74,17 +77,40 @@ export const DESCR_TABLE_TITLE = DESCR_TITLE + ' Statistics';
74
77
  export const SELECTED_TITLE = 'Selected';
75
78
  export const WEIGHT_TITLE = 'Weight';
76
79
  export const SCORES_TITLE = 'pMPO score';
77
- export const DESIRABILITY_COL_NAME = 'Desirability';
80
+ export const DESIRABILITY_COL_NAME = 'Desirability Curve';
81
+
82
+ /** Default p-value threshold for filtering descriptors */
83
+ export const P_VAL_TRES_DEFAULT = 0.001;
78
84
 
79
85
  /** Minimum p-value threshold for filtering descriptors */
80
- export const P_VAL_TRES_MIN = 0.01;
86
+ export const P_VAL_TRES_MIN = 0.001;
87
+
88
+ /** Maximum p-value threshold for filtering descriptors */
89
+ export const P_VAL_TRES_MAX = 1;
90
+
91
+ /** Default R-squared threshold for filtering correlated descriptors */
92
+ export const R2_DEFAULT = 0.53;
81
93
 
82
94
  /** Minimum R-squared threshold for filtering correlated descriptors */
83
95
  export const R2_MIN = 0.01;
84
96
 
97
+ /** Maximum R-squared threshold for filtering correlated descriptors */
98
+ export const R2_MAX = 1.0;
99
+
100
+ /** Default q-cutoff for descriptors in the pMPO model */
101
+ export const Q_CUTOFF_DEFAULT = 0.05;
102
+
85
103
  /** Minimum q-cutoff for descriptors in the pMPO model */
86
104
  export const Q_CUTOFF_MIN = 0.01;
87
105
 
106
+ /** Maximum q-cutoff for descriptors in the pMPO model */
107
+ export const Q_CUTOFF_MAX = 1;
108
+
109
+ /** Default setting for using sigmoid correction in pMPO */
110
+ export const USE_SIGMOID_DEFAULT = true;
111
+
112
+ export const FORMAT = '0.000';
113
+
88
114
  /** Colors used for selected and skipped descriptors */
89
115
  export enum COLORS {
90
116
  SELECTED = 'rgb(26, 146, 26)',
@@ -106,3 +132,115 @@ export type DesirabilityProfileProperties = Record<string, {
106
132
 
107
133
  export const STAT_GRID_HEIGHT = 75;
108
134
  export const DESIRABILITY_COLUMN_WIDTH = 305;
135
+
136
/** Non-negative part of the basic range sigma coefficients (in ascending order) */
const POSITIVE_BASIC_RANGE_SIGMA_COEFFS = [0, 0.25, 0.5, 1, 1.5, 2, 2.5, 3, 4, 5];

/** Basic range sigma coefficients for desirability profile points:
 * the positive coefficients mirrored around zero, in ascending order */
export const BASIC_RANGE_SIGMA_COEFFS = POSITIVE_BASIC_RANGE_SIGMA_COEFFS
  .slice(1)
  .map((v) => -v)
  .reverse()
  .concat(POSITIVE_BASIC_RANGE_SIGMA_COEFFS);

/** Additional positive coefficients used in the extended range */
const EXTRA_RANGE_SIGMA_COEFFS = [0.12, 0.37, 0.63, 0.75, 0.88, 1.25, 1.75, 2.25, 2.75];

/** Non-negative part of the extended range sigma coefficients (in ascending order).
 * A numeric comparator is required: the default sort compares elements as strings. */
const EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS = POSITIVE_BASIC_RANGE_SIGMA_COEFFS
  .concat(EXTRA_RANGE_SIGMA_COEFFS)
  .sort((a, b) => a - b);

/** Extended range sigma coefficients for desirability profile points:
 * the positive coefficients mirrored around zero, in ascending order */
export const EXTENDED_RANGE_SIGMA_COEFFS = EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS
  .slice(1)
  .map((v) => -v)
  .reverse()
  .concat(EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS);
154
+
155
+ /** Confusion matrix type */
156
+ export type ConfusionMatrix = {
157
+ TP: number,
158
+ TN: number,
159
+ FP: number,
160
+ FN: number,
161
+ };
162
+
163
+ // Titles for ROC curve columns
164
+ export const TPR_TITLE = 'TPR (Sensitivity)';
165
+ export const FPR_TITLE = 'FPR (1 - Specificity)';
166
+ export const THRESHOLD = 'Threshold';
167
+
168
// Number of equal steps between the first and the last ROC threshold
const ROC_POINTS = 100;

/** Number of ROC curve thresholds (both ends included) */
export const ROC_TRESHOLDS_COUNT = ROC_POINTS + 1;

/** ROC curve thresholds from 0.0 to 1.0 */
export const ROC_TRESHOLDS = (() => {
  const thresholds = new Float32Array(ROC_TRESHOLDS_COUNT);

  for (let i = 0; i < ROC_TRESHOLDS_COUNT; ++i)
    thresholds[i] = i / ROC_POINTS;

  return thresholds;
})();
174
+
175
+ /** Sample dataframe for pMPO training: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/ */
176
+ export const SOURCE_PATH = 'System:AppData/Eda/drugs-props-train.csv';
177
+
178
+ /** Scores of the sample dataframe computed using https://github.com/Merck/pmpo */
179
+ export const SCORES_PATH = 'System:AppData/Eda/drugs-props-train-scores.csv';
180
+
181
+ /** Name of the synthetic drug used in the sample dataframe */
182
+ export const SYNTHETIC_DRUG_NAME = 'Synthetic drug';
183
+
184
+ /** pMPO model evaluation result type */
185
+ export type ModelEvaluationResult = {
186
+ auc: number,
187
+ threshold: number,
188
+ tpr: Float32Array,
189
+ fpr: Float32Array,
190
+ };
191
+
192
+ /** Maximum number of rows for which auto-tuning is applicable */
193
+ export const AUTO_TUNE_MAX_APPLICABLE_ROWS = 10000;
194
+
195
/** Default settings for optimization in pMPO parameter tuning.
 * The keys are looked up by name, so their spelling (including 'scaleReflaction') must be preserved. */
export const DEFAULT_OPTIMIZATION_SETTINGS = new Map<string, number>()
  .set('tolerance', 0.001)
  .set('maxIter', 25)
  .set('nonZeroParam', 0.0001)
  .set('initialScale', 0.02)
  .set('scaleReflaction', 1)
  .set('scaleExpansion', 2)
  .set('scaleContraction', -0.5);
205
+
206
+ /** Optimal point type for pMPO parameter tuning */
207
+ export type OptimalPoint = {
208
+ pValTresh: number,
209
+ r2Tresh: number,
210
+ qCutoff: number,
211
+ state: 'success' | 'canceled' | 'failed',
212
+ msg: string,
213
+ };
214
+
215
+ /** Minimum bounds for pMPO parameters during optimization */
216
+ export const LOW_PARAMS_BOUNDS = new Float32Array([0.5, Q_CUTOFF_MIN]);
217
+
218
+ /** Maximum bounds for pMPO parameters during optimization */
219
+ export const HIGH_PARAMS_BOUNDS = new Float32Array([R2_MAX, Q_CUTOFF_MAX]);
220
+
221
+ export enum EQUALITY_SIGN {
222
+ GREATER = '>',
223
+ LESS = '<',
224
+ GREATER_OR_EQUAL = '≥',
225
+ LESS_OR_EQUAL = '≤',
226
+ DEFAULT = LESS_OR_EQUAL,
227
+ };
228
+
229
+ export const SIGN_OPTIONS = [
230
+ EQUALITY_SIGN.GREATER,
231
+ EQUALITY_SIGN.LESS,
232
+ EQUALITY_SIGN.GREATER_OR_EQUAL,
233
+ EQUALITY_SIGN.LESS_OR_EQUAL,
234
+ ];
235
+
236
+ export const THRESHOLDED_DESIRABILITY_COL_NAME = 'Desirability';
237
+
238
+ export const PREFERABLE_CATEGORIES = ['perfect', 'good', 'true', 't', 'g', 'active', 'a', 'yes', 'y'];
239
+
240
+ export type PmpoInputId = 'descriptors' | 'desirability' | 'threshold' | 'categories';
241
+ export type TooltipContent = string | (() => HTMLElement);
242
+
243
+ export interface PmpoValidationResult {
244
+ valid: boolean;
245
+ errors: Map<PmpoInputId, TooltipContent>;
246
+ }