@datagrok/eda 1.4.11 → 1.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.eslintrc.json +0 -1
  2. package/CHANGELOG.md +15 -0
  3. package/CLAUDE.md +185 -0
  4. package/README.md +8 -0
  5. package/css/pmpo.css +35 -0
  6. package/dist/package-test.js +1 -1
  7. package/dist/package-test.js.map +1 -1
  8. package/dist/package.js +1 -1
  9. package/dist/package.js.map +1 -1
  10. package/eslintrc.json +45 -0
  11. package/files/drugs-props-test.csv +126 -0
  12. package/files/drugs-props-train-scores.csv +664 -0
  13. package/files/drugs-props-train.csv +664 -0
  14. package/package.json +9 -3
  15. package/src/anova/anova-tools.ts +1 -1
  16. package/src/anova/anova-ui.ts +1 -1
  17. package/src/package-api.ts +18 -0
  18. package/src/package-test.ts +4 -1
  19. package/src/package.g.ts +25 -0
  20. package/src/package.ts +55 -15
  21. package/src/pareto-optimization/pareto-computations.ts +6 -0
  22. package/src/pareto-optimization/utils.ts +6 -4
  23. package/src/probabilistic-scoring/data-generator.ts +157 -0
  24. package/src/probabilistic-scoring/nelder-mead.ts +204 -0
  25. package/src/probabilistic-scoring/pmpo-defs.ts +218 -0
  26. package/src/probabilistic-scoring/pmpo-utils.ts +603 -0
  27. package/src/probabilistic-scoring/prob-scoring.ts +991 -0
  28. package/src/probabilistic-scoring/stat-tools.ts +303 -0
  29. package/src/softmax-classifier.ts +1 -1
  30. package/src/tests/anova-tests.ts +1 -1
  31. package/src/tests/classifiers-tests.ts +1 -1
  32. package/src/tests/dim-reduction-tests.ts +1 -1
  33. package/src/tests/linear-methods-tests.ts +1 -1
  34. package/src/tests/mis-vals-imputation-tests.ts +1 -1
  35. package/src/tests/pareto-tests.ts +253 -0
  36. package/src/tests/pmpo-tests.ts +157 -0
  37. package/test-console-output-1.log +175 -209
  38. package/test-record-1.mp4 +0 -0
@@ -0,0 +1,204 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
/**
 * Builds the initial simplex for the Nelder-Mead search and evaluates the objective at each vertex.
 *
 * Vertex 0 is a copy of `paramsInitial`. Vertex `i` (i >= 1) is the same copy with coordinate `i - 1`
 * shifted: by `nonZeroParam` when that coordinate is zero, otherwise by `initialScale` times its value.
 * Only the shifted coordinate is clamped to `[restrictionsBottom, restrictionsTop]`.
 *
 * If clamping puts the shifted coordinate back onto its starting value (for example, the starting value
 * already sits on its upper bound), the step is taken in the opposite direction instead. Without this the
 * vertex would coincide with vertex 0 and the simplex would be degenerate along that axis.
 *
 * @param objectiveFunc Function being minimized; called once per vertex.
 * @param settings Optimizer settings; the keys `nonZeroParam` and `initialScale` are read here.
 *   Missing keys fall back to 0.0001 and 0.02 respectively instead of silently producing NaN.
 * @param paramsInitial Starting point of the search.
 * @param restrictionsBottom Per-coordinate lower bounds.
 * @param restrictionsTop Per-coordinate upper bounds.
 * @returns A pair `[vertices, objective values]`, both of length `paramsInitial.length + 1`.
 */
function getInitialParams(
  objectiveFunc: (x: Float32Array) => number,
  settings: Map<string, number>,
  paramsInitial: Float32Array,
  restrictionsBottom: Float32Array,
  restrictionsTop: Float32Array): [Float32Array[], number[]] {
  const dimParams = paramsInitial.length;
  const dim = dimParams + 1;
  const nonZeroParam = settings.get('nonZeroParam') ?? 0.0001;
  const initScale = settings.get('initialScale') ?? 0.02;

  // Same comparison order as the rest of the optimizer: lower bound first, then upper bound.
  const clamp = (value: number, j: number): number => {
    if (value < restrictionsBottom[j])
      return restrictionsBottom[j];
    if (value > restrictionsTop[j])
      return restrictionsTop[j];
    return value;
  };

  const optParams = new Array<Float32Array>(dim);
  const pointObjectives = new Array<number>(dim);

  for (let i = 0; i < dim; i++) {
    const vertex = new Float32Array(paramsInitial);

    if (i > 0) {
      const j = i - 1;
      const base = paramsInitial[j];
      const step = (base === 0) ? nonZeroParam : initScale * base;

      // Round to float32 before clamping, exactly as storing into the typed array would.
      let shifted = clamp(Math.fround(base + step), j);

      // The bound pushed the coordinate back onto the start value: step inward instead.
      if (shifted === base)
        shifted = clamp(Math.fround(base - step), j);

      vertex[j] = shifted;
    }

    optParams[i] = vertex;
    pointObjectives[i] = objectiveFunc(vertex);
  }

  return [optParams, pointObjectives];
} // getInitialParams
41
+
42
/**
 * Writes into `centroid` the coordinate-wise mean of all simplex vertices except the one at `lastIndex`.
 *
 * @param centroid Output buffer; its first `dimParams` entries are overwritten.
 * @param dimParams Number of parameters; the simplex is read as `dimParams + 1` vertices.
 * @param lastIndex Index (into `optParams`) of the vertex left out of the average.
 * @param optParams Simplex vertices.
 */
function fillCentroid(centroid: Float32Array, dimParams: number, lastIndex: number, optParams: Float32Array[]) {
  const vertexCount = dimParams + 1;

  for (let coord = 0; coord < dimParams; ++coord) {
    let total = 0;

    for (let vertex = 0; vertex < vertexCount; ++vertex) {
      if (vertex === lastIndex)
        continue;

      total += optParams[vertex][coord];
    }

    centroid[coord] = total / dimParams;
  }
} // fillCentroid
53
+
54
/**
 * Computes a trial point on the line through the centroid and the vertex at `lastIndex`:
 * `centroid + scale * (centroid - optParams[lastIndex])`, clamped per coordinate to the given bounds.
 *
 * @param centroid Centroid of the retained vertices.
 * @param point Output buffer; its first `dimParams` entries are overwritten.
 * @param lastIndex Index (into `optParams`) of the vertex the point is moved away from.
 * @param optParams Simplex vertices.
 * @param scale Step multiplier; a negative value places the point between the centroid and the vertex.
 * @param dimParams Number of parameters.
 * @param restrictionsBottom Per-coordinate lower bounds.
 * @param restrictionsTop Per-coordinate upper bounds.
 */
function fillPoint(
  centroid: Float32Array, point: Float32Array,
  lastIndex: number, optParams: Float32Array[],
  scale: number, dimParams: number,
  restrictionsBottom: Float32Array,
  restrictionsTop: Float32Array) {
  const reference = optParams[lastIndex];

  for (let k = 0; k < dimParams; ++k) {
    // Store first so the bound checks see the float32-rounded value.
    point[k] = centroid[k] + scale * (centroid[k] - reference[k]);

    const stored = point[k];
    const low = restrictionsBottom[k];
    const high = restrictionsTop[k];

    if (stored < low)
      point[k] = low;
    else if (stored > high)
      point[k] = high;
  }
} // fillPoint
70
+
71
/**
 * Minimizes `objectiveFunc` with a bounded Nelder-Mead style simplex search.
 *
 * Each iteration orders the vertices by objective value, reports progress, and then tries to replace
 * the worst vertex:
 *  1. reflection - accepted whenever it beats the worst vertex;
 *  2. expansion - tried after a successful reflection and used instead if it beats the reflection;
 *  3. contraction - tried when reflection fails.
 * There is no shrink step: the search ends when contraction does not beat the worst vertex.
 *
 * The search also ends when
 *  - `pi.canceled` is set,
 *  - `maxIter` iterations have been performed,
 *  - the best value has improved by no more than `tolerance` for more than `2 * (n + 1)` consecutive
 *    iterations, where `n` is the number of parameters.
 *
 * @param pi Progress indicator; updated every iteration and polled for cancellation.
 * @param objectiveFunc Function to minimize.
 * @param paramsInitial Starting point.
 * @param settings Optimizer settings. Keys read here: `tolerance`, `maxIter`, `scaleReflaction`
 *   (the correctly spelled `scaleReflection` is accepted as well), `scaleExpansion`, `scaleContraction`.
 *   Missing keys fall back to 0.001, 25, 1, 2 and -0.5 instead of silently producing NaN/undefined.
 * @param restrictionsBottom Per-coordinate lower bounds.
 * @param restrictionsTop Per-coordinate upper bounds.
 * @returns The best vertex found and the number of iterations performed (never more than `maxIter`).
 */
export async function optimizeNM(pi: DG.ProgressIndicator,
  objectiveFunc: (x: Float32Array) => number, paramsInitial: Float32Array,
  settings: Map<string, number>, restrictionsBottom: Float32Array, restrictionsTop: Float32Array,
): Promise<{optimalPoint: Float32Array, iterations: number}> {
  // Settings initialization
  const tolerance = settings.get('tolerance') ?? 0.001;
  const maxIter = settings.get('maxIter') ?? 25;
  // 'scaleReflaction' is the (misspelled) key used by existing settings maps; keep it as the primary key.
  const scaleReflection = settings.get('scaleReflaction') ?? settings.get('scaleReflection') ?? 1;
  const scaleExpansion = settings.get('scaleExpansion') ?? 2;
  const scaleContraction = settings.get('scaleContraction') ?? -0.5;

  const dimParams = paramsInitial.length;
  const dim = dimParams + 1;

  const [optParams, pointObjectives] = getInitialParams(
    objectiveFunc,
    settings,
    paramsInitial,
    restrictionsBottom,
    restrictionsTop,
  );

  // indexes[k] is the vertex with the k-th smallest objective (valid right after each sort).
  const indexes = Array.from({length: dim}, (_, i) => i);
  const worstPosition = dim - 1;
  const byObjective = (a: number, b: number): number => pointObjectives[a] - pointObjectives[b];

  let iteration = 0;
  // No previous value exists before the first iteration, so the first comparison always counts
  // as an improvement, whatever the sign or magnitude of the objective.
  let previousBest = Number.POSITIVE_INFINITY;
  let noImprovement = 0;

  const centroid = new Float32Array(dimParams);
  const reflectionPoint = new Float32Array(dimParams);
  const expansionPoint = new Float32Array(dimParams);
  const contractionPoint = new Float32Array(dimParams);

  /** Overwrites the given vertex and its objective value with an accepted trial point. */
  const replaceVertex = (vertex: number, point: Float32Array, score: number): void => {
    optParams[vertex].set(point);
    pointObjectives[vertex] = score;
  };

  if (dim > 1) {
    while (true) {
      indexes.sort(byObjective);

      const percentage = (maxIter > 0) ? Math.min(100, Math.floor(100 * iteration / maxIter)) : 100;
      pi.update(percentage, `Optimizing pMPO... (${percentage}%)`);
      // Yield to the event loop (presumably so the progress UI can refresh and register cancellation).
      await new Promise((r) => setTimeout(r, 1));

      if (pi.canceled)
        break;

      // '>=' so that at most maxIter iterations are performed.
      if (iteration >= maxIter)
        break;

      ++iteration;

      const best = pointObjectives[indexes[0]];
      if (previousBest - best > tolerance)
        noImprovement = 0;
      else {
        ++noImprovement;
        if (noImprovement > 2 * dim)
          break;
      }

      previousBest = best;

      const worst = indexes[worstPosition];

      // centroid of all vertices except the worst one
      fillCentroid(centroid, dimParams, worst, optParams);

      // reflection
      fillPoint(centroid, reflectionPoint, worst,
        optParams, scaleReflection, dimParams, restrictionsBottom, restrictionsTop);
      const reflectionScore = objectiveFunc(reflectionPoint);

      // expansion
      if (reflectionScore < pointObjectives[worst]) {
        fillPoint(centroid, expansionPoint, worst,
          optParams, scaleExpansion, dimParams, restrictionsBottom, restrictionsTop);
        const expansionScore = objectiveFunc(expansionPoint);

        if (expansionScore < reflectionScore)
          replaceVertex(worst, expansionPoint, expansionScore);
        else
          replaceVertex(worst, reflectionPoint, reflectionScore);

        continue;
      }

      // contraction
      fillPoint(centroid, contractionPoint, worst,
        optParams, scaleContraction, dimParams, restrictionsBottom, restrictionsTop);
      const contractionScore = objectiveFunc(contractionPoint);

      if (contractionScore < pointObjectives[worst]) {
        replaceVertex(worst, contractionPoint, contractionScore);
        continue;
      }

      // Neither reflection nor contraction improved the worst vertex: stop.
      break;
    } // while
  } // if

  // Every exit from the loop happens with `indexes` sorted against the current objective values.
  return {
    optimalPoint: optParams[indexes[0]],
    iterations: iteration,
  };
} // optimizeNM
@@ -0,0 +1,218 @@
1
+ // Constants and type definitions for probabilistic scoring (pMPO)
2
+ // Link: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
3
+
4
/** Minimum number of samples required to compute pMPO */
export const MIN_SAMPLES_COUNT = 10;

/** Message texts for the "not applicable" and "computation failed" cases */
export const PMPO_NON_APPLICABLE = 'pMPO is not applicable';
export const PMPO_COMPUTE_FAILED = 'Failed to compute pMPO parameters';

/** Basic statistics of a descriptor for the desired and non-desired compounds */
export type BasicStats = {
  /** Average and standard deviation over the desired compounds */
  desAvg: number;
  desStd: number;
  /** Average and standard deviation over the non-desired compounds */
  nonDesAvg: number;
  nonDesStd: number;
  min: number;
  max: number;
};

/** Descriptor statistics: basic stats plus sample sizes, t-statistics and p-value */
export type DescriptorStatistics = BasicStats & {
  desLen: number;
  // NOTE(review): name looks like a misspelling of `nonDesLen`; kept as is, it is part of the public type.
  nonSesLen: number;
  tstat: number;
  pValue: number;
};

/** Cutoff parameters for the basic functions of the pMPO model */
export type Cutoff = {
  cutoff: number;
  cutoffDesired: number;
  cutoffNotDesired: number;
};

/** Generalized sigmoid parameters for the desirability functions of the pMPO model */
export type SigmoidParams = {
  pX0: number;
  b: number;
  c: number;
};

/** Full pMPO parameter set: basic stats, cutoffs, sigmoid params, z-score, weight, intersections */
export type PmpoParams = BasicStats & Cutoff & SigmoidParams & {
  zScore: number;
  weight: number;
  intersections: number[];
  x0: number;
  xBound: number;
  inflection: number;
};

/** Two names and a numeric value */
export type CorrelationTriple = [string, string, number];
53
+
54
// Building blocks of the statistics column titles
const DESIRED = 'desired';
const NON_DESIRED = 'non-desired';
const MEAN = 'Mean';
const STD = 'Std';
const T_STAT = 't-statistics';
export const P_VAL = 'p-value';

// Titles of the form "<statistic>(<group>)"
const MEAN_DES = MEAN + '(' + DESIRED + ')';
const MEAN_NON_DES = MEAN + '(' + NON_DESIRED + ')';
const STD_DES = STD + '(' + DESIRED + ')';
const STD_NON_DES = STD + '(' + NON_DESIRED + ')';

/** Map of statistic field names to their display titles */
export const STAT_TO_TITLE_MAP = new Map<string, string>([
  ['desAvg', MEAN_DES],
  ['desStd', STD_DES],
  ['nonDesAvg', MEAN_NON_DES],
  ['nonDesStd', STD_NON_DES],
  ['tstat', T_STAT],
  ['pValue', P_VAL],
]);
74
+
75
// Titles used for tables and columns
export const DESCR_TITLE = 'Descriptor';
export const DESCR_TABLE_TITLE = `${DESCR_TITLE} Statistics`;
export const SELECTED_TITLE = 'Selected';
export const WEIGHT_TITLE = 'Weight';
export const SCORES_TITLE = 'pMPO score';
export const DESIRABILITY_COL_NAME = 'Desirability Curve';

// p-value threshold for filtering descriptors: default, minimum, maximum
/** Default p-value threshold for filtering descriptors */
export const P_VAL_TRES_DEFAULT = 0.001;
/** Minimum p-value threshold for filtering descriptors */
export const P_VAL_TRES_MIN = 0.001;
/** Maximum p-value threshold for filtering descriptors */
export const P_VAL_TRES_MAX = 1;

// R-squared threshold for filtering correlated descriptors: default, minimum, maximum
/** Default R-squared threshold for filtering correlated descriptors */
export const R2_DEFAULT = 0.53;
/** Minimum R-squared threshold for filtering correlated descriptors */
export const R2_MIN = 0.01;
/** Maximum R-squared threshold for filtering correlated descriptors */
export const R2_MAX = 1;

// q-cutoff for descriptors in the pMPO model: default, minimum, maximum
/** Default q-cutoff for descriptors in the pMPO model */
export const Q_CUTOFF_DEFAULT = 0.05;
/** Minimum q-cutoff for descriptors in the pMPO model */
export const Q_CUTOFF_MIN = 0.01;
/** Maximum q-cutoff for descriptors in the pMPO model */
export const Q_CUTOFF_MAX = 1;

/** Default setting for using sigmoid correction in pMPO */
export const USE_SIGMOID_DEFAULT = true;

/** Number format string with three decimal places */
export const FORMAT = '0.000';
113
+
114
/** Colors used for selected and skipped descriptors */
export enum COLORS {
  SELECTED = 'rgb(26, 146, 26)',
  SKIPPED = 'rgb(208, 57, 67)',
}

/** Small positive constant (1e-8) */
export const TINY = 1e-8;

/** Folder path for storing pMPO models */
export const FOLDER = 'System:AppData/Chem/mpo';

/** Desirability profile properties: per-name polyline points, weight and optional range */
export type DesirabilityProfileProperties = Record<string, {
  line: [number, number][];
  weight: number;
  min?: number;
  max?: number;
}>;

// Sizes used for the statistics grid and the desirability column
export const STAT_GRID_HEIGHT = 75;
export const DESIRABILITY_COLUMN_WIDTH = 305;
135
+
136
/** Non-negative sigma coefficients of the basic range, ascending, starting at 0 */
const POSITIVE_BASIC_RANGE_SIGMA_COEFFS = [0, 0.25, 0.5, 1, 1.5, 2, 2.5, 3, 4, 5];

/** Basic range sigma coefficients for desirability profile points (symmetric around 0, ascending) */
export const BASIC_RANGE_SIGMA_COEFFS = POSITIVE_BASIC_RANGE_SIGMA_COEFFS
  .slice(1) // drop the leading 0 so it is not duplicated by the mirrored half
  .map((v) => -v)
  .reverse()
  .concat(POSITIVE_BASIC_RANGE_SIGMA_COEFFS);

/** Additional coefficients inserted between the basic ones for the extended range */
const EXTRA_RANGE_SIGMA_COEFFS = [0.12, 0.37, 0.63, 0.75, 0.88, 1.25, 1.75, 2.25, 2.75];

// An explicit numeric comparator is required: the default Array.prototype.sort compares elements
// as strings and would misplace any value >= 10 (e.g. "10" sorts before "2").
const EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS = POSITIVE_BASIC_RANGE_SIGMA_COEFFS
  .concat(EXTRA_RANGE_SIGMA_COEFFS)
  .sort((a, b) => a - b);

/** Extended range sigma coefficients for desirability profile points (symmetric around 0, ascending) */
export const EXTENDED_RANGE_SIGMA_COEFFS = EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS
  .slice(1) // drop the leading 0 so it is not duplicated by the mirrored half
  .map((v) => -v)
  .reverse()
  .concat(EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS);
154
+
155
/** Confusion matrix counts */
export type ConfusionMatrix = {
  TP: number;
  TN: number;
  FP: number;
  FN: number;
};

// Titles for ROC curve columns
export const TPR_TITLE = 'TPR (Sensitivity)';
export const FPR_TITLE = 'FPR (1 - Specificity)';
export const THRESHOLD = 'Threshold';

// Number of intervals of the ROC threshold grid; the grid itself has one more point
const ROC_POINTS = 100;
export const ROC_TRESHOLDS_COUNT = ROC_POINTS + 1;

/** ROC curve thresholds from 0.0 to 1.0 (evenly spaced, stored as float32) */
export const ROC_TRESHOLDS = new Float32Array(ROC_TRESHOLDS_COUNT).map((_, i) => i / ROC_POINTS);
174
+
175
/** Sample dataframe for pMPO training: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/ */
export const SOURCE_PATH = 'System:AppData/Eda/drugs-props-train.csv';

/** Scores of the sample dataframe computed using https://github.com/Merck/pmpo */
export const SCORES_PATH = 'System:AppData/Eda/drugs-props-train-scores.csv';

/** Name of the synthetic drug used in the sample dataframe */
export const SYNTHETIC_DRUG_NAME = 'Synthetic drug';

/** Result of a pMPO model evaluation: AUC, a threshold, and TPR/FPR arrays */
export type ModelEvaluationResult = {
  auc: number;
  threshold: number;
  tpr: Float32Array;
  fpr: Float32Array;
};

/** Maximum number of rows for which auto-tuning is applicable */
export const AUTO_TUNE_MAX_APPLICABLE_ROWS = 1e4;
194
+
195
/**
 * Default settings for optimization in pMPO parameter tuning.
 * Note: the key 'scaleReflaction' is spelled this way on purpose - it is the key the optimizer reads.
 */
export const DEFAULT_OPTIMIZATION_SETTINGS = new Map<string, number>(Object.entries({
  'tolerance': 0.001,
  'maxIter': 25,
  'nonZeroParam': 0.0001,
  'initialScale': 0.02,
  'scaleReflaction': 1,
  'scaleExpansion': 2,
  'scaleContraction': -0.5,
}));

/** Optimal point type for pMPO parameter tuning: the tuned thresholds and a success flag */
export type OptimalPoint = {
  pValTresh: number;
  r2Tresh: number;
  qCutoff: number;
  success: boolean;
};
213
+
214
/**
 * Minimum bounds for pMPO parameters during optimization.
 * NOTE(review): entries presumably are [R-squared threshold, q-cutoff], mirroring HIGH_PARAMS_BOUNDS;
 * the first lower bound is 0.5 rather than R2_MIN - confirm this is intended.
 */
export const LOW_PARAMS_BOUNDS = Float32Array.of(0.5, Q_CUTOFF_MIN);

/** Maximum bounds for pMPO parameters during optimization */
export const HIGH_PARAMS_BOUNDS = Float32Array.of(R2_MAX, Q_CUTOFF_MAX);