@datagrok/eda 1.4.12 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +0 -1
- package/CHANGELOG.md +10 -0
- package/CLAUDE.md +185 -0
- package/css/pmpo.css +9 -0
- package/dist/111.js +1 -1
- package/dist/111.js.map +1 -1
- package/dist/128.js +1 -1
- package/dist/128.js.map +1 -1
- package/dist/153.js +1 -1
- package/dist/153.js.map +1 -1
- package/dist/23.js +1 -1
- package/dist/23.js.map +1 -1
- package/dist/234.js +1 -1
- package/dist/234.js.map +1 -1
- package/dist/242.js +1 -1
- package/dist/242.js.map +1 -1
- package/dist/260.js +1 -1
- package/dist/260.js.map +1 -1
- package/dist/33.js +1 -1
- package/dist/33.js.map +1 -1
- package/dist/348.js +1 -1
- package/dist/348.js.map +1 -1
- package/dist/377.js +1 -1
- package/dist/377.js.map +1 -1
- package/dist/397.js +2 -0
- package/dist/397.js.map +1 -0
- package/dist/412.js +1 -1
- package/dist/412.js.map +1 -1
- package/dist/415.js +1 -1
- package/dist/415.js.map +1 -1
- package/dist/501.js +1 -1
- package/dist/501.js.map +1 -1
- package/dist/531.js +1 -1
- package/dist/531.js.map +1 -1
- package/dist/583.js +1 -1
- package/dist/583.js.map +1 -1
- package/dist/589.js +1 -1
- package/dist/589.js.map +1 -1
- package/dist/603.js +1 -1
- package/dist/603.js.map +1 -1
- package/dist/656.js +1 -1
- package/dist/656.js.map +1 -1
- package/dist/682.js +1 -1
- package/dist/682.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/727.js +1 -1
- package/dist/727.js.map +1 -1
- package/dist/731.js +1 -1
- package/dist/731.js.map +1 -1
- package/dist/738.js +1 -1
- package/dist/738.js.map +1 -1
- package/dist/763.js +1 -1
- package/dist/763.js.map +1 -1
- package/dist/778.js +1 -1
- package/dist/778.js.map +1 -1
- package/dist/783.js +1 -1
- package/dist/783.js.map +1 -1
- package/dist/793.js +1 -1
- package/dist/793.js.map +1 -1
- package/dist/810.js +1 -1
- package/dist/810.js.map +1 -1
- package/dist/860.js +1 -1
- package/dist/860.js.map +1 -1
- package/dist/907.js +1 -1
- package/dist/907.js.map +1 -1
- package/dist/950.js +1 -1
- package/dist/950.js.map +1 -1
- package/dist/980.js +1 -1
- package/dist/980.js.map +1 -1
- package/dist/990.js +1 -1
- package/dist/990.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/eslintrc.json +0 -1
- package/files/drugs-props-train-scores.csv +664 -0
- package/package.json +11 -7
- package/src/package-api.ts +7 -3
- package/src/package-test.ts +4 -1
- package/src/package.g.ts +21 -9
- package/src/package.ts +33 -23
- package/src/pareto-optimization/pareto-computations.ts +6 -0
- package/src/pareto-optimization/pareto-optimizer.ts +1 -1
- package/src/pls/pls-constants.ts +3 -1
- package/src/pls/pls-tools.ts +73 -69
- package/src/probabilistic-scoring/data-generator.ts +202 -0
- package/src/probabilistic-scoring/nelder-mead.ts +204 -0
- package/src/probabilistic-scoring/pmpo-defs.ts +141 -3
- package/src/probabilistic-scoring/pmpo-utils.ts +240 -126
- package/src/probabilistic-scoring/prob-scoring.ts +862 -135
- package/src/probabilistic-scoring/stat-tools.ts +141 -6
- package/src/tests/anova-tests.ts +1 -1
- package/src/tests/classifiers-tests.ts +1 -1
- package/src/tests/dim-reduction-tests.ts +1 -1
- package/src/tests/linear-methods-tests.ts +1 -1
- package/src/tests/mis-vals-imputation-tests.ts +1 -1
- package/src/tests/pareto-tests.ts +251 -0
- package/src/tests/pmpo-tests.ts +797 -0
- package/test-console-output-1.log +303 -239
- package/test-record-1.mp4 +0 -0
- package/files/mpo-done.ipynb +0 -2123
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {DescriptorStatistics, SOURCE_PATH, SYNTHETIC_DRUG_NAME} from './pmpo-defs';
|
|
6
|
+
import {getDescriptorStatistics, getDesiredTables} from './stat-tools';
|
|
7
|
+
|
|
8
|
+
//@ts-ignore: no types
|
|
9
|
+
import * as jStat from 'jstat';
|
|
10
|
+
|
|
11
|
+
/** Builds a synthetic dataset for pMPO model training and testing.
 *
 * The sample CSV at `SOURCE_PATH` is loaded and passed to `PmpoDataGenerator`
 * (drug column 'Drug', desirability column 'CNS', SMILES column 'Smiles').
 * @param samplesCount Number of samples to generate
 * @param isTest When `true` (default), the generated table is returned as is. When `false`, extra columns
 * are appended: a constant boolean column, a constant integer column, a copy of the first numerical column
 * with up to 5 leading values replaced by nulls, an all-null column, and the categorical columns
 * derived from 'CNS'.
 * @returns DataFrame with generated data */
export async function getSynteticPmpoData(samplesCount: number, isTest: boolean = true): Promise<DG.DataFrame> {
  const sourceTable = await grok.dapi.files.readCsv(SOURCE_PATH);
  const table = new PmpoDataGenerator(sourceTable, 'Drug', 'CNS', 'Smiles').getGenerated(samplesCount);

  if (isTest)
    return table;

  // Constant columns
  table.columns.add(DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'Const bool', new Array(samplesCount).fill(true)));
  table.columns.add(DG.Column.fromInt32Array('Const int', new Int32Array(samplesCount).fill(1)));

  // Copy of the first numerical column with (at most) 5 leading values set to null
  const numericSource = table.columns.toList().find((col) => col.isNumerical);

  if (numericSource) {
    const withGaps = numericSource.clone();
    withGaps.name = `${numericSource.name} (missing)`;
    const gapsCount = Math.min(5, withGaps.length);

    for (let row = 0; row < gapsCount; ++row)
      withGaps.set(row, DG.FLOAT_NULL);

    table.columns.add(withGaps);
  }

  // Column consisting of nulls only
  table.columns.add(DG.Column.fromFloat32Array('Nulls', new Float32Array(samplesCount).fill(DG.FLOAT_NULL)));

  // Categorical columns derived from the desirability column
  getCategoricalColumns(table.col('CNS')!, samplesCount).forEach((col) => {
    table.columns.add(col);
  });

  return table;
} // getSynteticPmpoData
|
|
44
|
+
|
|
45
|
+
/** Derives string (categorical) columns from a boolean column.
 *
 * Three columns are produced:
 * - 'CNS (strings)': 'active' for truthy source values, 'non-active' otherwise;
 * - 'CNS (4 categories)': 'perfect' or 'good' (picked at random) for truthy source values,
 *   'bad' or 'worst' (picked at random) otherwise;
 * - 'Single category': the constant 'Unknown'.
 * @param sourceBoolCol Source boolean column to base the categorical columns on
 * @param samplesCount Number of samples to generate
 * @returns Array of generated categorical columns */
function getCategoricalColumns(sourceBoolCol: DG.Column, samplesCount: number): DG.Column[] {
  const singleCategory = new Array<string>(samplesCount).fill('Unknown');
  const flags = sourceBoolCol.toList();
  const binaryLabels: string[] = [];
  const fourLevelLabels: string[] = [];

  for (let row = 0; row < samplesCount; ++row) {
    const isActive = Boolean(flags[row]);
    // Exactly one random draw per row: it picks one of the two labels available for the row's class
    const firstOfPair = Math.random() < 0.5;

    binaryLabels.push(isActive ? 'active' : 'non-active');

    if (isActive)
      fourLevelLabels.push(firstOfPair ? 'perfect' : 'good');
    else
      fourLevelLabels.push(firstOfPair ? 'bad' : 'worst');
  }

  const asStringColumn = (name: string, values: string[]): DG.Column =>
    DG.Column.fromList(DG.COLUMN_TYPE.STRING, name, values);

  return [
    asStringColumn('CNS (strings)', binaryLabels),
    asStringColumn('CNS (4 categories)', fourLevelLabels),
    asStringColumn('Single category', singleCategory),
  ];
} // getCategoricalColumns
|
|
65
|
+
|
|
66
|
+
/** Class for generating synthetic data for pMPO model training and testing.
 *
 * On construction, the source dataframe is split with `getDesiredTables` into `desired` and `nonDesired`
 * tables, and `getDescriptorStatistics` is evaluated for every numerical column of the source. The share
 * of rows in the `desired` table is stored as the probability of the desired class. */
export class PmpoDataGenerator {
  private readonly sourceDf: DG.DataFrame;
  private readonly drugName: string;
  private readonly desirabilityColName: string;
  private readonly smilesColName: string;
  private readonly desiredProbability: number;
  private readonly descriptorStats: Map<string, DescriptorStatistics>;

  /** @param df Source dataframe
   * @param drugName Name of the column with drug names
   * @param desirabilityColName Name of the desirability column (must exist in `df`)
   * @param smilesColName Name of the column with SMILES strings */
  constructor(df: DG.DataFrame, drugName: string, desirabilityColName: string, smilesColName: string) {
    this.sourceDf = df;
    this.drugName = drugName;
    this.desirabilityColName = desirabilityColName;
    this.smilesColName = smilesColName;

    const descriptorNames = df.columns.toList().filter((col) => col.isNumerical).map((col) => col.name);
    const {desired, nonDesired} = getDesiredTables(df, df.col(desirabilityColName)!);

    // Compute descriptors' statistics
    this.descriptorStats = new Map<string, DescriptorStatistics>();
    descriptorNames.forEach((name) => {
      this.descriptorStats.set(name, getDescriptorStatistics(desired.col(name)!, nonDesired.col(name)!));
    });

    // Probability of desired class
    this.desiredProbability = desired.rowCount / df.rowCount;
  } // constructor

  /** Generates synthetic data for pMPO model training and testing.
   *
   * If `samplesCount` does not exceed the source row count, the first `samplesCount` source rows are
   * returned (as a clone). Otherwise, a clone of the source with numerical columns converted to float is
   * extended with `samplesCount - rowCount` synthetic rows.
   * @param samplesCount Number of samples to generate
   * @returns DataFrame with generated data
   * @throws Error if `samplesCount` is not greater than 1 */
  public getGenerated(samplesCount: number): DG.DataFrame {
    if (samplesCount <= 1)
      throw new Error('Failed to generate pMPO data: sample count must be greater than 1.');

    let result: DG.DataFrame;

    /* Use rows from the source dataframe if the requested sample count
       is less than or equal to the source dataframe row count */
    if (samplesCount <= this.sourceDf.rowCount) {
      const rowMask = DG.BitSet.create(this.sourceDf.rowCount);

      for (let i = 0; i < samplesCount; ++i)
        rowMask.set(i, true);

      result = this.sourceDf.clone(rowMask);
    } else {
      const cloneDf = this.getClonedSourceDfWithFloatNumericCols();
      result = cloneDf.append(this.getSyntheticTable(samplesCount - this.sourceDf.rowCount));
    }

    // Check boolean columns and ensure non-zero stdev
    for (const col of result.columns) {
      if (col.type === DG.COLUMN_TYPE.BOOL && col.stats.stdev === 0) {
        // All values are the same: invert the first TWO values (rows 0 and 1 always exist,
        // since samplesCount > 1), so the column is no longer constant
        let value = col.get(0);
        col.set(0, !value);

        value = col.get(1);
        col.set(1, !value);
      }
    }

    return result;
  } // getGenerated

  /** Generates a synthetic data table.
   *
   * Each row is assigned to the desired class with the probability computed in the constructor.
   * Every descriptor value is drawn from a normal distribution with the (average, std) of the row's class.
   * @param samplesCount Number of samples to generate
   * @returns DataFrame with synthetic data */
  private getSyntheticTable(samplesCount: number): DG.DataFrame {
    const desirabilityRaw = new Array<boolean>(samplesCount);

    for (let i = 0; i < samplesCount; ++i)
      desirabilityRaw[i] = (Math.random() < this.desiredProbability);

    // Explicitly typed as a list of generic columns: string, boolean and float columns are collected here
    const cols: DG.Column[] = [
      this.getDrugColumn(samplesCount),
      this.getSmilesColumn(samplesCount),
      DG.Column.fromList(DG.COLUMN_TYPE.BOOL, this.desirabilityColName, desirabilityRaw),
    ];

    this.descriptorStats.forEach((stat, name) => {
      const arr = new Float32Array(samplesCount);

      for (let i = 0; i < samplesCount; ++i) {
        if (desirabilityRaw[i])
          arr[i] = jStat.normal.sample(stat.desAvg, stat.desStd);
        else
          arr[i] = jStat.normal.sample(stat.nonDesAvg, stat.nonDesStd);
      }

      cols.push(DG.Column.fromFloat32Array(name, arr));
    });

    return DG.DataFrame.fromColumns(cols);
  } // getSyntheticTable

  /** Generates a column with synthetic drug names: `SYNTHETIC_DRUG_NAME` followed by a 1-based index
   * @param samplesCount Number of samples to generate
   * @returns Column with synthetic drug names */
  private getDrugColumn(samplesCount: number): DG.Column<string> {
    return DG.Column.fromList(
      DG.COLUMN_TYPE.STRING,
      this.drugName,
      Array.from({length: samplesCount}, (_, i) => `${SYNTHETIC_DRUG_NAME} ${i + 1}`));
  }

  /** Generates a column with synthetic SMILES strings (the same placeholder 'C' in every row)
   * @param samplesCount Number of samples to generate
   * @returns Column with synthetic SMILES strings */
  private getSmilesColumn(samplesCount: number): DG.Column<string> {
    return DG.Column.fromList(
      DG.COLUMN_TYPE.STRING,
      this.smilesColName,
      Array.from({length: samplesCount}, () => 'C'));
  }

  /** Clones the source dataframe converting numerical columns to Float type
   * @returns Cloned dataframe (it gets the name of the source dataframe) */
  private getClonedSourceDfWithFloatNumericCols(): DG.DataFrame {
    const cols: DG.Column[] = [];

    this.sourceDf.columns.toList().forEach((col) => {
      if (col.isNumerical)
        cols.push(col.clone().convertTo(DG.COLUMN_TYPE.FLOAT));
      else
        cols.push(col.clone());
    });

    const clone = DG.DataFrame.fromColumns(cols);
    clone.name = this.sourceDf.name;

    return clone;
  }
} // PmpoDataGenerator
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
/** Builds the initial simplex and evaluates the objective function at each of its vertices.
 *
 * Vertex 0 is a copy of `paramsInitial`. Vertex `k + 1` differs from it in coordinate `k` only:
 * a zero coordinate is replaced with the 'nonZeroParam' setting, a non-zero one is increased by
 * 'initialScale' * its value; the perturbed coordinate is then clamped to the restrictions.
 * Unperturbed coordinates are not clamped.
 * @param objectiveFunc Function to be evaluated at the vertices
 * @param settings Optimizer settings; 'nonZeroParam' and 'initialScale' are read
 * @param paramsInitial Starting point
 * @param restrictionsBottom Lower bounds of the parameters
 * @param restrictionsTop Upper bounds of the parameters
 * @returns Tuple: the vertices and the objective function values at them */
function getInitialParams(
  objectiveFunc: (x: Float32Array) => number,
  settings: Map<string, number>,
  paramsInitial: Float32Array,
  restrictionsBottom: Float32Array,
  restrictionsTop: Float32Array): [Float32Array[], number[]] {
  const paramsCount = paramsInitial.length;
  const verticesCount = paramsCount + 1;
  const zeroReplacement = settings.get('nonZeroParam')!;
  const relativeStep = settings.get('initialScale')!;

  const vertices: Float32Array[] = [];
  const values: number[] = [];

  for (let v = 0; v < verticesCount; v++) {
    const vertex = Float32Array.from(paramsInitial);
    const k = v - 1; // the only coordinate perturbed in this vertex (none for vertex 0)

    if (k >= 0) {
      if (paramsInitial[k] === 0)
        vertex[k] = zeroReplacement;
      else
        vertex[k] += relativeStep * paramsInitial[k];

      if (vertex[k] < restrictionsBottom[k])
        vertex[k] = restrictionsBottom[k];
      else if (vertex[k] > restrictionsTop[k])
        vertex[k] = restrictionsTop[k];
    }

    vertices.push(vertex);
    values.push(objectiveFunc(vertex));
  }

  return [vertices, values];
} // getInitialParams
|
|
41
|
+
|
|
42
|
+
/** Writes to `centroid` the coordinate-wise mean of the first `dimParams + 1` points of `optParams`,
 * the point with the index `lastIndex` being excluded.
 * @param centroid Output buffer (at least `dimParams` items)
 * @param dimParams Number of coordinates
 * @param lastIndex Index of the point to be skipped
 * @param optParams Points */
function fillCentroid(centroid: Float32Array, dimParams: number, lastIndex: number, optParams: Float32Array[]) {
  const pointsCount = dimParams + 1;

  for (let coord = 0; coord < dimParams; coord++) {
    let sum = 0;

    for (let pointIdx = 0; pointIdx < pointsCount; pointIdx++) {
      if (pointIdx === lastIndex)
        continue;

      sum += optParams[pointIdx][coord];
    }

    centroid[coord] = sum / dimParams;
  }
} // fillCentroid
|
|
53
|
+
|
|
54
|
+
/** Writes to `point` the value `centroid + scale * (centroid - optParams[lastIndex])`,
 * each coordinate being clamped to [`restrictionsBottom`, `restrictionsTop`].
 * @param centroid Centroid
 * @param point Output buffer
 * @param lastIndex Index (in `optParams`) of the point the step is made away from
 * @param optParams Points
 * @param scale Step multiplier
 * @param dimParams Number of coordinates
 * @param restrictionsBottom Lower bounds of the coordinates
 * @param restrictionsTop Upper bounds of the coordinates */
function fillPoint(
  centroid: Float32Array, point: Float32Array,
  lastIndex: number, optParams: Float32Array[],
  scale: number, dimParams: number,
  restrictionsBottom: Float32Array,
  restrictionsTop: Float32Array) {
  const origin = optParams[lastIndex];

  for (let coord = 0; coord < dimParams; coord++) {
    point[coord] = centroid[coord];
    point[coord] += scale * (centroid[coord] - origin[coord]);

    const lower = restrictionsBottom[coord];
    const upper = restrictionsTop[coord];

    if (point[coord] < lower)
      point[coord] = lower;
    else if (point[coord] > upper)
      point[coord] = upper;
  }
} // fillPoint
|
|
70
|
+
|
|
71
|
+
/** Minimizes the objective function with a bounded Nelder-Mead-style simplex search.
 *
 * Each iteration sorts the simplex vertices by the objective value and tries to replace the worst vertex
 * with an expansion or reflection point (when the reflection improves on the worst vertex), or with a
 * contraction point otherwise. The search stops when
 * - the progress indicator is canceled;
 * - the iteration counter exceeds 'maxIter';
 * - the best value has not improved by more than 'tolerance' for more than `2 * (n + 1)` checks in a row
 *   (`n` is the number of parameters);
 * - the contraction point does not improve on the worst vertex (no shrink step is performed).
 *
 * The progress indicator is updated and control is yielded to the event loop on every iteration.
 * @param pi Progress indicator (used for progress reporting and cancellation)
 * @param objectiveFunc Function to be minimized
 * @param paramsInitial Starting point
 * @param settings Optimizer settings; 'tolerance', 'maxIter', 'scaleReflaction' (sic), 'scaleExpansion',
 * 'scaleContraction', 'nonZeroParam' and 'initialScale' are read
 * @param restrictionsBottom Lower bounds of the parameters
 * @param restrictionsTop Upper bounds of the parameters
 * @returns The best vertex found and the number of iterations started */
export async function optimizeNM(pi: DG.ProgressIndicator,
  objectiveFunc: (x: Float32Array) => number, paramsInitial: Float32Array,
  settings: Map<string, number>, restrictionsBottom: Float32Array, restrictionsTop: Float32Array,
): Promise<{optimalPoint: Float32Array, iterations: number}> {
  // Settings initialization
  const tolerance = settings.get('tolerance')!;
  const maxIter = settings.get('maxIter')!;
  const scaleReflection = settings.get('scaleReflaction')!; // the key spelling matches the settings map
  const scaleExpansion = settings.get('scaleExpansion')!;
  const scaleContraction = settings.get('scaleContraction')!;

  const dimParams = paramsInitial.length;
  const dim = dimParams + 1;

  const [optParams, pointObjectives] = getInitialParams(
    objectiveFunc,
    settings,
    paramsInitial,
    restrictionsBottom,
    restrictionsTop,
  );

  // Vertex indices; kept sorted by the objective value (ascending) inside the loop
  const indexes = Array.from({length: dim}, (_, i) => i);
  const lastIndex = dim - 1;

  let iteration = 0;
  let previousBest = 0;
  let noImprovement = 0;

  // Buffers reused across iterations
  const centroid = new Float32Array(dimParams);
  const reflectionPoint = new Float32Array(dimParams);
  const expansionPoint = new Float32Array(dimParams);
  const contractionPoint = new Float32Array(dimParams);

  /** Overwrites the current worst vertex with a copy of the given point */
  const replaceWorst = (point: Float32Array, score: number): void => {
    const worst = indexes[lastIndex];
    pointObjectives[worst] = score;
    optParams[worst].set(point);
  };

  if (dim > 1) {
    while (true) {
      indexes.sort((a: number, b: number) => pointObjectives[a] - pointObjectives[b]);

      const percentage = Math.min(100, Math.floor(100 * iteration / maxIter));
      pi.update(percentage, `Optimizing pMPO... (${percentage}%)`);
      await new Promise((r) => setTimeout(r, 1)); // yield to the event loop

      if (pi.canceled)
        break;

      // NOTE(review): with '>' the loop body may run maxIter + 1 times; kept as is to preserve results
      if (iteration > maxIter)
        break;

      const best = pointObjectives[indexes[0]];

      // The doubled initial value makes the first check count as an improvement for a positive best value
      if (iteration === 0)
        previousBest = 2 * best;

      ++iteration;

      if (previousBest - best > tolerance)
        noImprovement = 0;
      else if (++noImprovement > 2 * dim)
        break;

      previousBest = best;

      const worst = indexes[lastIndex];

      // centroid
      fillCentroid(centroid, dimParams, worst, optParams);

      // reflection
      fillPoint(centroid, reflectionPoint, worst,
        optParams, scaleReflection, dimParams, restrictionsBottom, restrictionsTop);
      const reflectionScore = objectiveFunc(reflectionPoint);

      // expansion
      if (reflectionScore < pointObjectives[worst]) {
        fillPoint(centroid, expansionPoint, worst,
          optParams, scaleExpansion, dimParams, restrictionsBottom, restrictionsTop);
        const expansionScore = objectiveFunc(expansionPoint);

        if (expansionScore < reflectionScore)
          replaceWorst(expansionPoint, expansionScore);
        else
          replaceWorst(reflectionPoint, reflectionScore);

        continue;
      }

      // contraction
      fillPoint(centroid, contractionPoint, worst,
        optParams, scaleContraction, dimParams, restrictionsBottom, restrictionsTop);
      const contractionScore = objectiveFunc(contractionPoint);

      if (contractionScore < pointObjectives[worst]) {
        replaceWorst(contractionPoint, contractionScore);
        continue;
      }

      break;
    } // while
  } // if

  return {
    optimalPoint: optParams[indexes[0]],
    iterations: iteration,
  };
} // optimizeNM
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// Constants and type definitions for probabilistic scoring (pMPO)
|
|
2
|
-
//
|
|
2
|
+
// Source paper https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/
|
|
3
3
|
|
|
4
4
|
/** Minimum number of samples required to compute pMPO */
|
|
5
5
|
export const MIN_SAMPLES_COUNT = 10;
|
|
@@ -13,6 +13,8 @@ export type BasicStats = {
|
|
|
13
13
|
desStd: number,
|
|
14
14
|
nonDesAvg: number,
|
|
15
15
|
nonDesStd: number,
|
|
16
|
+
min: number,
|
|
17
|
+
max: number,
|
|
16
18
|
};
|
|
17
19
|
|
|
18
20
|
/** Descriptor statistics including basic stats, t-statistics and p-value */
|
|
@@ -44,6 +46,7 @@ export type PmpoParams = BasicStats & Cutoff & SigmoidParams & {
|
|
|
44
46
|
intersections: number[],
|
|
45
47
|
x0: number,
|
|
46
48
|
xBound: number,
|
|
49
|
+
inflection: number,
|
|
47
50
|
};
|
|
48
51
|
|
|
49
52
|
export type CorrelationTriple = [string, string, number];
|
|
@@ -74,17 +77,40 @@ export const DESCR_TABLE_TITLE = DESCR_TITLE + ' Statistics';
|
|
|
74
77
|
export const SELECTED_TITLE = 'Selected';
|
|
75
78
|
export const WEIGHT_TITLE = 'Weight';
|
|
76
79
|
export const SCORES_TITLE = 'pMPO score';
|
|
77
|
-
export const DESIRABILITY_COL_NAME = 'Desirability';
|
|
80
|
+
export const DESIRABILITY_COL_NAME = 'Desirability Curve';
|
|
81
|
+
|
|
82
|
+
/** Default p-value threshold for filtering descriptors */
|
|
83
|
+
export const P_VAL_TRES_DEFAULT = 0.001;
|
|
78
84
|
|
|
79
85
|
/** Minimum p-value threshold for filtering descriptors */
|
|
80
|
-
export const P_VAL_TRES_MIN = 0.
|
|
86
|
+
export const P_VAL_TRES_MIN = 0.001;
|
|
87
|
+
|
|
88
|
+
/** Maximum p-value threshold for filtering descriptors */
|
|
89
|
+
export const P_VAL_TRES_MAX = 1;
|
|
90
|
+
|
|
91
|
+
/** Default R-squared threshold for filtering correlated descriptors */
|
|
92
|
+
export const R2_DEFAULT = 0.53;
|
|
81
93
|
|
|
82
94
|
/** Minimum R-squared threshold for filtering correlated descriptors */
|
|
83
95
|
export const R2_MIN = 0.01;
|
|
84
96
|
|
|
97
|
+
/** Maximum R-squared threshold for filtering correlated descriptors */
|
|
98
|
+
export const R2_MAX = 1.0;
|
|
99
|
+
|
|
100
|
+
/** Default q-cutoff for descriptors in the pMPO model */
|
|
101
|
+
export const Q_CUTOFF_DEFAULT = 0.05;
|
|
102
|
+
|
|
85
103
|
/** Minimum q-cutoff for descriptors in the pMPO model */
|
|
86
104
|
export const Q_CUTOFF_MIN = 0.01;
|
|
87
105
|
|
|
106
|
+
/** Maximum q-cutoff for descriptors in the pMPO model */
|
|
107
|
+
export const Q_CUTOFF_MAX = 1;
|
|
108
|
+
|
|
109
|
+
/** Default setting for using sigmoid correction in pMPO */
|
|
110
|
+
export const USE_SIGMOID_DEFAULT = true;
|
|
111
|
+
|
|
112
|
+
export const FORMAT = '0.000';
|
|
113
|
+
|
|
88
114
|
/** Colors used for selected and skipped descriptors */
|
|
89
115
|
export enum COLORS {
|
|
90
116
|
SELECTED = 'rgb(26, 146, 26)',
|
|
@@ -106,3 +132,115 @@ export type DesirabilityProfileProperties = Record<string, {
|
|
|
106
132
|
|
|
107
133
|
export const STAT_GRID_HEIGHT = 75;
|
|
108
134
|
export const DESIRABILITY_COLUMN_WIDTH = 305;
|
|
135
|
+
|
|
136
|
+
/** Non-negative sigma coefficients of the basic range (ascending, starting with 0) */
const POSITIVE_BASIC_RANGE_SIGMA_COEFFS = [0, 0.25, 0.5, 1, 1.5, 2, 2.5, 3, 4, 5];

/** Basic range sigma coefficients for desirability profile points:
 * the negated positive coefficients (ascending), followed by 0 and the positive coefficients */
export const BASIC_RANGE_SIGMA_COEFFS = POSITIVE_BASIC_RANGE_SIGMA_COEFFS
  .slice(1) // skip 0, so that it is not duplicated
  .map((v) => -v)
  .reverse()
  .concat(POSITIVE_BASIC_RANGE_SIGMA_COEFFS);

/** Additional positive sigma coefficients of the extended range */
const EXTRA_RANGE_SIGMA_COEFFS = [0.12, 0.37, 0.63, 0.75, 0.88, 1.25, 1.75, 2.25, 2.75];

/** Non-negative sigma coefficients of the extended range (ascending, starting with 0).
 * A numeric comparator is required: the default sort compares items as strings. */
const EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS = POSITIVE_BASIC_RANGE_SIGMA_COEFFS
  .concat(EXTRA_RANGE_SIGMA_COEFFS)
  .sort((a, b) => a - b);

/** Extended range sigma coefficients for desirability profile points:
 * the negated positive coefficients (ascending), followed by 0 and the positive coefficients */
export const EXTENDED_RANGE_SIGMA_COEFFS = EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS
  .slice(1) // skip 0, so that it is not duplicated
  .map((v) => -v)
  .reverse()
  .concat(EXTENDED_POSITIVE_RANGE_SIGMA_COEFFS);
|
|
154
|
+
|
|
155
|
+
/** Confusion matrix: numeric values stored under the keys TP, TN, FP and FN */
export type ConfusionMatrix = Record<'TP' | 'TN' | 'FP' | 'FN', number>;
|
|
162
|
+
|
|
163
|
+
// Column titles of the ROC curve table
export const TPR_TITLE = 'TPR (Sensitivity)';
export const FPR_TITLE = 'FPR (1 - Specificity)';
export const THRESHOLD = 'Threshold';

// Number of intervals the [0, 1] threshold range is split into
const ROC_POINTS = 100;

/** Number of ROC curve thresholds (both ends of the range are included) */
export const ROC_TRESHOLDS_COUNT = ROC_POINTS + 1;

/** ROC curve thresholds: evenly spaced values from 0.0 to 1.0 */
export const ROC_TRESHOLDS = new Float32Array(ROC_TRESHOLDS_COUNT).map((_, idx) => idx / ROC_POINTS);
|
|
174
|
+
|
|
175
|
+
/** Path to the sample dataframe for pMPO training.
 * Data source: https://pmc.ncbi.nlm.nih.gov/articles/PMC4716604/ */
export const SOURCE_PATH = 'System:AppData/Eda/drugs-props-train.csv';

/** Path to the scores of the sample dataframe; they are computed using https://github.com/Merck/pmpo */
export const SCORES_PATH = 'System:AppData/Eda/drugs-props-train-scores.csv';

/** Name of the synthetic drug used in the sample dataframe */
export const SYNTHETIC_DRUG_NAME = 'Synthetic drug';

/** Result of pMPO model evaluation */
export type ModelEvaluationResult = {
  /** AUC value */
  auc: number;
  /** Threshold value (NOTE(review): presumably the selected classification threshold - confirm) */
  threshold: number;
  /** TPR values (NOTE(review): presumably one value per ROC threshold - confirm) */
  tpr: Float32Array;
  /** FPR values (NOTE(review): presumably one value per ROC threshold - confirm) */
  fpr: Float32Array;
};
|
|
191
|
+
|
|
192
|
+
/** Auto-tuning is applicable to tables with at most this number of rows */
export const AUTO_TUNE_MAX_APPLICABLE_ROWS = 10000;

/** Default settings of the optimizer used in pMPO parameter tuning.
 * The keys are looked up by name; the spelling 'scaleReflaction' must be kept as is. */
export const DEFAULT_OPTIMIZATION_SETTINGS = new Map<string, number>()
  .set('tolerance', 0.001)
  .set('maxIter', 25)
  .set('nonZeroParam', 0.0001)
  .set('initialScale', 0.02)
  .set('scaleReflaction', 1)
  .set('scaleExpansion', 2)
  .set('scaleContraction', -0.5);
|
|
205
|
+
|
|
206
|
+
/** Outcome of pMPO parameter tuning */
export type OptimalPoint = {
  /** p-value threshold */
  pValTresh: number;
  /** R-squared threshold */
  r2Tresh: number;
  /** q-cutoff */
  qCutoff: number;
  /** Completion state of the tuning */
  state: 'success' | 'canceled' | 'failed';
  /** Message accompanying the state */
  msg: string;
};

/** Lower bounds for pMPO parameters during optimization */
export const LOW_PARAMS_BOUNDS = Float32Array.of(0.5, Q_CUTOFF_MIN);

/** Upper bounds for pMPO parameters during optimization */
export const HIGH_PARAMS_BOUNDS = Float32Array.of(R2_MAX, Q_CUTOFF_MAX);
|
|
220
|
+
|
|
221
|
+
/** Comparison signs; `DEFAULT` is an alias of `LESS_OR_EQUAL` */
export enum EQUALITY_SIGN {
  GREATER = '>',
  LESS = '<',
  GREATER_OR_EQUAL = '≥',
  LESS_OR_EQUAL = '≤',
  DEFAULT = LESS_OR_EQUAL,
}

/** Comparison signs offered as options (the `DEFAULT` alias is not listed separately) */
export const SIGN_OPTIONS: EQUALITY_SIGN[] = [
  EQUALITY_SIGN.GREATER,
  EQUALITY_SIGN.LESS,
  EQUALITY_SIGN.GREATER_OR_EQUAL,
  EQUALITY_SIGN.LESS_OR_EQUAL,
];
|
|
235
|
+
|
|
236
|
+
/** Name of the thresholded desirability column */
export const THRESHOLDED_DESIRABILITY_COL_NAME = 'Desirability';

/** Category names treated as preferable */
export const PREFERABLE_CATEGORIES = [
  'perfect', 'good',
  'true', 't',
  'g',
  'active', 'a',
  'yes', 'y',
];

/** Identifiers of pMPO inputs */
export type PmpoInputId =
  | 'descriptors'
  | 'desirability'
  | 'threshold'
  | 'categories';

/** Tooltip content: a text or a function creating an element */
export type TooltipContent = string | (() => HTMLElement);

/** Result of pMPO inputs validation */
export interface PmpoValidationResult {
  /** Validity flag */
  valid: boolean;
  /** Tooltip contents keyed by input identifier */
  errors: Map<PmpoInputId, TooltipContent>;
}
|