@datagrok/eda 1.4.13 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/CHANGELOG.md +7 -5
  2. package/dist/111.js +1 -1
  3. package/dist/111.js.map +1 -1
  4. package/dist/128.js +1 -1
  5. package/dist/128.js.map +1 -1
  6. package/dist/153.js +1 -1
  7. package/dist/153.js.map +1 -1
  8. package/dist/23.js +1 -1
  9. package/dist/23.js.map +1 -1
  10. package/dist/234.js +1 -1
  11. package/dist/234.js.map +1 -1
  12. package/dist/242.js +1 -1
  13. package/dist/242.js.map +1 -1
  14. package/dist/260.js +1 -1
  15. package/dist/260.js.map +1 -1
  16. package/dist/33.js +1 -1
  17. package/dist/33.js.map +1 -1
  18. package/dist/348.js +1 -1
  19. package/dist/348.js.map +1 -1
  20. package/dist/377.js +1 -1
  21. package/dist/377.js.map +1 -1
  22. package/dist/397.js +2 -0
  23. package/dist/397.js.map +1 -0
  24. package/dist/412.js +1 -1
  25. package/dist/412.js.map +1 -1
  26. package/dist/415.js +1 -1
  27. package/dist/415.js.map +1 -1
  28. package/dist/501.js +1 -1
  29. package/dist/501.js.map +1 -1
  30. package/dist/531.js +1 -1
  31. package/dist/531.js.map +1 -1
  32. package/dist/583.js +1 -1
  33. package/dist/583.js.map +1 -1
  34. package/dist/589.js +1 -1
  35. package/dist/589.js.map +1 -1
  36. package/dist/603.js +1 -1
  37. package/dist/603.js.map +1 -1
  38. package/dist/656.js +1 -1
  39. package/dist/656.js.map +1 -1
  40. package/dist/682.js +1 -1
  41. package/dist/682.js.map +1 -1
  42. package/dist/705.js +1 -1
  43. package/dist/705.js.map +1 -1
  44. package/dist/727.js +1 -1
  45. package/dist/727.js.map +1 -1
  46. package/dist/731.js +1 -1
  47. package/dist/731.js.map +1 -1
  48. package/dist/738.js +1 -1
  49. package/dist/738.js.map +1 -1
  50. package/dist/763.js +1 -1
  51. package/dist/763.js.map +1 -1
  52. package/dist/778.js +1 -1
  53. package/dist/778.js.map +1 -1
  54. package/dist/783.js +1 -1
  55. package/dist/783.js.map +1 -1
  56. package/dist/793.js +1 -1
  57. package/dist/793.js.map +1 -1
  58. package/dist/810.js +1 -1
  59. package/dist/810.js.map +1 -1
  60. package/dist/860.js +1 -1
  61. package/dist/860.js.map +1 -1
  62. package/dist/907.js +1 -1
  63. package/dist/907.js.map +1 -1
  64. package/dist/950.js +1 -1
  65. package/dist/950.js.map +1 -1
  66. package/dist/980.js +1 -1
  67. package/dist/980.js.map +1 -1
  68. package/dist/990.js +1 -1
  69. package/dist/990.js.map +1 -1
  70. package/dist/package-test.js +1 -1
  71. package/dist/package-test.js.map +1 -1
  72. package/dist/package.js +1 -1
  73. package/dist/package.js.map +1 -1
  74. package/package.json +5 -5
  75. package/src/package.ts +2 -1
  76. package/src/pareto-optimization/pareto-optimizer.ts +1 -1
  77. package/src/pls/pls-constants.ts +3 -1
  78. package/src/pls/pls-tools.ts +73 -69
  79. package/src/probabilistic-scoring/data-generator.ts +48 -3
  80. package/src/probabilistic-scoring/pmpo-defs.ts +30 -2
  81. package/src/probabilistic-scoring/pmpo-utils.ts +143 -52
  82. package/src/probabilistic-scoring/prob-scoring.ts +475 -102
  83. package/src/probabilistic-scoring/stat-tools.ts +1 -1
  84. package/src/tests/pareto-tests.ts +13 -15
  85. package/src/tests/pmpo-tests.ts +643 -3
  86. package/test-console-output-1.log +221 -93
  87. package/test-record-1.mp4 +0 -0
@@ -1,6 +1,6 @@
1
1
  // Tests for Probabilistic MPO (pMPO)
2
2
  // Reference scores are pre-computed and stored in the 'drugs-props-train-scores.csv' file.
3
- // This scores are computed using the library: https://github.com/Merck/pmpo
3
+ // These scores are computed using the library https://github.com/Merck/pmpo
4
4
 
5
5
  import * as grok from 'datagrok-api/grok';
6
6
  import * as ui from 'datagrok-api/ui';
@@ -11,7 +11,7 @@ import {category, expect, test} from '@datagrok-libraries/test/src/test';
11
11
 
12
12
  import {Pmpo} from '../probabilistic-scoring/prob-scoring';
13
13
  import {P_VAL_TRES_DEFAULT, Q_CUTOFF_DEFAULT, R2_DEFAULT, SCORES_PATH,
14
- SOURCE_PATH} from '../probabilistic-scoring/pmpo-defs';
14
+ SOURCE_PATH, EQUALITY_SIGN} from '../probabilistic-scoring/pmpo-defs';
15
15
  import {getSynteticPmpoData} from '../probabilistic-scoring/data-generator';
16
16
 
17
17
  const TIMEOUT = 10000;
@@ -53,7 +53,7 @@ function getScoreMaxDeviation(sourceDrugCol: DG.Column, sourceScores: DG.Column,
53
53
  return mad;
54
54
  } // getScoreMaxDeviation
55
55
 
56
- category('Probabilistic MPO', () => {
56
+ category('Probabilistic MPO: Computation', () => {
57
57
  // Correctness tests: compare pMPO scores with reference scores
58
58
  PMPO_MODES.forEach((refScoreName) => {
59
59
  const useSigmoid = (refScoreName == SIGMOIDAL);
@@ -155,3 +155,643 @@ category('Probabilistic MPO', () => {
155
155
  expect(descriptors.length, DESCRIPTOR_NAMES.length, 'Inconsistent source data: no enough of columns');
156
156
  }, {timeout: TIMEOUT});
157
157
  });
158
+
159
+ /** Creates a test DataFrame with clearly separated desired/non-desired groups */
160
+ function createValidTestDf(rowCount: number = 20): DG.DataFrame {
161
+ const half = Math.floor(rowCount / 2);
162
+ const desList: boolean[] = [];
163
+ const d1 = new Float64Array(rowCount);
164
+ const d2 = new Float64Array(rowCount);
165
+ const d3 = new Float64Array(rowCount);
166
+
167
+ for (let i = 0; i < rowCount; i++) {
168
+ desList.push(i < half);
169
+ const j = i < half ? i : i - half;
170
+ const t = j / Math.max(half - 1, 1);
171
+ // d1, d2: clearly separated groups → low p-value
172
+ d1[i] = i < half ? 9 + 2 * t : 1 + 2 * t;
173
+ d2[i] = i < half ? 18 + 4 * t : 3 + 4 * t;
174
+ // d3: same distribution in both groups → high p-value
175
+ d3[i] = 5 + 0.1 * j;
176
+ }
177
+
178
+ return DG.DataFrame.fromColumns([
179
+ DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'des', desList),
180
+ DG.Column.fromFloat64Array('d1', d1),
181
+ DG.Column.fromFloat64Array('d2', d2),
182
+ DG.Column.fromFloat64Array('d3', d3),
183
+ ]);
184
+ }
185
+
186
+ /** Extracts a ColumnList of named descriptors from the DataFrame */
187
+ function getDescrCols(df: DG.DataFrame, names: string[]): DG.ColumnList {
188
+ return DG.DataFrame.fromColumns(df.columns.byNames(names)).columns;
189
+ }
190
+
191
+ /** Returns default valid params for validateInputs */
192
+ function getValidInputParams(): {
193
+ descriptors: DG.Column[] | null,
194
+ desirability: DG.Column | null,
195
+ threshold: number | null,
196
+ sign: EQUALITY_SIGN,
197
+ desirableCategories: string[] | null,
198
+ pValue: number | null,
199
+ r2: number | null,
200
+ qCutoff: number | null,
201
+ } {
202
+ const df = createValidTestDf();
203
+ return {
204
+ descriptors: df.columns.byNames(['d1', 'd2']),
205
+ desirability: df.col('des')!,
206
+ threshold: null,
207
+ sign: EQUALITY_SIGN.DEFAULT,
208
+ desirableCategories: null,
209
+ pValue: P_VAL_TRES_DEFAULT,
210
+ r2: R2_DEFAULT,
211
+ qCutoff: Q_CUTOFF_DEFAULT,
212
+ };
213
+ }
214
+
215
+ category('Probabilistic MPO: API', () => {
216
+ // --- isApplicable: validates input thresholds, sample count, desirability, and descriptor quality ---
217
+
218
+ // pValThresh = 0.0001 < P_VAL_TRES_MIN (0.001) → rejected
219
+ test('isApplicable: rejects p-value below minimum', async () => {
220
+ const df = createValidTestDf();
221
+ const des = df.col('des')!;
222
+ const descr = getDescrCols(df, ['d1', 'd2']);
223
+ expect(Pmpo.isApplicable(descr, des, 0.0001, R2_DEFAULT, Q_CUTOFF_DEFAULT), false);
224
+ });
225
+
226
+ // r2Tresh = 0.001 < R2_MIN (0.01) → rejected
227
+ test('isApplicable: rejects R² below minimum', async () => {
228
+ const df = createValidTestDf();
229
+ const des = df.col('des')!;
230
+ const descr = getDescrCols(df, ['d1', 'd2']);
231
+ expect(Pmpo.isApplicable(descr, des, P_VAL_TRES_DEFAULT, 0.001, Q_CUTOFF_DEFAULT), false);
232
+ });
233
+
234
+ // qCutoff = 0.001 < Q_CUTOFF_MIN (0.01) → rejected
235
+ test('isApplicable: rejects q-cutoff below minimum', async () => {
236
+ const df = createValidTestDf();
237
+ const des = df.col('des')!;
238
+ const descr = getDescrCols(df, ['d1', 'd2']);
239
+ expect(Pmpo.isApplicable(descr, des, P_VAL_TRES_DEFAULT, R2_DEFAULT, 0.001), false);
240
+ });
241
+
242
+ // 8 rows < MIN_SAMPLES_COUNT (10) → rejected
243
+ test('isApplicable: rejects too few samples', async () => {
244
+ const df = createValidTestDf(8);
245
+ const des = df.col('des')!;
246
+ const descr = getDescrCols(df, ['d1', 'd2']);
247
+ expect(Pmpo.isApplicable(descr, des, P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT), false);
248
+ });
249
+
250
+ // All-true desirability → stdev = 0 → rejected
251
+ test('isApplicable: rejects single-category desirability', async () => {
252
+ const n = 20;
253
+ const df = DG.DataFrame.fromColumns([
254
+ DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'des', new Array(n).fill(true)),
255
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
256
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i * 2)),
257
+ ]);
258
+ const des = df.col('des')!;
259
+ const descr = getDescrCols(df, ['d1', 'd2']);
260
+ expect(Pmpo.isApplicable(descr, des, P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT), false);
261
+ });
262
+
263
+ // String column among descriptors → not numerical → rejected
264
+ test('isApplicable: rejects non-numerical descriptor', async () => {
265
+ const n = 20;
266
+ const half = n / 2;
267
+ const df = DG.DataFrame.fromColumns([
268
+ DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'des',
269
+ Array.from({length: n}, (_, i) => i < half)),
270
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
271
+ DG.Column.fromStrings('strCol', Array.from({length: n}, (_, i) => 'a' + i)),
272
+ ]);
273
+ const des = df.col('des')!;
274
+ const descr = getDescrCols(df, ['d1', 'strCol']);
275
+ expect(Pmpo.isApplicable(descr, des, P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT), false);
276
+ });
277
+
278
+ // Both descriptors are constant (stdev = 0) → no non-constant columns → rejected
279
+ test('isApplicable: rejects all-constant descriptors', async () => {
280
+ const n = 20;
281
+ const half = n / 2;
282
+ const df = DG.DataFrame.fromColumns([
283
+ DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'des',
284
+ Array.from({length: n}, (_, i) => i < half)),
285
+ DG.Column.fromFloat64Array('c1', new Float64Array(n).fill(5)),
286
+ DG.Column.fromFloat64Array('c2', new Float64Array(n).fill(3)),
287
+ ]);
288
+ const des = df.col('des')!;
289
+ const descr = getDescrCols(df, ['c1', 'c2']);
290
+ expect(Pmpo.isApplicable(descr, des, P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT), false);
291
+ });
292
+
293
+ // Exactly MIN_SAMPLES_COUNT (10) rows with valid descriptors → accepted
294
+ test('isApplicable: accepts valid data at minimum sample count', async () => {
295
+ const df = createValidTestDf(10);
296
+ const des = df.col('des')!;
297
+ const descr = getDescrCols(df, ['d1', 'd2']);
298
+ expect(Pmpo.isApplicable(descr, des, P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT), true);
299
+ });
300
+
301
+ // --- isTableValid: validates table structure (row count and numeric column variance) ---
302
+
303
+ // 1 row < minimum of 2 → rejected
304
+ test('isTableValid: rejects table with 1 row', async () => {
305
+ const df = DG.DataFrame.fromColumns([
306
+ DG.Column.fromFloat64Array('a', new Float64Array([1])),
307
+ DG.Column.fromFloat64Array('b', new Float64Array([2])),
308
+ ]);
309
+ expect(Pmpo.isTableValid(df, false), false);
310
+ });
311
+
312
+ // All columns filled with a single value → 0 non-constant columns < 2 → rejected
313
+ test('isTableValid: rejects all-constant numeric columns', async () => {
314
+ const n = 10;
315
+ const df = DG.DataFrame.fromColumns([
316
+ DG.Column.fromFloat64Array('a', new Float64Array(n).fill(5)),
317
+ DG.Column.fromFloat64Array('b', new Float64Array(n).fill(3)),
318
+ ]);
319
+ expect(Pmpo.isTableValid(df, false), false);
320
+ });
321
+
322
+ // Only 1 column with variance > 0, need at least 2 → rejected
323
+ test('isTableValid: rejects single non-constant numeric column', async () => {
324
+ const n = 10;
325
+ const df = DG.DataFrame.fromColumns([
326
+ DG.Column.fromFloat64Array('a', Float64Array.from({length: n}, (_, i) => i)),
327
+ DG.Column.fromFloat64Array('b', new Float64Array(n).fill(3)),
328
+ ]);
329
+ expect(Pmpo.isTableValid(df, false), false);
330
+ });
331
+
332
+ // Exactly 2 columns with variance > 0 → minimum met → accepted
333
+ test('isTableValid: accepts two non-constant numeric columns', async () => {
334
+ const n = 10;
335
+ const df = DG.DataFrame.fromColumns([
336
+ DG.Column.fromFloat64Array('a', Float64Array.from({length: n}, (_, i) => i)),
337
+ DG.Column.fromFloat64Array('b', Float64Array.from({length: n}, (_, i) => i * 2)),
338
+ ]);
339
+ expect(Pmpo.isTableValid(df, false), true);
340
+ });
341
+
342
+ // --- fit: trains pMPO model, computes statistics, filters descriptors by p-value and correlation ---
343
+
344
+ // Valid data with well-separated groups → at least one descriptor selected
345
+ test('fit: returns non-empty params', async () => {
346
+ const df = createValidTestDf();
347
+ const trainRes = Pmpo.fit(df, getDescrCols(df, ['d1', 'd2']), df.col('des')!,
348
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
349
+ expect(trainRes.params.size > 0, true, 'Expected non-empty params');
350
+ });
351
+
352
+ // Weights are z-scores normalized by their sum → must equal 1
353
+ test('fit: weights sum to 1', async () => {
354
+ const df = createValidTestDf();
355
+ const trainRes = Pmpo.fit(df, getDescrCols(df, ['d1', 'd2']), df.col('des')!,
356
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
357
+ let sum = 0;
358
+ trainRes.params.forEach((p) => sum += p.weight);
359
+ expect(Math.abs(sum - 1.0) < 1e-10, true, `Weights sum ${sum} should equal 1.0`);
360
+ });
361
+
362
+ // Correlation filtering can only remove from p-value-selected set, never add
363
+ test('fit: selectedByCorr is subset of selectedByPvalue', async () => {
364
+ const df = createValidTestDf();
365
+ const trainRes = Pmpo.fit(df, getDescrCols(df, ['d1', 'd2', 'd3']), df.col('des')!,
366
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
367
+ const allInPvalue = trainRes.selectedByCorr.every((d) => trainRes.selectedByPvalue.includes(d));
368
+ expect(allInPvalue, true, 'selectedByCorr must be a subset of selectedByPvalue');
369
+ });
370
+
371
+ // Statistics table should contain one row per input descriptor (3)
372
+ test('fit: statistics table row count matches descriptor count', async () => {
373
+ const descrNames = ['d1', 'd2', 'd3'];
374
+ const df = createValidTestDf();
375
+ const trainRes = Pmpo.fit(df, getDescrCols(df, descrNames), df.col('des')!,
376
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
377
+ expect(trainRes.descrStatsTable.rowCount, descrNames.length);
378
+ });
379
+
380
+ // 8 rows < MIN_SAMPLES_COUNT → isApplicable fails → fit throws
381
+ test('fit: throws on non-applicable data', async () => {
382
+ const df = createValidTestDf(8); // too few samples
383
+ let threw = false;
384
+ try {
385
+ Pmpo.fit(df, getDescrCols(df, ['d1', 'd2']), df.col('des')!,
386
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
387
+ } catch (_) {
388
+ threw = true;
389
+ }
390
+ expect(threw, true, 'Expected fit to throw on non-applicable data');
391
+ });
392
+
393
+ // Both groups have identical distributions → t ≈ 0, p ≈ 1 → all filtered → throws
394
+ test('fit: throws when no descriptors pass p-value filter', async () => {
395
+ const n = 20;
396
+ const half = n / 2;
397
+ // Same distribution in both groups → p-value ≈ 1 → all filtered
398
+ const df = DG.DataFrame.fromColumns([
399
+ DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'des', Array.from({length: n}, (_, i) => i < half)),
400
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => (i % half) + 1)),
401
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => ((i % half) + 1) * 2)),
402
+ ]);
403
+ let threw = false;
404
+ try {
405
+ Pmpo.fit(df, getDescrCols(df, ['d1', 'd2']), df.col('des')!,
406
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
407
+ } catch (_) {
408
+ threw = true;
409
+ }
410
+ expect(threw, true, 'Expected fit to throw when no descriptors pass p-value filter');
411
+ });
412
+
413
+ // --- predict: applies trained pMPO parameters to compute scores ---
414
+
415
+ // Output column length must match input DataFrame row count
416
+ test('predict: returns column with correct length', async () => {
417
+ const df = createValidTestDf();
418
+ const trainRes = Pmpo.fit(df, getDescrCols(df, ['d1', 'd2']), df.col('des')!,
419
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
420
+ const prediction = Pmpo.predict(df, trainRes.params, true, SCORES_NAME);
421
+ expect(prediction.length, df.rowCount);
422
+ });
423
+
424
+ // Scores = sum of weight * gaussian * sigmoid, all components >= 0
425
+ test('predict: scores are non-negative', async () => {
426
+ const df = createValidTestDf();
427
+ const trainRes = Pmpo.fit(df, getDescrCols(df, ['d1', 'd2']), df.col('des')!,
428
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
429
+ const prediction = Pmpo.predict(df, trainRes.params, true, SCORES_NAME);
430
+ const raw = prediction.getRawData();
431
+ let allNonNeg = true;
432
+ for (let i = 0; i < raw.length; i++)
433
+ if (raw[i] < 0) {allNonNeg = false; break;}
434
+ expect(allNonNeg, true, 'All scores should be non-negative');
435
+ });
436
+
437
+ // Weights sum to 1, gaussian in [0,1], sigmoid in [0,1] → score <= 1
438
+ test('predict: scores do not exceed 1', async () => {
439
+ const df = createValidTestDf();
440
+ const trainRes = Pmpo.fit(df, getDescrCols(df, ['d1', 'd2']), df.col('des')!,
441
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
442
+ const prediction = Pmpo.predict(df, trainRes.params, true, SCORES_NAME);
443
+ const raw = prediction.getRawData();
444
+ let maxScore = 0;
445
+ for (let i = 0; i < raw.length; i++) maxScore = Math.max(maxScore, raw[i]);
446
+ expect(maxScore <= 1.0 + 1e-10, true, `Max score ${maxScore} should not exceed 1.0`);
447
+ });
448
+
449
+ // Sigmoid correction divides by (1 + b*c^(-dx)) → different from pure Gaussian
450
+ test('predict: sigmoid and Gaussian modes produce different scores', async () => {
451
+ const df = createValidTestDf();
452
+ const trainRes = Pmpo.fit(df, getDescrCols(df, ['d1', 'd2']), df.col('des')!,
453
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
454
+ const sigScores = Pmpo.predict(df, trainRes.params, true, 'sig').getRawData();
455
+ const gauScores = Pmpo.predict(df, trainRes.params, false, 'gau').getRawData();
456
+ let differ = false;
457
+ for (let i = 0; i < df.rowCount; i++)
458
+ if (Math.abs(sigScores[i] - gauScores[i]) > 1e-12) {differ = true; break;}
459
+ expect(differ, true, 'Sigmoid and Gaussian modes should produce different scores');
460
+ });
461
+
462
+ // Params reference descriptor columns not present in the target DataFrame → throws
463
+ test('predict: throws for missing column', async () => {
464
+ const df = createValidTestDf();
465
+ const trainRes = Pmpo.fit(df, getDescrCols(df, ['d1', 'd2']), df.col('des')!,
466
+ P_VAL_TRES_DEFAULT, R2_DEFAULT, Q_CUTOFF_DEFAULT);
467
+ const incompleteDf = DG.DataFrame.fromColumns([
468
+ DG.Column.fromFloat64Array('other', Float64Array.from({length: 5}, (_, i) => i)),
469
+ ]);
470
+ let threw = false;
471
+ try {
472
+ Pmpo.predict(incompleteDf, trainRes.params, true, SCORES_NAME);
473
+ } catch (_) {
474
+ threw = true;
475
+ }
476
+ expect(threw, true, 'Expected predict to throw for missing column');
477
+ });
478
+
479
+ // --- validateInputs ---
480
+
481
+ // --- Settings validation ---
482
+
483
+ test('validateInputs: rejects null p-value', async () => {
484
+ const params = getValidInputParams();
485
+ params.pValue = null;
486
+ const result = Pmpo.validateInputs(params);
487
+ expect(result.valid, false);
488
+ expect(result.errors.size, 0, 'No input-specific errors for null settings');
489
+ });
490
+
491
+ test('validateInputs: rejects null R²', async () => {
492
+ const params = getValidInputParams();
493
+ params.r2 = null;
494
+ expect(Pmpo.validateInputs(params).valid, false);
495
+ });
496
+
497
+ test('validateInputs: rejects null q-cutoff', async () => {
498
+ const params = getValidInputParams();
499
+ params.qCutoff = null;
500
+ expect(Pmpo.validateInputs(params).valid, false);
501
+ });
502
+
503
+ test('validateInputs: rejects p-value out of range', async () => {
504
+ const params = getValidInputParams();
505
+ params.pValue = 0;
506
+ expect(Pmpo.validateInputs(params).valid, false);
507
+ params.pValue = 1.5;
508
+ expect(Pmpo.validateInputs(params).valid, false);
509
+ });
510
+
511
+ test('validateInputs: rejects R² out of range', async () => {
512
+ const params = getValidInputParams();
513
+ params.r2 = -0.1;
514
+ expect(Pmpo.validateInputs(params).valid, false);
515
+ params.r2 = 1.5;
516
+ expect(Pmpo.validateInputs(params).valid, false);
517
+ });
518
+
519
+ test('validateInputs: rejects q-cutoff out of range', async () => {
520
+ const params = getValidInputParams();
521
+ params.qCutoff = 0;
522
+ expect(Pmpo.validateInputs(params).valid, false);
523
+ params.qCutoff = 1.5;
524
+ expect(Pmpo.validateInputs(params).valid, false);
525
+ });
526
+
527
+ // --- Column input validation ---
528
+
529
+ test('validateInputs: rejects null descriptors', async () => {
530
+ const params = getValidInputParams();
531
+ params.descriptors = null;
532
+ expect(Pmpo.validateInputs(params).valid, false);
533
+ });
534
+
535
+ test('validateInputs: rejects null desirability', async () => {
536
+ const params = getValidInputParams();
537
+ params.desirability = null;
538
+ expect(Pmpo.validateInputs(params).valid, false);
539
+ });
540
+
541
+ test('validateInputs: rejects empty descriptors', async () => {
542
+ const params = getValidInputParams();
543
+ params.descriptors = [];
544
+ const result = Pmpo.validateInputs(params);
545
+ expect(result.valid, false);
546
+ expect(result.errors.has('descriptors'), true);
547
+ });
548
+
549
+ // --- Descriptor quality validation ---
550
+
551
+ test('validateInputs: rejects desirability among descriptors', async () => {
552
+ const df = createValidTestDf();
553
+ const des = df.col('des')!;
554
+ const result = Pmpo.validateInputs({
555
+ descriptors: [df.col('d1')!, des],
556
+ desirability: des,
557
+ threshold: null,
558
+ sign: EQUALITY_SIGN.DEFAULT,
559
+ desirableCategories: null,
560
+ pValue: P_VAL_TRES_DEFAULT,
561
+ r2: R2_DEFAULT,
562
+ qCutoff: Q_CUTOFF_DEFAULT,
563
+ });
564
+ expect(result.valid, false);
565
+ expect(result.errors.has('descriptors'), true);
566
+ expect(result.errors.has('desirability'), true);
567
+ });
568
+
569
+ test('validateInputs: rejects zero-variance descriptors', async () => {
570
+ const n = 20;
571
+ const half = n / 2;
572
+ const df = DG.DataFrame.fromColumns([
573
+ DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'des', Array.from({length: n}, (_, i) => i < half)),
574
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
575
+ DG.Column.fromFloat64Array('constCol', new Float64Array(n).fill(5)),
576
+ ]);
577
+ const result = Pmpo.validateInputs({
578
+ descriptors: [df.col('d1')!, df.col('constCol')!],
579
+ desirability: df.col('des')!,
580
+ threshold: null,
581
+ sign: EQUALITY_SIGN.DEFAULT,
582
+ desirableCategories: null,
583
+ pValue: P_VAL_TRES_DEFAULT,
584
+ r2: R2_DEFAULT,
585
+ qCutoff: Q_CUTOFF_DEFAULT,
586
+ });
587
+ expect(result.valid, false);
588
+ expect(result.errors.has('descriptors'), true);
589
+ });
590
+
591
+ // --- Boolean desirability validation ---
592
+
593
+ test('validateInputs: accepts valid boolean desirability', async () => {
594
+ const params = getValidInputParams();
595
+ const result = Pmpo.validateInputs(params);
596
+ expect(result.valid, true);
597
+ expect(result.errors.size, 0);
598
+ });
599
+
600
+ test('validateInputs: rejects all-true boolean desirability', async () => {
601
+ const n = 20;
602
+ const df = DG.DataFrame.fromColumns([
603
+ DG.Column.fromList(DG.COLUMN_TYPE.BOOL, 'des', new Array(n).fill(true)),
604
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
605
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i * 2)),
606
+ ]);
607
+ const result = Pmpo.validateInputs({
608
+ descriptors: [df.col('d1')!, df.col('d2')!],
609
+ desirability: df.col('des')!,
610
+ threshold: null,
611
+ sign: EQUALITY_SIGN.DEFAULT,
612
+ desirableCategories: null,
613
+ pValue: P_VAL_TRES_DEFAULT,
614
+ r2: R2_DEFAULT,
615
+ qCutoff: Q_CUTOFF_DEFAULT,
616
+ });
617
+ expect(result.valid, false);
618
+ expect(result.errors.has('desirability'), true);
619
+ });
620
+
621
+ // --- String desirability validation ---
622
+
623
+ test('validateInputs: rejects string desirability with single category', async () => {
624
+ const n = 20;
625
+ const df = DG.DataFrame.fromColumns([
626
+ DG.Column.fromStrings('des', new Array(n).fill('active')),
627
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
628
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i * 2)),
629
+ ]);
630
+ const result = Pmpo.validateInputs({
631
+ descriptors: [df.col('d1')!, df.col('d2')!],
632
+ desirability: df.col('des')!,
633
+ threshold: null,
634
+ sign: EQUALITY_SIGN.DEFAULT,
635
+ desirableCategories: ['active'],
636
+ pValue: P_VAL_TRES_DEFAULT,
637
+ r2: R2_DEFAULT,
638
+ qCutoff: Q_CUTOFF_DEFAULT,
639
+ });
640
+ expect(result.valid, false);
641
+ expect(result.errors.has('desirability'), true);
642
+ });
643
+
644
+ test('validateInputs: rejects no selected categories', async () => {
645
+ const n = 20;
646
+ const half = n / 2;
647
+ const df = DG.DataFrame.fromColumns([
648
+ DG.Column.fromStrings('des', Array.from({length: n}, (_, i) => i < half ? 'active' : 'inactive')),
649
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
650
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i * 2)),
651
+ ]);
652
+ const result = Pmpo.validateInputs({
653
+ descriptors: [df.col('d1')!, df.col('d2')!],
654
+ desirability: df.col('des')!,
655
+ threshold: null,
656
+ sign: EQUALITY_SIGN.DEFAULT,
657
+ desirableCategories: [],
658
+ pValue: P_VAL_TRES_DEFAULT,
659
+ r2: R2_DEFAULT,
660
+ qCutoff: Q_CUTOFF_DEFAULT,
661
+ });
662
+ expect(result.valid, false);
663
+ expect(result.errors.has('desirability'), true);
664
+ });
665
+
666
+ test('validateInputs: rejects all categories selected', async () => {
667
+ const n = 20;
668
+ const half = n / 2;
669
+ const df = DG.DataFrame.fromColumns([
670
+ DG.Column.fromStrings('des', Array.from({length: n}, (_, i) => i < half ? 'active' : 'inactive')),
671
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
672
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i * 2)),
673
+ ]);
674
+ const result = Pmpo.validateInputs({
675
+ descriptors: [df.col('d1')!, df.col('d2')!],
676
+ desirability: df.col('des')!,
677
+ threshold: null,
678
+ sign: EQUALITY_SIGN.DEFAULT,
679
+ desirableCategories: ['active', 'inactive'],
680
+ pValue: P_VAL_TRES_DEFAULT,
681
+ r2: R2_DEFAULT,
682
+ qCutoff: Q_CUTOFF_DEFAULT,
683
+ });
684
+ expect(result.valid, false);
685
+ expect(result.errors.has('desirability'), true);
686
+ });
687
+
688
+ test('validateInputs: accepts valid string desirability', async () => {
689
+ const n = 20;
690
+ const half = n / 2;
691
+ const df = DG.DataFrame.fromColumns([
692
+ DG.Column.fromStrings('des', Array.from({length: n}, (_, i) => i < half ? 'active' : 'inactive')),
693
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i < half ? i + 10 : i)),
694
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i < half ? i * 3 : i)),
695
+ ]);
696
+ const result = Pmpo.validateInputs({
697
+ descriptors: [df.col('d1')!, df.col('d2')!],
698
+ desirability: df.col('des')!,
699
+ threshold: null,
700
+ sign: EQUALITY_SIGN.DEFAULT,
701
+ desirableCategories: ['active'],
702
+ pValue: P_VAL_TRES_DEFAULT,
703
+ r2: R2_DEFAULT,
704
+ qCutoff: Q_CUTOFF_DEFAULT,
705
+ });
706
+ expect(result.valid, true);
707
+ expect(result.errors.size, 0);
708
+ });
709
+
710
+ // --- Numeric desirability validation ---
711
+
712
+ test('validateInputs: rejects constant numeric desirability', async () => {
713
+ const n = 20;
714
+ const df = DG.DataFrame.fromColumns([
715
+ DG.Column.fromFloat64Array('des', new Float64Array(n).fill(5)),
716
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
717
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i * 2)),
718
+ ]);
719
+ const result = Pmpo.validateInputs({
720
+ descriptors: [df.col('d1')!, df.col('d2')!],
721
+ desirability: df.col('des')!,
722
+ threshold: 5,
723
+ sign: EQUALITY_SIGN.DEFAULT,
724
+ desirableCategories: null,
725
+ pValue: P_VAL_TRES_DEFAULT,
726
+ r2: R2_DEFAULT,
727
+ qCutoff: Q_CUTOFF_DEFAULT,
728
+ });
729
+ expect(result.valid, false);
730
+ expect(result.errors.has('desirability'), true);
731
+ });
732
+
733
+ test('validateInputs: rejects null threshold for numeric desirability', async () => {
734
+ const n = 20;
735
+ const df = DG.DataFrame.fromColumns([
736
+ DG.Column.fromFloat64Array('des', Float64Array.from({length: n}, (_, i) => i)),
737
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
738
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i * 2)),
739
+ ]);
740
+ const result = Pmpo.validateInputs({
741
+ descriptors: [df.col('d1')!, df.col('d2')!],
742
+ desirability: df.col('des')!,
743
+ threshold: null,
744
+ sign: EQUALITY_SIGN.DEFAULT,
745
+ desirableCategories: null,
746
+ pValue: P_VAL_TRES_DEFAULT,
747
+ r2: R2_DEFAULT,
748
+ qCutoff: Q_CUTOFF_DEFAULT,
749
+ });
750
+ expect(result.valid, false);
751
+ expect(result.errors.has('desirability'), true);
752
+ });
753
+
754
+ test('validateInputs: rejects threshold producing single group', async () => {
755
+ const n = 20;
756
+ // All values in [0, 19], threshold 100 with <= → all desired, none non-desired
757
+ const df = DG.DataFrame.fromColumns([
758
+ DG.Column.fromFloat64Array('des', Float64Array.from({length: n}, (_, i) => i)),
759
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
760
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i * 2)),
761
+ ]);
762
+ const result = Pmpo.validateInputs({
763
+ descriptors: [df.col('d1')!, df.col('d2')!],
764
+ desirability: df.col('des')!,
765
+ threshold: 100,
766
+ sign: EQUALITY_SIGN.LESS_OR_EQUAL,
767
+ desirableCategories: null,
768
+ pValue: P_VAL_TRES_DEFAULT,
769
+ r2: R2_DEFAULT,
770
+ qCutoff: Q_CUTOFF_DEFAULT,
771
+ });
772
+ expect(result.valid, false);
773
+ expect(result.errors.has('desirability'), true);
774
+ expect(result.errors.has('threshold'), true);
775
+ });
776
+
777
+ test('validateInputs: accepts valid numeric desirability with threshold', async () => {
778
+ const n = 20;
779
+ const df = DG.DataFrame.fromColumns([
780
+ DG.Column.fromFloat64Array('des', Float64Array.from({length: n}, (_, i) => i)),
781
+ DG.Column.fromFloat64Array('d1', Float64Array.from({length: n}, (_, i) => i + 1)),
782
+ DG.Column.fromFloat64Array('d2', Float64Array.from({length: n}, (_, i) => i * 2)),
783
+ ]);
784
+ const result = Pmpo.validateInputs({
785
+ descriptors: [df.col('d1')!, df.col('d2')!],
786
+ desirability: df.col('des')!,
787
+ threshold: 10,
788
+ sign: EQUALITY_SIGN.LESS_OR_EQUAL,
789
+ desirableCategories: null,
790
+ pValue: P_VAL_TRES_DEFAULT,
791
+ r2: R2_DEFAULT,
792
+ qCutoff: Q_CUTOFF_DEFAULT,
793
+ });
794
+ expect(result.valid, true);
795
+ expect(result.errors.size, 0);
796
+ });
797
+ });