deepbox 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +344 -0
- package/dist/CSRMatrix-CwGwQRea.d.cts +219 -0
- package/dist/CSRMatrix-KzNt6QpS.d.ts +219 -0
- package/dist/Tensor-BQLk1ltW.d.cts +147 -0
- package/dist/Tensor-g8mUClel.d.ts +147 -0
- package/dist/chunk-4S73VUBD.js +677 -0
- package/dist/chunk-4S73VUBD.js.map +1 -0
- package/dist/chunk-5R4S63PF.js +2925 -0
- package/dist/chunk-5R4S63PF.js.map +1 -0
- package/dist/chunk-6AE5FKKQ.cjs +9264 -0
- package/dist/chunk-6AE5FKKQ.cjs.map +1 -0
- package/dist/chunk-AD436M45.js +3854 -0
- package/dist/chunk-AD436M45.js.map +1 -0
- package/dist/chunk-ALS7ETWZ.cjs +4263 -0
- package/dist/chunk-ALS7ETWZ.cjs.map +1 -0
- package/dist/chunk-AU7XHGKJ.js +2092 -0
- package/dist/chunk-AU7XHGKJ.js.map +1 -0
- package/dist/chunk-B5TNKUEY.js +1481 -0
- package/dist/chunk-B5TNKUEY.js.map +1 -0
- package/dist/chunk-BCR7G3A6.js +9136 -0
- package/dist/chunk-BCR7G3A6.js.map +1 -0
- package/dist/chunk-C4PKXY74.cjs +1917 -0
- package/dist/chunk-C4PKXY74.cjs.map +1 -0
- package/dist/chunk-DWZY6PIP.cjs +6400 -0
- package/dist/chunk-DWZY6PIP.cjs.map +1 -0
- package/dist/chunk-E3EU5FZO.cjs +2113 -0
- package/dist/chunk-E3EU5FZO.cjs.map +1 -0
- package/dist/chunk-F3JWBINJ.js +1054 -0
- package/dist/chunk-F3JWBINJ.js.map +1 -0
- package/dist/chunk-FJYLIGJX.js +1940 -0
- package/dist/chunk-FJYLIGJX.js.map +1 -0
- package/dist/chunk-JSCDE774.cjs +729 -0
- package/dist/chunk-JSCDE774.cjs.map +1 -0
- package/dist/chunk-LWECRCW2.cjs +2412 -0
- package/dist/chunk-LWECRCW2.cjs.map +1 -0
- package/dist/chunk-MLBMYKCG.js +6379 -0
- package/dist/chunk-MLBMYKCG.js.map +1 -0
- package/dist/chunk-OX6QXFMV.cjs +3874 -0
- package/dist/chunk-OX6QXFMV.cjs.map +1 -0
- package/dist/chunk-PHV2DKRS.cjs +1072 -0
- package/dist/chunk-PHV2DKRS.cjs.map +1 -0
- package/dist/chunk-PL7TAYKI.js +4056 -0
- package/dist/chunk-PL7TAYKI.js.map +1 -0
- package/dist/chunk-PR647I7R.js +1898 -0
- package/dist/chunk-PR647I7R.js.map +1 -0
- package/dist/chunk-QERHVCHC.cjs +2960 -0
- package/dist/chunk-QERHVCHC.cjs.map +1 -0
- package/dist/chunk-XEG44RF6.cjs +1514 -0
- package/dist/chunk-XEG44RF6.cjs.map +1 -0
- package/dist/chunk-XMWVME2W.js +2377 -0
- package/dist/chunk-XMWVME2W.js.map +1 -0
- package/dist/chunk-ZB75FESB.cjs +1979 -0
- package/dist/chunk-ZB75FESB.cjs.map +1 -0
- package/dist/chunk-ZLW62TJG.cjs +4061 -0
- package/dist/chunk-ZLW62TJG.cjs.map +1 -0
- package/dist/chunk-ZXKBDFP3.js +4235 -0
- package/dist/chunk-ZXKBDFP3.js.map +1 -0
- package/dist/core/index.cjs +204 -0
- package/dist/core/index.cjs.map +1 -0
- package/dist/core/index.d.cts +2 -0
- package/dist/core/index.d.ts +2 -0
- package/dist/core/index.js +3 -0
- package/dist/core/index.js.map +1 -0
- package/dist/dataframe/index.cjs +22 -0
- package/dist/dataframe/index.cjs.map +1 -0
- package/dist/dataframe/index.d.cts +3 -0
- package/dist/dataframe/index.d.ts +3 -0
- package/dist/dataframe/index.js +5 -0
- package/dist/dataframe/index.js.map +1 -0
- package/dist/datasets/index.cjs +134 -0
- package/dist/datasets/index.cjs.map +1 -0
- package/dist/datasets/index.d.cts +3 -0
- package/dist/datasets/index.d.ts +3 -0
- package/dist/datasets/index.js +5 -0
- package/dist/datasets/index.js.map +1 -0
- package/dist/index-74AB8Cyh.d.cts +1126 -0
- package/dist/index-9oQx1HgV.d.cts +1180 -0
- package/dist/index-BJY2SI4i.d.ts +483 -0
- package/dist/index-BWGhrDlr.d.ts +733 -0
- package/dist/index-B_DK4FKY.d.cts +242 -0
- package/dist/index-BbA2Gxfl.d.ts +456 -0
- package/dist/index-BgHYAoSS.d.cts +837 -0
- package/dist/index-BndMbqsM.d.ts +1439 -0
- package/dist/index-C1mfVYoo.d.ts +2517 -0
- package/dist/index-CCvlwAmL.d.cts +809 -0
- package/dist/index-CDw5CnOU.d.ts +785 -0
- package/dist/index-Cn3SdB0O.d.ts +1126 -0
- package/dist/index-CrqLlS-a.d.ts +776 -0
- package/dist/index-D61yaSMY.d.cts +483 -0
- package/dist/index-D9Loo1_A.d.cts +2517 -0
- package/dist/index-DIT_OO9C.d.cts +785 -0
- package/dist/index-DIp_RrRt.d.ts +242 -0
- package/dist/index-DbultU6X.d.cts +1427 -0
- package/dist/index-DmEg_LCm.d.cts +776 -0
- package/dist/index-DoPWVxPo.d.cts +1439 -0
- package/dist/index-DuCxd-8d.d.ts +837 -0
- package/dist/index-Dx42TZaY.d.ts +809 -0
- package/dist/index-DyZ4QQf5.d.cts +456 -0
- package/dist/index-GFAVyOWO.d.ts +1427 -0
- package/dist/index-WHQLn0e8.d.cts +733 -0
- package/dist/index-ZtI1Iy4L.d.ts +1180 -0
- package/dist/index-eJgeni9c.d.cts +1911 -0
- package/dist/index-tk4lSYod.d.ts +1911 -0
- package/dist/index.cjs +72 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +17 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +15 -0
- package/dist/index.js.map +1 -0
- package/dist/linalg/index.cjs +86 -0
- package/dist/linalg/index.cjs.map +1 -0
- package/dist/linalg/index.d.cts +3 -0
- package/dist/linalg/index.d.ts +3 -0
- package/dist/linalg/index.js +5 -0
- package/dist/linalg/index.js.map +1 -0
- package/dist/metrics/index.cjs +158 -0
- package/dist/metrics/index.cjs.map +1 -0
- package/dist/metrics/index.d.cts +3 -0
- package/dist/metrics/index.d.ts +3 -0
- package/dist/metrics/index.js +5 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/ml/index.cjs +87 -0
- package/dist/ml/index.cjs.map +1 -0
- package/dist/ml/index.d.cts +3 -0
- package/dist/ml/index.d.ts +3 -0
- package/dist/ml/index.js +6 -0
- package/dist/ml/index.js.map +1 -0
- package/dist/ndarray/index.cjs +501 -0
- package/dist/ndarray/index.cjs.map +1 -0
- package/dist/ndarray/index.d.cts +5 -0
- package/dist/ndarray/index.d.ts +5 -0
- package/dist/ndarray/index.js +4 -0
- package/dist/ndarray/index.js.map +1 -0
- package/dist/nn/index.cjs +142 -0
- package/dist/nn/index.cjs.map +1 -0
- package/dist/nn/index.d.cts +6 -0
- package/dist/nn/index.d.ts +6 -0
- package/dist/nn/index.js +5 -0
- package/dist/nn/index.js.map +1 -0
- package/dist/optim/index.cjs +77 -0
- package/dist/optim/index.cjs.map +1 -0
- package/dist/optim/index.d.cts +4 -0
- package/dist/optim/index.d.ts +4 -0
- package/dist/optim/index.js +4 -0
- package/dist/optim/index.js.map +1 -0
- package/dist/plot/index.cjs +114 -0
- package/dist/plot/index.cjs.map +1 -0
- package/dist/plot/index.d.cts +6 -0
- package/dist/plot/index.d.ts +6 -0
- package/dist/plot/index.js +5 -0
- package/dist/plot/index.js.map +1 -0
- package/dist/preprocess/index.cjs +82 -0
- package/dist/preprocess/index.cjs.map +1 -0
- package/dist/preprocess/index.d.cts +4 -0
- package/dist/preprocess/index.d.ts +4 -0
- package/dist/preprocess/index.js +5 -0
- package/dist/preprocess/index.js.map +1 -0
- package/dist/random/index.cjs +74 -0
- package/dist/random/index.cjs.map +1 -0
- package/dist/random/index.d.cts +3 -0
- package/dist/random/index.d.ts +3 -0
- package/dist/random/index.js +5 -0
- package/dist/random/index.js.map +1 -0
- package/dist/stats/index.cjs +142 -0
- package/dist/stats/index.cjs.map +1 -0
- package/dist/stats/index.d.cts +3 -0
- package/dist/stats/index.d.ts +3 -0
- package/dist/stats/index.js +5 -0
- package/dist/stats/index.js.map +1 -0
- package/dist/tensor-B96jjJLQ.d.cts +205 -0
- package/dist/tensor-B96jjJLQ.d.ts +205 -0
- package/package.json +226 -0
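Below is the expanded diff body for the bundled metrics chunk. For orientation, here is a minimal usage sketch of that module; the subpath import specifiers 'deepbox/ndarray' and 'deepbox/metrics' are assumed from the dist/ layout above and are not confirmed by this diff. The expected values follow from the metric definitions shown in the code below.

// Hypothetical usage sketch (assumed import paths, see note above).
import { tensor } from 'deepbox/ndarray';
import { accuracy, f1Score, confusionMatrix } from 'deepbox/metrics';

const yTrue = tensor([0, 1, 1, 0, 1]);
const yPred = tensor([0, 1, 0, 0, 1]);

console.log(accuracy(yTrue, yPred));              // 0.8 (4 of 5 labels match)
console.log(f1Score(yTrue, yPred, 'binary'));     // 0.8 (precision 1, recall 2/3)
console.log(confusionMatrix(yTrue, yPred).shape); // [2, 2]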
|
@@ -0,0 +1,1979 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var chunk6AE5FKKQ_cjs = require('./chunk-6AE5FKKQ.cjs');
|
|
4
|
+
var chunkJSCDE774_cjs = require('./chunk-JSCDE774.cjs');
|
|
5
|
+
|
|
6
|
+
// src/metrics/index.ts
|
|
7
|
+
var metrics_exports = {};
|
|
8
|
+
chunkJSCDE774_cjs.__export(metrics_exports, {
|
|
9
|
+
accuracy: () => accuracy,
|
|
10
|
+
adjustedMutualInfoScore: () => adjustedMutualInfoScore,
|
|
11
|
+
adjustedR2Score: () => adjustedR2Score,
|
|
12
|
+
adjustedRandScore: () => adjustedRandScore,
|
|
13
|
+
averagePrecisionScore: () => averagePrecisionScore,
|
|
14
|
+
balancedAccuracyScore: () => balancedAccuracyScore,
|
|
15
|
+
calinskiHarabaszScore: () => calinskiHarabaszScore,
|
|
16
|
+
classificationReport: () => classificationReport,
|
|
17
|
+
cohenKappaScore: () => cohenKappaScore,
|
|
18
|
+
completenessScore: () => completenessScore,
|
|
19
|
+
confusionMatrix: () => confusionMatrix,
|
|
20
|
+
daviesBouldinScore: () => daviesBouldinScore,
|
|
21
|
+
explainedVarianceScore: () => explainedVarianceScore,
|
|
22
|
+
f1Score: () => f1Score,
|
|
23
|
+
fbetaScore: () => fbetaScore,
|
|
24
|
+
fowlkesMallowsScore: () => fowlkesMallowsScore,
|
|
25
|
+
hammingLoss: () => hammingLoss,
|
|
26
|
+
homogeneityScore: () => homogeneityScore,
|
|
27
|
+
jaccardScore: () => jaccardScore,
|
|
28
|
+
logLoss: () => logLoss,
|
|
29
|
+
mae: () => mae,
|
|
30
|
+
mape: () => mape,
|
|
31
|
+
matthewsCorrcoef: () => matthewsCorrcoef,
|
|
32
|
+
maxError: () => maxError,
|
|
33
|
+
medianAbsoluteError: () => medianAbsoluteError,
|
|
34
|
+
mse: () => mse,
|
|
35
|
+
normalizedMutualInfoScore: () => normalizedMutualInfoScore,
|
|
36
|
+
precision: () => precision,
|
|
37
|
+
precisionRecallCurve: () => precisionRecallCurve,
|
|
38
|
+
r2Score: () => r2Score,
|
|
39
|
+
recall: () => recall,
|
|
40
|
+
rmse: () => rmse,
|
|
41
|
+
rocAucScore: () => rocAucScore,
|
|
42
|
+
rocCurve: () => rocCurve,
|
|
43
|
+
silhouetteSamples: () => silhouetteSamples,
|
|
44
|
+
silhouetteScore: () => silhouetteScore,
|
|
45
|
+
vMeasureScore: () => vMeasureScore
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
// src/metrics/_internal.ts
|
|
49
|
+
function computeLogicalStrides(shape) {
|
|
50
|
+
const strides = new Array(shape.length);
|
|
51
|
+
let stride = 1;
|
|
52
|
+
for (let i = shape.length - 1; i >= 0; i--) {
|
|
53
|
+
const dim = shape[i];
|
|
54
|
+
if (dim === void 0) {
|
|
55
|
+
throw new chunkJSCDE774_cjs.ShapeError("Tensor shape must be fully defined");
|
|
56
|
+
}
|
|
57
|
+
strides[i] = stride;
|
|
58
|
+
stride *= dim;
|
|
59
|
+
}
|
|
60
|
+
return strides;
|
|
61
|
+
}
|
|
62
|
+
function createFlatOffsetter(t) {
|
|
63
|
+
const base = t.offset;
|
|
64
|
+
if (t.ndim <= 1) {
|
|
65
|
+
const stride0 = t.strides[0] ?? 1;
|
|
66
|
+
return (flatIndex) => base + flatIndex * stride0;
|
|
67
|
+
}
|
|
68
|
+
const logicalStrides = computeLogicalStrides(t.shape);
|
|
69
|
+
const strides = t.strides;
|
|
70
|
+
return (flatIndex) => {
|
|
71
|
+
let rem = flatIndex;
|
|
72
|
+
let offset = base;
|
|
73
|
+
for (let axis = 0; axis < logicalStrides.length; axis++) {
|
|
74
|
+
const axisLogicalStride = logicalStrides[axis] ?? 1;
|
|
75
|
+
const coord = Math.floor(rem / axisLogicalStride);
|
|
76
|
+
rem -= coord * axisLogicalStride;
|
|
77
|
+
offset += coord * (strides[axis] ?? 0);
|
|
78
|
+
}
|
|
79
|
+
return offset;
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
function assertFiniteNumber(value, name, detail) {
|
|
83
|
+
if (!Number.isFinite(value)) {
|
|
84
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
85
|
+
`${name} must contain only finite numbers; found ${String(value)} at ${detail}`
|
|
86
|
+
);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
function assertVectorLike(t, name) {
|
|
90
|
+
if (t.ndim <= 1) return;
|
|
91
|
+
if (t.ndim === 2 && (t.shape[1] ?? 0) === 1) return;
|
|
92
|
+
throw new chunkJSCDE774_cjs.ShapeError(`${name} must be 1D or a column vector`);
|
|
93
|
+
}
|
|
94
|
+
function assertSameSizeVectors(a, b, nameA, nameB) {
|
|
95
|
+
if (a.size !== b.size) {
|
|
96
|
+
throw new chunkJSCDE774_cjs.ShapeError(
|
|
97
|
+
`${nameA} (size ${a.size}) and ${nameB} (size ${b.size}) must have same size`
|
|
98
|
+
);
|
|
99
|
+
}
|
|
100
|
+
assertVectorLike(a, nameA);
|
|
101
|
+
assertVectorLike(b, nameB);
|
|
102
|
+
}
|
|
103
|
+
function assertSameSize(a, b, nameA, nameB) {
|
|
104
|
+
if (a.size !== b.size) {
|
|
105
|
+
throw new chunkJSCDE774_cjs.ShapeError(
|
|
106
|
+
`${nameA} (size ${a.size}) and ${nameB} (size ${b.size}) must have same size`
|
|
107
|
+
);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// src/metrics/classification.ts
|
|
112
|
+
function getNumericLabelData(t) {
|
|
113
|
+
if (t.dtype === "string") {
|
|
114
|
+
throw new chunkJSCDE774_cjs.DTypeError("metrics do not support string labels");
|
|
115
|
+
}
|
|
116
|
+
if (t.dtype === "int64") {
|
|
117
|
+
throw new chunkJSCDE774_cjs.DTypeError("metrics do not support int64 tensors");
|
|
118
|
+
}
|
|
119
|
+
const data = t.data;
|
|
120
|
+
if (!chunkJSCDE774_cjs.isTypedArray(data) || !chunkJSCDE774_cjs.isNumericTypedArray(data)) {
|
|
121
|
+
throw new chunkJSCDE774_cjs.DTypeError("metrics require numeric tensors");
|
|
122
|
+
}
|
|
123
|
+
return data;
|
|
124
|
+
}
|
|
125
|
+
function readNumericLabel(data, offsetter, index, name) {
|
|
126
|
+
const value = chunkJSCDE774_cjs.getNumericElement(data, offsetter(index));
|
|
127
|
+
assertFiniteNumber(value, name, `index ${index}`);
|
|
128
|
+
return value;
|
|
129
|
+
}
|
|
130
|
+
function ensureBinaryValue(value, name, index) {
|
|
131
|
+
if (value !== 0 && value !== 1) {
|
|
132
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
133
|
+
`${name} must contain only binary values (0 or 1); found ${String(value)} at index ${index}`,
|
|
134
|
+
name,
|
|
135
|
+
value
|
|
136
|
+
);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
function assertBinaryLabels(yTrue, yPred) {
|
|
140
|
+
if (yTrue.dtype === "string" || yPred.dtype === "string") {
|
|
141
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
142
|
+
"classificationReport requires binary numeric labels (0 or 1)",
|
|
143
|
+
"yTrue"
|
|
144
|
+
);
|
|
145
|
+
}
|
|
146
|
+
const yTrueData = getNumericLabelData(yTrue);
|
|
147
|
+
const yPredData = getNumericLabelData(yPred);
|
|
148
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
149
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
150
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
151
|
+
const trueVal = readNumericLabel(yTrueData, trueOffset, i, "yTrue");
|
|
152
|
+
const predVal = readNumericLabel(yPredData, predOffset, i, "yPred");
|
|
153
|
+
ensureBinaryValue(trueVal, "yTrue", i);
|
|
154
|
+
ensureBinaryValue(predVal, "yPred", i);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
function assertComparableLabelTypes(yTrue, yPred) {
|
|
158
|
+
const trueKind = yTrue.dtype === "string" ? "string" : yTrue.dtype === "int64" ? "int64" : "numeric";
|
|
159
|
+
const predKind = yPred.dtype === "string" ? "string" : yPred.dtype === "int64" ? "int64" : "numeric";
|
|
160
|
+
if (trueKind !== predKind) {
|
|
161
|
+
throw new chunkJSCDE774_cjs.DTypeError("yTrue and yPred must use compatible label types");
|
|
162
|
+
}
|
|
163
|
+
return trueKind;
|
|
164
|
+
}
|
|
165
|
+
function readComparableLabel(t, offsetter, index, name) {
|
|
166
|
+
const offset = offsetter(index);
|
|
167
|
+
const data = t.data;
|
|
168
|
+
if (Array.isArray(data)) {
|
|
169
|
+
return chunkJSCDE774_cjs.getStringElement(data, offset);
|
|
170
|
+
}
|
|
171
|
+
if (data instanceof BigInt64Array) {
|
|
172
|
+
return chunkJSCDE774_cjs.getBigIntElement(data, offset);
|
|
173
|
+
}
|
|
174
|
+
if (!chunkJSCDE774_cjs.isTypedArray(data) || !chunkJSCDE774_cjs.isNumericTypedArray(data)) {
|
|
175
|
+
throw new chunkJSCDE774_cjs.DTypeError(`${name} must be numeric or string labels`);
|
|
176
|
+
}
|
|
177
|
+
const value = chunkJSCDE774_cjs.getNumericElement(data, offset);
|
|
178
|
+
assertFiniteNumber(value, name, `index ${index}`);
|
|
179
|
+
return value;
|
|
180
|
+
}
|
|
181
|
+
function buildClassStats(yTrue, yPred) {
|
|
182
|
+
assertComparableLabelTypes(yTrue, yPred);
|
|
183
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
184
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
185
|
+
const stats = /* @__PURE__ */ new Map();
|
|
186
|
+
let totalTp = 0;
|
|
187
|
+
let totalFp = 0;
|
|
188
|
+
let totalFn = 0;
|
|
189
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
190
|
+
const trueVal = readComparableLabel(yTrue, trueOffset, i, "yTrue");
|
|
191
|
+
const predVal = readComparableLabel(yPred, predOffset, i, "yPred");
|
|
192
|
+
let trueStats = stats.get(trueVal);
|
|
193
|
+
if (!trueStats) {
|
|
194
|
+
trueStats = { tp: 0, fp: 0, fn: 0, support: 0 };
|
|
195
|
+
stats.set(trueVal, trueStats);
|
|
196
|
+
}
|
|
197
|
+
let predStats = stats.get(predVal);
|
|
198
|
+
if (!predStats) {
|
|
199
|
+
predStats = { tp: 0, fp: 0, fn: 0, support: 0 };
|
|
200
|
+
stats.set(predVal, predStats);
|
|
201
|
+
}
|
|
202
|
+
trueStats.support += 1;
|
|
203
|
+
if (trueVal === predVal) {
|
|
204
|
+
trueStats.tp += 1;
|
|
205
|
+
totalTp += 1;
|
|
206
|
+
} else {
|
|
207
|
+
predStats.fp += 1;
|
|
208
|
+
trueStats.fn += 1;
|
|
209
|
+
totalFp += 1;
|
|
210
|
+
totalFn += 1;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
const classes = Array.from(stats.keys()).sort((a, b) => {
|
|
214
|
+
if (typeof a === "number" && typeof b === "number") return a - b;
|
|
215
|
+
if (typeof a === "string" && typeof b === "string") return a.localeCompare(b);
|
|
216
|
+
if (typeof a === "bigint" && typeof b === "bigint") return a === b ? 0 : a < b ? -1 : 1;
|
|
217
|
+
return String(a).localeCompare(String(b));
|
|
218
|
+
});
|
|
219
|
+
return { classes, stats, totalTp, totalFp, totalFn };
|
|
220
|
+
}
|
|
221
|
+
function accuracy(yTrue, yPred) {
|
|
222
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
223
|
+
if (yTrue.size === 0) return 0;
|
|
224
|
+
assertComparableLabelTypes(yTrue, yPred);
|
|
225
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
226
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
227
|
+
let correct = 0;
|
|
228
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
229
|
+
const trueVal = readComparableLabel(yTrue, trueOffset, i, "yTrue");
|
|
230
|
+
const predVal = readComparableLabel(yPred, predOffset, i, "yPred");
|
|
231
|
+
if (trueVal === predVal) {
|
|
232
|
+
correct++;
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
return correct / yTrue.size;
|
|
236
|
+
}
|
|
237
|
+
function precision(yTrue, yPred, average = "binary") {
|
|
238
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
239
|
+
if (yTrue.size === 0) return average === null ? [] : 0;
|
|
240
|
+
if (average === "binary") {
|
|
241
|
+
if (yTrue.dtype === "string" || yPred.dtype === "string") {
|
|
242
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
243
|
+
"Binary average requires numeric labels (0/1). Use 'macro', 'micro', or 'weighted' for string labels."
|
|
244
|
+
);
|
|
245
|
+
}
|
|
246
|
+
const yTrueData = getNumericLabelData(yTrue);
|
|
247
|
+
const yPredData = getNumericLabelData(yPred);
|
|
248
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
249
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
250
|
+
let tp = 0;
|
|
251
|
+
let fp = 0;
|
|
252
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
253
|
+
const trueVal = readNumericLabel(yTrueData, trueOffset, i, "yTrue");
|
|
254
|
+
const predVal = readNumericLabel(yPredData, predOffset, i, "yPred");
|
|
255
|
+
ensureBinaryValue(trueVal, "yTrue", i);
|
|
256
|
+
ensureBinaryValue(predVal, "yPred", i);
|
|
257
|
+
if (predVal === 1) {
|
|
258
|
+
if (trueVal === 1) {
|
|
259
|
+
tp++;
|
|
260
|
+
} else {
|
|
261
|
+
fp++;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
return tp + fp === 0 ? 0 : tp / (tp + fp);
|
|
266
|
+
}
|
|
267
|
+
const { classes, stats, totalTp, totalFp } = buildClassStats(yTrue, yPred);
|
|
268
|
+
const precisions = [];
|
|
269
|
+
const supports = [];
|
|
270
|
+
for (const cls of classes) {
|
|
271
|
+
const classStats = stats.get(cls);
|
|
272
|
+
const tp = classStats?.tp ?? 0;
|
|
273
|
+
const fp = classStats?.fp ?? 0;
|
|
274
|
+
const support = classStats?.support ?? 0;
|
|
275
|
+
precisions.push(tp + fp === 0 ? 0 : tp / (tp + fp));
|
|
276
|
+
supports.push(support);
|
|
277
|
+
}
|
|
278
|
+
if (average === null) {
|
|
279
|
+
return precisions;
|
|
280
|
+
}
|
|
281
|
+
if (average === "micro") {
|
|
282
|
+
return totalTp + totalFp === 0 ? 0 : totalTp / (totalTp + totalFp);
|
|
283
|
+
}
|
|
284
|
+
if (average === "macro") {
|
|
285
|
+
const sum = precisions.reduce((acc, val) => acc + val, 0);
|
|
286
|
+
return precisions.length === 0 ? 0 : sum / precisions.length;
|
|
287
|
+
}
|
|
288
|
+
if (average === "weighted") {
|
|
289
|
+
let weightedSum = 0;
|
|
290
|
+
let totalSupport = 0;
|
|
291
|
+
for (let i = 0; i < precisions.length; i++) {
|
|
292
|
+
weightedSum += (precisions[i] ?? 0) * (supports[i] ?? 0);
|
|
293
|
+
totalSupport += supports[i] ?? 0;
|
|
294
|
+
}
|
|
295
|
+
return totalSupport === 0 ? 0 : weightedSum / totalSupport;
|
|
296
|
+
}
|
|
297
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
298
|
+
`Invalid average parameter: ${average}. Must be one of: 'binary', 'micro', 'macro', 'weighted', or null`,
|
|
299
|
+
"average",
|
|
300
|
+
average
|
|
301
|
+
);
|
|
302
|
+
}
|
|
303
|
+
function recall(yTrue, yPred, average = "binary") {
|
|
304
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
305
|
+
if (yTrue.size === 0) return average === null ? [] : 0;
|
|
306
|
+
if (average === "binary") {
|
|
307
|
+
if (yTrue.dtype === "string" || yPred.dtype === "string") {
|
|
308
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
309
|
+
"Binary average requires numeric labels (0/1). Use 'macro', 'micro', or 'weighted' for string labels."
|
|
310
|
+
);
|
|
311
|
+
}
|
|
312
|
+
const yTrueData = getNumericLabelData(yTrue);
|
|
313
|
+
const yPredData = getNumericLabelData(yPred);
|
|
314
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
315
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
316
|
+
let tp = 0;
|
|
317
|
+
let fn = 0;
|
|
318
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
319
|
+
const trueVal = readNumericLabel(yTrueData, trueOffset, i, "yTrue");
|
|
320
|
+
const predVal = readNumericLabel(yPredData, predOffset, i, "yPred");
|
|
321
|
+
ensureBinaryValue(trueVal, "yTrue", i);
|
|
322
|
+
ensureBinaryValue(predVal, "yPred", i);
|
|
323
|
+
if (trueVal === 1) {
|
|
324
|
+
if (predVal === 1) {
|
|
325
|
+
tp++;
|
|
326
|
+
} else {
|
|
327
|
+
fn++;
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
return tp + fn === 0 ? 0 : tp / (tp + fn);
|
|
332
|
+
}
|
|
333
|
+
const { classes, stats, totalTp, totalFn } = buildClassStats(yTrue, yPred);
|
|
334
|
+
const recalls = [];
|
|
335
|
+
const supports = [];
|
|
336
|
+
for (const cls of classes) {
|
|
337
|
+
const classStats = stats.get(cls);
|
|
338
|
+
const tp = classStats?.tp ?? 0;
|
|
339
|
+
const fn = classStats?.fn ?? 0;
|
|
340
|
+
const support = classStats?.support ?? 0;
|
|
341
|
+
recalls.push(tp + fn === 0 ? 0 : tp / (tp + fn));
|
|
342
|
+
supports.push(support);
|
|
343
|
+
}
|
|
344
|
+
if (average === null) {
|
|
345
|
+
return recalls;
|
|
346
|
+
}
|
|
347
|
+
if (average === "micro") {
|
|
348
|
+
return totalTp + totalFn === 0 ? 0 : totalTp / (totalTp + totalFn);
|
|
349
|
+
}
|
|
350
|
+
if (average === "macro") {
|
|
351
|
+
const sum = recalls.reduce((acc, val) => acc + val, 0);
|
|
352
|
+
return recalls.length === 0 ? 0 : sum / recalls.length;
|
|
353
|
+
}
|
|
354
|
+
if (average === "weighted") {
|
|
355
|
+
let weightedSum = 0;
|
|
356
|
+
let totalSupport = 0;
|
|
357
|
+
for (let i = 0; i < recalls.length; i++) {
|
|
358
|
+
weightedSum += (recalls[i] ?? 0) * (supports[i] ?? 0);
|
|
359
|
+
totalSupport += supports[i] ?? 0;
|
|
360
|
+
}
|
|
361
|
+
return totalSupport === 0 ? 0 : weightedSum / totalSupport;
|
|
362
|
+
}
|
|
363
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
364
|
+
`Invalid average parameter: ${average}. Must be one of: 'binary', 'micro', 'macro', 'weighted', or null`,
|
|
365
|
+
"average",
|
|
366
|
+
average
|
|
367
|
+
);
|
|
368
|
+
}
|
|
369
|
+
function f1Score(yTrue, yPred, average = "binary") {
|
|
370
|
+
if (average === "binary" || average === "micro") {
|
|
371
|
+
const p = precision(yTrue, yPred, average);
|
|
372
|
+
const r = recall(yTrue, yPred, average);
|
|
373
|
+
return p + r === 0 ? 0 : 2 * p * r / (p + r);
|
|
374
|
+
}
|
|
375
|
+
const prec = precision(yTrue, yPred, null);
|
|
376
|
+
const rec = recall(yTrue, yPred, null);
|
|
377
|
+
const f1Scores = [];
|
|
378
|
+
for (let i = 0; i < prec.length; i++) {
|
|
379
|
+
const p = prec[i] ?? 0;
|
|
380
|
+
const r = rec[i] ?? 0;
|
|
381
|
+
f1Scores.push(p + r === 0 ? 0 : 2 * p * r / (p + r));
|
|
382
|
+
}
|
|
383
|
+
if (average === null) {
|
|
384
|
+
return f1Scores;
|
|
385
|
+
}
|
|
386
|
+
if (f1Scores.length === 0) return 0;
|
|
387
|
+
if (average === "macro") {
|
|
388
|
+
const sum = f1Scores.reduce((acc, val) => acc + val, 0);
|
|
389
|
+
return sum / f1Scores.length;
|
|
390
|
+
}
|
|
391
|
+
if (average === "weighted") {
|
|
392
|
+
const { classes, stats } = buildClassStats(yTrue, yPred);
|
|
393
|
+
let weightedSum = 0;
|
|
394
|
+
let totalSupport = 0;
|
|
395
|
+
for (let i = 0; i < f1Scores.length; i++) {
|
|
396
|
+
const cls = classes[i];
|
|
397
|
+
const support = cls !== void 0 ? stats.get(cls)?.support ?? 0 : 0;
|
|
398
|
+
weightedSum += (f1Scores[i] ?? 0) * support;
|
|
399
|
+
totalSupport += support;
|
|
400
|
+
}
|
|
401
|
+
return totalSupport === 0 ? 0 : weightedSum / totalSupport;
|
|
402
|
+
}
|
|
403
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
404
|
+
`Invalid average parameter: ${average}. Must be one of: 'binary', 'micro', 'macro', 'weighted', or null`,
|
|
405
|
+
"average",
|
|
406
|
+
average
|
|
407
|
+
);
|
|
408
|
+
}
|
|
409
|
+
function fbetaScore(yTrue, yPred, beta, average = "binary") {
|
|
410
|
+
if (!Number.isFinite(beta) || beta <= 0) {
|
|
411
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("beta must be a positive finite number", "beta", beta);
|
|
412
|
+
}
|
|
413
|
+
const betaSq = beta * beta;
|
|
414
|
+
if (average === "binary" || average === "micro") {
|
|
415
|
+
const p = precision(yTrue, yPred, average);
|
|
416
|
+
const r = recall(yTrue, yPred, average);
|
|
417
|
+
return p + r === 0 ? 0 : (1 + betaSq) * p * r / (betaSq * p + r);
|
|
418
|
+
}
|
|
419
|
+
const prec = precision(yTrue, yPred, null);
|
|
420
|
+
const rec = recall(yTrue, yPred, null);
|
|
421
|
+
const fbetaScores = [];
|
|
422
|
+
for (let i = 0; i < prec.length; i++) {
|
|
423
|
+
const p = prec[i] ?? 0;
|
|
424
|
+
const r = rec[i] ?? 0;
|
|
425
|
+
fbetaScores.push(p + r === 0 ? 0 : (1 + betaSq) * p * r / (betaSq * p + r));
|
|
426
|
+
}
|
|
427
|
+
if (average === null) {
|
|
428
|
+
return fbetaScores;
|
|
429
|
+
}
|
|
430
|
+
if (fbetaScores.length === 0) return 0;
|
|
431
|
+
if (average === "macro") {
|
|
432
|
+
const sum = fbetaScores.reduce((acc, val) => acc + val, 0);
|
|
433
|
+
return sum / fbetaScores.length;
|
|
434
|
+
}
|
|
435
|
+
if (average === "weighted") {
|
|
436
|
+
const { classes, stats } = buildClassStats(yTrue, yPred);
|
|
437
|
+
let weightedSum = 0;
|
|
438
|
+
let totalSupport = 0;
|
|
439
|
+
for (let i = 0; i < fbetaScores.length; i++) {
|
|
440
|
+
const cls = classes[i];
|
|
441
|
+
const support = cls !== void 0 ? stats.get(cls)?.support ?? 0 : 0;
|
|
442
|
+
weightedSum += (fbetaScores[i] ?? 0) * support;
|
|
443
|
+
totalSupport += support;
|
|
444
|
+
}
|
|
445
|
+
return totalSupport === 0 ? 0 : weightedSum / totalSupport;
|
|
446
|
+
}
|
|
447
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
448
|
+
`Invalid average parameter: ${average}. Must be one of: 'binary', 'micro', 'macro', 'weighted', or null`,
|
|
449
|
+
"average",
|
|
450
|
+
average
|
|
451
|
+
);
|
|
452
|
+
}
|
|
453
|
+
function confusionMatrix(yTrue, yPred) {
|
|
454
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
455
|
+
assertComparableLabelTypes(yTrue, yPred);
|
|
456
|
+
if (yTrue.size === 0) {
|
|
457
|
+
return chunk6AE5FKKQ_cjs.tensor([]).reshape([0, 0]);
|
|
458
|
+
}
|
|
459
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
460
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
461
|
+
const labelSet = /* @__PURE__ */ new Set();
|
|
462
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
463
|
+
labelSet.add(readComparableLabel(yTrue, trueOffset, i, "yTrue"));
|
|
464
|
+
labelSet.add(readComparableLabel(yPred, predOffset, i, "yPred"));
|
|
465
|
+
}
|
|
466
|
+
const labels = Array.from(labelSet).sort((a, b) => {
|
|
467
|
+
if (typeof a === "number" && typeof b === "number") return a - b;
|
|
468
|
+
if (typeof a === "string" && typeof b === "string") return a.localeCompare(b);
|
|
469
|
+
if (typeof a === "bigint" && typeof b === "bigint") return a === b ? 0 : a < b ? -1 : 1;
|
|
470
|
+
return String(a).localeCompare(String(b));
|
|
471
|
+
});
|
|
472
|
+
const labelToIndex = /* @__PURE__ */ new Map();
|
|
473
|
+
for (let i = 0; i < labels.length; i++) {
|
|
474
|
+
const label = labels[i];
|
|
475
|
+
if (label === void 0) continue;
|
|
476
|
+
labelToIndex.set(label, i);
|
|
477
|
+
}
|
|
478
|
+
const nClasses = labels.length;
|
|
479
|
+
const matrix = Array.from({ length: nClasses }, () => new Array(nClasses).fill(0));
|
|
480
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
481
|
+
const trueLabel = readComparableLabel(yTrue, trueOffset, i, "yTrue");
|
|
482
|
+
const predLabel = readComparableLabel(yPred, predOffset, i, "yPred");
|
|
483
|
+
const r = labelToIndex.get(trueLabel);
|
|
484
|
+
const c = labelToIndex.get(predLabel);
|
|
485
|
+
if (r === void 0 || c === void 0) continue;
|
|
486
|
+
const row = matrix[r];
|
|
487
|
+
if (row) row[c] = (row[c] ?? 0) + 1;
|
|
488
|
+
}
|
|
489
|
+
return chunk6AE5FKKQ_cjs.tensor(matrix);
|
|
490
|
+
}
|
|
491
|
+
function classificationReport(yTrue, yPred) {
|
|
492
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
493
|
+
if (yTrue.size === 0) return "Classification Report:\n (empty)";
|
|
494
|
+
assertBinaryLabels(yTrue, yPred);
|
|
495
|
+
const { classes, stats } = buildClassStats(yTrue, yPred);
|
|
496
|
+
const precs = precision(yTrue, yPred, null);
|
|
497
|
+
const recs = recall(yTrue, yPred, null);
|
|
498
|
+
const f1s = f1Score(yTrue, yPred, null);
|
|
499
|
+
const acc = accuracy(yTrue, yPred);
|
|
500
|
+
const maxClassLen = Math.max(...classes.map((c) => String(c).length), "Class".length);
|
|
501
|
+
const colWidth = Math.max(12, maxClassLen + 2);
|
|
502
|
+
let report = "Classification Report:\n";
|
|
503
|
+
report += "Class".padEnd(colWidth) + "Precision".padEnd(12) + "Recall".padEnd(12) + "F1-Score".padEnd(12) + "Support\n";
|
|
504
|
+
report += `${"-".repeat(colWidth + 36 + 7)}
|
|
505
|
+
`;
|
|
506
|
+
let totalSupport = 0;
|
|
507
|
+
let weightedPrec = 0;
|
|
508
|
+
let weightedRec = 0;
|
|
509
|
+
let weightedF1 = 0;
|
|
510
|
+
let macroPrec = 0;
|
|
511
|
+
let macroRec = 0;
|
|
512
|
+
let macroF1 = 0;
|
|
513
|
+
for (const [i, cls] of classes.entries()) {
|
|
514
|
+
const p = precs[i] ?? 0;
|
|
515
|
+
const r = recs[i] ?? 0;
|
|
516
|
+
const f1 = f1s[i] ?? 0;
|
|
517
|
+
const s = stats.get(cls)?.support ?? 0;
|
|
518
|
+
totalSupport += s;
|
|
519
|
+
weightedPrec += p * s;
|
|
520
|
+
weightedRec += r * s;
|
|
521
|
+
weightedF1 += f1 * s;
|
|
522
|
+
macroPrec += p;
|
|
523
|
+
macroRec += r;
|
|
524
|
+
macroF1 += f1;
|
|
525
|
+
report += String(cls).padEnd(colWidth) + p.toFixed(4).padEnd(12) + r.toFixed(4).padEnd(12) + f1.toFixed(4).padEnd(12) + String(s) + "\n";
|
|
526
|
+
}
|
|
527
|
+
report += "\n";
|
|
528
|
+
const nClasses = classes.length;
|
|
529
|
+
if (nClasses > 0) {
|
|
530
|
+
macroPrec /= nClasses;
|
|
531
|
+
macroRec /= nClasses;
|
|
532
|
+
macroF1 /= nClasses;
|
|
533
|
+
} else {
|
|
534
|
+
macroPrec = 0;
|
|
535
|
+
macroRec = 0;
|
|
536
|
+
macroF1 = 0;
|
|
537
|
+
}
|
|
538
|
+
weightedPrec = totalSupport === 0 ? 0 : weightedPrec / totalSupport;
|
|
539
|
+
weightedRec = totalSupport === 0 ? 0 : weightedRec / totalSupport;
|
|
540
|
+
weightedF1 = totalSupport === 0 ? 0 : weightedF1 / totalSupport;
|
|
541
|
+
report += "Accuracy".padEnd(colWidth) + "".padEnd(12) + "".padEnd(12) + acc.toFixed(4).padEnd(12) + String(totalSupport) + "\n";
|
|
542
|
+
report += "Macro Avg".padEnd(colWidth) + macroPrec.toFixed(4).padEnd(12) + macroRec.toFixed(4).padEnd(12) + macroF1.toFixed(4).padEnd(12) + String(totalSupport) + "\n";
|
|
543
|
+
report += "Weighted Avg".padEnd(colWidth) + weightedPrec.toFixed(4).padEnd(12) + weightedRec.toFixed(4).padEnd(12) + weightedF1.toFixed(4).padEnd(12) + String(totalSupport);
|
|
544
|
+
return report;
|
|
545
|
+
}
|
|
546
|
+
function rocCurve(yTrue, yScore) {
|
|
547
|
+
assertSameSizeVectors(yTrue, yScore, "yTrue", "yScore");
|
|
548
|
+
const n = yTrue.size;
|
|
549
|
+
if (n === 0) return [chunk6AE5FKKQ_cjs.tensor([]), chunk6AE5FKKQ_cjs.tensor([]), chunk6AE5FKKQ_cjs.tensor([])];
|
|
550
|
+
const yTrueData = getNumericLabelData(yTrue);
|
|
551
|
+
const yScoreData = getNumericLabelData(yScore);
|
|
552
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
553
|
+
const scoreOffset = createFlatOffsetter(yScore);
|
|
554
|
+
const pairs = [];
|
|
555
|
+
let nPos = 0;
|
|
556
|
+
let nNeg = 0;
|
|
557
|
+
for (let i = 0; i < n; i++) {
|
|
558
|
+
const label = readNumericLabel(yTrueData, trueOffset, i, "yTrue");
|
|
559
|
+
ensureBinaryValue(label, "yTrue", i);
|
|
560
|
+
const score = readNumericLabel(yScoreData, scoreOffset, i, "yScore");
|
|
561
|
+
pairs.push({ score, label });
|
|
562
|
+
if (label === 1) nPos++;
|
|
563
|
+
else nNeg++;
|
|
564
|
+
}
|
|
565
|
+
pairs.sort((a, b) => b.score - a.score);
|
|
566
|
+
if (nPos === 0 || nNeg === 0) return [chunk6AE5FKKQ_cjs.tensor([]), chunk6AE5FKKQ_cjs.tensor([]), chunk6AE5FKKQ_cjs.tensor([])];
|
|
567
|
+
const fpr = [0];
|
|
568
|
+
const tpr = [0];
|
|
569
|
+
const thresholds = [Infinity];
|
|
570
|
+
let tp = 0;
|
|
571
|
+
let fp = 0;
|
|
572
|
+
let idx = 0;
|
|
573
|
+
while (idx < pairs.length) {
|
|
574
|
+
const threshold = pairs[idx]?.score ?? 0;
|
|
575
|
+
while (idx < pairs.length && (pairs[idx]?.score ?? 0) === threshold) {
|
|
576
|
+
const label = pairs[idx]?.label ?? 0;
|
|
577
|
+
if (label === 1) tp++;
|
|
578
|
+
else fp++;
|
|
579
|
+
idx++;
|
|
580
|
+
}
|
|
581
|
+
fpr.push(fp / nNeg);
|
|
582
|
+
tpr.push(tp / nPos);
|
|
583
|
+
thresholds.push(threshold);
|
|
584
|
+
}
|
|
585
|
+
return [chunk6AE5FKKQ_cjs.tensor(fpr), chunk6AE5FKKQ_cjs.tensor(tpr), chunk6AE5FKKQ_cjs.tensor(thresholds)];
|
|
586
|
+
}
|
|
587
|
+
function rocAucScore(yTrue, yScore) {
|
|
588
|
+
const curves = rocCurve(yTrue, yScore);
|
|
589
|
+
const fprT = curves[0];
|
|
590
|
+
const tprT = curves[1];
|
|
591
|
+
if (!fprT || !tprT || fprT.size === 0 || tprT.size === 0) return 0.5;
|
|
592
|
+
const fprData = getNumericLabelData(fprT);
|
|
593
|
+
const tprData = getNumericLabelData(tprT);
|
|
594
|
+
const fprOffset = createFlatOffsetter(fprT);
|
|
595
|
+
const tprOffset = createFlatOffsetter(tprT);
|
|
596
|
+
let auc = 0;
|
|
597
|
+
let prevX = 0;
|
|
598
|
+
let prevY = 0;
|
|
599
|
+
for (let i = 1; i < fprT.size; i++) {
|
|
600
|
+
const x = readNumericLabel(fprData, fprOffset, i, "fpr");
|
|
601
|
+
const y = readNumericLabel(tprData, tprOffset, i, "tpr");
|
|
602
|
+
auc += (x - prevX) * ((y + prevY) / 2);
|
|
603
|
+
prevX = x;
|
|
604
|
+
prevY = y;
|
|
605
|
+
}
|
|
606
|
+
return auc;
|
|
607
|
+
}
|
|
608
|
+
function precisionRecallCurve(yTrue, yScore) {
|
|
609
|
+
assertSameSizeVectors(yTrue, yScore, "yTrue", "yScore");
|
|
610
|
+
const n = yTrue.size;
|
|
611
|
+
if (n === 0) return [chunk6AE5FKKQ_cjs.tensor([]), chunk6AE5FKKQ_cjs.tensor([]), chunk6AE5FKKQ_cjs.tensor([])];
|
|
612
|
+
const yTrueData = getNumericLabelData(yTrue);
|
|
613
|
+
const yScoreData = getNumericLabelData(yScore);
|
|
614
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
615
|
+
const scoreOffset = createFlatOffsetter(yScore);
|
|
616
|
+
const pairs = [];
|
|
617
|
+
let nPos = 0;
|
|
618
|
+
for (let i = 0; i < n; i++) {
|
|
619
|
+
const label = readNumericLabel(yTrueData, trueOffset, i, "yTrue");
|
|
620
|
+
ensureBinaryValue(label, "yTrue", i);
|
|
621
|
+
const score = readNumericLabel(yScoreData, scoreOffset, i, "yScore");
|
|
622
|
+
pairs.push({ score, label });
|
|
623
|
+
if (label === 1) nPos++;
|
|
624
|
+
}
|
|
625
|
+
pairs.sort((a, b) => b.score - a.score);
|
|
626
|
+
if (nPos === 0) return [chunk6AE5FKKQ_cjs.tensor([]), chunk6AE5FKKQ_cjs.tensor([]), chunk6AE5FKKQ_cjs.tensor([])];
|
|
627
|
+
const prec = [1];
|
|
628
|
+
const rec = [0];
|
|
629
|
+
const thresholds = [Infinity];
|
|
630
|
+
let tp = 0;
|
|
631
|
+
let fp = 0;
|
|
632
|
+
let idx = 0;
|
|
633
|
+
while (idx < pairs.length) {
|
|
634
|
+
const threshold = pairs[idx]?.score ?? 0;
|
|
635
|
+
while (idx < pairs.length && (pairs[idx]?.score ?? 0) === threshold) {
|
|
636
|
+
const label = pairs[idx]?.label ?? 0;
|
|
637
|
+
if (label === 1) tp++;
|
|
638
|
+
else fp++;
|
|
639
|
+
idx++;
|
|
640
|
+
}
|
|
641
|
+
const precisionVal = tp + fp === 0 ? 1 : tp / (tp + fp);
|
|
642
|
+
const recallVal = tp / nPos;
|
|
643
|
+
prec.push(precisionVal);
|
|
644
|
+
rec.push(recallVal);
|
|
645
|
+
thresholds.push(threshold);
|
|
646
|
+
}
|
|
647
|
+
return [chunk6AE5FKKQ_cjs.tensor(prec), chunk6AE5FKKQ_cjs.tensor(rec), chunk6AE5FKKQ_cjs.tensor(thresholds)];
|
|
648
|
+
}
|
|
649
|
+
function averagePrecisionScore(yTrue, yScore) {
|
|
650
|
+
const curves = precisionRecallCurve(yTrue, yScore);
|
|
651
|
+
const precT = curves[0];
|
|
652
|
+
const recT = curves[1];
|
|
653
|
+
if (!precT || !recT || precT.size === 0 || recT.size === 0) return 0;
|
|
654
|
+
const precData = getNumericLabelData(precT);
|
|
655
|
+
const recData = getNumericLabelData(recT);
|
|
656
|
+
const precOffset = createFlatOffsetter(precT);
|
|
657
|
+
const recOffset = createFlatOffsetter(recT);
|
|
658
|
+
let ap = 0;
|
|
659
|
+
let prevRecall = readNumericLabel(recData, recOffset, 0, "recall");
|
|
660
|
+
for (let i = 1; i < recT.size; i++) {
|
|
661
|
+
const recall2 = readNumericLabel(recData, recOffset, i, "recall");
|
|
662
|
+
const precision2 = readNumericLabel(precData, precOffset, i, "precision");
|
|
663
|
+
const deltaRecall = recall2 - prevRecall;
|
|
664
|
+
if (deltaRecall > 0) {
|
|
665
|
+
ap += deltaRecall * precision2;
|
|
666
|
+
}
|
|
667
|
+
prevRecall = recall2;
|
|
668
|
+
}
|
|
669
|
+
return ap;
|
|
670
|
+
}
|
|
671
|
+
function logLoss(yTrue, yPred) {
|
|
672
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
673
|
+
if (yTrue.size === 0) return 0;
|
|
674
|
+
const yTrueData = getNumericLabelData(yTrue);
|
|
675
|
+
const yPredData = getNumericLabelData(yPred);
|
|
676
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
677
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
678
|
+
const eps = 1e-15;
|
|
679
|
+
let loss = 0;
|
|
680
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
681
|
+
const trueVal = readNumericLabel(yTrueData, trueOffset, i, "yTrue");
|
|
682
|
+
ensureBinaryValue(trueVal, "yTrue", i);
|
|
683
|
+
const predRaw = readNumericLabel(yPredData, predOffset, i, "yPred");
|
|
684
|
+
if (predRaw < 0 || predRaw > 1) {
|
|
685
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
686
|
+
`yPred must contain probabilities in range [0, 1], found ${String(predRaw)} at index ${i}`,
|
|
687
|
+
"yPred",
|
|
688
|
+
predRaw
|
|
689
|
+
);
|
|
690
|
+
}
|
|
691
|
+
const predVal = Math.max(eps, Math.min(1 - eps, predRaw));
|
|
692
|
+
loss -= trueVal * Math.log(predVal) + (1 - trueVal) * Math.log(1 - predVal);
|
|
693
|
+
}
|
|
694
|
+
return loss / yTrue.size;
|
|
695
|
+
}
|
|
696
|
+
function hammingLoss(yTrue, yPred) {
|
|
697
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
698
|
+
if (yTrue.size === 0) return 0;
|
|
699
|
+
assertComparableLabelTypes(yTrue, yPred);
|
|
700
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
701
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
702
|
+
let errors = 0;
|
|
703
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
704
|
+
const trueVal = readComparableLabel(yTrue, trueOffset, i, "yTrue");
|
|
705
|
+
const predVal = readComparableLabel(yPred, predOffset, i, "yPred");
|
|
706
|
+
if (trueVal !== predVal) {
|
|
707
|
+
errors++;
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
return errors / yTrue.size;
|
|
711
|
+
}
|
|
712
|
+
function jaccardScore(yTrue, yPred) {
|
|
713
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
714
|
+
if (yTrue.size === 0) return 1;
|
|
715
|
+
const yTrueData = getNumericLabelData(yTrue);
|
|
716
|
+
const yPredData = getNumericLabelData(yPred);
|
|
717
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
718
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
719
|
+
let tp = 0, fp = 0, fn = 0;
|
|
720
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
721
|
+
const trueVal = readNumericLabel(yTrueData, trueOffset, i, "yTrue");
|
|
722
|
+
const predVal = readNumericLabel(yPredData, predOffset, i, "yPred");
|
|
723
|
+
ensureBinaryValue(trueVal, "yTrue", i);
|
|
724
|
+
ensureBinaryValue(predVal, "yPred", i);
|
|
725
|
+
if (trueVal === 1 && predVal === 1) tp++;
|
|
726
|
+
else if (trueVal === 0 && predVal === 1) fp++;
|
|
727
|
+
else if (trueVal === 1 && predVal === 0) fn++;
|
|
728
|
+
}
|
|
729
|
+
return tp + fp + fn === 0 ? 1 : tp / (tp + fp + fn);
|
|
730
|
+
}
|
|
731
|
+
function matthewsCorrcoef(yTrue, yPred) {
|
|
732
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
733
|
+
if (yTrue.size === 0) return 0;
|
|
734
|
+
const yTrueData = getNumericLabelData(yTrue);
|
|
735
|
+
const yPredData = getNumericLabelData(yPred);
|
|
736
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
737
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
738
|
+
let tp = 0, tn = 0, fp = 0, fn = 0;
|
|
739
|
+
for (let i = 0; i < yTrue.size; i++) {
|
|
740
|
+
const trueVal = readNumericLabel(yTrueData, trueOffset, i, "yTrue");
|
|
741
|
+
const predVal = readNumericLabel(yPredData, predOffset, i, "yPred");
|
|
742
|
+
ensureBinaryValue(trueVal, "yTrue", i);
|
|
743
|
+
ensureBinaryValue(predVal, "yPred", i);
|
|
744
|
+
if (trueVal === 1 && predVal === 1) tp++;
|
|
745
|
+
else if (trueVal === 0 && predVal === 0) tn++;
|
|
746
|
+
else if (trueVal === 0 && predVal === 1) fp++;
|
|
747
|
+
else if (trueVal === 1 && predVal === 0) fn++;
|
|
748
|
+
}
|
|
749
|
+
const numerator = tp * tn - fp * fn;
|
|
750
|
+
const denominator = Math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
|
|
751
|
+
return denominator === 0 ? 0 : numerator / denominator;
|
|
752
|
+
}
|
|
753
|
+
function cohenKappaScore(yTrue, yPred) {
|
|
754
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
755
|
+
const n = yTrue.size;
|
|
756
|
+
if (n === 0) return 0;
|
|
757
|
+
const yTrueData = getNumericLabelData(yTrue);
|
|
758
|
+
const yPredData = getNumericLabelData(yPred);
|
|
759
|
+
const trueOffset = createFlatOffsetter(yTrue);
|
|
760
|
+
const predOffset = createFlatOffsetter(yPred);
|
|
761
|
+
let po = 0;
|
|
762
|
+
const trueCount = /* @__PURE__ */ new Map();
|
|
763
|
+
const predCount = /* @__PURE__ */ new Map();
|
|
764
|
+
for (let i = 0; i < n; i++) {
|
|
765
|
+
const t = readNumericLabel(yTrueData, trueOffset, i, "yTrue");
|
|
766
|
+
const p = readNumericLabel(yPredData, predOffset, i, "yPred");
|
|
767
|
+
if (t === p) {
|
|
768
|
+
po++;
|
|
769
|
+
}
|
|
770
|
+
trueCount.set(t, (trueCount.get(t) ?? 0) + 1);
|
|
771
|
+
predCount.set(p, (predCount.get(p) ?? 0) + 1);
|
|
772
|
+
}
|
|
773
|
+
po /= n;
|
|
774
|
+
let pe = 0;
|
|
775
|
+
const allClasses = /* @__PURE__ */ new Set([...trueCount.keys(), ...predCount.keys()]);
|
|
776
|
+
for (const c of allClasses) {
|
|
777
|
+
const trueProb = (trueCount.get(c) ?? 0) / n;
|
|
778
|
+
const predProb = (predCount.get(c) ?? 0) / n;
|
|
779
|
+
pe += trueProb * predProb;
|
|
780
|
+
}
|
|
781
|
+
const denom = 1 - pe;
|
|
782
|
+
if (denom === 0) return po === 1 ? 1 : 0;
|
|
783
|
+
return (po - pe) / denom;
|
|
784
|
+
}
|
|
785
|
+
function balancedAccuracyScore(yTrue, yPred) {
|
|
786
|
+
assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
|
|
787
|
+
if (yTrue.size === 0) return 0;
|
|
788
|
+
const { classes, stats } = buildClassStats(yTrue, yPred);
|
|
789
|
+
let sumRecall = 0;
|
|
790
|
+
let classCount = 0;
|
|
791
|
+
for (const cls of classes) {
|
|
792
|
+
const classStats = stats.get(cls);
|
|
793
|
+
const support = classStats?.support ?? 0;
|
|
794
|
+
if (support === 0) continue;
|
|
795
|
+
const tp = classStats?.tp ?? 0;
|
|
796
|
+
const fn = classStats?.fn ?? 0;
|
|
797
|
+
const recall2 = tp + fn > 0 ? tp / (tp + fn) : 0;
|
|
798
|
+
sumRecall += recall2;
|
|
799
|
+
classCount++;
|
|
800
|
+
}
|
|
801
|
+
return classCount === 0 ? 0 : sumRecall / classCount;
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
// src/metrics/clustering.ts
|
|
805
|
+
function isBigIntTypedArray(x) {
|
|
806
|
+
return x instanceof BigInt64Array || x instanceof BigUint64Array;
|
|
807
|
+
}
|
|
808
|
+
function readIndex(arr, index, name) {
|
|
809
|
+
const v = arr[index];
|
|
810
|
+
if (v === void 0) {
|
|
811
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`${name} index out of range: ${index}`);
|
|
812
|
+
}
|
|
813
|
+
return v;
|
|
814
|
+
}
|
|
815
|
+
function getNumericTensorData(t, name) {
|
|
816
|
+
if (t.dtype === "string") {
|
|
817
|
+
throw new chunkJSCDE774_cjs.DTypeError(`${name} must be numeric (string tensors not supported)`);
|
|
818
|
+
}
|
|
819
|
+
if (t.dtype === "int64") {
|
|
820
|
+
throw new chunkJSCDE774_cjs.DTypeError(`${name} must be numeric (int64 tensors not supported)`);
|
|
821
|
+
}
|
|
822
|
+
const data = t.data;
|
|
823
|
+
if (!chunkJSCDE774_cjs.isTypedArray(data) || !chunkJSCDE774_cjs.isNumericTypedArray(data)) {
|
|
824
|
+
throw new chunkJSCDE774_cjs.DTypeError(`${name} must be a numeric tensor`);
|
|
825
|
+
}
|
|
826
|
+
return data;
|
|
827
|
+
}
|
|
828
|
+
function getFeatureAccessor(X) {
|
|
829
|
+
const data = getNumericTensorData(X, "X");
|
|
830
|
+
if (X.ndim === 0 || X.ndim > 2) {
|
|
831
|
+
throw new chunkJSCDE774_cjs.ShapeError("X must be a 1D or 2D tensor");
|
|
832
|
+
}
|
|
833
|
+
const nSamples = X.shape[0] ?? 0;
|
|
834
|
+
const nFeatures = X.ndim === 1 ? 1 : X.shape[1] ?? 0;
|
|
835
|
+
if (nFeatures === 0) {
|
|
836
|
+
throw new chunkJSCDE774_cjs.ShapeError("X must have at least one feature");
|
|
837
|
+
}
|
|
838
|
+
const logicalStrides = computeLogicalStrides(X.shape);
|
|
839
|
+
const logical0 = logicalStrides[0];
|
|
840
|
+
if (logical0 === void 0) throw new chunkJSCDE774_cjs.ShapeError("Invalid logical strides");
|
|
841
|
+
const sampleStride = X.strides[0] ?? logical0 ?? nFeatures;
|
|
842
|
+
const featureStride = X.ndim === 1 ? 0 : X.strides[1] ?? logicalStrides[1] ?? 1;
|
|
843
|
+
if (X.ndim === 2 && featureStride === 0) {
|
|
844
|
+
throw new chunkJSCDE774_cjs.ShapeError("X must have a non-degenerate feature stride");
|
|
845
|
+
}
|
|
846
|
+
return {
|
|
847
|
+
data,
|
|
848
|
+
nSamples,
|
|
849
|
+
nFeatures,
|
|
850
|
+
sampleStride,
|
|
851
|
+
featureStride,
|
|
852
|
+
offset: X.offset
|
|
853
|
+
};
|
|
854
|
+
}
|
|
855
|
+
function readLabelValue(labels, offsetter, index, name) {
|
|
856
|
+
const data = labels.data;
|
|
857
|
+
const flat = offsetter(index);
|
|
858
|
+
if (isBigIntTypedArray(data)) {
|
|
859
|
+
const v = data[flat];
|
|
860
|
+
if (v === void 0) {
|
|
861
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`${name} must contain a value for index ${index}`);
|
|
862
|
+
}
|
|
863
|
+
return v;
|
|
864
|
+
}
|
|
865
|
+
if (chunkJSCDE774_cjs.isTypedArray(data) && chunkJSCDE774_cjs.isNumericTypedArray(data)) {
|
|
866
|
+
const v = chunkJSCDE774_cjs.getNumericElement(data, flat);
|
|
867
|
+
assertFiniteNumber(v, name, `index ${index}`);
|
|
868
|
+
if ((labels.dtype === "float32" || labels.dtype === "float64") && !Number.isInteger(v)) {
|
|
869
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
870
|
+
`${name} must contain discrete labels; found non-integer ${String(v)} at index ${index}`
|
|
871
|
+
);
|
|
872
|
+
}
|
|
873
|
+
if (labels.dtype === "int64" && (!Number.isInteger(v) || !Number.isSafeInteger(v))) {
|
|
874
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
875
|
+
`${name} contains an int64 value that cannot be represented safely as a number at index ${index}`
|
|
876
|
+
);
|
|
877
|
+
}
|
|
878
|
+
return v;
|
|
879
|
+
}
|
|
880
|
+
if (Array.isArray(data)) {
|
|
881
|
+
const v = data[flat];
|
|
882
|
+
if (v === void 0 || v === null) {
|
|
883
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`${name} must contain a value for index ${index}`);
|
|
884
|
+
}
|
|
885
|
+
if (typeof v === "string") return v;
|
|
886
|
+
if (typeof v === "boolean") return v;
|
|
887
|
+
if (typeof v === "number") {
|
|
888
|
+
assertFiniteNumber(v, name, `index ${index}`);
|
|
889
|
+
if ((labels.dtype === "float32" || labels.dtype === "float64") && !Number.isInteger(v)) {
|
|
890
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
891
|
+
`${name} must contain discrete labels; found non-integer ${String(v)} at index ${index}`
|
|
892
|
+
);
|
|
893
|
+
}
|
|
894
|
+
if (labels.dtype === "int64" && (!Number.isInteger(v) || !Number.isSafeInteger(v))) {
|
|
895
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
896
|
+
`${name} contains an int64 value that cannot be represented safely as a number at index ${index}`
|
|
897
|
+
);
|
|
898
|
+
}
|
|
899
|
+
return v;
|
|
900
|
+
}
|
|
901
|
+
throw new chunkJSCDE774_cjs.DTypeError(`${name} must contain primitive labels (string, boolean, number, bigint)`);
|
|
902
|
+
}
|
|
903
|
+
throw new chunkJSCDE774_cjs.DTypeError(`${name} has unsupported backing storage for labels`);
|
|
904
|
+
}
|
|
905
|
+
function encodeLabels(labels, name) {
|
|
906
|
+
if (labels.dtype === "string") {
|
|
907
|
+
throw new chunkJSCDE774_cjs.DTypeError(`${name}: string labels not supported for clustering metrics`);
|
|
908
|
+
}
|
|
909
|
+
const n = labels.size;
|
|
910
|
+
const codes = new Int32Array(n);
|
|
911
|
+
const offsetter = createFlatOffsetter(labels);
|
|
912
|
+
const map = /* @__PURE__ */ new Map();
|
|
913
|
+
let next = 0;
|
|
914
|
+
for (let i = 0; i < n; i++) {
|
|
915
|
+
const raw = readLabelValue(labels, offsetter, i, name);
|
|
916
|
+
const existing = map.get(raw);
|
|
917
|
+
if (existing === void 0) {
|
|
918
|
+
map.set(raw, next);
|
|
919
|
+
codes[i] = next;
|
|
920
|
+
next++;
|
|
921
|
+
} else {
|
|
922
|
+
codes[i] = existing;
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
return { codes, nClusters: next };
|
|
926
|
+
}
|
|
927
|
+
function comb2(x) {
|
|
928
|
+
return x <= 1 ? 0 : x * (x - 1) / 2;
|
|
929
|
+
}
|
|
930
|
+
function buildContingencyStats(labelsTrue, labelsPred) {
|
|
931
|
+
assertSameSize(labelsTrue, labelsPred, "labelsTrue", "labelsPred");
|
|
932
|
+
const n = labelsTrue.size;
|
|
933
|
+
const encT = encodeLabels(labelsTrue, "labelsTrue");
|
|
934
|
+
const encP = encodeLabels(labelsPred, "labelsPred");
|
|
935
|
+
const trueCodes = encT.codes;
|
|
936
|
+
const predCodes = encP.codes;
|
|
937
|
+
const nTrue = encT.nClusters;
|
|
938
|
+
const nPred = encP.nClusters;
|
|
939
|
+
const trueCount = new Int32Array(nTrue);
|
|
940
|
+
const predCount = new Int32Array(nPred);
|
|
941
|
+
for (let i = 0; i < n; i++) {
|
|
942
|
+
const t = readIndex(trueCodes, i, "trueCodes");
|
|
943
|
+
const p = readIndex(predCodes, i, "predCodes");
|
|
944
|
+
trueCount[t] = (trueCount[t] ?? 0) + 1;
|
|
945
|
+
predCount[p] = (predCount[p] ?? 0) + 1;
|
|
946
|
+
}
|
|
947
|
+
const denseSize = nTrue * nPred;
|
|
948
|
+
const maxDenseCells = 4e6;
|
|
949
|
+
if (denseSize > 0 && denseSize <= maxDenseCells) {
|
|
950
|
+
const contingency2 = new Int32Array(denseSize);
|
|
951
|
+
for (let i = 0; i < n; i++) {
|
|
952
|
+
const t = readIndex(trueCodes, i, "trueCodes");
|
|
953
|
+
const p = readIndex(predCodes, i, "predCodes");
|
|
954
|
+
const idx = t * nPred + p;
|
|
955
|
+
contingency2[idx] = (contingency2[idx] ?? 0) + 1;
|
|
956
|
+
}
|
|
957
|
+
return {
|
|
958
|
+
contingencyDense: contingency2,
|
|
959
|
+
contingencySparse: null,
|
|
960
|
+
trueCount,
|
|
961
|
+
predCount,
|
|
962
|
+
nTrue,
|
|
963
|
+
nPred,
|
|
964
|
+
n
|
|
965
|
+
};
|
|
966
|
+
}
|
|
967
|
+
const contingency = /* @__PURE__ */ new Map();
|
|
968
|
+
for (let i = 0; i < n; i++) {
|
|
969
|
+
const t = readIndex(trueCodes, i, "trueCodes");
|
|
970
|
+
const p = readIndex(predCodes, i, "predCodes");
|
|
971
|
+
const key = t * nPred + p;
|
|
972
|
+
contingency.set(key, (contingency.get(key) ?? 0) + 1);
|
|
973
|
+
}
|
|
974
|
+
return {
|
|
975
|
+
contingencyDense: null,
|
|
976
|
+
contingencySparse: contingency,
|
|
977
|
+
trueCount,
|
|
978
|
+
predCount,
|
|
979
|
+
nTrue,
|
|
980
|
+
nPred,
|
|
981
|
+
n
|
|
982
|
+
};
|
|
983
|
+
}
|
|
984
|
+
function entropyFromCountArray(counts, n) {
|
|
985
|
+
if (n === 0) return 0;
|
|
986
|
+
let h = 0;
|
|
987
|
+
for (let i = 0; i < counts.length; i++) {
|
|
988
|
+
const c = readIndex(counts, i, "counts");
|
|
989
|
+
if (c > 0) {
|
|
990
|
+
const p = c / n;
|
|
991
|
+
h -= p * Math.log(p);
|
|
992
|
+
}
|
|
993
|
+
}
|
|
994
|
+
return h;
|
|
995
|
+
}
|
|
996
|
+
function mutualInformationFromContingency(stats) {
|
|
997
|
+
const { contingencyDense, contingencySparse, trueCount, predCount, nPred, n } = stats;
|
|
998
|
+
if (n === 0) return 0;
|
|
999
|
+
let mi = 0;
|
|
1000
|
+
if (contingencyDense) {
|
|
1001
|
+
for (let idx = 0; idx < contingencyDense.length; idx++) {
|
|
1002
|
+
const nij = readIndex(contingencyDense, idx, "contingencyDense");
|
|
1003
|
+
if (nij <= 0) continue;
|
|
1004
|
+
const t = Math.floor(idx / nPred);
|
|
1005
|
+
const p = idx - t * nPred;
|
|
1006
|
+
const ni = readIndex(trueCount, t, "trueCount");
|
|
1007
|
+
const nj = readIndex(predCount, p, "predCount");
|
|
1008
|
+
if (ni > 0 && nj > 0) {
|
|
1009
|
+
mi += nij / n * Math.log(n * nij / (ni * nj));
|
|
1010
|
+
}
|
|
1011
|
+
}
|
|
1012
|
+
return mi;
|
|
1013
|
+
}
|
|
1014
|
+
if (contingencySparse) {
|
|
1015
|
+
for (const [key, nij] of contingencySparse) {
|
|
1016
|
+
if (nij <= 0) continue;
|
|
1017
|
+
const t = Math.floor(key / nPred);
|
|
1018
|
+
const p = key - t * nPred;
|
|
1019
|
+
const ni = readIndex(trueCount, t, "trueCount");
|
|
1020
|
+
const nj = readIndex(predCount, p, "predCount");
|
|
1021
|
+
if (ni > 0 && nj > 0) {
|
|
1022
|
+
mi += nij / n * Math.log(n * nij / (ni * nj));
|
|
1023
|
+
}
|
|
1024
|
+
}
|
|
1025
|
+
return mi;
|
|
1026
|
+
}
|
|
1027
|
+
return 0;
|
|
1028
|
+
}
|
|
1029
|
+
function buildLogFactorials(n) {
|
|
1030
|
+
const out = new Float64Array(n + 1);
|
|
1031
|
+
for (let i = 1; i <= n; i++) {
|
|
1032
|
+
out[i] = (out[i - 1] ?? 0) + Math.log(i);
|
|
1033
|
+
}
|
|
1034
|
+
return out;
|
|
1035
|
+
}
|
|
1036
|
+
function logCombination(n, k, logFactorials) {
|
|
1037
|
+
if (k < 0 || k > n) return Number.NEGATIVE_INFINITY;
|
|
1038
|
+
const a = readIndex(logFactorials, n, "logFactorials");
|
|
1039
|
+
const b = readIndex(logFactorials, k, "logFactorials");
|
|
1040
|
+
const c = readIndex(logFactorials, n - k, "logFactorials");
|
|
1041
|
+
return a - b - c;
|
|
1042
|
+
}
|
|
1043
|
+
var LOG_EXP_UNDERFLOW_CUTOFF = -745;
|
|
1044
|
+
function expectedMutualInformation(stats) {
|
|
1045
|
+
const { trueCount, predCount, n } = stats;
|
|
1046
|
+
if (n <= 1) return 0;
|
|
1047
|
+
const rowSums = Array.from(trueCount);
|
|
1048
|
+
const colSums = Array.from(predCount);
|
|
1049
|
+
const logFactorials = buildLogFactorials(n);
|
|
1050
|
+
let emi = 0;
|
|
1051
|
+
let comp = 0;
|
|
1052
|
+
for (const a of rowSums) {
|
|
1053
|
+
if (a <= 0) continue;
|
|
1054
|
+
for (const b of colSums) {
|
|
1055
|
+
if (b <= 0) continue;
|
|
1056
|
+
const nijMin = Math.max(1, a + b - n);
|
|
1057
|
+
const nijMax = Math.min(a, b);
|
|
1058
|
+
if (nijMin > nijMax) continue;
|
|
1059
|
+
const logDenominator = logCombination(n, b, logFactorials);
|
|
1060
|
+
for (let nij = nijMin; nij <= nijMax; nij++) {
|
|
1061
|
+
const logProbability = logCombination(a, nij, logFactorials) + logCombination(n - a, b - nij, logFactorials) - logDenominator;
|
|
1062
|
+
if (logProbability < LOG_EXP_UNDERFLOW_CUTOFF) continue;
|
|
1063
|
+
const probability = Math.exp(logProbability);
|
|
1064
|
+
if (!Number.isFinite(probability) || probability === 0) continue;
|
|
1065
|
+
const miTerm = nij / n * Math.log(n * nij / (a * b));
|
|
1066
|
+
const y = probability * miTerm - comp;
|
|
1067
|
+
const t = emi + y;
|
|
1068
|
+
comp = t - emi - y;
|
|
1069
|
+
emi = t;
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1072
|
+
}
|
|
1073
|
+
return emi;
|
|
1074
|
+
}
|
|
+function averageEntropy(hTrue, hPred, method) {
+  if (method === "min") return Math.min(hTrue, hPred);
+  if (method === "max") return Math.max(hTrue, hPred);
+  if (method === "geometric") return Math.sqrt(hTrue * hPred);
+  return (hTrue + hPred) / 2;
+}
+function euclideanDistance(data, offset, sampleStride, featureStride, nFeatures, i, j) {
+  let sum = 0;
+  const baseI = offset + i * sampleStride;
+  const baseJ = offset + j * sampleStride;
+  for (let k = 0; k < nFeatures; k++) {
+    const vi = chunkJSCDE774_cjs.getNumericElement(data, baseI + k * featureStride);
+    const vj = chunkJSCDE774_cjs.getNumericElement(data, baseJ + k * featureStride);
+    assertFiniteNumber(vi, "X", `sample ${i}, feature ${k}`);
+    assertFiniteNumber(vj, "X", `sample ${j}, feature ${k}`);
+    const d = vi - vj;
+    sum += d * d;
+  }
+  return Math.sqrt(sum);
+}
+function getPrecomputedDistanceAccessor(X, n) {
+  const data = getNumericTensorData(X, "X");
+  if (X.ndim !== 2) {
+    throw new chunkJSCDE774_cjs.ShapeError("X must be a 2D tensor for metric='precomputed'");
+  }
+  const rows = X.shape[0] ?? 0;
+  const cols = X.shape[1] ?? 0;
+  if (rows !== n || cols !== n) {
+    throw new chunkJSCDE774_cjs.ShapeError(
+      "For metric='precomputed', X must be a square [n_samples, n_samples] matrix"
+    );
+  }
+  const logicalStrides = computeLogicalStrides(X.shape);
+  const rowStride = X.strides[0] ?? logicalStrides[0] ?? n;
+  const colStride = X.strides[1] ?? logicalStrides[1] ?? 1;
+  if (rowStride === 0 || colStride === 0) {
+    throw new chunkJSCDE774_cjs.ShapeError("Precomputed distance matrix must have non-degenerate strides");
+  }
+  const base = X.offset;
+  return (i, j) => {
+    const v = chunkJSCDE774_cjs.getNumericElement(data, base + i * rowStride + j * colStride);
+    assertFiniteNumber(v, "X", `distance[${i},${j}]`);
+    if (v < 0) {
+      throw new chunkJSCDE774_cjs.DataValidationError(
+        `Precomputed distances must be non-negative; found ${String(v)} at [${i},${j}]`
+      );
+    }
+    return v;
+  };
+}
+function validateSilhouetteLabels(labels, nSamples) {
+  if (labels.size !== nSamples) {
+    throw new chunkJSCDE774_cjs.ShapeError("labels length must match number of samples");
+  }
+  const enc = encodeLabels(labels, "labels");
+  const k = enc.nClusters;
+  if (k < 2 || k > nSamples - 1) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      "silhouette requires 2 <= n_clusters <= n_samples - 1",
+      "n_clusters",
+      k
+    );
+  }
+  return enc;
+}
+function reservoirSampleIndices(n, k, seed) {
+  let state = (seed ?? 0) >>> 0;
+  const hasSeed = seed !== void 0;
+  const randU32 = () => {
+    if (!hasSeed) return Math.random() * 4294967296 >>> 0;
+    state = 1664525 * state + 1013904223 >>> 0;
+    return state;
+  };
+  const randInt = (exclusiveMax) => randU32() % exclusiveMax;
+  const out = new Int32Array(k);
+  for (let i = 0; i < k; i++) out[i] = i;
+  for (let i = k; i < n; i++) {
+    const j = randInt(i + 1);
+    if (j < k) out[j] = i;
+  }
+  return out;
+}
+function reencodeSubset(codes) {
+  const out = new Int32Array(codes.length);
+  const map = /* @__PURE__ */ new Map();
+  let next = 0;
+  for (let i = 0; i < codes.length; i++) {
+    const v = readIndex(codes, i, "codes");
+    const existing = map.get(v);
+    if (existing === void 0) {
+      map.set(v, next);
+      out[i] = next;
+      next++;
+    } else {
+      out[i] = existing;
+    }
+  }
+  return { codes: out, nClusters: next };
+}
+function silhouetteMeanEuclidean(X, labels, indices) {
+  const { data, nSamples, nFeatures, sampleStride, featureStride, offset } = getFeatureAccessor(X);
+  const n = indices ? indices.length : nSamples;
+  if (nSamples < 2 || n < 2) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError("silhouette requires at least 2 samples", "n_samples", n);
+  }
+  const encAll = validateSilhouetteLabels(labels, nSamples);
+  const subsetCodesRaw = new Int32Array(n);
+  for (let i = 0; i < n; i++) {
+    const src = indices ? readIndex(indices, i, "indices") : i;
+    const c = readIndex(encAll.codes, src, "labels.codes");
+    subsetCodesRaw[i] = c;
+  }
+  const re = reencodeSubset(subsetCodesRaw);
+  const codes = re.codes;
+  const k = re.nClusters;
+  if (k < 2 || k > n - 1) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      "silhouette requires 2 <= n_clusters <= n_samples - 1",
+      "n_clusters",
+      k
+    );
+  }
+  const clusterSizes = new Int32Array(k);
+  for (let i = 0; i < n; i++) {
+    const ci = readIndex(codes, i, "codes");
+    clusterSizes[ci] = (clusterSizes[ci] ?? 0) + 1;
+  }
+  const sumsToClusters = new Float64Array(k);
+  let sum = 0;
+  let comp = 0;
+  for (let i = 0; i < n; i++) {
+    const ci = readIndex(codes, i, "codes");
+    const sizeOwn = readIndex(clusterSizes, ci, "clusterSizes");
+    if (sizeOwn <= 1) continue;
+    sumsToClusters.fill(0);
+    const srcI = indices ? readIndex(indices, i, "indices") : i;
+    for (let j = 0; j < n; j++) {
+      if (i === j) continue;
+      const srcJ = indices ? readIndex(indices, j, "indices") : j;
+      const cj = readIndex(codes, j, "codes");
+      const d = euclideanDistance(data, offset, sampleStride, featureStride, nFeatures, srcI, srcJ);
+      sumsToClusters[cj] = (sumsToClusters[cj] ?? 0) + d;
+    }
+    const a = readIndex(sumsToClusters, ci, "sumsToClusters") / (sizeOwn - 1);
+    let b = Infinity;
+    for (let cl = 0; cl < k; cl++) {
+      if (cl === ci) continue;
+      const sz = readIndex(clusterSizes, cl, "clusterSizes");
+      if (sz <= 0) continue;
+      const mean = readIndex(sumsToClusters, cl, "sumsToClusters") / sz;
+      if (mean < b) b = mean;
+    }
+    if (!Number.isFinite(b) || b === Infinity) continue;
+    const denom = Math.max(a, b);
+    const s = denom > 0 ? (b - a) / denom : 0;
+    const y = s - comp;
+    const t = sum + y;
+    comp = t - sum - y;
+    sum = t;
+  }
+  return sum / n;
+}
+function silhouetteMeanPrecomputed(X, labels, indices) {
+  const nSamples = labels.size;
+  if (nSamples < 2) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      "silhouette requires at least 2 samples",
+      "n_samples",
+      nSamples
+    );
+  }
+  const encAll = validateSilhouetteLabels(labels, nSamples);
+  const dist = getPrecomputedDistanceAccessor(X, nSamples);
+  const n = indices ? indices.length : nSamples;
+  if (n < 2) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError("silhouette requires at least 2 samples", "n_samples", n);
+  }
+  for (let i = 0; i < n; i++) {
+    const src = indices ? readIndex(indices, i, "indices") : i;
+    const d0 = dist(src, src);
+    if (!Number.isFinite(d0) || Math.abs(d0) > 1e-12) {
+      throw new chunkJSCDE774_cjs.DataValidationError(
+        `Precomputed distance matrix diagonal must be ~0; found ${String(d0)} at [${src},${src}]`
+      );
+    }
+  }
+  const subsetCodesRaw = new Int32Array(n);
+  for (let i = 0; i < n; i++) {
+    const src = indices ? readIndex(indices, i, "indices") : i;
+    const c = readIndex(encAll.codes, src, "labels.codes");
+    subsetCodesRaw[i] = c;
+  }
+  const re = reencodeSubset(subsetCodesRaw);
+  const codes = re.codes;
+  const k = re.nClusters;
+  if (k < 2 || k > n - 1) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      "silhouette requires 2 <= n_clusters <= n_samples - 1",
+      "n_clusters",
+      k
+    );
+  }
+  const clusterSizes = new Int32Array(k);
+  for (let i = 0; i < n; i++) {
+    const ci = readIndex(codes, i, "codes");
+    clusterSizes[ci] = (clusterSizes[ci] ?? 0) + 1;
+  }
+  const sumsToClusters = new Float64Array(k);
+  let sum = 0;
+  let comp = 0;
+  for (let i = 0; i < n; i++) {
+    const ci = readIndex(codes, i, "codes");
+    const sizeOwn = readIndex(clusterSizes, ci, "clusterSizes");
+    if (sizeOwn <= 1) continue;
+    sumsToClusters.fill(0);
+    const srcI = indices ? readIndex(indices, i, "indices") : i;
+    for (let j = 0; j < n; j++) {
+      if (i === j) continue;
+      const srcJ = indices ? readIndex(indices, j, "indices") : j;
+      const cj = readIndex(codes, j, "codes");
+      const d = dist(srcI, srcJ);
+      sumsToClusters[cj] = (sumsToClusters[cj] ?? 0) + d;
+    }
+    const a = readIndex(sumsToClusters, ci, "sumsToClusters") / (sizeOwn - 1);
+    let b = Infinity;
+    for (let cl = 0; cl < k; cl++) {
+      if (cl === ci) continue;
+      const sz = readIndex(clusterSizes, cl, "clusterSizes");
+      if (sz <= 0) continue;
+      const mean = readIndex(sumsToClusters, cl, "sumsToClusters") / sz;
+      if (mean < b) b = mean;
+    }
+    if (!Number.isFinite(b) || b === Infinity) continue;
+    const denom = Math.max(a, b);
+    const s = denom > 0 ? (b - a) / denom : 0;
+    const y = s - comp;
+    const t = sum + y;
+    comp = t - sum - y;
+    sum = t;
+  }
+  return sum / n;
+}
+function silhouetteScore(X, labels, metric = "euclidean", options) {
+  const sampleSize = options?.sampleSize;
+  const randomState = options?.randomState;
+  const nSamples = labels.size;
+  if (nSamples < 2) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      "silhouette requires at least 2 samples",
+      "n_samples",
+      nSamples
+    );
+  }
+  const maxFull = 2e3;
+  if (sampleSize === void 0 && nSamples > maxFull) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      `silhouetteScore is O(n\xB2) and n_samples=${nSamples} is too large for full computation; provide options.sampleSize`,
+      "sampleSize",
+      sampleSize
+    );
+  }
+  if (sampleSize !== void 0) {
+    if (!Number.isFinite(sampleSize) || !Number.isInteger(sampleSize)) {
+      throw new chunkJSCDE774_cjs.InvalidParameterError("sampleSize must be an integer", "sampleSize", sampleSize);
+    }
+    if (sampleSize < 2 || sampleSize > nSamples) {
+      throw new chunkJSCDE774_cjs.InvalidParameterError(
+        "sampleSize must satisfy 2 <= sampleSize <= n_samples",
+        "sampleSize",
+        sampleSize
+      );
+    }
+  }
+  const indices = sampleSize !== void 0 && sampleSize < nSamples ? reservoirSampleIndices(nSamples, sampleSize, randomState) : null;
+  if (metric === "euclidean") return silhouetteMeanEuclidean(X, labels, indices);
+  if (metric === "precomputed") return silhouetteMeanPrecomputed(X, labels, indices);
+  throw new chunkJSCDE774_cjs.InvalidParameterError(
+    `Unsupported metric: '${String(metric)}'. Must be 'euclidean' or 'precomputed'`,
+    "metric",
+    metric
+  );
+}
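A hedged usage sketch of the silhouetteScore API defined above. Only the signature silhouetteScore(X, labels, metric = "euclidean", options) and the sampleSize / randomState option names are taken from this chunk; the "deepbox/core" and "deepbox/metrics" require paths and the tensor(...) factory are assumptions, since the public entry points are not visible here.

// Hypothetical usage sketch: import specifiers and tensor() are assumed, not confirmed
// by this diff; the silhouetteScore call shape matches the function defined above.
const { tensor } = require("deepbox/core"); // assumed entry point
const { silhouetteScore } = require("deepbox/metrics"); // assumed entry point

const X = tensor([[0, 0], [0, 1], [10, 10], [10, 11]]); // 4 samples x 2 features
const labels = tensor([0, 0, 1, 1]);
// Above 2000 samples the full O(n^2) pass is refused unless options.sampleSize is given;
// randomState seeds the reservoir sampler so the subsample is reproducible.
const score = silhouetteScore(X, labels, "euclidean", { sampleSize: 4, randomState: 42 });
console.log(score); // close to 1 for two well-separated clusters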
+function silhouetteSamples(X, labels, metric = "euclidean") {
+  const nSamples = labels.size;
+  if (nSamples < 2) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      "silhouette requires at least 2 samples",
+      "n_samples",
+      nSamples
+    );
+  }
+  const enc = validateSilhouetteLabels(labels, nSamples);
+  const codes = enc.codes;
+  const k = enc.nClusters;
+  const silhouettes = new Float64Array(nSamples);
+  const clusterSizes = new Int32Array(k);
+  for (let i = 0; i < nSamples; i++) {
+    const ci = readIndex(codes, i, "codes");
+    clusterSizes[ci] = (clusterSizes[ci] ?? 0) + 1;
+  }
+  const sumsToClusters = new Float64Array(k);
+  if (metric === "euclidean") {
+    const { data, nFeatures, sampleStride, featureStride, offset } = getFeatureAccessor(X);
+    for (let i = 0; i < nSamples; i++) {
+      const ci = readIndex(codes, i, "codes");
+      const sizeOwn = readIndex(clusterSizes, ci, "clusterSizes");
+      if (sizeOwn <= 1) {
+        silhouettes[i] = 0;
+        continue;
+      }
+      sumsToClusters.fill(0);
+      for (let j = 0; j < nSamples; j++) {
+        if (i === j) continue;
+        const cj = readIndex(codes, j, "codes");
+        const d = euclideanDistance(data, offset, sampleStride, featureStride, nFeatures, i, j);
+        sumsToClusters[cj] = (sumsToClusters[cj] ?? 0) + d;
+      }
+      const a = readIndex(sumsToClusters, ci, "sumsToClusters") / (sizeOwn - 1);
+      let b = Infinity;
+      for (let cl = 0; cl < k; cl++) {
+        if (cl === ci) continue;
+        const sz = readIndex(clusterSizes, cl, "clusterSizes");
+        if (sz <= 0) continue;
+        const mean = readIndex(sumsToClusters, cl, "sumsToClusters") / sz;
+        if (mean < b) b = mean;
+      }
+      if (!Number.isFinite(b) || b === Infinity) {
+        silhouettes[i] = 0;
+        continue;
+      }
+      const denom = Math.max(a, b);
+      silhouettes[i] = denom > 0 ? (b - a) / denom : 0;
+    }
+    return chunk6AE5FKKQ_cjs.tensor(silhouettes);
+  }
+  if (metric === "precomputed") {
+    const dist = getPrecomputedDistanceAccessor(X, nSamples);
+    for (let i = 0; i < nSamples; i++) {
+      const d0 = dist(i, i);
+      if (!Number.isFinite(d0) || Math.abs(d0) > 1e-12) {
+        throw new chunkJSCDE774_cjs.DataValidationError(
+          `Precomputed distance matrix diagonal must be ~0; found ${String(d0)} at [${i},${i}]`
+        );
+      }
+    }
+    for (let i = 0; i < nSamples; i++) {
+      const ci = readIndex(codes, i, "codes");
+      const sizeOwn = readIndex(clusterSizes, ci, "clusterSizes");
+      if (sizeOwn <= 1) {
+        silhouettes[i] = 0;
+        continue;
+      }
+      sumsToClusters.fill(0);
+      for (let j = 0; j < nSamples; j++) {
+        if (i === j) continue;
+        const cj = readIndex(codes, j, "codes");
+        const d = dist(i, j);
+        sumsToClusters[cj] = (sumsToClusters[cj] ?? 0) + d;
+      }
+      const a = readIndex(sumsToClusters, ci, "sumsToClusters") / (sizeOwn - 1);
+      let b = Infinity;
+      for (let cl = 0; cl < k; cl++) {
+        if (cl === ci) continue;
+        const sz = readIndex(clusterSizes, cl, "clusterSizes");
+        if (sz <= 0) continue;
+        const mean = readIndex(sumsToClusters, cl, "sumsToClusters") / sz;
+        if (mean < b) b = mean;
+      }
+      if (!Number.isFinite(b) || b === Infinity) {
+        silhouettes[i] = 0;
+        continue;
+      }
+      const denom = Math.max(a, b);
+      silhouettes[i] = denom > 0 ? (b - a) / denom : 0;
+    }
+    return chunk6AE5FKKQ_cjs.tensor(silhouettes);
+  }
+  throw new chunkJSCDE774_cjs.InvalidParameterError(
+    `Unsupported metric: '${String(metric)}'. Must be 'euclidean' or 'precomputed'`,
+    "metric",
+    metric
+  );
+}
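The per-sample values filled in above follow the standard silhouette definition; a one-line numeric illustration with arbitrarily chosen distances, not taken from the package:

// s(i) = (b - a) / max(a, b), with a = mean distance from sample i to its own cluster
// and b = smallest mean distance from i to any other cluster; singleton clusters get 0.
const a = 1.0; // example: mean intra-cluster distance
const b = 4.0; // example: nearest other-cluster mean distance
console.log((b - a) / Math.max(a, b)); // 0.75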
+function daviesBouldinScore(X, labels) {
+  const { data, nSamples, nFeatures, sampleStride, featureStride, offset } = getFeatureAccessor(X);
+  if (nSamples === 0) return 0;
+  if (labels.size !== nSamples) {
+    throw new chunkJSCDE774_cjs.ShapeError("labels length must match number of samples");
+  }
+  const enc = encodeLabels(labels, "labels");
+  const codes = enc.codes;
+  const k = enc.nClusters;
+  if (k < 2) return 0;
+  const centroids = new Array(k);
+  for (let c = 0; c < k; c++) centroids[c] = new Float64Array(nFeatures);
+  const clusterSizes = new Int32Array(k);
+  for (let i = 0; i < nSamples; i++) {
+    const c = readIndex(codes, i, "codes");
+    clusterSizes[c] = (clusterSizes[c] ?? 0) + 1;
+    const base = offset + i * sampleStride;
+    const centroid = readIndex(centroids, c, "centroids");
+    for (let f = 0; f < nFeatures; f++) {
+      const v = chunkJSCDE774_cjs.getNumericElement(data, base + f * featureStride);
+      assertFiniteNumber(v, "X", `sample ${i}, feature ${f}`);
+      centroid[f] = (centroid[f] ?? 0) + v;
+    }
+  }
+  for (let c = 0; c < k; c++) {
+    const sz = readIndex(clusterSizes, c, "clusterSizes");
+    if (sz <= 0) continue;
+    const centroid = readIndex(centroids, c, "centroids");
+    for (let f = 0; f < nFeatures; f++) {
+      centroid[f] = (centroid[f] ?? 0) / sz;
+    }
+  }
+  const scatterSum = new Float64Array(k);
+  for (let i = 0; i < nSamples; i++) {
+    const c = readIndex(codes, i, "codes");
+    const centroid = readIndex(centroids, c, "centroids");
+    let distSq = 0;
+    const base = offset + i * sampleStride;
+    for (let f = 0; f < nFeatures; f++) {
+      const v = chunkJSCDE774_cjs.getNumericElement(data, base + f * featureStride);
+      assertFiniteNumber(v, "X", `sample ${i}, feature ${f}`);
+      const d = v - (centroid[f] ?? 0);
+      distSq += d * d;
+    }
+    scatterSum[c] = (scatterSum[c] ?? 0) + Math.sqrt(distSq);
+  }
+  const S = new Float64Array(k);
+  for (let c = 0; c < k; c++) {
+    const sz = readIndex(clusterSizes, c, "clusterSizes");
+    const sc = readIndex(scatterSum, c, "scatterSum");
+    S[c] = sz > 0 ? sc / sz : 0;
+  }
+  let db = 0;
+  for (let i = 0; i < k; i++) {
+    let maxRatio = Number.NEGATIVE_INFINITY;
+    const ci = readIndex(centroids, i, "centroids");
+    for (let j = 0; j < k; j++) {
+      if (i === j) continue;
+      const cj = readIndex(centroids, j, "centroids");
+      let distSq = 0;
+      for (let f = 0; f < nFeatures; f++) {
+        const d = (ci[f] ?? 0) - (cj[f] ?? 0);
+        distSq += d * d;
+      }
+      const dist = Math.sqrt(distSq);
+      const si = readIndex(S, i, "S");
+      const sj = readIndex(S, j, "S");
+      const ratio = dist === 0 ? Number.POSITIVE_INFINITY : (si + sj) / dist;
+      if (ratio > maxRatio) maxRatio = ratio;
+    }
+    db += maxRatio;
+  }
+  return db / k;
+}
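The loop above computes the Davies-Bouldin index DB = (1/k) * sum over i of max over j != i of (S_i + S_j) / d(c_i, c_j), where S_i is the mean distance of cluster i's members to their centroid; lower is better, and coincident centroids give an infinite ratio. A tiny worked case with assumed numbers, illustrative only:

// Two clusters with within-cluster scatter S = [0.5, 0.5] and centroid distance 10:
const S = [0.5, 0.5];
const centroidDist = 10;
const ratio = (S[0] + S[1]) / centroidDist; // the only pairing available to each cluster
console.log((ratio + ratio) / 2); // 0.1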
+function calinskiHarabaszScore(X, labels) {
+  const { data, nSamples, nFeatures, sampleStride, featureStride, offset } = getFeatureAccessor(X);
+  if (nSamples === 0) return 0;
+  if (labels.size !== nSamples) {
+    throw new chunkJSCDE774_cjs.ShapeError("labels length must match number of samples");
+  }
+  const enc = encodeLabels(labels, "labels");
+  const codes = enc.codes;
+  const k = enc.nClusters;
+  const overallMean = new Float64Array(nFeatures);
+  for (let i = 0; i < nSamples; i++) {
+    const base = offset + i * sampleStride;
+    for (let f = 0; f < nFeatures; f++) {
+      const v = chunkJSCDE774_cjs.getNumericElement(data, base + f * featureStride);
+      assertFiniteNumber(v, "X", `sample ${i}, feature ${f}`);
+      overallMean[f] = (overallMean[f] ?? 0) + v;
+    }
+  }
+  for (let f = 0; f < nFeatures; f++) {
+    overallMean[f] = (overallMean[f] ?? 0) / nSamples;
+  }
+  const centroids = new Array(k);
+  for (let c = 0; c < k; c++) centroids[c] = new Float64Array(nFeatures);
+  const clusterSizes = new Int32Array(k);
+  for (let i = 0; i < nSamples; i++) {
+    const c = readIndex(codes, i, "codes");
+    clusterSizes[c] = (clusterSizes[c] ?? 0) + 1;
+    const base = offset + i * sampleStride;
+    const centroid = readIndex(centroids, c, "centroids");
+    for (let f = 0; f < nFeatures; f++) {
+      const v = chunkJSCDE774_cjs.getNumericElement(data, base + f * featureStride);
+      assertFiniteNumber(v, "X", `sample ${i}, feature ${f}`);
+      centroid[f] = (centroid[f] ?? 0) + v;
+    }
+  }
+  for (let c = 0; c < k; c++) {
+    const sz = readIndex(clusterSizes, c, "clusterSizes");
+    if (sz <= 0) continue;
+    const centroid = readIndex(centroids, c, "centroids");
+    for (let f = 0; f < nFeatures; f++) {
+      centroid[f] = (centroid[f] ?? 0) / sz;
+    }
+  }
+  let bgss = 0;
+  for (let c = 0; c < k; c++) {
+    const sz = readIndex(clusterSizes, c, "clusterSizes");
+    if (sz <= 0) continue;
+    const centroid = readIndex(centroids, c, "centroids");
+    let distSq = 0;
+    for (let f = 0; f < nFeatures; f++) {
+      const d = (centroid[f] ?? 0) - (overallMean[f] ?? 0);
+      distSq += d * d;
+    }
+    bgss += sz * distSq;
+  }
+  let wgss = 0;
+  for (let i = 0; i < nSamples; i++) {
+    const c = readIndex(codes, i, "codes");
+    const centroid = readIndex(centroids, c, "centroids");
+    const base = offset + i * sampleStride;
+    for (let f = 0; f < nFeatures; f++) {
+      const v = chunkJSCDE774_cjs.getNumericElement(data, base + f * featureStride);
+      assertFiniteNumber(v, "X", `sample ${i}, feature ${f}`);
+      const d = v - (centroid[f] ?? 0);
+      wgss += d * d;
+    }
+  }
+  if (k < 2 || wgss === 0) return 0;
+  return bgss / (k - 1) / (wgss / (nSamples - k));
+}
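The value returned above is the Calinski-Harabasz variance ratio CH = (BGSS / (k - 1)) / (WGSS / (n - k)), where BGSS and WGSS are the bgss and wgss sums in the code. Worked numbers (assumed, illustrative only):

// n = 6 samples, k = 2 clusters, between-group sum of squares 54, within-group 3:
const n = 6, k = 2, bgss = 54, wgss = 3;
console.log((bgss / (k - 1)) / (wgss / (n - k))); // 72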
+function adjustedRandScore(labelsTrue, labelsPred) {
+  assertSameSize(labelsTrue, labelsPred, "labelsTrue", "labelsPred");
+  const n = labelsTrue.size;
+  if (n <= 1) return 1;
+  const stats = buildContingencyStats(labelsTrue, labelsPred);
+  const { contingencyDense, contingencySparse, trueCount, predCount } = stats;
+  let sumComb = 0;
+  if (contingencyDense) {
+    for (let idx = 0; idx < contingencyDense.length; idx++) {
+      const nij = readIndex(contingencyDense, idx, "contingencyDense");
+      if (nij > 0) sumComb += comb2(nij);
+    }
+  } else if (contingencySparse) {
+    for (const nij of contingencySparse.values()) {
+      sumComb += comb2(nij);
+    }
+  }
+  let sumCombTrue = 0;
+  for (let i = 0; i < trueCount.length; i++) {
+    sumCombTrue += comb2(readIndex(trueCount, i, "trueCount"));
+  }
+  let sumCombPred = 0;
+  for (let j = 0; j < predCount.length; j++) {
+    sumCombPred += comb2(readIndex(predCount, j, "predCount"));
+  }
+  const totalPairs = comb2(n);
+  if (totalPairs === 0) return 1;
+  const expectedIndex = sumCombTrue * sumCombPred / totalPairs;
+  const maxIndex = (sumCombTrue + sumCombPred) / 2;
+  const denom = maxIndex - expectedIndex;
+  if (denom === 0) return 1;
+  return (sumComb - expectedIndex) / denom;
+}
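The pair counting above is the adjusted Rand index ARI = (Index - Expected) / (MaxIndex - Expected), with Index = sum over cells of C(n_ij, 2), Expected = (sum_i C(a_i, 2)) * (sum_j C(b_j, 2)) / C(n, 2) and MaxIndex = (sum_i C(a_i, 2) + sum_j C(b_j, 2)) / 2. Worked by hand for labels [0,0,1,1] vs [1,1,0,0], a relabelled perfect match, illustrative only:

// Index = 2 (two contingency cells of size 2), row and column pair sums are both 2, C(4, 2) = 6.
const index = 2, sumCombTrue = 2, sumCombPred = 2, totalPairs = 6;
const expected = (sumCombTrue * sumCombPred) / totalPairs; // 2/3
const maxIndex = (sumCombTrue + sumCombPred) / 2; // 2
console.log((index - expected) / (maxIndex - expected)); // 1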
+function adjustedMutualInfoScore(labelsTrue, labelsPred, averageMethod = "arithmetic") {
+  const stats = buildContingencyStats(labelsTrue, labelsPred);
+  const { n, trueCount, predCount } = stats;
+  if (n <= 1) return 1;
+  const mi = mutualInformationFromContingency(stats);
+  const hTrue = entropyFromCountArray(trueCount, n);
+  const hPred = entropyFromCountArray(predCount, n);
+  const emi = expectedMutualInformation(stats);
+  const normalizer = averageEntropy(hTrue, hPred, averageMethod);
+  if (Math.abs(normalizer) < 1e-15) return 1;
+  const denom = normalizer - emi;
+  if (Math.abs(denom) < 1e-15) return 0;
+  const ami = (mi - emi) / denom;
+  if (!Number.isFinite(ami)) return 0;
+  if (ami > 1) return 1;
+  if (ami < -1) return -1;
+  return ami;
+}
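The adjustment above follows AMI = (MI - E[MI]) / (avg(H_true, H_pred) - E[MI]), where avg is selected by averageMethod ("arithmetic" by default) and the result is clamped to [-1, 1]. Worked numbers for a perfect 2x2 split of 4 samples (MI = H = ln 2, E[MI] = (ln 2)/3, matching the expectedMutualInformation sketch earlier), illustrative only:

const MI = Math.log(2);
const H = Math.log(2); // H_true = H_pred for this symmetric case
const EMI = Math.log(2) / 3;
console.log((MI - EMI) / ((H + H) / 2 - EMI)); // 1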
+function normalizedMutualInfoScore(labelsTrue, labelsPred, averageMethod = "arithmetic") {
+  const stats = buildContingencyStats(labelsTrue, labelsPred);
+  const { n, trueCount, predCount } = stats;
+  const mi = mutualInformationFromContingency(stats);
+  const ht = entropyFromCountArray(trueCount, n);
+  const hp = entropyFromCountArray(predCount, n);
+  if (ht === 0 || hp === 0) {
+    return ht === 0 && hp === 0 ? 1 : 0;
+  }
+  const normalizer = averageEntropy(ht, hp, averageMethod);
+  if (normalizer === 0) return 0;
+  const nmi = mi / normalizer;
+  if (nmi > 1) return 1;
+  if (nmi < 0) return 0;
+  return nmi;
+}
+function fowlkesMallowsScore(labelsTrue, labelsPred) {
+  const stats = buildContingencyStats(labelsTrue, labelsPred);
+  const { contingencyDense, contingencySparse, trueCount, predCount, n } = stats;
+  if (n === 0) return 1;
+  let tk = 0;
+  if (contingencyDense) {
+    for (let idx = 0; idx < contingencyDense.length; idx++) {
+      const nij = readIndex(contingencyDense, idx, "contingencyDense");
+      if (nij > 0) tk += comb2(nij);
+    }
+  } else if (contingencySparse) {
+    for (const nij of contingencySparse.values()) {
+      tk += comb2(nij);
+    }
+  }
+  let pk = 0;
+  for (let i = 0; i < trueCount.length; i++) pk += comb2(readIndex(trueCount, i, "trueCount"));
+  let qk = 0;
+  for (let j = 0; j < predCount.length; j++) qk += comb2(readIndex(predCount, j, "predCount"));
+  if (pk === 0 || qk === 0) return 0;
+  return tk / Math.sqrt(pk * qk);
+}
+function homogeneityScore(labelsTrue, labelsPred) {
+  const stats = buildContingencyStats(labelsTrue, labelsPred);
+  const { contingencyDense, contingencySparse, predCount, trueCount, nPred, n } = stats;
+  if (n === 0) return 1;
+  let hck = 0;
+  if (contingencyDense) {
+    for (let idx = 0; idx < contingencyDense.length; idx++) {
+      const nij = readIndex(contingencyDense, idx, "contingencyDense");
+      if (nij <= 0) continue;
+      const p = idx - Math.floor(idx / nPred) * nPred;
+      const nj = readIndex(predCount, p, "predCount");
+      if (nj > 0) hck -= nij / n * Math.log(nij / nj);
+    }
+  } else if (contingencySparse) {
+    for (const [key, nij] of contingencySparse) {
+      if (nij <= 0) continue;
+      const p = key - Math.floor(key / nPred) * nPred;
+      const nj = readIndex(predCount, p, "predCount");
+      if (nj > 0) hck -= nij / n * Math.log(nij / nj);
+    }
+  }
+  const hc = entropyFromCountArray(trueCount, n);
+  return hc === 0 ? 1 : 1 - hck / hc;
+}
+function completenessScore(labelsTrue, labelsPred) {
+  const stats = buildContingencyStats(labelsTrue, labelsPred);
+  const { contingencyDense, contingencySparse, trueCount, predCount, nPred, n } = stats;
+  if (n === 0) return 1;
+  let hkc = 0;
+  if (contingencyDense) {
+    for (let idx = 0; idx < contingencyDense.length; idx++) {
+      const nij = readIndex(contingencyDense, idx, "contingencyDense");
+      if (nij <= 0) continue;
+      const t = Math.floor(idx / nPred);
+      const ni = readIndex(trueCount, t, "trueCount");
+      if (ni > 0) hkc -= nij / n * Math.log(nij / ni);
+    }
+  } else if (contingencySparse) {
+    for (const [key, nij] of contingencySparse) {
+      if (nij <= 0) continue;
+      const t = Math.floor(key / nPred);
+      const ni = readIndex(trueCount, t, "trueCount");
+      if (ni > 0) hkc -= nij / n * Math.log(nij / ni);
+    }
+  }
+  const hk = entropyFromCountArray(predCount, n);
+  return hk === 0 ? 1 : 1 - hkc / hk;
+}
+function vMeasureScore(labelsTrue, labelsPred, beta = 1) {
+  if (!Number.isFinite(beta) || beta <= 0) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError("beta must be a positive finite number", "beta", beta);
+  }
+  const h = homogeneityScore(labelsTrue, labelsPred);
+  const c = completenessScore(labelsTrue, labelsPred);
+  if (h + c === 0) return 0;
+  return (1 + beta) * h * c / (beta * h + c);
+}
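With the default beta = 1, the v-measure above is the harmonic mean of homogeneity and completeness. A worked example with assumed scores h = 1.0 and c = 0.5, illustrative only:

const h = 1.0, c = 0.5, beta = 1;
console.log((1 + beta) * h * c / (beta * h + c)); // 0.666..., the harmonic mean of 1.0 and 0.5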
+
+// src/metrics/regression.ts
+function getNumericRegressionData(t, name) {
+  if (t.dtype === "string") {
+    throw new chunkJSCDE774_cjs.DTypeError(`${name} must be numeric tensors`);
+  }
+  if (t.dtype === "int64") {
+    throw new chunkJSCDE774_cjs.DTypeError(`${name} must be numeric tensors (int64 not supported)`);
+  }
+  const data = t.data;
+  if (!chunkJSCDE774_cjs.isTypedArray(data) || !chunkJSCDE774_cjs.isNumericTypedArray(data)) {
+    throw new chunkJSCDE774_cjs.DTypeError(`${name} must be numeric tensors`);
+  }
+  return data;
+}
+function readNumeric(data, offsetter, index, name) {
+  const value = chunkJSCDE774_cjs.getNumericElement(data, offsetter(index));
+  assertFiniteNumber(value, name, `index ${index}`);
+  return value;
+}
+function mse(yTrue, yPred) {
+  assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
+  const yTrueData = getNumericRegressionData(yTrue, "yTrue");
+  const yPredData = getNumericRegressionData(yPred, "yPred");
+  if (yTrue.size === 0) return 0;
+  const trueOffset = createFlatOffsetter(yTrue);
+  const predOffset = createFlatOffsetter(yPred);
+  let sumSquaredError = 0;
+  for (let i = 0; i < yTrue.size; i++) {
+    const diff = readNumeric(yTrueData, trueOffset, i, "yTrue") - readNumeric(yPredData, predOffset, i, "yPred");
+    sumSquaredError += diff * diff;
+  }
+  return sumSquaredError / yTrue.size;
+}
+function rmse(yTrue, yPred) {
+  return Math.sqrt(mse(yTrue, yPred));
+}
+function mae(yTrue, yPred) {
+  assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
+  const yTrueData = getNumericRegressionData(yTrue, "yTrue");
+  const yPredData = getNumericRegressionData(yPred, "yPred");
+  if (yTrue.size === 0) return 0;
+  const trueOffset = createFlatOffsetter(yTrue);
+  const predOffset = createFlatOffsetter(yPred);
+  let sumAbsError = 0;
+  for (let i = 0; i < yTrue.size; i++) {
+    const diff = readNumeric(yTrueData, trueOffset, i, "yTrue") - readNumeric(yPredData, predOffset, i, "yPred");
+    sumAbsError += Math.abs(diff);
+  }
+  return sumAbsError / yTrue.size;
+}
+function r2Score(yTrue, yPred) {
+  assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
+  const yTrueData = getNumericRegressionData(yTrue, "yTrue");
+  const yPredData = getNumericRegressionData(yPred, "yPred");
+  if (yTrue.size === 0) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError("r2Score requires at least one sample", "yTrue", yTrue.size);
+  }
+  const trueOffset = createFlatOffsetter(yTrue);
+  const predOffset = createFlatOffsetter(yPred);
+  let sumTrue = 0;
+  for (let i = 0; i < yTrue.size; i++) {
+    sumTrue += readNumeric(yTrueData, trueOffset, i, "yTrue");
+  }
+  const mean = sumTrue / yTrue.size;
+  let ssRes = 0;
+  let ssTot = 0;
+  for (let i = 0; i < yTrue.size; i++) {
+    const trueVal = readNumeric(yTrueData, trueOffset, i, "yTrue");
+    const predVal = readNumeric(yPredData, predOffset, i, "yPred");
+    ssRes += (trueVal - predVal) ** 2;
+    ssTot += (trueVal - mean) ** 2;
+  }
+  if (ssTot === 0) {
+    return ssRes === 0 ? 1 : 0;
+  }
+  return 1 - ssRes / ssTot;
+}
+function adjustedR2Score(yTrue, yPred, nFeatures) {
+  if (!Number.isFinite(nFeatures) || !Number.isInteger(nFeatures) || nFeatures < 0) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      "nFeatures must be a non-negative integer",
+      "nFeatures",
+      nFeatures
+    );
+  }
+  assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
+  const n = yTrue.size;
+  const p = nFeatures;
+  if (n <= p + 1) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      `Adjusted R\xB2 requires n > p + 1 (samples > features + 1). Got n=${n}, p=${p}`,
+      "nFeatures",
+      nFeatures
+    );
+  }
+  const r2 = r2Score(yTrue, yPred);
+  return 1 - (1 - r2) * (n - 1) / (n - p - 1);
+}
+function mape(yTrue, yPred) {
+  assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
+  const yTrueData = getNumericRegressionData(yTrue, "yTrue");
+  const yPredData = getNumericRegressionData(yPred, "yPred");
+  if (yTrue.size === 0) return 0;
+  const trueOffset = createFlatOffsetter(yTrue);
+  const predOffset = createFlatOffsetter(yPred);
+  let sumPercentError = 0;
+  let nonZeroCount = 0;
+  for (let i = 0; i < yTrue.size; i++) {
+    const trueVal = readNumeric(yTrueData, trueOffset, i, "yTrue");
+    const predVal = readNumeric(yPredData, predOffset, i, "yPred");
+    if (trueVal !== 0) {
+      sumPercentError += Math.abs((trueVal - predVal) / trueVal);
+      nonZeroCount++;
+    }
+  }
+  if (nonZeroCount === 0) {
+    return 0;
+  }
+  return sumPercentError / nonZeroCount * 100;
+}
+function medianAbsoluteError(yTrue, yPred) {
+  assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
+  const yTrueData = getNumericRegressionData(yTrue, "yTrue");
+  const yPredData = getNumericRegressionData(yPred, "yPred");
+  if (yTrue.size === 0) return 0;
+  const trueOffset = createFlatOffsetter(yTrue);
+  const predOffset = createFlatOffsetter(yPred);
+  const errors = [];
+  for (let i = 0; i < yTrue.size; i++) {
+    const diff = Math.abs(
+      readNumeric(yTrueData, trueOffset, i, "yTrue") - readNumeric(yPredData, predOffset, i, "yPred")
+    );
+    errors.push(diff);
+  }
+  errors.sort((a, b) => a - b);
+  const mid = Math.floor(errors.length / 2);
+  return errors.length % 2 !== 0 ? errors[mid] ?? 0 : ((errors[mid - 1] ?? 0) + (errors[mid] ?? 0)) / 2;
+}
+function maxError(yTrue, yPred) {
+  assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
+  const yTrueData = getNumericRegressionData(yTrue, "yTrue");
+  const yPredData = getNumericRegressionData(yPred, "yPred");
+  const trueOffset = createFlatOffsetter(yTrue);
+  const predOffset = createFlatOffsetter(yPred);
+  let maxErr = 0;
+  for (let i = 0; i < yTrue.size; i++) {
+    const diff = Math.abs(
+      readNumeric(yTrueData, trueOffset, i, "yTrue") - readNumeric(yPredData, predOffset, i, "yPred")
+    );
+    maxErr = Math.max(maxErr, diff);
+  }
+  return maxErr;
+}
+function explainedVarianceScore(yTrue, yPred) {
+  assertSameSizeVectors(yTrue, yPred, "yTrue", "yPred");
+  const yTrueData = getNumericRegressionData(yTrue, "yTrue");
+  const yPredData = getNumericRegressionData(yPred, "yPred");
+  if (yTrue.size === 0) {
+    throw new chunkJSCDE774_cjs.InvalidParameterError(
+      "explainedVarianceScore requires at least one sample",
+      "yTrue",
+      yTrue.size
+    );
+  }
+  const trueOffset = createFlatOffsetter(yTrue);
+  const predOffset = createFlatOffsetter(yPred);
+  let sumTrue = 0;
+  let sumResidual = 0;
+  for (let i = 0; i < yTrue.size; i++) {
+    const trueVal = readNumeric(yTrueData, trueOffset, i, "yTrue");
+    const predVal = readNumeric(yPredData, predOffset, i, "yPred");
+    sumTrue += trueVal;
+    sumResidual += trueVal - predVal;
+  }
+  const meanTrue = sumTrue / yTrue.size;
+  const meanResidual = sumResidual / yTrue.size;
+  let varResidual = 0;
+  let varTrue = 0;
+  for (let i = 0; i < yTrue.size; i++) {
+    const trueVal = readNumeric(yTrueData, trueOffset, i, "yTrue");
+    const predVal = readNumeric(yPredData, predOffset, i, "yPred");
+    const residual = trueVal - predVal;
+    varResidual += (residual - meanResidual) ** 2;
+    varTrue += (trueVal - meanTrue) ** 2;
+  }
+  if (varTrue === 0) {
+    return varResidual === 0 ? 1 : 0;
+  }
+  return 1 - varResidual / varTrue;
+}
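The regression metrics above reduce to the textbook definitions once the tensor plumbing (createFlatOffsetter, readNumeric) is stripped away. A plain-array recomputation of mse, rmse, mae and r2Score for a small example, illustrative only and independent of the package API:

const yTrue = [3, -0.5, 2, 7];
const yPred = [2.5, 0.0, 2, 8];
const n = yTrue.length;
const mseVal = yTrue.reduce((s, t, i) => s + (t - yPred[i]) ** 2, 0) / n; // 0.375
const maeVal = yTrue.reduce((s, t, i) => s + Math.abs(t - yPred[i]), 0) / n; // 0.5
const mean = yTrue.reduce((s, t) => s + t, 0) / n;
const ssRes = yTrue.reduce((s, t, i) => s + (t - yPred[i]) ** 2, 0);
const ssTot = yTrue.reduce((s, t) => s + (t - mean) ** 2, 0);
console.log(mseVal, Math.sqrt(mseVal), maeVal, 1 - ssRes / ssTot); // 0.375 0.612... 0.5 0.9486...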
+
+exports.accuracy = accuracy;
+exports.adjustedMutualInfoScore = adjustedMutualInfoScore;
+exports.adjustedR2Score = adjustedR2Score;
+exports.adjustedRandScore = adjustedRandScore;
+exports.averagePrecisionScore = averagePrecisionScore;
+exports.balancedAccuracyScore = balancedAccuracyScore;
+exports.calinskiHarabaszScore = calinskiHarabaszScore;
+exports.classificationReport = classificationReport;
+exports.cohenKappaScore = cohenKappaScore;
+exports.completenessScore = completenessScore;
+exports.confusionMatrix = confusionMatrix;
+exports.daviesBouldinScore = daviesBouldinScore;
+exports.explainedVarianceScore = explainedVarianceScore;
+exports.f1Score = f1Score;
+exports.fbetaScore = fbetaScore;
+exports.fowlkesMallowsScore = fowlkesMallowsScore;
+exports.hammingLoss = hammingLoss;
+exports.homogeneityScore = homogeneityScore;
+exports.jaccardScore = jaccardScore;
+exports.logLoss = logLoss;
+exports.mae = mae;
+exports.mape = mape;
+exports.matthewsCorrcoef = matthewsCorrcoef;
+exports.maxError = maxError;
+exports.medianAbsoluteError = medianAbsoluteError;
+exports.metrics_exports = metrics_exports;
+exports.mse = mse;
+exports.normalizedMutualInfoScore = normalizedMutualInfoScore;
+exports.precision = precision;
+exports.precisionRecallCurve = precisionRecallCurve;
+exports.r2Score = r2Score;
+exports.recall = recall;
+exports.rmse = rmse;
+exports.rocAucScore = rocAucScore;
+exports.rocCurve = rocCurve;
+exports.silhouetteSamples = silhouetteSamples;
+exports.silhouetteScore = silhouetteScore;
+exports.vMeasureScore = vMeasureScore;
+//# sourceMappingURL=chunk-ZB75FESB.cjs.map
+//# sourceMappingURL=chunk-ZB75FESB.cjs.map