@svrnsec/pulse 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +883 -622
  3. package/SECURITY.md +86 -86
  4. package/bin/svrnsec-pulse.js +7 -7
  5. package/dist/{pulse.cjs.js → pulse.cjs} +6379 -6420
  6. package/dist/pulse.cjs.map +1 -0
  7. package/dist/pulse.esm.js +6380 -6421
  8. package/dist/pulse.esm.js.map +1 -1
  9. package/index.d.ts +895 -846
  10. package/package.json +185 -165
  11. package/pkg/pulse_core.js +174 -173
  12. package/src/analysis/audio.js +213 -213
  13. package/src/analysis/authenticityAudit.js +408 -390
  14. package/src/analysis/coherence.js +502 -502
  15. package/src/analysis/coordinatedBehavior.js +825 -0
  16. package/src/analysis/heuristic.js +428 -428
  17. package/src/analysis/jitter.js +446 -446
  18. package/src/analysis/llm.js +473 -472
  19. package/src/analysis/populationEntropy.js +404 -403
  20. package/src/analysis/provider.js +248 -248
  21. package/src/analysis/refraction.js +392 -0
  22. package/src/analysis/trustScore.js +356 -356
  23. package/src/cli/args.js +36 -36
  24. package/src/cli/commands/scan.js +192 -192
  25. package/src/cli/runner.js +157 -157
  26. package/src/collector/adaptive.js +200 -200
  27. package/src/collector/bio.js +297 -287
  28. package/src/collector/canvas.js +247 -239
  29. package/src/collector/dram.js +203 -203
  30. package/src/collector/enf.js +311 -311
  31. package/src/collector/entropy.js +195 -195
  32. package/src/collector/gpu.js +248 -245
  33. package/src/collector/idleAttestation.js +480 -480
  34. package/src/collector/sabTimer.js +189 -191
  35. package/src/fingerprint.js +475 -475
  36. package/src/index.js +342 -342
  37. package/src/integrations/react-native.js +462 -459
  38. package/src/integrations/react.js +184 -185
  39. package/src/middleware/express.js +155 -155
  40. package/src/middleware/next.js +174 -175
  41. package/src/proof/challenge.js +249 -249
  42. package/src/proof/engagementToken.js +426 -394
  43. package/src/proof/fingerprint.js +268 -268
  44. package/src/proof/validator.js +83 -143
  45. package/src/registry/serializer.js +349 -349
  46. package/src/terminal.js +263 -263
  47. package/src/update-notifier.js +259 -264
  48. package/dist/pulse.cjs.js.map +0 -1
@@ -1,390 +1,408 @@
1
- /**
2
- * @svrnsec/pulse — Authenticity Audit
3
- *
4
- * Produces a statistically rigorous, physics-backed estimate of what fraction
5
- * of a user cohort are real humans on real hardware.
6
- *
7
- * This is the "$44 billion question" — the number Twitter and Elon argued
8
- * about for months with no physics-layer evidence on either side. Browser
9
- * fingerprinting can be spoofed. Declared metrics can be gamed. The thermal
10
- * state of a real device at 2 AM cannot.
11
- *
12
- * Method
13
- * ──────
14
- * 1. Cluster tokens by hardware signature
15
- * ENF deviation bucket (±0.025 Hz → localizes to substation/building)
16
- * × DRAM verdict (dram | virtual | ambiguous)
17
- * × Thermal label (hot_to_cold | sustained_hot | step_function …)
18
- * × 10-minute time bucket
19
- * Authentic users scatter across all dimensions.
20
- * A farm in one building, running the same script, on the same hardware
21
- * generation collapses into one tight cluster.
22
- *
23
- * 2. Score each cluster with Population Entropy (5 statistical tests).
24
- * Clusters with sybilScore > FARM_THRESHOLD are classified as bot farms.
25
- *
26
- * 3. Bootstrap a 95% confidence interval on the human-rate estimate.
27
- * Each resample draws tokens with replacement and re-runs classification.
28
- *
29
- * 4. Fingerprint each bot cluster for cross-window tracking.
30
- * Same ENF deviation + thermal pattern reappearing next hour = same farm.
31
- *
32
- * Output
33
- * ──────
34
- * estimatedHumanPct The headline number. Treat anything below 90% as
35
- * a platform health emergency.
36
- *
37
- * confidenceInterval [lo, hi] at the requested confidence level.
38
- * Narrow CI = large cohort + clear signal.
39
- * Wide CI = small cohort or mixed evidence.
40
- *
41
- * botClusters Per-farm breakdown: size, sybilScore, ENF location,
42
- * thermal pattern, dominant attack signal.
43
- *
44
- * grade CLEAN / LOW_FRAUD / MODERATE_FRAUD / HIGH_FRAUD
45
- *
46
- * Typical values
47
- * ──────────────
48
- * Organic product feed, 10k tokens over 1 hour → humanPct ≈ 92–97%
49
- * Incentivised engagement campaign → humanPct ≈ 55–75%
50
- * Coordinated click farm attack → humanPct ≈ 8–35%
51
- */
52
-
53
- import { analysePopulation } from './populationEntropy.js';
54
-
55
- // ── Thresholds ─────────────────────────────────────────────────────────────────
56
-
57
- /** Clusters scoring above this are classified as bot farms. */
58
- const FARM_THRESHOLD = 65;
59
-
60
- /** Minimum tokens in a cluster before we run population analysis on it.
61
- * Smaller clusters are treated as noise and counted as authentic. */
62
- const MIN_CLUSTER_SIZE = 5;
63
-
64
- /** ENF deviation bucket width in Hz. ±0.025 Hz localizes devices to the same
65
- * substation — close enough to imply the same building. */
66
- const ENF_BUCKET_HZ = 0.05;
67
-
68
- /** Time bucket width. 10-minute buckets catch batch-dispatch patterns
69
- * without splitting a legitimate organic traffic surge. */
70
- const TIME_BUCKET_MS = 10 * 60 * 1000;
71
-
72
- /** Bootstrap iterations for confidence interval estimation. */
73
- const BOOTSTRAP_ITERATIONS = 500;
74
-
75
- // ── Grade thresholds ──────────────────────────────────────────────────────────
76
-
77
- const GRADES = [
78
- { min: 90, grade: 'CLEAN', label: 'Authentic cohort', color: 'bgreen' },
79
- { min: 75, grade: 'LOW_FRAUD', label: 'Elevated fraud signal', color: 'byellow' },
80
- { min: 50, grade: 'MODERATE_FRAUD', label: 'Significant bot presence', color: 'byellow' },
81
- { min: 0, grade: 'HIGH_FRAUD', label: 'Platform health emergency', color: 'bred' },
82
- ];
83
-
84
- // ── authenticityAudit ─────────────────────────────────────────────────────────
85
-
86
- /**
87
- * Run a full authenticity audit on a cohort of decoded engagement tokens.
88
- *
89
- * @param {object[]} tokens Decoded engagement token objects
90
- * (from decodeToken / verifyEngagementToken)
91
- * @param {object} [opts]
92
- * @param {number} [opts.windowMs] Analysis window in ms (default: all tokens)
93
- * @param {number} [opts.minClusterSize] Min cluster size for farm analysis (default: 5)
94
- * @param {number} [opts.farmThreshold] sybilScore cutoff for farm classification (default: 65)
95
- * @param {number} [opts.confidenceLevel] Bootstrap CI level, e.g. 0.95 (default: 0.95)
96
- * @param {number} [opts.bootstrapIter] Bootstrap iterations (default: 500)
97
- * @returns {AuthenticityReport}
98
- */
99
- export function authenticityAudit(tokens, opts = {}) {
100
- const {
101
- minClusterSize = MIN_CLUSTER_SIZE,
102
- farmThreshold = FARM_THRESHOLD,
103
- confidenceLevel = 0.95,
104
- bootstrapIter = BOOTSTRAP_ITERATIONS,
105
- } = opts;
106
-
107
- if (!Array.isArray(tokens) || tokens.length === 0) {
108
- return _emptyReport();
109
- }
110
-
111
- // ── 1. Cluster ─────────────────────────────────────────────────────────────
112
- const clusterMap = _clusterTokens(tokens);
113
-
114
- // ── 2. Score each cluster ──────────────────────────────────────────────────
115
- const botClusterIds = new Set();
116
- const clusterResults = [];
117
-
118
- for (const [key, clusterTokens] of clusterMap) {
119
- if (clusterTokens.length < minClusterSize) continue;
120
-
121
- const pop = analysePopulation(clusterTokens);
122
- const isFarm = pop.sybilScore >= farmThreshold;
123
-
124
- const fingerprint = _fingerprint(key, clusterTokens, pop);
125
-
126
- clusterResults.push({
127
- id: fingerprint.id,
128
- size: clusterTokens.length,
129
- sybilScore: pop.sybilScore,
130
- authentic: !isFarm,
131
- signature: fingerprint.signature,
132
- topSignals: _topSignals(pop),
133
- flags: pop.flags,
134
- });
135
-
136
- if (isFarm) {
137
- for (const t of clusterTokens) botClusterIds.add(t);
138
- }
139
- }
140
-
141
- // ── 3. Count fraudulent tokens ─────────────────────────────────────────────
142
- // Tokens in clusters too small to analyse are given benefit of the doubt.
143
- const fraudCount = botClusterIds.size;
144
- const authenticCount = tokens.length - fraudCount;
145
- const rawHumanPct = (authenticCount / tokens.length) * 100;
146
-
147
- // ── 4. Bootstrap confidence interval ──────────────────────────────────────
148
- // We bootstrap the "is this token authentic?" binary labels.
149
- const labels = tokens.map(t => (botClusterIds.has(t) ? 0 : 1));
150
- const ci = _bootstrapCI(labels, confidenceLevel, bootstrapIter);
151
-
152
- // ── 5. Grade and summarise ─────────────────────────────────────────────────
153
- const gradeEntry = GRADES.find(g => rawHumanPct >= g.min) ?? GRADES[GRADES.length - 1];
154
- const botClusters = clusterResults.filter(c => !c.authentic)
155
- .sort((a, b) => b.sybilScore - a.sybilScore);
156
- const authClusters = clusterResults.filter(c => c.authentic);
157
-
158
- return {
159
- // ── Headline ──
160
- cohortSize: tokens.length,
161
- estimatedHumanPct: +rawHumanPct.toFixed(1),
162
- confidenceInterval: ci,
163
- confidenceLevel,
164
-
165
- // ── Cluster breakdown ──
166
- clusterCount: clusterResults.length,
167
- botClusterCount: botClusters.length,
168
- authenticClusterCount: authClusters.length,
169
-
170
- // ── Token counts ──
171
- authenticTokenCount: authenticCount,
172
- fraudulentTokenCount: fraudCount,
173
-
174
- // ── Farm detail ──
175
- botClusters,
176
-
177
- // ── Grade ──
178
- grade: gradeEntry.grade,
179
- label: gradeEntry.label,
180
- color: gradeEntry.color,
181
- recommendation: _recommendation(gradeEntry.grade, botClusters),
182
- };
183
- }
184
-
185
- // ── Clustering ────────────────────────────────────────────────────────────────
186
-
187
- /**
188
- * Bucket tokens into hardware-signature clusters.
189
- *
190
- * Cluster key = ENF deviation bucket × DRAM verdict × thermal label × time bucket
191
- *
192
- * This collapses bot farms (same building, same hardware, same script, same
193
- * time window) into single clusters while leaving organic traffic scattered.
194
- *
195
- * @param {object[]} tokens
196
- * @returns {Map<string, object[]>}
197
- */
198
- function _clusterTokens(tokens) {
199
- const map = new Map();
200
-
201
- for (const token of tokens) {
202
- const key = _clusterKey(token);
203
- if (!map.has(key)) map.set(key, []);
204
- map.get(key).push(token);
205
- }
206
-
207
- return map;
208
- }
209
-
210
- function _clusterKey(token) {
211
- const hw = token.hw ?? {};
212
- const idle = token.idle ?? {};
213
- const iat = token.iat ?? 0;
214
-
215
- // ENF deviation nearest bucket (null/undefined 'no_enf')
216
- const enfBucket = hw.enfDev != null
217
- ? `e${Math.round(hw.enfDev / ENF_BUCKET_HZ)}`
218
- : 'no_enf';
219
-
220
- // DRAM verdict string
221
- const dram = hw.dram ?? 'unknown';
222
-
223
- // Thermal transition label
224
- const therm = idle.therm ?? 'unknown';
225
-
226
- // 10-minute time bucket
227
- const tBucket = Math.floor(iat / TIME_BUCKET_MS);
228
-
229
- return `${enfBucket}:${dram}:${therm}:${tBucket}`;
230
- }
231
-
232
- // ── Bootstrap CI ──────────────────────────────────────────────────────────────
233
-
234
- /**
235
- * Non-parametric bootstrap confidence interval on the mean of a 0/1 vector.
236
- *
237
- * @param {number[]} values 0 (fraudulent) or 1 (authentic) per token
238
- * @param {number} level Confidence level, e.g. 0.95
239
- * @param {number} iters Bootstrap iterations
240
- * @returns {[number, number]} [lo, hi] as percentages (0–100)
241
- */
242
- function _bootstrapCI(values, level, iters) {
243
- const n = values.length;
244
- if (n === 0) return [0, 0];
245
-
246
- const means = new Float64Array(iters);
247
-
248
- for (let i = 0; i < iters; i++) {
249
- let sum = 0;
250
- for (let j = 0; j < n; j++) {
251
- sum += values[(Math.random() * n) | 0];
252
- }
253
- means[i] = (sum / n) * 100;
254
- }
255
-
256
- means.sort();
257
-
258
- const alpha = 1 - level;
259
- const lo = means[(alpha / 2 * iters) | 0];
260
- const hi = means[((1 - alpha / 2) * iters) | 0];
261
-
262
- return [+lo.toFixed(1), +hi.toFixed(1)];
263
- }
264
-
265
- // ── Cluster fingerprinting ────────────────────────────────────────────────────
266
-
267
- /**
268
- * Produce a stable fingerprint for a bot cluster so the same farm can be
269
- * recognised across multiple analysis windows.
270
- *
271
- * Fingerprint components that are stable across time:
272
- * - ENF deviation (tied to physical location / substation)
273
- * - DRAM verdict (tied to hardware generation)
274
- * - Thermal label (tied to operational pattern)
275
- *
276
- * @param {string} key
277
- * @param {object[]} tokens
278
- * @param {object} pop analysePopulation result
279
- * @returns {{ id: string, signature: object }}
280
- */
281
- function _fingerprint(key, tokens, pop) {
282
- const sample = tokens[0] ?? {};
283
- const hw = sample.hw ?? {};
284
- const idle = sample.idle ?? {};
285
-
286
- // Mean ENF deviation across cluster (stable for co-located devices)
287
- const enfDevs = tokens.map(t => t.hw?.enfDev).filter(v => v != null);
288
- const meanEnfDev = enfDevs.length
289
- ? +(enfDevs.reduce((s, v) => s + v, 0) / enfDevs.length).toFixed(4)
290
- : null;
291
-
292
- // Mean idle duration (reveals script-sleep cadence)
293
- const idleDurations = tokens.map(t => t.idle?.dMs).filter(v => v != null);
294
- const meanIdleMs = idleDurations.length
295
- ? Math.round(idleDurations.reduce((s, v) => s + v, 0) / idleDurations.length)
296
- : null;
297
-
298
- const signature = {
299
- enfRegion: hw.enf ?? 'unknown',
300
- dramVerdict: hw.dram ?? 'unknown',
301
- thermalLabel: idle.therm ?? 'unknown',
302
- meanEnfDev,
303
- meanIdleMs,
304
- };
305
-
306
- // Stable ID: hash-like hex derived from the signature (deterministic, not crypto)
307
- const sigStr = JSON.stringify(signature);
308
- const id = 'farm_' + _djb2(sigStr).toString(16).slice(0, 8);
309
-
310
- return { id, signature };
311
- }
312
-
313
- // ── Helpers ───────────────────────────────────────────────────────────────────
314
-
315
- function _topSignals(pop) {
316
- return Object.entries(pop.tests ?? {})
317
- .map(([name, result]) => ({ name, score: result.score ?? 0 }))
318
- .sort((a, b) => b.score - a.score)
319
- .slice(0, 2)
320
- .map(s => s.name);
321
- }
322
-
323
- function _recommendation(grade, botClusters) {
324
- if (grade === 'CLEAN') {
325
- return 'Cohort appears authentic. No action required.';
326
- }
327
- if (grade === 'LOW_FRAUD') {
328
- return `${botClusters.length} suspicious cluster(s) detected. Monitor and consider manual review.`;
329
- }
330
- if (grade === 'MODERATE_FRAUD') {
331
- return `${botClusters.length} bot farm cluster(s) identified. Block tokens from flagged clusters and investigate upstream traffic source.`;
332
- }
333
- return (
334
- `CRITICAL: ${botClusters.length} bot farm cluster(s) account for a majority of traffic. ` +
335
- `Suspend engagement credit for this cohort and audit the traffic acquisition channel.`
336
- );
337
- }
338
-
339
- function _emptyReport() {
340
- return {
341
- cohortSize: 0,
342
- estimatedHumanPct: null,
343
- confidenceInterval: null,
344
- confidenceLevel: 0.95,
345
- clusterCount: 0,
346
- botClusterCount: 0,
347
- authenticClusterCount: 0,
348
- authenticTokenCount: 0,
349
- fraudulentTokenCount: 0,
350
- botClusters: [],
351
- grade: 'CLEAN',
352
- label: 'No data',
353
- color: 'bgreen',
354
- recommendation: 'No tokens provided.',
355
- };
356
- }
357
-
358
- /**
359
- * DJB2 hash — non-cryptographic, deterministic, produces stable cluster IDs.
360
- * @param {string} str
361
- * @returns {number}
362
- */
363
- function _djb2(str) {
364
- let h = 5381;
365
- for (let i = 0; i < str.length; i++) {
366
- h = ((h << 5) + h) ^ str.charCodeAt(i);
367
- h = h >>> 0; // keep unsigned 32-bit
368
- }
369
- return h;
370
- }
371
-
372
- // ── JSDoc types ───────────────────────────────────────────────────────────────
373
-
374
- /**
375
- * @typedef {object} AuthenticityReport
376
- * @property {number} cohortSize Total tokens analysed
377
- * @property {number|null} estimatedHumanPct Estimated % of real humans (0–100)
378
- * @property {[number,number]|null} confidenceInterval [lo, hi] at confidenceLevel
379
- * @property {number} confidenceLevel Bootstrap CI level (e.g. 0.95)
380
- * @property {number} clusterCount Total hardware clusters identified
381
- * @property {number} botClusterCount Clusters classified as bot farms
382
- * @property {number} authenticClusterCount Clusters classified as authentic
383
- * @property {number} authenticTokenCount Tokens NOT in bot farm clusters
384
- * @property {number} fraudulentTokenCount Tokens IN bot farm clusters
385
- * @property {object[]} botClusters Per-farm breakdown (sorted by sybilScore desc)
386
- * @property {string} grade CLEAN|LOW_FRAUD|MODERATE_FRAUD|HIGH_FRAUD
387
- * @property {string} label Human-readable grade label
388
- * @property {string} color ANSI color hint for terminal rendering
389
- * @property {string} recommendation Actionable guidance string
390
- */
1
+ /**
2
+ * @svrnsec/pulse — Authenticity Audit
3
+ *
4
+ * Produces a statistically rigorous, physics-backed estimate of what fraction
5
+ * of a user cohort are real humans on real hardware.
6
+ *
7
+ * This is the "$44 billion question" — the number Twitter and Elon argued
8
+ * about for months with no physics-layer evidence on either side. Browser
9
+ * fingerprinting can be spoofed. Declared metrics can be gamed. The thermal
10
+ * state of a real device at 2 AM cannot.
11
+ *
12
+ * Method
13
+ * ──────
14
+ * 1. Cluster tokens by hardware signature
15
+ * ENF deviation bucket (±0.025 Hz → localizes to substation/building)
16
+ * × DRAM verdict (dram | virtual | ambiguous)
17
+ * (Thermal label — hot_to_cold | sustained_hot | step_function … — is not a key dimension; its diversity is scored inside each cluster.)
18
+ * × 10-minute time bucket
19
+ * Authentic users scatter across all dimensions.
20
+ * A farm in one building, running the same script, on the same hardware
21
+ * generation collapses into one tight cluster.
22
+ *
23
+ * 2. Score each cluster with Population Entropy (5 statistical tests).
24
+ * Clusters with sybilScore >= FARM_THRESHOLD are classified as bot farms.
25
+ *
26
+ * 3. Bootstrap a 95% confidence interval on the human-rate estimate.
27
+ * Each resample draws the per-token authentic/fraud labels with replacement (clusters are not re-scored).
28
+ *
29
+ * 4. Fingerprint each bot cluster for cross-window tracking.
30
+ * Same ENF deviation + thermal pattern reappearing next hour = same farm.
31
+ *
32
+ * Output
33
+ * ──────
34
+ * estimatedHumanPct The headline number. Treat anything below 90% as
35
+ * a platform health emergency.
36
+ *
37
+ * confidenceInterval [lo, hi] at the requested confidence level.
38
+ * Narrow CI = large cohort + clear signal.
39
+ * Wide CI = small cohort or mixed evidence.
40
+ *
41
+ * botClusters Per-farm breakdown: size, sybilScore, ENF location,
42
+ * thermal pattern, dominant attack signal.
43
+ *
44
+ * grade CLEAN / LOW_FRAUD / MODERATE_FRAUD / HIGH_FRAUD
45
+ *
46
+ * Typical values
47
+ * ──────────────
48
+ * Organic product feed, 10k tokens over 1 hour → humanPct ≈ 92–97%
49
+ * Incentivised engagement campaign → humanPct ≈ 55–75%
50
+ * Coordinated click farm attack → humanPct ≈ 8–35%
51
+ */
52
+
53
+ import { analysePopulation } from './populationEntropy.js';
54
+
55
+ // ── Thresholds ─────────────────────────────────────────────────────────────────
56
+
57
+ /** Clusters scoring at or above this are classified as bot farms (the check is sybilScore >= threshold). */
58
+ const FARM_THRESHOLD = 65;
59
+
60
+ /** Minimum tokens in a cluster before we run population analysis on it.
61
+ * Smaller clusters are skipped by the audit loop, so their tokens are never flagged and count as authentic. */
62
+ const MIN_CLUSTER_SIZE = 5;
63
+
64
+ /** ENF deviation bucket width in Hz. ±0.025 Hz localizes devices to the same
65
+ * substation — close enough to imply the same building. */
66
+ const ENF_BUCKET_HZ = 0.05;
67
+
68
+ /** Time bucket width. 10-minute buckets catch batch-dispatch patterns
69
+ * without splitting a legitimate organic traffic surge. */
70
+ const TIME_BUCKET_MS = 10 * 60 * 1000;
71
+
72
+ /** Default number of bootstrap resamples used for confidence interval estimation. */
73
+ const BOOTSTRAP_ITERATIONS = 500;
74
+
75
+ // ── Grade thresholds ──────────────────────────────────────────────────────────
76
+
77
+ const GRADES = [ // ordered from highest to lowest `min`; the first entry whose min <= human % is selected
78
+ { min: 90, grade: 'CLEAN', label: 'Authentic cohort', color: 'bgreen' },
79
+ { min: 75, grade: 'LOW_FRAUD', label: 'Elevated fraud signal', color: 'byellow' },
80
+ { min: 50, grade: 'MODERATE_FRAUD', label: 'Significant bot presence', color: 'byellow' },
81
+ { min: 0, grade: 'HIGH_FRAUD', label: 'Platform health emergency', color: 'bred' },
82
+ ];
83
+
84
+ // ── authenticityAudit ─────────────────────────────────────────────────────────
85
+
86
/**
 * Run a full authenticity audit on a cohort of decoded engagement tokens.
 *
 * Pipeline:
 *   1. Group the tokens into hardware-signature clusters (_clusterTokens).
 *   2. Run analysePopulation() on every cluster that has at least
 *      `minClusterSize` members; a cluster whose sybilScore is
 *      >= `farmThreshold` is classified as a bot farm.
 *   3. Label every token 0 (member of a farm cluster) or 1 (authentic) and
 *      derive the token counts from those labels.
 *   4. Bootstrap a confidence interval over the labels (_bootstrapCI).
 *   5. Pick the grade whose `min` is the highest one not exceeding the
 *      human percentage and build the report.
 *
 * @param {object[]} tokens                 Decoded engagement token objects
 *                                          (from decodeToken / verifyEngagementToken)
 * @param {object}   [opts]
 * @param {number}   [opts.windowMs]        NOTE(review): documented option that this
 *                                          function never reads — every token passed
 *                                          in is analysed.
 * @param {number}   [opts.minClusterSize]  Min cluster size for farm analysis (default: 5)
 * @param {number}   [opts.farmThreshold]   sybilScore cutoff for farm classification (default: 65)
 * @param {number}   [opts.confidenceLevel] Bootstrap CI level, e.g. 0.95 (default: 0.95)
 * @param {number}   [opts.bootstrapIter]   Bootstrap iterations (default: 500)
 * @returns {AuthenticityReport}            The empty report when `tokens` is not a
 *                                          non-empty array.
 */
export function authenticityAudit(tokens, opts = {}) {
  // `opts ?? {}` keeps an explicit `null` options argument from throwing.
  const {
    minClusterSize = MIN_CLUSTER_SIZE,
    farmThreshold = FARM_THRESHOLD,
    confidenceLevel = 0.95,
    bootstrapIter = BOOTSTRAP_ITERATIONS,
  } = opts ?? {};

  if (!Array.isArray(tokens) || tokens.length === 0) {
    return _emptyReport();
  }

  // ── 1. Cluster ─────────────────────────────────────────────────────────────
  const clusterMap = _clusterTokens(tokens);

  // ── 2. Score each cluster ──────────────────────────────────────────────────
  const botTokens = new Set();
  const clusterResults = [];

  for (const [key, clusterTokens] of clusterMap) {
    // Clusters too small to analyse are skipped: their tokens are never
    // flagged, i.e. they get the benefit of the doubt.
    if (clusterTokens.length < minClusterSize) continue;

    const pop = analysePopulation(clusterTokens);
    const isFarm = pop.sybilScore >= farmThreshold;
    const fingerprint = _fingerprint(key, clusterTokens, pop);

    clusterResults.push({
      id: fingerprint.id,
      size: clusterTokens.length,
      sybilScore: pop.sybilScore,
      authentic: !isFarm,
      signature: fingerprint.signature,
      topSignals: _topSignals(pop),
      flags: pop.flags,
    });

    if (isFarm) {
      for (const t of clusterTokens) botTokens.add(t);
    }
  }

  // ── 3. Label and count tokens ──────────────────────────────────────────────
  // Counts are derived from the per-occurrence labels rather than from
  // `botTokens.size`: the Set de-duplicates by object identity, so a token
  // object that appears twice in `tokens` would otherwise be counted once in
  // the headline figure but twice in the bootstrap input.
  const labels = tokens.map((t) => (botTokens.has(t) ? 0 : 1));
  const authenticCount = labels.reduce((sum, label) => sum + label, 0);
  const fraudCount = tokens.length - authenticCount;
  const rawHumanPct = (authenticCount / tokens.length) * 100;

  // ── 4. Bootstrap confidence interval ──────────────────────────────────────
  const ci = _bootstrapCI(labels, confidenceLevel, bootstrapIter);

  // ── 5. Grade and summarise ─────────────────────────────────────────────────
  const gradeEntry = GRADES.find((g) => rawHumanPct >= g.min) ?? GRADES[GRADES.length - 1];
  const botClusters = clusterResults
    .filter((c) => !c.authentic)
    .sort((a, b) => b.sybilScore - a.sybilScore);
  const authClusters = clusterResults.filter((c) => c.authentic);

  return {
    // ── Headline ──
    cohortSize: tokens.length,
    estimatedHumanPct: +rawHumanPct.toFixed(1),
    confidenceInterval: ci,
    confidenceLevel,

    // ── Cluster breakdown ──
    clusterCount: clusterResults.length,
    botClusterCount: botClusters.length,
    authenticClusterCount: authClusters.length,

    // ── Token counts ──
    authenticTokenCount: authenticCount,
    fraudulentTokenCount: fraudCount,

    // ── Farm detail ──
    botClusters,

    // ── Grade ──
    grade: gradeEntry.grade,
    label: gradeEntry.label,
    color: gradeEntry.color,
    recommendation: _recommendation(gradeEntry.grade, botClusters),
  };
}
184
+
185
+ // ── Clustering ────────────────────────────────────────────────────────────────
186
+
187
/**
 * Group tokens into hardware-signature clusters.
 *
 * Each token is filed under the string returned by _clusterKey(); tokens that
 * share a key land in the same array, in their input order. Bot farms (same
 * building, same hardware, same time window) are expected to pile into one
 * key while organic traffic spreads over many.
 *
 * @param {object[]} tokens
 * @returns {Map<string, object[]>} cluster key → tokens carrying that key
 */
function _clusterTokens(tokens) {
  const clusters = new Map();

  for (const entry of tokens) {
    const clusterId = _clusterKey(entry);
    const members = clusters.get(clusterId);

    if (members === undefined) {
      clusters.set(clusterId, [entry]);
    } else {
      members.push(entry);
    }
  }

  return clusters;
}
209
+
210
/**
 * Build the cluster key for one token: `<enf bucket>:<dram verdict>:<time bucket>`.
 *
 * The thermal label is deliberately left out of the key. Keying on it would
 * force every cluster to contain a single thermal label, which would make a
 * within-cluster thermal-diversity test meaningless; keeping it out lets that
 * diversity act as a discriminator inside each cluster.
 *
 * @param {object} token  Decoded token; reads `hw.enfDev`, `hw.dram` and `iat`
 * @returns {string}
 */
function _clusterKey(token) {
  const hardware = token.hw ?? {};
  const issuedAt = token.iat ?? 0;

  // ENF deviation snapped to the nearest ENF_BUCKET_HZ-wide bucket;
  // a missing deviation gets its own 'no_enf' bucket.
  let enfPart = 'no_enf';
  if (hardware.enfDev != null) {
    enfPart = `e${Math.round(hardware.enfDev / ENF_BUCKET_HZ)}`;
  }

  // DRAM verdict string, used as-is.
  const dramPart = hardware.dram ?? 'unknown';

  // Index of the TIME_BUCKET_MS-wide window that contains `iat`.
  const timePart = Math.floor(issuedAt / TIME_BUCKET_MS);

  return `${enfPart}:${dramPart}:${timePart}`;
}
234
+
235
+ // ── Bootstrap CI ──────────────────────────────────────────────────────────────
236
+
237
/**
 * Splitmix32-style deterministic PRNG.
 *
 * Used instead of Math.random() so that bootstrap results are reproducible:
 * the same seed always yields the same sequence.
 *
 * @param {number} seed  Initial state (coerced to a 32-bit integer)
 * @returns {function(): number}  Generator returning floats in [0, 1)
 */
function _splitmix32(seed) {
  let state = seed | 0;

  return function next() {
    // Advance the 32-bit state, then scramble it with xor-shift / multiply rounds.
    state = (state + 0x9e3779b9) | 0;
    let t = state ^ (state >>> 16);
    t = Math.imul(t, 0x21f0aaad);
    t ^= t >>> 15;
    t = Math.imul(t, 0x735a2d97);
    t ^= t >>> 15;
    // Reinterpret as unsigned and scale by 2^32 → [0, 1).
    return (t >>> 0) / 4294967296;
  };
}

/**
 * Non-parametric (percentile) bootstrap confidence interval on the mean of a
 * 0/1 vector.
 *
 * The PRNG is seeded from `values` itself, so identical input always produces
 * an identical interval.
 *
 * Edge cases:
 *   - empty `values`            → [0, 0]
 *   - `iters` not a number >= 1 → degenerate interval at the sample mean
 *   - `level` outside [0, 1]    → clamped; percentile indices are always kept
 *                                 inside the resample array (previously
 *                                 `level = 1` or `iters = 0` indexed past the
 *                                 end and threw on `undefined.toFixed`)
 *
 * @param {number[]} values  0 (fraudulent) or 1 (authentic) per token
 * @param {number}   level   Confidence level, e.g. 0.95
 * @param {number}   iters   Bootstrap iterations
 * @returns {[number, number]}  [lo, hi] as percentages (0–100), one decimal
 */
function _bootstrapCI(values, level, iters) {
  const n = values.length;
  if (n === 0) return [0, 0];

  const resamples = Number.isFinite(iters) ? Math.floor(iters) : 0;
  if (resamples < 1) {
    // No resampling possible: report the point estimate as a zero-width interval.
    const total = values.reduce((sum, v) => sum + v, 0);
    const pointPct = +((total / n) * 100).toFixed(1);
    return [pointPct, pointPct];
  }

  // Fold the labels into a 32-bit seed so the result is reproducible.
  const seed = values.reduce((s, v) => (s * 31 + ((v * 1000) | 0)) | 0, 0);
  const rand = _splitmix32(seed);
  const means = new Float64Array(resamples);

  for (let i = 0; i < resamples; i++) {
    let sum = 0;
    for (let j = 0; j < n; j++) {
      sum += values[(rand() * n) | 0];
    }
    means[i] = (sum / n) * 100;
  }

  // Typed arrays sort numerically by default, so no comparator is needed.
  means.sort();

  const alpha = Math.min(1, Math.max(0, 1 - level));
  const lastIdx = resamples - 1;
  const percentile = (q) => means[Math.min(lastIdx, Math.max(0, (q * resamples) | 0))];

  const lo = percentile(alpha / 2);
  const hi = percentile(1 - alpha / 2);

  return [+lo.toFixed(1), +hi.toFixed(1)];
}
282
+
283
+ // ── Cluster fingerprinting ────────────────────────────────────────────────────
284
+
285
/**
 * Produce a fingerprint for a cluster so the same farm can be recognised
 * across multiple analysis windows.
 *
 * Signature fields:
 *   - enfRegion / dramVerdict / thermalLabel: the most frequent non-null
 *     `hw.enf`, `hw.dram` and `idle.therm` value among the cluster's tokens
 *     ('unknown' when no token carries one). These were previously copied
 *     from `tokens[0]` alone, which made the reported label depend on input
 *     order whenever a cluster mixes values.
 *   - meanEnfDev: mean `hw.enfDev`, rounded to 4 decimals (null if absent)
 *   - meanIdleMs: mean `idle.dMs`, rounded to an integer (null if absent)
 *
 * NOTE(review): the id hashes the whole signature, including the two means,
 * so it changes whenever those means shift between windows.
 *
 * @param {string}   key     Cluster key (not used by the current implementation)
 * @param {object[]} tokens  Tokens belonging to the cluster
 * @param {object}   pop     analysePopulation result (not used by the current implementation)
 * @returns {{ id: string, signature: object }}
 */
function _fingerprint(key, tokens, pop) {
  // Most frequent non-null value of `pick(token)`; on a tie the value that
  // reaches the top count first wins. Falls back to 'unknown'.
  const dominant = (pick) => {
    const counts = new Map();
    let best;
    let bestCount = 0;
    for (const t of tokens) {
      const value = pick(t);
      if (value == null) continue;
      const count = (counts.get(value) ?? 0) + 1;
      counts.set(value, count);
      if (count > bestCount) {
        best = value;
        bestCount = count;
      }
    }
    return best ?? 'unknown';
  };

  // Mean ENF deviation across the cluster.
  const enfDevs = tokens.map((t) => t.hw?.enfDev).filter((v) => v != null);
  const meanEnfDev = enfDevs.length
    ? +(enfDevs.reduce((s, v) => s + v, 0) / enfDevs.length).toFixed(4)
    : null;

  // Mean idle duration across the cluster.
  const idleDurations = tokens.map((t) => t.idle?.dMs).filter((v) => v != null);
  const meanIdleMs = idleDurations.length
    ? Math.round(idleDurations.reduce((s, v) => s + v, 0) / idleDurations.length)
    : null;

  const signature = {
    enfRegion: dominant((t) => t.hw?.enf),
    dramVerdict: dominant((t) => t.hw?.dram),
    thermalLabel: dominant((t) => t.idle?.therm),
    meanEnfDev,
    meanIdleMs,
  };

  // Deterministic, non-cryptographic id: hex of a hash over the serialized signature.
  const sigStr = JSON.stringify(signature);
  const id = `farm_${_djb2(sigStr).toString(16).slice(0, 8)}`;

  return { id, signature };
}
330
+
331
+ // ── Helpers ───────────────────────────────────────────────────────────────────
332
+
333
/**
 * Names of the (up to) two highest-scoring tests in a population result.
 *
 * A test without a `score` counts as 0; tests with equal scores keep the
 * order in which they appear in `pop.tests`.
 *
 * @param {object} pop  analysePopulation result; reads `pop.tests`
 * @returns {string[]}
 */
function _topSignals(pop) {
  const ranked = [];

  for (const [testName, outcome] of Object.entries(pop.tests ?? {})) {
    ranked.push({ name: testName, score: outcome.score ?? 0 });
  }

  ranked.sort((x, y) => y.score - x.score);

  return ranked.slice(0, 2).map(({ name }) => name);
}
340
+
341
/**
 * Map a grade to its guidance string.
 *
 * @param {string}   grade        CLEAN | LOW_FRAUD | MODERATE_FRAUD | anything else
 *                                (any other value gets the CRITICAL message)
 * @param {object[]} botClusters  Flagged clusters; only `.length` is read, and
 *                                not at all for CLEAN
 * @returns {string}
 */
function _recommendation(grade, botClusters) {
  switch (grade) {
    case 'CLEAN':
      return 'Cohort appears authentic. No action required.';

    case 'LOW_FRAUD':
      return `${botClusters.length} suspicious cluster(s) detected. Monitor and consider manual review.`;

    case 'MODERATE_FRAUD':
      return `${botClusters.length} bot farm cluster(s) identified. Block tokens from flagged clusters and investigate upstream traffic source.`;

    default: {
      const headline = `CRITICAL: ${botClusters.length} bot farm cluster(s) account for a majority of traffic. `;
      const action = `Suspend engagement credit for this cohort and audit the traffic acquisition channel.`;
      return headline + action;
    }
  }
}
356
+
357
/**
 * Report returned when there is nothing to analyse.
 *
 * All counts are zero, the estimate and interval are null, and the grade is
 * CLEAN with a 'No data' label. A fresh object (and a fresh `botClusters`
 * array) is created on every call.
 *
 * @returns {AuthenticityReport}
 */
function _emptyReport() {
  const headline = {
    cohortSize: 0,
    estimatedHumanPct: null,
    confidenceInterval: null,
    confidenceLevel: 0.95,
  };

  const tallies = {
    clusterCount: 0,
    botClusterCount: 0,
    authenticClusterCount: 0,
    authenticTokenCount: 0,
    fraudulentTokenCount: 0,
  };

  const verdict = {
    grade: 'CLEAN',
    label: 'No data',
    color: 'bgreen',
    recommendation: 'No tokens provided.',
  };

  return { ...headline, ...tallies, botClusters: [], ...verdict };
}
375
+
376
/**
 * DJB2-style hash (XOR variant) — non-cryptographic and deterministic; used
 * to derive cluster ids from a serialized signature.
 *
 * Starts at 5381 and, for every UTF-16 code unit, computes
 * `(hash * 33) XOR unit`, truncated to an unsigned 32-bit integer.
 *
 * @param {string} str
 * @returns {number} Unsigned 32-bit hash
 */
function _djb2(str) {
  let hash = 5381;
  let pos = 0;

  while (pos < str.length) {
    // (hash << 5) + hash === hash * 33; `>>> 0` keeps the value unsigned 32-bit.
    hash = (((hash << 5) + hash) ^ str.charCodeAt(pos)) >>> 0;
    pos += 1;
  }

  return hash;
}
389
+
390
+ // ── JSDoc types ───────────────────────────────────────────────────────────────
391
+
392
+ /**
393
+ * @typedef {object} AuthenticityReport
394
+ * @property {number} cohortSize Total tokens analysed
395
+ * @property {number|null} estimatedHumanPct Estimated % of real humans (0–100)
396
+ * @property {[number,number]|null} confidenceInterval [lo, hi] at confidenceLevel
397
+ * @property {number} confidenceLevel Bootstrap CI level (e.g. 0.95)
398
+ * @property {number} clusterCount Total hardware clusters identified
399
+ * @property {number} botClusterCount Clusters classified as bot farms
400
+ * @property {number} authenticClusterCount Clusters classified as authentic
401
+ * @property {number} authenticTokenCount Tokens NOT in bot farm clusters
402
+ * @property {number} fraudulentTokenCount Tokens IN bot farm clusters
403
+ * @property {object[]} botClusters Per-farm breakdown (sorted by sybilScore desc)
404
+ * @property {string} grade CLEAN|LOW_FRAUD|MODERATE_FRAUD|HIGH_FRAUD
405
+ * @property {string} label Human-readable grade label
406
+ * @property {string} color ANSI color hint for terminal rendering
407
+ * @property {string} recommendation Actionable guidance string
408
+ */