@svrnsec/pulse 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +883 -782
  3. package/SECURITY.md +27 -22
  4. package/bin/svrnsec-pulse.js +7 -7
  5. package/dist/{pulse.cjs.js → pulse.cjs} +6428 -6413
  6. package/dist/pulse.cjs.map +1 -0
  7. package/dist/pulse.esm.js +6429 -6415
  8. package/dist/pulse.esm.js.map +1 -1
  9. package/index.d.ts +949 -846
  10. package/package.json +189 -184
  11. package/pkg/pulse_core.js +174 -173
  12. package/src/analysis/audio.js +213 -213
  13. package/src/analysis/authenticityAudit.js +408 -393
  14. package/src/analysis/coherence.js +502 -502
  15. package/src/analysis/coordinatedBehavior.js +825 -804
  16. package/src/analysis/heuristic.js +428 -428
  17. package/src/analysis/jitter.js +446 -446
  18. package/src/analysis/llm.js +473 -472
  19. package/src/analysis/populationEntropy.js +404 -403
  20. package/src/analysis/provider.js +248 -248
  21. package/src/analysis/refraction.js +392 -391
  22. package/src/analysis/trustScore.js +356 -356
  23. package/src/cli/args.js +36 -36
  24. package/src/cli/commands/scan.js +192 -192
  25. package/src/cli/runner.js +157 -157
  26. package/src/collector/adaptive.js +200 -200
  27. package/src/collector/bio.js +297 -287
  28. package/src/collector/canvas.js +247 -239
  29. package/src/collector/dram.js +203 -203
  30. package/src/collector/enf.js +311 -311
  31. package/src/collector/entropy.js +195 -195
  32. package/src/collector/gpu.js +248 -245
  33. package/src/collector/idleAttestation.js +480 -480
  34. package/src/collector/sabTimer.js +189 -191
  35. package/src/errors.js +54 -0
  36. package/src/fingerprint.js +475 -475
  37. package/src/index.js +345 -342
  38. package/src/integrations/react-native.js +462 -459
  39. package/src/integrations/react.js +184 -185
  40. package/src/middleware/express.js +155 -155
  41. package/src/middleware/next.js +174 -175
  42. package/src/proof/challenge.js +249 -249
  43. package/src/proof/engagementToken.js +426 -394
  44. package/src/proof/fingerprint.js +268 -268
  45. package/src/proof/validator.js +82 -142
  46. package/src/registry/serializer.js +349 -349
  47. package/src/terminal.js +263 -263
  48. package/src/update-notifier.js +259 -264
  49. package/dist/pulse.cjs.js.map +0 -1
@@ -1,393 +1,408 @@
1
- /**
2
- * @svrnsec/pulse — Authenticity Audit
3
- *
4
- * Produces a statistically rigorous, physics-backed estimate of what fraction
5
- * of a user cohort are real humans on real hardware.
6
- *
7
- * This is the "$44 billion question" — the number Twitter and Elon argued
8
- * about for months with no physics-layer evidence on either side. Browser
9
- * fingerprinting can be spoofed. Declared metrics can be gamed. The thermal
10
- * state of a real device at 2 AM cannot.
11
- *
12
- * Method
13
- * ──────
14
- * 1. Cluster tokens by hardware signature
15
- * ENF deviation bucket (±0.025 Hz → localizes to substation/building)
16
- * × DRAM verdict (dram | virtual | ambiguous)
17
- * × Thermal label (hot_to_cold | sustained_hot | step_function …)
18
- * × 10-minute time bucket
19
- * Authentic users scatter across all dimensions.
20
- * A farm in one building, running the same script, on the same hardware
21
- * generation collapses into one tight cluster.
22
- *
23
- * 2. Score each cluster with Population Entropy (5 statistical tests).
24
- * Clusters with sybilScore > FARM_THRESHOLD are classified as bot farms.
25
- *
26
- * 3. Bootstrap a 95% confidence interval on the human-rate estimate.
27
- * Each resample draws tokens with replacement and re-runs classification.
28
- *
29
- * 4. Fingerprint each bot cluster for cross-window tracking.
30
- * Same ENF deviation + thermal pattern reappearing next hour = same farm.
31
- *
32
- * Output
33
- * ──────
34
- * estimatedHumanPct The headline number. Treat anything below 90% as
35
- * a platform health emergency.
36
- *
37
- * confidenceInterval [lo, hi] at the requested confidence level.
38
- * Narrow CI = large cohort + clear signal.
39
- * Wide CI = small cohort or mixed evidence.
40
- *
41
- * botClusters Per-farm breakdown: size, sybilScore, ENF location,
42
- * thermal pattern, dominant attack signal.
43
- *
44
- * grade CLEAN / LOW_FRAUD / MODERATE_FRAUD / HIGH_FRAUD
45
- *
46
- * Typical values
47
- * ──────────────
48
- * Organic product feed, 10k tokens over 1 hour → humanPct ≈ 92–97%
49
- * Incentivised engagement campaign → humanPct ≈ 55–75%
50
- * Coordinated click farm attack → humanPct ≈ 8–35%
51
- */
52
-
53
- import { analysePopulation } from './populationEntropy.js';
54
-
55
- // ── Thresholds ─────────────────────────────────────────────────────────────────
56
-
57
- /** Clusters scoring above this are classified as bot farms. */
58
- const FARM_THRESHOLD = 65;
59
-
60
- /** Minimum tokens in a cluster before we run population analysis on it.
61
- * Smaller clusters are treated as noise and counted as authentic. */
62
- const MIN_CLUSTER_SIZE = 5;
63
-
64
- /** ENF deviation bucket width in Hz. ±0.025 Hz localizes devices to the same
65
- * substation — close enough to imply the same building. */
66
- const ENF_BUCKET_HZ = 0.05;
67
-
68
- /** Time bucket width. 10-minute buckets catch batch-dispatch patterns
69
- * without splitting a legitimate organic traffic surge. */
70
- const TIME_BUCKET_MS = 10 * 60 * 1000;
71
-
72
- /** Bootstrap iterations for confidence interval estimation. */
73
- const BOOTSTRAP_ITERATIONS = 500;
74
-
75
- // ── Grade thresholds ──────────────────────────────────────────────────────────
76
-
77
- const GRADES = [
78
- { min: 90, grade: 'CLEAN', label: 'Authentic cohort', color: 'bgreen' },
79
- { min: 75, grade: 'LOW_FRAUD', label: 'Elevated fraud signal', color: 'byellow' },
80
- { min: 50, grade: 'MODERATE_FRAUD', label: 'Significant bot presence', color: 'byellow' },
81
- { min: 0, grade: 'HIGH_FRAUD', label: 'Platform health emergency', color: 'bred' },
82
- ];
83
-
84
- // ── authenticityAudit ─────────────────────────────────────────────────────────
85
-
86
- /**
87
- * Run a full authenticity audit on a cohort of decoded engagement tokens.
88
- *
89
- * @param {object[]} tokens Decoded engagement token objects
90
- * (from decodeToken / verifyEngagementToken)
91
- * @param {object} [opts]
92
- * @param {number} [opts.windowMs] Analysis window in ms (default: all tokens)
93
- * @param {number} [opts.minClusterSize] Min cluster size for farm analysis (default: 5)
94
- * @param {number} [opts.farmThreshold] sybilScore cutoff for farm classification (default: 65)
95
- * @param {number} [opts.confidenceLevel] Bootstrap CI level, e.g. 0.95 (default: 0.95)
96
- * @param {number} [opts.bootstrapIter] Bootstrap iterations (default: 500)
97
- * @returns {AuthenticityReport}
98
- */
99
- export function authenticityAudit(tokens, opts = {}) {
100
- const {
101
- minClusterSize = MIN_CLUSTER_SIZE,
102
- farmThreshold = FARM_THRESHOLD,
103
- confidenceLevel = 0.95,
104
- bootstrapIter = BOOTSTRAP_ITERATIONS,
105
- } = opts;
106
-
107
- if (!Array.isArray(tokens) || tokens.length === 0) {
108
- return _emptyReport();
109
- }
110
-
111
- // ── 1. Cluster ─────────────────────────────────────────────────────────────
112
- const clusterMap = _clusterTokens(tokens);
113
-
114
- // ── 2. Score each cluster ──────────────────────────────────────────────────
115
- const botClusterIds = new Set();
116
- const clusterResults = [];
117
-
118
- for (const [key, clusterTokens] of clusterMap) {
119
- if (clusterTokens.length < minClusterSize) continue;
120
-
121
- const pop = analysePopulation(clusterTokens);
122
- const isFarm = pop.sybilScore >= farmThreshold;
123
-
124
- const fingerprint = _fingerprint(key, clusterTokens, pop);
125
-
126
- clusterResults.push({
127
- id: fingerprint.id,
128
- size: clusterTokens.length,
129
- sybilScore: pop.sybilScore,
130
- authentic: !isFarm,
131
- signature: fingerprint.signature,
132
- topSignals: _topSignals(pop),
133
- flags: pop.flags,
134
- });
135
-
136
- if (isFarm) {
137
- for (const t of clusterTokens) botClusterIds.add(t);
138
- }
139
- }
140
-
141
- // ── 3. Count fraudulent tokens ─────────────────────────────────────────────
142
- // Tokens in clusters too small to analyse are given benefit of the doubt.
143
- const fraudCount = botClusterIds.size;
144
- const authenticCount = tokens.length - fraudCount;
145
- const rawHumanPct = (authenticCount / tokens.length) * 100;
146
-
147
- // ── 4. Bootstrap confidence interval ──────────────────────────────────────
148
- // We bootstrap the "is this token authentic?" binary labels.
149
- const labels = tokens.map(t => (botClusterIds.has(t) ? 0 : 1));
150
- const ci = _bootstrapCI(labels, confidenceLevel, bootstrapIter);
151
-
152
- // ── 5. Grade and summarise ─────────────────────────────────────────────────
153
- const gradeEntry = GRADES.find(g => rawHumanPct >= g.min) ?? GRADES[GRADES.length - 1];
154
- const botClusters = clusterResults.filter(c => !c.authentic)
155
- .sort((a, b) => b.sybilScore - a.sybilScore);
156
- const authClusters = clusterResults.filter(c => c.authentic);
157
-
158
- return {
159
- // ── Headline ──
160
- cohortSize: tokens.length,
161
- estimatedHumanPct: +rawHumanPct.toFixed(1),
162
- confidenceInterval: ci,
163
- confidenceLevel,
164
-
165
- // ── Cluster breakdown ──
166
- clusterCount: clusterResults.length,
167
- botClusterCount: botClusters.length,
168
- authenticClusterCount: authClusters.length,
169
-
170
- // ── Token counts ──
171
- authenticTokenCount: authenticCount,
172
- fraudulentTokenCount: fraudCount,
173
-
174
- // ── Farm detail ──
175
- botClusters,
176
-
177
- // ── Grade ──
178
- grade: gradeEntry.grade,
179
- label: gradeEntry.label,
180
- color: gradeEntry.color,
181
- recommendation: _recommendation(gradeEntry.grade, botClusters),
182
- };
183
- }
184
-
185
- // ── Clustering ────────────────────────────────────────────────────────────────
186
-
187
- /**
188
- * Bucket tokens into hardware-signature clusters.
189
- *
190
- * Cluster key = ENF deviation bucket × DRAM verdict × thermal label × time bucket
191
- *
192
- * This collapses bot farms (same building, same hardware, same script, same
193
- * time window) into single clusters while leaving organic traffic scattered.
194
- *
195
- * @param {object[]} tokens
196
- * @returns {Map<string, object[]>}
197
- */
198
- function _clusterTokens(tokens) {
199
- const map = new Map();
200
-
201
- for (const token of tokens) {
202
- const key = _clusterKey(token);
203
- if (!map.has(key)) map.set(key, []);
204
- map.get(key).push(token);
205
- }
206
-
207
- return map;
208
- }
209
-
210
- function _clusterKey(token) {
211
- const hw = token.hw ?? {};
212
- const iat = token.iat ?? 0;
213
-
214
- // ENF deviation → nearest bucket (null/undefined → 'no_enf')
215
- // ±0.025 Hz resolution localizes devices to the same building/substation.
216
- const enfBucket = hw.enfDev != null
217
- ? `e${Math.round(hw.enfDev / ENF_BUCKET_HZ)}`
218
- : 'no_enf';
219
-
220
- // DRAM verdict string — proxy for hardware generation
221
- const dram = hw.dram ?? 'unknown';
222
-
223
- // 10-minute time bucket — captures batch dispatch without splitting organic traffic
224
- const tBucket = Math.floor(iat / TIME_BUCKET_MS);
225
-
226
- // Note: thermal label is intentionally NOT part of the key.
227
- // Clustering by thermal label would make testThermalDiversity a tautology
228
- // (every cluster would have zero diversity by construction).
229
- // Thermal diversity is left as a within-cluster discriminator — farms that
230
- // co-locate in the same ENF + DRAM + time bucket will still show sustained_hot
231
- // homogeneity; organic users in the same bucket will show hot_to_cold / cooling mix.
232
- return `${enfBucket}:${dram}:${tBucket}`;
233
- }
234
-
235
- // ── Bootstrap CI ──────────────────────────────────────────────────────────────
236
-
237
- /**
238
- * Non-parametric bootstrap confidence interval on the mean of a 0/1 vector.
239
- *
240
- * @param {number[]} values 0 (fraudulent) or 1 (authentic) per token
241
- * @param {number} level Confidence level, e.g. 0.95
242
- * @param {number} iters Bootstrap iterations
243
- * @returns {[number, number]} [lo, hi] as percentages (0–100)
244
- */
245
- function _bootstrapCI(values, level, iters) {
246
- const n = values.length;
247
- if (n === 0) return [0, 0];
248
-
249
- const means = new Float64Array(iters);
250
-
251
- for (let i = 0; i < iters; i++) {
252
- let sum = 0;
253
- for (let j = 0; j < n; j++) {
254
- sum += values[(Math.random() * n) | 0];
255
- }
256
- means[i] = (sum / n) * 100;
257
- }
258
-
259
- means.sort();
260
-
261
- const alpha = 1 - level;
262
- const lo = means[(alpha / 2 * iters) | 0];
263
- const hi = means[((1 - alpha / 2) * iters) | 0];
264
-
265
- return [+lo.toFixed(1), +hi.toFixed(1)];
266
- }
267
-
268
- // ── Cluster fingerprinting ────────────────────────────────────────────────────
269
-
270
- /**
271
- * Produce a stable fingerprint for a bot cluster so the same farm can be
272
- * recognised across multiple analysis windows.
273
- *
274
- * Fingerprint components that are stable across time:
275
- * - ENF deviation (tied to physical location / substation)
276
- * - DRAM verdict (tied to hardware generation)
277
- * - Thermal label (tied to operational pattern)
278
- *
279
- * @param {string} key
280
- * @param {object[]} tokens
281
- * @param {object} pop analysePopulation result
282
- * @returns {{ id: string, signature: object }}
283
- */
284
- function _fingerprint(key, tokens, pop) {
285
- const sample = tokens[0] ?? {};
286
- const hw = sample.hw ?? {};
287
- const idle = sample.idle ?? {};
288
-
289
- // Mean ENF deviation across cluster (stable for co-located devices)
290
- const enfDevs = tokens.map(t => t.hw?.enfDev).filter(v => v != null);
291
- const meanEnfDev = enfDevs.length
292
- ? +(enfDevs.reduce((s, v) => s + v, 0) / enfDevs.length).toFixed(4)
293
- : null;
294
-
295
- // Mean idle duration (reveals script-sleep cadence)
296
- const idleDurations = tokens.map(t => t.idle?.dMs).filter(v => v != null);
297
- const meanIdleMs = idleDurations.length
298
- ? Math.round(idleDurations.reduce((s, v) => s + v, 0) / idleDurations.length)
299
- : null;
300
-
301
- const signature = {
302
- enfRegion: hw.enf ?? 'unknown',
303
- dramVerdict: hw.dram ?? 'unknown',
304
- thermalLabel: idle.therm ?? 'unknown',
305
- meanEnfDev,
306
- meanIdleMs,
307
- };
308
-
309
- // Stable ID: hash-like hex derived from the signature (deterministic, not crypto)
310
- const sigStr = JSON.stringify(signature);
311
- const id = 'farm_' + _djb2(sigStr).toString(16).slice(0, 8);
312
-
313
- return { id, signature };
314
- }
315
-
316
- // ── Helpers ───────────────────────────────────────────────────────────────────
317
-
318
- function _topSignals(pop) {
319
- return Object.entries(pop.tests ?? {})
320
- .map(([name, result]) => ({ name, score: result.score ?? 0 }))
321
- .sort((a, b) => b.score - a.score)
322
- .slice(0, 2)
323
- .map(s => s.name);
324
- }
325
-
326
- function _recommendation(grade, botClusters) {
327
- if (grade === 'CLEAN') {
328
- return 'Cohort appears authentic. No action required.';
329
- }
330
- if (grade === 'LOW_FRAUD') {
331
- return `${botClusters.length} suspicious cluster(s) detected. Monitor and consider manual review.`;
332
- }
333
- if (grade === 'MODERATE_FRAUD') {
334
- return `${botClusters.length} bot farm cluster(s) identified. Block tokens from flagged clusters and investigate upstream traffic source.`;
335
- }
336
- return (
337
- `CRITICAL: ${botClusters.length} bot farm cluster(s) account for a majority of traffic. ` +
338
- `Suspend engagement credit for this cohort and audit the traffic acquisition channel.`
339
- );
340
- }
341
-
342
- function _emptyReport() {
343
- return {
344
- cohortSize: 0,
345
- estimatedHumanPct: null,
346
- confidenceInterval: null,
347
- confidenceLevel: 0.95,
348
- clusterCount: 0,
349
- botClusterCount: 0,
350
- authenticClusterCount: 0,
351
- authenticTokenCount: 0,
352
- fraudulentTokenCount: 0,
353
- botClusters: [],
354
- grade: 'CLEAN',
355
- label: 'No data',
356
- color: 'bgreen',
357
- recommendation: 'No tokens provided.',
358
- };
359
- }
360
-
361
- /**
362
- * DJB2 hash — non-cryptographic, deterministic, produces stable cluster IDs.
363
- * @param {string} str
364
- * @returns {number}
365
- */
366
- function _djb2(str) {
367
- let h = 5381;
368
- for (let i = 0; i < str.length; i++) {
369
- h = ((h << 5) + h) ^ str.charCodeAt(i);
370
- h = h >>> 0; // keep unsigned 32-bit
371
- }
372
- return h;
373
- }
374
-
375
- // ── JSDoc types ───────────────────────────────────────────────────────────────
376
-
377
- /**
378
- * @typedef {object} AuthenticityReport
379
- * @property {number} cohortSize Total tokens analysed
380
- * @property {number|null} estimatedHumanPct Estimated % of real humans (0–100)
381
- * @property {[number,number]|null} confidenceInterval [lo, hi] at confidenceLevel
382
- * @property {number} confidenceLevel Bootstrap CI level (e.g. 0.95)
383
- * @property {number} clusterCount Total hardware clusters identified
384
- * @property {number} botClusterCount Clusters classified as bot farms
385
- * @property {number} authenticClusterCount Clusters classified as authentic
386
- * @property {number} authenticTokenCount Tokens NOT in bot farm clusters
387
- * @property {number} fraudulentTokenCount Tokens IN bot farm clusters
388
- * @property {object[]} botClusters Per-farm breakdown (sorted by sybilScore desc)
389
- * @property {string} grade CLEAN|LOW_FRAUD|MODERATE_FRAUD|HIGH_FRAUD
390
- * @property {string} label Human-readable grade label
391
- * @property {string} color ANSI color hint for terminal rendering
392
- * @property {string} recommendation Actionable guidance string
393
- */
1
+ /**
2
+ * @svrnsec/pulse — Authenticity Audit
3
+ *
4
+ * Produces a statistically rigorous, physics-backed estimate of what fraction
5
+ * of a user cohort are real humans on real hardware.
6
+ *
7
+ * This is the "$44 billion question" — the number Twitter and Elon argued
8
+ * about for months with no physics-layer evidence on either side. Browser
9
+ * fingerprinting can be spoofed. Declared metrics can be gamed. The thermal
10
+ * state of a real device at 2 AM cannot.
11
+ *
12
+ * Method
13
+ * ──────
14
+ * 1. Cluster tokens by hardware signature
15
+ * ENF deviation bucket (±0.025 Hz → localizes to substation/building)
16
+ * × DRAM verdict (dram | virtual | ambiguous)
17
+ * × 10-minute time bucket
18
+ * (Thermal label — hot_to_cold | sustained_hot | step_function … — is deliberately left out of the key and scored within each cluster.)
19
+ * Authentic users scatter across all dimensions.
20
+ * A farm in one building, running the same script, on the same hardware
21
+ * generation collapses into one tight cluster.
22
+ *
23
+ * 2. Score each cluster with Population Entropy (5 statistical tests).
24
+ * Clusters with sybilScore >= FARM_THRESHOLD are classified as bot farms.
25
+ *
26
+ * 3. Bootstrap a confidence interval (95% by default) on the human-rate estimate.
27
+ * Each resample draws the per-token authentic/fraudulent labels with replacement; clusters are not re-scored.
28
+ *
29
+ * 4. Fingerprint each bot cluster for cross-window tracking.
30
+ * Same ENF deviation + thermal pattern reappearing next hour = same farm.
31
+ *
32
+ * Output
33
+ * ──────
34
+ * estimatedHumanPct The headline number. Anything below 90% is no longer
35
+ * graded CLEAN; below 50% is graded HIGH_FRAUD (platform health emergency).
36
+ *
37
+ * confidenceInterval [lo, hi] at the requested confidence level.
38
+ * Narrow CI = large cohort + clear signal.
39
+ * Wide CI = small cohort or mixed evidence.
40
+ *
41
+ * botClusters Per-farm breakdown: size, sybilScore, ENF location,
42
+ * thermal pattern, dominant attack signal.
43
+ *
44
+ * grade CLEAN / LOW_FRAUD / MODERATE_FRAUD / HIGH_FRAUD
45
+ *
46
+ * Typical values
47
+ * ──────────────
48
+ * Organic product feed, 10k tokens over 1 hour → humanPct ≈ 92–97%
49
+ * Incentivised engagement campaign → humanPct ≈ 55–75%
50
+ * Coordinated click farm attack → humanPct ≈ 8–35%
51
+ */
52
+
53
+ import { analysePopulation } from './populationEntropy.js';
54
+
55
+ // ── Thresholds ─────────────────────────────────────────────────────────────────
56
+
57
+ /** Clusters scoring at or above this are classified as bot farms. */
58
+ const FARM_THRESHOLD = 65;
59
+
60
+ /** Minimum tokens in a cluster before we run population analysis on it.
61
+ * Smaller clusters are treated as noise and counted as authentic. */
62
+ const MIN_CLUSTER_SIZE = 5;
63
+
64
+ /** ENF deviation bucket width in Hz. ±0.025 Hz localizes devices to the same
65
+ * substation — close enough to imply the same building. */
66
+ const ENF_BUCKET_HZ = 0.05;
67
+
68
+ /** Time bucket width. 10-minute buckets catch batch-dispatch patterns
69
+ * without splitting a legitimate organic traffic surge. */
70
+ const TIME_BUCKET_MS = 10 * 60 * 1000;
71
+
72
+ /** Bootstrap iterations for confidence interval estimation. */
73
+ const BOOTSTRAP_ITERATIONS = 500;
74
+
75
+ // ── Grade thresholds ──────────────────────────────────────────────────────────
76
+
77
+ const GRADES = [
78
+ { min: 90, grade: 'CLEAN', label: 'Authentic cohort', color: 'bgreen' },
79
+ { min: 75, grade: 'LOW_FRAUD', label: 'Elevated fraud signal', color: 'byellow' },
80
+ { min: 50, grade: 'MODERATE_FRAUD', label: 'Significant bot presence', color: 'byellow' },
81
+ { min: 0, grade: 'HIGH_FRAUD', label: 'Platform health emergency', color: 'bred' },
82
+ ];
83
+
84
+ // ── authenticityAudit ─────────────────────────────────────────────────────────
85
+
86
+ /**
87
+ * Run a full authenticity audit on a cohort of decoded engagement tokens.
88
+ *
89
+ * @param {object[]} tokens Decoded engagement token objects
90
+ * (from decodeToken / verifyEngagementToken)
91
+ * @param {object} [opts]
92
+ * @param {number} [opts.windowMs] Analysis window in ms (documented, but not read by this function; every token passed in is analysed)
93
+ * @param {number} [opts.minClusterSize] Min cluster size for farm analysis (default: 5)
94
+ * @param {number} [opts.farmThreshold] sybilScore cutoff for farm classification (default: 65)
95
+ * @param {number} [opts.confidenceLevel] Bootstrap CI level, e.g. 0.95 (default: 0.95)
96
+ * @param {number} [opts.bootstrapIter] Bootstrap iterations (default: 500)
97
+ * @returns {AuthenticityReport}
98
+ */
99
+ export function authenticityAudit(tokens, opts = {}) {
100
+ const {
101
+ minClusterSize = MIN_CLUSTER_SIZE,
102
+ farmThreshold = FARM_THRESHOLD,
103
+ confidenceLevel = 0.95,
104
+ bootstrapIter = BOOTSTRAP_ITERATIONS,
105
+ } = opts;
106
+
107
+ if (!Array.isArray(tokens) || tokens.length === 0) {
108
+ return _emptyReport();
109
+ }
110
+
111
+ // ── 1. Cluster ─────────────────────────────────────────────────────────────
112
+ const clusterMap = _clusterTokens(tokens);
113
+
114
+ // ── 2. Score each cluster ──────────────────────────────────────────────────
115
+ const botClusterIds = new Set();
116
+ const clusterResults = [];
117
+
118
+ for (const [key, clusterTokens] of clusterMap) {
119
+ if (clusterTokens.length < minClusterSize) continue;
120
+
121
+ const pop = analysePopulation(clusterTokens);
122
+ const isFarm = pop.sybilScore >= farmThreshold;
123
+
124
+ const fingerprint = _fingerprint(key, clusterTokens, pop);
125
+
126
+ clusterResults.push({
127
+ id: fingerprint.id,
128
+ size: clusterTokens.length,
129
+ sybilScore: pop.sybilScore,
130
+ authentic: !isFarm,
131
+ signature: fingerprint.signature,
132
+ topSignals: _topSignals(pop),
133
+ flags: pop.flags,
134
+ });
135
+
136
+ if (isFarm) {
137
+ for (const t of clusterTokens) botClusterIds.add(t);
138
+ }
139
+ }
140
+
141
+ // ── 3. Count fraudulent tokens ─────────────────────────────────────────────
142
+ // Tokens in clusters too small to analyse are given benefit of the doubt.
143
+ const fraudCount = botClusterIds.size;
144
+ const authenticCount = tokens.length - fraudCount;
145
+ const rawHumanPct = (authenticCount / tokens.length) * 100;
146
+
147
+ // ── 4. Bootstrap confidence interval ──────────────────────────────────────
148
+ // We bootstrap the "is this token authentic?" binary labels.
149
+ const labels = tokens.map(t => (botClusterIds.has(t) ? 0 : 1));
150
+ const ci = _bootstrapCI(labels, confidenceLevel, bootstrapIter);
151
+
152
+ // ── 5. Grade and summarise ─────────────────────────────────────────────────
153
+ const gradeEntry = GRADES.find(g => rawHumanPct >= g.min) ?? GRADES[GRADES.length - 1];
154
+ const botClusters = clusterResults.filter(c => !c.authentic)
155
+ .sort((a, b) => b.sybilScore - a.sybilScore);
156
+ const authClusters = clusterResults.filter(c => c.authentic);
157
+
158
+ return {
159
+ // ── Headline ──
160
+ cohortSize: tokens.length,
161
+ estimatedHumanPct: +rawHumanPct.toFixed(1),
162
+ confidenceInterval: ci,
163
+ confidenceLevel,
164
+
165
+ // ── Cluster breakdown ──
166
+ clusterCount: clusterResults.length,
167
+ botClusterCount: botClusters.length,
168
+ authenticClusterCount: authClusters.length,
169
+
170
+ // ── Token counts ──
171
+ authenticTokenCount: authenticCount,
172
+ fraudulentTokenCount: fraudCount,
173
+
174
+ // ── Farm detail ──
175
+ botClusters,
176
+
177
+ // ── Grade ──
178
+ grade: gradeEntry.grade,
179
+ label: gradeEntry.label,
180
+ color: gradeEntry.color,
181
+ recommendation: _recommendation(gradeEntry.grade, botClusters),
182
+ };
183
+ }
184
+
185
+ // ── Clustering ────────────────────────────────────────────────────────────────
186
+
187
+ /**
188
+ * Bucket tokens into hardware-signature clusters.
189
+ *
190
+ * Cluster key = ENF deviation bucket × DRAM verdict × time bucket (thermal label is not part of the key)
191
+ *
192
+ * This collapses bot farms (same building, same hardware, same script, same
193
+ * time window) into single clusters while leaving organic traffic scattered.
194
+ *
195
+ * @param {object[]} tokens
196
+ * @returns {Map<string, object[]>}
197
+ */
198
+ function _clusterTokens(tokens) {
199
+ const map = new Map();
200
+
201
+ for (const token of tokens) {
202
+ const key = _clusterKey(token);
203
+ if (!map.has(key)) map.set(key, []);
204
+ map.get(key).push(token);
205
+ }
206
+
207
+ return map;
208
+ }
209
+
210
+ function _clusterKey(token) {
211
+ const hw = token.hw ?? {};
212
+ const iat = token.iat ?? 0;
213
+
214
+ // ENF deviation → nearest bucket (null/undefined → 'no_enf')
215
+ // ±0.025 Hz resolution localizes devices to the same building/substation.
216
+ const enfBucket = hw.enfDev != null
217
+ ? `e${Math.round(hw.enfDev / ENF_BUCKET_HZ)}`
218
+ : 'no_enf';
219
+
220
+ // DRAM verdict string — proxy for hardware generation
221
+ const dram = hw.dram ?? 'unknown';
222
+
223
+ // 10-minute time bucket — captures batch dispatch without splitting organic traffic
224
+ const tBucket = Math.floor(iat / TIME_BUCKET_MS);
225
+
226
+ // Note: thermal label is intentionally NOT part of the key.
227
+ // Clustering by thermal label would make testThermalDiversity a tautology
228
+ // (every cluster would have zero diversity by construction).
229
+ // Thermal diversity is left as a within-cluster discriminator — farms that
230
+ // co-locate in the same ENF + DRAM + time bucket will still show sustained_hot
231
+ // homogeneity; organic users in the same bucket will show hot_to_cold / cooling mix.
232
+ return `${enfBucket}:${dram}:${tBucket}`;
233
+ }
234
+
235
+ // ── Bootstrap CI ──────────────────────────────────────────────────────────────
236
+
237
+ /**
238
+ * Splitmix32 deterministic PRNG seeded from input data.
239
+ * Replaces Math.random() for reproducible bootstrap results.
240
+ */
241
+ function _splitmix32(seed) {
242
+ return function() {
243
+ seed |= 0; seed = seed + 0x9e3779b9 | 0;
244
+ let t = seed ^ seed >>> 16; t = Math.imul(t, 0x21f0aaad);
245
+ t = t ^ t >>> 15; t = Math.imul(t, 0x735a2d97);
246
+ return ((t = t ^ t >>> 15) >>> 0) / 4294967296;
247
+ };
248
+ }
249
+
250
+ /**
251
+ * Non-parametric bootstrap confidence interval on the mean of a 0/1 vector.
252
+ *
253
+ * @param {number[]} values 0 (fraudulent) or 1 (authentic) per token
254
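+ * @param {number} level Confidence level below 1, e.g. 0.95 (level = 1 puts the upper index one past the last resample mean and throws)
255
+ * @param {number} iters Bootstrap iterations, at least 1 (0 leaves no resample means to index and throws)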
256
+ * @returns {[number, number]} [lo, hi] as percentages (0–100)
257
+ */
258
+ function _bootstrapCI(values, level, iters) {
259
+ const n = values.length;
260
+ if (n === 0) return [0, 0];
261
+
262
+ const seed = values.reduce((s, v) => (s * 31 + (v * 1000 | 0)) | 0, 0);
263
+ const rand = _splitmix32(seed);
264
+ const means = new Float64Array(iters);
265
+
266
+ for (let i = 0; i < iters; i++) {
267
+ let sum = 0;
268
+ for (let j = 0; j < n; j++) {
269
+ sum += values[(rand() * n) | 0];
270
+ }
271
+ means[i] = (sum / n) * 100;
272
+ }
273
+
274
+ means.sort();
275
+
276
+ const alpha = 1 - level;
277
+ const lo = means[(alpha / 2 * iters) | 0];
278
+ const hi = means[((1 - alpha / 2) * iters) | 0];
279
+
280
+ return [+lo.toFixed(1), +hi.toFixed(1)];
281
+ }
282
+
283
+ // ── Cluster fingerprinting ────────────────────────────────────────────────────
284
+
285
+ /**
286
+ * Produce a stable fingerprint for a bot cluster so the same farm can be
287
+ * recognised across multiple analysis windows.
288
+ *
289
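+ * Fingerprint components that are stable across time (the id also hashes meanEnfDev and meanIdleMs; the categorical fields are read from the first token only):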
290
+ * - ENF deviation (tied to physical location / substation)
291
+ * - DRAM verdict (tied to hardware generation)
292
+ * - Thermal label (tied to operational pattern)
293
+ *
294
+ * @param {string} key
295
+ * @param {object[]} tokens
296
+ * @param {object} pop analysePopulation result
297
+ * @returns {{ id: string, signature: object }}
298
+ */
299
+ function _fingerprint(key, tokens, pop) {
300
+ const sample = tokens[0] ?? {};
301
+ const hw = sample.hw ?? {};
302
+ const idle = sample.idle ?? {};
303
+
304
+ // Mean ENF deviation across cluster (stable for co-located devices)
305
+ const enfDevs = tokens.map(t => t.hw?.enfDev).filter(v => v != null);
306
+ const meanEnfDev = enfDevs.length
307
+ ? +(enfDevs.reduce((s, v) => s + v, 0) / enfDevs.length).toFixed(4)
308
+ : null;
309
+
310
+ // Mean idle duration (reveals script-sleep cadence)
311
+ const idleDurations = tokens.map(t => t.idle?.dMs).filter(v => v != null);
312
+ const meanIdleMs = idleDurations.length
313
+ ? Math.round(idleDurations.reduce((s, v) => s + v, 0) / idleDurations.length)
314
+ : null;
315
+
316
+ const signature = {
317
+ enfRegion: hw.enf ?? 'unknown',
318
+ dramVerdict: hw.dram ?? 'unknown',
319
+ thermalLabel: idle.therm ?? 'unknown',
320
+ meanEnfDev,
321
+ meanIdleMs,
322
+ };
323
+
324
+ // Stable ID: hash-like hex derived from the signature (deterministic, not crypto)
325
+ const sigStr = JSON.stringify(signature);
326
+ const id = 'farm_' + _djb2(sigStr).toString(16).slice(0, 8);
327
+
328
+ return { id, signature };
329
+ }
330
+
331
+ // ── Helpers ───────────────────────────────────────────────────────────────────
332
+
333
+ function _topSignals(pop) {
334
+ return Object.entries(pop.tests ?? {})
335
+ .map(([name, result]) => ({ name, score: result.score ?? 0 }))
336
+ .sort((a, b) => b.score - a.score)
337
+ .slice(0, 2)
338
+ .map(s => s.name);
339
+ }
340
+
341
+ function _recommendation(grade, botClusters) {
342
+ if (grade === 'CLEAN') {
343
+ return 'Cohort appears authentic. No action required.';
344
+ }
345
+ if (grade === 'LOW_FRAUD') {
346
+ return `${botClusters.length} suspicious cluster(s) detected. Monitor and consider manual review.`;
347
+ }
348
+ if (grade === 'MODERATE_FRAUD') {
349
+ return `${botClusters.length} bot farm cluster(s) identified. Block tokens from flagged clusters and investigate upstream traffic source.`;
350
+ }
351
+ return (
352
+ `CRITICAL: ${botClusters.length} bot farm cluster(s) account for a majority of traffic. ` +
353
+ `Suspend engagement credit for this cohort and audit the traffic acquisition channel.`
354
+ );
355
+ }
356
+
357
+ function _emptyReport() {
358
+ return {
359
+ cohortSize: 0,
360
+ estimatedHumanPct: null,
361
+ confidenceInterval: null,
362
+ confidenceLevel: 0.95,
363
+ clusterCount: 0,
364
+ botClusterCount: 0,
365
+ authenticClusterCount: 0,
366
+ authenticTokenCount: 0,
367
+ fraudulentTokenCount: 0,
368
+ botClusters: [],
369
+ grade: 'CLEAN',
370
+ label: 'No data',
371
+ color: 'bgreen',
372
+ recommendation: 'No tokens provided.',
373
+ };
374
+ }
375
+
376
+ /**
377
+ * DJB2 hash — non-cryptographic, deterministic, produces stable cluster IDs.
378
+ * @param {string} str
379
+ * @returns {number}
380
+ */
381
+ function _djb2(str) {
382
+ let h = 5381;
383
+ for (let i = 0; i < str.length; i++) {
384
+ h = ((h << 5) + h) ^ str.charCodeAt(i);
385
+ h = h >>> 0; // keep unsigned 32-bit
386
+ }
387
+ return h;
388
+ }
389
+
390
+ // ── JSDoc types ───────────────────────────────────────────────────────────────
391
+
392
+ /**
393
+ * @typedef {object} AuthenticityReport
394
+ * @property {number} cohortSize Total tokens analysed
395
+ * @property {number|null} estimatedHumanPct Estimated % of real humans (0–100)
396
+ * @property {[number,number]|null} confidenceInterval [lo, hi] at confidenceLevel
397
+ * @property {number} confidenceLevel Bootstrap CI level (e.g. 0.95)
398
+ * @property {number} clusterCount Clusters large enough to analyse (size ≥ minClusterSize)
399
+ * @property {number} botClusterCount Clusters classified as bot farms
400
+ * @property {number} authenticClusterCount Clusters classified as authentic
401
+ * @property {number} authenticTokenCount Tokens NOT in bot farm clusters
402
+ * @property {number} fraudulentTokenCount Tokens IN bot farm clusters
403
+ * @property {object[]} botClusters Per-farm breakdown (sorted by sybilScore desc)
404
+ * @property {string} grade CLEAN|LOW_FRAUD|MODERATE_FRAUD|HIGH_FRAUD
405
+ * @property {string} label Human-readable grade label
406
+ * @property {string} color ANSI color hint for terminal rendering
407
+ * @property {string} recommendation Actionable guidance string
408
+ */