@svrnsec/pulse 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +883 -782
  3. package/SECURITY.md +86 -86
  4. package/bin/svrnsec-pulse.js +7 -7
  5. package/dist/{pulse.cjs.js → pulse.cjs} +6378 -6419
  6. package/dist/pulse.cjs.map +1 -0
  7. package/dist/pulse.esm.js +6379 -6420
  8. package/dist/pulse.esm.js.map +1 -1
  9. package/index.d.ts +895 -846
  10. package/package.json +185 -184
  11. package/pkg/pulse_core.js +174 -173
  12. package/src/analysis/audio.js +213 -213
  13. package/src/analysis/authenticityAudit.js +408 -393
  14. package/src/analysis/coherence.js +502 -502
  15. package/src/analysis/coordinatedBehavior.js +825 -804
  16. package/src/analysis/heuristic.js +428 -428
  17. package/src/analysis/jitter.js +446 -446
  18. package/src/analysis/llm.js +473 -472
  19. package/src/analysis/populationEntropy.js +404 -403
  20. package/src/analysis/provider.js +248 -248
  21. package/src/analysis/refraction.js +392 -391
  22. package/src/analysis/trustScore.js +356 -356
  23. package/src/cli/args.js +36 -36
  24. package/src/cli/commands/scan.js +192 -192
  25. package/src/cli/runner.js +157 -157
  26. package/src/collector/adaptive.js +200 -200
  27. package/src/collector/bio.js +297 -287
  28. package/src/collector/canvas.js +247 -239
  29. package/src/collector/dram.js +203 -203
  30. package/src/collector/enf.js +311 -311
  31. package/src/collector/entropy.js +195 -195
  32. package/src/collector/gpu.js +248 -245
  33. package/src/collector/idleAttestation.js +480 -480
  34. package/src/collector/sabTimer.js +189 -191
  35. package/src/fingerprint.js +475 -475
  36. package/src/index.js +342 -342
  37. package/src/integrations/react-native.js +462 -459
  38. package/src/integrations/react.js +184 -185
  39. package/src/middleware/express.js +155 -155
  40. package/src/middleware/next.js +174 -175
  41. package/src/proof/challenge.js +249 -249
  42. package/src/proof/engagementToken.js +426 -394
  43. package/src/proof/fingerprint.js +268 -268
  44. package/src/proof/validator.js +82 -142
  45. package/src/registry/serializer.js +349 -349
  46. package/src/terminal.js +263 -263
  47. package/src/update-notifier.js +259 -264
  48. package/dist/pulse.cjs.js.map +0 -1
@@ -1,804 +1,825 @@
1
- /**
2
- * @svrnsec/pulse — Coordinated Inauthentic Behavior Detection
3
- *
4
- * Detects bot armies, click farms, and Sybil networks by analyzing
5
- * physics-layer correlations that coordination inevitably creates.
6
- *
7
- * Core insight:
8
- * Two real users in different cities have ZERO mutual information
9
- * between their thermal curves, clock drift rates, ENF phases, and
10
- * idle durations. Bot farms can randomize any ONE signal but cannot
11
- * independently decorrelate ALL signals simultaneously — they're
12
- * bound to shared physics (same room, same hardware, same scripts).
13
- *
14
- * Five detection layers:
15
- *
16
- * Layer 1 — Temporal Clustering
17
- * Real users arrive via Poisson process. Bot armies arrive in bursts
18
- * from a command server. Chi-squared test on 1s-bucket histogram.
19
- *
20
- * Layer 2 — Signal Fingerprint Collision
21
- * Hash (thermal_label, entropy_band, motor_band) per token. Real
22
- * cohort: high cardinality. Bot farm: < 20 unique fingerprints
23
- * across 500 tokens.
24
- *
25
- * Layer 3 — Drift Fingerprinting
26
- * Crystal oscillator imperfection (20–100 ppm) creates a unique
27
- * clock drift rate per physical device. Multiple submissions from
28
- * "different devices" that converge on the same drift rate = same
29
- * hardware behind a rotation proxy. Survives IP/account/browser
30
- * rotation.
31
- *
32
- * Layer 4 — Mutual Information Matrix
33
- * Pairwise MI across all signal dimensions. Organic traffic: sparse
34
- * random MI matrix. Bot traffic: block-diagonal structure (cliques).
35
- * Louvain community detection finds the cliques in O(n log n).
36
- *
37
- * Layer 5 — Entropy Velocity
38
- * Track dH/dt — the rate of Shannon entropy growth in the signal
39
- * space. Organic growth adds unique profiles; bot deployment adds
40
- * volume without diversity. The ratio (observed dH/dt) / (expected)
41
- * catches mass deployment even with real hardware.
42
- *
43
- * Computational cost:
44
- * All operations are O(n) or O(n log n). 500 tokens ≈ 8ms total
45
- * on a single CPU core. No ML, no GPU, no training data.
46
- *
47
- * Usage:
48
- * import { analyseCoordination } from '@svrnsec/pulse/coordination'
49
- * const result = analyseCoordination(tokens, { windowMs: 60000 })
50
- */
51
-
52
- // ═══════════════════════════════════════════════════════════════════════════════
53
- // Utility
54
- // ═══════════════════════════════════════════════════════════════════════════════
55
-
56
/** Arithmetic mean of an array; 0 for an empty array. */
function mean(a) {
  if (a.length === 0) return 0;
  let sum = 0;
  for (const v of a) sum += v;
  return sum / a.length;
}

/** Sample variance (n − 1 denominator); 0 for fewer than two values. */
function variance(a) {
  const m = mean(a);
  if (a.length < 2) return 0;
  let acc = 0;
  for (const v of a) acc += (v - m) ** 2;
  return acc / (a.length - 1);
}

/** Sample standard deviation. */
function stddev(a) {
  return Math.sqrt(variance(a));
}

/** Coefficient of variation (stddev / mean); 0 when the mean is 0. */
function cv(a) {
  const m = mean(a);
  if (m === 0) return 0;
  return stddev(a) / m;
}

/** Clamp v into the closed interval [lo, hi]. */
function clamp(v, lo, hi) {
  return Math.max(lo, Math.min(hi, v));
}
61
-
62
/**
 * Shannon entropy (in bits) of a discrete distribution given as an
 * array of non-negative counts. Empty or all-zero input yields 0.
 *
 * @param {number[]} counts - occurrence count per category
 * @returns {number} entropy in bits
 */
function shannonEntropy(counts) {
  let total = 0;
  for (const c of counts) total += c;
  if (total === 0) return 0;

  return counts.reduce((H, c) => {
    if (c <= 0) return H; // zero-count categories contribute nothing
    const p = c / total;
    return H - p * Math.log2(p);
  }, 0);
}
78
-
79
/**
 * Discretize a continuous value into one of `bins` equal-width bands.
 * Values outside [lo, hi] clamp to the first/last bin; a degenerate
 * range (hi === lo) always maps to bin 0.
 *
 * @param {number} v - value to discretize
 * @param {number} lo - band floor
 * @param {number} hi - band ceiling
 * @param {number} bins - number of bins
 * @returns {number} bin index in [0, bins - 1]
 */
function band(v, lo, hi, bins) {
  if (hi === lo) return 0;
  const raw = Math.floor(((v - lo) / (hi - lo)) * bins);
  return Math.min(bins - 1, Math.max(0, raw));
}
91
-
92
- // ═══════════════════════════════════════════════════════════════════════════════
93
- // Layer 1 — Temporal Clustering
94
- // ═══════════════════════════════════════════════════════════════════════════════
95
-
96
/**
 * Layer 1 — decide whether arrival timestamps look Poisson (organic)
 * or bursty (coordinated command-and-control dispatch).
 *
 * Timestamps are bucketed into fixed intervals and a chi-squared
 * statistic is computed against the uniform-rate expectation; a large
 * normalized chi-squared means arrivals cluster into bursts.
 *
 * @param {number[]} timestamps - sorted epoch-ms arrival times
 * @param {object} [opts]
 * @param {number} [opts.bucketMs=1000] - bucket width in ms
 * @returns {{ score: number, burstRatio: number, chi2: number, chi2Norm?: number, pBursty: number }}
 *   score: 0 = organic, 100 = coordinated. chi2Norm is omitted on the
 *   insufficient-data early returns.
 */
export function testTemporalClustering(timestamps, opts = {}) {
  const n = timestamps.length;
  if (n < 5) return { score: 50, burstRatio: 0, chi2: 0, pBursty: 0 };

  const bucketMs = opts.bucketMs ?? 1000;
  const tMin = timestamps[0];
  const span = timestamps[n - 1] - tMin;
  if (span < bucketMs) return { score: 50, burstRatio: 0, chi2: 0, pBursty: 0 };

  // Histogram of arrivals per bucket (last bucket absorbs the edge).
  const numBuckets = Math.ceil(span / bucketMs);
  const buckets = new Array(numBuckets).fill(0);
  for (const t of timestamps) {
    buckets[Math.min(numBuckets - 1, Math.floor((t - tMin) / bucketMs))]++;
  }

  // Chi-squared against the uniform (Poisson-rate) expectation.
  const expected = n / numBuckets;
  let chi2 = 0;
  for (const obs of buckets) {
    const d = obs - expected;
    chi2 += (d * d) / expected;
  }

  // Fraction of all tokens landing in the densest 10% of buckets.
  const byDensity = [...buckets].sort((a, b) => b - a);
  const top10pct = Math.max(1, Math.ceil(numBuckets * 0.1));
  let inTop = 0;
  for (let i = 0; i < top10pct; i++) inTop += byDensity[i];
  const burstRatio = inTop / n;

  // chi2/df ≈ 1 under Poisson; well above 1 indicates bursts.
  const chi2Norm = chi2 / Math.max(1, numBuckets - 1);
  const pBursty = clamp((chi2Norm - 1.5) / 4, 0, 1);

  // Blend burstiness probability with the raw burst ratio.
  const burstTerm = burstRatio > 0.5 ? 40 : burstRatio * 80;
  const score = Math.round(clamp(pBursty * 60 + burstTerm, 0, 100));

  return {
    score,
    burstRatio: +burstRatio.toFixed(4),
    chi2: +chi2.toFixed(2),
    chi2Norm: +chi2Norm.toFixed(3),
    pBursty: +pBursty.toFixed(4),
  };
}
151
-
152
- // ═══════════════════════════════════════════════════════════════════════════════
153
- // Layer 2 — Signal Fingerprint Collision
154
- // ═══════════════════════════════════════════════════════════════════════════════
155
-
156
/**
 * Layer 2 — measure fingerprint collision rate across a token cohort.
 *
 * Each token is reduced to a discrete physics fingerprint. Organic
 * cohorts show many unique fingerprints; cloned bot environments
 * collapse onto a handful of duplicates.
 *
 * @param {object[]} tokens - token objects
 * @param {object} [opts]
 * @param {Function} [opts.fingerprint] - custom fingerprint fn(token) → string
 * @returns {{ score: number, uniqueRatio: number, topCollision: number, topRatio?: number, uniqueCount: number }}
 *   topRatio is omitted on the n < 5 early return.
 */
export function testFingerprintCollision(tokens, opts = {}) {
  const n = tokens.length;
  if (n < 5) return { score: 0, uniqueRatio: 1, topCollision: 0, uniqueCount: n };

  const toKey = opts.fingerprint ?? defaultFingerprint;

  // Occurrence count per fingerprint.
  const histogram = new Map();
  for (const token of tokens) {
    const key = toKey(token);
    histogram.set(key, (histogram.get(key) ?? 0) + 1);
  }

  const uniqueCount = histogram.size;
  const uniqueRatio = uniqueCount / n;
  const topCollision = Math.max(...histogram.values());
  const topRatio = topCollision / n;

  // Piecewise score: real cohorts sit above 0.40 unique ratio for
  // n > 50; farms fall below 0.15.
  let score;
  if (uniqueRatio > 0.40) {
    score = Math.round(clamp(((0.60 - uniqueRatio) / 0.20) * 30, 0, 30));
  } else if (uniqueRatio > 0.15) {
    score = Math.round(30 + ((0.40 - uniqueRatio) / 0.25) * 40);
  } else {
    score = Math.round(70 + ((0.15 - uniqueRatio) / 0.15) * 30);
  }

  // Bonus when a single fingerprint holds more than 30% of the cohort.
  if (topRatio > 0.30) {
    score = Math.min(100, score + Math.round((topRatio - 0.30) * 60));
  }

  return {
    score: clamp(score, 0, 100),
    uniqueRatio: +uniqueRatio.toFixed(4),
    topCollision,
    topRatio: +topRatio.toFixed(4),
    uniqueCount,
  };
}
209
-
210
/**
 * Default token fingerprint: thermal label, entropy band, motor band,
 * and DRAM label joined into one discrete `a:b:c:d` key.
 *
 * Reads both the compact field names (idle.therm, hw.ent, evt.mot)
 * and their verbose fallbacks, so tokens from either schema hash
 * consistently.
 */
function defaultFingerprint(token) {
  const idle = token.idle ?? token.signals?.idle ?? {};
  const hw = token.hw ?? token.signals?.entropy ?? {};
  const evt = token.evt ?? token.signals?.motor ?? {};

  const parts = [
    idle.therm ?? idle.thermalTransition ?? 'unknown',
    band(hw.ent ?? hw.score ?? 0.5, 0, 1, 10),
    band(evt.mot ?? evt.consistency ?? 0.5, 0, 1, 5),
    hw.dram ?? idle.dMs ?? 'unknown',
  ];
  return parts.join(':');
}
225
-
226
- // ═══════════════════════════════════════════════════════════════════════════════
227
- // Layer 3 — Drift Fingerprinting
228
- // ═══════════════════════════════════════════════════════════════════════════════
229
-
230
/**
 * Layer 3 — detect clock-drift convergence across devices.
 *
 * Each crystal oscillator drifts at its own rate, so per-device drift
 * rates act as a hardware signature. When many "different" devices
 * collapse into the same drift-rate bin, they are likely one physical
 * machine behind rotating proxies/accounts — the signal survives
 * IP/account/browser rotation.
 *
 * @param {object[]} devices - [{ id, submissions: [{ ts, serverTs }] }]
 * @param {object} [opts]
 * @param {number} [opts.driftBinPpm=5] - drift-rate bin width in ppm
 * @param {number} [opts.minSubmissions=3] - min submissions per device
 * @param {number} [opts.collisionThreshold=0.30] - cluster fraction that triggers scoring
 * @returns {{ score: number, driftRates: Map, largestCluster: number, clusterRatio: number, uniqueBins?: number, totalDevices: number }}
 */
export function testDriftFingerprint(devices, opts = {}) {
  const driftBinPpm = opts.driftBinPpm ?? 5;
  const minSubmissions = opts.minSubmissions ?? 3;
  const collisionThresh = opts.collisionThreshold ?? 0.30;

  // Per-device drift rate (ms/s) via linear regression over submissions.
  const rateMap = new Map();
  const rates = [];
  for (const dev of devices) {
    if (!dev.submissions || dev.submissions.length < minSubmissions) continue;
    const drift = computeDriftRate(dev.submissions);
    if (drift === null) continue;
    rates.push(drift);
    rateMap.set(dev.id, drift);
  }

  // Fewer than 3 usable devices: clustering is meaningless.
  if (rates.length < 3) {
    return { score: 0, driftRates: rateMap, largestCluster: 0, clusterRatio: 0, totalDevices: rates.length };
  }

  // Histogram drift rates into ppm-wide bins.
  const bins = new Map();
  for (const rate of rates) {
    const ppm = rate * 1000; // ms/s → μs/s ≈ ppm
    const key = Math.round(ppm / driftBinPpm) * driftBinPpm;
    bins.set(key, (bins.get(key) ?? 0) + 1);
  }

  let largestCluster = 0;
  for (const count of bins.values()) {
    if (count > largestCluster) largestCluster = count;
  }

  const clusterRatio = largestCluster / rates.length;
  const uniqueBins = bins.size;
  // Independent oscillators spread over roughly an 80 ppm range.
  const expectedBins = Math.min(rates.length, Math.ceil(80 / driftBinPpm));

  // Dominant cluster and/or too few occupied bins ⇒ shared hardware.
  let score = 0;
  if (clusterRatio >= collisionThresh) {
    score += Math.round(((clusterRatio - collisionThresh + 0.05) / (1 - collisionThresh)) * 60);
  }
  if (uniqueBins < expectedBins * 0.5) {
    score += Math.round((1 - uniqueBins / expectedBins) * 40);
  }

  return {
    score: clamp(score, 0, 100),
    driftRates: rateMap,
    largestCluster,
    clusterRatio: +clusterRatio.toFixed(4),
    uniqueBins,
    totalDevices: rates.length,
  };
}
312
-
313
/**
 * Estimate a device's clock drift rate from repeated submissions.
 *
 * Fits a least-squares line to (elapsed server seconds, client − server
 * offset in ms); the slope is the drift rate in ms per second.
 *
 * @param {object[]} subs - [{ ts: clientEpochMs, serverTs: serverEpochMs }]
 * @returns {number|null} drift rate in ms/s, or null when fewer than
 *   two points or all points share one server timestamp
 */
export function computeDriftRate(subs) {
  const n = subs.length;
  if (n < 2) return null;

  const t0 = subs[0].serverTs;
  let sumX = 0;
  let sumY = 0;
  const pts = subs.map((s) => {
    const x = (s.serverTs - t0) / 1000; // elapsed server time, seconds
    const y = s.ts - s.serverTs;        // client-vs-server offset, ms
    sumX += x;
    sumY += y;
    return [x, y];
  });

  const xm = sumX / n;
  const ym = sumY / n;

  let num = 0;
  let den = 0;
  for (const [x, y] of pts) {
    num += (x - xm) * (y - ym);
    den += (x - xm) ** 2;
  }

  // A zero denominator means no spread in server time — slope undefined.
  return den === 0 ? null : num / den;
}
341
-
342
- // ═══════════════════════════════════════════════════════════════════════════════
343
- // Layer 4 — Mutual Information Matrix
344
- // ═══════════════════════════════════════════════════════════════════════════════
345
-
346
/**
 * Layer 4 — community structure in the pairwise-similarity graph.
 *
 * Tokens are projected to discrete signal vectors; near-identical
 * vectors are linked, and Louvain-lite community detection looks for
 * the block-diagonal cliques that coordinated traffic produces.
 * Organic traffic yields many tiny communities instead.
 *
 * @param {object[]} tokens
 * @param {object} [opts]
 * @param {number} [opts.bins=8] - discretization bins per continuous signal
 * @returns {{ score: number, communities: number, largestCommunity: number, communityRatio: number, modularity: number }}
 */
export function testMutualInformation(tokens, opts = {}) {
  const n = tokens.length;
  if (n < 10) {
    return { score: 0, communities: 1, largestCommunity: n, communityRatio: 1, modularity: 0 };
  }

  const bins = opts.bins ?? 8;
  const vectors = tokens.map((t) => tokenToVector(t, bins));

  // Bucketed similarity graph avoids the O(n²) all-pairs comparison.
  const { adjacency, edges } = buildSimilarityGraph(vectors, 0.7);
  if (edges === 0) {
    // Nothing resembles anything else — fully organic.
    return { score: 0, communities: n, largestCommunity: 1, communityRatio: 1 / n, modularity: 0 };
  }

  const { communities, modularity } = louvainLite(adjacency, n);

  // Tally community sizes and find the dominant one.
  const sizes = new Map();
  for (const c of communities) sizes.set(c, (sizes.get(c) ?? 0) + 1);
  let largestCommunity = 0;
  for (const size of sizes.values()) {
    if (size > largestCommunity) largestCommunity = size;
  }

  const communityRatio = largestCommunity / n;
  const numCommunities = sizes.size;

  let score = 0;
  // One community holding a large share of all tokens.
  if (communityRatio > 0.4) score += Math.round((communityRatio - 0.4) * 100);
  // Too few communities relative to the √n organic rough estimate.
  const expectedCommunities = Math.sqrt(n);
  if (numCommunities < expectedCommunities * 0.5) {
    score += Math.round((1 - numCommunities / expectedCommunities) * 30);
  }
  // Clear modular structure dominated by one block.
  if (modularity > 0.3 && communityRatio > 0.3) score += 20;

  return {
    score: clamp(score, 0, 100),
    communities: numCommunities,
    largestCommunity,
    communityRatio: +communityRatio.toFixed(4),
    modularity: +modularity.toFixed(4),
  };
}
420
-
421
/**
 * Project a token into a 5-element discrete signal vector:
 * [entropy band, thermal index, motor band, idle-duration band, ENF band].
 * Missing signals fall back to neutral mid-range defaults.
 */
function tokenToVector(token, bins) {
  const idle = token.idle ?? token.signals?.idle ?? {};
  const hw = token.hw ?? token.signals?.entropy ?? {};
  const evt = token.evt ?? token.signals?.motor ?? {};
  const enf = hw.enfDev ?? token.enfDev ?? 0;

  const entropyBand = band(hw.ent ?? hw.score ?? 0.5, 0, 1, bins);
  const thermalIdx = thermalToIndex(idle.therm ?? idle.thermalTransition ?? 'unknown');
  const motorBand = band(evt.mot ?? evt.consistency ?? 0.5, 0, 1, bins);
  const idleBand = band(idle.dMs ?? idle.s ?? 0, 0, 300, bins); // idle duration in seconds
  const enfBand = band(enf, -0.05, 0.05, bins);

  return [entropyBand, thermalIdx, motorBand, idleBand, enfBand];
}
438
-
439
// Stable ordinal encoding of thermal-transition labels; anything
// unrecognized maps to the 'unknown' slot (6).
const THERMAL_MAP = {
  hot_to_cold: 0,
  cold: 1,
  cooling: 2,
  warming: 3,
  sustained_hot: 4,
  step_function: 5,
  unknown: 6,
};

/** Map a thermal label to its ordinal index (6 when unrecognized). */
function thermalToIndex(label) {
  const idx = THERMAL_MAP[label];
  return idx ?? 6;
}
441
-
442
/**
 * Build an undirected similarity graph over discrete signal vectors.
 *
 * Vectors are bucketed by their exact string key, so identical vectors
 * connect in O(n · k) rather than O(n²); bucket keys at Hamming
 * distance ≤ 1 are then linked pairwise. `edges` counts each
 * undirected edge once, while the adjacency lists carry both
 * directions.
 *
 * NOTE(review): `threshold` is accepted but currently unused — the
 * effective similarity cut-off is the fixed Hamming-distance ≤ 1 rule.
 */
function buildSimilarityGraph(vectors, threshold) {
  const n = vectors.length;
  const adjacency = Array.from({ length: n }, () => []);
  let edges = 0;

  const connect = (i, j) => {
    adjacency[i].push(j);
    adjacency[j].push(i);
    edges++;
  };

  // Group vector indices by exact key.
  const buckets = new Map();
  vectors.forEach((vec, i) => {
    const key = vec.join(',');
    const group = buckets.get(key);
    if (group) group.push(i);
    else buckets.set(key, [i]);
  });

  // Identical vectors: connect every pair within a bucket.
  for (const group of buckets.values()) {
    for (let a = 0; a < group.length; a++) {
      for (let b = a + 1; b < group.length; b++) connect(group[a], group[b]);
    }
  }

  // Near-identical vectors: connect buckets whose keys differ in ≤ 1 slot.
  const keys = [...buckets.keys()];
  for (let a = 0; a < keys.length; a++) {
    const va = keys[a].split(',').map(Number);
    for (let b = a + 1; b < keys.length; b++) {
      const vb = keys[b].split(',').map(Number);
      let dist = 0;
      for (let d = 0; d < va.length; d++) {
        if (va[d] !== vb[d]) dist++;
      }
      if (dist > 1) continue;
      for (const i of buckets.get(keys[a])) {
        for (const j of buckets.get(keys[b])) connect(i, j);
      }
    }
  }

  return { adjacency, edges };
}
497
-
498
/**
 * Louvain-lite: single-level greedy modularity maximization.
 *
 * Starts with every node in its own community, then repeatedly sweeps
 * all nodes, moving each to the neighboring community with the best
 * (simplified) modularity gain. Stops when a full sweep makes no move
 * or after 10 sweeps. Finishes by computing the modularity Q of the
 * final partition.
 *
 * NOTE(review): both the per-move delta and the final Q are simplified
 * forms of Newman modularity — Q sums only over present edges (A_ij = 1
 * pairs) and is floored at 0 before returning; verify against the full
 * definition if exact modularity values matter downstream.
 *
 * @param {number[][]} adjacency - adjacency lists (both directions present)
 * @param {number} n - node count
 * @returns {{ communities: number[], modularity: number }} community id
 *   per node, plus the (floored) modularity of the partition
 */
function louvainLite(adjacency, n) {
  // Initialize: each node in its own community
  const comm = new Array(n);
  for (let i = 0; i < n; i++) comm[i] = i;

  // Compute total edges (2m): each undirected edge appears twice in
  // the adjacency lists, so the summed list lengths equal 2m.
  let twoM = 0;
  for (let i = 0; i < n; i++) twoM += adjacency[i].length;
  if (twoM === 0) return { communities: comm, modularity: 0 };

  // Degree of each node
  const deg = adjacency.map(a => a.length);

  // Single pass: try to move each node to its best neighbor's community
  let changed = true;
  let passes = 0;
  while (changed && passes < 10) {
    changed = false;
    passes++;
    for (let i = 0; i < n; i++) {
      if (adjacency[i].length === 0) continue; // isolated nodes never move

      // Count edges to each neighboring community
      const commEdges = new Map();
      for (const j of adjacency[i]) {
        const c = comm[j];
        commEdges.set(c, (commEdges.get(c) ?? 0) + 1);
      }

      // Find best community (highest modularity gain)
      let bestComm = comm[i];
      let bestDelta = 0; // only strictly positive gains trigger a move

      for (const [c, eic] of commEdges) {
        if (c === comm[i]) continue;
        // Simplified modularity delta: edges into c minus the
        // degree-product expectation term.
        const delta = eic / twoM - (deg[i] * communityDegree(comm, deg, c, n)) / (twoM * twoM);
        if (delta > bestDelta) {
          bestDelta = delta;
          bestComm = c;
        }
      }

      if (bestComm !== comm[i]) {
        comm[i] = bestComm;
        changed = true;
      }
    }
  }

  // Compute modularity of the final partition (simplified: sum over
  // existing same-community edges only).
  let Q = 0;
  for (let i = 0; i < n; i++) {
    for (const j of adjacency[i]) {
      if (comm[i] === comm[j]) {
        Q += 1 - (deg[i] * deg[j]) / twoM;
      }
    }
  }
  Q /= twoM;

  return { communities: comm, modularity: Math.max(0, Q) };
}
566
-
567
/** Sum of degrees of all nodes currently assigned to community c. */
function communityDegree(comm, deg, c, n) {
  let total = 0;
  for (let i = 0; i < n; i++) {
    if (comm[i] !== c) continue;
    total += deg[i];
  }
  return total;
}
574
-
575
- // ═══════════════════════════════════════════════════════════════════════════════
576
- // Layer 5 — Entropy Velocity
577
- // ═══════════════════════════════════════════════════════════════════════════════
578
-
579
/**
 * Layer 5 — entropy velocity: does signal diversity keep pace with
 * traffic volume?
 *
 * The cohort is split into cumulative temporal slices; Shannon entropy
 * of the fingerprint distribution is computed for each. Organic growth
 * raises entropy roughly like log2(n); bot deployment adds volume with
 * little new diversity, so the observed/expected growth ratio drops.
 *
 * @param {object[]} tokens - chronologically sorted
 * @param {object} [opts]
 * @param {number} [opts.slices=5] - number of temporal slices
 * @returns {{ score: number, velocityRatio: number, entropySlices: number[], countSlices: number[] }}
 */
export function testEntropyVelocity(tokens, opts = {}) {
  const n = tokens.length;
  const numSlices = opts.slices ?? 5;
  if (n < numSlices * 3) return { score: 0, velocityRatio: 1, entropySlices: [], countSlices: [] };

  // Cumulative slices: slice s covers tokens [0, (s+1)*sliceSize).
  const sliceSize = Math.ceil(n / numSlices);
  const entropySlices = [];
  const countSlices = [];
  const cumulativeEntropies = [];

  for (let s = 0; s < numSlices; s++) {
    const end = Math.min(n, (s + 1) * sliceSize);

    // Entropy of the fingerprint distribution over the cumulative prefix.
    const histogram = new Map();
    for (const t of tokens.slice(0, end)) {
      const key = defaultFingerprint(t);
      histogram.set(key, (histogram.get(key) ?? 0) + 1);
    }
    const H = shannonEntropy([...histogram.values()]);

    entropySlices.push(+H.toFixed(4));
    countSlices.push(end);
    cumulativeEntropies.push(H);
  }

  const H_first = cumulativeEntropies[0];
  const H_last = cumulativeEntropies[cumulativeEntropies.length - 1];
  const n_first = countSlices[0];
  const n_last = countSlices[countSlices.length - 1];

  // Window never grew between first and last slice — nothing to measure.
  if (n_first === n_last) {
    return { score: 0, velocityRatio: 1, entropySlices, countSlices };
  }

  // Near-zero entropy across a sizable cohort: extreme clone signature.
  if (H_last < 0.5 && n_last >= 20) {
    const score = Math.round(clamp(80 + (0.5 - H_last) * 40, 80, 100));
    return { score, velocityRatio: 0, entropySlices, countSlices };
  }

  // Degenerate first slice: fall back to an absolute entropy check.
  if (H_first < 0.01) {
    const expectedH = Math.log2(Math.max(2, n_last * 0.3)); // organic expectation
    const ratio = H_last / expectedH;
    return {
      score: Math.round(clamp((1 - ratio) * 100, 0, 100)),
      velocityRatio: +ratio.toFixed(4),
      entropySlices,
      countSlices,
    };
  }

  // Ratio of observed entropy growth to the log-growth expectation.
  const expectedGrowth = Math.log2(n_last) / Math.log2(n_first);
  const actualGrowth = H_last / Math.max(0.01, H_first);
  const velocityRatio = actualGrowth / expectedGrowth;

  // < 0.6: entropy lags traffic (artificial); ≈ 1.0: organic;
  // > 1.2 may just be natural diversification.
  let score;
  if (velocityRatio >= 0.8) {
    score = Math.round(clamp((1.0 - velocityRatio) * 50, 0, 20));
  } else if (velocityRatio >= 0.5) {
    score = Math.round(20 + ((0.8 - velocityRatio) / 0.3) * 50);
  } else {
    score = Math.round(70 + ((0.5 - velocityRatio) / 0.5) * 30);
  }

  return {
    score: clamp(score, 0, 100),
    velocityRatio: +velocityRatio.toFixed(4),
    entropySlices,
    countSlices,
  };
}
678
-
679
- // ═══════════════════════════════════════════════════════════════════════════════
680
- // Orchestrator — Coordinated Behavior Analysis
681
- // ═══════════════════════════════════════════════════════════════════════════════
682
-
683
/**
 * Full coordination analysis across all 5 detection layers.
 *
 * Runs the temporal-clustering, fingerprint-collision, clock-drift,
 * mutual-information, and entropy-velocity tests, then fuses their
 * 0–100 scores with fixed weights into a single coordination score,
 * a verdict, a confidence value, and advisory flags.
 *
 * @param {object[]} tokens - engagement tokens with physics signals
 * @param {object} [opts]
 * @param {object[]} [opts.devices] - device submission histories for drift analysis
 * @param {number} [opts.windowMs] - analysis window (default: 60000ms)
 *   NOTE(review): windowMs is documented but not read in this function —
 *   confirm whether callers still rely on it.
 * @returns {CoordinationResult}
 */
export function analyseCoordination(tokens, opts = {}) {
  const n = tokens.length;
  // Below 5 tokens no layer produces a meaningful signal.
  if (n < 5) {
    return {
      coordinationScore: 0,
      verdict: 'insufficient_data',
      confidence: 0,
      layers: {},
      tokenCount: n,
    };
  }

  // Extract timestamps (falling back to "now" when a token carries none).
  const timestamps = tokens.map(t =>
    t.iat ?? t.timestamp ?? t.ts ?? Date.now()
  ).sort((a, b) => a - b);

  // ── Layer 1: Temporal Clustering ──
  const temporal = testTemporalClustering(timestamps);

  // ── Layer 2: Fingerprint Collision ──
  const fingerprint = testFingerprintCollision(tokens);

  // ── Layer 3: Drift Fingerprinting (needs ≥ 3 device histories) ──
  const devices = opts.devices ?? [];
  const drift = devices.length >= 3
    ? testDriftFingerprint(devices)
    : { score: 0, totalDevices: 0 };

  // ── Layer 4: Mutual Information ──
  const mi = testMutualInformation(tokens);

  // ── Layer 5: Entropy Velocity ──
  const velocity = testEntropyVelocity(tokens);

  // ── Weighted Fusion ──
  // Weights reflect each layer's discriminative power and evasion cost
  const weights = {
    temporal: 0.15, // easy to randomize, but still catches lazy farms
    fingerprint: 0.25, // hard to fake without real hardware diversity
    drift: 0.15, // only fires with multi-submission data
    mi: 0.25, // hardest to evade — requires true signal independence
    velocity: 0.20, // catches mass deployment timing
  };

  // If no drift data, redistribute its weight across the other layers.
  const hasDrift = drift.totalDevices >= 3;
  const effectiveWeights = hasDrift ? weights : {
    temporal: 0.18,
    fingerprint: 0.28,
    drift: 0,
    mi: 0.30,
    velocity: 0.24,
  };

  const raw =
    temporal.score * effectiveWeights.temporal +
    fingerprint.score * effectiveWeights.fingerprint +
    drift.score * effectiveWeights.drift +
    mi.score * effectiveWeights.mi +
    velocity.score * effectiveWeights.velocity;

  const coordinationScore = Math.round(clamp(raw, 0, 100));

  // Confidence: higher with more tokens (saturating at 100) and more
  // layers contributing. Note the drift layer counts as active whenever
  // it ran, regardless of its score.
  const activeLayers = [temporal, fingerprint, mi, velocity].filter(l => l.score > 0).length + (hasDrift ? 1 : 0);
  const confidence = clamp(
    (Math.min(n, 100) / 100) * 0.5 + (activeLayers / 5) * 0.5,
    0, 1
  );

  // Verdict thresholds on the fused score.
  let verdict;
  if (coordinationScore >= 70) verdict = 'coordinated_inauthentic';
  else if (coordinationScore >= 45) verdict = 'suspicious_coordination';
  else if (coordinationScore >= 25) verdict = 'low_coordination';
  else verdict = 'organic';

  // Advisory flags for individual layers that fired strongly.
  const flags = [];
  if (temporal.score >= 60) flags.push('BURST_ARRIVAL_PATTERN');
  if (fingerprint.score >= 60) flags.push('LOW_FINGERPRINT_DIVERSITY');
  if (drift.score >= 50) flags.push('CLOCK_DRIFT_CONVERGENCE');
  if (mi.score >= 50) flags.push('SIGNAL_CLIQUE_DETECTED');
  if (velocity.score >= 50) flags.push('ENTROPY_GROWTH_STALLED');

  return {
    coordinationScore,
    verdict,
    confidence: +confidence.toFixed(3),
    flags,
    layers: {
      temporal,
      fingerprint,
      drift: hasDrift ? drift : { score: 0, skipped: true, reason: 'insufficient_device_history' },
      mutualInformation: mi,
      entropyVelocity: velocity,
    },
    tokenCount: n,
    weights: effectiveWeights,
  };
}
794
-
795
- /**
796
- * @typedef {object} CoordinationResult
797
- * @property {number} coordinationScore - 0–100, higher = more coordinated
798
- * @property {string} verdict - 'organic' | 'low_coordination' | 'suspicious_coordination' | 'coordinated_inauthentic'
799
- * @property {number} confidence - 0–1, based on token count and active layers
800
- * @property {string[]} flags - advisory flags for specific signals
801
- * @property {object} layers - per-layer results
802
- * @property {number} tokenCount - number of tokens analyzed
803
- * @property {object} weights - effective layer weights used
804
- */
1
+ /**
2
+ * @svrnsec/pulse — Coordinated Inauthentic Behavior Detection
3
+ *
4
+ * Detects bot armies, click farms, and Sybil networks by analyzing
5
+ * physics-layer correlations that coordination inevitably creates.
6
+ *
7
+ * Core insight:
8
+ * Two real users in different cities have ZERO mutual information
9
+ * between their thermal curves, clock drift rates, ENF phases, and
10
+ * idle durations. Bot farms can randomize any ONE signal but cannot
11
+ * independently decorrelate ALL signals simultaneously — they're
12
+ * bound to shared physics (same room, same hardware, same scripts).
13
+ *
14
+ * Five detection layers:
15
+ *
16
+ * Layer 1 — Temporal Clustering
17
+ * Real users arrive via Poisson process. Bot armies arrive in bursts
18
+ * from a command server. Chi-squared test on 1s-bucket histogram.
19
+ *
20
+ * Layer 2 — Signal Fingerprint Collision
21
+ * Hash (thermal_label, entropy_band, motor_band) per token. Real
22
+ * cohort: high cardinality. Bot farm: < 20 unique fingerprints
23
+ * across 500 tokens.
24
+ *
25
+ * Layer 3 — Drift Fingerprinting
26
+ * Crystal oscillator imperfection (20–100 ppm) creates a unique
27
+ * clock drift rate per physical device. Multiple submissions from
28
+ * "different devices" that converge on the same drift rate = same
29
+ * hardware behind a rotation proxy. Survives IP/account/browser
30
+ * rotation.
31
+ *
32
+ * Layer 4 — Mutual Information Matrix
33
+ * Pairwise MI across all signal dimensions. Organic traffic: sparse
34
+ * random MI matrix. Bot traffic: block-diagonal structure (cliques).
35
+ * Louvain community detection finds the cliques in O(n log n).
36
+ *
37
+ * Layer 5 — Entropy Velocity
38
+ * Track dH/dt — the rate of Shannon entropy growth in the signal
39
+ * space. Organic growth adds unique profiles; bot deployment adds
40
+ * volume without diversity. The ratio (observed dH/dt) / (expected)
41
+ * catches mass deployment even with real hardware.
42
+ *
43
+ * Computational cost:
44
+ * All operations are O(n) or O(n log n). 500 tokens ≈ 8ms total
45
+ * on a single CPU core. No ML, no GPU, no training data.
46
+ *
47
+ * Usage:
48
+ * import { analyseCoordination } from '@svrnsec/pulse/coordination'
49
+ * const result = analyseCoordination(tokens, { windowMs: 60000 })
50
+ */
51
+
52
+ // ═══════════════════════════════════════════════════════════════════════════════
53
+ // Utility
54
+ // ═══════════════════════════════════════════════════════════════════════════════
55
+
56
/** Arithmetic mean of an array; 0 for an empty array. */
function mean(a) {
  if (a.length === 0) return 0;
  let sum = 0;
  for (const v of a) sum += v;
  return sum / a.length;
}

/** Sample variance (n−1 denominator); 0 for fewer than two values. */
function variance(a) {
  if (a.length < 2) return 0;
  const m = mean(a);
  let acc = 0;
  for (const v of a) acc += (v - m) ** 2;
  return acc / (a.length - 1);
}

/** Sample standard deviation. */
function stddev(a) {
  return Math.sqrt(variance(a));
}

/** Coefficient of variation (stddev / mean); 0 when the mean is 0. */
function cv(a) {
  const m = mean(a);
  return m === 0 ? 0 : stddev(a) / m;
}

/** Constrain v to the closed interval [lo, hi]. */
function clamp(v, lo, hi) {
  return Math.min(hi, Math.max(lo, v));
}
61
+
62
/**
 * Shannon entropy (in bits) of a discrete distribution given as raw counts.
 * Non-positive counts contribute nothing; an empty or all-zero array yields 0.
 * @param {number[]} counts
 * @returns {number} entropy in bits
 */
function shannonEntropy(counts) {
  let total = 0;
  for (const c of counts) total += c;
  if (total === 0) return 0;
  return counts.reduce((H, c) => {
    if (c <= 0) return H;
    const p = c / total;
    return H - p * Math.log2(p);
  }, 0);
}
78
+
79
/**
 * Discretize a continuous value into one of `bins` equal-width buckets.
 * Values outside [lo, hi] clamp to the first/last bucket; a degenerate
 * range (hi === lo) always maps to bucket 0.
 * @param {number} v - value to discretize
 * @param {number} lo - band floor
 * @param {number} hi - band ceiling
 * @param {number} bins - number of bins
 * @returns {number} bin index in [0, bins-1]
 */
function band(v, lo, hi, bins) {
  if (hi === lo) return 0;
  const raw = Math.floor(((v - lo) / (hi - lo)) * bins);
  if (raw < 0) return 0;
  if (raw > bins - 1) return bins - 1;
  return raw;
}
91
+
92
+ // ═══════════════════════════════════════════════════════════════════════════════
93
+ // Layer 1 — Temporal Clustering
94
+ // ═══════════════════════════════════════════════════════════════════════════════
95
+
96
/**
 * Test whether token arrival times follow a Poisson process (organic)
 * or show burst patterns (coordinated).
 *
 * Method: bucket timestamps into fixed intervals and compute a
 * chi-squared statistic against the uniform Poisson expectation.
 * High chi-squared means bursty arrivals; the burst ratio measures how
 * concentrated traffic is in the densest 10% of buckets.
 *
 * @param {number[]} timestamps - sorted epoch-ms values
 * @param {object} [opts]
 * @param {number} [opts.bucketMs=1000] - bucket width in ms
 * @returns {{ score: number, burstRatio: number, chi2: number, chi2Norm?: number, pBursty: number }}
 *   (chi2Norm is present only on the full computation path)
 */
export function testTemporalClustering(timestamps, opts = {}) {
  const n = timestamps.length;
  // Too few points to distinguish bursty from organic — neutral score.
  if (n < 5) return { score: 50, burstRatio: 0, chi2: 0, pBursty: 0 };

  const bucketMs = opts.bucketMs ?? 1000;
  const tMin = timestamps[0];
  const span = timestamps[n - 1] - tMin;
  // Everything inside one bucket: no histogram to test — neutral score.
  if (span < bucketMs) return { score: 50, burstRatio: 0, chi2: 0, pBursty: 0 };

  // Histogram of arrivals per bucket.
  const numBuckets = Math.ceil(span / bucketMs);
  const buckets = new Array(numBuckets).fill(0);
  for (const t of timestamps) {
    buckets[Math.min(numBuckets - 1, Math.floor((t - tMin) / bucketMs))] += 1;
  }

  // Chi-squared against the uniform expected rate.
  const expected = n / numBuckets;
  const chi2 = buckets.reduce((acc, obs) => acc + (obs - expected) ** 2 / expected, 0);

  // Fraction of tokens landing in the densest 10% of buckets.
  const densest = [...buckets].sort((a, b) => b - a);
  const topCount = Math.max(1, Math.ceil(numBuckets * 0.1));
  let inTop = 0;
  for (let k = 0; k < topCount; k++) inTop += densest[k];
  const burstRatio = inTop / n;

  // chi2 per degree of freedom: ~1 is Poisson-consistent, > 5 is very bursty.
  const chi2Norm = chi2 / Math.max(1, numBuckets - 1);
  const pBursty = clamp((chi2Norm - 1.5) / 4, 0, 1);

  // 0 = organic, 100 = coordinated.
  const score = Math.round(clamp(
    pBursty * 60 + (burstRatio > 0.5 ? 40 : burstRatio * 80),
    0, 100
  ));

  return {
    score,
    burstRatio: +burstRatio.toFixed(4),
    chi2: +chi2.toFixed(2),
    chi2Norm: +chi2Norm.toFixed(3),
    pBursty: +pBursty.toFixed(4),
  };
}
151
+
152
+ // ═══════════════════════════════════════════════════════════════════════════════
153
+ // Layer 2 — Signal Fingerprint Collision
154
+ // ═══════════════════════════════════════════════════════════════════════════════
155
+
156
/**
 * Compute fingerprint collision rate across a token cohort.
 *
 * Each token is reduced to a discrete fingerprint of its physics signals.
 * Real users produce many unique fingerprints (high cardinality); cloned
 * bot environments produce heavy duplication (low cardinality).
 *
 * @param {object[]} tokens - array of token objects
 * @param {object} [opts]
 * @param {Function} [opts.fingerprint] - custom fingerprint fn(token) → string
 * @returns {{ score: number, uniqueRatio: number, topCollision: number, topRatio?: number, uniqueCount: number }}
 *   (topRatio is present only when n >= 5)
 */
export function testFingerprintCollision(tokens, opts = {}) {
  const n = tokens.length;
  if (n < 5) return { score: 0, uniqueRatio: 1, topCollision: 0, uniqueCount: n };

  const toKey = opts.fingerprint ?? defaultFingerprint;

  // Histogram of fingerprint occurrences.
  const histogram = new Map();
  for (const token of tokens) {
    const key = toKey(token);
    histogram.set(key, (histogram.get(key) ?? 0) + 1);
  }

  const uniqueCount = histogram.size;
  const uniqueRatio = uniqueCount / n;

  // Size of the largest collision cluster.
  let topCollision = 0;
  for (const count of histogram.values()) {
    if (count > topCollision) topCollision = count;
  }
  const topRatio = topCollision / n;

  // Piecewise score: uniqueRatio > 0.40 is typical of real users (n > 50),
  // < 0.10 is typical of farms.
  let score;
  if (uniqueRatio > 0.40) {
    score = Math.round(clamp((0.60 - uniqueRatio) / 0.20 * 30, 0, 30));
  } else if (uniqueRatio > 0.15) {
    score = Math.round(30 + (0.40 - uniqueRatio) / 0.25 * 40);
  } else {
    score = Math.round(70 + (0.15 - uniqueRatio) / 0.15 * 30);
  }

  // Bonus when a single fingerprint covers more than 30% of tokens.
  if (topRatio > 0.30) {
    score = Math.min(100, score + Math.round((topRatio - 0.30) * 60));
  }

  return {
    score: clamp(score, 0, 100),
    uniqueRatio: +uniqueRatio.toFixed(4),
    topCollision,
    topRatio: +topRatio.toFixed(4),
    uniqueCount,
  };
}
209
+
210
/**
 * Default cohort fingerprint: thermal label + entropy band + motor band
 * + DRAM/idle label joined into one discrete key. Tolerates both the
 * compact token shape ({ idle, hw, evt }) and the nested shape
 * ({ signals: { idle, entropy, motor } }); missing signals fall back to
 * neutral mid-range defaults.
 * @param {object} token
 * @returns {string} colon-joined fingerprint key
 */
function defaultFingerprint(token) {
  const idle = token.idle ?? token.signals?.idle ?? {};
  const hw = token.hw ?? token.signals?.entropy ?? {};
  const evt = token.evt ?? token.signals?.motor ?? {};

  const parts = [
    idle.therm ?? idle.thermalTransition ?? 'unknown',
    band(hw.ent ?? hw.score ?? 0.5, 0, 1, 10),
    band(evt.mot ?? evt.consistency ?? 0.5, 0, 1, 5),
    hw.dram ?? idle.dMs ?? 'unknown',
  ];
  return parts.join(':');
}
225
+
226
+ // ═══════════════════════════════════════════════════════════════════════════════
227
+ // Layer 3 — Drift Fingerprinting
228
+ // ═══════════════════════════════════════════════════════════════════════════════
229
+
230
/**
 * Detect clock drift convergence across devices.
 *
 * Every crystal oscillator drifts at a unique rate (20–100 ppm).
 * Over multiple submissions, drift accumulates into a device-specific
 * signature. If "different devices" share the same drift rate, they're
 * the same physical hardware behind a rotation proxy. Survives
 * IP/account/browser rotation.
 *
 * Input: array of device submission histories.
 * Each device: { id: string, submissions: [{ ts: number, serverTs: number }] }
 *
 * The delta between client timestamp and server timestamp grows linearly
 * at the drift rate. Linear regression on (serverTs, clientTs - serverTs)
 * gives the slope = drift rate in ms/s.
 *
 * @param {object[]} devices - [{ id, submissions: [{ ts, serverTs }] }]
 * @param {object} [opts]
 * @param {number} [opts.driftBinPpm=5] - drift rate bin width in ppm
 * @param {number} [opts.minSubmissions=3] - min submissions per device
 * @param {number} [opts.collisionThreshold=0.30] - fraction triggering flag
 * @returns {{ score: number, driftRates: Object<string, number>, largestCluster: number, clusterRatio: number, uniqueBins: number, totalDevices: number }}
 */
export function testDriftFingerprint(devices, opts = {}) {
  const driftBinPpm = opts.driftBinPpm ?? 5;
  const minSubmissions = opts.minSubmissions ?? 3;
  const collisionThresh = opts.collisionThreshold ?? 0.30;

  // Compute drift rate per device via linear regression.
  const rates = [];
  const rateMap = new Map();

  for (const dev of devices) {
    const subs = dev.submissions;
    if (!subs || subs.length < minSubmissions) continue;

    const drift = computeDriftRate(subs);
    if (drift === null) continue;

    rates.push(drift);
    rateMap.set(dev.id, drift);
  }

  if (rates.length < 3) {
    // Fewer than 3 usable devices: not enough data for cluster analysis.
    // FIX: return driftRates as a plain object (this path previously
    // returned the raw Map, inconsistent with the main return below)
    // and include uniqueBins so both paths share one result shape.
    return {
      score: 0,
      driftRates: Object.fromEntries(rateMap),
      largestCluster: 0,
      clusterRatio: 0,
      uniqueBins: 0,
      totalDevices: rates.length,
    };
  }

  // Bin drift rates and find collision clusters.
  const bins = new Map();
  for (const rate of rates) {
    // Convert ms/s drift to ppm, then bin.
    const ppm = rate * 1000; // ms/s → μs/s ≈ ppm
    const binKey = Math.round(ppm / driftBinPpm) * driftBinPpm;
    bins.set(binKey, (bins.get(binKey) ?? 0) + 1);
  }

  let largestCluster = 0;
  for (const c of bins.values()) {
    if (c > largestCluster) largestCluster = c;
  }

  const clusterRatio = largestCluster / rates.length;
  const uniqueBins = bins.size;
  const expectedBins = Math.min(rates.length, Math.ceil(80 / driftBinPpm)); // ~80 ppm range

  // Score: high cluster ratio + few unique bins = same hardware.
  let score = 0;
  if (clusterRatio >= collisionThresh) {
    score += Math.round((clusterRatio - collisionThresh + 0.05) / (1 - collisionThresh) * 60);
  }
  if (uniqueBins < expectedBins * 0.5) {
    score += Math.round((1 - uniqueBins / expectedBins) * 40);
  }

  return {
    score: clamp(score, 0, 100),
    driftRates: Object.fromEntries(rateMap),
    largestCluster,
    clusterRatio: +clusterRatio.toFixed(4),
    uniqueBins,
    totalDevices: rates.length,
  };
}

/**
 * Compute clock drift rate from a series of submissions via linear regression.
 * Returns drift in ms/s (slope of client-server offset over server time).
 *
 * @param {object[]} subs - [{ ts: clientEpochMs, serverTs: serverEpochMs }]
 * @returns {number|null} drift rate in ms/s, or null if insufficient data
 *   (fewer than 2 points, or zero variance in server timestamps)
 */
export function computeDriftRate(subs) {
  if (subs.length < 2) return null;

  // x = server time (seconds from first), y = client-server offset (ms).
  const t0 = subs[0].serverTs;
  const xs = subs.map(s => (s.serverTs - t0) / 1000);
  const ys = subs.map(s => s.ts - s.serverTs);

  const n = xs.length;
  const xm = mean(xs);
  const ym = mean(ys);

  // Ordinary least squares slope.
  let num = 0, den = 0;
  for (let i = 0; i < n; i++) {
    num += (xs[i] - xm) * (ys[i] - ym);
    den += (xs[i] - xm) ** 2;
  }

  if (den === 0) return null;
  return num / den; // ms/s drift rate
}
341
+
342
+ // ═══════════════════════════════════════════════════════════════════════════════
343
+ // Layer 4 — Mutual Information Matrix
344
+ // ═══════════════════════════════════════════════════════════════════════════════
345
+
346
/**
 * Detect coordination via shared signal structure across the cohort.
 *
 * Each token is projected into a discrete signal vector
 * [entropy_band, thermal_label, motor_band, idle_band, enf_band];
 * tokens with identical or near-identical vectors are linked in a
 * similarity graph, and greedy modularity (Louvain-lite) finds the
 * community structure. Organic traffic yields many small communities;
 * bot traffic yields one dominant clique.
 *
 * @param {object[]} tokens
 * @param {object} [opts]
 * @param {number} [opts.bins=8] - discretization bins per continuous signal
 * @returns {{ score: number, communities: number, largestCommunity: number, communityRatio: number, modularity: number }}
 */
export function testMutualInformation(tokens, opts = {}) {
  const n = tokens.length;
  if (n < 10) return { score: 0, communities: 1, largestCommunity: n, communityRatio: 1, modularity: 0 };

  const bins = opts.bins ?? 8;

  // Discrete signal vector per token.
  const vectors = tokens.map(t => tokenToVector(t, bins));

  // Similarity graph via fingerprint bucketing (avoids O(n²) pairwise work).
  const { adjacency, edges } = buildSimilarityGraph(vectors, 0.7);

  if (edges === 0) {
    // No similar pairs at all — fully organic cohort.
    return { score: 0, communities: n, largestCommunity: 1, communityRatio: 1 / n, modularity: 0 };
  }

  // Community structure of the similarity graph.
  const { communities, modularity } = louvainLite(adjacency, n);

  // Tally community sizes.
  const sizes = new Map();
  for (const label of communities) {
    sizes.set(label, (sizes.get(label) ?? 0) + 1);
  }
  let largestCommunity = 0;
  for (const size of sizes.values()) {
    if (size > largestCommunity) largestCommunity = size;
  }

  const communityRatio = largestCommunity / n;
  const numCommunities = sizes.size;

  let score = 0;
  // (1) One dominant community holding > 40% of the cohort.
  if (communityRatio > 0.4) {
    score += Math.round((communityRatio - 0.4) * 100);
  }
  // (2) Far fewer communities than the rough ~sqrt(n) organic estimate.
  const expectedCommunities = Math.sqrt(n);
  if (numCommunities < expectedCommunities * 0.5) {
    score += Math.round((1 - numCommunities / expectedCommunities) * 30);
  }
  // (3) Strong modular structure around a large community.
  if (modularity > 0.3 && communityRatio > 0.3) {
    score += 20;
  }

  return {
    score: clamp(score, 0, 100),
    communities: numCommunities,
    largestCommunity,
    communityRatio: +communityRatio.toFixed(4),
    modularity: +modularity.toFixed(4),
  };
}
420
+
421
/**
 * Project a token into a 5-element discrete signal vector:
 * [entropy_band, thermal_index, motor_band, idle_band, enf_band].
 * Missing signals fall back to neutral defaults; both the compact
 * ({ idle, hw, evt }) and nested ({ signals: {...} }) shapes are accepted.
 * @param {object} token
 * @param {number} bins - bins per continuous dimension
 * @returns {number[]} discrete vector
 */
function tokenToVector(token, bins) {
  const idle = token.idle ?? token.signals?.idle ?? {};
  const hw = token.hw ?? token.signals?.entropy ?? {};
  const evt = token.evt ?? token.signals?.motor ?? {};
  const enf = hw.enfDev ?? token.enfDev ?? 0;

  const entropyBand = band(hw.ent ?? hw.score ?? 0.5, 0, 1, bins);
  const thermalIdx = thermalToIndex(idle.therm ?? idle.thermalTransition ?? 'unknown');
  const motorBand = band(evt.mot ?? evt.consistency ?? 0.5, 0, 1, bins);
  const idleBand = band(idle.dMs ?? idle.s ?? 0, 0, 300, bins); // idle duration in seconds
  const enfBand = band(enf, -0.05, 0.05, bins);

  return [entropyBand, thermalIdx, motorBand, idleBand, enfBand];
}
438
+
439
// Ordinal encoding for thermal-transition labels (6 = unknown/unrecognized).
const THERMAL_MAP = {
  hot_to_cold: 0,
  cold: 1,
  cooling: 2,
  warming: 3,
  sustained_hot: 4,
  step_function: 5,
  unknown: 6,
};

/** Map a thermal label to its ordinal index; unrecognized labels → 6. */
function thermalToIndex(label) {
  return THERMAL_MAP[label] ?? 6;
}
441
+
442
/**
 * Build a similarity graph over discrete signal vectors.
 *
 * Tokens are bucketed by their exact vector key; all members of a bucket
 * are pairwise connected, and buckets whose keys differ in exactly one
 * dimension (Hamming distance 1) are fully cross-connected. This costs
 * O(n·k) rather than the O(n²) of all-pairs comparison.
 *
 * NOTE(review): the `threshold` parameter is currently unused — the
 * exact/Hamming-1 bucketing stands in for a similarity cutoff. Kept for
 * interface compatibility with callers.
 *
 * @param {number[][]} vectors - discrete signal vectors
 * @param {number} threshold - similarity cutoff (presently ignored)
 * @returns {{ adjacency: number[][], edges: number }} mirrored adjacency lists
 */
function buildSimilarityGraph(vectors, threshold) {
  const n = vectors.length;
  const adjacency = Array.from({ length: n }, () => []);
  let edges = 0;

  // Group token indices by exact vector key.
  const buckets = new Map();
  for (let i = 0; i < n; i++) {
    const key = vectors[i].join(',');
    const group = buckets.get(key);
    if (group) group.push(i);
    else buckets.set(key, [i]);
  }

  // Connect every pair inside a bucket (identical vectors).
  for (const group of buckets.values()) {
    for (let a = 0; a < group.length; a++) {
      for (let b = a + 1; b < group.length; b++) {
        adjacency[group[a]].push(group[b]);
        adjacency[group[b]].push(group[a]);
        edges++;
      }
    }
  }

  // Near matches: enumerate Hamming-distance-1 neighbor keys by swapping
  // one dimension at a time through the values observed in that dimension.
  // O(k · d · v) with k unique keys, d dimensions, v values per dimension.
  const keys = [...buckets.keys()];
  const keySet = new Set(keys);
  const allParts = keys.map(k => k.split(','));
  const dims = allParts[0]?.length ?? 0;
  const uniquePerDim = [];
  for (let d = 0; d < dims; d++) {
    uniquePerDim.push(new Set(allParts.map(p => p[d])));
  }

  const visitedPairs = new Set();
  for (const key of keys) {
    const parts = key.split(',');
    for (let dim = 0; dim < parts.length; dim++) {
      const original = parts[dim];
      for (const alt of uniquePerDim[dim]) {
        if (alt === original) continue;
        parts[dim] = alt;
        const neighborKey = parts.join(',');
        parts[dim] = original;
        if (!keySet.has(neighborKey)) continue;
        // Canonical pair id so each bucket pair is linked exactly once.
        const pairId = key < neighborKey ? `${key}|${neighborKey}` : `${neighborKey}|${key}`;
        if (visitedPairs.has(pairId)) continue;
        visitedPairs.add(pairId);
        for (const i of buckets.get(key)) {
          for (const j of buckets.get(neighborKey)) {
            adjacency[i].push(j);
            adjacency[j].push(i);
            edges++;
          }
        }
      }
    }
  }

  return { adjacency, edges };
}
516
+
517
/**
 * Louvain-lite: greedy single-level modularity maximization.
 *
 * Starts with every node in its own community, then sweeps (up to 10
 * times) moving each node to the neighboring community with the highest
 * positive gain. The gain is approximated from edges-to-community and
 * community degree only, so this is cheaper — and coarser — than full
 * Louvain. Modularity is likewise computed over present edges only.
 *
 * @param {number[][]} adjacency - mirrored adjacency lists
 * @param {number} n - node count
 * @returns {{ communities: number[], modularity: number }}
 */
function louvainLite(adjacency, n) {
  // Every node begins as its own community.
  const comm = Array.from({ length: n }, (_, i) => i);

  // 2m = total directed edge endpoints.
  const deg = adjacency.map(neighbors => neighbors.length);
  let twoM = 0;
  for (const d of deg) twoM += d;
  if (twoM === 0) return { communities: comm, modularity: 0 };

  // Running total degree per community (O(1) updates on each move).
  const commDeg = new Map();
  for (let i = 0; i < n; i++) commDeg.set(i, deg[i]);

  let moved = true;
  let sweep = 0;
  while (moved && sweep < 10) {
    moved = false;
    sweep++;
    for (let i = 0; i < n; i++) {
      if (deg[i] === 0) continue; // isolated nodes never move

      // Edge count from node i into each adjacent community.
      const edgesTo = new Map();
      for (const j of adjacency[i]) {
        edgesTo.set(comm[j], (edgesTo.get(comm[j]) ?? 0) + 1);
      }

      // Pick the community with the largest positive modularity gain.
      let bestComm = comm[i];
      let bestDelta = 0;
      for (const [c, eic] of edgesTo) {
        if (c === comm[i]) continue;
        const gain = eic / twoM - (deg[i] * (commDeg.get(c) ?? 0)) / (twoM * twoM);
        if (gain > bestDelta) {
          bestDelta = gain;
          bestComm = c;
        }
      }

      if (bestComm !== comm[i]) {
        // Keep the community-degree map consistent with the move.
        const prev = comm[i];
        commDeg.set(prev, (commDeg.get(prev) ?? 0) - deg[i]);
        commDeg.set(bestComm, (commDeg.get(bestComm) ?? 0) + deg[i]);
        comm[i] = bestComm;
        moved = true;
      }
    }
  }

  // Modularity over intra-community edges (both directions counted).
  let Q = 0;
  for (let i = 0; i < n; i++) {
    for (const j of adjacency[i]) {
      if (comm[i] === comm[j]) Q += 1 - (deg[i] * deg[j]) / twoM;
    }
  }
  return { communities: comm, modularity: Math.max(0, Q / twoM) };
}
595
+
596
+ // ═══════════════════════════════════════════════════════════════════════════════
597
+ // Layer 5 — Entropy Velocity
598
+ // ═══════════════════════════════════════════════════════════════════════════════
599
+
600
/**
 * Measure entropy growth rate vs traffic growth rate.
 *
 * Organic growth: each new user adds a unique signal profile, so the
 * Shannon entropy of the fingerprint distribution rises roughly with
 * log2(n). Bot deployment adds volume without diversity, so entropy
 * stalls while counts climb.
 *
 * Method: take cumulative temporal slices of the token stream, compute
 * the fingerprint-distribution entropy for each, and compare observed
 * entropy growth against the log-growth expected from the count increase.
 *
 * @param {object[]} tokens - chronologically sorted
 * @param {object} [opts]
 * @param {number} [opts.slices=5] - number of temporal slices
 * @returns {{ score: number, velocityRatio: number, entropySlices: number[], countSlices: number[] }}
 */
export function testEntropyVelocity(tokens, opts = {}) {
  const n = tokens.length;
  const numSlices = opts.slices ?? 5;
  // Need at least 3 tokens per slice for the trend to mean anything.
  if (n < numSlices * 3) return { score: 0, velocityRatio: 1, entropySlices: [], countSlices: [] };

  const sliceSize = Math.ceil(n / numSlices);
  const entropySlices = [];
  const countSlices = [];
  const cumulativeEntropies = [];

  // Each "slice" is cumulative: the prefix tokens[0..end) for a growing end.
  for (let s = 0; s < numSlices; s++) {
    const end = Math.min(n, (s + 1) * sliceSize);

    // Fingerprint histogram of the cumulative prefix.
    const histogram = new Map();
    for (let i = 0; i < end; i++) {
      const key = defaultFingerprint(tokens[i]);
      histogram.set(key, (histogram.get(key) ?? 0) + 1);
    }

    const H = shannonEntropy([...histogram.values()]);
    entropySlices.push(+H.toFixed(4));
    countSlices.push(end);
    cumulativeEntropies.push(H);
  }

  const H_first = cumulativeEntropies[0];
  const H_last = cumulativeEntropies[cumulativeEntropies.length - 1];
  const n_first = countSlices[0];
  const n_last = countSlices[countSlices.length - 1];

  // Degenerate window: counts never grew between first and last slice.
  if (n_first === n_last) {
    return { score: 0, velocityRatio: 1, entropySlices, countSlices };
  }

  // Near-zero entropy across a sizable cohort: extreme cloning.
  if (H_last < 0.5 && n_last >= 20) {
    return {
      score: Math.round(clamp(80 + (0.5 - H_last) * 40, 80, 100)),
      velocityRatio: 0,
      entropySlices,
      countSlices,
    };
  }

  // First slice had no diversity — a growth ratio would divide by ~0, so
  // compare the final entropy against an absolute organic expectation.
  if (H_first < 0.01) {
    const expectedH = Math.log2(Math.max(2, n_last * 0.3)); // expected for organic
    const ratio = H_last / expectedH;
    return {
      score: Math.round(clamp((1 - ratio) * 100, 0, 100)),
      velocityRatio: +ratio.toFixed(4),
      entropySlices,
      countSlices,
    };
  }

  // Ratio of observed entropy growth to the expected log-growth.
  const expectedGrowth = Math.log2(n_last) / Math.log2(n_first);
  const actualGrowth = H_last / Math.max(0.01, H_first);
  const velocityRatio = actualGrowth / expectedGrowth;

  // ≈1.0 is organic; below ~0.6 entropy lags traffic = artificial volume;
  // above 1.2 can simply be natural diversification.
  let score;
  if (velocityRatio >= 0.8) {
    score = Math.round(clamp((1.0 - velocityRatio) * 50, 0, 20));
  } else if (velocityRatio >= 0.5) {
    score = Math.round(20 + (0.8 - velocityRatio) / 0.3 * 50);
  } else {
    score = Math.round(70 + (0.5 - velocityRatio) / 0.5 * 30);
  }

  return {
    score: clamp(score, 0, 100),
    velocityRatio: +velocityRatio.toFixed(4),
    entropySlices,
    countSlices,
  };
}
699
+
700
+ // ═══════════════════════════════════════════════════════════════════════════════
701
+ // Orchestrator — Coordinated Behavior Analysis
702
+ // ═══════════════════════════════════════════════════════════════════════════════
703
+
704
/**
 * Full coordination analysis across all 5 layers.
 *
 * @param {object[]} tokens - engagement tokens with physics signals
 * @param {object} [opts]
 * @param {object[]} [opts.devices] - device submission histories for drift analysis
 * @param {number} [opts.windowMs] - analysis window (reserved; not currently used)
 * @returns {CoordinationResult}
 */
export function analyseCoordination(tokens, opts = {}) {
  const n = tokens.length;
  if (n < 5) {
    // Too few tokens for any layer to be meaningful.
    // FIX: include empty `flags` and `weights` so this early return has
    // the same shape as the full result and callers need no special case.
    return {
      coordinationScore: 0,
      verdict: 'insufficient_data',
      confidence: 0,
      flags: [],
      layers: {},
      tokenCount: n,
      weights: {},
    };
  }

  // Extract timestamps, tolerating several token shapes.
  const timestamps = tokens.map(t =>
    t.iat ?? t.timestamp ?? t.ts ?? Date.now()
  ).sort((a, b) => a - b);

  // ── Layer 1: Temporal Clustering ──
  const temporal = testTemporalClustering(timestamps);

  // ── Layer 2: Fingerprint Collision ──
  const fingerprint = testFingerprintCollision(tokens);

  // ── Layer 3: Drift Fingerprinting ──
  const devices = opts.devices ?? [];
  const drift = devices.length >= 3
    ? testDriftFingerprint(devices)
    : { score: 0, totalDevices: 0 };

  // ── Layer 4: Mutual Information ──
  const mi = testMutualInformation(tokens);

  // ── Layer 5: Entropy Velocity ──
  const velocity = testEntropyVelocity(tokens);

  // ── Weighted Fusion ──
  // Weights reflect each layer's discriminative power and evasion cost.
  const weights = {
    temporal: 0.15,    // easy to randomize, but still catches lazy farms
    fingerprint: 0.25, // hard to fake without real hardware diversity
    drift: 0.15,       // only fires with multi-submission data
    mi: 0.25,          // hardest to evade — requires true independence
    velocity: 0.20,    // catches mass deployment timing
  };

  // If no drift data, redistribute its weight across the other layers.
  const hasDrift = drift.totalDevices >= 3;
  const effectiveWeights = hasDrift ? weights : {
    temporal: 0.18,
    fingerprint: 0.28,
    drift: 0,
    mi: 0.30,
    velocity: 0.24,
  };

  const raw =
    temporal.score * effectiveWeights.temporal +
    fingerprint.score * effectiveWeights.fingerprint +
    drift.score * effectiveWeights.drift +
    mi.score * effectiveWeights.mi +
    velocity.score * effectiveWeights.velocity;

  const coordinationScore = Math.round(clamp(raw, 0, 100));

  // Confidence grows with token count and with the number of layers
  // contributing a non-zero signal.
  const activeLayers = [temporal, fingerprint, mi, velocity].filter(l => l.score > 0).length + (hasDrift ? 1 : 0);
  const confidence = clamp(
    (Math.min(n, 100) / 100) * 0.5 + (activeLayers / 5) * 0.5,
    0, 1
  );

  // Verdict thresholds on the fused score.
  let verdict;
  if (coordinationScore >= 70) verdict = 'coordinated_inauthentic';
  else if (coordinationScore >= 45) verdict = 'suspicious_coordination';
  else if (coordinationScore >= 25) verdict = 'low_coordination';
  else verdict = 'organic';

  // Advisory flags for individual high-scoring layers.
  const flags = [];
  if (temporal.score >= 60) flags.push('BURST_ARRIVAL_PATTERN');
  if (fingerprint.score >= 60) flags.push('LOW_FINGERPRINT_DIVERSITY');
  if (drift.score >= 50) flags.push('CLOCK_DRIFT_CONVERGENCE');
  if (mi.score >= 50) flags.push('SIGNAL_CLIQUE_DETECTED');
  if (velocity.score >= 50) flags.push('ENTROPY_GROWTH_STALLED');

  return {
    coordinationScore,
    verdict,
    confidence: +confidence.toFixed(3),
    flags,
    layers: {
      temporal,
      fingerprint,
      drift: hasDrift ? drift : { score: 0, skipped: true, reason: 'insufficient_device_history' },
      mutualInformation: mi,
      entropyVelocity: velocity,
    },
    tokenCount: n,
    weights: effectiveWeights,
  };
}
815
+
816
+ /**
817
+ * @typedef {object} CoordinationResult
818
+ * @property {number} coordinationScore - 0–100, higher = more coordinated
819
+ * @property {string} verdict - 'organic' | 'low_coordination' | 'suspicious_coordination' | 'coordinated_inauthentic'
820
+ * @property {number} confidence - 0–1, based on token count and active layers
821
+ * @property {string[]} flags - advisory flags for specific signals
822
+ * @property {object} layers - per-layer results
823
+ * @property {number} tokenCount - number of tokens analyzed
824
+ * @property {object} weights - effective layer weights used
825
+ */