@svrnsec/pulse 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +883 -782
- package/SECURITY.md +86 -86
- package/bin/svrnsec-pulse.js +7 -7
- package/dist/{pulse.cjs.js → pulse.cjs} +6378 -6419
- package/dist/pulse.cjs.map +1 -0
- package/dist/pulse.esm.js +6379 -6420
- package/dist/pulse.esm.js.map +1 -1
- package/index.d.ts +895 -846
- package/package.json +185 -184
- package/pkg/pulse_core.js +174 -173
- package/src/analysis/audio.js +213 -213
- package/src/analysis/authenticityAudit.js +408 -393
- package/src/analysis/coherence.js +502 -502
- package/src/analysis/coordinatedBehavior.js +825 -804
- package/src/analysis/heuristic.js +428 -428
- package/src/analysis/jitter.js +446 -446
- package/src/analysis/llm.js +473 -472
- package/src/analysis/populationEntropy.js +404 -403
- package/src/analysis/provider.js +248 -248
- package/src/analysis/refraction.js +392 -391
- package/src/analysis/trustScore.js +356 -356
- package/src/cli/args.js +36 -36
- package/src/cli/commands/scan.js +192 -192
- package/src/cli/runner.js +157 -157
- package/src/collector/adaptive.js +200 -200
- package/src/collector/bio.js +297 -287
- package/src/collector/canvas.js +247 -239
- package/src/collector/dram.js +203 -203
- package/src/collector/enf.js +311 -311
- package/src/collector/entropy.js +195 -195
- package/src/collector/gpu.js +248 -245
- package/src/collector/idleAttestation.js +480 -480
- package/src/collector/sabTimer.js +189 -191
- package/src/fingerprint.js +475 -475
- package/src/index.js +342 -342
- package/src/integrations/react-native.js +462 -459
- package/src/integrations/react.js +184 -185
- package/src/middleware/express.js +155 -155
- package/src/middleware/next.js +174 -175
- package/src/proof/challenge.js +249 -249
- package/src/proof/engagementToken.js +426 -394
- package/src/proof/fingerprint.js +268 -268
- package/src/proof/validator.js +82 -142
- package/src/registry/serializer.js +349 -349
- package/src/terminal.js +263 -263
- package/src/update-notifier.js +259 -264
- package/dist/pulse.cjs.js.map +0 -1
|
@@ -1,804 +1,825 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @svrnsec/pulse — Coordinated Inauthentic Behavior Detection
|
|
3
|
-
*
|
|
4
|
-
* Detects bot armies, click farms, and Sybil networks by analyzing
|
|
5
|
-
* physics-layer correlations that coordination inevitably creates.
|
|
6
|
-
*
|
|
7
|
-
* Core insight:
|
|
8
|
-
* Two real users in different cities have ZERO mutual information
|
|
9
|
-
* between their thermal curves, clock drift rates, ENF phases, and
|
|
10
|
-
* idle durations. Bot farms can randomize any ONE signal but cannot
|
|
11
|
-
* independently decorrelate ALL signals simultaneously — they're
|
|
12
|
-
* bound to shared physics (same room, same hardware, same scripts).
|
|
13
|
-
*
|
|
14
|
-
* Five detection layers:
|
|
15
|
-
*
|
|
16
|
-
* Layer 1 — Temporal Clustering
|
|
17
|
-
* Real users arrive via Poisson process. Bot armies arrive in bursts
|
|
18
|
-
* from a command server. Chi-squared test on 1s-bucket histogram.
|
|
19
|
-
*
|
|
20
|
-
* Layer 2 — Signal Fingerprint Collision
|
|
21
|
-
* Hash (thermal_label, entropy_band, motor_band) per token. Real
|
|
22
|
-
* cohort: high cardinality. Bot farm: < 20 unique fingerprints
|
|
23
|
-
* across 500 tokens.
|
|
24
|
-
*
|
|
25
|
-
* Layer 3 — Drift Fingerprinting
|
|
26
|
-
* Crystal oscillator imperfection (20–100 ppm) creates a unique
|
|
27
|
-
* clock drift rate per physical device. Multiple submissions from
|
|
28
|
-
* "different devices" that converge on the same drift rate = same
|
|
29
|
-
* hardware behind a rotation proxy. Survives IP/account/browser
|
|
30
|
-
* rotation.
|
|
31
|
-
*
|
|
32
|
-
* Layer 4 — Mutual Information Matrix
|
|
33
|
-
* Pairwise MI across all signal dimensions. Organic traffic: sparse
|
|
34
|
-
* random MI matrix. Bot traffic: block-diagonal structure (cliques).
|
|
35
|
-
* Louvain community detection finds the cliques in O(n log n).
|
|
36
|
-
*
|
|
37
|
-
* Layer 5 — Entropy Velocity
|
|
38
|
-
* Track dH/dt — the rate of Shannon entropy growth in the signal
|
|
39
|
-
* space. Organic growth adds unique profiles; bot deployment adds
|
|
40
|
-
* volume without diversity. The ratio (observed dH/dt) / (expected)
|
|
41
|
-
* catches mass deployment even with real hardware.
|
|
42
|
-
*
|
|
43
|
-
* Computational cost:
|
|
44
|
-
* All operations are O(n) or O(n log n). 500 tokens ≈ 8ms total
|
|
45
|
-
* on a single CPU core. No ML, no GPU, no training data.
|
|
46
|
-
*
|
|
47
|
-
* Usage:
|
|
48
|
-
* import { analyseCoordination } from '@svrnsec/pulse/coordination'
|
|
49
|
-
* const result = analyseCoordination(tokens, { windowMs: 60000 })
|
|
50
|
-
*/
|
|
51
|
-
|
|
52
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
53
|
-
// Utility
|
|
54
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
55
|
-
|
|
56
|
-
function mean(a) { return a.length === 0 ? 0 : a.reduce((s, v) => s + v, 0) / a.length; }
|
|
57
|
-
function variance(a) { const m = mean(a); return a.length < 2 ? 0 : a.reduce((s, v) => s + (v - m) ** 2, 0) / (a.length - 1); }
|
|
58
|
-
function stddev(a) { return Math.sqrt(variance(a)); }
|
|
59
|
-
function cv(a) { const m = mean(a); return m === 0 ? 0 : stddev(a) / m; }
|
|
60
|
-
function clamp(v, lo, hi) { return Math.max(lo, Math.min(hi, v)); }
|
|
61
|
-
|
|
62
|
-
/**
|
|
63
|
-
* Shannon entropy of a discrete distribution (array of counts).
|
|
64
|
-
* Returns bits.
|
|
65
|
-
*/
|
|
66
|
-
function shannonEntropy(counts) {
|
|
67
|
-
const total = counts.reduce((s, c) => s + c, 0);
|
|
68
|
-
if (total === 0) return 0;
|
|
69
|
-
let H = 0;
|
|
70
|
-
for (const c of counts) {
|
|
71
|
-
if (c > 0) {
|
|
72
|
-
const p = c / total;
|
|
73
|
-
H -= p * Math.log2(p);
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
return H;
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
/**
|
|
80
|
-
* Discretize a continuous value into a band index.
|
|
81
|
-
* @param {number} v - value
|
|
82
|
-
* @param {number} lo - band floor
|
|
83
|
-
* @param {number} hi - band ceiling
|
|
84
|
-
* @param {number} bins - number of bins
|
|
85
|
-
* @returns {number} bin index [0, bins-1]
|
|
86
|
-
*/
|
|
87
|
-
function band(v, lo, hi, bins) {
|
|
88
|
-
if (hi === lo) return 0;
|
|
89
|
-
return Math.min(bins - 1, Math.max(0, Math.floor(((v - lo) / (hi - lo)) * bins)));
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
93
|
-
// Layer 1 — Temporal Clustering
|
|
94
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
95
|
-
|
|
96
|
-
/**
|
|
97
|
-
* Test whether token arrival times follow a Poisson process (organic)
|
|
98
|
-
* or show burst patterns (coordinated).
|
|
99
|
-
*
|
|
100
|
-
* Method: bucket timestamps into 1s intervals, compute chi-squared
|
|
101
|
-
* statistic against expected Poisson rate. High chi-squared = bursty.
|
|
102
|
-
*
|
|
103
|
-
* @param {number[]} timestamps - sorted epoch-ms values
|
|
104
|
-
* @param {object} [opts]
|
|
105
|
-
* @param {number} [opts.bucketMs=1000]
|
|
106
|
-
* @returns {{ score: number, burstRatio: number, chi2: number, pBursty: number }}
|
|
107
|
-
*/
|
|
108
|
-
export function testTemporalClustering(timestamps, opts = {}) {
|
|
109
|
-
const n = timestamps.length;
|
|
110
|
-
if (n < 5) return { score: 50, burstRatio: 0, chi2: 0, pBursty: 0 };
|
|
111
|
-
|
|
112
|
-
const bucketMs = opts.bucketMs ?? 1000;
|
|
113
|
-
const tMin = timestamps[0];
|
|
114
|
-
const tMax = timestamps[n - 1];
|
|
115
|
-
const span = tMax - tMin;
|
|
116
|
-
if (span < bucketMs) return { score: 50, burstRatio: 0, chi2: 0, pBursty: 0 };
|
|
117
|
-
|
|
118
|
-
const numBuckets = Math.ceil(span / bucketMs);
|
|
119
|
-
const buckets = new Array(numBuckets).fill(0);
|
|
120
|
-
for (const t of timestamps) {
|
|
121
|
-
const idx = Math.min(numBuckets - 1, Math.floor((t - tMin) / bucketMs));
|
|
122
|
-
buckets[idx]++;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
// Expected count per bucket under uniform Poisson
|
|
126
|
-
const expected = n / numBuckets;
|
|
127
|
-
let chi2 = 0;
|
|
128
|
-
for (const obs of buckets) {
|
|
129
|
-
chi2 += (obs - expected) ** 2 / expected;
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
// Burst ratio: fraction of tokens in the densest 10% of buckets
|
|
133
|
-
const sorted = [...buckets].sort((a, b) => b - a);
|
|
134
|
-
const top10pct = Math.max(1, Math.ceil(numBuckets * 0.1));
|
|
135
|
-
const burstRatio = sorted.slice(0, top10pct).reduce((s, v) => s + v, 0) / n;
|
|
136
|
-
|
|
137
|
-
// Normalize chi2 to score: higher chi2 = more coordinated
|
|
138
|
-
// df = numBuckets - 1; chi2/df >> 1 means non-Poisson
|
|
139
|
-
const chi2Norm = chi2 / Math.max(1, numBuckets - 1);
|
|
140
|
-
// chi2Norm < 2 is consistent with Poisson; > 5 is very bursty
|
|
141
|
-
const pBursty = clamp((chi2Norm - 1.5) / 4, 0, 1);
|
|
142
|
-
|
|
143
|
-
// Score: 0 = organic, 100 = coordinated
|
|
144
|
-
const score = Math.round(clamp(
|
|
145
|
-
pBursty * 60 + (burstRatio > 0.5 ? 40 : burstRatio * 80),
|
|
146
|
-
0, 100
|
|
147
|
-
));
|
|
148
|
-
|
|
149
|
-
return { score, burstRatio: +burstRatio.toFixed(4), chi2: +chi2.toFixed(2), chi2Norm: +chi2Norm.toFixed(3), pBursty: +pBursty.toFixed(4) };
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
153
|
-
// Layer 2 — Signal Fingerprint Collision
|
|
154
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
155
|
-
|
|
156
|
-
/**
|
|
157
|
-
* Compute fingerprint collision rate across a token cohort.
|
|
158
|
-
*
|
|
159
|
-
* Each token produces a discrete fingerprint from its physics signals.
|
|
160
|
-
* Real users: high cardinality (many unique fingerprints).
|
|
161
|
-
* Bot farm: low cardinality (cloned environments produce duplicates).
|
|
162
|
-
*
|
|
163
|
-
* @param {object[]} tokens - array of token objects
|
|
164
|
-
* @param {object} [opts]
|
|
165
|
-
* @param {Function} [opts.fingerprint] - custom fingerprint fn(token) → string
|
|
166
|
-
* @returns {{ score: number, uniqueRatio: number, topCollision: number, uniqueCount: number }}
|
|
167
|
-
*/
|
|
168
|
-
export function testFingerprintCollision(tokens, opts = {}) {
|
|
169
|
-
const n = tokens.length;
|
|
170
|
-
if (n < 5) return { score: 0, uniqueRatio: 1, topCollision: 0, uniqueCount: n };
|
|
171
|
-
|
|
172
|
-
const fp = opts.fingerprint ?? defaultFingerprint;
|
|
173
|
-
const counts = new Map();
|
|
174
|
-
|
|
175
|
-
for (const token of tokens) {
|
|
176
|
-
const key = fp(token);
|
|
177
|
-
counts.set(key, (counts.get(key) ?? 0) + 1);
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
const uniqueCount = counts.size;
|
|
181
|
-
const uniqueRatio = uniqueCount / n;
|
|
182
|
-
|
|
183
|
-
// Largest collision cluster
|
|
184
|
-
let topCollision = 0;
|
|
185
|
-
for (const c of counts.values()) {
|
|
186
|
-
if (c > topCollision) topCollision = c;
|
|
187
|
-
}
|
|
188
|
-
const topRatio = topCollision / n;
|
|
189
|
-
|
|
190
|
-
// Score: low unique ratio = coordinated
|
|
191
|
-
// Real users: uniqueRatio > 0.40 for n > 50
|
|
192
|
-
// Farms: uniqueRatio < 0.10
|
|
193
|
-
let score;
|
|
194
|
-
if (uniqueRatio > 0.40) score = Math.round(clamp((0.60 - uniqueRatio) / 0.20 * 30, 0, 30));
|
|
195
|
-
else if (uniqueRatio > 0.15) score = Math.round(30 + (0.40 - uniqueRatio) / 0.25 * 40);
|
|
196
|
-
else score = Math.round(70 + (0.15 - uniqueRatio) / 0.15 * 30);
|
|
197
|
-
|
|
198
|
-
// Top collision bonus: if one fingerprint holds > 30% of tokens
|
|
199
|
-
if (topRatio > 0.30) score = Math.min(100, score + Math.round((topRatio - 0.30) * 60));
|
|
200
|
-
|
|
201
|
-
return {
|
|
202
|
-
score: clamp(score, 0, 100),
|
|
203
|
-
uniqueRatio: +uniqueRatio.toFixed(4),
|
|
204
|
-
topCollision,
|
|
205
|
-
topRatio: +topRatio.toFixed(4),
|
|
206
|
-
uniqueCount,
|
|
207
|
-
};
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
/**
|
|
211
|
-
* Default fingerprint: discretize thermal label + entropy band + motor band.
|
|
212
|
-
*/
|
|
213
|
-
function defaultFingerprint(token) {
|
|
214
|
-
const idle = token.idle ?? token.signals?.idle ?? {};
|
|
215
|
-
const hw = token.hw ?? token.signals?.entropy ?? {};
|
|
216
|
-
const evt = token.evt ?? token.signals?.motor ?? {};
|
|
217
|
-
|
|
218
|
-
const thermal = idle.therm ?? idle.thermalTransition ?? 'unknown';
|
|
219
|
-
const entropy = band(hw.ent ?? hw.score ?? 0.5, 0, 1, 10);
|
|
220
|
-
const motor = band(evt.mot ?? evt.consistency ?? 0.5, 0, 1, 5);
|
|
221
|
-
const dramLabel = hw.dram ?? idle.dMs ?? 'unknown';
|
|
222
|
-
|
|
223
|
-
return `${thermal}:${entropy}:${motor}:${dramLabel}`;
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
227
|
-
// Layer 3 — Drift Fingerprinting
|
|
228
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
229
|
-
|
|
230
|
-
/**
|
|
231
|
-
* Detect clock drift convergence across devices.
|
|
232
|
-
*
|
|
233
|
-
* Every crystal oscillator drifts at a unique rate (20–100 ppm).
|
|
234
|
-
* Over multiple submissions, drift accumulates into a device-specific
|
|
235
|
-
* signature. If "different devices" share the same drift rate, they're
|
|
236
|
-
* the same physical hardware behind a rotation proxy.
|
|
237
|
-
*
|
|
238
|
-
* Input: array of device submission histories.
|
|
239
|
-
* Each device: { id: string, submissions: [{ ts: number, serverTs: number }] }
|
|
240
|
-
*
|
|
241
|
-
* The delta between client timestamp and server timestamp grows linearly
|
|
242
|
-
* at the drift rate. Linear regression on (serverTs, clientTs - serverTs)
|
|
243
|
-
* gives the slope = drift rate in ms/s.
|
|
244
|
-
*
|
|
245
|
-
* @param {object[]} devices - [{ id, submissions: [{ ts, serverTs }] }]
|
|
246
|
-
* @param {object} [opts]
|
|
247
|
-
* @param {number} [opts.driftBinPpm=5] - drift rate bin width in ppm
|
|
248
|
-
* @param {number} [opts.minSubmissions=3] - min submissions per device
|
|
249
|
-
* @param {number} [opts.collisionThreshold=0.30] - fraction triggering flag
|
|
250
|
-
* @returns {{ score: number, driftRates: Map, largestCluster: number, clusterRatio: number }}
|
|
251
|
-
*/
|
|
252
|
-
export function testDriftFingerprint(devices, opts = {}) {
|
|
253
|
-
const driftBinPpm = opts.driftBinPpm ?? 5;
|
|
254
|
-
const minSubmissions = opts.minSubmissions ?? 3;
|
|
255
|
-
const collisionThresh = opts.collisionThreshold ?? 0.30;
|
|
256
|
-
|
|
257
|
-
// Compute drift rate per device via linear regression
|
|
258
|
-
const rates = [];
|
|
259
|
-
const rateMap = new Map();
|
|
260
|
-
|
|
261
|
-
for (const dev of devices) {
|
|
262
|
-
const subs = dev.submissions;
|
|
263
|
-
if (!subs || subs.length < minSubmissions) continue;
|
|
264
|
-
|
|
265
|
-
const drift = computeDriftRate(subs);
|
|
266
|
-
if (drift === null) continue;
|
|
267
|
-
|
|
268
|
-
rates.push(drift);
|
|
269
|
-
rateMap.set(dev.id, drift);
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
if (rates.length < 3) {
|
|
273
|
-
return { score: 0, driftRates: rateMap, largestCluster: 0, clusterRatio: 0, totalDevices: rates.length };
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
// Bin drift rates and find collision clusters
|
|
277
|
-
const bins = new Map();
|
|
278
|
-
for (const rate of rates) {
|
|
279
|
-
// Convert ms/s drift to ppm, then bin
|
|
280
|
-
const ppm = rate * 1000; // ms/s → μs/s ≈ ppm
|
|
281
|
-
const binKey = Math.round(ppm / driftBinPpm) * driftBinPpm;
|
|
282
|
-
bins.set(binKey, (bins.get(binKey) ?? 0) + 1);
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
let largestCluster = 0;
|
|
286
|
-
for (const c of bins.values()) {
|
|
287
|
-
if (c > largestCluster) largestCluster = c;
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
const clusterRatio = largestCluster / rates.length;
|
|
291
|
-
const uniqueBins = bins.size;
|
|
292
|
-
const expectedBins = Math.min(rates.length, Math.ceil(80 / driftBinPpm)); // ~80 ppm range
|
|
293
|
-
|
|
294
|
-
// Score: high cluster ratio + few unique bins = same hardware
|
|
295
|
-
let score = 0;
|
|
296
|
-
if (clusterRatio >= collisionThresh) {
|
|
297
|
-
score += Math.round((clusterRatio - collisionThresh + 0.05) / (1 - collisionThresh) * 60);
|
|
298
|
-
}
|
|
299
|
-
if (uniqueBins < expectedBins * 0.5) {
|
|
300
|
-
score += Math.round((1 - uniqueBins / expectedBins) * 40);
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
return {
|
|
304
|
-
score: clamp(score, 0, 100),
|
|
305
|
-
driftRates: rateMap,
|
|
306
|
-
largestCluster,
|
|
307
|
-
clusterRatio: +clusterRatio.toFixed(4),
|
|
308
|
-
uniqueBins,
|
|
309
|
-
totalDevices: rates.length,
|
|
310
|
-
};
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
/**
|
|
314
|
-
* Compute clock drift rate from a series of submissions via linear regression.
|
|
315
|
-
* Returns drift in ms/s (slope of client-server offset over time).
|
|
316
|
-
*
|
|
317
|
-
* @param {object[]} subs - [{ ts: clientEpochMs, serverTs: serverEpochMs }]
|
|
318
|
-
* @returns {number|null} drift rate in ms/s, or null if insufficient data
|
|
319
|
-
*/
|
|
320
|
-
export function computeDriftRate(subs) {
|
|
321
|
-
if (subs.length < 2) return null;
|
|
322
|
-
|
|
323
|
-
// x = server time (seconds from first), y = client-server offset (ms)
|
|
324
|
-
const t0 = subs[0].serverTs;
|
|
325
|
-
const xs = subs.map(s => (s.serverTs - t0) / 1000);
|
|
326
|
-
const ys = subs.map(s => s.ts - s.serverTs);
|
|
327
|
-
|
|
328
|
-
const n = xs.length;
|
|
329
|
-
const xm = mean(xs);
|
|
330
|
-
const ym = mean(ys);
|
|
331
|
-
|
|
332
|
-
let num = 0, den = 0;
|
|
333
|
-
for (let i = 0; i < n; i++) {
|
|
334
|
-
num += (xs[i] - xm) * (ys[i] - ym);
|
|
335
|
-
den += (xs[i] - xm) ** 2;
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
if (den === 0) return null;
|
|
339
|
-
return num / den; // ms/s drift rate
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
343
|
-
// Layer 4 — Mutual Information Matrix
|
|
344
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
345
|
-
|
|
346
|
-
/**
|
|
347
|
-
* Compute pairwise mutual information across signal dimensions,
|
|
348
|
-
* then detect community structure via greedy modularity (Louvain-lite).
|
|
349
|
-
*
|
|
350
|
-
* Each token is projected into a discrete signal vector:
|
|
351
|
-
* [entropy_band, thermal_label, motor_band, idle_band, enf_band]
|
|
352
|
-
*
|
|
353
|
-
* MI between device i and device j = how much i's signal vector
|
|
354
|
-
* tells you about j's. Organic: MI matrix is sparse. Bot farm:
|
|
355
|
-
* block-diagonal (cliques).
|
|
356
|
-
*
|
|
357
|
-
* @param {object[]} tokens
|
|
358
|
-
* @param {object} [opts]
|
|
359
|
-
* @param {number} [opts.bins=8] - discretization bins per continuous signal
|
|
360
|
-
* @returns {{ score: number, communities: number, largestCommunity: number, communityRatio: number, modularity: number }}
|
|
361
|
-
*/
|
|
362
|
-
export function testMutualInformation(tokens, opts = {}) {
|
|
363
|
-
const n = tokens.length;
|
|
364
|
-
if (n < 10) return { score: 0, communities: 1, largestCommunity: n, communityRatio: 1, modularity: 0 };
|
|
365
|
-
|
|
366
|
-
const bins = opts.bins ?? 8;
|
|
367
|
-
|
|
368
|
-
// Project each token into a discrete signal vector
|
|
369
|
-
const vectors = tokens.map(t => tokenToVector(t, bins));
|
|
370
|
-
|
|
371
|
-
// Build similarity matrix (cosine similarity of signal vectors)
|
|
372
|
-
// For efficiency with large n, use fingerprint bucketing instead of O(n²)
|
|
373
|
-
const { adjacency, edges } = buildSimilarityGraph(vectors, 0.7);
|
|
374
|
-
|
|
375
|
-
if (edges === 0) {
|
|
376
|
-
// No similar pairs — fully organic
|
|
377
|
-
return { score: 0, communities: n, largestCommunity: 1, communityRatio: 1 / n, modularity: 0 };
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
// Run Louvain-lite community detection
|
|
381
|
-
const { communities, modularity } = louvainLite(adjacency, n);
|
|
382
|
-
|
|
383
|
-
// Count community sizes
|
|
384
|
-
const sizes = new Map();
|
|
385
|
-
for (const c of communities) {
|
|
386
|
-
sizes.set(c, (sizes.get(c) ?? 0) + 1);
|
|
387
|
-
}
|
|
388
|
-
|
|
389
|
-
let largestCommunity = 0;
|
|
390
|
-
for (const s of sizes.values()) {
|
|
391
|
-
if (s > largestCommunity) largestCommunity = s;
|
|
392
|
-
}
|
|
393
|
-
|
|
394
|
-
const communityRatio = largestCommunity / n;
|
|
395
|
-
const numCommunities = sizes.size;
|
|
396
|
-
|
|
397
|
-
// Score: large dominant community = coordinated
|
|
398
|
-
let score = 0;
|
|
399
|
-
if (communityRatio > 0.4) {
|
|
400
|
-
score += Math.round((communityRatio - 0.4) * 100);
|
|
401
|
-
}
|
|
402
|
-
// Few communities relative to n = low diversity
|
|
403
|
-
const expectedCommunities = Math.sqrt(n); // organic rough estimate
|
|
404
|
-
if (numCommunities < expectedCommunities * 0.5) {
|
|
405
|
-
score += Math.round((1 - numCommunities / expectedCommunities) * 30);
|
|
406
|
-
}
|
|
407
|
-
// High modularity with large community = structured coordination
|
|
408
|
-
if (modularity > 0.3 && communityRatio > 0.3) {
|
|
409
|
-
score += 20;
|
|
410
|
-
}
|
|
411
|
-
|
|
412
|
-
return {
|
|
413
|
-
score: clamp(score, 0, 100),
|
|
414
|
-
communities: numCommunities,
|
|
415
|
-
largestCommunity,
|
|
416
|
-
communityRatio: +communityRatio.toFixed(4),
|
|
417
|
-
modularity: +modularity.toFixed(4),
|
|
418
|
-
};
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
/**
|
|
422
|
-
* Project a token into a discrete signal vector for MI computation.
|
|
423
|
-
*/
|
|
424
|
-
function tokenToVector(token, bins) {
|
|
425
|
-
const idle = token.idle ?? token.signals?.idle ?? {};
|
|
426
|
-
const hw = token.hw ?? token.signals?.entropy ?? {};
|
|
427
|
-
const evt = token.evt ?? token.signals?.motor ?? {};
|
|
428
|
-
const enf = hw.enfDev ?? token.enfDev ?? 0;
|
|
429
|
-
|
|
430
|
-
return [
|
|
431
|
-
band(hw.ent ?? hw.score ?? 0.5, 0, 1, bins),
|
|
432
|
-
thermalToIndex(idle.therm ?? idle.thermalTransition ?? 'unknown'),
|
|
433
|
-
band(evt.mot ?? evt.consistency ?? 0.5, 0, 1, bins),
|
|
434
|
-
band(idle.dMs ?? idle.s ?? 0, 0, 300, bins), // idle duration in seconds
|
|
435
|
-
band(enf, -0.05, 0.05, bins),
|
|
436
|
-
];
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
const THERMAL_MAP = { hot_to_cold: 0, cold: 1, cooling: 2, warming: 3, sustained_hot: 4, step_function: 5, unknown: 6 };
|
|
440
|
-
function thermalToIndex(label) { return THERMAL_MAP[label] ?? 6; }
|
|
441
|
-
|
|
442
|
-
/**
|
|
443
|
-
* Build a similarity graph from signal vectors using fingerprint bucketing.
|
|
444
|
-
* O(n * k) where k = average bucket size, instead of O(n²).
|
|
445
|
-
*/
|
|
446
|
-
function buildSimilarityGraph(vectors, threshold) {
|
|
447
|
-
const n = vectors.length;
|
|
448
|
-
const adjacency = new Array(n).fill(null).map(() => []);
|
|
449
|
-
let edges = 0;
|
|
450
|
-
|
|
451
|
-
// Bucket by concatenated vector (exact match = definitely similar)
|
|
452
|
-
const buckets = new Map();
|
|
453
|
-
for (let i = 0; i < n; i++) {
|
|
454
|
-
const key = vectors[i].join(',');
|
|
455
|
-
if (!buckets.has(key)) buckets.set(key, []);
|
|
456
|
-
buckets.get(key).push(i);
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
// Exact matches
|
|
460
|
-
for (const group of buckets.values()) {
|
|
461
|
-
for (let a = 0; a < group.length; a++) {
|
|
462
|
-
for (let b = a + 1; b < group.length; b++) {
|
|
463
|
-
adjacency[group[a]].push(group[b]);
|
|
464
|
-
adjacency[group[b]].push(group[a]);
|
|
465
|
-
edges++;
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
}
|
|
469
|
-
|
|
470
|
-
// Near matches:
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
*
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
if (
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
//
|
|
681
|
-
//
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
*
|
|
686
|
-
|
|
687
|
-
*
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
const
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @svrnsec/pulse — Coordinated Inauthentic Behavior Detection
|
|
3
|
+
*
|
|
4
|
+
* Detects bot armies, click farms, and Sybil networks by analyzing
|
|
5
|
+
* physics-layer correlations that coordination inevitably creates.
|
|
6
|
+
*
|
|
7
|
+
* Core insight:
|
|
8
|
+
* Two real users in different cities have ZERO mutual information
|
|
9
|
+
* between their thermal curves, clock drift rates, ENF phases, and
|
|
10
|
+
* idle durations. Bot farms can randomize any ONE signal but cannot
|
|
11
|
+
* independently decorrelate ALL signals simultaneously — they're
|
|
12
|
+
* bound to shared physics (same room, same hardware, same scripts).
|
|
13
|
+
*
|
|
14
|
+
* Five detection layers:
|
|
15
|
+
*
|
|
16
|
+
* Layer 1 — Temporal Clustering
|
|
17
|
+
* Real users arrive via Poisson process. Bot armies arrive in bursts
|
|
18
|
+
* from a command server. Chi-squared test on 1s-bucket histogram.
|
|
19
|
+
*
|
|
20
|
+
* Layer 2 — Signal Fingerprint Collision
|
|
21
|
+
* Hash (thermal_label, entropy_band, motor_band) per token. Real
|
|
22
|
+
* cohort: high cardinality. Bot farm: < 20 unique fingerprints
|
|
23
|
+
* across 500 tokens.
|
|
24
|
+
*
|
|
25
|
+
* Layer 3 — Drift Fingerprinting
|
|
26
|
+
* Crystal oscillator imperfection (20–100 ppm) creates a unique
|
|
27
|
+
* clock drift rate per physical device. Multiple submissions from
|
|
28
|
+
* "different devices" that converge on the same drift rate = same
|
|
29
|
+
* hardware behind a rotation proxy. Survives IP/account/browser
|
|
30
|
+
* rotation.
|
|
31
|
+
*
|
|
32
|
+
* Layer 4 — Mutual Information Matrix
|
|
33
|
+
* Pairwise MI across all signal dimensions. Organic traffic: sparse
|
|
34
|
+
* random MI matrix. Bot traffic: block-diagonal structure (cliques).
|
|
35
|
+
* Louvain community detection finds the cliques in O(n log n).
|
|
36
|
+
*
|
|
37
|
+
* Layer 5 — Entropy Velocity
|
|
38
|
+
* Track dH/dt — the rate of Shannon entropy growth in the signal
|
|
39
|
+
* space. Organic growth adds unique profiles; bot deployment adds
|
|
40
|
+
* volume without diversity. The ratio (observed dH/dt) / (expected)
|
|
41
|
+
* catches mass deployment even with real hardware.
|
|
42
|
+
*
|
|
43
|
+
* Computational cost:
|
|
44
|
+
* All operations are O(n) or O(n log n). 500 tokens ≈ 8ms total
|
|
45
|
+
* on a single CPU core. No ML, no GPU, no training data.
|
|
46
|
+
*
|
|
47
|
+
* Usage:
|
|
48
|
+
* import { analyseCoordination } from '@svrnsec/pulse/coordination'
|
|
49
|
+
* const result = analyseCoordination(tokens, { windowMs: 60000 })
|
|
50
|
+
*/
|
|
51
|
+
|
|
52
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
53
|
+
// Utility
|
|
54
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
55
|
+
|
|
56
|
+
function mean(a) { return a.length === 0 ? 0 : a.reduce((s, v) => s + v, 0) / a.length; }
|
|
57
|
+
function variance(a) { const m = mean(a); return a.length < 2 ? 0 : a.reduce((s, v) => s + (v - m) ** 2, 0) / (a.length - 1); }
|
|
58
|
+
function stddev(a) { return Math.sqrt(variance(a)); }
|
|
59
|
+
function cv(a) { const m = mean(a); return m === 0 ? 0 : stddev(a) / m; }
|
|
60
|
+
function clamp(v, lo, hi) { return Math.max(lo, Math.min(hi, v)); }
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Shannon entropy of a discrete distribution (array of counts).
|
|
64
|
+
* Returns bits.
|
|
65
|
+
*/
|
|
66
|
+
function shannonEntropy(counts) {
|
|
67
|
+
const total = counts.reduce((s, c) => s + c, 0);
|
|
68
|
+
if (total === 0) return 0;
|
|
69
|
+
let H = 0;
|
|
70
|
+
for (const c of counts) {
|
|
71
|
+
if (c > 0) {
|
|
72
|
+
const p = c / total;
|
|
73
|
+
H -= p * Math.log2(p);
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
return H;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/**
 * Discretize a continuous value into a band index, clamping values
 * outside [lo, hi] to the first/last bin.
 *
 * @param {number} v - value
 * @param {number} lo - band floor
 * @param {number} hi - band ceiling
 * @param {number} bins - number of bins
 * @returns {number} bin index in [0, bins-1]; 0 when hi === lo
 */
function band(v, lo, hi, bins) {
  // Degenerate range — everything lands in bin 0.
  if (hi === lo) return 0;
  const raw = Math.floor(((v - lo) / (hi - lo)) * bins);
  return Math.min(bins - 1, Math.max(0, raw));
}
|
|
91
|
+
|
|
92
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
93
|
+
// Layer 1 — Temporal Clustering
|
|
94
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
95
|
+
|
|
96
|
+
/**
 * Test whether token arrival times follow a Poisson process (organic)
 * or show burst patterns (coordinated).
 *
 * Method: bucket timestamps into 1s intervals, compute chi-squared
 * statistic against expected Poisson rate. High chi-squared = bursty.
 *
 * @param {number[]} timestamps - sorted epoch-ms values
 * @param {object} [opts]
 * @param {number} [opts.bucketMs=1000] - bucket width in milliseconds
 * @returns {{ score: number, burstRatio: number, chi2: number, chi2Norm: number, pBursty: number }}
 *   score: 0 = organic, 100 = coordinated. Every code path returns the
 *   same key set (fix: early returns previously omitted chi2Norm).
 */
export function testTemporalClustering(timestamps, opts = {}) {
  const n = timestamps.length;
  // Too few arrivals to characterize the process — neutral mid score.
  if (n < 5) return { score: 50, burstRatio: 0, chi2: 0, chi2Norm: 0, pBursty: 0 };

  const bucketMs = opts.bucketMs ?? 1000;
  const tMin = timestamps[0];
  const tMax = timestamps[n - 1];
  const span = tMax - tMin;
  // Whole window fits inside one bucket — bucketing carries no signal.
  if (span < bucketMs) return { score: 50, burstRatio: 0, chi2: 0, chi2Norm: 0, pBursty: 0 };

  // Histogram of arrivals per bucket.
  const numBuckets = Math.ceil(span / bucketMs);
  const buckets = new Array(numBuckets).fill(0);
  for (const t of timestamps) {
    const idx = Math.min(numBuckets - 1, Math.floor((t - tMin) / bucketMs));
    buckets[idx]++;
  }

  // Expected count per bucket under a uniform Poisson process.
  const expected = n / numBuckets;
  let chi2 = 0;
  for (const obs of buckets) {
    chi2 += (obs - expected) ** 2 / expected;
  }

  // Burst ratio: fraction of tokens in the densest 10% of buckets.
  const sorted = [...buckets].sort((a, b) => b - a);
  const top10pct = Math.max(1, Math.ceil(numBuckets * 0.1));
  const burstRatio = sorted.slice(0, top10pct).reduce((s, v) => s + v, 0) / n;

  // Normalize chi2 by degrees of freedom (numBuckets - 1);
  // chi2Norm < 2 is consistent with Poisson, > 5 is very bursty.
  const chi2Norm = chi2 / Math.max(1, numBuckets - 1);
  const pBursty = clamp((chi2Norm - 1.5) / 4, 0, 1);

  // Score: 0 = organic, 100 = coordinated.
  const score = Math.round(clamp(
    pBursty * 60 + (burstRatio > 0.5 ? 40 : burstRatio * 80),
    0, 100
  ));

  return { score, burstRatio: +burstRatio.toFixed(4), chi2: +chi2.toFixed(2), chi2Norm: +chi2Norm.toFixed(3), pBursty: +pBursty.toFixed(4) };
}
|
|
151
|
+
|
|
152
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
153
|
+
// Layer 2 — Signal Fingerprint Collision
|
|
154
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
155
|
+
|
|
156
|
+
/**
 * Compute fingerprint collision rate across a token cohort.
 *
 * Each token produces a discrete fingerprint from its physics signals.
 * Real users: high cardinality (many unique fingerprints).
 * Bot farm: low cardinality (cloned environments produce duplicates).
 *
 * @param {object[]} tokens - array of token objects
 * @param {object} [opts]
 * @param {Function} [opts.fingerprint] - custom fingerprint fn(token) → string
 * @returns {{ score: number, uniqueRatio: number, topCollision: number, topRatio: number, uniqueCount: number }}
 *   Every code path returns the same key set (fix: the n<5 early return
 *   previously omitted topRatio).
 */
export function testFingerprintCollision(tokens, opts = {}) {
  const n = tokens.length;
  // Too few tokens for collision statistics to mean anything.
  if (n < 5) return { score: 0, uniqueRatio: 1, topCollision: 0, topRatio: 0, uniqueCount: n };

  const fp = opts.fingerprint ?? defaultFingerprint;
  const counts = new Map();

  for (const token of tokens) {
    const key = fp(token);
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }

  const uniqueCount = counts.size;
  const uniqueRatio = uniqueCount / n;

  // Largest collision cluster.
  let topCollision = 0;
  for (const c of counts.values()) {
    if (c > topCollision) topCollision = c;
  }
  const topRatio = topCollision / n;

  // Score: low unique ratio = coordinated.
  // Real users: uniqueRatio > 0.40 for n > 50; farms: uniqueRatio < 0.10.
  let score;
  if (uniqueRatio > 0.40) score = Math.round(clamp((0.60 - uniqueRatio) / 0.20 * 30, 0, 30));
  else if (uniqueRatio > 0.15) score = Math.round(30 + (0.40 - uniqueRatio) / 0.25 * 40);
  else score = Math.round(70 + (0.15 - uniqueRatio) / 0.15 * 30);

  // Bonus: a single fingerprint holding > 30% of the cohort is itself damning.
  if (topRatio > 0.30) score = Math.min(100, score + Math.round((topRatio - 0.30) * 60));

  return {
    score: clamp(score, 0, 100),
    uniqueRatio: +uniqueRatio.toFixed(4),
    topCollision,
    topRatio: +topRatio.toFixed(4),
    uniqueCount,
  };
}
|
|
209
|
+
|
|
210
|
+
/**
 * Default cohort fingerprint: thermal label, a 10-bin entropy band,
 * a 5-bin motor band, and the DRAM label, joined into one string key.
 * Signal fields are read from either the flat token layout
 * (token.idle / token.hw / token.evt) or the nested token.signals form.
 */
function defaultFingerprint(token) {
  const idle = token.idle ?? token.signals?.idle ?? {};
  const hw = token.hw ?? token.signals?.entropy ?? {};
  const evt = token.evt ?? token.signals?.motor ?? {};

  // Discretize a [0,1] value into `bins` buckets (same math as band(v, 0, 1, bins)).
  const bucket = (v, bins) => Math.min(bins - 1, Math.max(0, Math.floor(v * bins)));

  const thermal = idle.therm ?? idle.thermalTransition ?? 'unknown';
  const entropyBand = bucket(hw.ent ?? hw.score ?? 0.5, 10);
  const motorBand = bucket(evt.mot ?? evt.consistency ?? 0.5, 5);
  const dramLabel = hw.dram ?? idle.dMs ?? 'unknown';

  return `${thermal}:${entropyBand}:${motorBand}:${dramLabel}`;
}
|
|
225
|
+
|
|
226
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
227
|
+
// Layer 3 — Drift Fingerprinting
|
|
228
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
229
|
+
|
|
230
|
+
/**
 * Detect clock drift convergence across devices.
 *
 * Every crystal oscillator drifts at a unique rate (20–100 ppm).
 * Over multiple submissions, drift accumulates into a device-specific
 * signature. If "different devices" share the same drift rate, they're
 * the same physical hardware behind a rotation proxy.
 *
 * Input: array of device submission histories.
 * Each device: { id: string, submissions: [{ ts: number, serverTs: number }] }
 *
 * The delta between client timestamp and server timestamp grows linearly
 * at the drift rate. Linear regression on (serverTs, clientTs - serverTs)
 * gives the slope = drift rate in ms/s.
 *
 * @param {object[]} devices - [{ id, submissions: [{ ts, serverTs }] }]
 * @param {object} [opts]
 * @param {number} [opts.driftBinPpm=5] - drift rate bin width in ppm
 * @param {number} [opts.minSubmissions=3] - min submissions per device
 * @param {number} [opts.collisionThreshold=0.30] - fraction triggering flag
 * @returns {{ score: number, driftRates: Object.<string, number>, largestCluster: number, clusterRatio: number, uniqueBins: number, totalDevices: number }}
 *   driftRates is a plain object on every path (fix: the early return
 *   previously leaked the internal Map and omitted uniqueBins).
 */
export function testDriftFingerprint(devices, opts = {}) {
  const driftBinPpm = opts.driftBinPpm ?? 5;
  const minSubmissions = opts.minSubmissions ?? 3;
  const collisionThresh = opts.collisionThreshold ?? 0.30;

  // Compute drift rate per device via linear regression.
  const rates = [];
  const rateMap = new Map();

  for (const dev of devices) {
    const subs = dev.submissions;
    if (!subs || subs.length < minSubmissions) continue;

    const drift = computeDriftRate(subs);
    if (drift === null) continue;

    rates.push(drift);
    rateMap.set(dev.id, drift);
  }

  // Need at least 3 measurable devices for cluster statistics.
  if (rates.length < 3) {
    return {
      score: 0,
      driftRates: Object.fromEntries(rateMap),
      largestCluster: 0,
      clusterRatio: 0,
      uniqueBins: 0,
      totalDevices: rates.length,
    };
  }

  // Bin drift rates and find collision clusters.
  const bins = new Map();
  for (const rate of rates) {
    // Convert ms/s drift to ppm, then bin.
    const ppm = rate * 1000; // ms/s → μs/s ≈ ppm
    const binKey = Math.round(ppm / driftBinPpm) * driftBinPpm;
    bins.set(binKey, (bins.get(binKey) ?? 0) + 1);
  }

  let largestCluster = 0;
  for (const c of bins.values()) {
    if (c > largestCluster) largestCluster = c;
  }

  const clusterRatio = largestCluster / rates.length;
  const uniqueBins = bins.size;
  const expectedBins = Math.min(rates.length, Math.ceil(80 / driftBinPpm)); // ~80 ppm range

  // Score: high cluster ratio + few unique bins = same hardware.
  let score = 0;
  if (clusterRatio >= collisionThresh) {
    score += Math.round((clusterRatio - collisionThresh + 0.05) / (1 - collisionThresh) * 60);
  }
  if (uniqueBins < expectedBins * 0.5) {
    score += Math.round((1 - uniqueBins / expectedBins) * 40);
  }

  return {
    score: clamp(score, 0, 100),
    driftRates: Object.fromEntries(rateMap),
    largestCluster,
    clusterRatio: +clusterRatio.toFixed(4),
    uniqueBins,
    totalDevices: rates.length,
  };
}
|
|
312
|
+
|
|
313
|
+
/**
 * Compute clock drift rate from a series of submissions via ordinary
 * least-squares regression: x = server time in seconds since the first
 * submission, y = (client ts − server ts) in ms. The fitted slope is
 * the drift rate in ms per second.
 *
 * @param {object[]} subs - [{ ts: clientEpochMs, serverTs: serverEpochMs }]
 * @returns {number|null} drift rate in ms/s, or null if insufficient data
 */
export function computeDriftRate(subs) {
  if (subs.length < 2) return null;

  const base = subs[0].serverTs;
  const xs = [];
  const ys = [];
  for (const s of subs) {
    xs.push((s.serverTs - base) / 1000);
    ys.push(s.ts - s.serverTs);
  }

  const count = xs.length;
  let xSum = 0;
  let ySum = 0;
  for (let i = 0; i < count; i++) {
    xSum += xs[i];
    ySum += ys[i];
  }
  const xMean = xSum / count;
  const yMean = ySum / count;

  let covXY = 0;
  let varX = 0;
  for (let i = 0; i < count; i++) {
    covXY += (xs[i] - xMean) * (ys[i] - yMean);
    varX += (xs[i] - xMean) ** 2;
  }

  // All submissions share one server timestamp — slope is undefined.
  if (varX === 0) return null;
  return covXY / varX; // ms/s drift rate
}
|
|
341
|
+
|
|
342
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
343
|
+
// Layer 4 — Mutual Information Matrix
|
|
344
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
345
|
+
|
|
346
|
+
/**
 * Compute pairwise mutual information across signal dimensions,
 * then detect community structure via greedy modularity (Louvain-lite).
 *
 * Each token is projected into a discrete signal vector:
 *   [entropy_band, thermal_label, motor_band, idle_band, enf_band]
 *
 * MI between device i and device j = how much i's signal vector
 * tells you about j's. Organic: MI matrix is sparse. Bot farm:
 * block-diagonal (cliques).
 *
 * @param {object[]} tokens
 * @param {object} [opts]
 * @param {number} [opts.bins=8] - discretization bins per continuous signal
 * @returns {{ score: number, communities: number, largestCommunity: number, communityRatio: number, modularity: number }}
 */
export function testMutualInformation(tokens, opts = {}) {
  const n = tokens.length;
  // Community analysis is meaningless on tiny cohorts.
  if (n < 10) return { score: 0, communities: 1, largestCommunity: n, communityRatio: 1, modularity: 0 };

  const bins = opts.bins ?? 8;

  // Discretize every token into its signal vector.
  const vectors = tokens.map((t) => tokenToVector(t, bins));

  // Sparse similarity graph; fingerprint bucketing keeps this sub-quadratic.
  const { adjacency, edges } = buildSimilarityGraph(vectors, 0.7);

  if (edges === 0) {
    // Not a single similar pair — fully organic population.
    return { score: 0, communities: n, largestCommunity: 1, communityRatio: 1 / n, modularity: 0 };
  }

  // Greedy modularity community detection.
  const { communities, modularity } = louvainLite(adjacency, n);

  // Tally community sizes and find the dominant one.
  const sizeByCommunity = new Map();
  for (const label of communities) {
    sizeByCommunity.set(label, (sizeByCommunity.get(label) ?? 0) + 1);
  }
  let largestCommunity = 0;
  for (const size of sizeByCommunity.values()) {
    if (size > largestCommunity) largestCommunity = size;
  }

  const communityRatio = largestCommunity / n;
  const numCommunities = sizeByCommunity.size;

  // Scoring: a dominant community, unusually few communities, or
  // high modularity with a sizable dominant block all raise suspicion.
  let score = 0;
  if (communityRatio > 0.4) {
    score += Math.round((communityRatio - 0.4) * 100);
  }
  const expectedCommunities = Math.sqrt(n); // rough organic estimate
  if (numCommunities < expectedCommunities * 0.5) {
    score += Math.round((1 - numCommunities / expectedCommunities) * 30);
  }
  if (modularity > 0.3 && communityRatio > 0.3) {
    score += 20;
  }

  return {
    score: clamp(score, 0, 100),
    communities: numCommunities,
    largestCommunity,
    communityRatio: +communityRatio.toFixed(4),
    modularity: +modularity.toFixed(4),
  };
}
|
|
420
|
+
|
|
421
|
+
/**
 * Project a token into a discrete signal vector for MI computation.
 * Reads the flat token layout (idle / hw / evt) or the nested
 * token.signals form, with neutral fallbacks for missing fields.
 */
function tokenToVector(token, bins) {
  const idle = token.idle ?? token.signals?.idle ?? {};
  const hw = token.hw ?? token.signals?.entropy ?? {};
  const evt = token.evt ?? token.signals?.motor ?? {};
  const enfDeviation = hw.enfDev ?? token.enfDev ?? 0;

  const entropyDim = band(hw.ent ?? hw.score ?? 0.5, 0, 1, bins);
  const thermalDim = thermalToIndex(idle.therm ?? idle.thermalTransition ?? 'unknown');
  const motorDim = band(evt.mot ?? evt.consistency ?? 0.5, 0, 1, bins);
  const idleDim = band(idle.dMs ?? idle.s ?? 0, 0, 300, bins); // idle duration in seconds
  const enfDim = band(enfDeviation, -0.05, 0.05, bins);

  return [entropyDim, thermalDim, motorDim, idleDim, enfDim];
}
|
|
438
|
+
|
|
439
|
+
// Stable ordinal codes for thermal-transition labels; 6 doubles as the
// catch-all for unrecognized labels.
const THERMAL_MAP = {
  hot_to_cold: 0,
  cold: 1,
  cooling: 2,
  warming: 3,
  sustained_hot: 4,
  step_function: 5,
  unknown: 6,
};

/** Map a thermal label to its ordinal index (unrecognized → 6). */
function thermalToIndex(label) {
  return THERMAL_MAP[label] ?? 6;
}
|
|
441
|
+
|
|
442
|
+
/**
 * Build a similarity graph from signal vectors using fingerprint bucketing.
 * O(n * k) where k = average bucket size, instead of O(n²).
 *
 * Two vectors are linked when they are identical (same bucket) or differ
 * in exactly one dimension (Hamming distance 1 over the joined key).
 *
 * @param {number[][]} vectors - discrete signal vectors, one per token
 * @param {number} threshold - NOTE(review): currently unused; the graph is
 *   built purely from exact/Hamming-1 key matches. Presumably reserved for
 *   a cosine-similarity cutoff — confirm before removing.
 * @returns {{ adjacency: number[][], edges: number }} adjacency lists
 *   (both directions pushed per link) and the undirected edge count
 */
function buildSimilarityGraph(vectors, threshold) {
  const n = vectors.length;
  const adjacency = new Array(n).fill(null).map(() => []);
  let edges = 0;

  // Bucket by concatenated vector (exact match = definitely similar)
  const buckets = new Map();
  for (let i = 0; i < n; i++) {
    const key = vectors[i].join(',');
    if (!buckets.has(key)) buckets.set(key, []);
    buckets.get(key).push(i);
  }

  // Exact matches: fully connect each bucket's members.
  for (const group of buckets.values()) {
    for (let a = 0; a < group.length; a++) {
      for (let b = a + 1; b < group.length; b++) {
        adjacency[group[a]].push(group[b]);
        adjacency[group[b]].push(group[a]);
        edges++;
      }
    }
  }

  // Near matches: for each bucket key, generate all Hamming-distance-1 neighbors
  // and check if they exist. O(k * d * v) instead of O(k²) where k = number of
  // unique keys, d = dimensions, v = unique values per dimension.
  const keys = [...buckets.keys()];
  const keySet = new Set(keys);

  // Collect all unique values per dimension for neighbor generation
  const allParts = keys.map(k => k.split(','));
  const dims = allParts[0]?.length ?? 0;
  const uniquePerDim = [];
  for (let d = 0; d < dims; d++) {
    uniquePerDim.push(new Set(allParts.map(p => p[d])));
  }

  // Canonicalized "a|b" pair ids prevent linking the same key pair twice
  // (each pair is discoverable from either side).
  const visitedPairs = new Set();
  for (const key of keys) {
    const parts = key.split(',');
    for (let dim = 0; dim < parts.length; dim++) {
      const original = parts[dim];
      for (const alt of uniquePerDim[dim]) {
        if (alt === original) continue;
        // Mutate one dimension in place to form the candidate neighbor key.
        parts[dim] = alt;
        const neighborKey = parts.join(',');
        if (keySet.has(neighborKey)) {
          // Avoid processing the same pair twice
          const pairId = key < neighborKey ? `${key}|${neighborKey}` : `${neighborKey}|${key}`;
          if (!visitedPairs.has(pairId)) {
            visitedPairs.add(pairId);
            // Cross-connect every member of the two buckets.
            const ga = buckets.get(key);
            const gb = buckets.get(neighborKey);
            for (const i of ga) {
              for (const j of gb) {
                adjacency[i].push(j);
                adjacency[j].push(i);
                edges++;
              }
            }
          }
        }
      }
      // Restore the mutated dimension before moving on.
      parts[dim] = original;
    }
  }

  return { adjacency, edges };
}
|
|
516
|
+
|
|
517
|
+
/**
 * Louvain-lite: greedy modularity maximization.
 * Simplified single-level version for real-time use (no coarsening phase;
 * at most 10 sweeps over the nodes, stopping early when no node moves).
 * Returns community assignments (one label per node, labels are initial
 * node indices) and the final modularity score.
 *
 * @param {number[][]} adjacency - adjacency lists (both directions present)
 * @param {number} n - node count
 * @returns {{ communities: number[], modularity: number }}
 */
function louvainLite(adjacency, n) {
  // Initialize: each node in its own community
  const comm = new Array(n);
  for (let i = 0; i < n; i++) comm[i] = i;

  // Compute total edges (2m) — sum of adjacency-list lengths counts each
  // undirected edge twice.
  let twoM = 0;
  for (let i = 0; i < n; i++) twoM += adjacency[i].length;
  if (twoM === 0) return { communities: comm, modularity: 0 };

  // Degree of each node
  const deg = adjacency.map(a => a.length);

  // Incremental community degree map — O(1) lookup instead of O(n) scan
  const commDeg = new Map();
  for (let i = 0; i < n; i++) {
    commDeg.set(i, deg[i]);
  }

  // Sweep: try to move each node to its best neighbor's community.
  // Result depends on node order and on-the-fly updates — this exact
  // iteration scheme is part of the function's deterministic output.
  let changed = true;
  let passes = 0;
  while (changed && passes < 10) {
    changed = false;
    passes++;
    for (let i = 0; i < n; i++) {
      if (adjacency[i].length === 0) continue;

      // Count edges to each neighboring community
      const commEdges = new Map();
      for (const j of adjacency[i]) {
        const c = comm[j];
        commEdges.set(c, (commEdges.get(c) ?? 0) + 1);
      }

      // Find best community (highest modularity gain)
      let bestComm = comm[i];
      let bestDelta = 0;

      for (const [c, eic] of commEdges) {
        if (c === comm[i]) continue;
        const cDeg = commDeg.get(c) ?? 0;
        // Modularity gain approximation: edges-in minus expected edges-in.
        const delta = eic / twoM - (deg[i] * cDeg) / (twoM * twoM);
        if (delta > bestDelta) {
          bestDelta = delta;
          bestComm = c;
        }
      }

      if (bestComm !== comm[i]) {
        // Update community degree map incrementally
        const oldComm = comm[i];
        commDeg.set(oldComm, (commDeg.get(oldComm) ?? 0) - deg[i]);
        commDeg.set(bestComm, (commDeg.get(bestComm) ?? 0) + deg[i]);
        comm[i] = bestComm;
        changed = true;
      }
    }
  }

  // Compute modularity: Q = (1/2m) Σ_ij [A_ij − k_i·k_j/2m] δ(c_i, c_j),
  // summed here over directed adjacency entries (each edge twice).
  let Q = 0;
  for (let i = 0; i < n; i++) {
    for (const j of adjacency[i]) {
      if (comm[i] === comm[j]) {
        Q += 1 - (deg[i] * deg[j]) / twoM;
      }
    }
  }
  Q /= twoM;

  // Clamp to 0 — degenerate graphs can produce slightly negative Q.
  return { communities: comm, modularity: Math.max(0, Q) };
}
|
|
595
|
+
|
|
596
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
597
|
+
// Layer 5 — Entropy Velocity
|
|
598
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
599
|
+
|
|
600
|
+
/**
 * Measure entropy growth rate vs traffic growth rate.
 *
 * Organic growth: each new user adds a unique signal profile → entropy
 * increases proportionally to log2(n).
 *
 * Bot deployment: traffic increases but entropy plateaus or grows slower
 * than expected (cloned profiles add volume without diversity).
 *
 * Method: split the token window into temporal slices, compute Shannon
 * entropy of the signal fingerprint distribution in each slice, then
 * measure dH/dt vs dn/dt.
 *
 * The early-return branches below are ordered by precedence:
 * insufficient data → degenerate slicing → near-zero diversity →
 * zero-diversity first slice → general growth-ratio scoring.
 *
 * @param {object[]} tokens - chronologically sorted
 * @param {object} [opts]
 * @param {number} [opts.slices=5] - number of temporal slices
 * @returns {{ score: number, velocityRatio: number, entropySlices: number[], countSlices: number[] }}
 */
export function testEntropyVelocity(tokens, opts = {}) {
  const n = tokens.length;
  const numSlices = opts.slices ?? 5;
  // Need at least 3 tokens per slice for slice entropies to be meaningful.
  if (n < numSlices * 3) return { score: 0, velocityRatio: 1, entropySlices: [], countSlices: [] };

  // Split tokens into temporal slices
  const sliceSize = Math.ceil(n / numSlices);
  const entropySlices = [];
  const countSlices = [];
  const cumulativeEntropies = [];

  for (let s = 0; s < numSlices; s++) {
    const start = 0; // cumulative — each slice includes all previous tokens
    const end = Math.min(n, (s + 1) * sliceSize);
    const slice = tokens.slice(start, end);

    // Compute Shannon entropy of fingerprint distribution
    const fps = new Map();
    for (const t of slice) {
      const key = defaultFingerprint(t);
      fps.set(key, (fps.get(key) ?? 0) + 1);
    }
    const H = shannonEntropy([...fps.values()]);
    entropySlices.push(+H.toFixed(4));
    countSlices.push(end);
    cumulativeEntropies.push(H);
  }

  // Expected entropy growth: H_expected ≈ log2(unique_count) grows as log2(n)
  // For organic traffic, H should grow roughly as log2(n) / log2(N_total)
  // Measure: ratio of actual entropy growth to expected
  const H_first = cumulativeEntropies[0];
  const H_last = cumulativeEntropies[cumulativeEntropies.length - 1];
  const n_first = countSlices[0];
  const n_last = countSlices[countSlices.length - 1];

  // All slices ended at the same count — no growth to measure.
  if (n_first === n_last) {
    return { score: 0, velocityRatio: 1, entropySlices, countSlices };
  }

  // If entropy is near-zero throughout — extremely low diversity = coordinated
  if (H_last < 0.5 && n_last >= 20) {
    return { score: Math.round(clamp(80 + (0.5 - H_last) * 40, 80, 100)), velocityRatio: 0, entropySlices, countSlices };
  }

  if (H_first < 0.01) {
    // First slice has no diversity — growth ratio would divide by ~0,
    // so fall back to an absolute entropy check instead.
    const expectedH = Math.log2(Math.max(2, n_last * 0.3)); // expected for organic
    const ratio = H_last / expectedH;
    return {
      score: Math.round(clamp((1 - ratio) * 100, 0, 100)),
      velocityRatio: +ratio.toFixed(4),
      entropySlices,
      countSlices,
    };
  }

  // Expected entropy ratio based on log growth
  // (n_first = ceil(n / numSlices) >= 3 here, so log2(n_first) > 0.)
  const expectedGrowth = Math.log2(n_last) / Math.log2(n_first);
  const actualGrowth = H_last / Math.max(0.01, H_first);
  const velocityRatio = actualGrowth / expectedGrowth;

  // velocityRatio < 0.6 means entropy isn't keeping up with traffic = artificial
  // velocityRatio ≈ 1.0 means organic
  // velocityRatio > 1.2 could mean natural diversification
  let score;
  if (velocityRatio >= 0.8) {
    score = Math.round(clamp((1.0 - velocityRatio) * 50, 0, 20));
  } else if (velocityRatio >= 0.5) {
    score = Math.round(20 + (0.8 - velocityRatio) / 0.3 * 50);
  } else {
    score = Math.round(70 + (0.5 - velocityRatio) / 0.5 * 30);
  }

  return {
    score: clamp(score, 0, 100),
    velocityRatio: +velocityRatio.toFixed(4),
    entropySlices,
    countSlices,
  };
}
|
|
699
|
+
|
|
700
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
701
|
+
// Orchestrator — Coordinated Behavior Analysis
|
|
702
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
703
|
+
|
|
704
|
+
/**
 * Full coordination analysis across all 5 layers, fused into a single
 * 0–100 coordination score with a verdict and advisory flags.
 *
 * @param {object[]} tokens - engagement tokens with physics signals
 * @param {object} [opts]
 * @param {object[]} [opts.devices] - device submission histories for drift analysis
 * @param {number} [opts.windowMs] - analysis window (default: 60000ms)
 * @returns {CoordinationResult}
 */
export function analyseCoordination(tokens, opts = {}) {
  const n = tokens.length;
  // Below 5 tokens no layer can say anything useful.
  if (n < 5) {
    return {
      coordinationScore: 0,
      verdict: 'insufficient_data',
      confidence: 0,
      layers: {},
      tokenCount: n,
    };
  }

  // Sorted arrival times; tokens with no recognizable timestamp field
  // fall back to "now".
  const timestamps = tokens
    .map((t) => t.iat ?? t.timestamp ?? t.ts ?? Date.now())
    .sort((a, b) => a - b);

  // ── Run each detection layer ──
  const temporal = testTemporalClustering(timestamps);
  const fingerprint = testFingerprintCollision(tokens);

  // Drift fingerprinting needs multi-submission histories for ≥3 devices.
  const devices = opts.devices ?? [];
  const drift = devices.length >= 3
    ? testDriftFingerprint(devices)
    : { score: 0, totalDevices: 0 };

  const mi = testMutualInformation(tokens);
  const velocity = testEntropyVelocity(tokens);

  // ── Weighted fusion ──
  // Weights reflect each layer's discriminative power and evasion cost;
  // when drift has no usable device history, its weight is redistributed
  // across the remaining layers.
  const hasDrift = drift.totalDevices >= 3;
  const effectiveWeights = hasDrift
    ? {
        temporal: 0.15, // easy to randomize, but still catches lazy farms
        fingerprint: 0.25, // hard to fake without real hardware diversity
        drift: 0.15, // only fires with multi-submission data
        mi: 0.25, // hardest to evade — requires true independence
        velocity: 0.20, // catches mass deployment timing
      }
    : {
        temporal: 0.18,
        fingerprint: 0.28,
        drift: 0,
        mi: 0.30,
        velocity: 0.24,
      };

  const raw =
    temporal.score * effectiveWeights.temporal +
    fingerprint.score * effectiveWeights.fingerprint +
    drift.score * effectiveWeights.drift +
    mi.score * effectiveWeights.mi +
    velocity.score * effectiveWeights.velocity;

  const coordinationScore = Math.round(clamp(raw, 0, 100));

  // Confidence: higher with more tokens and more layers contributing.
  // NOTE(review): drift counts as "active" whenever device history was
  // sufficient — even at score 0 — while the other four layers must
  // score > 0. Presumably intentional ("layer ran"); confirm.
  const activeLayers =
    [temporal, fingerprint, mi, velocity].filter((l) => l.score > 0).length +
    (hasDrift ? 1 : 0);
  const confidence = clamp(
    (Math.min(n, 100) / 100) * 0.5 + (activeLayers / 5) * 0.5,
    0, 1
  );

  // Verdict from fixed score thresholds.
  let verdict = 'organic';
  if (coordinationScore >= 70) verdict = 'coordinated_inauthentic';
  else if (coordinationScore >= 45) verdict = 'suspicious_coordination';
  else if (coordinationScore >= 25) verdict = 'low_coordination';

  // Advisory flags for individually-strong layer signals.
  const flags = [];
  if (temporal.score >= 60) flags.push('BURST_ARRIVAL_PATTERN');
  if (fingerprint.score >= 60) flags.push('LOW_FINGERPRINT_DIVERSITY');
  if (drift.score >= 50) flags.push('CLOCK_DRIFT_CONVERGENCE');
  if (mi.score >= 50) flags.push('SIGNAL_CLIQUE_DETECTED');
  if (velocity.score >= 50) flags.push('ENTROPY_GROWTH_STALLED');

  return {
    coordinationScore,
    verdict,
    confidence: +confidence.toFixed(3),
    flags,
    layers: {
      temporal,
      fingerprint,
      drift: hasDrift ? drift : { score: 0, skipped: true, reason: 'insufficient_device_history' },
      mutualInformation: mi,
      entropyVelocity: velocity,
    },
    tokenCount: n,
    weights: effectiveWeights,
  };
}
|
|
815
|
+
|
|
816
|
+
/**
|
|
817
|
+
* @typedef {object} CoordinationResult
|
|
818
|
+
* @property {number} coordinationScore - 0–100, higher = more coordinated
|
|
819
|
+
* @property {string} verdict - 'organic' | 'low_coordination' | 'suspicious_coordination' | 'coordinated_inauthentic'
|
|
820
|
+
* @property {number} confidence - 0–1, based on token count and active layers
|
|
821
|
+
* @property {string[]} flags - advisory flags for specific signals
|
|
822
|
+
* @property {object} layers - per-layer results
|
|
823
|
+
* @property {number} tokenCount - number of tokens analyzed
|
|
824
|
+
* @property {object} weights - effective layer weights used
|
|
825
|
+
*/
|