volume-anomaly 0.1.0 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -24
- package/build/index.cjs +73 -11
- package/build/index.mjs +73 -11
- package/package.json +15 -2
package/README.md
CHANGED
|
@@ -124,6 +124,44 @@ interface PredictionResult {
|
|
|
124
124
|
}
|
|
125
125
|
```
|
|
126
126
|
|
|
127
|
+
**Practical usage with `getAggregatedTrades` from `backtest-kit`:**
|
|
128
|
+
|
|
129
|
+
> ⚠️ **Never pass the same trades in both `historical` and `recent`.** Training calibrates the baseline. If `recent` overlaps with `historical`, any anomaly in that period is absorbed into the baseline — the detector learns to treat it as normal and misses it. Always slice so that `recent` starts where `historical` ends.
|
|
130
|
+
|
|
131
|
+
```typescript
|
|
132
|
+
import { predict } from 'volume-anomaly';
|
|
133
|
+
import type { IAggregatedTradeData } from 'volume-anomaly';
|
|
134
|
+
|
|
135
|
+
// Your data-fetching function — returns the last `limit` trades, oldest first:
|
|
136
|
+
declare function getAggregatedTrades(
|
|
137
|
+
symbol: string,
|
|
138
|
+
limit: number,
|
|
139
|
+
): Promise<IAggregatedTradeData[]>;
|
|
140
|
+
|
|
141
|
+
// ── One-shot (single API call, zero overlap) ──────────────────────────────────
|
|
142
|
+
|
|
143
|
+
const N_train = 1200; // calibration window
|
|
144
|
+
const N_detect = 200; // window to evaluate
|
|
145
|
+
|
|
146
|
+
const all = await getAggregatedTrades('BTCUSDT', N_train + N_detect);
|
|
147
|
+
const historical = all.slice(0, N_train); // older 1200 trades — baseline
|
|
148
|
+
const recent = all.slice(N_train); // newest 200 trades — no overlap
|
|
149
|
+
|
|
150
|
+
const result = predict(historical, recent, 0.75);
|
|
151
|
+
// {
|
|
152
|
+
// anomaly: true,
|
|
153
|
+
// confidence: 0.83,
|
|
154
|
+
// direction: 'long', // 'long' | 'short' | 'neutral'
|
|
155
|
+
// imbalance: 0.61,
|
|
156
|
+
// }
|
|
157
|
+
|
|
158
|
+
if (result.anomaly) {
|
|
159
|
+
console.log(`direction=${result.direction} confidence=${result.confidence.toFixed(2)}`);
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
`predict()` trains a fresh detector on every call. For continuous monitoring (many `detect()` calls from one trained model) use `VolumeAnomalyDetector` directly — see the class API below.
|
|
164
|
+
|
|
127
165
|
---
|
|
128
166
|
|
|
129
167
|
### `new VolumeAnomalyDetector(config?)`
|
|
@@ -350,7 +388,7 @@ S⁻ₜ = max(0, S⁻_{t-1} − xₜ + μ₀ − k)
|
|
|
350
388
|
μ₀ = mean(|imbalance|) over the training window
|
|
351
389
|
σ₀² = var(|imbalance|) sample variance
|
|
352
390
|
k = cusumKSigmas · σ₀ (default 0.5σ)
|
|
353
|
-
h = cusumHSigmas · σ₀ (default
|
|
391
|
+
h = cusumHSigmas · σ₀ (default 5σ)
|
|
354
392
|
```
|
|
355
393
|
|
|
356
394
|
**Average run length under H₀ (ARL₀):** the expected number of observations before a false alarm. For Gaussian series, the approximate relationship between h, k and ARL₀ is:
|
|
@@ -511,11 +549,12 @@ import {
|
|
|
511
549
|
hawkesLogLikelihood,
|
|
512
550
|
hawkesFit,
|
|
513
551
|
hawkesLambda,
|
|
552
|
+
hawkesPeakLambda, // max λ(tᵢ) over window — used by the detector
|
|
514
553
|
hawkesAnomalyScore,
|
|
515
554
|
|
|
516
555
|
// CUSUM
|
|
517
556
|
cusumFit,
|
|
518
|
-
cusumUpdate, // returns { state, alarm }
|
|
557
|
+
cusumUpdate, // returns { state, alarm, preResetState }
|
|
519
558
|
cusumInitState,
|
|
520
559
|
cusumAnomalyScore,
|
|
521
560
|
cusumBatch,
|
|
@@ -544,6 +583,10 @@ Returns `{ params, logLik, stationarity, converged }`. `stationarity = α/β`. I
|
|
|
544
583
|
|
|
545
584
|
Evaluates `λ(t)` at a specific time given a history of prior events. All timestamps must be `< t`.
|
|
546
585
|
|
|
586
|
+
### `hawkesPeakLambda(timestamps, params)`
|
|
587
|
+
|
|
588
|
+
Returns the **maximum** `λ(tᵢ)` over all events in `timestamps` using the O(n) recursive A(i) trick. This is what the detector uses internally instead of `hawkesLambda` — a burst that decayed by the last event is still captured. `hawkesLambda` evaluates at a single point; `hawkesPeakLambda` scans the full window.
|
|
589
|
+
|
|
547
590
|
### `cusumUpdate(state, x, params)`
|
|
548
591
|
|
|
549
592
|
Pure function. Returns `{ state: CusumState, alarm: boolean, preResetState: CusumState }`. Does **not** mutate the input state. `preResetState` holds the accumulator values *before* the alarm reset — use it for scoring, since `state.sPos/sNeg` are zeroed when `alarm = true`.
|
|
@@ -572,26 +615,47 @@ BOCPD update is technically O(r_max) where r_max is the number of surviving run-
|
|
|
572
615
|
|
|
573
616
|
---
|
|
574
617
|
|
|
575
|
-
## Training
|
|
618
|
+
## Training and detection window sizes
|
|
619
|
+
|
|
620
|
+
### `train()` — historical window
|
|
576
621
|
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
|
580
|
-
|
|
581
|
-
|
|
|
582
|
-
|
|
|
583
|
-
|
|
|
622
|
+
The rolling imbalance series used to calibrate CUSUM and BOCPD has length `max(0, N − windowSize + 1)`. Too few trades → empty or near-empty calibration series → CUSUM baseline is a fallback (μ₀ = 0, σ₀ = 1) and Hawkes MLE is unreliable.
|
|
623
|
+
|
|
624
|
+
| Trades in `historical` | Rolling windows for calibration¹ | Hawkes MLE | Notes |
|
|
625
|
+
|------------------------|----------------------------------|------------|-------|
|
|
626
|
+
| < 50 | — | — | **Rejected — `train()` throws** |
|
|
627
|
+
| 50–99 | 1–50 | Borderline | CUSUM/BOCPD barely calibrated; Hawkes fallback path active (< 10 events triggers flat Poisson) |
|
|
628
|
+
| 100–199 | 51–150 | Adequate | Practical minimum; mean/σ estimates reasonable |
|
|
629
|
+
| 200–499 | 151–450 | Good | Stable MLE; recommended baseline for liquid pairs |
|
|
630
|
+
| 500–2000 | 451–1951 | Robust | Best calibration; use for low-activity or volatile pairs |
|
|
631
|
+
| > 2000 | > 1951 | Robust | Beware regime staleness — window may span multiple market conditions |
|
|
632
|
+
|
|
633
|
+
¹ Assumes default `windowSize = 50`.
|
|
584
634
|
|
|
585
635
|
The training window should represent **normal, in-control market conditions**. Fitting on data that already contains anomalies will inflate the baseline and reduce sensitivity. If your market opens with a gap or major event, use a calmer historical window from the previous session.
|
|
586
636
|
|
|
587
|
-
|
|
637
|
+
### `detect()` — recent window
|
|
638
|
+
|
|
639
|
+
The same rolling logic applies: CUSUM and BOCPD only receive data when `trades ≥ windowSize`. Below that threshold only the Hawkes score contributes, and maximum confidence is `0.4 × hawkesScore ≤ 0.40` — the anomaly flag **cannot fire** at the default threshold of 0.75.
|
|
640
|
+
|
|
641
|
+
| Trades in `recent` | Rolling windows¹ | All three detectors active | Notes |
|
|
642
|
+
|--------------------|-----------------|---------------------------|-------|
|
|
643
|
+
| < `windowSize` (< 50) | 0 | **No** | Hawkes-only; `anomaly` cannot fire at default threshold |
|
|
644
|
+
| = `windowSize` (= 50) | 1 | Barely | Minimum for full detection; CUSUM/BOCPD signal is very sparse |
|
|
645
|
+
| 2× `windowSize` (100) | 51 | Yes | **Recommended minimum** for production use |
|
|
646
|
+
| 4× `windowSize` (200) | 151 | Yes | Good — default in code examples |
|
|
647
|
+
| 10× `windowSize` (500) | 451 | Yes | Best accuracy; higher latency |
|
|
648
|
+
|
|
649
|
+
**Rule of thumb:** `recent ≥ 2 × windowSize`. On BTC/USDT perpetual (windowSize = 50), 200 trades typically span 5–30 seconds and are comfortably available from a real-time buffer.
|
|
650
|
+
|
|
651
|
+
### `windowSize` guidance
|
|
588
652
|
|
|
589
|
-
| `windowSize` |
|
|
590
|
-
|
|
591
|
-
| 20 |
|
|
592
|
-
| 50 (default) |
|
|
593
|
-
| 100 |
|
|
594
|
-
| 200 |
|
|
653
|
+
| `windowSize` | Sensitivity | Lag | Minimum `train()` | Minimum `detect()` for full signal |
|
|
654
|
+
|-------------|-------------|-----|-------------------|-------------------------------------|
|
|
655
|
+
| 20 | Very high | Low | 50 trades (code minimum) | 40 trades |
|
|
656
|
+
| 50 (default) | Balanced | Moderate | 100 trades (recommended) | 100 trades |
|
|
657
|
+
| 100 | Lower | Higher | 200 trades | 200 trades |
|
|
658
|
+
| 200 | Low | High | 400 trades | 400 trades |
|
|
595
659
|
|
|
596
660
|
On high-volume pairs (BTC/USDT perpetual), 50 trades may span only 1–2 seconds. On low-volume pairs, 50 trades may span minutes. Calibrate to the effective time scale that matters for your entry.
|
|
597
661
|
|
|
@@ -641,21 +705,28 @@ async function onCandle(candles: Candle[], recentTrades: IAggregatedTradeData[])
|
|
|
641
705
|
|
|
642
706
|
## Tests
|
|
643
707
|
|
|
644
|
-
**
|
|
708
|
+
**735 tests** across **18 test files**. All passing. 100% statement/function/line coverage, 98.72% branch (two unreachable `??` guards).
|
|
645
709
|
|
|
646
|
-
| File | Tests |
|
|
647
|
-
|
|
710
|
+
| File | Tests | What is covered |
|
|
711
|
+
|------|-------|-----------------|
|
|
648
712
|
| `hawkes.test.ts` | 20 | Imbalance formula, LL computation, MLE fitting, λ evaluation and decay, anomaly score monotonicity and supercritical clamp |
|
|
649
713
|
| `cusum.test.ts` | 15 | Parameter estimation, state update (pure function), accumulation, alarm + reset, score range, batch detection |
|
|
650
|
-
| `bocpd.test.ts` | 13 | Init state,
|
|
651
|
-
| `detector.test.ts` | 20 | Pre-train guard, isTrained flag, minimum training size, DetectionResult fields, confidence range, empty window, signal score range
|
|
714
|
+
| `bocpd.test.ts` | 13 | Init state, update, probability normalisation, run length growth in stable regime, CP spike on distribution shift, immutability, batch |
|
|
715
|
+
| `detector.test.ts` | 20 | Pre-train guard, isTrained flag, minimum training size, DetectionResult fields, confidence range, empty window, signal score range |
|
|
652
716
|
| `detect.test.ts` | 36 | End-to-end anomaly detection, confidence thresholds, signal composition, edge inputs |
|
|
653
717
|
| `seeded.test.ts` | 67 | Deterministic seeded scenarios covering long/short/neutral bursts across parameter space |
|
|
654
|
-
| `predict.test.ts` | 24 | Direction assignment, trained imbalanceThreshold, imbalancePercentile config, trending vs balanced threshold
|
|
718
|
+
| `predict.test.ts` | 24 | Direction assignment, trained imbalanceThreshold, imbalancePercentile config, trending vs balanced threshold |
|
|
655
719
|
| `invariants.test.ts` | 29 | Monotonicity, score bounds, immutability, score weight validation |
|
|
656
720
|
| `adversarial.test.ts` | 58 | Adversarial inputs: NaN propagation, extreme values, Inf timestamps, zero-qty trades |
|
|
657
721
|
| `falsepositive.test.ts` | 18 | Scenarios that must NOT trigger: gradual drift, HFT clusters, trending market, whale trades, overnight gaps |
|
|
658
|
-
| `edgecases.test.ts` |
|
|
722
|
+
| `edgecases.test.ts` | 80 | Boundary conditions, signal threshold exact values (strict >), detect < windowSize bypass, train twice, cusumBatch multiple alarms |
|
|
723
|
+
| `realdata.test.ts` | 23 | Real BTCUSDT-2025-03-01 data: 4 spike windows + 1 calm baseline |
|
|
724
|
+
| `robustness.test.ts` | 66 | Mathematical invariants: range/symmetry/monotonicity for all functions, BOCPD normalisation Σexp(lp) ≤ 1, 100-case property-based detector test |
|
|
725
|
+
| `extreme.test.ts` | 52 | Stuck-at-extremum: hazardLambda edge cases, μ = 0, extreme β, degenerate Nelder-Mead, Welford drift, β₀ = 0 |
|
|
726
|
+
| `newextreme.test.ts` | 58 | NaN propagation in CUSUM/BOCPD, hawkesAnomalyScore extremes, cusumAnomalyScore h = NaN, prevRL = Inf/NaN, kappa0 = 0 |
|
|
727
|
+
| `thirdextreme.test.ts` | 74 | hazardLambda = 0 collapse, β ≤ 0, hawkesFit n = 0/T = 0, hawkesPeakLambda n = 1/β = 0, volumeImbalance NaN qty, cusumFit NaN filter |
|
|
728
|
+
| `fourthextreme.test.ts` | 63 | hawkesAnomalyScore NaN peak + valid params, cusumUpdate NaN params, cusumAnomalyScore NaN state, bocpdUpdate beta0 = Inf, Infinity qty |
|
|
729
|
+
| `perf.test.ts` | 19 | Latency P95 bounds, throughput (detect(200) ≥ 800/s), scaling ratios, stability over 500 sequential calls |
|
|
659
730
|
|
|
660
731
|
```bash
|
|
661
732
|
npm test
|
package/build/index.cjs
CHANGED
|
@@ -108,6 +108,15 @@ function volumeImbalance(trades) {
|
|
|
108
108
|
const total = buyVol + sellVol;
|
|
109
109
|
if (total === 0)
|
|
110
110
|
return 0;
|
|
111
|
+
// When total = Infinity (an overflowed qty) the division (buyVol−sellVol)/Infinity
|
|
112
|
+
// is NaN even for a one-sided burst (Inf/Inf = NaN in IEEE 754).
|
|
113
|
+
// Compare sides directly to get the correct ±1 / 0 answer.
|
|
114
|
+
// NaN total (from NaN qty) falls through to the regular division — GIGO.
|
|
115
|
+
if (total === Infinity) {
|
|
116
|
+
if (buyVol === sellVol)
|
|
117
|
+
return 0; // both Infinity — symmetric burst
|
|
118
|
+
return buyVol > sellVol ? 1 : -1;
|
|
119
|
+
}
|
|
111
120
|
return (buyVol - sellVol) / total;
|
|
112
121
|
}
|
|
113
122
|
// ─── Log-likelihood (O(n) recursive) ────────────────────────────────────────
|
|
@@ -120,6 +129,11 @@ function hawkesLogLikelihood(timestamps, params) {
|
|
|
120
129
|
const n = timestamps.length;
|
|
121
130
|
if (n === 0)
|
|
122
131
|
return 0;
|
|
132
|
+
// β ≤ 0: kernel exp(−β·dt) does not decay (diverges or flat).
|
|
133
|
+
// Compensator = (α/β)·(1−exp(−β·(T−tᵢ))) → Inf·0 = NaN when β=0.
|
|
134
|
+
// Return −Infinity so the optimizer treats this as an infeasible region.
|
|
135
|
+
if (beta <= 0)
|
|
136
|
+
return -Infinity;
|
|
123
137
|
// Use observation window length, not absolute time, so the LL is invariant
|
|
124
138
|
// to timestamp origin (works for both t0=0 and Unix-epoch seconds).
|
|
125
139
|
const t0 = timestamps[0];
|
|
@@ -251,8 +265,15 @@ function hawkesAnomalyScore(peakLambda, params, empiricalRate = 0) {
|
|
|
251
265
|
const meanLambda = params.mu / (1 - branching);
|
|
252
266
|
// sigmoid centred at 2× baseline
|
|
253
267
|
const sig = (ratio) => 1 / (1 + Math.exp(-(ratio - 2) * 2));
|
|
254
|
-
|
|
255
|
-
|
|
268
|
+
// meanLambda = 0 when mu = 0: ratio = peakLambda / 0 = Infinity (score=1) when
|
|
269
|
+
// peakLambda > 0, or NaN (0/0) when peakLambda = 0. Guard the NaN case.
|
|
270
|
+
// NaN peakLambda (e.g. timestamps contained NaN): treat as "no signal" → 0.
|
|
271
|
+
const intensityScore = meanLambda > 0
|
|
272
|
+
? (Number.isNaN(peakLambda) ? 0 : sig(peakLambda / meanLambda))
|
|
273
|
+
: peakLambda > 0 ? 1 : 0;
|
|
274
|
+
const rateScore = empiricalRate > 0
|
|
275
|
+
? (params.mu > 0 ? sig(empiricalRate / params.mu) : 1)
|
|
276
|
+
: 0;
|
|
256
277
|
return Math.max(intensityScore, rateScore);
|
|
257
278
|
}
|
|
258
279
|
|
|
@@ -278,12 +299,16 @@ function hawkesAnomalyScore(peakLambda, params, empiricalRate = 0) {
|
|
|
278
299
|
* values — e.g. array of |imbalance| from a calm training window.
|
|
279
300
|
*/
|
|
280
301
|
function cusumFit(values, kSigmas = 0.5, hSigmas = 4) {
|
|
281
|
-
|
|
302
|
+
// Drop non-finite values (NaN, ±Infinity) before computing statistics.
|
|
303
|
+
// A single NaN in `values` would make mu0 = NaN, which later poisons the
|
|
304
|
+
// CUSUM accumulator even for valid observations (Math.max(0, x − NaN) = NaN).
|
|
305
|
+
const clean = values.filter(Number.isFinite);
|
|
306
|
+
if (clean.length === 0) {
|
|
282
307
|
return { mu0: 0, std0: 1, k: kSigmas, h: hSigmas };
|
|
283
308
|
}
|
|
284
|
-
const n =
|
|
285
|
-
const mu0 =
|
|
286
|
-
const var0 =
|
|
309
|
+
const n = clean.length;
|
|
310
|
+
const mu0 = clean.reduce((s, x) => s + x, 0) / n;
|
|
311
|
+
const var0 = clean.reduce((s, x) => s + (x - mu0) ** 2, 0) / Math.max(n - 1, 1);
|
|
287
312
|
const std0 = Math.sqrt(var0) || 1e-6;
|
|
288
313
|
return {
|
|
289
314
|
mu0,
|
|
@@ -297,7 +322,18 @@ function cusumFit(values, kSigmas = 0.5, hSigmas = 4) {
|
|
|
297
322
|
* Pure function — does not mutate input.
|
|
298
323
|
*/
|
|
299
324
|
function cusumUpdate(state, x, params) {
|
|
325
|
+
// Non-finite x (NaN, ±Infinity that doesn't trigger alarm) would poison the
|
|
326
|
+
// accumulators via Math.max(0, NaN) = NaN. Skip the update entirely for NaN;
|
|
327
|
+
// ±Infinity is handled naturally (Inf ≥ h → alarm fires and resets state).
|
|
328
|
+
if (Number.isNaN(x)) {
|
|
329
|
+
return { alarm: false, preResetState: state, state };
|
|
330
|
+
}
|
|
300
331
|
const { mu0, k, h } = params;
|
|
332
|
+
// NaN in mu0 or k also poisons the accumulator (x − NaN = NaN).
|
|
333
|
+
// Treat corrupt params as a no-op, same semantics as x=NaN.
|
|
334
|
+
if (!Number.isFinite(mu0) || !Number.isFinite(k)) {
|
|
335
|
+
return { alarm: false, preResetState: state, state };
|
|
336
|
+
}
|
|
301
337
|
const sPos = Math.max(0, state.sPos + (x - mu0) - k);
|
|
302
338
|
const sNeg = Math.max(0, state.sNeg - (x - mu0) - k);
|
|
303
339
|
const alarm = sPos >= h || sNeg >= h;
|
|
@@ -321,7 +357,12 @@ function cusumInitState() {
|
|
|
321
357
|
*/
|
|
322
358
|
function cusumAnomalyScore(state, params) {
|
|
323
359
|
const s = Math.max(state.sPos, state.sNeg);
|
|
324
|
-
|
|
360
|
+
// Math.max(NaN, finite) = NaN in JS (unlike some other languages).
|
|
361
|
+
// A poisoned state must not propagate NaN to the confidence score.
|
|
362
|
+
if (Number.isNaN(s))
|
|
363
|
+
return 0;
|
|
364
|
+
// NaN <= 0 is false in IEEE 754, so guard explicitly against non-finite h.
|
|
365
|
+
if (params.h <= 0 || !Number.isFinite(params.h))
|
|
325
366
|
return 0;
|
|
326
367
|
return Math.min(s / params.h, 1);
|
|
327
368
|
}
|
|
@@ -389,6 +430,7 @@ function bocpdInitState() {
|
|
|
389
430
|
logProbs: [0], // P(r₀ = 0) = 1 → log = 0
|
|
390
431
|
suffStats: [ssEmpty()],
|
|
391
432
|
t: 0,
|
|
433
|
+
minRl: 0,
|
|
392
434
|
};
|
|
393
435
|
}
|
|
394
436
|
/**
|
|
@@ -426,12 +468,20 @@ function bocpdUpdate(state, x, prior, hazardLambda = 200) {
|
|
|
426
468
|
const keep = normLogProbs.map((lp) => lp > PRUNE_THRESH);
|
|
427
469
|
const prunedLogProbs = normLogProbs.filter((_, i) => keep[i]);
|
|
428
470
|
const prunedSuffStats = newSuffStats.filter((_, i) => keep[i]);
|
|
471
|
+
// Track actual run-length offset after pruning.
|
|
472
|
+
// normLogProbs[0] → RL 0; normLogProbs[i] (i>0) → RL state.minRl + i.
|
|
473
|
+
// When H=0 (hazardLambda=∞) the changepoint entry (i=0) gets log-prob −∞ and is
|
|
474
|
+
// pruned; the first surviving entry then represents RL state.minRl + firstKept, not 0.
|
|
475
|
+
const firstKept = keep.indexOf(true);
|
|
476
|
+
const newMinRl = firstKept <= 0 ? 0 : state.minRl + firstKept;
|
|
429
477
|
const newState = {
|
|
430
478
|
logProbs: prunedLogProbs,
|
|
431
479
|
suffStats: prunedSuffStats,
|
|
432
480
|
t: state.t + 1,
|
|
481
|
+
minRl: newMinRl,
|
|
433
482
|
};
|
|
434
|
-
// MAP run length
|
|
483
|
+
// MAP run length: index in normLogProbs → actual run-length.
|
|
484
|
+
// normLogProbs[0] → RL 0; normLogProbs[r] (r>0) → RL state.minRl + r.
|
|
435
485
|
let mapR = 0;
|
|
436
486
|
let mapLP = -Infinity;
|
|
437
487
|
for (let r = 0; r < normLogProbs.length; r++) {
|
|
@@ -440,10 +490,14 @@ function bocpdUpdate(state, x, prior, hazardLambda = 200) {
|
|
|
440
490
|
mapR = r;
|
|
441
491
|
}
|
|
442
492
|
}
|
|
493
|
+
const mapRunLength = mapR === 0 ? 0 : state.minRl + mapR;
|
|
494
|
+
// normLogProbs[0] can be NaN when all log-probs are NaN (e.g. x=NaN, kappa0=0).
|
|
495
|
+
// `?? -Infinity` only guards undefined/null, not NaN. Clamp to 0 explicitly.
|
|
496
|
+
const rawCp = Math.exp(normLogProbs[0] ?? -Infinity);
|
|
443
497
|
return {
|
|
444
498
|
state: newState,
|
|
445
|
-
mapRunLength
|
|
446
|
-
cpProbability:
|
|
499
|
+
mapRunLength,
|
|
500
|
+
cpProbability: Number.isFinite(rawCp) ? rawCp : 0,
|
|
447
501
|
};
|
|
448
502
|
}
|
|
449
503
|
// ─── Score ────────────────────────────────────────────────────────────────────
|
|
@@ -474,7 +528,9 @@ function bocpdUpdate(state, x, prior, hazardLambda = 200) {
|
|
|
474
528
|
* @param prevRunLength mapRunLength from the previous bocpdUpdate call.
|
|
475
529
|
*/
|
|
476
530
|
function bocpdAnomalyScore(result, prevRunLength = 0) {
|
|
477
|
-
|
|
531
|
+
// NaN <= 0 is false (IEEE 754), and (Infinity - finite) / Infinity = NaN.
|
|
532
|
+
// Guard both: require prevRunLength to be a finite positive number.
|
|
533
|
+
if (!Number.isFinite(prevRunLength) || prevRunLength <= 0)
|
|
478
534
|
return 0;
|
|
479
535
|
const drop = Math.max(0, (prevRunLength - result.mapRunLength) / prevRunLength);
|
|
480
536
|
return 1 / (1 + Math.exp(-(drop - 0.5) * 8));
|
|
@@ -538,6 +594,12 @@ class VolumeAnomalyDetector {
|
|
|
538
594
|
constructor(config = {}) {
|
|
539
595
|
this.cfg = { ...DEFAULTS, ...config };
|
|
540
596
|
if (config.scoreWeights) {
|
|
597
|
+
if (!config.scoreWeights.every(Number.isFinite)) {
|
|
598
|
+
throw new Error(`scoreWeights must be finite numbers, got ${config.scoreWeights}`);
|
|
599
|
+
}
|
|
600
|
+
if (config.scoreWeights.some((w) => w < 0)) {
|
|
601
|
+
throw new Error(`scoreWeights must be non-negative, got ${config.scoreWeights}`);
|
|
602
|
+
}
|
|
541
603
|
const sum = config.scoreWeights.reduce((a, b) => a + b, 0);
|
|
542
604
|
if (Math.abs(sum - 1) > 1e-6) {
|
|
543
605
|
throw new Error(`scoreWeights must sum to 1, got ${sum}`);
|
package/build/index.mjs
CHANGED
|
@@ -106,6 +106,15 @@ function volumeImbalance(trades) {
|
|
|
106
106
|
const total = buyVol + sellVol;
|
|
107
107
|
if (total === 0)
|
|
108
108
|
return 0;
|
|
109
|
+
// When total = Infinity (an overflowed qty) the division (buyVol−sellVol)/Infinity
|
|
110
|
+
// is NaN even for a one-sided burst (Inf/Inf = NaN in IEEE 754).
|
|
111
|
+
// Compare sides directly to get the correct ±1 / 0 answer.
|
|
112
|
+
// NaN total (from NaN qty) falls through to the regular division — GIGO.
|
|
113
|
+
if (total === Infinity) {
|
|
114
|
+
if (buyVol === sellVol)
|
|
115
|
+
return 0; // both Infinity — symmetric burst
|
|
116
|
+
return buyVol > sellVol ? 1 : -1;
|
|
117
|
+
}
|
|
109
118
|
return (buyVol - sellVol) / total;
|
|
110
119
|
}
|
|
111
120
|
// ─── Log-likelihood (O(n) recursive) ────────────────────────────────────────
|
|
@@ -118,6 +127,11 @@ function hawkesLogLikelihood(timestamps, params) {
|
|
|
118
127
|
const n = timestamps.length;
|
|
119
128
|
if (n === 0)
|
|
120
129
|
return 0;
|
|
130
|
+
// β ≤ 0: kernel exp(−β·dt) does not decay (diverges or flat).
|
|
131
|
+
// Compensator = (α/β)·(1−exp(−β·(T−tᵢ))) → Inf·0 = NaN when β=0.
|
|
132
|
+
// Return −Infinity so the optimizer treats this as an infeasible region.
|
|
133
|
+
if (beta <= 0)
|
|
134
|
+
return -Infinity;
|
|
121
135
|
// Use observation window length, not absolute time, so the LL is invariant
|
|
122
136
|
// to timestamp origin (works for both t0=0 and Unix-epoch seconds).
|
|
123
137
|
const t0 = timestamps[0];
|
|
@@ -249,8 +263,15 @@ function hawkesAnomalyScore(peakLambda, params, empiricalRate = 0) {
|
|
|
249
263
|
const meanLambda = params.mu / (1 - branching);
|
|
250
264
|
// sigmoid centred at 2× baseline
|
|
251
265
|
const sig = (ratio) => 1 / (1 + Math.exp(-(ratio - 2) * 2));
|
|
252
|
-
|
|
253
|
-
|
|
266
|
+
// meanLambda = 0 when mu = 0: ratio = peakLambda / 0 = Infinity (score=1) when
|
|
267
|
+
// peakLambda > 0, or NaN (0/0) when peakLambda = 0. Guard the NaN case.
|
|
268
|
+
// NaN peakLambda (e.g. timestamps contained NaN): treat as "no signal" → 0.
|
|
269
|
+
const intensityScore = meanLambda > 0
|
|
270
|
+
? (Number.isNaN(peakLambda) ? 0 : sig(peakLambda / meanLambda))
|
|
271
|
+
: peakLambda > 0 ? 1 : 0;
|
|
272
|
+
const rateScore = empiricalRate > 0
|
|
273
|
+
? (params.mu > 0 ? sig(empiricalRate / params.mu) : 1)
|
|
274
|
+
: 0;
|
|
254
275
|
return Math.max(intensityScore, rateScore);
|
|
255
276
|
}
|
|
256
277
|
|
|
@@ -276,12 +297,16 @@ function hawkesAnomalyScore(peakLambda, params, empiricalRate = 0) {
|
|
|
276
297
|
* values — e.g. array of |imbalance| from a calm training window.
|
|
277
298
|
*/
|
|
278
299
|
function cusumFit(values, kSigmas = 0.5, hSigmas = 4) {
|
|
279
|
-
|
|
300
|
+
// Drop non-finite values (NaN, ±Infinity) before computing statistics.
|
|
301
|
+
// A single NaN in `values` would make mu0 = NaN, which later poisons the
|
|
302
|
+
// CUSUM accumulator even for valid observations (Math.max(0, x − NaN) = NaN).
|
|
303
|
+
const clean = values.filter(Number.isFinite);
|
|
304
|
+
if (clean.length === 0) {
|
|
280
305
|
return { mu0: 0, std0: 1, k: kSigmas, h: hSigmas };
|
|
281
306
|
}
|
|
282
|
-
const n =
|
|
283
|
-
const mu0 =
|
|
284
|
-
const var0 =
|
|
307
|
+
const n = clean.length;
|
|
308
|
+
const mu0 = clean.reduce((s, x) => s + x, 0) / n;
|
|
309
|
+
const var0 = clean.reduce((s, x) => s + (x - mu0) ** 2, 0) / Math.max(n - 1, 1);
|
|
285
310
|
const std0 = Math.sqrt(var0) || 1e-6;
|
|
286
311
|
return {
|
|
287
312
|
mu0,
|
|
@@ -295,7 +320,18 @@ function cusumFit(values, kSigmas = 0.5, hSigmas = 4) {
|
|
|
295
320
|
* Pure function — does not mutate input.
|
|
296
321
|
*/
|
|
297
322
|
function cusumUpdate(state, x, params) {
|
|
323
|
+
// Non-finite x (NaN, ±Infinity that doesn't trigger alarm) would poison the
|
|
324
|
+
// accumulators via Math.max(0, NaN) = NaN. Skip the update entirely for NaN;
|
|
325
|
+
// ±Infinity is handled naturally (Inf ≥ h → alarm fires and resets state).
|
|
326
|
+
if (Number.isNaN(x)) {
|
|
327
|
+
return { alarm: false, preResetState: state, state };
|
|
328
|
+
}
|
|
298
329
|
const { mu0, k, h } = params;
|
|
330
|
+
// NaN in mu0 or k also poisons the accumulator (x − NaN = NaN).
|
|
331
|
+
// Treat corrupt params as a no-op, same semantics as x=NaN.
|
|
332
|
+
if (!Number.isFinite(mu0) || !Number.isFinite(k)) {
|
|
333
|
+
return { alarm: false, preResetState: state, state };
|
|
334
|
+
}
|
|
299
335
|
const sPos = Math.max(0, state.sPos + (x - mu0) - k);
|
|
300
336
|
const sNeg = Math.max(0, state.sNeg - (x - mu0) - k);
|
|
301
337
|
const alarm = sPos >= h || sNeg >= h;
|
|
@@ -319,7 +355,12 @@ function cusumInitState() {
|
|
|
319
355
|
*/
|
|
320
356
|
function cusumAnomalyScore(state, params) {
|
|
321
357
|
const s = Math.max(state.sPos, state.sNeg);
|
|
322
|
-
|
|
358
|
+
// Math.max(NaN, finite) = NaN in JS (unlike some other languages).
|
|
359
|
+
// A poisoned state must not propagate NaN to the confidence score.
|
|
360
|
+
if (Number.isNaN(s))
|
|
361
|
+
return 0;
|
|
362
|
+
// NaN <= 0 is false in IEEE 754, so guard explicitly against non-finite h.
|
|
363
|
+
if (params.h <= 0 || !Number.isFinite(params.h))
|
|
323
364
|
return 0;
|
|
324
365
|
return Math.min(s / params.h, 1);
|
|
325
366
|
}
|
|
@@ -387,6 +428,7 @@ function bocpdInitState() {
|
|
|
387
428
|
logProbs: [0], // P(r₀ = 0) = 1 → log = 0
|
|
388
429
|
suffStats: [ssEmpty()],
|
|
389
430
|
t: 0,
|
|
431
|
+
minRl: 0,
|
|
390
432
|
};
|
|
391
433
|
}
|
|
392
434
|
/**
|
|
@@ -424,12 +466,20 @@ function bocpdUpdate(state, x, prior, hazardLambda = 200) {
|
|
|
424
466
|
const keep = normLogProbs.map((lp) => lp > PRUNE_THRESH);
|
|
425
467
|
const prunedLogProbs = normLogProbs.filter((_, i) => keep[i]);
|
|
426
468
|
const prunedSuffStats = newSuffStats.filter((_, i) => keep[i]);
|
|
469
|
+
// Track actual run-length offset after pruning.
|
|
470
|
+
// normLogProbs[0] → RL 0; normLogProbs[i] (i>0) → RL state.minRl + i.
|
|
471
|
+
// When H=0 (hazardLambda=∞) the changepoint entry (i=0) gets log-prob −∞ and is
|
|
472
|
+
// pruned; the first surviving entry then represents RL state.minRl + firstKept, not 0.
|
|
473
|
+
const firstKept = keep.indexOf(true);
|
|
474
|
+
const newMinRl = firstKept <= 0 ? 0 : state.minRl + firstKept;
|
|
427
475
|
const newState = {
|
|
428
476
|
logProbs: prunedLogProbs,
|
|
429
477
|
suffStats: prunedSuffStats,
|
|
430
478
|
t: state.t + 1,
|
|
479
|
+
minRl: newMinRl,
|
|
431
480
|
};
|
|
432
|
-
// MAP run length
|
|
481
|
+
// MAP run length: index in normLogProbs → actual run-length.
|
|
482
|
+
// normLogProbs[0] → RL 0; normLogProbs[r] (r>0) → RL state.minRl + r.
|
|
433
483
|
let mapR = 0;
|
|
434
484
|
let mapLP = -Infinity;
|
|
435
485
|
for (let r = 0; r < normLogProbs.length; r++) {
|
|
@@ -438,10 +488,14 @@ function bocpdUpdate(state, x, prior, hazardLambda = 200) {
|
|
|
438
488
|
mapR = r;
|
|
439
489
|
}
|
|
440
490
|
}
|
|
491
|
+
const mapRunLength = mapR === 0 ? 0 : state.minRl + mapR;
|
|
492
|
+
// normLogProbs[0] can be NaN when all log-probs are NaN (e.g. x=NaN, kappa0=0).
|
|
493
|
+
// `?? -Infinity` only guards undefined/null, not NaN. Clamp to 0 explicitly.
|
|
494
|
+
const rawCp = Math.exp(normLogProbs[0] ?? -Infinity);
|
|
441
495
|
return {
|
|
442
496
|
state: newState,
|
|
443
|
-
mapRunLength
|
|
444
|
-
cpProbability:
|
|
497
|
+
mapRunLength,
|
|
498
|
+
cpProbability: Number.isFinite(rawCp) ? rawCp : 0,
|
|
445
499
|
};
|
|
446
500
|
}
|
|
447
501
|
// ─── Score ────────────────────────────────────────────────────────────────────
|
|
@@ -472,7 +526,9 @@ function bocpdUpdate(state, x, prior, hazardLambda = 200) {
|
|
|
472
526
|
* @param prevRunLength mapRunLength from the previous bocpdUpdate call.
|
|
473
527
|
*/
|
|
474
528
|
function bocpdAnomalyScore(result, prevRunLength = 0) {
|
|
475
|
-
|
|
529
|
+
// NaN <= 0 is false (IEEE 754), and (Infinity - finite) / Infinity = NaN.
|
|
530
|
+
// Guard both: require prevRunLength to be a finite positive number.
|
|
531
|
+
if (!Number.isFinite(prevRunLength) || prevRunLength <= 0)
|
|
476
532
|
return 0;
|
|
477
533
|
const drop = Math.max(0, (prevRunLength - result.mapRunLength) / prevRunLength);
|
|
478
534
|
return 1 / (1 + Math.exp(-(drop - 0.5) * 8));
|
|
@@ -536,6 +592,12 @@ class VolumeAnomalyDetector {
|
|
|
536
592
|
constructor(config = {}) {
|
|
537
593
|
this.cfg = { ...DEFAULTS, ...config };
|
|
538
594
|
if (config.scoreWeights) {
|
|
595
|
+
if (!config.scoreWeights.every(Number.isFinite)) {
|
|
596
|
+
throw new Error(`scoreWeights must be finite numbers, got ${config.scoreWeights}`);
|
|
597
|
+
}
|
|
598
|
+
if (config.scoreWeights.some((w) => w < 0)) {
|
|
599
|
+
throw new Error(`scoreWeights must be non-negative, got ${config.scoreWeights}`);
|
|
600
|
+
}
|
|
539
601
|
const sum = config.scoreWeights.reduce((a, b) => a + b, 0);
|
|
540
602
|
if (Math.abs(sum - 1) > 1e-6) {
|
|
541
603
|
throw new Error(`scoreWeights must sum to 1, got ${sum}`);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "volume-anomaly",
|
|
3
|
-
"version": "
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "1.2.3",
|
|
4
|
+
"description": "Statistical volume anomaly detection for trade streams - Hawkes process, CUSUM, and Bayesian Online Changepoint Detection (BOCPD). Zero dependencies. TypeScript.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./build/index.cjs",
|
|
7
7
|
"module": "./build/index.mjs",
|
|
@@ -24,6 +24,19 @@
|
|
|
24
24
|
"test:watch": "vitest",
|
|
25
25
|
"prepublishOnly": "npm run build"
|
|
26
26
|
},
|
|
27
|
+
"keywords": [
|
|
28
|
+
"anomaly-detection",
|
|
29
|
+
"volume",
|
|
30
|
+
"order-flow",
|
|
31
|
+
"hawkes-process",
|
|
32
|
+
"cusum",
|
|
33
|
+
"bocpd",
|
|
34
|
+
"changepoint-detection",
|
|
35
|
+
"trading",
|
|
36
|
+
"market-microstructure",
|
|
37
|
+
"typescript",
|
|
38
|
+
"zero-dependencies"
|
|
39
|
+
],
|
|
27
40
|
"devDependencies": {
|
|
28
41
|
"@rollup/plugin-typescript": "^12.3.0",
|
|
29
42
|
"@types/node": "^20.10.0",
|