@diogonzafe/tokenwatch 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -542,6 +542,34 @@ Budget webhook payload:
542
542
  { "text": "[tokenwatch] Budget alert: user \"user_123\" reached $1.0031 USD (threshold: $1)" }
543
543
  ```
544
544
 
545
+ ### Cost anomaly detection
546
+
547
+ Alert when a single call is anomalously expensive compared to the recent average — catches runaway agents, infinite loops, or abusive usage:
548
+
549
+ ```ts
550
+ const tracker = createTracker({
551
+ anomalyDetection: {
552
+ multiplierThreshold: 3, // alert if call cost > 3× rolling average
553
+ webhookUrl: 'https://hooks.slack.com/...',
554
+ windowHours: 24, // lookback window (default: 24h)
555
+ mode: 'once', // 'once' (default) or 'always'
556
+ },
557
+ })
558
+ ```
559
+
560
+ Checks two axes independently for each tracked call:
561
+ - **Per-model** — compares the call's cost to the average of all prior calls for that model within `windowHours`
562
+ - **Per-user** — same check scoped to a specific user (only fires when `userId` is present)
563
+
564
+ No alert fires on the first call for a given model or user (no history = no baseline). In `'once'` mode, the per-model and per-user alert latches are cleared when `reset()` is called, so alerts can fire again afterwards.
565
+
566
+ Anomaly webhook payload example:
567
+ ```json
568
+ { "text": "[tokenwatch] Anomaly: model \"gpt-4o\" call cost $0.5000 is 42.0x above 24h average ($0.0119)" }
569
+ ```
570
+
571
+ `mode: 'always'` fires on every anomalous call instead of latching after the first.
572
+
545
573
  ---
546
574
 
547
575
  ## CLI
@@ -593,6 +621,89 @@ Data updates automatically every 3 seconds without refreshing the page. Requires
593
621
 
594
622
  ---
595
623
 
624
+ ## Production Usage
625
+
626
+ ### Storage choice
627
+
628
+ | Setup | Recommended storage |
629
+ |---|---|
630
+ | Single process (monolith, lambda, single pod) | `'sqlite'` — zero config, persists across restarts |
631
+ | Multi-instance (Kubernetes, PaaS with ≥2 pods) | `PostgresStorage` / `MySQLStorage` / `MongoStorage` — shared, unified data |
632
+ | Ephemeral / testing | `'memory'` (default) — resets on restart |
633
+
634
+ ### CI / test environments
635
+
636
+ Disable network calls and staleness warnings in CI:
637
+
638
+ ```ts
639
+ const tracker = createTracker({
640
+ syncPrices: false, // skip remote price fetch — use bundled prices
641
+ warnIfStaleAfterHours: 0, // suppress staleness warning
642
+ })
643
+ ```
644
+
645
+ ### On-prem / air-gapped deployments
646
+
647
+ The daily GitHub Actions workflow updates `prices.json` and publishes a new npm package. Teams that cannot reach GitHub at runtime have two options:
648
+
649
+ 1. **Pin and vendor** — copy `prices.json` from the installed package into your repo and commit it. Pass overrides via `customPrices` for any new models.
650
+ 2. **Self-host the sync** — fork the `scripts/scrape-prices.mjs` script and run it on your own schedule, pointing to your internal registry.
651
+
652
+ Either way, set `syncPrices: false` so the library doesn't try to fetch from GitHub at runtime.
653
+
654
+ ### Anomaly detection in production
655
+
656
+ Enable `anomalyDetection` to catch runaway agents or abuse early:
657
+
658
+ ```ts
659
+ const tracker = createTracker({
660
+ storage: new PostgresStorage(pool),
661
+ anomalyDetection: {
662
+ multiplierThreshold: 3, // alert if a call costs more than 3× the rolling average
663
+ webhookUrl: 'https://hooks.slack.com/...',
664
+ windowHours: 24, // baseline window (default: 24h)
665
+ },
666
+ })
667
+ ```
668
+
669
+ ---
670
+
671
+ ## OpenTelemetry Exporter
672
+
673
+ Push tracked usage as metrics to any OTel-compatible backend (Datadog, Honeycomb, Grafana, New Relic, etc.) without changing your existing instrumentation:
674
+
675
+ ```bash
676
+ npm install @opentelemetry/api
677
+ ```
678
+
679
+ ```ts
680
+ import { createTracker } from '@diogonzafe/tokenwatch'
681
+ import { OTelExporter } from '@diogonzafe/tokenwatch/exporters'
682
+
683
+ const tracker = createTracker({
684
+ exporter: new OTelExporter(), // uses the globally-registered MeterProvider
685
+ })
686
+ ```
687
+
688
+ Four metrics are emitted per call, all with `model`, `session.id`, `user.id`, and `feature` attributes (optional fields omitted when absent):
689
+
690
+ | Metric | Type | Description |
691
+ |---|---|---|
692
+ | `tokenwatch.calls` | Counter | Number of LLM API calls |
693
+ | `tokenwatch.input_tokens` | Counter | Input tokens (includes cached + cache-creation) |
694
+ | `tokenwatch.output_tokens` | Counter | Output tokens |
695
+ | `tokenwatch.cost_usd` | Histogram | Cost per call in USD |
696
+
697
+ You must configure a `MeterProvider` before creating the exporter (e.g. using the OpenTelemetry SDK). `OTelExporter` has no compile-time dependency on `@opentelemetry/api` — it resolves it at runtime and throws a helpful error if the package is not installed.
698
+
699
+ Custom meter name:
700
+
701
+ ```ts
702
+ new OTelExporter({ meterName: 'my-service' })
703
+ ```
704
+
705
+ ---
706
+
596
707
  ## Privacy & Security
597
708
 
598
709
  - Prompt and response **content is never read or stored** — only token counts and model names
@@ -1,4 +1,4 @@
1
- import { I as IStorage, U as UsageEntry } from './index-CJKk1hHw.cjs';
1
+ import { d as IStorage, U as UsageEntry } from './index-D9xq0RNg.cjs';
2
2
 
3
3
  /**
4
4
  * IStorage adapter for PostgreSQL using the `pg` driver.
@@ -1,4 +1,4 @@
1
- import { I as IStorage, U as UsageEntry } from './index-CJKk1hHw.js';
1
+ import { d as IStorage, U as UsageEntry } from './index-D9xq0RNg.js';
2
2
 
3
3
  /**
4
4
  * IStorage adapter for PostgreSQL using the `pg` driver.
package/dist/cli.js CHANGED
@@ -245,7 +245,7 @@ function maybeSuggestCheaperModel(model, costUSD, inputTokens, outputTokens, lay
245
245
 
246
246
  // prices.json
247
247
  var prices_default = {
248
- updated_at: "2026-04-22",
248
+ updated_at: "2026-04-23",
249
249
  source: "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
250
250
  models: {
251
251
  "gpt-4o": {
@@ -1594,7 +1594,14 @@ var TrackerConfigSchema = z.object({
1594
1594
  perUser: BudgetConfigSchema.optional(),
1595
1595
  perSession: BudgetConfigSchema.optional()
1596
1596
  }).optional(),
1597
- suggestions: z.boolean().optional().default(false)
1597
+ suggestions: z.boolean().optional().default(false),
1598
+ anomalyDetection: z.object({
1599
+ multiplierThreshold: z.number().positive(),
1600
+ webhookUrl: z.string().url(),
1601
+ windowHours: z.number().positive().optional().default(24),
1602
+ mode: z.enum(["once", "always"]).optional().default("once")
1603
+ }).optional(),
1604
+ exporter: z.custom((v) => v !== null && typeof v === "object" && typeof v.export === "function").optional()
1598
1605
  });
1599
1606
  function createTracker(config = {}) {
1600
1607
  const parsed = TrackerConfigSchema.safeParse(config);
@@ -1611,7 +1618,9 @@ ${issues}`);
1611
1618
  customPrices,
1612
1619
  warnIfStaleAfterHours,
1613
1620
  budgets,
1614
- suggestions
1621
+ suggestions,
1622
+ anomalyDetection,
1623
+ exporter
1615
1624
  } = parsed.data;
1616
1625
  const storage = typeof storageOption === "object" ? storageOption : createStorage(storageOption);
1617
1626
  let remotePrices;
@@ -1644,6 +1653,7 @@ ${issues}`);
1644
1653
  let alertFired = false;
1645
1654
  const firedUserAlerts = /* @__PURE__ */ new Set();
1646
1655
  const firedSessionAlerts = /* @__PURE__ */ new Set();
1656
+ const firedAnomalyKeys = /* @__PURE__ */ new Set();
1647
1657
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
1648
1658
  function resolveModelPrice(model) {
1649
1659
  maybeWarnStaleness();
@@ -1668,7 +1678,12 @@ ${issues}`);
1668
1678
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
1669
1679
  };
1670
1680
  storage.record(full);
1681
+ if (exporter) {
1682
+ Promise.resolve(exporter.export(full)).catch(() => {
1683
+ });
1684
+ }
1671
1685
  maybeFireAlerts(full);
1686
+ if (anomalyDetection) maybeDetectAnomaly(full);
1672
1687
  if (suggestions) {
1673
1688
  maybeSuggestCheaperModel(entry.model, costUSD, entry.inputTokens, entry.outputTokens, {
1674
1689
  bundledPrices,
@@ -1838,11 +1853,56 @@ ${issues}`);
1838
1853
  basedOnPeriod: { from: first, to: last }
1839
1854
  };
1840
1855
  }
1856
/**
 * Fire an anomaly webhook when `entry` costs more than
 * `anomalyDetection.multiplierThreshold` times the average cost of earlier
 * calls inside the lookback window.
 *
 * Two axes are checked independently: per-user (only when `entry.userId` is
 * set) and per-model. Each check is fire-and-forget; failures are swallowed
 * so anomaly detection never breaks the tracking call that triggered it.
 *
 * In "once" mode a key ("user:<id>" / "model:<name>") is latched in
 * `firedAnomalyKeys` only after an alert has actually been sent. Latching
 * before the asynchronous history lookup would make every call that arrives
 * while a lookup is pending skip its own check, silently dropping real
 * anomalies.
 *
 * @param {object} entry - The usage entry that was just tracked; reads
 *   `costUSD`, `timestamp`, `model` and the optional `userId`.
 * @returns {void}
 */
function maybeDetectAnomaly(entry) {
  // Free or unpriced calls can never exceed a positive baseline.
  if (entry.costUSD <= 0) return;

  const {
    multiplierThreshold,
    webhookUrl,
    windowHours,
    mode: configuredMode
  } = anomalyDetection;
  const lookbackHours = windowHours ?? 24;
  const latchOnce = (configuredMode ?? "once") !== "always";
  const windowStart = Date.now() - lookbackHours * 60 * 60 * 1e3;
  const entryTs = new Date(entry.timestamp).getTime();

  function checkEntity(key, label, predicate) {
    if (latchOnce && firedAnomalyKeys.has(key)) return;

    // storage.getAll() is wrapped so a plain value and a promise are handled alike.
    Promise.resolve(storage.getAll()).then((all) => {
      // Another in-flight check may have fired for this key in the meantime.
      if (latchOnce && firedAnomalyKeys.has(key)) return;

      let total = 0;
      let count = 0;
      for (const e of all) {
        if (!predicate(e)) continue;
        const ts = new Date(e.timestamp).getTime();
        // Strictly earlier than this call: keeps the entry itself, and calls
        // recorded while the lookup was pending, out of the baseline.
        if (ts >= windowStart && ts < entryTs) {
          total += e.costUSD;
          count += 1;
        }
      }

      // No history means no baseline to compare against.
      if (count === 0) return;

      const avg = total / count;
      if (avg <= 0 || entry.costUSD <= avg * multiplierThreshold) return;

      const multiple = (entry.costUSD / avg).toFixed(1);
      fireWebhook(webhookUrl, {
        text: `[tokenwatch] Anomaly: ${label} call cost $${entry.costUSD.toFixed(4)} is ${multiple}x above ${lookbackHours}h average ($${avg.toFixed(4)})`
      });
      // Latch only after the webhook call was issued, so a synchronous
      // failure above leaves the key free to alert on a later call.
      if (latchOnce) firedAnomalyKeys.add(key);
    }).catch(() => {
      // Deliberately best-effort: a storage or webhook failure must not
      // surface as an unhandled rejection in the host application.
    });
  }

  if (entry.userId) {
    checkEntity(
      `user:${entry.userId}`,
      `user "${entry.userId}"`,
      (e) => e.userId === entry.userId
    );
  }
  checkEntity(
    `model:${entry.model}`,
    `model "${entry.model}"`,
    (e) => e.model === entry.model
  );
}
1841
1900
  async function reset() {
1842
1901
  await Promise.resolve(storage.clearAll());
1843
1902
  alertFired = false;
1844
1903
  firedUserAlerts.clear();
1845
1904
  firedSessionAlerts.clear();
1905
+ firedAnomalyKeys.clear();
1846
1906
  }
1847
1907
  async function resetSession(sessionId) {
1848
1908
  await Promise.resolve(storage.clearSession(sessionId));